ozone: evdev: Sync caps lock LED state to evdev
[chromium-blink-merge.git] / components / autofill / core / browser / address_field.cc
blob0791eb3c9daf45029ad4f08f51c8f09ee7df2ba5
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/autofill/core/browser/address_field.h"
7 #include <stddef.h>
9 #include "base/logging.h"
10 #include "base/memory/scoped_ptr.h"
11 #include "base/strings/string16.h"
12 #include "base/strings/string_util.h"
13 #include "base/strings/utf_string_conversions.h"
14 #include "components/autofill/core/browser/autofill_field.h"
15 #include "components/autofill/core/browser/autofill_regex_constants.h"
16 #include "components/autofill/core/browser/autofill_scanner.h"
17 #include "components/autofill/core/browser/field_types.h"
19 using base::UTF8ToUTF16;
21 namespace autofill {
23 scoped_ptr<FormField> AddressField::Parse(AutofillScanner* scanner) {
24 if (scanner->IsEnd())
25 return NULL;
27 scoped_ptr<AddressField> address_field(new AddressField);
28 const AutofillField* const initial_field = scanner->Cursor();
29 size_t saved_cursor = scanner->SaveCursor();
31 base::string16 attention_ignored = UTF8ToUTF16(kAttentionIgnoredRe);
32 base::string16 region_ignored = UTF8ToUTF16(kRegionIgnoredRe);
34 // Allow address fields to appear in any order.
35 size_t begin_trailing_non_labeled_fields = 0;
36 bool has_trailing_non_labeled_fields = false;
37 while (!scanner->IsEnd()) {
38 const size_t cursor = scanner->SaveCursor();
39 if (address_field->ParseAddressLines(scanner) ||
40 address_field->ParseCity(scanner) ||
41 address_field->ParseState(scanner) ||
42 address_field->ParseZipCode(scanner) ||
43 address_field->ParseCountry(scanner) ||
44 address_field->ParseCompany(scanner)) {
45 has_trailing_non_labeled_fields = false;
46 continue;
47 } else if (ParseField(scanner, attention_ignored, NULL) ||
48 ParseField(scanner, region_ignored, NULL)) {
49 // We ignore the following:
50 // * Attention.
51 // * Province/Region/Other.
52 continue;
53 } else if (scanner->Cursor() != initial_field &&
54 ParseEmptyLabel(scanner, NULL)) {
55 // Ignore non-labeled fields within an address; the page
56 // MapQuest Driving Directions North America.html contains such a field.
57 // We only ignore such fields after we've parsed at least one other field;
58 // otherwise we'd effectively parse address fields before other field
59 // types after any non-labeled fields, and we want email address fields to
60 // have precedence since some pages contain fields labeled
61 // "Email address".
62 if (!has_trailing_non_labeled_fields) {
63 has_trailing_non_labeled_fields = true;
64 begin_trailing_non_labeled_fields = cursor;
67 continue;
68 } else {
69 // No field found.
70 break;
74 // If we have identified any address fields in this field then it should be
75 // added to the list of fields.
76 if (address_field->company_ ||
77 address_field->address1_ ||
78 address_field->address2_ ||
79 address_field->address3_ ||
80 address_field->street_address_ ||
81 address_field->city_ ||
82 address_field->state_ ||
83 address_field->zip_ ||
84 address_field->zip4_ ||
85 address_field->country_) {
86 // Don't slurp non-labeled fields at the end into the address.
87 if (has_trailing_non_labeled_fields)
88 scanner->RewindTo(begin_trailing_non_labeled_fields);
90 return address_field.Pass();
93 scanner->RewindTo(saved_cursor);
94 return NULL;
97 AddressField::AddressField()
98 : company_(NULL),
99 address1_(NULL),
100 address2_(NULL),
101 address3_(NULL),
102 street_address_(NULL),
103 city_(NULL),
104 state_(NULL),
105 zip_(NULL),
106 zip4_(NULL),
107 country_(NULL) {
110 bool AddressField::ClassifyField(ServerFieldTypeMap* map) const {
111 // The page can request the address lines as a single textarea input or as
112 // multiple text fields (or not at all), but it shouldn't be possible to
113 // request both.
114 DCHECK(!(address1_ && street_address_));
115 DCHECK(!(address2_ && street_address_));
116 DCHECK(!(address3_ && street_address_));
118 return AddClassification(company_, COMPANY_NAME, map) &&
119 AddClassification(address1_, ADDRESS_HOME_LINE1, map) &&
120 AddClassification(address2_, ADDRESS_HOME_LINE2, map) &&
121 AddClassification(address3_, ADDRESS_HOME_LINE3, map) &&
122 AddClassification(street_address_, ADDRESS_HOME_STREET_ADDRESS, map) &&
123 AddClassification(city_, ADDRESS_HOME_CITY, map) &&
124 AddClassification(state_, ADDRESS_HOME_STATE, map) &&
125 AddClassification(zip_, ADDRESS_HOME_ZIP, map) &&
126 AddClassification(country_, ADDRESS_HOME_COUNTRY, map);
129 bool AddressField::ParseCompany(AutofillScanner* scanner) {
130 if (company_ && !company_->IsEmpty())
131 return false;
133 return ParseField(scanner, UTF8ToUTF16(kCompanyRe), &company_);
136 bool AddressField::ParseAddressLines(AutofillScanner* scanner) {
137 // We only match the string "address" in page text, not in element names,
138 // because sometimes every element in a group of address fields will have
139 // a name containing the string "address"; for example, on the page
140 // Kohl's - Register Billing Address.html the text element labeled "city"
141 // has the name "BILL_TO_ADDRESS<>city". We do match address labels
142 // such as "address1", which appear as element names on various pages (eg
143 // AmericanGirl-Registration.html, BloomingdalesBilling.html,
144 // EBay Registration Enter Information.html).
145 if (address1_ || street_address_)
146 return false;
148 // Ignore "Address Lookup" field. http://crbug.com/427622
149 if (ParseField(scanner, base::UTF8ToUTF16(kAddressLookupRe), NULL))
150 return false;
152 base::string16 pattern = UTF8ToUTF16(kAddressLine1Re);
153 base::string16 label_pattern = UTF8ToUTF16(kAddressLine1LabelRe);
154 if (!ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT, &address1_) &&
155 !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT,
156 &address1_) &&
157 !ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT | MATCH_TEXT_AREA,
158 &street_address_) &&
159 !ParseFieldSpecifics(scanner, label_pattern,
160 MATCH_LABEL | MATCH_TEXT_AREA,
161 &street_address_))
162 return false;
164 if (street_address_)
165 return true;
167 // This code may not pick up pages that have an address field consisting of a
168 // sequence of unlabeled address fields. If we need to add this, see
169 // discussion on https://codereview.chromium.org/741493003/
170 pattern = UTF8ToUTF16(kAddressLine2Re);
171 label_pattern = UTF8ToUTF16(kAddressLine2LabelRe);
172 if (!ParseField(scanner, pattern, &address2_) &&
173 !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT,
174 &address2_))
175 return true;
177 // Optionally parse address line 3. This uses the same label regexp as
178 // address 2 above.
179 pattern = UTF8ToUTF16(kAddressLinesExtraRe);
180 if (!ParseField(scanner, pattern, &address3_) &&
181 !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT,
182 &address3_))
183 return true;
185 // Try for surplus lines, which we will promptly discard. Some pages have 4
186 // address lines (e.g. uk/ShoesDirect2.html)!
188 // Since these are rare, don't bother considering unlabeled lines as extra
189 // address lines.
190 pattern = UTF8ToUTF16(kAddressLinesExtraRe);
191 while (ParseField(scanner, pattern, NULL)) {
192 // Consumed a surplus line, try for another.
194 return true;
197 bool AddressField::ParseCountry(AutofillScanner* scanner) {
198 // Parse a country. The occasional page (e.g.
199 // Travelocity_New Member Information1.html) calls this a "location".
200 if (country_ && !country_->IsEmpty())
201 return false;
203 return ParseFieldSpecifics(scanner,
204 UTF8ToUTF16(kCountryRe),
205 MATCH_DEFAULT | MATCH_SELECT,
206 &country_);
209 bool AddressField::ParseZipCode(AutofillScanner* scanner) {
210 // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this
211 // is called a "post code".
212 if (zip_)
213 return false;
215 // Some sites use type="tel" for zip fields (to get a numerical input).
216 // http://crbug.com/426958
217 if (!ParseFieldSpecifics(scanner,
218 UTF8ToUTF16(kZipCodeRe),
219 MATCH_DEFAULT | MATCH_TELEPHONE,
220 &zip_)) {
221 return false;
224 // Look for a zip+4, whose field name will also often contain
225 // the substring "zip".
226 ParseFieldSpecifics(scanner,
227 UTF8ToUTF16(kZip4Re),
228 MATCH_DEFAULT | MATCH_TELEPHONE,
229 &zip4_);
230 return true;
233 bool AddressField::ParseCity(AutofillScanner* scanner) {
234 // Parse a city name. Some UK pages (e.g. The China Shop2.html) use
235 // the term "town".
236 if (city_)
237 return false;
239 // Select fields are allowed here. This occurs on top-100 site rediff.com.
240 return ParseFieldSpecifics(scanner,
241 UTF8ToUTF16(kCityRe),
242 MATCH_DEFAULT | MATCH_SELECT,
243 &city_);
246 bool AddressField::ParseState(AutofillScanner* scanner) {
247 if (state_)
248 return false;
250 return ParseFieldSpecifics(scanner,
251 UTF8ToUTF16(kStateRe),
252 MATCH_DEFAULT | MATCH_SELECT,
253 &state_);
256 } // namespace autofill