Include all dupe types (even when value is zero) in scan stats.
[chromium-blink-merge.git] / components / autofill / core / browser / phone_field.cc
blob77a8d9f08aa6bf22eec63f325d6101b9478298eb
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/autofill/core/browser/phone_field.h"
7 #include "base/logging.h"
8 #include "base/memory/scoped_ptr.h"
9 #include "base/strings/string16.h"
10 #include "base/strings/string_util.h"
11 #include "base/strings/utf_string_conversions.h"
12 #include "components/autofill/core/browser/autofill_field.h"
13 #include "components/autofill/core/browser/autofill_regex_constants.h"
14 #include "components/autofill/core/browser/autofill_scanner.h"
16 namespace autofill {
17 namespace {
19 // This string includes all area code separators, including NoText.
20 std::string GetAreaRegex() {
21 std::string area_code = kAreaCodeRe;
22 area_code.append("|"); // Regexp separator.
23 area_code.append(kAreaCodeNotextRe);
24 return area_code;
27 } // namespace
29 PhoneField::~PhoneField() {}
31 // Phone field grammars - first matched grammar will be parsed. Grammars are
32 // separated by { REGEX_SEPARATOR, FIELD_NONE, 0 }. Suffix and extension are
33 // parsed separately unless they are necessary parts of the match.
34 // The following notation is used to describe the patterns:
35 // <cc> - country code field.
36 // <ac> - area code field.
37 // <phone> - phone or prefix.
38 // <suffix> - suffix.
39 // <ext> - extension.
40 // :N means field is limited to N characters, otherwise it is unlimited.
41 // (pattern <field>)? means pattern is optional and matched separately.
42 const PhoneField::Parser PhoneField::kPhoneFieldGrammars[] = {
43 // Country code: <cc> Area Code: <ac> Phone: <phone> (- <suffix>
44 // (Ext: <ext>)?)?
45 { REGEX_COUNTRY, FIELD_COUNTRY_CODE, 0 },
46 { REGEX_AREA, FIELD_AREA_CODE, 0 },
47 { REGEX_PHONE, FIELD_PHONE, 0 },
48 { REGEX_SEPARATOR, FIELD_NONE, 0 },
49 // \( <ac> \) <phone>:3 <suffix>:4 (Ext: <ext>)?
50 { REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 3 },
51 { REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 3 },
52 { REGEX_PHONE, FIELD_SUFFIX, 4 },
53 { REGEX_SEPARATOR, FIELD_NONE, 0 },
54 // Phone: <cc> <ac>:3 - <phone>:3 - <suffix>:4 (Ext: <ext>)?
55 { REGEX_PHONE, FIELD_COUNTRY_CODE, 0 },
56 { REGEX_PHONE, FIELD_AREA_CODE, 3 },
57 { REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 3 },
58 { REGEX_SUFFIX_SEPARATOR, FIELD_SUFFIX, 4 },
59 { REGEX_SEPARATOR, FIELD_NONE, 0 },
60 // Phone: <cc>:3 <ac>:3 <phone>:3 <suffix>:4 (Ext: <ext>)?
61 { REGEX_PHONE, FIELD_COUNTRY_CODE, 3 },
62 { REGEX_PHONE, FIELD_AREA_CODE, 3 },
63 { REGEX_PHONE, FIELD_PHONE, 3 },
64 { REGEX_PHONE, FIELD_SUFFIX, 4 },
65 { REGEX_SEPARATOR, FIELD_NONE, 0 },
66 // Area Code: <ac> Phone: <phone> (- <suffix> (Ext: <ext>)?)?
67 { REGEX_AREA, FIELD_AREA_CODE, 0 },
68 { REGEX_PHONE, FIELD_PHONE, 0 },
69 { REGEX_SEPARATOR, FIELD_NONE, 0 },
70 // Phone: <ac> <phone>:3 <suffix>:4 (Ext: <ext>)?
71 { REGEX_PHONE, FIELD_AREA_CODE, 0 },
72 { REGEX_PHONE, FIELD_PHONE, 3 },
73 { REGEX_PHONE, FIELD_SUFFIX, 4 },
74 { REGEX_SEPARATOR, FIELD_NONE, 0 },
75 // Phone: <cc> \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)?
76 { REGEX_PHONE, FIELD_COUNTRY_CODE, 0 },
77 { REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 0 },
78 { REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0 },
79 { REGEX_SEPARATOR, FIELD_NONE, 0 },
80 // Phone: \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)?
81 { REGEX_PHONE, FIELD_COUNTRY_CODE, 0 },
82 { REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 0 },
83 { REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0 },
84 { REGEX_SEPARATOR, FIELD_NONE, 0 },
85 // Phone: <cc> - <ac> - <phone> - <suffix> (Ext: <ext>)?
86 { REGEX_PHONE, FIELD_COUNTRY_CODE, 0 },
87 { REGEX_PREFIX_SEPARATOR, FIELD_AREA_CODE, 0 },
88 { REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0 },
89 { REGEX_SUFFIX_SEPARATOR, FIELD_SUFFIX, 0 },
90 { REGEX_SEPARATOR, FIELD_NONE, 0 },
91 // Phone: <ac> Prefix: <phone> Suffix: <suffix> (Ext: <ext>)?
92 { REGEX_PHONE, FIELD_AREA_CODE, 0 },
93 { REGEX_PREFIX, FIELD_PHONE, 0 },
94 { REGEX_SUFFIX, FIELD_SUFFIX, 0 },
95 { REGEX_SEPARATOR, FIELD_NONE, 0 },
96 // Phone: <ac> - <phone>:3 - <suffix>:4 (Ext: <ext>)?
97 { REGEX_PHONE, FIELD_AREA_CODE, 0 },
98 { REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 3 },
99 { REGEX_SUFFIX_SEPARATOR, FIELD_SUFFIX, 4 },
100 { REGEX_SEPARATOR, FIELD_NONE, 0 },
101 // Phone: <cc> - <ac> - <phone> (Ext: <ext>)?
102 { REGEX_PHONE, FIELD_COUNTRY_CODE, 0 },
103 { REGEX_PREFIX_SEPARATOR, FIELD_AREA_CODE, 0 },
104 { REGEX_SUFFIX_SEPARATOR, FIELD_PHONE, 0 },
105 { REGEX_SEPARATOR, FIELD_NONE, 0 },
106 // Phone: <ac> - <phone> (Ext: <ext>)?
107 { REGEX_AREA, FIELD_AREA_CODE, 0 },
108 { REGEX_PHONE, FIELD_PHONE, 0 },
109 { REGEX_SEPARATOR, FIELD_NONE, 0 },
110 // Phone: <cc>:3 - <phone>:10 (Ext: <ext>)?
111 { REGEX_PHONE, FIELD_COUNTRY_CODE, 3 },
112 { REGEX_PHONE, FIELD_PHONE, 10 },
113 { REGEX_SEPARATOR, FIELD_NONE, 0 },
114 // Phone: <phone> (Ext: <ext>)?
115 { REGEX_PHONE, FIELD_PHONE, 0 },
116 { REGEX_SEPARATOR, FIELD_NONE, 0 },
119 // static
120 scoped_ptr<FormField> PhoneField::Parse(AutofillScanner* scanner) {
121 if (scanner->IsEnd())
122 return nullptr;
124 size_t start_cursor = scanner->SaveCursor();
126 // The form owns the following variables, so they should not be deleted.
127 AutofillField* parsed_fields[FIELD_MAX];
129 for (size_t i = 0; i < arraysize(kPhoneFieldGrammars); ++i) {
130 memset(parsed_fields, 0, sizeof(parsed_fields));
131 size_t saved_cursor = scanner->SaveCursor();
133 // Attempt to parse according to the next grammar.
134 for (; i < arraysize(kPhoneFieldGrammars) &&
135 kPhoneFieldGrammars[i].regex != REGEX_SEPARATOR; ++i) {
136 if (!ParsePhoneField(
137 scanner,
138 GetRegExp(kPhoneFieldGrammars[i].regex),
139 &parsed_fields[kPhoneFieldGrammars[i].phone_part]))
140 break;
141 if (kPhoneFieldGrammars[i].max_size &&
142 (!parsed_fields[kPhoneFieldGrammars[i].phone_part]->max_length ||
143 kPhoneFieldGrammars[i].max_size <
144 parsed_fields[kPhoneFieldGrammars[i].phone_part]->max_length)) {
145 break;
149 if (i >= arraysize(kPhoneFieldGrammars)) {
150 scanner->RewindTo(saved_cursor);
151 return nullptr; // Parsing failed.
153 if (kPhoneFieldGrammars[i].regex == REGEX_SEPARATOR)
154 break; // Parsing succeeded.
156 // Proceed to the next grammar.
157 do {
158 ++i;
159 } while (i < arraysize(kPhoneFieldGrammars) &&
160 kPhoneFieldGrammars[i].regex != REGEX_SEPARATOR);
162 scanner->RewindTo(saved_cursor);
163 if (i + 1 == arraysize(kPhoneFieldGrammars)) {
164 return nullptr; // Tried through all the possibilities - did not match.
168 if (!parsed_fields[FIELD_PHONE]) {
169 scanner->RewindTo(start_cursor);
170 return nullptr;
173 scoped_ptr<PhoneField> phone_field(new PhoneField);
174 for (int i = 0; i < FIELD_MAX; ++i)
175 phone_field->parsed_phone_fields_[i] = parsed_fields[i];
177 // Look for optional fields.
179 // Look for a third text box.
180 if (!phone_field->parsed_phone_fields_[FIELD_SUFFIX]) {
181 if (!ParsePhoneField(scanner, kPhoneSuffixRe,
182 &phone_field->parsed_phone_fields_[FIELD_SUFFIX])) {
183 ParsePhoneField(scanner, kPhoneSuffixSeparatorRe,
184 &phone_field->parsed_phone_fields_[FIELD_SUFFIX]);
188 // Now look for an extension.
189 // The extension is not actually used, so this just eats the field so other
190 // parsers do not mistaken it for something else.
191 ParsePhoneField(scanner,
192 kPhoneExtensionRe,
193 &phone_field->parsed_phone_fields_[FIELD_EXTENSION]);
195 return phone_field.Pass();
198 bool PhoneField::ClassifyField(ServerFieldTypeMap* map) const {
199 bool ok = true;
201 DCHECK(parsed_phone_fields_[FIELD_PHONE]); // Phone was correctly parsed.
203 if ((parsed_phone_fields_[FIELD_COUNTRY_CODE]) ||
204 (parsed_phone_fields_[FIELD_AREA_CODE]) ||
205 (parsed_phone_fields_[FIELD_SUFFIX])) {
206 if (parsed_phone_fields_[FIELD_COUNTRY_CODE]) {
207 ok = ok && AddClassification(parsed_phone_fields_[FIELD_COUNTRY_CODE],
208 PHONE_HOME_COUNTRY_CODE,
209 map);
212 ServerFieldType field_number_type = PHONE_HOME_NUMBER;
213 if (parsed_phone_fields_[FIELD_AREA_CODE]) {
214 ok = ok && AddClassification(parsed_phone_fields_[FIELD_AREA_CODE],
215 PHONE_HOME_CITY_CODE,
216 map);
217 } else if (parsed_phone_fields_[FIELD_COUNTRY_CODE]) {
218 // Only if we can find country code without city code, it means the phone
219 // number include city code.
220 field_number_type = PHONE_HOME_CITY_AND_NUMBER;
222 // We tag the prefix as PHONE_HOME_NUMBER, then when filling the form
223 // we fill only the prefix depending on the size of the input field.
224 ok = ok && AddClassification(parsed_phone_fields_[FIELD_PHONE],
225 field_number_type,
226 map);
227 // We tag the suffix as PHONE_HOME_NUMBER, then when filling the form
228 // we fill only the suffix depending on the size of the input field.
229 if (parsed_phone_fields_[FIELD_SUFFIX]) {
230 ok = ok && AddClassification(parsed_phone_fields_[FIELD_SUFFIX],
231 PHONE_HOME_NUMBER,
232 map);
234 } else {
235 ok = AddClassification(parsed_phone_fields_[FIELD_PHONE],
236 PHONE_HOME_WHOLE_NUMBER,
237 map);
240 return ok;
243 PhoneField::PhoneField() {
244 memset(parsed_phone_fields_, 0, sizeof(parsed_phone_fields_));
247 // static
248 std::string PhoneField::GetRegExp(RegexType regex_id) {
249 switch (regex_id) {
250 case REGEX_COUNTRY:
251 return kCountryCodeRe;
252 case REGEX_AREA:
253 return GetAreaRegex();
254 case REGEX_AREA_NOTEXT:
255 return kAreaCodeNotextRe;
256 case REGEX_PHONE:
257 return kPhoneRe;
258 case REGEX_PREFIX_SEPARATOR:
259 return kPhonePrefixSeparatorRe;
260 case REGEX_PREFIX:
261 return kPhonePrefixRe;
262 case REGEX_SUFFIX_SEPARATOR:
263 return kPhoneSuffixSeparatorRe;
264 case REGEX_SUFFIX:
265 return kPhoneSuffixRe;
266 case REGEX_EXTENSION:
267 return kPhoneExtensionRe;
268 default:
269 NOTREACHED();
270 break;
272 return std::string();
275 // static
276 bool PhoneField::ParsePhoneField(AutofillScanner* scanner,
277 const std::string& regex,
278 AutofillField** field) {
279 return ParseFieldSpecifics(scanner,
280 base::UTF8ToUTF16(regex),
281 MATCH_DEFAULT | MATCH_TELEPHONE | MATCH_NUMBER,
282 field);
285 } // namespace autofill