1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/autofill/core/browser/phone_field.h"
7 #include "base/logging.h"
8 #include "base/memory/scoped_ptr.h"
9 #include "base/strings/string16.h"
10 #include "base/strings/string_util.h"
11 #include "base/strings/utf_string_conversions.h"
12 #include "components/autofill/core/browser/autofill_field.h"
13 #include "components/autofill/core/browser/autofill_regex_constants.h"
14 #include "components/autofill/core/browser/autofill_scanner.h"
19 // This string includes all area code separators, including NoText.
// Builds the pattern by starting from kAreaCodeRe and appending "|" followed
// by kAreaCodeNotextRe, so the result contains both patterns joined by the
// regexp separator.
20 std::string
GetAreaRegex() {
21 std::string area_code
= kAreaCodeRe
;
22 area_code
.append("|"); // Regexp separator.
23 area_code
.append(kAreaCodeNotextRe
);
// Destructor with an empty body; nothing is released here.
29 PhoneField::~PhoneField() {}
31 // Phone field grammars - first matched grammar will be parsed. Grammars are
32 // separated by { REGEX_SEPARATOR, FIELD_NONE, 0 }. Suffix and extension are
33 // parsed separately unless they are necessary parts of the match.
34 // The following notation is used to describe the patterns:
35 // <cc> - country code field.
36 // <ac> - area code field.
37 // <phone> - phone or prefix.
// <suffix> - suffix field.
// <ext> - extension field.
40 // :N means field is limited to N characters, otherwise it is unlimited.
41 // (pattern <field>)? means pattern is optional and matched separately.
//
// Each entry is { regex, phone_part, max_size }: the regex id to match, the
// slot the matched field is stored in, and the size limit (0 means the size
// check in PhoneField::Parse is skipped for that entry).
42 const PhoneField::Parser
PhoneField::kPhoneFieldGrammars
[] = {
43 // Country code: <cc> Area Code: <ac> Phone: <phone> (- <suffix>
// (Ext: <ext>)?)?
45 { REGEX_COUNTRY
, FIELD_COUNTRY_CODE
, 0 },
46 { REGEX_AREA
, FIELD_AREA_CODE
, 0 },
47 { REGEX_PHONE
, FIELD_PHONE
, 0 },
48 { REGEX_SEPARATOR
, FIELD_NONE
, 0 },
49 // \( <ac>:3 \) <phone>:3 <suffix>:4 (Ext: <ext>)?
50 { REGEX_AREA_NOTEXT
, FIELD_AREA_CODE
, 3 },
51 { REGEX_PREFIX_SEPARATOR
, FIELD_PHONE
, 3 },
52 { REGEX_PHONE
, FIELD_SUFFIX
, 4 },
53 { REGEX_SEPARATOR
, FIELD_NONE
, 0 },
54 // Phone: <cc> <ac>:3 - <phone>:3 - <suffix>:4 (Ext: <ext>)?
55 { REGEX_PHONE
, FIELD_COUNTRY_CODE
, 0 },
56 { REGEX_PHONE
, FIELD_AREA_CODE
, 3 },
57 { REGEX_PREFIX_SEPARATOR
, FIELD_PHONE
, 3 },
58 { REGEX_SUFFIX_SEPARATOR
, FIELD_SUFFIX
, 4 },
59 { REGEX_SEPARATOR
, FIELD_NONE
, 0 },
60 // Phone: <cc>:3 <ac>:3 <phone>:3 <suffix>:4 (Ext: <ext>)?
61 { REGEX_PHONE
, FIELD_COUNTRY_CODE
, 3 },
62 { REGEX_PHONE
, FIELD_AREA_CODE
, 3 },
63 { REGEX_PHONE
, FIELD_PHONE
, 3 },
64 { REGEX_PHONE
, FIELD_SUFFIX
, 4 },
65 { REGEX_SEPARATOR
, FIELD_NONE
, 0 },
66 // Area Code: <ac> Phone: <phone> (- <suffix> (Ext: <ext>)?)?
67 { REGEX_AREA
, FIELD_AREA_CODE
, 0 },
68 { REGEX_PHONE
, FIELD_PHONE
, 0 },
69 { REGEX_SEPARATOR
, FIELD_NONE
, 0 },
70 // Phone: <ac> <phone>:3 <suffix>:4 (Ext: <ext>)?
71 { REGEX_PHONE
, FIELD_AREA_CODE
, 0 },
72 { REGEX_PHONE
, FIELD_PHONE
, 3 },
73 { REGEX_PHONE
, FIELD_SUFFIX
, 4 },
74 { REGEX_SEPARATOR
, FIELD_NONE
, 0 },
75 // Phone: <cc> \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)?
76 { REGEX_PHONE
, FIELD_COUNTRY_CODE
, 0 },
77 { REGEX_AREA_NOTEXT
, FIELD_AREA_CODE
, 0 },
78 { REGEX_PREFIX_SEPARATOR
, FIELD_PHONE
, 0 },
79 { REGEX_SEPARATOR
, FIELD_NONE
, 0 },
80 // Phone: \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)?
// NOTE(review): the entries below are identical to the preceding grammar
// (they start with a FIELD_COUNTRY_CODE entry although the pattern comment
// has no <cc>). Since the first matching grammar wins, this grammar looks
// unreachable as written - confirm the intended entries.
81 { REGEX_PHONE
, FIELD_COUNTRY_CODE
, 0 },
82 { REGEX_AREA_NOTEXT
, FIELD_AREA_CODE
, 0 },
83 { REGEX_PREFIX_SEPARATOR
, FIELD_PHONE
, 0 },
84 { REGEX_SEPARATOR
, FIELD_NONE
, 0 },
85 // Phone: <cc> - <ac> - <phone> - <suffix> (Ext: <ext>)?
86 { REGEX_PHONE
, FIELD_COUNTRY_CODE
, 0 },
87 { REGEX_PREFIX_SEPARATOR
, FIELD_AREA_CODE
, 0 },
88 { REGEX_PREFIX_SEPARATOR
, FIELD_PHONE
, 0 },
89 { REGEX_SUFFIX_SEPARATOR
, FIELD_SUFFIX
, 0 },
90 { REGEX_SEPARATOR
, FIELD_NONE
, 0 },
91 // Phone: <ac> Prefix: <phone> Suffix: <suffix> (Ext: <ext>)?
92 { REGEX_PHONE
, FIELD_AREA_CODE
, 0 },
93 { REGEX_PREFIX
, FIELD_PHONE
, 0 },
94 { REGEX_SUFFIX
, FIELD_SUFFIX
, 0 },
95 { REGEX_SEPARATOR
, FIELD_NONE
, 0 },
96 // Phone: <ac> - <phone>:3 - <suffix>:4 (Ext: <ext>)?
97 { REGEX_PHONE
, FIELD_AREA_CODE
, 0 },
98 { REGEX_PREFIX_SEPARATOR
, FIELD_PHONE
, 3 },
99 { REGEX_SUFFIX_SEPARATOR
, FIELD_SUFFIX
, 4 },
100 { REGEX_SEPARATOR
, FIELD_NONE
, 0 },
101 // Phone: <cc> - <ac> - <phone> (Ext: <ext>)?
102 { REGEX_PHONE
, FIELD_COUNTRY_CODE
, 0 },
103 { REGEX_PREFIX_SEPARATOR
, FIELD_AREA_CODE
, 0 },
104 { REGEX_SUFFIX_SEPARATOR
, FIELD_PHONE
, 0 },
105 { REGEX_SEPARATOR
, FIELD_NONE
, 0 },
106 // Phone: <ac> - <phone> (Ext: <ext>)?
// NOTE(review): these entries are identical to the earlier
// "Area Code: <ac> Phone: <phone>" grammar (REGEX_AREA then REGEX_PHONE), so
// this grammar also looks unreachable as written - confirm the intended
// regex ids.
107 { REGEX_AREA
, FIELD_AREA_CODE
, 0 },
108 { REGEX_PHONE
, FIELD_PHONE
, 0 },
109 { REGEX_SEPARATOR
, FIELD_NONE
, 0 },
110 // Phone: <cc>:3 - <phone>:10 (Ext: <ext>)?
111 { REGEX_PHONE
, FIELD_COUNTRY_CODE
, 3 },
112 { REGEX_PHONE
, FIELD_PHONE
, 10 },
113 { REGEX_SEPARATOR
, FIELD_NONE
, 0 },
114 // Phone: <phone> (Ext: <ext>)?
115 { REGEX_PHONE
, FIELD_PHONE
, 0 },
116 { REGEX_SEPARATOR
, FIELD_NONE
, 0 },
// Tries each grammar in kPhoneFieldGrammars, in table order, against the
// fields at the scanner's current position. When a grammar matches and a
// FIELD_PHONE entry was captured, a PhoneField holding the matched fields is
// returned, after optionally consuming a trailing suffix field and an
// extension field. Returns nullptr when no grammar matches.
120 scoped_ptr
<FormField
> PhoneField::Parse(AutofillScanner
* scanner
) {
// NOTE(review): the statement guarded by this end-of-input check is not
// visible in this excerpt - presumably an early return; confirm.
121 if (scanner
->IsEnd())
// Remember where parsing began so the scanner can be restored if no phone
// field is found.
124 size_t start_cursor
= scanner
->SaveCursor();
126 // The form owns the following variables, so they should not be deleted.
127 AutofillField
* parsed_fields
[FIELD_MAX
];
// Outer loop: one iteration per grammar. |i| indexes individual table
// entries and is also advanced by the inner loops below.
129 for (size_t i
= 0; i
< arraysize(kPhoneFieldGrammars
); ++i
) {
// Every grammar attempt starts with no matched fields and from the same
// scanner position.
130 memset(parsed_fields
, 0, sizeof(parsed_fields
));
131 size_t saved_cursor
= scanner
->SaveCursor();
133 // Attempt to parse according to the next grammar.
// Walks the entries of the current grammar until its REGEX_SEPARATOR entry
// (or the end of the table) is reached.
134 for (; i
< arraysize(kPhoneFieldGrammars
) &&
135 kPhoneFieldGrammars
[i
].regex
!= REGEX_SEPARATOR
; ++i
) {
// Match the entry's regex and store the matched field in the slot selected
// by the entry's phone_part.
136 if (!ParsePhoneField(
138 GetRegExp(kPhoneFieldGrammars
[i
].regex
),
139 &parsed_fields
[kPhoneFieldGrammars
[i
].phone_part
]))
// Size check: true when the entry has a non-zero max_size and the matched
// field either declares no max_length or declares one larger than max_size.
// NOTE(review): the statement executed in that case is not visible here -
// presumably it abandons the current grammar; confirm.
141 if (kPhoneFieldGrammars
[i
].max_size
&&
142 (!parsed_fields
[kPhoneFieldGrammars
[i
].phone_part
]->max_length
||
143 kPhoneFieldGrammars
[i
].max_size
<
144 parsed_fields
[kPhoneFieldGrammars
[i
].phone_part
]->max_length
)) {
// |i| ran past the end of the table without reaching a separator entry.
149 if (i
>= arraysize(kPhoneFieldGrammars
)) {
150 scanner
->RewindTo(saved_cursor
);
151 return nullptr; // Parsing failed.
// Reaching the grammar's separator entry means every entry before it
// matched.
153 if (kPhoneFieldGrammars
[i
].regex
== REGEX_SEPARATOR
)
154 break; // Parsing succeeded.
156 // Proceed to the next grammar.
// The loop condition below stops at the next REGEX_SEPARATOR entry or at
// the end of the table.
159 } while (i
< arraysize(kPhoneFieldGrammars
) &&
160 kPhoneFieldGrammars
[i
].regex
!= REGEX_SEPARATOR
);
// Undo whatever the failed grammar consumed before trying the next one.
162 scanner
->RewindTo(saved_cursor
);
// The separator just reached is the last entry of the table.
163 if (i
+ 1 == arraysize(kPhoneFieldGrammars
)) {
164 return nullptr; // Tried through all the possibilities - did not match.
// A match without a FIELD_PHONE entry is not usable; restore the scanner to
// where parsing began.
168 if (!parsed_fields
[FIELD_PHONE
]) {
169 scanner
->RewindTo(start_cursor
);
// Copy the matched fields into a newly allocated PhoneField.
173 scoped_ptr
<PhoneField
> phone_field(new PhoneField
);
174 for (int i
= 0; i
< FIELD_MAX
; ++i
)
175 phone_field
->parsed_phone_fields_
[i
] = parsed_fields
[i
];
177 // Look for optional fields.
179 // Look for a third text box.
// Only attempted when the grammar did not already capture a suffix: try
// kPhoneSuffixRe first, then kPhoneSuffixSeparatorRe.
180 if (!phone_field
->parsed_phone_fields_
[FIELD_SUFFIX
]) {
181 if (!ParsePhoneField(scanner
, kPhoneSuffixRe
,
182 &phone_field
->parsed_phone_fields_
[FIELD_SUFFIX
])) {
183 ParsePhoneField(scanner
, kPhoneSuffixSeparatorRe
,
184 &phone_field
->parsed_phone_fields_
[FIELD_SUFFIX
]);
188 // Now look for an extension.
189 // The extension is not actually used, so this just eats the field so other
190 // parsers do not mistake it for something else.
191 ParsePhoneField(scanner
,
193 &phone_field
->parsed_phone_fields_
[FIELD_EXTENSION
]);
// Hand ownership of the PhoneField to the caller.
195 return phone_field
.Pass();
// Records a server field type for each parsed phone field through
// AddClassification. When a country code, area code, or suffix field was
// parsed, the parts are classified individually; the final AddClassification
// call below tags the phone field as PHONE_HOME_WHOLE_NUMBER.
// NOTE(review): the declaration of |ok|, the trailing arguments of the
// AddClassification calls and the return statement are not visible in this
// excerpt.
198 bool PhoneField::ClassifyField(ServerFieldTypeMap
* map
) const {
201 DCHECK(parsed_phone_fields_
[FIELD_PHONE
]); // Phone was correctly parsed.
// Multi-field case: at least one of country code, area code, or suffix was
// captured alongside the phone field.
203 if ((parsed_phone_fields_
[FIELD_COUNTRY_CODE
]) ||
204 (parsed_phone_fields_
[FIELD_AREA_CODE
]) ||
205 (parsed_phone_fields_
[FIELD_SUFFIX
])) {
206 if (parsed_phone_fields_
[FIELD_COUNTRY_CODE
]) {
207 ok
= ok
&& AddClassification(parsed_phone_fields_
[FIELD_COUNTRY_CODE
],
208 PHONE_HOME_COUNTRY_CODE
,
// Type used for the phone field itself; defaults to the bare number and is
// widened below when no separate area code field exists.
212 ServerFieldType field_number_type
= PHONE_HOME_NUMBER
;
213 if (parsed_phone_fields_
[FIELD_AREA_CODE
]) {
214 ok
= ok
&& AddClassification(parsed_phone_fields_
[FIELD_AREA_CODE
],
215 PHONE_HOME_CITY_CODE
,
217 } else if (parsed_phone_fields_
[FIELD_COUNTRY_CODE
]) {
218 // If a country code was found without a city (area) code, the phone number
219 // field itself includes the city code.
220 field_number_type
= PHONE_HOME_CITY_AND_NUMBER
;
222 // We tag the prefix as PHONE_HOME_NUMBER, then when filling the form
223 // we fill only the prefix depending on the size of the input field.
224 ok
= ok
&& AddClassification(parsed_phone_fields_
[FIELD_PHONE
],
227 // We tag the suffix as PHONE_HOME_NUMBER, then when filling the form
228 // we fill only the suffix depending on the size of the input field.
229 if (parsed_phone_fields_
[FIELD_SUFFIX
]) {
230 ok
= ok
&& AddClassification(parsed_phone_fields_
[FIELD_SUFFIX
],
// Single-field case: the phone field is classified as the whole number.
235 ok
= AddClassification(parsed_phone_fields_
[FIELD_PHONE
],
236 PHONE_HOME_WHOLE_NUMBER
,
// Constructor: zero-fills parsed_phone_fields_ so that no slot refers to a
// field until PhoneField::Parse assigns one.
243 PhoneField::PhoneField() {
244 memset(parsed_phone_fields_
, 0, sizeof(parsed_phone_fields_
));
// Returns the regular expression string that corresponds to |regex_id|.
// The area pattern is built by GetAreaRegex(); the other visible returns are
// the k...Re pattern constants. The final return yields an empty string.
// NOTE(review): the switch statement and several case labels are not visible
// in this excerpt; the empty string is presumably the result for ids without
// a pattern - confirm.
248 std::string
PhoneField::GetRegExp(RegexType regex_id
) {
251 return kCountryCodeRe
;
253 return GetAreaRegex();
254 case REGEX_AREA_NOTEXT
:
255 return kAreaCodeNotextRe
;
258 case REGEX_PREFIX_SEPARATOR
:
259 return kPhonePrefixSeparatorRe
;
261 return kPhonePrefixRe
;
262 case REGEX_SUFFIX_SEPARATOR
:
263 return kPhoneSuffixSeparatorRe
;
265 return kPhoneSuffixRe
;
266 case REGEX_EXTENSION
:
267 return kPhoneExtensionRe
;
272 return std::string();
// Forwards to ParseFieldSpecifics and returns its result. |regex| is
// converted from UTF-8 to UTF-16 before being passed on, and matching uses the
// flags MATCH_DEFAULT | MATCH_TELEPHONE | MATCH_NUMBER.
// NOTE(review): the remaining arguments of the ParseFieldSpecifics call are
// not visible in this excerpt; |field| is presumably the output slot for the
// matched field - confirm.
276 bool PhoneField::ParsePhoneField(AutofillScanner
* scanner
,
277 const std::string
& regex
,
278 AutofillField
** field
) {
279 return ParseFieldSpecifics(scanner
,
280 base::UTF8ToUTF16(regex
),
281 MATCH_DEFAULT
| MATCH_TELEPHONE
| MATCH_NUMBER
,
285 } // namespace autofill