1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/autofill/core/browser/phone_field.h"
7 #include "base/logging.h"
8 #include "base/memory/scoped_ptr.h"
9 #include "base/strings/string16.h"
10 #include "base/strings/string_util.h"
11 #include "base/strings/utf_string_conversions.h"
12 #include "components/autofill/core/browser/autofill_field.h"
13 #include "components/autofill/core/browser/autofill_regex_constants.h"
14 #include "components/autofill/core/browser/autofill_scanner.h"
19 // This string includes all area code separators, including NoText.
20 base::string16
GetAreaRegex() {
21 base::string16 area_code
= base::UTF8ToUTF16(kAreaCodeRe
);
22 area_code
.append(base::ASCIIToUTF16("|")); // Regexp separator.
23 area_code
.append(base::UTF8ToUTF16(kAreaCodeNotextRe
));
29 PhoneField::~PhoneField() {}
31 // Phone field grammars - first matched grammar will be parsed. Grammars are
32 // separated by { REGEX_SEPARATOR, FIELD_NONE, 0 }. Suffix and extension are
33 // parsed separately unless they are necessary parts of the match.
34 // The following notation is used to describe the patterns:
35 // <cc> - country code field.
36 // <ac> - area code field.
37 // <phone> - phone or prefix.
40 // :N means field is limited to N characters, otherwise it is unlimited.
41 // (pattern <field>)? means pattern is optional and matched separately.
42 const PhoneField::Parser
PhoneField::kPhoneFieldGrammars
[] = {
43 // Country code: <cc> Area Code: <ac> Phone: <phone> (- <suffix>
45 { REGEX_COUNTRY
, FIELD_COUNTRY_CODE
, 0 },
46 { REGEX_AREA
, FIELD_AREA_CODE
, 0 },
47 { REGEX_PHONE
, FIELD_PHONE
, 0 },
48 { REGEX_SEPARATOR
, FIELD_NONE
, 0 },
49 // \( <ac> \) <phone>:3 <suffix>:4 (Ext: <ext>)?
50 { REGEX_AREA_NOTEXT
, FIELD_AREA_CODE
, 3 },
51 { REGEX_PREFIX_SEPARATOR
, FIELD_PHONE
, 3 },
52 { REGEX_PHONE
, FIELD_SUFFIX
, 4 },
53 { REGEX_SEPARATOR
, FIELD_NONE
, 0 },
54 // Phone: <cc> <ac>:3 - <phone>:3 - <suffix>:4 (Ext: <ext>)?
55 { REGEX_PHONE
, FIELD_COUNTRY_CODE
, 0 },
56 { REGEX_PHONE
, FIELD_AREA_CODE
, 3 },
57 { REGEX_PREFIX_SEPARATOR
, FIELD_PHONE
, 3 },
58 { REGEX_SUFFIX_SEPARATOR
, FIELD_SUFFIX
, 4 },
59 { REGEX_SEPARATOR
, FIELD_NONE
, 0 },
60 // Phone: <cc>:3 <ac>:3 <phone>:3 <suffix>:4 (Ext: <ext>)?
61 { REGEX_PHONE
, FIELD_COUNTRY_CODE
, 3 },
62 { REGEX_PHONE
, FIELD_AREA_CODE
, 3 },
63 { REGEX_PHONE
, FIELD_PHONE
, 3 },
64 { REGEX_PHONE
, FIELD_SUFFIX
, 4 },
65 { REGEX_SEPARATOR
, FIELD_NONE
, 0 },
66 // Area Code: <ac> Phone: <phone> (- <suffix> (Ext: <ext>)?)?
67 { REGEX_AREA
, FIELD_AREA_CODE
, 0 },
68 { REGEX_PHONE
, FIELD_PHONE
, 0 },
69 { REGEX_SEPARATOR
, FIELD_NONE
, 0 },
70 // Phone: <ac> <phone>:3 <suffix>:4 (Ext: <ext>)?
71 { REGEX_PHONE
, FIELD_AREA_CODE
, 0 },
72 { REGEX_PHONE
, FIELD_PHONE
, 3 },
73 { REGEX_PHONE
, FIELD_SUFFIX
, 4 },
74 { REGEX_SEPARATOR
, FIELD_NONE
, 0 },
75 // Phone: <cc> \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)?
76 { REGEX_PHONE
, FIELD_COUNTRY_CODE
, 0 },
77 { REGEX_AREA_NOTEXT
, FIELD_AREA_CODE
, 0 },
78 { REGEX_PREFIX_SEPARATOR
, FIELD_PHONE
, 0 },
79 { REGEX_SEPARATOR
, FIELD_NONE
, 0 },
80 // Phone: \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)?
81 { REGEX_PHONE
, FIELD_COUNTRY_CODE
, 0 },
82 { REGEX_AREA_NOTEXT
, FIELD_AREA_CODE
, 0 },
83 { REGEX_PREFIX_SEPARATOR
, FIELD_PHONE
, 0 },
84 { REGEX_SEPARATOR
, FIELD_NONE
, 0 },
85 // Phone: <cc> - <ac> - <phone> - <suffix> (Ext: <ext>)?
86 { REGEX_PHONE
, FIELD_COUNTRY_CODE
, 0 },
87 { REGEX_PREFIX_SEPARATOR
, FIELD_AREA_CODE
, 0 },
88 { REGEX_PREFIX_SEPARATOR
, FIELD_PHONE
, 0 },
89 { REGEX_SUFFIX_SEPARATOR
, FIELD_SUFFIX
, 0 },
90 { REGEX_SEPARATOR
, FIELD_NONE
, 0 },
91 // Phone: <ac> Prefix: <phone> Suffix: <suffix> (Ext: <ext>)?
92 { REGEX_PHONE
, FIELD_AREA_CODE
, 0 },
93 { REGEX_PREFIX
, FIELD_PHONE
, 0 },
94 { REGEX_SUFFIX
, FIELD_SUFFIX
, 0 },
95 { REGEX_SEPARATOR
, FIELD_NONE
, 0 },
96 // Phone: <ac> - <phone>:3 - <suffix>:4 (Ext: <ext>)?
97 { REGEX_PHONE
, FIELD_AREA_CODE
, 0 },
98 { REGEX_PREFIX_SEPARATOR
, FIELD_PHONE
, 3 },
99 { REGEX_SUFFIX_SEPARATOR
, FIELD_SUFFIX
, 4 },
100 { REGEX_SEPARATOR
, FIELD_NONE
, 0 },
101 // Phone: <cc> - <ac> - <phone> (Ext: <ext>)?
102 { REGEX_PHONE
, FIELD_COUNTRY_CODE
, 0 },
103 { REGEX_PREFIX_SEPARATOR
, FIELD_AREA_CODE
, 0 },
104 { REGEX_SUFFIX_SEPARATOR
, FIELD_PHONE
, 0 },
105 { REGEX_SEPARATOR
, FIELD_NONE
, 0 },
106 // Phone: <ac> - <phone> (Ext: <ext>)?
107 { REGEX_AREA
, FIELD_AREA_CODE
, 0 },
108 { REGEX_PHONE
, FIELD_PHONE
, 0 },
109 { REGEX_SEPARATOR
, FIELD_NONE
, 0 },
110 // Phone: <cc>:3 - <phone>:10 (Ext: <ext>)?
111 { REGEX_PHONE
, FIELD_COUNTRY_CODE
, 3 },
112 { REGEX_PHONE
, FIELD_PHONE
, 10 },
113 { REGEX_SEPARATOR
, FIELD_NONE
, 0 },
114 // Phone: <phone> (Ext: <ext>)?
115 { REGEX_PHONE
, FIELD_PHONE
, 0 },
116 { REGEX_SEPARATOR
, FIELD_NONE
, 0 },
// Walks kPhoneFieldGrammars, matching each grammar's entries against the
// scanner's fields with ParseFieldSpecifics(), then copies the matched
// AutofillField pointers into a newly allocated PhoneField and looks for the
// optional suffix and extension fields before returning it. The failure paths
// visible here return NULL.
// NOTE(review): the embedded line numbers skip values inside this function
// (e.g. 121 -> 124, 136 -> 138, 145 -> 150), so some statements appear to be
// missing from this copy - compare against the upstream file before editing.
120 scoped_ptr
<FormField
> PhoneField::Parse(AutofillScanner
* scanner
) {
121 if (scanner
->IsEnd())
124 scanner
->SaveCursor();
126 // The form owns the following variables, so they should not be deleted.
127 AutofillField
* parsed_fields
[FIELD_MAX
];
// Outer loop: one iteration per grammar attempt. The matched fields are reset
// and the cursor saved before each attempt.
129 for (size_t i
= 0; i
< arraysize(kPhoneFieldGrammars
); ++i
) {
130 memset(parsed_fields
, 0, sizeof(parsed_fields
));
131 scanner
->SaveCursor();
133 // Attempt to parse according to the next grammar.
134 for (; i
< arraysize(kPhoneFieldGrammars
) &&
135 kPhoneFieldGrammars
[i
].regex
!= REGEX_SEPARATOR
; ++i
) {
136 if (!ParseFieldSpecifics(
138 GetRegExp(kPhoneFieldGrammars
[i
].regex
),
139 MATCH_DEFAULT
| MATCH_TELEPHONE
,
140 &parsed_fields
[kPhoneFieldGrammars
[i
].phone_part
]))
// Size check for entries with a non-zero max_size: the condition holds when
// the matched field has no max_length or a max_length larger than max_size.
142 if (kPhoneFieldGrammars
[i
].max_size
&&
143 (!parsed_fields
[kPhoneFieldGrammars
[i
].phone_part
]->max_length
||
144 kPhoneFieldGrammars
[i
].max_size
<
145 parsed_fields
[kPhoneFieldGrammars
[i
].phone_part
]->max_length
)) {
150 if (i
>= arraysize(kPhoneFieldGrammars
)) {
152 return NULL
; // Parsing failed.
// Stopping on a separator entry means every entry of the grammar was consumed.
154 if (kPhoneFieldGrammars
[i
].regex
== REGEX_SEPARATOR
)
155 break; // Parsing succeeded.
157 // Proceed to the next grammar.
160 } while (i
< arraysize(kPhoneFieldGrammars
) &&
161 kPhoneFieldGrammars
[i
].regex
!= REGEX_SEPARATOR
);
163 if (i
+ 1 == arraysize(kPhoneFieldGrammars
)) {
165 return NULL
; // Tried through all the possibilities - did not match.
171 if (!parsed_fields
[FIELD_PHONE
]) {
// Copy every matched field pointer into the PhoneField that is returned.
176 scoped_ptr
<PhoneField
> phone_field(new PhoneField
);
177 for (int i
= 0; i
< FIELD_MAX
; ++i
)
178 phone_field
->parsed_phone_fields_
[i
] = parsed_fields
[i
];
180 // Look for optional fields.
182 // Look for a third text box.
// Only attempted when the grammar did not already supply a suffix; the suffix
// pattern is tried first and the suffix-separator pattern second.
183 if (!phone_field
->parsed_phone_fields_
[FIELD_SUFFIX
]) {
184 if (!ParseField(scanner
, base::UTF8ToUTF16(kPhoneSuffixRe
),
185 &phone_field
->parsed_phone_fields_
[FIELD_SUFFIX
])) {
186 ParseField(scanner
, base::UTF8ToUTF16(kPhoneSuffixSeparatorRe
),
187 &phone_field
->parsed_phone_fields_
[FIELD_SUFFIX
]);
191 // Now look for an extension.
192 ParseField(scanner
, base::UTF8ToUTF16(kPhoneExtensionRe
),
193 &phone_field
->parsed_phone_fields_
[FIELD_EXTENSION
]);
195 return phone_field
.Pass();
// Calls AddClassification() for the phone parts stored in
// parsed_phone_fields_. When a country code, area code or suffix field was
// parsed, the parts are classified individually; the last visible call
// classifies the FIELD_PHONE entry as PHONE_HOME_WHOLE_NUMBER instead
// (presumably the else branch - the line introducing it is not visible here).
// NOTE(review): the embedded line numbers skip values inside this function
// (e.g. 198 -> 201, 208 -> 212, 230 -> 235); the declaration of |ok|, the
// trailing call arguments and the return statement are not visible in this
// copy - compare against the upstream file before editing.
198 bool PhoneField::ClassifyField(ServerFieldTypeMap
* map
) const {
201 DCHECK(parsed_phone_fields_
[FIELD_PHONE
]); // Phone was correctly parsed.
203 if ((parsed_phone_fields_
[FIELD_COUNTRY_CODE
] != NULL
) ||
204 (parsed_phone_fields_
[FIELD_AREA_CODE
] != NULL
) ||
205 (parsed_phone_fields_
[FIELD_SUFFIX
] != NULL
)) {
206 if (parsed_phone_fields_
[FIELD_COUNTRY_CODE
] != NULL
) {
207 ok
= ok
&& AddClassification(parsed_phone_fields_
[FIELD_COUNTRY_CODE
],
208 PHONE_HOME_COUNTRY_CODE
,
// Type used for the phone field; starts as PHONE_HOME_NUMBER and is switched
// below when a country code is present without an area code.
212 ServerFieldType field_number_type
= PHONE_HOME_NUMBER
;
213 if (parsed_phone_fields_
[FIELD_AREA_CODE
] != NULL
) {
214 ok
= ok
&& AddClassification(parsed_phone_fields_
[FIELD_AREA_CODE
],
215 PHONE_HOME_CITY_CODE
,
217 } else if (parsed_phone_fields_
[FIELD_COUNTRY_CODE
] != NULL
) {
218 // A country code without a separate city code field means the phone
219 // number field includes the city code.
220 field_number_type
= PHONE_HOME_CITY_AND_NUMBER
;
222 // We tag the prefix as PHONE_HOME_NUMBER, then when filling the form
223 // we fill only the prefix depending on the size of the input field.
224 ok
= ok
&& AddClassification(parsed_phone_fields_
[FIELD_PHONE
],
227 // We tag the suffix as PHONE_HOME_NUMBER, then when filling the form
228 // we fill only the suffix depending on the size of the input field.
229 if (parsed_phone_fields_
[FIELD_SUFFIX
] != NULL
) {
230 ok
= ok
&& AddClassification(parsed_phone_fields_
[FIELD_SUFFIX
],
235 ok
= AddClassification(parsed_phone_fields_
[FIELD_PHONE
],
236 PHONE_HOME_WHOLE_NUMBER
,
243 PhoneField::PhoneField() {
244 memset(parsed_phone_fields_
, 0, sizeof(parsed_phone_fields_
));
// Returns the UTF-16 regular expression for |regex_id|. Every visible return
// converts one of the k...Re pattern constants with base::UTF8ToUTF16(),
// except the one that returns GetAreaRegex(); the final statement returns an
// empty string.
// NOTE(review): the embedded line numbers skip 249-250, 252, 256, 260, 264 and
// 268-271, so the line opening the switch implied by the case labels, several
// case labels and whatever precedes the final return are not visible in this
// copy - compare against the upstream file before editing.
248 base::string16
PhoneField::GetRegExp(RegexType regex_id
) {
251 return base::UTF8ToUTF16(kCountryCodeRe
);
253 return GetAreaRegex();
254 case REGEX_AREA_NOTEXT
:
255 return base::UTF8ToUTF16(kAreaCodeNotextRe
);
257 return base::UTF8ToUTF16(kPhoneRe
);
258 case REGEX_PREFIX_SEPARATOR
:
259 return base::UTF8ToUTF16(kPhonePrefixSeparatorRe
);
261 return base::UTF8ToUTF16(kPhonePrefixRe
);
262 case REGEX_SUFFIX_SEPARATOR
:
263 return base::UTF8ToUTF16(kPhoneSuffixSeparatorRe
);
265 return base::UTF8ToUTF16(kPhoneSuffixRe
);
266 case REGEX_EXTENSION
:
267 return base::UTF8ToUTF16(kPhoneExtensionRe
);
272 return base::string16();
275 } // namespace autofill