1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/autofill/core/browser/form_structure.h"
9 #include "base/basictypes.h"
10 #include "base/command_line.h"
11 #include "base/i18n/case_conversion.h"
12 #include "base/logging.h"
13 #include "base/memory/scoped_ptr.h"
14 #include "base/sha1.h"
15 #include "base/strings/string_number_conversions.h"
16 #include "base/strings/string_util.h"
17 #include "base/strings/stringprintf.h"
18 #include "base/strings/utf_string_conversions.h"
19 #include "base/time/time.h"
20 #include "components/autofill/core/browser/autofill_metrics.h"
21 #include "components/autofill/core/browser/autofill_type.h"
22 #include "components/autofill/core/browser/autofill_xml_parser.h"
23 #include "components/autofill/core/browser/field_types.h"
24 #include "components/autofill/core/browser/form_field.h"
25 #include "components/autofill/core/common/autofill_constants.h"
26 #include "components/autofill/core/common/form_data.h"
27 #include "components/autofill/core/common/form_data_predictions.h"
28 #include "components/autofill/core/common/form_field_data.h"
29 #include "components/autofill/core/common/form_field_data_predictions.h"
30 #include "third_party/icu/source/i18n/unicode/regex.h"
31 #include "third_party/webrtc/libjingle/xmllite/xmlelement.h"
36 // XML elements and attributes.
37 const char kAttributeAutofillUsed
[] = "autofillused";
38 const char kAttributeAutofillType
[] = "autofilltype";
39 const char kAttributeClientVersion
[] = "clientversion";
40 const char kAttributeDataPresent
[] = "datapresent";
41 const char kAttributeFieldID
[] = "fieldid";
42 const char kAttributeFieldType
[] = "fieldtype";
43 const char kAttributeFormSignature
[] = "formsignature";
44 const char kAttributeName
[] = "name";
45 const char kAttributeSignature
[] = "signature";
46 const char kClientVersion
[] = "6.1.1715.1442/en (GGLL)";
47 const char kXMLDeclaration
[] = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
48 const char kXMLElementAutofillQuery
[] = "autofillquery";
49 const char kXMLElementAutofillUpload
[] = "autofillupload";
50 const char kXMLElementFieldAssignments
[] = "fieldassignments";
51 const char kXMLElementField
[] = "field";
52 const char kXMLElementFields
[] = "fields";
53 const char kXMLElementForm
[] = "form";
54 const char kBillingMode
[] = "billing";
55 const char kShippingMode
[] = "shipping";
57 // Strip away >= 5 consecutive digits.
58 const char kIgnorePatternInFieldName
[] = "\\d{5,}+";
60 // Helper for |EncodeUploadRequest()| that creates a bit field corresponding to
61 // |available_field_types| and returns the hex representation as a string.
62 std::string
EncodeFieldTypes(const ServerFieldTypeSet
& available_field_types
) {
63 // There are |MAX_VALID_FIELD_TYPE| different field types and 8 bits per byte,
64 // so we need ceil(MAX_VALID_FIELD_TYPE / 8) bytes to encode the bit field.
65 const size_t kNumBytes
= (MAX_VALID_FIELD_TYPE
+ 0x7) / 8;
67 // Pack the types in |available_field_types| into |bit_field|.
68 std::vector
<uint8
> bit_field(kNumBytes
, 0);
69 for (ServerFieldTypeSet::const_iterator field_type
=
70 available_field_types
.begin();
71 field_type
!= available_field_types
.end();
73 // Set the appropriate bit in the field. The bit we set is the one
74 // |field_type| % 8 from the left of the byte.
75 const size_t byte
= *field_type
/ 8;
76 const size_t bit
= 0x80 >> (*field_type
% 8);
77 DCHECK(byte
< bit_field
.size());
78 bit_field
[byte
] |= bit
;
81 // Discard any trailing zeroes.
82 // If there are no available types, we return the empty string.
83 size_t data_end
= bit_field
.size();
84 for (; data_end
> 0 && !bit_field
[data_end
- 1]; --data_end
) {
87 // Print all meaningful bytes into a string.
88 std::string data_presence
;
89 data_presence
.reserve(data_end
* 2 + 1);
90 for (size_t i
= 0; i
< data_end
; ++i
) {
91 base::StringAppendF(&data_presence
, "%02x", bit_field
[i
]);
97 // Helper for |EncodeFormRequest()| that creates XmlElements for the given field
98 // in upload xml, and also adds them to the parent XmlElement.
99 void EncodeFieldForUpload(const AutofillField
& field
,
100 buzz::XmlElement
* parent
) {
101 // Don't upload checkable fields.
102 if (field
.is_checkable
)
105 ServerFieldTypeSet types
= field
.possible_types();
106 // |types| could be empty in unit-tests only.
107 for (ServerFieldTypeSet::iterator field_type
= types
.begin();
108 field_type
!= types
.end(); ++field_type
) {
109 buzz::XmlElement
*field_element
= new buzz::XmlElement(
110 buzz::QName(kXMLElementField
));
112 field_element
->SetAttr(buzz::QName(kAttributeSignature
),
113 field
.FieldSignature());
114 field_element
->SetAttr(buzz::QName(kAttributeAutofillType
),
115 base::IntToString(*field_type
));
116 parent
->AddElement(field_element
);
120 // Helper for |EncodeFormRequest()| that creates XmlElement for the given field
121 // in query xml, and also adds it to the parent XmlElement.
122 void EncodeFieldForQuery(const AutofillField
& field
,
123 buzz::XmlElement
* parent
) {
124 buzz::XmlElement
*field_element
= new buzz::XmlElement(
125 buzz::QName(kXMLElementField
));
126 field_element
->SetAttr(buzz::QName(kAttributeSignature
),
127 field
.FieldSignature());
128 parent
->AddElement(field_element
);
131 // Helper for |EncodeFormRequest()| that creates XmlElements for the given field
132 // in field assignments xml, and also adds them to the parent XmlElement.
133 void EncodeFieldForFieldAssignments(const AutofillField
& field
,
134 buzz::XmlElement
* parent
) {
135 ServerFieldTypeSet types
= field
.possible_types();
136 for (ServerFieldTypeSet::iterator field_type
= types
.begin();
137 field_type
!= types
.end(); ++field_type
) {
138 buzz::XmlElement
*field_element
= new buzz::XmlElement(
139 buzz::QName(kXMLElementFields
));
141 field_element
->SetAttr(buzz::QName(kAttributeFieldID
),
142 field
.FieldSignature());
143 field_element
->SetAttr(buzz::QName(kAttributeFieldType
),
144 base::IntToString(*field_type
));
145 field_element
->SetAttr(buzz::QName(kAttributeName
),
146 base::UTF16ToUTF8(field
.name
));
147 parent
->AddElement(field_element
);
151 // Returns |true| iff the |token| is a type hint for a contact field, as
152 // specified in the implementation section of http://is.gd/whatwg_autocomplete
153 // Note that "fax" and "pager" are intentionally ignored, as Chrome does not
154 // support filling either type of information.
155 bool IsContactTypeHint(const std::string
& token
) {
156 return token
== "home" || token
== "work" || token
== "mobile";
159 // Returns |true| iff the |token| is a type hint appropriate for a field of the
160 // given |field_type|, as specified in the implementation section of
161 // http://is.gd/whatwg_autocomplete
162 bool ContactTypeHintMatchesFieldType(const std::string
& token
,
163 HtmlFieldType field_type
) {
164 // The "home" and "work" type hints are only appropriate for email and phone
165 // number field types.
166 if (token
== "home" || token
== "work") {
167 return field_type
== HTML_TYPE_EMAIL
||
168 (field_type
>= HTML_TYPE_TEL
&&
169 field_type
<= HTML_TYPE_TEL_LOCAL_SUFFIX
);
172 // The "mobile" type hint is only appropriate for phone number field types.
173 // Note that "fax" and "pager" are intentionally ignored, as Chrome does not
174 // support filling either type of information.
175 if (token
== "mobile") {
176 return field_type
>= HTML_TYPE_TEL
&&
177 field_type
<= HTML_TYPE_TEL_LOCAL_SUFFIX
;
183 // Returns the Chrome Autofill-supported field type corresponding to the given
184 // |autocomplete_attribute_value|, if there is one, in the context of the given
185 // |field|. Chrome Autofill supports a subset of the field types listed at
186 // http://is.gd/whatwg_autocomplete
187 HtmlFieldType
FieldTypeFromAutocompleteAttributeValue(
188 const std::string
& autocomplete_attribute_value
,
189 const AutofillField
& field
) {
190 if (autocomplete_attribute_value
== "name")
191 return HTML_TYPE_NAME
;
193 if (autocomplete_attribute_value
== "given-name")
194 return HTML_TYPE_GIVEN_NAME
;
196 if (autocomplete_attribute_value
== "additional-name") {
197 if (field
.max_length
== 1)
198 return HTML_TYPE_ADDITIONAL_NAME_INITIAL
;
200 return HTML_TYPE_ADDITIONAL_NAME
;
203 if (autocomplete_attribute_value
== "family-name")
204 return HTML_TYPE_FAMILY_NAME
;
206 if (autocomplete_attribute_value
== "organization")
207 return HTML_TYPE_ORGANIZATION
;
209 if (autocomplete_attribute_value
== "street-address")
210 return HTML_TYPE_STREET_ADDRESS
;
212 if (autocomplete_attribute_value
== "address-line1")
213 return HTML_TYPE_ADDRESS_LINE1
;
215 if (autocomplete_attribute_value
== "address-line2")
216 return HTML_TYPE_ADDRESS_LINE2
;
218 if (autocomplete_attribute_value
== "address-line3")
219 return HTML_TYPE_ADDRESS_LINE3
;
221 // TODO(estade): remove support for "locality" and "region".
222 if (autocomplete_attribute_value
== "locality")
223 return HTML_TYPE_ADDRESS_LEVEL2
;
225 if (autocomplete_attribute_value
== "region")
226 return HTML_TYPE_ADDRESS_LEVEL1
;
228 if (autocomplete_attribute_value
== "address-level1")
229 return HTML_TYPE_ADDRESS_LEVEL1
;
231 if (autocomplete_attribute_value
== "address-level2")
232 return HTML_TYPE_ADDRESS_LEVEL2
;
234 if (autocomplete_attribute_value
== "address-level3")
235 return HTML_TYPE_ADDRESS_LEVEL3
;
237 if (autocomplete_attribute_value
== "country")
238 return HTML_TYPE_COUNTRY_CODE
;
240 if (autocomplete_attribute_value
== "country-name")
241 return HTML_TYPE_COUNTRY_NAME
;
243 if (autocomplete_attribute_value
== "postal-code")
244 return HTML_TYPE_POSTAL_CODE
;
246 // content_switches.h isn't accessible from here, hence we have
247 // to copy the string literal. This should be removed soon anyway.
248 if (autocomplete_attribute_value
== "address" &&
249 base::CommandLine::ForCurrentProcess()->HasSwitch(
250 "enable-experimental-web-platform-features")) {
251 return HTML_TYPE_FULL_ADDRESS
;
254 if (autocomplete_attribute_value
== "cc-name")
255 return HTML_TYPE_CREDIT_CARD_NAME
;
257 if (autocomplete_attribute_value
== "cc-number")
258 return HTML_TYPE_CREDIT_CARD_NUMBER
;
260 if (autocomplete_attribute_value
== "cc-exp") {
261 if (field
.max_length
== 5)
262 return HTML_TYPE_CREDIT_CARD_EXP_DATE_2_DIGIT_YEAR
;
263 else if (field
.max_length
== 7)
264 return HTML_TYPE_CREDIT_CARD_EXP_DATE_4_DIGIT_YEAR
;
266 return HTML_TYPE_CREDIT_CARD_EXP
;
269 if (autocomplete_attribute_value
== "cc-exp-month")
270 return HTML_TYPE_CREDIT_CARD_EXP_MONTH
;
272 if (autocomplete_attribute_value
== "cc-exp-year") {
273 if (field
.max_length
== 2)
274 return HTML_TYPE_CREDIT_CARD_EXP_2_DIGIT_YEAR
;
275 else if (field
.max_length
== 4)
276 return HTML_TYPE_CREDIT_CARD_EXP_4_DIGIT_YEAR
;
278 return HTML_TYPE_CREDIT_CARD_EXP_YEAR
;
281 if (autocomplete_attribute_value
== "cc-csc")
282 return HTML_TYPE_CREDIT_CARD_VERIFICATION_CODE
;
284 if (autocomplete_attribute_value
== "cc-type")
285 return HTML_TYPE_CREDIT_CARD_TYPE
;
287 if (autocomplete_attribute_value
== "transaction-amount")
288 return HTML_TYPE_TRANSACTION_AMOUNT
;
290 if (autocomplete_attribute_value
== "transaction-currency")
291 return HTML_TYPE_TRANSACTION_CURRENCY
;
293 if (autocomplete_attribute_value
== "tel")
294 return HTML_TYPE_TEL
;
296 if (autocomplete_attribute_value
== "tel-country-code")
297 return HTML_TYPE_TEL_COUNTRY_CODE
;
299 if (autocomplete_attribute_value
== "tel-national")
300 return HTML_TYPE_TEL_NATIONAL
;
302 if (autocomplete_attribute_value
== "tel-area-code")
303 return HTML_TYPE_TEL_AREA_CODE
;
305 if (autocomplete_attribute_value
== "tel-local")
306 return HTML_TYPE_TEL_LOCAL
;
308 if (autocomplete_attribute_value
== "tel-local-prefix")
309 return HTML_TYPE_TEL_LOCAL_PREFIX
;
311 if (autocomplete_attribute_value
== "tel-local-suffix")
312 return HTML_TYPE_TEL_LOCAL_SUFFIX
;
314 if (autocomplete_attribute_value
== "email")
315 return HTML_TYPE_EMAIL
;
317 return HTML_TYPE_UNKNOWN
;
320 std::string
StripDigitsIfRequired(const base::string16
& input
) {
321 UErrorCode status
= U_ZERO_ERROR
;
322 CR_DEFINE_STATIC_LOCAL(icu::UnicodeString
, icu_pattern
,
323 (kIgnorePatternInFieldName
));
324 CR_DEFINE_STATIC_LOCAL(icu::RegexMatcher
, matcher
,
325 (icu_pattern
, UREGEX_CASE_INSENSITIVE
, status
));
326 DCHECK_EQ(status
, U_ZERO_ERROR
);
328 icu::UnicodeString
icu_input(input
.data(), input
.length());
329 matcher
.reset(icu_input
);
331 icu::UnicodeString replaced_string
= matcher
.replaceAll("", status
);
333 std::string return_string
;
334 status
= U_ZERO_ERROR
;
335 base::UTF16ToUTF8(replaced_string
.getBuffer(),
336 static_cast<size_t>(replaced_string
.length()),
338 if (status
!= U_ZERO_ERROR
) {
339 DVLOG(1) << "Couldn't strip digits in " << base::UTF16ToUTF8(input
);
340 return base::UTF16ToUTF8(input
);
343 return return_string
;
348 FormStructure::FormStructure(const FormData
& form
)
349 : form_name_(form
.name
),
350 source_url_(form
.origin
),
351 target_url_(form
.action
),
353 active_field_count_(0),
354 upload_required_(USE_UPLOAD_RATES
),
355 has_author_specified_types_(false),
356 has_password_field_(false),
357 is_form_tag_(form
.is_form_tag
) {
358 // Copy the form fields.
359 std::map
<base::string16
, size_t> unique_names
;
360 for (std::vector
<FormFieldData
>::const_iterator field
=
362 field
!= form
.fields
.end(); ++field
) {
363 if (!ShouldSkipField(*field
)) {
364 // Add all supported form fields (including with empty names) to the
365 // signature. This is a requirement for Autofill servers.
366 form_signature_field_names_
.append("&");
367 form_signature_field_names_
.append(StripDigitsIfRequired(field
->name
));
369 ++active_field_count_
;
372 if (field
->form_control_type
== "password")
373 has_password_field_
= true;
375 // Generate a unique name for this field by appending a counter to the name.
376 // Make sure to prefix the counter with a non-numeric character so that we are
377 // guaranteed to avoid collisions.
378 if (!unique_names
.count(field
->name
))
379 unique_names
[field
->name
] = 1;
381 ++unique_names
[field
->name
];
382 base::string16 unique_name
= field
->name
+ base::ASCIIToUTF16("_") +
383 base::IntToString16(unique_names
[field
->name
]);
384 fields_
.push_back(new AutofillField(*field
, unique_name
));
388 FormStructure::~FormStructure() {}
390 void FormStructure::DetermineHeuristicTypes() {
391 // First, try to detect field types based on each field's |autocomplete|
392 // attribute value. If there is at least one form field that specifies an
393 // autocomplete type hint, don't try to apply other heuristics to match fields to types.
395 bool has_author_specified_sections
;
396 ParseFieldTypesFromAutocompleteAttributes(&has_author_specified_types_
,
397 &has_author_specified_sections
);
399 if (!has_author_specified_types_
) {
400 ServerFieldTypeMap field_type_map
;
401 FormField::ParseFormFields(fields_
.get(), is_form_tag_
, &field_type_map
);
402 for (size_t i
= 0; i
< field_count(); ++i
) {
403 AutofillField
* field
= fields_
[i
];
404 ServerFieldTypeMap::iterator iter
=
405 field_type_map
.find(field
->unique_name());
406 if (iter
!= field_type_map
.end())
407 field
->set_heuristic_type(iter
->second
);
411 UpdateAutofillCount();
412 IdentifySections(has_author_specified_sections
);
414 if (IsAutofillable()) {
415 AutofillMetrics::LogDeveloperEngagementMetric(
416 AutofillMetrics::FILLABLE_FORM_PARSED
);
417 if (has_author_specified_types_
) {
418 AutofillMetrics::LogDeveloperEngagementMetric(
419 AutofillMetrics::FILLABLE_FORM_CONTAINS_TYPE_HINTS
);
424 bool FormStructure::EncodeUploadRequest(
425 const ServerFieldTypeSet
& available_field_types
,
426 bool form_was_autofilled
,
427 std::string
* encoded_xml
) const {
428 DCHECK(ShouldBeCrowdsourced());
430 // Verify that |available_field_types| agrees with the possible field types we
432 for (std::vector
<AutofillField
*>::const_iterator field
= begin();
435 for (ServerFieldTypeSet::const_iterator type
=
436 (*field
)->possible_types().begin();
437 type
!= (*field
)->possible_types().end();
439 DCHECK(*type
== UNKNOWN_TYPE
||
440 *type
== EMPTY_TYPE
||
441 available_field_types
.count(*type
));
445 // Set up the <autofillupload> element and its attributes.
446 buzz::XmlElement
autofill_request_xml(
447 (buzz::QName(kXMLElementAutofillUpload
)));
448 autofill_request_xml
.SetAttr(buzz::QName(kAttributeClientVersion
),
450 autofill_request_xml
.SetAttr(buzz::QName(kAttributeFormSignature
),
452 autofill_request_xml
.SetAttr(buzz::QName(kAttributeAutofillUsed
),
453 form_was_autofilled
? "true" : "false");
454 autofill_request_xml
.SetAttr(buzz::QName(kAttributeDataPresent
),
455 EncodeFieldTypes(available_field_types
).c_str());
457 if (!EncodeFormRequest(FormStructure::UPLOAD
, &autofill_request_xml
))
458 return false; // Malformed form, skip it.
460 // Obtain the XML structure as a string.
461 *encoded_xml
= kXMLDeclaration
;
462 *encoded_xml
+= autofill_request_xml
.Str().c_str();
464 // To enable this logging, run with the flag --vmodule="form_structure=2".
465 VLOG(2) << "\n" << *encoded_xml
;
470 bool FormStructure::EncodeFieldAssignments(
471 const ServerFieldTypeSet
& available_field_types
,
472 std::string
* encoded_xml
) const {
473 DCHECK(ShouldBeCrowdsourced());
475 // Set up the <fieldassignments> element and its attributes.
476 buzz::XmlElement
autofill_request_xml(
477 (buzz::QName(kXMLElementFieldAssignments
)));
478 autofill_request_xml
.SetAttr(buzz::QName(kAttributeFormSignature
),
481 if (!EncodeFormRequest(FormStructure::FIELD_ASSIGNMENTS
,
482 &autofill_request_xml
))
483 return false; // Malformed form, skip it.
485 // Obtain the XML structure as a string.
486 *encoded_xml
= kXMLDeclaration
;
487 *encoded_xml
+= autofill_request_xml
.Str().c_str();
493 bool FormStructure::EncodeQueryRequest(
494 const std::vector
<FormStructure
*>& forms
,
495 std::vector
<std::string
>* encoded_signatures
,
496 std::string
* encoded_xml
) {
497 DCHECK(encoded_signatures
);
499 encoded_xml
->clear();
500 encoded_signatures
->clear();
501 encoded_signatures
->reserve(forms
.size());
503 // Set up the <autofillquery> element and attributes.
504 buzz::XmlElement
autofill_request_xml(
505 (buzz::QName(kXMLElementAutofillQuery
)));
506 autofill_request_xml
.SetAttr(buzz::QName(kAttributeClientVersion
),
509 // Some badly formatted web sites repeat forms - detect that and encode only
510 // one form as returned data would be the same for all the repeated forms.
511 std::set
<std::string
> processed_forms
;
512 for (ScopedVector
<FormStructure
>::const_iterator it
= forms
.begin();
515 std::string
signature((*it
)->FormSignature());
516 if (processed_forms
.find(signature
) != processed_forms
.end())
518 processed_forms
.insert(signature
);
519 scoped_ptr
<buzz::XmlElement
> encompassing_xml_element(
520 new buzz::XmlElement(buzz::QName(kXMLElementForm
)));
521 encompassing_xml_element
->SetAttr(buzz::QName(kAttributeSignature
),
524 if (!(*it
)->EncodeFormRequest(FormStructure::QUERY
,
525 encompassing_xml_element
.get()))
526 continue; // Malformed form, skip it.
528 autofill_request_xml
.AddElement(encompassing_xml_element
.release());
529 encoded_signatures
->push_back(signature
);
532 if (!encoded_signatures
->size())
535 // Note: Chrome used to also set 'accepts="e"' (where 'e' is for experiments),
536 // but no longer sets this because support for experiments is deprecated. If
537 // it ever resurfaces, re-add code here to set the attribute accordingly.
539 // Obtain the XML structure as a string.
540 *encoded_xml
= kXMLDeclaration
;
541 *encoded_xml
+= autofill_request_xml
.Str().c_str();
547 void FormStructure::ParseQueryResponse(
548 const std::string
& response_xml
,
549 const std::vector
<FormStructure
*>& forms
) {
550 AutofillMetrics::LogServerQueryMetric(
551 AutofillMetrics::QUERY_RESPONSE_RECEIVED
);
553 // Parse the field types from the server response to the query.
554 std::vector
<AutofillServerFieldInfo
> field_infos
;
555 UploadRequired upload_required
;
556 AutofillQueryXmlParser
parse_handler(&field_infos
,
558 buzz::XmlParser
parser(&parse_handler
);
559 parser
.Parse(response_xml
.c_str(), response_xml
.length(), true);
560 if (!parse_handler
.succeeded())
563 AutofillMetrics::LogServerQueryMetric(AutofillMetrics::QUERY_RESPONSE_PARSED
);
565 bool heuristics_detected_fillable_field
= false;
566 bool query_response_overrode_heuristics
= false;
568 // Copy the field types into the actual form.
569 std::vector
<AutofillServerFieldInfo
>::iterator current_info
=
571 for (std::vector
<FormStructure
*>::const_iterator iter
= forms
.begin();
572 iter
!= forms
.end(); ++iter
) {
573 FormStructure
* form
= *iter
;
574 form
->upload_required_
= upload_required
;
576 for (std::vector
<AutofillField
*>::iterator field
= form
->fields_
.begin();
577 field
!= form
->fields_
.end(); ++field
) {
578 if (form
->ShouldSkipField(**field
))
581 // In some cases a *successful* response does not return all the fields.
582 // Quit the update of the types then.
583 if (current_info
== field_infos
.end())
586 // If |form->has_author_specified_types| only password fields should be updated.
588 if (!form
->has_author_specified_types_
||
589 (*field
)->form_control_type
== "password") {
590 // UNKNOWN_TYPE is reserved for use by the client.
591 DCHECK_NE(current_info
->field_type
, UNKNOWN_TYPE
);
593 ServerFieldType heuristic_type
= (*field
)->heuristic_type();
594 if (heuristic_type
!= UNKNOWN_TYPE
)
595 heuristics_detected_fillable_field
= true;
597 (*field
)->set_server_type(current_info
->field_type
);
598 if (heuristic_type
!= (*field
)->Type().GetStorableType())
599 query_response_overrode_heuristics
= true;
601 // Copy default value into the field if available.
602 if (!current_info
->default_value
.empty())
603 (*field
)->set_default_value(current_info
->default_value
);
609 form
->UpdateAutofillCount();
610 form
->IdentifySections(false);
613 AutofillMetrics::ServerQueryMetric metric
;
614 if (query_response_overrode_heuristics
) {
615 if (heuristics_detected_fillable_field
) {
616 metric
= AutofillMetrics::QUERY_RESPONSE_OVERRODE_LOCAL_HEURISTICS
;
618 metric
= AutofillMetrics::QUERY_RESPONSE_WITH_NO_LOCAL_HEURISTICS
;
621 metric
= AutofillMetrics::QUERY_RESPONSE_MATCHED_LOCAL_HEURISTICS
;
623 AutofillMetrics::LogServerQueryMetric(metric
);
627 std::vector
<FormDataPredictions
> FormStructure::GetFieldTypePredictions(
628 const std::vector
<FormStructure
*>& form_structures
) {
629 std::vector
<FormDataPredictions
> forms
;
630 forms
.reserve(form_structures
.size());
631 for (size_t i
= 0; i
< form_structures
.size(); ++i
) {
632 FormStructure
* form_structure
= form_structures
[i
];
633 FormDataPredictions form
;
634 form
.data
.name
= form_structure
->form_name_
;
635 form
.data
.origin
= form_structure
->source_url_
;
636 form
.data
.action
= form_structure
->target_url_
;
637 form
.data
.is_form_tag
= form_structure
->is_form_tag_
;
638 form
.signature
= form_structure
->FormSignature();
640 for (std::vector
<AutofillField
*>::const_iterator field
=
641 form_structure
->fields_
.begin();
642 field
!= form_structure
->fields_
.end(); ++field
) {
643 form
.data
.fields
.push_back(FormFieldData(**field
));
645 FormFieldDataPredictions annotated_field
;
646 annotated_field
.signature
= (*field
)->FieldSignature();
647 annotated_field
.heuristic_type
=
648 AutofillType((*field
)->heuristic_type()).ToString();
649 annotated_field
.server_type
=
650 AutofillType((*field
)->server_type()).ToString();
651 annotated_field
.overall_type
= (*field
)->Type().ToString();
652 form
.fields
.push_back(annotated_field
);
655 forms
.push_back(form
);
660 std::string
FormStructure::FormSignature() const {
661 std::string
scheme(target_url_
.scheme());
662 std::string
host(target_url_
.host());
664 // If target host or scheme is empty, set scheme and host of source url.
665 // This is done to match the Toolbar's behavior.
666 if (scheme
.empty() || host
.empty()) {
667 scheme
= source_url_
.scheme();
668 host
= source_url_
.host();
671 std::string form_string
= scheme
+ "://" + host
+ "&" +
672 base::UTF16ToUTF8(form_name_
) +
673 form_signature_field_names_
;
675 return Hash64Bit(form_string
);
678 bool FormStructure::ShouldSkipField(const FormFieldData
& field
) const {
679 return field
.is_checkable
;
682 bool FormStructure::IsAutofillable() const {
683 if (autofill_count() < kRequiredAutofillFields
)
686 return ShouldBeParsed();
689 void FormStructure::UpdateAutofillCount() {
691 for (std::vector
<AutofillField
*>::const_iterator iter
= begin();
692 iter
!= end(); ++iter
) {
693 AutofillField
* field
= *iter
;
694 if (field
&& field
->IsFieldFillable())
699 bool FormStructure::ShouldBeParsed() const {
700 if (active_field_count() < kRequiredAutofillFields
)
703 // Rule out http(s)://*/search?...
704 // e.g. http://www.google.com/search?q=...
705 // http://search.yahoo.com/search?p=...
706 if (target_url_
.path() == "/search")
709 bool has_text_field
= false;
710 for (std::vector
<AutofillField
*>::const_iterator it
= begin();
711 it
!= end() && !has_text_field
; ++it
) {
712 has_text_field
|= (*it
)->form_control_type
!= "select-one";
715 return has_text_field
;
718 bool FormStructure::ShouldBeCrowdsourced() const {
719 return (has_password_field_
|| !has_author_specified_types_
) &&
723 void FormStructure::UpdateFromCache(const FormStructure
& cached_form
) {
724 // Map from field signatures to cached fields.
725 std::map
<std::string
, const AutofillField
*> cached_fields
;
726 for (size_t i
= 0; i
< cached_form
.field_count(); ++i
) {
727 const AutofillField
* field
= cached_form
.field(i
);
728 cached_fields
[field
->FieldSignature()] = field
;
731 for (std::vector
<AutofillField
*>::const_iterator iter
= begin();
732 iter
!= end(); ++iter
) {
733 AutofillField
* field
= *iter
;
735 std::map
<std::string
, const AutofillField
*>::const_iterator
736 cached_field
= cached_fields
.find(field
->FieldSignature());
737 if (cached_field
!= cached_fields
.end()) {
738 if (field
->form_control_type
!= "select-one" &&
739 field
->value
== cached_field
->second
->value
) {
740 // From the perspective of learning user data, text fields containing
741 // default values are equivalent to empty fields.
742 field
->value
= base::string16();
745 field
->set_heuristic_type(cached_field
->second
->heuristic_type());
746 field
->set_server_type(cached_field
->second
->server_type());
747 field
->SetHtmlType(cached_field
->second
->html_type(),
748 cached_field
->second
->html_mode());
752 UpdateAutofillCount();
754 // The form signature should match between query and upload requests to the
755 // server. On many websites, form elements are dynamically added, removed, or
756 // rearranged via JavaScript between page load and form submission, so we
757 // copy over the |form_signature_field_names_| corresponding to the query request.
759 DCHECK_EQ(cached_form
.form_name_
, form_name_
);
760 DCHECK_EQ(cached_form
.source_url_
, source_url_
);
761 DCHECK_EQ(cached_form
.target_url_
, target_url_
);
762 form_signature_field_names_
= cached_form
.form_signature_field_names_
;
765 void FormStructure::LogQualityMetrics(
766 const base::TimeTicks
& load_time
,
767 const base::TimeTicks
& interaction_time
,
768 const base::TimeTicks
& submission_time
) const {
769 size_t num_detected_field_types
= 0;
770 bool did_autofill_all_possible_fields
= true;
771 bool did_autofill_some_possible_fields
= false;
772 for (size_t i
= 0; i
< field_count(); ++i
) {
773 const AutofillField
* field
= this->field(i
);
775 // No further logging for empty fields nor for fields where the entered data
776 // does not appear to already exist in the user's stored Autofill data.
777 const ServerFieldTypeSet
& field_types
= field
->possible_types();
778 DCHECK(!field_types
.empty());
779 if (field_types
.count(EMPTY_TYPE
) || field_types
.count(UNKNOWN_TYPE
))
782 // Similarly, no further logging for password fields. Those are primarily
783 // related to a different feature code path, and so make more sense to track
784 // outside of this metric.
785 if (field
->form_control_type
== "password")
788 ++num_detected_field_types
;
789 if (field
->is_autofilled
)
790 did_autofill_some_possible_fields
= true;
792 did_autofill_all_possible_fields
= false;
794 // Collapse field types that Chrome treats as identical, e.g. home and
795 // billing address fields.
796 ServerFieldTypeSet collapsed_field_types
;
797 for (ServerFieldTypeSet::const_iterator it
= field_types
.begin();
798 it
!= field_types
.end();
800 // Since we currently only support US phone numbers, the (city code + main
801 // digits) number is almost always identical to the whole phone number.
802 // TODO(isherman): Improve this logic once we add support for
803 // international numbers.
804 if (*it
== PHONE_HOME_CITY_AND_NUMBER
)
805 collapsed_field_types
.insert(PHONE_HOME_WHOLE_NUMBER
);
807 collapsed_field_types
.insert(AutofillType(*it
).GetStorableType());
810 // Capture the field's type, if it is unambiguous.
811 ServerFieldType field_type
= UNKNOWN_TYPE
;
812 if (collapsed_field_types
.size() == 1)
813 field_type
= *collapsed_field_types
.begin();
815 ServerFieldType heuristic_type
=
816 AutofillType(field
->heuristic_type()).GetStorableType();
817 ServerFieldType server_type
=
818 AutofillType(field
->server_type()).GetStorableType();
819 ServerFieldType predicted_type
= field
->Type().GetStorableType();
821 // Log heuristic, server, and overall type quality metrics, independently of
822 // whether the field was autofilled.
823 if (heuristic_type
== UNKNOWN_TYPE
) {
824 AutofillMetrics::LogHeuristicTypePrediction(AutofillMetrics::TYPE_UNKNOWN
,
826 } else if (field_types
.count(heuristic_type
)) {
827 AutofillMetrics::LogHeuristicTypePrediction(AutofillMetrics::TYPE_MATCH
,
830 AutofillMetrics::LogHeuristicTypePrediction(
831 AutofillMetrics::TYPE_MISMATCH
, field_type
);
834 if (server_type
== NO_SERVER_DATA
) {
835 AutofillMetrics::LogServerTypePrediction(AutofillMetrics::TYPE_UNKNOWN
,
837 } else if (field_types
.count(server_type
)) {
838 AutofillMetrics::LogServerTypePrediction(AutofillMetrics::TYPE_MATCH
,
841 AutofillMetrics::LogServerTypePrediction(AutofillMetrics::TYPE_MISMATCH
,
845 if (predicted_type
== UNKNOWN_TYPE
) {
846 AutofillMetrics::LogOverallTypePrediction(AutofillMetrics::TYPE_UNKNOWN
,
848 } else if (field_types
.count(predicted_type
)) {
849 AutofillMetrics::LogOverallTypePrediction(AutofillMetrics::TYPE_MATCH
,
852 AutofillMetrics::LogOverallTypePrediction(AutofillMetrics::TYPE_MISMATCH
,
857 if (num_detected_field_types
< kRequiredAutofillFields
) {
858 AutofillMetrics::LogUserHappinessMetric(
859 AutofillMetrics::SUBMITTED_NON_FILLABLE_FORM
);
861 if (did_autofill_all_possible_fields
) {
862 AutofillMetrics::LogUserHappinessMetric(
863 AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_ALL
);
864 } else if (did_autofill_some_possible_fields
) {
865 AutofillMetrics::LogUserHappinessMetric(
866 AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_SOME
);
868 AutofillMetrics::LogUserHappinessMetric(
869 AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_NONE
);
872 // Unlike the other times, the |submission_time| should always be available.
873 DCHECK(!submission_time
.is_null());
875 // The |load_time| might be unset, in the case that the form was dynamically
877 if (!load_time
.is_null()) {
878 // Submission should always chronologically follow form load.
879 DCHECK(submission_time
> load_time
);
880 base::TimeDelta elapsed
= submission_time
- load_time
;
881 if (did_autofill_some_possible_fields
)
882 AutofillMetrics::LogFormFillDurationFromLoadWithAutofill(elapsed
);
884 AutofillMetrics::LogFormFillDurationFromLoadWithoutAutofill(elapsed
);
887 // The |interaction_time| might be unset, in the case that the user
888 // submitted a blank form.
889 if (!interaction_time
.is_null()) {
890 // Submission should always chronologically follow interaction.
891 DCHECK(submission_time
> interaction_time
);
892 base::TimeDelta elapsed
= submission_time
- interaction_time
;
893 if (did_autofill_some_possible_fields
) {
894 AutofillMetrics::LogFormFillDurationFromInteractionWithAutofill(
897 AutofillMetrics::LogFormFillDurationFromInteractionWithoutAutofill(
904 const AutofillField
* FormStructure::field(size_t index
) const {
905 if (index
>= fields_
.size()) {
910 return fields_
[index
];
913 AutofillField
* FormStructure::field(size_t index
) {
914 return const_cast<AutofillField
*>(
915 static_cast<const FormStructure
*>(this)->field(index
));
918 size_t FormStructure::field_count() const {
919 return fields_
.size();
922 size_t FormStructure::active_field_count() const {
923 return active_field_count_
;
926 FormData
FormStructure::ToFormData() const {
927 // |data.user_submitted| will always be false.
929 data
.name
= form_name_
;
930 data
.origin
= source_url_
;
931 data
.action
= target_url_
;
933 for (size_t i
= 0; i
< fields_
.size(); ++i
) {
934 data
.fields
.push_back(FormFieldData(*fields_
[i
]));
940 bool FormStructure::operator==(const FormData
& form
) const {
941 // TODO(jhawkins): Is this enough to differentiate a form?
942 if (form_name_
== form
.name
&&
943 source_url_
== form
.origin
&&
944 target_url_
== form
.action
) {
948 // TODO(jhawkins): Compare field names, IDs and labels once we have labels
954 bool FormStructure::operator!=(const FormData
& form
) const {
955 return !operator==(form
);
958 std::string
FormStructure::Hash64Bit(const std::string
& str
) {
959 std::string hash_bin
= base::SHA1HashString(str
);
960 DCHECK_EQ(20U, hash_bin
.length());
962 uint64 hash64
= (((static_cast<uint64
>(hash_bin
[0])) & 0xFF) << 56) |
963 (((static_cast<uint64
>(hash_bin
[1])) & 0xFF) << 48) |
964 (((static_cast<uint64
>(hash_bin
[2])) & 0xFF) << 40) |
965 (((static_cast<uint64
>(hash_bin
[3])) & 0xFF) << 32) |
966 (((static_cast<uint64
>(hash_bin
[4])) & 0xFF) << 24) |
967 (((static_cast<uint64
>(hash_bin
[5])) & 0xFF) << 16) |
968 (((static_cast<uint64
>(hash_bin
[6])) & 0xFF) << 8) |
969 ((static_cast<uint64
>(hash_bin
[7])) & 0xFF);
971 return base::Uint64ToString(hash64
);
974 bool FormStructure::EncodeFormRequest(
975 FormStructure::EncodeRequestType request_type
,
976 buzz::XmlElement
* encompassing_xml_element
) const {
977 if (!field_count()) // Nothing to add.
980 // Some badly formatted web sites repeat fields - limit number of fields to
981 // 48, which is far larger than any valid form and XML still fits into 2K.
982 // Do not send requests for forms with more than this many fields, as they are
983 // near certainly not valid/auto-fillable.
984 const size_t kMaxFieldsOnTheForm
= 48;
985 if (field_count() > kMaxFieldsOnTheForm
)
988 // Add the child nodes for the form fields.
989 for (size_t index
= 0; index
< field_count(); ++index
) {
990 const AutofillField
* field
= fields_
[index
];
991 switch (request_type
) {
992 case FormStructure::UPLOAD
:
993 EncodeFieldForUpload(*field
, encompassing_xml_element
);
995 case FormStructure::QUERY
:
996 if (ShouldSkipField(*field
))
998 EncodeFieldForQuery(*field
, encompassing_xml_element
);
1000 case FormStructure::FIELD_ASSIGNMENTS
:
1001 EncodeFieldForFieldAssignments(*field
, encompassing_xml_element
);
1008 void FormStructure::ParseFieldTypesFromAutocompleteAttributes(
1010 bool* found_sections
) {
1011 const std::string kDefaultSection
= "-default";
1013 *found_types
= false;
1014 *found_sections
= false;
1015 for (std::vector
<AutofillField
*>::iterator it
= fields_
.begin();
1016 it
!= fields_
.end(); ++it
) {
1017 AutofillField
* field
= *it
;
1019 // To prevent potential section name collisions, add a default suffix for
1020 // other fields. Without this, 'autocomplete' attribute values
1021 // "section--shipping street-address" and "shipping street-address" would be
1022 // parsed identically, given the section handling code below. We do this
1023 // before any validation so that fields with invalid attributes still end up
1024 // in the default section. These default section names will be overridden
1025 // by subsequent heuristic parsing steps if there are no author-specified
1027 field
->set_section(kDefaultSection
);
1029 // Canonicalize the attribute value by trimming whitespace, collapsing
1030 // non-space characters (e.g. tab) to spaces, and converting to lowercase.
1031 std::string autocomplete_attribute
=
1032 base::CollapseWhitespaceASCII(field
->autocomplete_attribute
, false);
1033 autocomplete_attribute
= base::StringToLowerASCII(autocomplete_attribute
);
1035 // The autocomplete attribute is overloaded: it can specify either a field
1036 // type hint or whether autocomplete should be enabled at all. Ignore the
1037 // latter type of attribute value.
1038 if (autocomplete_attribute
.empty() ||
1039 autocomplete_attribute
== "on" ||
1040 autocomplete_attribute
== "off") {
1044 // Any other value, even it is invalid, is considered to be a type hint.
1045 // This allows a website's author to specify an attribute like
1046 // autocomplete="other" on a field to disable all Autofill heuristics for
1048 *found_types
= true;
1050 // Tokenize the attribute value. Per the spec, the tokens are parsed in
1052 std::vector
<std::string
> tokens
;
1053 Tokenize(autocomplete_attribute
, " ", &tokens
);
1055 // The final token must be the field type.
1056 // If it is not one of the known types, abort.
1057 DCHECK(!tokens
.empty());
1058 std::string field_type_token
= tokens
.back();
1060 HtmlFieldType field_type
=
1061 FieldTypeFromAutocompleteAttributeValue(field_type_token
, *field
);
1062 if (field_type
== HTML_TYPE_UNKNOWN
)
1065 // The preceding token, if any, may be a type hint.
1066 if (!tokens
.empty() && IsContactTypeHint(tokens
.back())) {
1067 // If it is, it must match the field type; otherwise, abort.
1068 // Note that an invalid token invalidates the entire attribute value, even
1069 // if the other tokens are valid.
1070 if (!ContactTypeHintMatchesFieldType(tokens
.back(), field_type
))
1073 // Chrome Autofill ignores these type hints.
1077 // The preceding token, if any, may be a fixed string that is either
1078 // "shipping" or "billing". Chrome Autofill treats these as implicit
1079 // section name suffixes.
1080 DCHECK_EQ(kDefaultSection
, field
->section());
1081 std::string section
= field
->section();
1082 HtmlFieldMode mode
= HTML_MODE_NONE
;
1083 if (!tokens
.empty()) {
1084 if (tokens
.back() == kShippingMode
)
1085 mode
= HTML_MODE_SHIPPING
;
1086 else if (tokens
.back() == kBillingMode
)
1087 mode
= HTML_MODE_BILLING
;
1090 if (mode
!= HTML_MODE_NONE
) {
1091 section
= "-" + tokens
.back();
1095 // The preceding token, if any, may be a named section.
1096 const std::string kSectionPrefix
= "section-";
1097 if (!tokens
.empty() &&
1098 StartsWithASCII(tokens
.back(), kSectionPrefix
, true)) {
1099 // Prepend this section name to the suffix set in the preceding block.
1100 section
= tokens
.back().substr(kSectionPrefix
.size()) + section
;
1104 // No other tokens are allowed. If there are any remaining, abort.
1105 if (!tokens
.empty())
1108 if (section
!= kDefaultSection
) {
1109 *found_sections
= true;
1110 field
->set_section(section
);
1113 // No errors encountered while parsing!
1114 // Update the |field|'s type based on what was parsed from the attribute.
1115 field
->SetHtmlType(field_type
, mode
);
1119 bool FormStructure::FillFields(
1120 const std::vector
<ServerFieldType
>& types
,
1121 const InputFieldComparator
& matches
,
1122 const base::Callback
<base::string16(const AutofillType
&)>& get_info
,
1123 const std::string
& address_language_code
,
1124 const std::string
& app_locale
) {
1125 bool filled_something
= false;
1126 for (size_t i
= 0; i
< field_count(); ++i
) {
1127 for (size_t j
= 0; j
< types
.size(); ++j
) {
1128 if (matches
.Run(types
[j
], *field(i
))) {
1129 AutofillField::FillFormField(*field(i
),
1130 get_info
.Run(field(i
)->Type()),
1131 address_language_code
,
1134 filled_something
= true;
1139 return filled_something
;
1142 std::set
<base::string16
> FormStructure::PossibleValues(ServerFieldType type
) {
1143 std::set
<base::string16
> values
;
1144 AutofillType
target_type(type
);
1145 for (std::vector
<AutofillField
*>::iterator iter
= fields_
.begin();
1146 iter
!= fields_
.end(); ++iter
) {
1147 AutofillField
* field
= *iter
;
1148 if (field
->Type().GetStorableType() != target_type
.GetStorableType() ||
1149 field
->Type().group() != target_type
.group()) {
1153 // No option values; anything goes.
1154 if (field
->option_values
.empty())
1155 return std::set
<base::string16
>();
1157 for (size_t i
= 0; i
< field
->option_values
.size(); ++i
) {
1158 if (!field
->option_values
[i
].empty())
1159 values
.insert(base::i18n::ToUpper(field
->option_values
[i
]));
1162 for (size_t i
= 0; i
< field
->option_contents
.size(); ++i
) {
1163 if (!field
->option_contents
[i
].empty())
1164 values
.insert(base::i18n::ToUpper(field
->option_contents
[i
]));
1171 base::string16
FormStructure::GetUniqueValue(HtmlFieldType type
) const {
1172 base::string16 value
;
1173 for (std::vector
<AutofillField
*>::const_iterator iter
= fields_
.begin();
1174 iter
!= fields_
.end(); ++iter
) {
1175 const AutofillField
* field
= *iter
;
1176 if (field
->html_type() != type
)
1179 // More than one value found; abort rather than choosing one arbitrarily.
1180 if (!value
.empty() && !field
->value
.empty())
1181 return base::string16();
1183 value
= field
->value
;
1189 void FormStructure::IdentifySections(bool has_author_specified_sections
) {
1190 if (fields_
.empty())
1193 if (!has_author_specified_sections
) {
1194 // Name sections after the first field in the section.
1195 base::string16 current_section
= fields_
.front()->unique_name();
1197 // Keep track of the types we've seen in this section.
1198 std::set
<ServerFieldType
> seen_types
;
1199 ServerFieldType previous_type
= UNKNOWN_TYPE
;
1201 for (AutofillField
* field
: fields_
) {
1202 const ServerFieldType current_type
= field
->Type().GetStorableType();
1204 bool already_saw_current_type
= seen_types
.count(current_type
) > 0;
1206 // Forms often ask for multiple phone numbers -- e.g. both a daytime and
1207 // evening phone number. Our phone number detection is also generally a
1208 // little off. Hence, ignore this field type as a signal here.
1209 if (AutofillType(current_type
).group() == PHONE_HOME
)
1210 already_saw_current_type
= false;
1212 // Ignore non-focusable field and presentation role fields while inferring
1213 // boundaries between sections.
1214 bool ignored_field
= !field
->is_focusable
||
1215 field
->role
== FormFieldData::ROLE_ATTRIBUTE_PRESENTATION
;
1217 already_saw_current_type
= false;
1219 // Some forms have adjacent fields of the same type. Two common examples:
1220 // * Forms with two email fields, where the second is meant to "confirm"
1222 // * Forms with a <select> menu for states in some countries, and a
1223 // freeform <input> field for states in other countries. (Usually,
1224 // only one of these two will be visible for any given choice of
1226 // Generally, adjacent fields of the same type belong in the same logical
1228 if (current_type
== previous_type
)
1229 already_saw_current_type
= false;
1231 if (current_type
!= UNKNOWN_TYPE
&& already_saw_current_type
) {
1232 // We reached the end of a section, so start a new section.
1234 current_section
= field
->unique_name();
1237 // Only consider a type "seen" if it was not ignored. Some forms have
1238 // sections for different locales, only one of which is enabled at a
1239 // time. Each section may duplicate some information (e.g. postal code)
1240 // and we don't want that to cause section splits.
1241 // Also only set |previous_type| when the field was not ignored. This
1242 // prevents ignored fields from breaking up fields that are otherwise
1244 if (!ignored_field
) {
1245 seen_types
.insert(current_type
);
1246 previous_type
= current_type
;
1249 field
->set_section(base::UTF16ToUTF8(current_section
));
1253 // Ensure that credit card and address fields are in separate sections.
1254 // This simplifies the section-aware logic in autofill_manager.cc.
1255 for (AutofillField
* field
: fields_
) {
1256 FieldTypeGroup field_type_group
= field
->Type().group();
1257 if (field_type_group
== CREDIT_CARD
)
1258 field
->set_section(field
->section() + "-cc");
1260 field
->set_section(field
->section() + "-default");
1264 } // namespace autofill