1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/autofill/content/renderer/password_form_conversion_utils.h"
9 #include "base/lazy_instance.h"
10 #include "base/memory/scoped_ptr.h"
11 #include "base/metrics/histogram_macros.h"
12 #include "base/strings/string_util.h"
13 #include "components/autofill/content/renderer/form_autofill_util.h"
14 #include "components/autofill/core/common/password_form.h"
15 #include "components/autofill/core/common/password_form_field_prediction_map.h"
16 #include "third_party/WebKit/public/platform/WebString.h"
17 #include "third_party/WebKit/public/web/WebDocument.h"
18 #include "third_party/WebKit/public/web/WebFormControlElement.h"
19 #include "third_party/WebKit/public/web/WebInputElement.h"
20 #include "third_party/icu/source/i18n/unicode/regex.h"
22 using blink::WebDocument
;
23 using blink::WebFormControlElement
;
24 using blink::WebFormElement
;
25 using blink::WebInputElement
;
26 using blink::WebString
;
27 using blink::WebVector
;
32 // Layout classification of password forms
// A layout sequence of a form is the sequence of its non-password and password
34 // input fields, represented by "N" and "P", respectively. A form like this
36 // <input type='text' ...>
37 // <input type='hidden' ...>
38 // <input type='password' ...>
39 // <input type='submit' ...>
// has the layout sequence "NP" -- "N" for the first field, and "P" for the
// third. The second and fourth fields are ignored, because they are not text
// fields.
45 // The code below classifies the layout (see PasswordForm::Layout) of a form
46 // based on its layout sequence. This is done by assigning layouts regular
47 // expressions over the alphabet {N, P}. LAYOUT_OTHER is implicitly the type
48 // corresponding to all layout sequences not matching any other layout.
50 // LAYOUT_LOGIN_AND_SIGNUP is classified by NPN+P.*. This corresponds to a form
51 // which starts with a login section (NP) and continues with a sign-up section
52 // (N+P.*). The aim is to distinguish such forms from change password-forms
53 // (N*PPP?.*) and forms which use password fields to store private but
54 // non-password data (could look like, e.g., PN+P.*).
// Regular expression over the layout alphabet {N, P}. It is made of three
// consecutive parts:
//   "NP"  - the login section,
//   "N+P" - the sign-up section,
//   ".*"  - anything beyond that.
const char kLoginAndSignupRegex[] = "NPN+P.*";
60 struct LoginAndSignupLazyInstanceTraits
61 : public base::DefaultLazyInstanceTraits
<icu::RegexMatcher
> {
62 static icu::RegexMatcher
* New(void* instance
) {
63 const icu::UnicodeString
icu_pattern(kLoginAndSignupRegex
);
65 UErrorCode status
= U_ZERO_ERROR
;
66 // Use placement new to initialize the instance in the preallocated space.
67 // The "(instance)" is very important to force POD type initialization.
68 scoped_ptr
<icu::RegexMatcher
> matcher(new (instance
) icu::RegexMatcher(
69 icu_pattern
, UREGEX_CASE_INSENSITIVE
, status
));
70 DCHECK(U_SUCCESS(status
));
71 return matcher
.release();
75 base::LazyInstance
<icu::RegexMatcher
, LoginAndSignupLazyInstanceTraits
>
76 login_and_signup_matcher
= LAZY_INSTANCE_INITIALIZER
;
78 bool MatchesLoginAndSignupPattern(base::StringPiece layout_sequence
) {
79 icu::RegexMatcher
* matcher
= login_and_signup_matcher
.Pointer();
80 icu::UnicodeString
icu_input(icu::UnicodeString::fromUTF8(
81 icu::StringPiece(layout_sequence
.data(), layout_sequence
.length())));
82 matcher
->reset(icu_input
);
84 UErrorCode status
= U_ZERO_ERROR
;
85 UBool match
= matcher
->find(0, status
);
86 DCHECK(U_SUCCESS(status
));
90 // Given the sequence of non-password and password text input fields of a form,
91 // represented as a string of Ns (non-password) and Ps (password), computes the
92 // layout type of that form.
93 PasswordForm::Layout
SequenceToLayout(base::StringPiece layout_sequence
) {
94 if (MatchesLoginAndSignupPattern(layout_sequence
))
95 return PasswordForm::Layout::LAYOUT_LOGIN_AND_SIGNUP
;
96 return PasswordForm::Layout::LAYOUT_OTHER
;
99 // Checks in a case-insensitive way if the autocomplete attribute for the given
100 // |element| is present and has the specified |value_in_lowercase|.
101 bool HasAutocompleteAttributeValue(const WebInputElement
& element
,
102 const char* value_in_lowercase
) {
103 return base::LowerCaseEqualsASCII(element
.getAttribute("autocomplete"),
107 // Helper to determine which password is the main (current) one, and which is
108 // the new password (e.g., on a sign-up or change password form), if any.
109 bool LocateSpecificPasswords(std::vector
<WebInputElement
> passwords
,
110 WebInputElement
* current_password
,
111 WebInputElement
* new_password
) {
112 DCHECK(current_password
&& current_password
->isNull());
113 DCHECK(new_password
&& new_password
->isNull());
115 // First, look for elements marked with either autocomplete='current-password'
116 // or 'new-password' -- if we find any, take the hint, and treat the first of
117 // each kind as the element we are looking for.
118 for (std::vector
<WebInputElement
>::const_iterator it
= passwords
.begin();
119 it
!= passwords
.end(); it
++) {
120 if (HasAutocompleteAttributeValue(*it
, "current-password") &&
121 current_password
->isNull()) {
122 *current_password
= *it
;
123 } else if (HasAutocompleteAttributeValue(*it
, "new-password") &&
124 new_password
->isNull()) {
129 // If we have seen an element with either of autocomplete attributes above,
130 // take that as a signal that the page author must have intentionally left the
131 // rest of the password fields unmarked. Perhaps they are used for other
132 // purposes, e.g., PINs, OTPs, and the like. So we skip all the heuristics we
133 // normally do, and ignore the rest of the password fields.
134 if (!current_password
->isNull() || !new_password
->isNull())
137 if (passwords
.empty())
140 switch (passwords
.size()) {
142 // Single password, easy.
143 *current_password
= passwords
[0];
146 if (passwords
[0].value() == passwords
[1].value()) {
147 // Two identical passwords: assume we are seeing a new password with a
148 // confirmation. This can be either a sign-up form or a password change
149 // form that does not ask for the old password.
150 *new_password
= passwords
[0];
152 // Assume first is old password, second is new (no choice but to guess).
153 *current_password
= passwords
[0];
154 *new_password
= passwords
[1];
158 if (!passwords
[0].value().isEmpty() &&
159 passwords
[0].value() == passwords
[1].value() &&
160 passwords
[0].value() == passwords
[2].value()) {
161 // All three passwords are the same and non-empty? This does not make
162 // any sense, give up.
164 } else if (passwords
[1].value() == passwords
[2].value()) {
165 // New password is the duplicated one, and comes second; or empty form
166 // with 3 password fields, in which case we will assume this layout.
167 *current_password
= passwords
[0];
168 *new_password
= passwords
[1];
169 } else if (passwords
[0].value() == passwords
[1].value()) {
170 // It is strange that the new password comes first, but trust more which
171 // fields are duplicated than the ordering of fields. Assume that
172 // any password fields after the new password contain sensitive
173 // information that isn't actually a password (security hint, SSN, etc.)
174 *new_password
= passwords
[0];
176 // Three different passwords, or first and last match with middle
177 // different. No idea which is which, so no luck.
184 void FindPredictedElements(
185 const WebFormElement
& form
,
186 const std::map
<autofill::FormData
,
187 autofill::PasswordFormFieldPredictionMap
>& form_predictions
,
188 WebVector
<WebFormControlElement
>* control_elements
,
189 std::map
<autofill::PasswordFormFieldPredictionType
, WebInputElement
>*
190 predicted_elements
) {
192 if (!WebFormElementToFormData(form
, WebFormControlElement(), EXTRACT_NONE
,
193 &form_data
, nullptr)) {
197 // Matching only requires that action and name of the form match to allow
198 // the username to be updated even if the form is changed after page load.
199 // See https://crbug.com/476092 for more details.
200 auto predictions_iterator
= form_predictions
.begin();
201 for (;predictions_iterator
!= form_predictions
.end();
202 ++predictions_iterator
) {
203 if (predictions_iterator
->first
.action
== form_data
.action
&&
204 predictions_iterator
->first
.name
== form_data
.name
) {
209 if (predictions_iterator
== form_predictions
.end())
212 std::vector
<blink::WebFormControlElement
> autofillable_elements
=
213 ExtractAutofillableElementsFromSet(*control_elements
);
215 const autofill::PasswordFormFieldPredictionMap
& field_predictions
=
216 predictions_iterator
->second
;
217 for (autofill::PasswordFormFieldPredictionMap::const_iterator prediction
=
218 field_predictions
.begin();
219 prediction
!= field_predictions
.end(); ++prediction
) {
220 const autofill::PasswordFormFieldPredictionType
& type
= prediction
->first
;
221 const autofill::FormFieldData
& target_field
= prediction
->second
;
223 for (size_t i
= 0; i
< autofillable_elements
.size(); ++i
) {
224 if (autofillable_elements
[i
].nameForAutofill() == target_field
.name
) {
225 WebInputElement
* input_element
=
226 toWebInputElement(&autofillable_elements
[i
]);
228 (*predicted_elements
)[type
] = *input_element
;
236 // Get information about a login form encapsulated in a PasswordForm struct.
237 // If an element of |form| has an entry in |nonscript_modified_values|, the
238 // associated string is used instead of the element's value to create
240 void GetPasswordForm(
241 const WebFormElement
& form
,
242 PasswordForm
* password_form
,
243 const std::map
<const blink::WebInputElement
, blink::WebString
>*
244 nonscript_modified_values
,
245 const std::map
<autofill::FormData
,
246 autofill::PasswordFormFieldPredictionMap
>*
248 WebInputElement latest_input_element
;
249 WebInputElement username_element
;
250 password_form
->username_marked_by_site
= false;
251 std::vector
<WebInputElement
> passwords
;
252 std::vector
<base::string16
> other_possible_usernames
;
254 WebVector
<WebFormControlElement
> control_elements
;
255 form
.getFormControlElements(control_elements
);
257 std::string layout_sequence
;
258 layout_sequence
.reserve(control_elements
.size());
259 for (size_t i
= 0; i
< control_elements
.size(); ++i
) {
260 WebFormControlElement control_element
= control_elements
[i
];
261 if (control_element
.isActivatedSubmit())
262 password_form
->submit_element
= control_element
.formControlName();
264 WebInputElement
* input_element
= toWebInputElement(&control_element
);
265 if (!input_element
|| !input_element
->isEnabled())
268 if (input_element
->isTextField()) {
269 if (input_element
->isPasswordField())
270 layout_sequence
.push_back('P');
272 layout_sequence
.push_back('N');
275 // If the password field is readonly, the page is likely using a virtual
276 // keyboard and bypassing the password field value (see
277 // http://crbug.com/475488). There is nothing Chrome can do to fill
278 // passwords for now. Continue processing in case when the password field
279 // was made readonly by JavaScript before submission. We can do this by
280 // checking whether password element was updated not from JavaScript.
281 if (input_element
->isPasswordField() &&
282 (!input_element
->isReadOnly() ||
283 (nonscript_modified_values
&&
284 nonscript_modified_values
->find(*input_element
) !=
285 nonscript_modified_values
->end()) ||
286 HasAutocompleteAttributeValue(*input_element
, "current_password") ||
287 HasAutocompleteAttributeValue(*input_element
, "new-password"))) {
288 passwords
.push_back(*input_element
);
289 // If we have not yet considered any element to be the username so far,
290 // provisionally select the input element just before the first password
291 // element to be the username. This choice will be overruled if we later
292 // find an element with autocomplete='username'.
293 if (username_element
.isNull() && !latest_input_element
.isNull()) {
294 username_element
= latest_input_element
;
295 // Remove the selected username from other_possible_usernames.
296 if (!latest_input_element
.value().isEmpty()) {
297 DCHECK(!other_possible_usernames
.empty());
298 DCHECK_EQ(base::string16(latest_input_element
.value()),
299 other_possible_usernames
.back());
300 other_possible_usernames
.pop_back();
305 // Various input types such as text, url, email can be a username field.
306 if (input_element
->isTextField() && !input_element
->isPasswordField()) {
307 if (HasAutocompleteAttributeValue(*input_element
, "username")) {
308 if (password_form
->username_marked_by_site
) {
309 // A second or subsequent element marked with autocomplete='username'.
310 // This makes us less confident that we have understood the form. We
311 // will stick to our choice that the first such element was the real
312 // username, but will start collecting other_possible_usernames from
313 // the extra elements marked with autocomplete='username'. Note that
314 // unlike username_element, other_possible_usernames is used only for
315 // autofill, not for form identification, and blank autofill entries
316 // are not useful, so we do not collect empty strings.
317 if (!input_element
->value().isEmpty())
318 other_possible_usernames
.push_back(input_element
->value());
320 // The first element marked with autocomplete='username'. Take the
321 // hint and treat it as the username (overruling the tentative choice
322 // we might have made before). Furthermore, drop all other possible
323 // usernames we have accrued so far: they come from fields not marked
324 // with the autocomplete attribute, making them unlikely alternatives.
325 username_element
= *input_element
;
326 password_form
->username_marked_by_site
= true;
327 other_possible_usernames
.clear();
330 if (password_form
->username_marked_by_site
) {
331 // Having seen elements with autocomplete='username', elements without
332 // this attribute are no longer interesting. No-op.
334 // No elements marked with autocomplete='username' so far whatsoever.
335 // If we have not yet selected a username element even provisionally,
336 // then remember this element for the case when the next field turns
337 // out to be a password. Save a non-empty username as a possible
338 // alternative, at least for now.
339 if (username_element
.isNull())
340 latest_input_element
= *input_element
;
341 if (!input_element
->value().isEmpty())
342 other_possible_usernames
.push_back(input_element
->value());
347 password_form
->layout
= SequenceToLayout(layout_sequence
);
349 std::map
<autofill::PasswordFormFieldPredictionType
, WebInputElement
>
351 if (form_predictions
) {
352 FindPredictedElements(form
, *form_predictions
, &control_elements
,
353 &predicted_elements
);
355 // Let server predictions override the selection of the username field. This
356 // allows instant adjusting without changing Chromium code.
357 if (!predicted_elements
[autofill::PREDICTION_USERNAME
].isNull() &&
358 username_element
!= predicted_elements
[autofill::PREDICTION_USERNAME
]) {
360 find(other_possible_usernames
.begin(), other_possible_usernames
.end(),
361 predicted_elements
[autofill::PREDICTION_USERNAME
].value());
362 if (it
!= other_possible_usernames
.end())
363 other_possible_usernames
.erase(it
);
364 if (!username_element
.isNull()) {
365 other_possible_usernames
.push_back(username_element
.value());
367 username_element
= predicted_elements
[autofill::PREDICTION_USERNAME
];
368 password_form
->was_parsed_using_autofill_predictions
= true;
371 if (!username_element
.isNull()) {
372 password_form
->username_element
= username_element
.nameForAutofill();
373 base::string16 username_value
= username_element
.value();
374 if (nonscript_modified_values
!= nullptr) {
375 auto username_iterator
=
376 nonscript_modified_values
->find(username_element
);
377 if (username_iterator
!= nonscript_modified_values
->end()) {
378 base::string16 typed_username_value
= username_iterator
->second
;
379 if (!StartsWith(username_value
, typed_username_value
, false)) {
380 // We check that |username_value| was not obtained by autofilling
381 // |typed_username_value|. In case when it was, |typed_username_value|
382 // is incomplete, so we should leave autofilled value.
383 username_value
= typed_username_value
;
387 password_form
->username_value
= username_value
;
390 WebInputElement password
;
391 WebInputElement new_password
;
392 if (!LocateSpecificPasswords(passwords
, &password
, &new_password
))
395 password_form
->action
= GetCanonicalActionForForm(form
);
396 if (!password_form
->action
.is_valid())
399 password_form
->origin
= GetCanonicalOriginForDocument(form
.document());
400 GURL::Replacements rep
;
402 password_form
->signon_realm
=
403 password_form
->origin
.ReplaceComponents(rep
).spec();
404 password_form
->other_possible_usernames
.swap(other_possible_usernames
);
406 if (!password
.isNull()) {
407 password_form
->password_element
= password
.nameForAutofill();
408 blink::WebString password_value
= password
.value();
409 if (nonscript_modified_values
!= nullptr) {
410 auto password_iterator
= nonscript_modified_values
->find(password
);
411 if (password_iterator
!= nonscript_modified_values
->end())
412 password_value
= password_iterator
->second
;
414 password_form
->password_value
= password_value
;
415 password_form
->password_autocomplete_set
= password
.autoComplete();
417 if (!new_password
.isNull()) {
418 password_form
->new_password_element
= new_password
.nameForAutofill();
419 password_form
->new_password_value
= new_password
.value();
420 if (HasAutocompleteAttributeValue(new_password
, "new-password"))
421 password_form
->new_password_marked_by_site
= true;
424 if (username_element
.isNull()) {
425 // To get a better idea on how password forms without a username field
426 // look like, report the total number of text and password fields.
427 UMA_HISTOGRAM_COUNTS_100(
428 "PasswordManager.EmptyUsernames.TextAndPasswordFieldCount",
429 layout_sequence
.size());
430 // For comparison, also report the number of password fields.
431 UMA_HISTOGRAM_COUNTS_100(
432 "PasswordManager.EmptyUsernames.PasswordFieldCount",
433 std::count(layout_sequence
.begin(), layout_sequence
.end(), 'P'));
436 password_form
->scheme
= PasswordForm::SCHEME_HTML
;
437 password_form
->ssl_valid
= false;
438 password_form
->preferred
= false;
439 password_form
->blacklisted_by_user
= false;
440 password_form
->type
= PasswordForm::TYPE_MANUAL
;
443 GURL
StripAuthAndParams(const GURL
& gurl
) {
444 // We want to keep the path but strip any authentication data, as well as
445 // query and ref portions of URL, for the form action and form origin.
446 GURL::Replacements rep
;
451 return gurl
.ReplaceComponents(rep
);
456 GURL
GetCanonicalActionForForm(const WebFormElement
& form
) {
457 WebString action
= form
.action();
459 action
= WebString(""); // missing 'action' attribute implies current URL
460 GURL
full_action(form
.document().completeURL(action
));
461 return StripAuthAndParams(full_action
);
464 GURL
GetCanonicalOriginForDocument(const WebDocument
& document
) {
465 GURL
full_origin(document
.url());
466 return StripAuthAndParams(full_origin
);
469 scoped_ptr
<PasswordForm
> CreatePasswordForm(
470 const WebFormElement
& web_form
,
471 const std::map
<const blink::WebInputElement
, blink::WebString
>*
472 nonscript_modified_values
,
473 const std::map
<autofill::FormData
,
474 autofill::PasswordFormFieldPredictionMap
>*
476 if (web_form
.isNull())
477 return scoped_ptr
<PasswordForm
>();
479 scoped_ptr
<PasswordForm
> password_form(new PasswordForm());
480 GetPasswordForm(web_form
, password_form
.get(), nonscript_modified_values
,
483 if (!password_form
->action
.is_valid())
484 return scoped_ptr
<PasswordForm
>();
486 WebFormElementToFormData(web_form
,
487 blink::WebFormControlElement(),
489 &password_form
->form_data
,
490 NULL
/* FormFieldData */);
492 return password_form
.Pass();
495 } // namespace autofill