Move LowerCaseEqualsASCII to base namespace
[chromium-blink-merge.git] / components / autofill / content / renderer / password_form_conversion_utils.cc
blob24deefb77d47932c07f7b49ecd440408bf68abbd
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/autofill/content/renderer/password_form_conversion_utils.h"
7 #include <vector>
9 #include "base/lazy_instance.h"
10 #include "base/memory/scoped_ptr.h"
11 #include "base/metrics/histogram_macros.h"
12 #include "base/strings/string_util.h"
13 #include "components/autofill/content/renderer/form_autofill_util.h"
14 #include "components/autofill/core/common/password_form.h"
15 #include "components/autofill/core/common/password_form_field_prediction_map.h"
16 #include "third_party/WebKit/public/platform/WebString.h"
17 #include "third_party/WebKit/public/web/WebDocument.h"
18 #include "third_party/WebKit/public/web/WebFormControlElement.h"
19 #include "third_party/WebKit/public/web/WebInputElement.h"
20 #include "third_party/icu/source/i18n/unicode/regex.h"
22 using blink::WebDocument;
23 using blink::WebFormControlElement;
24 using blink::WebFormElement;
25 using blink::WebInputElement;
26 using blink::WebString;
27 using blink::WebVector;
29 namespace autofill {
30 namespace {
32 // Layout classification of password forms
33 // A layout sequence of a form is the sequence of its non-password and password
34 // input fields, represented by "N" and "P", respectively. A form like this
35 // <form>
36 // <input type='text' ...>
37 // <input type='hidden' ...>
38 // <input type='password' ...>
39 // <input type='submit' ...>
40 // </form>
41 // has the layout sequence "NP" -- "N" for the first field, and "P" for the
42 // third. The second and fourth fields are ignored, because they are not text
43 // fields.
45 // The code below classifies the layout (see PasswordForm::Layout) of a form
46 // based on its layout sequence. This is done by assigning layouts regular
47 // expressions over the alphabet {N, P}. LAYOUT_OTHER is implicitly the type
48 // corresponding to all layout sequences not matching any other layout.
// LAYOUT_LOGIN_AND_SIGNUP corresponds to the regular expression NPN+P.* over
// the alphabet {N, P}: a login section (NP) followed by a sign-up section
// (N+P.*). The pattern is meant to tell such forms apart from change-password
// forms (N*PPP?.*) and from forms that keep private but non-password data in
// password fields (which could look like, e.g., PN+P.*).
const char kLoginAndSignupRegex[] =
    "NP"   // Login part: a non-password field, then a password field.
    "N+P"  // Sign-up part: non-password fields, then a password field.
    ".*";  // The remainder of the sequence is unconstrained.
60 struct LoginAndSignupLazyInstanceTraits
61 : public base::DefaultLazyInstanceTraits<icu::RegexMatcher> {
62 static icu::RegexMatcher* New(void* instance) {
63 const icu::UnicodeString icu_pattern(kLoginAndSignupRegex);
65 UErrorCode status = U_ZERO_ERROR;
66 // Use placement new to initialize the instance in the preallocated space.
67 // The "(instance)" is very important to force POD type initialization.
68 scoped_ptr<icu::RegexMatcher> matcher(new (instance) icu::RegexMatcher(
69 icu_pattern, UREGEX_CASE_INSENSITIVE, status));
70 DCHECK(U_SUCCESS(status));
71 return matcher.release();
75 base::LazyInstance<icu::RegexMatcher, LoginAndSignupLazyInstanceTraits>
76 login_and_signup_matcher = LAZY_INSTANCE_INITIALIZER;
78 bool MatchesLoginAndSignupPattern(base::StringPiece layout_sequence) {
79 icu::RegexMatcher* matcher = login_and_signup_matcher.Pointer();
80 icu::UnicodeString icu_input(icu::UnicodeString::fromUTF8(
81 icu::StringPiece(layout_sequence.data(), layout_sequence.length())));
82 matcher->reset(icu_input);
84 UErrorCode status = U_ZERO_ERROR;
85 UBool match = matcher->find(0, status);
86 DCHECK(U_SUCCESS(status));
87 return match == TRUE;
90 // Given the sequence of non-password and password text input fields of a form,
91 // represented as a string of Ns (non-password) and Ps (password), computes the
92 // layout type of that form.
93 PasswordForm::Layout SequenceToLayout(base::StringPiece layout_sequence) {
94 if (MatchesLoginAndSignupPattern(layout_sequence))
95 return PasswordForm::Layout::LAYOUT_LOGIN_AND_SIGNUP;
96 return PasswordForm::Layout::LAYOUT_OTHER;
99 // Checks in a case-insensitive way if the autocomplete attribute for the given
100 // |element| is present and has the specified |value_in_lowercase|.
101 bool HasAutocompleteAttributeValue(const WebInputElement& element,
102 const char* value_in_lowercase) {
103 return base::LowerCaseEqualsASCII(element.getAttribute("autocomplete"),
104 value_in_lowercase);
107 // Helper to determine which password is the main (current) one, and which is
108 // the new password (e.g., on a sign-up or change password form), if any.
109 bool LocateSpecificPasswords(std::vector<WebInputElement> passwords,
110 WebInputElement* current_password,
111 WebInputElement* new_password) {
112 DCHECK(current_password && current_password->isNull());
113 DCHECK(new_password && new_password->isNull());
115 // First, look for elements marked with either autocomplete='current-password'
116 // or 'new-password' -- if we find any, take the hint, and treat the first of
117 // each kind as the element we are looking for.
118 for (std::vector<WebInputElement>::const_iterator it = passwords.begin();
119 it != passwords.end(); it++) {
120 if (HasAutocompleteAttributeValue(*it, "current-password") &&
121 current_password->isNull()) {
122 *current_password = *it;
123 } else if (HasAutocompleteAttributeValue(*it, "new-password") &&
124 new_password->isNull()) {
125 *new_password = *it;
129 // If we have seen an element with either of autocomplete attributes above,
130 // take that as a signal that the page author must have intentionally left the
131 // rest of the password fields unmarked. Perhaps they are used for other
132 // purposes, e.g., PINs, OTPs, and the like. So we skip all the heuristics we
133 // normally do, and ignore the rest of the password fields.
134 if (!current_password->isNull() || !new_password->isNull())
135 return true;
137 if (passwords.empty())
138 return false;
140 switch (passwords.size()) {
141 case 1:
142 // Single password, easy.
143 *current_password = passwords[0];
144 break;
145 case 2:
146 if (passwords[0].value() == passwords[1].value()) {
147 // Two identical passwords: assume we are seeing a new password with a
148 // confirmation. This can be either a sign-up form or a password change
149 // form that does not ask for the old password.
150 *new_password = passwords[0];
151 } else {
152 // Assume first is old password, second is new (no choice but to guess).
153 *current_password = passwords[0];
154 *new_password = passwords[1];
156 break;
157 default:
158 if (!passwords[0].value().isEmpty() &&
159 passwords[0].value() == passwords[1].value() &&
160 passwords[0].value() == passwords[2].value()) {
161 // All three passwords are the same and non-empty? This does not make
162 // any sense, give up.
163 return false;
164 } else if (passwords[1].value() == passwords[2].value()) {
165 // New password is the duplicated one, and comes second; or empty form
166 // with 3 password fields, in which case we will assume this layout.
167 *current_password = passwords[0];
168 *new_password = passwords[1];
169 } else if (passwords[0].value() == passwords[1].value()) {
170 // It is strange that the new password comes first, but trust more which
171 // fields are duplicated than the ordering of fields. Assume that
172 // any password fields after the new password contain sensitive
173 // information that isn't actually a password (security hint, SSN, etc.)
174 *new_password = passwords[0];
175 } else {
176 // Three different passwords, or first and last match with middle
177 // different. No idea which is which, so no luck.
178 return false;
181 return true;
184 void FindPredictedElements(
185 const WebFormElement& form,
186 const std::map<autofill::FormData,
187 autofill::PasswordFormFieldPredictionMap>& form_predictions,
188 WebVector<WebFormControlElement>* control_elements,
189 std::map<autofill::PasswordFormFieldPredictionType, WebInputElement>*
190 predicted_elements) {
191 FormData form_data;
192 if (!WebFormElementToFormData(form, WebFormControlElement(), EXTRACT_NONE,
193 &form_data, nullptr)) {
194 return;
197 // Matching only requires that action and name of the form match to allow
198 // the username to be updated even if the form is changed after page load.
199 // See https://crbug.com/476092 for more details.
200 auto predictions_iterator = form_predictions.begin();
201 for (;predictions_iterator != form_predictions.end();
202 ++predictions_iterator) {
203 if (predictions_iterator->first.action == form_data.action &&
204 predictions_iterator->first.name == form_data.name) {
205 break;
209 if (predictions_iterator == form_predictions.end())
210 return;
212 std::vector<blink::WebFormControlElement> autofillable_elements =
213 ExtractAutofillableElementsFromSet(*control_elements);
215 const autofill::PasswordFormFieldPredictionMap& field_predictions =
216 predictions_iterator->second;
217 for (autofill::PasswordFormFieldPredictionMap::const_iterator prediction =
218 field_predictions.begin();
219 prediction != field_predictions.end(); ++prediction) {
220 const autofill::PasswordFormFieldPredictionType& type = prediction->first;
221 const autofill::FormFieldData& target_field = prediction->second;
223 for (size_t i = 0; i < autofillable_elements.size(); ++i) {
224 if (autofillable_elements[i].nameForAutofill() == target_field.name) {
225 WebInputElement* input_element =
226 toWebInputElement(&autofillable_elements[i]);
227 if (input_element) {
228 (*predicted_elements)[type] = *input_element;
230 break;
236 // Get information about a login form encapsulated in a PasswordForm struct.
237 // If an element of |form| has an entry in |nonscript_modified_values|, the
238 // associated string is used instead of the element's value to create
239 // the PasswordForm.
240 void GetPasswordForm(
241 const WebFormElement& form,
242 PasswordForm* password_form,
243 const std::map<const blink::WebInputElement, blink::WebString>*
244 nonscript_modified_values,
245 const std::map<autofill::FormData,
246 autofill::PasswordFormFieldPredictionMap>*
247 form_predictions) {
248 WebInputElement latest_input_element;
249 WebInputElement username_element;
250 password_form->username_marked_by_site = false;
251 std::vector<WebInputElement> passwords;
252 std::vector<base::string16> other_possible_usernames;
254 WebVector<WebFormControlElement> control_elements;
255 form.getFormControlElements(control_elements);
257 std::string layout_sequence;
258 layout_sequence.reserve(control_elements.size());
259 for (size_t i = 0; i < control_elements.size(); ++i) {
260 WebFormControlElement control_element = control_elements[i];
261 if (control_element.isActivatedSubmit())
262 password_form->submit_element = control_element.formControlName();
264 WebInputElement* input_element = toWebInputElement(&control_element);
265 if (!input_element || !input_element->isEnabled())
266 continue;
268 if (input_element->isTextField()) {
269 if (input_element->isPasswordField())
270 layout_sequence.push_back('P');
271 else
272 layout_sequence.push_back('N');
275 // If the password field is readonly, the page is likely using a virtual
276 // keyboard and bypassing the password field value (see
277 // http://crbug.com/475488). There is nothing Chrome can do to fill
278 // passwords for now. Continue processing in case when the password field
279 // was made readonly by JavaScript before submission. We can do this by
280 // checking whether password element was updated not from JavaScript.
281 if (input_element->isPasswordField() &&
282 (!input_element->isReadOnly() ||
283 (nonscript_modified_values &&
284 nonscript_modified_values->find(*input_element) !=
285 nonscript_modified_values->end()) ||
286 HasAutocompleteAttributeValue(*input_element, "current_password") ||
287 HasAutocompleteAttributeValue(*input_element, "new-password"))) {
288 passwords.push_back(*input_element);
289 // If we have not yet considered any element to be the username so far,
290 // provisionally select the input element just before the first password
291 // element to be the username. This choice will be overruled if we later
292 // find an element with autocomplete='username'.
293 if (username_element.isNull() && !latest_input_element.isNull()) {
294 username_element = latest_input_element;
295 // Remove the selected username from other_possible_usernames.
296 if (!latest_input_element.value().isEmpty()) {
297 DCHECK(!other_possible_usernames.empty());
298 DCHECK_EQ(base::string16(latest_input_element.value()),
299 other_possible_usernames.back());
300 other_possible_usernames.pop_back();
305 // Various input types such as text, url, email can be a username field.
306 if (input_element->isTextField() && !input_element->isPasswordField()) {
307 if (HasAutocompleteAttributeValue(*input_element, "username")) {
308 if (password_form->username_marked_by_site) {
309 // A second or subsequent element marked with autocomplete='username'.
310 // This makes us less confident that we have understood the form. We
311 // will stick to our choice that the first such element was the real
312 // username, but will start collecting other_possible_usernames from
313 // the extra elements marked with autocomplete='username'. Note that
314 // unlike username_element, other_possible_usernames is used only for
315 // autofill, not for form identification, and blank autofill entries
316 // are not useful, so we do not collect empty strings.
317 if (!input_element->value().isEmpty())
318 other_possible_usernames.push_back(input_element->value());
319 } else {
320 // The first element marked with autocomplete='username'. Take the
321 // hint and treat it as the username (overruling the tentative choice
322 // we might have made before). Furthermore, drop all other possible
323 // usernames we have accrued so far: they come from fields not marked
324 // with the autocomplete attribute, making them unlikely alternatives.
325 username_element = *input_element;
326 password_form->username_marked_by_site = true;
327 other_possible_usernames.clear();
329 } else {
330 if (password_form->username_marked_by_site) {
331 // Having seen elements with autocomplete='username', elements without
332 // this attribute are no longer interesting. No-op.
333 } else {
334 // No elements marked with autocomplete='username' so far whatsoever.
335 // If we have not yet selected a username element even provisionally,
336 // then remember this element for the case when the next field turns
337 // out to be a password. Save a non-empty username as a possible
338 // alternative, at least for now.
339 if (username_element.isNull())
340 latest_input_element = *input_element;
341 if (!input_element->value().isEmpty())
342 other_possible_usernames.push_back(input_element->value());
347 password_form->layout = SequenceToLayout(layout_sequence);
349 std::map<autofill::PasswordFormFieldPredictionType, WebInputElement>
350 predicted_elements;
351 if (form_predictions) {
352 FindPredictedElements(form, *form_predictions, &control_elements,
353 &predicted_elements);
355 // Let server predictions override the selection of the username field. This
356 // allows instant adjusting without changing Chromium code.
357 if (!predicted_elements[autofill::PREDICTION_USERNAME].isNull() &&
358 username_element != predicted_elements[autofill::PREDICTION_USERNAME]) {
359 auto it =
360 find(other_possible_usernames.begin(), other_possible_usernames.end(),
361 predicted_elements[autofill::PREDICTION_USERNAME].value());
362 if (it != other_possible_usernames.end())
363 other_possible_usernames.erase(it);
364 if (!username_element.isNull()) {
365 other_possible_usernames.push_back(username_element.value());
367 username_element = predicted_elements[autofill::PREDICTION_USERNAME];
368 password_form->was_parsed_using_autofill_predictions = true;
371 if (!username_element.isNull()) {
372 password_form->username_element = username_element.nameForAutofill();
373 base::string16 username_value = username_element.value();
374 if (nonscript_modified_values != nullptr) {
375 auto username_iterator =
376 nonscript_modified_values->find(username_element);
377 if (username_iterator != nonscript_modified_values->end()) {
378 base::string16 typed_username_value = username_iterator->second;
379 if (!StartsWith(username_value, typed_username_value, false)) {
380 // We check that |username_value| was not obtained by autofilling
381 // |typed_username_value|. In case when it was, |typed_username_value|
382 // is incomplete, so we should leave autofilled value.
383 username_value = typed_username_value;
387 password_form->username_value = username_value;
390 WebInputElement password;
391 WebInputElement new_password;
392 if (!LocateSpecificPasswords(passwords, &password, &new_password))
393 return;
395 password_form->action = GetCanonicalActionForForm(form);
396 if (!password_form->action.is_valid())
397 return;
399 password_form->origin = GetCanonicalOriginForDocument(form.document());
400 GURL::Replacements rep;
401 rep.SetPathStr("");
402 password_form->signon_realm =
403 password_form->origin.ReplaceComponents(rep).spec();
404 password_form->other_possible_usernames.swap(other_possible_usernames);
406 if (!password.isNull()) {
407 password_form->password_element = password.nameForAutofill();
408 blink::WebString password_value = password.value();
409 if (nonscript_modified_values != nullptr) {
410 auto password_iterator = nonscript_modified_values->find(password);
411 if (password_iterator != nonscript_modified_values->end())
412 password_value = password_iterator->second;
414 password_form->password_value = password_value;
415 password_form->password_autocomplete_set = password.autoComplete();
417 if (!new_password.isNull()) {
418 password_form->new_password_element = new_password.nameForAutofill();
419 password_form->new_password_value = new_password.value();
420 if (HasAutocompleteAttributeValue(new_password, "new-password"))
421 password_form->new_password_marked_by_site = true;
424 if (username_element.isNull()) {
425 // To get a better idea on how password forms without a username field
426 // look like, report the total number of text and password fields.
427 UMA_HISTOGRAM_COUNTS_100(
428 "PasswordManager.EmptyUsernames.TextAndPasswordFieldCount",
429 layout_sequence.size());
430 // For comparison, also report the number of password fields.
431 UMA_HISTOGRAM_COUNTS_100(
432 "PasswordManager.EmptyUsernames.PasswordFieldCount",
433 std::count(layout_sequence.begin(), layout_sequence.end(), 'P'));
436 password_form->scheme = PasswordForm::SCHEME_HTML;
437 password_form->ssl_valid = false;
438 password_form->preferred = false;
439 password_form->blacklisted_by_user = false;
440 password_form->type = PasswordForm::TYPE_MANUAL;
443 GURL StripAuthAndParams(const GURL& gurl) {
444 // We want to keep the path but strip any authentication data, as well as
445 // query and ref portions of URL, for the form action and form origin.
446 GURL::Replacements rep;
447 rep.ClearUsername();
448 rep.ClearPassword();
449 rep.ClearQuery();
450 rep.ClearRef();
451 return gurl.ReplaceComponents(rep);
454 } // namespace
456 GURL GetCanonicalActionForForm(const WebFormElement& form) {
457 WebString action = form.action();
458 if (action.isNull())
459 action = WebString(""); // missing 'action' attribute implies current URL
460 GURL full_action(form.document().completeURL(action));
461 return StripAuthAndParams(full_action);
464 GURL GetCanonicalOriginForDocument(const WebDocument& document) {
465 GURL full_origin(document.url());
466 return StripAuthAndParams(full_origin);
469 scoped_ptr<PasswordForm> CreatePasswordForm(
470 const WebFormElement& web_form,
471 const std::map<const blink::WebInputElement, blink::WebString>*
472 nonscript_modified_values,
473 const std::map<autofill::FormData,
474 autofill::PasswordFormFieldPredictionMap>*
475 form_predictions) {
476 if (web_form.isNull())
477 return scoped_ptr<PasswordForm>();
479 scoped_ptr<PasswordForm> password_form(new PasswordForm());
480 GetPasswordForm(web_form, password_form.get(), nonscript_modified_values,
481 form_predictions);
483 if (!password_form->action.is_valid())
484 return scoped_ptr<PasswordForm>();
486 WebFormElementToFormData(web_form,
487 blink::WebFormControlElement(),
488 EXTRACT_NONE,
489 &password_form->form_data,
490 NULL /* FormFieldData */);
492 return password_form.Pass();
495 } // namespace autofill