Move StartsWith[ASCII] to base namespace.
[chromium-blink-merge.git] / components / autofill / content / renderer / form_autofill_util.cc
blob51a2251258b73d0faf6bf61afbc8fec36b951d11
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/autofill/content/renderer/form_autofill_util.h"
7 #include <map>
8 #include <set>
10 #include "base/command_line.h"
11 #include "base/logging.h"
12 #include "base/memory/scoped_vector.h"
13 #include "base/strings/string_number_conversions.h"
14 #include "base/strings/string_util.h"
15 #include "base/strings/utf_string_conversions.h"
16 #include "components/autofill/core/common/autofill_data_validation.h"
17 #include "components/autofill/core/common/autofill_regexes.h"
18 #include "components/autofill/core/common/autofill_switches.h"
19 #include "components/autofill/core/common/form_data.h"
20 #include "components/autofill/core/common/form_field_data.h"
21 #include "third_party/WebKit/public/platform/WebString.h"
22 #include "third_party/WebKit/public/platform/WebVector.h"
23 #include "third_party/WebKit/public/web/WebDocument.h"
24 #include "third_party/WebKit/public/web/WebElement.h"
25 #include "third_party/WebKit/public/web/WebElementCollection.h"
26 #include "third_party/WebKit/public/web/WebFormControlElement.h"
27 #include "third_party/WebKit/public/web/WebFormElement.h"
28 #include "third_party/WebKit/public/web/WebInputElement.h"
29 #include "third_party/WebKit/public/web/WebLabelElement.h"
30 #include "third_party/WebKit/public/web/WebLocalFrame.h"
31 #include "third_party/WebKit/public/web/WebNode.h"
32 #include "third_party/WebKit/public/web/WebNodeList.h"
33 #include "third_party/WebKit/public/web/WebOptionElement.h"
34 #include "third_party/WebKit/public/web/WebSelectElement.h"
35 #include "third_party/WebKit/public/web/WebTextAreaElement.h"
37 using blink::WebDocument;
38 using blink::WebElement;
39 using blink::WebElementCollection;
40 using blink::WebFormControlElement;
41 using blink::WebFormElement;
42 using blink::WebFrame;
43 using blink::WebInputElement;
44 using blink::WebLabelElement;
45 using blink::WebNode;
46 using blink::WebNodeList;
47 using blink::WebOptionElement;
48 using blink::WebSelectElement;
49 using blink::WebTextAreaElement;
50 using blink::WebString;
51 using blink::WebVector;
53 namespace autofill {
54 namespace {
// Bit flags that tell the FillForm helpers which kinds of form control to
// leave untouched. Individual flags may be OR-ed together.
enum FieldFilterMask {
  // No filtering.
  FILTER_NONE = 0,
  // Skip controls that are not enabled.
  FILTER_DISABLED_ELEMENTS = 0x1,
  // Skip controls that are read-only.
  FILTER_READONLY_ELEMENTS = 0x2,
  // Skip controls that are not focusable.
  FILTER_NON_FOCUSABLE_ELEMENTS = 0x4,
  // All of the filters above combined.
  FILTER_ALL_NON_EDITABLE_ELEMENTS = FILTER_DISABLED_ELEMENTS |
                                     FILTER_READONLY_ELEMENTS |
                                     FILTER_NON_FOCUSABLE_ELEMENTS,
};
67 void TruncateString(base::string16* str, size_t max_length) {
68 if (str->length() > max_length)
69 str->resize(max_length);
72 bool IsOptionElement(const WebElement& element) {
73 CR_DEFINE_STATIC_LOCAL(WebString, kOption, ("option"));
74 return element.hasHTMLTagName(kOption);
77 bool IsScriptElement(const WebElement& element) {
78 CR_DEFINE_STATIC_LOCAL(WebString, kScript, ("script"));
79 return element.hasHTMLTagName(kScript);
82 bool IsNoScriptElement(const WebElement& element) {
83 CR_DEFINE_STATIC_LOCAL(WebString, kNoScript, ("noscript"));
84 return element.hasHTMLTagName(kNoScript);
87 bool HasTagName(const WebNode& node, const blink::WebString& tag) {
88 return node.isElementNode() && node.toConst<WebElement>().hasHTMLTagName(tag);
91 bool IsAutofillableElement(const WebFormControlElement& element) {
92 const WebInputElement* input_element = toWebInputElement(&element);
93 return IsAutofillableInputElement(input_element) ||
94 IsSelectElement(element) ||
95 IsTextAreaElement(element);
98 bool IsElementInControlElementSet(
99 const WebElement& element,
100 const std::vector<WebFormControlElement>& control_elements) {
101 if (!element.isFormControlElement())
102 return false;
103 const WebFormControlElement form_control_element =
104 element.toConst<WebFormControlElement>();
105 return std::find(control_elements.begin(),
106 control_elements.end(),
107 form_control_element) != control_elements.end();
110 bool IsElementInsideFormOrFieldSet(const WebElement& element) {
111 for (WebNode parent_node = element.parentNode();
112 !parent_node.isNull();
113 parent_node = parent_node.parentNode()) {
114 if (!parent_node.isElementNode())
115 continue;
117 WebElement cur_element = parent_node.to<WebElement>();
118 if (cur_element.hasHTMLTagName("form") ||
119 cur_element.hasHTMLTagName("fieldset")) {
120 return true;
123 return false;
126 // Returns true if |node| is an element and it is a container type that
127 // InferLabelForElement() can traverse.
128 bool IsTraversableContainerElement(const WebNode& node) {
129 if (!node.isElementNode())
130 return false;
132 std::string tag_name = node.toConst<WebElement>().tagName().utf8();
133 return (tag_name == "DD" ||
134 tag_name == "DIV" ||
135 tag_name == "FIELDSET" ||
136 tag_name == "LI" ||
137 tag_name == "TD" ||
138 tag_name == "TABLE");
141 // Returns the colspan for a <td> / <th>. Defaults to 1.
142 size_t CalculateTableCellColumnSpan(const WebElement& element) {
143 DCHECK(element.hasHTMLTagName("td") || element.hasHTMLTagName("th"));
145 size_t span = 1;
146 if (element.hasAttribute("colspan")) {
147 base::string16 colspan = element.getAttribute("colspan");
148 // Do not check return value to accept imperfect conversions.
149 base::StringToSizeT(colspan, &span);
150 // Handle overflow.
151 if (span == std::numeric_limits<size_t>::max())
152 span = 1;
153 span = std::max(span, static_cast<size_t>(1));
156 return span;
159 // Appends |suffix| to |prefix| so that any intermediary whitespace is collapsed
160 // to a single space. If |force_whitespace| is true, then the resulting string
161 // is guaranteed to have a space between |prefix| and |suffix|. Otherwise, the
162 // result includes a space only if |prefix| has trailing whitespace or |suffix|
163 // has leading whitespace.
164 // A few examples:
165 // * CombineAndCollapseWhitespace("foo", "bar", false) -> "foobar"
166 // * CombineAndCollapseWhitespace("foo", "bar", true) -> "foo bar"
167 // * CombineAndCollapseWhitespace("foo ", "bar", false) -> "foo bar"
168 // * CombineAndCollapseWhitespace("foo", " bar", false) -> "foo bar"
169 // * CombineAndCollapseWhitespace("foo", " bar", true) -> "foo bar"
170 // * CombineAndCollapseWhitespace("foo ", " bar", false) -> "foo bar"
171 // * CombineAndCollapseWhitespace(" foo", "bar ", false) -> " foobar "
172 // * CombineAndCollapseWhitespace(" foo", "bar ", true) -> " foo bar "
173 const base::string16 CombineAndCollapseWhitespace(
174 const base::string16& prefix,
175 const base::string16& suffix,
176 bool force_whitespace) {
177 base::string16 prefix_trimmed;
178 base::TrimPositions prefix_trailing_whitespace =
179 base::TrimWhitespace(prefix, base::TRIM_TRAILING, &prefix_trimmed);
181 // Recursively compute the children's text.
182 base::string16 suffix_trimmed;
183 base::TrimPositions suffix_leading_whitespace =
184 base::TrimWhitespace(suffix, base::TRIM_LEADING, &suffix_trimmed);
186 if (prefix_trailing_whitespace || suffix_leading_whitespace ||
187 force_whitespace) {
188 return prefix_trimmed + base::ASCIIToUTF16(" ") + suffix_trimmed;
189 } else {
190 return prefix_trimmed + suffix_trimmed;
194 // This is a helper function for the FindChildText() function (see below).
195 // Search depth is limited with the |depth| parameter.
196 // |divs_to_skip| is a list of <div> tags to ignore if encountered.
197 base::string16 FindChildTextInner(const WebNode& node,
198 int depth,
199 const std::set<WebNode>& divs_to_skip) {
200 if (depth <= 0 || node.isNull())
201 return base::string16();
203 // Skip over comments.
204 if (node.nodeType() == WebNode::CommentNode)
205 return FindChildTextInner(node.nextSibling(), depth - 1, divs_to_skip);
207 if (node.nodeType() != WebNode::ElementNode &&
208 node.nodeType() != WebNode::TextNode)
209 return base::string16();
211 // Ignore elements known not to contain inferable labels.
212 if (node.isElementNode()) {
213 const WebElement element = node.toConst<WebElement>();
214 if (IsOptionElement(element) ||
215 IsScriptElement(element) ||
216 IsNoScriptElement(element) ||
217 (element.isFormControlElement() &&
218 IsAutofillableElement(element.toConst<WebFormControlElement>()))) {
219 return base::string16();
222 if (element.hasHTMLTagName("div") && ContainsKey(divs_to_skip, node))
223 return base::string16();
226 // Extract the text exactly at this node.
227 base::string16 node_text = node.nodeValue();
229 // Recursively compute the children's text.
230 // Preserve inter-element whitespace separation.
231 base::string16 child_text =
232 FindChildTextInner(node.firstChild(), depth - 1, divs_to_skip);
233 bool add_space = node.nodeType() == WebNode::TextNode && node_text.empty();
234 node_text = CombineAndCollapseWhitespace(node_text, child_text, add_space);
236 // Recursively compute the siblings' text.
237 // Again, preserve inter-element whitespace separation.
238 base::string16 sibling_text =
239 FindChildTextInner(node.nextSibling(), depth - 1, divs_to_skip);
240 add_space = node.nodeType() == WebNode::TextNode && node_text.empty();
241 node_text = CombineAndCollapseWhitespace(node_text, sibling_text, add_space);
243 return node_text;
246 // Same as FindChildText() below, but with a list of div nodes to skip.
247 // TODO(thestig): See if other FindChildText() callers can benefit from this.
248 base::string16 FindChildTextWithIgnoreList(
249 const WebNode& node,
250 const std::set<WebNode>& divs_to_skip) {
251 if (node.isTextNode())
252 return node.nodeValue();
254 WebNode child = node.firstChild();
256 const int kChildSearchDepth = 10;
257 base::string16 node_text =
258 FindChildTextInner(child, kChildSearchDepth, divs_to_skip);
259 base::TrimWhitespace(node_text, base::TRIM_ALL, &node_text);
260 return node_text;
263 // Returns the aggregated values of the descendants of |element| that are
264 // non-empty text nodes. This is a faster alternative to |innerText()| for
265 // performance critical operations. It does a full depth-first search so can be
266 // used when the structure is not directly known. However, unlike with
267 // |innerText()|, the search depth and breadth are limited to a fixed threshold.
268 // Whitespace is trimmed from text accumulated at descendant nodes.
269 base::string16 FindChildText(const WebNode& node) {
270 return FindChildTextWithIgnoreList(node, std::set<WebNode>());
273 // Shared function for InferLabelFromPrevious() and InferLabelFromNext().
274 base::string16 InferLabelFromSibling(const WebFormControlElement& element,
275 bool forward) {
276 base::string16 inferred_label;
277 WebNode sibling = element;
278 while (true) {
279 sibling = forward ? sibling.nextSibling() : sibling.previousSibling();
280 if (sibling.isNull())
281 break;
283 // Skip over comments.
284 WebNode::NodeType node_type = sibling.nodeType();
285 if (node_type == WebNode::CommentNode)
286 continue;
288 // Otherwise, only consider normal HTML elements and their contents.
289 if (node_type != WebNode::TextNode &&
290 node_type != WebNode::ElementNode)
291 break;
293 // A label might be split across multiple "lightweight" nodes.
294 // Coalesce any text contained in multiple consecutive
295 // (a) plain text nodes or
296 // (b) inline HTML elements that are essentially equivalent to text nodes.
297 CR_DEFINE_STATIC_LOCAL(WebString, kBold, ("b"));
298 CR_DEFINE_STATIC_LOCAL(WebString, kStrong, ("strong"));
299 CR_DEFINE_STATIC_LOCAL(WebString, kSpan, ("span"));
300 CR_DEFINE_STATIC_LOCAL(WebString, kFont, ("font"));
301 if (sibling.isTextNode() ||
302 HasTagName(sibling, kBold) || HasTagName(sibling, kStrong) ||
303 HasTagName(sibling, kSpan) || HasTagName(sibling, kFont)) {
304 base::string16 value = FindChildText(sibling);
305 // A text node's value will be empty if it is for a line break.
306 bool add_space = sibling.isTextNode() && value.empty();
307 inferred_label =
308 CombineAndCollapseWhitespace(value, inferred_label, add_space);
309 continue;
312 // If we have identified a partial label and have reached a non-lightweight
313 // element, consider the label to be complete.
314 base::string16 trimmed_label;
315 base::TrimWhitespace(inferred_label, base::TRIM_ALL, &trimmed_label);
316 if (!trimmed_label.empty())
317 break;
319 // <img> and <br> tags often appear between the input element and its
320 // label text, so skip over them.
321 CR_DEFINE_STATIC_LOCAL(WebString, kImage, ("img"));
322 CR_DEFINE_STATIC_LOCAL(WebString, kBreak, ("br"));
323 if (HasTagName(sibling, kImage) || HasTagName(sibling, kBreak))
324 continue;
326 // We only expect <p> and <label> tags to contain the full label text.
327 CR_DEFINE_STATIC_LOCAL(WebString, kPage, ("p"));
328 CR_DEFINE_STATIC_LOCAL(WebString, kLabel, ("label"));
329 if (HasTagName(sibling, kPage) || HasTagName(sibling, kLabel))
330 inferred_label = FindChildText(sibling);
332 break;
335 base::TrimWhitespace(inferred_label, base::TRIM_ALL, &inferred_label);
336 return inferred_label;
339 // Helper for |InferLabelForElement()| that infers a label, if possible, from
340 // a previous sibling of |element|,
341 // e.g. Some Text <input ...>
342 // or Some <span>Text</span> <input ...>
343 // or <p>Some Text</p><input ...>
344 // or <label>Some Text</label> <input ...>
345 // or Some Text <img><input ...>
346 // or <b>Some Text</b><br/> <input ...>.
347 base::string16 InferLabelFromPrevious(const WebFormControlElement& element) {
348 return InferLabelFromSibling(element, false /* forward? */);
351 // Same as InferLabelFromPrevious(), but in the other direction.
352 // Useful for cases like: <span><input type="checkbox">Label For Checkbox</span>
353 base::string16 InferLabelFromNext(const WebFormControlElement& element) {
354 return InferLabelFromSibling(element, true /* forward? */);
357 // Helper for |InferLabelForElement()| that infers a label, if possible, from
358 // the placeholder text. e.g. <input placeholder="foo">
359 base::string16 InferLabelFromPlaceholder(const WebFormControlElement& element) {
360 CR_DEFINE_STATIC_LOCAL(WebString, kPlaceholder, ("placeholder"));
361 if (element.hasAttribute(kPlaceholder))
362 return element.getAttribute(kPlaceholder);
364 return base::string16();
367 // Helper for |InferLabelForElement()| that infers a label, if possible, from
368 // enclosing list item,
369 // e.g. <li>Some Text<input ...><input ...><input ...></tr>
370 base::string16 InferLabelFromListItem(const WebFormControlElement& element) {
371 WebNode parent = element.parentNode();
372 CR_DEFINE_STATIC_LOCAL(WebString, kListItem, ("li"));
373 while (!parent.isNull() && parent.isElementNode() &&
374 !parent.to<WebElement>().hasHTMLTagName(kListItem)) {
375 parent = parent.parentNode();
378 if (!parent.isNull() && HasTagName(parent, kListItem))
379 return FindChildText(parent);
381 return base::string16();
384 // Helper for |InferLabelForElement()| that infers a label, if possible, from
385 // surrounding table structure,
386 // e.g. <tr><td>Some Text</td><td><input ...></td></tr>
387 // or <tr><th>Some Text</th><td><input ...></td></tr>
388 // or <tr><td><b>Some Text</b></td><td><b><input ...></b></td></tr>
389 // or <tr><th><b>Some Text</b></th><td><b><input ...></b></td></tr>
390 base::string16 InferLabelFromTableColumn(const WebFormControlElement& element) {
391 CR_DEFINE_STATIC_LOCAL(WebString, kTableCell, ("td"));
392 WebNode parent = element.parentNode();
393 while (!parent.isNull() && parent.isElementNode() &&
394 !parent.to<WebElement>().hasHTMLTagName(kTableCell)) {
395 parent = parent.parentNode();
398 if (parent.isNull())
399 return base::string16();
401 // Check all previous siblings, skipping non-element nodes, until we find a
402 // non-empty text block.
403 base::string16 inferred_label;
404 WebNode previous = parent.previousSibling();
405 CR_DEFINE_STATIC_LOCAL(WebString, kTableHeader, ("th"));
406 while (inferred_label.empty() && !previous.isNull()) {
407 if (HasTagName(previous, kTableCell) || HasTagName(previous, kTableHeader))
408 inferred_label = FindChildText(previous);
410 previous = previous.previousSibling();
413 return inferred_label;
416 // Helper for |InferLabelForElement()| that infers a label, if possible, from
417 // surrounding table structure,
419 // If there are multiple cells and the row with the input matches up with the
420 // previous row, then look for a specific cell within the previous row.
421 // e.g. <tr><td>Input 1 label</td><td>Input 2 label</td></tr>
422 // <tr><td><input name="input 1"></td><td><input name="input2"></td></tr>
424 // Otherwise, just look in the entire previous row.
425 // e.g. <tr><td>Some Text</td></tr><tr><td><input ...></td></tr>
426 base::string16 InferLabelFromTableRow(const WebFormControlElement& element) {
427 CR_DEFINE_STATIC_LOCAL(WebString, kTableCell, ("td"));
428 base::string16 inferred_label;
430 // First find the <td> that contains |element|.
431 WebNode cell = element.parentNode();
432 while (!cell.isNull()) {
433 if (cell.isElementNode() &&
434 cell.to<WebElement>().hasHTMLTagName(kTableCell)) {
435 break;
437 cell = cell.parentNode();
440 // Not in a cell - bail out.
441 if (cell.isNull())
442 return inferred_label;
444 // Count the cell holding |element|.
445 size_t cell_count = CalculateTableCellColumnSpan(cell.to<WebElement>());
446 size_t cell_position = 0;
447 size_t cell_position_end = cell_count - 1;
449 // Count cells to the left to figure out |element|'s cell's position.
450 for (WebNode cell_it = cell.previousSibling();
451 !cell_it.isNull();
452 cell_it = cell_it.previousSibling()) {
453 if (cell_it.isElementNode() &&
454 cell_it.to<WebElement>().hasHTMLTagName(kTableCell)) {
455 cell_position += CalculateTableCellColumnSpan(cell_it.to<WebElement>());
459 // Count cells to the right.
460 for (WebNode cell_it = cell.nextSibling();
461 !cell_it.isNull();
462 cell_it = cell_it.nextSibling()) {
463 if (cell_it.isElementNode() &&
464 cell_it.to<WebElement>().hasHTMLTagName(kTableCell)) {
465 cell_count += CalculateTableCellColumnSpan(cell_it.to<WebElement>());
469 // Combine left + right.
470 cell_count += cell_position;
471 cell_position_end += cell_position;
473 // Find the current row.
474 CR_DEFINE_STATIC_LOCAL(WebString, kTableRow, ("tr"));
475 WebNode parent = element.parentNode();
476 while (!parent.isNull() && parent.isElementNode() &&
477 !parent.to<WebElement>().hasHTMLTagName(kTableRow)) {
478 parent = parent.parentNode();
481 if (parent.isNull())
482 return inferred_label;
484 // Now find the previous row.
485 WebNode row_it = parent.previousSibling();
486 while (!row_it.isNull()) {
487 if (row_it.isElementNode() &&
488 row_it.to<WebElement>().hasHTMLTagName(kTableRow)) {
489 break;
491 row_it = row_it.previousSibling();
494 // If there exists a previous row, check its cells and size. If they align
495 // with the current row, infer the label from the cell above.
496 if (!row_it.isNull()) {
497 WebNode matching_cell;
498 size_t prev_row_count = 0;
499 WebNode prev_row_it = row_it.firstChild();
500 CR_DEFINE_STATIC_LOCAL(WebString, kTableHeader, ("th"));
501 while (!prev_row_it.isNull()) {
502 if (prev_row_it.isElementNode()) {
503 WebElement prev_row_element = prev_row_it.to<WebElement>();
504 if (prev_row_element.hasHTMLTagName(kTableCell) ||
505 prev_row_element.hasHTMLTagName(kTableHeader)) {
506 size_t span = CalculateTableCellColumnSpan(prev_row_element);
507 size_t prev_row_count_end = prev_row_count + span - 1;
508 if (prev_row_count == cell_position &&
509 prev_row_count_end == cell_position_end) {
510 matching_cell = prev_row_it;
512 prev_row_count += span;
515 prev_row_it = prev_row_it.nextSibling();
517 if ((cell_count == prev_row_count) && !matching_cell.isNull()) {
518 inferred_label = FindChildText(matching_cell);
519 if (!inferred_label.empty())
520 return inferred_label;
524 // If there is no previous row, or if the previous row and current row do not
525 // align, check all previous siblings, skipping non-element nodes, until we
526 // find a non-empty text block.
527 WebNode previous = parent.previousSibling();
528 while (inferred_label.empty() && !previous.isNull()) {
529 if (HasTagName(previous, kTableRow))
530 inferred_label = FindChildText(previous);
532 previous = previous.previousSibling();
535 return inferred_label;
538 // Helper for |InferLabelForElement()| that infers a label, if possible, from
539 // a surrounding div table,
540 // e.g. <div>Some Text<span><input ...></span></div>
541 // e.g. <div>Some Text</div><div><input ...></div>
543 // Because this is already traversing the <div> structure, if it finds a <label>
544 // sibling along the way, infer from that <label>.
545 base::string16 InferLabelFromDivTable(const WebFormControlElement& element) {
546 WebNode node = element.parentNode();
547 bool looking_for_parent = true;
548 std::set<WebNode> divs_to_skip;
550 // Search the sibling and parent <div>s until we find a candidate label.
551 base::string16 inferred_label;
552 CR_DEFINE_STATIC_LOCAL(WebString, kDiv, ("div"));
553 CR_DEFINE_STATIC_LOCAL(WebString, kLabel, ("label"));
554 while (inferred_label.empty() && !node.isNull()) {
555 if (HasTagName(node, kDiv)) {
556 if (looking_for_parent)
557 inferred_label = FindChildTextWithIgnoreList(node, divs_to_skip);
558 else
559 inferred_label = FindChildText(node);
561 // Avoid sibling DIVs that contain autofillable fields.
562 if (!looking_for_parent && !inferred_label.empty()) {
563 CR_DEFINE_STATIC_LOCAL(WebString, kSelector,
564 ("input, select, textarea"));
565 blink::WebExceptionCode ec = 0;
566 WebElement result_element = node.querySelector(kSelector, ec);
567 if (!result_element.isNull()) {
568 inferred_label.clear();
569 divs_to_skip.insert(node);
573 looking_for_parent = false;
574 } else if (!looking_for_parent && HasTagName(node, kLabel)) {
575 WebLabelElement label_element = node.to<WebLabelElement>();
576 if (label_element.correspondingControl().isNull())
577 inferred_label = FindChildText(node);
578 } else if (looking_for_parent && IsTraversableContainerElement(node)) {
579 // If the element is in a non-div container, its label most likely is too.
580 break;
583 if (node.previousSibling().isNull()) {
584 // If there are no more siblings, continue walking up the tree.
585 looking_for_parent = true;
588 node = looking_for_parent ? node.parentNode() : node.previousSibling();
591 return inferred_label;
594 // Helper for |InferLabelForElement()| that infers a label, if possible, from
595 // a surrounding definition list,
596 // e.g. <dl><dt>Some Text</dt><dd><input ...></dd></dl>
597 // e.g. <dl><dt><b>Some Text</b></dt><dd><b><input ...></b></dd></dl>
598 base::string16 InferLabelFromDefinitionList(
599 const WebFormControlElement& element) {
600 CR_DEFINE_STATIC_LOCAL(WebString, kDefinitionData, ("dd"));
601 WebNode parent = element.parentNode();
602 while (!parent.isNull() && parent.isElementNode() &&
603 !parent.to<WebElement>().hasHTMLTagName(kDefinitionData))
604 parent = parent.parentNode();
606 if (parent.isNull() || !HasTagName(parent, kDefinitionData))
607 return base::string16();
609 // Skip by any intervening text nodes.
610 WebNode previous = parent.previousSibling();
611 while (!previous.isNull() && previous.isTextNode())
612 previous = previous.previousSibling();
614 CR_DEFINE_STATIC_LOCAL(WebString, kDefinitionTag, ("dt"));
615 if (previous.isNull() || !HasTagName(previous, kDefinitionTag))
616 return base::string16();
618 return FindChildText(previous);
621 // Returns the element type for all ancestor nodes in CAPS, starting with the
622 // parent node.
623 std::vector<std::string> AncestorTagNames(
624 const WebFormControlElement& element) {
625 std::vector<std::string> tag_names;
626 for (WebNode parent_node = element.parentNode();
627 !parent_node.isNull();
628 parent_node = parent_node.parentNode()) {
629 if (!parent_node.isElementNode())
630 continue;
632 tag_names.push_back(parent_node.to<WebElement>().tagName().utf8());
634 return tag_names;
637 // Infers corresponding label for |element| from surrounding context in the DOM,
638 // e.g. the contents of the preceding <p> tag or text element.
639 base::string16 InferLabelForElement(const WebFormControlElement& element) {
640 base::string16 inferred_label;
641 if (IsCheckableElement(toWebInputElement(&element))) {
642 inferred_label = InferLabelFromNext(element);
643 if (!inferred_label.empty())
644 return inferred_label;
647 inferred_label = InferLabelFromPrevious(element);
648 if (!inferred_label.empty())
649 return inferred_label;
651 // If we didn't find a label, check for placeholder text.
652 inferred_label = InferLabelFromPlaceholder(element);
653 if (!inferred_label.empty())
654 return inferred_label;
656 // For all other searches that involve traversing up the tree, the search
657 // order is based on which tag is the closest ancestor to |element|.
658 std::vector<std::string> tag_names = AncestorTagNames(element);
659 std::set<std::string> seen_tag_names;
660 for (const std::string& tag_name : tag_names) {
661 if (ContainsKey(seen_tag_names, tag_name))
662 continue;
664 seen_tag_names.insert(tag_name);
665 if (tag_name == "DIV") {
666 inferred_label = InferLabelFromDivTable(element);
667 } else if (tag_name == "TD") {
668 inferred_label = InferLabelFromTableColumn(element);
669 if (inferred_label.empty())
670 inferred_label = InferLabelFromTableRow(element);
671 } else if (tag_name == "DD") {
672 inferred_label = InferLabelFromDefinitionList(element);
673 } else if (tag_name == "LI") {
674 inferred_label = InferLabelFromListItem(element);
675 } else if (tag_name == "FIELDSET") {
676 break;
679 if (!inferred_label.empty())
680 break;
683 return inferred_label;
686 // Fills |option_strings| with the values of the <option> elements present in
687 // |select_element|.
688 void GetOptionStringsFromElement(const WebSelectElement& select_element,
689 std::vector<base::string16>* option_values,
690 std::vector<base::string16>* option_contents) {
691 DCHECK(!select_element.isNull());
693 option_values->clear();
694 option_contents->clear();
695 WebVector<WebElement> list_items = select_element.listItems();
697 // Constrain the maximum list length to prevent a malicious site from DOS'ing
698 // the browser, without entirely breaking autocomplete for some extreme
699 // legitimate sites: http://crbug.com/49332 and http://crbug.com/363094
700 if (list_items.size() > kMaxListSize)
701 return;
703 option_values->reserve(list_items.size());
704 option_contents->reserve(list_items.size());
705 for (size_t i = 0; i < list_items.size(); ++i) {
706 if (IsOptionElement(list_items[i])) {
707 const WebOptionElement option = list_items[i].toConst<WebOptionElement>();
708 option_values->push_back(option.value());
709 option_contents->push_back(option.text());
714 // The callback type used by |ForEachMatchingFormField()|.
715 typedef void (*Callback)(const FormFieldData&,
716 bool, /* is_initiating_element */
717 blink::WebFormControlElement*);
719 void ForEachMatchingFormFieldCommon(
720 std::vector<WebFormControlElement>* control_elements,
721 const WebElement& initiating_element,
722 const FormData& data,
723 FieldFilterMask filters,
724 bool force_override,
725 const Callback& callback) {
726 DCHECK(control_elements);
727 if (control_elements->size() != data.fields.size()) {
728 // This case should be reachable only for pathological websites and tests,
729 // which add or remove form fields while the user is interacting with the
730 // Autofill popup.
731 return;
734 // It's possible that the site has injected fields into the form after the
735 // page has loaded, so we can't assert that the size of the cached control
736 // elements is equal to the size of the fields in |form|. Fortunately, the
737 // one case in the wild where this happens, paypal.com signup form, the fields
738 // are appended to the end of the form and are not visible.
739 for (size_t i = 0; i < control_elements->size(); ++i) {
740 WebFormControlElement* element = &(*control_elements)[i];
742 if (base::string16(element->nameForAutofill()) != data.fields[i].name) {
743 // This case should be reachable only for pathological websites, which
744 // rename form fields while the user is interacting with the Autofill
745 // popup. I (isherman) am not aware of any such websites, and so am
746 // optimistically including a NOTREACHED(). If you ever trip this check,
747 // please file a bug against me.
748 NOTREACHED();
749 continue;
752 bool is_initiating_element = (*element == initiating_element);
754 // Only autofill empty fields and the field that initiated the filling,
755 // i.e. the field the user is currently editing and interacting with.
756 const WebInputElement* input_element = toWebInputElement(element);
757 if (!force_override && !is_initiating_element &&
758 ((IsAutofillableInputElement(input_element) ||
759 IsTextAreaElement(*element)) &&
760 !element->value().isEmpty()))
761 continue;
763 if (((filters & FILTER_DISABLED_ELEMENTS) && !element->isEnabled()) ||
764 ((filters & FILTER_READONLY_ELEMENTS) && element->isReadOnly()) ||
765 ((filters & FILTER_NON_FOCUSABLE_ELEMENTS) && !element->isFocusable()))
766 continue;
768 callback(data.fields[i], is_initiating_element, element);
772 // For each autofillable field in |data| that matches a field in the |form|,
773 // the |callback| is invoked with the corresponding |form| field data.
774 void ForEachMatchingFormField(const WebFormElement& form_element,
775 const WebElement& initiating_element,
776 const FormData& data,
777 FieldFilterMask filters,
778 bool force_override,
779 const Callback& callback) {
780 std::vector<WebFormControlElement> control_elements =
781 ExtractAutofillableElementsInForm(form_element);
782 ForEachMatchingFormFieldCommon(&control_elements, initiating_element, data,
783 filters, force_override, callback);
786 // For each autofillable field in |data| that matches a field in the set of
787 // unowned autofillable form fields, the |callback| is invoked with the
788 // corresponding |data| field.
789 void ForEachMatchingUnownedFormField(const WebElement& initiating_element,
790 const FormData& data,
791 FieldFilterMask filters,
792 bool force_override,
793 const Callback& callback) {
794 if (initiating_element.isNull())
795 return;
797 std::vector<WebFormControlElement> control_elements =
798 GetUnownedAutofillableFormFieldElements(
799 initiating_element.document().all(), nullptr);
800 if (!IsElementInControlElementSet(initiating_element, control_elements))
801 return;
803 ForEachMatchingFormFieldCommon(&control_elements, initiating_element, data,
804 filters, force_override, callback);
807 // Sets the |field|'s value to the value in |data|.
808 // Also sets the "autofilled" attribute, causing the background to be yellow.
809 void FillFormField(const FormFieldData& data,
810 bool is_initiating_node,
811 blink::WebFormControlElement* field) {
812 // Nothing to fill.
813 if (data.value.empty())
814 return;
816 if (!data.is_autofilled)
817 return;
819 WebInputElement* input_element = toWebInputElement(field);
820 if (IsCheckableElement(input_element)) {
821 input_element->setChecked(data.is_checked, true);
822 } else {
823 base::string16 value = data.value;
824 if (IsTextInput(input_element) || IsMonthInput(input_element)) {
825 // If the maxlength attribute contains a negative value, maxLength()
826 // returns the default maxlength value.
827 TruncateString(&value, input_element->maxLength());
829 field->setValue(value, true);
832 field->setAutofilled(true);
834 if (is_initiating_node &&
835 ((IsTextInput(input_element) || IsMonthInput(input_element)) ||
836 IsTextAreaElement(*field))) {
837 int length = field->value().length();
838 field->setSelectionRange(length, length);
839 // Clear the current IME composition (the underline), if there is one.
840 field->document().frame()->unmarkText();
844 // Sets the |field|'s "suggested" (non JS visible) value to the value in |data|.
845 // Also sets the "autofilled" attribute, causing the background to be yellow.
846 void PreviewFormField(const FormFieldData& data,
847 bool is_initiating_node,
848 blink::WebFormControlElement* field) {
849 // Nothing to preview.
850 if (data.value.empty())
851 return;
853 if (!data.is_autofilled)
854 return;
856 // Preview input, textarea and select fields. For input fields, excludes
857 // checkboxes and radio buttons, as there is no provision for
858 // setSuggestedCheckedValue in WebInputElement.
859 WebInputElement* input_element = toWebInputElement(field);
860 if (IsTextInput(input_element) || IsMonthInput(input_element)) {
861 // If the maxlength attribute contains a negative value, maxLength()
862 // returns the default maxlength value.
863 input_element->setSuggestedValue(
864 data.value.substr(0, input_element->maxLength()));
865 input_element->setAutofilled(true);
866 } else if (IsTextAreaElement(*field) || IsSelectElement(*field)) {
867 field->setSuggestedValue(data.value);
868 field->setAutofilled(true);
871 if (is_initiating_node &&
872 (IsTextInput(input_element) || IsTextAreaElement(*field))) {
873 // Select the part of the text that the user didn't type.
874 int start = field->value().length();
875 int end = field->suggestedValue().length();
876 field->setSelectionRange(start, end);
880 // Recursively checks whether |node| or any of its children have a non-empty
881 // bounding box. The recursion depth is bounded by |depth|.
882 bool IsWebNodeVisibleImpl(const blink::WebNode& node, const int depth) {
883 if (depth < 0)
884 return false;
885 if (node.hasNonEmptyBoundingBox())
886 return true;
888 // The childNodes method is not a const method. Therefore it cannot be called
889 // on a const reference. Therefore we need a const cast.
890 const blink::WebNodeList& children =
891 const_cast<blink::WebNode&>(node).childNodes();
892 size_t length = children.length();
893 for (size_t i = 0; i < length; ++i) {
894 const blink::WebNode& item = children.item(i);
895 if (IsWebNodeVisibleImpl(item, depth - 1))
896 return true;
898 return false;
901 // Extracts the fields from |control_elements| with |extract_mask| to
902 // |form_fields|. The extracted fields are also placed in |element_map|.
903 // |form_fields| and |element_map| should start out empty.
904 // |fields_extracted| should have as many elements as |control_elements|,
905 // initialized to false.
906 // Returns true if the number of fields extracted is within
907 // [1, kMaxParseableFields].
908 bool ExtractFieldsFromControlElements(
909 const WebVector<WebFormControlElement>& control_elements,
910 ExtractMask extract_mask,
911 ScopedVector<FormFieldData>* form_fields,
912 std::vector<bool>* fields_extracted,
913 std::map<WebFormControlElement, FormFieldData*>* element_map) {
914 DCHECK(form_fields->empty());
915 DCHECK(element_map->empty());
916 DCHECK_EQ(control_elements.size(), fields_extracted->size());
918 for (size_t i = 0; i < control_elements.size(); ++i) {
919 const WebFormControlElement& control_element = control_elements[i];
921 if (!IsAutofillableElement(control_element))
922 continue;
924 // Create a new FormFieldData, fill it out and map it to the field's name.
925 FormFieldData* form_field = new FormFieldData;
926 WebFormControlElementToFormField(control_element, extract_mask, form_field);
927 form_fields->push_back(form_field);
928 (*element_map)[control_element] = form_field;
929 (*fields_extracted)[i] = true;
931 // To avoid overly expensive computation, we impose a maximum number of
932 // allowable fields.
933 if (form_fields->size() > kMaxParseableFields)
934 return false;
937 // Succeeded if fields were extracted.
938 return !form_fields->empty();
941 // For each label element, get the corresponding form control element, use the
942 // form control element's name as a key into the
943 // <WebFormControlElement, FormFieldData> map to find the previously created
944 // FormFieldData and set the FormFieldData's label to the
945 // label.firstChild().nodeValue() of the label element.
946 void MatchLabelsAndFields(
947 const WebElementCollection& labels,
948 std::map<WebFormControlElement, FormFieldData*>* element_map) {
949 CR_DEFINE_STATIC_LOCAL(WebString, kFor, ("for"));
950 CR_DEFINE_STATIC_LOCAL(WebString, kHidden, ("hidden"));
952 for (WebElement item = labels.firstItem(); !item.isNull();
953 item = labels.nextItem()) {
954 WebLabelElement label = item.to<WebLabelElement>();
955 WebFormControlElement field_element =
956 label.correspondingControl().to<WebFormControlElement>();
957 FormFieldData* field_data = nullptr;
959 if (field_element.isNull()) {
960 // Sometimes site authors will incorrectly specify the corresponding
961 // field element's name rather than its id, so we compensate here.
962 base::string16 element_name = label.getAttribute(kFor);
963 if (element_name.empty())
964 continue;
965 // Look through the list for elements with this name. There can actually
966 // be more than one. In this case, the label may not be particularly
967 // useful, so just discard it.
968 for (const auto& iter : *element_map) {
969 if (iter.second->name == element_name) {
970 if (field_data) {
971 field_data = nullptr;
972 break;
973 } else {
974 field_data = iter.second;
978 } else if (!field_element.isFormControlElement() ||
979 field_element.formControlType() == kHidden) {
980 continue;
981 } else {
982 // Typical case: look up |field_data| in |element_map|.
983 auto iter = element_map->find(field_element);
984 if (iter == element_map->end())
985 continue;
986 field_data = iter->second;
989 if (!field_data)
990 continue;
992 base::string16 label_text = FindChildText(label);
994 // Concatenate labels because some sites might have multiple label
995 // candidates.
996 if (!field_data->label.empty() && !label_text.empty())
997 field_data->label += base::ASCIIToUTF16(" ");
998 field_data->label += label_text;
1002 // Common function shared by WebFormElementToFormData() and
1003 // UnownedFormElementsAndFieldSetsToFormData(). Either pass in:
1004 // 1) |form_element| and an empty |fieldsets|.
1005 // or
1006 // 2) a NULL |form_element|.
1008 // If |field| is not NULL, then |form_control_element| should be not NULL.
1009 bool FormOrFieldsetsToFormData(
1010 const blink::WebFormElement* form_element,
1011 const blink::WebFormControlElement* form_control_element,
1012 const std::vector<blink::WebElement>& fieldsets,
1013 const WebVector<WebFormControlElement>& control_elements,
1014 ExtractMask extract_mask,
1015 FormData* form,
1016 FormFieldData* field) {
1017 CR_DEFINE_STATIC_LOCAL(WebString, kLabel, ("label"));
1019 if (form_element)
1020 DCHECK(fieldsets.empty());
1021 if (field)
1022 DCHECK(form_control_element);
1024 // A map from a FormFieldData's name to the FormFieldData itself.
1025 std::map<WebFormControlElement, FormFieldData*> element_map;
1027 // The extracted FormFields. We use pointers so we can store them in
1028 // |element_map|.
1029 ScopedVector<FormFieldData> form_fields;
1031 // A vector of bools that indicate whether each field in the form meets the
1032 // requirements and thus will be in the resulting |form|.
1033 std::vector<bool> fields_extracted(control_elements.size(), false);
1035 if (!ExtractFieldsFromControlElements(control_elements, extract_mask,
1036 &form_fields, &fields_extracted,
1037 &element_map)) {
1038 return false;
1041 if (form_element) {
1042 // Loop through the label elements inside the form element. For each label
1043 // element, get the corresponding form control element, use the form control
1044 // element's name as a key into the <name, FormFieldData> map to find the
1045 // previously created FormFieldData and set the FormFieldData's label to the
1046 // label.firstChild().nodeValue() of the label element.
1047 WebElementCollection labels =
1048 form_element->getElementsByHTMLTagName(kLabel);
1049 DCHECK(!labels.isNull());
1050 MatchLabelsAndFields(labels, &element_map);
1051 } else {
1052 // Same as the if block, but for all the labels in fieldsets.
1053 for (size_t i = 0; i < fieldsets.size(); ++i) {
1054 WebElementCollection labels =
1055 fieldsets[i].getElementsByHTMLTagName(kLabel);
1056 DCHECK(!labels.isNull());
1057 MatchLabelsAndFields(labels, &element_map);
1061 // Loop through the form control elements, extracting the label text from
1062 // the DOM. We use the |fields_extracted| vector to make sure we assign the
1063 // extracted label to the correct field, as it's possible |form_fields| will
1064 // not contain all of the elements in |control_elements|.
1065 for (size_t i = 0, field_idx = 0;
1066 i < control_elements.size() && field_idx < form_fields.size(); ++i) {
1067 // This field didn't meet the requirements, so don't try to find a label
1068 // for it.
1069 if (!fields_extracted[i])
1070 continue;
1072 const WebFormControlElement& control_element = control_elements[i];
1073 if (form_fields[field_idx]->label.empty())
1074 form_fields[field_idx]->label = InferLabelForElement(control_element);
1075 TruncateString(&form_fields[field_idx]->label, kMaxDataLength);
1077 if (field && *form_control_element == control_element)
1078 *field = *form_fields[field_idx];
1080 ++field_idx;
1083 // Copy the created FormFields into the resulting FormData object.
1084 for (const auto& iter : form_fields)
1085 form->fields.push_back(*iter);
1086 return true;
1089 } // namespace
// Upper bound on the number of fields extracted from a single form.
// ExtractFieldsFromControlElements() fails once more than this many fields
// have been collected.
1091 const size_t kMaxParseableFields = 200;
1093 bool IsMonthInput(const WebInputElement* element) {
1094 CR_DEFINE_STATIC_LOCAL(WebString, kMonth, ("month"));
1095 return element && !element->isNull() && element->formControlType() == kMonth;
1098 // All text fields, including password fields, should be extracted.
1099 bool IsTextInput(const WebInputElement* element) {
1100 return element && !element->isNull() && element->isTextField();
1103 bool IsSelectElement(const WebFormControlElement& element) {
1104 // Static for improved performance.
1105 CR_DEFINE_STATIC_LOCAL(WebString, kSelectOne, ("select-one"));
1106 return !element.isNull() && element.formControlType() == kSelectOne;
1109 bool IsTextAreaElement(const WebFormControlElement& element) {
1110 // Static for improved performance.
1111 CR_DEFINE_STATIC_LOCAL(WebString, kTextArea, ("textarea"));
1112 return !element.isNull() && element.formControlType() == kTextArea;
1115 bool IsCheckableElement(const WebInputElement* element) {
1116 if (!element || element->isNull())
1117 return false;
1119 return element->isCheckbox() || element->isRadioButton();
1122 bool IsAutofillableInputElement(const WebInputElement* element) {
1123 return IsTextInput(element) ||
1124 IsMonthInput(element) ||
1125 IsCheckableElement(element);
1128 const base::string16 GetFormIdentifier(const WebFormElement& form) {
1129 base::string16 identifier = form.name();
1130 CR_DEFINE_STATIC_LOCAL(WebString, kId, ("id"));
1131 if (identifier.empty())
1132 identifier = form.getAttribute(kId);
1134 return identifier;
1137 bool IsWebNodeVisible(const blink::WebNode& node) {
1138 // In the bug http://crbug.com/237216 the form's bounding box is empty
1139 // however the form has non empty children. Thus we need to look at the
1140 // form's children.
1141 int kNodeSearchDepth = 2;
1142 return IsWebNodeVisibleImpl(node, kNodeSearchDepth);
1145 std::vector<blink::WebFormControlElement> ExtractAutofillableElementsFromSet(
1146 const WebVector<WebFormControlElement>& control_elements) {
1147 std::vector<blink::WebFormControlElement> autofillable_elements;
1148 for (size_t i = 0; i < control_elements.size(); ++i) {
1149 WebFormControlElement element = control_elements[i];
1150 if (!IsAutofillableElement(element))
1151 continue;
1153 autofillable_elements.push_back(element);
1155 return autofillable_elements;
1158 std::vector<WebFormControlElement> ExtractAutofillableElementsInForm(
1159 const WebFormElement& form_element) {
1160 WebVector<WebFormControlElement> control_elements;
1161 form_element.getFormControlElements(control_elements);
1163 return ExtractAutofillableElementsFromSet(control_elements);
1166 void WebFormControlElementToFormField(const WebFormControlElement& element,
1167 ExtractMask extract_mask,
1168 FormFieldData* field) {
1169 DCHECK(field);
1170 DCHECK(!element.isNull());
1171 CR_DEFINE_STATIC_LOCAL(WebString, kAutocomplete, ("autocomplete"));
1172 CR_DEFINE_STATIC_LOCAL(WebString, kRole, ("role"));
1174 // The label is not officially part of a WebFormControlElement; however, the
1175 // labels for all form control elements are scraped from the DOM and set in
1176 // WebFormElementToFormData.
1177 field->name = element.nameForAutofill();
1178 field->form_control_type = element.formControlType().utf8();
1179 field->autocomplete_attribute = element.getAttribute(kAutocomplete).utf8();
1180 if (field->autocomplete_attribute.size() > kMaxDataLength) {
1181 // Discard overly long attribute values to avoid DOS-ing the browser
1182 // process. However, send over a default string to indicate that the
1183 // attribute was present.
1184 field->autocomplete_attribute = "x-max-data-length-exceeded";
1186 if (base::LowerCaseEqualsASCII(element.getAttribute(kRole), "presentation"))
1187 field->role = FormFieldData::ROLE_ATTRIBUTE_PRESENTATION;
1189 if (!IsAutofillableElement(element))
1190 return;
1192 const WebInputElement* input_element = toWebInputElement(&element);
1193 if (IsAutofillableInputElement(input_element) ||
1194 IsTextAreaElement(element)) {
1195 field->is_autofilled = element.isAutofilled();
1196 field->is_focusable = element.isFocusable();
1197 field->should_autocomplete = element.autoComplete();
1198 field->text_direction = element.directionForFormData() ==
1199 "rtl" ? base::i18n::RIGHT_TO_LEFT : base::i18n::LEFT_TO_RIGHT;
1202 if (IsAutofillableInputElement(input_element)) {
1203 if (IsTextInput(input_element))
1204 field->max_length = input_element->maxLength();
1206 field->is_checkable = IsCheckableElement(input_element);
1207 field->is_checked = input_element->isChecked();
1208 } else if (IsTextAreaElement(element)) {
1209 // Nothing more to do in this case.
1210 } else if (extract_mask & EXTRACT_OPTIONS) {
1211 // Set option strings on the field if available.
1212 DCHECK(IsSelectElement(element));
1213 const WebSelectElement select_element = element.toConst<WebSelectElement>();
1214 GetOptionStringsFromElement(select_element,
1215 &field->option_values,
1216 &field->option_contents);
1219 if (!(extract_mask & EXTRACT_VALUE))
1220 return;
1222 base::string16 value = element.value();
1224 if (IsSelectElement(element) && (extract_mask & EXTRACT_OPTION_TEXT)) {
1225 const WebSelectElement select_element = element.toConst<WebSelectElement>();
1226 // Convert the |select_element| value to text if requested.
1227 WebVector<WebElement> list_items = select_element.listItems();
1228 for (size_t i = 0; i < list_items.size(); ++i) {
1229 if (IsOptionElement(list_items[i])) {
1230 const WebOptionElement option_element =
1231 list_items[i].toConst<WebOptionElement>();
1232 if (option_element.value() == value) {
1233 value = option_element.text();
1234 break;
1240 // Constrain the maximum data length to prevent a malicious site from DOS'ing
1241 // the browser: http://crbug.com/49332
1242 TruncateString(&value, kMaxDataLength);
1244 field->value = value;
1247 bool WebFormElementToFormData(
1248 const blink::WebFormElement& form_element,
1249 const blink::WebFormControlElement& form_control_element,
1250 ExtractMask extract_mask,
1251 FormData* form,
1252 FormFieldData* field) {
1253 const WebFrame* frame = form_element.document().frame();
1254 if (!frame)
1255 return false;
1257 form->name = GetFormIdentifier(form_element);
1258 form->origin = frame->document().url();
1259 form->action = frame->document().completeURL(form_element.action());
1260 form->user_submitted = form_element.wasUserSubmitted();
1262 // If the completed URL is not valid, just use the action we get from
1263 // WebKit.
1264 if (!form->action.is_valid())
1265 form->action = GURL(form_element.action());
1267 WebVector<WebFormControlElement> control_elements;
1268 form_element.getFormControlElements(control_elements);
1270 std::vector<blink::WebElement> dummy_fieldset;
1271 return FormOrFieldsetsToFormData(&form_element, &form_control_element,
1272 dummy_fieldset, control_elements,
1273 extract_mask, form, field);
1276 std::vector<WebFormControlElement>
1277 GetUnownedAutofillableFormFieldElements(
1278 const WebElementCollection& elements,
1279 std::vector<WebElement>* fieldsets) {
1280 std::vector<WebFormControlElement> unowned_fieldset_children;
1281 for (WebElement element = elements.firstItem();
1282 !element.isNull();
1283 element = elements.nextItem()) {
1284 if (element.isFormControlElement()) {
1285 WebFormControlElement control = element.to<WebFormControlElement>();
1286 if (control.form().isNull())
1287 unowned_fieldset_children.push_back(control);
1290 if (fieldsets && element.hasHTMLTagName("fieldset") &&
1291 !IsElementInsideFormOrFieldSet(element)) {
1292 fieldsets->push_back(element);
1295 return ExtractAutofillableElementsFromSet(unowned_fieldset_children);
1298 bool UnownedFormElementsAndFieldSetsToFormData(
1299 const std::vector<blink::WebElement>& fieldsets,
1300 const std::vector<blink::WebFormControlElement>& control_elements,
1301 const blink::WebFormControlElement* element,
1302 const blink::WebDocument& document,
1303 ExtractMask extract_mask,
1304 FormData* form,
1305 FormFieldData* field) {
1306 // Only attempt formless Autofill on checkout flows. This avoids the many
1307 // false positives found on the non-checkout web. See http://crbug.com/462375
1308 // For now this early abort only applies to English-language pages, because
1309 // the regex is not translated. Note that an empty "lang" attribute counts as
1310 // English. A potential problem is that this only checks document.title(), but
1311 // should actually check the main frame's title. Thus it may make bad
1312 // decisions for iframes.
1313 WebElement html_element = document.documentElement();
1314 std::string lang;
1315 if (!html_element.isNull())
1316 lang = html_element.getAttribute("lang").utf8();
1317 if ((lang.empty() || base::StartsWithASCII(lang, "en", false)) &&
1318 !MatchesPattern(document.title(),
1319 base::UTF8ToUTF16("payment|checkout|address|delivery|shipping"))) {
1320 return false;
1323 form->origin = document.url();
1324 form->user_submitted = false;
1325 form->is_form_tag = false;
1327 return FormOrFieldsetsToFormData(nullptr, element, fieldsets,
1328 control_elements, extract_mask, form, field);
1331 bool FindFormAndFieldForFormControlElement(const WebFormControlElement& element,
1332 FormData* form,
1333 FormFieldData* field) {
1334 if (!IsAutofillableElement(element))
1335 return false;
1337 ExtractMask extract_mask =
1338 static_cast<ExtractMask>(EXTRACT_VALUE | EXTRACT_OPTIONS);
1339 const WebFormElement form_element = element.form();
1340 if (form_element.isNull()) {
1341 // No associated form, try the synthetic form for unowned form elements.
1342 WebDocument document = element.document();
1343 std::vector<WebElement> fieldsets;
1344 std::vector<WebFormControlElement> control_elements =
1345 GetUnownedAutofillableFormFieldElements(document.all(), &fieldsets);
1346 return UnownedFormElementsAndFieldSetsToFormData(
1347 fieldsets, control_elements, &element, document, extract_mask,
1348 form, field);
1351 return WebFormElementToFormData(form_element,
1352 element,
1353 extract_mask,
1354 form,
1355 field);
1358 void FillForm(const FormData& form, const WebFormControlElement& element) {
1359 WebFormElement form_element = element.form();
1360 if (form_element.isNull()) {
1361 ForEachMatchingUnownedFormField(element,
1362 form,
1363 FILTER_ALL_NON_EDITABLE_ELEMENTS,
1364 false, /* dont force override */
1365 &FillFormField);
1366 return;
1369 ForEachMatchingFormField(form_element,
1370 element,
1371 form,
1372 FILTER_ALL_NON_EDITABLE_ELEMENTS,
1373 false, /* dont force override */
1374 &FillFormField);
1377 void FillFormIncludingNonFocusableElements(const FormData& form_data,
1378 const WebFormElement& form_element) {
1379 if (form_element.isNull()) {
1380 NOTREACHED();
1381 return;
1384 FieldFilterMask filter_mask = static_cast<FieldFilterMask>(
1385 FILTER_DISABLED_ELEMENTS | FILTER_READONLY_ELEMENTS);
1386 ForEachMatchingFormField(form_element,
1387 WebInputElement(),
1388 form_data,
1389 filter_mask,
1390 true, /* force override */
1391 &FillFormField);
1394 void PreviewForm(const FormData& form, const WebFormControlElement& element) {
1395 WebFormElement form_element = element.form();
1396 if (form_element.isNull()) {
1397 ForEachMatchingUnownedFormField(element,
1398 form,
1399 FILTER_ALL_NON_EDITABLE_ELEMENTS,
1400 false, /* dont force override */
1401 &PreviewFormField);
1402 return;
1405 ForEachMatchingFormField(form_element,
1406 element,
1407 form,
1408 FILTER_ALL_NON_EDITABLE_ELEMENTS,
1409 false, /* dont force override */
1410 &PreviewFormField);
1413 bool ClearPreviewedFormWithElement(const WebFormControlElement& element,
1414 bool was_autofilled) {
1415 WebFormElement form_element = element.form();
1416 std::vector<WebFormControlElement> control_elements;
1417 if (form_element.isNull()) {
1418 control_elements = GetUnownedAutofillableFormFieldElements(
1419 element.document().all(), nullptr);
1420 if (!IsElementInControlElementSet(element, control_elements))
1421 return false;
1422 } else {
1423 control_elements = ExtractAutofillableElementsInForm(form_element);
1426 for (size_t i = 0; i < control_elements.size(); ++i) {
1427 // There might be unrelated elements in this form which have already been
1428 // auto-filled. For example, the user might have already filled the address
1429 // part of a form and now be dealing with the credit card section. We only
1430 // want to reset the auto-filled status for fields that were previewed.
1431 WebFormControlElement control_element = control_elements[i];
1433 // Only text input, textarea and select elements can be previewed.
1434 WebInputElement* input_element = toWebInputElement(&control_element);
1435 if (!IsTextInput(input_element) &&
1436 !IsMonthInput(input_element) &&
1437 !IsTextAreaElement(control_element) &&
1438 !IsSelectElement(control_element))
1439 continue;
1441 // If the element is not auto-filled, we did not preview it,
1442 // so there is nothing to reset.
1443 if (!control_element.isAutofilled())
1444 continue;
1446 if ((IsTextInput(input_element) ||
1447 IsMonthInput(input_element) ||
1448 IsTextAreaElement(control_element) ||
1449 IsSelectElement(control_element)) &&
1450 control_element.suggestedValue().isEmpty())
1451 continue;
1453 // Clear the suggested value. For the initiating node, also restore the
1454 // original value.
1455 if (IsTextInput(input_element) || IsMonthInput(input_element) ||
1456 IsTextAreaElement(control_element)) {
1457 control_element.setSuggestedValue(WebString());
1458 bool is_initiating_node = (element == control_element);
1459 if (is_initiating_node) {
1460 control_element.setAutofilled(was_autofilled);
1461 // Clearing the suggested value in the focused node (above) can cause
1462 // selection to be lost. We force selection range to restore the text
1463 // cursor.
1464 int length = control_element.value().length();
1465 control_element.setSelectionRange(length, length);
1466 } else {
1467 control_element.setAutofilled(false);
1469 } else if (IsSelectElement(control_element)) {
1470 control_element.setSuggestedValue(WebString());
1471 control_element.setAutofilled(false);
1475 return true;
1478 bool IsWebpageEmpty(const blink::WebFrame* frame) {
1479 blink::WebDocument document = frame->document();
1481 return IsWebElementEmpty(document.head()) &&
1482 IsWebElementEmpty(document.body());
1485 bool IsWebElementEmpty(const blink::WebElement& element) {
1486 // This array contains all tags which can be present in an empty page.
1487 const char* const kAllowedValue[] = {
1488 "script",
1489 "meta",
1490 "title",
1492 const size_t kAllowedValueLength = arraysize(kAllowedValue);
1494 if (element.isNull())
1495 return true;
1496 // The childNodes method is not a const method. Therefore it cannot be called
1497 // on a const reference. Therefore we need a const cast.
1498 const blink::WebNodeList& children =
1499 const_cast<blink::WebElement&>(element).childNodes();
1500 for (size_t i = 0; i < children.length(); ++i) {
1501 const blink::WebNode& item = children.item(i);
1503 if (item.isTextNode() &&
1504 !base::ContainsOnlyChars(item.nodeValue().utf8(),
1505 base::kWhitespaceASCII))
1506 return false;
1508 // We ignore all other items with names which begin with
1509 // the character # because they are not html tags.
1510 if (item.nodeName().utf8()[0] == '#')
1511 continue;
1513 bool tag_is_allowed = false;
1514 // Test if the item name is in the kAllowedValue array
1515 for (size_t allowed_value_index = 0;
1516 allowed_value_index < kAllowedValueLength; ++allowed_value_index) {
1517 if (HasTagName(item,
1518 WebString::fromUTF8(kAllowedValue[allowed_value_index]))) {
1519 tag_is_allowed = true;
1520 break;
1523 if (!tag_is_allowed)
1524 return false;
1526 return true;
1529 gfx::RectF GetScaledBoundingBox(float scale, WebElement* element) {
1530 gfx::Rect bounding_box(element->boundsInViewportSpace());
1531 return gfx::RectF(bounding_box.x() * scale,
1532 bounding_box.y() * scale,
1533 bounding_box.width() * scale,
1534 bounding_box.height() * scale);
1537 } // namespace autofill