Revert "Fix broken channel icon in chrome://help on CrOS" and try again
[chromium-blink-merge.git] / components / autofill / content / renderer / form_autofill_util.cc
blobe2ccac055e098ea370ada709df8f0862b3a198f2
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/autofill/content/renderer/form_autofill_util.h"
7 #include <map>
8 #include <set>
10 #include "base/command_line.h"
11 #include "base/logging.h"
12 #include "base/memory/scoped_vector.h"
13 #include "base/strings/string_number_conversions.h"
14 #include "base/strings/string_util.h"
15 #include "base/strings/utf_string_conversions.h"
16 #include "components/autofill/core/common/autofill_data_validation.h"
17 #include "components/autofill/core/common/autofill_regexes.h"
18 #include "components/autofill/core/common/autofill_switches.h"
19 #include "components/autofill/core/common/autofill_util.h"
20 #include "components/autofill/core/common/form_data.h"
21 #include "components/autofill/core/common/form_field_data.h"
22 #include "third_party/WebKit/public/platform/WebString.h"
23 #include "third_party/WebKit/public/platform/WebVector.h"
24 #include "third_party/WebKit/public/web/WebDocument.h"
25 #include "third_party/WebKit/public/web/WebElement.h"
26 #include "third_party/WebKit/public/web/WebElementCollection.h"
27 #include "third_party/WebKit/public/web/WebFormControlElement.h"
28 #include "third_party/WebKit/public/web/WebFormElement.h"
29 #include "third_party/WebKit/public/web/WebInputElement.h"
30 #include "third_party/WebKit/public/web/WebLabelElement.h"
31 #include "third_party/WebKit/public/web/WebLocalFrame.h"
32 #include "third_party/WebKit/public/web/WebNode.h"
33 #include "third_party/WebKit/public/web/WebNodeList.h"
34 #include "third_party/WebKit/public/web/WebOptionElement.h"
35 #include "third_party/WebKit/public/web/WebSelectElement.h"
36 #include "third_party/WebKit/public/web/WebTextAreaElement.h"
38 using blink::WebDocument;
39 using blink::WebElement;
40 using blink::WebElementCollection;
41 using blink::WebFormControlElement;
42 using blink::WebFormElement;
43 using blink::WebFrame;
44 using blink::WebInputElement;
45 using blink::WebLabelElement;
46 using blink::WebNode;
47 using blink::WebNodeList;
48 using blink::WebOptionElement;
49 using blink::WebSelectElement;
50 using blink::WebTextAreaElement;
51 using blink::WebString;
52 using blink::WebVector;
54 namespace autofill {
55 namespace {
// Bit flags telling the form-filling helpers which kinds of form control must
// be skipped. Individual flags can be OR-ed together.
enum FieldFilterMask {
  // Skip nothing.
  FILTER_NONE = 0x0,
  // Skip controls that are not enabled.
  FILTER_DISABLED_ELEMENTS = 0x1,
  // Skip controls that are read-only.
  FILTER_READONLY_ELEMENTS = 0x2,
  // Skip controls that are not focusable.
  FILTER_NON_FOCUSABLE_ELEMENTS = 0x4,
  // Union of the three flags above.
  FILTER_ALL_NON_EDITABLE_ELEMENTS = FILTER_DISABLED_ELEMENTS |
                                     FILTER_READONLY_ELEMENTS |
                                     FILTER_NON_FOCUSABLE_ELEMENTS,
};
68 void TruncateString(base::string16* str, size_t max_length) {
69 if (str->length() > max_length)
70 str->resize(max_length);
73 bool IsOptionElement(const WebElement& element) {
74 CR_DEFINE_STATIC_LOCAL(WebString, kOption, ("option"));
75 return element.hasHTMLTagName(kOption);
78 bool IsScriptElement(const WebElement& element) {
79 CR_DEFINE_STATIC_LOCAL(WebString, kScript, ("script"));
80 return element.hasHTMLTagName(kScript);
83 bool IsNoScriptElement(const WebElement& element) {
84 CR_DEFINE_STATIC_LOCAL(WebString, kNoScript, ("noscript"));
85 return element.hasHTMLTagName(kNoScript);
88 bool HasTagName(const WebNode& node, const blink::WebString& tag) {
89 return node.isElementNode() && node.toConst<WebElement>().hasHTMLTagName(tag);
92 bool IsAutofillableElement(const WebFormControlElement& element) {
93 const WebInputElement* input_element = toWebInputElement(&element);
94 return IsAutofillableInputElement(input_element) ||
95 IsSelectElement(element) ||
96 IsTextAreaElement(element);
99 bool IsElementInControlElementSet(
100 const WebElement& element,
101 const std::vector<WebFormControlElement>& control_elements) {
102 if (!element.isFormControlElement())
103 return false;
104 const WebFormControlElement form_control_element =
105 element.toConst<WebFormControlElement>();
106 return std::find(control_elements.begin(),
107 control_elements.end(),
108 form_control_element) != control_elements.end();
111 bool IsElementInsideFormOrFieldSet(const WebElement& element) {
112 for (WebNode parent_node = element.parentNode();
113 !parent_node.isNull();
114 parent_node = parent_node.parentNode()) {
115 if (!parent_node.isElementNode())
116 continue;
118 WebElement cur_element = parent_node.to<WebElement>();
119 if (cur_element.hasHTMLTagName("form") ||
120 cur_element.hasHTMLTagName("fieldset")) {
121 return true;
124 return false;
127 // Returns true if |node| is an element and it is a container type that
128 // InferLabelForElement() can traverse.
129 bool IsTraversableContainerElement(const WebNode& node) {
130 if (!node.isElementNode())
131 return false;
133 std::string tag_name = node.toConst<WebElement>().tagName().utf8();
134 return (tag_name == "DD" ||
135 tag_name == "DIV" ||
136 tag_name == "FIELDSET" ||
137 tag_name == "LI" ||
138 tag_name == "TD" ||
139 tag_name == "TABLE");
142 // Returns the colspan for a <td> / <th>. Defaults to 1.
143 size_t CalculateTableCellColumnSpan(const WebElement& element) {
144 DCHECK(element.hasHTMLTagName("td") || element.hasHTMLTagName("th"));
146 size_t span = 1;
147 if (element.hasAttribute("colspan")) {
148 base::string16 colspan = element.getAttribute("colspan");
149 // Do not check return value to accept imperfect conversions.
150 base::StringToSizeT(colspan, &span);
151 // Handle overflow.
152 if (span == std::numeric_limits<size_t>::max())
153 span = 1;
154 span = std::max(span, static_cast<size_t>(1));
157 return span;
160 // Appends |suffix| to |prefix| so that any intermediary whitespace is collapsed
161 // to a single space. If |force_whitespace| is true, then the resulting string
162 // is guaranteed to have a space between |prefix| and |suffix|. Otherwise, the
163 // result includes a space only if |prefix| has trailing whitespace or |suffix|
164 // has leading whitespace.
165 // A few examples:
166 // * CombineAndCollapseWhitespace("foo", "bar", false) -> "foobar"
167 // * CombineAndCollapseWhitespace("foo", "bar", true) -> "foo bar"
168 // * CombineAndCollapseWhitespace("foo ", "bar", false) -> "foo bar"
169 // * CombineAndCollapseWhitespace("foo", " bar", false) -> "foo bar"
170 // * CombineAndCollapseWhitespace("foo", " bar", true) -> "foo bar"
171 // * CombineAndCollapseWhitespace("foo ", " bar", false) -> "foo bar"
172 // * CombineAndCollapseWhitespace(" foo", "bar ", false) -> " foobar "
173 // * CombineAndCollapseWhitespace(" foo", "bar ", true) -> " foo bar "
174 const base::string16 CombineAndCollapseWhitespace(
175 const base::string16& prefix,
176 const base::string16& suffix,
177 bool force_whitespace) {
178 base::string16 prefix_trimmed;
179 base::TrimPositions prefix_trailing_whitespace =
180 base::TrimWhitespace(prefix, base::TRIM_TRAILING, &prefix_trimmed);
182 // Recursively compute the children's text.
183 base::string16 suffix_trimmed;
184 base::TrimPositions suffix_leading_whitespace =
185 base::TrimWhitespace(suffix, base::TRIM_LEADING, &suffix_trimmed);
187 if (prefix_trailing_whitespace || suffix_leading_whitespace ||
188 force_whitespace) {
189 return prefix_trimmed + base::ASCIIToUTF16(" ") + suffix_trimmed;
190 } else {
191 return prefix_trimmed + suffix_trimmed;
195 // This is a helper function for the FindChildText() function (see below).
196 // Search depth is limited with the |depth| parameter.
197 // |divs_to_skip| is a list of <div> tags to ignore if encountered.
198 base::string16 FindChildTextInner(const WebNode& node,
199 int depth,
200 const std::set<WebNode>& divs_to_skip) {
201 if (depth <= 0 || node.isNull())
202 return base::string16();
204 // Skip over comments.
205 if (node.nodeType() == WebNode::CommentNode)
206 return FindChildTextInner(node.nextSibling(), depth - 1, divs_to_skip);
208 if (node.nodeType() != WebNode::ElementNode &&
209 node.nodeType() != WebNode::TextNode)
210 return base::string16();
212 // Ignore elements known not to contain inferable labels.
213 if (node.isElementNode()) {
214 const WebElement element = node.toConst<WebElement>();
215 if (IsOptionElement(element) ||
216 IsScriptElement(element) ||
217 IsNoScriptElement(element) ||
218 (element.isFormControlElement() &&
219 IsAutofillableElement(element.toConst<WebFormControlElement>()))) {
220 return base::string16();
223 if (element.hasHTMLTagName("div") && ContainsKey(divs_to_skip, node))
224 return base::string16();
227 // Extract the text exactly at this node.
228 base::string16 node_text = node.nodeValue();
230 // Recursively compute the children's text.
231 // Preserve inter-element whitespace separation.
232 base::string16 child_text =
233 FindChildTextInner(node.firstChild(), depth - 1, divs_to_skip);
234 bool add_space = node.nodeType() == WebNode::TextNode && node_text.empty();
235 node_text = CombineAndCollapseWhitespace(node_text, child_text, add_space);
237 // Recursively compute the siblings' text.
238 // Again, preserve inter-element whitespace separation.
239 base::string16 sibling_text =
240 FindChildTextInner(node.nextSibling(), depth - 1, divs_to_skip);
241 add_space = node.nodeType() == WebNode::TextNode && node_text.empty();
242 node_text = CombineAndCollapseWhitespace(node_text, sibling_text, add_space);
244 return node_text;
247 // Same as FindChildText() below, but with a list of div nodes to skip.
248 // TODO(thestig): See if other FindChildText() callers can benefit from this.
249 base::string16 FindChildTextWithIgnoreList(
250 const WebNode& node,
251 const std::set<WebNode>& divs_to_skip) {
252 if (node.isTextNode())
253 return node.nodeValue();
255 WebNode child = node.firstChild();
257 const int kChildSearchDepth = 10;
258 base::string16 node_text =
259 FindChildTextInner(child, kChildSearchDepth, divs_to_skip);
260 base::TrimWhitespace(node_text, base::TRIM_ALL, &node_text);
261 return node_text;
264 // Returns the aggregated values of the descendants of |element| that are
265 // non-empty text nodes. This is a faster alternative to |innerText()| for
266 // performance critical operations. It does a full depth-first search so can be
267 // used when the structure is not directly known. However, unlike with
268 // |innerText()|, the search depth and breadth are limited to a fixed threshold.
269 // Whitespace is trimmed from text accumulated at descendant nodes.
270 base::string16 FindChildText(const WebNode& node) {
271 return FindChildTextWithIgnoreList(node, std::set<WebNode>());
274 // Shared function for InferLabelFromPrevious() and InferLabelFromNext().
275 base::string16 InferLabelFromSibling(const WebFormControlElement& element,
276 bool forward) {
277 base::string16 inferred_label;
278 WebNode sibling = element;
279 while (true) {
280 sibling = forward ? sibling.nextSibling() : sibling.previousSibling();
281 if (sibling.isNull())
282 break;
284 // Skip over comments.
285 WebNode::NodeType node_type = sibling.nodeType();
286 if (node_type == WebNode::CommentNode)
287 continue;
289 // Otherwise, only consider normal HTML elements and their contents.
290 if (node_type != WebNode::TextNode &&
291 node_type != WebNode::ElementNode)
292 break;
294 // A label might be split across multiple "lightweight" nodes.
295 // Coalesce any text contained in multiple consecutive
296 // (a) plain text nodes or
297 // (b) inline HTML elements that are essentially equivalent to text nodes.
298 CR_DEFINE_STATIC_LOCAL(WebString, kBold, ("b"));
299 CR_DEFINE_STATIC_LOCAL(WebString, kStrong, ("strong"));
300 CR_DEFINE_STATIC_LOCAL(WebString, kSpan, ("span"));
301 CR_DEFINE_STATIC_LOCAL(WebString, kFont, ("font"));
302 if (sibling.isTextNode() ||
303 HasTagName(sibling, kBold) || HasTagName(sibling, kStrong) ||
304 HasTagName(sibling, kSpan) || HasTagName(sibling, kFont)) {
305 base::string16 value = FindChildText(sibling);
306 // A text node's value will be empty if it is for a line break.
307 bool add_space = sibling.isTextNode() && value.empty();
308 inferred_label =
309 CombineAndCollapseWhitespace(value, inferred_label, add_space);
310 continue;
313 // If we have identified a partial label and have reached a non-lightweight
314 // element, consider the label to be complete.
315 base::string16 trimmed_label;
316 base::TrimWhitespace(inferred_label, base::TRIM_ALL, &trimmed_label);
317 if (!trimmed_label.empty())
318 break;
320 // <img> and <br> tags often appear between the input element and its
321 // label text, so skip over them.
322 CR_DEFINE_STATIC_LOCAL(WebString, kImage, ("img"));
323 CR_DEFINE_STATIC_LOCAL(WebString, kBreak, ("br"));
324 if (HasTagName(sibling, kImage) || HasTagName(sibling, kBreak))
325 continue;
327 // We only expect <p> and <label> tags to contain the full label text.
328 CR_DEFINE_STATIC_LOCAL(WebString, kPage, ("p"));
329 CR_DEFINE_STATIC_LOCAL(WebString, kLabel, ("label"));
330 if (HasTagName(sibling, kPage) || HasTagName(sibling, kLabel))
331 inferred_label = FindChildText(sibling);
333 break;
336 base::TrimWhitespace(inferred_label, base::TRIM_ALL, &inferred_label);
337 return inferred_label;
340 // Helper for |InferLabelForElement()| that infers a label, if possible, from
341 // a previous sibling of |element|,
342 // e.g. Some Text <input ...>
343 // or Some <span>Text</span> <input ...>
344 // or <p>Some Text</p><input ...>
345 // or <label>Some Text</label> <input ...>
346 // or Some Text <img><input ...>
347 // or <b>Some Text</b><br/> <input ...>.
348 base::string16 InferLabelFromPrevious(const WebFormControlElement& element) {
349 return InferLabelFromSibling(element, false /* forward? */);
352 // Same as InferLabelFromPrevious(), but in the other direction.
353 // Useful for cases like: <span><input type="checkbox">Label For Checkbox</span>
354 base::string16 InferLabelFromNext(const WebFormControlElement& element) {
355 return InferLabelFromSibling(element, true /* forward? */);
358 // Helper for |InferLabelForElement()| that infers a label, if possible, from
359 // the placeholder text. e.g. <input placeholder="foo">
360 base::string16 InferLabelFromPlaceholder(const WebFormControlElement& element) {
361 CR_DEFINE_STATIC_LOCAL(WebString, kPlaceholder, ("placeholder"));
362 if (element.hasAttribute(kPlaceholder))
363 return element.getAttribute(kPlaceholder);
365 return base::string16();
368 // Helper for |InferLabelForElement()| that infers a label, if possible, from
369 // enclosing list item,
370 // e.g. <li>Some Text<input ...><input ...><input ...></tr>
371 base::string16 InferLabelFromListItem(const WebFormControlElement& element) {
372 WebNode parent = element.parentNode();
373 CR_DEFINE_STATIC_LOCAL(WebString, kListItem, ("li"));
374 while (!parent.isNull() && parent.isElementNode() &&
375 !parent.to<WebElement>().hasHTMLTagName(kListItem)) {
376 parent = parent.parentNode();
379 if (!parent.isNull() && HasTagName(parent, kListItem))
380 return FindChildText(parent);
382 return base::string16();
385 // Helper for |InferLabelForElement()| that infers a label, if possible, from
386 // surrounding table structure,
387 // e.g. <tr><td>Some Text</td><td><input ...></td></tr>
388 // or <tr><th>Some Text</th><td><input ...></td></tr>
389 // or <tr><td><b>Some Text</b></td><td><b><input ...></b></td></tr>
390 // or <tr><th><b>Some Text</b></th><td><b><input ...></b></td></tr>
391 base::string16 InferLabelFromTableColumn(const WebFormControlElement& element) {
392 CR_DEFINE_STATIC_LOCAL(WebString, kTableCell, ("td"));
393 WebNode parent = element.parentNode();
394 while (!parent.isNull() && parent.isElementNode() &&
395 !parent.to<WebElement>().hasHTMLTagName(kTableCell)) {
396 parent = parent.parentNode();
399 if (parent.isNull())
400 return base::string16();
402 // Check all previous siblings, skipping non-element nodes, until we find a
403 // non-empty text block.
404 base::string16 inferred_label;
405 WebNode previous = parent.previousSibling();
406 CR_DEFINE_STATIC_LOCAL(WebString, kTableHeader, ("th"));
407 while (inferred_label.empty() && !previous.isNull()) {
408 if (HasTagName(previous, kTableCell) || HasTagName(previous, kTableHeader))
409 inferred_label = FindChildText(previous);
411 previous = previous.previousSibling();
414 return inferred_label;
417 // Helper for |InferLabelForElement()| that infers a label, if possible, from
418 // surrounding table structure,
420 // If there are multiple cells and the row with the input matches up with the
421 // previous row, then look for a specific cell within the previous row.
422 // e.g. <tr><td>Input 1 label</td><td>Input 2 label</td></tr>
423 // <tr><td><input name="input 1"></td><td><input name="input2"></td></tr>
425 // Otherwise, just look in the entire previous row.
426 // e.g. <tr><td>Some Text</td></tr><tr><td><input ...></td></tr>
427 base::string16 InferLabelFromTableRow(const WebFormControlElement& element) {
428 CR_DEFINE_STATIC_LOCAL(WebString, kTableCell, ("td"));
429 base::string16 inferred_label;
431 // First find the <td> that contains |element|.
432 WebNode cell = element.parentNode();
433 while (!cell.isNull()) {
434 if (cell.isElementNode() &&
435 cell.to<WebElement>().hasHTMLTagName(kTableCell)) {
436 break;
438 cell = cell.parentNode();
441 // Not in a cell - bail out.
442 if (cell.isNull())
443 return inferred_label;
445 // Count the cell holding |element|.
446 size_t cell_count = CalculateTableCellColumnSpan(cell.to<WebElement>());
447 size_t cell_position = 0;
448 size_t cell_position_end = cell_count - 1;
450 // Count cells to the left to figure out |element|'s cell's position.
451 for (WebNode cell_it = cell.previousSibling();
452 !cell_it.isNull();
453 cell_it = cell_it.previousSibling()) {
454 if (cell_it.isElementNode() &&
455 cell_it.to<WebElement>().hasHTMLTagName(kTableCell)) {
456 cell_position += CalculateTableCellColumnSpan(cell_it.to<WebElement>());
460 // Count cells to the right.
461 for (WebNode cell_it = cell.nextSibling();
462 !cell_it.isNull();
463 cell_it = cell_it.nextSibling()) {
464 if (cell_it.isElementNode() &&
465 cell_it.to<WebElement>().hasHTMLTagName(kTableCell)) {
466 cell_count += CalculateTableCellColumnSpan(cell_it.to<WebElement>());
470 // Combine left + right.
471 cell_count += cell_position;
472 cell_position_end += cell_position;
474 // Find the current row.
475 CR_DEFINE_STATIC_LOCAL(WebString, kTableRow, ("tr"));
476 WebNode parent = element.parentNode();
477 while (!parent.isNull() && parent.isElementNode() &&
478 !parent.to<WebElement>().hasHTMLTagName(kTableRow)) {
479 parent = parent.parentNode();
482 if (parent.isNull())
483 return inferred_label;
485 // Now find the previous row.
486 WebNode row_it = parent.previousSibling();
487 while (!row_it.isNull()) {
488 if (row_it.isElementNode() &&
489 row_it.to<WebElement>().hasHTMLTagName(kTableRow)) {
490 break;
492 row_it = row_it.previousSibling();
495 // If there exists a previous row, check its cells and size. If they align
496 // with the current row, infer the label from the cell above.
497 if (!row_it.isNull()) {
498 WebNode matching_cell;
499 size_t prev_row_count = 0;
500 WebNode prev_row_it = row_it.firstChild();
501 CR_DEFINE_STATIC_LOCAL(WebString, kTableHeader, ("th"));
502 while (!prev_row_it.isNull()) {
503 if (prev_row_it.isElementNode()) {
504 WebElement prev_row_element = prev_row_it.to<WebElement>();
505 if (prev_row_element.hasHTMLTagName(kTableCell) ||
506 prev_row_element.hasHTMLTagName(kTableHeader)) {
507 size_t span = CalculateTableCellColumnSpan(prev_row_element);
508 size_t prev_row_count_end = prev_row_count + span - 1;
509 if (prev_row_count == cell_position &&
510 prev_row_count_end == cell_position_end) {
511 matching_cell = prev_row_it;
513 prev_row_count += span;
516 prev_row_it = prev_row_it.nextSibling();
518 if ((cell_count == prev_row_count) && !matching_cell.isNull()) {
519 inferred_label = FindChildText(matching_cell);
520 if (!inferred_label.empty())
521 return inferred_label;
525 // If there is no previous row, or if the previous row and current row do not
526 // align, check all previous siblings, skipping non-element nodes, until we
527 // find a non-empty text block.
528 WebNode previous = parent.previousSibling();
529 while (inferred_label.empty() && !previous.isNull()) {
530 if (HasTagName(previous, kTableRow))
531 inferred_label = FindChildText(previous);
533 previous = previous.previousSibling();
536 return inferred_label;
539 // Helper for |InferLabelForElement()| that infers a label, if possible, from
540 // a surrounding div table,
541 // e.g. <div>Some Text<span><input ...></span></div>
542 // e.g. <div>Some Text</div><div><input ...></div>
544 // Because this is already traversing the <div> structure, if it finds a <label>
545 // sibling along the way, infer from that <label>.
546 base::string16 InferLabelFromDivTable(const WebFormControlElement& element) {
547 WebNode node = element.parentNode();
548 bool looking_for_parent = true;
549 std::set<WebNode> divs_to_skip;
551 // Search the sibling and parent <div>s until we find a candidate label.
552 base::string16 inferred_label;
553 CR_DEFINE_STATIC_LOCAL(WebString, kDiv, ("div"));
554 CR_DEFINE_STATIC_LOCAL(WebString, kLabel, ("label"));
555 while (inferred_label.empty() && !node.isNull()) {
556 if (HasTagName(node, kDiv)) {
557 if (looking_for_parent)
558 inferred_label = FindChildTextWithIgnoreList(node, divs_to_skip);
559 else
560 inferred_label = FindChildText(node);
562 // Avoid sibling DIVs that contain autofillable fields.
563 if (!looking_for_parent && !inferred_label.empty()) {
564 CR_DEFINE_STATIC_LOCAL(WebString, kSelector,
565 ("input, select, textarea"));
566 blink::WebExceptionCode ec = 0;
567 WebElement result_element = node.querySelector(kSelector, ec);
568 if (!result_element.isNull()) {
569 inferred_label.clear();
570 divs_to_skip.insert(node);
574 looking_for_parent = false;
575 } else if (!looking_for_parent && HasTagName(node, kLabel)) {
576 WebLabelElement label_element = node.to<WebLabelElement>();
577 if (label_element.correspondingControl().isNull())
578 inferred_label = FindChildText(node);
579 } else if (looking_for_parent && IsTraversableContainerElement(node)) {
580 // If the element is in a non-div container, its label most likely is too.
581 break;
584 if (node.previousSibling().isNull()) {
585 // If there are no more siblings, continue walking up the tree.
586 looking_for_parent = true;
589 node = looking_for_parent ? node.parentNode() : node.previousSibling();
592 return inferred_label;
595 // Helper for |InferLabelForElement()| that infers a label, if possible, from
596 // a surrounding definition list,
597 // e.g. <dl><dt>Some Text</dt><dd><input ...></dd></dl>
598 // e.g. <dl><dt><b>Some Text</b></dt><dd><b><input ...></b></dd></dl>
599 base::string16 InferLabelFromDefinitionList(
600 const WebFormControlElement& element) {
601 CR_DEFINE_STATIC_LOCAL(WebString, kDefinitionData, ("dd"));
602 WebNode parent = element.parentNode();
603 while (!parent.isNull() && parent.isElementNode() &&
604 !parent.to<WebElement>().hasHTMLTagName(kDefinitionData))
605 parent = parent.parentNode();
607 if (parent.isNull() || !HasTagName(parent, kDefinitionData))
608 return base::string16();
610 // Skip by any intervening text nodes.
611 WebNode previous = parent.previousSibling();
612 while (!previous.isNull() && previous.isTextNode())
613 previous = previous.previousSibling();
615 CR_DEFINE_STATIC_LOCAL(WebString, kDefinitionTag, ("dt"));
616 if (previous.isNull() || !HasTagName(previous, kDefinitionTag))
617 return base::string16();
619 return FindChildText(previous);
622 // Returns the element type for all ancestor nodes in CAPS, starting with the
623 // parent node.
624 std::vector<std::string> AncestorTagNames(
625 const WebFormControlElement& element) {
626 std::vector<std::string> tag_names;
627 for (WebNode parent_node = element.parentNode();
628 !parent_node.isNull();
629 parent_node = parent_node.parentNode()) {
630 if (!parent_node.isElementNode())
631 continue;
633 tag_names.push_back(parent_node.to<WebElement>().tagName().utf8());
635 return tag_names;
638 // Infers corresponding label for |element| from surrounding context in the DOM,
639 // e.g. the contents of the preceding <p> tag or text element.
640 base::string16 InferLabelForElement(const WebFormControlElement& element) {
641 base::string16 inferred_label;
642 if (IsCheckableElement(toWebInputElement(&element))) {
643 inferred_label = InferLabelFromNext(element);
644 if (!inferred_label.empty())
645 return inferred_label;
648 inferred_label = InferLabelFromPrevious(element);
649 if (!inferred_label.empty())
650 return inferred_label;
652 // If we didn't find a label, check for placeholder text.
653 inferred_label = InferLabelFromPlaceholder(element);
654 if (!inferred_label.empty())
655 return inferred_label;
657 // For all other searches that involve traversing up the tree, the search
658 // order is based on which tag is the closest ancestor to |element|.
659 std::vector<std::string> tag_names = AncestorTagNames(element);
660 std::set<std::string> seen_tag_names;
661 for (const std::string& tag_name : tag_names) {
662 if (ContainsKey(seen_tag_names, tag_name))
663 continue;
665 seen_tag_names.insert(tag_name);
666 if (tag_name == "DIV") {
667 inferred_label = InferLabelFromDivTable(element);
668 } else if (tag_name == "TD") {
669 inferred_label = InferLabelFromTableColumn(element);
670 if (inferred_label.empty())
671 inferred_label = InferLabelFromTableRow(element);
672 } else if (tag_name == "DD") {
673 inferred_label = InferLabelFromDefinitionList(element);
674 } else if (tag_name == "LI") {
675 inferred_label = InferLabelFromListItem(element);
676 } else if (tag_name == "FIELDSET") {
677 break;
680 if (!inferred_label.empty())
681 break;
684 return inferred_label;
687 // Fills |option_strings| with the values of the <option> elements present in
688 // |select_element|.
689 void GetOptionStringsFromElement(const WebSelectElement& select_element,
690 std::vector<base::string16>* option_values,
691 std::vector<base::string16>* option_contents) {
692 DCHECK(!select_element.isNull());
694 option_values->clear();
695 option_contents->clear();
696 WebVector<WebElement> list_items = select_element.listItems();
698 // Constrain the maximum list length to prevent a malicious site from DOS'ing
699 // the browser, without entirely breaking autocomplete for some extreme
700 // legitimate sites: http://crbug.com/49332 and http://crbug.com/363094
701 if (list_items.size() > kMaxListSize)
702 return;
704 option_values->reserve(list_items.size());
705 option_contents->reserve(list_items.size());
706 for (size_t i = 0; i < list_items.size(); ++i) {
707 if (IsOptionElement(list_items[i])) {
708 const WebOptionElement option = list_items[i].toConst<WebOptionElement>();
709 option_values->push_back(option.value());
710 option_contents->push_back(option.text());
715 // The callback type used by |ForEachMatchingFormField()|.
716 typedef void (*Callback)(const FormFieldData&,
717 bool, /* is_initiating_element */
718 blink::WebFormControlElement*);
720 void ForEachMatchingFormFieldCommon(
721 std::vector<WebFormControlElement>* control_elements,
722 const WebElement& initiating_element,
723 const FormData& data,
724 FieldFilterMask filters,
725 bool force_override,
726 const Callback& callback) {
727 DCHECK(control_elements);
728 if (control_elements->size() != data.fields.size()) {
729 // This case should be reachable only for pathological websites and tests,
730 // which add or remove form fields while the user is interacting with the
731 // Autofill popup.
732 return;
735 // It's possible that the site has injected fields into the form after the
736 // page has loaded, so we can't assert that the size of the cached control
737 // elements is equal to the size of the fields in |form|. Fortunately, the
738 // one case in the wild where this happens, paypal.com signup form, the fields
739 // are appended to the end of the form and are not visible.
740 for (size_t i = 0; i < control_elements->size(); ++i) {
741 WebFormControlElement* element = &(*control_elements)[i];
743 if (base::string16(element->nameForAutofill()) != data.fields[i].name) {
744 // This case should be reachable only for pathological websites, which
745 // rename form fields while the user is interacting with the Autofill
746 // popup. I (isherman) am not aware of any such websites, and so am
747 // optimistically including a NOTREACHED(). If you ever trip this check,
748 // please file a bug against me.
749 NOTREACHED();
750 continue;
753 bool is_initiating_element = (*element == initiating_element);
755 // Only autofill empty fields and the field that initiated the filling,
756 // i.e. the field the user is currently editing and interacting with.
757 const WebInputElement* input_element = toWebInputElement(element);
758 if (!force_override && !is_initiating_element &&
759 ((IsAutofillableInputElement(input_element) ||
760 IsTextAreaElement(*element)) &&
761 !element->value().isEmpty()))
762 continue;
764 if (((filters & FILTER_DISABLED_ELEMENTS) && !element->isEnabled()) ||
765 ((filters & FILTER_READONLY_ELEMENTS) && element->isReadOnly()) ||
766 ((filters & FILTER_NON_FOCUSABLE_ELEMENTS) && !element->isFocusable()))
767 continue;
769 callback(data.fields[i], is_initiating_element, element);
773 // For each autofillable field in |data| that matches a field in the |form|,
774 // the |callback| is invoked with the corresponding |form| field data.
775 void ForEachMatchingFormField(const WebFormElement& form_element,
776 const WebElement& initiating_element,
777 const FormData& data,
778 FieldFilterMask filters,
779 bool force_override,
780 const Callback& callback) {
781 std::vector<WebFormControlElement> control_elements =
782 ExtractAutofillableElementsInForm(form_element);
783 ForEachMatchingFormFieldCommon(&control_elements, initiating_element, data,
784 filters, force_override, callback);
787 // For each autofillable field in |data| that matches a field in the set of
788 // unowned autofillable form fields, the |callback| is invoked with the
789 // corresponding |data| field.
790 void ForEachMatchingUnownedFormField(const WebElement& initiating_element,
791 const FormData& data,
792 FieldFilterMask filters,
793 bool force_override,
794 const Callback& callback) {
795 if (initiating_element.isNull())
796 return;
798 std::vector<WebFormControlElement> control_elements =
799 GetUnownedAutofillableFormFieldElements(
800 initiating_element.document().all(), nullptr);
801 if (!IsElementInControlElementSet(initiating_element, control_elements))
802 return;
804 ForEachMatchingFormFieldCommon(&control_elements, initiating_element, data,
805 filters, force_override, callback);
808 // Sets the |field|'s value to the value in |data|.
809 // Also sets the "autofilled" attribute, causing the background to be yellow.
810 void FillFormField(const FormFieldData& data,
811 bool is_initiating_node,
812 blink::WebFormControlElement* field) {
813 // Nothing to fill.
814 if (data.value.empty())
815 return;
817 if (!data.is_autofilled)
818 return;
820 WebInputElement* input_element = toWebInputElement(field);
821 if (IsCheckableElement(input_element)) {
822 input_element->setChecked(data.is_checked, true);
823 } else {
824 base::string16 value = data.value;
825 if (IsTextInput(input_element) || IsMonthInput(input_element)) {
826 // If the maxlength attribute contains a negative value, maxLength()
827 // returns the default maxlength value.
828 TruncateString(&value, input_element->maxLength());
830 field->setValue(value, true);
833 field->setAutofilled(true);
835 if (is_initiating_node &&
836 ((IsTextInput(input_element) || IsMonthInput(input_element)) ||
837 IsTextAreaElement(*field))) {
838 int length = field->value().length();
839 field->setSelectionRange(length, length);
840 // Clear the current IME composition (the underline), if there is one.
841 field->document().frame()->unmarkText();
845 // Sets the |field|'s "suggested" (non JS visible) value to the value in |data|.
846 // Also sets the "autofilled" attribute, causing the background to be yellow.
847 void PreviewFormField(const FormFieldData& data,
848 bool is_initiating_node,
849 blink::WebFormControlElement* field) {
850 // Nothing to preview.
851 if (data.value.empty())
852 return;
854 if (!data.is_autofilled)
855 return;
857 // Preview input, textarea and select fields. For input fields, excludes
858 // checkboxes and radio buttons, as there is no provision for
859 // setSuggestedCheckedValue in WebInputElement.
860 WebInputElement* input_element = toWebInputElement(field);
861 if (IsTextInput(input_element) || IsMonthInput(input_element)) {
862 // If the maxlength attribute contains a negative value, maxLength()
863 // returns the default maxlength value.
864 input_element->setSuggestedValue(
865 data.value.substr(0, input_element->maxLength()));
866 input_element->setAutofilled(true);
867 } else if (IsTextAreaElement(*field) || IsSelectElement(*field)) {
868 field->setSuggestedValue(data.value);
869 field->setAutofilled(true);
872 if (is_initiating_node &&
873 (IsTextInput(input_element) || IsTextAreaElement(*field))) {
874 // Select the part of the text that the user didn't type.
875 PreviewSuggestion(field->suggestedValue(), field->value(), field);
879 // Recursively checks whether |node| or any of its children have a non-empty
880 // bounding box. The recursion depth is bounded by |depth|.
881 bool IsWebNodeVisibleImpl(const blink::WebNode& node, const int depth) {
882 if (depth < 0)
883 return false;
884 if (node.hasNonEmptyBoundingBox())
885 return true;
887 // The childNodes method is not a const method. Therefore it cannot be called
888 // on a const reference. Therefore we need a const cast.
889 const blink::WebNodeList& children =
890 const_cast<blink::WebNode&>(node).childNodes();
891 size_t length = children.length();
892 for (size_t i = 0; i < length; ++i) {
893 const blink::WebNode& item = children.item(i);
894 if (IsWebNodeVisibleImpl(item, depth - 1))
895 return true;
897 return false;
900 // Extracts the fields from |control_elements| with |extract_mask| to
901 // |form_fields|. The extracted fields are also placed in |element_map|.
902 // |form_fields| and |element_map| should start out empty.
903 // |fields_extracted| should have as many elements as |control_elements|,
904 // initialized to false.
905 // Returns true if the number of fields extracted is within
906 // [1, kMaxParseableFields].
907 bool ExtractFieldsFromControlElements(
908 const WebVector<WebFormControlElement>& control_elements,
909 ExtractMask extract_mask,
910 ScopedVector<FormFieldData>* form_fields,
911 std::vector<bool>* fields_extracted,
912 std::map<WebFormControlElement, FormFieldData*>* element_map) {
913 DCHECK(form_fields->empty());
914 DCHECK(element_map->empty());
915 DCHECK_EQ(control_elements.size(), fields_extracted->size());
917 for (size_t i = 0; i < control_elements.size(); ++i) {
918 const WebFormControlElement& control_element = control_elements[i];
920 if (!IsAutofillableElement(control_element))
921 continue;
923 // Create a new FormFieldData, fill it out and map it to the field's name.
924 FormFieldData* form_field = new FormFieldData;
925 WebFormControlElementToFormField(control_element, extract_mask, form_field);
926 form_fields->push_back(form_field);
927 (*element_map)[control_element] = form_field;
928 (*fields_extracted)[i] = true;
930 // To avoid overly expensive computation, we impose a maximum number of
931 // allowable fields.
932 if (form_fields->size() > kMaxParseableFields)
933 return false;
936 // Succeeded if fields were extracted.
937 return !form_fields->empty();
940 // For each label element, get the corresponding form control element, use the
941 // form control element's name as a key into the
942 // <WebFormControlElement, FormFieldData> map to find the previously created
943 // FormFieldData and set the FormFieldData's label to the
944 // label.firstChild().nodeValue() of the label element.
945 void MatchLabelsAndFields(
946 const WebElementCollection& labels,
947 std::map<WebFormControlElement, FormFieldData*>* element_map) {
948 CR_DEFINE_STATIC_LOCAL(WebString, kFor, ("for"));
949 CR_DEFINE_STATIC_LOCAL(WebString, kHidden, ("hidden"));
951 for (WebElement item = labels.firstItem(); !item.isNull();
952 item = labels.nextItem()) {
953 WebLabelElement label = item.to<WebLabelElement>();
954 WebFormControlElement field_element =
955 label.correspondingControl().to<WebFormControlElement>();
956 FormFieldData* field_data = nullptr;
958 if (field_element.isNull()) {
959 // Sometimes site authors will incorrectly specify the corresponding
960 // field element's name rather than its id, so we compensate here.
961 base::string16 element_name = label.getAttribute(kFor);
962 if (element_name.empty())
963 continue;
964 // Look through the list for elements with this name. There can actually
965 // be more than one. In this case, the label may not be particularly
966 // useful, so just discard it.
967 for (const auto& iter : *element_map) {
968 if (iter.second->name == element_name) {
969 if (field_data) {
970 field_data = nullptr;
971 break;
972 } else {
973 field_data = iter.second;
977 } else if (!field_element.isFormControlElement() ||
978 field_element.formControlType() == kHidden) {
979 continue;
980 } else {
981 // Typical case: look up |field_data| in |element_map|.
982 auto iter = element_map->find(field_element);
983 if (iter == element_map->end())
984 continue;
985 field_data = iter->second;
988 if (!field_data)
989 continue;
991 base::string16 label_text = FindChildText(label);
993 // Concatenate labels because some sites might have multiple label
994 // candidates.
995 if (!field_data->label.empty() && !label_text.empty())
996 field_data->label += base::ASCIIToUTF16(" ");
997 field_data->label += label_text;
1001 // Common function shared by WebFormElementToFormData() and
1002 // UnownedFormElementsAndFieldSetsToFormData(). Either pass in:
1003 // 1) |form_element| and an empty |fieldsets|.
1004 // or
1005 // 2) a NULL |form_element|.
1007 // If |field| is not NULL, then |form_control_element| should be not NULL.
1008 bool FormOrFieldsetsToFormData(
1009 const blink::WebFormElement* form_element,
1010 const blink::WebFormControlElement* form_control_element,
1011 const std::vector<blink::WebElement>& fieldsets,
1012 const WebVector<WebFormControlElement>& control_elements,
1013 ExtractMask extract_mask,
1014 FormData* form,
1015 FormFieldData* field) {
1016 CR_DEFINE_STATIC_LOCAL(WebString, kLabel, ("label"));
1018 if (form_element)
1019 DCHECK(fieldsets.empty());
1020 if (field)
1021 DCHECK(form_control_element);
1023 // A map from a FormFieldData's name to the FormFieldData itself.
1024 std::map<WebFormControlElement, FormFieldData*> element_map;
1026 // The extracted FormFields. We use pointers so we can store them in
1027 // |element_map|.
1028 ScopedVector<FormFieldData> form_fields;
1030 // A vector of bools that indicate whether each field in the form meets the
1031 // requirements and thus will be in the resulting |form|.
1032 std::vector<bool> fields_extracted(control_elements.size(), false);
1034 if (!ExtractFieldsFromControlElements(control_elements, extract_mask,
1035 &form_fields, &fields_extracted,
1036 &element_map)) {
1037 return false;
1040 if (form_element) {
1041 // Loop through the label elements inside the form element. For each label
1042 // element, get the corresponding form control element, use the form control
1043 // element's name as a key into the <name, FormFieldData> map to find the
1044 // previously created FormFieldData and set the FormFieldData's label to the
1045 // label.firstChild().nodeValue() of the label element.
1046 WebElementCollection labels =
1047 form_element->getElementsByHTMLTagName(kLabel);
1048 DCHECK(!labels.isNull());
1049 MatchLabelsAndFields(labels, &element_map);
1050 } else {
1051 // Same as the if block, but for all the labels in fieldsets.
1052 for (size_t i = 0; i < fieldsets.size(); ++i) {
1053 WebElementCollection labels =
1054 fieldsets[i].getElementsByHTMLTagName(kLabel);
1055 DCHECK(!labels.isNull());
1056 MatchLabelsAndFields(labels, &element_map);
1060 // Loop through the form control elements, extracting the label text from
1061 // the DOM. We use the |fields_extracted| vector to make sure we assign the
1062 // extracted label to the correct field, as it's possible |form_fields| will
1063 // not contain all of the elements in |control_elements|.
1064 for (size_t i = 0, field_idx = 0;
1065 i < control_elements.size() && field_idx < form_fields.size(); ++i) {
1066 // This field didn't meet the requirements, so don't try to find a label
1067 // for it.
1068 if (!fields_extracted[i])
1069 continue;
1071 const WebFormControlElement& control_element = control_elements[i];
1072 if (form_fields[field_idx]->label.empty())
1073 form_fields[field_idx]->label = InferLabelForElement(control_element);
1074 TruncateString(&form_fields[field_idx]->label, kMaxDataLength);
1076 if (field && *form_control_element == control_element)
1077 *field = *form_fields[field_idx];
1079 ++field_idx;
1082 // Copy the created FormFields into the resulting FormData object.
1083 for (const auto& iter : form_fields)
1084 form->fields.push_back(*iter);
1085 return true;
1088 bool UnownedFormElementsAndFieldSetsToFormData(
1089 const std::vector<blink::WebElement>& fieldsets,
1090 const std::vector<blink::WebFormControlElement>& control_elements,
1091 const blink::WebFormControlElement* element,
1092 const blink::WebDocument& document,
1093 ExtractMask extract_mask,
1094 FormData* form,
1095 FormFieldData* field) {
1096 form->origin = document.url();
1097 form->is_form_tag = false;
1099 return FormOrFieldsetsToFormData(nullptr, element, fieldsets,
1100 control_elements, extract_mask, form, field);
1103 } // namespace
// Upper bound on the number of fields extracted from a single form:
// ExtractFieldsFromControlElements() returns false as soon as more than this
// many fields have been extracted.
1105 const size_t kMaxParseableFields = 200;
1107 bool IsMonthInput(const WebInputElement* element) {
1108 CR_DEFINE_STATIC_LOCAL(WebString, kMonth, ("month"));
1109 return element && !element->isNull() && element->formControlType() == kMonth;
1112 // All text fields, including password fields, should be extracted.
1113 bool IsTextInput(const WebInputElement* element) {
1114 return element && !element->isNull() && element->isTextField();
1117 bool IsSelectElement(const WebFormControlElement& element) {
1118 // Static for improved performance.
1119 CR_DEFINE_STATIC_LOCAL(WebString, kSelectOne, ("select-one"));
1120 return !element.isNull() && element.formControlType() == kSelectOne;
1123 bool IsTextAreaElement(const WebFormControlElement& element) {
1124 // Static for improved performance.
1125 CR_DEFINE_STATIC_LOCAL(WebString, kTextArea, ("textarea"));
1126 return !element.isNull() && element.formControlType() == kTextArea;
1129 bool IsCheckableElement(const WebInputElement* element) {
1130 if (!element || element->isNull())
1131 return false;
1133 return element->isCheckbox() || element->isRadioButton();
1136 bool IsAutofillableInputElement(const WebInputElement* element) {
1137 return IsTextInput(element) ||
1138 IsMonthInput(element) ||
1139 IsCheckableElement(element);
1142 const base::string16 GetFormIdentifier(const WebFormElement& form) {
1143 base::string16 identifier = form.name();
1144 CR_DEFINE_STATIC_LOCAL(WebString, kId, ("id"));
1145 if (identifier.empty())
1146 identifier = form.getAttribute(kId);
1148 return identifier;
1151 bool IsWebNodeVisible(const blink::WebNode& node) {
1152 // In the bug http://crbug.com/237216 the form's bounding box is empty
1153 // however the form has non empty children. Thus we need to look at the
1154 // form's children.
1155 int kNodeSearchDepth = 2;
1156 return IsWebNodeVisibleImpl(node, kNodeSearchDepth);
1159 std::vector<blink::WebFormControlElement> ExtractAutofillableElementsFromSet(
1160 const WebVector<WebFormControlElement>& control_elements) {
1161 std::vector<blink::WebFormControlElement> autofillable_elements;
1162 for (size_t i = 0; i < control_elements.size(); ++i) {
1163 WebFormControlElement element = control_elements[i];
1164 if (!IsAutofillableElement(element))
1165 continue;
1167 autofillable_elements.push_back(element);
1169 return autofillable_elements;
1172 std::vector<WebFormControlElement> ExtractAutofillableElementsInForm(
1173 const WebFormElement& form_element) {
1174 WebVector<WebFormControlElement> control_elements;
1175 form_element.getFormControlElements(control_elements);
1177 return ExtractAutofillableElementsFromSet(control_elements);
1180 void WebFormControlElementToFormField(const WebFormControlElement& element,
1181 ExtractMask extract_mask,
1182 FormFieldData* field) {
1183 DCHECK(field);
1184 DCHECK(!element.isNull());
1185 CR_DEFINE_STATIC_LOCAL(WebString, kAutocomplete, ("autocomplete"));
1186 CR_DEFINE_STATIC_LOCAL(WebString, kRole, ("role"));
1188 // The label is not officially part of a WebFormControlElement; however, the
1189 // labels for all form control elements are scraped from the DOM and set in
1190 // WebFormElementToFormData.
1191 field->name = element.nameForAutofill();
1192 field->form_control_type = element.formControlType().utf8();
1193 field->autocomplete_attribute = element.getAttribute(kAutocomplete).utf8();
1194 if (field->autocomplete_attribute.size() > kMaxDataLength) {
1195 // Discard overly long attribute values to avoid DOS-ing the browser
1196 // process. However, send over a default string to indicate that the
1197 // attribute was present.
1198 field->autocomplete_attribute = "x-max-data-length-exceeded";
1200 if (base::LowerCaseEqualsASCII(
1201 base::StringPiece16(element.getAttribute(kRole)), "presentation"))
1202 field->role = FormFieldData::ROLE_ATTRIBUTE_PRESENTATION;
1204 if (!IsAutofillableElement(element))
1205 return;
1207 const WebInputElement* input_element = toWebInputElement(&element);
1208 if (IsAutofillableInputElement(input_element) ||
1209 IsTextAreaElement(element) ||
1210 IsSelectElement(element)) {
1211 field->is_autofilled = element.isAutofilled();
1212 field->is_focusable = element.isFocusable();
1213 field->should_autocomplete = element.autoComplete();
1214 field->text_direction = element.directionForFormData() ==
1215 "rtl" ? base::i18n::RIGHT_TO_LEFT : base::i18n::LEFT_TO_RIGHT;
1218 if (IsAutofillableInputElement(input_element)) {
1219 if (IsTextInput(input_element))
1220 field->max_length = input_element->maxLength();
1222 field->is_checkable = IsCheckableElement(input_element);
1223 field->is_checked = input_element->isChecked();
1224 } else if (IsTextAreaElement(element)) {
1225 // Nothing more to do in this case.
1226 } else if (extract_mask & EXTRACT_OPTIONS) {
1227 // Set option strings on the field if available.
1228 DCHECK(IsSelectElement(element));
1229 const WebSelectElement select_element = element.toConst<WebSelectElement>();
1230 GetOptionStringsFromElement(select_element,
1231 &field->option_values,
1232 &field->option_contents);
1235 if (!(extract_mask & EXTRACT_VALUE))
1236 return;
1238 base::string16 value = element.value();
1240 if (IsSelectElement(element) && (extract_mask & EXTRACT_OPTION_TEXT)) {
1241 const WebSelectElement select_element = element.toConst<WebSelectElement>();
1242 // Convert the |select_element| value to text if requested.
1243 WebVector<WebElement> list_items = select_element.listItems();
1244 for (size_t i = 0; i < list_items.size(); ++i) {
1245 if (IsOptionElement(list_items[i])) {
1246 const WebOptionElement option_element =
1247 list_items[i].toConst<WebOptionElement>();
1248 if (option_element.value() == value) {
1249 value = option_element.text();
1250 break;
1256 // Constrain the maximum data length to prevent a malicious site from DOS'ing
1257 // the browser: http://crbug.com/49332
1258 TruncateString(&value, kMaxDataLength);
1260 field->value = value;
1263 bool WebFormElementToFormData(
1264 const blink::WebFormElement& form_element,
1265 const blink::WebFormControlElement& form_control_element,
1266 ExtractMask extract_mask,
1267 FormData* form,
1268 FormFieldData* field) {
1269 const WebFrame* frame = form_element.document().frame();
1270 if (!frame)
1271 return false;
1273 form->name = GetFormIdentifier(form_element);
1274 form->origin = frame->document().url();
1275 form->action = frame->document().completeURL(form_element.action());
1277 // If the completed URL is not valid, just use the action we get from
1278 // WebKit.
1279 if (!form->action.is_valid())
1280 form->action = GURL(form_element.action());
1282 WebVector<WebFormControlElement> control_elements;
1283 form_element.getFormControlElements(control_elements);
1285 std::vector<blink::WebElement> dummy_fieldset;
1286 return FormOrFieldsetsToFormData(&form_element, &form_control_element,
1287 dummy_fieldset, control_elements,
1288 extract_mask, form, field);
1291 std::vector<WebFormControlElement>
1292 GetUnownedAutofillableFormFieldElements(
1293 const WebElementCollection& elements,
1294 std::vector<WebElement>* fieldsets) {
1295 std::vector<WebFormControlElement> unowned_fieldset_children;
1296 for (WebElement element = elements.firstItem();
1297 !element.isNull();
1298 element = elements.nextItem()) {
1299 if (element.isFormControlElement()) {
1300 WebFormControlElement control = element.to<WebFormControlElement>();
1301 if (control.form().isNull())
1302 unowned_fieldset_children.push_back(control);
1305 if (fieldsets && element.hasHTMLTagName("fieldset") &&
1306 !IsElementInsideFormOrFieldSet(element)) {
1307 fieldsets->push_back(element);
1310 return ExtractAutofillableElementsFromSet(unowned_fieldset_children);
1313 bool UnownedCheckoutFormElementsAndFieldSetsToFormData(
1314 const std::vector<blink::WebElement>& fieldsets,
1315 const std::vector<blink::WebFormControlElement>& control_elements,
1316 const blink::WebFormControlElement* element,
1317 const blink::WebDocument& document,
1318 ExtractMask extract_mask,
1319 FormData* form,
1320 FormFieldData* field) {
1321 // Only attempt formless Autofill on checkout flows. This avoids the many
1322 // false positives found on the non-checkout web. See
1323 // http://crbug.com/462375. For now this early abort only applies to
1324 // English-language pages, because the regex is not translated. Note that
1325 // an empty "lang" attribute counts as English. A potential problem is that
1326 // this only checks document.title(), but should actually check the main
1327 // frame's title. Thus it may make bad decisions for iframes.
1328 WebElement html_element = document.documentElement();
1329 std::string lang;
1330 if (!html_element.isNull())
1331 lang = html_element.getAttribute("lang").utf8();
1332 if ((lang.empty() ||
1333 base::StartsWith(lang, "en", base::CompareCase::INSENSITIVE_ASCII)) &&
1334 !MatchesPattern(document.title(),
1335 base::UTF8ToUTF16("payment|checkout|address|delivery|shipping"))) {
1336 return false;
1339 return UnownedFormElementsAndFieldSetsToFormData(
1340 fieldsets, control_elements, element, document, extract_mask, form,
1341 field);
1344 bool UnownedPasswordFormElementsAndFieldSetsToFormData(
1345 const std::vector<blink::WebElement>& fieldsets,
1346 const std::vector<blink::WebFormControlElement>& control_elements,
1347 const blink::WebFormControlElement* element,
1348 const blink::WebDocument& document,
1349 ExtractMask extract_mask,
1350 FormData* form,
1351 FormFieldData* field) {
1352 return UnownedFormElementsAndFieldSetsToFormData(
1353 fieldsets, control_elements, element, document, extract_mask, form,
1354 field);
1358 bool FindFormAndFieldForFormControlElement(const WebFormControlElement& element,
1359 FormData* form,
1360 FormFieldData* field) {
1361 if (!IsAutofillableElement(element))
1362 return false;
1364 ExtractMask extract_mask =
1365 static_cast<ExtractMask>(EXTRACT_VALUE | EXTRACT_OPTIONS);
1366 const WebFormElement form_element = element.form();
1367 if (form_element.isNull()) {
1368 // No associated form, try the synthetic form for unowned form elements.
1369 WebDocument document = element.document();
1370 std::vector<WebElement> fieldsets;
1371 std::vector<WebFormControlElement> control_elements =
1372 GetUnownedAutofillableFormFieldElements(document.all(), &fieldsets);
1373 return UnownedCheckoutFormElementsAndFieldSetsToFormData(
1374 fieldsets, control_elements, &element, document, extract_mask,
1375 form, field);
1378 return WebFormElementToFormData(form_element,
1379 element,
1380 extract_mask,
1381 form,
1382 field);
1385 void FillForm(const FormData& form, const WebFormControlElement& element) {
1386 WebFormElement form_element = element.form();
1387 if (form_element.isNull()) {
1388 ForEachMatchingUnownedFormField(element,
1389 form,
1390 FILTER_ALL_NON_EDITABLE_ELEMENTS,
1391 false, /* dont force override */
1392 &FillFormField);
1393 return;
1396 ForEachMatchingFormField(form_element,
1397 element,
1398 form,
1399 FILTER_ALL_NON_EDITABLE_ELEMENTS,
1400 false, /* dont force override */
1401 &FillFormField);
1404 void FillFormIncludingNonFocusableElements(const FormData& form_data,
1405 const WebFormElement& form_element) {
1406 if (form_element.isNull()) {
1407 NOTREACHED();
1408 return;
1411 FieldFilterMask filter_mask = static_cast<FieldFilterMask>(
1412 FILTER_DISABLED_ELEMENTS | FILTER_READONLY_ELEMENTS);
1413 ForEachMatchingFormField(form_element,
1414 WebInputElement(),
1415 form_data,
1416 filter_mask,
1417 true, /* force override */
1418 &FillFormField);
1421 void PreviewForm(const FormData& form, const WebFormControlElement& element) {
1422 WebFormElement form_element = element.form();
1423 if (form_element.isNull()) {
1424 ForEachMatchingUnownedFormField(element,
1425 form,
1426 FILTER_ALL_NON_EDITABLE_ELEMENTS,
1427 false, /* dont force override */
1428 &PreviewFormField);
1429 return;
1432 ForEachMatchingFormField(form_element,
1433 element,
1434 form,
1435 FILTER_ALL_NON_EDITABLE_ELEMENTS,
1436 false, /* dont force override */
1437 &PreviewFormField);
1440 bool ClearPreviewedFormWithElement(const WebFormControlElement& element,
1441 bool was_autofilled) {
1442 WebFormElement form_element = element.form();
1443 std::vector<WebFormControlElement> control_elements;
1444 if (form_element.isNull()) {
1445 control_elements = GetUnownedAutofillableFormFieldElements(
1446 element.document().all(), nullptr);
1447 if (!IsElementInControlElementSet(element, control_elements))
1448 return false;
1449 } else {
1450 control_elements = ExtractAutofillableElementsInForm(form_element);
1453 for (size_t i = 0; i < control_elements.size(); ++i) {
1454 // There might be unrelated elements in this form which have already been
1455 // auto-filled. For example, the user might have already filled the address
1456 // part of a form and now be dealing with the credit card section. We only
1457 // want to reset the auto-filled status for fields that were previewed.
1458 WebFormControlElement control_element = control_elements[i];
1460 // Only text input, textarea and select elements can be previewed.
1461 WebInputElement* input_element = toWebInputElement(&control_element);
1462 if (!IsTextInput(input_element) &&
1463 !IsMonthInput(input_element) &&
1464 !IsTextAreaElement(control_element) &&
1465 !IsSelectElement(control_element))
1466 continue;
1468 // If the element is not auto-filled, we did not preview it,
1469 // so there is nothing to reset.
1470 if (!control_element.isAutofilled())
1471 continue;
1473 if ((IsTextInput(input_element) ||
1474 IsMonthInput(input_element) ||
1475 IsTextAreaElement(control_element) ||
1476 IsSelectElement(control_element)) &&
1477 control_element.suggestedValue().isEmpty())
1478 continue;
1480 // Clear the suggested value. For the initiating node, also restore the
1481 // original value.
1482 if (IsTextInput(input_element) || IsMonthInput(input_element) ||
1483 IsTextAreaElement(control_element)) {
1484 control_element.setSuggestedValue(WebString());
1485 bool is_initiating_node = (element == control_element);
1486 if (is_initiating_node) {
1487 control_element.setAutofilled(was_autofilled);
1488 // Clearing the suggested value in the focused node (above) can cause
1489 // selection to be lost. We force selection range to restore the text
1490 // cursor.
1491 int length = control_element.value().length();
1492 control_element.setSelectionRange(length, length);
1493 } else {
1494 control_element.setAutofilled(false);
1496 } else if (IsSelectElement(control_element)) {
1497 control_element.setSuggestedValue(WebString());
1498 control_element.setAutofilled(false);
1502 return true;
1505 bool IsWebpageEmpty(const blink::WebFrame* frame) {
1506 blink::WebDocument document = frame->document();
1508 return IsWebElementEmpty(document.head()) &&
1509 IsWebElementEmpty(document.body());
1512 bool IsWebElementEmpty(const blink::WebElement& element) {
1513 // This array contains all tags which can be present in an empty page.
1514 const char* const kAllowedValue[] = {
1515 "script",
1516 "meta",
1517 "title",
1519 const size_t kAllowedValueLength = arraysize(kAllowedValue);
1521 if (element.isNull())
1522 return true;
1523 // The childNodes method is not a const method. Therefore it cannot be called
1524 // on a const reference. Therefore we need a const cast.
1525 const blink::WebNodeList& children =
1526 const_cast<blink::WebElement&>(element).childNodes();
1527 for (size_t i = 0; i < children.length(); ++i) {
1528 const blink::WebNode& item = children.item(i);
1530 if (item.isTextNode() &&
1531 !base::ContainsOnlyChars(item.nodeValue().utf8(),
1532 base::kWhitespaceASCII))
1533 return false;
1535 // We ignore all other items with names which begin with
1536 // the character # because they are not html tags.
1537 if (item.nodeName().utf8()[0] == '#')
1538 continue;
1540 bool tag_is_allowed = false;
1541 // Test if the item name is in the kAllowedValue array
1542 for (size_t allowed_value_index = 0;
1543 allowed_value_index < kAllowedValueLength; ++allowed_value_index) {
1544 if (HasTagName(item,
1545 WebString::fromUTF8(kAllowedValue[allowed_value_index]))) {
1546 tag_is_allowed = true;
1547 break;
1550 if (!tag_is_allowed)
1551 return false;
1553 return true;
1556 gfx::RectF GetScaledBoundingBox(float scale, WebElement* element) {
1557 gfx::Rect bounding_box(element->boundsInViewportSpace());
1558 return gfx::RectF(bounding_box.x() * scale,
1559 bounding_box.y() * scale,
1560 bounding_box.width() * scale,
1561 bounding_box.height() * scale);
1564 void PreviewSuggestion(const base::string16& suggestion,
1565 const base::string16& user_input,
1566 blink::WebFormControlElement* input_element) {
1567 size_t selection_start = user_input.length();
1568 if (IsFeatureSubstringMatchEnabled()) {
1569 size_t offset =
1570 autofill::GetTextSelectionStart(suggestion, user_input, false);
1571 // Zero selection start is for password manager, which can show usernames
1572 // that do not begin with the user input value.
1573 selection_start = (offset == base::string16::npos) ? 0 : offset;
1576 input_element->setSelectionRange(selection_start, suggestion.length());
1579 } // namespace autofill