1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/autofill/content/renderer/form_autofill_util.h"
10 #include "base/command_line.h"
11 #include "base/logging.h"
12 #include "base/memory/scoped_vector.h"
13 #include "base/strings/string_number_conversions.h"
14 #include "base/strings/string_util.h"
15 #include "base/strings/utf_string_conversions.h"
16 #include "components/autofill/core/common/autofill_data_validation.h"
17 #include "components/autofill/core/common/autofill_regexes.h"
18 #include "components/autofill/core/common/autofill_switches.h"
19 #include "components/autofill/core/common/autofill_util.h"
20 #include "components/autofill/core/common/form_data.h"
21 #include "components/autofill/core/common/form_field_data.h"
22 #include "third_party/WebKit/public/platform/WebString.h"
23 #include "third_party/WebKit/public/platform/WebVector.h"
24 #include "third_party/WebKit/public/web/WebDocument.h"
25 #include "third_party/WebKit/public/web/WebElement.h"
26 #include "third_party/WebKit/public/web/WebElementCollection.h"
27 #include "third_party/WebKit/public/web/WebFormControlElement.h"
28 #include "third_party/WebKit/public/web/WebFormElement.h"
29 #include "third_party/WebKit/public/web/WebInputElement.h"
30 #include "third_party/WebKit/public/web/WebLabelElement.h"
31 #include "third_party/WebKit/public/web/WebLocalFrame.h"
32 #include "third_party/WebKit/public/web/WebNode.h"
33 #include "third_party/WebKit/public/web/WebNodeList.h"
34 #include "third_party/WebKit/public/web/WebOptionElement.h"
35 #include "third_party/WebKit/public/web/WebSelectElement.h"
36 #include "third_party/WebKit/public/web/WebTextAreaElement.h"
38 using blink::WebDocument
;
39 using blink::WebElement
;
40 using blink::WebElementCollection
;
41 using blink::WebFormControlElement
;
42 using blink::WebFormElement
;
43 using blink::WebFrame
;
44 using blink::WebInputElement
;
45 using blink::WebLabelElement
;
47 using blink::WebNodeList
;
48 using blink::WebOptionElement
;
49 using blink::WebSelectElement
;
50 using blink::WebTextAreaElement
;
51 using blink::WebString
;
52 using blink::WebVector
;
// Bit flags telling the FillForm helpers which kinds of fields must be left
// untouched. Individual flags can be OR-ed together.
enum FieldFilterMask {
  // NOTE(review): zero value with no filter bits set; confirm the name against
  // the upstream declaration.
  FILTER_NONE = 0,
  FILTER_DISABLED_ELEMENTS = 1 << 0,
  FILTER_READONLY_ELEMENTS = 1 << 1,
  FILTER_NON_FOCUSABLE_ELEMENTS = 1 << 2,
  // Union of the three flags above.
  FILTER_ALL_NON_EDITABLE_ELEMENTS = FILTER_DISABLED_ELEMENTS |
                                     FILTER_READONLY_ELEMENTS |
                                     FILTER_NON_FOCUSABLE_ELEMENTS,
};
68 void TruncateString(base::string16
* str
, size_t max_length
) {
69 if (str
->length() > max_length
)
70 str
->resize(max_length
);
73 bool IsOptionElement(const WebElement
& element
) {
74 CR_DEFINE_STATIC_LOCAL(WebString
, kOption
, ("option"));
75 return element
.hasHTMLTagName(kOption
);
78 bool IsScriptElement(const WebElement
& element
) {
79 CR_DEFINE_STATIC_LOCAL(WebString
, kScript
, ("script"));
80 return element
.hasHTMLTagName(kScript
);
83 bool IsNoScriptElement(const WebElement
& element
) {
84 CR_DEFINE_STATIC_LOCAL(WebString
, kNoScript
, ("noscript"));
85 return element
.hasHTMLTagName(kNoScript
);
88 bool HasTagName(const WebNode
& node
, const blink::WebString
& tag
) {
89 return node
.isElementNode() && node
.toConst
<WebElement
>().hasHTMLTagName(tag
);
92 bool IsAutofillableElement(const WebFormControlElement
& element
) {
93 const WebInputElement
* input_element
= toWebInputElement(&element
);
94 return IsAutofillableInputElement(input_element
) ||
95 IsSelectElement(element
) ||
96 IsTextAreaElement(element
);
99 bool IsElementInControlElementSet(
100 const WebElement
& element
,
101 const std::vector
<WebFormControlElement
>& control_elements
) {
102 if (!element
.isFormControlElement())
104 const WebFormControlElement form_control_element
=
105 element
.toConst
<WebFormControlElement
>();
106 return std::find(control_elements
.begin(),
107 control_elements
.end(),
108 form_control_element
) != control_elements
.end();
111 bool IsElementInsideFormOrFieldSet(const WebElement
& element
) {
112 for (WebNode parent_node
= element
.parentNode();
113 !parent_node
.isNull();
114 parent_node
= parent_node
.parentNode()) {
115 if (!parent_node
.isElementNode())
118 WebElement cur_element
= parent_node
.to
<WebElement
>();
119 if (cur_element
.hasHTMLTagName("form") ||
120 cur_element
.hasHTMLTagName("fieldset")) {
127 // Returns true if |node| is an element and it is a container type that
128 // InferLabelForElement() can traverse.
129 bool IsTraversableContainerElement(const WebNode
& node
) {
130 if (!node
.isElementNode())
133 std::string tag_name
= node
.toConst
<WebElement
>().tagName().utf8();
134 return (tag_name
== "DD" ||
136 tag_name
== "FIELDSET" ||
139 tag_name
== "TABLE");
142 // Returns the colspan for a <td> / <th>. Defaults to 1.
143 size_t CalculateTableCellColumnSpan(const WebElement
& element
) {
144 DCHECK(element
.hasHTMLTagName("td") || element
.hasHTMLTagName("th"));
147 if (element
.hasAttribute("colspan")) {
148 base::string16 colspan
= element
.getAttribute("colspan");
149 // Do not check return value to accept imperfect conversions.
150 base::StringToSizeT(colspan
, &span
);
152 if (span
== std::numeric_limits
<size_t>::max())
154 span
= std::max(span
, static_cast<size_t>(1));
160 // Appends |suffix| to |prefix| so that any intermediary whitespace is collapsed
161 // to a single space. If |force_whitespace| is true, then the resulting string
162 // is guaranteed to have a space between |prefix| and |suffix|. Otherwise, the
163 // result includes a space only if |prefix| has trailing whitespace or |suffix|
164 // has leading whitespace.
166 // * CombineAndCollapseWhitespace("foo", "bar", false) -> "foobar"
167 // * CombineAndCollapseWhitespace("foo", "bar", true) -> "foo bar"
168 // * CombineAndCollapseWhitespace("foo ", "bar", false) -> "foo bar"
169 // * CombineAndCollapseWhitespace("foo", " bar", false) -> "foo bar"
170 // * CombineAndCollapseWhitespace("foo", " bar", true) -> "foo bar"
171 // * CombineAndCollapseWhitespace("foo ", " bar", false) -> "foo bar"
172 // * CombineAndCollapseWhitespace(" foo", "bar ", false) -> " foobar "
173 // * CombineAndCollapseWhitespace(" foo", "bar ", true) -> " foo bar "
174 const base::string16
CombineAndCollapseWhitespace(
175 const base::string16
& prefix
,
176 const base::string16
& suffix
,
177 bool force_whitespace
) {
178 base::string16 prefix_trimmed
;
179 base::TrimPositions prefix_trailing_whitespace
=
180 base::TrimWhitespace(prefix
, base::TRIM_TRAILING
, &prefix_trimmed
);
182 // Recursively compute the children's text.
183 base::string16 suffix_trimmed
;
184 base::TrimPositions suffix_leading_whitespace
=
185 base::TrimWhitespace(suffix
, base::TRIM_LEADING
, &suffix_trimmed
);
187 if (prefix_trailing_whitespace
|| suffix_leading_whitespace
||
189 return prefix_trimmed
+ base::ASCIIToUTF16(" ") + suffix_trimmed
;
191 return prefix_trimmed
+ suffix_trimmed
;
195 // This is a helper function for the FindChildText() function (see below).
196 // Search depth is limited with the |depth| parameter.
197 // |divs_to_skip| is a list of <div> tags to ignore if encountered.
198 base::string16
FindChildTextInner(const WebNode
& node
,
200 const std::set
<WebNode
>& divs_to_skip
) {
201 if (depth
<= 0 || node
.isNull())
202 return base::string16();
204 // Skip over comments.
205 if (node
.isCommentNode())
206 return FindChildTextInner(node
.nextSibling(), depth
- 1, divs_to_skip
);
208 if (!node
.isElementNode() && !node
.isTextNode())
209 return base::string16();
211 // Ignore elements known not to contain inferable labels.
212 if (node
.isElementNode()) {
213 const WebElement element
= node
.toConst
<WebElement
>();
214 if (IsOptionElement(element
) ||
215 IsScriptElement(element
) ||
216 IsNoScriptElement(element
) ||
217 (element
.isFormControlElement() &&
218 IsAutofillableElement(element
.toConst
<WebFormControlElement
>()))) {
219 return base::string16();
222 if (element
.hasHTMLTagName("div") && ContainsKey(divs_to_skip
, node
))
223 return base::string16();
226 // Extract the text exactly at this node.
227 base::string16 node_text
= node
.nodeValue();
229 // Recursively compute the children's text.
230 // Preserve inter-element whitespace separation.
231 base::string16 child_text
=
232 FindChildTextInner(node
.firstChild(), depth
- 1, divs_to_skip
);
233 bool add_space
= node
.isTextNode() && node_text
.empty();
234 node_text
= CombineAndCollapseWhitespace(node_text
, child_text
, add_space
);
236 // Recursively compute the siblings' text.
237 // Again, preserve inter-element whitespace separation.
238 base::string16 sibling_text
=
239 FindChildTextInner(node
.nextSibling(), depth
- 1, divs_to_skip
);
240 add_space
= node
.isTextNode() && node_text
.empty();
241 node_text
= CombineAndCollapseWhitespace(node_text
, sibling_text
, add_space
);
246 // Same as FindChildText() below, but with a list of div nodes to skip.
247 // TODO(thestig): See if other FindChildText() callers can benefit from this.
248 base::string16
FindChildTextWithIgnoreList(
250 const std::set
<WebNode
>& divs_to_skip
) {
251 if (node
.isTextNode())
252 return node
.nodeValue();
254 WebNode child
= node
.firstChild();
256 const int kChildSearchDepth
= 10;
257 base::string16 node_text
=
258 FindChildTextInner(child
, kChildSearchDepth
, divs_to_skip
);
259 base::TrimWhitespace(node_text
, base::TRIM_ALL
, &node_text
);
263 // Returns the aggregated values of the descendants of |element| that are
264 // non-empty text nodes. This is a faster alternative to |innerText()| for
265 // performance critical operations. It does a full depth-first search so can be
266 // used when the structure is not directly known. However, unlike with
267 // |innerText()|, the search depth and breadth are limited to a fixed threshold.
268 // Whitespace is trimmed from text accumulated at descendant nodes.
269 base::string16
FindChildText(const WebNode
& node
) {
270 return FindChildTextWithIgnoreList(node
, std::set
<WebNode
>());
273 // Shared function for InferLabelFromPrevious() and InferLabelFromNext().
274 base::string16
InferLabelFromSibling(const WebFormControlElement
& element
,
276 base::string16 inferred_label
;
277 WebNode sibling
= element
;
279 sibling
= forward
? sibling
.nextSibling() : sibling
.previousSibling();
280 if (sibling
.isNull())
283 // Skip over comments.
284 if (sibling
.isCommentNode())
287 // Otherwise, only consider normal HTML elements and their contents.
288 if (!sibling
.isElementNode() && !sibling
.isTextNode())
291 // A label might be split across multiple "lightweight" nodes.
292 // Coalesce any text contained in multiple consecutive
293 // (a) plain text nodes or
294 // (b) inline HTML elements that are essentially equivalent to text nodes.
295 CR_DEFINE_STATIC_LOCAL(WebString
, kBold
, ("b"));
296 CR_DEFINE_STATIC_LOCAL(WebString
, kStrong
, ("strong"));
297 CR_DEFINE_STATIC_LOCAL(WebString
, kSpan
, ("span"));
298 CR_DEFINE_STATIC_LOCAL(WebString
, kFont
, ("font"));
299 if (sibling
.isTextNode() ||
300 HasTagName(sibling
, kBold
) || HasTagName(sibling
, kStrong
) ||
301 HasTagName(sibling
, kSpan
) || HasTagName(sibling
, kFont
)) {
302 base::string16 value
= FindChildText(sibling
);
303 // A text node's value will be empty if it is for a line break.
304 bool add_space
= sibling
.isTextNode() && value
.empty();
306 CombineAndCollapseWhitespace(value
, inferred_label
, add_space
);
310 // If we have identified a partial label and have reached a non-lightweight
311 // element, consider the label to be complete.
312 base::string16 trimmed_label
;
313 base::TrimWhitespace(inferred_label
, base::TRIM_ALL
, &trimmed_label
);
314 if (!trimmed_label
.empty())
317 // <img> and <br> tags often appear between the input element and its
318 // label text, so skip over them.
319 CR_DEFINE_STATIC_LOCAL(WebString
, kImage
, ("img"));
320 CR_DEFINE_STATIC_LOCAL(WebString
, kBreak
, ("br"));
321 if (HasTagName(sibling
, kImage
) || HasTagName(sibling
, kBreak
))
324 // We only expect <p> and <label> tags to contain the full label text.
325 CR_DEFINE_STATIC_LOCAL(WebString
, kPage
, ("p"));
326 CR_DEFINE_STATIC_LOCAL(WebString
, kLabel
, ("label"));
327 if (HasTagName(sibling
, kPage
) || HasTagName(sibling
, kLabel
))
328 inferred_label
= FindChildText(sibling
);
333 base::TrimWhitespace(inferred_label
, base::TRIM_ALL
, &inferred_label
);
334 return inferred_label
;
337 // Helper for |InferLabelForElement()| that infers a label, if possible, from
338 // a previous sibling of |element|,
339 // e.g. Some Text <input ...>
340 // or Some <span>Text</span> <input ...>
341 // or <p>Some Text</p><input ...>
342 // or <label>Some Text</label> <input ...>
343 // or Some Text <img><input ...>
344 // or <b>Some Text</b><br/> <input ...>.
345 base::string16
InferLabelFromPrevious(const WebFormControlElement
& element
) {
346 return InferLabelFromSibling(element
, false /* forward? */);
349 // Same as InferLabelFromPrevious(), but in the other direction.
350 // Useful for cases like: <span><input type="checkbox">Label For Checkbox</span>
351 base::string16
InferLabelFromNext(const WebFormControlElement
& element
) {
352 return InferLabelFromSibling(element
, true /* forward? */);
355 // Helper for |InferLabelForElement()| that infers a label, if possible, from
356 // the placeholder text. e.g. <input placeholder="foo">
357 base::string16
InferLabelFromPlaceholder(const WebFormControlElement
& element
) {
358 CR_DEFINE_STATIC_LOCAL(WebString
, kPlaceholder
, ("placeholder"));
359 if (element
.hasAttribute(kPlaceholder
))
360 return element
.getAttribute(kPlaceholder
);
362 return base::string16();
365 // Helper for |InferLabelForElement()| that infers a label, if possible, from
366 // enclosing list item,
367 // e.g. <li>Some Text<input ...><input ...><input ...></tr>
368 base::string16
InferLabelFromListItem(const WebFormControlElement
& element
) {
369 WebNode parent
= element
.parentNode();
370 CR_DEFINE_STATIC_LOCAL(WebString
, kListItem
, ("li"));
371 while (!parent
.isNull() && parent
.isElementNode() &&
372 !parent
.to
<WebElement
>().hasHTMLTagName(kListItem
)) {
373 parent
= parent
.parentNode();
376 if (!parent
.isNull() && HasTagName(parent
, kListItem
))
377 return FindChildText(parent
);
379 return base::string16();
382 // Helper for |InferLabelForElement()| that infers a label, if possible, from
383 // surrounding table structure,
384 // e.g. <tr><td>Some Text</td><td><input ...></td></tr>
385 // or <tr><th>Some Text</th><td><input ...></td></tr>
386 // or <tr><td><b>Some Text</b></td><td><b><input ...></b></td></tr>
387 // or <tr><th><b>Some Text</b></th><td><b><input ...></b></td></tr>
388 base::string16
InferLabelFromTableColumn(const WebFormControlElement
& element
) {
389 CR_DEFINE_STATIC_LOCAL(WebString
, kTableCell
, ("td"));
390 WebNode parent
= element
.parentNode();
391 while (!parent
.isNull() && parent
.isElementNode() &&
392 !parent
.to
<WebElement
>().hasHTMLTagName(kTableCell
)) {
393 parent
= parent
.parentNode();
397 return base::string16();
399 // Check all previous siblings, skipping non-element nodes, until we find a
400 // non-empty text block.
401 base::string16 inferred_label
;
402 WebNode previous
= parent
.previousSibling();
403 CR_DEFINE_STATIC_LOCAL(WebString
, kTableHeader
, ("th"));
404 while (inferred_label
.empty() && !previous
.isNull()) {
405 if (HasTagName(previous
, kTableCell
) || HasTagName(previous
, kTableHeader
))
406 inferred_label
= FindChildText(previous
);
408 previous
= previous
.previousSibling();
411 return inferred_label
;
414 // Helper for |InferLabelForElement()| that infers a label, if possible, from
415 // surrounding table structure,
417 // If there are multiple cells and the row with the input matches up with the
418 // previous row, then look for a specific cell within the previous row.
419 // e.g. <tr><td>Input 1 label</td><td>Input 2 label</td></tr>
420 // <tr><td><input name="input 1"></td><td><input name="input2"></td></tr>
422 // Otherwise, just look in the entire previous row.
423 // e.g. <tr><td>Some Text</td></tr><tr><td><input ...></td></tr>
424 base::string16
InferLabelFromTableRow(const WebFormControlElement
& element
) {
425 CR_DEFINE_STATIC_LOCAL(WebString
, kTableCell
, ("td"));
426 base::string16 inferred_label
;
428 // First find the <td> that contains |element|.
429 WebNode cell
= element
.parentNode();
430 while (!cell
.isNull()) {
431 if (cell
.isElementNode() &&
432 cell
.to
<WebElement
>().hasHTMLTagName(kTableCell
)) {
435 cell
= cell
.parentNode();
438 // Not in a cell - bail out.
440 return inferred_label
;
442 // Count the cell holding |element|.
443 size_t cell_count
= CalculateTableCellColumnSpan(cell
.to
<WebElement
>());
444 size_t cell_position
= 0;
445 size_t cell_position_end
= cell_count
- 1;
447 // Count cells to the left to figure out |element|'s cell's position.
448 for (WebNode cell_it
= cell
.previousSibling();
450 cell_it
= cell_it
.previousSibling()) {
451 if (cell_it
.isElementNode() &&
452 cell_it
.to
<WebElement
>().hasHTMLTagName(kTableCell
)) {
453 cell_position
+= CalculateTableCellColumnSpan(cell_it
.to
<WebElement
>());
457 // Count cells to the right.
458 for (WebNode cell_it
= cell
.nextSibling();
460 cell_it
= cell_it
.nextSibling()) {
461 if (cell_it
.isElementNode() &&
462 cell_it
.to
<WebElement
>().hasHTMLTagName(kTableCell
)) {
463 cell_count
+= CalculateTableCellColumnSpan(cell_it
.to
<WebElement
>());
467 // Combine left + right.
468 cell_count
+= cell_position
;
469 cell_position_end
+= cell_position
;
471 // Find the current row.
472 CR_DEFINE_STATIC_LOCAL(WebString
, kTableRow
, ("tr"));
473 WebNode parent
= element
.parentNode();
474 while (!parent
.isNull() && parent
.isElementNode() &&
475 !parent
.to
<WebElement
>().hasHTMLTagName(kTableRow
)) {
476 parent
= parent
.parentNode();
480 return inferred_label
;
482 // Now find the previous row.
483 WebNode row_it
= parent
.previousSibling();
484 while (!row_it
.isNull()) {
485 if (row_it
.isElementNode() &&
486 row_it
.to
<WebElement
>().hasHTMLTagName(kTableRow
)) {
489 row_it
= row_it
.previousSibling();
492 // If there exists a previous row, check its cells and size. If they align
493 // with the current row, infer the label from the cell above.
494 if (!row_it
.isNull()) {
495 WebNode matching_cell
;
496 size_t prev_row_count
= 0;
497 WebNode prev_row_it
= row_it
.firstChild();
498 CR_DEFINE_STATIC_LOCAL(WebString
, kTableHeader
, ("th"));
499 while (!prev_row_it
.isNull()) {
500 if (prev_row_it
.isElementNode()) {
501 WebElement prev_row_element
= prev_row_it
.to
<WebElement
>();
502 if (prev_row_element
.hasHTMLTagName(kTableCell
) ||
503 prev_row_element
.hasHTMLTagName(kTableHeader
)) {
504 size_t span
= CalculateTableCellColumnSpan(prev_row_element
);
505 size_t prev_row_count_end
= prev_row_count
+ span
- 1;
506 if (prev_row_count
== cell_position
&&
507 prev_row_count_end
== cell_position_end
) {
508 matching_cell
= prev_row_it
;
510 prev_row_count
+= span
;
513 prev_row_it
= prev_row_it
.nextSibling();
515 if ((cell_count
== prev_row_count
) && !matching_cell
.isNull()) {
516 inferred_label
= FindChildText(matching_cell
);
517 if (!inferred_label
.empty())
518 return inferred_label
;
522 // If there is no previous row, or if the previous row and current row do not
523 // align, check all previous siblings, skipping non-element nodes, until we
524 // find a non-empty text block.
525 WebNode previous
= parent
.previousSibling();
526 while (inferred_label
.empty() && !previous
.isNull()) {
527 if (HasTagName(previous
, kTableRow
))
528 inferred_label
= FindChildText(previous
);
530 previous
= previous
.previousSibling();
533 return inferred_label
;
536 // Helper for |InferLabelForElement()| that infers a label, if possible, from
537 // a surrounding div table,
538 // e.g. <div>Some Text<span><input ...></span></div>
539 // e.g. <div>Some Text</div><div><input ...></div>
541 // Because this is already traversing the <div> structure, if it finds a <label>
542 // sibling along the way, infer from that <label>.
543 base::string16
InferLabelFromDivTable(const WebFormControlElement
& element
) {
544 WebNode node
= element
.parentNode();
545 bool looking_for_parent
= true;
546 std::set
<WebNode
> divs_to_skip
;
548 // Search the sibling and parent <div>s until we find a candidate label.
549 base::string16 inferred_label
;
550 CR_DEFINE_STATIC_LOCAL(WebString
, kDiv
, ("div"));
551 CR_DEFINE_STATIC_LOCAL(WebString
, kLabel
, ("label"));
552 while (inferred_label
.empty() && !node
.isNull()) {
553 if (HasTagName(node
, kDiv
)) {
554 if (looking_for_parent
)
555 inferred_label
= FindChildTextWithIgnoreList(node
, divs_to_skip
);
557 inferred_label
= FindChildText(node
);
559 // Avoid sibling DIVs that contain autofillable fields.
560 if (!looking_for_parent
&& !inferred_label
.empty()) {
561 CR_DEFINE_STATIC_LOCAL(WebString
, kSelector
,
562 ("input, select, textarea"));
563 blink::WebExceptionCode ec
= 0;
564 WebElement result_element
= node
.querySelector(kSelector
, ec
);
565 if (!result_element
.isNull()) {
566 inferred_label
.clear();
567 divs_to_skip
.insert(node
);
571 looking_for_parent
= false;
572 } else if (!looking_for_parent
&& HasTagName(node
, kLabel
)) {
573 WebLabelElement label_element
= node
.to
<WebLabelElement
>();
574 if (label_element
.correspondingControl().isNull())
575 inferred_label
= FindChildText(node
);
576 } else if (looking_for_parent
&& IsTraversableContainerElement(node
)) {
577 // If the element is in a non-div container, its label most likely is too.
581 if (node
.previousSibling().isNull()) {
582 // If there are no more siblings, continue walking up the tree.
583 looking_for_parent
= true;
586 node
= looking_for_parent
? node
.parentNode() : node
.previousSibling();
589 return inferred_label
;
592 // Helper for |InferLabelForElement()| that infers a label, if possible, from
593 // a surrounding definition list,
594 // e.g. <dl><dt>Some Text</dt><dd><input ...></dd></dl>
595 // e.g. <dl><dt><b>Some Text</b></dt><dd><b><input ...></b></dd></dl>
596 base::string16
InferLabelFromDefinitionList(
597 const WebFormControlElement
& element
) {
598 CR_DEFINE_STATIC_LOCAL(WebString
, kDefinitionData
, ("dd"));
599 WebNode parent
= element
.parentNode();
600 while (!parent
.isNull() && parent
.isElementNode() &&
601 !parent
.to
<WebElement
>().hasHTMLTagName(kDefinitionData
))
602 parent
= parent
.parentNode();
604 if (parent
.isNull() || !HasTagName(parent
, kDefinitionData
))
605 return base::string16();
607 // Skip by any intervening text nodes.
608 WebNode previous
= parent
.previousSibling();
609 while (!previous
.isNull() && previous
.isTextNode())
610 previous
= previous
.previousSibling();
612 CR_DEFINE_STATIC_LOCAL(WebString
, kDefinitionTag
, ("dt"));
613 if (previous
.isNull() || !HasTagName(previous
, kDefinitionTag
))
614 return base::string16();
616 return FindChildText(previous
);
619 // Returns the element type for all ancestor nodes in CAPS, starting with the
621 std::vector
<std::string
> AncestorTagNames(
622 const WebFormControlElement
& element
) {
623 std::vector
<std::string
> tag_names
;
624 for (WebNode parent_node
= element
.parentNode();
625 !parent_node
.isNull();
626 parent_node
= parent_node
.parentNode()) {
627 if (!parent_node
.isElementNode())
630 tag_names
.push_back(parent_node
.to
<WebElement
>().tagName().utf8());
635 // Infers corresponding label for |element| from surrounding context in the DOM,
636 // e.g. the contents of the preceding <p> tag or text element.
637 base::string16
InferLabelForElement(const WebFormControlElement
& element
) {
638 base::string16 inferred_label
;
639 if (IsCheckableElement(toWebInputElement(&element
))) {
640 inferred_label
= InferLabelFromNext(element
);
641 if (!inferred_label
.empty())
642 return inferred_label
;
645 inferred_label
= InferLabelFromPrevious(element
);
646 if (!inferred_label
.empty())
647 return inferred_label
;
649 // If we didn't find a label, check for placeholder text.
650 inferred_label
= InferLabelFromPlaceholder(element
);
651 if (!inferred_label
.empty())
652 return inferred_label
;
654 // For all other searches that involve traversing up the tree, the search
655 // order is based on which tag is the closest ancestor to |element|.
656 std::vector
<std::string
> tag_names
= AncestorTagNames(element
);
657 std::set
<std::string
> seen_tag_names
;
658 for (const std::string
& tag_name
: tag_names
) {
659 if (ContainsKey(seen_tag_names
, tag_name
))
662 seen_tag_names
.insert(tag_name
);
663 if (tag_name
== "DIV") {
664 inferred_label
= InferLabelFromDivTable(element
);
665 } else if (tag_name
== "TD") {
666 inferred_label
= InferLabelFromTableColumn(element
);
667 if (inferred_label
.empty())
668 inferred_label
= InferLabelFromTableRow(element
);
669 } else if (tag_name
== "DD") {
670 inferred_label
= InferLabelFromDefinitionList(element
);
671 } else if (tag_name
== "LI") {
672 inferred_label
= InferLabelFromListItem(element
);
673 } else if (tag_name
== "FIELDSET") {
677 if (!inferred_label
.empty())
681 return inferred_label
;
684 // Fills |option_strings| with the values of the <option> elements present in
686 void GetOptionStringsFromElement(const WebSelectElement
& select_element
,
687 std::vector
<base::string16
>* option_values
,
688 std::vector
<base::string16
>* option_contents
) {
689 DCHECK(!select_element
.isNull());
691 option_values
->clear();
692 option_contents
->clear();
693 WebVector
<WebElement
> list_items
= select_element
.listItems();
695 // Constrain the maximum list length to prevent a malicious site from DOS'ing
696 // the browser, without entirely breaking autocomplete for some extreme
697 // legitimate sites: http://crbug.com/49332 and http://crbug.com/363094
698 if (list_items
.size() > kMaxListSize
)
701 option_values
->reserve(list_items
.size());
702 option_contents
->reserve(list_items
.size());
703 for (size_t i
= 0; i
< list_items
.size(); ++i
) {
704 if (IsOptionElement(list_items
[i
])) {
705 const WebOptionElement option
= list_items
[i
].toConst
<WebOptionElement
>();
706 option_values
->push_back(option
.value());
707 option_contents
->push_back(option
.text());
712 // The callback type used by |ForEachMatchingFormField()|.
713 typedef void (*Callback
)(const FormFieldData
&,
714 bool, /* is_initiating_element */
715 blink::WebFormControlElement
*);
717 void ForEachMatchingFormFieldCommon(
718 std::vector
<WebFormControlElement
>* control_elements
,
719 const WebElement
& initiating_element
,
720 const FormData
& data
,
721 FieldFilterMask filters
,
723 const Callback
& callback
) {
724 DCHECK(control_elements
);
725 if (control_elements
->size() != data
.fields
.size()) {
726 // This case should be reachable only for pathological websites and tests,
727 // which add or remove form fields while the user is interacting with the
732 // It's possible that the site has injected fields into the form after the
733 // page has loaded, so we can't assert that the size of the cached control
734 // elements is equal to the size of the fields in |form|. Fortunately, the
735 // one case in the wild where this happens, paypal.com signup form, the fields
736 // are appended to the end of the form and are not visible.
737 for (size_t i
= 0; i
< control_elements
->size(); ++i
) {
738 WebFormControlElement
* element
= &(*control_elements
)[i
];
740 if (base::string16(element
->nameForAutofill()) != data
.fields
[i
].name
) {
741 // This case should be reachable only for pathological websites, which
742 // rename form fields while the user is interacting with the Autofill
743 // popup. I (isherman) am not aware of any such websites, and so am
744 // optimistically including a NOTREACHED(). If you ever trip this check,
745 // please file a bug against me.
750 bool is_initiating_element
= (*element
== initiating_element
);
752 // Only autofill empty fields and the field that initiated the filling,
753 // i.e. the field the user is currently editing and interacting with.
754 const WebInputElement
* input_element
= toWebInputElement(element
);
755 if (!force_override
&& !is_initiating_element
&&
756 ((IsAutofillableInputElement(input_element
) ||
757 IsTextAreaElement(*element
)) &&
758 !element
->value().isEmpty()))
761 if (((filters
& FILTER_DISABLED_ELEMENTS
) && !element
->isEnabled()) ||
762 ((filters
& FILTER_READONLY_ELEMENTS
) && element
->isReadOnly()) ||
763 ((filters
& FILTER_NON_FOCUSABLE_ELEMENTS
) && !element
->isFocusable()))
766 callback(data
.fields
[i
], is_initiating_element
, element
);
770 // For each autofillable field in |data| that matches a field in the |form|,
771 // the |callback| is invoked with the corresponding |form| field data.
772 void ForEachMatchingFormField(const WebFormElement
& form_element
,
773 const WebElement
& initiating_element
,
774 const FormData
& data
,
775 FieldFilterMask filters
,
777 const Callback
& callback
) {
778 std::vector
<WebFormControlElement
> control_elements
=
779 ExtractAutofillableElementsInForm(form_element
);
780 ForEachMatchingFormFieldCommon(&control_elements
, initiating_element
, data
,
781 filters
, force_override
, callback
);
784 // For each autofillable field in |data| that matches a field in the set of
785 // unowned autofillable form fields, the |callback| is invoked with the
786 // corresponding |data| field.
787 void ForEachMatchingUnownedFormField(const WebElement
& initiating_element
,
788 const FormData
& data
,
789 FieldFilterMask filters
,
791 const Callback
& callback
) {
792 if (initiating_element
.isNull())
795 std::vector
<WebFormControlElement
> control_elements
=
796 GetUnownedAutofillableFormFieldElements(
797 initiating_element
.document().all(), nullptr);
798 if (!IsElementInControlElementSet(initiating_element
, control_elements
))
801 ForEachMatchingFormFieldCommon(&control_elements
, initiating_element
, data
,
802 filters
, force_override
, callback
);
805 // Sets the |field|'s value to the value in |data|.
806 // Also sets the "autofilled" attribute, causing the background to be yellow.
807 void FillFormField(const FormFieldData
& data
,
808 bool is_initiating_node
,
809 blink::WebFormControlElement
* field
) {
811 if (data
.value
.empty())
814 if (!data
.is_autofilled
)
817 WebInputElement
* input_element
= toWebInputElement(field
);
818 if (IsCheckableElement(input_element
)) {
819 input_element
->setChecked(data
.is_checked
, true);
821 base::string16 value
= data
.value
;
822 if (IsTextInput(input_element
) || IsMonthInput(input_element
)) {
823 // If the maxlength attribute contains a negative value, maxLength()
824 // returns the default maxlength value.
825 TruncateString(&value
, input_element
->maxLength());
827 field
->setValue(value
, true);
830 field
->setAutofilled(true);
832 if (is_initiating_node
&&
833 ((IsTextInput(input_element
) || IsMonthInput(input_element
)) ||
834 IsTextAreaElement(*field
))) {
835 int length
= field
->value().length();
836 field
->setSelectionRange(length
, length
);
837 // Clear the current IME composition (the underline), if there is one.
838 field
->document().frame()->unmarkText();
842 // Sets the |field|'s "suggested" (non JS visible) value to the value in |data|.
843 // Also sets the "autofilled" attribute, causing the background to be yellow.
844 void PreviewFormField(const FormFieldData
& data
,
845 bool is_initiating_node
,
846 blink::WebFormControlElement
* field
) {
847 // Nothing to preview.
848 if (data
.value
.empty())
851 if (!data
.is_autofilled
)
854 // Preview input, textarea and select fields. For input fields, excludes
855 // checkboxes and radio buttons, as there is no provision for
856 // setSuggestedCheckedValue in WebInputElement.
857 WebInputElement
* input_element
= toWebInputElement(field
);
858 if (IsTextInput(input_element
) || IsMonthInput(input_element
)) {
859 // If the maxlength attribute contains a negative value, maxLength()
860 // returns the default maxlength value.
861 input_element
->setSuggestedValue(
862 data
.value
.substr(0, input_element
->maxLength()));
863 input_element
->setAutofilled(true);
864 } else if (IsTextAreaElement(*field
) || IsSelectElement(*field
)) {
865 field
->setSuggestedValue(data
.value
);
866 field
->setAutofilled(true);
869 if (is_initiating_node
&&
870 (IsTextInput(input_element
) || IsTextAreaElement(*field
))) {
871 // Select the part of the text that the user didn't type.
872 PreviewSuggestion(field
->suggestedValue(), field
->value(), field
);
876 // Extracts the fields from |control_elements| with |extract_mask| to
877 // |form_fields|. The extracted fields are also placed in |element_map|.
878 // |form_fields| and |element_map| should start out empty.
879 // |fields_extracted| should have as many elements as |control_elements|,
880 // initialized to false.
881 // Returns true if the number of fields extracted is within
882 // [1, kMaxParseableFields].
883 bool ExtractFieldsFromControlElements(
884 const WebVector
<WebFormControlElement
>& control_elements
,
885 ExtractMask extract_mask
,
886 ScopedVector
<FormFieldData
>* form_fields
,
887 std::vector
<bool>* fields_extracted
,
888 std::map
<WebFormControlElement
, FormFieldData
*>* element_map
) {
889 DCHECK(form_fields
->empty());
890 DCHECK(element_map
->empty());
891 DCHECK_EQ(control_elements
.size(), fields_extracted
->size());
893 for (size_t i
= 0; i
< control_elements
.size(); ++i
) {
894 const WebFormControlElement
& control_element
= control_elements
[i
];
896 if (!IsAutofillableElement(control_element
))
899 // Create a new FormFieldData, fill it out and map it to the field's name.
900 FormFieldData
* form_field
= new FormFieldData
;
901 WebFormControlElementToFormField(control_element
, extract_mask
, form_field
);
902 form_fields
->push_back(form_field
);
903 (*element_map
)[control_element
] = form_field
;
904 (*fields_extracted
)[i
] = true;
906 // To avoid overly expensive computation, we impose a maximum number of
908 if (form_fields
->size() > kMaxParseableFields
)
912 // Succeeded if fields were extracted.
913 return !form_fields
->empty();
916 // For each label element, get the corresponding form control element, use the
917 // form control element's name as a key into the
918 // <WebFormControlElement, FormFieldData> map to find the previously created
919 // FormFieldData and set the FormFieldData's label to the
920 // label.firstChild().nodeValue() of the label element.
921 void MatchLabelsAndFields(
922 const WebElementCollection
& labels
,
923 std::map
<WebFormControlElement
, FormFieldData
*>* element_map
) {
924 CR_DEFINE_STATIC_LOCAL(WebString
, kFor
, ("for"));
925 CR_DEFINE_STATIC_LOCAL(WebString
, kHidden
, ("hidden"));
927 for (WebElement item
= labels
.firstItem(); !item
.isNull();
928 item
= labels
.nextItem()) {
929 WebLabelElement label
= item
.to
<WebLabelElement
>();
930 WebFormControlElement field_element
=
931 label
.correspondingControl().to
<WebFormControlElement
>();
932 FormFieldData
* field_data
= nullptr;
934 if (field_element
.isNull()) {
935 // Sometimes site authors will incorrectly specify the corresponding
936 // field element's name rather than its id, so we compensate here.
937 base::string16 element_name
= label
.getAttribute(kFor
);
938 if (element_name
.empty())
940 // Look through the list for elements with this name. There can actually
941 // be more than one. In this case, the label may not be particularly
942 // useful, so just discard it.
943 for (const auto& iter
: *element_map
) {
944 if (iter
.second
->name
== element_name
) {
946 field_data
= nullptr;
949 field_data
= iter
.second
;
953 } else if (!field_element
.isFormControlElement() ||
954 field_element
.formControlType() == kHidden
) {
957 // Typical case: look up |field_data| in |element_map|.
958 auto iter
= element_map
->find(field_element
);
959 if (iter
== element_map
->end())
961 field_data
= iter
->second
;
967 base::string16 label_text
= FindChildText(label
);
969 // Concatenate labels because some sites might have multiple label
971 if (!field_data
->label
.empty() && !label_text
.empty())
972 field_data
->label
+= base::ASCIIToUTF16(" ");
973 field_data
->label
+= label_text
;
977 // Common function shared by WebFormElementToFormData() and
978 // UnownedFormElementsAndFieldSetsToFormData(). Either pass in:
979 // 1) |form_element| and an empty |fieldsets|.
981 // 2) a NULL |form_element|.
983 // If |field| is not NULL, then |form_control_element| should be not NULL.
984 bool FormOrFieldsetsToFormData(
985 const blink::WebFormElement
* form_element
,
986 const blink::WebFormControlElement
* form_control_element
,
987 const std::vector
<blink::WebElement
>& fieldsets
,
988 const WebVector
<WebFormControlElement
>& control_elements
,
989 ExtractMask extract_mask
,
991 FormFieldData
* field
) {
992 CR_DEFINE_STATIC_LOCAL(WebString
, kLabel
, ("label"));
995 DCHECK(fieldsets
.empty());
997 DCHECK(form_control_element
);
999 // A map from a FormFieldData's name to the FormFieldData itself.
1000 std::map
<WebFormControlElement
, FormFieldData
*> element_map
;
1002 // The extracted FormFields. We use pointers so we can store them in
1004 ScopedVector
<FormFieldData
> form_fields
;
1006 // A vector of bools that indicate whether each field in the form meets the
1007 // requirements and thus will be in the resulting |form|.
1008 std::vector
<bool> fields_extracted(control_elements
.size(), false);
1010 if (!ExtractFieldsFromControlElements(control_elements
, extract_mask
,
1011 &form_fields
, &fields_extracted
,
1017 // Loop through the label elements inside the form element. For each label
1018 // element, get the corresponding form control element, use the form control
1019 // element's name as a key into the <name, FormFieldData> map to find the
1020 // previously created FormFieldData and set the FormFieldData's label to the
1021 // label.firstChild().nodeValue() of the label element.
1022 WebElementCollection labels
=
1023 form_element
->getElementsByHTMLTagName(kLabel
);
1024 DCHECK(!labels
.isNull());
1025 MatchLabelsAndFields(labels
, &element_map
);
1027 // Same as the if block, but for all the labels in fieldsets.
1028 for (size_t i
= 0; i
< fieldsets
.size(); ++i
) {
1029 WebElementCollection labels
=
1030 fieldsets
[i
].getElementsByHTMLTagName(kLabel
);
1031 DCHECK(!labels
.isNull());
1032 MatchLabelsAndFields(labels
, &element_map
);
1036 // Loop through the form control elements, extracting the label text from
1037 // the DOM. We use the |fields_extracted| vector to make sure we assign the
1038 // extracted label to the correct field, as it's possible |form_fields| will
1039 // not contain all of the elements in |control_elements|.
1040 for (size_t i
= 0, field_idx
= 0;
1041 i
< control_elements
.size() && field_idx
< form_fields
.size(); ++i
) {
1042 // This field didn't meet the requirements, so don't try to find a label
1044 if (!fields_extracted
[i
])
1047 const WebFormControlElement
& control_element
= control_elements
[i
];
1048 if (form_fields
[field_idx
]->label
.empty())
1049 form_fields
[field_idx
]->label
= InferLabelForElement(control_element
);
1050 TruncateString(&form_fields
[field_idx
]->label
, kMaxDataLength
);
1052 if (field
&& *form_control_element
== control_element
)
1053 *field
= *form_fields
[field_idx
];
1058 // Copy the created FormFields into the resulting FormData object.
1059 for (const auto& iter
: form_fields
)
1060 form
->fields
.push_back(*iter
);
1064 bool UnownedFormElementsAndFieldSetsToFormData(
1065 const std::vector
<blink::WebElement
>& fieldsets
,
1066 const std::vector
<blink::WebFormControlElement
>& control_elements
,
1067 const blink::WebFormControlElement
* element
,
1068 const blink::WebDocument
& document
,
1069 ExtractMask extract_mask
,
1071 FormFieldData
* field
) {
1072 form
->origin
= document
.url();
1073 form
->is_form_tag
= false;
1075 return FormOrFieldsetsToFormData(nullptr, element
, fieldsets
,
1076 control_elements
, extract_mask
, form
, field
);
// Upper bound on the number of fields extracted from a single form;
// ExtractFieldsFromControlElements() returns false once this is exceeded.
1081 const size_t kMaxParseableFields
= 200;
1083 bool IsMonthInput(const WebInputElement
* element
) {
1084 CR_DEFINE_STATIC_LOCAL(WebString
, kMonth
, ("month"));
1085 return element
&& !element
->isNull() && element
->formControlType() == kMonth
;
1088 // All text fields, including password fields, should be extracted.
1089 bool IsTextInput(const WebInputElement
* element
) {
1090 return element
&& !element
->isNull() && element
->isTextField();
1093 bool IsSelectElement(const WebFormControlElement
& element
) {
1094 // Static for improved performance.
1095 CR_DEFINE_STATIC_LOCAL(WebString
, kSelectOne
, ("select-one"));
1096 return !element
.isNull() && element
.formControlType() == kSelectOne
;
1099 bool IsTextAreaElement(const WebFormControlElement
& element
) {
1100 // Static for improved performance.
1101 CR_DEFINE_STATIC_LOCAL(WebString
, kTextArea
, ("textarea"));
1102 return !element
.isNull() && element
.formControlType() == kTextArea
;
1105 bool IsCheckableElement(const WebInputElement
* element
) {
1106 if (!element
|| element
->isNull())
1109 return element
->isCheckbox() || element
->isRadioButton();
1112 bool IsAutofillableInputElement(const WebInputElement
* element
) {
1113 return IsTextInput(element
) ||
1114 IsMonthInput(element
) ||
1115 IsCheckableElement(element
);
1118 const base::string16
GetFormIdentifier(const WebFormElement
& form
) {
1119 base::string16 identifier
= form
.name();
1120 CR_DEFINE_STATIC_LOCAL(WebString
, kId
, ("id"));
1121 if (identifier
.empty())
1122 identifier
= form
.getAttribute(kId
);
1127 bool IsWebNodeVisible(const blink::WebNode
& node
) {
1128 // TODO(esprehn): This code doesn't really check if the node is visible, just
1129 // if the node takes up space in the layout. Does it want to check opacity,
1130 // transform, and visibility too?
1131 if (!node
.isElementNode())
1133 const WebElement element
= node
.toConst
<WebElement
>();
1134 return element
.hasNonEmptyLayoutSize();
1137 std::vector
<blink::WebFormControlElement
> ExtractAutofillableElementsFromSet(
1138 const WebVector
<WebFormControlElement
>& control_elements
) {
1139 std::vector
<blink::WebFormControlElement
> autofillable_elements
;
1140 for (size_t i
= 0; i
< control_elements
.size(); ++i
) {
1141 WebFormControlElement element
= control_elements
[i
];
1142 if (!IsAutofillableElement(element
))
1145 autofillable_elements
.push_back(element
);
1147 return autofillable_elements
;
1150 std::vector
<WebFormControlElement
> ExtractAutofillableElementsInForm(
1151 const WebFormElement
& form_element
) {
1152 WebVector
<WebFormControlElement
> control_elements
;
1153 form_element
.getFormControlElements(control_elements
);
1155 return ExtractAutofillableElementsFromSet(control_elements
);
1158 void WebFormControlElementToFormField(const WebFormControlElement
& element
,
1159 ExtractMask extract_mask
,
1160 FormFieldData
* field
) {
1162 DCHECK(!element
.isNull());
1163 CR_DEFINE_STATIC_LOCAL(WebString
, kAutocomplete
, ("autocomplete"));
1164 CR_DEFINE_STATIC_LOCAL(WebString
, kRole
, ("role"));
1166 // The label is not officially part of a WebFormControlElement; however, the
1167 // labels for all form control elements are scraped from the DOM and set in
1168 // WebFormElementToFormData.
1169 field
->name
= element
.nameForAutofill();
1170 field
->form_control_type
= element
.formControlType().utf8();
1171 field
->autocomplete_attribute
= element
.getAttribute(kAutocomplete
).utf8();
1172 if (field
->autocomplete_attribute
.size() > kMaxDataLength
) {
1173 // Discard overly long attribute values to avoid DOS-ing the browser
1174 // process. However, send over a default string to indicate that the
1175 // attribute was present.
1176 field
->autocomplete_attribute
= "x-max-data-length-exceeded";
1178 if (base::LowerCaseEqualsASCII(
1179 base::StringPiece16(element
.getAttribute(kRole
)), "presentation"))
1180 field
->role
= FormFieldData::ROLE_ATTRIBUTE_PRESENTATION
;
1182 if (!IsAutofillableElement(element
))
1185 const WebInputElement
* input_element
= toWebInputElement(&element
);
1186 if (IsAutofillableInputElement(input_element
) ||
1187 IsTextAreaElement(element
) ||
1188 IsSelectElement(element
)) {
1189 field
->is_autofilled
= element
.isAutofilled();
1190 field
->is_focusable
= element
.isFocusable();
1191 field
->should_autocomplete
= element
.autoComplete();
1192 field
->text_direction
= element
.directionForFormData() ==
1193 "rtl" ? base::i18n::RIGHT_TO_LEFT
: base::i18n::LEFT_TO_RIGHT
;
1196 if (IsAutofillableInputElement(input_element
)) {
1197 if (IsTextInput(input_element
))
1198 field
->max_length
= input_element
->maxLength();
1200 field
->is_checkable
= IsCheckableElement(input_element
);
1201 field
->is_checked
= input_element
->isChecked();
1202 } else if (IsTextAreaElement(element
)) {
1203 // Nothing more to do in this case.
1204 } else if (extract_mask
& EXTRACT_OPTIONS
) {
1205 // Set option strings on the field if available.
1206 DCHECK(IsSelectElement(element
));
1207 const WebSelectElement select_element
= element
.toConst
<WebSelectElement
>();
1208 GetOptionStringsFromElement(select_element
,
1209 &field
->option_values
,
1210 &field
->option_contents
);
1213 if (!(extract_mask
& EXTRACT_VALUE
))
1216 base::string16 value
= element
.value();
1218 if (IsSelectElement(element
) && (extract_mask
& EXTRACT_OPTION_TEXT
)) {
1219 const WebSelectElement select_element
= element
.toConst
<WebSelectElement
>();
1220 // Convert the |select_element| value to text if requested.
1221 WebVector
<WebElement
> list_items
= select_element
.listItems();
1222 for (size_t i
= 0; i
< list_items
.size(); ++i
) {
1223 if (IsOptionElement(list_items
[i
])) {
1224 const WebOptionElement option_element
=
1225 list_items
[i
].toConst
<WebOptionElement
>();
1226 if (option_element
.value() == value
) {
1227 value
= option_element
.text();
1234 // Constrain the maximum data length to prevent a malicious site from DOS'ing
1235 // the browser: http://crbug.com/49332
1236 TruncateString(&value
, kMaxDataLength
);
1238 field
->value
= value
;
1241 bool WebFormElementToFormData(
1242 const blink::WebFormElement
& form_element
,
1243 const blink::WebFormControlElement
& form_control_element
,
1244 ExtractMask extract_mask
,
1246 FormFieldData
* field
) {
1247 const WebFrame
* frame
= form_element
.document().frame();
1251 form
->name
= GetFormIdentifier(form_element
);
1252 form
->origin
= frame
->document().url();
1253 form
->action
= frame
->document().completeURL(form_element
.action());
1255 // If the completed URL is not valid, just use the action we get from
1257 if (!form
->action
.is_valid())
1258 form
->action
= GURL(form_element
.action());
1260 WebVector
<WebFormControlElement
> control_elements
;
1261 form_element
.getFormControlElements(control_elements
);
1263 std::vector
<blink::WebElement
> dummy_fieldset
;
1264 return FormOrFieldsetsToFormData(&form_element
, &form_control_element
,
1265 dummy_fieldset
, control_elements
,
1266 extract_mask
, form
, field
);
1269 std::vector
<WebFormControlElement
>
1270 GetUnownedAutofillableFormFieldElements(
1271 const WebElementCollection
& elements
,
1272 std::vector
<WebElement
>* fieldsets
) {
1273 std::vector
<WebFormControlElement
> unowned_fieldset_children
;
1274 for (WebElement element
= elements
.firstItem();
1276 element
= elements
.nextItem()) {
1277 if (element
.isFormControlElement()) {
1278 WebFormControlElement control
= element
.to
<WebFormControlElement
>();
1279 if (control
.form().isNull())
1280 unowned_fieldset_children
.push_back(control
);
1283 if (fieldsets
&& element
.hasHTMLTagName("fieldset") &&
1284 !IsElementInsideFormOrFieldSet(element
)) {
1285 fieldsets
->push_back(element
);
1288 return ExtractAutofillableElementsFromSet(unowned_fieldset_children
);
1291 bool UnownedCheckoutFormElementsAndFieldSetsToFormData(
1292 const std::vector
<blink::WebElement
>& fieldsets
,
1293 const std::vector
<blink::WebFormControlElement
>& control_elements
,
1294 const blink::WebFormControlElement
* element
,
1295 const blink::WebDocument
& document
,
1296 ExtractMask extract_mask
,
1298 FormFieldData
* field
) {
1299 // Only attempt formless Autofill on checkout flows. This avoids the many
1300 // false positives found on the non-checkout web. See
1301 // http://crbug.com/462375. For now this early abort only applies to
1302 // English-language pages, because the regex is not translated. Note that
1303 // an empty "lang" attribute counts as English. A potential problem is that
1304 // this only checks document.title(), but should actually check the main
1305 // frame's title. Thus it may make bad decisions for iframes.
1306 WebElement html_element
= document
.documentElement();
1308 if (!html_element
.isNull())
1309 lang
= html_element
.getAttribute("lang").utf8();
1310 if ((lang
.empty() ||
1311 base::StartsWith(lang
, "en", base::CompareCase::INSENSITIVE_ASCII
)) &&
1312 !MatchesPattern(document
.title(),
1313 base::UTF8ToUTF16("payment|checkout|address|delivery|shipping"))) {
1317 return UnownedFormElementsAndFieldSetsToFormData(
1318 fieldsets
, control_elements
, element
, document
, extract_mask
, form
,
1322 bool UnownedPasswordFormElementsAndFieldSetsToFormData(
1323 const std::vector
<blink::WebElement
>& fieldsets
,
1324 const std::vector
<blink::WebFormControlElement
>& control_elements
,
1325 const blink::WebFormControlElement
* element
,
1326 const blink::WebDocument
& document
,
1327 ExtractMask extract_mask
,
1329 FormFieldData
* field
) {
1330 return UnownedFormElementsAndFieldSetsToFormData(
1331 fieldsets
, control_elements
, element
, document
, extract_mask
, form
,
1336 bool FindFormAndFieldForFormControlElement(const WebFormControlElement
& element
,
1338 FormFieldData
* field
) {
1339 if (!IsAutofillableElement(element
))
1342 ExtractMask extract_mask
=
1343 static_cast<ExtractMask
>(EXTRACT_VALUE
| EXTRACT_OPTIONS
);
1344 const WebFormElement form_element
= element
.form();
1345 if (form_element
.isNull()) {
1346 // No associated form, try the synthetic form for unowned form elements.
1347 WebDocument document
= element
.document();
1348 std::vector
<WebElement
> fieldsets
;
1349 std::vector
<WebFormControlElement
> control_elements
=
1350 GetUnownedAutofillableFormFieldElements(document
.all(), &fieldsets
);
1351 return UnownedCheckoutFormElementsAndFieldSetsToFormData(
1352 fieldsets
, control_elements
, &element
, document
, extract_mask
,
1356 return WebFormElementToFormData(form_element
,
1363 void FillForm(const FormData
& form
, const WebFormControlElement
& element
) {
1364 WebFormElement form_element
= element
.form();
1365 if (form_element
.isNull()) {
1366 ForEachMatchingUnownedFormField(element
,
1368 FILTER_ALL_NON_EDITABLE_ELEMENTS
,
1369 false, /* dont force override */
1374 ForEachMatchingFormField(form_element
,
1377 FILTER_ALL_NON_EDITABLE_ELEMENTS
,
1378 false, /* dont force override */
// Fills |form_element| from |form_data| through ForEachMatchingFormField(),
// filtering only disabled and read-only elements and forcing an override of
// existing values.
// NOTE(review): several lines of this definition are absent from this view
// (the body of the null-form guard, some call arguments and the closing
// braces); restore them from the original file before building.
1382 void FillFormIncludingNonFocusableElements(const FormData
& form_data
,
1383 const WebFormElement
& form_element
) {
1384 if (form_element
.isNull()) {
1389 FieldFilterMask filter_mask
= static_cast<FieldFilterMask
>(
1390 FILTER_DISABLED_ELEMENTS
| FILTER_READONLY_ELEMENTS
);
1391 ForEachMatchingFormField(form_element
,
1395 true, /* force override */
1399 void PreviewForm(const FormData
& form
, const WebFormControlElement
& element
) {
1400 WebFormElement form_element
= element
.form();
1401 if (form_element
.isNull()) {
1402 ForEachMatchingUnownedFormField(element
,
1404 FILTER_ALL_NON_EDITABLE_ELEMENTS
,
1405 false, /* dont force override */
1410 ForEachMatchingFormField(form_element
,
1413 FILTER_ALL_NON_EDITABLE_ELEMENTS
,
1414 false, /* dont force override */
1418 bool ClearPreviewedFormWithElement(const WebFormControlElement
& element
,
1419 bool was_autofilled
) {
1420 WebFormElement form_element
= element
.form();
1421 std::vector
<WebFormControlElement
> control_elements
;
1422 if (form_element
.isNull()) {
1423 control_elements
= GetUnownedAutofillableFormFieldElements(
1424 element
.document().all(), nullptr);
1425 if (!IsElementInControlElementSet(element
, control_elements
))
1428 control_elements
= ExtractAutofillableElementsInForm(form_element
);
1431 for (size_t i
= 0; i
< control_elements
.size(); ++i
) {
1432 // There might be unrelated elements in this form which have already been
1433 // auto-filled. For example, the user might have already filled the address
1434 // part of a form and now be dealing with the credit card section. We only
1435 // want to reset the auto-filled status for fields that were previewed.
1436 WebFormControlElement control_element
= control_elements
[i
];
1438 // Only text input, textarea and select elements can be previewed.
1439 WebInputElement
* input_element
= toWebInputElement(&control_element
);
1440 if (!IsTextInput(input_element
) &&
1441 !IsMonthInput(input_element
) &&
1442 !IsTextAreaElement(control_element
) &&
1443 !IsSelectElement(control_element
))
1446 // If the element is not auto-filled, we did not preview it,
1447 // so there is nothing to reset.
1448 if (!control_element
.isAutofilled())
1451 if ((IsTextInput(input_element
) ||
1452 IsMonthInput(input_element
) ||
1453 IsTextAreaElement(control_element
) ||
1454 IsSelectElement(control_element
)) &&
1455 control_element
.suggestedValue().isEmpty())
1458 // Clear the suggested value. For the initiating node, also restore the
1460 if (IsTextInput(input_element
) || IsMonthInput(input_element
) ||
1461 IsTextAreaElement(control_element
)) {
1462 control_element
.setSuggestedValue(WebString());
1463 bool is_initiating_node
= (element
== control_element
);
1464 if (is_initiating_node
) {
1465 control_element
.setAutofilled(was_autofilled
);
1466 // Clearing the suggested value in the focused node (above) can cause
1467 // selection to be lost. We force selection range to restore the text
1469 int length
= control_element
.value().length();
1470 control_element
.setSelectionRange(length
, length
);
1472 control_element
.setAutofilled(false);
1474 } else if (IsSelectElement(control_element
)) {
1475 control_element
.setSuggestedValue(WebString());
1476 control_element
.setAutofilled(false);
1483 bool IsWebpageEmpty(const blink::WebFrame
* frame
) {
1484 blink::WebDocument document
= frame
->document();
1486 return IsWebElementEmpty(document
.head()) &&
1487 IsWebElementEmpty(document
.body());
1490 bool IsWebElementEmpty(const blink::WebElement
& root
) {
1491 CR_DEFINE_STATIC_LOCAL(WebString
, kScript
, ("script"));
1492 CR_DEFINE_STATIC_LOCAL(WebString
, kMeta
, ("meta"));
1493 CR_DEFINE_STATIC_LOCAL(WebString
, kTitle
, ("title"));
1498 for (WebNode child
= root
.firstChild();
1500 child
= child
.nextSibling()) {
1501 if (child
.isTextNode() &&
1502 !base::ContainsOnlyChars(child
.nodeValue().utf8(),
1503 base::kWhitespaceASCII
))
1506 if (!child
.isElementNode())
1509 WebElement element
= child
.to
<WebElement
>();
1510 if (!element
.hasHTMLTagName(kScript
) &&
1511 !element
.hasHTMLTagName(kMeta
) &&
1512 !element
.hasHTMLTagName(kTitle
))
1518 gfx::RectF
GetScaledBoundingBox(float scale
, WebElement
* element
) {
1519 gfx::Rect
bounding_box(element
->boundsInViewportSpace());
1520 return gfx::RectF(bounding_box
.x() * scale
,
1521 bounding_box
.y() * scale
,
1522 bounding_box
.width() * scale
,
1523 bounding_box
.height() * scale
);
1526 void PreviewSuggestion(const base::string16
& suggestion
,
1527 const base::string16
& user_input
,
1528 blink::WebFormControlElement
* input_element
) {
1529 size_t selection_start
= user_input
.length();
1530 if (IsFeatureSubstringMatchEnabled()) {
1531 size_t offset
= GetTextSelectionStart(suggestion
, user_input
, false);
1532 // Zero selection start is for password manager, which can show usernames
1533 // that do not begin with the user input value.
1534 selection_start
= (offset
== base::string16::npos
) ? 0 : offset
;
1537 input_element
->setSelectionRange(selection_start
, suggestion
.length());
1540 } // namespace autofill