1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 import { FormAutofill } from "resource://autofill/FormAutofill.sys.mjs";
6 import { HeuristicsRegExp } from "resource://gre/modules/shared/HeuristicsRegExp.sys.mjs";
9 ChromeUtils.defineESModuleGetters(lazy, {
10 CreditCard: "resource://gre/modules/CreditCard.sys.mjs",
11 CreditCardRulesets: "resource://gre/modules/shared/CreditCardRuleset.sys.mjs",
12 FieldDetail: "resource://gre/modules/shared/FieldScanner.sys.mjs",
13 FieldScanner: "resource://gre/modules/shared/FieldScanner.sys.mjs",
14 FormAutofillUtils: "resource://gre/modules/shared/FormAutofillUtils.sys.mjs",
15 LabelUtils: "resource://gre/modules/shared/LabelUtils.sys.mjs",
16 MLAutofill: "resource://autofill/MLAutofill.sys.mjs",
20 * To help us classify sections that can appear only N times in a row.
21 * For example, the only time multiple cc-number fields are valid is when
22 * there are four of these fields in a row.
23 * Otherwise, multiple cc-number fields should be in separate sections.
25 const MULTI_N_FIELD_NAMES = {
33 * Returns the autocomplete information of fields according to heuristics.
35 export const FormAutofillHeuristics = {
36 RULES: HeuristicsRegExp.getRules(),
37 LABEL_RULES: HeuristicsRegExp.getLabelRules(),
39 CREDIT_CARD_FIELDNAMES: [],
40 ADDRESS_FIELDNAMES: [],
42 * Try to find a contiguous sub-array within an array.
44 * @param {Array} array
45 * @param {Array} subArray
48 * Return whether subArray was found within the array or not.
50 _matchContiguousSubArray(array, subArray) {
51 return array.some((elm, i) =>
52 subArray.every((sElem, j) => sElem == array[i + j])
57 * Try to find the field that looks like a month select.
59 * @param {DOMElement} element
61 * Return true if we observe the trait of month select in
62 * the current element.
64 _isExpirationMonthLikely(element) {
65 if (!HTMLSelectElement.isInstance(element)) {
69 const options = [...element.options];
70 const desiredValues = Array(12)
72 .map((v, i) => v + i);
74 // The number of month options shouldn't be less than 12 or larger than 13
75 // including the default option.
76 if (options.length < 12 || options.length > 13) {
81 this._matchContiguousSubArray(
82 options.map(e => +e.value),
85 this._matchContiguousSubArray(
86 options.map(e => +e.label),
93 * Try to find the field that looks like a year select.
95 * @param {DOMElement} element
97 * Return true if we observe the trait of year select in
98 * the current element.
100 _isExpirationYearLikely(element) {
101 if (!HTMLSelectElement.isInstance(element)) {
105 const options = [...element.options];
106 // A normal expiration year select should contain at least the last three years
108 const curYear = new Date().getFullYear();
109 const desiredValues = Array(3)
111 .map((v, i) => v + curYear + i);
114 this._matchContiguousSubArray(
115 options.map(e => +e.value),
118 this._matchContiguousSubArray(
119 options.map(e => +e.label),
126 * This function handles the case when two adjacent fields are incorrectly
127 * identified with the same field name. Currently, only given-name and
128 * family-name are handled as possible errors.
130 * @param {FieldScanner} scanner
131 * The current parsing status for all elements
133 * Return true if any field is recognized and updated, otherwise false.
135 _parseNameFieldsContent(scanner, fieldDetail) {
136 const TARGET_FIELDS = ["given-name", "family-name"];
137 if (!TARGET_FIELDS.includes(fieldDetail.fieldName)) {
141 let idx = scanner.parsingIndex;
142 const detailBefore = scanner.getFieldDetailByIndex(idx - 1);
143 if (fieldDetail.fieldName == detailBefore?.fieldName) {
145 fieldDetail.fieldName == TARGET_FIELDS[0]
149 // If the second field matches both field names, or both fields match
150 // both field names, then we change the second field, since the author
151 // was more likely to miscopy the second field from the first. However,
152 // if the earlier field only matches, then we change the first field.
154 this._findMatchedFieldNames(fieldDetail.element, [otherFieldName])
157 scanner.updateFieldName(idx, otherFieldName);
159 this._findMatchedFieldNames(detailBefore.element, [otherFieldName])
162 scanner.updateFieldName(idx - 1, otherFieldName);
165 scanner.parsingIndex++;
173 * In some languages such as French (nom) and German (Name), name can mean either family name or
174 * full name in a form, depending on the context. We want to be sure that if "name" is
175 * detected in the context of "family-name" or "given-name", it is updated accordingly.
177 * Look for "given-name", "family-name", and "name" fields. If any two of those fields are detected
178 * and one of them is "name", then replace "name" with "family-name" if "name" is accompanied by
179 * "given-name" or vice versa.
181 * @param {FieldScanner} scanner
182 * The current parsing status for all elements
184 * Return true if any field is recognized and updated, otherwise false.
186 _parseNameFields(scanner, fieldDetail) {
187 const TARGET_FIELDS = ["name", "given-name", "family-name"];
189 if (!TARGET_FIELDS.includes(fieldDetail.fieldName)) {
196 for (let idx = scanner.parsingIndex; ; idx++) {
197 const detail = scanner.getFieldDetailByIndex(idx);
198 if (!TARGET_FIELDS.includes(detail?.fieldName)) {
201 if (detail.fieldName === "name") {
207 if (nameIndex != -1 && fields.length == 2) {
208 //if name is detected and the other of the two fields detected is 'given-name'
209 //then update name to 'name' to 'family-name'
211 fields[0].fieldName == "given-name" ||
212 fields[1].fieldName == "given-name"
214 scanner.updateFieldName(nameIndex, "family-name");
215 //if name is detected and the other of the two fields detected is 'family-name'
216 //then update name to 'name' to 'given-name'
218 fields[0].fieldName == "family-name" ||
219 fields[1].fieldName == "family-name"
221 scanner.updateFieldName(nameIndex, "given-name");
226 scanner.parsingIndex += fields.length;
234 * Try to match the telephone related fields to the grammar
235 * list to see if there is any valid telephone set and correct their
238 * @param {FieldScanner} scanner
239 * The current parsing status for all elements
241 * Return true if there is any field can be recognized in the parser,
/**
 * Match the fields starting at the scanner's current index against
 * `this.PHONE_FIELD_GRAMMARS`, rename the fields of a rule set that
 * matches, and then check whether the field after a "tel" category field
 * looks like a telephone extension.
 *
 * Each grammar entry is read as `[category, newFieldName, length]`: the
 * first item is compared with the field's current name, the second is the
 * name written on a match, and the third (when truthy) is compared with
 * the element's `maxLength`.
 *
 * @param {FieldScanner} scanner
 *        The current parsing status for all elements.
 * @param {FieldDetail} _fieldDetail
 *        Not used by this parser.
 * @returns {boolean}
 *          True when the scanner's parsing index was moved.
 */
244 _parsePhoneFields(scanner, _fieldDetail) {
246 const GRAMMARS = this.PHONE_FIELD_GRAMMARS;
// An entry whose first item is falsy marks the end of one rule set.
248 function isGrammarSeparator(index) {
249 return !GRAMMARS[index][0];
// Remembered so the return value can report whether anything was consumed.
252 const savedIndex = scanner.parsingIndex;
// Outer loop: one iteration per rule set; ruleFrom is advanced manually below.
253 for (let ruleFrom = 0; ruleFrom < GRAMMARS.length; ) {
254 const detailStart = scanner.parsingIndex;
255 let ruleTo = ruleFrom;
// Inner loop: walk the rules of this set and the fields in lock-step.
256 for (let count = 0; ruleTo < GRAMMARS.length; ruleTo++, count++) {
257 // Bail out when reaching the end of the current set of grammars
258 // or there are no more elements to parse
260 isGrammarSeparator(ruleTo) ||
261 !scanner.elementExisting(detailStart + count)
266 const [category, , length] = GRAMMARS[ruleTo];
267 const detail = scanner.getFieldDetailByIndex(detailStart + count);
269 // If the field is not what this grammar rule is interested in, skip processing.
// Fields whose name came from the autocomplete attribute are never matched.
272 detail.fieldName != category ||
273 detail.reason == "autocomplete"
278 const element = detail.element;
// A rule with a length only accepts elements whose maxLength is set and
// does not exceed that length.
279 if (length && (!element.maxLength || length < element.maxLength)) {
284 // if we reach the grammar separator, that means all the previous rules are matched.
285 // Set the matchingResult so we update field names accordingly.
// NOTE(review): if GRAMMARS did not end with a separator entry, ruleTo could
// equal GRAMMARS.length here and the lookup would throw - confirm the table.
286 if (isGrammarSeparator(ruleTo)) {
287 matchingResult = { ruleFrom, ruleTo };
291 // Fast forward to the next rule set.
292 for (; ruleFrom < GRAMMARS.length; ) {
293 if (isGrammarSeparator(ruleFrom++)) {
// Apply the matched rule set: one rename per rule, advancing the scanner.
299 if (matchingResult) {
300 const { ruleFrom, ruleTo } = matchingResult;
301 for (let i = ruleFrom; i < ruleTo; i++) {
302 scanner.updateFieldName(scanner.parsingIndex, GRAMMARS[i][1]);
303 scanner.parsingIndex++;
307 // If the previous parsed field is a "tel" field, run heuristic to see
308 // if the current field is a "tel-extension" field
309 const field = scanner.getFieldDetailByIndex(scanner.parsingIndex);
310 if (field && field.reason != "autocomplete") {
311 const prev = scanner.getFieldDetailByIndex(scanner.parsingIndex - 1);
314 lazy.FormAutofillUtils.getCategoryFromFieldName(prev.fieldName) == "tel"
316 const regExpTelExtension = new RegExp(
317 "\\bext|ext\\b|extension|ramal", // pt-BR, pt-PT
320 if (this._matchRegexp(field.element, regExpTelExtension)) {
321 scanner.updateFieldName(scanner.parsingIndex, "tel-extension");
322 scanner.parsingIndex++;
326 return savedIndex != scanner.parsingIndex;
330 * If this is a house number field and there is no address-line1 or
331 * street-address field, change the house number field to street-address.
333 * @param {FieldScanner} scanner
334 * The current parsing status for all elements
336 * Return true if there is any field can be recognized in the parser,
339 _parseHouseNumberFields(scanner, fieldDetail) {
340 if (fieldDetail?.fieldName == "address-housenumber") {
341 const savedIndex = scanner.parsingIndex;
342 for (let idx = 0; !scanner.parsingFinished; idx++) {
343 const detail = scanner.getFieldDetailByIndex(idx);
348 if (["address-line1", "street-address"].includes(detail?.fieldName)) {
353 // Return false so additional address handling still gets performed.
354 scanner.updateFieldName(savedIndex, "street-address");
361 * Try to find the correct address-line[1-3] sequence and correct their field
364 * @param {FieldScanner} scanner
365 * The current parsing status for all elements
367 * Return true if there is any field can be recognized in the parser,
/**
 * Collect the run of street-address style fields that starts at the
 * scanner's current index (house number fields inside the run are counted
 * but skipped) and renumber them as address-line1/2/3 depending on how
 * many were found.
 *
 * @param {FieldScanner} scanner
 *        The current parsing status for all elements.
 * @param {FieldDetail} _fieldDetail
 *        Not used by this parser.
 * @returns {boolean}
 *          NOTE(review): the return statements are not visible in the
 *          reviewed text; per the preceding doc comment this reports
 *          whether any field was recognized - confirm.
 */
370 _parseStreetAddressFields(scanner, _fieldDetail) {
// NOTE(review): the entries of this list are not visible in the reviewed text.
371 const INTERESTED_FIELDS = [
// Counted so the final index advance also skips the house number fields.
378 let houseNumberFields = 0;
380 // We need to build a list of the address fields. A list of the indicies
381 // is also needed as the fields with a given name can change positions
382 // during the update.
384 const fieldIndicies = [];
385 for (let idx = scanner.parsingIndex; !scanner.parsingFinished; idx++) {
386 const detail = scanner.getFieldDetailByIndex(idx);
388 // Skip over any house number fields. There should only be zero or one,
389 // but we'll skip over them all anyway.
390 if (detail?.fieldName == "address-housenumber") {
// Any other field (or running out of fields) ends the run.
395 if (!INTERESTED_FIELDS.includes(detail?.fieldName)) {
399 fieldIndicies.push(idx);
402 if (!fields.length) {
// The handling differs by the number of address fields in the run.
406 switch (fields.length) {
// Single field: only a heuristically detected line2/line3 is a candidate
// for promotion to address-line1.
409 fields[0].reason != "autocomplete" &&
410 ["address-line2", "address-line3"].includes(fields[0].fieldName)
412 // If an earlier address field was already found, ignore any
413 // address-related fields from the OTHER_ADDRESS_FIELDS
414 // list since those can appear in-between the address-level1
415 // and additional address info fields. If no address field
416 // exists, update the field to be address-line1.
// NOTE(review): the entries of this list are not visible in the reviewed text.
417 const OTHER_ADDRESS_FIELDS = [
423 let canUpdate = true;
// Walk backwards from the field just before the current one.
425 for (let idx = scanner.parsingIndex - 1; idx >= 0; idx--) {
426 const detail = scanner.getFieldDetailByIndex(idx);
428 detail?.fieldName == "street-address" ||
429 detail?.fieldName == "address-line1" ||
430 detail?.fieldName == "address-housenumber"
// A field outside OTHER_ADDRESS_FIELDS stops the backwards search.
436 if (!OTHER_ADDRESS_FIELDS.includes(detail?.fieldName)) {
442 scanner.updateFieldName(fieldIndicies[0], "address-line1");
// Two fields: an autocomplete-sourced first field is only renamed when it
// is "street-address" and the second is address-line2 or heuristic.
447 if (fields[0].reason == "autocomplete") {
449 fields[0].fieldName == "street-address" &&
450 (fields[1].fieldName == "address-line2" ||
451 fields[1].reason != "autocomplete")
// NOTE(review): the meaning of the third argument is defined by
// FieldScanner.updateFieldName, which is not visible here.
453 scanner.updateFieldName(fieldIndicies[0], "address-line1", true);
456 scanner.updateFieldName(fieldIndicies[0], "address-line1");
458 scanner.updateFieldName(fieldIndicies[1], "address-line2");
// Three fields: number the first three in order.
462 scanner.updateFieldName(fieldIndicies[0], "address-line1");
463 scanner.updateFieldName(fieldIndicies[1], "address-line2");
464 scanner.updateFieldName(fieldIndicies[2], "address-line3");
// Move past every collected address field and every skipped house number.
468 scanner.parsingIndex += fields.length + houseNumberFields;
472 _parseAddressFields(scanner, fieldDetail) {
473 const INTERESTED_FIELDS = ["address-level1", "address-level2"];
475 if (!INTERESTED_FIELDS.includes(fieldDetail.fieldName)) {
480 for (let idx = scanner.parsingIndex; !scanner.parsingFinished; idx++) {
481 const detail = scanner.getFieldDetailByIndex(idx);
482 if (!INTERESTED_FIELDS.includes(detail?.fieldName)) {
488 if (!fields.length) {
492 // State & City(address-level2)
493 if (fields.length == 1) {
494 if (fields[0].fieldName == "address-level2") {
495 const prev = scanner.getFieldDetailByIndex(scanner.parsingIndex - 1);
496 if (prev && !prev.fieldName && prev.localName == "select") {
497 scanner.updateFieldName(scanner.parsingIndex - 1, "address-level1");
498 scanner.parsingIndex += 1;
501 const next = scanner.getFieldDetailByIndex(scanner.parsingIndex + 1);
502 if (next && !next.fieldName && next.localName == "select") {
503 scanner.updateFieldName(scanner.parsingIndex + 1, "address-level1");
504 scanner.parsingIndex += 2;
510 scanner.parsingIndex += fields.length;
515 * Try to look for expiration date fields and revise the field names if needed.
517 * @param {FieldScanner} scanner
518 * The current parsing status for all elements
520 * Return true if there is any field can be recognized in the parser,
523 _parseCreditCardExpiryFields(scanner, fieldDetail) {
524 const INTERESTED_FIELDS = ["cc-exp", "cc-exp-month", "cc-exp-year"];
526 if (!INTERESTED_FIELDS.includes(fieldDetail.fieldName)) {
531 for (let idx = scanner.parsingIndex; ; idx++) {
532 const detail = scanner.getFieldDetailByIndex(idx);
533 if (!INTERESTED_FIELDS.includes(detail?.fieldName)) {
539 // Don't process the fields if expiration month and expiration year are already
540 // matched by regex in correct order.
542 (fields.length == 1 && fields[0].fieldName == "cc-exp") ||
543 (fields.length == 2 &&
544 fields[0].fieldName == "cc-exp-month" &&
545 fields[1].fieldName == "cc-exp-year")
547 scanner.parsingIndex += fields.length;
551 const prevCCFields = new Set();
552 for (let idx = scanner.parsingIndex - 1; ; idx--) {
553 const detail = scanner.getFieldDetailByIndex(idx);
555 lazy.FormAutofillUtils.getCategoryFromFieldName(detail?.fieldName) !=
560 prevCCFields.add(detail.fieldName);
562 // We update the "cc-exp-*" fields to correct "cc-ex-*" fields order when
563 // the following conditions are met:
564 // 1. The previous elements are identified as credit card fields and
565 // cc-number is in it
566 // 2. There is no "cc-exp-*" fields in the previous credit card elements
568 ["cc-number", "cc-name"].some(f => prevCCFields.has(f)) &&
569 !["cc-exp", "cc-exp-month", "cc-exp-year"].some(f => prevCCFields.has(f))
571 if (fields.length == 1) {
572 scanner.updateFieldName(scanner.parsingIndex, "cc-exp");
573 } else if (fields.length == 2) {
574 scanner.updateFieldName(scanner.parsingIndex, "cc-exp-month");
575 scanner.updateFieldName(scanner.parsingIndex + 1, "cc-exp-year");
577 scanner.parsingIndex += fields.length;
581 // Set field name to null as it failed to match any patterns.
582 for (let idx = 0; idx < fields.length; idx++) {
583 scanner.updateFieldName(scanner.parsingIndex + idx, null);
588 _parseCreditCardNumberFields(scanner, fieldDetail) {
589 const INTERESTED_FIELDS = ["cc-number"];
591 if (!INTERESTED_FIELDS.includes(fieldDetail.fieldName)) {
595 const fieldDetails = [];
596 for (let idx = scanner.parsingIndex; ; idx++) {
597 const detail = scanner.getFieldDetailByIndex(idx);
598 if (!INTERESTED_FIELDS.includes(detail?.fieldName)) {
601 fieldDetails.push(detail);
604 // This rule only applies when all the fields are visible
605 if (fieldDetails.some(field => !field.isVisible)) {
606 scanner.parsingIndex += fieldDetails.length;
610 // This is the heuristic to handle special cases where we can have multiple
611 // fields in one section, but only if the field has appeared N times in a row.
612 // For example, websites can use 4 consecutive 4-digit `cc-number` fields
613 // instead of one 16-digit `cc-number` field.
614 const N = MULTI_N_FIELD_NAMES["cc-number"];
615 if (fieldDetails.length == N) {
616 fieldDetails.forEach((fd, index) => {
617 // part starts with 1
620 scanner.parsingIndex += fieldDetails.length;
627 * Look for cc-*-name fields when *-name field is present
629 * @param {FieldScanner} scanner
630 * The current parsing status for all elements
632 * Return true if there is any field can be recognized in the parser,
/**
 * Decide whether the run of name fields starting at the scanner's current
 * index belongs to a credit card form, based on the credit card fields
 * found directly before and directly after the run, and if so rename the
 * run to the matching "cc-*" names.
 *
 * @param {FieldScanner} scanner
 *        The current parsing status for all elements.
 * @param {FieldDetail} fieldDetail
 *        The field at the scanner's current index.
 * @returns {boolean}
 *          NOTE(review): the return statements are not visible in the
 *          reviewed text; per the preceding doc comment this reports
 *          whether any field was recognized - confirm.
 */
635 _parseCreditCardNameFields(scanner, fieldDetail) {
// NOTE(review): the entries of this list are not visible in the reviewed text.
636 const INTERESTED_FIELDS = [
643 if (!INTERESTED_FIELDS.includes(fieldDetail.fieldName)) {
// Collect the consecutive fields of interest starting at the current index.
648 for (let idx = scanner.parsingIndex; ; idx++) {
649 const detail = scanner.getFieldDetailByIndex(idx);
650 if (!INTERESTED_FIELDS.includes(detail?.fieldName)) {
// Names of the fields directly before the run, walking backwards.
656 const prevCCFields = new Set();
657 for (let idx = scanner.parsingIndex - 1; ; idx--) {
658 const detail = scanner.getFieldDetailByIndex(idx);
660 lazy.FormAutofillUtils.getCategoryFromFieldName(detail?.fieldName) !=
665 prevCCFields.add(detail.fieldName);
// Names of the fields directly after the run, walking forwards.
668 const subsequentCCFields = new Set();
670 for (let idx = scanner.parsingIndex + fields.length; ; idx++) {
671 const detail = scanner.getFieldDetailByIndex(idx);
673 // For updates we only check subsequent fields that are not of type address or do not have an
674 // alternative field name that is of type address, to avoid falsely updating address
675 // form name fields to cc-*-name.
676 lazy.FormAutofillUtils.getCategoryFromFieldName(detail?.fieldName) !=
678 (detail?.alternativeFieldName !== undefined &&
679 lazy.FormAutofillUtils.getCategoryFromFieldName(
680 detail?.alternativeFieldName
685 subsequentCCFields.add(detail.fieldName);
// NOTE(review): this probes index + 1 regardless of how many fields the run
// holds, and relies on getFieldDetailByIndex returning exactly null past the
// end - confirm both against FieldScanner.
689 scanner.getFieldDetailByIndex(scanner.parsingIndex + 1) === null;
691 // We update the "name" fields to "cc-name" fields when the following
692 // conditions are met:
693 // 1. The preceding fields are identified as credit card fields and
694 // contain the "cc-number" field.
695 // 2. No "cc-name-*" field is found among the preceding credit card fields.
696 // 3. The "cc-csc" field is either not present among the preceding credit card fields,
697 // or the current field is the last field in the form. This condition is in place
698 // because "cc-csc" is often the last field in a credit card form, and we want to
699 // avoid mistakenly updating fields in subsequent address forms.
701 (["cc-number"].some(f => prevCCFields.has(f)) &&
702 !["cc-name", "cc-given-name", "cc-family-name"].some(f =>
705 (isLastField || !prevCCFields.has("cc-csc"))) || // 4. Or we update when current name field is followed by
706 // creditcard form fields that contain cc-number
707 // and no cc-*-name field is detected
708 (["cc-number"].some(f => subsequentCCFields.has(f)) &&
709 !["cc-name", "cc-given-name", "cc-family-name"].some(f =>
710 subsequentCCFields.has(f)
713 // If there is only one field, assume the name field a `cc-name` field
714 if (fields.length == 1) {
715 scanner.updateFieldName(scanner.parsingIndex, `cc-name`);
716 scanner.parsingIndex += 1;
718 // update *-name to cc-*-name
// The index is advanced after each rename, so every field of the run is
// renamed at the scanner's then-current position.
719 for (const field of fields) {
720 scanner.updateFieldName(
721 scanner.parsingIndex,
722 `cc-${field.fieldName}`
724 scanner.parsingIndex += 1;
734 * If the given field is of a different type than the previous
735 * field, use the alternate field name instead.
737 _checkForAlternateField(scanner, fieldDetail) {
738 if (fieldDetail.alternativeFieldName) {
739 const previousField = scanner.getFieldDetailByIndex(
740 scanner.parsingIndex - 1
743 const preIsCC = lazy.FormAutofillUtils.isCreditCardField(
744 previousField.fieldName
746 const curIsCC = lazy.FormAutofillUtils.isCreditCardField(
747 fieldDetail.fieldName
750 // If the current type is different from the previous element's type, use
751 // the alternative fieldname instead.
752 if (preIsCC != curIsCC) {
753 fieldDetail.fieldName = fieldDetail.alternativeFieldName;
754 fieldDetail.reason = "update-heuristic-alternate";
761 * This function should provide all field details of a form which are placed
762 * in the belonging section. The details contain the autocomplete info
763 * (e.g. fieldName, section, etc).
765 * @param {formLike} formLike
766 * the elements in this form to be predicted the field info.
767 * @param {boolean} ignoreInvisibleInput
768 * True to NOT run heuristics on invisible <input> fields.
769 * @returns {Array<FormSection>}
770 * all sections within its field details in the form.
772 getFormInfo(formLike, ignoreInvisibleInput) {
773 const elements = Array.from(formLike.elements).filter(element =>
774 lazy.FormAutofillUtils.isCreditCardOrAddressFieldType(element)
779 if (FormAutofill.isMLExperimentEnabled && elements.length) {
780 closestHeaders = lazy.MLAutofill.closestHeaderAbove(elements);
781 closestButtons = lazy.MLAutofill.closestButtonBelow(elements);
784 const fieldDetails = [];
785 for (let idx = 0; idx < elements.length; idx++) {
786 const element = elements[idx];
787 // Ignore invisible <input>, we still keep invisible <select> since
788 // some websites implements their custom dropdown and use invisible <select>
789 // to store the value.
790 const isVisible = lazy.FormAutofillUtils.isFieldVisible(element);
792 !HTMLSelectElement.isInstance(element) &&
799 const [fieldName, inferInfo] = this.inferFieldInfo(element, elements);
801 // For cases where the heuristic has determined the field name without
802 // running Fathom, still run Fathom so we can compare the results between
803 // Fathom and the ML model. Note that this is only enabled when the ML experiment
806 FormAutofill.isMLExperimentEnabled &&
807 inferInfo.fathomConfidence == undefined
809 let fields = this._getPossibleFieldNames(element);
810 fields = fields.filter(r => lazy.CreditCardRulesets.types.includes(r));
811 const [label, score] = this.getFathomField(element, fields, elements);
812 inferInfo.fathomLabel = label;
813 inferInfo.fathomConfidence = score;
817 lazy.FieldDetail.create(element, formLike, fieldName, {
818 autocompleteInfo: inferInfo.autocompleteInfo,
819 fathomLabel: inferInfo.fathomLabel,
820 fathomConfidence: inferInfo.fathomConfidence,
822 mlHeaderInput: closestHeaders?.[idx] ?? null,
823 mlButtonInput: closestButtons?.[idx] ?? null,
828 this.parseAndUpdateFieldNamesContent(fieldDetails);
830 lazy.LabelUtils.clearLabelMap();
836 * Similar to `parseAndUpdateFieldNamesParent`. The difference is that
837 * the parsing heuristics used in this function are based on information
838 * not currently passed to the parent process. For example,
839 * text strings from associated labels.
841 * Note that the heuristics run in this function will not be able
842 * to reference field information across frames.
844 * @param {Array<FieldDetail>} fieldDetails
845 * An array of the identified fields.
847 parseAndUpdateFieldNamesContent(fieldDetails) {
848 const scanner = new lazy.FieldScanner(fieldDetails);
850 while (!scanner.parsingFinished) {
851 const savedIndex = scanner.parsingIndex;
853 // First, we get the inferred field info
854 const fieldDetail = scanner.getFieldDetailByIndex(scanner.parsingIndex);
857 this._parseNameFieldsContent(scanner, fieldDetail) ||
858 this._parsePhoneFields(scanner, fieldDetail)
863 if (savedIndex == scanner.parsingIndex) {
864 scanner.parsingIndex++;
870 * Iterates through the field details and updates the field names
871 * based on surrounding field information, using various parsing functions.
873 * @param {Array<FieldDetail>} fieldDetails
874 * An array of the identified fields.
876 parseAndUpdateFieldNamesParent(fieldDetails) {
877 const scanner = new lazy.FieldScanner(fieldDetails);
879 while (!scanner.parsingFinished) {
880 const savedIndex = scanner.parsingIndex;
882 const fieldDetail = scanner.getFieldDetailByIndex(scanner.parsingIndex);
884 this._checkForAlternateField(scanner, fieldDetail);
886 // Attempt to parse the field using different parsers.
888 this._parseNameFields(scanner, fieldDetail) ||
889 this._parseHouseNumberFields(scanner, fieldDetail) ||
890 this._parseStreetAddressFields(scanner, fieldDetail) ||
891 this._parseAddressFields(scanner, fieldDetail) ||
892 this._parseCreditCardExpiryFields(scanner, fieldDetail) ||
893 this._parseCreditCardNameFields(scanner, fieldDetail) ||
894 this._parseCreditCardNumberFields(scanner, fieldDetail)
899 // Move the parsing cursor forward if no parser was applied.
900 if (savedIndex == scanner.parsingIndex) {
901 scanner.parsingIndex++;
/**
 * Build the list of field names an element may be classified as, based on
 * the autocomplete="off" state of the element and its form and on whether
 * the element is a <select>.
 *
 * @param {HTMLElement} element
 *        The element to classify.
 */
906 _getPossibleFieldNames(element) {
// autocomplete="off" on either the element or its form counts.
908 const isAutoCompleteOff =
909 element.autocomplete == "off" || element.form?.autocomplete == "off";
// Credit card names are offered unless autocomplete is off and
// FormAutofill.creditCardsAutocompleteOff is falsy.
910 if (!isAutoCompleteOff || FormAutofill.creditCardsAutocompleteOff) {
911 fieldNames.push(...this.CREDIT_CARD_FIELDNAMES);
// Same rule for address names, gated by FormAutofill.addressesAutocompleteOff.
913 if (!isAutoCompleteOff || FormAutofill.addressesAutocompleteOff) {
914 fieldNames.push(...this.ADDRESS_FIELDNAMES);
// For <select> elements only names on the allow-list below are kept.
917 if (HTMLSelectElement.isInstance(element)) {
// NOTE(review): the entries of this list are not visible in the reviewed text.
918 const FIELDNAMES_FOR_SELECT_ELEMENT = [
927 fieldNames = fieldNames.filter(name =>
928 FIELDNAMES_FOR_SELECT_ELEMENT.includes(name)
936 * Get inferred information about an input element using autocomplete info, fathom and regex-based heuristics.
938 * @param {HTMLElement} element - The input element to infer information about.
939 * @param {Array<HTMLElement>} elements - See `getFathomField` for details
940 * @returns {Array} - An array containing:
941 * [0]the inferred field name
942 * [1]information collected during the inference process. The possible values includes:
943 * 'autocompleteInfo', 'fathomLabel', and 'fathomConfidence'.
/**
 * Infer the field name of an element. The sources are consulted in this
 * order, and the first that yields a name wins: the autocomplete
 * attribute, the "email" input type, Fathom (when enabled), <select>
 * option heuristics, and finally the regexp-based heuristics.
 *
 * @param {HTMLElement} element
 *        The element to infer information about.
 * @param {Array<HTMLElement>} elements
 *        Passed through to `getFathomField`.
 * @returns {Array}
 *          `[fieldName, inferredInfo]`, where `inferredInfo` may carry
 *          `autocompleteInfo`, `fathomLabel` and `fathomConfidence`.
 */
945 inferFieldInfo(element, elements = []) {
946 const inferredInfo = {};
947 const autocompleteInfo = element.getAutocompleteInfo();
949 // An input[autocomplete="on"] will not be early return here since it stll
950 // needs to find the field name.
952 autocompleteInfo?.fieldName &&
953 !["on", "off"].includes(autocompleteInfo.fieldName)
// An explicit autocomplete field name is returned as is.
955 inferredInfo.autocompleteInfo = autocompleteInfo;
956 return [autocompleteInfo.fieldName, inferredInfo];
959 const fields = this._getPossibleFieldNames(element);
961 // "email" type of input is accurate for heuristics to determine its Email
962 // field or not. However, "tel" type is used for ZIP code for some web site
963 // (e.g. HomeDepot, BestBuy), so "tel" type should be not used for "tel"
965 if (element.type == "email" && fields.includes("email")) {
966 return ["email", inferredInfo];
969 if (lazy.FormAutofillUtils.isFathomCreditCardsEnabled()) {
970 // We don't care fields that are not supported by fathom
971 const fathomFields = fields.filter(r =>
972 lazy.CreditCardRulesets.types.includes(r)
974 const [matchedFieldName, confidence] = this.getFathomField(
// The Fathom result is recorded whenever a confidence came back, even when
// no field name was matched.
979 if (confidence != null) {
980 inferredInfo.fathomLabel = matchedFieldName;
981 inferredInfo.fathomConfidence = confidence;
983 // At this point, use fathom's recommendation if it has one
984 if (matchedFieldName) {
985 return [matchedFieldName, inferredInfo];
988 // Continue to run regex-based heuristics even when fathom doesn't recognize
989 // the field. Since the regex-based heuristic has good search coverage but
990 // has a worse precision. We use it in conjunction with fathom to maximize
991 // our search coverage. For example, when a <input> is not considered cc-name
992 // by fathom but is considered cc-name by regex-based heuristic, if the form
993 // also contains a cc-number identified by fathom, we will treat the form as a
994 // valid cc form; hence both cc-number & cc-name are identified.
997 // Check every select for options that
998 // match credit card network names in value or label.
999 if (HTMLSelectElement.isInstance(element)) {
// Expiry month and year are detected from the option values or labels.
1000 if (this._isExpirationMonthLikely(element)) {
1001 return ["cc-exp-month", inferredInfo];
1002 } else if (this._isExpirationYearLikely(element)) {
1003 return ["cc-exp-year", inferredInfo];
1006 const options = Array.from(element.querySelectorAll("option"));
// An option counts when its value or text names a card network.
1010 lazy.CreditCard.getNetworkFromName(option.value) ||
1011 lazy.CreditCard.getNetworkFromName(option.text)
1014 return ["cc-type", inferredInfo];
1017 // At least two options match the country name, otherwise some state name might
1018 // also match a country name, ex, Georgia. We check the last two
1019 // options rather than the first, as selects often start with a non-country display option.
1020 const countryDisplayNames = Array.from(FormAutofill.countries.values());
1022 options.length >= 2 &&
1027 countryDisplayNames.includes(option.value) ||
1028 countryDisplayNames.includes(option.text)
1031 return ["country", inferredInfo];
1035 // Find a matched field name using regexp-based heuristics
// Here the first tuple item is whatever _findMatchedFieldNames returns,
// not a single string like in the branches above.
1036 const matchedFieldNames = this._findMatchedFieldNames(element, fields);
1037 return [matchedFieldNames, inferredInfo];
1041 * Using Fathom, say what kind of CC field an element is most likely to be.
1042 * This function doesn't only run fathom on the passed elements. It also
1043 * runs fathom for all elements in the FieldScanner for optimization purpose.
1045 * @param {HTMLElement} element
1046 * @param {Array} fields
1047 * @param {Array<HTMLElement>} elements - All other eligible elements in the same form. This is mainly used as an
1048 * optimization approach to run fathom model on all eligible elements
1049 * once instead of one by one
1050 * @returns {Array} A tuple of [field name, probability] describing the
1051 * highest-confidence classification
1053 getFathomField(element, fields, elements = []) {
1054 if (!fields.length) {
1055 return [null, null];
1058 if (!this._fathomConfidences?.get(element)) {
1059 this._fathomConfidences = new Map();
1061 // This should not throw unless we run into an OOM situation, at which
1062 // point we have worse problems and this failing is not a big deal.
1063 elements = elements.includes(element) ? elements : [element];
1064 const confidences = this.getFormAutofillConfidences(elements);
1066 for (let i = 0; i < elements.length; i++) {
1067 this._fathomConfidences.set(elements[i], confidences[i]);
1071 const elementConfidences = this._fathomConfidences.get(element);
1072 if (!elementConfidences) {
1073 return [null, null];
1076 let highestField = null;
1077 let highestConfidence = lazy.FormAutofillUtils.ccFathomConfidenceThreshold; // Start with a threshold of 0.5
1078 for (let [key, value] of Object.entries(elementConfidences)) {
1079 if (!fields.includes(key)) {
1080 // ignore field that we don't care
1084 if (value > highestConfidence) {
1085 highestConfidence = value;
1090 if (!highestField) {
1091 return [null, null];
1094 // Used by test ONLY! This ensure testcases always get the same confidence
1095 if (lazy.FormAutofillUtils.ccFathomTestConfidence > 0) {
1096 highestConfidence = lazy.FormAutofillUtils.ccFathomTestConfidence;
1099 return [highestField, highestConfidence];
1103 * @param {Array} elements Array of elements that we want to get result from fathom cc rules
1104 * @returns {object} Fathom confidence keyed by field-type.
1106 getFormAutofillConfidences(elements) {
1108 lazy.FormAutofillUtils.ccHeuristicsMode ==
1109 lazy.FormAutofillUtils.CC_FATHOM_NATIVE
1111 const confidences = ChromeUtils.getFormAutofillConfidences(elements);
1112 return confidences.map(c => {
1114 for (let [fieldName, confidence] of Object.entries(c)) {
1116 lazy.FormAutofillUtils.formAutofillConfidencesKeyToCCFieldType(
1119 result[type] = confidence;
1125 return elements.map(element => {
1127 * Return how confident our ML model is that `element` is a field of the
1130 * @param {string} fieldName The Fathom type to check against. This is
1131 * conveniently the same as the autocomplete attribute value that means
1133 * @returns {number} Confidence in range [0, 1]
1135 function confidence(fieldName) {
1136 const ruleset = lazy.CreditCardRulesets[fieldName];
1137 const fnodes = ruleset.against(element).get(fieldName);
1139 // fnodes is either 0 or 1 item long, since we ran the ruleset
1140 // against a single element:
1141 return fnodes.length ? fnodes[0].scoreFor(fieldName) : 0;
1144 // Bang the element against the ruleset for every type of field:
1145 const confidences = {};
1146 lazy.CreditCardRulesets.types.map(fieldName => {
1147 confidences[fieldName] = confidence(fieldName);
1155 * @typedef ElementStrings
1157 * @yields {string} id - element id.
1158 * @yields {string} name - element name.
1159 * @yields {Array<string>} labels - extracted labels.
1163 * Extract all the signature strings of an element.
1165 * @param {HTMLElement} element
1166 * @returns {Array<string>}
1168 _getElementStrings(element) {
1169 return [element.id, element.name, element.placeholder?.trim()];
1173 * Extract all the label strings associated with an element.
1175 * @param {HTMLElement} element
1176 * @returns {ElementStrings}
1178 _getElementLabelStrings(element) {
1180 *[Symbol.iterator]() {
1181 const labels = lazy.LabelUtils.findLabelElements(element);
1182 for (let label of labels) {
1183 yield* lazy.LabelUtils.extractLabelStrings(label);
1186 const ariaLabels = element.getAttribute("aria-label");
1188 yield* [ariaLabels];
1194 // In order to support webkit we need to avoid usage of negative lookbehind due to low support
1195 // First safari version with support is 16.4 (Release Date: 27th March 2023)
1196 // https://caniuse.com/js-regexp-lookbehind
1197 // We can mimic the behaviour of negative lookbehinds by using a named capture group
1198 // (?<!not)word -> (?<neg>notword)|word
1199 // TODO: Bug 1829583
1200 testRegex(regex, string) {
1201 const matches = string?.matchAll(regex);
1206 const excludeNegativeCaptureGroups = [];
1208 for (const match of matches) {
1209 excludeNegativeCaptureGroups.push(
1210 ...match.filter(m => m !== match?.groups?.neg).filter(Boolean)
1213 return excludeNegativeCaptureGroups?.length > 0;
1217 * Find matching field names from a given list of field names
1218 * that matches an HTML element.
1220 * The function first tries to match the element against a set of
1221 * pre-defined regular expression rules. If no match is found, it
1222 * then checks for label-specific rules, if they exist.
1224 * The return value can contain a maximum of two field names, the
1225 * first item the first match found, and the second an alternate field
1226 * name always of a different type, where the two type are credit card
1229 * Note: For label rules, the keyword is often more general
1230 * (e.g., "^\\W*address"), hence they are only searched within labels
1231 * to reduce the occurrence of false positives.
1233 * @param {HTMLElement} element The element to match.
1234 * @param {Array<string>} fieldNames An array of field names to compare against.
1235 * @returns {Array} An array of the matching field names.
1237 _findMatchedFieldNames(element, fieldNames) {
1238 if (!fieldNames.length) {
1242 // The first element is the field name, and the second element is the type.
1243 let fields = fieldNames.map(name => [
1245 lazy.FormAutofillUtils.isCreditCardField(name) ? CC_TYPE : ADDR_TYPE,
1249 let attribute = true;
1250 let matchedFieldNames = [];
1252 // Check RULES first, and only check LABEL_RULES if no match is found.
1253 for (let rules of [this.RULES, this.LABEL_RULES]) {
1254 // Attempt to match the element against the default set of rules.
1256 fields.find(field => {
1257 const [fieldName, type] = field;
1259 // The same type has been found already, so skip.
1260 if (foundType == type) {
1264 if (!this._matchRegexp(element, rules[fieldName], { attribute })) {
1269 matchedFieldNames.push(fieldName);
1271 return matchedFieldNames.length == 2;
1277 // Don't match attributes for label rules.
1281 return matchedFieldNames;
1285 * Determine whether the regexp can match any of element strings.
1287 * @param {HTMLElement} element The HTML element to match.
1288 * @param {RegExp} regexp The regular expression to match against.
1289 * @param {object} [options] Optional parameters for matching.
1290 * @param {boolean} [options.attribute=true]
1291 * Whether to match against the element's attributes.
1292 * @param {boolean} [options.label=true]
1293 * Whether to match against the element's labels.
1294 * @returns {boolean} True if a match is found, otherwise false.
1296 _matchRegexp(element, regexp, { attribute = true, label = true } = {}) {
1302 const elemStrings = this._getElementStrings(element);
1303 if (elemStrings.find(s => this.testRegex(regexp, s?.toLowerCase()))) {
1309 const elementLabelStrings = this._getElementLabelStrings(element);
1310 for (const s of elementLabelStrings) {
1311 if (this.testRegex(regexp, s?.toLowerCase())) {
1321 * Phone field grammars - first matched grammar will be parsed. Grammars are
1322 * separated by { REGEX_SEPARATOR, FIELD_NONE, 0 }. Suffix and extension are
1323 * parsed separately unless they are necessary parts of the match.
1324 * The following notation is used to describe the patterns:
1325 * <cc> - country code field.
1326 * <ac> - area code field.
1327 * <phone> - phone or prefix.
1328 * <suffix> - suffix.
1329 * <ext> - extension.
1330 * :N means field is limited to N characters, otherwise it is unlimited.
1331 * (pattern <field>)? means pattern is optional and matched separately.
1333 * This grammar list from Chromium will be enabled partially once we need to
1334 * support more cases of Telephone fields.
1336 PHONE_FIELD_GRAMMARS: [
// Only the uncommented array entries below are data. The brace-style
// entries inside comments are grammars that are not enabled here.
1337 // Country code: <cc> Area Code: <ac> Phone: <phone> (- <suffix>
1340 // {REGEX_COUNTRY, FIELD_COUNTRY_CODE, 0},
1341 // {REGEX_AREA, FIELD_AREA_CODE, 0},
1342 // {REGEX_PHONE, FIELD_PHONE, 0},
1343 // {REGEX_SEPARATOR, FIELD_NONE, 0},
1345 // \( <ac> \) <phone>:3 <suffix>:4 (Ext: <ext>)?
1346 // {REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 3},
1347 // {REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 3},
1348 // {REGEX_PHONE, FIELD_SUFFIX, 4},
1349 // {REGEX_SEPARATOR, FIELD_NONE, 0},
1351 // Phone: <cc> <ac>:3 - <phone>:3 - <suffix>:4 (Ext: <ext>)?
1352 // {REGEX_PHONE, FIELD_COUNTRY_CODE, 0},
1353 // {REGEX_PHONE, FIELD_AREA_CODE, 3},
1354 // {REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 3},
1355 // {REGEX_SUFFIX_SEPARATOR, FIELD_SUFFIX, 4},
1356 // {REGEX_SEPARATOR, FIELD_NONE, 0},
// Enabled grammar. The third element of each entry below mirrors the :N
// character limit written in the pattern comment that follows.
// NOTE(review): the first element is "tel" in every enabled entry;
// presumably it is the field name a candidate must already carry before it
// is refined to the second element - confirm against the grammar parser.
1358 // Phone: <cc>:3 <ac>:3 <phone>:3 <suffix>:4 (Ext: <ext>)?
1359 ["tel", "tel-country-code", 3],
1360 ["tel", "tel-area-code", 3],
1361 ["tel", "tel-local-prefix", 3],
1362 ["tel", "tel-local-suffix", 4],
1365 // Area Code: <ac> Phone: <phone> (- <suffix> (Ext: <ext>)?)?
1366 // {REGEX_AREA, FIELD_AREA_CODE, 0},
1367 // {REGEX_PHONE, FIELD_PHONE, 0},
1368 // {REGEX_SEPARATOR, FIELD_NONE, 0},
1370 // Phone: <ac> <phone>:3 <suffix>:4 (Ext: <ext>)?
1371 // {REGEX_PHONE, FIELD_AREA_CODE, 0},
1372 // {REGEX_PHONE, FIELD_PHONE, 3},
1373 // {REGEX_PHONE, FIELD_SUFFIX, 4},
1374 // {REGEX_SEPARATOR, FIELD_NONE, 0},
1376 // Phone: <cc> \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)?
1377 // {REGEX_PHONE, FIELD_COUNTRY_CODE, 0},
1378 // {REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 0},
1379 // {REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0},
1380 // {REGEX_SEPARATOR, FIELD_NONE, 0},
1382 // Phone: \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)?
1383 // {REGEX_PHONE, FIELD_COUNTRY_CODE, 0},
1384 // {REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 0},
1385 // {REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0},
1386 // {REGEX_SEPARATOR, FIELD_NONE, 0},
1388 // Phone: <cc> - <ac> - <phone> - <suffix> (Ext: <ext>)?
1389 // {REGEX_PHONE, FIELD_COUNTRY_CODE, 0},
1390 // {REGEX_PREFIX_SEPARATOR, FIELD_AREA_CODE, 0},
1391 // {REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0},
1392 // {REGEX_SUFFIX_SEPARATOR, FIELD_SUFFIX, 0},
1393 // {REGEX_SEPARATOR, FIELD_NONE, 0},
1395 // Area code: <ac>:3 Prefix: <prefix>:3 Suffix: <suffix>:4 (Ext: <ext>)?
1396 // {REGEX_AREA, FIELD_AREA_CODE, 3},
1397 // {REGEX_PREFIX, FIELD_PHONE, 3},
1398 // {REGEX_SUFFIX, FIELD_SUFFIX, 4},
1399 // {REGEX_SEPARATOR, FIELD_NONE, 0},
1401 // Phone: <ac> Prefix: <phone> Suffix: <suffix> (Ext: <ext>)?
1402 // {REGEX_PHONE, FIELD_AREA_CODE, 0},
1403 // {REGEX_PREFIX, FIELD_PHONE, 0},
1404 // {REGEX_SUFFIX, FIELD_SUFFIX, 0},
1405 // {REGEX_SEPARATOR, FIELD_NONE, 0},
// Enabled grammar. <ac> has no :N limit in the pattern that follows, and
// its entry carries 0 as the third element.
1407 // Phone: <ac> - <phone>:3 - <suffix>:4 (Ext: <ext>)?
1408 ["tel", "tel-area-code", 0],
1409 ["tel", "tel-local-prefix", 3],
1410 ["tel", "tel-local-suffix", 4],
1413 // Phone: <cc> - <ac> - <phone> (Ext: <ext>)?
1414 // {REGEX_PHONE, FIELD_COUNTRY_CODE, 0},
1415 // {REGEX_PREFIX_SEPARATOR, FIELD_AREA_CODE, 0},
1416 // {REGEX_SUFFIX_SEPARATOR, FIELD_PHONE, 0},
1417 // {REGEX_SEPARATOR, FIELD_NONE, 0},
1419 // Phone: <ac> - <phone> (Ext: <ext>)?
1420 // {REGEX_AREA, FIELD_AREA_CODE, 0},
1421 // {REGEX_PHONE, FIELD_PHONE, 0},
1422 // {REGEX_SEPARATOR, FIELD_NONE, 0},
1424 // Phone: <cc>:3 - <phone>:10 (Ext: <ext>)?
1425 // {REGEX_PHONE, FIELD_COUNTRY_CODE, 3},
1426 // {REGEX_PHONE, FIELD_PHONE, 10},
1427 // {REGEX_SEPARATOR, FIELD_NONE, 0},
1430 // {REGEX_EXTENSION, FIELD_EXTENSION, 0},
1431 // {REGEX_SEPARATOR, FIELD_NONE, 0},
1433 // Phone: <phone> (Ext: <ext>)?
1434 // {REGEX_PHONE, FIELD_PHONE, 0},
1435 // {REGEX_SEPARATOR, FIELD_NONE, 0},
// Install lazily computed field-name lists on FormAutofillHeuristics. Each
// list holds the keys of RULES accepted by the paired predicate, and is only
// computed the first time the property is read.
for (const [property, isWanted] of [
  [
    "CREDIT_CARD_FIELDNAMES",
    name => lazy.FormAutofillUtils.isCreditCardField(name),
  ],
  ["ADDRESS_FIELDNAMES", name => lazy.FormAutofillUtils.isAddressField(name)],
]) {
  ChromeUtils.defineLazyGetter(FormAutofillHeuristics, property, () =>
    Object.keys(FormAutofillHeuristics.RULES).filter(name => isWanted(name))
  );
}

export default FormAutofillHeuristics;