Include all dupe types (even when value is zero) in scan stats.
[chromium-blink-merge.git] / components / autofill / core / browser / credit_card_field.cc
blobd6b67dca712783a865789155647fd340a5ee59dd
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/autofill/core/browser/credit_card_field.h"
7 #include <stddef.h>
9 #include "base/memory/scoped_ptr.h"
10 #include "base/stl_util.h"
11 #include "base/strings/string16.h"
12 #include "base/strings/string_number_conversions.h"
13 #include "base/strings/string_util.h"
14 #include "base/strings/utf_string_conversions.h"
15 #include "base/time/time.h"
16 #include "components/autofill/core/browser/autofill_field.h"
17 #include "components/autofill/core/browser/autofill_regex_constants.h"
18 #include "components/autofill/core/browser/autofill_scanner.h"
19 #include "components/autofill/core/browser/field_types.h"
20 #include "components/autofill/core/common/autofill_regexes.h"
21 #include "grit/components_strings.h"
22 #include "ui/base/l10n/l10n_util.h"
24 namespace autofill {
26 namespace {
// Upper bound, in digits, on the length of a credit card number.
// [Ref: http://en.wikipedia.org/wiki/Bank_card_number]
const size_t kMaxValidCardNumberSize = 19;
32 // Look for the vector |regex_needles| in |haystack|. Returns true if a
33 // consecutive section of |haystack| matches |regex_needles|.
34 bool FindConsecutiveStrings(const std::vector<base::string16>& regex_needles,
35 const std::vector<base::string16>& haystack) {
36 if (regex_needles.empty() ||
37 haystack.empty() ||
38 (haystack.size() < regex_needles.size()))
39 return false;
41 for (size_t i = 0; i < haystack.size() - regex_needles.size() + 1; ++i) {
42 for (size_t j = 0; j < regex_needles.size(); ++j) {
43 if (!MatchesPattern(haystack[i + j], regex_needles[j]))
44 break;
46 if (j == regex_needles.size() - 1)
47 return true;
50 return false;
53 // Returns true if a field that has |max_length| can fit the data for a field of
54 // |type|.
55 bool FieldCanFitDataForFieldType(int max_length, ServerFieldType type) {
56 if (max_length == 0)
57 return true;
59 switch (type) {
60 case CREDIT_CARD_EXP_DATE_2_DIGIT_YEAR: {
61 static int kMinimum2YearCcExpLength = strlen("12/14");
62 return max_length >= kMinimum2YearCcExpLength;
64 case CREDIT_CARD_EXP_DATE_4_DIGIT_YEAR: {
65 static int kMinimum4YearCcExpLength = strlen("12/2014");
66 return max_length >= kMinimum4YearCcExpLength;
68 default:
69 NOTREACHED();
70 return false;
75 } // namespace
77 // static
78 scoped_ptr<FormField> CreditCardField::Parse(AutofillScanner* scanner) {
79 if (scanner->IsEnd())
80 return nullptr;
82 scoped_ptr<CreditCardField> credit_card_field(new CreditCardField);
83 size_t saved_cursor = scanner->SaveCursor();
85 // Credit card fields can appear in many different orders.
86 // We loop until no more credit card related fields are found, see |break| at
87 // the bottom of the loop.
88 for (int fields = 0; !scanner->IsEnd(); ++fields) {
89 // Ignore gift card fields.
90 if (IsGiftCardField(scanner))
91 break;
93 if (!credit_card_field->cardholder_) {
94 if (ParseField(scanner,
95 base::UTF8ToUTF16(kNameOnCardRe),
96 &credit_card_field->cardholder_)) {
97 continue;
100 // Sometimes the cardholder field is just labeled "name". Unfortunately
101 // this is a dangerously generic word to search for, since it will often
102 // match a name (not cardholder name) field before or after credit card
103 // fields. So we search for "name" only when we've already parsed at
104 // least one other credit card field and haven't yet parsed the
105 // expiration date (which usually appears at the end).
106 if (fields > 0 &&
107 !credit_card_field->expiration_month_ &&
108 ParseField(scanner,
109 base::UTF8ToUTF16(kNameOnCardContextualRe),
110 &credit_card_field->cardholder_)) {
111 continue;
115 // Check for a credit card type (Visa, MasterCard, etc.) field.
116 // All CC type fields encountered so far have been of type select.
117 if (!credit_card_field->type_ && LikelyCardTypeSelectField(scanner)) {
118 credit_card_field->type_ = scanner->Cursor();
119 scanner->Advance();
120 continue;
123 // We look for a card security code before we look for a credit card number
124 // and match the general term "number". The security code has a plethora of
125 // names; we've seen "verification #", "verification number", "card
126 // identification number", and others listed in the regex pattern used
127 // below.
128 // Note: Some sites use type="tel" or type="number" for numerical inputs.
129 const int kMatchNumAndTel = MATCH_DEFAULT | MATCH_NUMBER | MATCH_TELEPHONE;
130 if (!credit_card_field->verification_ &&
131 ParseFieldSpecifics(scanner,
132 base::UTF8ToUTF16(kCardCvcRe),
133 kMatchNumAndTel | MATCH_PASSWORD,
134 &credit_card_field->verification_)) {
135 continue;
138 AutofillField* current_number_field;
139 if (ParseFieldSpecifics(scanner,
140 base::UTF8ToUTF16(kCardNumberRe),
141 kMatchNumAndTel,
142 &current_number_field)) {
143 // Avoid autofilling any credit card number field having very low or high
144 // |start_index| on the HTML form.
145 size_t start_index = 0;
146 if (!credit_card_field->numbers_.empty()) {
147 size_t last_number_field_size =
148 credit_card_field->numbers_.back()->credit_card_number_offset() +
149 credit_card_field->numbers_.back()->max_length;
151 // Distinguish between
152 // (a) one card split across multiple fields
153 // (b) multiple fields for multiple cards
154 // Treat this field as a part of the same card as the last field, except
155 // when doing so would cause overflow.
156 if (last_number_field_size < kMaxValidCardNumberSize)
157 start_index = last_number_field_size;
160 current_number_field->set_credit_card_number_offset(start_index);
161 credit_card_field->numbers_.push_back(current_number_field);
162 continue;
165 if (credit_card_field->ParseExpirationDate(scanner))
166 continue;
168 if (credit_card_field->expiration_month_ &&
169 !credit_card_field->expiration_year_ &&
170 !credit_card_field->expiration_date_) {
171 // Parsed a month but couldn't parse a year; give up.
172 scanner->RewindTo(saved_cursor);
173 return nullptr;
176 break;
179 // Some pages have a billing address field after the cardholder name field.
180 // For that case, allow only just the cardholder name field. The remaining
181 // CC fields will be picked up in a following CreditCardField.
182 if (credit_card_field->cardholder_)
183 return credit_card_field.Pass();
185 // On some pages, the user selects a card type using radio buttons
186 // (e.g. test page Apple Store Billing.html). We can't handle that yet,
187 // so we treat the card type as optional for now.
188 // The existence of a number or cvc in combination with expiration date is
189 // a strong enough signal that this is a credit card. It is possible that
190 // the number and name were parsed in a separate part of the form. So if
191 // the cvc and date were found independently they are returned.
192 bool has_cc_number_or_verification = (credit_card_field->verification_ ||
193 !credit_card_field->numbers_.empty());
194 bool has_date_or_mm_yy = (credit_card_field->expiration_date_ ||
195 (credit_card_field->expiration_month_ &&
196 credit_card_field->expiration_year_));
197 if (has_cc_number_or_verification && has_date_or_mm_yy)
198 return credit_card_field.Pass();
200 scanner->RewindTo(saved_cursor);
201 return nullptr;
204 // static
205 bool CreditCardField::LikelyCardMonthSelectField(AutofillScanner* scanner) {
206 if (scanner->IsEnd())
207 return false;
209 AutofillField* field = scanner->Cursor();
210 if (!MatchesFormControlType(field->form_control_type, MATCH_SELECT))
211 return false;
213 if (field->option_values.size() < 12 || field->option_values.size() > 13)
214 return false;
216 // Filter out years.
217 const base::string16 kNumericalYearRe =
218 base::ASCIIToUTF16("[1-9][0-9][0-9][0-9]");
219 for (const auto& value : field->option_values) {
220 if (MatchesPattern(value, kNumericalYearRe))
221 return false;
223 for (const auto& value : field->option_contents) {
224 if (MatchesPattern(value, kNumericalYearRe))
225 return false;
228 // Look for numerical months.
229 const base::string16 kNumericalMonthRe = base::ASCIIToUTF16("12");
230 if (MatchesPattern(field->option_values.back(), kNumericalMonthRe) ||
231 MatchesPattern(field->option_contents.back(), kNumericalMonthRe)) {
232 return true;
235 // Maybe do more matches here. e.g. look for (translated) December.
237 // Unsure? Return false.
238 return false;
241 // static
242 bool CreditCardField::LikelyCardYearSelectField(AutofillScanner* scanner) {
243 if (scanner->IsEnd())
244 return false;
246 AutofillField* field = scanner->Cursor();
247 if (!MatchesFormControlType(field->form_control_type, MATCH_SELECT))
248 return false;
250 const base::Time time_now = base::Time::Now();
251 base::Time::Exploded time_exploded;
252 time_now.UTCExplode(&time_exploded);
254 const int kYearsToMatch = 3;
255 std::vector<base::string16> years_to_check;
256 for (int year = time_exploded.year;
257 year < time_exploded.year + kYearsToMatch;
258 ++year) {
259 years_to_check.push_back(base::IntToString16(year));
261 return (FindConsecutiveStrings(years_to_check, field->option_values) ||
262 FindConsecutiveStrings(years_to_check, field->option_contents));
265 // static
266 bool CreditCardField::LikelyCardTypeSelectField(AutofillScanner* scanner) {
267 if (scanner->IsEnd())
268 return false;
270 AutofillField* field = scanner->Cursor();
271 if (!MatchesFormControlType(field->form_control_type, MATCH_SELECT))
272 return false;
274 return AutofillField::FindValueInSelectControl(
275 *field, l10n_util::GetStringUTF16(IDS_AUTOFILL_CC_VISA),
276 nullptr) ||
277 AutofillField::FindValueInSelectControl(
278 *field, l10n_util::GetStringUTF16(IDS_AUTOFILL_CC_MASTERCARD),
279 nullptr);
282 // static
283 bool CreditCardField::IsGiftCardField(AutofillScanner* scanner) {
284 if (scanner->IsEnd())
285 return false;
287 size_t saved_cursor = scanner->SaveCursor();
288 if (ParseField(scanner, base::UTF8ToUTF16(kDebitCardRe), nullptr)) {
289 scanner->RewindTo(saved_cursor);
290 return false;
292 if (ParseField(scanner, base::UTF8ToUTF16(kDebitGiftCardRe), nullptr)) {
293 scanner->RewindTo(saved_cursor);
294 return false;
297 return ParseField(scanner, base::UTF8ToUTF16(kGiftCardRe), nullptr);
300 CreditCardField::CreditCardField()
301 : cardholder_(nullptr),
302 cardholder_last_(nullptr),
303 type_(nullptr),
304 verification_(nullptr),
305 expiration_month_(nullptr),
306 expiration_year_(nullptr),
307 expiration_date_(nullptr),
308 exp_year_type_(CREDIT_CARD_EXP_DATE_4_DIGIT_YEAR) {
311 CreditCardField::~CreditCardField() {
314 bool CreditCardField::ClassifyField(ServerFieldTypeMap* map) const {
315 bool ok = true;
316 for (size_t index = 0; index < numbers_.size(); ++index) {
317 ok = ok && AddClassification(numbers_[index], CREDIT_CARD_NUMBER, map);
320 ok = ok && AddClassification(type_, CREDIT_CARD_TYPE, map);
321 ok = ok &&
322 AddClassification(verification_, CREDIT_CARD_VERIFICATION_CODE, map);
324 // If the heuristics detected first and last name in separate fields,
325 // then ignore both fields. Putting them into separate fields is probably
326 // wrong, because the credit card can also contain a middle name or middle
327 // initial.
328 if (cardholder_last_ == nullptr)
329 ok = ok && AddClassification(cardholder_, CREDIT_CARD_NAME, map);
331 if (expiration_date_) {
332 DCHECK(!expiration_month_);
333 DCHECK(!expiration_year_);
334 ok =
335 ok && AddClassification(expiration_date_, GetExpirationYearType(), map);
336 } else {
337 ok = ok && AddClassification(expiration_month_, CREDIT_CARD_EXP_MONTH, map);
338 ok =
339 ok && AddClassification(expiration_year_, GetExpirationYearType(), map);
342 return ok;
345 bool CreditCardField::ParseExpirationDate(AutofillScanner* scanner) {
346 if (!expiration_date_ &&
347 LowerCaseEqualsASCII(scanner->Cursor()->form_control_type, "month")) {
348 expiration_date_ = scanner->Cursor();
349 expiration_month_ = nullptr;
350 expiration_year_ = nullptr;
351 scanner->Advance();
352 return true;
355 if (expiration_month_ || expiration_date_)
356 return false;
358 // First try to parse split month/year expiration fields by looking for a
359 // pair of select fields that look like month/year.
360 size_t month_year_saved_cursor = scanner->SaveCursor();
362 if (LikelyCardMonthSelectField(scanner)) {
363 expiration_month_ = scanner->Cursor();
364 scanner->Advance();
365 if (LikelyCardYearSelectField(scanner)) {
366 expiration_year_ = scanner->Cursor();
367 scanner->Advance();
368 return true;
370 expiration_month_ = nullptr;
371 expiration_year_ = nullptr;
374 // If that fails, do a general regex search.
375 scanner->RewindTo(month_year_saved_cursor);
376 const int kMatchTelAndSelect = MATCH_DEFAULT | MATCH_TELEPHONE | MATCH_SELECT;
377 if (ParseFieldSpecifics(scanner,
378 base::UTF8ToUTF16(kExpirationMonthRe),
379 kMatchTelAndSelect,
380 &expiration_month_) &&
381 ParseFieldSpecifics(scanner,
382 base::UTF8ToUTF16(kExpirationYearRe),
383 kMatchTelAndSelect,
384 &expiration_year_)) {
385 return true;
388 // If that fails, look for just MM/YY(YY).
389 scanner->RewindTo(month_year_saved_cursor);
390 if (ParseFieldSpecifics(scanner,
391 base::ASCIIToUTF16("^mm$"),
392 kMatchTelAndSelect,
393 &expiration_month_) &&
394 ParseFieldSpecifics(scanner,
395 base::ASCIIToUTF16("^(yy|yyyy)$"),
396 kMatchTelAndSelect,
397 &expiration_year_)) {
398 return true;
401 // If that fails, try to parse a combined expiration field.
402 // We allow <select> fields, because they're used e.g. on qvc.com.
403 scanner->RewindTo(month_year_saved_cursor);
405 // Bail out if the field cannot fit a 2-digit year expiration date.
406 const int current_field_max_length = scanner->Cursor()->max_length;
407 if (!FieldCanFitDataForFieldType(current_field_max_length,
408 CREDIT_CARD_EXP_DATE_2_DIGIT_YEAR)) {
409 return false;
412 // Try to look for a 2-digit year expiration date.
413 if (ParseFieldSpecifics(scanner,
414 base::UTF8ToUTF16(kExpirationDate2DigitYearRe),
415 kMatchTelAndSelect,
416 &expiration_date_)) {
417 exp_year_type_ = CREDIT_CARD_EXP_DATE_2_DIGIT_YEAR;
418 expiration_month_ = nullptr;
419 return true;
422 // Try to look for a generic expiration date field. (2 or 4 digit year)
423 if (ParseFieldSpecifics(scanner,
424 base::UTF8ToUTF16(kExpirationDateRe),
425 kMatchTelAndSelect,
426 &expiration_date_)) {
427 // If such a field exists, but it cannot fit a 4-digit year expiration
428 // date, then the likely possibility is that it is a 2-digit year expiration
429 // date.
430 if (!FieldCanFitDataForFieldType(current_field_max_length,
431 CREDIT_CARD_EXP_DATE_4_DIGIT_YEAR)) {
432 exp_year_type_ = CREDIT_CARD_EXP_DATE_2_DIGIT_YEAR;
434 expiration_month_ = nullptr;
435 return true;
438 // Try to look for a 4-digit year expiration date.
439 if (FieldCanFitDataForFieldType(current_field_max_length,
440 CREDIT_CARD_EXP_DATE_4_DIGIT_YEAR) &&
441 ParseFieldSpecifics(scanner,
442 base::UTF8ToUTF16(kExpirationDate4DigitYearRe),
443 kMatchTelAndSelect,
444 &expiration_date_)) {
445 expiration_month_ = nullptr;
446 return true;
449 return false;
452 ServerFieldType CreditCardField::GetExpirationYearType() const {
453 return (expiration_date_
454 ? exp_year_type_
455 : ((expiration_year_ && expiration_year_->max_length == 2)
456 ? CREDIT_CARD_EXP_2_DIGIT_YEAR
457 : CREDIT_CARD_EXP_4_DIGIT_YEAR));
460 } // namespace autofill