content/common/android/address_parser_internal.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "content/common/android/address_parser_internal.h"
   6
   7 #include <bitset>
   8
   9 #include "base/logging.h"
  10 #include "base/strings/string_util.h"
  11
  12 namespace {
  13
  14 // Number of digits for a valid zip code.
  15 const size_t kZipDigits = 5;
  16
  17 // Number of digits for a valid zip code in the Zip Plus 4 format.
  18 const size_t kZipPlus4Digits = 9;
  19
  20 // Maximum number of digits of a house number, including possible hyphens.
  21 const size_t kMaxHouseDigits = 5;
  22
  23 base::char16 SafePreviousChar(const base::string16::const_iterator& it,
  24     const base::string16::const_iterator& begin) {
  25   if (it == begin)
  26     return ' ';
  27   return *(it - 1);
  28 }
  29
  30 base::char16 SafeNextChar(const base::string16::const_iterator& it,
  31     const base::string16::const_iterator& end) {
  32   if (it == end)
  33     return ' ';
  34   return *(it + 1);
  35 }
  36
  37 bool WordLowerCaseEqualsASCII(base::string16::const_iterator word_begin,
  38     base::string16::const_iterator word_end, const char* ascii_to_match) {
  39   for (base::string16::const_iterator it = word_begin; it != word_end;
  40       ++it, ++ascii_to_match) {
  41     if (!*ascii_to_match || base::ToLowerASCII(*it) != *ascii_to_match)
  42       return false;
  43   }
  44   return *ascii_to_match == 0 || *ascii_to_match == ' ';
  45 }
  46
  47 bool LowerCaseEqualsASCIIWithPlural(base::string16::const_iterator word_begin,
  48     base::string16::const_iterator word_end, const char* ascii_to_match,
  49     bool allow_plural) {
  50   for (base::string16::const_iterator it = word_begin; it != word_end;
  51       ++it, ++ascii_to_match) {
  52     if (!*ascii_to_match && allow_plural && *it == 's' && it + 1 == word_end)
  53       return true;
  54
  55     if (!*ascii_to_match || base::ToLowerASCII(*it) != *ascii_to_match)
  56       return false;
  57   }
  58   return *ascii_to_match == 0;
  59 }
  60
  61 }  // anonymous namespace
  62
  63 namespace content {
  64
  65 namespace address_parser {
  66
  67 namespace internal {
  68
  69 Word::Word(const base::string16::const_iterator& begin,
  70            const base::string16::const_iterator& end)
  71     : begin(begin),
  72       end(end) {
  73   DCHECK(begin <= end);
  74 }
  75
  76 bool HouseNumberParser::IsPreDelimiter(base::char16 character) {
  77   return character == ':' || IsPostDelimiter(character);
  78 }
  79
  80 bool HouseNumberParser::IsPostDelimiter(base::char16 character) {
  81   return IsWhitespace(character) || strchr(",\"'", character);
  82 }
  83
  84 void HouseNumberParser::RestartOnNextDelimiter() {
  85   ResetState();
  86   for (; it_ != end_ && !IsPreDelimiter(*it_); ++it_) {}
  87 }
  88
  89 void HouseNumberParser::AcceptChars(size_t num_chars) {
  90   size_t offset = std::min(static_cast<size_t>(std::distance(it_, end_)),
  91                            num_chars);
  92   it_ += offset;
  93   result_chars_ += offset;
  94 }
  95
  96 void HouseNumberParser::SkipChars(size_t num_chars) {
  97   it_ += std::min(static_cast<size_t>(std::distance(it_, end_)), num_chars);
  98 }
  99
 100 void HouseNumberParser::ResetState() {
 101   num_digits_ = 0;
 102   result_chars_ = 0;
 103 }
 104
 105 bool HouseNumberParser::CheckFinished(Word* word) const {
 106   // There should always be a number after a hyphen.
 107   if (result_chars_ == 0 || SafePreviousChar(it_, begin_) == '-')
 108     return false;
 109
 110   if (word) {
 111     word->begin = it_ - result_chars_;
 112     word->end = it_;
 113   }
 114   return true;
 115 }
 116
 117 bool HouseNumberParser::Parse(
 118     const base::string16::const_iterator& begin,
 119     const base::string16::const_iterator& end, Word* word) {
 120   it_ = begin_ = begin;
 121   end_ = end;
 122   ResetState();
 123
 124   // Iterations only used as a fail-safe against any buggy infinite loops.
 125   size_t iterations = 0;
 126   size_t max_iterations = end - begin + 1;
 127   for (; it_ != end_ && iterations < max_iterations; ++iterations) {
 128
 129     // Word finished case.
 130     if (IsPostDelimiter(*it_)) {
 131       if (CheckFinished(word))
 132         return true;
 133       else if (result_chars_)
 134         ResetState();
 135
 136       SkipChars(1);
 137       continue;
 138     }
 139
 140     // More digits. There should be no more after a letter was found.
 141     if (IsAsciiDigit(*it_)) {
 142       if (num_digits_ >= kMaxHouseDigits) {
 143         RestartOnNextDelimiter();
 144       } else {
 145         AcceptChars(1);
 146         ++num_digits_;
 147       }
 148       continue;
 149     }
 150
 151     if (IsAsciiAlpha(*it_)) {
 152       // Handle special case 'one'.
 153       if (result_chars_ == 0) {
 154         if (it_ + 3 <= end_ && LowerCaseEqualsASCII(it_, it_ + 3, "one"))
 155           AcceptChars(3);
 156         else
 157           RestartOnNextDelimiter();
 158         continue;
 159       }
 160
 161       // There should be more than 1 character because of result_chars.
 162       DCHECK_GT(result_chars_, 0U);
 163       DCHECK(it_ != begin_);
 164       base::char16 previous = SafePreviousChar(it_, begin_);
 165       if (IsAsciiDigit(previous)) {
 166         // Check cases like '12A'.
 167         base::char16 next = SafeNextChar(it_, end_);
 168         if (IsPostDelimiter(next)) {
 169           AcceptChars(1);
 170           continue;
 171         }
 172
 173         // Handle cases like 12a, 1st, 2nd, 3rd, 7th.
 174         if (IsAsciiAlpha(next)) {
 175           base::char16 last_digit = previous;
 176           base::char16 first_letter = base::ToLowerASCII(*it_);
 177           base::char16 second_letter = base::ToLowerASCII(next);
 178           bool is_teen = SafePreviousChar(it_ - 1, begin_) == '1' &&
 179               num_digits_ == 2;
 180
 181           switch (last_digit - '0') {
 182           case 1:
 183             if ((first_letter == 's' && second_letter == 't') ||
 184                 (first_letter == 't' && second_letter == 'h' && is_teen)) {
 185               AcceptChars(2);
 186               continue;
 187             }
 188             break;
 189
 190           case 2:
 191             if ((first_letter == 'n' && second_letter == 'd') ||
 192                 (first_letter == 't' && second_letter == 'h' && is_teen)) {
 193               AcceptChars(2);
 194               continue;
 195             }
 196             break;
 197
 198           case 3:
 199             if ((first_letter == 'r' && second_letter == 'd') ||
 200                 (first_letter == 't' && second_letter == 'h' && is_teen)) {
 201               AcceptChars(2);
 202               continue;
 203             }
 204             break;
 205
 206           case 0:
 207             // Explicitly exclude '0th'.
 208             if (num_digits_ == 1)
 209               break;
 210
 211           case 4:
 212           case 5:
 213           case 6:
 214           case 7:
 215           case 8:
 216           case 9:
 217             if (first_letter == 't' && second_letter == 'h') {
 218               AcceptChars(2);
 219               continue;
 220             }
 221             break;
 222
 223           default:
 224             NOTREACHED();
 225           }
 226         }
 227       }
 228
 229       RestartOnNextDelimiter();
 230       continue;
 231     }
 232
 233     if (*it_ == '-' && num_digits_ > 0) {
 234       AcceptChars(1);
 235       ++num_digits_;
 236       continue;
 237     }
 238
 239     RestartOnNextDelimiter();
 240     SkipChars(1);
 241   }
 242
 243   if (iterations >= max_iterations)
 244     return false;
 245
 246   return CheckFinished(word);
 247 }
 248
 249 bool FindStateStartingInWord(WordList* words,
 250                              size_t state_first_word,
 251                              size_t* state_last_word,
 252                              String16Tokenizer* tokenizer,
 253                              size_t* state_index) {
 254
 255   // Bitmasks containing the allowed suffixes for 2-letter state codes.
 256   static const int state_two_letter_suffix[23] = {
 257     0x02060c00,  // A followed by: [KLRSZ].
 258     0x00000000,  // B.
 259     0x00084001,  // C followed by: [AOT].
 260     0x00000014,  // D followed by: [CE].
 261     0x00000000,  // E.
 262     0x00001800,  // F followed by: [LM].
 263     0x00100001,  // G followed by: [AU].
 264     0x00000100,  // H followed by: [I].
 265     0x00002809,  // I followed by: [ADLN].
 266     0x00000000,  // J.
 267     0x01040000,  // K followed by: [SY].
 268     0x00000001,  // L followed by: [A].
 269     0x000ce199,  // M followed by: [ADEHINOPST].
 270     0x0120129c,  // N followed by: [CDEHJMVY].
 271     0x00020480,  // O followed by: [HKR].
 272     0x00420001,  // P followed by: [ARW].
 273     0x00000000,  // Q.
 274     0x00000100,  // R followed by: [I].
 275     0x0000000c,  // S followed by: [CD].
 276     0x00802000,  // T followed by: [NX].
 277     0x00080000,  // U followed by: [T].
 278     0x00080101,  // V followed by: [AIT].
 279     0x01200101   // W followed by: [AIVY].
 280   };
 281
 282   // Accumulative number of states for the 2-letter code indexed by the first.
 283   static const int state_two_letter_accumulative[24] = {
 284      0,  5,  5,  8, 10, 10, 12, 14,
 285     15, 19, 19, 21, 22, 32, 40, 43,
 286     46, 46, 47, 49, 51, 52, 55, 59
 287   };
 288
 289   // State names sorted alphabetically with their lengths.
 290   // There can be more than one possible name for a same state if desired.
 291   static const struct StateNameInfo {
 292     const char* string;
 293     char first_word_length;
 294     char length;
 295     char state_index; // Relative to two-character code alphabetical order.
 296   } state_names[59] = {
 297     { "alabama", 7, 7, 1 }, { "alaska", 6, 6, 0 },
 298     { "american samoa", 8, 14, 3 }, { "arizona", 7, 7, 4 },
 299     { "arkansas", 8, 8, 2 },
 300     { "california", 10, 10, 5 }, { "colorado", 8, 8, 6 },
 301     { "connecticut", 11, 11, 7 }, { "delaware", 8, 8, 9 },
 302     { "district of columbia", 8, 20, 8 },
 303     { "federated states of micronesia", 9, 30, 11 }, { "florida", 7, 7, 10 },
 304     { "guam", 4, 4, 13 }, { "georgia", 7, 7, 12 },
 305     { "hawaii", 6, 6, 14 },
 306     { "idaho", 5, 5, 16 }, { "illinois", 8, 8, 17 }, { "indiana", 7, 7, 18 },
 307     { "iowa", 4, 4, 15 },
 308     { "kansas", 6, 6, 19 }, { "kentucky", 8, 8, 20 },
 309     { "louisiana", 9, 9, 21 },
 310     { "maine", 5, 5, 24 }, { "marshall islands", 8, 16, 25 },
 311     { "maryland", 8, 8, 23 }, { "massachusetts", 13, 13, 22 },
 312     { "michigan", 8, 8, 26 }, { "minnesota", 9, 9, 27 },
 313     { "mississippi", 11, 11, 30 }, { "missouri", 8, 8, 28 },
 314     { "montana", 7, 7, 31 },
 315     { "nebraska", 8, 8, 34 }, { "nevada", 6, 6, 38 },
 316     { "new hampshire", 3, 13, 35 }, { "new jersey", 3, 10, 36 },
 317     { "new mexico", 3, 10, 37 }, { "new york", 3, 8, 39 },
 318     { "north carolina", 5, 14, 32 }, { "north dakota", 5, 12, 33 },
 319     { "northern mariana islands", 8, 24, 29 },
 320     { "ohio", 4, 4, 40 }, { "oklahoma", 8, 8, 41 }, { "oregon", 6, 6, 42 },
 321     { "palau", 5, 5, 45 }, { "pennsylvania", 12, 12, 43 },
 322     { "puerto rico", 6, 11, 44 },
 323     { "rhode island", 5, 5, 46 },
 324     { "south carolina", 5, 14, 47 }, { "south dakota", 5, 12, 48 },
 325     { "tennessee", 9, 9, 49 }, { "texas", 5, 5, 50 },
 326     { "utah", 4, 4, 51 },
 327     { "vermont", 7, 7, 54 }, { "virgin islands", 6, 14, 53 },
 328     { "virginia", 8, 8, 52 },
 329     { "washington", 10, 10, 55 }, { "west virginia", 4, 13, 57 },
 330     { "wisconsin", 9, 9, 56 }, { "wyoming", 7, 7, 58 }
 331   };
 332
 333   // Accumulative number of states for sorted names indexed by the first letter.
 334   // Required a different one since there are codes that don't share their
 335   // first letter with the name of their state (MP = Northern Mariana Islands).
 336   static const int state_names_accumulative[24] = {
 337      0,  5,  5,  8, 10, 10, 12, 14,
 338     15, 19, 19, 21, 22, 31, 40, 43,
 339     46, 46, 47, 49, 51, 52, 55, 59
 340   };
 341
 342   DCHECK_EQ(state_names_accumulative[arraysize(state_names_accumulative) - 1],
 343       static_cast<int>(arraysize(state_names)));
 344
 345   const Word& first_word = words->at(state_first_word);
 346   int length = first_word.end - first_word.begin;
 347   if (length < 2 || !IsAsciiAlpha(*first_word.begin))
 348     return false;
 349
 350   // No state names start with x, y, z.
 351   base::char16 first_letter = base::ToLowerASCII(*first_word.begin);
 352   if (first_letter > 'w')
 353     return false;
 354
 355   DCHECK(first_letter >= 'a');
 356   int first_index = first_letter - 'a';
 357
 358   // Look for two-letter state names.
 359   if (length == 2 && IsAsciiAlpha(*(first_word.begin + 1))) {
 360     base::char16 second_letter = base::ToLowerASCII(*(first_word.begin + 1));
 361     DCHECK(second_letter >= 'a');
 362
 363     int second_index = second_letter - 'a';
 364     if (!(state_two_letter_suffix[first_index] & (1 << second_index)))
 365       return false;
 366
 367     std::bitset<32> previous_suffixes = state_two_letter_suffix[first_index] &
 368         ((1 << second_index) - 1);
 369     *state_last_word = state_first_word;
 370     *state_index = state_two_letter_accumulative[first_index] +
 371         previous_suffixes.count();
 372     return true;
 373   }
 374
 375   // Look for full state names by their first letter. Discard by length.
 376   for (int state = state_names_accumulative[first_index];
 377       state < state_names_accumulative[first_index + 1]; ++state) {
 378     if (state_names[state].first_word_length != length)
 379       continue;
 380
 381     bool state_match = false;
 382     size_t state_word = state_first_word;
 383     for (int pos = 0; true; ) {
 384       if (!WordLowerCaseEqualsASCII(words->at(state_word).begin,
 385           words->at(state_word).end, &state_names[state].string[pos]))
 386         break;
 387
 388       pos += words->at(state_word).end - words->at(state_word).begin + 1;
 389       if (pos >= state_names[state].length) {
 390         state_match = true;
 391         break;
 392       }
 393
 394       // Ran out of words, extract more from the tokenizer.
 395       if (++state_word == words->size()) {
 396         do {
 397           if (!tokenizer->GetNext())
 398             break;
 399         } while (tokenizer->token_is_delim());
 400         words->push_back(
 401             Word(tokenizer->token_begin(), tokenizer->token_end()));
 402       }
 403     }
 404
 405     if (state_match) {
 406       *state_last_word = state_word;
 407       *state_index = state_names[state].state_index;
 408       return true;
 409     }
 410   }
 411
 412   return false;
 413 }
 414
 415 bool IsZipValid(const Word& word, size_t state_index) {
 416   size_t length = word.end - word.begin;
 417   if (length != kZipDigits && length != kZipPlus4Digits + 1)
 418     return false;
 419
 420   for (base::string16::const_iterator it = word.begin; it != word.end; ++it) {
 421     size_t pos = it - word.begin;
 422     if (IsAsciiDigit(*it) || (*it == '-' && pos == kZipDigits))
 423       continue;
 424     return false;
 425   }
 426   return IsZipValidForState(word, state_index);
 427 }
 428
 429 bool IsZipValidForState(const Word& word, size_t state_index) {
 430   // List of valid zip code ranges.
 431   static const struct {
 432     signed char low;
 433     signed char high;
 434     signed char exception1;
 435     signed char exception2;
 436   } zip_range[] = {
 437     { 99, 99, -1, -1 }, // AK Alaska.
 438     { 35, 36, -1, -1 }, // AL Alabama.
 439     { 71, 72, -1, -1 }, // AR Arkansas.
 440     { 96, 96, -1, -1 }, // AS American Samoa.
 441     { 85, 86, -1, -1 }, // AZ Arizona.
 442     { 90, 96, -1, -1 }, // CA California.
 443     { 80, 81, -1, -1 }, // CO Colorado.
 444     {  6,  6, -1, -1 }, // CT Connecticut.
 445     { 20, 20, -1, -1 }, // DC District of Columbia.
 446     { 19, 19, -1, -1 }, // DE Delaware.
 447     { 32, 34, -1, -1 }, // FL Florida.
 448     { 96, 96, -1, -1 }, // FM Federated States of Micronesia.
 449     { 30, 31, -1, -1 }, // GA Georgia.
 450     { 96, 96, -1, -1 }, // GU Guam.
 451     { 96, 96, -1, -1 }, // HI Hawaii.
 452     { 50, 52, -1, -1 }, // IA Iowa.
 453     { 83, 83, -1, -1 }, // ID Idaho.
 454     { 60, 62, -1, -1 }, // IL Illinois.
 455     { 46, 47, -1, -1 }, // IN Indiana.
 456     { 66, 67, 73, -1 }, // KS Kansas.
 457     { 40, 42, -1, -1 }, // KY Kentucky.
 458     { 70, 71, -1, -1 }, // LA Louisiana.
 459     {  1,  2, -1, -1 }, // MA Massachusetts.
 460     { 20, 21, -1, -1 }, // MD Maryland.
 461     {  3,  4, -1, -1 }, // ME Maine.
 462     { 96, 96, -1, -1 }, // MH Marshall Islands.
 463     { 48, 49, -1, -1 }, // MI Michigan.
 464     { 55, 56, -1, -1 }, // MN Minnesota.
 465     { 63, 65, -1, -1 }, // MO Missouri.
 466     { 96, 96, -1, -1 }, // MP Northern Mariana Islands.
 467     { 38, 39, -1, -1 }, // MS Mississippi.
 468     { 55, 56, -1, -1 }, // MT Montana.
 469     { 27, 28, -1, -1 }, // NC North Carolina.
 470     { 58, 58, -1, -1 }, // ND North Dakota.
 471     { 68, 69, -1, -1 }, // NE Nebraska.
 472     {  3,  4, -1, -1 }, // NH New Hampshire.
 473     {  7,  8, -1, -1 }, // NJ New Jersey.
 474     { 87, 88, 86, -1 }, // NM New Mexico.
 475     { 88, 89, 96, -1 }, // NV Nevada.
 476     { 10, 14,  0,  6 }, // NY New York.
 477     { 43, 45, -1, -1 }, // OH Ohio.
 478     { 73, 74, -1, -1 }, // OK Oklahoma.
 479     { 97, 97, -1, -1 }, // OR Oregon.
 480     { 15, 19, -1, -1 }, // PA Pennsylvania.
 481     {  6,  6,  0,  9 }, // PR Puerto Rico.
 482     { 96, 96, -1, -1 }, // PW Palau.
 483     {  2,  2, -1, -1 }, // RI Rhode Island.
 484     { 29, 29, -1, -1 }, // SC South Carolina.
 485     { 57, 57, -1, -1 }, // SD South Dakota.
 486     { 37, 38, -1, -1 }, // TN Tennessee.
 487     { 75, 79, 87, 88 }, // TX Texas.
 488     { 84, 84, -1, -1 }, // UT Utah.
 489     { 22, 24, 20, -1 }, // VA Virginia.
 490     {  6,  9, -1, -1 }, // VI Virgin Islands.
 491     {  5,  5, -1, -1 }, // VT Vermont.
 492     { 98, 99, -1, -1 }, // WA Washington.
 493     { 53, 54, -1, -1 }, // WI Wisconsin.
 494     { 24, 26, -1, -1 }, // WV West Virginia.
 495     { 82, 83, -1, -1 }  // WY Wyoming.
 496   };
 497
 498   // Zip numeric value for the first two characters.
 499   DCHECK(word.begin != word.end);
 500   DCHECK(IsAsciiDigit(*word.begin));
 501   DCHECK(IsAsciiDigit(*(word.begin + 1)));
 502   int zip_prefix = (*word.begin - '0') * 10 + (*(word.begin + 1) - '0');
 503
 504   if ((zip_prefix >= zip_range[state_index].low &&
 505        zip_prefix <= zip_range[state_index].high) ||
 506       zip_prefix == zip_range[state_index].exception1 ||
 507       zip_prefix == zip_range[state_index].exception2) {
 508     return true;
 509   }
 510   return false;
 511 }
 512
 513 bool IsValidLocationName(const Word& word) {
 514   // Supported location names sorted alphabetically and grouped by first letter.
 515   static const struct LocationNameInfo {
 516     const char* string;
 517     char length;
 518     bool allow_plural;
 519   } location_names[157] = {
 520     { "alley", 5, false }, { "annex", 5, false }, { "arcade", 6, false },
 521     { "ave", 3, false }, { "ave.", 4, false }, { "avenue", 6, false },
 522     { "alameda", 7, false },
 523     { "bayou", 5, false }, { "beach", 5, false }, { "bend", 4, false },
 524     { "bluff", 5, true }, { "bottom", 6, false }, { "boulevard", 9, false },
 525     { "branch", 6, false }, { "bridge", 6, false }, { "brook", 5, true },
 526     { "burg", 4, true }, { "bypass", 6, false }, { "broadway", 8, false },
 527     { "camino", 6, false }, { "camp", 4, false }, { "canyon", 6, false },
 528     { "cape", 4, false }, { "causeway", 8, false }, { "center", 6, true },
 529     { "circle", 6, true }, { "cliff", 5, true }, { "club", 4, false },
 530     { "common", 6, false }, { "corner", 6, true }, { "course", 6, false },
 531     { "court", 5, true }, { "cove", 4, true }, { "creek", 5, false },
 532     { "crescent", 8, false }, { "crest", 5, false }, { "crossing", 8, false },
 533     { "crossroad", 9, false }, { "curve", 5, false }, { "circulo", 7, false },
 534     { "dale", 4, false }, { "dam", 3, false }, { "divide", 6, false },
 535     { "drive", 5, true },
 536     { "estate", 6, true }, { "expressway", 10, false },
 537     { "extension", 9, true },
 538     { "fall", 4, true }, { "ferry", 5, false }, { "field", 5, true },
 539     { "flat", 4, true }, { "ford", 4, true }, { "forest", 6, false },
 540     { "forge", 5, true }, { "fork", 4, true }, { "fort", 4, false },
 541     { "freeway", 7, false },
 542     { "garden", 6, true }, { "gateway", 7, false }, { "glen", 4, true },
 543     { "green", 5, true }, { "grove", 5, true },
 544     { "harbor", 6, true }, { "haven", 5, false }, { "heights", 7, false },
 545     { "highway", 7, false }, { "hill", 4, true }, { "hollow", 6, false },
 546     { "inlet", 5, false }, { "island", 6, true }, { "isle", 4, false },
 547     { "junction", 8, true },
 548     { "key", 3, true }, { "knoll", 5, true },
 549     { "lake", 4, true }, { "land", 4, false }, { "landing", 7, false },
 550     { "lane", 4, false }, { "light", 5, true }, { "loaf", 4, false },
 551     { "lock", 4, true }, { "lodge", 5, false }, { "loop", 4, false },
 552     { "mall", 4, false }, { "manor", 5, true }, { "meadow", 6, true },
 553     { "mews", 4, false }, { "mill", 4, true }, { "mission", 7, false },
 554     { "motorway", 8, false }, { "mount", 5, false }, { "mountain", 8, true },
 555     { "neck", 4, false },
 556     { "orchard", 7, false }, { "oval", 4, false }, { "overpass", 8, false },
 557     { "park", 4, true }, { "parkway", 7, true }, { "pass", 4, false },
 558     { "passage", 7, false }, { "path", 4, false }, { "pike", 4, false },
 559     { "pine", 4, true }, { "plain", 5, true }, { "plaza", 5, false },
 560     { "point", 5, true }, { "port", 4, true }, { "prairie", 7, false },
 561     { "privada", 7, false },
 562     { "radial", 6, false }, { "ramp", 4, false }, { "ranch", 5, false },
 563     { "rapid", 5, true }, { "rest", 4, false }, { "ridge", 5, true },
 564     { "river", 5, false }, { "road", 4, true }, { "route", 5, false },
 565     { "row", 3, false }, { "rue", 3, false }, { "run", 3, false },
 566     { "shoal", 5, true }, { "shore", 5, true }, { "skyway", 6, false },
 567     { "spring", 6, true }, { "spur", 4, true }, { "square", 6, true },
 568     { "station", 7, false }, { "stravenue", 9, false }, { "stream", 6, false },
 569     { "st", 2, false }, { "st.", 3, false }, { "street", 6, true },
 570     { "summit", 6, false }, { "speedway", 8, false },
 571     { "terrace", 7, false }, { "throughway", 10, false }, { "trace", 5, false },
 572     { "track", 5, false }, { "trafficway", 10, false }, { "trail", 5, false },
 573     { "tunnel", 6, false }, { "turnpike", 8, false },
 574     { "underpass", 9, false }, { "union", 5, true },
 575     { "valley", 6, true }, { "viaduct", 7, false }, { "view", 4, true },
 576     { "village", 7, true }, { "ville", 5, false }, { "vista", 5, false },
 577     { "walk", 4, true }, { "wall", 4, false }, { "way", 3, true },
 578     { "well", 4, true },
 579     { "xing", 4, false }, { "xrd", 3, false }
 580   };
 581
 582   // Accumulative number of location names for each starting letter.
 583   static const int location_names_accumulative[25] = {
 584       0,   7,  19,  40,  44,
 585      47,  57,  62,  68,  71,
 586      72,  74,  83,  92,  93,
 587      96, 109, 109, 121, 135,
 588     143, 145, 151, 155, 157
 589   };
 590
 591   DCHECK_EQ(
 592       location_names_accumulative[arraysize(location_names_accumulative) - 1],
 593       static_cast<int>(arraysize(location_names)));
 594
 595   if (!IsAsciiAlpha(*word.begin))
 596     return false;
 597
 598   // No location names start with y, z.
 599   base::char16 first_letter = base::ToLowerASCII(*word.begin);
 600   if (first_letter > 'x')
 601     return false;
 602
 603   DCHECK(first_letter >= 'a');
 604   int index = first_letter - 'a';
 605   int length = std::distance(word.begin, word.end);
 606   for (int i = location_names_accumulative[index];
 607       i < location_names_accumulative[index + 1]; ++i) {
 608     if (location_names[i].length != length &&
 609         (location_names[i].allow_plural &&
 610          location_names[i].length + 1 != length)) {
 611       continue;
 612     }
 613
 614     if (LowerCaseEqualsASCIIWithPlural(word.begin, word.end,
 615                                        location_names[i].string,
 616                                        location_names[i].allow_plural)) {
 617       return true;
 618     }
 619   }
 620
 621   return false;
 622 }
 623
 624 } // namespace internal
 625
 626 } // namespace address_parser
 627
 628 }  // namespace content