content/common/android/address_parser_internal.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "content/common/android/address_parser_internal.h"
   6
   7 #include <bitset>
   8
   9 #include "base/logging.h"
  10 #include "base/strings/string_util.h"
  11
  12 namespace {
  13
  14 // Number of digits for a valid zip code.
  15 const size_t kZipDigits = 5;
  16
  17 // Number of digits for a valid zip code in the Zip Plus 4 format.
  18 const size_t kZipPlus4Digits = 9;
  19
  20 // Maximum number of digits of a house number, including possible hyphens.
  21 const size_t kMaxHouseDigits = 5;
  22
  23 base::char16 SafePreviousChar(const base::string16::const_iterator& it,
  24     const base::string16::const_iterator& begin) {
  25   if (it == begin)
  26     return ' ';
  27   return *(it - 1);
  28 }
  29
  30 base::char16 SafeNextChar(const base::string16::const_iterator& it,
  31     const base::string16::const_iterator& end) {
  32   if (it == end)
  33     return ' ';
  34   return *(it + 1);
  35 }
  36
  37 bool WordLowerCaseEqualsASCII(base::string16::const_iterator word_begin,
  38     base::string16::const_iterator word_end, const char* ascii_to_match) {
  39   for (base::string16::const_iterator it = word_begin; it != word_end;
  40       ++it, ++ascii_to_match) {
  41     if (!*ascii_to_match || base::ToLowerASCII(*it) != *ascii_to_match)
  42       return false;
  43   }
  44   return *ascii_to_match == 0 || *ascii_to_match == ' ';
  45 }
  46
  47 bool LowerCaseEqualsASCIIWithPlural(base::string16::const_iterator word_begin,
  48     base::string16::const_iterator word_end, const char* ascii_to_match,
  49     bool allow_plural) {
  50   for (base::string16::const_iterator it = word_begin; it != word_end;
  51       ++it, ++ascii_to_match) {
  52     if (!*ascii_to_match && allow_plural && *it == 's' && it + 1 == word_end)
  53       return true;
  54
  55     if (!*ascii_to_match || base::ToLowerASCII(*it) != *ascii_to_match)
  56       return false;
  57   }
  58   return *ascii_to_match == 0;
  59 }
  60
  61 }  // anonymous namespace
  62
  63 namespace content {
  64
  65 namespace address_parser {
  66
  67 namespace internal {
  68
  69 Word::Word() {
  70 }
  71
  72 Word::Word(const base::string16::const_iterator& begin,
  73            const base::string16::const_iterator& end)
  74     : begin(begin),
  75       end(end) {
  76   DCHECK(begin <= end);
  77 }
  78
  79 HouseNumberParser::HouseNumberParser() {
  80 }
  81
  82 bool HouseNumberParser::IsPreDelimiter(base::char16 character) {
  83   return character == ':' || IsPostDelimiter(character);
  84 }
  85
  86 bool HouseNumberParser::IsPostDelimiter(base::char16 character) {
  87   return base::IsUnicodeWhitespace(character) || strchr(",\"'", character);
  88 }
  89
  90 void HouseNumberParser::RestartOnNextDelimiter() {
  91   ResetState();
  92   for (; it_ != end_ && !IsPreDelimiter(*it_); ++it_) {}
  93 }
  94
  95 void HouseNumberParser::AcceptChars(size_t num_chars) {
  96   size_t offset = std::min(static_cast<size_t>(std::distance(it_, end_)),
  97                            num_chars);
  98   it_ += offset;
  99   result_chars_ += offset;
 100 }
 101
 102 void HouseNumberParser::SkipChars(size_t num_chars) {
 103   it_ += std::min(static_cast<size_t>(std::distance(it_, end_)), num_chars);
 104 }
 105
 106 void HouseNumberParser::ResetState() {
 107   num_digits_ = 0;
 108   result_chars_ = 0;
 109 }
 110
 111 bool HouseNumberParser::CheckFinished(Word* word) const {
 112   // There should always be a number after a hyphen.
 113   if (result_chars_ == 0 || SafePreviousChar(it_, begin_) == '-')
 114     return false;
 115
 116   if (word) {
 117     word->begin = it_ - result_chars_;
 118     word->end = it_;
 119   }
 120   return true;
 121 }
 122
 123 bool HouseNumberParser::Parse(
 124     const base::string16::const_iterator& begin,
 125     const base::string16::const_iterator& end, Word* word) {
 126   it_ = begin_ = begin;
 127   end_ = end;
 128   ResetState();
 129
 130   // Iterations only used as a fail-safe against any buggy infinite loops.
 131   size_t iterations = 0;
 132   size_t max_iterations = end - begin + 1;
 133   for (; it_ != end_ && iterations < max_iterations; ++iterations) {
 134
 135     // Word finished case.
 136     if (IsPostDelimiter(*it_)) {
 137       if (CheckFinished(word))
 138         return true;
 139       else if (result_chars_)
 140         ResetState();
 141
 142       SkipChars(1);
 143       continue;
 144     }
 145
 146     // More digits. There should be no more after a letter was found.
 147     if (base::IsAsciiDigit(*it_)) {
 148       if (num_digits_ >= kMaxHouseDigits) {
 149         RestartOnNextDelimiter();
 150       } else {
 151         AcceptChars(1);
 152         ++num_digits_;
 153       }
 154       continue;
 155     }
 156
 157     if (base::IsAsciiAlpha(*it_)) {
 158       // Handle special case 'one'.
 159       if (result_chars_ == 0) {
 160         if (it_ + 3 <= end_ &&
 161             base::LowerCaseEqualsASCII(base::StringPiece16(it_, it_ + 3),
 162                                        "one"))
 163           AcceptChars(3);
 164         else
 165           RestartOnNextDelimiter();
 166         continue;
 167       }
 168
 169       // There should be more than 1 character because of result_chars.
 170       DCHECK_GT(result_chars_, 0U);
 171       DCHECK(it_ != begin_);
 172       base::char16 previous = SafePreviousChar(it_, begin_);
 173       if (base::IsAsciiDigit(previous)) {
 174         // Check cases like '12A'.
 175         base::char16 next = SafeNextChar(it_, end_);
 176         if (IsPostDelimiter(next)) {
 177           AcceptChars(1);
 178           continue;
 179         }
 180
 181         // Handle cases like 12a, 1st, 2nd, 3rd, 7th.
 182         if (base::IsAsciiAlpha(next)) {
 183           base::char16 last_digit = previous;
 184           base::char16 first_letter = base::ToLowerASCII(*it_);
 185           base::char16 second_letter = base::ToLowerASCII(next);
 186           bool is_teen = SafePreviousChar(it_ - 1, begin_) == '1' &&
 187               num_digits_ == 2;
 188
 189           switch (last_digit - '0') {
 190           case 1:
 191             if ((first_letter == 's' && second_letter == 't') ||
 192                 (first_letter == 't' && second_letter == 'h' && is_teen)) {
 193               AcceptChars(2);
 194               continue;
 195             }
 196             break;
 197
 198           case 2:
 199             if ((first_letter == 'n' && second_letter == 'd') ||
 200                 (first_letter == 't' && second_letter == 'h' && is_teen)) {
 201               AcceptChars(2);
 202               continue;
 203             }
 204             break;
 205
 206           case 3:
 207             if ((first_letter == 'r' && second_letter == 'd') ||
 208                 (first_letter == 't' && second_letter == 'h' && is_teen)) {
 209               AcceptChars(2);
 210               continue;
 211             }
 212             break;
 213
 214           case 0:
 215             // Explicitly exclude '0th'.
 216             if (num_digits_ == 1)
 217               break;
 218
 219           case 4:
 220           case 5:
 221           case 6:
 222           case 7:
 223           case 8:
 224           case 9:
 225             if (first_letter == 't' && second_letter == 'h') {
 226               AcceptChars(2);
 227               continue;
 228             }
 229             break;
 230
 231           default:
 232             NOTREACHED();
 233           }
 234         }
 235       }
 236
 237       RestartOnNextDelimiter();
 238       continue;
 239     }
 240
 241     if (*it_ == '-' && num_digits_ > 0) {
 242       AcceptChars(1);
 243       ++num_digits_;
 244       continue;
 245     }
 246
 247     RestartOnNextDelimiter();
 248     SkipChars(1);
 249   }
 250
 251   if (iterations >= max_iterations)
 252     return false;
 253
 254   return CheckFinished(word);
 255 }
 256
 257 bool FindStateStartingInWord(WordList* words,
 258                              size_t state_first_word,
 259                              size_t* state_last_word,
 260                              String16Tokenizer* tokenizer,
 261                              size_t* state_index) {
 262
 263   // Bitmasks containing the allowed suffixes for 2-letter state codes.
 264   static const int state_two_letter_suffix[23] = {
 265     0x02060c00,  // A followed by: [KLRSZ].
 266     0x00000000,  // B.
 267     0x00084001,  // C followed by: [AOT].
 268     0x00000014,  // D followed by: [CE].
 269     0x00000000,  // E.
 270     0x00001800,  // F followed by: [LM].
 271     0x00100001,  // G followed by: [AU].
 272     0x00000100,  // H followed by: [I].
 273     0x00002809,  // I followed by: [ADLN].
 274     0x00000000,  // J.
 275     0x01040000,  // K followed by: [SY].
 276     0x00000001,  // L followed by: [A].
 277     0x000ce199,  // M followed by: [ADEHINOPST].
 278     0x0120129c,  // N followed by: [CDEHJMVY].
 279     0x00020480,  // O followed by: [HKR].
 280     0x00420001,  // P followed by: [ARW].
 281     0x00000000,  // Q.
 282     0x00000100,  // R followed by: [I].
 283     0x0000000c,  // S followed by: [CD].
 284     0x00802000,  // T followed by: [NX].
 285     0x00080000,  // U followed by: [T].
 286     0x00080101,  // V followed by: [AIT].
 287     0x01200101   // W followed by: [AIVY].
 288   };
 289
 290   // Accumulative number of states for the 2-letter code indexed by the first.
 291   static const int state_two_letter_accumulative[24] = {
 292      0,  5,  5,  8, 10, 10, 12, 14,
 293     15, 19, 19, 21, 22, 32, 40, 43,
 294     46, 46, 47, 49, 51, 52, 55, 59
 295   };
 296
 297   // State names sorted alphabetically with their lengths.
 298   // There can be more than one possible name for a same state if desired.
 299   static const struct StateNameInfo {
 300     const char* string;
 301     char first_word_length;
 302     char length;
 303     char state_index; // Relative to two-character code alphabetical order.
 304   } state_names[59] = {
 305     { "alabama", 7, 7, 1 }, { "alaska", 6, 6, 0 },
 306     { "american samoa", 8, 14, 3 }, { "arizona", 7, 7, 4 },
 307     { "arkansas", 8, 8, 2 },
 308     { "california", 10, 10, 5 }, { "colorado", 8, 8, 6 },
 309     { "connecticut", 11, 11, 7 }, { "delaware", 8, 8, 9 },
 310     { "district of columbia", 8, 20, 8 },
 311     { "federated states of micronesia", 9, 30, 11 }, { "florida", 7, 7, 10 },
 312     { "guam", 4, 4, 13 }, { "georgia", 7, 7, 12 },
 313     { "hawaii", 6, 6, 14 },
 314     { "idaho", 5, 5, 16 }, { "illinois", 8, 8, 17 }, { "indiana", 7, 7, 18 },
 315     { "iowa", 4, 4, 15 },
 316     { "kansas", 6, 6, 19 }, { "kentucky", 8, 8, 20 },
 317     { "louisiana", 9, 9, 21 },
 318     { "maine", 5, 5, 24 }, { "marshall islands", 8, 16, 25 },
 319     { "maryland", 8, 8, 23 }, { "massachusetts", 13, 13, 22 },
 320     { "michigan", 8, 8, 26 }, { "minnesota", 9, 9, 27 },
 321     { "mississippi", 11, 11, 30 }, { "missouri", 8, 8, 28 },
 322     { "montana", 7, 7, 31 },
 323     { "nebraska", 8, 8, 34 }, { "nevada", 6, 6, 38 },
 324     { "new hampshire", 3, 13, 35 }, { "new jersey", 3, 10, 36 },
 325     { "new mexico", 3, 10, 37 }, { "new york", 3, 8, 39 },
 326     { "north carolina", 5, 14, 32 }, { "north dakota", 5, 12, 33 },
 327     { "northern mariana islands", 8, 24, 29 },
 328     { "ohio", 4, 4, 40 }, { "oklahoma", 8, 8, 41 }, { "oregon", 6, 6, 42 },
 329     { "palau", 5, 5, 45 }, { "pennsylvania", 12, 12, 43 },
 330     { "puerto rico", 6, 11, 44 },
 331     { "rhode island", 5, 5, 46 },
 332     { "south carolina", 5, 14, 47 }, { "south dakota", 5, 12, 48 },
 333     { "tennessee", 9, 9, 49 }, { "texas", 5, 5, 50 },
 334     { "utah", 4, 4, 51 },
 335     { "vermont", 7, 7, 54 }, { "virgin islands", 6, 14, 53 },
 336     { "virginia", 8, 8, 52 },
 337     { "washington", 10, 10, 55 }, { "west virginia", 4, 13, 57 },
 338     { "wisconsin", 9, 9, 56 }, { "wyoming", 7, 7, 58 }
 339   };
 340
 341   // Accumulative number of states for sorted names indexed by the first letter.
 342   // Required a different one since there are codes that don't share their
 343   // first letter with the name of their state (MP = Northern Mariana Islands).
 344   static const int state_names_accumulative[24] = {
 345      0,  5,  5,  8, 10, 10, 12, 14,
 346     15, 19, 19, 21, 22, 31, 40, 43,
 347     46, 46, 47, 49, 51, 52, 55, 59
 348   };
 349
 350   DCHECK_EQ(state_names_accumulative[arraysize(state_names_accumulative) - 1],
 351       static_cast<int>(arraysize(state_names)));
 352
 353   const Word& first_word = words->at(state_first_word);
 354   int length = first_word.end - first_word.begin;
 355   if (length < 2 || !base::IsAsciiAlpha(*first_word.begin))
 356     return false;
 357
 358   // No state names start with x, y, z.
 359   base::char16 first_letter = base::ToLowerASCII(*first_word.begin);
 360   if (first_letter > 'w')
 361     return false;
 362
 363   DCHECK(first_letter >= 'a');
 364   int first_index = first_letter - 'a';
 365
 366   // Look for two-letter state names.
 367   if (length == 2 && base::IsAsciiAlpha(*(first_word.begin + 1))) {
 368     base::char16 second_letter = base::ToLowerASCII(*(first_word.begin + 1));
 369     DCHECK(second_letter >= 'a');
 370
 371     int second_index = second_letter - 'a';
 372     if (!(state_two_letter_suffix[first_index] & (1 << second_index)))
 373       return false;
 374
 375     std::bitset<32> previous_suffixes = state_two_letter_suffix[first_index] &
 376         ((1 << second_index) - 1);
 377     *state_last_word = state_first_word;
 378     *state_index = state_two_letter_accumulative[first_index] +
 379         previous_suffixes.count();
 380     return true;
 381   }
 382
 383   // Look for full state names by their first letter. Discard by length.
 384   for (int state = state_names_accumulative[first_index];
 385       state < state_names_accumulative[first_index + 1]; ++state) {
 386     if (state_names[state].first_word_length != length)
 387       continue;
 388
 389     bool state_match = false;
 390     size_t state_word = state_first_word;
 391     for (int pos = 0; true; ) {
 392       if (!WordLowerCaseEqualsASCII(words->at(state_word).begin,
 393           words->at(state_word).end, &state_names[state].string[pos]))
 394         break;
 395
 396       pos += words->at(state_word).end - words->at(state_word).begin + 1;
 397       if (pos >= state_names[state].length) {
 398         state_match = true;
 399         break;
 400       }
 401
 402       // Ran out of words, extract more from the tokenizer.
 403       if (++state_word == words->size()) {
 404         do {
 405           if (!tokenizer->GetNext())
 406             break;
 407         } while (tokenizer->token_is_delim());
 408         words->push_back(
 409             Word(tokenizer->token_begin(), tokenizer->token_end()));
 410       }
 411     }
 412
 413     if (state_match) {
 414       *state_last_word = state_word;
 415       *state_index = state_names[state].state_index;
 416       return true;
 417     }
 418   }
 419
 420   return false;
 421 }
 422
 423 bool IsZipValid(const Word& word, size_t state_index) {
 424   size_t length = word.end - word.begin;
 425   if (length != kZipDigits && length != kZipPlus4Digits + 1)
 426     return false;
 427
 428   for (base::string16::const_iterator it = word.begin; it != word.end; ++it) {
 429     size_t pos = it - word.begin;
 430     if (base::IsAsciiDigit(*it) || (*it == '-' && pos == kZipDigits))
 431       continue;
 432     return false;
 433   }
 434   return IsZipValidForState(word, state_index);
 435 }
 436
 437 bool IsZipValidForState(const Word& word, size_t state_index) {
 438   // List of valid zip code ranges.
 439   static const struct {
 440     signed char low;
 441     signed char high;
 442     signed char exception1;
 443     signed char exception2;
 444   } zip_range[] = {
 445     { 99, 99, -1, -1 }, // AK Alaska.
 446     { 35, 36, -1, -1 }, // AL Alabama.
 447     { 71, 72, -1, -1 }, // AR Arkansas.
 448     { 96, 96, -1, -1 }, // AS American Samoa.
 449     { 85, 86, -1, -1 }, // AZ Arizona.
 450     { 90, 96, -1, -1 }, // CA California.
 451     { 80, 81, -1, -1 }, // CO Colorado.
 452     {  6,  6, -1, -1 }, // CT Connecticut.
 453     { 20, 20, -1, -1 }, // DC District of Columbia.
 454     { 19, 19, -1, -1 }, // DE Delaware.
 455     { 32, 34, -1, -1 }, // FL Florida.
 456     { 96, 96, -1, -1 }, // FM Federated States of Micronesia.
 457     { 30, 31, -1, -1 }, // GA Georgia.
 458     { 96, 96, -1, -1 }, // GU Guam.
 459     { 96, 96, -1, -1 }, // HI Hawaii.
 460     { 50, 52, -1, -1 }, // IA Iowa.
 461     { 83, 83, -1, -1 }, // ID Idaho.
 462     { 60, 62, -1, -1 }, // IL Illinois.
 463     { 46, 47, -1, -1 }, // IN Indiana.
 464     { 66, 67, 73, -1 }, // KS Kansas.
 465     { 40, 42, -1, -1 }, // KY Kentucky.
 466     { 70, 71, -1, -1 }, // LA Louisiana.
 467     {  1,  2, -1, -1 }, // MA Massachusetts.
 468     { 20, 21, -1, -1 }, // MD Maryland.
 469     {  3,  4, -1, -1 }, // ME Maine.
 470     { 96, 96, -1, -1 }, // MH Marshall Islands.
 471     { 48, 49, -1, -1 }, // MI Michigan.
 472     { 55, 56, -1, -1 }, // MN Minnesota.
 473     { 63, 65, -1, -1 }, // MO Missouri.
 474     { 96, 96, -1, -1 }, // MP Northern Mariana Islands.
 475     { 38, 39, -1, -1 }, // MS Mississippi.
 476     { 55, 56, -1, -1 }, // MT Montana.
 477     { 27, 28, -1, -1 }, // NC North Carolina.
 478     { 58, 58, -1, -1 }, // ND North Dakota.
 479     { 68, 69, -1, -1 }, // NE Nebraska.
 480     {  3,  4, -1, -1 }, // NH New Hampshire.
 481     {  7,  8, -1, -1 }, // NJ New Jersey.
 482     { 87, 88, 86, -1 }, // NM New Mexico.
 483     { 88, 89, 96, -1 }, // NV Nevada.
 484     { 10, 14,  0,  6 }, // NY New York.
 485     { 43, 45, -1, -1 }, // OH Ohio.
 486     { 73, 74, -1, -1 }, // OK Oklahoma.
 487     { 97, 97, -1, -1 }, // OR Oregon.
 488     { 15, 19, -1, -1 }, // PA Pennsylvania.
 489     {  6,  6,  0,  9 }, // PR Puerto Rico.
 490     { 96, 96, -1, -1 }, // PW Palau.
 491     {  2,  2, -1, -1 }, // RI Rhode Island.
 492     { 29, 29, -1, -1 }, // SC South Carolina.
 493     { 57, 57, -1, -1 }, // SD South Dakota.
 494     { 37, 38, -1, -1 }, // TN Tennessee.
 495     { 75, 79, 87, 88 }, // TX Texas.
 496     { 84, 84, -1, -1 }, // UT Utah.
 497     { 22, 24, 20, -1 }, // VA Virginia.
 498     {  6,  9, -1, -1 }, // VI Virgin Islands.
 499     {  5,  5, -1, -1 }, // VT Vermont.
 500     { 98, 99, -1, -1 }, // WA Washington.
 501     { 53, 54, -1, -1 }, // WI Wisconsin.
 502     { 24, 26, -1, -1 }, // WV West Virginia.
 503     { 82, 83, -1, -1 }  // WY Wyoming.
 504   };
 505
 506   // Zip numeric value for the first two characters.
 507   DCHECK(word.begin != word.end);
 508   DCHECK(base::IsAsciiDigit(*word.begin));
 509   DCHECK(base::IsAsciiDigit(*(word.begin + 1)));
 510   int zip_prefix = (*word.begin - '0') * 10 + (*(word.begin + 1) - '0');
 511
 512   if ((zip_prefix >= zip_range[state_index].low &&
 513        zip_prefix <= zip_range[state_index].high) ||
 514       zip_prefix == zip_range[state_index].exception1 ||
 515       zip_prefix == zip_range[state_index].exception2) {
 516     return true;
 517   }
 518   return false;
 519 }
 520
 521 bool IsValidLocationName(const Word& word) {
 522   // Supported location names sorted alphabetically and grouped by first letter.
 523   static const struct LocationNameInfo {
 524     const char* string;
 525     char length;
 526     bool allow_plural;
 527   } location_names[159] = {
 528     { "alley", 5, false }, { "annex", 5, false }, { "arcade", 6, false },
 529     { "ave", 3, false }, { "ave.", 4, false }, { "avenue", 6, false },
 530     { "alameda", 7, false },
 531     { "bayou", 5, false }, { "beach", 5, false }, { "bend", 4, false },
 532     { "bluff", 5, true }, { "bottom", 6, false }, { "boulevard", 9, false },
 533     { "branch", 6, false }, { "bridge", 6, false }, { "brook", 5, true },
 534     { "burg", 4, true }, { "bypass", 6, false }, { "broadway", 8, false },
 535     { "camino", 6, false }, { "camp", 4, false }, { "canyon", 6, false },
 536     { "cape", 4, false }, { "causeway", 8, false }, { "center", 6, true },
 537     { "circle", 6, true }, { "cliff", 5, true }, { "club", 4, false },
 538     { "common", 6, false }, { "corner", 6, true }, { "course", 6, false },
 539     { "court", 5, true }, { "cove", 4, true }, { "creek", 5, false },
 540     { "crescent", 8, false }, { "crest", 5, false }, { "crossing", 8, false },
 541     { "crossroad", 9, false }, { "curve", 5, false }, { "circulo", 7, false },
 542     { "dale", 4, false }, { "dam", 3, false }, { "divide", 6, false },
 543     { "drive", 5, true },
 544     { "estate", 6, true }, { "expressway", 10, false },
 545     { "extension", 9, true },
 546     { "fall", 4, true }, { "ferry", 5, false }, { "field", 5, true },
 547     { "flat", 4, true }, { "ford", 4, true }, { "forest", 6, false },
 548     { "forge", 5, true }, { "fork", 4, true }, { "fort", 4, false },
 549     { "freeway", 7, false },
 550     { "garden", 6, true }, { "gateway", 7, false }, { "glen", 4, true },
 551     { "green", 5, true }, { "grove", 5, true },
 552     { "harbor", 6, true }, { "haven", 5, false }, { "heights", 7, false },
 553     { "highway", 7, false }, { "hill", 4, true }, { "hollow", 6, false },
 554     { "inlet", 5, false }, { "island", 6, true }, { "isle", 4, false },
 555     { "junction", 8, true },
 556     { "key", 3, true }, { "knoll", 5, true },
 557     { "lake", 4, true }, { "land", 4, false }, { "landing", 7, false },
 558     { "lane", 4, false }, { "light", 5, true }, { "loaf", 4, false },
 559     { "lock", 4, true }, { "lodge", 5, false }, { "loop", 4, false },
 560     { "mall", 4, false }, { "manor", 5, true }, { "meadow", 6, true },
 561     { "mews", 4, false }, { "mill", 4, true }, { "mission", 7, false },
 562     { "motorway", 8, false }, { "mount", 5, false }, { "mountain", 8, true },
 563     { "neck", 4, false },
 564     { "orchard", 7, false }, { "oval", 4, false }, { "overpass", 8, false },
 565     { "park", 4, true }, { "parkway", 7, true }, { "pass", 4, false },
 566     { "passage", 7, false }, { "path", 4, false }, { "pike", 4, false },
 567     { "pine", 4, true }, { "plain", 5, true }, { "plaza", 5, false },
 568     { "point", 5, true }, { "port", 4, true }, { "prairie", 7, false },
 569     { "privada", 7, false },
 570     { "radial", 6, false }, { "ramp", 4, false }, { "ranch", 5, false },
 571     { "rapid", 5, true }, { "rd", 2, false }, { "rd.", 3, false },
 572     { "rest", 4, false }, { "ridge", 5, true }, { "river", 5, false },
 573     { "road", 4, true }, { "route", 5, false }, { "row", 3, false },
 574     { "rue", 3, false }, { "run", 3, false },
 575     { "shoal", 5, true }, { "shore", 5, true }, { "skyway", 6, false },
 576     { "spring", 6, true }, { "spur", 4, true }, { "square", 6, true },
 577     { "station", 7, false }, { "stravenue", 9, false }, { "stream", 6, false },
 578     { "st", 2, false }, { "st.", 3, false }, { "street", 6, true },
 579     { "summit", 6, false }, { "speedway", 8, false },
 580     { "terrace", 7, false }, { "throughway", 10, false }, { "trace", 5, false },
 581     { "track", 5, false }, { "trafficway", 10, false }, { "trail", 5, false },
 582     { "tunnel", 6, false }, { "turnpike", 8, false },
 583     { "underpass", 9, false }, { "union", 5, true },
 584     { "valley", 6, true }, { "viaduct", 7, false }, { "view", 4, true },
 585     { "village", 7, true }, { "ville", 5, false }, { "vista", 5, false },
 586     { "walk", 4, true }, { "wall", 4, false }, { "way", 3, true },
 587     { "well", 4, true },
 588     { "xing", 4, false }, { "xrd", 3, false }
 589   };
 590
 591   // Accumulative number of location names for each starting letter.
 592   static const int location_names_accumulative[25] = {
 593       0,   7,  19,  40,  44,
 594      47,  57,  62,  68,  71,
 595      72,  74,  83,  92,  93,
 596      96, 109, 109, 123, 137,
 597     145, 147, 153, 157, 159
 598   };
 599
 600   DCHECK_EQ(
 601       location_names_accumulative[arraysize(location_names_accumulative) - 1],
 602       static_cast<int>(arraysize(location_names)));
 603
 604   if (!base::IsAsciiAlpha(*word.begin))
 605     return false;
 606
 607   // No location names start with y, z.
 608   base::char16 first_letter = base::ToLowerASCII(*word.begin);
 609   if (first_letter > 'x')
 610     return false;
 611
 612   DCHECK(first_letter >= 'a');
 613   int index = first_letter - 'a';
 614   int length = std::distance(word.begin, word.end);
 615   for (int i = location_names_accumulative[index];
 616       i < location_names_accumulative[index + 1]; ++i) {
 617     if (location_names[i].length != length &&
 618         (location_names[i].allow_plural &&
 619          location_names[i].length + 1 != length)) {
 620       continue;
 621     }
 622
 623     if (LowerCaseEqualsASCIIWithPlural(word.begin, word.end,
 624                                        location_names[i].string,
 625                                        location_names[i].allow_plural)) {
 626       return true;
 627     }
 628   }
 629
 630   return false;
 631 }
 632
 633 } // namespace internal
 634
 635 } // namespace address_parser
 636
 637 }  // namespace content