content/common/android/address_parser_internal.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "content/common/android/address_parser_internal.h"
   6
   7 #include <bitset>
   8
   9 #include "base/logging.h"
  10 #include "base/strings/string_util.h"
  11
  12 namespace {
  13
  14 // Number of digits for a valid zip code.
  15 const size_t kZipDigits = 5;
  16
  17 // Number of digits for a valid zip code in the Zip Plus 4 format.
  18 const size_t kZipPlus4Digits = 9;
  19
  20 // Maximum number of digits of a house number, including possible hyphens.
  21 const size_t kMaxHouseDigits = 5;
  22
  23 base::char16 SafePreviousChar(const base::string16::const_iterator& it,
  24     const base::string16::const_iterator& begin) {
  25   if (it == begin)
  26     return ' ';
  27   return *(it - 1);
  28 }
  29
  30 base::char16 SafeNextChar(const base::string16::const_iterator& it,
  31     const base::string16::const_iterator& end) {
  32   if (it == end)
  33     return ' ';
  34   return *(it + 1);
  35 }
  36
  37 bool WordLowerCaseEqualsASCII(base::string16::const_iterator word_begin,
  38     base::string16::const_iterator word_end, const char* ascii_to_match) {
  39   for (base::string16::const_iterator it = word_begin; it != word_end;
  40       ++it, ++ascii_to_match) {
  41     if (!*ascii_to_match || base::ToLowerASCII(*it) != *ascii_to_match)
  42       return false;
  43   }
  44   return *ascii_to_match == 0 || *ascii_to_match == ' ';
  45 }
  46
  47 bool LowerCaseEqualsASCIIWithPlural(base::string16::const_iterator word_begin,
  48     base::string16::const_iterator word_end, const char* ascii_to_match,
  49     bool allow_plural) {
  50   for (base::string16::const_iterator it = word_begin; it != word_end;
  51       ++it, ++ascii_to_match) {
  52     if (!*ascii_to_match && allow_plural && *it == 's' && it + 1 == word_end)
  53       return true;
  54
  55     if (!*ascii_to_match || base::ToLowerASCII(*it) != *ascii_to_match)
  56       return false;
  57   }
  58   return *ascii_to_match == 0;
  59 }
  60
  61 }  // anonymous namespace
  62
  63 namespace content {
  64
  65 namespace address_parser {
  66
  67 namespace internal {
  68
  69 Word::Word() {
  70 }
  71
  72 Word::Word(const base::string16::const_iterator& begin,
  73            const base::string16::const_iterator& end)
  74     : begin(begin),
  75       end(end) {
  76   DCHECK(begin <= end);
  77 }
  78
  79 HouseNumberParser::HouseNumberParser() {
  80 }
  81
  82 bool HouseNumberParser::IsPreDelimiter(base::char16 character) {
  83   return character == ':' || IsPostDelimiter(character);
  84 }
  85
  86 bool HouseNumberParser::IsPostDelimiter(base::char16 character) {
  87   return IsWhitespace(character) || strchr(",\"'", character);
  88 }
  89
  90 void HouseNumberParser::RestartOnNextDelimiter() {
  91   ResetState();
  92   for (; it_ != end_ && !IsPreDelimiter(*it_); ++it_) {}
  93 }
  94
  95 void HouseNumberParser::AcceptChars(size_t num_chars) {
  96   size_t offset = std::min(static_cast<size_t>(std::distance(it_, end_)),
  97                            num_chars);
  98   it_ += offset;
  99   result_chars_ += offset;
 100 }
 101
 102 void HouseNumberParser::SkipChars(size_t num_chars) {
 103   it_ += std::min(static_cast<size_t>(std::distance(it_, end_)), num_chars);
 104 }
 105
 106 void HouseNumberParser::ResetState() {
 107   num_digits_ = 0;
 108   result_chars_ = 0;
 109 }
 110
 111 bool HouseNumberParser::CheckFinished(Word* word) const {
 112   // There should always be a number after a hyphen.
 113   if (result_chars_ == 0 || SafePreviousChar(it_, begin_) == '-')
 114     return false;
 115
 116   if (word) {
 117     word->begin = it_ - result_chars_;
 118     word->end = it_;
 119   }
 120   return true;
 121 }
 122
 123 bool HouseNumberParser::Parse(
 124     const base::string16::const_iterator& begin,
 125     const base::string16::const_iterator& end, Word* word) {
 126   it_ = begin_ = begin;
 127   end_ = end;
 128   ResetState();
 129
 130   // Iterations only used as a fail-safe against any buggy infinite loops.
 131   size_t iterations = 0;
 132   size_t max_iterations = end - begin + 1;
 133   for (; it_ != end_ && iterations < max_iterations; ++iterations) {
 134
 135     // Word finished case.
 136     if (IsPostDelimiter(*it_)) {
 137       if (CheckFinished(word))
 138         return true;
 139       else if (result_chars_)
 140         ResetState();
 141
 142       SkipChars(1);
 143       continue;
 144     }
 145
 146     // More digits. There should be no more after a letter was found.
 147     if (IsAsciiDigit(*it_)) {
 148       if (num_digits_ >= kMaxHouseDigits) {
 149         RestartOnNextDelimiter();
 150       } else {
 151         AcceptChars(1);
 152         ++num_digits_;
 153       }
 154       continue;
 155     }
 156
 157     if (IsAsciiAlpha(*it_)) {
 158       // Handle special case 'one'.
 159       if (result_chars_ == 0) {
 160         if (it_ + 3 <= end_ && base::LowerCaseEqualsASCII(it_, it_ + 3, "one"))
 161           AcceptChars(3);
 162         else
 163           RestartOnNextDelimiter();
 164         continue;
 165       }
 166
 167       // There should be more than 1 character because of result_chars.
 168       DCHECK_GT(result_chars_, 0U);
 169       DCHECK(it_ != begin_);
 170       base::char16 previous = SafePreviousChar(it_, begin_);
 171       if (IsAsciiDigit(previous)) {
 172         // Check cases like '12A'.
 173         base::char16 next = SafeNextChar(it_, end_);
 174         if (IsPostDelimiter(next)) {
 175           AcceptChars(1);
 176           continue;
 177         }
 178
 179         // Handle cases like 12a, 1st, 2nd, 3rd, 7th.
 180         if (IsAsciiAlpha(next)) {
 181           base::char16 last_digit = previous;
 182           base::char16 first_letter = base::ToLowerASCII(*it_);
 183           base::char16 second_letter = base::ToLowerASCII(next);
 184           bool is_teen = SafePreviousChar(it_ - 1, begin_) == '1' &&
 185               num_digits_ == 2;
 186
 187           switch (last_digit - '0') {
 188           case 1:
 189             if ((first_letter == 's' && second_letter == 't') ||
 190                 (first_letter == 't' && second_letter == 'h' && is_teen)) {
 191               AcceptChars(2);
 192               continue;
 193             }
 194             break;
 195
 196           case 2:
 197             if ((first_letter == 'n' && second_letter == 'd') ||
 198                 (first_letter == 't' && second_letter == 'h' && is_teen)) {
 199               AcceptChars(2);
 200               continue;
 201             }
 202             break;
 203
 204           case 3:
 205             if ((first_letter == 'r' && second_letter == 'd') ||
 206                 (first_letter == 't' && second_letter == 'h' && is_teen)) {
 207               AcceptChars(2);
 208               continue;
 209             }
 210             break;
 211
 212           case 0:
 213             // Explicitly exclude '0th'.
 214             if (num_digits_ == 1)
 215               break;
 216
 217           case 4:
 218           case 5:
 219           case 6:
 220           case 7:
 221           case 8:
 222           case 9:
 223             if (first_letter == 't' && second_letter == 'h') {
 224               AcceptChars(2);
 225               continue;
 226             }
 227             break;
 228
 229           default:
 230             NOTREACHED();
 231           }
 232         }
 233       }
 234
 235       RestartOnNextDelimiter();
 236       continue;
 237     }
 238
 239     if (*it_ == '-' && num_digits_ > 0) {
 240       AcceptChars(1);
 241       ++num_digits_;
 242       continue;
 243     }
 244
 245     RestartOnNextDelimiter();
 246     SkipChars(1);
 247   }
 248
 249   if (iterations >= max_iterations)
 250     return false;
 251
 252   return CheckFinished(word);
 253 }
 254
 255 bool FindStateStartingInWord(WordList* words,
 256                              size_t state_first_word,
 257                              size_t* state_last_word,
 258                              String16Tokenizer* tokenizer,
 259                              size_t* state_index) {
 260
 261   // Bitmasks containing the allowed suffixes for 2-letter state codes.
 262   static const int state_two_letter_suffix[23] = {
 263     0x02060c00,  // A followed by: [KLRSZ].
 264     0x00000000,  // B.
 265     0x00084001,  // C followed by: [AOT].
 266     0x00000014,  // D followed by: [CE].
 267     0x00000000,  // E.
 268     0x00001800,  // F followed by: [LM].
 269     0x00100001,  // G followed by: [AU].
 270     0x00000100,  // H followed by: [I].
 271     0x00002809,  // I followed by: [ADLN].
 272     0x00000000,  // J.
 273     0x01040000,  // K followed by: [SY].
 274     0x00000001,  // L followed by: [A].
 275     0x000ce199,  // M followed by: [ADEHINOPST].
 276     0x0120129c,  // N followed by: [CDEHJMVY].
 277     0x00020480,  // O followed by: [HKR].
 278     0x00420001,  // P followed by: [ARW].
 279     0x00000000,  // Q.
 280     0x00000100,  // R followed by: [I].
 281     0x0000000c,  // S followed by: [CD].
 282     0x00802000,  // T followed by: [NX].
 283     0x00080000,  // U followed by: [T].
 284     0x00080101,  // V followed by: [AIT].
 285     0x01200101   // W followed by: [AIVY].
 286   };
 287
 288   // Accumulative number of states for the 2-letter code indexed by the first.
 289   static const int state_two_letter_accumulative[24] = {
 290      0,  5,  5,  8, 10, 10, 12, 14,
 291     15, 19, 19, 21, 22, 32, 40, 43,
 292     46, 46, 47, 49, 51, 52, 55, 59
 293   };
 294
 295   // State names sorted alphabetically with their lengths.
 296   // There can be more than one possible name for a same state if desired.
 297   static const struct StateNameInfo {
 298     const char* string;
 299     char first_word_length;
 300     char length;
 301     char state_index; // Relative to two-character code alphabetical order.
 302   } state_names[59] = {
 303     { "alabama", 7, 7, 1 }, { "alaska", 6, 6, 0 },
 304     { "american samoa", 8, 14, 3 }, { "arizona", 7, 7, 4 },
 305     { "arkansas", 8, 8, 2 },
 306     { "california", 10, 10, 5 }, { "colorado", 8, 8, 6 },
 307     { "connecticut", 11, 11, 7 }, { "delaware", 8, 8, 9 },
 308     { "district of columbia", 8, 20, 8 },
 309     { "federated states of micronesia", 9, 30, 11 }, { "florida", 7, 7, 10 },
 310     { "guam", 4, 4, 13 }, { "georgia", 7, 7, 12 },
 311     { "hawaii", 6, 6, 14 },
 312     { "idaho", 5, 5, 16 }, { "illinois", 8, 8, 17 }, { "indiana", 7, 7, 18 },
 313     { "iowa", 4, 4, 15 },
 314     { "kansas", 6, 6, 19 }, { "kentucky", 8, 8, 20 },
 315     { "louisiana", 9, 9, 21 },
 316     { "maine", 5, 5, 24 }, { "marshall islands", 8, 16, 25 },
 317     { "maryland", 8, 8, 23 }, { "massachusetts", 13, 13, 22 },
 318     { "michigan", 8, 8, 26 }, { "minnesota", 9, 9, 27 },
 319     { "mississippi", 11, 11, 30 }, { "missouri", 8, 8, 28 },
 320     { "montana", 7, 7, 31 },
 321     { "nebraska", 8, 8, 34 }, { "nevada", 6, 6, 38 },
 322     { "new hampshire", 3, 13, 35 }, { "new jersey", 3, 10, 36 },
 323     { "new mexico", 3, 10, 37 }, { "new york", 3, 8, 39 },
 324     { "north carolina", 5, 14, 32 }, { "north dakota", 5, 12, 33 },
 325     { "northern mariana islands", 8, 24, 29 },
 326     { "ohio", 4, 4, 40 }, { "oklahoma", 8, 8, 41 }, { "oregon", 6, 6, 42 },
 327     { "palau", 5, 5, 45 }, { "pennsylvania", 12, 12, 43 },
 328     { "puerto rico", 6, 11, 44 },
 329     { "rhode island", 5, 5, 46 },
 330     { "south carolina", 5, 14, 47 }, { "south dakota", 5, 12, 48 },
 331     { "tennessee", 9, 9, 49 }, { "texas", 5, 5, 50 },
 332     { "utah", 4, 4, 51 },
 333     { "vermont", 7, 7, 54 }, { "virgin islands", 6, 14, 53 },
 334     { "virginia", 8, 8, 52 },
 335     { "washington", 10, 10, 55 }, { "west virginia", 4, 13, 57 },
 336     { "wisconsin", 9, 9, 56 }, { "wyoming", 7, 7, 58 }
 337   };
 338
 339   // Accumulative number of states for sorted names indexed by the first letter.
 340   // Required a different one since there are codes that don't share their
 341   // first letter with the name of their state (MP = Northern Mariana Islands).
 342   static const int state_names_accumulative[24] = {
 343      0,  5,  5,  8, 10, 10, 12, 14,
 344     15, 19, 19, 21, 22, 31, 40, 43,
 345     46, 46, 47, 49, 51, 52, 55, 59
 346   };
 347
 348   DCHECK_EQ(state_names_accumulative[arraysize(state_names_accumulative) - 1],
 349       static_cast<int>(arraysize(state_names)));
 350
 351   const Word& first_word = words->at(state_first_word);
 352   int length = first_word.end - first_word.begin;
 353   if (length < 2 || !IsAsciiAlpha(*first_word.begin))
 354     return false;
 355
 356   // No state names start with x, y, z.
 357   base::char16 first_letter = base::ToLowerASCII(*first_word.begin);
 358   if (first_letter > 'w')
 359     return false;
 360
 361   DCHECK(first_letter >= 'a');
 362   int first_index = first_letter - 'a';
 363
 364   // Look for two-letter state names.
 365   if (length == 2 && IsAsciiAlpha(*(first_word.begin + 1))) {
 366     base::char16 second_letter = base::ToLowerASCII(*(first_word.begin + 1));
 367     DCHECK(second_letter >= 'a');
 368
 369     int second_index = second_letter - 'a';
 370     if (!(state_two_letter_suffix[first_index] & (1 << second_index)))
 371       return false;
 372
 373     std::bitset<32> previous_suffixes = state_two_letter_suffix[first_index] &
 374         ((1 << second_index) - 1);
 375     *state_last_word = state_first_word;
 376     *state_index = state_two_letter_accumulative[first_index] +
 377         previous_suffixes.count();
 378     return true;
 379   }
 380
 381   // Look for full state names by their first letter. Discard by length.
 382   for (int state = state_names_accumulative[first_index];
 383       state < state_names_accumulative[first_index + 1]; ++state) {
 384     if (state_names[state].first_word_length != length)
 385       continue;
 386
 387     bool state_match = false;
 388     size_t state_word = state_first_word;
 389     for (int pos = 0; true; ) {
 390       if (!WordLowerCaseEqualsASCII(words->at(state_word).begin,
 391           words->at(state_word).end, &state_names[state].string[pos]))
 392         break;
 393
 394       pos += words->at(state_word).end - words->at(state_word).begin + 1;
 395       if (pos >= state_names[state].length) {
 396         state_match = true;
 397         break;
 398       }
 399
 400       // Ran out of words, extract more from the tokenizer.
 401       if (++state_word == words->size()) {
 402         do {
 403           if (!tokenizer->GetNext())
 404             break;
 405         } while (tokenizer->token_is_delim());
 406         words->push_back(
 407             Word(tokenizer->token_begin(), tokenizer->token_end()));
 408       }
 409     }
 410
 411     if (state_match) {
 412       *state_last_word = state_word;
 413       *state_index = state_names[state].state_index;
 414       return true;
 415     }
 416   }
 417
 418   return false;
 419 }
 420
 421 bool IsZipValid(const Word& word, size_t state_index) {
 422   size_t length = word.end - word.begin;
 423   if (length != kZipDigits && length != kZipPlus4Digits + 1)
 424     return false;
 425
 426   for (base::string16::const_iterator it = word.begin; it != word.end; ++it) {
 427     size_t pos = it - word.begin;
 428     if (IsAsciiDigit(*it) || (*it == '-' && pos == kZipDigits))
 429       continue;
 430     return false;
 431   }
 432   return IsZipValidForState(word, state_index);
 433 }
 434
 435 bool IsZipValidForState(const Word& word, size_t state_index) {
 436   // List of valid zip code ranges.
 437   static const struct {
 438     signed char low;
 439     signed char high;
 440     signed char exception1;
 441     signed char exception2;
 442   } zip_range[] = {
 443     { 99, 99, -1, -1 }, // AK Alaska.
 444     { 35, 36, -1, -1 }, // AL Alabama.
 445     { 71, 72, -1, -1 }, // AR Arkansas.
 446     { 96, 96, -1, -1 }, // AS American Samoa.
 447     { 85, 86, -1, -1 }, // AZ Arizona.
 448     { 90, 96, -1, -1 }, // CA California.
 449     { 80, 81, -1, -1 }, // CO Colorado.
 450     {  6,  6, -1, -1 }, // CT Connecticut.
 451     { 20, 20, -1, -1 }, // DC District of Columbia.
 452     { 19, 19, -1, -1 }, // DE Delaware.
 453     { 32, 34, -1, -1 }, // FL Florida.
 454     { 96, 96, -1, -1 }, // FM Federated States of Micronesia.
 455     { 30, 31, -1, -1 }, // GA Georgia.
 456     { 96, 96, -1, -1 }, // GU Guam.
 457     { 96, 96, -1, -1 }, // HI Hawaii.
 458     { 50, 52, -1, -1 }, // IA Iowa.
 459     { 83, 83, -1, -1 }, // ID Idaho.
 460     { 60, 62, -1, -1 }, // IL Illinois.
 461     { 46, 47, -1, -1 }, // IN Indiana.
 462     { 66, 67, 73, -1 }, // KS Kansas.
 463     { 40, 42, -1, -1 }, // KY Kentucky.
 464     { 70, 71, -1, -1 }, // LA Louisiana.
 465     {  1,  2, -1, -1 }, // MA Massachusetts.
 466     { 20, 21, -1, -1 }, // MD Maryland.
 467     {  3,  4, -1, -1 }, // ME Maine.
 468     { 96, 96, -1, -1 }, // MH Marshall Islands.
 469     { 48, 49, -1, -1 }, // MI Michigan.
 470     { 55, 56, -1, -1 }, // MN Minnesota.
 471     { 63, 65, -1, -1 }, // MO Missouri.
 472     { 96, 96, -1, -1 }, // MP Northern Mariana Islands.
 473     { 38, 39, -1, -1 }, // MS Mississippi.
 474     { 55, 56, -1, -1 }, // MT Montana.
 475     { 27, 28, -1, -1 }, // NC North Carolina.
 476     { 58, 58, -1, -1 }, // ND North Dakota.
 477     { 68, 69, -1, -1 }, // NE Nebraska.
 478     {  3,  4, -1, -1 }, // NH New Hampshire.
 479     {  7,  8, -1, -1 }, // NJ New Jersey.
 480     { 87, 88, 86, -1 }, // NM New Mexico.
 481     { 88, 89, 96, -1 }, // NV Nevada.
 482     { 10, 14,  0,  6 }, // NY New York.
 483     { 43, 45, -1, -1 }, // OH Ohio.
 484     { 73, 74, -1, -1 }, // OK Oklahoma.
 485     { 97, 97, -1, -1 }, // OR Oregon.
 486     { 15, 19, -1, -1 }, // PA Pennsylvania.
 487     {  6,  6,  0,  9 }, // PR Puerto Rico.
 488     { 96, 96, -1, -1 }, // PW Palau.
 489     {  2,  2, -1, -1 }, // RI Rhode Island.
 490     { 29, 29, -1, -1 }, // SC South Carolina.
 491     { 57, 57, -1, -1 }, // SD South Dakota.
 492     { 37, 38, -1, -1 }, // TN Tennessee.
 493     { 75, 79, 87, 88 }, // TX Texas.
 494     { 84, 84, -1, -1 }, // UT Utah.
 495     { 22, 24, 20, -1 }, // VA Virginia.
 496     {  6,  9, -1, -1 }, // VI Virgin Islands.
 497     {  5,  5, -1, -1 }, // VT Vermont.
 498     { 98, 99, -1, -1 }, // WA Washington.
 499     { 53, 54, -1, -1 }, // WI Wisconsin.
 500     { 24, 26, -1, -1 }, // WV West Virginia.
 501     { 82, 83, -1, -1 }  // WY Wyoming.
 502   };
 503
 504   // Zip numeric value for the first two characters.
 505   DCHECK(word.begin != word.end);
 506   DCHECK(IsAsciiDigit(*word.begin));
 507   DCHECK(IsAsciiDigit(*(word.begin + 1)));
 508   int zip_prefix = (*word.begin - '0') * 10 + (*(word.begin + 1) - '0');
 509
 510   if ((zip_prefix >= zip_range[state_index].low &&
 511        zip_prefix <= zip_range[state_index].high) ||
 512       zip_prefix == zip_range[state_index].exception1 ||
 513       zip_prefix == zip_range[state_index].exception2) {
 514     return true;
 515   }
 516   return false;
 517 }
 518
 519 bool IsValidLocationName(const Word& word) {
 520   // Supported location names sorted alphabetically and grouped by first letter.
 521   static const struct LocationNameInfo {
 522     const char* string;
 523     char length;
 524     bool allow_plural;
 525   } location_names[157] = {
 526     { "alley", 5, false }, { "annex", 5, false }, { "arcade", 6, false },
 527     { "ave", 3, false }, { "ave.", 4, false }, { "avenue", 6, false },
 528     { "alameda", 7, false },
 529     { "bayou", 5, false }, { "beach", 5, false }, { "bend", 4, false },
 530     { "bluff", 5, true }, { "bottom", 6, false }, { "boulevard", 9, false },
 531     { "branch", 6, false }, { "bridge", 6, false }, { "brook", 5, true },
 532     { "burg", 4, true }, { "bypass", 6, false }, { "broadway", 8, false },
 533     { "camino", 6, false }, { "camp", 4, false }, { "canyon", 6, false },
 534     { "cape", 4, false }, { "causeway", 8, false }, { "center", 6, true },
 535     { "circle", 6, true }, { "cliff", 5, true }, { "club", 4, false },
 536     { "common", 6, false }, { "corner", 6, true }, { "course", 6, false },
 537     { "court", 5, true }, { "cove", 4, true }, { "creek", 5, false },
 538     { "crescent", 8, false }, { "crest", 5, false }, { "crossing", 8, false },
 539     { "crossroad", 9, false }, { "curve", 5, false }, { "circulo", 7, false },
 540     { "dale", 4, false }, { "dam", 3, false }, { "divide", 6, false },
 541     { "drive", 5, true },
 542     { "estate", 6, true }, { "expressway", 10, false },
 543     { "extension", 9, true },
 544     { "fall", 4, true }, { "ferry", 5, false }, { "field", 5, true },
 545     { "flat", 4, true }, { "ford", 4, true }, { "forest", 6, false },
 546     { "forge", 5, true }, { "fork", 4, true }, { "fort", 4, false },
 547     { "freeway", 7, false },
 548     { "garden", 6, true }, { "gateway", 7, false }, { "glen", 4, true },
 549     { "green", 5, true }, { "grove", 5, true },
 550     { "harbor", 6, true }, { "haven", 5, false }, { "heights", 7, false },
 551     { "highway", 7, false }, { "hill", 4, true }, { "hollow", 6, false },
 552     { "inlet", 5, false }, { "island", 6, true }, { "isle", 4, false },
 553     { "junction", 8, true },
 554     { "key", 3, true }, { "knoll", 5, true },
 555     { "lake", 4, true }, { "land", 4, false }, { "landing", 7, false },
 556     { "lane", 4, false }, { "light", 5, true }, { "loaf", 4, false },
 557     { "lock", 4, true }, { "lodge", 5, false }, { "loop", 4, false },
 558     { "mall", 4, false }, { "manor", 5, true }, { "meadow", 6, true },
 559     { "mews", 4, false }, { "mill", 4, true }, { "mission", 7, false },
 560     { "motorway", 8, false }, { "mount", 5, false }, { "mountain", 8, true },
 561     { "neck", 4, false },
 562     { "orchard", 7, false }, { "oval", 4, false }, { "overpass", 8, false },
 563     { "park", 4, true }, { "parkway", 7, true }, { "pass", 4, false },
 564     { "passage", 7, false }, { "path", 4, false }, { "pike", 4, false },
 565     { "pine", 4, true }, { "plain", 5, true }, { "plaza", 5, false },
 566     { "point", 5, true }, { "port", 4, true }, { "prairie", 7, false },
 567     { "privada", 7, false },
 568     { "radial", 6, false }, { "ramp", 4, false }, { "ranch", 5, false },
 569     { "rapid", 5, true }, { "rest", 4, false }, { "ridge", 5, true },
 570     { "river", 5, false }, { "road", 4, true }, { "route", 5, false },
 571     { "row", 3, false }, { "rue", 3, false }, { "run", 3, false },
 572     { "shoal", 5, true }, { "shore", 5, true }, { "skyway", 6, false },
 573     { "spring", 6, true }, { "spur", 4, true }, { "square", 6, true },
 574     { "station", 7, false }, { "stravenue", 9, false }, { "stream", 6, false },
 575     { "st", 2, false }, { "st.", 3, false }, { "street", 6, true },
 576     { "summit", 6, false }, { "speedway", 8, false },
 577     { "terrace", 7, false }, { "throughway", 10, false }, { "trace", 5, false },
 578     { "track", 5, false }, { "trafficway", 10, false }, { "trail", 5, false },
 579     { "tunnel", 6, false }, { "turnpike", 8, false },
 580     { "underpass", 9, false }, { "union", 5, true },
 581     { "valley", 6, true }, { "viaduct", 7, false }, { "view", 4, true },
 582     { "village", 7, true }, { "ville", 5, false }, { "vista", 5, false },
 583     { "walk", 4, true }, { "wall", 4, false }, { "way", 3, true },
 584     { "well", 4, true },
 585     { "xing", 4, false }, { "xrd", 3, false }
 586   };
 587
 588   // Accumulative number of location names for each starting letter.
 589   static const int location_names_accumulative[25] = {
 590       0,   7,  19,  40,  44,
 591      47,  57,  62,  68,  71,
 592      72,  74,  83,  92,  93,
 593      96, 109, 109, 121, 135,
 594     143, 145, 151, 155, 157
 595   };
 596
 597   DCHECK_EQ(
 598       location_names_accumulative[arraysize(location_names_accumulative) - 1],
 599       static_cast<int>(arraysize(location_names)));
 600
 601   if (!IsAsciiAlpha(*word.begin))
 602     return false;
 603
 604   // No location names start with y, z.
 605   base::char16 first_letter = base::ToLowerASCII(*word.begin);
 606   if (first_letter > 'x')
 607     return false;
 608
 609   DCHECK(first_letter >= 'a');
 610   int index = first_letter - 'a';
 611   int length = std::distance(word.begin, word.end);
 612   for (int i = location_names_accumulative[index];
 613       i < location_names_accumulative[index + 1]; ++i) {
 614     if (location_names[i].length != length &&
 615         (location_names[i].allow_plural &&
 616          location_names[i].length + 1 != length)) {
 617       continue;
 618     }
 619
 620     if (LowerCaseEqualsASCIIWithPlural(word.begin, word.end,
 621                                        location_names[i].string,
 622                                        location_names[i].allow_plural)) {
 623       return true;
 624     }
 625   }
 626
 627   return false;
 628 }
 629
 630 } // namespace internal
 631
 632 } // namespace address_parser
 633
 634 }  // namespace content