ui/base/l10n/l10n_util.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "ui/base/l10n/l10n_util.h"
   6
   7 #include <algorithm>
   8 #include <cstdlib>
   9 #include <iterator>
  10 #include <string>
  11
  12 #include "base/command_line.h"
  13 #include "base/compiler_specific.h"
  14 #include "base/files/file_util.h"
  15 #include "base/i18n/file_util_icu.h"
  16 #include "base/i18n/rtl.h"
  17 #include "base/i18n/string_compare.h"
  18 #include "base/lazy_instance.h"
  19 #include "base/memory/scoped_ptr.h"
  20 #include "base/strings/string_number_conversions.h"
  21 #include "base/strings/string_split.h"
  22 #include "base/strings/string_util.h"
  23 #include "base/strings/stringprintf.h"
  24 #include "base/strings/sys_string_conversions.h"
  25 #include "base/strings/utf_string_conversions.h"
  26 #include "build/build_config.h"
  27 #include "third_party/icu/source/common/unicode/rbbi.h"
  28 #include "third_party/icu/source/common/unicode/uloc.h"
  29 #include "ui/base/l10n/l10n_util_collator.h"
  30 #include "ui/base/l10n/l10n_util_plurals.h"
  31 #include "ui/base/resource/resource_bundle.h"
  32 #include "ui/base/ui_base_paths.h"
  33
  34 #if defined(OS_ANDROID)
  35 #include "base/android/locale_utils.h"
  36 #include "ui/base/l10n/l10n_util_android.h"
  37 #endif
  38
  39 #if defined(USE_GLIB)
  40 #include <glib.h>
  41 #endif
  42
  43 #if defined(OS_WIN)
  44 #include "ui/base/l10n/l10n_util_win.h"
  45 #endif  // OS_WIN
  46
  47 namespace {
  48
// Language/locale codes that GetAcceptLanguagesForLocale() iterates over.
// Each entry is annotated with the English name of the language it denotes.
// The table is also exposed to tests through
// GetAcceptLanguageListForTesting() / GetAcceptLanguageListSizeForTesting().
static const char* const kAcceptLanguageList[] = {
  "af",     // Afrikaans
  "am",     // Amharic
  "ar",     // Arabic
  "az",     // Azerbaijani
  "be",     // Belarusian
  "bg",     // Bulgarian
  "bh",     // Bihari
  "bn",     // Bengali
  "br",     // Breton
  "bs",     // Bosnian
  "ca",     // Catalan
  "ckb",    // Kurdish (Arabic), Sorani
  "co",     // Corsican
  "cs",     // Czech
  "cy",     // Welsh
  "da",     // Danish
  "de",     // German
  "de-AT",  // German (Austria)
  "de-CH",  // German (Switzerland)
  "de-DE",  // German (Germany)
  "de-LI",  // German (Liechtenstein)
  "el",     // Greek
  "en",     // English
  "en-AU",  // English (Australia)
  "en-CA",  // English (Canada)
  "en-GB",  // English (UK)
  "en-NZ",  // English (New Zealand)
  "en-US",  // English (US)
  "en-ZA",  // English (South Africa)
  "eo",     // Esperanto
  // TODO(jungshik) : Do we want to list all es-Foo for Latin-American
  // Spanish speaking countries?
  "es",     // Spanish
  "es-419", // Spanish (Latin America)
  "et",     // Estonian
  "eu",     // Basque
  "fa",     // Persian
  "fi",     // Finnish
  "fil",    // Filipino
  "fo",     // Faroese
  "fr",     // French
  "fr-CA",  // French (Canada)
  "fr-CH",  // French (Switzerland)
  "fr-FR",  // French (France)
  "fy",     // Frisian
  "ga",     // Irish
  "gd",     // Scots Gaelic
  "gl",     // Galician
  "gn",     // Guarani
  "gu",     // Gujarati
  "ha",     // Hausa
  "haw",    // Hawaiian
  "he",     // Hebrew
  "hi",     // Hindi
  "hr",     // Croatian
  "hu",     // Hungarian
  "hy",     // Armenian
  "ia",     // Interlingua
  "id",     // Indonesian
  "is",     // Icelandic
  "it",     // Italian
  "it-CH",  // Italian (Switzerland)
  "it-IT",  // Italian (Italy)
  "ja",     // Japanese
  "jw",     // Javanese
  "ka",     // Georgian
  "kk",     // Kazakh
  "km",     // Cambodian
  "kn",     // Kannada
  "ko",     // Korean
  "ku",     // Kurdish
  "ky",     // Kyrgyz
  "la",     // Latin
  "ln",     // Lingala
  "lo",     // Laothian
  "lt",     // Lithuanian
  "lv",     // Latvian
  "mk",     // Macedonian
  "ml",     // Malayalam
  "mn",     // Mongolian
  "mo",     // Moldavian
  "mr",     // Marathi
  "ms",     // Malay
  "mt",     // Maltese
  "nb",     // Norwegian (Bokmal)
  "ne",     // Nepali
  "nl",     // Dutch
  "nn",     // Norwegian (Nynorsk)
  "no",     // Norwegian
  "oc",     // Occitan
  "om",     // Oromo
  "or",     // Oriya
  "pa",     // Punjabi
  "pl",     // Polish
  "ps",     // Pashto
  "pt",     // Portuguese
  "pt-BR",  // Portuguese (Brazil)
  "pt-PT",  // Portuguese (Portugal)
  "qu",     // Quechua
  "rm",     // Romansh
  "ro",     // Romanian
  "ru",     // Russian
  "sd",     // Sindhi
  "sh",     // Serbo-Croatian
  "si",     // Sinhalese
  "sk",     // Slovak
  "sl",     // Slovenian
  "sn",     // Shona
  "so",     // Somali
  "sq",     // Albanian
  "sr",     // Serbian
  "st",     // Sesotho
  "su",     // Sundanese
  "sv",     // Swedish
  "sw",     // Swahili
  "ta",     // Tamil
  "te",     // Telugu
  "tg",     // Tajik
  "th",     // Thai
  "ti",     // Tigrinya
  "tk",     // Turkmen
  "to",     // Tonga
  "tr",     // Turkish
  "tt",     // Tatar
  "tw",     // Twi
  "ug",     // Uighur
  "uk",     // Ukrainian
  "ur",     // Urdu
  "uz",     // Uzbek
  "vi",     // Vietnamese
  "xh",     // Xhosa
  "yi",     // Yiddish
  "yo",     // Yoruba
  "zh",     // Chinese
  "zh-CN",  // Chinese (Simplified)
  "zh-TW",  // Chinese (Traditional)
  "zu",     // Zulu
};
 188
 189 // Returns true if |locale_name| has an alias in the ICU data file.
 190 bool IsDuplicateName(const std::string& locale_name) {
 191   static const char* const kDuplicateNames[] = {
 192     "en",
 193     "en_001",
 194     "pt", // pt-BR and pt-PT are used.
 195     "zh",
 196     "zh_hans_cn",
 197     "zh_hant_hk",
 198     "zh_hant_mo",
 199     "zh_hans_sg",
 200     "zh_hant_tw"
 201   };
 202
 203   // Skip all the es_Foo other than es_419 for now.
 204   if (base::StartsWith(locale_name, "es_",
 205                        base::CompareCase::INSENSITIVE_ASCII))
 206     return !base::EndsWith(locale_name, "419", base::CompareCase::SENSITIVE);
 207
 208   for (size_t i = 0; i < arraysize(kDuplicateNames); ++i) {
 209     if (base::EqualsCaseInsensitiveASCII(kDuplicateNames[i], locale_name))
 210       return true;
 211   }
 212   return false;
 213 }
 214
 215 // We added 30+ minimally populated locales with only a few entries
 216 // (exemplar character set, script, writing direction and its own
 217 // lanaguage name). These locales have to be distinguished from the
 218 // fully populated locales to which Chrome is localized.
 219 bool IsLocalePartiallyPopulated(const std::string& locale_name) {
 220   // For partially populated locales, even the translation for "English"
 221   // is not available. A more robust/elegant way to check is to add a special
 222   // field (say, 'isPartial' to our version of ICU locale files) and
 223   // check its value, but this hack seems to work well.
 224   return !l10n_util::IsLocaleNameTranslated("en", locale_name);
 225 }
 226
 227 #if !defined(OS_MACOSX)
 228 bool IsLocaleAvailable(const std::string& locale) {
 229   // If locale has any illegal characters in it, we don't want to try to
 230   // load it because it may be pointing outside the locale data file directory.
 231   if (!base::i18n::IsFilenameLegal(base::ASCIIToUTF16(locale)))
 232     return false;
 233
 234   // IsLocalePartiallyPopulated() can be called here for an early return w/o
 235   // checking the resource availability below. It'd help when Chrome is run
 236   // under a system locale Chrome is not localized to (e.g.Farsi on Linux),
 237   // but it'd slow down the start up time a little bit for locales Chrome is
 238   // localized to. So, we don't call it here.
 239   if (!l10n_util::IsLocaleSupportedByOS(locale))
 240     return false;
 241
 242   // If the ResourceBundle is not yet initialized, return false to avoid the
 243   // CHECK failure in ResourceBundle::GetSharedInstance().
 244   if (!ResourceBundle::HasSharedInstance())
 245     return false;
 246
 247   // TODO(hshi): make ResourceBundle::LocaleDataPakExists() a static function
 248   // so that this can be invoked without initializing the global instance.
 249   // See crbug.com/230432: CHECK failure in GetUserDataDir().
 250   return ResourceBundle::GetSharedInstance().LocaleDataPakExists(locale);
 251 }
 252 #endif
 253
 254 // On Linux, the text layout engine Pango determines paragraph directionality
 255 // by looking at the first strongly-directional character in the text. This
 256 // means text such as "Google Chrome foo bar..." will be layed out LTR even
 257 // if "foo bar" is RTL. So this function prepends the necessary RLM in such
 258 // cases.
 259 void AdjustParagraphDirectionality(base::string16* paragraph) {
 260 #if defined(OS_POSIX) && !defined(OS_MACOSX) && !defined(OS_ANDROID)
 261   if (base::i18n::IsRTL() &&
 262       base::i18n::StringContainsStrongRTLChars(*paragraph)) {
 263     paragraph->insert(0, 1,
 264                       static_cast<base::char16>(base::i18n::kRightToLeftMark));
 265   }
 266 #endif
 267 }
 268
 269 struct AvailableLocalesTraits
 270     : base::DefaultLazyInstanceTraits<std::vector<std::string> > {
 271   static std::vector<std::string>* New(void* instance) {
 272     std::vector<std::string>* locales =
 273         base::DefaultLazyInstanceTraits<std::vector<std::string> >::New(
 274             instance);
 275     int num_locales = uloc_countAvailable();
 276     for (int i = 0; i < num_locales; ++i) {
 277       std::string locale_name = uloc_getAvailable(i);
 278       // Filter out the names that have aliases.
 279       if (IsDuplicateName(locale_name))
 280         continue;
 281       // Filter out locales for which we have only partially populated data
 282       // and to which Chrome is not localized.
 283       if (IsLocalePartiallyPopulated(locale_name))
 284         continue;
 285       if (!l10n_util::IsLocaleSupportedByOS(locale_name))
 286         continue;
 287       // Normalize underscores to hyphens because that's what our locale files
 288       // use.
 289       std::replace(locale_name.begin(), locale_name.end(), '_', '-');
 290
 291       // Map the Chinese locale names over to zh-CN and zh-TW.
 292       if (base::LowerCaseEqualsASCII(locale_name, "zh-hans")) {
 293         locale_name = "zh-CN";
 294       } else if (base::LowerCaseEqualsASCII(locale_name, "zh-hant")) {
 295         locale_name = "zh-TW";
 296       }
 297       locales->push_back(locale_name);
 298     }
 299
 300     return locales;
 301   }
 302 };
 303
// Lazily created list of locale names. Its contents are produced by
// AvailableLocalesTraits::New() and handed out by
// l10n_util::GetAvailableLocales().
base::LazyInstance<std::vector<std::string>, AvailableLocalesTraits>
    g_available_locales = LAZY_INSTANCE_INITIALIZER;
 306
 307 }  // namespace
 308
 309 namespace l10n_util {
 310
 311 std::string GetLanguage(const std::string& locale) {
 312   const std::string::size_type hyphen_pos = locale.find('-');
 313   return std::string(locale, 0, hyphen_pos);
 314 }
 315
// Tries to map |locale| onto a locale for which IsLocaleAvailable() is true.
// On success the chosen locale is stored in |resolved_locale| and true is
// returned; otherwise false is returned and |resolved_locale| is not written.
// Candidates are tried in this order:
//   1. |locale| itself;
//   2. its language, possibly with a remapped region (es-* -> es-419,
//      zh-* -> zh-TW/zh-CN, en-* -> en-GB/en-US);
//   3. the alias table at the bottom (no -> nb, tl -> fil, iw -> he,
//      en -> en-US).
// Not implemented on Mac, where it always returns false.
bool CheckAndResolveLocale(const std::string& locale,
                           std::string* resolved_locale) {
#if defined(OS_MACOSX)
  NOTIMPLEMENTED();
  return false;
#else
  // An exact match wins outright.
  if (IsLocaleAvailable(locale)) {
    *resolved_locale = locale;
    return true;
  }

  // A locale carrying a variant (an '@' suffix, e.g. ca_ES@valencia) is not
  // resolved any further: resolution fails as soon as an '@' is present.
  // NOTE(review): no fallback such as ca_ES@valencia -> ca@valencia -> ca is
  // attempted by this code.
  std::string::size_type variant_pos = locale.find('@');
  if (variant_pos != std::string::npos)
    return false;

  // If the locale matches language but not country, use that instead.
  // TODO(jungshik) : Nothing is done about languages that Chrome
  // does not support but available on Windows. We fall
  // back to en-US in GetApplicationLocale so that it's a not critical,
  // but we can do better.
  const std::string lang(GetLanguage(locale));
  // Only true when |locale| contained a hyphen, i.e. has a region part.
  if (lang.size() < locale.size()) {
    // Everything after the hyphen that follows the language.
    std::string region(locale, lang.size() + 1);
    std::string tmp_locale(lang);
    // Map es-RR other than es-ES to es-419 (Chrome's Latin American
    // Spanish locale).
    if (base::LowerCaseEqualsASCII(lang, "es") &&
        !base::LowerCaseEqualsASCII(region, "es")) {
      tmp_locale.append("-419");
    } else if (base::LowerCaseEqualsASCII(lang, "zh")) {
      // Map zh-HK and zh-MO to zh-TW. Otherwise, zh-FOO is mapped to zh-CN.
      if (base::LowerCaseEqualsASCII(region, "hk") ||
          base::LowerCaseEqualsASCII(region, "mo")) {  // Macao
        tmp_locale.append("-TW");
      } else {
        tmp_locale.append("-CN");
      }
    } else if (base::LowerCaseEqualsASCII(lang, "en")) {
      // Map Australian, Canadian, New Zealand and South African English
      // to British English for now.
      // TODO(jungshik): en-CA may have to change sides once
      // we have OS locale separate from app locale (Chrome's UI language).
      if (base::LowerCaseEqualsASCII(region, "au") ||
          base::LowerCaseEqualsASCII(region, "ca") ||
          base::LowerCaseEqualsASCII(region, "nz") ||
          base::LowerCaseEqualsASCII(region, "za")) {
        tmp_locale.append("-GB");
      } else {
        tmp_locale.append("-US");
      }
    }
    // For any other language |tmp_locale| is just the bare language code.
    if (IsLocaleAvailable(tmp_locale)) {
      resolved_locale->swap(tmp_locale);
      return true;
    }
  }

  // Google updater uses no, tl, iw and en for our nb, fil, he, and en-US.
  struct {
    const char* source;
    const char* dest;
  } alias_map[] = {
      {"no", "nb"},
      {"tl", "fil"},
      {"iw", "he"},
      {"en", "en-US"},
  };

  // The aliases are matched against the language part only, ignoring case.
  for (size_t i = 0; i < arraysize(alias_map); ++i) {
    if (base::LowerCaseEqualsASCII(lang, alias_map[i].source)) {
      std::string tmp_locale(alias_map[i].dest);
      if (IsLocaleAvailable(tmp_locale)) {
        resolved_locale->swap(tmp_locale);
        return true;
      }
    }
  }

  return false;
#endif
}
 400
// Chooses the application locale without touching ICU's default locale.
//
// On Mac: returns the locale override if there is one, else |pref_locale|,
// else "en-US".
//
// Elsewhere: builds a platform-specific list of candidate locales, returns
// the first one that CheckAndResolveLocale() can resolve, falls back to
// "en-US" if that locale is available, and otherwise returns an empty string.
std::string GetApplicationLocaleInternal(const std::string& pref_locale) {
#if defined(OS_MACOSX)

  // Use any override (Cocoa for the browser), otherwise use the preference
  // passed to the function.
  std::string app_locale = l10n_util::GetLocaleOverride();
  if (app_locale.empty())
    app_locale = pref_locale;

  // The above should handle all of the cases Chrome normally hits, but for some
  // unit tests, we need something to fall back to.
  if (app_locale.empty())
    app_locale = "en-US";

  return app_locale;

#else

  std::string resolved_locale;
  // Locales to try, in priority order.
  std::vector<std::string> candidates;

  // We only use --lang and the app pref on Windows.  On Linux, we only
  // look at the LC_*/LANG environment variables.  We do, however, pass --lang
  // to renderer and plugin processes so they know what language the parent
  // process decided to use.

#if defined(OS_WIN)

  // First, try the preference value.
  if (!pref_locale.empty())
    candidates.push_back(base::i18n::GetCanonicalLocale(pref_locale));

  // Next, try the overridden locale.
  const std::vector<std::string>& languages = l10n_util::GetLocaleOverrides();
  if (!languages.empty()) {
    candidates.reserve(candidates.size() + languages.size());
    // Append the canonical form of every override.
    std::transform(languages.begin(), languages.end(),
                   std::back_inserter(candidates),
                   &base::i18n::GetCanonicalLocale);
  } else {
    // If no override was set, defer to ICU
    candidates.push_back(base::i18n::GetConfiguredLocale());
  }

#elif defined(OS_ANDROID)

  // On Android, query java.util.Locale for the default locale.
  candidates.push_back(base::android::GetDefaultLocale());

#elif defined(USE_GLIB) && !defined(OS_CHROMEOS)

  // GLib implements correct environment variable parsing with
  // the precedence order: LANGUAGE, LC_ALL, LC_MESSAGES and LANG.
  // We used to use our custom parsing code along with ICU for this purpose.
  // If we have a port that does not depend on GTK, we have to
  // restore our custom code for that port.
  const char* const* languages = g_get_language_names();
  DCHECK(languages);  // A valid pointer is guaranteed.
  DCHECK(*languages);  // At least one entry, "C", is guaranteed.

  // The array is NULL-terminated; add the canonical form of each entry.
  for (; *languages != NULL; ++languages) {
    candidates.push_back(base::i18n::GetCanonicalLocale(*languages));
  }

#else

  // By default, use the application locale preference. This applies to ChromeOS
  // and linux systems without glib.
  if (!pref_locale.empty())
    candidates.push_back(pref_locale);

#endif

  // The first candidate that resolves to an available locale wins.
  std::vector<std::string>::const_iterator i = candidates.begin();
  for (; i != candidates.end(); ++i) {
    if (CheckAndResolveLocale(*i, &resolved_locale)) {
      return resolved_locale;
    }
  }

  // Fallback on en-US.
  const std::string fallback_locale("en-US");
  if (IsLocaleAvailable(fallback_locale)) {
    return fallback_locale;
  }

  // Not even en-US is available.
  return std::string();

#endif
}
 491
 492 std::string GetApplicationLocale(const std::string& pref_locale,
 493                                  bool set_icu_locale) {
 494   const std::string locale = GetApplicationLocaleInternal(pref_locale);
 495   if (set_icu_locale && !locale.empty())
 496     base::i18n::SetICUDefaultLocale(locale);
 497   return locale;
 498 }
 499
 500 std::string GetApplicationLocale(const std::string& pref_locale) {
 501   return GetApplicationLocale(pref_locale, true /* set_icu_locale */);
 502 }
 503
 504 bool IsLocaleNameTranslated(const char* locale,
 505                             const std::string& display_locale) {
 506   base::string16 display_name =
 507       l10n_util::GetDisplayNameForLocale(locale, display_locale, false);
 508   // Because ICU sets the error code to U_USING_DEFAULT_WARNING whether or not
 509   // uloc_getDisplayName returns the actual translation or the default
 510   // value (locale code), we have to rely on this hack to tell whether
 511   // the translation is available or not.  If ICU doesn't have a translated
 512   // name for this locale, GetDisplayNameForLocale will just return the
 513   // locale code.
 514   return !base::IsStringASCII(display_name) ||
 515       base::UTF16ToASCII(display_name) != locale;
 516 }
 517
 518 base::string16 GetDisplayNameForLocale(const std::string& locale,
 519                                        const std::string& display_locale,
 520                                        bool is_for_ui) {
 521   std::string locale_code = locale;
 522   // Internally, we use the language code of zh-CN and zh-TW, but we want the
 523   // display names to be Chinese (Simplified) and Chinese (Traditional) instead
 524   // of Chinese (China) and Chinese (Taiwan).
 525   // Translate uses "tl" (Tagalog) to mean "fil" (Filipino) until Google
 526   // translate is changed to understand "fil". Make "tl" alias to "fil".
 527   if (locale_code == "zh-CN")
 528     locale_code = "zh-Hans";
 529   else if (locale_code == "zh-TW")
 530     locale_code = "zh-Hant";
 531   else if (locale_code == "tl")
 532     locale_code = "fil";
 533   else if (locale_code == "mo")
 534     locale_code = "ro-MD";
 535
 536   base::string16 display_name;
 537 #if defined(OS_ANDROID)
 538   // Use Java API to get locale display name so that we can remove most of
 539   // the lang data from icu data to reduce binary size, except for zh-Hans and
 540   // zh-Hant because the current Android Java API doesn't support scripts.
 541   // TODO(wangxianzhu): remove the special handling of zh-Hans and zh-Hant once
 542   // Android Java API supports scripts.
 543   if (!base::StartsWith(locale_code, "zh-Han", base::CompareCase::SENSITIVE)) {
 544     display_name = GetDisplayNameForLocale(locale_code, display_locale);
 545   } else
 546 #endif
 547   {
 548     UErrorCode error = U_ZERO_ERROR;
 549     const int kBufferSize = 1024;
 550
 551     int actual_size = uloc_getDisplayName(
 552         locale_code.c_str(), display_locale.c_str(),
 553         base::WriteInto(&display_name, kBufferSize), kBufferSize - 1, &error);
 554     DCHECK(U_SUCCESS(error));
 555     display_name.resize(actual_size);
 556   }
 557
 558   // Add directional markup so parentheses are properly placed.
 559   if (is_for_ui && base::i18n::IsRTL())
 560     base::i18n::AdjustStringForLocaleDirection(&display_name);
 561   return display_name;
 562 }
 563
 564 base::string16 GetDisplayNameForCountry(const std::string& country_code,
 565                                         const std::string& display_locale) {
 566   return GetDisplayNameForLocale("_" + country_code, display_locale, false);
 567 }
 568
 569 std::string NormalizeLocale(const std::string& locale) {
 570   std::string normalized_locale(locale);
 571   std::replace(normalized_locale.begin(), normalized_locale.end(), '-', '_');
 572
 573   return normalized_locale;
 574 }
 575
 576 void GetParentLocales(const std::string& current_locale,
 577                       std::vector<std::string>* parent_locales) {
 578   std::string locale(NormalizeLocale(current_locale));
 579
 580   const int kNameCapacity = 256;
 581   char parent[kNameCapacity];
 582   base::strlcpy(parent, locale.c_str(), kNameCapacity);
 583   parent_locales->push_back(parent);
 584   UErrorCode err = U_ZERO_ERROR;
 585   while (uloc_getParent(parent, parent, kNameCapacity, &err) > 0) {
 586     if (U_FAILURE(err))
 587       break;
 588     parent_locales->push_back(parent);
 589   }
 590 }
 591
 592 bool IsValidLocaleSyntax(const std::string& locale) {
 593   // Check that the length is plausible.
 594   if (locale.size() < 2 || locale.size() >= ULOC_FULLNAME_CAPACITY)
 595     return false;
 596
 597   // Strip off the part after an '@' sign, which might contain keywords,
 598   // as in en_IE@currency=IEP or fr@collation=phonebook;calendar=islamic-civil.
 599   // We don't validate that part much, just check that there's at least one
 600   // equals sign in a plausible place. Normalize the prefix so that hyphens
 601   // are changed to underscores.
 602   std::string prefix = NormalizeLocale(locale);
 603   size_t split_point = locale.find("@");
 604   if (split_point != std::string::npos) {
 605     std::string keywords = locale.substr(split_point + 1);
 606     prefix = locale.substr(0, split_point);
 607
 608     size_t equals_loc = keywords.find("=");
 609     if (equals_loc == std::string::npos ||
 610         equals_loc < 1 || equals_loc > keywords.size() - 2)
 611       return false;
 612   }
 613
 614   // Check that all characters before the at-sign are alphanumeric or
 615   // underscore.
 616   for (size_t i = 0; i < prefix.size(); i++) {
 617     char ch = prefix[i];
 618     if (!base::IsAsciiAlpha(ch) && !base::IsAsciiDigit(ch) && ch != '_')
 619       return false;
 620   }
 621
 622   // Check that the initial token (before the first hyphen/underscore)
 623   // is 1 - 3 alphabetical characters (a language tag).
 624   for (size_t i = 0; i < prefix.size(); i++) {
 625     char ch = prefix[i];
 626     if (ch == '_') {
 627       if (i < 1 || i > 3)
 628         return false;
 629       break;
 630     }
 631     if (!base::IsAsciiAlpha(ch))
 632       return false;
 633   }
 634
 635   // Check that the all tokens after the initial token are 1 - 8 characters.
 636   // (Tokenize/StringTokenizer don't work here, they collapse multiple
 637   // delimiters into one.)
 638   int token_len = 0;
 639   int token_index = 0;
 640   for (size_t i = 0; i < prefix.size(); i++) {
 641     if (prefix[i] != '_') {
 642       token_len++;
 643       continue;
 644     }
 645
 646     if (token_index > 0 && (token_len < 1 || token_len > 8)) {
 647       return false;
 648     }
 649     token_index++;
 650     token_len = 0;
 651   }
 652   if (token_index == 0 && (token_len < 1 || token_len > 3)) {
 653     return false;
 654   } else if (token_len < 1 || token_len > 8) {
 655     return false;
 656   }
 657
 658   return true;
 659 }
 660
 661 std::string GetStringUTF8(int message_id) {
 662   return base::UTF16ToUTF8(GetStringUTF16(message_id));
 663 }
 664
 665 base::string16 GetStringUTF16(int message_id) {
 666   ResourceBundle& rb = ResourceBundle::GetSharedInstance();
 667   base::string16 str = rb.GetLocalizedString(message_id);
 668   AdjustParagraphDirectionality(&str);
 669
 670   return str;
 671 }
 672
 673 base::string16 GetStringFUTF16(int message_id,
 674                                const std::vector<base::string16>& replacements,
 675                                std::vector<size_t>* offsets) {
 676   // TODO(tc): We could save a string copy if we got the raw string as
 677   // a StringPiece and were able to call ReplaceStringPlaceholders with
 678   // a StringPiece format string and base::string16 substitution strings.  In
 679   // practice, the strings should be relatively short.
 680   ResourceBundle& rb = ResourceBundle::GetSharedInstance();
 681   const base::string16& format_string = rb.GetLocalizedString(message_id);
 682
 683 #ifndef NDEBUG
 684   // Make sure every replacement string is being used, so we don't just
 685   // silently fail to insert one. If |offsets| is non-NULL, then don't do this
 686   // check as the code may simply want to find the placeholders rather than
 687   // actually replacing them.
 688   if (!offsets) {
 689     std::string utf8_string = base::UTF16ToUTF8(format_string);
 690
 691     // $9 is the highest allowed placeholder.
 692     for (size_t i = 0; i < 9; ++i) {
 693       bool placeholder_should_exist = replacements.size() > i;
 694
 695       std::string placeholder =
 696           base::StringPrintf("$%d", static_cast<int>(i + 1));
 697       size_t pos = utf8_string.find(placeholder.c_str());
 698       if (placeholder_should_exist) {
 699         DCHECK_NE(std::string::npos, pos) <<
 700             " Didn't find a " << placeholder << " placeholder in " <<
 701             utf8_string;
 702       } else {
 703         DCHECK_EQ(std::string::npos, pos) <<
 704             " Unexpectedly found a " << placeholder << " placeholder in " <<
 705             utf8_string;
 706       }
 707     }
 708   }
 709 #endif
 710
 711   base::string16 formatted = base::ReplaceStringPlaceholders(
 712       format_string, replacements, offsets);
 713   AdjustParagraphDirectionality(&formatted);
 714
 715   return formatted;
 716 }
 717
 718 std::string GetStringFUTF8(int message_id,
 719                            const base::string16& a) {
 720   return base::UTF16ToUTF8(GetStringFUTF16(message_id, a));
 721 }
 722
 723 std::string GetStringFUTF8(int message_id,
 724                            const base::string16& a,
 725                            const base::string16& b) {
 726   return base::UTF16ToUTF8(GetStringFUTF16(message_id, a, b));
 727 }
 728
 729 std::string GetStringFUTF8(int message_id,
 730                            const base::string16& a,
 731                            const base::string16& b,
 732                            const base::string16& c) {
 733   return base::UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c));
 734 }
 735
 736 std::string GetStringFUTF8(int message_id,
 737                            const base::string16& a,
 738                            const base::string16& b,
 739                            const base::string16& c,
 740                            const base::string16& d) {
 741   return base::UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c, d));
 742 }
 743
 744 base::string16 GetStringFUTF16(int message_id,
 745                                const base::string16& a) {
 746   std::vector<base::string16> replacements;
 747   replacements.push_back(a);
 748   return GetStringFUTF16(message_id, replacements, NULL);
 749 }
 750
 751 base::string16 GetStringFUTF16(int message_id,
 752                                const base::string16& a,
 753                                const base::string16& b) {
 754   return GetStringFUTF16(message_id, a, b, NULL);
 755 }
 756
 757 base::string16 GetStringFUTF16(int message_id,
 758                                const base::string16& a,
 759                                const base::string16& b,
 760                                const base::string16& c) {
 761   std::vector<base::string16> replacements;
 762   replacements.push_back(a);
 763   replacements.push_back(b);
 764   replacements.push_back(c);
 765   return GetStringFUTF16(message_id, replacements, NULL);
 766 }
 767
 768 base::string16 GetStringFUTF16(int message_id,
 769                                const base::string16& a,
 770                                const base::string16& b,
 771                                const base::string16& c,
 772                                const base::string16& d) {
 773   std::vector<base::string16> replacements;
 774   replacements.push_back(a);
 775   replacements.push_back(b);
 776   replacements.push_back(c);
 777   replacements.push_back(d);
 778   return GetStringFUTF16(message_id, replacements, NULL);
 779 }
 780
 781 base::string16 GetStringFUTF16(int message_id,
 782                                const base::string16& a,
 783                                const base::string16& b,
 784                                const base::string16& c,
 785                                const base::string16& d,
 786                                const base::string16& e) {
 787   std::vector<base::string16> replacements;
 788   replacements.push_back(a);
 789   replacements.push_back(b);
 790   replacements.push_back(c);
 791   replacements.push_back(d);
 792   replacements.push_back(e);
 793   return GetStringFUTF16(message_id, replacements, NULL);
 794 }
 795
 796 base::string16 GetStringFUTF16(int message_id,
 797                                const base::string16& a,
 798                                size_t* offset) {
 799   DCHECK(offset);
 800   std::vector<size_t> offsets;
 801   std::vector<base::string16> replacements;
 802   replacements.push_back(a);
 803   base::string16 result = GetStringFUTF16(message_id, replacements, &offsets);
 804   DCHECK(offsets.size() == 1);
 805   *offset = offsets[0];
 806   return result;
 807 }
 808
 809 base::string16 GetStringFUTF16(int message_id,
 810                                const base::string16& a,
 811                                const base::string16& b,
 812                                std::vector<size_t>* offsets) {
 813   std::vector<base::string16> replacements;
 814   replacements.push_back(a);
 815   replacements.push_back(b);
 816   return GetStringFUTF16(message_id, replacements, offsets);
 817 }
 818
 819 base::string16 GetStringFUTF16Int(int message_id, int a) {
 820   return GetStringFUTF16(message_id, base::UTF8ToUTF16(base::IntToString(a)));
 821 }
 822
 823 base::string16 GetStringFUTF16Int(int message_id, int64 a) {
 824   return GetStringFUTF16(message_id, base::UTF8ToUTF16(base::Int64ToString(a)));
 825 }
 826
 827 base::string16 GetPluralStringFUTF16(int message_id, int number) {
 828   base::string16 pattern = GetStringUTF16(message_id);
 829   UErrorCode err = U_ZERO_ERROR;
 830   icu::MessageFormat format(
 831       icu::UnicodeString(FALSE, pattern.data(), pattern.length()), err);
 832   icu::UnicodeString result_unistring;
 833   FormatNumberInPlural(format, number, &result_unistring, &err);
 834   int capacity = result_unistring.length() + 1;
 835   DCHECK_GT(capacity, 1);
 836   base::string16 result;
 837   result_unistring.extract(
 838       static_cast<UChar*>(base::WriteInto(&result, capacity)), capacity, err);
 839   DCHECK(U_SUCCESS(err));
 840   return result;
 841 }
 842
 843 std::string GetPluralStringFUTF8(int message_id, int number) {
 844   return base::UTF16ToUTF8(GetPluralStringFUTF16(message_id, number));
 845 }
 846
 847 void SortStrings16(const std::string& locale,
 848                    std::vector<base::string16>* strings) {
 849   SortVectorWithStringKey(locale, strings, false);
 850 }
 851
 852 const std::vector<std::string>& GetAvailableLocales() {
 853   return g_available_locales.Get();
 854 }
 855
 856 void GetAcceptLanguagesForLocale(const std::string& display_locale,
 857                                  std::vector<std::string>* locale_codes) {
 858   for (size_t i = 0; i < arraysize(kAcceptLanguageList); ++i) {
 859     if (!l10n_util::IsLocaleNameTranslated(kAcceptLanguageList[i],
 860                                            display_locale))
 861       // TODO(jungshik) : Put them at the of the list with language codes
 862       // enclosed by brackets instead of skipping.
 863         continue;
 864     locale_codes->push_back(kAcceptLanguageList[i]);
 865   }
 866 }
 867
 868 int GetLocalizedContentsWidthInPixels(int pixel_resource_id) {
 869   int width = 0;
 870   base::StringToInt(l10n_util::GetStringUTF8(pixel_resource_id), &width);
 871   DCHECK_GT(width, 0);
 872   return width;
 873 }
 874
 875 const char* const* GetAcceptLanguageListForTesting() {
 876   return kAcceptLanguageList;
 877 }
 878
 879 size_t GetAcceptLanguageListSizeForTesting() {
 880   return arraysize(kAcceptLanguageList);
 881 }
 882
 883 }  // namespace l10n_util