ui/base/l10n/l10n_util.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "ui/base/l10n/l10n_util.h"
   6
   7 #include <algorithm>
   8 #include <cstdlib>
   9 #include <iterator>
  10 #include <string>
  11
  12 #include "base/command_line.h"
  13 #include "base/compiler_specific.h"
  14 #include "base/file_util.h"
  15 #include "base/i18n/file_util_icu.h"
  16 #include "base/i18n/rtl.h"
  17 #include "base/i18n/string_compare.h"
  18 #include "base/lazy_instance.h"
  19 #include "base/memory/scoped_ptr.h"
  20 #include "base/path_service.h"
  21 #include "base/string_number_conversions.h"
  22 #include "base/string_util.h"
  23 #include "base/stringprintf.h"
  24 #include "base/strings/string_split.h"
  25 #include "base/strings/sys_string_conversions.h"
  26 #include "base/utf_string_conversions.h"
  27 #include "build/build_config.h"
  28 #include "third_party/icu/public/common/unicode/rbbi.h"
  29 #include "third_party/icu/public/common/unicode/uloc.h"
  30 #include "ui/base/l10n/l10n_util_collator.h"
  31 #include "ui/base/resource/resource_bundle.h"
  32 #include "ui/base/ui_base_paths.h"
  33
  34 #if defined(OS_ANDROID)
  35 #include "ui/base/l10n/l10n_util_android.h"
  36 #endif
  37
  38 #if defined(OS_LINUX)
  39 #include <glib.h>
  40 #endif
  41
  42 #if defined(OS_WIN)
  43 #include "ui/base/l10n/l10n_util_win.h"
  44 #endif  // OS_WIN
  45
  46 namespace {
  47
  48 static const char* const kAcceptLanguageList[] = {
  49   "af",     // Afrikaans
  50   "am",     // Amharic
  51   "ar",     // Arabic
  52   "az",     // Azerbaijani
  53   "be",     // Belarusian
  54   "bg",     // Bulgarian
  55   "bh",     // Bihari
  56   "bn",     // Bengali
  57   "br",     // Breton
  58   "bs",     // Bosnian
  59   "ca",     // Catalan
  60   "co",     // Corsican
  61   "cs",     // Czech
  62   "cy",     // Welsh
  63   "da",     // Danish
  64   "de",     // German
  65   "de-AT",  // German (Austria)
  66   "de-CH",  // German (Switzerland)
  67   "de-DE",  // German (Germany)
  68   "el",     // Greek
  69   "en",     // English
  70   "en-AU",  // English (Australia)
  71   "en-CA",  // English (Canada)
  72   "en-GB",  // English (UK)
  73   "en-NZ",  // English (New Zealand)
  74   "en-US",  // English (US)
  75   "en-ZA",  // English (South Africa)
  76   "eo",     // Esperanto
  77   // TODO(jungshik) : Do we want to list all es-Foo for Latin-American
  78   // Spanish speaking countries?
  79   "es",     // Spanish
  80   "es-419", // Spanish (Latin America)
  81   "et",     // Estonian
  82   "eu",     // Basque
  83   "fa",     // Persian
  84   "fi",     // Finnish
  85   "fil",    // Filipino
  86   "fo",     // Faroese
  87   "fr",     // French
  88   "fr-CA",  // French (Canada)
  89   "fr-CH",  // French (Switzerland)
  90   "fr-FR",  // French (France)
  91   "fy",     // Frisian
  92   "ga",     // Irish
  93   "gd",     // Scots Gaelic
  94   "gl",     // Galician
  95   "gn",     // Guarani
  96   "gu",     // Gujarati
  97   "ha",     // Hausa
  98   "haw",    // Hawaiian
  99   "he",     // Hebrew
 100   "hi",     // Hindi
 101   "hr",     // Croatian
 102   "hu",     // Hungarian
 103   "hy",     // Armenian
 104   "ia",     // Interlingua
 105   "id",     // Indonesian
 106   "is",     // Icelandic
 107   "it",     // Italian
 108   "it-CH",  // Italian (Switzerland)
 109   "it-IT",  // Italian (Italy)
 110   "ja",     // Japanese
 111   "jw",     // Javanese
 112   "ka",     // Georgian
 113   "kk",     // Kazakh
 114   "km",     // Cambodian
 115   "kn",     // Kannada
 116   "ko",     // Korean
 117   "ku",     // Kurdish
 118   "ky",     // Kyrgyz
 119   "la",     // Latin
 120   "ln",     // Lingala
 121   "lo",     // Laothian
 122   "lt",     // Lithuanian
 123   "lv",     // Latvian
 124   "mk",     // Macedonian
 125   "ml",     // Malayalam
 126   "mn",     // Mongolian
 127   "mo",     // Moldavian
 128   "mr",     // Marathi
 129   "ms",     // Malay
 130   "mt",     // Maltese
 131   "nb",     // Norwegian (Bokmal)
 132   "ne",     // Nepali
 133   "nl",     // Dutch
 134   "nn",     // Norwegian (Nynorsk)
 135   "no",     // Norwegian
 136   "oc",     // Occitan
 137   "om",     // Oromo
 138   "or",     // Oriya
 139   "pa",     // Punjabi
 140   "pl",     // Polish
 141   "ps",     // Pashto
 142   "pt",     // Portuguese
 143   "pt-BR",  // Portuguese (Brazil)
 144   "pt-PT",  // Portuguese (Portugal)
 145   "qu",     // Quechua
 146   "rm",     // Romansh
 147   "ro",     // Romanian
 148   "ru",     // Russian
 149   "sd",     // Sindhi
 150   "sh",     // Serbo-Croatian
 151   "si",     // Sinhalese
 152   "sk",     // Slovak
 153   "sl",     // Slovenian
 154   "sn",     // Shona
 155   "so",     // Somali
 156   "sq",     // Albanian
 157   "sr",     // Serbian
 158   "st",     // Sesotho
 159   "su",     // Sundanese
 160   "sv",     // Swedish
 161   "sw",     // Swahili
 162   "ta",     // Tamil
 163   "te",     // Telugu
 164   "tg",     // Tajik
 165   "th",     // Thai
 166   "ti",     // Tigrinya
 167   "tk",     // Turkmen
 168   "to",     // Tonga
 169   "tr",     // Turkish
 170   "tt",     // Tatar
 171   "tw",     // Twi
 172   "ug",     // Uighur
 173   "uk",     // Ukrainian
 174   "ur",     // Urdu
 175   "uz",     // Uzbek
 176   "vi",     // Vietnamese
 177   "xh",     // Xhosa
 178   "yi",     // Yiddish
 179   "yo",     // Yoruba
 180   "zh",     // Chinese
 181   "zh-CN",  // Chinese (Simplified)
 182   "zh-TW",  // Chinese (Traditional)
 183   "zu",     // Zulu
 184 };
 185
 186 // Returns true if |locale_name| has an alias in the ICU data file.
 187 bool IsDuplicateName(const std::string& locale_name) {
 188   static const char* const kDuplicateNames[] = {
 189     "en",
 190     "pt",
 191     "zh",
 192     "zh_hans_cn",
 193     "zh_hant_hk",
 194     "zh_hant_mo",
 195     "zh_hans_sg",
 196     "zh_hant_tw"
 197   };
 198
 199   // Skip all 'es_RR'. Currently, we use 'es' for es-ES (Spanish in Spain).
 200   // 'es-419' (Spanish in Latin America) is not available in ICU so that it
 201   // has to be added manually in GetAvailableLocales().
 202   if (LowerCaseEqualsASCII(locale_name.substr(0, 3),  "es_"))
 203     return true;
 204   for (size_t i = 0; i < arraysize(kDuplicateNames); ++i) {
 205     if (base::strcasecmp(kDuplicateNames[i], locale_name.c_str()) == 0)
 206       return true;
 207   }
 208   return false;
 209 }
 210
 211 bool IsLocaleNameTranslated(const char* locale,
 212                             const std::string& display_locale) {
 213   string16 display_name =
 214       l10n_util::GetDisplayNameForLocale(locale, display_locale, false);
 215   // Because ICU sets the error code to U_USING_DEFAULT_WARNING whether or not
 216   // uloc_getDisplayName returns the actual translation or the default
 217   // value (locale code), we have to rely on this hack to tell whether
 218   // the translation is available or not.  If ICU doesn't have a translated
 219   // name for this locale, GetDisplayNameForLocale will just return the
 220   // locale code.
 221   return !IsStringASCII(display_name) || UTF16ToASCII(display_name) != locale;
 222 }
 223
 224 // We added 30+ minimally populated locales with only a few entries
 225 // (exemplar character set, script, writing direction and its own
 226 // lanaguage name). These locales have to be distinguished from the
 227 // fully populated locales to which Chrome is localized.
 228 bool IsLocalePartiallyPopulated(const std::string& locale_name) {
 229   // For partially populated locales, even the translation for "English"
 230   // is not available. A more robust/elegant way to check is to add a special
 231   // field (say, 'isPartial' to our version of ICU locale files) and
 232   // check its value, but this hack seems to work well.
 233   return !IsLocaleNameTranslated("en", locale_name);
 234 }
 235
 236 #if !defined(OS_MACOSX)
 237 bool IsLocaleAvailable(const std::string& locale) {
 238   // If locale has any illegal characters in it, we don't want to try to
 239   // load it because it may be pointing outside the locale data file directory.
 240   if (!file_util::IsFilenameLegal(ASCIIToUTF16(locale)))
 241     return false;
 242
 243   // IsLocalePartiallyPopulated() can be called here for an early return w/o
 244   // checking the resource availability below. It'd help when Chrome is run
 245   // under a system locale Chrome is not localized to (e.g.Farsi on Linux),
 246   // but it'd slow down the start up time a little bit for locales Chrome is
 247   // localized to. So, we don't call it here.
 248   if (!l10n_util::IsLocaleSupportedByOS(locale))
 249     return false;
 250
 251   // If the ResourceBundle is not yet initialized, return false to avoid the
 252   // CHECK failure in ResourceBundle::GetSharedInstance().
 253   if (!ResourceBundle::HasSharedInstance())
 254     return false;
 255
 256   // TODO(hshi): make ResourceBundle::LocaleDataPakExists() a static function
 257   // so that this can be invoked without initializing the global instance.
 258   // See crbug.com/230432: CHECK failure in GetUserDataDir().
 259   return ResourceBundle::GetSharedInstance().LocaleDataPakExists(locale);
 260 }
 261
 262 bool CheckAndResolveLocale(const std::string& locale,
 263                            std::string* resolved_locale) {
 264   if (IsLocaleAvailable(locale)) {
 265     *resolved_locale = locale;
 266     return true;
 267   }
 268
 269   // If there's a variant, skip over it so we can try without the region
 270   // code.  For example, ca_ES@valencia should cause us to try ca@valencia
 271   // before ca.
 272   std::string::size_type variant_pos = locale.find('@');
 273   if (variant_pos != std::string::npos)
 274     return false;
 275
 276   // If the locale matches language but not country, use that instead.
 277   // TODO(jungshik) : Nothing is done about languages that Chrome
 278   // does not support but available on Windows. We fall
 279   // back to en-US in GetApplicationLocale so that it's a not critical,
 280   // but we can do better.
 281   std::string::size_type hyphen_pos = locale.find('-');
 282   std::string lang(locale, 0, hyphen_pos);
 283   if (hyphen_pos != std::string::npos && hyphen_pos > 0) {
 284     std::string region(locale, hyphen_pos + 1);
 285     std::string tmp_locale(lang);
 286     // Map es-RR other than es-ES to es-419 (Chrome's Latin American
 287     // Spanish locale).
 288     if (LowerCaseEqualsASCII(lang, "es") &&
 289         !LowerCaseEqualsASCII(region, "es")) {
 290       tmp_locale.append("-419");
 291     } else if (LowerCaseEqualsASCII(lang, "zh")) {
 292       // Map zh-HK and zh-MO to zh-TW. Otherwise, zh-FOO is mapped to zh-CN.
 293       if (LowerCaseEqualsASCII(region, "hk") ||
 294           LowerCaseEqualsASCII(region, "mo")) { // Macao
 295         tmp_locale.append("-TW");
 296       } else {
 297         tmp_locale.append("-CN");
 298       }
 299     } else if (LowerCaseEqualsASCII(lang, "en")) {
 300       // Map Australian, Canadian, New Zealand and South African English
 301       // to British English for now.
 302       // TODO(jungshik): en-CA may have to change sides once
 303       // we have OS locale separate from app locale (Chrome's UI language).
 304       if (LowerCaseEqualsASCII(region, "au") ||
 305           LowerCaseEqualsASCII(region, "ca") ||
 306           LowerCaseEqualsASCII(region, "nz") ||
 307           LowerCaseEqualsASCII(region, "za")) {
 308         tmp_locale.append("-GB");
 309       } else {
 310         tmp_locale.append("-US");
 311       }
 312     }
 313     if (IsLocaleAvailable(tmp_locale)) {
 314       resolved_locale->swap(tmp_locale);
 315       return true;
 316     }
 317   }
 318
 319   // Google updater uses no, tl, iw and en for our nb, fil, he, and en-US.
 320   struct {
 321     const char* source;
 322     const char* dest;
 323   } alias_map[] = {
 324       {"no", "nb"},
 325       {"tl", "fil"},
 326       {"iw", "he"},
 327       {"en", "en-US"},
 328   };
 329
 330   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(alias_map); ++i) {
 331     if (LowerCaseEqualsASCII(lang, alias_map[i].source)) {
 332       std::string tmp_locale(alias_map[i].dest);
 333       if (IsLocaleAvailable(tmp_locale)) {
 334         resolved_locale->swap(tmp_locale);
 335         return true;
 336       }
 337     }
 338   }
 339
 340   return false;
 341 }
 342 #endif
 343
 344 // On Linux, the text layout engine Pango determines paragraph directionality
 345 // by looking at the first strongly-directional character in the text. This
 346 // means text such as "Google Chrome foo bar..." will be layed out LTR even
 347 // if "foo bar" is RTL. So this function prepends the necessary RLM in such
 348 // cases.
 349 void AdjustParagraphDirectionality(string16* paragraph) {
 350 #if defined(OS_POSIX) && !defined(OS_MACOSX) && !defined(OS_ANDROID)
 351   if (base::i18n::IsRTL() &&
 352       base::i18n::StringContainsStrongRTLChars(*paragraph)) {
 353     paragraph->insert(0, 1, static_cast<char16>(base::i18n::kRightToLeftMark));
 354   }
 355 #endif
 356 }
 357
 358 #if defined(OS_WIN)
 359 std::string GetCanonicalLocale(const std::string& locale) {
 360   return base::i18n::GetCanonicalLocale(locale.c_str());
 361 }
 362 #endif
 363
 364 struct AvailableLocalesTraits :
 365     base::DefaultLazyInstanceTraits<std::vector<std::string> > {
 366   static std::vector<std::string>* New(void* instance) {
 367     std::vector<std::string>* locales =
 368         base::DefaultLazyInstanceTraits<std::vector<std::string> >::New(
 369             instance);
 370     int num_locales = uloc_countAvailable();
 371     for (int i = 0; i < num_locales; ++i) {
 372       std::string locale_name = uloc_getAvailable(i);
 373       // Filter out the names that have aliases.
 374       if (IsDuplicateName(locale_name))
 375         continue;
 376       // Filter out locales for which we have only partially populated data
 377       // and to which Chrome is not localized.
 378       if (IsLocalePartiallyPopulated(locale_name))
 379         continue;
 380       if (!l10n_util::IsLocaleSupportedByOS(locale_name))
 381         continue;
 382       // Normalize underscores to hyphens because that's what our locale files
 383       // use.
 384       std::replace(locale_name.begin(), locale_name.end(), '_', '-');
 385
 386       // Map the Chinese locale names over to zh-CN and zh-TW.
 387       if (LowerCaseEqualsASCII(locale_name, "zh-hans")) {
 388         locale_name = "zh-CN";
 389       } else if (LowerCaseEqualsASCII(locale_name, "zh-hant")) {
 390         locale_name = "zh-TW";
 391       }
 392       locales->push_back(locale_name);
 393     }
 394
 395     // Manually add 'es-419' to the list. See the comment in IsDuplicateName().
 396     locales->push_back("es-419");
 397     return locales;
 398   }
 399 };
 400
 401 base::LazyInstance<std::vector<std::string>, AvailableLocalesTraits >
 402     g_available_locales = LAZY_INSTANCE_INITIALIZER;
 403
 404 }  // namespace
 405
 406 namespace l10n_util {
 407
 408 std::string GetApplicationLocale(const std::string& pref_locale) {
 409 #if defined(OS_MACOSX)
 410
 411   // Use any override (Cocoa for the browser), otherwise use the preference
 412   // passed to the function.
 413   std::string app_locale = l10n_util::GetLocaleOverride();
 414   if (app_locale.empty())
 415     app_locale = pref_locale;
 416
 417   // The above should handle all of the cases Chrome normally hits, but for some
 418   // unit tests, we need something to fall back too.
 419   if (app_locale.empty())
 420     app_locale = "en-US";
 421
 422   // Windows/Linux call SetICUDefaultLocale after determining the actual locale
 423   // with CheckAndResolveLocal to make ICU APIs work in that locale.
 424   // Mac doesn't use a locale directory tree of resources (it uses Mac style
 425   // resources), so mirror the Windows/Linux behavior of calling
 426   // SetICUDefaultLocale.
 427   base::i18n::SetICUDefaultLocale(app_locale);
 428   return app_locale;
 429
 430 #else
 431
 432   std::string resolved_locale;
 433   std::vector<std::string> candidates;
 434
 435   // We only use --lang and the app pref on Windows.  On Linux, we only
 436   // look at the LC_*/LANG environment variables.  We do, however, pass --lang
 437   // to renderer and plugin processes so they know what language the parent
 438   // process decided to use.
 439
 440 #if defined(OS_WIN)
 441
 442   // First, try the preference value.
 443   if (!pref_locale.empty())
 444     candidates.push_back(GetCanonicalLocale(pref_locale));
 445
 446   // Next, try the overridden locale.
 447   const std::vector<std::string>& languages = l10n_util::GetLocaleOverrides();
 448   if (!languages.empty()) {
 449     candidates.reserve(candidates.size() + languages.size());
 450     std::transform(languages.begin(), languages.end(),
 451                    std::back_inserter(candidates), &GetCanonicalLocale);
 452   } else {
 453     // If no override was set, defer to ICU
 454     candidates.push_back(base::i18n::GetConfiguredLocale());
 455   }
 456
 457 #elif defined(OS_CHROMEOS) || (defined(USE_AURA) && !defined(OS_LINUX))
 458
 459   // On ChromeOS, use the application locale preference.
 460   if (!pref_locale.empty())
 461     candidates.push_back(pref_locale);
 462
 463 #elif defined(OS_ANDROID)
 464
 465   // On Android, query java.util.Locale for the default locale.
 466   candidates.push_back(GetDefaultLocale());
 467
 468 #elif defined(OS_LINUX)
 469   // If we're on a different Linux system, we have glib.
 470
 471   // GLib implements correct environment variable parsing with
 472   // the precedence order: LANGUAGE, LC_ALL, LC_MESSAGES and LANG.
 473   // We used to use our custom parsing code along with ICU for this purpose.
 474   // If we have a port that does not depend on GTK, we have to
 475   // restore our custom code for that port.
 476   const char* const* languages = g_get_language_names();
 477   DCHECK(languages);  // A valid pointer is guaranteed.
 478   DCHECK(*languages);  // At least one entry, "C", is guaranteed.
 479
 480   for (; *languages != NULL; ++languages) {
 481     candidates.push_back(base::i18n::GetCanonicalLocale(*languages));
 482   }
 483
 484 #else
 485 #error Unsupported platform, see build/build_config.h
 486 #endif
 487
 488   std::vector<std::string>::const_iterator i = candidates.begin();
 489   for (; i != candidates.end(); ++i) {
 490     if (CheckAndResolveLocale(*i, &resolved_locale)) {
 491       base::i18n::SetICUDefaultLocale(resolved_locale);
 492       return resolved_locale;
 493     }
 494   }
 495
 496   // Fallback on en-US.
 497   const std::string fallback_locale("en-US");
 498   if (IsLocaleAvailable(fallback_locale)) {
 499     base::i18n::SetICUDefaultLocale(fallback_locale);
 500     return fallback_locale;
 501   }
 502
 503   return std::string();
 504
 505 #endif
 506 }
 507
 508 string16 GetDisplayNameForLocale(const std::string& locale,
 509                                  const std::string& display_locale,
 510                                  bool is_for_ui) {
 511   std::string locale_code = locale;
 512   // Internally, we use the language code of zh-CN and zh-TW, but we want the
 513   // display names to be Chinese (Simplified) and Chinese (Traditional) instead
 514   // of Chinese (China) and Chinese (Taiwan).  To do that, we pass zh-Hans
 515   // and zh-Hant to ICU. Even with this mapping, we'd get
 516   // 'Chinese (Simplified Han)' and 'Chinese (Traditional Han)' in English and
 517   // even longer results in other languages. Arguably, they're better than
 518   // the current results : Chinese (China) / Chinese (Taiwan).
 519   // TODO(jungshik): Do one of the following:
 520   // 1. Special-case Chinese by getting the custom-translation for them
 521   // 2. Recycle IDS_ENCODING_{SIMP,TRAD}_CHINESE.
 522   // 3. Get translations for two directly from the ICU resouce bundle
 523   // because they're not accessible with other any API.
 524   // 4. Patch ICU to special-case zh-Hans/zh-Hant for us.
 525   // #1 and #2 wouldn't work if display_locale != current UI locale although
 526   // we can think of additional hack to work around the problem.
 527   // #3 can be potentially expensive.
 528   if (locale_code == "zh-CN")
 529     locale_code = "zh-Hans";
 530   else if (locale_code == "zh-TW")
 531     locale_code = "zh-Hant";
 532
 533   string16 display_name;
 534 #if defined(OS_ANDROID)
 535   // Use Java API to get locale display name so that we can remove most of
 536   // the lang data from icu data to reduce binary size, except for zh-Hans and
 537   // zh-Hant because the current Android Java API doesn't support scripts.
 538   // TODO(wangxianzhu): remove the special handling of zh-Hans and zh-Hant once
 539   // Android Java API supports scripts.
 540   if (!StartsWithASCII(locale_code, "zh-Han", true)) {
 541     display_name = GetDisplayNameForLocale(locale_code, display_locale);
 542   } else
 543 #endif
 544   {
 545     UErrorCode error = U_ZERO_ERROR;
 546     const int kBufferSize = 1024;
 547
 548     int actual_size = uloc_getDisplayName(
 549         locale_code.c_str(), display_locale.c_str(),
 550         WriteInto(&display_name, kBufferSize), kBufferSize - 1, &error);
 551     DCHECK(U_SUCCESS(error));
 552     display_name.resize(actual_size);
 553   }
 554
 555   // Add directional markup so parentheses are properly placed.
 556   if (is_for_ui && base::i18n::IsRTL())
 557     base::i18n::AdjustStringForLocaleDirection(&display_name);
 558   return display_name;
 559 }
 560
 561 string16 GetDisplayNameForCountry(const std::string& country_code,
 562                                   const std::string& display_locale) {
 563   return GetDisplayNameForLocale("_" + country_code, display_locale, false);
 564 }
 565
 566 std::string NormalizeLocale(const std::string& locale) {
 567   std::string normalized_locale(locale);
 568   std::replace(normalized_locale.begin(), normalized_locale.end(), '-', '_');
 569
 570   return normalized_locale;
 571 }
 572
 573 void GetParentLocales(const std::string& current_locale,
 574                       std::vector<std::string>* parent_locales) {
 575   std::string locale(NormalizeLocale(current_locale));
 576
 577   const int kNameCapacity = 256;
 578   char parent[kNameCapacity];
 579   base::strlcpy(parent, locale.c_str(), kNameCapacity);
 580   parent_locales->push_back(parent);
 581   UErrorCode err = U_ZERO_ERROR;
 582   while (uloc_getParent(parent, parent, kNameCapacity, &err) > 0) {
 583     if (U_FAILURE(err))
 584       break;
 585     parent_locales->push_back(parent);
 586   }
 587 }
 588
 589 bool IsValidLocaleSyntax(const std::string& locale) {
 590   // Check that the length is plausible.
 591   if (locale.size() < 2 || locale.size() >= ULOC_FULLNAME_CAPACITY)
 592     return false;
 593
 594   // Strip off the part after an '@' sign, which might contain keywords,
 595   // as in en_IE@currency=IEP or fr@collation=phonebook;calendar=islamic-civil.
 596   // We don't validate that part much, just check that there's at least one
 597   // equals sign in a plausible place. Normalize the prefix so that hyphens
 598   // are changed to underscores.
 599   std::string prefix = NormalizeLocale(locale);
 600   size_t split_point = locale.find("@");
 601   if (split_point != std::string::npos) {
 602     std::string keywords = locale.substr(split_point + 1);
 603     prefix = locale.substr(0, split_point);
 604
 605     size_t equals_loc = keywords.find("=");
 606     if (equals_loc == std::string::npos ||
 607         equals_loc < 1 || equals_loc > keywords.size() - 2)
 608       return false;
 609   }
 610
 611   // Check that all characters before the at-sign are alphanumeric or
 612   // underscore.
 613   for (size_t i = 0; i < prefix.size(); i++) {
 614     char ch = prefix[i];
 615     if (!IsAsciiAlpha(ch) && !IsAsciiDigit(ch) && ch != '_')
 616       return false;
 617   }
 618
 619   // Check that the initial token (before the first hyphen/underscore)
 620   // is 1 - 3 alphabetical characters (a language tag).
 621   for (size_t i = 0; i < prefix.size(); i++) {
 622     char ch = prefix[i];
 623     if (ch == '_') {
 624       if (i < 1 || i > 3)
 625         return false;
 626       break;
 627     }
 628     if (!IsAsciiAlpha(ch))
 629       return false;
 630   }
 631
 632   // Check that the all tokens after the initial token are 1 - 8 characters.
 633   // (Tokenize/StringTokenizer don't work here, they collapse multiple
 634   // delimiters into one.)
 635   int token_len = 0;
 636   int token_index = 0;
 637   for (size_t i = 0; i < prefix.size(); i++) {
 638     if (prefix[i] != '_') {
 639       token_len++;
 640       continue;
 641     }
 642
 643     if (token_index > 0 && (token_len < 1 || token_len > 8)) {
 644       return false;
 645     }
 646     token_index++;
 647     token_len = 0;
 648   }
 649   if (token_index == 0 && (token_len < 1 || token_len > 3)) {
 650     return false;
 651   } else if (token_len < 1 || token_len > 8) {
 652     return false;
 653   }
 654
 655   return true;
 656 }
 657
 658 std::string GetStringUTF8(int message_id) {
 659   return UTF16ToUTF8(GetStringUTF16(message_id));
 660 }
 661
 662 string16 GetStringUTF16(int message_id) {
 663   ResourceBundle& rb = ResourceBundle::GetSharedInstance();
 664   string16 str = rb.GetLocalizedString(message_id);
 665   AdjustParagraphDirectionality(&str);
 666
 667   return str;
 668 }
 669
 670 static string16 GetStringF(int message_id,
 671                            const std::vector<string16>& replacements,
 672                            std::vector<size_t>* offsets) {
 673   // TODO(tc): We could save a string copy if we got the raw string as
 674   // a StringPiece and were able to call ReplaceStringPlaceholders with
 675   // a StringPiece format string and string16 substitution strings.  In
 676   // practice, the strings should be relatively short.
 677   ResourceBundle& rb = ResourceBundle::GetSharedInstance();
 678   const string16& format_string = rb.GetLocalizedString(message_id);
 679
 680 #ifndef NDEBUG
 681   // Make sure every replacement string is being used, so we don't just
 682   // silently fail to insert one. If |offsets| is non-NULL, then don't do this
 683   // check as the code may simply want to find the placeholders rather than
 684   // actually replacing them.
 685   if (!offsets) {
 686     std::string utf8_string = UTF16ToUTF8(format_string);
 687
 688     // $9 is the highest allowed placeholder.
 689     for (size_t i = 0; i < 9; ++i) {
 690       bool placeholder_should_exist = replacements.size() > i;
 691
 692       std::string placeholder =
 693           base::StringPrintf("$%d", static_cast<int>(i + 1));
 694       size_t pos = utf8_string.find(placeholder.c_str());
 695       if (placeholder_should_exist) {
 696         DCHECK_NE(std::string::npos, pos) <<
 697             " Didn't find a " << placeholder << " placeholder in " <<
 698             utf8_string;
 699       } else {
 700         DCHECK_EQ(std::string::npos, pos) <<
 701             " Unexpectedly found a " << placeholder << " placeholder in " <<
 702             utf8_string;
 703       }
 704     }
 705   }
 706 #endif
 707
 708   string16 formatted = ReplaceStringPlaceholders(format_string, replacements,
 709                                                  offsets);
 710   AdjustParagraphDirectionality(&formatted);
 711
 712   return formatted;
 713 }
 714
 715 std::string GetStringFUTF8(int message_id,
 716                            const string16& a) {
 717   return UTF16ToUTF8(GetStringFUTF16(message_id, a));
 718 }
 719
 720 std::string GetStringFUTF8(int message_id,
 721                            const string16& a,
 722                            const string16& b) {
 723   return UTF16ToUTF8(GetStringFUTF16(message_id, a, b));
 724 }
 725
 726 std::string GetStringFUTF8(int message_id,
 727                            const string16& a,
 728                            const string16& b,
 729                            const string16& c) {
 730   return UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c));
 731 }
 732
 733 std::string GetStringFUTF8(int message_id,
 734                            const string16& a,
 735                            const string16& b,
 736                            const string16& c,
 737                            const string16& d) {
 738   return UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c, d));
 739 }
 740
 741 string16 GetStringFUTF16(int message_id,
 742                          const string16& a) {
 743   std::vector<string16> replacements;
 744   replacements.push_back(a);
 745   return GetStringF(message_id, replacements, NULL);
 746 }
 747
 748 string16 GetStringFUTF16(int message_id,
 749                          const string16& a,
 750                          const string16& b) {
 751   return GetStringFUTF16(message_id, a, b, NULL);
 752 }
 753
 754 string16 GetStringFUTF16(int message_id,
 755                          const string16& a,
 756                          const string16& b,
 757                          const string16& c) {
 758   std::vector<string16> replacements;
 759   replacements.push_back(a);
 760   replacements.push_back(b);
 761   replacements.push_back(c);
 762   return GetStringF(message_id, replacements, NULL);
 763 }
 764
 765 string16 GetStringFUTF16(int message_id,
 766                          const string16& a,
 767                          const string16& b,
 768                          const string16& c,
 769                          const string16& d) {
 770   std::vector<string16> replacements;
 771   replacements.push_back(a);
 772   replacements.push_back(b);
 773   replacements.push_back(c);
 774   replacements.push_back(d);
 775   return GetStringF(message_id, replacements, NULL);
 776 }
 777
 778 string16 GetStringFUTF16(int message_id,
 779                          const string16& a,
 780                          const string16& b,
 781                          const string16& c,
 782                          const string16& d,
 783                          const string16& e) {
 784   std::vector<string16> replacements;
 785   replacements.push_back(a);
 786   replacements.push_back(b);
 787   replacements.push_back(c);
 788   replacements.push_back(d);
 789   replacements.push_back(e);
 790   return GetStringF(message_id, replacements, NULL);
 791 }
 792
 793 string16 GetStringFUTF16(int message_id, const string16& a, size_t* offset) {
 794   DCHECK(offset);
 795   std::vector<size_t> offsets;
 796   std::vector<string16> replacements;
 797   replacements.push_back(a);
 798   string16 result = GetStringF(message_id, replacements, &offsets);
 799   DCHECK(offsets.size() == 1);
 800   *offset = offsets[0];
 801   return result;
 802 }
 803
 804 string16 GetStringFUTF16(int message_id,
 805                          const string16& a,
 806                          const string16& b,
 807                          std::vector<size_t>* offsets) {
 808   std::vector<string16> replacements;
 809   replacements.push_back(a);
 810   replacements.push_back(b);
 811   return GetStringF(message_id, replacements, offsets);
 812 }
 813
 814 string16 GetStringFUTF16Int(int message_id, int a) {
 815   return GetStringFUTF16(message_id, UTF8ToUTF16(base::IntToString(a)));
 816 }
 817
 818 string16 GetStringFUTF16Int(int message_id, int64 a) {
 819   return GetStringFUTF16(message_id, UTF8ToUTF16(base::Int64ToString(a)));
 820 }
 821
 822 // Specialization of operator() method for string16 version.
 823 template <>
 824 bool StringComparator<string16>::operator()(const string16& lhs,
 825                                             const string16& rhs) {
 826   // If we can not get collator instance for specified locale, just do simple
 827   // string compare.
 828   if (!collator_)
 829     return lhs < rhs;
 830   return base::i18n::CompareString16WithCollator(collator_, lhs, rhs) ==
 831       UCOL_LESS;
 832 };
 833
 834 void SortStrings16(const std::string& locale,
 835                    std::vector<string16>* strings) {
 836   SortVectorWithStringKey(locale, strings, false);
 837 }
 838
 839 const std::vector<std::string>& GetAvailableLocales() {
 840   return g_available_locales.Get();
 841 }
 842
 843 void GetAcceptLanguagesForLocale(const std::string& display_locale,
 844                                  std::vector<std::string>* locale_codes) {
 845   for (size_t i = 0; i < arraysize(kAcceptLanguageList); ++i) {
 846     if (!IsLocaleNameTranslated(kAcceptLanguageList[i], display_locale))
 847       // TODO(jungshik) : Put them at the of the list with language codes
 848       // enclosed by brackets instead of skipping.
 849         continue;
 850     locale_codes->push_back(kAcceptLanguageList[i]);
 851   }
 852 }
 853
 854 int GetLocalizedContentsWidthInPixels(int pixel_resource_id) {
 855   int width = 0;
 856   base::StringToInt(l10n_util::GetStringUTF8(pixel_resource_id), &width);
 857   DCHECK_GT(width, 0);
 858   return width;
 859 }
 860
 861 }  // namespace l10n_util