ui/base/l10n/l10n_util.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "ui/base/l10n/l10n_util.h"
   6
   7 #include <algorithm>
   8 #include <cstdlib>
   9 #include <iterator>
  10 #include <string>
  11
  12 #include "base/command_line.h"
  13 #include "base/compiler_specific.h"
  14 #include "base/file_util.h"
  15 #include "base/i18n/file_util_icu.h"
  16 #include "base/i18n/rtl.h"
  17 #include "base/i18n/string_compare.h"
  18 #include "base/lazy_instance.h"
  19 #include "base/memory/scoped_ptr.h"
  20 #include "base/path_service.h"
  21 #include "base/strings/string_number_conversions.h"
  22 #include "base/strings/string_split.h"
  23 #include "base/strings/string_util.h"
  24 #include "base/strings/stringprintf.h"
  25 #include "base/strings/sys_string_conversions.h"
  26 #include "base/strings/utf_string_conversions.h"
  27 #include "build/build_config.h"
  28 #include "third_party/icu/source/common/unicode/rbbi.h"
  29 #include "third_party/icu/source/common/unicode/uloc.h"
  30 #include "ui/base/l10n/l10n_util_collator.h"
  31 #include "ui/base/l10n/l10n_util_plurals.h"
  32 #include "ui/base/resource/resource_bundle.h"
  33 #include "ui/base/ui_base_paths.h"
  34
  35 #if defined(OS_ANDROID)
  36 #include "ui/base/l10n/l10n_util_android.h"
  37 #endif
  38
  39 #if defined(USE_GLIB)
  40 #include <glib.h>
  41 #endif
  42
  43 #if defined(OS_WIN)
  44 #include "ui/base/l10n/l10n_util_win.h"
  45 #endif  // OS_WIN
  46
  47 namespace {
  48
  // Language codes consulted by GetAcceptLanguagesForLocale(), kept sorted by
  // code. Being inside the unnamed namespace already gives the table internal
  // linkage.
  const char* const kAcceptLanguageList[] = {
    "af",      // Afrikaans
    "am",      // Amharic
    "ar",      // Arabic
    "az",      // Azerbaijani
    "be",      // Belarusian
    "bg",      // Bulgarian
    "bh",      // Bihari
    "bn",      // Bengali
    "br",      // Breton
    "bs",      // Bosnian
    "ca",      // Catalan
    "co",      // Corsican
    "cs",      // Czech
    "cy",      // Welsh
    "da",      // Danish
    "de",      // German
    "de-AT",   // German (Austria)
    "de-CH",   // German (Switzerland)
    "de-DE",   // German (Germany)
    "el",      // Greek
    "en",      // English
    "en-AU",   // English (Australia)
    "en-CA",   // English (Canada)
    "en-GB",   // English (UK)
    "en-NZ",   // English (New Zealand)
    "en-US",   // English (US)
    "en-ZA",   // English (South Africa)
    "eo",      // Esperanto
    // TODO(jungshik): Do we want to list all es-Foo for Latin-American
    // Spanish-speaking countries?
    "es",      // Spanish
    "es-419",  // Spanish (Latin America)
    "et",      // Estonian
    "eu",      // Basque
    "fa",      // Persian
    "fi",      // Finnish
    "fil",     // Filipino
    "fo",      // Faroese
    "fr",      // French
    "fr-CA",   // French (Canada)
    "fr-CH",   // French (Switzerland)
    "fr-FR",   // French (France)
    "fy",      // Frisian
    "ga",      // Irish
    "gd",      // Scots Gaelic
    "gl",      // Galician
    "gn",      // Guarani
    "gu",      // Gujarati
    "ha",      // Hausa
    "haw",     // Hawaiian
    "he",      // Hebrew
    "hi",      // Hindi
    "hr",      // Croatian
    "hu",      // Hungarian
    "hy",      // Armenian
    "ia",      // Interlingua
    "id",      // Indonesian
    "is",      // Icelandic
    "it",      // Italian
    "it-CH",   // Italian (Switzerland)
    "it-IT",   // Italian (Italy)
    "ja",      // Japanese
    "jw",      // Javanese
    "ka",      // Georgian
    "kk",      // Kazakh
    "km",      // Cambodian
    "kn",      // Kannada
    "ko",      // Korean
    "ku",      // Kurdish
    "ky",      // Kyrgyz
    "la",      // Latin
    "ln",      // Lingala
    "lo",      // Laothian
    "lt",      // Lithuanian
    "lv",      // Latvian
    "mk",      // Macedonian
    "ml",      // Malayalam
    "mn",      // Mongolian
    "mo",      // Moldavian
    "mr",      // Marathi
    "ms",      // Malay
    "mt",      // Maltese
    "nb",      // Norwegian (Bokmal)
    "ne",      // Nepali
    "nl",      // Dutch
    "nn",      // Norwegian (Nynorsk)
    "no",      // Norwegian
    "oc",      // Occitan
    "om",      // Oromo
    "or",      // Oriya
    "pa",      // Punjabi
    "pl",      // Polish
    "ps",      // Pashto
    "pt",      // Portuguese
    "pt-BR",   // Portuguese (Brazil)
    "pt-PT",   // Portuguese (Portugal)
    "qu",      // Quechua
    "rm",      // Romansh
    "ro",      // Romanian
    "ru",      // Russian
    "sd",      // Sindhi
    "sh",      // Serbo-Croatian
    "si",      // Sinhalese
    "sk",      // Slovak
    "sl",      // Slovenian
    "sn",      // Shona
    "so",      // Somali
    "sq",      // Albanian
    "sr",      // Serbian
    "st",      // Sesotho
    "su",      // Sundanese
    "sv",      // Swedish
    "sw",      // Swahili
    "ta",      // Tamil
    "te",      // Telugu
    "tg",      // Tajik
    "th",      // Thai
    "ti",      // Tigrinya
    "tk",      // Turkmen
    "to",      // Tonga
    "tr",      // Turkish
    "tt",      // Tatar
    "tw",      // Twi
    "ug",      // Uighur
    "uk",      // Ukrainian
    "ur",      // Urdu
    "uz",      // Uzbek
    "vi",      // Vietnamese
    "xh",      // Xhosa
    "yi",      // Yiddish
    "yo",      // Yoruba
    "zh",      // Chinese
    "zh-CN",   // Chinese (Simplified)
    "zh-TW",   // Chinese (Traditional)
    "zu",      // Zulu
  };
 186
 187 // Returns true if |locale_name| has an alias in the ICU data file.
 188 bool IsDuplicateName(const std::string& locale_name) {
 189   static const char* const kDuplicateNames[] = {
 190     "en",
 191     "pt",
 192     "zh",
 193     "zh_hans_cn",
 194     "zh_hant_hk",
 195     "zh_hant_mo",
 196     "zh_hans_sg",
 197     "zh_hant_tw"
 198   };
 199
 200   // Skip all 'es_RR'. Currently, we use 'es' for es-ES (Spanish in Spain).
 201   // 'es-419' (Spanish in Latin America) is not available in ICU so that it
 202   // has to be added manually in GetAvailableLocales().
 203   if (LowerCaseEqualsASCII(locale_name.substr(0, 3),  "es_"))
 204     return true;
 205   for (size_t i = 0; i < arraysize(kDuplicateNames); ++i) {
 206     if (base::strcasecmp(kDuplicateNames[i], locale_name.c_str()) == 0)
 207       return true;
 208   }
 209   return false;
 210 }
 211
 212 // We added 30+ minimally populated locales with only a few entries
 213 // (exemplar character set, script, writing direction and its own
 214 // lanaguage name). These locales have to be distinguished from the
 215 // fully populated locales to which Chrome is localized.
 216 bool IsLocalePartiallyPopulated(const std::string& locale_name) {
 217   // For partially populated locales, even the translation for "English"
 218   // is not available. A more robust/elegant way to check is to add a special
 219   // field (say, 'isPartial' to our version of ICU locale files) and
 220   // check its value, but this hack seems to work well.
 221   return !l10n_util::IsLocaleNameTranslated("en", locale_name);
 222 }
 223
 224 #if !defined(OS_MACOSX)
 225 bool IsLocaleAvailable(const std::string& locale) {
 226   // If locale has any illegal characters in it, we don't want to try to
 227   // load it because it may be pointing outside the locale data file directory.
 228   if (!file_util::IsFilenameLegal(base::ASCIIToUTF16(locale)))
 229     return false;
 230
 231   // IsLocalePartiallyPopulated() can be called here for an early return w/o
 232   // checking the resource availability below. It'd help when Chrome is run
 233   // under a system locale Chrome is not localized to (e.g.Farsi on Linux),
 234   // but it'd slow down the start up time a little bit for locales Chrome is
 235   // localized to. So, we don't call it here.
 236   if (!l10n_util::IsLocaleSupportedByOS(locale))
 237     return false;
 238
 239   // If the ResourceBundle is not yet initialized, return false to avoid the
 240   // CHECK failure in ResourceBundle::GetSharedInstance().
 241   if (!ResourceBundle::HasSharedInstance())
 242     return false;
 243
 244   // TODO(hshi): make ResourceBundle::LocaleDataPakExists() a static function
 245   // so that this can be invoked without initializing the global instance.
 246   // See crbug.com/230432: CHECK failure in GetUserDataDir().
 247   return ResourceBundle::GetSharedInstance().LocaleDataPakExists(locale);
 248 }
 249 #endif
 250
 251 // On Linux, the text layout engine Pango determines paragraph directionality
 252 // by looking at the first strongly-directional character in the text. This
 253 // means text such as "Google Chrome foo bar..." will be layed out LTR even
 254 // if "foo bar" is RTL. So this function prepends the necessary RLM in such
 255 // cases.
 256 void AdjustParagraphDirectionality(base::string16* paragraph) {
 257 #if defined(OS_POSIX) && !defined(OS_MACOSX) && !defined(OS_ANDROID)
 258   if (base::i18n::IsRTL() &&
 259       base::i18n::StringContainsStrongRTLChars(*paragraph)) {
 260     paragraph->insert(0, 1,
 261                       static_cast<base::char16>(base::i18n::kRightToLeftMark));
 262   }
 263 #endif
 264 }
 265
 266 struct AvailableLocalesTraits
 267     : base::DefaultLazyInstanceTraits<std::vector<std::string> > {
 268   static std::vector<std::string>* New(void* instance) {
 269     std::vector<std::string>* locales =
 270         base::DefaultLazyInstanceTraits<std::vector<std::string> >::New(
 271             instance);
 272     int num_locales = uloc_countAvailable();
 273     for (int i = 0; i < num_locales; ++i) {
 274       std::string locale_name = uloc_getAvailable(i);
 275       // Filter out the names that have aliases.
 276       if (IsDuplicateName(locale_name))
 277         continue;
 278       // Filter out locales for which we have only partially populated data
 279       // and to which Chrome is not localized.
 280       if (IsLocalePartiallyPopulated(locale_name))
 281         continue;
 282       if (!l10n_util::IsLocaleSupportedByOS(locale_name))
 283         continue;
 284       // Normalize underscores to hyphens because that's what our locale files
 285       // use.
 286       std::replace(locale_name.begin(), locale_name.end(), '_', '-');
 287
 288       // Map the Chinese locale names over to zh-CN and zh-TW.
 289       if (LowerCaseEqualsASCII(locale_name, "zh-hans")) {
 290         locale_name = "zh-CN";
 291       } else if (LowerCaseEqualsASCII(locale_name, "zh-hant")) {
 292         locale_name = "zh-TW";
 293       }
 294       locales->push_back(locale_name);
 295     }
 296
 297     // Manually add 'es-419' to the list. See the comment in IsDuplicateName().
 298     locales->push_back("es-419");
 299     return locales;
 300   }
 301 };
 302
 303 base::LazyInstance<std::vector<std::string>, AvailableLocalesTraits>
 304     g_available_locales = LAZY_INSTANCE_INITIALIZER;
 305
 306 }  // namespace
 307
 308 namespace l10n_util {
 309
 310 std::string GetCanonicalLocale(const std::string& locale) {
 311   return base::i18n::GetCanonicalLocale(locale.c_str());
 312 }
 313
 314 bool CheckAndResolveLocale(const std::string& locale,
 315                            std::string* resolved_locale) {
 316 #if defined(OS_MACOSX)
 317   NOTIMPLEMENTED();
 318   return false;
 319 #else
 320   if (IsLocaleAvailable(locale)) {
 321     *resolved_locale = locale;
 322     return true;
 323   }
 324
 325   // If there's a variant, skip over it so we can try without the region
 326   // code.  For example, ca_ES@valencia should cause us to try ca@valencia
 327   // before ca.
 328   std::string::size_type variant_pos = locale.find('@');
 329   if (variant_pos != std::string::npos)
 330     return false;
 331
 332   // If the locale matches language but not country, use that instead.
 333   // TODO(jungshik) : Nothing is done about languages that Chrome
 334   // does not support but available on Windows. We fall
 335   // back to en-US in GetApplicationLocale so that it's a not critical,
 336   // but we can do better.
 337   std::string::size_type hyphen_pos = locale.find('-');
 338   std::string lang(locale, 0, hyphen_pos);
 339   if (hyphen_pos != std::string::npos && hyphen_pos > 0) {
 340     std::string region(locale, hyphen_pos + 1);
 341     std::string tmp_locale(lang);
 342     // Map es-RR other than es-ES to es-419 (Chrome's Latin American
 343     // Spanish locale).
 344     if (LowerCaseEqualsASCII(lang, "es") &&
 345         !LowerCaseEqualsASCII(region, "es")) {
 346       tmp_locale.append("-419");
 347     } else if (LowerCaseEqualsASCII(lang, "zh")) {
 348       // Map zh-HK and zh-MO to zh-TW. Otherwise, zh-FOO is mapped to zh-CN.
 349       if (LowerCaseEqualsASCII(region, "hk") ||
 350           LowerCaseEqualsASCII(region, "mo")) { // Macao
 351         tmp_locale.append("-TW");
 352       } else {
 353         tmp_locale.append("-CN");
 354       }
 355     } else if (LowerCaseEqualsASCII(lang, "en")) {
 356       // Map Australian, Canadian, New Zealand and South African English
 357       // to British English for now.
 358       // TODO(jungshik): en-CA may have to change sides once
 359       // we have OS locale separate from app locale (Chrome's UI language).
 360       if (LowerCaseEqualsASCII(region, "au") ||
 361           LowerCaseEqualsASCII(region, "ca") ||
 362           LowerCaseEqualsASCII(region, "nz") ||
 363           LowerCaseEqualsASCII(region, "za")) {
 364         tmp_locale.append("-GB");
 365       } else {
 366         tmp_locale.append("-US");
 367       }
 368     }
 369     if (IsLocaleAvailable(tmp_locale)) {
 370       resolved_locale->swap(tmp_locale);
 371       return true;
 372     }
 373   }
 374
 375   // Google updater uses no, tl, iw and en for our nb, fil, he, and en-US.
 376   struct {
 377     const char* source;
 378     const char* dest;
 379   } alias_map[] = {
 380       {"no", "nb"},
 381       {"tl", "fil"},
 382       {"iw", "he"},
 383       {"en", "en-US"},
 384   };
 385
 386   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(alias_map); ++i) {
 387     if (LowerCaseEqualsASCII(lang, alias_map[i].source)) {
 388       std::string tmp_locale(alias_map[i].dest);
 389       if (IsLocaleAvailable(tmp_locale)) {
 390         resolved_locale->swap(tmp_locale);
 391         return true;
 392       }
 393     }
 394   }
 395
 396   return false;
 397 #endif
 398 }
 399
 400 std::string GetApplicationLocale(const std::string& pref_locale) {
 401 #if defined(OS_MACOSX)
 402
 403   // Use any override (Cocoa for the browser), otherwise use the preference
 404   // passed to the function.
 405   std::string app_locale = l10n_util::GetLocaleOverride();
 406   if (app_locale.empty())
 407     app_locale = pref_locale;
 408
 409   // The above should handle all of the cases Chrome normally hits, but for some
 410   // unit tests, we need something to fall back too.
 411   if (app_locale.empty())
 412     app_locale = "en-US";
 413
 414   // Windows/Linux call SetICUDefaultLocale after determining the actual locale
 415   // with CheckAndResolveLocal to make ICU APIs work in that locale.
 416   // Mac doesn't use a locale directory tree of resources (it uses Mac style
 417   // resources), so mirror the Windows/Linux behavior of calling
 418   // SetICUDefaultLocale.
 419   base::i18n::SetICUDefaultLocale(app_locale);
 420   return app_locale;
 421
 422 #else
 423
 424   std::string resolved_locale;
 425   std::vector<std::string> candidates;
 426
 427   // We only use --lang and the app pref on Windows.  On Linux, we only
 428   // look at the LC_*/LANG environment variables.  We do, however, pass --lang
 429   // to renderer and plugin processes so they know what language the parent
 430   // process decided to use.
 431
 432 #if defined(OS_WIN)
 433
 434   // First, try the preference value.
 435   if (!pref_locale.empty())
 436     candidates.push_back(GetCanonicalLocale(pref_locale));
 437
 438   // Next, try the overridden locale.
 439   const std::vector<std::string>& languages = l10n_util::GetLocaleOverrides();
 440   if (!languages.empty()) {
 441     candidates.reserve(candidates.size() + languages.size());
 442     std::transform(languages.begin(), languages.end(),
 443                    std::back_inserter(candidates), &GetCanonicalLocale);
 444   } else {
 445     // If no override was set, defer to ICU
 446     candidates.push_back(base::i18n::GetConfiguredLocale());
 447   }
 448
 449 #elif defined(OS_ANDROID)
 450
 451   // On Android, query java.util.Locale for the default locale.
 452   candidates.push_back(GetDefaultLocale());
 453
 454 #elif defined(USE_GLIB) && !defined(OS_CHROMEOS)
 455
 456   // GLib implements correct environment variable parsing with
 457   // the precedence order: LANGUAGE, LC_ALL, LC_MESSAGES and LANG.
 458   // We used to use our custom parsing code along with ICU for this purpose.
 459   // If we have a port that does not depend on GTK, we have to
 460   // restore our custom code for that port.
 461   const char* const* languages = g_get_language_names();
 462   DCHECK(languages);  // A valid pointer is guaranteed.
 463   DCHECK(*languages);  // At least one entry, "C", is guaranteed.
 464
 465   for (; *languages != NULL; ++languages) {
 466     candidates.push_back(base::i18n::GetCanonicalLocale(*languages));
 467   }
 468
 469 #else
 470
 471   // By default, use the application locale preference. This applies to ChromeOS
 472   // and linux systems without glib.
 473   if (!pref_locale.empty())
 474     candidates.push_back(pref_locale);
 475
 476 #endif
 477
 478   std::vector<std::string>::const_iterator i = candidates.begin();
 479   for (; i != candidates.end(); ++i) {
 480     if (CheckAndResolveLocale(*i, &resolved_locale)) {
 481       base::i18n::SetICUDefaultLocale(resolved_locale);
 482       return resolved_locale;
 483     }
 484   }
 485
 486   // Fallback on en-US.
 487   const std::string fallback_locale("en-US");
 488   if (IsLocaleAvailable(fallback_locale)) {
 489     base::i18n::SetICUDefaultLocale(fallback_locale);
 490     return fallback_locale;
 491   }
 492
 493   return std::string();
 494
 495 #endif
 496 }
 497
 498 bool IsLocaleNameTranslated(const char* locale,
 499                             const std::string& display_locale) {
 500   base::string16 display_name =
 501       l10n_util::GetDisplayNameForLocale(locale, display_locale, false);
 502   // Because ICU sets the error code to U_USING_DEFAULT_WARNING whether or not
 503   // uloc_getDisplayName returns the actual translation or the default
 504   // value (locale code), we have to rely on this hack to tell whether
 505   // the translation is available or not.  If ICU doesn't have a translated
 506   // name for this locale, GetDisplayNameForLocale will just return the
 507   // locale code.
 508   return !base::IsStringASCII(display_name) ||
 509       base::UTF16ToASCII(display_name) != locale;
 510 }
 511
 512 base::string16 GetDisplayNameForLocale(const std::string& locale,
 513                                        const std::string& display_locale,
 514                                        bool is_for_ui) {
 515   std::string locale_code = locale;
 516   // Internally, we use the language code of zh-CN and zh-TW, but we want the
 517   // display names to be Chinese (Simplified) and Chinese (Traditional) instead
 518   // of Chinese (China) and Chinese (Taiwan).  To do that, we pass zh-Hans
 519   // and zh-Hant to ICU. Even with this mapping, we'd get
 520   // 'Chinese (Simplified Han)' and 'Chinese (Traditional Han)' in English and
 521   // even longer results in other languages. Arguably, they're better than
 522   // the current results : Chinese (China) / Chinese (Taiwan).
 523   // TODO(jungshik): Do one of the following:
 524   // 1. Special-case Chinese by getting the custom-translation for them
 525   // 2. Recycle IDS_ENCODING_{SIMP,TRAD}_CHINESE.
 526   // 3. Get translations for two directly from the ICU resouce bundle
 527   // because they're not accessible with other any API.
 528   // 4. Patch ICU to special-case zh-Hans/zh-Hant for us.
 529   // #1 and #2 wouldn't work if display_locale != current UI locale although
 530   // we can think of additional hack to work around the problem.
 531   // #3 can be potentially expensive.
 532   if (locale_code == "zh-CN")
 533     locale_code = "zh-Hans";
 534   else if (locale_code == "zh-TW")
 535     locale_code = "zh-Hant";
 536
 537   base::string16 display_name;
 538 #if defined(OS_ANDROID)
 539   // Use Java API to get locale display name so that we can remove most of
 540   // the lang data from icu data to reduce binary size, except for zh-Hans and
 541   // zh-Hant because the current Android Java API doesn't support scripts.
 542   // TODO(wangxianzhu): remove the special handling of zh-Hans and zh-Hant once
 543   // Android Java API supports scripts.
 544   if (!StartsWithASCII(locale_code, "zh-Han", true)) {
 545     display_name = GetDisplayNameForLocale(locale_code, display_locale);
 546   } else
 547 #endif
 548   {
 549     UErrorCode error = U_ZERO_ERROR;
 550     const int kBufferSize = 1024;
 551
 552     int actual_size = uloc_getDisplayName(
 553         locale_code.c_str(), display_locale.c_str(),
 554         WriteInto(&display_name, kBufferSize), kBufferSize - 1, &error);
 555     DCHECK(U_SUCCESS(error));
 556     display_name.resize(actual_size);
 557   }
 558
 559   // Add directional markup so parentheses are properly placed.
 560   if (is_for_ui && base::i18n::IsRTL())
 561     base::i18n::AdjustStringForLocaleDirection(&display_name);
 562   return display_name;
 563 }
 564
 565 base::string16 GetDisplayNameForCountry(const std::string& country_code,
 566                                         const std::string& display_locale) {
 567   return GetDisplayNameForLocale("_" + country_code, display_locale, false);
 568 }
 569
 // Returns a copy of |locale| in which every '-' is replaced by '_'.
 std::string NormalizeLocale(const std::string& locale) {
   std::string normalized_locale;
   normalized_locale.reserve(locale.size());
   for (std::string::const_iterator it = locale.begin(); it != locale.end();
        ++it) {
     normalized_locale.push_back(*it == '-' ? '_' : *it);
   }
   return normalized_locale;
 }
 576
 577 void GetParentLocales(const std::string& current_locale,
 578                       std::vector<std::string>* parent_locales) {
 579   std::string locale(NormalizeLocale(current_locale));
 580
 581   const int kNameCapacity = 256;
 582   char parent[kNameCapacity];
 583   base::strlcpy(parent, locale.c_str(), kNameCapacity);
 584   parent_locales->push_back(parent);
 585   UErrorCode err = U_ZERO_ERROR;
 586   while (uloc_getParent(parent, parent, kNameCapacity, &err) > 0) {
 587     if (U_FAILURE(err))
 588       break;
 589     parent_locales->push_back(parent);
 590   }
 591 }
 592
 593 bool IsValidLocaleSyntax(const std::string& locale) {
 594   // Check that the length is plausible.
 595   if (locale.size() < 2 || locale.size() >= ULOC_FULLNAME_CAPACITY)
 596     return false;
 597
 598   // Strip off the part after an '@' sign, which might contain keywords,
 599   // as in en_IE@currency=IEP or fr@collation=phonebook;calendar=islamic-civil.
 600   // We don't validate that part much, just check that there's at least one
 601   // equals sign in a plausible place. Normalize the prefix so that hyphens
 602   // are changed to underscores.
 603   std::string prefix = NormalizeLocale(locale);
 604   size_t split_point = locale.find("@");
 605   if (split_point != std::string::npos) {
 606     std::string keywords = locale.substr(split_point + 1);
 607     prefix = locale.substr(0, split_point);
 608
 609     size_t equals_loc = keywords.find("=");
 610     if (equals_loc == std::string::npos ||
 611         equals_loc < 1 || equals_loc > keywords.size() - 2)
 612       return false;
 613   }
 614
 615   // Check that all characters before the at-sign are alphanumeric or
 616   // underscore.
 617   for (size_t i = 0; i < prefix.size(); i++) {
 618     char ch = prefix[i];
 619     if (!IsAsciiAlpha(ch) && !IsAsciiDigit(ch) && ch != '_')
 620       return false;
 621   }
 622
 623   // Check that the initial token (before the first hyphen/underscore)
 624   // is 1 - 3 alphabetical characters (a language tag).
 625   for (size_t i = 0; i < prefix.size(); i++) {
 626     char ch = prefix[i];
 627     if (ch == '_') {
 628       if (i < 1 || i > 3)
 629         return false;
 630       break;
 631     }
 632     if (!IsAsciiAlpha(ch))
 633       return false;
 634   }
 635
 636   // Check that the all tokens after the initial token are 1 - 8 characters.
 637   // (Tokenize/StringTokenizer don't work here, they collapse multiple
 638   // delimiters into one.)
 639   int token_len = 0;
 640   int token_index = 0;
 641   for (size_t i = 0; i < prefix.size(); i++) {
 642     if (prefix[i] != '_') {
 643       token_len++;
 644       continue;
 645     }
 646
 647     if (token_index > 0 && (token_len < 1 || token_len > 8)) {
 648       return false;
 649     }
 650     token_index++;
 651     token_len = 0;
 652   }
 653   if (token_index == 0 && (token_len < 1 || token_len > 3)) {
 654     return false;
 655   } else if (token_len < 1 || token_len > 8) {
 656     return false;
 657   }
 658
 659   return true;
 660 }
 661
 662 std::string GetStringUTF8(int message_id) {
 663   return base::UTF16ToUTF8(GetStringUTF16(message_id));
 664 }
 665
 666 base::string16 GetStringUTF16(int message_id) {
 667   ResourceBundle& rb = ResourceBundle::GetSharedInstance();
 668   base::string16 str = rb.GetLocalizedString(message_id);
 669   AdjustParagraphDirectionality(&str);
 670
 671   return str;
 672 }
 673
 674 base::string16 GetStringFUTF16(int message_id,
 675                                const std::vector<base::string16>& replacements,
 676                                std::vector<size_t>* offsets) {
 677   // TODO(tc): We could save a string copy if we got the raw string as
 678   // a StringPiece and were able to call ReplaceStringPlaceholders with
 679   // a StringPiece format string and base::string16 substitution strings.  In
 680   // practice, the strings should be relatively short.
 681   ResourceBundle& rb = ResourceBundle::GetSharedInstance();
 682   const base::string16& format_string = rb.GetLocalizedString(message_id);
 683
 684 #ifndef NDEBUG
 685   // Make sure every replacement string is being used, so we don't just
 686   // silently fail to insert one. If |offsets| is non-NULL, then don't do this
 687   // check as the code may simply want to find the placeholders rather than
 688   // actually replacing them.
 689   if (!offsets) {
 690     std::string utf8_string = base::UTF16ToUTF8(format_string);
 691
 692     // $9 is the highest allowed placeholder.
 693     for (size_t i = 0; i < 9; ++i) {
 694       bool placeholder_should_exist = replacements.size() > i;
 695
 696       std::string placeholder =
 697           base::StringPrintf("$%d", static_cast<int>(i + 1));
 698       size_t pos = utf8_string.find(placeholder.c_str());
 699       if (placeholder_should_exist) {
 700         DCHECK_NE(std::string::npos, pos) <<
 701             " Didn't find a " << placeholder << " placeholder in " <<
 702             utf8_string;
 703       } else {
 704         DCHECK_EQ(std::string::npos, pos) <<
 705             " Unexpectedly found a " << placeholder << " placeholder in " <<
 706             utf8_string;
 707       }
 708     }
 709   }
 710 #endif
 711
 712   base::string16 formatted = ReplaceStringPlaceholders(
 713       format_string, replacements, offsets);
 714   AdjustParagraphDirectionality(&formatted);
 715
 716   return formatted;
 717 }
 718
 719 std::string GetStringFUTF8(int message_id,
 720                            const base::string16& a) {
 721   return base::UTF16ToUTF8(GetStringFUTF16(message_id, a));
 722 }
 723
 724 std::string GetStringFUTF8(int message_id,
 725                            const base::string16& a,
 726                            const base::string16& b) {
 727   return base::UTF16ToUTF8(GetStringFUTF16(message_id, a, b));
 728 }
 729
 730 std::string GetStringFUTF8(int message_id,
 731                            const base::string16& a,
 732                            const base::string16& b,
 733                            const base::string16& c) {
 734   return base::UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c));
 735 }
 736
 737 std::string GetStringFUTF8(int message_id,
 738                            const base::string16& a,
 739                            const base::string16& b,
 740                            const base::string16& c,
 741                            const base::string16& d) {
 742   return base::UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c, d));
 743 }
 744
 745 base::string16 GetStringFUTF16(int message_id,
 746                                const base::string16& a) {
 747   std::vector<base::string16> replacements;
 748   replacements.push_back(a);
 749   return GetStringFUTF16(message_id, replacements, NULL);
 750 }
 751
 752 base::string16 GetStringFUTF16(int message_id,
 753                                const base::string16& a,
 754                                const base::string16& b) {
 755   return GetStringFUTF16(message_id, a, b, NULL);
 756 }
 757
 758 base::string16 GetStringFUTF16(int message_id,
 759                                const base::string16& a,
 760                                const base::string16& b,
 761                                const base::string16& c) {
 762   std::vector<base::string16> replacements;
 763   replacements.push_back(a);
 764   replacements.push_back(b);
 765   replacements.push_back(c);
 766   return GetStringFUTF16(message_id, replacements, NULL);
 767 }
 768
 769 base::string16 GetStringFUTF16(int message_id,
 770                                const base::string16& a,
 771                                const base::string16& b,
 772                                const base::string16& c,
 773                                const base::string16& d) {
 774   std::vector<base::string16> replacements;
 775   replacements.push_back(a);
 776   replacements.push_back(b);
 777   replacements.push_back(c);
 778   replacements.push_back(d);
 779   return GetStringFUTF16(message_id, replacements, NULL);
 780 }
 781
 782 base::string16 GetStringFUTF16(int message_id,
 783                                const base::string16& a,
 784                                const base::string16& b,
 785                                const base::string16& c,
 786                                const base::string16& d,
 787                                const base::string16& e) {
 788   std::vector<base::string16> replacements;
 789   replacements.push_back(a);
 790   replacements.push_back(b);
 791   replacements.push_back(c);
 792   replacements.push_back(d);
 793   replacements.push_back(e);
 794   return GetStringFUTF16(message_id, replacements, NULL);
 795 }
 796
 797 base::string16 GetStringFUTF16(int message_id,
 798                                const base::string16& a,
 799                                size_t* offset) {
 800   DCHECK(offset);
 801   std::vector<size_t> offsets;
 802   std::vector<base::string16> replacements;
 803   replacements.push_back(a);
 804   base::string16 result = GetStringFUTF16(message_id, replacements, &offsets);
 805   DCHECK(offsets.size() == 1);
 806   *offset = offsets[0];
 807   return result;
 808 }
 809
 810 base::string16 GetStringFUTF16(int message_id,
 811                                const base::string16& a,
 812                                const base::string16& b,
 813                                std::vector<size_t>* offsets) {
 814   std::vector<base::string16> replacements;
 815   replacements.push_back(a);
 816   replacements.push_back(b);
 817   return GetStringFUTF16(message_id, replacements, offsets);
 818 }
 819
 820 base::string16 GetStringFUTF16Int(int message_id, int a) {
 821   return GetStringFUTF16(message_id, base::UTF8ToUTF16(base::IntToString(a)));
 822 }
 823
 824 base::string16 GetStringFUTF16Int(int message_id, int64 a) {
 825   return GetStringFUTF16(message_id, base::UTF8ToUTF16(base::Int64ToString(a)));
 826 }
 827
 828 // Specialization of operator() method for base::string16 version.
 829 template <>
 830 bool StringComparator<base::string16>::operator()(const base::string16& lhs,
 831                                                   const base::string16& rhs) {
 832   // If we can not get collator instance for specified locale, just do simple
 833   // string compare.
 834   if (!collator_)
 835     return lhs < rhs;
 836   return base::i18n::CompareString16WithCollator(collator_, lhs, rhs) ==
 837       UCOL_LESS;
 838 };
 839
 840 base::string16 GetPluralStringFUTF16(const std::vector<int>& message_ids,
 841                                int number) {
 842   scoped_ptr<icu::PluralFormat> format = BuildPluralFormat(message_ids);
 843   DCHECK(format);
 844
 845   UErrorCode err = U_ZERO_ERROR;
 846   icu::UnicodeString result_files_string = format->format(number, err);
 847   int capacity = result_files_string.length() + 1;
 848   DCHECK_GT(capacity, 1);
 849   base::string16 result;
 850   result_files_string.extract(
 851       static_cast<UChar*>(WriteInto(&result, capacity)), capacity, err);
 852   DCHECK(U_SUCCESS(err));
 853   return result;
 854 }
 855
 856 std::string GetPluralStringFUTF8(const std::vector<int>& message_ids,
 857                                  int number) {
 858   return base::UTF16ToUTF8(GetPluralStringFUTF16(message_ids, number));
 859 }
 860
 861 void SortStrings16(const std::string& locale,
 862                    std::vector<base::string16>* strings) {
 863   SortVectorWithStringKey(locale, strings, false);
 864 }
 865
 866 const std::vector<std::string>& GetAvailableLocales() {
 867   return g_available_locales.Get();
 868 }
 869
 870 void GetAcceptLanguagesForLocale(const std::string& display_locale,
 871                                  std::vector<std::string>* locale_codes) {
 872   for (size_t i = 0; i < arraysize(kAcceptLanguageList); ++i) {
 873     if (!l10n_util::IsLocaleNameTranslated(kAcceptLanguageList[i],
 874                                            display_locale))
 875       // TODO(jungshik) : Put them at the of the list with language codes
 876       // enclosed by brackets instead of skipping.
 877         continue;
 878     locale_codes->push_back(kAcceptLanguageList[i]);
 879   }
 880 }
 881
 882 int GetLocalizedContentsWidthInPixels(int pixel_resource_id) {
 883   int width = 0;
 884   base::StringToInt(l10n_util::GetStringUTF8(pixel_resource_id), &width);
 885   DCHECK_GT(width, 0);
 886   return width;
 887 }
 888
 889 }  // namespace l10n_util