Add ICU message format support
[chromium-blink-merge.git] / base / i18n / rtl.cc
blob96ef9aaecd257b16b9e8054e90c5421776a5e8c8
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/i18n/rtl.h"
7 #include "base/files/file_path.h"
8 #include "base/logging.h"
9 #include "base/metrics/field_trial.h"
10 #include "base/strings/string_util.h"
11 #include "base/strings/sys_string_conversions.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "third_party/icu/source/common/unicode/locid.h"
14 #include "third_party/icu/source/common/unicode/uchar.h"
15 #include "third_party/icu/source/common/unicode/uscript.h"
16 #include "third_party/icu/source/i18n/unicode/coll.h"
18 namespace {
20 // Extract language, country and variant, but ignore keywords. For example,
21 // en-US, ca@valencia, ca-ES@valencia.
22 std::string GetLocaleString(const icu::Locale& locale) {
23 const char* language = locale.getLanguage();
24 const char* country = locale.getCountry();
25 const char* variant = locale.getVariant();
27 std::string result =
28 (language != NULL && *language != '\0') ? language : "und";
30 if (country != NULL && *country != '\0') {
31 result += '-';
32 result += country;
35 if (variant != NULL && *variant != '\0') {
36 std::string variant_str(variant);
37 base::StringToLowerASCII(&variant_str);
38 result += '@' + variant_str;
41 return result;
44 // Returns LEFT_TO_RIGHT or RIGHT_TO_LEFT if |character| has strong
45 // directionality, returns UNKNOWN_DIRECTION if it doesn't. Please refer to
46 // http://unicode.org/reports/tr9/ for more information.
47 base::i18n::TextDirection GetCharacterDirection(UChar32 character) {
48 // Now that we have the character, we use ICU in order to query for the
49 // appropriate Unicode BiDi character type.
50 int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
51 if ((property == U_RIGHT_TO_LEFT) ||
52 (property == U_RIGHT_TO_LEFT_ARABIC) ||
53 (property == U_RIGHT_TO_LEFT_EMBEDDING) ||
54 (property == U_RIGHT_TO_LEFT_OVERRIDE)) {
55 return base::i18n::RIGHT_TO_LEFT;
56 } else if ((property == U_LEFT_TO_RIGHT) ||
57 (property == U_LEFT_TO_RIGHT_EMBEDDING) ||
58 (property == U_LEFT_TO_RIGHT_OVERRIDE)) {
59 return base::i18n::LEFT_TO_RIGHT;
61 return base::i18n::UNKNOWN_DIRECTION;
64 } // namespace
66 namespace base {
67 namespace i18n {
69 // Represents the locale-specific ICU text direction.
70 static TextDirection g_icu_text_direction = UNKNOWN_DIRECTION;
72 // Convert the ICU default locale to a string.
73 std::string GetConfiguredLocale() {
74 return GetLocaleString(icu::Locale::getDefault());
77 // Convert the ICU canonicalized locale to a string.
78 std::string GetCanonicalLocale(const std::string& locale) {
79 return GetLocaleString(icu::Locale::createCanonical(locale.c_str()));
82 // Convert Chrome locale name to ICU locale name
83 std::string ICULocaleName(const std::string& locale_string) {
84 // If not Spanish, just return it.
85 if (locale_string.substr(0, 2) != "es")
86 return locale_string;
87 // Expand es to es-ES.
88 if (LowerCaseEqualsASCII(locale_string, "es"))
89 return "es-ES";
90 // Map es-419 (Latin American Spanish) to es-FOO depending on the system
91 // locale. If it's es-RR other than es-ES, map to es-RR. Otherwise, map
92 // to es-MX (the most populous in Spanish-speaking Latin America).
93 if (LowerCaseEqualsASCII(locale_string, "es-419")) {
94 const icu::Locale& locale = icu::Locale::getDefault();
95 std::string language = locale.getLanguage();
96 const char* country = locale.getCountry();
97 if (LowerCaseEqualsASCII(language, "es") &&
98 !LowerCaseEqualsASCII(country, "es")) {
99 language += '-';
100 language += country;
101 return language;
103 return "es-MX";
105 // Currently, Chrome has only "es" and "es-419", but later we may have
106 // more specific "es-RR".
107 return locale_string;
110 void SetICUDefaultLocale(const std::string& locale_string) {
111 icu::Locale locale(ICULocaleName(locale_string).c_str());
112 UErrorCode error_code = U_ZERO_ERROR;
113 icu::Locale::setDefault(locale, error_code);
114 // This return value is actually bogus because Locale object is
115 // an ID and setDefault seems to always succeed (regardless of the
116 // presence of actual locale data). However,
117 // it does not hurt to have it as a sanity check.
118 DCHECK(U_SUCCESS(error_code));
119 g_icu_text_direction = UNKNOWN_DIRECTION;
122 bool IsRTL() {
123 return ICUIsRTL();
126 bool ICUIsRTL() {
127 if (g_icu_text_direction == UNKNOWN_DIRECTION) {
128 const icu::Locale& locale = icu::Locale::getDefault();
129 g_icu_text_direction = GetTextDirectionForLocale(locale.getName());
131 return g_icu_text_direction == RIGHT_TO_LEFT;
134 TextDirection GetTextDirectionForLocale(const char* locale_name) {
135 const std::string group_name =
136 FieldTrialList::FindFullName("LightSpeed");
137 // StartsWith allows flexibility for this experiment to apply to multiple
138 // group names. To start, this will apply to AvoidMMapOnStartup.
139 if (StartsWith(group_name, "AvoidMMap", CompareCase::SENSITIVE)) {
140 static const char kEnglishLocale[] = "en_";
141 if (StartsWith(locale_name, kEnglishLocale, CompareCase::SENSITIVE))
142 return LEFT_TO_RIGHT;
144 UErrorCode status = U_ZERO_ERROR;
145 ULayoutType layout_dir = uloc_getCharacterOrientation(locale_name, &status);
146 DCHECK(U_SUCCESS(status));
147 // Treat anything other than RTL as LTR.
148 return (layout_dir != ULOC_LAYOUT_RTL) ? LEFT_TO_RIGHT : RIGHT_TO_LEFT;
151 TextDirection GetFirstStrongCharacterDirection(const string16& text) {
152 const UChar* string = text.c_str();
153 size_t length = text.length();
154 size_t position = 0;
155 while (position < length) {
156 UChar32 character;
157 size_t next_position = position;
158 U16_NEXT(string, next_position, length, character);
159 TextDirection direction = GetCharacterDirection(character);
160 if (direction != UNKNOWN_DIRECTION)
161 return direction;
162 position = next_position;
164 return LEFT_TO_RIGHT;
167 TextDirection GetLastStrongCharacterDirection(const string16& text) {
168 const UChar* string = text.c_str();
169 size_t position = text.length();
170 while (position > 0) {
171 UChar32 character;
172 size_t prev_position = position;
173 U16_PREV(string, 0, prev_position, character);
174 TextDirection direction = GetCharacterDirection(character);
175 if (direction != UNKNOWN_DIRECTION)
176 return direction;
177 position = prev_position;
179 return LEFT_TO_RIGHT;
182 TextDirection GetStringDirection(const string16& text) {
183 const UChar* string = text.c_str();
184 size_t length = text.length();
185 size_t position = 0;
187 TextDirection result(UNKNOWN_DIRECTION);
188 while (position < length) {
189 UChar32 character;
190 size_t next_position = position;
191 U16_NEXT(string, next_position, length, character);
192 TextDirection direction = GetCharacterDirection(character);
193 if (direction != UNKNOWN_DIRECTION) {
194 if (result != UNKNOWN_DIRECTION && result != direction)
195 return UNKNOWN_DIRECTION;
196 result = direction;
198 position = next_position;
201 // Handle the case of a string not containing any strong directionality
202 // characters defaulting to LEFT_TO_RIGHT.
203 if (result == UNKNOWN_DIRECTION)
204 return LEFT_TO_RIGHT;
206 return result;
209 #if defined(OS_WIN)
210 bool AdjustStringForLocaleDirection(string16* text) {
211 if (!IsRTL() || text->empty())
212 return false;
214 // Marking the string as LTR if the locale is RTL and the string does not
215 // contain strong RTL characters. Otherwise, mark the string as RTL.
216 bool has_rtl_chars = StringContainsStrongRTLChars(*text);
217 if (!has_rtl_chars)
218 WrapStringWithLTRFormatting(text);
219 else
220 WrapStringWithRTLFormatting(text);
222 return true;
225 bool UnadjustStringForLocaleDirection(string16* text) {
226 if (!IsRTL() || text->empty())
227 return false;
229 *text = StripWrappingBidiControlCharacters(*text);
230 return true;
232 #else
233 bool AdjustStringForLocaleDirection(string16* text) {
234 // On OS X & GTK the directionality of a label is determined by the first
235 // strongly directional character.
236 // However, we want to make sure that in an LTR-language-UI all strings are
237 // left aligned and vice versa.
238 // A problem can arise if we display a string which starts with user input.
239 // User input may be of the opposite directionality to the UI. So the whole
240 // string will be displayed in the opposite directionality, e.g. if we want to
241 // display in an LTR UI [such as US English]:
243 // EMAN_NOISNETXE is now installed.
245 // Since EXTENSION_NAME begins with a strong RTL char, the label's
246 // directionality will be set to RTL and the string will be displayed visually
247 // as:
249 // .is now installed EMAN_NOISNETXE
251 // In order to solve this issue, we prepend an LRM to the string. An LRM is a
252 // strongly directional LTR char.
253 // We also append an LRM at the end, which ensures that we're in an LTR
254 // context.
256 // Unlike Windows, Linux and OS X can correctly display RTL glyphs out of the
257 // box so there is no issue with displaying zero-width bidi control characters
258 // on any system. Thus no need for the !IsRTL() check here.
259 if (text->empty())
260 return false;
262 bool ui_direction_is_rtl = IsRTL();
264 bool has_rtl_chars = StringContainsStrongRTLChars(*text);
265 if (!ui_direction_is_rtl && has_rtl_chars) {
266 WrapStringWithRTLFormatting(text);
267 text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
268 kLeftToRightMark);
269 text->push_back(kLeftToRightMark);
270 } else if (ui_direction_is_rtl && has_rtl_chars) {
271 WrapStringWithRTLFormatting(text);
272 text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
273 kRightToLeftMark);
274 text->push_back(kRightToLeftMark);
275 } else if (ui_direction_is_rtl) {
276 WrapStringWithLTRFormatting(text);
277 text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
278 kRightToLeftMark);
279 text->push_back(kRightToLeftMark);
280 } else {
281 return false;
284 return true;
287 bool UnadjustStringForLocaleDirection(string16* text) {
288 if (text->empty())
289 return false;
291 size_t begin_index = 0;
292 char16 begin = text->at(begin_index);
293 if (begin == kLeftToRightMark ||
294 begin == kRightToLeftMark) {
295 ++begin_index;
298 size_t end_index = text->length() - 1;
299 char16 end = text->at(end_index);
300 if (end == kLeftToRightMark ||
301 end == kRightToLeftMark) {
302 --end_index;
305 string16 unmarked_text =
306 text->substr(begin_index, end_index - begin_index + 1);
307 *text = StripWrappingBidiControlCharacters(unmarked_text);
308 return true;
311 #endif // !OS_WIN
313 bool StringContainsStrongRTLChars(const string16& text) {
314 const UChar* string = text.c_str();
315 size_t length = text.length();
316 size_t position = 0;
317 while (position < length) {
318 UChar32 character;
319 size_t next_position = position;
320 U16_NEXT(string, next_position, length, character);
322 // Now that we have the character, we use ICU in order to query for the
323 // appropriate Unicode BiDi character type.
324 int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
325 if ((property == U_RIGHT_TO_LEFT) || (property == U_RIGHT_TO_LEFT_ARABIC))
326 return true;
328 position = next_position;
331 return false;
334 void WrapStringWithLTRFormatting(string16* text) {
335 if (text->empty())
336 return;
338 // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
339 text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
340 kLeftToRightEmbeddingMark);
342 // Inserting a PDF (Pop Directional Formatting) mark as the last character.
343 text->push_back(kPopDirectionalFormatting);
346 void WrapStringWithRTLFormatting(string16* text) {
347 if (text->empty())
348 return;
350 // Inserting an RLE (Right-To-Left Embedding) mark as the first character.
351 text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
352 kRightToLeftEmbeddingMark);
354 // Inserting a PDF (Pop Directional Formatting) mark as the last character.
355 text->push_back(kPopDirectionalFormatting);
358 void WrapPathWithLTRFormatting(const FilePath& path,
359 string16* rtl_safe_path) {
360 // Wrap the overall path with LRE-PDF pair which essentialy marks the
361 // string as a Left-To-Right string.
362 // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
363 rtl_safe_path->push_back(kLeftToRightEmbeddingMark);
364 #if defined(OS_MACOSX)
365 rtl_safe_path->append(UTF8ToUTF16(path.value()));
366 #elif defined(OS_WIN)
367 rtl_safe_path->append(path.value());
368 #else // defined(OS_POSIX) && !defined(OS_MACOSX)
369 std::wstring wide_path = base::SysNativeMBToWide(path.value());
370 rtl_safe_path->append(WideToUTF16(wide_path));
371 #endif
372 // Inserting a PDF (Pop Directional Formatting) mark as the last character.
373 rtl_safe_path->push_back(kPopDirectionalFormatting);
376 string16 GetDisplayStringInLTRDirectionality(const string16& text) {
377 // Always wrap the string in RTL UI (it may be appended to RTL string).
378 // Also wrap strings with an RTL first strong character direction in LTR UI.
379 if (IsRTL() || GetFirstStrongCharacterDirection(text) == RIGHT_TO_LEFT) {
380 string16 text_mutable(text);
381 WrapStringWithLTRFormatting(&text_mutable);
382 return text_mutable;
384 return text;
387 string16 StripWrappingBidiControlCharacters(const string16& text) {
388 if (text.empty())
389 return text;
390 size_t begin_index = 0;
391 char16 begin = text[begin_index];
392 if (begin == kLeftToRightEmbeddingMark ||
393 begin == kRightToLeftEmbeddingMark ||
394 begin == kLeftToRightOverride ||
395 begin == kRightToLeftOverride)
396 ++begin_index;
397 size_t end_index = text.length() - 1;
398 if (text[end_index] == kPopDirectionalFormatting)
399 --end_index;
400 return text.substr(begin_index, end_index - begin_index + 1);
403 } // namespace i18n
404 } // namespace base