// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/i18n/rtl.h"

#include <algorithm>
#include <string>
#include <vector>

#include "base/files/file_path.h"
#include "base/logging.h"
#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
#include "base/strings/sys_string_conversions.h"
#include "base/strings/utf_string_conversions.h"
#include "third_party/icu/source/common/unicode/locid.h"
#include "third_party/icu/source/common/unicode/uchar.h"
#include "third_party/icu/source/common/unicode/uscript.h"
#include "third_party/icu/source/i18n/unicode/coll.h"

#if defined(OS_IOS)
#include "base/ios/ios_util.h"
#endif
26 // Extract language, country and variant, but ignore keywords. For example,
27 // en-US, ca@valencia, ca-ES@valencia.
28 std::string
GetLocaleString(const icu::Locale
& locale
) {
29 const char* language
= locale
.getLanguage();
30 const char* country
= locale
.getCountry();
31 const char* variant
= locale
.getVariant();
34 (language
!= NULL
&& *language
!= '\0') ? language
: "und";
36 if (country
!= NULL
&& *country
!= '\0') {
41 if (variant
!= NULL
&& *variant
!= '\0')
42 result
+= '@' + base::ToLowerASCII(variant
);
47 // Returns LEFT_TO_RIGHT or RIGHT_TO_LEFT if |character| has strong
48 // directionality, returns UNKNOWN_DIRECTION if it doesn't. Please refer to
49 // http://unicode.org/reports/tr9/ for more information.
50 base::i18n::TextDirection
GetCharacterDirection(UChar32 character
) {
51 // Now that we have the character, we use ICU in order to query for the
52 // appropriate Unicode BiDi character type.
53 int32_t property
= u_getIntPropertyValue(character
, UCHAR_BIDI_CLASS
);
54 if ((property
== U_RIGHT_TO_LEFT
) ||
55 (property
== U_RIGHT_TO_LEFT_ARABIC
) ||
56 (property
== U_RIGHT_TO_LEFT_EMBEDDING
) ||
57 (property
== U_RIGHT_TO_LEFT_OVERRIDE
)) {
58 return base::i18n::RIGHT_TO_LEFT
;
59 } else if ((property
== U_LEFT_TO_RIGHT
) ||
60 (property
== U_LEFT_TO_RIGHT_EMBEDDING
) ||
61 (property
== U_LEFT_TO_RIGHT_OVERRIDE
)) {
62 return base::i18n::LEFT_TO_RIGHT
;
64 return base::i18n::UNKNOWN_DIRECTION
;
72 // Represents the locale-specific ICU text direction.
73 static TextDirection g_icu_text_direction
= UNKNOWN_DIRECTION
;
75 // Convert the ICU default locale to a string.
76 std::string
GetConfiguredLocale() {
77 return GetLocaleString(icu::Locale::getDefault());
80 // Convert the ICU canonicalized locale to a string.
81 std::string
GetCanonicalLocale(const std::string
& locale
) {
82 return GetLocaleString(icu::Locale::createCanonical(locale
.c_str()));
85 // Convert Chrome locale name to ICU locale name
86 std::string
ICULocaleName(const std::string
& locale_string
) {
87 // If not Spanish, just return it.
88 if (locale_string
.substr(0, 2) != "es")
90 // Expand es to es-ES.
91 if (LowerCaseEqualsASCII(locale_string
, "es"))
93 // Map es-419 (Latin American Spanish) to es-FOO depending on the system
94 // locale. If it's es-RR other than es-ES, map to es-RR. Otherwise, map
95 // to es-MX (the most populous in Spanish-speaking Latin America).
96 if (LowerCaseEqualsASCII(locale_string
, "es-419")) {
97 const icu::Locale
& locale
= icu::Locale::getDefault();
98 std::string language
= locale
.getLanguage();
99 const char* country
= locale
.getCountry();
100 if (LowerCaseEqualsASCII(language
, "es") &&
101 !LowerCaseEqualsASCII(country
, "es")) {
108 // Currently, Chrome has only "es" and "es-419", but later we may have
109 // more specific "es-RR".
110 return locale_string
;
113 void SetICUDefaultLocale(const std::string
& locale_string
) {
114 icu::Locale
locale(ICULocaleName(locale_string
).c_str());
115 UErrorCode error_code
= U_ZERO_ERROR
;
116 icu::Locale::setDefault(locale
, error_code
);
117 // This return value is actually bogus because Locale object is
118 // an ID and setDefault seems to always succeed (regardless of the
119 // presence of actual locale data). However,
120 // it does not hurt to have it as a sanity check.
121 DCHECK(U_SUCCESS(error_code
));
122 g_icu_text_direction
= UNKNOWN_DIRECTION
;
130 if (g_icu_text_direction
== UNKNOWN_DIRECTION
) {
131 const icu::Locale
& locale
= icu::Locale::getDefault();
132 g_icu_text_direction
= GetTextDirectionForLocaleInStartUp(locale
.getName());
134 return g_icu_text_direction
== RIGHT_TO_LEFT
;
137 TextDirection
GetTextDirectionForLocaleInStartUp(const char* locale_name
) {
138 // On iOS, check for RTL forcing.
140 if (ios::IsInForcedRTL())
141 return RIGHT_TO_LEFT
;
144 // This list needs to be updated in alphabetical order if we add more RTL
146 static const char* kRTLLanguageCodes
[] = {"ar", "fa", "he", "iw", "ur"};
147 std::vector
<StringPiece
> locale_split
=
148 SplitStringPiece(locale_name
, "-_", KEEP_WHITESPACE
, SPLIT_WANT_ALL
);
149 const StringPiece
& language_code
= locale_split
[0];
150 if (std::binary_search(kRTLLanguageCodes
,
151 kRTLLanguageCodes
+ arraysize(kRTLLanguageCodes
),
153 return RIGHT_TO_LEFT
;
154 return LEFT_TO_RIGHT
;
157 TextDirection
GetTextDirectionForLocale(const char* locale_name
) {
158 // On iOS, check for RTL forcing.
160 if (ios::IsInForcedRTL())
161 return RIGHT_TO_LEFT
;
164 UErrorCode status
= U_ZERO_ERROR
;
165 ULayoutType layout_dir
= uloc_getCharacterOrientation(locale_name
, &status
);
166 DCHECK(U_SUCCESS(status
));
167 // Treat anything other than RTL as LTR.
168 return (layout_dir
!= ULOC_LAYOUT_RTL
) ? LEFT_TO_RIGHT
: RIGHT_TO_LEFT
;
171 TextDirection
GetFirstStrongCharacterDirection(const string16
& text
) {
172 const UChar
* string
= text
.c_str();
173 size_t length
= text
.length();
175 while (position
< length
) {
177 size_t next_position
= position
;
178 U16_NEXT(string
, next_position
, length
, character
);
179 TextDirection direction
= GetCharacterDirection(character
);
180 if (direction
!= UNKNOWN_DIRECTION
)
182 position
= next_position
;
184 return LEFT_TO_RIGHT
;
187 TextDirection
GetLastStrongCharacterDirection(const string16
& text
) {
188 const UChar
* string
= text
.c_str();
189 size_t position
= text
.length();
190 while (position
> 0) {
192 size_t prev_position
= position
;
193 U16_PREV(string
, 0, prev_position
, character
);
194 TextDirection direction
= GetCharacterDirection(character
);
195 if (direction
!= UNKNOWN_DIRECTION
)
197 position
= prev_position
;
199 return LEFT_TO_RIGHT
;
202 TextDirection
GetStringDirection(const string16
& text
) {
203 const UChar
* string
= text
.c_str();
204 size_t length
= text
.length();
207 TextDirection
result(UNKNOWN_DIRECTION
);
208 while (position
< length
) {
210 size_t next_position
= position
;
211 U16_NEXT(string
, next_position
, length
, character
);
212 TextDirection direction
= GetCharacterDirection(character
);
213 if (direction
!= UNKNOWN_DIRECTION
) {
214 if (result
!= UNKNOWN_DIRECTION
&& result
!= direction
)
215 return UNKNOWN_DIRECTION
;
218 position
= next_position
;
221 // Handle the case of a string not containing any strong directionality
222 // characters defaulting to LEFT_TO_RIGHT.
223 if (result
== UNKNOWN_DIRECTION
)
224 return LEFT_TO_RIGHT
;
230 bool AdjustStringForLocaleDirection(string16
* text
) {
231 if (!IsRTL() || text
->empty())
234 // Marking the string as LTR if the locale is RTL and the string does not
235 // contain strong RTL characters. Otherwise, mark the string as RTL.
236 bool has_rtl_chars
= StringContainsStrongRTLChars(*text
);
238 WrapStringWithLTRFormatting(text
);
240 WrapStringWithRTLFormatting(text
);
245 bool UnadjustStringForLocaleDirection(string16
* text
) {
246 if (!IsRTL() || text
->empty())
249 *text
= StripWrappingBidiControlCharacters(*text
);
253 bool AdjustStringForLocaleDirection(string16
* text
) {
254 // On OS X & GTK the directionality of a label is determined by the first
255 // strongly directional character.
256 // However, we want to make sure that in an LTR-language-UI all strings are
257 // left aligned and vice versa.
258 // A problem can arise if we display a string which starts with user input.
259 // User input may be of the opposite directionality to the UI. So the whole
260 // string will be displayed in the opposite directionality, e.g. if we want to
261 // display in an LTR UI [such as US English]:
263 // EMAN_NOISNETXE is now installed.
265 // Since EXTENSION_NAME begins with a strong RTL char, the label's
266 // directionality will be set to RTL and the string will be displayed visually
269 // .is now installed EMAN_NOISNETXE
271 // In order to solve this issue, we prepend an LRM to the string. An LRM is a
272 // strongly directional LTR char.
273 // We also append an LRM at the end, which ensures that we're in an LTR
276 // Unlike Windows, Linux and OS X can correctly display RTL glyphs out of the
277 // box so there is no issue with displaying zero-width bidi control characters
278 // on any system. Thus no need for the !IsRTL() check here.
282 bool ui_direction_is_rtl
= IsRTL();
284 bool has_rtl_chars
= StringContainsStrongRTLChars(*text
);
285 if (!ui_direction_is_rtl
&& has_rtl_chars
) {
286 WrapStringWithRTLFormatting(text
);
287 text
->insert(static_cast<size_t>(0), static_cast<size_t>(1),
289 text
->push_back(kLeftToRightMark
);
290 } else if (ui_direction_is_rtl
&& has_rtl_chars
) {
291 WrapStringWithRTLFormatting(text
);
292 text
->insert(static_cast<size_t>(0), static_cast<size_t>(1),
294 text
->push_back(kRightToLeftMark
);
295 } else if (ui_direction_is_rtl
) {
296 WrapStringWithLTRFormatting(text
);
297 text
->insert(static_cast<size_t>(0), static_cast<size_t>(1),
299 text
->push_back(kRightToLeftMark
);
307 bool UnadjustStringForLocaleDirection(string16
* text
) {
311 size_t begin_index
= 0;
312 char16 begin
= text
->at(begin_index
);
313 if (begin
== kLeftToRightMark
||
314 begin
== kRightToLeftMark
) {
318 size_t end_index
= text
->length() - 1;
319 char16 end
= text
->at(end_index
);
320 if (end
== kLeftToRightMark
||
321 end
== kRightToLeftMark
) {
325 string16 unmarked_text
=
326 text
->substr(begin_index
, end_index
- begin_index
+ 1);
327 *text
= StripWrappingBidiControlCharacters(unmarked_text
);
333 bool StringContainsStrongRTLChars(const string16
& text
) {
334 const UChar
* string
= text
.c_str();
335 size_t length
= text
.length();
337 while (position
< length
) {
339 size_t next_position
= position
;
340 U16_NEXT(string
, next_position
, length
, character
);
342 // Now that we have the character, we use ICU in order to query for the
343 // appropriate Unicode BiDi character type.
344 int32_t property
= u_getIntPropertyValue(character
, UCHAR_BIDI_CLASS
);
345 if ((property
== U_RIGHT_TO_LEFT
) || (property
== U_RIGHT_TO_LEFT_ARABIC
))
348 position
= next_position
;
354 void WrapStringWithLTRFormatting(string16
* text
) {
358 // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
359 text
->insert(static_cast<size_t>(0), static_cast<size_t>(1),
360 kLeftToRightEmbeddingMark
);
362 // Inserting a PDF (Pop Directional Formatting) mark as the last character.
363 text
->push_back(kPopDirectionalFormatting
);
366 void WrapStringWithRTLFormatting(string16
* text
) {
370 // Inserting an RLE (Right-To-Left Embedding) mark as the first character.
371 text
->insert(static_cast<size_t>(0), static_cast<size_t>(1),
372 kRightToLeftEmbeddingMark
);
374 // Inserting a PDF (Pop Directional Formatting) mark as the last character.
375 text
->push_back(kPopDirectionalFormatting
);
378 void WrapPathWithLTRFormatting(const FilePath
& path
,
379 string16
* rtl_safe_path
) {
380 // Wrap the overall path with LRE-PDF pair which essentialy marks the
381 // string as a Left-To-Right string.
382 // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
383 rtl_safe_path
->push_back(kLeftToRightEmbeddingMark
);
384 #if defined(OS_MACOSX)
385 rtl_safe_path
->append(UTF8ToUTF16(path
.value()));
386 #elif defined(OS_WIN)
387 rtl_safe_path
->append(path
.value());
388 #else // defined(OS_POSIX) && !defined(OS_MACOSX)
389 std::wstring wide_path
= base::SysNativeMBToWide(path
.value());
390 rtl_safe_path
->append(WideToUTF16(wide_path
));
392 // Inserting a PDF (Pop Directional Formatting) mark as the last character.
393 rtl_safe_path
->push_back(kPopDirectionalFormatting
);
396 string16
GetDisplayStringInLTRDirectionality(const string16
& text
) {
397 // Always wrap the string in RTL UI (it may be appended to RTL string).
398 // Also wrap strings with an RTL first strong character direction in LTR UI.
399 if (IsRTL() || GetFirstStrongCharacterDirection(text
) == RIGHT_TO_LEFT
) {
400 string16
text_mutable(text
);
401 WrapStringWithLTRFormatting(&text_mutable
);
407 string16
StripWrappingBidiControlCharacters(const string16
& text
) {
410 size_t begin_index
= 0;
411 char16 begin
= text
[begin_index
];
412 if (begin
== kLeftToRightEmbeddingMark
||
413 begin
== kRightToLeftEmbeddingMark
||
414 begin
== kLeftToRightOverride
||
415 begin
== kRightToLeftOverride
)
417 size_t end_index
= text
.length() - 1;
418 if (text
[end_index
] == kPopDirectionalFormatting
)
420 return text
.substr(begin_index
, end_index
- begin_index
+ 1);