rAc - revert invalid suggestions to edit mode
[chromium-blink-merge.git] / base / i18n / rtl.cc
blob851b03642defb3ce43742b1a74405e8f05362cd5
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/i18n/rtl.h"
7 #include "base/files/file_path.h"
8 #include "base/logging.h"
9 #include "base/strings/string_util.h"
10 #include "base/strings/sys_string_conversions.h"
11 #include "base/strings/utf_string_conversions.h"
12 #include "third_party/icu/source/common/unicode/locid.h"
13 #include "third_party/icu/source/common/unicode/uchar.h"
14 #include "third_party/icu/source/common/unicode/uscript.h"
15 #include "third_party/icu/source/i18n/unicode/coll.h"
17 #if defined(TOOLKIT_GTK)
18 #include <gtk/gtk.h>
19 #endif
21 namespace {
23 // Extract language, country and variant, but ignore keywords. For example,
24 // en-US, ca@valencia, ca-ES@valencia.
25 std::string GetLocaleString(const icu::Locale& locale) {
26 const char* language = locale.getLanguage();
27 const char* country = locale.getCountry();
28 const char* variant = locale.getVariant();
30 std::string result =
31 (language != NULL && *language != '\0') ? language : "und";
33 if (country != NULL && *country != '\0') {
34 result += '-';
35 result += country;
38 if (variant != NULL && *variant != '\0') {
39 std::string variant_str(variant);
40 StringToLowerASCII(&variant_str);
41 result += '@' + variant_str;
44 return result;
47 // Returns LEFT_TO_RIGHT or RIGHT_TO_LEFT if |character| has strong
48 // directionality, returns UNKNOWN_DIRECTION if it doesn't. Please refer to
49 // http://unicode.org/reports/tr9/ for more information.
50 base::i18n::TextDirection GetCharacterDirection(UChar32 character) {
51 // Now that we have the character, we use ICU in order to query for the
52 // appropriate Unicode BiDi character type.
53 int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
54 if ((property == U_RIGHT_TO_LEFT) ||
55 (property == U_RIGHT_TO_LEFT_ARABIC) ||
56 (property == U_RIGHT_TO_LEFT_EMBEDDING) ||
57 (property == U_RIGHT_TO_LEFT_OVERRIDE)) {
58 return base::i18n::RIGHT_TO_LEFT;
59 } else if ((property == U_LEFT_TO_RIGHT) ||
60 (property == U_LEFT_TO_RIGHT_EMBEDDING) ||
61 (property == U_LEFT_TO_RIGHT_OVERRIDE)) {
62 return base::i18n::LEFT_TO_RIGHT;
64 return base::i18n::UNKNOWN_DIRECTION;
67 } // namespace
69 namespace base {
70 namespace i18n {
72 // Represents the locale-specific ICU text direction.
73 static TextDirection g_icu_text_direction = UNKNOWN_DIRECTION;
75 // Convert the ICU default locale to a string.
76 std::string GetConfiguredLocale() {
77 return GetLocaleString(icu::Locale::getDefault());
80 // Convert the ICU canonicalized locale to a string.
81 std::string GetCanonicalLocale(const char* locale) {
82 return GetLocaleString(icu::Locale::createCanonical(locale));
85 // Convert Chrome locale name to ICU locale name
86 std::string ICULocaleName(const std::string& locale_string) {
87 // If not Spanish, just return it.
88 if (locale_string.substr(0, 2) != "es")
89 return locale_string;
90 // Expand es to es-ES.
91 if (LowerCaseEqualsASCII(locale_string, "es"))
92 return "es-ES";
93 // Map es-419 (Latin American Spanish) to es-FOO depending on the system
94 // locale. If it's es-RR other than es-ES, map to es-RR. Otherwise, map
95 // to es-MX (the most populous in Spanish-speaking Latin America).
96 if (LowerCaseEqualsASCII(locale_string, "es-419")) {
97 const icu::Locale& locale = icu::Locale::getDefault();
98 std::string language = locale.getLanguage();
99 const char* country = locale.getCountry();
100 if (LowerCaseEqualsASCII(language, "es") &&
101 !LowerCaseEqualsASCII(country, "es")) {
102 language += '-';
103 language += country;
104 return language;
106 return "es-MX";
108 // Currently, Chrome has only "es" and "es-419", but later we may have
109 // more specific "es-RR".
110 return locale_string;
113 void SetICUDefaultLocale(const std::string& locale_string) {
114 icu::Locale locale(ICULocaleName(locale_string).c_str());
115 UErrorCode error_code = U_ZERO_ERROR;
116 icu::Locale::setDefault(locale, error_code);
117 // This return value is actually bogus because Locale object is
118 // an ID and setDefault seems to always succeed (regardless of the
119 // presence of actual locale data). However,
120 // it does not hurt to have it as a sanity check.
121 DCHECK(U_SUCCESS(error_code));
122 g_icu_text_direction = UNKNOWN_DIRECTION;
125 bool IsRTL() {
126 #if defined(TOOLKIT_GTK)
127 GtkTextDirection gtk_dir = gtk_widget_get_default_direction();
128 return gtk_dir == GTK_TEXT_DIR_RTL;
129 #else
130 return ICUIsRTL();
131 #endif
134 bool ICUIsRTL() {
135 if (g_icu_text_direction == UNKNOWN_DIRECTION) {
136 const icu::Locale& locale = icu::Locale::getDefault();
137 g_icu_text_direction = GetTextDirectionForLocale(locale.getName());
139 return g_icu_text_direction == RIGHT_TO_LEFT;
142 TextDirection GetTextDirectionForLocale(const char* locale_name) {
143 UErrorCode status = U_ZERO_ERROR;
144 ULayoutType layout_dir = uloc_getCharacterOrientation(locale_name, &status);
145 DCHECK(U_SUCCESS(status));
146 // Treat anything other than RTL as LTR.
147 return (layout_dir != ULOC_LAYOUT_RTL) ? LEFT_TO_RIGHT : RIGHT_TO_LEFT;
150 TextDirection GetFirstStrongCharacterDirection(const string16& text) {
151 const UChar* string = text.c_str();
152 size_t length = text.length();
153 size_t position = 0;
154 while (position < length) {
155 UChar32 character;
156 size_t next_position = position;
157 U16_NEXT(string, next_position, length, character);
158 TextDirection direction = GetCharacterDirection(character);
159 if (direction != UNKNOWN_DIRECTION)
160 return direction;
161 position = next_position;
163 return LEFT_TO_RIGHT;
166 TextDirection GetLastStrongCharacterDirection(const string16& text) {
167 const UChar* string = text.c_str();
168 size_t position = text.length();
169 while (position > 0) {
170 UChar32 character;
171 size_t prev_position = position;
172 U16_PREV(string, 0, prev_position, character);
173 TextDirection direction = GetCharacterDirection(character);
174 if (direction != UNKNOWN_DIRECTION)
175 return direction;
176 position = prev_position;
178 return LEFT_TO_RIGHT;
181 TextDirection GetStringDirection(const string16& text) {
182 const UChar* string = text.c_str();
183 size_t length = text.length();
184 size_t position = 0;
186 TextDirection result(UNKNOWN_DIRECTION);
187 while (position < length) {
188 UChar32 character;
189 size_t next_position = position;
190 U16_NEXT(string, next_position, length, character);
191 TextDirection direction = GetCharacterDirection(character);
192 if (direction != UNKNOWN_DIRECTION) {
193 if (result != UNKNOWN_DIRECTION && result != direction)
194 return UNKNOWN_DIRECTION;
195 result = direction;
197 position = next_position;
200 // Handle the case of a string not containing any strong directionality
201 // characters defaulting to LEFT_TO_RIGHT.
202 if (result == UNKNOWN_DIRECTION)
203 return LEFT_TO_RIGHT;
205 return result;
208 #if defined(OS_WIN)
209 bool AdjustStringForLocaleDirection(string16* text) {
210 if (!IsRTL() || text->empty())
211 return false;
213 // Marking the string as LTR if the locale is RTL and the string does not
214 // contain strong RTL characters. Otherwise, mark the string as RTL.
215 bool has_rtl_chars = StringContainsStrongRTLChars(*text);
216 if (!has_rtl_chars)
217 WrapStringWithLTRFormatting(text);
218 else
219 WrapStringWithRTLFormatting(text);
221 return true;
224 bool UnadjustStringForLocaleDirection(string16* text) {
225 if (!IsRTL() || text->empty())
226 return false;
228 *text = StripWrappingBidiControlCharacters(*text);
229 return true;
231 #else
232 bool AdjustStringForLocaleDirection(string16* text) {
233 // On OS X & GTK the directionality of a label is determined by the first
234 // strongly directional character.
235 // However, we want to make sure that in an LTR-language-UI all strings are
236 // left aligned and vice versa.
237 // A problem can arise if we display a string which starts with user input.
238 // User input may be of the opposite directionality to the UI. So the whole
239 // string will be displayed in the opposite directionality, e.g. if we want to
240 // display in an LTR UI [such as US English]:
242 // EMAN_NOISNETXE is now installed.
244 // Since EXTENSION_NAME begins with a strong RTL char, the label's
245 // directionality will be set to RTL and the string will be displayed visually
246 // as:
248 // .is now installed EMAN_NOISNETXE
250 // In order to solve this issue, we prepend an LRM to the string. An LRM is a
251 // strongly directional LTR char.
252 // We also append an LRM at the end, which ensures that we're in an LTR
253 // context.
255 // Unlike Windows, Linux and OS X can correctly display RTL glyphs out of the
256 // box so there is no issue with displaying zero-width bidi control characters
257 // on any system. Thus no need for the !IsRTL() check here.
258 if (text->empty())
259 return false;
261 bool ui_direction_is_rtl = IsRTL();
263 bool has_rtl_chars = StringContainsStrongRTLChars(*text);
264 if (!ui_direction_is_rtl && has_rtl_chars) {
265 WrapStringWithRTLFormatting(text);
266 text->insert(0U, 1U, kLeftToRightMark);
267 text->push_back(kLeftToRightMark);
268 } else if (ui_direction_is_rtl && has_rtl_chars) {
269 WrapStringWithRTLFormatting(text);
270 text->insert(0U, 1U, kRightToLeftMark);
271 text->push_back(kRightToLeftMark);
272 } else if (ui_direction_is_rtl) {
273 WrapStringWithLTRFormatting(text);
274 text->insert(0U, 1U, kRightToLeftMark);
275 text->push_back(kRightToLeftMark);
276 } else {
277 return false;
280 return true;
283 bool UnadjustStringForLocaleDirection(string16* text) {
284 if (text->empty())
285 return false;
287 size_t begin_index = 0;
288 char16 begin = text->at(begin_index);
289 if (begin == kLeftToRightMark ||
290 begin == kRightToLeftMark) {
291 ++begin_index;
294 size_t end_index = text->length() - 1;
295 char16 end = text->at(end_index);
296 if (end == kLeftToRightMark ||
297 end == kRightToLeftMark) {
298 --end_index;
301 string16 unmarked_text =
302 text->substr(begin_index, end_index - begin_index + 1);
303 *text = StripWrappingBidiControlCharacters(unmarked_text);
304 return true;
307 #endif // !OS_WIN
309 bool StringContainsStrongRTLChars(const string16& text) {
310 const UChar* string = text.c_str();
311 size_t length = text.length();
312 size_t position = 0;
313 while (position < length) {
314 UChar32 character;
315 size_t next_position = position;
316 U16_NEXT(string, next_position, length, character);
318 // Now that we have the character, we use ICU in order to query for the
319 // appropriate Unicode BiDi character type.
320 int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
321 if ((property == U_RIGHT_TO_LEFT) || (property == U_RIGHT_TO_LEFT_ARABIC))
322 return true;
324 position = next_position;
327 return false;
330 void WrapStringWithLTRFormatting(string16* text) {
331 if (text->empty())
332 return;
334 // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
335 text->insert(0U, 1U, kLeftToRightEmbeddingMark);
337 // Inserting a PDF (Pop Directional Formatting) mark as the last character.
338 text->push_back(kPopDirectionalFormatting);
341 void WrapStringWithRTLFormatting(string16* text) {
342 if (text->empty())
343 return;
345 // Inserting an RLE (Right-To-Left Embedding) mark as the first character.
346 text->insert(0U, 1U, kRightToLeftEmbeddingMark);
348 // Inserting a PDF (Pop Directional Formatting) mark as the last character.
349 text->push_back(kPopDirectionalFormatting);
352 void WrapPathWithLTRFormatting(const FilePath& path,
353 string16* rtl_safe_path) {
354 // Wrap the overall path with LRE-PDF pair which essentialy marks the
355 // string as a Left-To-Right string.
356 // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
357 rtl_safe_path->push_back(kLeftToRightEmbeddingMark);
358 #if defined(OS_MACOSX)
359 rtl_safe_path->append(UTF8ToUTF16(path.value()));
360 #elif defined(OS_WIN)
361 rtl_safe_path->append(path.value());
362 #else // defined(OS_POSIX) && !defined(OS_MACOSX)
363 std::wstring wide_path = base::SysNativeMBToWide(path.value());
364 rtl_safe_path->append(WideToUTF16(wide_path));
365 #endif
366 // Inserting a PDF (Pop Directional Formatting) mark as the last character.
367 rtl_safe_path->push_back(kPopDirectionalFormatting);
370 string16 GetDisplayStringInLTRDirectionality(const string16& text) {
371 // Always wrap the string in RTL UI (it may be appended to RTL string).
372 // Also wrap strings with an RTL first strong character direction in LTR UI.
373 if (IsRTL() || GetFirstStrongCharacterDirection(text) == RIGHT_TO_LEFT) {
374 string16 text_mutable(text);
375 WrapStringWithLTRFormatting(&text_mutable);
376 return text_mutable;
378 return text;
381 string16 StripWrappingBidiControlCharacters(const string16& text) {
382 if (text.empty())
383 return text;
384 size_t begin_index = 0;
385 char16 begin = text[begin_index];
386 if (begin == kLeftToRightEmbeddingMark ||
387 begin == kRightToLeftEmbeddingMark ||
388 begin == kLeftToRightOverride ||
389 begin == kRightToLeftOverride)
390 ++begin_index;
391 size_t end_index = text.length() - 1;
392 if (text[end_index] == kPopDirectionalFormatting)
393 --end_index;
394 return text.substr(begin_index, end_index - begin_index + 1);
397 } // namespace i18n
398 } // namespace base