Include all dupe types (even when value is zero) in scan stats.
[chromium-blink-merge.git] / base / i18n / rtl.cc
blob1cccae289375f92495963cee7bbc9341167a8a48
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/i18n/rtl.h"
7 #include "base/files/file_path.h"
8 #include "base/logging.h"
9 #include "base/strings/string_util.h"
10 #include "base/strings/sys_string_conversions.h"
11 #include "base/strings/utf_string_conversions.h"
12 #include "third_party/icu/source/common/unicode/locid.h"
13 #include "third_party/icu/source/common/unicode/uchar.h"
14 #include "third_party/icu/source/common/unicode/uscript.h"
15 #include "third_party/icu/source/i18n/unicode/coll.h"
17 namespace {
19 // Extract language, country and variant, but ignore keywords. For example,
20 // en-US, ca@valencia, ca-ES@valencia.
21 std::string GetLocaleString(const icu::Locale& locale) {
22 const char* language = locale.getLanguage();
23 const char* country = locale.getCountry();
24 const char* variant = locale.getVariant();
26 std::string result =
27 (language != NULL && *language != '\0') ? language : "und";
29 if (country != NULL && *country != '\0') {
30 result += '-';
31 result += country;
34 if (variant != NULL && *variant != '\0') {
35 std::string variant_str(variant);
36 base::StringToLowerASCII(&variant_str);
37 result += '@' + variant_str;
40 return result;
43 // Returns LEFT_TO_RIGHT or RIGHT_TO_LEFT if |character| has strong
44 // directionality, returns UNKNOWN_DIRECTION if it doesn't. Please refer to
45 // http://unicode.org/reports/tr9/ for more information.
46 base::i18n::TextDirection GetCharacterDirection(UChar32 character) {
47 // Now that we have the character, we use ICU in order to query for the
48 // appropriate Unicode BiDi character type.
49 int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
50 if ((property == U_RIGHT_TO_LEFT) ||
51 (property == U_RIGHT_TO_LEFT_ARABIC) ||
52 (property == U_RIGHT_TO_LEFT_EMBEDDING) ||
53 (property == U_RIGHT_TO_LEFT_OVERRIDE)) {
54 return base::i18n::RIGHT_TO_LEFT;
55 } else if ((property == U_LEFT_TO_RIGHT) ||
56 (property == U_LEFT_TO_RIGHT_EMBEDDING) ||
57 (property == U_LEFT_TO_RIGHT_OVERRIDE)) {
58 return base::i18n::LEFT_TO_RIGHT;
60 return base::i18n::UNKNOWN_DIRECTION;
63 } // namespace
65 namespace base {
66 namespace i18n {
68 // Represents the locale-specific ICU text direction.
69 static TextDirection g_icu_text_direction = UNKNOWN_DIRECTION;
71 // Convert the ICU default locale to a string.
72 std::string GetConfiguredLocale() {
73 return GetLocaleString(icu::Locale::getDefault());
76 // Convert the ICU canonicalized locale to a string.
77 std::string GetCanonicalLocale(const std::string& locale) {
78 return GetLocaleString(icu::Locale::createCanonical(locale.c_str()));
81 // Convert Chrome locale name to ICU locale name
82 std::string ICULocaleName(const std::string& locale_string) {
83 // If not Spanish, just return it.
84 if (locale_string.substr(0, 2) != "es")
85 return locale_string;
86 // Expand es to es-ES.
87 if (LowerCaseEqualsASCII(locale_string, "es"))
88 return "es-ES";
89 // Map es-419 (Latin American Spanish) to es-FOO depending on the system
90 // locale. If it's es-RR other than es-ES, map to es-RR. Otherwise, map
91 // to es-MX (the most populous in Spanish-speaking Latin America).
92 if (LowerCaseEqualsASCII(locale_string, "es-419")) {
93 const icu::Locale& locale = icu::Locale::getDefault();
94 std::string language = locale.getLanguage();
95 const char* country = locale.getCountry();
96 if (LowerCaseEqualsASCII(language, "es") &&
97 !LowerCaseEqualsASCII(country, "es")) {
98 language += '-';
99 language += country;
100 return language;
102 return "es-MX";
104 // Currently, Chrome has only "es" and "es-419", but later we may have
105 // more specific "es-RR".
106 return locale_string;
109 void SetICUDefaultLocale(const std::string& locale_string) {
110 icu::Locale locale(ICULocaleName(locale_string).c_str());
111 UErrorCode error_code = U_ZERO_ERROR;
112 icu::Locale::setDefault(locale, error_code);
113 // This return value is actually bogus because Locale object is
114 // an ID and setDefault seems to always succeed (regardless of the
115 // presence of actual locale data). However,
116 // it does not hurt to have it as a sanity check.
117 DCHECK(U_SUCCESS(error_code));
118 g_icu_text_direction = UNKNOWN_DIRECTION;
121 bool IsRTL() {
122 return ICUIsRTL();
125 bool ICUIsRTL() {
126 if (g_icu_text_direction == UNKNOWN_DIRECTION) {
127 const icu::Locale& locale = icu::Locale::getDefault();
128 g_icu_text_direction = GetTextDirectionForLocale(locale.getName());
130 return g_icu_text_direction == RIGHT_TO_LEFT;
133 TextDirection GetTextDirectionForLocale(const char* locale_name) {
134 UErrorCode status = U_ZERO_ERROR;
135 ULayoutType layout_dir = uloc_getCharacterOrientation(locale_name, &status);
136 DCHECK(U_SUCCESS(status));
137 // Treat anything other than RTL as LTR.
138 return (layout_dir != ULOC_LAYOUT_RTL) ? LEFT_TO_RIGHT : RIGHT_TO_LEFT;
141 TextDirection GetFirstStrongCharacterDirection(const string16& text) {
142 const UChar* string = text.c_str();
143 size_t length = text.length();
144 size_t position = 0;
145 while (position < length) {
146 UChar32 character;
147 size_t next_position = position;
148 U16_NEXT(string, next_position, length, character);
149 TextDirection direction = GetCharacterDirection(character);
150 if (direction != UNKNOWN_DIRECTION)
151 return direction;
152 position = next_position;
154 return LEFT_TO_RIGHT;
157 TextDirection GetLastStrongCharacterDirection(const string16& text) {
158 const UChar* string = text.c_str();
159 size_t position = text.length();
160 while (position > 0) {
161 UChar32 character;
162 size_t prev_position = position;
163 U16_PREV(string, 0, prev_position, character);
164 TextDirection direction = GetCharacterDirection(character);
165 if (direction != UNKNOWN_DIRECTION)
166 return direction;
167 position = prev_position;
169 return LEFT_TO_RIGHT;
172 TextDirection GetStringDirection(const string16& text) {
173 const UChar* string = text.c_str();
174 size_t length = text.length();
175 size_t position = 0;
177 TextDirection result(UNKNOWN_DIRECTION);
178 while (position < length) {
179 UChar32 character;
180 size_t next_position = position;
181 U16_NEXT(string, next_position, length, character);
182 TextDirection direction = GetCharacterDirection(character);
183 if (direction != UNKNOWN_DIRECTION) {
184 if (result != UNKNOWN_DIRECTION && result != direction)
185 return UNKNOWN_DIRECTION;
186 result = direction;
188 position = next_position;
191 // Handle the case of a string not containing any strong directionality
192 // characters defaulting to LEFT_TO_RIGHT.
193 if (result == UNKNOWN_DIRECTION)
194 return LEFT_TO_RIGHT;
196 return result;
199 #if defined(OS_WIN)
200 bool AdjustStringForLocaleDirection(string16* text) {
201 if (!IsRTL() || text->empty())
202 return false;
204 // Marking the string as LTR if the locale is RTL and the string does not
205 // contain strong RTL characters. Otherwise, mark the string as RTL.
206 bool has_rtl_chars = StringContainsStrongRTLChars(*text);
207 if (!has_rtl_chars)
208 WrapStringWithLTRFormatting(text);
209 else
210 WrapStringWithRTLFormatting(text);
212 return true;
215 bool UnadjustStringForLocaleDirection(string16* text) {
216 if (!IsRTL() || text->empty())
217 return false;
219 *text = StripWrappingBidiControlCharacters(*text);
220 return true;
222 #else
223 bool AdjustStringForLocaleDirection(string16* text) {
224 // On OS X & GTK the directionality of a label is determined by the first
225 // strongly directional character.
226 // However, we want to make sure that in an LTR-language-UI all strings are
227 // left aligned and vice versa.
228 // A problem can arise if we display a string which starts with user input.
229 // User input may be of the opposite directionality to the UI. So the whole
230 // string will be displayed in the opposite directionality, e.g. if we want to
231 // display in an LTR UI [such as US English]:
233 // EMAN_NOISNETXE is now installed.
235 // Since EXTENSION_NAME begins with a strong RTL char, the label's
236 // directionality will be set to RTL and the string will be displayed visually
237 // as:
239 // .is now installed EMAN_NOISNETXE
241 // In order to solve this issue, we prepend an LRM to the string. An LRM is a
242 // strongly directional LTR char.
243 // We also append an LRM at the end, which ensures that we're in an LTR
244 // context.
246 // Unlike Windows, Linux and OS X can correctly display RTL glyphs out of the
247 // box so there is no issue with displaying zero-width bidi control characters
248 // on any system. Thus no need for the !IsRTL() check here.
249 if (text->empty())
250 return false;
252 bool ui_direction_is_rtl = IsRTL();
254 bool has_rtl_chars = StringContainsStrongRTLChars(*text);
255 if (!ui_direction_is_rtl && has_rtl_chars) {
256 WrapStringWithRTLFormatting(text);
257 text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
258 kLeftToRightMark);
259 text->push_back(kLeftToRightMark);
260 } else if (ui_direction_is_rtl && has_rtl_chars) {
261 WrapStringWithRTLFormatting(text);
262 text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
263 kRightToLeftMark);
264 text->push_back(kRightToLeftMark);
265 } else if (ui_direction_is_rtl) {
266 WrapStringWithLTRFormatting(text);
267 text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
268 kRightToLeftMark);
269 text->push_back(kRightToLeftMark);
270 } else {
271 return false;
274 return true;
277 bool UnadjustStringForLocaleDirection(string16* text) {
278 if (text->empty())
279 return false;
281 size_t begin_index = 0;
282 char16 begin = text->at(begin_index);
283 if (begin == kLeftToRightMark ||
284 begin == kRightToLeftMark) {
285 ++begin_index;
288 size_t end_index = text->length() - 1;
289 char16 end = text->at(end_index);
290 if (end == kLeftToRightMark ||
291 end == kRightToLeftMark) {
292 --end_index;
295 string16 unmarked_text =
296 text->substr(begin_index, end_index - begin_index + 1);
297 *text = StripWrappingBidiControlCharacters(unmarked_text);
298 return true;
301 #endif // !OS_WIN
303 bool StringContainsStrongRTLChars(const string16& text) {
304 const UChar* string = text.c_str();
305 size_t length = text.length();
306 size_t position = 0;
307 while (position < length) {
308 UChar32 character;
309 size_t next_position = position;
310 U16_NEXT(string, next_position, length, character);
312 // Now that we have the character, we use ICU in order to query for the
313 // appropriate Unicode BiDi character type.
314 int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
315 if ((property == U_RIGHT_TO_LEFT) || (property == U_RIGHT_TO_LEFT_ARABIC))
316 return true;
318 position = next_position;
321 return false;
324 void WrapStringWithLTRFormatting(string16* text) {
325 if (text->empty())
326 return;
328 // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
329 text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
330 kLeftToRightEmbeddingMark);
332 // Inserting a PDF (Pop Directional Formatting) mark as the last character.
333 text->push_back(kPopDirectionalFormatting);
336 void WrapStringWithRTLFormatting(string16* text) {
337 if (text->empty())
338 return;
340 // Inserting an RLE (Right-To-Left Embedding) mark as the first character.
341 text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
342 kRightToLeftEmbeddingMark);
344 // Inserting a PDF (Pop Directional Formatting) mark as the last character.
345 text->push_back(kPopDirectionalFormatting);
348 void WrapPathWithLTRFormatting(const FilePath& path,
349 string16* rtl_safe_path) {
350 // Wrap the overall path with LRE-PDF pair which essentialy marks the
351 // string as a Left-To-Right string.
352 // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
353 rtl_safe_path->push_back(kLeftToRightEmbeddingMark);
354 #if defined(OS_MACOSX)
355 rtl_safe_path->append(UTF8ToUTF16(path.value()));
356 #elif defined(OS_WIN)
357 rtl_safe_path->append(path.value());
358 #else // defined(OS_POSIX) && !defined(OS_MACOSX)
359 std::wstring wide_path = base::SysNativeMBToWide(path.value());
360 rtl_safe_path->append(WideToUTF16(wide_path));
361 #endif
362 // Inserting a PDF (Pop Directional Formatting) mark as the last character.
363 rtl_safe_path->push_back(kPopDirectionalFormatting);
366 string16 GetDisplayStringInLTRDirectionality(const string16& text) {
367 // Always wrap the string in RTL UI (it may be appended to RTL string).
368 // Also wrap strings with an RTL first strong character direction in LTR UI.
369 if (IsRTL() || GetFirstStrongCharacterDirection(text) == RIGHT_TO_LEFT) {
370 string16 text_mutable(text);
371 WrapStringWithLTRFormatting(&text_mutable);
372 return text_mutable;
374 return text;
377 string16 StripWrappingBidiControlCharacters(const string16& text) {
378 if (text.empty())
379 return text;
380 size_t begin_index = 0;
381 char16 begin = text[begin_index];
382 if (begin == kLeftToRightEmbeddingMark ||
383 begin == kRightToLeftEmbeddingMark ||
384 begin == kLeftToRightOverride ||
385 begin == kRightToLeftOverride)
386 ++begin_index;
387 size_t end_index = text.length() - 1;
388 if (text[end_index] == kPopDirectionalFormatting)
389 --end_index;
390 return text.substr(begin_index, end_index - begin_index + 1);
393 } // namespace i18n
394 } // namespace base