Roll src/third_party/WebKit 06cb9e9:a978ee5 (svn 202558:202559)
[chromium-blink-merge.git] / base / i18n / rtl_unittest.cc
blob6deaf34582abecc5d78fdf4b3f58bcd153fccc4f
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/i18n/rtl.h"
7 #include <algorithm>
9 #include "base/files/file_path.h"
10 #include "base/strings/string_util.h"
11 #include "base/strings/sys_string_conversions.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "base/test/icu_test_util.h"
14 #include "testing/gtest/include/gtest/gtest.h"
15 #include "testing/platform_test.h"
16 #include "third_party/icu/source/i18n/unicode/usearch.h"
18 namespace base {
19 namespace i18n {
21 namespace {
23 // A test utility function to set the application default text direction.
24 void SetRTL(bool rtl) {
25 // Override the current locale/direction.
26 SetICUDefaultLocale(rtl ? "he" : "en");
27 EXPECT_EQ(rtl, IsRTL());
30 } // namespace
32 class RTLTest : public PlatformTest {
35 TEST_F(RTLTest, GetFirstStrongCharacterDirection) {
36 struct {
37 const wchar_t* text;
38 TextDirection direction;
39 } cases[] = {
40 // Test pure LTR string.
41 { L"foo bar", LEFT_TO_RIGHT },
42 // Test pure RTL string.
43 { L"\x05d0\x05d1\x05d2 \x05d3\x0d4\x05d5", RIGHT_TO_LEFT},
44 // Test bidi string in which the first character with strong directionality
45 // is a character with type L.
46 { L"foo \x05d0 bar", LEFT_TO_RIGHT },
47 // Test bidi string in which the first character with strong directionality
48 // is a character with type R.
49 { L"\x05d0 foo bar", RIGHT_TO_LEFT },
50 // Test bidi string which starts with a character with weak directionality
51 // and in which the first character with strong directionality is a
52 // character with type L.
53 { L"!foo \x05d0 bar", LEFT_TO_RIGHT },
54 // Test bidi string which starts with a character with weak directionality
55 // and in which the first character with strong directionality is a
56 // character with type R.
57 { L",\x05d0 foo bar", RIGHT_TO_LEFT },
58 // Test bidi string in which the first character with strong directionality
59 // is a character with type LRE.
60 { L"\x202a \x05d0 foo bar", LEFT_TO_RIGHT },
61 // Test bidi string in which the first character with strong directionality
62 // is a character with type LRO.
63 { L"\x202d \x05d0 foo bar", LEFT_TO_RIGHT },
64 // Test bidi string in which the first character with strong directionality
65 // is a character with type RLE.
66 { L"\x202b foo \x05d0 bar", RIGHT_TO_LEFT },
67 // Test bidi string in which the first character with strong directionality
68 // is a character with type RLO.
69 { L"\x202e foo \x05d0 bar", RIGHT_TO_LEFT },
70 // Test bidi string in which the first character with strong directionality
71 // is a character with type AL.
72 { L"\x0622 foo \x05d0 bar", RIGHT_TO_LEFT },
73 // Test a string without strong directionality characters.
74 { L",!.{}", LEFT_TO_RIGHT },
75 // Test empty string.
76 { L"", LEFT_TO_RIGHT },
77 // Test characters in non-BMP (e.g. Phoenician letters. Please refer to
78 // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more
79 // information).
81 #if defined(WCHAR_T_IS_UTF32)
82 L" ! \x10910" L"abc 123",
83 #elif defined(WCHAR_T_IS_UTF16)
84 L" ! \xd802\xdd10" L"abc 123",
85 #else
86 #error wchar_t should be either UTF-16 or UTF-32
87 #endif
88 RIGHT_TO_LEFT },
90 #if defined(WCHAR_T_IS_UTF32)
91 L" ! \x10401" L"abc 123",
92 #elif defined(WCHAR_T_IS_UTF16)
93 L" ! \xd801\xdc01" L"abc 123",
94 #else
95 #error wchar_t should be either UTF-16 or UTF-32
96 #endif
97 LEFT_TO_RIGHT },
100 for (size_t i = 0; i < arraysize(cases); ++i)
101 EXPECT_EQ(cases[i].direction,
102 GetFirstStrongCharacterDirection(WideToUTF16(cases[i].text)));
106 // Note that the cases with LRE, LRO, RLE and RLO are invalid for
107 // GetLastStrongCharacterDirection because they should be followed by PDF
108 // character.
109 TEST_F(RTLTest, GetLastStrongCharacterDirection) {
110 struct {
111 const wchar_t* text;
112 TextDirection direction;
113 } cases[] = {
114 // Test pure LTR string.
115 { L"foo bar", LEFT_TO_RIGHT },
116 // Test pure RTL string.
117 { L"\x05d0\x05d1\x05d2 \x05d3\x0d4\x05d5", RIGHT_TO_LEFT},
118 // Test bidi string in which the last character with strong directionality
119 // is a character with type L.
120 { L"foo \x05d0 bar", LEFT_TO_RIGHT },
121 // Test bidi string in which the last character with strong directionality
122 // is a character with type R.
123 { L"\x05d0 foo bar \x05d3", RIGHT_TO_LEFT },
124 // Test bidi string which ends with a character with weak directionality
125 // and in which the last character with strong directionality is a
126 // character with type L.
127 { L"!foo \x05d0 bar!", LEFT_TO_RIGHT },
128 // Test bidi string which ends with a character with weak directionality
129 // and in which the last character with strong directionality is a
130 // character with type R.
131 { L",\x05d0 foo bar \x05d1,", RIGHT_TO_LEFT },
132 // Test bidi string in which the last character with strong directionality
133 // is a character with type AL.
134 { L"\x0622 foo \x05d0 bar \x0622", RIGHT_TO_LEFT },
135 // Test a string without strong directionality characters.
136 { L",!.{}", LEFT_TO_RIGHT },
137 // Test empty string.
138 { L"", LEFT_TO_RIGHT },
139 // Test characters in non-BMP (e.g. Phoenician letters. Please refer to
140 // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more
141 // information).
143 #if defined(WCHAR_T_IS_UTF32)
144 L"abc 123" L" ! \x10910 !",
145 #elif defined(WCHAR_T_IS_UTF16)
146 L"abc 123" L" ! \xd802\xdd10 !",
147 #else
148 #error wchar_t should be either UTF-16 or UTF-32
149 #endif
150 RIGHT_TO_LEFT },
152 #if defined(WCHAR_T_IS_UTF32)
153 L"abc 123" L" ! \x10401 !",
154 #elif defined(WCHAR_T_IS_UTF16)
155 L"abc 123" L" ! \xd801\xdc01 !",
156 #else
157 #error wchar_t should be either UTF-16 or UTF-32
158 #endif
159 LEFT_TO_RIGHT },
162 for (size_t i = 0; i < arraysize(cases); ++i)
163 EXPECT_EQ(cases[i].direction,
164 GetLastStrongCharacterDirection(WideToUTF16(cases[i].text)));
167 TEST_F(RTLTest, GetStringDirection) {
168 struct {
169 const wchar_t* text;
170 TextDirection direction;
171 } cases[] = {
172 // Test pure LTR string.
173 { L"foobar", LEFT_TO_RIGHT },
174 { L".foobar", LEFT_TO_RIGHT },
175 { L"foo, bar", LEFT_TO_RIGHT },
176 // Test pure LTR with strong directionality characters of type LRE.
177 { L"\x202a\x202a", LEFT_TO_RIGHT },
178 { L".\x202a\x202a", LEFT_TO_RIGHT },
179 { L"\x202a, \x202a", LEFT_TO_RIGHT },
180 // Test pure LTR with strong directionality characters of type LRO.
181 { L"\x202d\x202d", LEFT_TO_RIGHT },
182 { L".\x202d\x202d", LEFT_TO_RIGHT },
183 { L"\x202d, \x202d", LEFT_TO_RIGHT },
184 // Test pure LTR with various types of strong directionality characters.
185 { L"foo \x202a\x202d", LEFT_TO_RIGHT },
186 { L".\x202d foo \x202a", LEFT_TO_RIGHT },
187 { L"\x202a, \x202d foo", LEFT_TO_RIGHT },
188 // Test pure RTL with strong directionality characters of type R.
189 { L"\x05d0\x05d0", RIGHT_TO_LEFT },
190 { L".\x05d0\x05d0", RIGHT_TO_LEFT },
191 { L"\x05d0, \x05d0", RIGHT_TO_LEFT },
192 // Test pure RTL with strong directionality characters of type RLE.
193 { L"\x202b\x202b", RIGHT_TO_LEFT },
194 { L".\x202b\x202b", RIGHT_TO_LEFT },
195 { L"\x202b, \x202b", RIGHT_TO_LEFT },
196 // Test pure RTL with strong directionality characters of type RLO.
197 { L"\x202e\x202e", RIGHT_TO_LEFT },
198 { L".\x202e\x202e", RIGHT_TO_LEFT },
199 { L"\x202e, \x202e", RIGHT_TO_LEFT },
200 // Test pure RTL with strong directionality characters of type AL.
201 { L"\x0622\x0622", RIGHT_TO_LEFT },
202 { L".\x0622\x0622", RIGHT_TO_LEFT },
203 { L"\x0622, \x0622", RIGHT_TO_LEFT },
204 // Test pure RTL with various types of strong directionality characters.
205 { L"\x05d0\x202b\x202e\x0622", RIGHT_TO_LEFT },
206 { L".\x202b\x202e\x0622\x05d0", RIGHT_TO_LEFT },
207 { L"\x0622\x202e, \x202b\x05d0", RIGHT_TO_LEFT },
208 // Test bidi strings.
209 { L"foo \x05d0 bar", UNKNOWN_DIRECTION },
210 { L"\x202b foo bar", UNKNOWN_DIRECTION },
211 { L"!foo \x0622 bar", UNKNOWN_DIRECTION },
212 { L"\x202a\x202b", UNKNOWN_DIRECTION },
213 { L"\x202e\x202d", UNKNOWN_DIRECTION },
214 { L"\x0622\x202a", UNKNOWN_DIRECTION },
215 { L"\x202d\x05d0", UNKNOWN_DIRECTION },
216 // Test a string without strong directionality characters.
217 { L",!.{}", LEFT_TO_RIGHT },
218 // Test empty string.
219 { L"", LEFT_TO_RIGHT },
221 #if defined(WCHAR_T_IS_UTF32)
222 L" ! \x10910" L"abc 123",
223 #elif defined(WCHAR_T_IS_UTF16)
224 L" ! \xd802\xdd10" L"abc 123",
225 #else
226 #error wchar_t should be either UTF-16 or UTF-32
227 #endif
228 UNKNOWN_DIRECTION },
230 #if defined(WCHAR_T_IS_UTF32)
231 L" ! \x10401" L"abc 123",
232 #elif defined(WCHAR_T_IS_UTF16)
233 L" ! \xd801\xdc01" L"abc 123",
234 #else
235 #error wchar_t should be either UTF-16 or UTF-32
236 #endif
237 LEFT_TO_RIGHT },
240 for (size_t i = 0; i < arraysize(cases); ++i)
241 EXPECT_EQ(cases[i].direction,
242 GetStringDirection(WideToUTF16(cases[i].text)));
245 TEST_F(RTLTest, WrapPathWithLTRFormatting) {
246 const wchar_t* cases[] = {
247 // Test common path, such as "c:\foo\bar".
248 L"c:/foo/bar",
249 // Test path with file name, such as "c:\foo\bar\test.jpg".
250 L"c:/foo/bar/test.jpg",
251 // Test path ending with punctuation, such as "c:\(foo)\bar.".
252 L"c:/(foo)/bar.",
253 // Test path ending with separator, such as "c:\foo\bar\".
254 L"c:/foo/bar/",
255 // Test path with RTL character.
256 L"c:/\x05d0",
257 // Test path with 2 level RTL directory names.
258 L"c:/\x05d0/\x0622",
259 // Test path with mixed RTL/LTR directory names and ending with punctuation.
260 L"c:/\x05d0/\x0622/(foo)/b.a.r.",
261 // Test path without driver name, such as "/foo/bar/test/jpg".
262 L"/foo/bar/test.jpg",
263 // Test path start with current directory, such as "./foo".
264 L"./foo",
265 // Test path start with parent directory, such as "../foo/bar.jpg".
266 L"../foo/bar.jpg",
267 // Test absolute path, such as "//foo/bar.jpg".
268 L"//foo/bar.jpg",
269 // Test path with mixed RTL/LTR directory names.
270 L"c:/foo/\x05d0/\x0622/\x05d1.jpg",
271 // Test empty path.
275 for (size_t i = 0; i < arraysize(cases); ++i) {
276 FilePath path;
277 #if defined(OS_WIN)
278 std::wstring win_path(cases[i]);
279 std::replace(win_path.begin(), win_path.end(), '/', '\\');
280 path = FilePath(win_path);
281 std::wstring wrapped_expected =
282 std::wstring(L"\x202a") + win_path + L"\x202c";
283 #else
284 path = FilePath(base::SysWideToNativeMB(cases[i]));
285 std::wstring wrapped_expected =
286 std::wstring(L"\x202a") + cases[i] + L"\x202c";
287 #endif
288 string16 localized_file_path_string;
289 WrapPathWithLTRFormatting(path, &localized_file_path_string);
291 std::wstring wrapped_actual = UTF16ToWide(localized_file_path_string);
292 EXPECT_EQ(wrapped_expected, wrapped_actual);
296 TEST_F(RTLTest, WrapString) {
297 const wchar_t* cases[] = {
298 L" . ",
299 L"abc",
300 L"a" L"\x5d0\x5d1",
301 L"a" L"\x5d1" L"b",
302 L"\x5d0\x5d1\x5d2",
303 L"\x5d0\x5d1" L"a",
304 L"\x5d0" L"a" L"\x5d1",
307 const bool was_rtl = IsRTL();
309 test::ScopedRestoreICUDefaultLocale restore_locale;
310 for (size_t i = 0; i < 2; ++i) {
311 // Toggle the application default text direction (to try each direction).
312 SetRTL(!IsRTL());
314 string16 empty;
315 WrapStringWithLTRFormatting(&empty);
316 EXPECT_TRUE(empty.empty());
317 WrapStringWithRTLFormatting(&empty);
318 EXPECT_TRUE(empty.empty());
320 for (size_t i = 0; i < arraysize(cases); ++i) {
321 string16 input = WideToUTF16(cases[i]);
322 string16 ltr_wrap = input;
323 WrapStringWithLTRFormatting(&ltr_wrap);
324 EXPECT_EQ(ltr_wrap[0], kLeftToRightEmbeddingMark);
325 EXPECT_EQ(ltr_wrap.substr(1, ltr_wrap.length() - 2), input);
326 EXPECT_EQ(ltr_wrap[ltr_wrap.length() -1], kPopDirectionalFormatting);
328 string16 rtl_wrap = input;
329 WrapStringWithRTLFormatting(&rtl_wrap);
330 EXPECT_EQ(rtl_wrap[0], kRightToLeftEmbeddingMark);
331 EXPECT_EQ(rtl_wrap.substr(1, rtl_wrap.length() - 2), input);
332 EXPECT_EQ(rtl_wrap[rtl_wrap.length() -1], kPopDirectionalFormatting);
336 EXPECT_EQ(was_rtl, IsRTL());
339 TEST_F(RTLTest, GetDisplayStringInLTRDirectionality) {
340 struct {
341 const wchar_t* path;
342 bool wrap_ltr;
343 bool wrap_rtl;
344 } cases[] = {
345 { L"test", false, true },
346 { L"test.html", false, true },
347 { L"\x05d0\x05d1\x05d2", true, true },
348 { L"\x05d0\x05d1\x05d2.txt", true, true },
349 { L"\x05d0" L"abc", true, true },
350 { L"\x05d0" L"abc.txt", true, true },
351 { L"abc\x05d0\x05d1", false, true },
352 { L"abc\x05d0\x05d1.jpg", false, true },
355 const bool was_rtl = IsRTL();
357 test::ScopedRestoreICUDefaultLocale restore_locale;
358 for (size_t i = 0; i < 2; ++i) {
359 // Toggle the application default text direction (to try each direction).
360 SetRTL(!IsRTL());
361 for (size_t i = 0; i < arraysize(cases); ++i) {
362 string16 input = WideToUTF16(cases[i].path);
363 string16 output = GetDisplayStringInLTRDirectionality(input);
364 // Test the expected wrapping behavior for the current UI directionality.
365 if (IsRTL() ? cases[i].wrap_rtl : cases[i].wrap_ltr)
366 EXPECT_NE(output, input);
367 else
368 EXPECT_EQ(output, input);
372 EXPECT_EQ(was_rtl, IsRTL());
375 TEST_F(RTLTest, GetTextDirection) {
376 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ar"));
377 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ar_EG"));
378 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("he"));
379 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("he_IL"));
380 // iw is an obsolete code for Hebrew.
381 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("iw"));
382 // Although we're not yet localized to Farsi and Urdu, we
383 // do have the text layout direction information for them.
384 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("fa"));
385 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ur"));
386 #if 0
387 // Enable these when we include the minimal locale data for Azerbaijani
388 // written in Arabic and Dhivehi. At the moment, our copy of
389 // ICU data does not have entries for them.
390 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("az_Arab"));
391 // Dhivehi that uses Thaana script.
392 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("dv"));
393 #endif
394 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("en"));
395 // Chinese in China with '-'.
396 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("zh-CN"));
397 // Filipino : 3-letter code
398 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("fil"));
399 // Russian
400 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("ru"));
401 // Japanese that uses multiple scripts
402 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("ja"));
405 TEST_F(RTLTest, GetTextDirectionForLocaleInStartUp) {
406 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocaleInStartUp("ar"));
407 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocaleInStartUp("ar_EG"));
408 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocaleInStartUp("he"));
409 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocaleInStartUp("he_IL"));
410 // iw is an obsolete code for Hebrew.
411 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocaleInStartUp("iw"));
412 // Although we're not yet localized to Farsi and Urdu, we
413 // do have the text layout direction information for them.
414 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocaleInStartUp("fa"));
415 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocaleInStartUp("ur"));
416 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocaleInStartUp("en"));
417 // Chinese in China with '-'.
418 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocaleInStartUp("zh-CN"));
419 // Filipino : 3-letter code
420 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocaleInStartUp("fil"));
421 // Russian
422 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocaleInStartUp("ru"));
423 // Japanese that uses multiple scripts
424 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocaleInStartUp("ja"));
427 TEST_F(RTLTest, UnadjustStringForLocaleDirection) {
428 // These test strings are borrowed from WrapPathWithLTRFormatting
429 const wchar_t* cases[] = {
430 L"foo bar",
431 L"foo \x05d0 bar",
432 L"\x05d0 foo bar",
433 L"!foo \x05d0 bar",
434 L",\x05d0 foo bar",
435 L"\x202a \x05d0 foo bar",
436 L"\x202d \x05d0 foo bar",
437 L"\x202b foo \x05d0 bar",
438 L"\x202e foo \x05d0 bar",
439 L"\x0622 foo \x05d0 bar",
442 const bool was_rtl = IsRTL();
444 test::ScopedRestoreICUDefaultLocale restore_locale;
445 for (size_t i = 0; i < 2; ++i) {
446 // Toggle the application default text direction (to try each direction).
447 SetRTL(!IsRTL());
449 for (size_t i = 0; i < arraysize(cases); ++i) {
450 string16 test_case = WideToUTF16(cases[i]);
451 string16 adjusted_string = test_case;
453 if (!AdjustStringForLocaleDirection(&adjusted_string))
454 continue;
456 EXPECT_NE(test_case, adjusted_string);
457 EXPECT_TRUE(UnadjustStringForLocaleDirection(&adjusted_string));
458 EXPECT_EQ(test_case, adjusted_string) << " for test case [" << test_case
459 << "] with IsRTL() == " << IsRTL();
463 EXPECT_EQ(was_rtl, IsRTL());
466 } // namespace i18n
467 } // namespace base