1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/i18n/rtl.h"
9 #include "base/files/file_path.h"
10 #include "base/strings/string_util.h"
11 #include "base/strings/sys_string_conversions.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "base/test/icu_test_util.h"
14 #include "testing/gtest/include/gtest/gtest.h"
15 #include "testing/platform_test.h"
16 #include "third_party/icu/source/i18n/unicode/usearch.h"
23 // A test utility function to set the application default text direction.
24 void SetRTL(bool rtl
) {
25 // Override the current locale/direction.
26 SetICUDefaultLocale(rtl
? "he" : "en");
27 EXPECT_EQ(rtl
, IsRTL());
32 class RTLTest
: public PlatformTest
{
35 TEST_F(RTLTest
, GetFirstStrongCharacterDirection
) {
38 TextDirection direction
;
40 // Test pure LTR string.
41 { L
"foo bar", LEFT_TO_RIGHT
},
42 // Test pure RTL string.
43 { L
"\x05d0\x05d1\x05d2 \x05d3\x0d4\x05d5", RIGHT_TO_LEFT
},
44 // Test bidi string in which the first character with strong directionality
45 // is a character with type L.
46 { L
"foo \x05d0 bar", LEFT_TO_RIGHT
},
47 // Test bidi string in which the first character with strong directionality
48 // is a character with type R.
49 { L
"\x05d0 foo bar", RIGHT_TO_LEFT
},
50 // Test bidi string which starts with a character with weak directionality
51 // and in which the first character with strong directionality is a
52 // character with type L.
53 { L
"!foo \x05d0 bar", LEFT_TO_RIGHT
},
54 // Test bidi string which starts with a character with weak directionality
55 // and in which the first character with strong directionality is a
56 // character with type R.
57 { L
",\x05d0 foo bar", RIGHT_TO_LEFT
},
58 // Test bidi string in which the first character with strong directionality
59 // is a character with type LRE.
60 { L
"\x202a \x05d0 foo bar", LEFT_TO_RIGHT
},
61 // Test bidi string in which the first character with strong directionality
62 // is a character with type LRO.
63 { L
"\x202d \x05d0 foo bar", LEFT_TO_RIGHT
},
64 // Test bidi string in which the first character with strong directionality
65 // is a character with type RLE.
66 { L
"\x202b foo \x05d0 bar", RIGHT_TO_LEFT
},
67 // Test bidi string in which the first character with strong directionality
68 // is a character with type RLO.
69 { L
"\x202e foo \x05d0 bar", RIGHT_TO_LEFT
},
70 // Test bidi string in which the first character with strong directionality
71 // is a character with type AL.
72 { L
"\x0622 foo \x05d0 bar", RIGHT_TO_LEFT
},
73 // Test a string without strong directionality characters.
74 { L
",!.{}", LEFT_TO_RIGHT
},
76 { L
"", LEFT_TO_RIGHT
},
77 // Test characters in non-BMP (e.g. Phoenician letters. Please refer to
78 // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more
81 #if defined(WCHAR_T_IS_UTF32)
82 L
" ! \x10910" L
"abc 123",
83 #elif defined(WCHAR_T_IS_UTF16)
84 L
" ! \xd802\xdd10" L
"abc 123",
86 #error wchar_t should be either UTF-16 or UTF-32
90 #if defined(WCHAR_T_IS_UTF32)
91 L
" ! \x10401" L
"abc 123",
92 #elif defined(WCHAR_T_IS_UTF16)
93 L
" ! \xd801\xdc01" L
"abc 123",
95 #error wchar_t should be either UTF-16 or UTF-32
100 for (size_t i
= 0; i
< arraysize(cases
); ++i
)
101 EXPECT_EQ(cases
[i
].direction
,
102 GetFirstStrongCharacterDirection(WideToUTF16(cases
[i
].text
)));
106 // Note that the cases with LRE, LRO, RLE and RLO are invalid for
107 // GetLastStrongCharacterDirection because they should be followed by PDF
109 TEST_F(RTLTest
, GetLastStrongCharacterDirection
) {
112 TextDirection direction
;
114 // Test pure LTR string.
115 { L
"foo bar", LEFT_TO_RIGHT
},
116 // Test pure RTL string.
117 { L
"\x05d0\x05d1\x05d2 \x05d3\x0d4\x05d5", RIGHT_TO_LEFT
},
118 // Test bidi string in which the last character with strong directionality
119 // is a character with type L.
120 { L
"foo \x05d0 bar", LEFT_TO_RIGHT
},
121 // Test bidi string in which the last character with strong directionality
122 // is a character with type R.
123 { L
"\x05d0 foo bar \x05d3", RIGHT_TO_LEFT
},
124 // Test bidi string which ends with a character with weak directionality
125 // and in which the last character with strong directionality is a
126 // character with type L.
127 { L
"!foo \x05d0 bar!", LEFT_TO_RIGHT
},
128 // Test bidi string which ends with a character with weak directionality
129 // and in which the last character with strong directionality is a
130 // character with type R.
131 { L
",\x05d0 foo bar \x05d1,", RIGHT_TO_LEFT
},
132 // Test bidi string in which the last character with strong directionality
133 // is a character with type AL.
134 { L
"\x0622 foo \x05d0 bar \x0622", RIGHT_TO_LEFT
},
135 // Test a string without strong directionality characters.
136 { L
",!.{}", LEFT_TO_RIGHT
},
137 // Test empty string.
138 { L
"", LEFT_TO_RIGHT
},
139 // Test characters in non-BMP (e.g. Phoenician letters. Please refer to
140 // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more
143 #if defined(WCHAR_T_IS_UTF32)
144 L
"abc 123" L
" ! \x10910 !",
145 #elif defined(WCHAR_T_IS_UTF16)
146 L
"abc 123" L
" ! \xd802\xdd10 !",
148 #error wchar_t should be either UTF-16 or UTF-32
152 #if defined(WCHAR_T_IS_UTF32)
153 L
"abc 123" L
" ! \x10401 !",
154 #elif defined(WCHAR_T_IS_UTF16)
155 L
"abc 123" L
" ! \xd801\xdc01 !",
157 #error wchar_t should be either UTF-16 or UTF-32
162 for (size_t i
= 0; i
< arraysize(cases
); ++i
)
163 EXPECT_EQ(cases
[i
].direction
,
164 GetLastStrongCharacterDirection(WideToUTF16(cases
[i
].text
)));
167 TEST_F(RTLTest
, GetStringDirection
) {
170 TextDirection direction
;
172 // Test pure LTR string.
173 { L
"foobar", LEFT_TO_RIGHT
},
174 { L
".foobar", LEFT_TO_RIGHT
},
175 { L
"foo, bar", LEFT_TO_RIGHT
},
176 // Test pure LTR with strong directionality characters of type LRE.
177 { L
"\x202a\x202a", LEFT_TO_RIGHT
},
178 { L
".\x202a\x202a", LEFT_TO_RIGHT
},
179 { L
"\x202a, \x202a", LEFT_TO_RIGHT
},
180 // Test pure LTR with strong directionality characters of type LRO.
181 { L
"\x202d\x202d", LEFT_TO_RIGHT
},
182 { L
".\x202d\x202d", LEFT_TO_RIGHT
},
183 { L
"\x202d, \x202d", LEFT_TO_RIGHT
},
184 // Test pure LTR with various types of strong directionality characters.
185 { L
"foo \x202a\x202d", LEFT_TO_RIGHT
},
186 { L
".\x202d foo \x202a", LEFT_TO_RIGHT
},
187 { L
"\x202a, \x202d foo", LEFT_TO_RIGHT
},
188 // Test pure RTL with strong directionality characters of type R.
189 { L
"\x05d0\x05d0", RIGHT_TO_LEFT
},
190 { L
".\x05d0\x05d0", RIGHT_TO_LEFT
},
191 { L
"\x05d0, \x05d0", RIGHT_TO_LEFT
},
192 // Test pure RTL with strong directionality characters of type RLE.
193 { L
"\x202b\x202b", RIGHT_TO_LEFT
},
194 { L
".\x202b\x202b", RIGHT_TO_LEFT
},
195 { L
"\x202b, \x202b", RIGHT_TO_LEFT
},
196 // Test pure RTL with strong directionality characters of type RLO.
197 { L
"\x202e\x202e", RIGHT_TO_LEFT
},
198 { L
".\x202e\x202e", RIGHT_TO_LEFT
},
199 { L
"\x202e, \x202e", RIGHT_TO_LEFT
},
200 // Test pure RTL with strong directionality characters of type AL.
201 { L
"\x0622\x0622", RIGHT_TO_LEFT
},
202 { L
".\x0622\x0622", RIGHT_TO_LEFT
},
203 { L
"\x0622, \x0622", RIGHT_TO_LEFT
},
204 // Test pure RTL with various types of strong directionality characters.
205 { L
"\x05d0\x202b\x202e\x0622", RIGHT_TO_LEFT
},
206 { L
".\x202b\x202e\x0622\x05d0", RIGHT_TO_LEFT
},
207 { L
"\x0622\x202e, \x202b\x05d0", RIGHT_TO_LEFT
},
208 // Test bidi strings.
209 { L
"foo \x05d0 bar", UNKNOWN_DIRECTION
},
210 { L
"\x202b foo bar", UNKNOWN_DIRECTION
},
211 { L
"!foo \x0622 bar", UNKNOWN_DIRECTION
},
212 { L
"\x202a\x202b", UNKNOWN_DIRECTION
},
213 { L
"\x202e\x202d", UNKNOWN_DIRECTION
},
214 { L
"\x0622\x202a", UNKNOWN_DIRECTION
},
215 { L
"\x202d\x05d0", UNKNOWN_DIRECTION
},
216 // Test a string without strong directionality characters.
217 { L
",!.{}", LEFT_TO_RIGHT
},
218 // Test empty string.
219 { L
"", LEFT_TO_RIGHT
},
221 #if defined(WCHAR_T_IS_UTF32)
222 L
" ! \x10910" L
"abc 123",
223 #elif defined(WCHAR_T_IS_UTF16)
224 L
" ! \xd802\xdd10" L
"abc 123",
226 #error wchar_t should be either UTF-16 or UTF-32
230 #if defined(WCHAR_T_IS_UTF32)
231 L
" ! \x10401" L
"abc 123",
232 #elif defined(WCHAR_T_IS_UTF16)
233 L
" ! \xd801\xdc01" L
"abc 123",
235 #error wchar_t should be either UTF-16 or UTF-32
240 for (size_t i
= 0; i
< arraysize(cases
); ++i
)
241 EXPECT_EQ(cases
[i
].direction
,
242 GetStringDirection(WideToUTF16(cases
[i
].text
)));
// For each entry of |cases|, builds a FilePath, passes it to
// WrapPathWithLTRFormatting(), and expects the result to equal the path text
// with U+202A prepended and U+202C appended.
//
// NOTE(review): several case comments below are not followed by a literal, and
// |path| / |wrapped_expected| are assigned and declared twice (once from a
// backslash-converted |win_path|, once via base::SysWideToNativeMB) with no
// visible preprocessor conditional, declaration of |path|, or closing braces.
// Lines appear to be missing from this listing; presumably the two variants
// are platform-specific alternatives. Confirm against the original file
// before changing any code here.
245 TEST_F(RTLTest
, WrapPathWithLTRFormatting
) {
246 const wchar_t* cases
[] = {
247 // Test common path, such as "c:\foo\bar".
249 // Test path with file name, such as "c:\foo\bar\test.jpg".
250 L
"c:/foo/bar/test.jpg",
251 // Test path ending with punctuation, such as "c:\(foo)\bar.".
253 // Test path ending with separator, such as "c:\foo\bar\".
255 // Test path with RTL character.
257 // Test path with 2 level RTL directory names.
259 // Test path with mixed RTL/LTR directory names and ending with punctuation.
260 L
"c:/\x05d0/\x0622/(foo)/b.a.r.",
261 // Test path without drive name, such as "/foo/bar/test.jpg".
262 L
"/foo/bar/test.jpg",
263 // Test path start with current directory, such as "./foo".
265 // Test path start with parent directory, such as "../foo/bar.jpg".
267 // Test absolute path, such as "//foo/bar.jpg".
269 // Test path with mixed RTL/LTR directory names.
270 L
"c:/foo/\x05d0/\x0622/\x05d1.jpg",
275 for (size_t i
= 0; i
< arraysize(cases
); ++i
) {
278 std::wstring
win_path(cases
[i
]);
279 std::replace(win_path
.begin(), win_path
.end(), '/', '\\');
280 path
= FilePath(win_path
);
281 std::wstring wrapped_expected
=
282 std::wstring(L
"\x202a") + win_path
+ L
"\x202c";
284 path
= FilePath(base::SysWideToNativeMB(cases
[i
]));
285 std::wstring wrapped_expected
=
286 std::wstring(L
"\x202a") + cases
[i
] + L
"\x202c";
288 string16 localized_file_path_string
;
289 WrapPathWithLTRFormatting(path
, &localized_file_path_string
);
291 std::wstring wrapped_actual
= UTF16ToWide(localized_file_path_string
);
292 EXPECT_EQ(wrapped_expected
, wrapped_actual
);
296 TEST_F(RTLTest
, WrapString
) {
297 const wchar_t* cases
[] = {
304 L
"\x5d0" L
"a" L
"\x5d1",
307 const bool was_rtl
= IsRTL();
309 test::ScopedRestoreICUDefaultLocale restore_locale
;
310 for (size_t i
= 0; i
< 2; ++i
) {
311 // Toggle the application default text direction (to try each direction).
315 WrapStringWithLTRFormatting(&empty
);
316 EXPECT_TRUE(empty
.empty());
317 WrapStringWithRTLFormatting(&empty
);
318 EXPECT_TRUE(empty
.empty());
320 for (size_t i
= 0; i
< arraysize(cases
); ++i
) {
321 string16 input
= WideToUTF16(cases
[i
]);
322 string16 ltr_wrap
= input
;
323 WrapStringWithLTRFormatting(<r_wrap
);
324 EXPECT_EQ(ltr_wrap
[0], kLeftToRightEmbeddingMark
);
325 EXPECT_EQ(ltr_wrap
.substr(1, ltr_wrap
.length() - 2), input
);
326 EXPECT_EQ(ltr_wrap
[ltr_wrap
.length() -1], kPopDirectionalFormatting
);
328 string16 rtl_wrap
= input
;
329 WrapStringWithRTLFormatting(&rtl_wrap
);
330 EXPECT_EQ(rtl_wrap
[0], kRightToLeftEmbeddingMark
);
331 EXPECT_EQ(rtl_wrap
.substr(1, rtl_wrap
.length() - 2), input
);
332 EXPECT_EQ(rtl_wrap
[rtl_wrap
.length() -1], kPopDirectionalFormatting
);
336 EXPECT_EQ(was_rtl
, IsRTL());
339 TEST_F(RTLTest
, GetDisplayStringInLTRDirectionality
) {
345 { L
"test", false, true },
346 { L
"test.html", false, true },
347 { L
"\x05d0\x05d1\x05d2", true, true },
348 { L
"\x05d0\x05d1\x05d2.txt", true, true },
349 { L
"\x05d0" L
"abc", true, true },
350 { L
"\x05d0" L
"abc.txt", true, true },
351 { L
"abc\x05d0\x05d1", false, true },
352 { L
"abc\x05d0\x05d1.jpg", false, true },
355 const bool was_rtl
= IsRTL();
357 test::ScopedRestoreICUDefaultLocale restore_locale
;
358 for (size_t i
= 0; i
< 2; ++i
) {
359 // Toggle the application default text direction (to try each direction).
361 for (size_t i
= 0; i
< arraysize(cases
); ++i
) {
362 string16 input
= WideToUTF16(cases
[i
].path
);
363 string16 output
= GetDisplayStringInLTRDirectionality(input
);
364 // Test the expected wrapping behavior for the current UI directionality.
365 if (IsRTL() ? cases
[i
].wrap_rtl
: cases
[i
].wrap_ltr
)
366 EXPECT_NE(output
, input
);
368 EXPECT_EQ(output
, input
);
372 EXPECT_EQ(was_rtl
, IsRTL());
375 TEST_F(RTLTest
, GetTextDirection
) {
376 EXPECT_EQ(RIGHT_TO_LEFT
, GetTextDirectionForLocale("ar"));
377 EXPECT_EQ(RIGHT_TO_LEFT
, GetTextDirectionForLocale("ar_EG"));
378 EXPECT_EQ(RIGHT_TO_LEFT
, GetTextDirectionForLocale("he"));
379 EXPECT_EQ(RIGHT_TO_LEFT
, GetTextDirectionForLocale("he_IL"));
380 // iw is an obsolete code for Hebrew.
381 EXPECT_EQ(RIGHT_TO_LEFT
, GetTextDirectionForLocale("iw"));
382 // Although we're not yet localized to Farsi and Urdu, we
383 // do have the text layout direction information for them.
384 EXPECT_EQ(RIGHT_TO_LEFT
, GetTextDirectionForLocale("fa"));
385 EXPECT_EQ(RIGHT_TO_LEFT
, GetTextDirectionForLocale("ur"));
387 // Enable these when we include the minimal locale data for Azerbaijani
388 // written in Arabic and Dhivehi. At the moment, our copy of
389 // ICU data does not have entries for them.
390 EXPECT_EQ(RIGHT_TO_LEFT
, GetTextDirectionForLocale("az_Arab"));
391 // Dhivehi that uses Thaana script.
392 EXPECT_EQ(RIGHT_TO_LEFT
, GetTextDirectionForLocale("dv"));
394 EXPECT_EQ(LEFT_TO_RIGHT
, GetTextDirectionForLocale("en"));
395 // Chinese in China with '-'.
396 EXPECT_EQ(LEFT_TO_RIGHT
, GetTextDirectionForLocale("zh-CN"));
397 // Filipino : 3-letter code
398 EXPECT_EQ(LEFT_TO_RIGHT
, GetTextDirectionForLocale("fil"));
400 EXPECT_EQ(LEFT_TO_RIGHT
, GetTextDirectionForLocale("ru"));
401 // Japanese that uses multiple scripts
402 EXPECT_EQ(LEFT_TO_RIGHT
, GetTextDirectionForLocale("ja"));
405 TEST_F(RTLTest
, GetTextDirectionForLocaleInStartUp
) {
406 EXPECT_EQ(RIGHT_TO_LEFT
, GetTextDirectionForLocaleInStartUp("ar"));
407 EXPECT_EQ(RIGHT_TO_LEFT
, GetTextDirectionForLocaleInStartUp("ar_EG"));
408 EXPECT_EQ(RIGHT_TO_LEFT
, GetTextDirectionForLocaleInStartUp("he"));
409 EXPECT_EQ(RIGHT_TO_LEFT
, GetTextDirectionForLocaleInStartUp("he_IL"));
410 // iw is an obsolete code for Hebrew.
411 EXPECT_EQ(RIGHT_TO_LEFT
, GetTextDirectionForLocaleInStartUp("iw"));
412 // Although we're not yet localized to Farsi and Urdu, we
413 // do have the text layout direction information for them.
414 EXPECT_EQ(RIGHT_TO_LEFT
, GetTextDirectionForLocaleInStartUp("fa"));
415 EXPECT_EQ(RIGHT_TO_LEFT
, GetTextDirectionForLocaleInStartUp("ur"));
416 EXPECT_EQ(LEFT_TO_RIGHT
, GetTextDirectionForLocaleInStartUp("en"));
417 // Chinese in China with '-'.
418 EXPECT_EQ(LEFT_TO_RIGHT
, GetTextDirectionForLocaleInStartUp("zh-CN"));
419 // Filipino : 3-letter code
420 EXPECT_EQ(LEFT_TO_RIGHT
, GetTextDirectionForLocaleInStartUp("fil"));
422 EXPECT_EQ(LEFT_TO_RIGHT
, GetTextDirectionForLocaleInStartUp("ru"));
423 // Japanese that uses multiple scripts
424 EXPECT_EQ(LEFT_TO_RIGHT
, GetTextDirectionForLocaleInStartUp("ja"));
427 TEST_F(RTLTest
, UnadjustStringForLocaleDirection
) {
428 // These test strings are borrowed from WrapPathWithLTRFormatting
429 const wchar_t* cases
[] = {
435 L
"\x202a \x05d0 foo bar",
436 L
"\x202d \x05d0 foo bar",
437 L
"\x202b foo \x05d0 bar",
438 L
"\x202e foo \x05d0 bar",
439 L
"\x0622 foo \x05d0 bar",
442 const bool was_rtl
= IsRTL();
444 test::ScopedRestoreICUDefaultLocale restore_locale
;
445 for (size_t i
= 0; i
< 2; ++i
) {
446 // Toggle the application default text direction (to try each direction).
449 for (size_t i
= 0; i
< arraysize(cases
); ++i
) {
450 string16 test_case
= WideToUTF16(cases
[i
]);
451 string16 adjusted_string
= test_case
;
453 if (!AdjustStringForLocaleDirection(&adjusted_string
))
456 EXPECT_NE(test_case
, adjusted_string
);
457 EXPECT_TRUE(UnadjustStringForLocaleDirection(&adjusted_string
));
458 EXPECT_EQ(test_case
, adjusted_string
) << " for test case [" << test_case
459 << "] with IsRTL() == " << IsRTL();
463 EXPECT_EQ(was_rtl
, IsRTL());