1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/i18n/rtl.h"
9 #include "base/files/file_path.h"
10 #include "base/strings/string_util.h"
11 #include "base/strings/sys_string_conversions.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "testing/gtest/include/gtest/gtest.h"
14 #include "testing/platform_test.h"
15 #include "third_party/icu/source/i18n/unicode/usearch.h"
22 // A test utility function to set the application default text direction.
23 void SetRTL(bool rtl
) {
24 // Override the current locale/direction.
25 SetICUDefaultLocale(rtl
? "he" : "en");
26 EXPECT_EQ(rtl
, IsRTL());
31 class RTLTest
: public PlatformTest
{
34 TEST_F(RTLTest
, GetFirstStrongCharacterDirection
) {
37 TextDirection direction
;
39 // Test pure LTR string.
40 { L
"foo bar", LEFT_TO_RIGHT
},
41 // Test pure RTL string.
42 { L
"\x05d0\x05d1\x05d2 \x05d3\x0d4\x05d5", RIGHT_TO_LEFT
},
43 // Test bidi string in which the first character with strong directionality
44 // is a character with type L.
45 { L
"foo \x05d0 bar", LEFT_TO_RIGHT
},
46 // Test bidi string in which the first character with strong directionality
47 // is a character with type R.
48 { L
"\x05d0 foo bar", RIGHT_TO_LEFT
},
49 // Test bidi string which starts with a character with weak directionality
50 // and in which the first character with strong directionality is a
51 // character with type L.
52 { L
"!foo \x05d0 bar", LEFT_TO_RIGHT
},
53 // Test bidi string which starts with a character with weak directionality
54 // and in which the first character with strong directionality is a
55 // character with type R.
56 { L
",\x05d0 foo bar", RIGHT_TO_LEFT
},
57 // Test bidi string in which the first character with strong directionality
58 // is a character with type LRE.
59 { L
"\x202a \x05d0 foo bar", LEFT_TO_RIGHT
},
60 // Test bidi string in which the first character with strong directionality
61 // is a character with type LRO.
62 { L
"\x202d \x05d0 foo bar", LEFT_TO_RIGHT
},
63 // Test bidi string in which the first character with strong directionality
64 // is a character with type RLE.
65 { L
"\x202b foo \x05d0 bar", RIGHT_TO_LEFT
},
66 // Test bidi string in which the first character with strong directionality
67 // is a character with type RLO.
68 { L
"\x202e foo \x05d0 bar", RIGHT_TO_LEFT
},
69 // Test bidi string in which the first character with strong directionality
70 // is a character with type AL.
71 { L
"\x0622 foo \x05d0 bar", RIGHT_TO_LEFT
},
72 // Test a string without strong directionality characters.
73 { L
",!.{}", LEFT_TO_RIGHT
},
75 { L
"", LEFT_TO_RIGHT
},
76 // Test characters in non-BMP (e.g. Phoenician letters. Please refer to
77 // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more
80 #if defined(WCHAR_T_IS_UTF32)
81 L
" ! \x10910" L
"abc 123",
82 #elif defined(WCHAR_T_IS_UTF16)
83 L
" ! \xd802\xdd10" L
"abc 123",
85 #error wchar_t should be either UTF-16 or UTF-32
89 #if defined(WCHAR_T_IS_UTF32)
90 L
" ! \x10401" L
"abc 123",
91 #elif defined(WCHAR_T_IS_UTF16)
92 L
" ! \xd801\xdc01" L
"abc 123",
94 #error wchar_t should be either UTF-16 or UTF-32
99 for (size_t i
= 0; i
< ARRAYSIZE_UNSAFE(cases
); ++i
)
100 EXPECT_EQ(cases
[i
].direction
,
101 GetFirstStrongCharacterDirection(WideToUTF16(cases
[i
].text
)));
105 // Note that the cases with LRE, LRO, RLE and RLO are invalid for
106 // GetLastStrongCharacterDirection because they should be followed by PDF
108 TEST_F(RTLTest
, GetLastStrongCharacterDirection
) {
111 TextDirection direction
;
113 // Test pure LTR string.
114 { L
"foo bar", LEFT_TO_RIGHT
},
115 // Test pure RTL string.
116 { L
"\x05d0\x05d1\x05d2 \x05d3\x0d4\x05d5", RIGHT_TO_LEFT
},
117 // Test bidi string in which the last character with strong directionality
118 // is a character with type L.
119 { L
"foo \x05d0 bar", LEFT_TO_RIGHT
},
120 // Test bidi string in which the last character with strong directionality
121 // is a character with type R.
122 { L
"\x05d0 foo bar \x05d3", RIGHT_TO_LEFT
},
123 // Test bidi string which ends with a character with weak directionality
124 // and in which the last character with strong directionality is a
125 // character with type L.
126 { L
"!foo \x05d0 bar!", LEFT_TO_RIGHT
},
127 // Test bidi string which ends with a character with weak directionality
128 // and in which the last character with strong directionality is a
129 // character with type R.
130 { L
",\x05d0 foo bar \x05d1,", RIGHT_TO_LEFT
},
131 // Test bidi string in which the last character with strong directionality
132 // is a character with type AL.
133 { L
"\x0622 foo \x05d0 bar \x0622", RIGHT_TO_LEFT
},
134 // Test a string without strong directionality characters.
135 { L
",!.{}", LEFT_TO_RIGHT
},
136 // Test empty string.
137 { L
"", LEFT_TO_RIGHT
},
138 // Test characters in non-BMP (e.g. Phoenician letters. Please refer to
139 // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more
142 #if defined(WCHAR_T_IS_UTF32)
143 L
"abc 123" L
" ! \x10910 !",
144 #elif defined(WCHAR_T_IS_UTF16)
145 L
"abc 123" L
" ! \xd802\xdd10 !",
147 #error wchar_t should be either UTF-16 or UTF-32
151 #if defined(WCHAR_T_IS_UTF32)
152 L
"abc 123" L
" ! \x10401 !",
153 #elif defined(WCHAR_T_IS_UTF16)
154 L
"abc 123" L
" ! \xd801\xdc01 !",
156 #error wchar_t should be either UTF-16 or UTF-32
161 for (size_t i
= 0; i
< ARRAYSIZE_UNSAFE(cases
); ++i
)
162 EXPECT_EQ(cases
[i
].direction
,
163 GetLastStrongCharacterDirection(WideToUTF16(cases
[i
].text
)));
166 TEST_F(RTLTest
, GetStringDirection
) {
169 TextDirection direction
;
171 // Test pure LTR string.
172 { L
"foobar", LEFT_TO_RIGHT
},
173 { L
".foobar", LEFT_TO_RIGHT
},
174 { L
"foo, bar", LEFT_TO_RIGHT
},
175 // Test pure LTR with strong directionality characters of type LRE.
176 { L
"\x202a\x202a", LEFT_TO_RIGHT
},
177 { L
".\x202a\x202a", LEFT_TO_RIGHT
},
178 { L
"\x202a, \x202a", LEFT_TO_RIGHT
},
179 // Test pure LTR with strong directionality characters of type LRO.
180 { L
"\x202d\x202d", LEFT_TO_RIGHT
},
181 { L
".\x202d\x202d", LEFT_TO_RIGHT
},
182 { L
"\x202d, \x202d", LEFT_TO_RIGHT
},
183 // Test pure LTR with various types of strong directionality characters.
184 { L
"foo \x202a\x202d", LEFT_TO_RIGHT
},
185 { L
".\x202d foo \x202a", LEFT_TO_RIGHT
},
186 { L
"\x202a, \x202d foo", LEFT_TO_RIGHT
},
187 // Test pure RTL with strong directionality characters of type R.
188 { L
"\x05d0\x05d0", RIGHT_TO_LEFT
},
189 { L
".\x05d0\x05d0", RIGHT_TO_LEFT
},
190 { L
"\x05d0, \x05d0", RIGHT_TO_LEFT
},
191 // Test pure RTL with strong directionality characters of type RLE.
192 { L
"\x202b\x202b", RIGHT_TO_LEFT
},
193 { L
".\x202b\x202b", RIGHT_TO_LEFT
},
194 { L
"\x202b, \x202b", RIGHT_TO_LEFT
},
195 // Test pure RTL with strong directionality characters of type RLO.
196 { L
"\x202e\x202e", RIGHT_TO_LEFT
},
197 { L
".\x202e\x202e", RIGHT_TO_LEFT
},
198 { L
"\x202e, \x202e", RIGHT_TO_LEFT
},
199 // Test pure RTL with strong directionality characters of type AL.
200 { L
"\x0622\x0622", RIGHT_TO_LEFT
},
201 { L
".\x0622\x0622", RIGHT_TO_LEFT
},
202 { L
"\x0622, \x0622", RIGHT_TO_LEFT
},
203 // Test pure RTL with various types of strong directionality characters.
204 { L
"\x05d0\x202b\x202e\x0622", RIGHT_TO_LEFT
},
205 { L
".\x202b\x202e\x0622\x05d0", RIGHT_TO_LEFT
},
206 { L
"\x0622\x202e, \x202b\x05d0", RIGHT_TO_LEFT
},
207 // Test bidi strings.
208 { L
"foo \x05d0 bar", UNKNOWN_DIRECTION
},
209 { L
"\x202b foo bar", UNKNOWN_DIRECTION
},
210 { L
"!foo \x0622 bar", UNKNOWN_DIRECTION
},
211 { L
"\x202a\x202b", UNKNOWN_DIRECTION
},
212 { L
"\x202e\x202d", UNKNOWN_DIRECTION
},
213 { L
"\x0622\x202a", UNKNOWN_DIRECTION
},
214 { L
"\x202d\x05d0", UNKNOWN_DIRECTION
},
215 // Test a string without strong directionality characters.
216 { L
",!.{}", LEFT_TO_RIGHT
},
217 // Test empty string.
218 { L
"", LEFT_TO_RIGHT
},
220 #if defined(WCHAR_T_IS_UTF32)
221 L
" ! \x10910" L
"abc 123",
222 #elif defined(WCHAR_T_IS_UTF16)
223 L
" ! \xd802\xdd10" L
"abc 123",
225 #error wchar_t should be either UTF-16 or UTF-32
229 #if defined(WCHAR_T_IS_UTF32)
230 L
" ! \x10401" L
"abc 123",
231 #elif defined(WCHAR_T_IS_UTF16)
232 L
" ! \xd801\xdc01" L
"abc 123",
234 #error wchar_t should be either UTF-16 or UTF-32
239 for (size_t i
= 0; i
< ARRAYSIZE_UNSAFE(cases
); ++i
)
240 EXPECT_EQ(cases
[i
].direction
,
241 GetStringDirection(WideToUTF16(cases
[i
].text
)));
244 TEST_F(RTLTest
, WrapPathWithLTRFormatting
) {
245 const wchar_t* cases
[] = {
246 // Test common path, such as "c:\foo\bar".
248 // Test path with file name, such as "c:\foo\bar\test.jpg".
249 L
"c:/foo/bar/test.jpg",
250 // Test path ending with punctuation, such as "c:\(foo)\bar.".
252 // Test path ending with separator, such as "c:\foo\bar\".
254 // Test path with RTL character.
256 // Test path with 2 level RTL directory names.
258 // Test path with mixed RTL/LTR directory names and ending with punctuation.
259 L
"c:/\x05d0/\x0622/(foo)/b.a.r.",
260 // Test path without driver name, such as "/foo/bar/test/jpg".
261 L
"/foo/bar/test.jpg",
262 // Test path start with current directory, such as "./foo".
264 // Test path start with parent directory, such as "../foo/bar.jpg".
266 // Test absolute path, such as "//foo/bar.jpg".
268 // Test path with mixed RTL/LTR directory names.
269 L
"c:/foo/\x05d0/\x0622/\x05d1.jpg",
274 for (size_t i
= 0; i
< arraysize(cases
); ++i
) {
277 std::wstring
win_path(cases
[i
]);
278 std::replace(win_path
.begin(), win_path
.end(), '/', '\\');
279 path
= FilePath(win_path
);
280 std::wstring wrapped_expected
=
281 std::wstring(L
"\x202a") + win_path
+ L
"\x202c";
283 path
= FilePath(base::SysWideToNativeMB(cases
[i
]));
284 std::wstring wrapped_expected
=
285 std::wstring(L
"\x202a") + cases
[i
] + L
"\x202c";
287 string16 localized_file_path_string
;
288 WrapPathWithLTRFormatting(path
, &localized_file_path_string
);
290 std::wstring wrapped_actual
= UTF16ToWide(localized_file_path_string
);
291 EXPECT_EQ(wrapped_expected
, wrapped_actual
);
295 TEST_F(RTLTest
, WrapString
) {
296 const wchar_t* cases
[] = {
303 L
"\x5d0" L
"a" L
"\x5d1",
306 const bool was_rtl
= IsRTL();
308 for (size_t i
= 0; i
< 2; ++i
) {
309 // Toggle the application default text direction (to try each direction).
313 WrapStringWithLTRFormatting(&empty
);
314 EXPECT_TRUE(empty
.empty());
315 WrapStringWithRTLFormatting(&empty
);
316 EXPECT_TRUE(empty
.empty());
318 for (size_t i
= 0; i
< arraysize(cases
); ++i
) {
319 string16 input
= WideToUTF16(cases
[i
]);
320 string16 ltr_wrap
= input
;
321 WrapStringWithLTRFormatting(<r_wrap
);
322 EXPECT_EQ(ltr_wrap
[0], kLeftToRightEmbeddingMark
);
323 EXPECT_EQ(ltr_wrap
.substr(1, ltr_wrap
.length() - 2), input
);
324 EXPECT_EQ(ltr_wrap
[ltr_wrap
.length() -1], kPopDirectionalFormatting
);
326 string16 rtl_wrap
= input
;
327 WrapStringWithRTLFormatting(&rtl_wrap
);
328 EXPECT_EQ(rtl_wrap
[0], kRightToLeftEmbeddingMark
);
329 EXPECT_EQ(rtl_wrap
.substr(1, rtl_wrap
.length() - 2), input
);
330 EXPECT_EQ(rtl_wrap
[rtl_wrap
.length() -1], kPopDirectionalFormatting
);
334 EXPECT_EQ(was_rtl
, IsRTL());
337 TEST_F(RTLTest
, GetDisplayStringInLTRDirectionality
) {
343 { L
"test", false, true },
344 { L
"test.html", false, true },
345 { L
"\x05d0\x05d1\x05d2", true, true },
346 { L
"\x05d0\x05d1\x05d2.txt", true, true },
347 { L
"\x05d0" L
"abc", true, true },
348 { L
"\x05d0" L
"abc.txt", true, true },
349 { L
"abc\x05d0\x05d1", false, true },
350 { L
"abc\x05d0\x05d1.jpg", false, true },
353 const bool was_rtl
= IsRTL();
355 for (size_t i
= 0; i
< 2; ++i
) {
356 // Toggle the application default text direction (to try each direction).
358 for (size_t i
= 0; i
< ARRAYSIZE_UNSAFE(cases
); ++i
) {
359 string16 input
= WideToUTF16(cases
[i
].path
);
360 string16 output
= GetDisplayStringInLTRDirectionality(input
);
361 // Test the expected wrapping behavior for the current UI directionality.
362 if (IsRTL() ? cases
[i
].wrap_rtl
: cases
[i
].wrap_ltr
)
363 EXPECT_NE(output
, input
);
365 EXPECT_EQ(output
, input
);
369 EXPECT_EQ(was_rtl
, IsRTL());
372 TEST_F(RTLTest
, GetTextDirection
) {
373 EXPECT_EQ(RIGHT_TO_LEFT
, GetTextDirectionForLocale("ar"));
374 EXPECT_EQ(RIGHT_TO_LEFT
, GetTextDirectionForLocale("ar_EG"));
375 EXPECT_EQ(RIGHT_TO_LEFT
, GetTextDirectionForLocale("he"));
376 EXPECT_EQ(RIGHT_TO_LEFT
, GetTextDirectionForLocale("he_IL"));
377 // iw is an obsolete code for Hebrew.
378 EXPECT_EQ(RIGHT_TO_LEFT
, GetTextDirectionForLocale("iw"));
379 // Although we're not yet localized to Farsi and Urdu, we
380 // do have the text layout direction information for them.
381 EXPECT_EQ(RIGHT_TO_LEFT
, GetTextDirectionForLocale("fa"));
382 EXPECT_EQ(RIGHT_TO_LEFT
, GetTextDirectionForLocale("ur"));
384 // Enable these when we include the minimal locale data for Azerbaijani
385 // written in Arabic and Dhivehi. At the moment, our copy of
386 // ICU data does not have entries for them.
387 EXPECT_EQ(RIGHT_TO_LEFT
, GetTextDirectionForLocale("az_Arab"));
388 // Dhivehi that uses Thaana script.
389 EXPECT_EQ(RIGHT_TO_LEFT
, GetTextDirectionForLocale("dv"));
391 EXPECT_EQ(LEFT_TO_RIGHT
, GetTextDirectionForLocale("en"));
392 // Chinese in China with '-'.
393 EXPECT_EQ(LEFT_TO_RIGHT
, GetTextDirectionForLocale("zh-CN"));
394 // Filipino : 3-letter code
395 EXPECT_EQ(LEFT_TO_RIGHT
, GetTextDirectionForLocale("fil"));
397 EXPECT_EQ(LEFT_TO_RIGHT
, GetTextDirectionForLocale("ru"));
398 // Japanese that uses multiple scripts
399 EXPECT_EQ(LEFT_TO_RIGHT
, GetTextDirectionForLocale("ja"));
402 TEST_F(RTLTest
, UnadjustStringForLocaleDirection
) {
403 // These test strings are borrowed from WrapPathWithLTRFormatting
404 const wchar_t* cases
[] = {
410 L
"\x202a \x05d0 foo bar",
411 L
"\x202d \x05d0 foo bar",
412 L
"\x202b foo \x05d0 bar",
413 L
"\x202e foo \x05d0 bar",
414 L
"\x0622 foo \x05d0 bar",
417 const bool was_rtl
= IsRTL();
419 for (size_t i
= 0; i
< 2; ++i
) {
420 // Toggle the application default text direction (to try each direction).
423 for (size_t i
= 0; i
< arraysize(cases
); ++i
) {
424 string16 test_case
= WideToUTF16(cases
[i
]);
425 string16 adjusted_string
= test_case
;
427 if (!AdjustStringForLocaleDirection(&adjusted_string
))
430 EXPECT_NE(test_case
, adjusted_string
);
431 EXPECT_TRUE(UnadjustStringForLocaleDirection(&adjusted_string
));
432 EXPECT_EQ(test_case
, adjusted_string
) << " for test case [" << test_case
433 << "] with IsRTL() == " << IsRTL();
437 EXPECT_EQ(was_rtl
, IsRTL());