1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/i18n/rtl.h"
9 #include "base/files/file_path.h"
10 #include "base/strings/string_util.h"
11 #include "base/strings/sys_string_conversions.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "testing/gtest/include/gtest/gtest.h"
14 #include "testing/platform_test.h"
15 #include "third_party/icu/source/i18n/unicode/usearch.h"
17 #if defined(TOOLKIT_GTK)
26 // A test utility function to set the application default text direction.
// Passing true selects the "he" ICU locale and passing false selects "en";
// the trailing expectation checks that IsRTL() then reports the requested
// direction.
27 void SetRTL(bool rtl
) {
28 // Override the current locale/direction.
29 SetICUDefaultLocale(rtl
? "he" : "en");
30 #if defined(TOOLKIT_GTK)
31 // Do the same for GTK, which does not rely on the ICU default locale.
32 gtk_widget_set_default_direction(rtl
? GTK_TEXT_DIR_RTL
: GTK_TEXT_DIR_LTR
);
// NOTE(review): no #endif for the TOOLKIT_GTK conditional is visible in this
// excerpt; confirm the expectation below is meant to run on every toolkit.
34 EXPECT_EQ(rtl
, IsRTL());
// Test fixture named by the TEST_F cases that follow; it derives from
// PlatformTest (presumably declared in testing/platform_test.h — confirm).
// NOTE(review): the class body and its closing brace are not visible in this
// excerpt.
39 class RTLTest
: public PlatformTest
{
// Checks GetFirstStrongCharacterDirection() against a table of wide strings,
// each paired with the direction expected from its first strongly-directional
// character. Strings with no strong character (punctuation only, or empty)
// are expected to report LEFT_TO_RIGHT.
42 TEST_F(RTLTest
, GetFirstStrongCharacterDirection
) {
// NOTE(review): the struct/array declaration that introduces |cases| is not
// visible in this excerpt; the loop at the end reads cases[i].text and
// cases[i].direction.
45 TextDirection direction
;
47 // Test pure LTR string.
48 { L
"foo bar", LEFT_TO_RIGHT
},
49 // Test pure RTL string.
// Every letter is Hebrew (U+05D0..U+05D5) so the string contains no
// strongly-LTR character.
50 { L
"\x05d0\x05d1\x05d2 \x05d3\x05d4\x05d5", RIGHT_TO_LEFT
},
51 // Test bidi string in which the first character with strong directionality
52 // is a character with type L.
53 { L
"foo \x05d0 bar", LEFT_TO_RIGHT
},
54 // Test bidi string in which the first character with strong directionality
55 // is a character with type R.
56 { L
"\x05d0 foo bar", RIGHT_TO_LEFT
},
57 // Test bidi string which starts with a character with weak directionality
58 // and in which the first character with strong directionality is a
59 // character with type L.
60 { L
"!foo \x05d0 bar", LEFT_TO_RIGHT
},
61 // Test bidi string which starts with a character with weak directionality
62 // and in which the first character with strong directionality is a
63 // character with type R.
64 { L
",\x05d0 foo bar", RIGHT_TO_LEFT
},
65 // Test bidi string in which the first character with strong directionality
66 // is a character with type LRE.
67 { L
"\x202a \x05d0 foo bar", LEFT_TO_RIGHT
},
68 // Test bidi string in which the first character with strong directionality
69 // is a character with type LRO.
70 { L
"\x202d \x05d0 foo bar", LEFT_TO_RIGHT
},
71 // Test bidi string in which the first character with strong directionality
72 // is a character with type RLE.
73 { L
"\x202b foo \x05d0 bar", RIGHT_TO_LEFT
},
74 // Test bidi string in which the first character with strong directionality
75 // is a character with type RLO.
76 { L
"\x202e foo \x05d0 bar", RIGHT_TO_LEFT
},
77 // Test bidi string in which the first character with strong directionality
78 // is a character with type AL.
79 { L
"\x0622 foo \x05d0 bar", RIGHT_TO_LEFT
},
80 // Test a string without strong directionality characters.
81 { L
",!.{}", LEFT_TO_RIGHT
},
// Test empty string.
83 { L
"", LEFT_TO_RIGHT
},
84 // Test characters in non-BMP (e.g. Phoenician letters). Please refer to
85 // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more
// information.
// U+10910 is written directly when wchar_t is UTF-32 and as the surrogate
// pair D802 DD10 when wchar_t is UTF-16.
// NOTE(review): the expected direction for this entry is not visible in this
// excerpt.
88 #if defined(WCHAR_T_IS_UTF32)
89 L
" ! \x10910" L
"abc 123",
90 #elif defined(WCHAR_T_IS_UTF16)
91 L
" ! \xd802\xdd10" L
"abc 123",
93 #error wchar_t should be either UTF-16 or UTF-32
// Same layout for U+10401 (surrogate pair D801 DC01 under UTF-16).
// NOTE(review): the expected direction for this entry is not visible in this
// excerpt.
97 #if defined(WCHAR_T_IS_UTF32)
98 L
" ! \x10401" L
"abc 123",
99 #elif defined(WCHAR_T_IS_UTF16)
100 L
" ! \xd801\xdc01" L
"abc 123",
102 #error wchar_t should be either UTF-16 or UTF-32
// Convert each wide test string to UTF-16 and compare the reported direction
// with the expected one.
107 for (size_t i
= 0; i
< ARRAYSIZE_UNSAFE(cases
); ++i
)
108 EXPECT_EQ(cases
[i
].direction
,
109 GetFirstStrongCharacterDirection(WideToUTF16(cases
[i
].text
)));
// Checks GetLastStrongCharacterDirection() against a table of wide strings,
// each paired with the direction expected from its last strongly-directional
// character. Strings with no strong character (punctuation only, or empty)
// are expected to report LEFT_TO_RIGHT.
113 // Note that the cases with LRE, LRO, RLE and RLO are invalid for
114 // GetLastStrongCharacterDirection because they should be followed by PDF
// character, so no such case appears in the table below.
116 TEST_F(RTLTest
, GetLastStrongCharacterDirection
) {
// NOTE(review): the struct/array declaration that introduces |cases| is not
// visible in this excerpt; the loop at the end reads cases[i].text and
// cases[i].direction.
119 TextDirection direction
;
121 // Test pure LTR string.
122 { L
"foo bar", LEFT_TO_RIGHT
},
123 // Test pure RTL string.
// Every letter is Hebrew (U+05D0..U+05D5) so the string contains no
// strongly-LTR character.
124 { L
"\x05d0\x05d1\x05d2 \x05d3\x05d4\x05d5", RIGHT_TO_LEFT
},
125 // Test bidi string in which the last character with strong directionality
126 // is a character with type L.
127 { L
"foo \x05d0 bar", LEFT_TO_RIGHT
},
128 // Test bidi string in which the last character with strong directionality
129 // is a character with type R.
130 { L
"\x05d0 foo bar \x05d3", RIGHT_TO_LEFT
},
131 // Test bidi string which ends with a character with weak directionality
132 // and in which the last character with strong directionality is a
133 // character with type L.
134 { L
"!foo \x05d0 bar!", LEFT_TO_RIGHT
},
135 // Test bidi string which ends with a character with weak directionality
136 // and in which the last character with strong directionality is a
137 // character with type R.
138 { L
",\x05d0 foo bar \x05d1,", RIGHT_TO_LEFT
},
139 // Test bidi string in which the last character with strong directionality
140 // is a character with type AL.
141 { L
"\x0622 foo \x05d0 bar \x0622", RIGHT_TO_LEFT
},
142 // Test a string without strong directionality characters.
143 { L
",!.{}", LEFT_TO_RIGHT
},
144 // Test empty string.
145 { L
"", LEFT_TO_RIGHT
},
146 // Test characters in non-BMP (e.g. Phoenician letters). Please refer to
147 // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more
// information.
// U+10910 is written directly when wchar_t is UTF-32 and as the surrogate
// pair D802 DD10 when wchar_t is UTF-16.
// NOTE(review): the expected direction for this entry is not visible in this
// excerpt.
150 #if defined(WCHAR_T_IS_UTF32)
151 L
"abc 123" L
" ! \x10910 !",
152 #elif defined(WCHAR_T_IS_UTF16)
153 L
"abc 123" L
" ! \xd802\xdd10 !",
155 #error wchar_t should be either UTF-16 or UTF-32
// Same layout for U+10401 (surrogate pair D801 DC01 under UTF-16).
// NOTE(review): the expected direction for this entry is not visible in this
// excerpt.
159 #if defined(WCHAR_T_IS_UTF32)
160 L
"abc 123" L
" ! \x10401 !",
161 #elif defined(WCHAR_T_IS_UTF16)
162 L
"abc 123" L
" ! \xd801\xdc01 !",
164 #error wchar_t should be either UTF-16 or UTF-32
// Convert each wide test string to UTF-16 and compare the reported direction
// with the expected one.
169 for (size_t i
= 0; i
< ARRAYSIZE_UNSAFE(cases
); ++i
)
170 EXPECT_EQ(cases
[i
].direction
,
171 GetLastStrongCharacterDirection(WideToUTF16(cases
[i
].text
)));
// Checks GetStringDirection() against a table of wide strings. In the visible
// cases, strings whose strong characters are all LTR expect LEFT_TO_RIGHT,
// strings whose strong characters are all RTL expect RIGHT_TO_LEFT, strings
// mixing both expect UNKNOWN_DIRECTION, and strings with no strong character
// (punctuation only, or empty) expect LEFT_TO_RIGHT.
174 TEST_F(RTLTest
, GetStringDirection
) {
// NOTE(review): the struct/array declaration that introduces |cases| is not
// visible in this excerpt; the loop at the end reads cases[i].text and
// cases[i].direction.
177 TextDirection direction
;
179 // Test pure LTR string.
180 { L
"foobar", LEFT_TO_RIGHT
},
181 { L
".foobar", LEFT_TO_RIGHT
},
182 { L
"foo, bar", LEFT_TO_RIGHT
},
183 // Test pure LTR with strong directionality characters of type LRE.
184 { L
"\x202a\x202a", LEFT_TO_RIGHT
},
185 { L
".\x202a\x202a", LEFT_TO_RIGHT
},
186 { L
"\x202a, \x202a", LEFT_TO_RIGHT
},
187 // Test pure LTR with strong directionality characters of type LRO.
188 { L
"\x202d\x202d", LEFT_TO_RIGHT
},
189 { L
".\x202d\x202d", LEFT_TO_RIGHT
},
190 { L
"\x202d, \x202d", LEFT_TO_RIGHT
},
191 // Test pure LTR with various types of strong directionality characters.
192 { L
"foo \x202a\x202d", LEFT_TO_RIGHT
},
193 { L
".\x202d foo \x202a", LEFT_TO_RIGHT
},
194 { L
"\x202a, \x202d foo", LEFT_TO_RIGHT
},
195 // Test pure RTL with strong directionality characters of type R.
196 { L
"\x05d0\x05d0", RIGHT_TO_LEFT
},
197 { L
".\x05d0\x05d0", RIGHT_TO_LEFT
},
198 { L
"\x05d0, \x05d0", RIGHT_TO_LEFT
},
199 // Test pure RTL with strong directionality characters of type RLE.
200 { L
"\x202b\x202b", RIGHT_TO_LEFT
},
201 { L
".\x202b\x202b", RIGHT_TO_LEFT
},
202 { L
"\x202b, \x202b", RIGHT_TO_LEFT
},
203 // Test pure RTL with strong directionality characters of type RLO.
204 { L
"\x202e\x202e", RIGHT_TO_LEFT
},
205 { L
".\x202e\x202e", RIGHT_TO_LEFT
},
206 { L
"\x202e, \x202e", RIGHT_TO_LEFT
},
207 // Test pure RTL with strong directionality characters of type AL.
208 { L
"\x0622\x0622", RIGHT_TO_LEFT
},
209 { L
".\x0622\x0622", RIGHT_TO_LEFT
},
210 { L
"\x0622, \x0622", RIGHT_TO_LEFT
},
211 // Test pure RTL with various types of strong directionality characters.
212 { L
"\x05d0\x202b\x202e\x0622", RIGHT_TO_LEFT
},
213 { L
".\x202b\x202e\x0622\x05d0", RIGHT_TO_LEFT
},
214 { L
"\x0622\x202e, \x202b\x05d0", RIGHT_TO_LEFT
},
215 // Test bidi strings.
// Each of these mixes LTR-type and RTL-type strong characters.
216 { L
"foo \x05d0 bar", UNKNOWN_DIRECTION
},
217 { L
"\x202b foo bar", UNKNOWN_DIRECTION
},
218 { L
"!foo \x0622 bar", UNKNOWN_DIRECTION
},
219 { L
"\x202a\x202b", UNKNOWN_DIRECTION
},
220 { L
"\x202e\x202d", UNKNOWN_DIRECTION
},
221 { L
"\x0622\x202a", UNKNOWN_DIRECTION
},
222 { L
"\x202d\x05d0", UNKNOWN_DIRECTION
},
223 // Test a string without strong directionality characters.
224 { L
",!.{}", LEFT_TO_RIGHT
},
225 // Test empty string.
226 { L
"", LEFT_TO_RIGHT
},
// Non-BMP characters: U+10910 is written directly when wchar_t is UTF-32 and
// as the surrogate pair D802 DD10 when wchar_t is UTF-16.
// NOTE(review): the expected direction for this entry is not visible in this
// excerpt.
228 #if defined(WCHAR_T_IS_UTF32)
229 L
" ! \x10910" L
"abc 123",
230 #elif defined(WCHAR_T_IS_UTF16)
231 L
" ! \xd802\xdd10" L
"abc 123",
233 #error wchar_t should be either UTF-16 or UTF-32
// Same layout for U+10401 (surrogate pair D801 DC01 under UTF-16).
// NOTE(review): the expected direction for this entry is not visible in this
// excerpt.
237 #if defined(WCHAR_T_IS_UTF32)
238 L
" ! \x10401" L
"abc 123",
239 #elif defined(WCHAR_T_IS_UTF16)
240 L
" ! \xd801\xdc01" L
"abc 123",
242 #error wchar_t should be either UTF-16 or UTF-32
// Convert each wide test string to UTF-16 and compare the reported direction
// with the expected one.
247 for (size_t i
= 0; i
< ARRAYSIZE_UNSAFE(cases
); ++i
)
248 EXPECT_EQ(cases
[i
].direction
,
249 GetStringDirection(WideToUTF16(cases
[i
].text
)));
// Checks that WrapPathWithLTRFormatting() returns the path text wrapped
// between U+202A and U+202C for each path in |cases|.
252 TEST_F(RTLTest
, WrapPathWithLTRFormatting
) {
// NOTE(review): several of the comments below have no literal after them in
// this excerpt; the corresponding array entries are not visible here.
253 const wchar_t* cases
[] = {
254 // Test common path, such as "c:\foo\bar".
256 // Test path with file name, such as "c:\foo\bar\test.jpg".
257 L
"c:/foo/bar/test.jpg",
258 // Test path ending with punctuation, such as "c:\(foo)\bar.".
260 // Test path ending with separator, such as "c:\foo\bar\".
262 // Test path with RTL character.
264 // Test path with 2-level RTL directory names.
266 // Test path with mixed RTL/LTR directory names and ending with punctuation.
267 L
"c:/\x05d0/\x0622/(foo)/b.a.r.",
268 // Test path without drive name, such as "/foo/bar/test.jpg".
269 L
"/foo/bar/test.jpg",
270 // Test path starting with the current directory, such as "./foo".
272 // Test path starting with the parent directory, such as "../foo/bar.jpg".
274 // Test absolute path, such as "//foo/bar.jpg".
276 // Test path with mixed RTL/LTR directory names.
277 L
"c:/foo/\x05d0/\x0622/\x05d1.jpg",
282 for (size_t i
= 0; i
< arraysize(cases
); ++i
) {
// NOTE(review): |path| is assigned twice and |wrapped_expected| is defined
// twice below; presumably these are alternative platform-conditional branches
// whose preprocessor lines are not visible in this excerpt — confirm.
// First variant: rewrite '/' separators as '\\' before building the FilePath
// and the expected string.
285 std::wstring
win_path(cases
[i
]);
286 std::replace(win_path
.begin(), win_path
.end(), '/', '\\');
287 path
= FilePath(win_path
);
288 std::wstring wrapped_expected
=
289 std::wstring(L
"\x202a") + win_path
+ L
"\x202c";
// Second variant: convert the wide string with base::SysWideToNativeMB() and
// keep the separators unchanged.
291 path
= FilePath(base::SysWideToNativeMB(cases
[i
]));
292 std::wstring wrapped_expected
=
293 std::wstring(L
"\x202a") + cases
[i
] + L
"\x202c";
// Wrap the path and compare the result, converted back to a wide string,
// with the expected text.
295 string16 localized_file_path_string
;
296 WrapPathWithLTRFormatting(path
, &localized_file_path_string
);
298 std::wstring wrapped_actual
= UTF16ToWide(localized_file_path_string
);
299 EXPECT_EQ(wrapped_expected
, wrapped_actual
);
// Checks WrapStringWithLTRFormatting() and WrapStringWithRTLFormatting():
// an empty string must stay empty, and every other input must come back as
// <embedding mark> + input + <pop directional formatting>.
303 TEST_F(RTLTest
, WrapString
) {
// NOTE(review): only one array entry is visible in this excerpt.
304 const wchar_t* cases
[] = {
311 L
"\x5d0" L
"a" L
"\x5d1",
// Remember the starting direction so it can be checked again at the end.
314 const bool was_rtl
= IsRTL();
// Two passes; the index is named |pass| so that the inner loop's |i| does not
// shadow it.
316 for (size_t pass
= 0; pass
< 2; ++pass
) {
317 // Toggle the application default text direction (to try each direction).
// NOTE(review): the toggle call and the declaration of |empty| are not
// visible in this excerpt.
321 WrapStringWithLTRFormatting(&empty
);
322 EXPECT_TRUE(empty
.empty());
323 WrapStringWithRTLFormatting(&empty
);
324 EXPECT_TRUE(empty
.empty());
326 for (size_t i
= 0; i
< arraysize(cases
); ++i
) {
327 string16 input
= WideToUTF16(cases
[i
]);
// LTR wrap: first character is the embedding mark, last is the pop mark, and
// the middle is the untouched input.
328 string16 ltr_wrap
= input
;
329 WrapStringWithLTRFormatting(&ltr_wrap
);
330 EXPECT_EQ(ltr_wrap
[0], kLeftToRightEmbeddingMark
);
331 EXPECT_EQ(ltr_wrap
.substr(1, ltr_wrap
.length() - 2), input
);
332 EXPECT_EQ(ltr_wrap
[ltr_wrap
.length() -1], kPopDirectionalFormatting
);
// RTL wrap: same shape with the right-to-left embedding mark.
334 string16 rtl_wrap
= input
;
335 WrapStringWithRTLFormatting(&rtl_wrap
);
336 EXPECT_EQ(rtl_wrap
[0], kRightToLeftEmbeddingMark
);
337 EXPECT_EQ(rtl_wrap
.substr(1, rtl_wrap
.length() - 2), input
);
338 EXPECT_EQ(rtl_wrap
[rtl_wrap
.length() -1], kPopDirectionalFormatting
);
// The direction observed before the passes must still be in effect.
342 EXPECT_EQ(was_rtl
, IsRTL());
// Checks whether GetDisplayStringInLTRDirectionality() changes its input,
// using the per-case |wrap_rtl| flag when IsRTL() is true and |wrap_ltr|
// otherwise.
345 TEST_F(RTLTest
, GetDisplayStringInLTRDirectionality
) {
// NOTE(review): the struct declaration for |cases| is not visible in this
// excerpt; entries are presumably {path, wrap_ltr, wrap_rtl} — confirm the
// field order against the declaration.
351 { L
"test", false, true },
352 { L
"test.html", false, true },
353 { L
"\x05d0\x05d1\x05d2", true, true },
354 { L
"\x05d0\x05d1\x05d2.txt", true, true },
355 { L
"\x05d0" L
"abc", true, true },
356 { L
"\x05d0" L
"abc.txt", true, true },
357 { L
"abc\x05d0\x05d1", false, true },
358 { L
"abc\x05d0\x05d1.jpg", false, true },
// Remember the starting direction so it can be checked again at the end.
361 const bool was_rtl
= IsRTL();
// Two passes; the index is named |pass| so that the inner loop's |i| does not
// shadow it.
363 for (size_t pass
= 0; pass
< 2; ++pass
) {
364 // Toggle the application default text direction (to try each direction).
// NOTE(review): the toggle call itself is not visible in this excerpt.
366 for (size_t i
= 0; i
< ARRAYSIZE_UNSAFE(cases
); ++i
) {
367 string16 input
= WideToUTF16(cases
[i
].path
);
368 string16 output
= GetDisplayStringInLTRDirectionality(input
);
369 // Test the expected wrapping behavior for the current UI directionality.
370 if (IsRTL() ? cases
[i
].wrap_rtl
: cases
[i
].wrap_ltr
)
371 EXPECT_NE(output
, input
);
// NOTE(review): the line separating the two expectations (presumably an
// else) is not visible in this excerpt.
373 EXPECT_EQ(output
, input
);
// The direction observed before the passes must still be in effect.
377 EXPECT_EQ(was_rtl
, IsRTL());
// Checks the text direction GetTextDirectionForLocale() reports for a set of
// locale names: right-to-left locales first, then left-to-right ones.
380 TEST_F(RTLTest
, GetTextDirection
) {
381 EXPECT_EQ(RIGHT_TO_LEFT
, GetTextDirectionForLocale("ar"));
382 EXPECT_EQ(RIGHT_TO_LEFT
, GetTextDirectionForLocale("ar_EG"));
383 EXPECT_EQ(RIGHT_TO_LEFT
, GetTextDirectionForLocale("he"));
384 EXPECT_EQ(RIGHT_TO_LEFT
, GetTextDirectionForLocale("he_IL"));
385 // iw is an obsolete code for Hebrew.
386 EXPECT_EQ(RIGHT_TO_LEFT
, GetTextDirectionForLocale("iw"));
387 // Although we're not yet localized to Farsi and Urdu, we
388 // do have the text layout direction information for them.
389 EXPECT_EQ(RIGHT_TO_LEFT
, GetTextDirectionForLocale("fa"));
390 EXPECT_EQ(RIGHT_TO_LEFT
, GetTextDirectionForLocale("ur"));
// NOTE(review): the comment below says the next two expectations should stay
// disabled, yet no guard around them is visible in this excerpt; presumably a
// preprocessor guard was lost — confirm before relying on these checks.
392 // Enable these when we include the minimal locale data for Azerbaijani
393 // written in Arabic and Dhivehi. At the moment, our copy of
394 // ICU data does not have entries for them.
395 EXPECT_EQ(RIGHT_TO_LEFT
, GetTextDirectionForLocale("az_Arab"));
396 // Dhivehi that uses Thaana script.
397 EXPECT_EQ(RIGHT_TO_LEFT
, GetTextDirectionForLocale("dv"));
399 EXPECT_EQ(LEFT_TO_RIGHT
, GetTextDirectionForLocale("en"));
400 // Chinese in China with '-'.
401 EXPECT_EQ(LEFT_TO_RIGHT
, GetTextDirectionForLocale("zh-CN"));
402 // Filipino: 3-letter code.
403 EXPECT_EQ(LEFT_TO_RIGHT
, GetTextDirectionForLocale("fil"));
405 EXPECT_EQ(LEFT_TO_RIGHT
, GetTextDirectionForLocale("ru"));
406 // Japanese that uses multiple scripts.
407 EXPECT_EQ(LEFT_TO_RIGHT
, GetTextDirectionForLocale("ja"));
// Round-trip check: a string changed by AdjustStringForLocaleDirection() must
// be restored to its original value by UnadjustStringForLocaleDirection().
410 TEST_F(RTLTest
, UnadjustStringForLocaleDirection
) {
411 // These test strings mirror bidi cases used by the
// GetFirstStrongCharacterDirection test.
// NOTE(review): only part of the array is visible in this excerpt.
412 const wchar_t* cases
[] = {
418 L
"\x202a \x05d0 foo bar",
419 L
"\x202d \x05d0 foo bar",
420 L
"\x202b foo \x05d0 bar",
421 L
"\x202e foo \x05d0 bar",
422 L
"\x0622 foo \x05d0 bar",
// Remember the starting direction so it can be checked again at the end.
425 const bool was_rtl
= IsRTL();
// Two passes; the index is named |pass| so that the inner loop's |i| does not
// shadow it.
427 for (size_t pass
= 0; pass
< 2; ++pass
) {
428 // Toggle the application default text direction (to try each direction).
// NOTE(review): the toggle call itself is not visible in this excerpt.
431 for (size_t i
= 0; i
< arraysize(cases
); ++i
) {
432 string16 test_case
= WideToUTF16(cases
[i
]);
433 string16 adjusted_string
= test_case
;
// NOTE(review): the statement controlled by this condition is not visible in
// this excerpt; presumably unadjusted strings are skipped — confirm.
435 if (!AdjustStringForLocaleDirection(&adjusted_string
))
// The adjusted string must differ from the input, and unadjusting must
// succeed and give back the input exactly.
438 EXPECT_NE(test_case
, adjusted_string
);
439 EXPECT_TRUE(UnadjustStringForLocaleDirection(&adjusted_string
));
440 EXPECT_EQ(test_case
, adjusted_string
) << " for test case [" << test_case
441 << "] with IsRTL() == " << IsRTL();
// The direction observed before the passes must still be in effect.
445 EXPECT_EQ(was_rtl
, IsRTL());