Suppress tabs permission warning if there is already a browsingHistory warning.
[chromium-blink-merge.git] / base / i18n / rtl_unittest.cc
blob8faaccf33b5655f9e041b15b90a8c64af8f56e2d
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/i18n/rtl.h"
7 #include <algorithm>
9 #include "base/files/file_path.h"
10 #include "base/strings/string_util.h"
11 #include "base/strings/sys_string_conversions.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "testing/gtest/include/gtest/gtest.h"
14 #include "testing/platform_test.h"
15 #include "third_party/icu/source/i18n/unicode/usearch.h"
17 #if defined(TOOLKIT_GTK)
18 #include <gtk/gtk.h>
19 #endif
21 namespace base {
22 namespace i18n {
24 namespace {
26 // A test utility function to set the application default text direction.
27 void SetRTL(bool rtl) {
28 // Override the current locale/direction.
29 SetICUDefaultLocale(rtl ? "he" : "en");
30 #if defined(TOOLKIT_GTK)
31 // Do the same for GTK, which does not rely on the ICU default locale.
32 gtk_widget_set_default_direction(rtl ? GTK_TEXT_DIR_RTL : GTK_TEXT_DIR_LTR);
33 #endif
34 EXPECT_EQ(rtl, IsRTL());
37 } // namespace
// Fixture used by the TEST_F cases in this file; derives from PlatformTest.
39 class RTLTest : public PlatformTest {
42 TEST_F(RTLTest, GetFirstStrongCharacterDirection) {
43 struct {
44 const wchar_t* text;
45 TextDirection direction;
46 } cases[] = {
47 // Test pure LTR string.
48 { L"foo bar", LEFT_TO_RIGHT },
49 // Test pure RTL string.
50 { L"\x05d0\x05d1\x05d2 \x05d3\x0d4\x05d5", RIGHT_TO_LEFT},
51 // Test bidi string in which the first character with strong directionality
52 // is a character with type L.
53 { L"foo \x05d0 bar", LEFT_TO_RIGHT },
54 // Test bidi string in which the first character with strong directionality
55 // is a character with type R.
56 { L"\x05d0 foo bar", RIGHT_TO_LEFT },
57 // Test bidi string which starts with a character with weak directionality
58 // and in which the first character with strong directionality is a
59 // character with type L.
60 { L"!foo \x05d0 bar", LEFT_TO_RIGHT },
61 // Test bidi string which starts with a character with weak directionality
62 // and in which the first character with strong directionality is a
63 // character with type R.
64 { L",\x05d0 foo bar", RIGHT_TO_LEFT },
65 // Test bidi string in which the first character with strong directionality
66 // is a character with type LRE.
67 { L"\x202a \x05d0 foo bar", LEFT_TO_RIGHT },
68 // Test bidi string in which the first character with strong directionality
69 // is a character with type LRO.
70 { L"\x202d \x05d0 foo bar", LEFT_TO_RIGHT },
71 // Test bidi string in which the first character with strong directionality
72 // is a character with type RLE.
73 { L"\x202b foo \x05d0 bar", RIGHT_TO_LEFT },
74 // Test bidi string in which the first character with strong directionality
75 // is a character with type RLO.
76 { L"\x202e foo \x05d0 bar", RIGHT_TO_LEFT },
77 // Test bidi string in which the first character with strong directionality
78 // is a character with type AL.
79 { L"\x0622 foo \x05d0 bar", RIGHT_TO_LEFT },
80 // Test a string without strong directionality characters.
81 { L",!.{}", LEFT_TO_RIGHT },
82 // Test empty string.
83 { L"", LEFT_TO_RIGHT },
84 // Test characters in non-BMP (e.g. Phoenician letters. Please refer to
85 // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more
86 // information).
88 #if defined(WCHAR_T_IS_UTF32)
89 L" ! \x10910" L"abc 123",
90 #elif defined(WCHAR_T_IS_UTF16)
91 L" ! \xd802\xdd10" L"abc 123",
92 #else
93 #error wchar_t should be either UTF-16 or UTF-32
94 #endif
95 RIGHT_TO_LEFT },
97 #if defined(WCHAR_T_IS_UTF32)
98 L" ! \x10401" L"abc 123",
99 #elif defined(WCHAR_T_IS_UTF16)
100 L" ! \xd801\xdc01" L"abc 123",
101 #else
102 #error wchar_t should be either UTF-16 or UTF-32
103 #endif
104 LEFT_TO_RIGHT },
107 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
108 EXPECT_EQ(cases[i].direction,
109 GetFirstStrongCharacterDirection(WideToUTF16(cases[i].text)));
113 // Note that the cases with LRE, LRO, RLE and RLO are invalid for
114 // GetLastStrongCharacterDirection because they should be followed by PDF
115 // character.
116 TEST_F(RTLTest, GetLastStrongCharacterDirection) {
117 struct {
118 const wchar_t* text;
119 TextDirection direction;
120 } cases[] = {
121 // Test pure LTR string.
122 { L"foo bar", LEFT_TO_RIGHT },
123 // Test pure RTL string.
124 { L"\x05d0\x05d1\x05d2 \x05d3\x0d4\x05d5", RIGHT_TO_LEFT},
125 // Test bidi string in which the last character with strong directionality
126 // is a character with type L.
127 { L"foo \x05d0 bar", LEFT_TO_RIGHT },
128 // Test bidi string in which the last character with strong directionality
129 // is a character with type R.
130 { L"\x05d0 foo bar \x05d3", RIGHT_TO_LEFT },
131 // Test bidi string which ends with a character with weak directionality
132 // and in which the last character with strong directionality is a
133 // character with type L.
134 { L"!foo \x05d0 bar!", LEFT_TO_RIGHT },
135 // Test bidi string which ends with a character with weak directionality
136 // and in which the last character with strong directionality is a
137 // character with type R.
138 { L",\x05d0 foo bar \x05d1,", RIGHT_TO_LEFT },
139 // Test bidi string in which the last character with strong directionality
140 // is a character with type AL.
141 { L"\x0622 foo \x05d0 bar \x0622", RIGHT_TO_LEFT },
142 // Test a string without strong directionality characters.
143 { L",!.{}", LEFT_TO_RIGHT },
144 // Test empty string.
145 { L"", LEFT_TO_RIGHT },
146 // Test characters in non-BMP (e.g. Phoenician letters. Please refer to
147 // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more
148 // information).
150 #if defined(WCHAR_T_IS_UTF32)
151 L"abc 123" L" ! \x10910 !",
152 #elif defined(WCHAR_T_IS_UTF16)
153 L"abc 123" L" ! \xd802\xdd10 !",
154 #else
155 #error wchar_t should be either UTF-16 or UTF-32
156 #endif
157 RIGHT_TO_LEFT },
159 #if defined(WCHAR_T_IS_UTF32)
160 L"abc 123" L" ! \x10401 !",
161 #elif defined(WCHAR_T_IS_UTF16)
162 L"abc 123" L" ! \xd801\xdc01 !",
163 #else
164 #error wchar_t should be either UTF-16 or UTF-32
165 #endif
166 LEFT_TO_RIGHT },
169 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
170 EXPECT_EQ(cases[i].direction,
171 GetLastStrongCharacterDirection(WideToUTF16(cases[i].text)));
174 TEST_F(RTLTest, GetStringDirection) {
175 struct {
176 const wchar_t* text;
177 TextDirection direction;
178 } cases[] = {
179 // Test pure LTR string.
180 { L"foobar", LEFT_TO_RIGHT },
181 { L".foobar", LEFT_TO_RIGHT },
182 { L"foo, bar", LEFT_TO_RIGHT },
183 // Test pure LTR with strong directionality characters of type LRE.
184 { L"\x202a\x202a", LEFT_TO_RIGHT },
185 { L".\x202a\x202a", LEFT_TO_RIGHT },
186 { L"\x202a, \x202a", LEFT_TO_RIGHT },
187 // Test pure LTR with strong directionality characters of type LRO.
188 { L"\x202d\x202d", LEFT_TO_RIGHT },
189 { L".\x202d\x202d", LEFT_TO_RIGHT },
190 { L"\x202d, \x202d", LEFT_TO_RIGHT },
191 // Test pure LTR with various types of strong directionality characters.
192 { L"foo \x202a\x202d", LEFT_TO_RIGHT },
193 { L".\x202d foo \x202a", LEFT_TO_RIGHT },
194 { L"\x202a, \x202d foo", LEFT_TO_RIGHT },
195 // Test pure RTL with strong directionality characters of type R.
196 { L"\x05d0\x05d0", RIGHT_TO_LEFT },
197 { L".\x05d0\x05d0", RIGHT_TO_LEFT },
198 { L"\x05d0, \x05d0", RIGHT_TO_LEFT },
199 // Test pure RTL with strong directionality characters of type RLE.
200 { L"\x202b\x202b", RIGHT_TO_LEFT },
201 { L".\x202b\x202b", RIGHT_TO_LEFT },
202 { L"\x202b, \x202b", RIGHT_TO_LEFT },
203 // Test pure RTL with strong directionality characters of type RLO.
204 { L"\x202e\x202e", RIGHT_TO_LEFT },
205 { L".\x202e\x202e", RIGHT_TO_LEFT },
206 { L"\x202e, \x202e", RIGHT_TO_LEFT },
207 // Test pure RTL with strong directionality characters of type AL.
208 { L"\x0622\x0622", RIGHT_TO_LEFT },
209 { L".\x0622\x0622", RIGHT_TO_LEFT },
210 { L"\x0622, \x0622", RIGHT_TO_LEFT },
211 // Test pure RTL with various types of strong directionality characters.
212 { L"\x05d0\x202b\x202e\x0622", RIGHT_TO_LEFT },
213 { L".\x202b\x202e\x0622\x05d0", RIGHT_TO_LEFT },
214 { L"\x0622\x202e, \x202b\x05d0", RIGHT_TO_LEFT },
215 // Test bidi strings.
216 { L"foo \x05d0 bar", UNKNOWN_DIRECTION },
217 { L"\x202b foo bar", UNKNOWN_DIRECTION },
218 { L"!foo \x0622 bar", UNKNOWN_DIRECTION },
219 { L"\x202a\x202b", UNKNOWN_DIRECTION },
220 { L"\x202e\x202d", UNKNOWN_DIRECTION },
221 { L"\x0622\x202a", UNKNOWN_DIRECTION },
222 { L"\x202d\x05d0", UNKNOWN_DIRECTION },
223 // Test a string without strong directionality characters.
224 { L",!.{}", LEFT_TO_RIGHT },
225 // Test empty string.
226 { L"", LEFT_TO_RIGHT },
228 #if defined(WCHAR_T_IS_UTF32)
229 L" ! \x10910" L"abc 123",
230 #elif defined(WCHAR_T_IS_UTF16)
231 L" ! \xd802\xdd10" L"abc 123",
232 #else
233 #error wchar_t should be either UTF-16 or UTF-32
234 #endif
235 UNKNOWN_DIRECTION },
237 #if defined(WCHAR_T_IS_UTF32)
238 L" ! \x10401" L"abc 123",
239 #elif defined(WCHAR_T_IS_UTF16)
240 L" ! \xd801\xdc01" L"abc 123",
241 #else
242 #error wchar_t should be either UTF-16 or UTF-32
243 #endif
244 LEFT_TO_RIGHT },
247 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
248 EXPECT_EQ(cases[i].direction,
249 GetStringDirection(WideToUTF16(cases[i].text)));
252 TEST_F(RTLTest, WrapPathWithLTRFormatting) {
253 const wchar_t* cases[] = {
254 // Test common path, such as "c:\foo\bar".
255 L"c:/foo/bar",
256 // Test path with file name, such as "c:\foo\bar\test.jpg".
257 L"c:/foo/bar/test.jpg",
258 // Test path ending with punctuation, such as "c:\(foo)\bar.".
259 L"c:/(foo)/bar.",
260 // Test path ending with separator, such as "c:\foo\bar\".
261 L"c:/foo/bar/",
262 // Test path with RTL character.
263 L"c:/\x05d0",
264 // Test path with 2 level RTL directory names.
265 L"c:/\x05d0/\x0622",
266 // Test path with mixed RTL/LTR directory names and ending with punctuation.
267 L"c:/\x05d0/\x0622/(foo)/b.a.r.",
268 // Test path without driver name, such as "/foo/bar/test/jpg".
269 L"/foo/bar/test.jpg",
270 // Test path start with current directory, such as "./foo".
271 L"./foo",
272 // Test path start with parent directory, such as "../foo/bar.jpg".
273 L"../foo/bar.jpg",
274 // Test absolute path, such as "//foo/bar.jpg".
275 L"//foo/bar.jpg",
276 // Test path with mixed RTL/LTR directory names.
277 L"c:/foo/\x05d0/\x0622/\x05d1.jpg",
278 // Test empty path.
282 for (size_t i = 0; i < arraysize(cases); ++i) {
283 FilePath path;
284 #if defined(OS_WIN)
285 std::wstring win_path(cases[i]);
286 std::replace(win_path.begin(), win_path.end(), '/', '\\');
287 path = FilePath(win_path);
288 std::wstring wrapped_expected =
289 std::wstring(L"\x202a") + win_path + L"\x202c";
290 #else
291 path = FilePath(base::SysWideToNativeMB(cases[i]));
292 std::wstring wrapped_expected =
293 std::wstring(L"\x202a") + cases[i] + L"\x202c";
294 #endif
295 string16 localized_file_path_string;
296 WrapPathWithLTRFormatting(path, &localized_file_path_string);
298 std::wstring wrapped_actual = UTF16ToWide(localized_file_path_string);
299 EXPECT_EQ(wrapped_expected, wrapped_actual);
303 TEST_F(RTLTest, WrapString) {
304 const wchar_t* cases[] = {
305 L" . ",
306 L"abc",
307 L"a" L"\x5d0\x5d1",
308 L"a" L"\x5d1" L"b",
309 L"\x5d0\x5d1\x5d2",
310 L"\x5d0\x5d1" L"a",
311 L"\x5d0" L"a" L"\x5d1",
314 const bool was_rtl = IsRTL();
316 for (size_t i = 0; i < 2; ++i) {
317 // Toggle the application default text direction (to try each direction).
318 SetRTL(!IsRTL());
320 string16 empty;
321 WrapStringWithLTRFormatting(&empty);
322 EXPECT_TRUE(empty.empty());
323 WrapStringWithRTLFormatting(&empty);
324 EXPECT_TRUE(empty.empty());
326 for (size_t i = 0; i < arraysize(cases); ++i) {
327 string16 input = WideToUTF16(cases[i]);
328 string16 ltr_wrap = input;
329 WrapStringWithLTRFormatting(&ltr_wrap);
330 EXPECT_EQ(ltr_wrap[0], kLeftToRightEmbeddingMark);
331 EXPECT_EQ(ltr_wrap.substr(1, ltr_wrap.length() - 2), input);
332 EXPECT_EQ(ltr_wrap[ltr_wrap.length() -1], kPopDirectionalFormatting);
334 string16 rtl_wrap = input;
335 WrapStringWithRTLFormatting(&rtl_wrap);
336 EXPECT_EQ(rtl_wrap[0], kRightToLeftEmbeddingMark);
337 EXPECT_EQ(rtl_wrap.substr(1, rtl_wrap.length() - 2), input);
338 EXPECT_EQ(rtl_wrap[rtl_wrap.length() -1], kPopDirectionalFormatting);
342 EXPECT_EQ(was_rtl, IsRTL());
345 TEST_F(RTLTest, GetDisplayStringInLTRDirectionality) {
346 struct {
347 const wchar_t* path;
348 bool wrap_ltr;
349 bool wrap_rtl;
350 } cases[] = {
351 { L"test", false, true },
352 { L"test.html", false, true },
353 { L"\x05d0\x05d1\x05d2", true, true },
354 { L"\x05d0\x05d1\x05d2.txt", true, true },
355 { L"\x05d0" L"abc", true, true },
356 { L"\x05d0" L"abc.txt", true, true },
357 { L"abc\x05d0\x05d1", false, true },
358 { L"abc\x05d0\x05d1.jpg", false, true },
361 const bool was_rtl = IsRTL();
363 for (size_t i = 0; i < 2; ++i) {
364 // Toggle the application default text direction (to try each direction).
365 SetRTL(!IsRTL());
366 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
367 string16 input = WideToUTF16(cases[i].path);
368 string16 output = GetDisplayStringInLTRDirectionality(input);
369 // Test the expected wrapping behavior for the current UI directionality.
370 if (IsRTL() ? cases[i].wrap_rtl : cases[i].wrap_ltr)
371 EXPECT_NE(output, input);
372 else
373 EXPECT_EQ(output, input);
377 EXPECT_EQ(was_rtl, IsRTL());
380 TEST_F(RTLTest, GetTextDirection) {
381 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ar"));
382 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ar_EG"));
383 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("he"));
384 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("he_IL"));
385 // iw is an obsolete code for Hebrew.
386 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("iw"));
387 // Although we're not yet localized to Farsi and Urdu, we
388 // do have the text layout direction information for them.
389 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("fa"));
390 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ur"));
391 #if 0
392 // Enable these when we include the minimal locale data for Azerbaijani
393 // written in Arabic and Dhivehi. At the moment, our copy of
394 // ICU data does not have entries for them.
395 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("az_Arab"));
396 // Dhivehi that uses Thaana script.
397 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("dv"));
398 #endif
399 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("en"));
400 // Chinese in China with '-'.
401 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("zh-CN"));
402 // Filipino : 3-letter code
403 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("fil"));
404 // Russian
405 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("ru"));
406 // Japanese that uses multiple scripts
407 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("ja"));
410 TEST_F(RTLTest, UnadjustStringForLocaleDirection) {
411 // These test strings are borrowed from WrapPathWithLTRFormatting
412 const wchar_t* cases[] = {
413 L"foo bar",
414 L"foo \x05d0 bar",
415 L"\x05d0 foo bar",
416 L"!foo \x05d0 bar",
417 L",\x05d0 foo bar",
418 L"\x202a \x05d0 foo bar",
419 L"\x202d \x05d0 foo bar",
420 L"\x202b foo \x05d0 bar",
421 L"\x202e foo \x05d0 bar",
422 L"\x0622 foo \x05d0 bar",
425 const bool was_rtl = IsRTL();
427 for (size_t i = 0; i < 2; ++i) {
428 // Toggle the application default text direction (to try each direction).
429 SetRTL(!IsRTL());
431 for (size_t i = 0; i < arraysize(cases); ++i) {
432 string16 test_case = WideToUTF16(cases[i]);
433 string16 adjusted_string = test_case;
435 if (!AdjustStringForLocaleDirection(&adjusted_string))
436 continue;
438 EXPECT_NE(test_case, adjusted_string);
439 EXPECT_TRUE(UnadjustStringForLocaleDirection(&adjusted_string));
440 EXPECT_EQ(test_case, adjusted_string) << " for test case [" << test_case
441 << "] with IsRTL() == " << IsRTL();
445 EXPECT_EQ(was_rtl, IsRTL());
448 } // namespace i18n
449 } // namespace base