1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/i18n/case_conversion.h"
6 #include "base/i18n/rtl.h"
7 #include "base/strings/utf_string_conversions.h"
8 #include "base/test/icu_test_util.h"
9 #include "testing/gtest/include/gtest/gtest.h"
10 #include "third_party/icu/source/i18n/unicode/usearch.h"
// Mixed-case, non-ASCII sample text used as the input of the case-conversion
// tests. Position by position it lines up with kNonASCIILower and
// kNonASCIIUpper: cased letters (Latin-1, Greek Extended U+1F07/U+1F0F and
// Latin Extended Additional U+1E00/U+1E01) separated by spaces, plus a run of
// symbols that is identical in all three constants.
//
// The final segment is required: kNonASCIILower ends in " \x1E01\x1E01" and
// kNonASCIIUpper in " \x1E00\x1E00", so the mixed input must end in a space
// followed by one upper-case and one lower-case form of that letter.
const wchar_t kNonASCIIMixed[] =
    L"\xC4\xD6\xE4\xF6\x20\xCF\xEF\x20\xF7\x25"
    L"\xA4\x23\x2A\x5E\x60\x40\xA3\x24\x2030\x201A\x7E\x20\x1F07\x1F0F"
    L"\x20\x1E00\x1E01";
// Lower-case counterpart of kNonASCIIMixed. The literal is split at the
// spaces (U+0020) so that each segment holds one "word" of the sample text.
const wchar_t kNonASCIILower[] =
    L"\xE4\xF6\xE4\xF6\x20"  // U+00E4 U+00F6, twice.
    L"\xEF\xEF\x20"          // U+00EF, twice.
    // Symbols; identical in the mixed, lower and upper constants.
    L"\xF7\x25\xA4\x23\x2A\x5E\x60\x40\xA3\x24\x2030\x201A\x7E\x20"
    L"\x1F07\x1F07\x20"      // U+1F07, twice.
    L"\x1E01\x1E01";         // U+1E01, twice.
// Upper-case counterpart of kNonASCIIMixed. The literal is split at the
// spaces (U+0020) so that each segment holds one "word" of the sample text.
const wchar_t kNonASCIIUpper[] =
    L"\xC4\xD6\xC4\xD6\x20"  // U+00C4 U+00D6, twice.
    L"\xCF\xCF\x20"          // U+00CF, twice.
    // Symbols; identical in the mixed, lower and upper constants.
    L"\xF7\x25\xA4\x23\x2A\x5E\x60\x40\xA3\x24\x2030\x201A\x7E\x20"
    L"\x1F0F\x1F0F\x20"      // U+1F0F, twice.
    L"\x1E00\x1E00";         // U+1E00, twice.
32 // Test upper and lower case string conversion.
33 TEST(CaseConversionTest
, UpperLower
) {
34 const string16
mixed(ASCIIToUTF16("Text with UPPer & lowER casE."));
35 const string16
expected_lower(ASCIIToUTF16("text with upper & lower case."));
36 const string16
expected_upper(ASCIIToUTF16("TEXT WITH UPPER & LOWER CASE."));
38 string16 result
= ToLower(mixed
);
39 EXPECT_EQ(expected_lower
, result
);
41 result
= ToUpper(mixed
);
42 EXPECT_EQ(expected_upper
, result
);
45 TEST(CaseConversionTest
, NonASCII
) {
46 const string16
mixed(WideToUTF16(kNonASCIIMixed
));
47 const string16
expected_lower(WideToUTF16(kNonASCIILower
));
48 const string16
expected_upper(WideToUTF16(kNonASCIIUpper
));
50 string16 result
= ToLower(mixed
);
51 EXPECT_EQ(expected_lower
, result
);
53 result
= ToUpper(mixed
);
54 EXPECT_EQ(expected_upper
, result
);
57 TEST(CaseConversionTest
, TurkishLocaleConversion
) {
58 const string16
mixed(WideToUTF16(L
"\x49\x131"));
59 const string16
expected_lower(WideToUTF16(L
"\x69\x131"));
60 const string16
expected_upper(WideToUTF16(L
"\x49\x49"));
62 test::ScopedRestoreICUDefaultLocale restore_locale
;
63 i18n::SetICUDefaultLocale("en_US");
65 string16 result
= ToLower(mixed
);
66 EXPECT_EQ(expected_lower
, result
);
68 result
= ToUpper(mixed
);
69 EXPECT_EQ(expected_upper
, result
);
71 i18n::SetICUDefaultLocale("tr");
73 const string16
expected_lower_turkish(WideToUTF16(L
"\x131\x131"));
74 const string16
expected_upper_turkish(WideToUTF16(L
"\x49\x49"));
76 result
= ToLower(mixed
);
77 EXPECT_EQ(expected_lower_turkish
, result
);
79 result
= ToUpper(mixed
);
80 EXPECT_EQ(expected_upper_turkish
, result
);
// Exercises FoldCase() on plain ASCII, on the three non-ASCII sample
// constants, on the Turkish "I"/dotless-i pair under both the en_US and tr
// default locales, and on inputs whose folded form is longer than the input.
83 TEST(CaseConversionTest
, FoldCase
) {
84 // Simple ASCII, should lower-case.
85 EXPECT_EQ(ASCIIToUTF16("hello, world"),
86 FoldCase(ASCIIToUTF16("Hello, World")));
88 // Non-ASCII cases from above. They should all fold to the same result.
89 EXPECT_EQ(FoldCase(WideToUTF16(kNonASCIIMixed
)),
90 FoldCase(WideToUTF16(kNonASCIILower
)));
91 EXPECT_EQ(FoldCase(WideToUTF16(kNonASCIIMixed
)),
92 FoldCase(WideToUTF16(kNonASCIIUpper
)));
94 // Turkish cases from above. This is the lower-case expected result from the
95 // US locale. It should be the same even when the current locale is Turkish.
96 const string16
turkish(WideToUTF16(L
"\x49\x131"));
97 const string16
turkish_expected(WideToUTF16(L
"\x69\x131"));
99 test::ScopedRestoreICUDefaultLocale restore_locale
;
100 i18n::SetICUDefaultLocale("en_US");
101 EXPECT_EQ(turkish_expected
, FoldCase(turkish
));
103 i18n::SetICUDefaultLocale("tr");
104 EXPECT_EQ(turkish_expected
, FoldCase(turkish
));
106 // Test a case that gets bigger when processed.
107 // U+130 = LATIN CAPITAL LETTER I WITH DOT ABOVE gets folded to a lower case
108 // "i" followed by U+307 COMBINING DOT ABOVE.
109 EXPECT_EQ(WideToUTF16(L
"i\u0307j"), FoldCase(WideToUTF16(L
"\u0130j")));
111 // U+00DF (SHARP S) and U+1E9E (CAPITAL SHARP S) are both folded to "ss".
112 EXPECT_EQ(ASCIIToUTF16("ssss"), FoldCase(WideToUTF16(L
"\u00DF\u1E9E")));