Use multiline attribute to check for IA2_STATE_MULTILINE.
[chromium-blink-merge.git] / base / i18n / string_search_unittest.cc
blob 9419b267cb584cf7198034cba8746010294bc569
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include <string>
7 #include "base/i18n/rtl.h"
8 #include "base/i18n/string_search.h"
9 #include "base/strings/string16.h"
10 #include "base/strings/utf_string_conversions.h"
11 #include "testing/gtest/include/gtest/gtest.h"
12 #include "third_party/icu/source/i18n/unicode/usearch.h"
14 namespace base {
15 namespace i18n {
17 // Note on setting default locale for testing: The current default locale on
18 // the Mac trybot is en_US_POSIX, with which primary-level collation strength
19 // string search is case-sensitive, when normally it should be
20 // case-insensitive. In other locales (including en_US which English speakers
21 // in the U.S. use), this search would be case-insensitive as expected.
23 TEST(StringSearchTest, ASCII) {
24 std::string default_locale(uloc_getDefault());
25 bool locale_is_posix = (default_locale == "en_US_POSIX");
26 if (locale_is_posix)
27 SetICUDefaultLocale("en_US");
29 size_t index = 0;
30 size_t length = 0;
32 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
33 ASCIIToUTF16("hello"), ASCIIToUTF16("hello world"), &index, &length));
34 EXPECT_EQ(0U, index);
35 EXPECT_EQ(5U, length);
37 EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
38 ASCIIToUTF16("h e l l o"), ASCIIToUTF16("h e l l o"),
39 &index, &length));
41 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
42 ASCIIToUTF16("aabaaa"), ASCIIToUTF16("aaabaabaaa"), &index, &length));
43 EXPECT_EQ(4U, index);
44 EXPECT_EQ(6U, length);
46 EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
47 ASCIIToUTF16("searching within empty string"), string16(),
48 &index, &length));
50 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
51 string16(), ASCIIToUTF16("searching for empty string"), &index, &length));
52 EXPECT_EQ(0U, index);
53 EXPECT_EQ(0U, length);
55 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
56 ASCIIToUTF16("case insensitivity"), ASCIIToUTF16("CaSe InSeNsItIvItY"),
57 &index, &length));
58 EXPECT_EQ(0U, index);
59 EXPECT_EQ(18U, length);
61 if (locale_is_posix)
62 SetICUDefaultLocale(default_locale.data());
65 TEST(StringSearchTest, UnicodeLocaleIndependent) {
66 // Base characters
67 const string16 e_base = WideToUTF16(L"e");
68 const string16 E_base = WideToUTF16(L"E");
69 const string16 a_base = WideToUTF16(L"a");
71 // Composed characters
72 const string16 e_with_acute_accent = WideToUTF16(L"\u00e9");
73 const string16 E_with_acute_accent = WideToUTF16(L"\u00c9");
74 const string16 e_with_grave_accent = WideToUTF16(L"\u00e8");
75 const string16 E_with_grave_accent = WideToUTF16(L"\u00c8");
76 const string16 a_with_acute_accent = WideToUTF16(L"\u00e1");
78 // Decomposed characters
79 const string16 e_with_acute_combining_mark = WideToUTF16(L"e\u0301");
80 const string16 E_with_acute_combining_mark = WideToUTF16(L"E\u0301");
81 const string16 e_with_grave_combining_mark = WideToUTF16(L"e\u0300");
82 const string16 E_with_grave_combining_mark = WideToUTF16(L"E\u0300");
83 const string16 a_with_acute_combining_mark = WideToUTF16(L"a\u0301");
85 std::string default_locale(uloc_getDefault());
86 bool locale_is_posix = (default_locale == "en_US_POSIX");
87 if (locale_is_posix)
88 SetICUDefaultLocale("en_US");
90 size_t index = 0;
91 size_t length = 0;
93 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
94 e_base, e_with_acute_accent, &index, &length));
95 EXPECT_EQ(0U, index);
96 EXPECT_EQ(e_with_acute_accent.size(), length);
98 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
99 e_with_acute_accent, e_base, &index, &length));
100 EXPECT_EQ(0U, index);
101 EXPECT_EQ(e_base.size(), length);
103 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
104 e_base, e_with_acute_combining_mark, &index, &length));
105 EXPECT_EQ(0U, index);
106 EXPECT_EQ(e_with_acute_combining_mark.size(), length);
108 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
109 e_with_acute_combining_mark, e_base, &index, &length));
110 EXPECT_EQ(0U, index);
111 EXPECT_EQ(e_base.size(), length);
113 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
114 e_with_acute_combining_mark, e_with_acute_accent,
115 &index, &length));
116 EXPECT_EQ(0U, index);
117 EXPECT_EQ(e_with_acute_accent.size(), length);
119 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
120 e_with_acute_accent, e_with_acute_combining_mark,
121 &index, &length));
122 EXPECT_EQ(0U, index);
123 EXPECT_EQ(e_with_acute_combining_mark.size(), length);
125 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
126 e_with_acute_combining_mark, e_with_grave_combining_mark,
127 &index, &length));
128 EXPECT_EQ(0U, index);
129 EXPECT_EQ(e_with_grave_combining_mark.size(), length);
131 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
132 e_with_grave_combining_mark, e_with_acute_combining_mark,
133 &index, &length));
134 EXPECT_EQ(0U, index);
135 EXPECT_EQ(e_with_acute_combining_mark.size(), length);
137 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
138 e_with_acute_combining_mark, e_with_grave_accent, &index, &length));
139 EXPECT_EQ(0U, index);
140 EXPECT_EQ(e_with_grave_accent.size(), length);
142 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
143 e_with_grave_accent, e_with_acute_combining_mark, &index, &length));
144 EXPECT_EQ(0U, index);
145 EXPECT_EQ(e_with_acute_combining_mark.size(), length);
147 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
148 E_with_acute_accent, e_with_acute_accent, &index, &length));
149 EXPECT_EQ(0U, index);
150 EXPECT_EQ(e_with_acute_accent.size(), length);
152 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
153 E_with_grave_accent, e_with_acute_accent, &index, &length));
154 EXPECT_EQ(0U, index);
155 EXPECT_EQ(e_with_acute_accent.size(), length);
157 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
158 E_with_acute_combining_mark, e_with_grave_accent, &index, &length));
159 EXPECT_EQ(0U, index);
160 EXPECT_EQ(e_with_grave_accent.size(), length);
162 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
163 E_with_grave_combining_mark, e_with_acute_accent, &index, &length));
164 EXPECT_EQ(0U, index);
165 EXPECT_EQ(e_with_acute_accent.size(), length);
167 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
168 E_base, e_with_grave_accent, &index, &length));
169 EXPECT_EQ(0U, index);
170 EXPECT_EQ(e_with_grave_accent.size(), length);
172 EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
173 a_with_acute_accent, e_with_acute_accent, &index, &length));
175 EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
176 a_with_acute_combining_mark, e_with_acute_combining_mark,
177 &index, &length));
179 if (locale_is_posix)
180 SetICUDefaultLocale(default_locale.data());
183 TEST(StringSearchTest, UnicodeLocaleDependent) {
184 // Base characters
185 const string16 a_base = WideToUTF16(L"a");
187 // Composed characters
188 const string16 a_with_ring = WideToUTF16(L"\u00e5");
190 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
191 a_base, a_with_ring, NULL, NULL));
193 const char* default_locale = uloc_getDefault();
194 SetICUDefaultLocale("da");
196 EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
197 a_base, a_with_ring, NULL, NULL));
199 SetICUDefaultLocale(default_locale);
202 TEST(StringSearchTest, FixedPatternMultipleSearch) {
203 std::string default_locale(uloc_getDefault());
204 bool locale_is_posix = (default_locale == "en_US_POSIX");
205 if (locale_is_posix)
206 SetICUDefaultLocale("en_US");
208 size_t index = 0;
209 size_t length = 0;
211 // Search "hello" over multiple texts.
212 FixedPatternStringSearchIgnoringCaseAndAccents query(ASCIIToUTF16("hello"));
213 EXPECT_TRUE(query.Search(ASCIIToUTF16("12hello34"), &index, &length));
214 EXPECT_EQ(2U, index);
215 EXPECT_EQ(5U, length);
216 EXPECT_FALSE(query.Search(ASCIIToUTF16("bye"), &index, &length));
217 EXPECT_TRUE(query.Search(ASCIIToUTF16("hELLo"), &index, &length));
218 EXPECT_EQ(0U, index);
219 EXPECT_EQ(5U, length);
221 if (locale_is_posix)
222 SetICUDefaultLocale(default_locale.data());
225 } // namespace i18n
226 } // namespace base