/*
 * Copyright (c) 2013 Yandex LLC. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Yandex LLC nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
32 #include "platform/text/UnicodeUtilities.h"
34 #include "wtf/Vector.h"
35 #include "wtf/text/CharacterNames.h"
36 #include "wtf/text/WTFString.h"
37 #include <gtest/gtest.h>
38 #include <unicode/uchar.h>
42 static const UChar32 kMaxLatinCharCount
= 256;
44 static bool isTestFirstAndLastCharsInCategoryFailed
= false;
45 UBool U_CALLCONV
testFirstAndLastCharsInCategory(const void *context
, UChar32 start
, UChar32 limit
, UCharCategory type
)
47 if (start
>= kMaxLatinCharCount
48 && U_MASK(type
) & (U_GC_S_MASK
| U_GC_P_MASK
| U_GC_Z_MASK
| U_GC_CF_MASK
)
49 && (!isSeparator(start
) || !isSeparator(limit
- 1))) {
50 isTestFirstAndLastCharsInCategoryFailed
= true;
52 // Break enumeration process
59 TEST(UnicodeUtilitiesTest
, Separators
)
61 static const bool latinSeparatorTable
[kMaxLatinCharCount
] = {
62 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
63 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
64 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // space ! " # $ % & ' ( ) * + , - . /
65 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, // : ; < = > ?
66 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // @
67 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, // [ \ ] ^ _
68 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // `
69 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, // { | } ~
70 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
71 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
72 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
73 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
74 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
75 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
76 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
77 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0
80 for (UChar32 character
= 0; character
< kMaxLatinCharCount
; ++character
) {
81 EXPECT_EQ(isSeparator(character
), latinSeparatorTable
[character
]);
84 isTestFirstAndLastCharsInCategoryFailed
= false;
85 u_enumCharTypes(&testFirstAndLastCharsInCategory
, 0);
86 EXPECT_FALSE(isTestFirstAndLastCharsInCategoryFailed
);
89 TEST(UnicodeUtilitiesTest
, KanaLetters
)
92 for (UChar character
= 0; character
< 0x3041; ++character
)
93 EXPECT_FALSE(isKanaLetter(character
));
96 for (UChar character
= 0x3041; character
<= 0x3096; ++character
)
97 EXPECT_TRUE(isKanaLetter(character
));
100 for (UChar character
= 0x30A1; character
<= 0x30FA; ++character
)
101 EXPECT_TRUE(isKanaLetter(character
));
104 TEST(UnicodeUtilitiesTest
, ContainsKanaLetters
)
107 String nonKanaString
;
108 for (UChar character
= 0; character
< 0x3041; ++character
)
109 nonKanaString
.append(character
);
110 EXPECT_FALSE(containsKanaLetters(nonKanaString
));
113 for (UChar character
= 0x3041; character
<= 0x3096; ++character
) {
114 String
str(nonKanaString
);
115 str
.append(character
);
116 EXPECT_TRUE(containsKanaLetters(str
));
120 for (UChar character
= 0x30A1; character
<= 0x30FA; ++character
) {
121 String
str(nonKanaString
);
122 str
.append(character
);
123 EXPECT_TRUE(containsKanaLetters(str
));
127 TEST(UnicodeUtilitiesTest
, FoldQuoteMarkOrSoftHyphenTest
)
129 const UChar charactersToFold
[] = {
130 hebrewPunctuationGershayimCharacter
, leftDoubleQuotationMarkCharacter
, rightDoubleQuotationMarkCharacter
,
131 hebrewPunctuationGereshCharacter
, leftSingleQuotationMarkCharacter
, rightSingleQuotationMarkCharacter
,
135 String
stringToFold(charactersToFold
, WTF_ARRAY_LENGTH(charactersToFold
));
136 Vector
<UChar
> buffer
;
137 stringToFold
.appendTo(buffer
);
139 foldQuoteMarksAndSoftHyphens(stringToFold
);
141 const String
foldedString("\"\"\"\'\'\'\0", WTF_ARRAY_LENGTH(charactersToFold
));
142 EXPECT_EQ(stringToFold
, foldedString
);
144 foldQuoteMarksAndSoftHyphens(buffer
.data(), buffer
.size());
145 EXPECT_EQ(String(buffer
), foldedString
);
148 TEST(UnicodeUtilitiesTest
, OnlyKanaLettersEqualityTest
)
150 const UChar nonKanaString1
[] = { 'a', 'b', 'c', 'd' };
151 const UChar nonKanaString2
[] = { 'e', 'f', 'g' };
153 // Check that non-Kana letters will be skipped.
154 EXPECT_TRUE(checkOnlyKanaLettersInStrings(
155 nonKanaString1
, WTF_ARRAY_LENGTH(nonKanaString1
),
156 nonKanaString2
, WTF_ARRAY_LENGTH(nonKanaString2
)));
158 const UChar kanaString
[] = { 'e', 'f', 'g', 0x3041 };
159 EXPECT_FALSE(checkOnlyKanaLettersInStrings(
160 kanaString
, WTF_ARRAY_LENGTH(kanaString
),
161 nonKanaString2
, WTF_ARRAY_LENGTH(nonKanaString2
)));
163 // Compare with self.
164 EXPECT_TRUE(checkOnlyKanaLettersInStrings(
165 kanaString
, WTF_ARRAY_LENGTH(kanaString
),
166 kanaString
, WTF_ARRAY_LENGTH(kanaString
)));
168 UChar voicedKanaString1
[] = { 0x3042, 0x3099 };
169 UChar voicedKanaString2
[] = { 0x3042, 0x309A };
171 // Comparing strings with different sound marks should fail.
172 EXPECT_FALSE(checkOnlyKanaLettersInStrings(
173 voicedKanaString1
, WTF_ARRAY_LENGTH(voicedKanaString1
),
174 voicedKanaString2
, WTF_ARRAY_LENGTH(voicedKanaString2
)));
176 // Now strings will be the same.
177 voicedKanaString2
[1] = 0x3099;
178 EXPECT_TRUE(checkOnlyKanaLettersInStrings(
179 voicedKanaString1
, WTF_ARRAY_LENGTH(voicedKanaString1
),
180 voicedKanaString2
, WTF_ARRAY_LENGTH(voicedKanaString2
)));
182 voicedKanaString2
[0] = 0x3043;
183 EXPECT_FALSE(checkOnlyKanaLettersInStrings(
184 voicedKanaString1
, WTF_ARRAY_LENGTH(voicedKanaString1
),
185 voicedKanaString2
, WTF_ARRAY_LENGTH(voicedKanaString2
)));
188 TEST(UnicodeUtilitiesTest
, StringsWithKanaLettersTest
)
190 const UChar nonKanaString1
[] = { 'a', 'b', 'c' };
191 const UChar nonKanaString2
[] = { 'a', 'b', 'c' };
193 // Check that non-Kana letters will be compared.
194 EXPECT_TRUE(checkKanaStringsEqual(
195 nonKanaString1
, WTF_ARRAY_LENGTH(nonKanaString1
),
196 nonKanaString2
, WTF_ARRAY_LENGTH(nonKanaString2
)));
198 const UChar kanaString
[] = { 'a', 'b', 'c', 0x3041 };
199 EXPECT_FALSE(checkKanaStringsEqual(
200 kanaString
, WTF_ARRAY_LENGTH(kanaString
),
201 nonKanaString2
, WTF_ARRAY_LENGTH(nonKanaString2
)));
203 // Compare with self.
204 EXPECT_TRUE(checkKanaStringsEqual(
205 kanaString
, WTF_ARRAY_LENGTH(kanaString
),
206 kanaString
, WTF_ARRAY_LENGTH(kanaString
)));
208 const UChar kanaString2
[] = { 'x', 'y', 'z', 0x3041 };
209 // Comparing strings with different non-Kana letters should fail.
210 EXPECT_FALSE(checkKanaStringsEqual(
211 kanaString
, WTF_ARRAY_LENGTH(kanaString
),
212 kanaString2
, WTF_ARRAY_LENGTH(kanaString2
)));
214 const UChar kanaString3
[] = { 'a', 'b', 'c', 0x3042, 0x3099, 'm', 'n', 'o' };
215 // Check that non-Kana letters after Kana letters will be compared.
216 EXPECT_TRUE(checkKanaStringsEqual(
217 kanaString3
, WTF_ARRAY_LENGTH(kanaString3
),
218 kanaString3
, WTF_ARRAY_LENGTH(kanaString3
)));
220 const UChar kanaString4
[] = { 'a', 'b', 'c', 0x3042, 0x3099, 'm', 'n', 'o', 'p' };
221 // And now comparing should fail.
222 EXPECT_FALSE(checkKanaStringsEqual(
223 kanaString3
, WTF_ARRAY_LENGTH(kanaString3
),
224 kanaString4
, WTF_ARRAY_LENGTH(kanaString4
)));
226 UChar voicedKanaString1
[] = { 0x3042, 0x3099 };
227 UChar voicedKanaString2
[] = { 0x3042, 0x309A };
229 // Comparing strings with different sound marks should fail.
230 EXPECT_FALSE(checkKanaStringsEqual(
231 voicedKanaString1
, WTF_ARRAY_LENGTH(voicedKanaString1
),
232 voicedKanaString2
, WTF_ARRAY_LENGTH(voicedKanaString2
)));
234 // Now strings will be the same.
235 voicedKanaString2
[1] = 0x3099;
236 EXPECT_TRUE(checkKanaStringsEqual(
237 voicedKanaString1
, WTF_ARRAY_LENGTH(voicedKanaString1
),
238 voicedKanaString2
, WTF_ARRAY_LENGTH(voicedKanaString2
)));
240 voicedKanaString2
[0] = 0x3043;
241 EXPECT_FALSE(checkKanaStringsEqual(
242 voicedKanaString1
, WTF_ARRAY_LENGTH(voicedKanaString1
),
243 voicedKanaString2
, WTF_ARRAY_LENGTH(voicedKanaString2
)));