1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/strings/string_util.h"
12 #include "base/basictypes.h"
13 #include "base/strings/string16.h"
14 #include "base/strings/utf_string_conversions.h"
15 #include "testing/gmock/include/gmock/gmock.h"
16 #include "testing/gtest/include/gtest/gtest.h"
18 using ::testing::ElementsAre
;
22 static const struct trim_case
{
24 const TrimPositions positions
;
25 const wchar_t* output
;
26 const TrimPositions return_value
;
28 {L
" Google Video ", TRIM_LEADING
, L
"Google Video ", TRIM_LEADING
},
29 {L
" Google Video ", TRIM_TRAILING
, L
" Google Video", TRIM_TRAILING
},
30 {L
" Google Video ", TRIM_ALL
, L
"Google Video", TRIM_ALL
},
31 {L
"Google Video", TRIM_ALL
, L
"Google Video", TRIM_NONE
},
32 {L
"", TRIM_ALL
, L
"", TRIM_NONE
},
33 {L
" ", TRIM_LEADING
, L
"", TRIM_LEADING
},
34 {L
" ", TRIM_TRAILING
, L
"", TRIM_TRAILING
},
35 {L
" ", TRIM_ALL
, L
"", TRIM_ALL
},
36 {L
"\t\rTest String\n", TRIM_ALL
, L
"Test String", TRIM_ALL
},
37 {L
"\x2002Test String\x00A0\x3000", TRIM_ALL
, L
"Test String", TRIM_ALL
},
40 static const struct trim_case_ascii
{
42 const TrimPositions positions
;
44 const TrimPositions return_value
;
45 } trim_cases_ascii
[] = {
46 {" Google Video ", TRIM_LEADING
, "Google Video ", TRIM_LEADING
},
47 {" Google Video ", TRIM_TRAILING
, " Google Video", TRIM_TRAILING
},
48 {" Google Video ", TRIM_ALL
, "Google Video", TRIM_ALL
},
49 {"Google Video", TRIM_ALL
, "Google Video", TRIM_NONE
},
50 {"", TRIM_ALL
, "", TRIM_NONE
},
51 {" ", TRIM_LEADING
, "", TRIM_LEADING
},
52 {" ", TRIM_TRAILING
, "", TRIM_TRAILING
},
53 {" ", TRIM_ALL
, "", TRIM_ALL
},
54 {"\t\rTest String\n", TRIM_ALL
, "Test String", TRIM_ALL
},
59 // Helper used to test TruncateUTF8ToByteSize.
60 bool Truncated(const std::string
& input
, const size_t byte_size
,
61 std::string
* output
) {
62 size_t prev
= input
.length();
63 TruncateUTF8ToByteSize(input
, byte_size
, output
);
64 return prev
!= output
->length();
69 TEST(StringUtilTest
, TruncateUTF8ToByteSize
) {
72 // Empty strings and invalid byte_size arguments
73 EXPECT_FALSE(Truncated(std::string(), 0, &output
));
74 EXPECT_EQ(output
, "");
75 EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output
));
76 EXPECT_EQ(output
, "");
77 EXPECT_FALSE(Truncated("\xe1\x80\xbf", -1, &output
));
78 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output
));
80 // Testing the truncation of valid UTF8 correctly
81 EXPECT_TRUE(Truncated("abc", 2, &output
));
82 EXPECT_EQ(output
, "ab");
83 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output
));
84 EXPECT_EQ(output
.compare("\xc2\x81"), 0);
85 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output
));
86 EXPECT_EQ(output
.compare("\xc2\x81"), 0);
87 EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output
));
88 EXPECT_EQ(output
.compare("\xc2\x81\xc2\x81"), 0);
91 const char array
[] = "\x00\x00\xc2\x81\xc2\x81";
92 const std::string
array_string(array
, arraysize(array
));
93 EXPECT_TRUE(Truncated(array_string
, 4, &output
));
94 EXPECT_EQ(output
.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
98 const char array
[] = "\x00\xc2\x81\xc2\x81";
99 const std::string
array_string(array
, arraysize(array
));
100 EXPECT_TRUE(Truncated(array_string
, 4, &output
));
101 EXPECT_EQ(output
.compare(std::string("\x00\xc2\x81", 3)), 0);
104 // Testing invalid UTF8
105 EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output
));
106 EXPECT_EQ(output
.compare(""), 0);
107 EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output
));
108 EXPECT_EQ(output
.compare(""), 0);
109 EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output
));
110 EXPECT_EQ(output
.compare(""), 0);
112 // Testing invalid UTF8 mixed with valid UTF8
113 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output
));
114 EXPECT_EQ(output
.compare("\xe1\x80\xbf"), 0);
115 EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output
));
116 EXPECT_EQ(output
.compare("\xf1\x80\xa0\xbf"), 0);
117 EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
119 EXPECT_EQ(output
.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
120 EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
122 EXPECT_EQ(output
.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
123 EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output
));
124 EXPECT_EQ(output
.compare("\xef\xbb\xbf" "abc"), 0);
126 // Overlong sequences
127 EXPECT_TRUE(Truncated("\xc0\x80", 2, &output
));
128 EXPECT_EQ(output
.compare(""), 0);
129 EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output
));
130 EXPECT_EQ(output
.compare(""), 0);
131 EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output
));
132 EXPECT_EQ(output
.compare(""), 0);
133 EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output
));
134 EXPECT_EQ(output
.compare(""), 0);
135 EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output
));
136 EXPECT_EQ(output
.compare(""), 0);
137 EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output
));
138 EXPECT_EQ(output
.compare(""), 0);
139 EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output
));
140 EXPECT_EQ(output
.compare(""), 0);
141 EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output
));
142 EXPECT_EQ(output
.compare(""), 0);
143 EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output
));
144 EXPECT_EQ(output
.compare(""), 0);
145 EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output
));
146 EXPECT_EQ(output
.compare(""), 0);
147 EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output
));
148 EXPECT_EQ(output
.compare(""), 0);
150 // Beyond U+10FFFF (the upper limit of Unicode codespace)
151 EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output
));
152 EXPECT_EQ(output
.compare(""), 0);
153 EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output
));
154 EXPECT_EQ(output
.compare(""), 0);
155 EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output
));
156 EXPECT_EQ(output
.compare(""), 0);
158 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
159 EXPECT_TRUE(Truncated("\xfe\xff", 2, &output
));
160 EXPECT_EQ(output
.compare(""), 0);
161 EXPECT_TRUE(Truncated("\xff\xfe", 2, &output
));
162 EXPECT_EQ(output
.compare(""), 0);
165 const char array
[] = "\x00\x00\xfe\xff";
166 const std::string
array_string(array
, arraysize(array
));
167 EXPECT_TRUE(Truncated(array_string
, 4, &output
));
168 EXPECT_EQ(output
.compare(std::string("\x00\x00", 2)), 0);
171 // Variants on the previous test
173 const char array
[] = "\xff\xfe\x00\x00";
174 const std::string
array_string(array
, 4);
175 EXPECT_FALSE(Truncated(array_string
, 4, &output
));
176 EXPECT_EQ(output
.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
179 const char array
[] = "\xff\x00\x00\xfe";
180 const std::string
array_string(array
, arraysize(array
));
181 EXPECT_TRUE(Truncated(array_string
, 4, &output
));
182 EXPECT_EQ(output
.compare(std::string("\xff\x00\x00", 3)), 0);
185 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
186 EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output
));
187 EXPECT_EQ(output
.compare(""), 0);
188 EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output
));
189 EXPECT_EQ(output
.compare(""), 0);
190 EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output
));
191 EXPECT_EQ(output
.compare(""), 0);
192 EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output
));
193 EXPECT_EQ(output
.compare(""), 0);
194 EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output
));
195 EXPECT_EQ(output
.compare(""), 0);
197 // Strings in legacy encodings that are valid in UTF-8, but
198 // are invalid as UTF-8 in real data.
199 EXPECT_TRUE(Truncated("caf\xe9", 4, &output
));
200 EXPECT_EQ(output
.compare("caf"), 0);
201 EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output
));
202 EXPECT_EQ(output
.compare(""), 0);
203 EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output
));
204 EXPECT_EQ(output
.compare("\xa7\x41\xa6\x6e"), 0);
205 EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
207 EXPECT_EQ(output
.compare("\xa7\x41\xa6\x6e"), 0);
209 // Testing using the same string as input and output.
210 EXPECT_FALSE(Truncated(output
, 4, &output
));
211 EXPECT_EQ(output
.compare("\xa7\x41\xa6\x6e"), 0);
212 EXPECT_TRUE(Truncated(output
, 3, &output
));
213 EXPECT_EQ(output
.compare("\xa7\x41"), 0);
215 // "abc" with U+201[CD] in windows-125[0-8]
216 EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output
));
217 EXPECT_EQ(output
.compare("\x93" "abc"), 0);
219 // U+0639 U+064E U+0644 U+064E in ISO-8859-6
220 EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output
));
221 EXPECT_EQ(output
.compare(""), 0);
223 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
224 EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output
));
225 EXPECT_EQ(output
.compare(""), 0);
228 TEST(StringUtilTest
, TrimWhitespace
) {
229 string16 output
; // Allow contents to carry over to next testcase
230 for (size_t i
= 0; i
< arraysize(trim_cases
); ++i
) {
231 const trim_case
& value
= trim_cases
[i
];
232 EXPECT_EQ(value
.return_value
,
233 TrimWhitespace(WideToUTF16(value
.input
), value
.positions
,
235 EXPECT_EQ(WideToUTF16(value
.output
), output
);
238 // Test that TrimWhitespace() can take the same string for input and output
239 output
= ASCIIToUTF16(" This is a test \r\n");
240 EXPECT_EQ(TRIM_ALL
, TrimWhitespace(output
, TRIM_ALL
, &output
));
241 EXPECT_EQ(ASCIIToUTF16("This is a test"), output
);
243 // Once more, but with a string of whitespace
244 output
= ASCIIToUTF16(" \r\n");
245 EXPECT_EQ(TRIM_ALL
, TrimWhitespace(output
, TRIM_ALL
, &output
));
246 EXPECT_EQ(string16(), output
);
248 std::string output_ascii
;
249 for (size_t i
= 0; i
< arraysize(trim_cases_ascii
); ++i
) {
250 const trim_case_ascii
& value
= trim_cases_ascii
[i
];
251 EXPECT_EQ(value
.return_value
,
252 TrimWhitespace(value
.input
, value
.positions
, &output_ascii
));
253 EXPECT_EQ(value
.output
, output_ascii
);
257 static const struct collapse_case
{
258 const wchar_t* input
;
260 const wchar_t* output
;
261 } collapse_cases
[] = {
262 {L
" Google Video ", false, L
"Google Video"},
263 {L
"Google Video", false, L
"Google Video"},
266 {L
"\t\rTest String\n", false, L
"Test String"},
267 {L
"\x2002Test String\x00A0\x3000", false, L
"Test String"},
268 {L
" Test \n \t String ", false, L
"Test String"},
269 {L
"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L
"Test String"},
270 {L
" Test String", false, L
"Test String"},
271 {L
"Test String ", false, L
"Test String"},
272 {L
"Test String", false, L
"Test String"},
275 {L
" \r ", true, L
""},
276 {L
"\nFoo", true, L
"Foo"},
277 {L
"\r Foo ", true, L
"Foo"},
278 {L
" Foo bar ", true, L
"Foo bar"},
279 {L
" \tFoo bar \n", true, L
"Foo bar"},
280 {L
" a \r b\n c \r\n d \t\re \t f \n ", true, L
"abcde f"},
283 TEST(StringUtilTest
, CollapseWhitespace
) {
284 for (size_t i
= 0; i
< arraysize(collapse_cases
); ++i
) {
285 const collapse_case
& value
= collapse_cases
[i
];
286 EXPECT_EQ(WideToUTF16(value
.output
),
287 CollapseWhitespace(WideToUTF16(value
.input
), value
.trim
));
291 static const struct collapse_case_ascii
{
295 } collapse_cases_ascii
[] = {
296 {" Google Video ", false, "Google Video"},
297 {"Google Video", false, "Google Video"},
300 {"\t\rTest String\n", false, "Test String"},
301 {" Test \n \t String ", false, "Test String"},
302 {" Test String", false, "Test String"},
303 {"Test String ", false, "Test String"},
304 {"Test String", false, "Test String"},
308 {"\nFoo", true, "Foo"},
309 {"\r Foo ", true, "Foo"},
310 {" Foo bar ", true, "Foo bar"},
311 {" \tFoo bar \n", true, "Foo bar"},
312 {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
315 TEST(StringUtilTest
, CollapseWhitespaceASCII
) {
316 for (size_t i
= 0; i
< arraysize(collapse_cases_ascii
); ++i
) {
317 const collapse_case_ascii
& value
= collapse_cases_ascii
[i
];
318 EXPECT_EQ(value
.output
, CollapseWhitespaceASCII(value
.input
, value
.trim
));
322 TEST(StringUtilTest
, IsStringUTF8
) {
323 EXPECT_TRUE(IsStringUTF8("abc"));
324 EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
325 EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
326 EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
327 EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
328 EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc")); // UTF-8 BOM
330 // surrogate code points
331 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
332 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
333 EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
335 // overlong sequences
336 EXPECT_FALSE(IsStringUTF8("\xc0\x80")); // U+0000
337 EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81")); // "AB"
338 EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80")); // U+0000
339 EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80")); // U+0080
340 EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf")); // U+07ff
341 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D")); // U+000D
342 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91")); // U+0091
343 EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80")); // U+0800
344 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf")); // U+FEFF (BOM)
345 EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf")); // U+003F
346 EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5")); // U+00A5
348 // Beyond U+10FFFF (the upper limit of Unicode codespace)
349 EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80")); // U+110000
350 EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf")); // 5 bytes
351 EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80")); // 6 bytes
353 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
354 EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
355 EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
356 EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
357 EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
359 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
360 EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe")); // U+FFFE)
361 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe")); // U+1FFFE
362 EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf")); // U+10FFFF
363 EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90")); // U+FDD0
364 EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf")); // U+FDEF
365 // Strings in legacy encodings. We can certainly make up strings
366 // in a legacy encoding that are valid in UTF-8, but in real data,
367 // most of them are invalid as UTF-8.
368 EXPECT_FALSE(IsStringUTF8("caf\xe9")); // cafe with U+00E9 in ISO-8859-1
369 EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2")); // U+AC00, U+AC001 in EUC-KR
370 EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e")); // U+4F60 U+597D in Big5
371 // "abc" with U+201[CD] in windows-125[0-8]
372 EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
373 // U+0639 U+064E U+0644 U+064E in ISO-8859-6
374 EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
375 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
376 EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
378 // Check that we support Embedded Nulls. The first uses the canonical UTF-8
379 // representation, and the second uses a 2-byte sequence. The second version
380 // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
381 // given codepoint must be used.
382 static const char kEmbeddedNull
[] = "embedded\0null";
383 EXPECT_TRUE(IsStringUTF8(
384 std::string(kEmbeddedNull
, sizeof(kEmbeddedNull
))));
385 EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
388 TEST(StringUtilTest
, ConvertASCII
) {
389 static const char* char_cases
[] = {
392 "0123ABCDwxyz \a\b\t\r\n!+,.~"
395 static const wchar_t* const wchar_cases
[] = {
398 L
"0123ABCDwxyz \a\b\t\r\n!+,.~"
401 for (size_t i
= 0; i
< arraysize(char_cases
); ++i
) {
402 EXPECT_TRUE(IsStringASCII(char_cases
[i
]));
403 string16 utf16
= ASCIIToUTF16(char_cases
[i
]);
404 EXPECT_EQ(WideToUTF16(wchar_cases
[i
]), utf16
);
406 std::string ascii
= UTF16ToASCII(WideToUTF16(wchar_cases
[i
]));
407 EXPECT_EQ(char_cases
[i
], ascii
);
410 EXPECT_FALSE(IsStringASCII("Google \x80Video"));
412 // Convert empty strings.
415 EXPECT_EQ(empty
, UTF16ToASCII(empty16
));
416 EXPECT_EQ(empty16
, ASCIIToUTF16(empty
));
418 // Convert strings with an embedded NUL character.
419 const char chars_with_nul
[] = "test\0string";
420 const int length_with_nul
= arraysize(chars_with_nul
) - 1;
421 std::string
string_with_nul(chars_with_nul
, length_with_nul
);
422 std::wstring wide_with_nul
= ASCIIToWide(string_with_nul
);
423 EXPECT_EQ(static_cast<std::wstring::size_type
>(length_with_nul
),
424 wide_with_nul
.length());
425 std::string narrow_with_nul
= UTF16ToASCII(WideToUTF16(wide_with_nul
));
426 EXPECT_EQ(static_cast<std::string::size_type
>(length_with_nul
),
427 narrow_with_nul
.length());
428 EXPECT_EQ(0, string_with_nul
.compare(narrow_with_nul
));
431 TEST(StringUtilTest
, ToUpperASCII
) {
432 EXPECT_EQ('C', ToUpperASCII('C'));
433 EXPECT_EQ('C', ToUpperASCII('c'));
434 EXPECT_EQ('2', ToUpperASCII('2'));
436 EXPECT_EQ(L
'C', ToUpperASCII(L
'C'));
437 EXPECT_EQ(L
'C', ToUpperASCII(L
'c'));
438 EXPECT_EQ(L
'2', ToUpperASCII(L
'2'));
440 std::string
in_place_a("Cc2");
441 StringToUpperASCII(&in_place_a
);
442 EXPECT_EQ("CC2", in_place_a
);
444 std::wstring
in_place_w(L
"Cc2");
445 StringToUpperASCII(&in_place_w
);
446 EXPECT_EQ(L
"CC2", in_place_w
);
448 std::string
original_a("Cc2");
449 std::string upper_a
= StringToUpperASCII(original_a
);
450 EXPECT_EQ("CC2", upper_a
);
452 std::wstring
original_w(L
"Cc2");
453 std::wstring upper_w
= StringToUpperASCII(original_w
);
454 EXPECT_EQ(L
"CC2", upper_w
);
457 TEST(StringUtilTest
, LowerCaseEqualsASCII
) {
458 static const struct {
461 } lowercase_cases
[] = {
467 for (size_t i
= 0; i
< ARRAYSIZE_UNSAFE(lowercase_cases
); ++i
) {
468 EXPECT_TRUE(LowerCaseEqualsASCII(ASCIIToUTF16(lowercase_cases
[i
].src_a
),
469 lowercase_cases
[i
].dst
));
470 EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases
[i
].src_a
,
471 lowercase_cases
[i
].dst
));
475 TEST(StringUtilTest
, FormatBytesUnlocalized
) {
476 static const struct {
478 const char* expected
;
480 // Expected behavior: we show one post-decimal digit when we have
481 // under two pre-decimal digits, except in cases where it makes no
482 // sense (zero or bytes).
483 // Since we switch units once we cross the 1000 mark, this keeps
484 // the display of file sizes or bytes consistently around three
488 {1024*1024, "1.0 MB"},
489 {1024*1024*1024, "1.0 GB"},
490 {10LL*1024*1024*1024, "10.0 GB"},
491 {99LL*1024*1024*1024, "99.0 GB"},
492 {105LL*1024*1024*1024, "105 GB"},
493 {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"},
494 {~(1LL<<63), "8192 PB"},
496 {99*1024 + 103, "99.1 kB"},
497 {1024*1024 + 103, "1.0 MB"},
498 {1024*1024 + 205 * 1024, "1.2 MB"},
499 {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"},
500 {10LL*1024*1024*1024, "10.0 GB"},
501 {100LL*1024*1024*1024, "100 GB"},
504 for (size_t i
= 0; i
< ARRAYSIZE_UNSAFE(cases
); ++i
) {
505 EXPECT_EQ(ASCIIToUTF16(cases
[i
].expected
),
506 FormatBytesUnlocalized(cases
[i
].bytes
));
509 TEST(StringUtilTest
, ReplaceSubstringsAfterOffset
) {
510 static const struct {
512 string16::size_type start_offset
;
513 const char* find_this
;
514 const char* replace_with
;
515 const char* expected
;
517 {"aaa", 0, "a", "b", "bbb"},
518 {"abb", 0, "ab", "a", "ab"},
519 {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
520 {"Not found", 0, "x", "0", "Not found"},
521 {"Not found again", 5, "x", "0", "Not found again"},
522 {" Making it much longer ", 0, " ", "Four score and seven years ago",
523 "Four score and seven years agoMakingFour score and seven years agoit"
524 "Four score and seven years agomuchFour score and seven years agolonger"
525 "Four score and seven years ago"},
526 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
527 {"Replace me only me once", 9, "me ", "", "Replace me only once"},
528 {"abababab", 2, "ab", "c", "abccc"},
531 for (size_t i
= 0; i
< ARRAYSIZE_UNSAFE(cases
); i
++) {
532 string16 str
= ASCIIToUTF16(cases
[i
].str
);
533 ReplaceSubstringsAfterOffset(&str
, cases
[i
].start_offset
,
534 ASCIIToUTF16(cases
[i
].find_this
),
535 ASCIIToUTF16(cases
[i
].replace_with
));
536 EXPECT_EQ(ASCIIToUTF16(cases
[i
].expected
), str
);
540 TEST(StringUtilTest
, ReplaceFirstSubstringAfterOffset
) {
541 static const struct {
543 string16::size_type start_offset
;
544 const char* find_this
;
545 const char* replace_with
;
546 const char* expected
;
548 {"aaa", 0, "a", "b", "baa"},
549 {"abb", 0, "ab", "a", "ab"},
550 {"Removing some substrings inging", 0, "ing", "",
551 "Remov some substrings inging"},
552 {"Not found", 0, "x", "0", "Not found"},
553 {"Not found again", 5, "x", "0", "Not found again"},
554 {" Making it much longer ", 0, " ", "Four score and seven years ago",
555 "Four score and seven years agoMaking it much longer "},
556 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
557 {"Replace me only me once", 4, "me ", "", "Replace only me once"},
558 {"abababab", 2, "ab", "c", "abcabab"},
561 for (size_t i
= 0; i
< ARRAYSIZE_UNSAFE(cases
); i
++) {
562 string16 str
= ASCIIToUTF16(cases
[i
].str
);
563 ReplaceFirstSubstringAfterOffset(&str
, cases
[i
].start_offset
,
564 ASCIIToUTF16(cases
[i
].find_this
),
565 ASCIIToUTF16(cases
[i
].replace_with
));
566 EXPECT_EQ(ASCIIToUTF16(cases
[i
].expected
), str
);
570 TEST(StringUtilTest
, HexDigitToInt
) {
571 EXPECT_EQ(0, HexDigitToInt('0'));
572 EXPECT_EQ(1, HexDigitToInt('1'));
573 EXPECT_EQ(2, HexDigitToInt('2'));
574 EXPECT_EQ(3, HexDigitToInt('3'));
575 EXPECT_EQ(4, HexDigitToInt('4'));
576 EXPECT_EQ(5, HexDigitToInt('5'));
577 EXPECT_EQ(6, HexDigitToInt('6'));
578 EXPECT_EQ(7, HexDigitToInt('7'));
579 EXPECT_EQ(8, HexDigitToInt('8'));
580 EXPECT_EQ(9, HexDigitToInt('9'));
581 EXPECT_EQ(10, HexDigitToInt('A'));
582 EXPECT_EQ(11, HexDigitToInt('B'));
583 EXPECT_EQ(12, HexDigitToInt('C'));
584 EXPECT_EQ(13, HexDigitToInt('D'));
585 EXPECT_EQ(14, HexDigitToInt('E'));
586 EXPECT_EQ(15, HexDigitToInt('F'));
588 // Verify the lower case as well.
589 EXPECT_EQ(10, HexDigitToInt('a'));
590 EXPECT_EQ(11, HexDigitToInt('b'));
591 EXPECT_EQ(12, HexDigitToInt('c'));
592 EXPECT_EQ(13, HexDigitToInt('d'));
593 EXPECT_EQ(14, HexDigitToInt('e'));
594 EXPECT_EQ(15, HexDigitToInt('f'));
597 // This checks where we can use the assignment operator for a va_list. We need
598 // a way to do this since Visual C doesn't support va_copy, but assignment on
599 // va_list is not guaranteed to be a copy. See StringAppendVT which uses this
601 static void VariableArgsFunc(const char* format
, ...) {
603 va_start(org
, format
);
606 GG_VA_COPY(dup
, org
);
607 int i1
= va_arg(org
, int);
608 int j1
= va_arg(org
, int);
609 char* s1
= va_arg(org
, char*);
610 double d1
= va_arg(org
, double);
613 int i2
= va_arg(dup
, int);
614 int j2
= va_arg(dup
, int);
615 char* s2
= va_arg(dup
, char*);
616 double d2
= va_arg(dup
, double);
620 EXPECT_STREQ(s1
, s2
);
626 TEST(StringUtilTest
, VAList
) {
627 VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21);
631 template <typename STR
>
632 void TokenizeTest() {
636 size
= Tokenize(STR("This is a string"), STR(" "), &r
);
638 ASSERT_EQ(4U, r
.size());
639 EXPECT_EQ(r
[0], STR("This"));
640 EXPECT_EQ(r
[1], STR("is"));
641 EXPECT_EQ(r
[2], STR("a"));
642 EXPECT_EQ(r
[3], STR("string"));
645 size
= Tokenize(STR("one,two,three"), STR(","), &r
);
647 ASSERT_EQ(3U, r
.size());
648 EXPECT_EQ(r
[0], STR("one"));
649 EXPECT_EQ(r
[1], STR("two"));
650 EXPECT_EQ(r
[2], STR("three"));
653 size
= Tokenize(STR("one,two:three;four"), STR(",:"), &r
);
655 ASSERT_EQ(3U, r
.size());
656 EXPECT_EQ(r
[0], STR("one"));
657 EXPECT_EQ(r
[1], STR("two"));
658 EXPECT_EQ(r
[2], STR("three;four"));
661 size
= Tokenize(STR("one,two:three;four"), STR(";,:"), &r
);
663 ASSERT_EQ(4U, r
.size());
664 EXPECT_EQ(r
[0], STR("one"));
665 EXPECT_EQ(r
[1], STR("two"));
666 EXPECT_EQ(r
[2], STR("three"));
667 EXPECT_EQ(r
[3], STR("four"));
670 size
= Tokenize(STR("one, two, three"), STR(","), &r
);
672 ASSERT_EQ(3U, r
.size());
673 EXPECT_EQ(r
[0], STR("one"));
674 EXPECT_EQ(r
[1], STR(" two"));
675 EXPECT_EQ(r
[2], STR(" three"));
678 size
= Tokenize(STR("one, two, three, "), STR(","), &r
);
680 ASSERT_EQ(4U, r
.size());
681 EXPECT_EQ(r
[0], STR("one"));
682 EXPECT_EQ(r
[1], STR(" two"));
683 EXPECT_EQ(r
[2], STR(" three"));
684 EXPECT_EQ(r
[3], STR(" "));
687 size
= Tokenize(STR("one, two, three,"), STR(","), &r
);
689 ASSERT_EQ(3U, r
.size());
690 EXPECT_EQ(r
[0], STR("one"));
691 EXPECT_EQ(r
[1], STR(" two"));
692 EXPECT_EQ(r
[2], STR(" three"));
695 size
= Tokenize(STR(), STR(","), &r
);
697 ASSERT_EQ(0U, r
.size());
700 size
= Tokenize(STR(","), STR(","), &r
);
702 ASSERT_EQ(0U, r
.size());
705 size
= Tokenize(STR(",;:."), STR(".:;,"), &r
);
707 ASSERT_EQ(0U, r
.size());
710 size
= Tokenize(STR("\t\ta\t"), STR("\t"), &r
);
712 ASSERT_EQ(1U, r
.size());
713 EXPECT_EQ(r
[0], STR("a"));
716 size
= Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r
);
718 ASSERT_EQ(2U, r
.size());
719 EXPECT_EQ(r
[0], STR("\ta\t"));
720 EXPECT_EQ(r
[1], STR("b\tcc"));
724 TEST(StringUtilTest
, TokenizeStdString
) {
725 TokenizeTest
<std::string
>();
728 TEST(StringUtilTest
, TokenizeStringPiece
) {
729 TokenizeTest
<base::StringPiece
>();
732 // Test for JoinString
733 TEST(StringUtilTest
, JoinString
) {
734 std::vector
<std::string
> in
;
735 EXPECT_EQ("", JoinString(in
, ','));
738 EXPECT_EQ("a", JoinString(in
, ','));
742 EXPECT_EQ("a,b,c", JoinString(in
, ','));
744 in
.push_back(std::string());
745 EXPECT_EQ("a,b,c,", JoinString(in
, ','));
747 EXPECT_EQ("a|b|c|| ", JoinString(in
, '|'));
750 // Test for JoinString overloaded with std::string separator
751 TEST(StringUtilTest
, JoinStringWithString
) {
752 std::string
separator(", ");
753 std::vector
<std::string
> parts
;
754 EXPECT_EQ(std::string(), JoinString(parts
, separator
));
756 parts
.push_back("a");
757 EXPECT_EQ("a", JoinString(parts
, separator
));
759 parts
.push_back("b");
760 parts
.push_back("c");
761 EXPECT_EQ("a, b, c", JoinString(parts
, separator
));
763 parts
.push_back(std::string());
764 EXPECT_EQ("a, b, c, ", JoinString(parts
, separator
));
765 parts
.push_back(" ");
766 EXPECT_EQ("a|b|c|| ", JoinString(parts
, "|"));
769 // Test for JoinString overloaded with string16 separator
770 TEST(StringUtilTest
, JoinStringWithString16
) {
771 string16 separator
= ASCIIToUTF16(", ");
772 std::vector
<string16
> parts
;
773 EXPECT_EQ(string16(), JoinString(parts
, separator
));
775 parts
.push_back(ASCIIToUTF16("a"));
776 EXPECT_EQ(ASCIIToUTF16("a"), JoinString(parts
, separator
));
778 parts
.push_back(ASCIIToUTF16("b"));
779 parts
.push_back(ASCIIToUTF16("c"));
780 EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(parts
, separator
));
782 parts
.push_back(ASCIIToUTF16(""));
783 EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(parts
, separator
));
784 parts
.push_back(ASCIIToUTF16(" "));
785 EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(parts
, ASCIIToUTF16("|")));
788 TEST(StringUtilTest
, StartsWith
) {
789 EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true));
790 EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true));
791 EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false));
792 EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false));
793 EXPECT_FALSE(StartsWithASCII("java", "javascript", true));
794 EXPECT_FALSE(StartsWithASCII("java", "javascript", false));
795 EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", false));
796 EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", true));
797 EXPECT_TRUE(StartsWithASCII("java", std::string(), false));
798 EXPECT_TRUE(StartsWithASCII("java", std::string(), true));
800 EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"),
801 ASCIIToUTF16("javascript"), true));
802 EXPECT_FALSE(StartsWith(ASCIIToUTF16("JavaScript:url"),
803 ASCIIToUTF16("javascript"), true));
804 EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"),
805 ASCIIToUTF16("javascript"), false));
806 EXPECT_TRUE(StartsWith(ASCIIToUTF16("JavaScript:url"),
807 ASCIIToUTF16("javascript"), false));
808 EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"),
809 ASCIIToUTF16("javascript"), true));
810 EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"),
811 ASCIIToUTF16("javascript"), false));
812 EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"), false));
813 EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"), true));
814 EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(), false));
815 EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(), true));
818 TEST(StringUtilTest
, EndsWith
) {
819 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"),
820 ASCIIToUTF16(".plugin"), true));
821 EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.Plugin"),
822 ASCIIToUTF16(".plugin"), true));
823 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"),
824 ASCIIToUTF16(".plugin"), false));
825 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.Plugin"),
826 ASCIIToUTF16(".plugin"), false));
827 EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"), true));
828 EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"), false));
829 EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"),
830 ASCIIToUTF16(".plugin"), true));
831 EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"),
832 ASCIIToUTF16(".plugin"), false));
833 EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"), false));
834 EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"), true));
835 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(), false));
836 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(), true));
837 EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"),
838 ASCIIToUTF16(".plugin"), false));
839 EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"), ASCIIToUTF16(".plugin"), true));
840 EXPECT_TRUE(EndsWith(string16(), string16(), false));
841 EXPECT_TRUE(EndsWith(string16(), string16(), true));
844 TEST(StringUtilTest
, GetStringFWithOffsets
) {
845 std::vector
<string16
> subst
;
846 subst
.push_back(ASCIIToUTF16("1"));
847 subst
.push_back(ASCIIToUTF16("2"));
848 std::vector
<size_t> offsets
;
850 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
853 EXPECT_EQ(2U, offsets
.size());
854 EXPECT_EQ(7U, offsets
[0]);
855 EXPECT_EQ(25U, offsets
[1]);
858 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
861 EXPECT_EQ(2U, offsets
.size());
862 EXPECT_EQ(25U, offsets
[0]);
863 EXPECT_EQ(7U, offsets
[1]);
867 TEST(StringUtilTest
, ReplaceStringPlaceholdersTooFew
) {
868 // Test whether replacestringplaceholders works as expected when there
869 // are fewer inputs than outputs.
870 std::vector
<string16
> subst
;
871 subst
.push_back(ASCIIToUTF16("9a"));
872 subst
.push_back(ASCIIToUTF16("8b"));
873 subst
.push_back(ASCIIToUTF16("7c"));
876 ReplaceStringPlaceholders(
877 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst
, NULL
);
879 EXPECT_EQ(formatted
, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"));
882 TEST(StringUtilTest
, ReplaceStringPlaceholders
) {
883 std::vector
<string16
> subst
;
884 subst
.push_back(ASCIIToUTF16("9a"));
885 subst
.push_back(ASCIIToUTF16("8b"));
886 subst
.push_back(ASCIIToUTF16("7c"));
887 subst
.push_back(ASCIIToUTF16("6d"));
888 subst
.push_back(ASCIIToUTF16("5e"));
889 subst
.push_back(ASCIIToUTF16("4f"));
890 subst
.push_back(ASCIIToUTF16("3g"));
891 subst
.push_back(ASCIIToUTF16("2h"));
892 subst
.push_back(ASCIIToUTF16("1i"));
895 ReplaceStringPlaceholders(
896 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst
, NULL
);
898 EXPECT_EQ(formatted
, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"));
// Checks that two-digit placeholders ($10..$14) are substituted with the
// 10th..14th entries of |subst|, and that a trailing "$1" still resolves to
// the first entry.
901 TEST(StringUtilTest
, ReplaceStringPlaceholdersMoreThan9Replacements
) {
902 std::vector
<string16
> subst
;
// Fourteen substitutions, pushed in placeholder order.
903 subst
.push_back(ASCIIToUTF16("9a"));
904 subst
.push_back(ASCIIToUTF16("8b"));
905 subst
.push_back(ASCIIToUTF16("7c"));
906 subst
.push_back(ASCIIToUTF16("6d"));
907 subst
.push_back(ASCIIToUTF16("5e"));
908 subst
.push_back(ASCIIToUTF16("4f"));
909 subst
.push_back(ASCIIToUTF16("3g"));
910 subst
.push_back(ASCIIToUTF16("2h"));
911 subst
.push_back(ASCIIToUTF16("1i"));
912 subst
.push_back(ASCIIToUTF16("0j"));
913 subst
.push_back(ASCIIToUTF16("-1k"));
914 subst
.push_back(ASCIIToUTF16("-2l"));
915 subst
.push_back(ASCIIToUTF16("-3m"));
916 subst
.push_back(ASCIIToUTF16("-4n"));
// The format string ends with a bare "$1" to make sure a single-digit
// placeholder at the very end is still handled after the two-digit ones.
919 ReplaceStringPlaceholders(
920 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i,"
921 "$10j,$11k,$12l,$13m,$14n,$1"), subst
, NULL
);
// "$10j" becomes "0jj" (10th entry plus the literal 'j'), and so on; the
// final "$1" becomes "9a".
923 EXPECT_EQ(formatted
, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,"
924 "1ii,0jj,-1kk,-2ll,-3mm,-4nn,9a"));
// Same scenario as the nine-substitution string16 test, but exercising the
// std::string form of ReplaceStringPlaceholders.
927 TEST(StringUtilTest
, StdStringReplaceStringPlaceholders
) {
928 std::vector
<std::string
> subst
;
// Entries are pushed in placeholder order ($1 first, $9 last).
929 subst
.push_back("9a");
930 subst
.push_back("8b");
931 subst
.push_back("7c");
932 subst
.push_back("6d");
933 subst
.push_back("5e");
934 subst
.push_back("4f");
935 subst
.push_back("3g");
936 subst
.push_back("2h");
937 subst
.push_back("1i");
// NULL is passed as the last argument, so no offsets are requested here.
939 std::string formatted
=
940 ReplaceStringPlaceholders(
941 "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst
, NULL
);
// Each "$N" is replaced by the N-th entry of |subst|.
943 EXPECT_EQ(formatted
, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii");
// Exercises ReplaceStringPlaceholders on a format string in which each
// placeholder digit is preceded by a run of two, three and four '$'
// characters respectively.
946 TEST(StringUtilTest
, ReplaceStringPlaceholdersConsecutiveDollarSigns
) {
947 std::vector
<std::string
> subst
;
948 subst
.push_back("a");
949 subst
.push_back("b");
950 subst
.push_back("c");
// The result of the call is the first argument of EXPECT_EQ; NULL means no
// offsets are requested.
951 EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst
, NULL
),
// Checks MatchPattern wildcard matching ('*' and '?') on ASCII, UTF-8 and
// UTF-16 inputs, including empty strings and invalid UTF-8.
955 TEST(StringUtilTest
, MatchPatternTest
) {
956 EXPECT_TRUE(MatchPattern("www.google.com", "*.com"));
957 EXPECT_TRUE(MatchPattern("www.google.com", "*"));
958 EXPECT_FALSE(MatchPattern("www.google.com", "www*.g*.org"));
959 EXPECT_TRUE(MatchPattern("Hello", "H?l?o"));
960 EXPECT_FALSE(MatchPattern("www.google.com", "http://*)"));
// Matching is expected to be case-sensitive: ".COM" must not match ".com".
961 EXPECT_FALSE(MatchPattern("www.msn.com", "*.COM"));
// The pattern contains "\*" (backslash followed by '*'); presumably this
// exercises an escaped, literal '*' -- confirm against MatchPattern's docs.
962 EXPECT_TRUE(MatchPattern("Hello*1234", "He??o\\*1*"));
// Empty-input cases. Note that '?' is expected to match the empty string.
963 EXPECT_FALSE(MatchPattern("", "*.*"));
964 EXPECT_TRUE(MatchPattern("", "*"));
965 EXPECT_TRUE(MatchPattern("", "?"));
966 EXPECT_TRUE(MatchPattern("", ""));
967 EXPECT_FALSE(MatchPattern("Hello", ""));
968 EXPECT_TRUE(MatchPattern("Hello*", "Hello*"));
969 // Stop after a certain recursion depth.
970 EXPECT_FALSE(MatchPattern("123456789012345678", "?????????????????*"));
972 // Test UTF8 matching.
973 EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0", "*\xe2\x99\xa0"));
// A single '?' is expected to cover the whole three-byte sequence.
974 EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0.", "heart: ?."));
975 EXPECT_TRUE(MatchPattern("hearts: \xe2\x99\xa0\xe2\x99\xa0", "*"));
976 // Invalid sequences should be handled as a single invalid character.
977 EXPECT_TRUE(MatchPattern("invalid: \xef\xbf\xbe", "invalid: ?"));
978 // If the pattern has invalid characters, it shouldn't match anything.
979 EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80"));
981 // Test UTF16 character matching.
982 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"),
983 UTF8ToUTF16("*.com")));
984 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"),
985 UTF8ToUTF16("He??o\\*1*")));
987 // This test verifies that consecutive wild cards are collapsed into 1
988 // wildcard (when this doesn't occur, MatchPattern reaches its maximum
990 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello"),
991 UTF8ToUTF16("He********************************o")));
// Checks base::strlcpy and base::wcslcpy for several destination sizes.
// In every case below both functions are expected to return 7, the length of
// the source "abcdefg", whether or not the whole string fit.
994 TEST(StringUtilTest
, LcpyTest
) {
995 // Test the normal case where we fit in our buffer.
999 EXPECT_EQ(7U, base::strlcpy(dst
, "abcdefg", arraysize(dst
)));
// Compare 8 elements so the terminating null is checked as well.
1000 EXPECT_EQ(0, memcmp(dst
, "abcdefg", 8));
1001 EXPECT_EQ(7U, base::wcslcpy(wdst
, L
"abcdefg", arraysize(wdst
)));
1002 EXPECT_EQ(0, memcmp(wdst
, L
"abcdefg", sizeof(wchar_t) * 8));
1005 // Test dst_size == 0, nothing should be written to |dst| and we should
1006 // have the equivalent of strlen(src).
// The buffers are pre-filled with {1, 2} so any write would be detected.
1008 char dst
[2] = {1, 2};
1009 wchar_t wdst
[2] = {1, 2};
1010 EXPECT_EQ(7U, base::strlcpy(dst
, "abcdefg", 0));
1011 EXPECT_EQ(1, dst
[0]);
1012 EXPECT_EQ(2, dst
[1]);
1013 EXPECT_EQ(7U, base::wcslcpy(wdst
, L
"abcdefg", 0));
1014 EXPECT_EQ(static_cast<wchar_t>(1), wdst
[0]);
1015 EXPECT_EQ(static_cast<wchar_t>(2), wdst
[1]);
1018 // Test the case where we _just_ completely fit including the null.
1022 EXPECT_EQ(7U, base::strlcpy(dst
, "abcdefg", arraysize(dst
)));
1023 EXPECT_EQ(0, memcmp(dst
, "abcdefg", 8));
1024 EXPECT_EQ(7U, base::wcslcpy(wdst
, L
"abcdefg", arraysize(wdst
)));
1025 EXPECT_EQ(0, memcmp(wdst
, L
"abcdefg", sizeof(wchar_t) * 8));
1028 // Test the case where we are one smaller, so we can't fit the null.
1032 EXPECT_EQ(7U, base::strlcpy(dst
, "abcdefg", arraysize(dst
)));
// The copy is expected to be truncated to "abcdef" and still null-terminated
// (7 elements are compared, the last being the terminator of "abcdef").
1033 EXPECT_EQ(0, memcmp(dst
, "abcdef", 7));
1034 EXPECT_EQ(7U, base::wcslcpy(wdst
, L
"abcdefg", arraysize(wdst
)));
1035 EXPECT_EQ(0, memcmp(wdst
, L
"abcdef", sizeof(wchar_t) * 7));
1038 // Test the case where we are just too small.
1042 EXPECT_EQ(7U, base::strlcpy(dst
, "abcdefg", arraysize(dst
)));
// Only "ab" plus the terminating null is expected in the destination.
1043 EXPECT_EQ(0, memcmp(dst
, "ab", 3));
1044 EXPECT_EQ(7U, base::wcslcpy(wdst
, L
"abcdefg", arraysize(wdst
)));
1045 EXPECT_EQ(0, memcmp(wdst
, L
"ab", sizeof(wchar_t) * 3));
// Table-driven check of base::IsWprintfFormatPortable: each row pairs a wide
// format string with the expected result.
1049 TEST(StringUtilTest
, WprintfFormatPortabilityTest
) {
1050 static const struct {
1051 const wchar_t* input
;
// A plain "%s" is expected to be reported as non-portable.
1058 { L
"Hello, %s", false },
// Mixing "%ls" with "%s" is still non-portable because of the "%s".
1063 { L
"%ls %s", false },
1064 { L
"%s %ls", false },
1065 { L
"%s %ls %s", false },
// Upper-case conversions (%F, %D, %O, %U) make the format non-portable;
// the lower-case forms alone are fine.
1067 { L
"%f %F", false },
1068 { L
"%d %D", false },
1069 { L
"%o %O", false },
1070 { L
"%u %U", false },
1071 { L
"%f %d %o %u", true },
1072 { L
"%-8d (%02.1f%)", true },
// Flags/width between '%' and 's' do not make "%s" portable.
1073 { L
"% 10s", false },
// Run every row through IsWprintfFormatPortable and compare with the
// expected |portable| flag.
1076 for (size_t i
= 0; i
< ARRAYSIZE_UNSAFE(cases
); ++i
)
1077 EXPECT_EQ(cases
[i
].portable
, base::IsWprintfFormatPortable(cases
[i
].input
));
// Checks RemoveChars, using the same string as both the input and the output
// argument, and verifies its boolean return value.
1080 TEST(StringUtilTest
, RemoveChars
) {
1081 const char* kRemoveChars
= "-/+*";
1082 std::string input
= "A-+bc/d!*";
// Characters from kRemoveChars are present, so the call is expected to
// return true and strip them; '!' is not in the set and must remain.
1083 EXPECT_TRUE(RemoveChars(input
, kRemoveChars
, &input
));
1084 EXPECT_EQ("Abcd!", input
);
1086 // No characters match kRemoveChars.
1087 EXPECT_FALSE(RemoveChars(input
, kRemoveChars
, &input
));
1088 EXPECT_EQ("Abcd!", input
);
// Expected to return false and leave |input| equal to the empty string.
1092 EXPECT_FALSE(RemoveChars(input
, kRemoveChars
, &input
));
1093 EXPECT_EQ(std::string(), input
);
// Table-driven check of ReplaceChars: for each row the returned flag is
// compared with |result| and the produced string with |output|.
1096 TEST(StringUtilTest
, ReplaceChars
) {
1099 const char* replace_chars
;
1100 const char* replace_with
;
// Columns appear to be: input, characters to replace, replacement text,
// expected output, expected return value.
// Rows where nothing is replaced expect false and an unchanged string.
1104 { "", "", "", "", false },
1105 { "test", "", "", "test", false },
1106 { "test", "", "!", "test", false },
1107 { "test", "z", "!", "test", false },
// The replacement text may be longer than one character.
1108 { "test", "e", "!", "t!st", true },
1109 { "test", "e", "!?", "t!?st", true },
1110 { "test", "ez", "!", "t!st", true },
1111 { "test", "zed", "!?", "t!?st", true },
1112 { "test", "t", "!?", "!?es!?", true },
1113 { "test", "et", "!>", "!>!>s!>", true },
1114 { "test", "zest", "!", "!!!!", true },
1115 { "test", "szt", "!", "!e!!", true },
// The replacement text itself contains 't'; the expected output shows it is
// not replaced again.
1116 { "test", "t", "test", "testestest", true },
1119 for (size_t i
= 0; i
< ARRAYSIZE_UNSAFE(cases
); ++i
) {
1121 bool result
= ReplaceChars(cases
[i
].input
,
1122 cases
[i
].replace_chars
,
1123 cases
[i
].replace_with
,
1125 EXPECT_EQ(cases
[i
].result
, result
);
1126 EXPECT_EQ(cases
[i
].output
, output
);
// Checks ContainsOnlyChars for 8-bit and 16-bit strings against empty,
// digit and whitespace character sets.
1130 TEST(StringUtilTest
, ContainsOnlyChars
) {
1131 // Providing an empty list of characters should return false but for the empty
1133 EXPECT_TRUE(ContainsOnlyChars(std::string(), std::string()));
1134 EXPECT_FALSE(ContainsOnlyChars("Hello", std::string()));
// An empty input is accepted for any character set; the order of characters
// in the set does not matter.
1136 EXPECT_TRUE(ContainsOnlyChars(std::string(), "1234"));
1137 EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
1138 EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
1139 EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
1140 EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));
// 8-bit strings against kWhitespaceASCII.
1142 EXPECT_TRUE(ContainsOnlyChars(std::string(), kWhitespaceASCII
));
1143 EXPECT_TRUE(ContainsOnlyChars(" ", kWhitespaceASCII
));
1144 EXPECT_TRUE(ContainsOnlyChars("\t", kWhitespaceASCII
));
1145 EXPECT_TRUE(ContainsOnlyChars("\t \r \n ", kWhitespaceASCII
));
1146 EXPECT_FALSE(ContainsOnlyChars("a", kWhitespaceASCII
));
1147 EXPECT_FALSE(ContainsOnlyChars("\thello\r \n ", kWhitespaceASCII
));
// The same cases for string16 against kWhitespaceUTF16.
1149 EXPECT_TRUE(ContainsOnlyChars(string16(), kWhitespaceUTF16
));
1150 EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16(" "), kWhitespaceUTF16
));
1151 EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t"), kWhitespaceUTF16
));
1152 EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t \r \n "), kWhitespaceUTF16
));
1153 EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("a"), kWhitespaceUTF16
));
1154 EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("\thello\r \n "),
// Test fixture providing a helper that exercises WriteInto for a given
// number of characters.
1158 class WriteIntoTest
: public testing::Test
{
// Copies up to |num_chars| characters of "supercali" into the storage
// returned by WriteInto(&buffer, num_chars + 1) and verifies both the
// resulting contents and that buffer.size() == num_chars.
1160 static void WritesCorrectly(size_t num_chars
) {
1162 char kOriginal
[] = "supercali";
// The length passed to WriteInto is one more than the number of characters
// copied.
1163 strncpy(WriteInto(&buffer
, num_chars
+ 1), kOriginal
, num_chars
);
1164 // Using std::string(buffer.c_str()) instead of |buffer| truncates the
1165 // string at the first \0.
// The expected text is the first min(num_chars, strlen(kOriginal))
// characters of kOriginal.
1166 EXPECT_EQ(std::string(kOriginal
,
1167 std::min(num_chars
, arraysize(kOriginal
) - 1)),
1168 std::string(buffer
.c_str()));
1169 EXPECT_EQ(num_chars
, buffer
.size());
// Checks that WriteInto sizes the target string correctly and that writing
// through it does not affect another string initialized from the same value.
1173 TEST_F(WriteIntoTest
, WriteInto
) {
1174 // Validate that WriteInto reserves enough space and
1175 // sizes a string correctly.
1178 WritesCorrectly(5000);
1180 // Validate that WriteInto doesn't modify other strings
1181 // when using a Copy-on-Write implementation.
1182 const char kLive
[] = "live";
1183 const char kDead
[] = "dead";
1184 const std::string live
= kLive
;
// |dead| starts as a copy of |live| and is then overwritten via WriteInto.
1185 std::string dead
= live
;
// Length 5 is passed for 4 copied characters; the size check below expects
// dead.size() to be 4 afterwards.
1186 strncpy(WriteInto(&dead
, 5), kDead
, 4);
1187 EXPECT_EQ(kDead
, dead
);
1188 EXPECT_EQ(4u, dead
.size());
// |live| must be unchanged by the write into |dead|.
1189 EXPECT_EQ(kLive
, live
);
1190 EXPECT_EQ(4u, live
.size());