Move media/base/simd/x86inc.asm to third_party/x86inc
[chromium-blink-merge.git] / base / string_util_unittest.cc
blobd36b9551c873d6ad22feff7722a062fde92e4960
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include <math.h>
6 #include <stdarg.h>
8 #include <limits>
9 #include <sstream>
11 #include "base/basictypes.h"
12 #include "base/string16.h"
13 #include "base/string_util.h"
14 #include "base/utf_string_conversions.h"
15 #include "testing/gmock/include/gmock/gmock.h"
16 #include "testing/gtest/include/gtest/gtest.h"
18 using ::testing::ElementsAre;
20 namespace base {
22 static const struct trim_case {
23 const wchar_t* input;
24 const TrimPositions positions;
25 const wchar_t* output;
26 const TrimPositions return_value;
27 } trim_cases[] = {
28 {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
29 {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
30 {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
31 {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
32 {L"", TRIM_ALL, L"", TRIM_NONE},
33 {L" ", TRIM_LEADING, L"", TRIM_LEADING},
34 {L" ", TRIM_TRAILING, L"", TRIM_TRAILING},
35 {L" ", TRIM_ALL, L"", TRIM_ALL},
36 {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
37 {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
40 static const struct trim_case_ascii {
41 const char* input;
42 const TrimPositions positions;
43 const char* output;
44 const TrimPositions return_value;
45 } trim_cases_ascii[] = {
46 {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
47 {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
48 {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
49 {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
50 {"", TRIM_ALL, "", TRIM_NONE},
51 {" ", TRIM_LEADING, "", TRIM_LEADING},
52 {" ", TRIM_TRAILING, "", TRIM_TRAILING},
53 {" ", TRIM_ALL, "", TRIM_ALL},
54 {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
57 namespace {
59 // Helper used to test TruncateUTF8ToByteSize.
60 bool Truncated(const std::string& input, const size_t byte_size,
61 std::string* output) {
62 size_t prev = input.length();
63 TruncateUTF8ToByteSize(input, byte_size, output);
64 return prev != output->length();
67 } // namespace
69 TEST(StringUtilTest, TruncateUTF8ToByteSize) {
70 std::string output;
72 // Empty strings and invalid byte_size arguments
73 EXPECT_FALSE(Truncated("", 0, &output));
74 EXPECT_EQ(output, "");
75 EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
76 EXPECT_EQ(output, "");
77 EXPECT_FALSE(Truncated("\xe1\x80\xbf", -1, &output));
78 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));
80 // Testing the truncation of valid UTF8 correctly
81 EXPECT_TRUE(Truncated("abc", 2, &output));
82 EXPECT_EQ(output, "ab");
83 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
84 EXPECT_EQ(output.compare("\xc2\x81"), 0);
85 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
86 EXPECT_EQ(output.compare("\xc2\x81"), 0);
87 EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
88 EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);
91 const char array[] = "\x00\x00\xc2\x81\xc2\x81";
92 const std::string array_string(array, arraysize(array));
93 EXPECT_TRUE(Truncated(array_string, 4, &output));
94 EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
98 const char array[] = "\x00\xc2\x81\xc2\x81";
99 const std::string array_string(array, arraysize(array));
100 EXPECT_TRUE(Truncated(array_string, 4, &output));
101 EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);
104 // Testing invalid UTF8
105 EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
106 EXPECT_EQ(output.compare(""), 0);
107 EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
108 EXPECT_EQ(output.compare(""), 0);
109 EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
110 EXPECT_EQ(output.compare(""), 0);
112 // Testing invalid UTF8 mixed with valid UTF8
113 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
114 EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);
115 EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
116 EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);
117 EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
118 10, &output));
119 EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
120 EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
121 10, &output));
122 EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
123 EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
124 EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);
126 // Overlong sequences
127 EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
128 EXPECT_EQ(output.compare(""), 0);
129 EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
130 EXPECT_EQ(output.compare(""), 0);
131 EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
132 EXPECT_EQ(output.compare(""), 0);
133 EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
134 EXPECT_EQ(output.compare(""), 0);
135 EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
136 EXPECT_EQ(output.compare(""), 0);
137 EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
138 EXPECT_EQ(output.compare(""), 0);
139 EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
140 EXPECT_EQ(output.compare(""), 0);
141 EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
142 EXPECT_EQ(output.compare(""), 0);
143 EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
144 EXPECT_EQ(output.compare(""), 0);
145 EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
146 EXPECT_EQ(output.compare(""), 0);
147 EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
148 EXPECT_EQ(output.compare(""), 0);
150 // Beyond U+10FFFF (the upper limit of Unicode codespace)
151 EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
152 EXPECT_EQ(output.compare(""), 0);
153 EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
154 EXPECT_EQ(output.compare(""), 0);
155 EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
156 EXPECT_EQ(output.compare(""), 0);
158 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
159 EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
160 EXPECT_EQ(output.compare(""), 0);
161 EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
162 EXPECT_EQ(output.compare(""), 0);
165 const char array[] = "\x00\x00\xfe\xff";
166 const std::string array_string(array, arraysize(array));
167 EXPECT_TRUE(Truncated(array_string, 4, &output));
168 EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);
171 // Variants on the previous test
173 const char array[] = "\xff\xfe\x00\x00";
174 const std::string array_string(array, 4);
175 EXPECT_FALSE(Truncated(array_string, 4, &output));
176 EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
179 const char array[] = "\xff\x00\x00\xfe";
180 const std::string array_string(array, arraysize(array));
181 EXPECT_TRUE(Truncated(array_string, 4, &output));
182 EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);
185 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
186 EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
187 EXPECT_EQ(output.compare(""), 0);
188 EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
189 EXPECT_EQ(output.compare(""), 0);
190 EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
191 EXPECT_EQ(output.compare(""), 0);
192 EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
193 EXPECT_EQ(output.compare(""), 0);
194 EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
195 EXPECT_EQ(output.compare(""), 0);
197 // Strings in legacy encodings that are valid in UTF-8, but
198 // are invalid as UTF-8 in real data.
199 EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
200 EXPECT_EQ(output.compare("caf"), 0);
201 EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
202 EXPECT_EQ(output.compare(""), 0);
203 EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
204 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
205 EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
206 &output));
207 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
209 // Testing using the same string as input and output.
210 EXPECT_FALSE(Truncated(output, 4, &output));
211 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
212 EXPECT_TRUE(Truncated(output, 3, &output));
213 EXPECT_EQ(output.compare("\xa7\x41"), 0);
215 // "abc" with U+201[CD] in windows-125[0-8]
216 EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
217 EXPECT_EQ(output.compare("\x93" "abc"), 0);
219 // U+0639 U+064E U+0644 U+064E in ISO-8859-6
220 EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
221 EXPECT_EQ(output.compare(""), 0);
223 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
224 EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
225 EXPECT_EQ(output.compare(""), 0);
228 TEST(StringUtilTest, TrimWhitespace) {
229 string16 output; // Allow contents to carry over to next testcase
230 for (size_t i = 0; i < arraysize(trim_cases); ++i) {
231 const trim_case& value = trim_cases[i];
232 EXPECT_EQ(value.return_value,
233 TrimWhitespace(WideToUTF16(value.input), value.positions,
234 &output));
235 EXPECT_EQ(WideToUTF16(value.output), output);
238 // Test that TrimWhitespace() can take the same string for input and output
239 output = ASCIIToUTF16(" This is a test \r\n");
240 EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
241 EXPECT_EQ(ASCIIToUTF16("This is a test"), output);
243 // Once more, but with a string of whitespace
244 output = ASCIIToUTF16(" \r\n");
245 EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
246 EXPECT_EQ(string16(), output);
248 std::string output_ascii;
249 for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) {
250 const trim_case_ascii& value = trim_cases_ascii[i];
251 EXPECT_EQ(value.return_value,
252 TrimWhitespace(value.input, value.positions, &output_ascii));
253 EXPECT_EQ(value.output, output_ascii);
// Wide-string table for the CollapseWhitespace test. Each row holds the input,
// the |trim| flag passed to CollapseWhitespace(), and the expected result.
static const struct collapse_case {
  const wchar_t* input;
  const bool trim;
  const wchar_t* output;
} collapse_cases[] = {
  // Cases run with the trim flag set to false.
  {L" Google Video ", false, L"Google Video"},
  {L"Google Video", false, L"Google Video"},
  {L"", false, L""},
  {L" ", false, L""},
  {L"\t\rTest String\n", false, L"Test String"},
  {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
  {L" Test \n \t String ", false, L"Test String"},
  {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
  {L" Test String", false, L"Test String"},
  {L"Test String ", false, L"Test String"},
  {L"Test String", false, L"Test String"},
  // Cases run with the trim flag set to true.
  {L"", true, L""},
  {L"\n", true, L""},
  {L" \r ", true, L""},
  {L"\nFoo", true, L"Foo"},
  {L"\r Foo ", true, L"Foo"},
  {L" Foo bar ", true, L"Foo bar"},
  {L" \tFoo bar \n", true, L"Foo bar"},
  {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
};
283 TEST(StringUtilTest, CollapseWhitespace) {
284 for (size_t i = 0; i < arraysize(collapse_cases); ++i) {
285 const collapse_case& value = collapse_cases[i];
286 EXPECT_EQ(value.output, CollapseWhitespace(value.input, value.trim));
// Narrow-string table for the CollapseWhitespaceASCII test. Each row holds the
// input, the |trim| flag passed to CollapseWhitespaceASCII(), and the expected
// result.
static const struct collapse_case_ascii {
  const char* input;
  const bool trim;
  const char* output;
} collapse_cases_ascii[] = {
  // Cases run with the trim flag set to false.
  {" Google Video ", false, "Google Video"},
  {"Google Video", false, "Google Video"},
  {"", false, ""},
  {" ", false, ""},
  {"\t\rTest String\n", false, "Test String"},
  {" Test \n \t String ", false, "Test String"},
  {" Test String", false, "Test String"},
  {"Test String ", false, "Test String"},
  {"Test String", false, "Test String"},
  // Cases run with the trim flag set to true.
  {"", true, ""},
  {"\n", true, ""},
  {" \r ", true, ""},
  {"\nFoo", true, "Foo"},
  {"\r Foo ", true, "Foo"},
  {" Foo bar ", true, "Foo bar"},
  {" \tFoo bar \n", true, "Foo bar"},
  {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
};
314 TEST(StringUtilTest, CollapseWhitespaceASCII) {
315 for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) {
316 const collapse_case_ascii& value = collapse_cases_ascii[i];
317 EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim));
321 TEST(StringUtilTest, ContainsOnlyWhitespaceASCII) {
322 EXPECT_TRUE(ContainsOnlyWhitespaceASCII(""));
323 EXPECT_TRUE(ContainsOnlyWhitespaceASCII(" "));
324 EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t"));
325 EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t \r \n "));
326 EXPECT_FALSE(ContainsOnlyWhitespaceASCII("a"));
327 EXPECT_FALSE(ContainsOnlyWhitespaceASCII("\thello\r \n "));
330 TEST(StringUtilTest, ContainsOnlyWhitespace) {
331 EXPECT_TRUE(ContainsOnlyWhitespace(string16()));
332 EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16(" ")));
333 EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t")));
334 EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t \r \n ")));
335 EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("a")));
336 EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("\thello\r \n ")));
339 TEST(StringUtilTest, IsStringUTF8) {
340 EXPECT_TRUE(IsStringUTF8("abc"));
341 EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
342 EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
343 EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
344 EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
345 EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc")); // UTF-8 BOM
347 // surrogate code points
348 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
349 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
350 EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
352 // overlong sequences
353 EXPECT_FALSE(IsStringUTF8("\xc0\x80")); // U+0000
354 EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81")); // "AB"
355 EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80")); // U+0000
356 EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80")); // U+0080
357 EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf")); // U+07ff
358 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D")); // U+000D
359 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91")); // U+0091
360 EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80")); // U+0800
361 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf")); // U+FEFF (BOM)
362 EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf")); // U+003F
363 EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5")); // U+00A5
365 // Beyond U+10FFFF (the upper limit of Unicode codespace)
366 EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80")); // U+110000
367 EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf")); // 5 bytes
368 EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80")); // 6 bytes
370 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
371 EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
372 EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
373 EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
374 EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
376 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
377 EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe")); // U+FFFE)
378 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe")); // U+1FFFE
379 EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf")); // U+10FFFF
380 EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90")); // U+FDD0
381 EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf")); // U+FDEF
382 // Strings in legacy encodings. We can certainly make up strings
383 // in a legacy encoding that are valid in UTF-8, but in real data,
384 // most of them are invalid as UTF-8.
385 EXPECT_FALSE(IsStringUTF8("caf\xe9")); // cafe with U+00E9 in ISO-8859-1
386 EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2")); // U+AC00, U+AC001 in EUC-KR
387 EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e")); // U+4F60 U+597D in Big5
388 // "abc" with U+201[CD] in windows-125[0-8]
389 EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
390 // U+0639 U+064E U+0644 U+064E in ISO-8859-6
391 EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
392 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
393 EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
395 // Check that we support Embedded Nulls. The first uses the canonical UTF-8
396 // representation, and the second uses a 2-byte sequence. The second version
397 // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
398 // given codepoint must be used.
399 static const char kEmbeddedNull[] = "embedded\0null";
400 EXPECT_TRUE(IsStringUTF8(
401 std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));
402 EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
405 TEST(StringUtilTest, ConvertASCII) {
406 static const char* char_cases[] = {
407 "Google Video",
408 "Hello, world\n",
409 "0123ABCDwxyz \a\b\t\r\n!+,.~"
412 static const wchar_t* const wchar_cases[] = {
413 L"Google Video",
414 L"Hello, world\n",
415 L"0123ABCDwxyz \a\b\t\r\n!+,.~"
418 for (size_t i = 0; i < arraysize(char_cases); ++i) {
419 EXPECT_TRUE(IsStringASCII(char_cases[i]));
420 std::wstring wide = ASCIIToWide(char_cases[i]);
421 EXPECT_EQ(wchar_cases[i], wide);
423 EXPECT_TRUE(IsStringASCII(wchar_cases[i]));
424 std::string ascii = WideToASCII(wchar_cases[i]);
425 EXPECT_EQ(char_cases[i], ascii);
428 EXPECT_FALSE(IsStringASCII("Google \x80Video"));
429 EXPECT_FALSE(IsStringASCII(L"Google \x80Video"));
431 // Convert empty strings.
432 std::wstring wempty;
433 std::string empty;
434 EXPECT_EQ(empty, WideToASCII(wempty));
435 EXPECT_EQ(wempty, ASCIIToWide(empty));
437 // Convert strings with an embedded NUL character.
438 const char chars_with_nul[] = "test\0string";
439 const int length_with_nul = arraysize(chars_with_nul) - 1;
440 std::string string_with_nul(chars_with_nul, length_with_nul);
441 std::wstring wide_with_nul = ASCIIToWide(string_with_nul);
442 EXPECT_EQ(static_cast<std::wstring::size_type>(length_with_nul),
443 wide_with_nul.length());
444 std::string narrow_with_nul = WideToASCII(wide_with_nul);
445 EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul),
446 narrow_with_nul.length());
447 EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));
450 TEST(StringUtilTest, ToUpperASCII) {
451 EXPECT_EQ('C', ToUpperASCII('C'));
452 EXPECT_EQ('C', ToUpperASCII('c'));
453 EXPECT_EQ('2', ToUpperASCII('2'));
455 EXPECT_EQ(L'C', ToUpperASCII(L'C'));
456 EXPECT_EQ(L'C', ToUpperASCII(L'c'));
457 EXPECT_EQ(L'2', ToUpperASCII(L'2'));
459 std::string in_place_a("Cc2");
460 StringToUpperASCII(&in_place_a);
461 EXPECT_EQ("CC2", in_place_a);
463 std::wstring in_place_w(L"Cc2");
464 StringToUpperASCII(&in_place_w);
465 EXPECT_EQ(L"CC2", in_place_w);
467 std::string original_a("Cc2");
468 std::string upper_a = StringToUpperASCII(original_a);
469 EXPECT_EQ("CC2", upper_a);
471 std::wstring original_w(L"Cc2");
472 std::wstring upper_w = StringToUpperASCII(original_w);
473 EXPECT_EQ(L"CC2", upper_w);
476 TEST(StringUtilTest, LowerCaseEqualsASCII) {
477 static const struct {
478 const wchar_t* src_w;
479 const char* src_a;
480 const char* dst;
481 } lowercase_cases[] = {
482 { L"FoO", "FoO", "foo" },
483 { L"foo", "foo", "foo" },
484 { L"FOO", "FOO", "foo" },
487 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(lowercase_cases); ++i) {
488 EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_w,
489 lowercase_cases[i].dst));
490 EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a,
491 lowercase_cases[i].dst));
495 TEST(StringUtilTest, FormatBytesUnlocalized) {
496 static const struct {
497 int64 bytes;
498 const char* expected;
499 } cases[] = {
500 // Expected behavior: we show one post-decimal digit when we have
501 // under two pre-decimal digits, except in cases where it makes no
502 // sense (zero or bytes).
503 // Since we switch units once we cross the 1000 mark, this keeps
504 // the display of file sizes or bytes consistently around three
505 // digits.
506 {0, "0 B"},
507 {512, "512 B"},
508 {1024*1024, "1.0 MB"},
509 {1024*1024*1024, "1.0 GB"},
510 {10LL*1024*1024*1024, "10.0 GB"},
511 {99LL*1024*1024*1024, "99.0 GB"},
512 {105LL*1024*1024*1024, "105 GB"},
513 {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"},
514 {~(1LL<<63), "8192 PB"},
516 {99*1024 + 103, "99.1 kB"},
517 {1024*1024 + 103, "1.0 MB"},
518 {1024*1024 + 205 * 1024, "1.2 MB"},
519 {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"},
520 {10LL*1024*1024*1024, "10.0 GB"},
521 {100LL*1024*1024*1024, "100 GB"},
524 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
525 EXPECT_EQ(ASCIIToUTF16(cases[i].expected),
526 FormatBytesUnlocalized(cases[i].bytes));
529 TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
530 static const struct {
531 const char* str;
532 string16::size_type start_offset;
533 const char* find_this;
534 const char* replace_with;
535 const char* expected;
536 } cases[] = {
537 {"aaa", 0, "a", "b", "bbb"},
538 {"abb", 0, "ab", "a", "ab"},
539 {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
540 {"Not found", 0, "x", "0", "Not found"},
541 {"Not found again", 5, "x", "0", "Not found again"},
542 {" Making it much longer ", 0, " ", "Four score and seven years ago",
543 "Four score and seven years agoMakingFour score and seven years agoit"
544 "Four score and seven years agomuchFour score and seven years agolonger"
545 "Four score and seven years ago"},
546 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
547 {"Replace me only me once", 9, "me ", "", "Replace me only once"},
548 {"abababab", 2, "ab", "c", "abccc"},
551 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
552 string16 str = ASCIIToUTF16(cases[i].str);
553 ReplaceSubstringsAfterOffset(&str, cases[i].start_offset,
554 ASCIIToUTF16(cases[i].find_this),
555 ASCIIToUTF16(cases[i].replace_with));
556 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
560 TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
561 static const struct {
562 const char* str;
563 string16::size_type start_offset;
564 const char* find_this;
565 const char* replace_with;
566 const char* expected;
567 } cases[] = {
568 {"aaa", 0, "a", "b", "baa"},
569 {"abb", 0, "ab", "a", "ab"},
570 {"Removing some substrings inging", 0, "ing", "",
571 "Remov some substrings inging"},
572 {"Not found", 0, "x", "0", "Not found"},
573 {"Not found again", 5, "x", "0", "Not found again"},
574 {" Making it much longer ", 0, " ", "Four score and seven years ago",
575 "Four score and seven years agoMaking it much longer "},
576 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
577 {"Replace me only me once", 4, "me ", "", "Replace only me once"},
578 {"abababab", 2, "ab", "c", "abcabab"},
581 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
582 string16 str = ASCIIToUTF16(cases[i].str);
583 ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset,
584 ASCIIToUTF16(cases[i].find_this),
585 ASCIIToUTF16(cases[i].replace_with));
586 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
590 TEST(StringUtilTest, HexDigitToInt) {
591 EXPECT_EQ(0, HexDigitToInt('0'));
592 EXPECT_EQ(1, HexDigitToInt('1'));
593 EXPECT_EQ(2, HexDigitToInt('2'));
594 EXPECT_EQ(3, HexDigitToInt('3'));
595 EXPECT_EQ(4, HexDigitToInt('4'));
596 EXPECT_EQ(5, HexDigitToInt('5'));
597 EXPECT_EQ(6, HexDigitToInt('6'));
598 EXPECT_EQ(7, HexDigitToInt('7'));
599 EXPECT_EQ(8, HexDigitToInt('8'));
600 EXPECT_EQ(9, HexDigitToInt('9'));
601 EXPECT_EQ(10, HexDigitToInt('A'));
602 EXPECT_EQ(11, HexDigitToInt('B'));
603 EXPECT_EQ(12, HexDigitToInt('C'));
604 EXPECT_EQ(13, HexDigitToInt('D'));
605 EXPECT_EQ(14, HexDigitToInt('E'));
606 EXPECT_EQ(15, HexDigitToInt('F'));
608 // Verify the lower case as well.
609 EXPECT_EQ(10, HexDigitToInt('a'));
610 EXPECT_EQ(11, HexDigitToInt('b'));
611 EXPECT_EQ(12, HexDigitToInt('c'));
612 EXPECT_EQ(13, HexDigitToInt('d'));
613 EXPECT_EQ(14, HexDigitToInt('e'));
614 EXPECT_EQ(15, HexDigitToInt('f'));
617 // This checks where we can use the assignment operator for a va_list. We need
618 // a way to do this since Visual C doesn't support va_copy, but assignment on
619 // va_list is not guaranteed to be a copy. See StringAppendVT which uses this
620 // capability.
621 static void VariableArgsFunc(const char* format, ...) {
622 va_list org;
623 va_start(org, format);
625 va_list dup;
626 GG_VA_COPY(dup, org);
627 int i1 = va_arg(org, int);
628 int j1 = va_arg(org, int);
629 char* s1 = va_arg(org, char*);
630 double d1 = va_arg(org, double);
631 va_end(org);
633 int i2 = va_arg(dup, int);
634 int j2 = va_arg(dup, int);
635 char* s2 = va_arg(dup, char*);
636 double d2 = va_arg(dup, double);
638 EXPECT_EQ(i1, i2);
639 EXPECT_EQ(j1, j2);
640 EXPECT_STREQ(s1, s2);
641 EXPECT_EQ(d1, d2);
643 va_end(dup);
646 TEST(StringUtilTest, VAList) {
647 VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21);
650 // Test for Tokenize
651 template <typename STR>
652 void TokenizeTest() {
653 std::vector<STR> r;
654 size_t size;
656 size = Tokenize(STR("This is a string"), STR(" "), &r);
657 EXPECT_EQ(4U, size);
658 ASSERT_EQ(4U, r.size());
659 EXPECT_EQ(r[0], STR("This"));
660 EXPECT_EQ(r[1], STR("is"));
661 EXPECT_EQ(r[2], STR("a"));
662 EXPECT_EQ(r[3], STR("string"));
663 r.clear();
665 size = Tokenize(STR("one,two,three"), STR(","), &r);
666 EXPECT_EQ(3U, size);
667 ASSERT_EQ(3U, r.size());
668 EXPECT_EQ(r[0], STR("one"));
669 EXPECT_EQ(r[1], STR("two"));
670 EXPECT_EQ(r[2], STR("three"));
671 r.clear();
673 size = Tokenize(STR("one,two:three;four"), STR(",:"), &r);
674 EXPECT_EQ(3U, size);
675 ASSERT_EQ(3U, r.size());
676 EXPECT_EQ(r[0], STR("one"));
677 EXPECT_EQ(r[1], STR("two"));
678 EXPECT_EQ(r[2], STR("three;four"));
679 r.clear();
681 size = Tokenize(STR("one,two:three;four"), STR(";,:"), &r);
682 EXPECT_EQ(4U, size);
683 ASSERT_EQ(4U, r.size());
684 EXPECT_EQ(r[0], STR("one"));
685 EXPECT_EQ(r[1], STR("two"));
686 EXPECT_EQ(r[2], STR("three"));
687 EXPECT_EQ(r[3], STR("four"));
688 r.clear();
690 size = Tokenize(STR("one, two, three"), STR(","), &r);
691 EXPECT_EQ(3U, size);
692 ASSERT_EQ(3U, r.size());
693 EXPECT_EQ(r[0], STR("one"));
694 EXPECT_EQ(r[1], STR(" two"));
695 EXPECT_EQ(r[2], STR(" three"));
696 r.clear();
698 size = Tokenize(STR("one, two, three, "), STR(","), &r);
699 EXPECT_EQ(4U, size);
700 ASSERT_EQ(4U, r.size());
701 EXPECT_EQ(r[0], STR("one"));
702 EXPECT_EQ(r[1], STR(" two"));
703 EXPECT_EQ(r[2], STR(" three"));
704 EXPECT_EQ(r[3], STR(" "));
705 r.clear();
707 size = Tokenize(STR("one, two, three,"), STR(","), &r);
708 EXPECT_EQ(3U, size);
709 ASSERT_EQ(3U, r.size());
710 EXPECT_EQ(r[0], STR("one"));
711 EXPECT_EQ(r[1], STR(" two"));
712 EXPECT_EQ(r[2], STR(" three"));
713 r.clear();
715 size = Tokenize(STR(""), STR(","), &r);
716 EXPECT_EQ(0U, size);
717 ASSERT_EQ(0U, r.size());
718 r.clear();
720 size = Tokenize(STR(","), STR(","), &r);
721 EXPECT_EQ(0U, size);
722 ASSERT_EQ(0U, r.size());
723 r.clear();
725 size = Tokenize(STR(",;:."), STR(".:;,"), &r);
726 EXPECT_EQ(0U, size);
727 ASSERT_EQ(0U, r.size());
728 r.clear();
730 size = Tokenize(STR("\t\ta\t"), STR("\t"), &r);
731 EXPECT_EQ(1U, size);
732 ASSERT_EQ(1U, r.size());
733 EXPECT_EQ(r[0], STR("a"));
734 r.clear();
736 size = Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r);
737 EXPECT_EQ(2U, size);
738 ASSERT_EQ(2U, r.size());
739 EXPECT_EQ(r[0], STR("\ta\t"));
740 EXPECT_EQ(r[1], STR("b\tcc"));
741 r.clear();
744 TEST(StringUtilTest, TokenizeStdString) {
745 TokenizeTest<std::string>();
748 TEST(StringUtilTest, TokenizeStringPiece) {
749 TokenizeTest<base::StringPiece>();
752 // Test for JoinString
753 TEST(StringUtilTest, JoinString) {
754 std::vector<std::string> in;
755 EXPECT_EQ("", JoinString(in, ','));
757 in.push_back("a");
758 EXPECT_EQ("a", JoinString(in, ','));
760 in.push_back("b");
761 in.push_back("c");
762 EXPECT_EQ("a,b,c", JoinString(in, ','));
764 in.push_back("");
765 EXPECT_EQ("a,b,c,", JoinString(in, ','));
766 in.push_back(" ");
767 EXPECT_EQ("a|b|c|| ", JoinString(in, '|'));
770 // Test for JoinString overloaded with std::string separator
771 TEST(StringUtilTest, JoinStringWithString) {
772 std::string separator(", ");
773 std::vector<std::string> parts;
774 EXPECT_EQ(std::string(), JoinString(parts, separator));
776 parts.push_back("a");
777 EXPECT_EQ("a", JoinString(parts, separator));
779 parts.push_back("b");
780 parts.push_back("c");
781 EXPECT_EQ("a, b, c", JoinString(parts, separator));
783 parts.push_back("");
784 EXPECT_EQ("a, b, c, ", JoinString(parts, separator));
785 parts.push_back(" ");
786 EXPECT_EQ("a|b|c|| ", JoinString(parts, "|"));
789 // Test for JoinString overloaded with string16 separator
790 TEST(StringUtilTest, JoinStringWithString16) {
791 string16 separator = ASCIIToUTF16(", ");
792 std::vector<string16> parts;
793 EXPECT_EQ(string16(), JoinString(parts, separator));
795 parts.push_back(ASCIIToUTF16("a"));
796 EXPECT_EQ(ASCIIToUTF16("a"), JoinString(parts, separator));
798 parts.push_back(ASCIIToUTF16("b"));
799 parts.push_back(ASCIIToUTF16("c"));
800 EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(parts, separator));
802 parts.push_back(ASCIIToUTF16(""));
803 EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(parts, separator));
804 parts.push_back(ASCIIToUTF16(" "));
805 EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(parts, ASCIIToUTF16("|")));
808 TEST(StringUtilTest, StartsWith) {
809 EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true));
810 EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true));
811 EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false));
812 EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false));
813 EXPECT_FALSE(StartsWithASCII("java", "javascript", true));
814 EXPECT_FALSE(StartsWithASCII("java", "javascript", false));
815 EXPECT_FALSE(StartsWithASCII("", "javascript", false));
816 EXPECT_FALSE(StartsWithASCII("", "javascript", true));
817 EXPECT_TRUE(StartsWithASCII("java", "", false));
818 EXPECT_TRUE(StartsWithASCII("java", "", true));
820 EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", true));
821 EXPECT_FALSE(StartsWith(L"JavaScript:url", L"javascript", true));
822 EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", false));
823 EXPECT_TRUE(StartsWith(L"JavaScript:url", L"javascript", false));
824 EXPECT_FALSE(StartsWith(L"java", L"javascript", true));
825 EXPECT_FALSE(StartsWith(L"java", L"javascript", false));
826 EXPECT_FALSE(StartsWith(L"", L"javascript", false));
827 EXPECT_FALSE(StartsWith(L"", L"javascript", true));
828 EXPECT_TRUE(StartsWith(L"java", L"", false));
829 EXPECT_TRUE(StartsWith(L"java", L"", true));
832 TEST(StringUtilTest, EndsWith) {
833 EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", true));
834 EXPECT_FALSE(EndsWith(L"Foo.Plugin", L".plugin", true));
835 EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", false));
836 EXPECT_TRUE(EndsWith(L"Foo.Plugin", L".plugin", false));
837 EXPECT_FALSE(EndsWith(L".plug", L".plugin", true));
838 EXPECT_FALSE(EndsWith(L".plug", L".plugin", false));
839 EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", true));
840 EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", false));
841 EXPECT_FALSE(EndsWith(L"", L".plugin", false));
842 EXPECT_FALSE(EndsWith(L"", L".plugin", true));
843 EXPECT_TRUE(EndsWith(L"Foo.plugin", L"", false));
844 EXPECT_TRUE(EndsWith(L"Foo.plugin", L"", true));
845 EXPECT_TRUE(EndsWith(L".plugin", L".plugin", false));
846 EXPECT_TRUE(EndsWith(L".plugin", L".plugin", true));
847 EXPECT_TRUE(EndsWith(L"", L"", false));
848 EXPECT_TRUE(EndsWith(L"", L"", true));
851 TEST(StringUtilTest, GetStringFWithOffsets) {
852 std::vector<string16> subst;
853 subst.push_back(ASCIIToUTF16("1"));
854 subst.push_back(ASCIIToUTF16("2"));
855 std::vector<size_t> offsets;
857 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
858 subst,
859 &offsets);
860 EXPECT_EQ(2U, offsets.size());
861 EXPECT_EQ(7U, offsets[0]);
862 EXPECT_EQ(25U, offsets[1]);
863 offsets.clear();
865 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
866 subst,
867 &offsets);
868 EXPECT_EQ(2U, offsets.size());
869 EXPECT_EQ(25U, offsets[0]);
870 EXPECT_EQ(7U, offsets[1]);
871 offsets.clear();
874 TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
875 // Test whether replacestringplaceholders works as expected when there
876 // are fewer inputs than outputs.
877 std::vector<string16> subst;
878 subst.push_back(ASCIIToUTF16("9a"));
879 subst.push_back(ASCIIToUTF16("8b"));
880 subst.push_back(ASCIIToUTF16("7c"));
882 string16 formatted =
883 ReplaceStringPlaceholders(
884 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL);
886 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"));
889 TEST(StringUtilTest, ReplaceStringPlaceholders) {
890 std::vector<string16> subst;
891 subst.push_back(ASCIIToUTF16("9a"));
892 subst.push_back(ASCIIToUTF16("8b"));
893 subst.push_back(ASCIIToUTF16("7c"));
894 subst.push_back(ASCIIToUTF16("6d"));
895 subst.push_back(ASCIIToUTF16("5e"));
896 subst.push_back(ASCIIToUTF16("4f"));
897 subst.push_back(ASCIIToUTF16("3g"));
898 subst.push_back(ASCIIToUTF16("2h"));
899 subst.push_back(ASCIIToUTF16("1i"));
901 string16 formatted =
902 ReplaceStringPlaceholders(
903 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL);
905 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"));
908 TEST(StringUtilTest, ReplaceStringPlaceholdersMoreThan9Replacements) {
909 std::vector<string16> subst;
910 subst.push_back(ASCIIToUTF16("9a"));
911 subst.push_back(ASCIIToUTF16("8b"));
912 subst.push_back(ASCIIToUTF16("7c"));
913 subst.push_back(ASCIIToUTF16("6d"));
914 subst.push_back(ASCIIToUTF16("5e"));
915 subst.push_back(ASCIIToUTF16("4f"));
916 subst.push_back(ASCIIToUTF16("3g"));
917 subst.push_back(ASCIIToUTF16("2h"));
918 subst.push_back(ASCIIToUTF16("1i"));
919 subst.push_back(ASCIIToUTF16("0j"));
920 subst.push_back(ASCIIToUTF16("-1k"));
921 subst.push_back(ASCIIToUTF16("-2l"));
922 subst.push_back(ASCIIToUTF16("-3m"));
923 subst.push_back(ASCIIToUTF16("-4n"));
925 string16 formatted =
926 ReplaceStringPlaceholders(
927 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i,"
928 "$10j,$11k,$12l,$13m,$14n,$1"), subst, NULL);
930 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,"
931 "1ii,0jj,-1kk,-2ll,-3mm,-4nn,9a"));
934 TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
935 std::vector<std::string> subst;
936 subst.push_back("9a");
937 subst.push_back("8b");
938 subst.push_back("7c");
939 subst.push_back("6d");
940 subst.push_back("5e");
941 subst.push_back("4f");
942 subst.push_back("3g");
943 subst.push_back("2h");
944 subst.push_back("1i");
946 std::string formatted =
947 ReplaceStringPlaceholders(
948 "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL);
950 EXPECT_EQ(formatted, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii");
953 TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
954 std::vector<std::string> subst;
955 subst.push_back("a");
956 subst.push_back("b");
957 subst.push_back("c");
958 EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, NULL),
959 "$1 $$2 $$$3");
962 TEST(StringUtilTest, MatchPatternTest) {
963 EXPECT_TRUE(MatchPattern("www.google.com", "*.com"));
964 EXPECT_TRUE(MatchPattern("www.google.com", "*"));
965 EXPECT_FALSE(MatchPattern("www.google.com", "www*.g*.org"));
966 EXPECT_TRUE(MatchPattern("Hello", "H?l?o"));
967 EXPECT_FALSE(MatchPattern("www.google.com", "http://*)"));
968 EXPECT_FALSE(MatchPattern("www.msn.com", "*.COM"));
969 EXPECT_TRUE(MatchPattern("Hello*1234", "He??o\\*1*"));
970 EXPECT_FALSE(MatchPattern("", "*.*"));
971 EXPECT_TRUE(MatchPattern("", "*"));
972 EXPECT_TRUE(MatchPattern("", "?"));
973 EXPECT_TRUE(MatchPattern("", ""));
974 EXPECT_FALSE(MatchPattern("Hello", ""));
975 EXPECT_TRUE(MatchPattern("Hello*", "Hello*"));
976 // Stop after a certain recursion depth.
977 EXPECT_FALSE(MatchPattern("123456789012345678", "?????????????????*"));
979 // Test UTF8 matching.
980 EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0", "*\xe2\x99\xa0"));
981 EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0.", "heart: ?."));
982 EXPECT_TRUE(MatchPattern("hearts: \xe2\x99\xa0\xe2\x99\xa0", "*"));
983 // Invalid sequences should be handled as a single invalid character.
984 EXPECT_TRUE(MatchPattern("invalid: \xef\xbf\xbe", "invalid: ?"));
985 // If the pattern has invalid characters, it shouldn't match anything.
986 EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80"));
988 // Test UTF16 character matching.
989 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"),
990 UTF8ToUTF16("*.com")));
991 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"),
992 UTF8ToUTF16("He??o\\*1*")));
994 // This test verifies that consecutive wild cards are collapsed into 1
995 // wildcard (when this doesn't occur, MatchPattern reaches it's maximum
996 // recursion depth).
997 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello"),
998 UTF8ToUTF16("He********************************o")));
1001 TEST(StringUtilTest, LcpyTest) {
1002 // Test the normal case where we fit in our buffer.
1004 char dst[10];
1005 wchar_t wdst[10];
1006 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1007 EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
1008 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1009 EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
1012 // Test dst_size == 0, nothing should be written to |dst| and we should
1013 // have the equivalent of strlen(src).
1015 char dst[2] = {1, 2};
1016 wchar_t wdst[2] = {1, 2};
1017 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", 0));
1018 EXPECT_EQ(1, dst[0]);
1019 EXPECT_EQ(2, dst[1]);
1020 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", 0));
1021 #if defined(WCHAR_T_IS_UNSIGNED)
1022 EXPECT_EQ(1U, wdst[0]);
1023 EXPECT_EQ(2U, wdst[1]);
1024 #else
1025 EXPECT_EQ(1, wdst[0]);
1026 EXPECT_EQ(2, wdst[1]);
1027 #endif
1030 // Test the case were we _just_ competely fit including the null.
1032 char dst[8];
1033 wchar_t wdst[8];
1034 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1035 EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
1036 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1037 EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
1040 // Test the case were we we are one smaller, so we can't fit the null.
1042 char dst[7];
1043 wchar_t wdst[7];
1044 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1045 EXPECT_EQ(0, memcmp(dst, "abcdef", 7));
1046 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1047 EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7));
1050 // Test the case were we are just too small.
1052 char dst[3];
1053 wchar_t wdst[3];
1054 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1055 EXPECT_EQ(0, memcmp(dst, "ab", 3));
1056 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1057 EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3));
1061 TEST(StringUtilTest, WprintfFormatPortabilityTest) {
1062 static const struct {
1063 const wchar_t* input;
1064 bool portable;
1065 } cases[] = {
1066 { L"%ls", true },
1067 { L"%s", false },
1068 { L"%S", false },
1069 { L"%lS", false },
1070 { L"Hello, %s", false },
1071 { L"%lc", true },
1072 { L"%c", false },
1073 { L"%C", false },
1074 { L"%lC", false },
1075 { L"%ls %s", false },
1076 { L"%s %ls", false },
1077 { L"%s %ls %s", false },
1078 { L"%f", true },
1079 { L"%f %F", false },
1080 { L"%d %D", false },
1081 { L"%o %O", false },
1082 { L"%u %U", false },
1083 { L"%f %d %o %u", true },
1084 { L"%-8d (%02.1f%)", true },
1085 { L"% 10s", false },
1086 { L"% 10ls", true }
1088 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
1089 EXPECT_EQ(cases[i].portable, base::IsWprintfFormatPortable(cases[i].input));
1092 TEST(StringUtilTest, RemoveChars) {
1093 const char* kRemoveChars = "-/+*";
1094 std::string input = "A-+bc/d!*";
1095 EXPECT_TRUE(RemoveChars(input, kRemoveChars, &input));
1096 EXPECT_EQ("Abcd!", input);
1098 // No characters match kRemoveChars.
1099 EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
1100 EXPECT_EQ("Abcd!", input);
1102 // Empty string.
1103 input.clear();
1104 EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
1105 EXPECT_EQ(std::string(), input);
1108 TEST(StringUtilTest, ReplaceChars) {
1109 struct TestData {
1110 const char* input;
1111 const char* replace_chars;
1112 const char* replace_with;
1113 const char* output;
1114 bool result;
1115 } cases[] = {
1116 { "", "", "", "", false },
1117 { "test", "", "", "test", false },
1118 { "test", "", "!", "test", false },
1119 { "test", "z", "!", "test", false },
1120 { "test", "e", "!", "t!st", true },
1121 { "test", "e", "!?", "t!?st", true },
1122 { "test", "ez", "!", "t!st", true },
1123 { "test", "zed", "!?", "t!?st", true },
1124 { "test", "t", "!?", "!?es!?", true },
1125 { "test", "et", "!>", "!>!>s!>", true },
1126 { "test", "zest", "!", "!!!!", true },
1127 { "test", "szt", "!", "!e!!", true },
1128 { "test", "t", "test", "testestest", true },
1131 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
1132 std::string output;
1133 bool result = ReplaceChars(cases[i].input,
1134 cases[i].replace_chars,
1135 cases[i].replace_with,
1136 &output);
1137 EXPECT_EQ(cases[i].result, result);
1138 EXPECT_EQ(cases[i].output, output);
1142 TEST(StringUtilTest, ContainsOnlyChars) {
1143 // Providing an empty list of characters should return false but for the empty
1144 // string.
1145 EXPECT_TRUE(ContainsOnlyChars("", ""));
1146 EXPECT_FALSE(ContainsOnlyChars("Hello", ""));
1148 EXPECT_TRUE(ContainsOnlyChars("", "1234"));
1149 EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
1150 EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
1151 EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
1152 EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));
1155 class WriteIntoTest : public testing::Test {
1156 protected:
1157 static void WritesCorrectly(size_t num_chars) {
1158 std::string buffer;
1159 char kOriginal[] = "supercali";
1160 strncpy(WriteInto(&buffer, num_chars + 1), kOriginal, num_chars);
1161 // Using std::string(buffer.c_str()) instead of |buffer| truncates the
1162 // string at the first \0.
1163 EXPECT_EQ(std::string(kOriginal,
1164 std::min(num_chars, arraysize(kOriginal) - 1)),
1165 std::string(buffer.c_str()));
1166 EXPECT_EQ(num_chars, buffer.size());
1170 TEST_F(WriteIntoTest, WriteInto) {
1171 // Validate that WriteInto reserves enough space and
1172 // sizes a string correctly.
1173 WritesCorrectly(1);
1174 WritesCorrectly(2);
1175 WritesCorrectly(5000);
1177 // Validate that WriteInto doesn't modify other strings
1178 // when using a Copy-on-Write implementation.
1179 const char kLive[] = "live";
1180 const char kDead[] = "dead";
1181 const std::string live = kLive;
1182 std::string dead = live;
1183 strncpy(WriteInto(&dead, 5), kDead, 4);
1184 EXPECT_EQ(kDead, dead);
1185 EXPECT_EQ(4u, dead.size());
1186 EXPECT_EQ(kLive, live);
1187 EXPECT_EQ(4u, live.size());
1190 } // namespace base