Fix style nits in gdata directory.
[chromium-blink-merge.git] / base / string_util_unittest.cc
blob5c51c2af9164634f9e7770dd91f8c6ae9f1f9668
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include <math.h>
6 #include <stdarg.h>
8 #include <limits>
9 #include <sstream>
11 #include "base/basictypes.h"
12 #include "base/string16.h"
13 #include "base/string_util.h"
14 #include "base/utf_string_conversions.h"
15 #include "testing/gmock/include/gmock/gmock.h"
16 #include "testing/gtest/include/gtest/gtest.h"
18 using ::testing::ElementsAre;
20 namespace base {
22 static const struct trim_case {
23 const wchar_t* input;
24 const TrimPositions positions;
25 const wchar_t* output;
26 const TrimPositions return_value;
27 } trim_cases[] = {
28 {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
29 {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
30 {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
31 {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
32 {L"", TRIM_ALL, L"", TRIM_NONE},
33 {L" ", TRIM_LEADING, L"", TRIM_LEADING},
34 {L" ", TRIM_TRAILING, L"", TRIM_TRAILING},
35 {L" ", TRIM_ALL, L"", TRIM_ALL},
36 {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
37 {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
40 static const struct trim_case_ascii {
41 const char* input;
42 const TrimPositions positions;
43 const char* output;
44 const TrimPositions return_value;
45 } trim_cases_ascii[] = {
46 {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
47 {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
48 {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
49 {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
50 {"", TRIM_ALL, "", TRIM_NONE},
51 {" ", TRIM_LEADING, "", TRIM_LEADING},
52 {" ", TRIM_TRAILING, "", TRIM_TRAILING},
53 {" ", TRIM_ALL, "", TRIM_ALL},
54 {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
57 namespace {
59 // Helper used to test TruncateUTF8ToByteSize.
60 bool Truncated(const std::string& input, const size_t byte_size,
61 std::string* output) {
62 size_t prev = input.length();
63 TruncateUTF8ToByteSize(input, byte_size, output);
64 return prev != output->length();
67 } // namespace
69 TEST(StringUtilTest, TruncateUTF8ToByteSize) {
70 std::string output;
72 // Empty strings and invalid byte_size arguments
73 EXPECT_FALSE(Truncated("", 0, &output));
74 EXPECT_EQ(output, "");
75 EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
76 EXPECT_EQ(output, "");
77 EXPECT_FALSE(Truncated("\xe1\x80\xbf", -1, &output));
78 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));
80 // Testing the truncation of valid UTF8 correctly
81 EXPECT_TRUE(Truncated("abc", 2, &output));
82 EXPECT_EQ(output, "ab");
83 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
84 EXPECT_EQ(output.compare("\xc2\x81"), 0);
85 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
86 EXPECT_EQ(output.compare("\xc2\x81"), 0);
87 EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
88 EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);
91 const char array[] = "\x00\x00\xc2\x81\xc2\x81";
92 const std::string array_string(array, arraysize(array));
93 EXPECT_TRUE(Truncated(array_string, 4, &output));
94 EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
98 const char array[] = "\x00\xc2\x81\xc2\x81";
99 const std::string array_string(array, arraysize(array));
100 EXPECT_TRUE(Truncated(array_string, 4, &output));
101 EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);
104 // Testing invalid UTF8
105 EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
106 EXPECT_EQ(output.compare(""), 0);
107 EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
108 EXPECT_EQ(output.compare(""), 0);
109 EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
110 EXPECT_EQ(output.compare(""), 0);
112 // Testing invalid UTF8 mixed with valid UTF8
113 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
114 EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);
115 EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
116 EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);
117 EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
118 10, &output));
119 EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
120 EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
121 10, &output));
122 EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
123 EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
124 EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);
126 // Overlong sequences
127 EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
128 EXPECT_EQ(output.compare(""), 0);
129 EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
130 EXPECT_EQ(output.compare(""), 0);
131 EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
132 EXPECT_EQ(output.compare(""), 0);
133 EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
134 EXPECT_EQ(output.compare(""), 0);
135 EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
136 EXPECT_EQ(output.compare(""), 0);
137 EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
138 EXPECT_EQ(output.compare(""), 0);
139 EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
140 EXPECT_EQ(output.compare(""), 0);
141 EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
142 EXPECT_EQ(output.compare(""), 0);
143 EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
144 EXPECT_EQ(output.compare(""), 0);
145 EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
146 EXPECT_EQ(output.compare(""), 0);
147 EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
148 EXPECT_EQ(output.compare(""), 0);
150 // Beyond U+10FFFF (the upper limit of Unicode codespace)
151 EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
152 EXPECT_EQ(output.compare(""), 0);
153 EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
154 EXPECT_EQ(output.compare(""), 0);
155 EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
156 EXPECT_EQ(output.compare(""), 0);
158 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
159 EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
160 EXPECT_EQ(output.compare(""), 0);
161 EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
162 EXPECT_EQ(output.compare(""), 0);
165 const char array[] = "\x00\x00\xfe\xff";
166 const std::string array_string(array, arraysize(array));
167 EXPECT_TRUE(Truncated(array_string, 4, &output));
168 EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);
171 // Variants on the previous test
173 const char array[] = "\xff\xfe\x00\x00";
174 const std::string array_string(array, 4);
175 EXPECT_FALSE(Truncated(array_string, 4, &output));
176 EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
179 const char array[] = "\xff\x00\x00\xfe";
180 const std::string array_string(array, arraysize(array));
181 EXPECT_TRUE(Truncated(array_string, 4, &output));
182 EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);
185 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
186 EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
187 EXPECT_EQ(output.compare(""), 0);
188 EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
189 EXPECT_EQ(output.compare(""), 0);
190 EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
191 EXPECT_EQ(output.compare(""), 0);
192 EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
193 EXPECT_EQ(output.compare(""), 0);
194 EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
195 EXPECT_EQ(output.compare(""), 0);
197 // Strings in legacy encodings that are valid in UTF-8, but
198 // are invalid as UTF-8 in real data.
199 EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
200 EXPECT_EQ(output.compare("caf"), 0);
201 EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
202 EXPECT_EQ(output.compare(""), 0);
203 EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
204 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
205 EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
206 &output));
207 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
209 // Testing using the same string as input and output.
210 EXPECT_FALSE(Truncated(output, 4, &output));
211 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
212 EXPECT_TRUE(Truncated(output, 3, &output));
213 EXPECT_EQ(output.compare("\xa7\x41"), 0);
215 // "abc" with U+201[CD] in windows-125[0-8]
216 EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
217 EXPECT_EQ(output.compare("\x93" "abc"), 0);
219 // U+0639 U+064E U+0644 U+064E in ISO-8859-6
220 EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
221 EXPECT_EQ(output.compare(""), 0);
223 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
224 EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
225 EXPECT_EQ(output.compare(""), 0);
228 TEST(StringUtilTest, TrimWhitespace) {
229 string16 output; // Allow contents to carry over to next testcase
230 for (size_t i = 0; i < arraysize(trim_cases); ++i) {
231 const trim_case& value = trim_cases[i];
232 EXPECT_EQ(value.return_value,
233 TrimWhitespace(WideToUTF16(value.input), value.positions,
234 &output));
235 EXPECT_EQ(WideToUTF16(value.output), output);
238 // Test that TrimWhitespace() can take the same string for input and output
239 output = ASCIIToUTF16(" This is a test \r\n");
240 EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
241 EXPECT_EQ(ASCIIToUTF16("This is a test"), output);
243 // Once more, but with a string of whitespace
244 output = ASCIIToUTF16(" \r\n");
245 EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
246 EXPECT_EQ(string16(), output);
248 std::string output_ascii;
249 for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) {
250 const trim_case_ascii& value = trim_cases_ascii[i];
251 EXPECT_EQ(value.return_value,
252 TrimWhitespace(value.input, value.positions, &output_ascii));
253 EXPECT_EQ(value.output, output_ascii);
// Wide-character fixtures for the CollapseWhitespace test: the input text,
// the |trim| flag handed to CollapseWhitespace(), and the expected result.
// Judging by the rows below, trim == true expects whitespace runs that
// contain a line break to vanish entirely rather than become one space.
static const struct collapse_case {
  const wchar_t* input;
  const bool trim;
  const wchar_t* output;
} collapse_cases[] = {
  // trim == false
  { L" Google Video ", false, L"Google Video" },
  { L"Google Video", false, L"Google Video" },
  { L"", false, L"" },
  { L" ", false, L"" },
  { L"\t\rTest String\n", false, L"Test String" },
  { L"\x2002Test String\x00A0\x3000", false, L"Test String" },
  { L" Test \n \t String ", false, L"Test String" },
  { L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String" },
  { L" Test String", false, L"Test String" },
  { L"Test String ", false, L"Test String" },
  { L"Test String", false, L"Test String" },
  // trim == true
  { L"", true, L"" },
  { L"\n", true, L"" },
  { L" \r ", true, L"" },
  { L"\nFoo", true, L"Foo" },
  { L"\r Foo ", true, L"Foo" },
  { L" Foo bar ", true, L"Foo bar" },
  { L" \tFoo bar \n", true, L"Foo bar" },
  { L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f" },
};
283 TEST(StringUtilTest, CollapseWhitespace) {
284 for (size_t i = 0; i < arraysize(collapse_cases); ++i) {
285 const collapse_case& value = collapse_cases[i];
286 EXPECT_EQ(value.output, CollapseWhitespace(value.input, value.trim));
// Narrow-string fixtures for the CollapseWhitespaceASCII test: the input
// text, the |trim| flag handed to CollapseWhitespaceASCII(), and the expected
// result.
static const struct collapse_case_ascii {
  const char* input;
  const bool trim;
  const char* output;
} collapse_cases_ascii[] = {
  // trim == false
  { " Google Video ", false, "Google Video" },
  { "Google Video", false, "Google Video" },
  { "", false, "" },
  { " ", false, "" },
  { "\t\rTest String\n", false, "Test String" },
  { " Test \n \t String ", false, "Test String" },
  { " Test String", false, "Test String" },
  { "Test String ", false, "Test String" },
  { "Test String", false, "Test String" },
  // trim == true
  { "", true, "" },
  { "\n", true, "" },
  { " \r ", true, "" },
  { "\nFoo", true, "Foo" },
  { "\r Foo ", true, "Foo" },
  { " Foo bar ", true, "Foo bar" },
  { " \tFoo bar \n", true, "Foo bar" },
  { " a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f" },
};
314 TEST(StringUtilTest, CollapseWhitespaceASCII) {
315 for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) {
316 const collapse_case_ascii& value = collapse_cases_ascii[i];
317 EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim));
321 TEST(StringUtilTest, ContainsOnlyWhitespaceASCII) {
322 EXPECT_TRUE(ContainsOnlyWhitespaceASCII(""));
323 EXPECT_TRUE(ContainsOnlyWhitespaceASCII(" "));
324 EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t"));
325 EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t \r \n "));
326 EXPECT_FALSE(ContainsOnlyWhitespaceASCII("a"));
327 EXPECT_FALSE(ContainsOnlyWhitespaceASCII("\thello\r \n "));
330 TEST(StringUtilTest, ContainsOnlyWhitespace) {
331 EXPECT_TRUE(ContainsOnlyWhitespace(string16()));
332 EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16(" ")));
333 EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t")));
334 EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t \r \n ")));
335 EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("a")));
336 EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("\thello\r \n ")));
339 TEST(StringUtilTest, IsStringUTF8) {
340 EXPECT_TRUE(IsStringUTF8("abc"));
341 EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
342 EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
343 EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
344 EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
345 EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc")); // UTF-8 BOM
347 // surrogate code points
348 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
349 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
350 EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
352 // overlong sequences
353 EXPECT_FALSE(IsStringUTF8("\xc0\x80")); // U+0000
354 EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81")); // "AB"
355 EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80")); // U+0000
356 EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80")); // U+0080
357 EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf")); // U+07ff
358 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D")); // U+000D
359 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91")); // U+0091
360 EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80")); // U+0800
361 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf")); // U+FEFF (BOM)
362 EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf")); // U+003F
363 EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5")); // U+00A5
365 // Beyond U+10FFFF (the upper limit of Unicode codespace)
366 EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80")); // U+110000
367 EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf")); // 5 bytes
368 EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80")); // 6 bytes
370 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
371 EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
372 EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
373 EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
374 EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
376 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
377 EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe")); // U+FFFE)
378 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe")); // U+1FFFE
379 EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf")); // U+10FFFF
380 EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90")); // U+FDD0
381 EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf")); // U+FDEF
382 // Strings in legacy encodings. We can certainly make up strings
383 // in a legacy encoding that are valid in UTF-8, but in real data,
384 // most of them are invalid as UTF-8.
385 EXPECT_FALSE(IsStringUTF8("caf\xe9")); // cafe with U+00E9 in ISO-8859-1
386 EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2")); // U+AC00, U+AC001 in EUC-KR
387 EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e")); // U+4F60 U+597D in Big5
388 // "abc" with U+201[CD] in windows-125[0-8]
389 EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
390 // U+0639 U+064E U+0644 U+064E in ISO-8859-6
391 EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
392 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
393 EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
395 // Check that we support Embedded Nulls. The first uses the canonical UTF-8
396 // representation, and the second uses a 2-byte sequence. The second version
397 // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
398 // given codepoint must be used.
399 static const char kEmbeddedNull[] = "embedded\0null";
400 EXPECT_TRUE(IsStringUTF8(
401 std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));
402 EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
405 TEST(StringUtilTest, ConvertASCII) {
406 static const char* char_cases[] = {
407 "Google Video",
408 "Hello, world\n",
409 "0123ABCDwxyz \a\b\t\r\n!+,.~"
412 static const wchar_t* const wchar_cases[] = {
413 L"Google Video",
414 L"Hello, world\n",
415 L"0123ABCDwxyz \a\b\t\r\n!+,.~"
418 for (size_t i = 0; i < arraysize(char_cases); ++i) {
419 EXPECT_TRUE(IsStringASCII(char_cases[i]));
420 std::wstring wide = ASCIIToWide(char_cases[i]);
421 EXPECT_EQ(wchar_cases[i], wide);
423 EXPECT_TRUE(IsStringASCII(wchar_cases[i]));
424 std::string ascii = WideToASCII(wchar_cases[i]);
425 EXPECT_EQ(char_cases[i], ascii);
428 EXPECT_FALSE(IsStringASCII("Google \x80Video"));
429 EXPECT_FALSE(IsStringASCII(L"Google \x80Video"));
431 // Convert empty strings.
432 std::wstring wempty;
433 std::string empty;
434 EXPECT_EQ(empty, WideToASCII(wempty));
435 EXPECT_EQ(wempty, ASCIIToWide(empty));
437 // Convert strings with an embedded NUL character.
438 const char chars_with_nul[] = "test\0string";
439 const int length_with_nul = arraysize(chars_with_nul) - 1;
440 std::string string_with_nul(chars_with_nul, length_with_nul);
441 std::wstring wide_with_nul = ASCIIToWide(string_with_nul);
442 EXPECT_EQ(static_cast<std::wstring::size_type>(length_with_nul),
443 wide_with_nul.length());
444 std::string narrow_with_nul = WideToASCII(wide_with_nul);
445 EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul),
446 narrow_with_nul.length());
447 EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));
450 TEST(StringUtilTest, ToUpperASCII) {
451 EXPECT_EQ('C', ToUpperASCII('C'));
452 EXPECT_EQ('C', ToUpperASCII('c'));
453 EXPECT_EQ('2', ToUpperASCII('2'));
455 EXPECT_EQ(L'C', ToUpperASCII(L'C'));
456 EXPECT_EQ(L'C', ToUpperASCII(L'c'));
457 EXPECT_EQ(L'2', ToUpperASCII(L'2'));
459 std::string in_place_a("Cc2");
460 StringToUpperASCII(&in_place_a);
461 EXPECT_EQ("CC2", in_place_a);
463 std::wstring in_place_w(L"Cc2");
464 StringToUpperASCII(&in_place_w);
465 EXPECT_EQ(L"CC2", in_place_w);
467 std::string original_a("Cc2");
468 std::string upper_a = StringToUpperASCII(original_a);
469 EXPECT_EQ("CC2", upper_a);
471 std::wstring original_w(L"Cc2");
472 std::wstring upper_w = StringToUpperASCII(original_w);
473 EXPECT_EQ(L"CC2", upper_w);
476 TEST(StringUtilTest, LowerCaseEqualsASCII) {
477 static const struct {
478 const wchar_t* src_w;
479 const char* src_a;
480 const char* dst;
481 } lowercase_cases[] = {
482 { L"FoO", "FoO", "foo" },
483 { L"foo", "foo", "foo" },
484 { L"FOO", "FOO", "foo" },
487 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(lowercase_cases); ++i) {
488 EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_w,
489 lowercase_cases[i].dst));
490 EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a,
491 lowercase_cases[i].dst));
495 TEST(StringUtilTest, FormatBytesUnlocalized) {
496 static const struct {
497 int64 bytes;
498 const char* expected;
499 } cases[] = {
500 // Expected behavior: we show one post-decimal digit when we have
501 // under two pre-decimal digits, except in cases where it makes no
502 // sense (zero or bytes).
503 // Since we switch units once we cross the 1000 mark, this keeps
504 // the display of file sizes or bytes consistently around three
505 // digits.
506 {0, "0 B"},
507 {512, "512 B"},
508 {1024*1024, "1.0 MB"},
509 {1024*1024*1024, "1.0 GB"},
510 {10LL*1024*1024*1024, "10.0 GB"},
511 {99LL*1024*1024*1024, "99.0 GB"},
512 {105LL*1024*1024*1024, "105 GB"},
513 {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"},
514 {~(1LL<<63), "8192 PB"},
516 {99*1024 + 103, "99.1 kB"},
517 {1024*1024 + 103, "1.0 MB"},
518 {1024*1024 + 205 * 1024, "1.2 MB"},
519 {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"},
520 {10LL*1024*1024*1024, "10.0 GB"},
521 {100LL*1024*1024*1024, "100 GB"},
524 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
525 EXPECT_EQ(ASCIIToUTF16(cases[i].expected),
526 FormatBytesUnlocalized(cases[i].bytes));
529 TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
530 static const struct {
531 const char* str;
532 string16::size_type start_offset;
533 const char* find_this;
534 const char* replace_with;
535 const char* expected;
536 } cases[] = {
537 {"aaa", 0, "a", "b", "bbb"},
538 {"abb", 0, "ab", "a", "ab"},
539 {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
540 {"Not found", 0, "x", "0", "Not found"},
541 {"Not found again", 5, "x", "0", "Not found again"},
542 {" Making it much longer ", 0, " ", "Four score and seven years ago",
543 "Four score and seven years agoMakingFour score and seven years agoit"
544 "Four score and seven years agomuchFour score and seven years agolonger"
545 "Four score and seven years ago"},
546 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
547 {"Replace me only me once", 9, "me ", "", "Replace me only once"},
548 {"abababab", 2, "ab", "c", "abccc"},
551 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
552 string16 str = ASCIIToUTF16(cases[i].str);
553 ReplaceSubstringsAfterOffset(&str, cases[i].start_offset,
554 ASCIIToUTF16(cases[i].find_this),
555 ASCIIToUTF16(cases[i].replace_with));
556 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
560 TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
561 static const struct {
562 const char* str;
563 string16::size_type start_offset;
564 const char* find_this;
565 const char* replace_with;
566 const char* expected;
567 } cases[] = {
568 {"aaa", 0, "a", "b", "baa"},
569 {"abb", 0, "ab", "a", "ab"},
570 {"Removing some substrings inging", 0, "ing", "",
571 "Remov some substrings inging"},
572 {"Not found", 0, "x", "0", "Not found"},
573 {"Not found again", 5, "x", "0", "Not found again"},
574 {" Making it much longer ", 0, " ", "Four score and seven years ago",
575 "Four score and seven years agoMaking it much longer "},
576 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
577 {"Replace me only me once", 4, "me ", "", "Replace only me once"},
578 {"abababab", 2, "ab", "c", "abcabab"},
581 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
582 string16 str = ASCIIToUTF16(cases[i].str);
583 ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset,
584 ASCIIToUTF16(cases[i].find_this),
585 ASCIIToUTF16(cases[i].replace_with));
586 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
590 TEST(StringUtilTest, HexDigitToInt) {
591 EXPECT_EQ(0, HexDigitToInt('0'));
592 EXPECT_EQ(1, HexDigitToInt('1'));
593 EXPECT_EQ(2, HexDigitToInt('2'));
594 EXPECT_EQ(3, HexDigitToInt('3'));
595 EXPECT_EQ(4, HexDigitToInt('4'));
596 EXPECT_EQ(5, HexDigitToInt('5'));
597 EXPECT_EQ(6, HexDigitToInt('6'));
598 EXPECT_EQ(7, HexDigitToInt('7'));
599 EXPECT_EQ(8, HexDigitToInt('8'));
600 EXPECT_EQ(9, HexDigitToInt('9'));
601 EXPECT_EQ(10, HexDigitToInt('A'));
602 EXPECT_EQ(11, HexDigitToInt('B'));
603 EXPECT_EQ(12, HexDigitToInt('C'));
604 EXPECT_EQ(13, HexDigitToInt('D'));
605 EXPECT_EQ(14, HexDigitToInt('E'));
606 EXPECT_EQ(15, HexDigitToInt('F'));
608 // Verify the lower case as well.
609 EXPECT_EQ(10, HexDigitToInt('a'));
610 EXPECT_EQ(11, HexDigitToInt('b'));
611 EXPECT_EQ(12, HexDigitToInt('c'));
612 EXPECT_EQ(13, HexDigitToInt('d'));
613 EXPECT_EQ(14, HexDigitToInt('e'));
614 EXPECT_EQ(15, HexDigitToInt('f'));
617 // This checks where we can use the assignment operator for a va_list. We need
618 // a way to do this since Visual C doesn't support va_copy, but assignment on
619 // va_list is not guaranteed to be a copy. See StringAppendVT which uses this
620 // capability.
621 static void VariableArgsFunc(const char* format, ...) {
622 va_list org;
623 va_start(org, format);
625 va_list dup;
626 GG_VA_COPY(dup, org);
627 int i1 = va_arg(org, int);
628 int j1 = va_arg(org, int);
629 char* s1 = va_arg(org, char*);
630 double d1 = va_arg(org, double);
631 va_end(org);
633 int i2 = va_arg(dup, int);
634 int j2 = va_arg(dup, int);
635 char* s2 = va_arg(dup, char*);
636 double d2 = va_arg(dup, double);
638 EXPECT_EQ(i1, i2);
639 EXPECT_EQ(j1, j2);
640 EXPECT_STREQ(s1, s2);
641 EXPECT_EQ(d1, d2);
643 va_end(dup);
646 TEST(StringUtilTest, VAList) {
647 VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21);
650 // Test for Tokenize
651 template <typename STR>
652 void TokenizeTest() {
653 std::vector<STR> r;
654 size_t size;
656 size = Tokenize(STR("This is a string"), STR(" "), &r);
657 EXPECT_EQ(4U, size);
658 ASSERT_EQ(4U, r.size());
659 EXPECT_EQ(r[0], STR("This"));
660 EXPECT_EQ(r[1], STR("is"));
661 EXPECT_EQ(r[2], STR("a"));
662 EXPECT_EQ(r[3], STR("string"));
663 r.clear();
665 size = Tokenize(STR("one,two,three"), STR(","), &r);
666 EXPECT_EQ(3U, size);
667 ASSERT_EQ(3U, r.size());
668 EXPECT_EQ(r[0], STR("one"));
669 EXPECT_EQ(r[1], STR("two"));
670 EXPECT_EQ(r[2], STR("three"));
671 r.clear();
673 size = Tokenize(STR("one,two:three;four"), STR(",:"), &r);
674 EXPECT_EQ(3U, size);
675 ASSERT_EQ(3U, r.size());
676 EXPECT_EQ(r[0], STR("one"));
677 EXPECT_EQ(r[1], STR("two"));
678 EXPECT_EQ(r[2], STR("three;four"));
679 r.clear();
681 size = Tokenize(STR("one,two:three;four"), STR(";,:"), &r);
682 EXPECT_EQ(4U, size);
683 ASSERT_EQ(4U, r.size());
684 EXPECT_EQ(r[0], STR("one"));
685 EXPECT_EQ(r[1], STR("two"));
686 EXPECT_EQ(r[2], STR("three"));
687 EXPECT_EQ(r[3], STR("four"));
688 r.clear();
690 size = Tokenize(STR("one, two, three"), STR(","), &r);
691 EXPECT_EQ(3U, size);
692 ASSERT_EQ(3U, r.size());
693 EXPECT_EQ(r[0], STR("one"));
694 EXPECT_EQ(r[1], STR(" two"));
695 EXPECT_EQ(r[2], STR(" three"));
696 r.clear();
698 size = Tokenize(STR("one, two, three, "), STR(","), &r);
699 EXPECT_EQ(4U, size);
700 ASSERT_EQ(4U, r.size());
701 EXPECT_EQ(r[0], STR("one"));
702 EXPECT_EQ(r[1], STR(" two"));
703 EXPECT_EQ(r[2], STR(" three"));
704 EXPECT_EQ(r[3], STR(" "));
705 r.clear();
707 size = Tokenize(STR("one, two, three,"), STR(","), &r);
708 EXPECT_EQ(3U, size);
709 ASSERT_EQ(3U, r.size());
710 EXPECT_EQ(r[0], STR("one"));
711 EXPECT_EQ(r[1], STR(" two"));
712 EXPECT_EQ(r[2], STR(" three"));
713 r.clear();
715 size = Tokenize(STR(""), STR(","), &r);
716 EXPECT_EQ(0U, size);
717 ASSERT_EQ(0U, r.size());
718 r.clear();
720 size = Tokenize(STR(","), STR(","), &r);
721 EXPECT_EQ(0U, size);
722 ASSERT_EQ(0U, r.size());
723 r.clear();
725 size = Tokenize(STR(",;:."), STR(".:;,"), &r);
726 EXPECT_EQ(0U, size);
727 ASSERT_EQ(0U, r.size());
728 r.clear();
730 size = Tokenize(STR("\t\ta\t"), STR("\t"), &r);
731 EXPECT_EQ(1U, size);
732 ASSERT_EQ(1U, r.size());
733 EXPECT_EQ(r[0], STR("a"));
734 r.clear();
736 size = Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r);
737 EXPECT_EQ(2U, size);
738 ASSERT_EQ(2U, r.size());
739 EXPECT_EQ(r[0], STR("\ta\t"));
740 EXPECT_EQ(r[1], STR("b\tcc"));
741 r.clear();
744 TEST(StringUtilTest, TokenizeStdString) {
745 TokenizeTest<std::string>();
748 TEST(StringUtilTest, TokenizeStringPiece) {
749 TokenizeTest<base::StringPiece>();
752 // Test for JoinString
753 TEST(StringUtilTest, JoinString) {
754 std::vector<std::string> in;
755 EXPECT_EQ("", JoinString(in, ','));
757 in.push_back("a");
758 EXPECT_EQ("a", JoinString(in, ','));
760 in.push_back("b");
761 in.push_back("c");
762 EXPECT_EQ("a,b,c", JoinString(in, ','));
764 in.push_back("");
765 EXPECT_EQ("a,b,c,", JoinString(in, ','));
766 in.push_back(" ");
767 EXPECT_EQ("a|b|c|| ", JoinString(in, '|'));
770 TEST(StringUtilTest, StartsWith) {
771 EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true));
772 EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true));
773 EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false));
774 EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false));
775 EXPECT_FALSE(StartsWithASCII("java", "javascript", true));
776 EXPECT_FALSE(StartsWithASCII("java", "javascript", false));
777 EXPECT_FALSE(StartsWithASCII("", "javascript", false));
778 EXPECT_FALSE(StartsWithASCII("", "javascript", true));
779 EXPECT_TRUE(StartsWithASCII("java", "", false));
780 EXPECT_TRUE(StartsWithASCII("java", "", true));
782 EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", true));
783 EXPECT_FALSE(StartsWith(L"JavaScript:url", L"javascript", true));
784 EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", false));
785 EXPECT_TRUE(StartsWith(L"JavaScript:url", L"javascript", false));
786 EXPECT_FALSE(StartsWith(L"java", L"javascript", true));
787 EXPECT_FALSE(StartsWith(L"java", L"javascript", false));
788 EXPECT_FALSE(StartsWith(L"", L"javascript", false));
789 EXPECT_FALSE(StartsWith(L"", L"javascript", true));
790 EXPECT_TRUE(StartsWith(L"java", L"", false));
791 EXPECT_TRUE(StartsWith(L"java", L"", true));
794 TEST(StringUtilTest, EndsWith) {
795 EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", true));
796 EXPECT_FALSE(EndsWith(L"Foo.Plugin", L".plugin", true));
797 EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", false));
798 EXPECT_TRUE(EndsWith(L"Foo.Plugin", L".plugin", false));
799 EXPECT_FALSE(EndsWith(L".plug", L".plugin", true));
800 EXPECT_FALSE(EndsWith(L".plug", L".plugin", false));
801 EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", true));
802 EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", false));
803 EXPECT_FALSE(EndsWith(L"", L".plugin", false));
804 EXPECT_FALSE(EndsWith(L"", L".plugin", true));
805 EXPECT_TRUE(EndsWith(L"Foo.plugin", L"", false));
806 EXPECT_TRUE(EndsWith(L"Foo.plugin", L"", true));
807 EXPECT_TRUE(EndsWith(L".plugin", L".plugin", false));
808 EXPECT_TRUE(EndsWith(L".plugin", L".plugin", true));
809 EXPECT_TRUE(EndsWith(L"", L"", false));
810 EXPECT_TRUE(EndsWith(L"", L"", true));
813 TEST(StringUtilTest, GetStringFWithOffsets) {
814 std::vector<string16> subst;
815 subst.push_back(ASCIIToUTF16("1"));
816 subst.push_back(ASCIIToUTF16("2"));
817 std::vector<size_t> offsets;
819 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
820 subst,
821 &offsets);
822 EXPECT_EQ(2U, offsets.size());
823 EXPECT_EQ(7U, offsets[0]);
824 EXPECT_EQ(25U, offsets[1]);
825 offsets.clear();
827 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
828 subst,
829 &offsets);
830 EXPECT_EQ(2U, offsets.size());
831 EXPECT_EQ(25U, offsets[0]);
832 EXPECT_EQ(7U, offsets[1]);
833 offsets.clear();
836 TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
837 // Test whether replacestringplaceholders works as expected when there
838 // are fewer inputs than outputs.
839 std::vector<string16> subst;
840 subst.push_back(ASCIIToUTF16("9a"));
841 subst.push_back(ASCIIToUTF16("8b"));
842 subst.push_back(ASCIIToUTF16("7c"));
844 string16 formatted =
845 ReplaceStringPlaceholders(
846 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL);
848 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"));
851 TEST(StringUtilTest, ReplaceStringPlaceholders) {
852 std::vector<string16> subst;
853 subst.push_back(ASCIIToUTF16("9a"));
854 subst.push_back(ASCIIToUTF16("8b"));
855 subst.push_back(ASCIIToUTF16("7c"));
856 subst.push_back(ASCIIToUTF16("6d"));
857 subst.push_back(ASCIIToUTF16("5e"));
858 subst.push_back(ASCIIToUTF16("4f"));
859 subst.push_back(ASCIIToUTF16("3g"));
860 subst.push_back(ASCIIToUTF16("2h"));
861 subst.push_back(ASCIIToUTF16("1i"));
863 string16 formatted =
864 ReplaceStringPlaceholders(
865 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL);
867 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"));
870 TEST(StringUtilTest, ReplaceStringPlaceholdersMoreThan9Replacements) {
871 std::vector<string16> subst;
872 subst.push_back(ASCIIToUTF16("9a"));
873 subst.push_back(ASCIIToUTF16("8b"));
874 subst.push_back(ASCIIToUTF16("7c"));
875 subst.push_back(ASCIIToUTF16("6d"));
876 subst.push_back(ASCIIToUTF16("5e"));
877 subst.push_back(ASCIIToUTF16("4f"));
878 subst.push_back(ASCIIToUTF16("3g"));
879 subst.push_back(ASCIIToUTF16("2h"));
880 subst.push_back(ASCIIToUTF16("1i"));
881 subst.push_back(ASCIIToUTF16("0j"));
882 subst.push_back(ASCIIToUTF16("-1k"));
883 subst.push_back(ASCIIToUTF16("-2l"));
884 subst.push_back(ASCIIToUTF16("-3m"));
885 subst.push_back(ASCIIToUTF16("-4n"));
887 string16 formatted =
888 ReplaceStringPlaceholders(
889 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i,"
890 "$10j,$11k,$12l,$13m,$14n,$1"), subst, NULL);
892 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,"
893 "1ii,0jj,-1kk,-2ll,-3mm,-4nn,9a"));
896 TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
897 std::vector<std::string> subst;
898 subst.push_back("9a");
899 subst.push_back("8b");
900 subst.push_back("7c");
901 subst.push_back("6d");
902 subst.push_back("5e");
903 subst.push_back("4f");
904 subst.push_back("3g");
905 subst.push_back("2h");
906 subst.push_back("1i");
908 std::string formatted =
909 ReplaceStringPlaceholders(
910 "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL);
912 EXPECT_EQ(formatted, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii");
915 TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
916 std::vector<std::string> subst;
917 subst.push_back("a");
918 subst.push_back("b");
919 subst.push_back("c");
920 EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, NULL),
921 "$1 $$2 $$$3");
924 TEST(StringUtilTest, MatchPatternTest) {
925 EXPECT_TRUE(MatchPattern("www.google.com", "*.com"));
926 EXPECT_TRUE(MatchPattern("www.google.com", "*"));
927 EXPECT_FALSE(MatchPattern("www.google.com", "www*.g*.org"));
928 EXPECT_TRUE(MatchPattern("Hello", "H?l?o"));
929 EXPECT_FALSE(MatchPattern("www.google.com", "http://*)"));
930 EXPECT_FALSE(MatchPattern("www.msn.com", "*.COM"));
931 EXPECT_TRUE(MatchPattern("Hello*1234", "He??o\\*1*"));
932 EXPECT_FALSE(MatchPattern("", "*.*"));
933 EXPECT_TRUE(MatchPattern("", "*"));
934 EXPECT_TRUE(MatchPattern("", "?"));
935 EXPECT_TRUE(MatchPattern("", ""));
936 EXPECT_FALSE(MatchPattern("Hello", ""));
937 EXPECT_TRUE(MatchPattern("Hello*", "Hello*"));
938 // Stop after a certain recursion depth.
939 EXPECT_FALSE(MatchPattern("123456789012345678", "?????????????????*"));
941 // Test UTF8 matching.
942 EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0", "*\xe2\x99\xa0"));
943 EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0.", "heart: ?."));
944 EXPECT_TRUE(MatchPattern("hearts: \xe2\x99\xa0\xe2\x99\xa0", "*"));
945 // Invalid sequences should be handled as a single invalid character.
946 EXPECT_TRUE(MatchPattern("invalid: \xef\xbf\xbe", "invalid: ?"));
947 // If the pattern has invalid characters, it shouldn't match anything.
948 EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80"));
950 // Test UTF16 character matching.
951 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"),
952 UTF8ToUTF16("*.com")));
953 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"),
954 UTF8ToUTF16("He??o\\*1*")));
956 // This test verifies that consecutive wild cards are collapsed into 1
957 // wildcard (when this doesn't occur, MatchPattern reaches it's maximum
958 // recursion depth).
959 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello"),
960 UTF8ToUTF16("He********************************o")));
963 TEST(StringUtilTest, LcpyTest) {
964 // Test the normal case where we fit in our buffer.
966 char dst[10];
967 wchar_t wdst[10];
968 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
969 EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
970 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
971 EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
974 // Test dst_size == 0, nothing should be written to |dst| and we should
975 // have the equivalent of strlen(src).
977 char dst[2] = {1, 2};
978 wchar_t wdst[2] = {1, 2};
979 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", 0));
980 EXPECT_EQ(1, dst[0]);
981 EXPECT_EQ(2, dst[1]);
982 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", 0));
983 #if defined(WCHAR_T_IS_UNSIGNED)
984 EXPECT_EQ(1U, wdst[0]);
985 EXPECT_EQ(2U, wdst[1]);
986 #else
987 EXPECT_EQ(1, wdst[0]);
988 EXPECT_EQ(2, wdst[1]);
989 #endif
992 // Test the case were we _just_ competely fit including the null.
994 char dst[8];
995 wchar_t wdst[8];
996 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
997 EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
998 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
999 EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
1002 // Test the case were we we are one smaller, so we can't fit the null.
1004 char dst[7];
1005 wchar_t wdst[7];
1006 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1007 EXPECT_EQ(0, memcmp(dst, "abcdef", 7));
1008 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1009 EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7));
1012 // Test the case were we are just too small.
1014 char dst[3];
1015 wchar_t wdst[3];
1016 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1017 EXPECT_EQ(0, memcmp(dst, "ab", 3));
1018 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1019 EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3));
1023 TEST(StringUtilTest, WprintfFormatPortabilityTest) {
1024 static const struct {
1025 const wchar_t* input;
1026 bool portable;
1027 } cases[] = {
1028 { L"%ls", true },
1029 { L"%s", false },
1030 { L"%S", false },
1031 { L"%lS", false },
1032 { L"Hello, %s", false },
1033 { L"%lc", true },
1034 { L"%c", false },
1035 { L"%C", false },
1036 { L"%lC", false },
1037 { L"%ls %s", false },
1038 { L"%s %ls", false },
1039 { L"%s %ls %s", false },
1040 { L"%f", true },
1041 { L"%f %F", false },
1042 { L"%d %D", false },
1043 { L"%o %O", false },
1044 { L"%u %U", false },
1045 { L"%f %d %o %u", true },
1046 { L"%-8d (%02.1f%)", true },
1047 { L"% 10s", false },
1048 { L"% 10ls", true }
1050 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
1051 EXPECT_EQ(cases[i].portable, base::IsWprintfFormatPortable(cases[i].input));
1054 TEST(StringUtilTest, RemoveChars) {
1055 const char* kRemoveChars = "-/+*";
1056 std::string input = "A-+bc/d!*";
1057 EXPECT_TRUE(RemoveChars(input, kRemoveChars, &input));
1058 EXPECT_EQ("Abcd!", input);
1060 // No characters match kRemoveChars.
1061 EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
1062 EXPECT_EQ("Abcd!", input);
1064 // Empty string.
1065 input.clear();
1066 EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
1067 EXPECT_EQ(std::string(), input);
1070 TEST(StringUtilTest, ReplaceChars) {
1071 struct TestData {
1072 const char* input;
1073 const char* replace_chars;
1074 const char* replace_with;
1075 const char* output;
1076 bool result;
1077 } cases[] = {
1078 { "", "", "", "", false },
1079 { "test", "", "", "test", false },
1080 { "test", "", "!", "test", false },
1081 { "test", "z", "!", "test", false },
1082 { "test", "e", "!", "t!st", true },
1083 { "test", "e", "!?", "t!?st", true },
1084 { "test", "ez", "!", "t!st", true },
1085 { "test", "zed", "!?", "t!?st", true },
1086 { "test", "t", "!?", "!?es!?", true },
1087 { "test", "et", "!>", "!>!>s!>", true },
1088 { "test", "zest", "!", "!!!!", true },
1089 { "test", "szt", "!", "!e!!", true },
1090 { "test", "t", "test", "testestest", true },
1093 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
1094 std::string output;
1095 bool result = ReplaceChars(cases[i].input,
1096 cases[i].replace_chars,
1097 cases[i].replace_with,
1098 &output);
1099 EXPECT_EQ(cases[i].result, result);
1100 EXPECT_EQ(cases[i].output, output);
1104 TEST(StringUtilTest, ContainsOnlyChars) {
1105 // Providing an empty list of characters should return false but for the empty
1106 // string.
1107 EXPECT_TRUE(ContainsOnlyChars("", ""));
1108 EXPECT_FALSE(ContainsOnlyChars("Hello", ""));
1110 EXPECT_TRUE(ContainsOnlyChars("", "1234"));
1111 EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
1112 EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
1113 EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
1114 EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));
1117 class WriteIntoTest : public testing::Test {
1118 protected:
1119 static void WritesCorrectly(size_t num_chars) {
1120 std::string buffer;
1121 char kOriginal[] = "supercali";
1122 strncpy(WriteInto(&buffer, num_chars + 1), kOriginal, num_chars);
1123 // Using std::string(buffer.c_str()) instead of |buffer| truncates the
1124 // string at the first \0.
1125 EXPECT_EQ(std::string(kOriginal,
1126 std::min(num_chars, arraysize(kOriginal) - 1)),
1127 std::string(buffer.c_str()));
1128 EXPECT_EQ(num_chars, buffer.size());
1132 TEST_F(WriteIntoTest, WriteInto) {
1133 // Validate that WriteInto reserves enough space and
1134 // sizes a string correctly.
1135 WritesCorrectly(1);
1136 WritesCorrectly(2);
1137 WritesCorrectly(5000);
1139 // Validate that WriteInto doesn't modify other strings
1140 // when using a Copy-on-Write implementation.
1141 const char kLive[] = "live";
1142 const char kDead[] = "dead";
1143 const std::string live = kLive;
1144 std::string dead = live;
1145 strncpy(WriteInto(&dead, 5), kDead, 4);
1146 EXPECT_EQ(kDead, dead);
1147 EXPECT_EQ(4u, dead.size());
1148 EXPECT_EQ(kLive, live);
1149 EXPECT_EQ(4u, live.size());
1152 } // namespace base