1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/strings/string_util.h"
13 #include "base/basictypes.h"
14 #include "base/strings/string16.h"
15 #include "base/strings/utf_string_conversions.h"
16 #include "testing/gmock/include/gmock/gmock.h"
17 #include "testing/gtest/include/gtest/gtest.h"
19 using ::testing::ElementsAre
;
23 static const struct trim_case
{
25 const TrimPositions positions
;
26 const wchar_t* output
;
27 const TrimPositions return_value
;
29 {L
" Google Video ", TRIM_LEADING
, L
"Google Video ", TRIM_LEADING
},
30 {L
" Google Video ", TRIM_TRAILING
, L
" Google Video", TRIM_TRAILING
},
31 {L
" Google Video ", TRIM_ALL
, L
"Google Video", TRIM_ALL
},
32 {L
"Google Video", TRIM_ALL
, L
"Google Video", TRIM_NONE
},
33 {L
"", TRIM_ALL
, L
"", TRIM_NONE
},
34 {L
" ", TRIM_LEADING
, L
"", TRIM_LEADING
},
35 {L
" ", TRIM_TRAILING
, L
"", TRIM_TRAILING
},
36 {L
" ", TRIM_ALL
, L
"", TRIM_ALL
},
37 {L
"\t\rTest String\n", TRIM_ALL
, L
"Test String", TRIM_ALL
},
38 {L
"\x2002Test String\x00A0\x3000", TRIM_ALL
, L
"Test String", TRIM_ALL
},
41 static const struct trim_case_ascii
{
43 const TrimPositions positions
;
45 const TrimPositions return_value
;
46 } trim_cases_ascii
[] = {
47 {" Google Video ", TRIM_LEADING
, "Google Video ", TRIM_LEADING
},
48 {" Google Video ", TRIM_TRAILING
, " Google Video", TRIM_TRAILING
},
49 {" Google Video ", TRIM_ALL
, "Google Video", TRIM_ALL
},
50 {"Google Video", TRIM_ALL
, "Google Video", TRIM_NONE
},
51 {"", TRIM_ALL
, "", TRIM_NONE
},
52 {" ", TRIM_LEADING
, "", TRIM_LEADING
},
53 {" ", TRIM_TRAILING
, "", TRIM_TRAILING
},
54 {" ", TRIM_ALL
, "", TRIM_ALL
},
55 {"\t\rTest String\n", TRIM_ALL
, "Test String", TRIM_ALL
},
60 // Helper used to test TruncateUTF8ToByteSize.
61 bool Truncated(const std::string
& input
, const size_t byte_size
,
62 std::string
* output
) {
63 size_t prev
= input
.length();
64 TruncateUTF8ToByteSize(input
, byte_size
, output
);
65 return prev
!= output
->length();
70 TEST(StringUtilTest
, TruncateUTF8ToByteSize
) {
73 // Empty strings and invalid byte_size arguments
74 EXPECT_FALSE(Truncated(std::string(), 0, &output
));
75 EXPECT_EQ(output
, "");
76 EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output
));
77 EXPECT_EQ(output
, "");
78 EXPECT_FALSE(Truncated("\xe1\x80\xbf", -1, &output
));
79 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output
));
81 // Testing the truncation of valid UTF8 correctly
82 EXPECT_TRUE(Truncated("abc", 2, &output
));
83 EXPECT_EQ(output
, "ab");
84 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output
));
85 EXPECT_EQ(output
.compare("\xc2\x81"), 0);
86 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output
));
87 EXPECT_EQ(output
.compare("\xc2\x81"), 0);
88 EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output
));
89 EXPECT_EQ(output
.compare("\xc2\x81\xc2\x81"), 0);
92 const char array
[] = "\x00\x00\xc2\x81\xc2\x81";
93 const std::string
array_string(array
, arraysize(array
));
94 EXPECT_TRUE(Truncated(array_string
, 4, &output
));
95 EXPECT_EQ(output
.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
99 const char array
[] = "\x00\xc2\x81\xc2\x81";
100 const std::string
array_string(array
, arraysize(array
));
101 EXPECT_TRUE(Truncated(array_string
, 4, &output
));
102 EXPECT_EQ(output
.compare(std::string("\x00\xc2\x81", 3)), 0);
105 // Testing invalid UTF8
106 EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output
));
107 EXPECT_EQ(output
.compare(""), 0);
108 EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output
));
109 EXPECT_EQ(output
.compare(""), 0);
110 EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output
));
111 EXPECT_EQ(output
.compare(""), 0);
113 // Testing invalid UTF8 mixed with valid UTF8
114 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output
));
115 EXPECT_EQ(output
.compare("\xe1\x80\xbf"), 0);
116 EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output
));
117 EXPECT_EQ(output
.compare("\xf1\x80\xa0\xbf"), 0);
118 EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
120 EXPECT_EQ(output
.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
121 EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
123 EXPECT_EQ(output
.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
124 EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output
));
125 EXPECT_EQ(output
.compare("\xef\xbb\xbf" "abc"), 0);
127 // Overlong sequences
128 EXPECT_TRUE(Truncated("\xc0\x80", 2, &output
));
129 EXPECT_EQ(output
.compare(""), 0);
130 EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output
));
131 EXPECT_EQ(output
.compare(""), 0);
132 EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output
));
133 EXPECT_EQ(output
.compare(""), 0);
134 EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output
));
135 EXPECT_EQ(output
.compare(""), 0);
136 EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output
));
137 EXPECT_EQ(output
.compare(""), 0);
138 EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output
));
139 EXPECT_EQ(output
.compare(""), 0);
140 EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output
));
141 EXPECT_EQ(output
.compare(""), 0);
142 EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output
));
143 EXPECT_EQ(output
.compare(""), 0);
144 EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output
));
145 EXPECT_EQ(output
.compare(""), 0);
146 EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output
));
147 EXPECT_EQ(output
.compare(""), 0);
148 EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output
));
149 EXPECT_EQ(output
.compare(""), 0);
151 // Beyond U+10FFFF (the upper limit of Unicode codespace)
152 EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output
));
153 EXPECT_EQ(output
.compare(""), 0);
154 EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output
));
155 EXPECT_EQ(output
.compare(""), 0);
156 EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output
));
157 EXPECT_EQ(output
.compare(""), 0);
159 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
160 EXPECT_TRUE(Truncated("\xfe\xff", 2, &output
));
161 EXPECT_EQ(output
.compare(""), 0);
162 EXPECT_TRUE(Truncated("\xff\xfe", 2, &output
));
163 EXPECT_EQ(output
.compare(""), 0);
166 const char array
[] = "\x00\x00\xfe\xff";
167 const std::string
array_string(array
, arraysize(array
));
168 EXPECT_TRUE(Truncated(array_string
, 4, &output
));
169 EXPECT_EQ(output
.compare(std::string("\x00\x00", 2)), 0);
172 // Variants on the previous test
174 const char array
[] = "\xff\xfe\x00\x00";
175 const std::string
array_string(array
, 4);
176 EXPECT_FALSE(Truncated(array_string
, 4, &output
));
177 EXPECT_EQ(output
.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
180 const char array
[] = "\xff\x00\x00\xfe";
181 const std::string
array_string(array
, arraysize(array
));
182 EXPECT_TRUE(Truncated(array_string
, 4, &output
));
183 EXPECT_EQ(output
.compare(std::string("\xff\x00\x00", 3)), 0);
186 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
187 EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output
));
188 EXPECT_EQ(output
.compare(""), 0);
189 EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output
));
190 EXPECT_EQ(output
.compare(""), 0);
191 EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output
));
192 EXPECT_EQ(output
.compare(""), 0);
193 EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output
));
194 EXPECT_EQ(output
.compare(""), 0);
195 EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output
));
196 EXPECT_EQ(output
.compare(""), 0);
198 // Strings in legacy encodings that are valid in UTF-8, but
199 // are invalid as UTF-8 in real data.
200 EXPECT_TRUE(Truncated("caf\xe9", 4, &output
));
201 EXPECT_EQ(output
.compare("caf"), 0);
202 EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output
));
203 EXPECT_EQ(output
.compare(""), 0);
204 EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output
));
205 EXPECT_EQ(output
.compare("\xa7\x41\xa6\x6e"), 0);
206 EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
208 EXPECT_EQ(output
.compare("\xa7\x41\xa6\x6e"), 0);
210 // Testing using the same string as input and output.
211 EXPECT_FALSE(Truncated(output
, 4, &output
));
212 EXPECT_EQ(output
.compare("\xa7\x41\xa6\x6e"), 0);
213 EXPECT_TRUE(Truncated(output
, 3, &output
));
214 EXPECT_EQ(output
.compare("\xa7\x41"), 0);
216 // "abc" with U+201[CD] in windows-125[0-8]
217 EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output
));
218 EXPECT_EQ(output
.compare("\x93" "abc"), 0);
220 // U+0639 U+064E U+0644 U+064E in ISO-8859-6
221 EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output
));
222 EXPECT_EQ(output
.compare(""), 0);
224 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
225 EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output
));
226 EXPECT_EQ(output
.compare(""), 0);
229 TEST(StringUtilTest
, TrimWhitespace
) {
230 string16 output
; // Allow contents to carry over to next testcase
231 for (size_t i
= 0; i
< arraysize(trim_cases
); ++i
) {
232 const trim_case
& value
= trim_cases
[i
];
233 EXPECT_EQ(value
.return_value
,
234 TrimWhitespace(WideToUTF16(value
.input
), value
.positions
,
236 EXPECT_EQ(WideToUTF16(value
.output
), output
);
239 // Test that TrimWhitespace() can take the same string for input and output
240 output
= ASCIIToUTF16(" This is a test \r\n");
241 EXPECT_EQ(TRIM_ALL
, TrimWhitespace(output
, TRIM_ALL
, &output
));
242 EXPECT_EQ(ASCIIToUTF16("This is a test"), output
);
244 // Once more, but with a string of whitespace
245 output
= ASCIIToUTF16(" \r\n");
246 EXPECT_EQ(TRIM_ALL
, TrimWhitespace(output
, TRIM_ALL
, &output
));
247 EXPECT_EQ(string16(), output
);
249 std::string output_ascii
;
250 for (size_t i
= 0; i
< arraysize(trim_cases_ascii
); ++i
) {
251 const trim_case_ascii
& value
= trim_cases_ascii
[i
];
252 EXPECT_EQ(value
.return_value
,
253 TrimWhitespace(value
.input
, value
.positions
, &output_ascii
));
254 EXPECT_EQ(value
.output
, output_ascii
);
258 static const struct collapse_case
{
259 const wchar_t* input
;
261 const wchar_t* output
;
262 } collapse_cases
[] = {
263 {L
" Google Video ", false, L
"Google Video"},
264 {L
"Google Video", false, L
"Google Video"},
267 {L
"\t\rTest String\n", false, L
"Test String"},
268 {L
"\x2002Test String\x00A0\x3000", false, L
"Test String"},
269 {L
" Test \n \t String ", false, L
"Test String"},
270 {L
"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L
"Test String"},
271 {L
" Test String", false, L
"Test String"},
272 {L
"Test String ", false, L
"Test String"},
273 {L
"Test String", false, L
"Test String"},
276 {L
" \r ", true, L
""},
277 {L
"\nFoo", true, L
"Foo"},
278 {L
"\r Foo ", true, L
"Foo"},
279 {L
" Foo bar ", true, L
"Foo bar"},
280 {L
" \tFoo bar \n", true, L
"Foo bar"},
281 {L
" a \r b\n c \r\n d \t\re \t f \n ", true, L
"abcde f"},
284 TEST(StringUtilTest
, CollapseWhitespace
) {
285 for (size_t i
= 0; i
< arraysize(collapse_cases
); ++i
) {
286 const collapse_case
& value
= collapse_cases
[i
];
287 EXPECT_EQ(value
.output
, CollapseWhitespace(value
.input
, value
.trim
));
291 static const struct collapse_case_ascii
{
295 } collapse_cases_ascii
[] = {
296 {" Google Video ", false, "Google Video"},
297 {"Google Video", false, "Google Video"},
300 {"\t\rTest String\n", false, "Test String"},
301 {" Test \n \t String ", false, "Test String"},
302 {" Test String", false, "Test String"},
303 {"Test String ", false, "Test String"},
304 {"Test String", false, "Test String"},
308 {"\nFoo", true, "Foo"},
309 {"\r Foo ", true, "Foo"},
310 {" Foo bar ", true, "Foo bar"},
311 {" \tFoo bar \n", true, "Foo bar"},
312 {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
315 TEST(StringUtilTest
, CollapseWhitespaceASCII
) {
316 for (size_t i
= 0; i
< arraysize(collapse_cases_ascii
); ++i
) {
317 const collapse_case_ascii
& value
= collapse_cases_ascii
[i
];
318 EXPECT_EQ(value
.output
, CollapseWhitespaceASCII(value
.input
, value
.trim
));
322 TEST(StringUtilTest
, ContainsOnlyWhitespaceASCII
) {
323 EXPECT_TRUE(ContainsOnlyWhitespaceASCII(std::string()));
324 EXPECT_TRUE(ContainsOnlyWhitespaceASCII(" "));
325 EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t"));
326 EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t \r \n "));
327 EXPECT_FALSE(ContainsOnlyWhitespaceASCII("a"));
328 EXPECT_FALSE(ContainsOnlyWhitespaceASCII("\thello\r \n "));
331 TEST(StringUtilTest
, ContainsOnlyWhitespace
) {
332 EXPECT_TRUE(ContainsOnlyWhitespace(string16()));
333 EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16(" ")));
334 EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t")));
335 EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t \r \n ")));
336 EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("a")));
337 EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("\thello\r \n ")));
340 TEST(StringUtilTest
, IsStringUTF8
) {
341 EXPECT_TRUE(IsStringUTF8("abc"));
342 EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
343 EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
344 EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
345 EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
346 EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc")); // UTF-8 BOM
348 // surrogate code points
349 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
350 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
351 EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
353 // overlong sequences
354 EXPECT_FALSE(IsStringUTF8("\xc0\x80")); // U+0000
355 EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81")); // "AB"
356 EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80")); // U+0000
357 EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80")); // U+0080
358 EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf")); // U+07ff
359 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D")); // U+000D
360 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91")); // U+0091
361 EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80")); // U+0800
362 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf")); // U+FEFF (BOM)
363 EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf")); // U+003F
364 EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5")); // U+00A5
366 // Beyond U+10FFFF (the upper limit of Unicode codespace)
367 EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80")); // U+110000
368 EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf")); // 5 bytes
369 EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80")); // 6 bytes
371 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
372 EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
373 EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
374 EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
375 EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
377 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
378 EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe")); // U+FFFE)
379 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe")); // U+1FFFE
380 EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf")); // U+10FFFF
381 EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90")); // U+FDD0
382 EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf")); // U+FDEF
383 // Strings in legacy encodings. We can certainly make up strings
384 // in a legacy encoding that are valid in UTF-8, but in real data,
385 // most of them are invalid as UTF-8.
386 EXPECT_FALSE(IsStringUTF8("caf\xe9")); // cafe with U+00E9 in ISO-8859-1
387 EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2")); // U+AC00, U+AC001 in EUC-KR
388 EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e")); // U+4F60 U+597D in Big5
389 // "abc" with U+201[CD] in windows-125[0-8]
390 EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
391 // U+0639 U+064E U+0644 U+064E in ISO-8859-6
392 EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
393 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
394 EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
396 // Check that we support Embedded Nulls. The first uses the canonical UTF-8
397 // representation, and the second uses a 2-byte sequence. The second version
398 // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
399 // given codepoint must be used.
400 static const char kEmbeddedNull
[] = "embedded\0null";
401 EXPECT_TRUE(IsStringUTF8(
402 std::string(kEmbeddedNull
, sizeof(kEmbeddedNull
))));
403 EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
406 TEST(StringUtilTest
, ConvertASCII
) {
407 static const char* char_cases
[] = {
410 "0123ABCDwxyz \a\b\t\r\n!+,.~"
413 static const wchar_t* const wchar_cases
[] = {
416 L
"0123ABCDwxyz \a\b\t\r\n!+,.~"
419 for (size_t i
= 0; i
< arraysize(char_cases
); ++i
) {
420 EXPECT_TRUE(IsStringASCII(char_cases
[i
]));
421 std::wstring wide
= ASCIIToWide(char_cases
[i
]);
422 EXPECT_EQ(wchar_cases
[i
], wide
);
424 EXPECT_TRUE(IsStringASCII(wchar_cases
[i
]));
425 std::string ascii
= WideToASCII(wchar_cases
[i
]);
426 EXPECT_EQ(char_cases
[i
], ascii
);
429 EXPECT_FALSE(IsStringASCII("Google \x80Video"));
430 EXPECT_FALSE(IsStringASCII(L
"Google \x80Video"));
432 // Convert empty strings.
435 EXPECT_EQ(empty
, WideToASCII(wempty
));
436 EXPECT_EQ(wempty
, ASCIIToWide(empty
));
438 // Convert strings with an embedded NUL character.
439 const char chars_with_nul
[] = "test\0string";
440 const int length_with_nul
= arraysize(chars_with_nul
) - 1;
441 std::string
string_with_nul(chars_with_nul
, length_with_nul
);
442 std::wstring wide_with_nul
= ASCIIToWide(string_with_nul
);
443 EXPECT_EQ(static_cast<std::wstring::size_type
>(length_with_nul
),
444 wide_with_nul
.length());
445 std::string narrow_with_nul
= WideToASCII(wide_with_nul
);
446 EXPECT_EQ(static_cast<std::string::size_type
>(length_with_nul
),
447 narrow_with_nul
.length());
448 EXPECT_EQ(0, string_with_nul
.compare(narrow_with_nul
));
451 TEST(StringUtilTest
, ToUpperASCII
) {
452 EXPECT_EQ('C', ToUpperASCII('C'));
453 EXPECT_EQ('C', ToUpperASCII('c'));
454 EXPECT_EQ('2', ToUpperASCII('2'));
456 EXPECT_EQ(L
'C', ToUpperASCII(L
'C'));
457 EXPECT_EQ(L
'C', ToUpperASCII(L
'c'));
458 EXPECT_EQ(L
'2', ToUpperASCII(L
'2'));
460 std::string
in_place_a("Cc2");
461 StringToUpperASCII(&in_place_a
);
462 EXPECT_EQ("CC2", in_place_a
);
464 std::wstring
in_place_w(L
"Cc2");
465 StringToUpperASCII(&in_place_w
);
466 EXPECT_EQ(L
"CC2", in_place_w
);
468 std::string
original_a("Cc2");
469 std::string upper_a
= StringToUpperASCII(original_a
);
470 EXPECT_EQ("CC2", upper_a
);
472 std::wstring
original_w(L
"Cc2");
473 std::wstring upper_w
= StringToUpperASCII(original_w
);
474 EXPECT_EQ(L
"CC2", upper_w
);
477 TEST(StringUtilTest
, LowerCaseEqualsASCII
) {
478 static const struct {
479 const wchar_t* src_w
;
482 } lowercase_cases
[] = {
483 { L
"FoO", "FoO", "foo" },
484 { L
"foo", "foo", "foo" },
485 { L
"FOO", "FOO", "foo" },
488 for (size_t i
= 0; i
< ARRAYSIZE_UNSAFE(lowercase_cases
); ++i
) {
489 EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases
[i
].src_w
,
490 lowercase_cases
[i
].dst
));
491 EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases
[i
].src_a
,
492 lowercase_cases
[i
].dst
));
496 TEST(StringUtilTest
, FormatBytesUnlocalized
) {
497 static const struct {
499 const char* expected
;
501 // Expected behavior: we show one post-decimal digit when we have
502 // under two pre-decimal digits, except in cases where it makes no
503 // sense (zero or bytes).
504 // Since we switch units once we cross the 1000 mark, this keeps
505 // the display of file sizes or bytes consistently around three
509 {1024*1024, "1.0 MB"},
510 {1024*1024*1024, "1.0 GB"},
511 {10LL*1024*1024*1024, "10.0 GB"},
512 {99LL*1024*1024*1024, "99.0 GB"},
513 {105LL*1024*1024*1024, "105 GB"},
514 {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"},
515 {~(1LL<<63), "8192 PB"},
517 {99*1024 + 103, "99.1 kB"},
518 {1024*1024 + 103, "1.0 MB"},
519 {1024*1024 + 205 * 1024, "1.2 MB"},
520 {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"},
521 {10LL*1024*1024*1024, "10.0 GB"},
522 {100LL*1024*1024*1024, "100 GB"},
525 for (size_t i
= 0; i
< ARRAYSIZE_UNSAFE(cases
); ++i
) {
526 EXPECT_EQ(ASCIIToUTF16(cases
[i
].expected
),
527 FormatBytesUnlocalized(cases
[i
].bytes
));
530 TEST(StringUtilTest
, ReplaceSubstringsAfterOffset
) {
531 static const struct {
533 string16::size_type start_offset
;
534 const char* find_this
;
535 const char* replace_with
;
536 const char* expected
;
538 {"aaa", 0, "a", "b", "bbb"},
539 {"abb", 0, "ab", "a", "ab"},
540 {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
541 {"Not found", 0, "x", "0", "Not found"},
542 {"Not found again", 5, "x", "0", "Not found again"},
543 {" Making it much longer ", 0, " ", "Four score and seven years ago",
544 "Four score and seven years agoMakingFour score and seven years agoit"
545 "Four score and seven years agomuchFour score and seven years agolonger"
546 "Four score and seven years ago"},
547 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
548 {"Replace me only me once", 9, "me ", "", "Replace me only once"},
549 {"abababab", 2, "ab", "c", "abccc"},
552 for (size_t i
= 0; i
< ARRAYSIZE_UNSAFE(cases
); i
++) {
553 string16 str
= ASCIIToUTF16(cases
[i
].str
);
554 ReplaceSubstringsAfterOffset(&str
, cases
[i
].start_offset
,
555 ASCIIToUTF16(cases
[i
].find_this
),
556 ASCIIToUTF16(cases
[i
].replace_with
));
557 EXPECT_EQ(ASCIIToUTF16(cases
[i
].expected
), str
);
561 TEST(StringUtilTest
, ReplaceFirstSubstringAfterOffset
) {
562 static const struct {
564 string16::size_type start_offset
;
565 const char* find_this
;
566 const char* replace_with
;
567 const char* expected
;
569 {"aaa", 0, "a", "b", "baa"},
570 {"abb", 0, "ab", "a", "ab"},
571 {"Removing some substrings inging", 0, "ing", "",
572 "Remov some substrings inging"},
573 {"Not found", 0, "x", "0", "Not found"},
574 {"Not found again", 5, "x", "0", "Not found again"},
575 {" Making it much longer ", 0, " ", "Four score and seven years ago",
576 "Four score and seven years agoMaking it much longer "},
577 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
578 {"Replace me only me once", 4, "me ", "", "Replace only me once"},
579 {"abababab", 2, "ab", "c", "abcabab"},
582 for (size_t i
= 0; i
< ARRAYSIZE_UNSAFE(cases
); i
++) {
583 string16 str
= ASCIIToUTF16(cases
[i
].str
);
584 ReplaceFirstSubstringAfterOffset(&str
, cases
[i
].start_offset
,
585 ASCIIToUTF16(cases
[i
].find_this
),
586 ASCIIToUTF16(cases
[i
].replace_with
));
587 EXPECT_EQ(ASCIIToUTF16(cases
[i
].expected
), str
);
591 TEST(StringUtilTest
, HexDigitToInt
) {
592 EXPECT_EQ(0, HexDigitToInt('0'));
593 EXPECT_EQ(1, HexDigitToInt('1'));
594 EXPECT_EQ(2, HexDigitToInt('2'));
595 EXPECT_EQ(3, HexDigitToInt('3'));
596 EXPECT_EQ(4, HexDigitToInt('4'));
597 EXPECT_EQ(5, HexDigitToInt('5'));
598 EXPECT_EQ(6, HexDigitToInt('6'));
599 EXPECT_EQ(7, HexDigitToInt('7'));
600 EXPECT_EQ(8, HexDigitToInt('8'));
601 EXPECT_EQ(9, HexDigitToInt('9'));
602 EXPECT_EQ(10, HexDigitToInt('A'));
603 EXPECT_EQ(11, HexDigitToInt('B'));
604 EXPECT_EQ(12, HexDigitToInt('C'));
605 EXPECT_EQ(13, HexDigitToInt('D'));
606 EXPECT_EQ(14, HexDigitToInt('E'));
607 EXPECT_EQ(15, HexDigitToInt('F'));
609 // Verify the lower case as well.
610 EXPECT_EQ(10, HexDigitToInt('a'));
611 EXPECT_EQ(11, HexDigitToInt('b'));
612 EXPECT_EQ(12, HexDigitToInt('c'));
613 EXPECT_EQ(13, HexDigitToInt('d'));
614 EXPECT_EQ(14, HexDigitToInt('e'));
615 EXPECT_EQ(15, HexDigitToInt('f'));
// This checks whether we can use the assignment operator for a va_list. We need
// a way to do this since Visual C doesn't support va_copy, but assignment on
// va_list is not guaranteed to be a copy. See StringAppendVT, which uses this.
622 static void VariableArgsFunc(const char* format
, ...) {
624 va_start(org
, format
);
627 GG_VA_COPY(dup
, org
);
628 int i1
= va_arg(org
, int);
629 int j1
= va_arg(org
, int);
630 char* s1
= va_arg(org
, char*);
631 double d1
= va_arg(org
, double);
634 int i2
= va_arg(dup
, int);
635 int j2
= va_arg(dup
, int);
636 char* s2
= va_arg(dup
, char*);
637 double d2
= va_arg(dup
, double);
641 EXPECT_STREQ(s1
, s2
);
647 TEST(StringUtilTest
, VAList
) {
648 VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21);
652 template <typename STR
>
653 void TokenizeTest() {
657 size
= Tokenize(STR("This is a string"), STR(" "), &r
);
659 ASSERT_EQ(4U, r
.size());
660 EXPECT_EQ(r
[0], STR("This"));
661 EXPECT_EQ(r
[1], STR("is"));
662 EXPECT_EQ(r
[2], STR("a"));
663 EXPECT_EQ(r
[3], STR("string"));
666 size
= Tokenize(STR("one,two,three"), STR(","), &r
);
668 ASSERT_EQ(3U, r
.size());
669 EXPECT_EQ(r
[0], STR("one"));
670 EXPECT_EQ(r
[1], STR("two"));
671 EXPECT_EQ(r
[2], STR("three"));
674 size
= Tokenize(STR("one,two:three;four"), STR(",:"), &r
);
676 ASSERT_EQ(3U, r
.size());
677 EXPECT_EQ(r
[0], STR("one"));
678 EXPECT_EQ(r
[1], STR("two"));
679 EXPECT_EQ(r
[2], STR("three;four"));
682 size
= Tokenize(STR("one,two:three;four"), STR(";,:"), &r
);
684 ASSERT_EQ(4U, r
.size());
685 EXPECT_EQ(r
[0], STR("one"));
686 EXPECT_EQ(r
[1], STR("two"));
687 EXPECT_EQ(r
[2], STR("three"));
688 EXPECT_EQ(r
[3], STR("four"));
691 size
= Tokenize(STR("one, two, three"), STR(","), &r
);
693 ASSERT_EQ(3U, r
.size());
694 EXPECT_EQ(r
[0], STR("one"));
695 EXPECT_EQ(r
[1], STR(" two"));
696 EXPECT_EQ(r
[2], STR(" three"));
699 size
= Tokenize(STR("one, two, three, "), STR(","), &r
);
701 ASSERT_EQ(4U, r
.size());
702 EXPECT_EQ(r
[0], STR("one"));
703 EXPECT_EQ(r
[1], STR(" two"));
704 EXPECT_EQ(r
[2], STR(" three"));
705 EXPECT_EQ(r
[3], STR(" "));
708 size
= Tokenize(STR("one, two, three,"), STR(","), &r
);
710 ASSERT_EQ(3U, r
.size());
711 EXPECT_EQ(r
[0], STR("one"));
712 EXPECT_EQ(r
[1], STR(" two"));
713 EXPECT_EQ(r
[2], STR(" three"));
716 size
= Tokenize(STR(), STR(","), &r
);
718 ASSERT_EQ(0U, r
.size());
721 size
= Tokenize(STR(","), STR(","), &r
);
723 ASSERT_EQ(0U, r
.size());
726 size
= Tokenize(STR(",;:."), STR(".:;,"), &r
);
728 ASSERT_EQ(0U, r
.size());
731 size
= Tokenize(STR("\t\ta\t"), STR("\t"), &r
);
733 ASSERT_EQ(1U, r
.size());
734 EXPECT_EQ(r
[0], STR("a"));
737 size
= Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r
);
739 ASSERT_EQ(2U, r
.size());
740 EXPECT_EQ(r
[0], STR("\ta\t"));
741 EXPECT_EQ(r
[1], STR("b\tcc"));
745 TEST(StringUtilTest
, TokenizeStdString
) {
746 TokenizeTest
<std::string
>();
749 TEST(StringUtilTest
, TokenizeStringPiece
) {
750 TokenizeTest
<base::StringPiece
>();
753 // Test for JoinString
754 TEST(StringUtilTest
, JoinString
) {
755 std::vector
<std::string
> in
;
756 EXPECT_EQ("", JoinString(in
, ','));
759 EXPECT_EQ("a", JoinString(in
, ','));
763 EXPECT_EQ("a,b,c", JoinString(in
, ','));
765 in
.push_back(std::string());
766 EXPECT_EQ("a,b,c,", JoinString(in
, ','));
768 EXPECT_EQ("a|b|c|| ", JoinString(in
, '|'));
771 // Test for JoinString overloaded with std::string separator
772 TEST(StringUtilTest
, JoinStringWithString
) {
773 std::string
separator(", ");
774 std::vector
<std::string
> parts
;
775 EXPECT_EQ(std::string(), JoinString(parts
, separator
));
777 parts
.push_back("a");
778 EXPECT_EQ("a", JoinString(parts
, separator
));
780 parts
.push_back("b");
781 parts
.push_back("c");
782 EXPECT_EQ("a, b, c", JoinString(parts
, separator
));
784 parts
.push_back(std::string());
785 EXPECT_EQ("a, b, c, ", JoinString(parts
, separator
));
786 parts
.push_back(" ");
787 EXPECT_EQ("a|b|c|| ", JoinString(parts
, "|"));
790 // Test for JoinString overloaded with string16 separator
791 TEST(StringUtilTest
, JoinStringWithString16
) {
792 string16 separator
= ASCIIToUTF16(", ");
793 std::vector
<string16
> parts
;
794 EXPECT_EQ(string16(), JoinString(parts
, separator
));
796 parts
.push_back(ASCIIToUTF16("a"));
797 EXPECT_EQ(ASCIIToUTF16("a"), JoinString(parts
, separator
));
799 parts
.push_back(ASCIIToUTF16("b"));
800 parts
.push_back(ASCIIToUTF16("c"));
801 EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(parts
, separator
));
803 parts
.push_back(ASCIIToUTF16(""));
804 EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(parts
, separator
));
805 parts
.push_back(ASCIIToUTF16(" "));
806 EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(parts
, ASCIIToUTF16("|")));
809 TEST(StringUtilTest
, StartsWith
) {
810 EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true));
811 EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true));
812 EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false));
813 EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false));
814 EXPECT_FALSE(StartsWithASCII("java", "javascript", true));
815 EXPECT_FALSE(StartsWithASCII("java", "javascript", false));
816 EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", false));
817 EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", true));
818 EXPECT_TRUE(StartsWithASCII("java", std::string(), false));
819 EXPECT_TRUE(StartsWithASCII("java", std::string(), true));
821 EXPECT_TRUE(StartsWith(L
"javascript:url", L
"javascript", true));
822 EXPECT_FALSE(StartsWith(L
"JavaScript:url", L
"javascript", true));
823 EXPECT_TRUE(StartsWith(L
"javascript:url", L
"javascript", false));
824 EXPECT_TRUE(StartsWith(L
"JavaScript:url", L
"javascript", false));
825 EXPECT_FALSE(StartsWith(L
"java", L
"javascript", true));
826 EXPECT_FALSE(StartsWith(L
"java", L
"javascript", false));
827 EXPECT_FALSE(StartsWith(std::wstring(), L
"javascript", false));
828 EXPECT_FALSE(StartsWith(std::wstring(), L
"javascript", true));
829 EXPECT_TRUE(StartsWith(L
"java", std::wstring(), false));
830 EXPECT_TRUE(StartsWith(L
"java", std::wstring(), true));
833 TEST(StringUtilTest
, EndsWith
) {
834 EXPECT_TRUE(EndsWith(L
"Foo.plugin", L
".plugin", true));
835 EXPECT_FALSE(EndsWith(L
"Foo.Plugin", L
".plugin", true));
836 EXPECT_TRUE(EndsWith(L
"Foo.plugin", L
".plugin", false));
837 EXPECT_TRUE(EndsWith(L
"Foo.Plugin", L
".plugin", false));
838 EXPECT_FALSE(EndsWith(L
".plug", L
".plugin", true));
839 EXPECT_FALSE(EndsWith(L
".plug", L
".plugin", false));
840 EXPECT_FALSE(EndsWith(L
"Foo.plugin Bar", L
".plugin", true));
841 EXPECT_FALSE(EndsWith(L
"Foo.plugin Bar", L
".plugin", false));
842 EXPECT_FALSE(EndsWith(std::wstring(), L
".plugin", false));
843 EXPECT_FALSE(EndsWith(std::wstring(), L
".plugin", true));
844 EXPECT_TRUE(EndsWith(L
"Foo.plugin", std::wstring(), false));
845 EXPECT_TRUE(EndsWith(L
"Foo.plugin", std::wstring(), true));
846 EXPECT_TRUE(EndsWith(L
".plugin", L
".plugin", false));
847 EXPECT_TRUE(EndsWith(L
".plugin", L
".plugin", true));
848 EXPECT_TRUE(EndsWith(std::wstring(), std::wstring(), false));
849 EXPECT_TRUE(EndsWith(std::wstring(), std::wstring(), true));
852 TEST(StringUtilTest
, GetStringFWithOffsets
) {
853 std::vector
<string16
> subst
;
854 subst
.push_back(ASCIIToUTF16("1"));
855 subst
.push_back(ASCIIToUTF16("2"));
856 std::vector
<size_t> offsets
;
858 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
861 EXPECT_EQ(2U, offsets
.size());
862 EXPECT_EQ(7U, offsets
[0]);
863 EXPECT_EQ(25U, offsets
[1]);
866 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
869 EXPECT_EQ(2U, offsets
.size());
870 EXPECT_EQ(25U, offsets
[0]);
871 EXPECT_EQ(7U, offsets
[1]);
875 TEST(StringUtilTest
, ReplaceStringPlaceholdersTooFew
) {
876 // Test whether replacestringplaceholders works as expected when there
877 // are fewer inputs than outputs.
878 std::vector
<string16
> subst
;
879 subst
.push_back(ASCIIToUTF16("9a"));
880 subst
.push_back(ASCIIToUTF16("8b"));
881 subst
.push_back(ASCIIToUTF16("7c"));
884 ReplaceStringPlaceholders(
885 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst
, NULL
);
887 EXPECT_EQ(formatted
, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"));
890 TEST(StringUtilTest
, ReplaceStringPlaceholders
) {
891 std::vector
<string16
> subst
;
892 subst
.push_back(ASCIIToUTF16("9a"));
893 subst
.push_back(ASCIIToUTF16("8b"));
894 subst
.push_back(ASCIIToUTF16("7c"));
895 subst
.push_back(ASCIIToUTF16("6d"));
896 subst
.push_back(ASCIIToUTF16("5e"));
897 subst
.push_back(ASCIIToUTF16("4f"));
898 subst
.push_back(ASCIIToUTF16("3g"));
899 subst
.push_back(ASCIIToUTF16("2h"));
900 subst
.push_back(ASCIIToUTF16("1i"));
903 ReplaceStringPlaceholders(
904 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst
, NULL
);
906 EXPECT_EQ(formatted
, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"));
// Fourteen substitutions with a format string that uses the two-digit
// placeholders $10 through $14 and ends with a bare $1. The expected text maps
// $10..$14 to the tenth through fourteenth entries of |subst| (not to $1
// followed by a literal digit) and the trailing $1 to the first entry.
909 TEST(StringUtilTest
, ReplaceStringPlaceholdersMoreThan9Replacements
) {
910 std::vector
<string16
> subst
;
911 subst
.push_back(ASCIIToUTF16("9a"));
912 subst
.push_back(ASCIIToUTF16("8b"));
913 subst
.push_back(ASCIIToUTF16("7c"));
914 subst
.push_back(ASCIIToUTF16("6d"));
915 subst
.push_back(ASCIIToUTF16("5e"));
916 subst
.push_back(ASCIIToUTF16("4f"));
917 subst
.push_back(ASCIIToUTF16("3g"));
918 subst
.push_back(ASCIIToUTF16("2h"));
919 subst
.push_back(ASCIIToUTF16("1i"));
920 subst
.push_back(ASCIIToUTF16("0j"));
921 subst
.push_back(ASCIIToUTF16("-1k"));
922 subst
.push_back(ASCIIToUTF16("-2l"));
923 subst
.push_back(ASCIIToUTF16("-3m"));
924 subst
.push_back(ASCIIToUTF16("-4n"));
927 ReplaceStringPlaceholders(
928 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i,"
929 "$10j,$11k,$12l,$13m,$14n,$1"), subst
, NULL
);
931 EXPECT_EQ(formatted
, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,"
932 "1ii,0jj,-1kk,-2ll,-3mm,-4nn,9a"));
// Nine-placeholder substitution using std::string for the substitutions, the
// format string and the result. Each of $1 through $9 is expected to be
// replaced by the entry of |subst| at the matching (1-based) position.
935 TEST(StringUtilTest
, StdStringReplaceStringPlaceholders
) {
936 std::vector
<std::string
> subst
;
937 subst
.push_back("9a");
938 subst
.push_back("8b");
939 subst
.push_back("7c");
940 subst
.push_back("6d");
941 subst
.push_back("5e");
942 subst
.push_back("4f");
943 subst
.push_back("3g");
944 subst
.push_back("2h");
945 subst
.push_back("1i");
947 std::string formatted
=
948 ReplaceStringPlaceholders(
949 "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst
, NULL
);
951 EXPECT_EQ(formatted
, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii");
// Exercises ReplaceStringPlaceholders on a format string containing runs of
// two, three and four consecutive '$' characters ("$$1 $$$2 $$$$3") with three
// std::string substitutions.
954 TEST(StringUtilTest
, ReplaceStringPlaceholdersConsecutiveDollarSigns
) {
955 std::vector
<std::string
> subst
;
956 subst
.push_back("a");
957 subst
.push_back("b");
958 subst
.push_back("c");
959 EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst
, NULL
),
// Exercises MatchPattern with the '*' and '?' wildcards, a backslash-escaped
// '*', empty strings and patterns, a case-mismatched suffix, UTF-8 input
// (valid and invalid sequences) and UTF-16 input.
963 TEST(StringUtilTest
, MatchPatternTest
) {
964 EXPECT_TRUE(MatchPattern("www.google.com", "*.com"));
965 EXPECT_TRUE(MatchPattern("www.google.com", "*"));
966 EXPECT_FALSE(MatchPattern("www.google.com", "www*.g*.org"));
967 EXPECT_TRUE(MatchPattern("Hello", "H?l?o"));
968 EXPECT_FALSE(MatchPattern("www.google.com", "http://*)"));
969 EXPECT_FALSE(MatchPattern("www.msn.com", "*.COM"));
970 EXPECT_TRUE(MatchPattern("Hello*1234", "He??o\\*1*"));
971 EXPECT_FALSE(MatchPattern("", "*.*"));
972 EXPECT_TRUE(MatchPattern("", "*"));
973 EXPECT_TRUE(MatchPattern("", "?"));
974 EXPECT_TRUE(MatchPattern("", ""));
975 EXPECT_FALSE(MatchPattern("Hello", ""));
976 EXPECT_TRUE(MatchPattern("Hello*", "Hello*"));
977 // Stop after a certain recursion depth.
978 EXPECT_FALSE(MatchPattern("123456789012345678", "?????????????????*"));
980 // Test UTF8 matching.
981 EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0", "*\xe2\x99\xa0"));
982 EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0.", "heart: ?."));
983 EXPECT_TRUE(MatchPattern("hearts: \xe2\x99\xa0\xe2\x99\xa0", "*"));
984 // Invalid sequences should be handled as a single invalid character.
985 EXPECT_TRUE(MatchPattern("invalid: \xef\xbf\xbe", "invalid: ?"));
986 // If the pattern has invalid characters, it shouldn't match anything.
987 EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80"));
989 // Test UTF16 character matching.
990 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"),
991 UTF8ToUTF16("*.com")));
992 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"),
993 UTF8ToUTF16("He??o\\*1*")));
995 // This test verifies that consecutive wildcards are collapsed into 1
996 // wildcard (when this doesn't occur, MatchPattern reaches its maximum
998 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello"),
999 UTF8ToUTF16("He********************************o")));
// Exercises base::strlcpy and base::wcslcpy against destinations of several
// sizes. In every visible case the return value is expected to be 7, the
// length of the source "abcdefg", regardless of how much was actually copied;
// memcmp is then used to check what ended up in the destination.
1002 TEST(StringUtilTest
, LcpyTest
) {
1003 // Test the normal case where we fit in our buffer.
1007 EXPECT_EQ(7U, base::strlcpy(dst
, "abcdefg", arraysize(dst
)));
1008 EXPECT_EQ(0, memcmp(dst
, "abcdefg", 8));
1009 EXPECT_EQ(7U, base::wcslcpy(wdst
, L
"abcdefg", arraysize(wdst
)));
1010 EXPECT_EQ(0, memcmp(wdst
, L
"abcdefg", sizeof(wchar_t) * 8));
1013 // Test dst_size == 0, nothing should be written to |dst| and we should
1014 // have the equivalent of strlen(src).
1016 char dst
[2] = {1, 2};
1017 wchar_t wdst
[2] = {1, 2};
1018 EXPECT_EQ(7U, base::strlcpy(dst
, "abcdefg", 0));
1019 EXPECT_EQ(1, dst
[0]);
1020 EXPECT_EQ(2, dst
[1]);
1021 EXPECT_EQ(7U, base::wcslcpy(wdst
, L
"abcdefg", 0));
1022 #if defined(WCHAR_T_IS_UNSIGNED)
1023 EXPECT_EQ(1U, wdst
[0]);
1024 EXPECT_EQ(2U, wdst
[1]);
1026 EXPECT_EQ(1, wdst
[0]);
1027 EXPECT_EQ(2, wdst
[1]);
1031 // Test the case where we _just_ completely fit including the null.
1035 EXPECT_EQ(7U, base::strlcpy(dst
, "abcdefg", arraysize(dst
)));
1036 EXPECT_EQ(0, memcmp(dst
, "abcdefg", 8));
1037 EXPECT_EQ(7U, base::wcslcpy(wdst
, L
"abcdefg", arraysize(wdst
)));
1038 EXPECT_EQ(0, memcmp(wdst
, L
"abcdefg", sizeof(wchar_t) * 8));
1041 // Test the case where we are one smaller, so we can't fit the null.
1045 EXPECT_EQ(7U, base::strlcpy(dst
, "abcdefg", arraysize(dst
)));
1046 EXPECT_EQ(0, memcmp(dst
, "abcdef", 7));
1047 EXPECT_EQ(7U, base::wcslcpy(wdst
, L
"abcdefg", arraysize(wdst
)));
1048 EXPECT_EQ(0, memcmp(wdst
, L
"abcdef", sizeof(wchar_t) * 7));
1051 // Test the case where we are just too small.
1055 EXPECT_EQ(7U, base::strlcpy(dst
, "abcdefg", arraysize(dst
)));
1056 EXPECT_EQ(0, memcmp(dst
, "ab", 3));
1057 EXPECT_EQ(7U, base::wcslcpy(wdst
, L
"abcdefg", arraysize(wdst
)));
1058 EXPECT_EQ(0, memcmp(wdst
, L
"abcdefg", sizeof(wchar_t) * 3));
// Table-driven check: for every entry of |cases|, the result of
// base::IsWprintfFormatPortable(input) is expected to equal the |portable|
// flag stored alongside that wide format string.
1062 TEST(StringUtilTest
, WprintfFormatPortabilityTest
) {
1063 static const struct {
1064 const wchar_t* input
;
1071 { L
"Hello, %s", false },
1076 { L
"%ls %s", false },
1077 { L
"%s %ls", false },
1078 { L
"%s %ls %s", false },
1080 { L
"%f %F", false },
1081 { L
"%d %D", false },
1082 { L
"%o %O", false },
1083 { L
"%u %U", false },
1084 { L
"%f %d %o %u", true },
1085 { L
"%-8d (%02.1f%)", true },
1086 { L
"% 10s", false },
1089 for (size_t i
= 0; i
< ARRAYSIZE_UNSAFE(cases
); ++i
)
1090 EXPECT_EQ(cases
[i
].portable
, base::IsWprintfFormatPortable(cases
[i
].input
));
// Calls RemoveChars with |input| as both the source and the destination.
// The return value is expected to be true when characters were removed
// ("A-+bc/d!*" becomes "Abcd!") and false when nothing in the input matches
// kRemoveChars; the last case expects false together with an empty |input|.
1093 TEST(StringUtilTest
, RemoveChars
) {
1094 const char* kRemoveChars
= "-/+*";
1095 std::string input
= "A-+bc/d!*";
1096 EXPECT_TRUE(RemoveChars(input
, kRemoveChars
, &input
));
1097 EXPECT_EQ("Abcd!", input
);
1099 // No characters match kRemoveChars.
1100 EXPECT_FALSE(RemoveChars(input
, kRemoveChars
, &input
));
1101 EXPECT_EQ("Abcd!", input
);
1105 EXPECT_FALSE(RemoveChars(input
, kRemoveChars
, &input
));
1106 EXPECT_EQ(std::string(), input
);
// Table-driven check of ReplaceChars. Each row lists, in order: the input, the
// set of characters to replace, the replacement text, the expected output and
// the expected return value. In the rows shown, the expected return value is
// true exactly when the expected output differs from the input.
1109 TEST(StringUtilTest
, ReplaceChars
) {
1112 const char* replace_chars
;
1113 const char* replace_with
;
1117 { "", "", "", "", false },
1118 { "test", "", "", "test", false },
1119 { "test", "", "!", "test", false },
1120 { "test", "z", "!", "test", false },
1121 { "test", "e", "!", "t!st", true },
1122 { "test", "e", "!?", "t!?st", true },
1123 { "test", "ez", "!", "t!st", true },
1124 { "test", "zed", "!?", "t!?st", true },
1125 { "test", "t", "!?", "!?es!?", true },
1126 { "test", "et", "!>", "!>!>s!>", true },
1127 { "test", "zest", "!", "!!!!", true },
1128 { "test", "szt", "!", "!e!!", true },
1129 { "test", "t", "test", "testestest", true },
1132 for (size_t i
= 0; i
< ARRAYSIZE_UNSAFE(cases
); ++i
) {
1134 bool result
= ReplaceChars(cases
[i
].input
,
1135 cases
[i
].replace_chars
,
1136 cases
[i
].replace_with
,
1138 EXPECT_EQ(cases
[i
].result
, result
);
1139 EXPECT_EQ(cases
[i
].output
, output
);
// Checks ContainsOnlyChars(input, characters) with an empty character set and
// with digit sets. An empty input is expected to yield true for both an empty
// and a non-empty character set; "123a" is rejected against "4321".
1143 TEST(StringUtilTest
, ContainsOnlyChars
) {
1144 // Providing an empty list of characters should return false but for the empty
1146 EXPECT_TRUE(ContainsOnlyChars(std::string(), std::string()));
1147 EXPECT_FALSE(ContainsOnlyChars("Hello", std::string()));
1149 EXPECT_TRUE(ContainsOnlyChars(std::string(), "1234"));
1150 EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
1151 EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
1152 EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
1153 EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));
// Test fixture providing WritesCorrectly(num_chars). The helper asks
// WriteInto for |num_chars| + 1 characters of |buffer|, copies up to
// |num_chars| characters of "supercali" into it with strncpy, and then
// expects (a) the C-string view of |buffer| to equal the first
// min(num_chars, strlen("supercali")) characters of the original and
// (b) buffer.size() to equal |num_chars|.
1156 class WriteIntoTest
: public testing::Test
{
1158 static void WritesCorrectly(size_t num_chars
) {
1160 char kOriginal
[] = "supercali";
1161 strncpy(WriteInto(&buffer
, num_chars
+ 1), kOriginal
, num_chars
);
1162 // Using std::string(buffer.c_str()) instead of |buffer| truncates the
1163 // string at the first \0.
1164 EXPECT_EQ(std::string(kOriginal
,
1165 std::min(num_chars
, arraysize(kOriginal
) - 1)),
1166 std::string(buffer
.c_str()));
1167 EXPECT_EQ(num_chars
, buffer
.size());
// Runs WritesCorrectly (the call shown uses 5000 characters), then writes
// "dead" through WriteInto into |dead|, a copy of |live|, and expects |dead|
// to hold "dead" while |live| still holds "live", both with size 4.
1171 TEST_F(WriteIntoTest
, WriteInto
) {
1172 // Validate that WriteInto reserves enough space and
1173 // sizes a string correctly.
1176 WritesCorrectly(5000);
1178 // Validate that WriteInto doesn't modify other strings
1179 // when using a Copy-on-Write implementation.
1180 const char kLive
[] = "live";
1181 const char kDead
[] = "dead";
1182 const std::string live
= kLive
;
1183 std::string dead
= live
;
1184 strncpy(WriteInto(&dead
, 5), kDead
, 4);
1185 EXPECT_EQ(kDead
, dead
);
1186 EXPECT_EQ(4u, dead
.size());
1187 EXPECT_EQ(kLive
, live
);
1188 EXPECT_EQ(4u, live
.size());