// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/strings/string_util.h"

#include <stdarg.h>

#include <string>
#include <vector>

#include "base/basictypes.h"
#include "base/strings/string16.h"
#include "base/strings/utf_string_conversions.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
18 using ::testing::ElementsAre
;
22 static const struct trim_case
{
24 const TrimPositions positions
;
25 const wchar_t* output
;
26 const TrimPositions return_value
;
28 {L
" Google Video ", TRIM_LEADING
, L
"Google Video ", TRIM_LEADING
},
29 {L
" Google Video ", TRIM_TRAILING
, L
" Google Video", TRIM_TRAILING
},
30 {L
" Google Video ", TRIM_ALL
, L
"Google Video", TRIM_ALL
},
31 {L
"Google Video", TRIM_ALL
, L
"Google Video", TRIM_NONE
},
32 {L
"", TRIM_ALL
, L
"", TRIM_NONE
},
33 {L
" ", TRIM_LEADING
, L
"", TRIM_LEADING
},
34 {L
" ", TRIM_TRAILING
, L
"", TRIM_TRAILING
},
35 {L
" ", TRIM_ALL
, L
"", TRIM_ALL
},
36 {L
"\t\rTest String\n", TRIM_ALL
, L
"Test String", TRIM_ALL
},
37 {L
"\x2002Test String\x00A0\x3000", TRIM_ALL
, L
"Test String", TRIM_ALL
},
40 static const struct trim_case_ascii
{
42 const TrimPositions positions
;
44 const TrimPositions return_value
;
45 } trim_cases_ascii
[] = {
46 {" Google Video ", TRIM_LEADING
, "Google Video ", TRIM_LEADING
},
47 {" Google Video ", TRIM_TRAILING
, " Google Video", TRIM_TRAILING
},
48 {" Google Video ", TRIM_ALL
, "Google Video", TRIM_ALL
},
49 {"Google Video", TRIM_ALL
, "Google Video", TRIM_NONE
},
50 {"", TRIM_ALL
, "", TRIM_NONE
},
51 {" ", TRIM_LEADING
, "", TRIM_LEADING
},
52 {" ", TRIM_TRAILING
, "", TRIM_TRAILING
},
53 {" ", TRIM_ALL
, "", TRIM_ALL
},
54 {"\t\rTest String\n", TRIM_ALL
, "Test String", TRIM_ALL
},
59 // Helper used to test TruncateUTF8ToByteSize.
60 bool Truncated(const std::string
& input
,
61 const size_t byte_size
,
62 std::string
* output
) {
63 size_t prev
= input
.length();
64 TruncateUTF8ToByteSize(input
, byte_size
, output
);
65 return prev
!= output
->length();
70 TEST(StringUtilTest
, TruncateUTF8ToByteSize
) {
73 // Empty strings and invalid byte_size arguments
74 EXPECT_FALSE(Truncated(std::string(), 0, &output
));
75 EXPECT_EQ(output
, "");
76 EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output
));
77 EXPECT_EQ(output
, "");
78 EXPECT_FALSE(Truncated("\xe1\x80\xbf", static_cast<size_t>(-1), &output
));
79 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output
));
81 // Testing the truncation of valid UTF8 correctly
82 EXPECT_TRUE(Truncated("abc", 2, &output
));
83 EXPECT_EQ(output
, "ab");
84 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output
));
85 EXPECT_EQ(output
.compare("\xc2\x81"), 0);
86 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output
));
87 EXPECT_EQ(output
.compare("\xc2\x81"), 0);
88 EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output
));
89 EXPECT_EQ(output
.compare("\xc2\x81\xc2\x81"), 0);
92 const char array
[] = "\x00\x00\xc2\x81\xc2\x81";
93 const std::string
array_string(array
, arraysize(array
));
94 EXPECT_TRUE(Truncated(array_string
, 4, &output
));
95 EXPECT_EQ(output
.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
99 const char array
[] = "\x00\xc2\x81\xc2\x81";
100 const std::string
array_string(array
, arraysize(array
));
101 EXPECT_TRUE(Truncated(array_string
, 4, &output
));
102 EXPECT_EQ(output
.compare(std::string("\x00\xc2\x81", 3)), 0);
105 // Testing invalid UTF8
106 EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output
));
107 EXPECT_EQ(output
.compare(""), 0);
108 EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output
));
109 EXPECT_EQ(output
.compare(""), 0);
110 EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output
));
111 EXPECT_EQ(output
.compare(""), 0);
113 // Testing invalid UTF8 mixed with valid UTF8
114 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output
));
115 EXPECT_EQ(output
.compare("\xe1\x80\xbf"), 0);
116 EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output
));
117 EXPECT_EQ(output
.compare("\xf1\x80\xa0\xbf"), 0);
118 EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
120 EXPECT_EQ(output
.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
121 EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
123 EXPECT_EQ(output
.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
124 EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output
));
125 EXPECT_EQ(output
.compare("\xef\xbb\xbf" "abc"), 0);
127 // Overlong sequences
128 EXPECT_TRUE(Truncated("\xc0\x80", 2, &output
));
129 EXPECT_EQ(output
.compare(""), 0);
130 EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output
));
131 EXPECT_EQ(output
.compare(""), 0);
132 EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output
));
133 EXPECT_EQ(output
.compare(""), 0);
134 EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output
));
135 EXPECT_EQ(output
.compare(""), 0);
136 EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output
));
137 EXPECT_EQ(output
.compare(""), 0);
138 EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output
));
139 EXPECT_EQ(output
.compare(""), 0);
140 EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output
));
141 EXPECT_EQ(output
.compare(""), 0);
142 EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output
));
143 EXPECT_EQ(output
.compare(""), 0);
144 EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output
));
145 EXPECT_EQ(output
.compare(""), 0);
146 EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output
));
147 EXPECT_EQ(output
.compare(""), 0);
148 EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output
));
149 EXPECT_EQ(output
.compare(""), 0);
151 // Beyond U+10FFFF (the upper limit of Unicode codespace)
152 EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output
));
153 EXPECT_EQ(output
.compare(""), 0);
154 EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output
));
155 EXPECT_EQ(output
.compare(""), 0);
156 EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output
));
157 EXPECT_EQ(output
.compare(""), 0);
159 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
160 EXPECT_TRUE(Truncated("\xfe\xff", 2, &output
));
161 EXPECT_EQ(output
.compare(""), 0);
162 EXPECT_TRUE(Truncated("\xff\xfe", 2, &output
));
163 EXPECT_EQ(output
.compare(""), 0);
166 const char array
[] = "\x00\x00\xfe\xff";
167 const std::string
array_string(array
, arraysize(array
));
168 EXPECT_TRUE(Truncated(array_string
, 4, &output
));
169 EXPECT_EQ(output
.compare(std::string("\x00\x00", 2)), 0);
172 // Variants on the previous test
174 const char array
[] = "\xff\xfe\x00\x00";
175 const std::string
array_string(array
, 4);
176 EXPECT_FALSE(Truncated(array_string
, 4, &output
));
177 EXPECT_EQ(output
.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
180 const char array
[] = "\xff\x00\x00\xfe";
181 const std::string
array_string(array
, arraysize(array
));
182 EXPECT_TRUE(Truncated(array_string
, 4, &output
));
183 EXPECT_EQ(output
.compare(std::string("\xff\x00\x00", 3)), 0);
186 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
187 EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output
));
188 EXPECT_EQ(output
.compare(""), 0);
189 EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output
));
190 EXPECT_EQ(output
.compare(""), 0);
191 EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output
));
192 EXPECT_EQ(output
.compare(""), 0);
193 EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output
));
194 EXPECT_EQ(output
.compare(""), 0);
195 EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output
));
196 EXPECT_EQ(output
.compare(""), 0);
198 // Strings in legacy encodings that are valid in UTF-8, but
199 // are invalid as UTF-8 in real data.
200 EXPECT_TRUE(Truncated("caf\xe9", 4, &output
));
201 EXPECT_EQ(output
.compare("caf"), 0);
202 EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output
));
203 EXPECT_EQ(output
.compare(""), 0);
204 EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output
));
205 EXPECT_EQ(output
.compare("\xa7\x41\xa6\x6e"), 0);
206 EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
208 EXPECT_EQ(output
.compare("\xa7\x41\xa6\x6e"), 0);
210 // Testing using the same string as input and output.
211 EXPECT_FALSE(Truncated(output
, 4, &output
));
212 EXPECT_EQ(output
.compare("\xa7\x41\xa6\x6e"), 0);
213 EXPECT_TRUE(Truncated(output
, 3, &output
));
214 EXPECT_EQ(output
.compare("\xa7\x41"), 0);
216 // "abc" with U+201[CD] in windows-125[0-8]
217 EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output
));
218 EXPECT_EQ(output
.compare("\x93" "abc"), 0);
220 // U+0639 U+064E U+0644 U+064E in ISO-8859-6
221 EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output
));
222 EXPECT_EQ(output
.compare(""), 0);
224 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
225 EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output
));
226 EXPECT_EQ(output
.compare(""), 0);
229 TEST(StringUtilTest
, TrimWhitespace
) {
230 string16 output
; // Allow contents to carry over to next testcase
231 for (size_t i
= 0; i
< arraysize(trim_cases
); ++i
) {
232 const trim_case
& value
= trim_cases
[i
];
233 EXPECT_EQ(value
.return_value
,
234 TrimWhitespace(WideToUTF16(value
.input
), value
.positions
,
236 EXPECT_EQ(WideToUTF16(value
.output
), output
);
239 // Test that TrimWhitespace() can take the same string for input and output
240 output
= ASCIIToUTF16(" This is a test \r\n");
241 EXPECT_EQ(TRIM_ALL
, TrimWhitespace(output
, TRIM_ALL
, &output
));
242 EXPECT_EQ(ASCIIToUTF16("This is a test"), output
);
244 // Once more, but with a string of whitespace
245 output
= ASCIIToUTF16(" \r\n");
246 EXPECT_EQ(TRIM_ALL
, TrimWhitespace(output
, TRIM_ALL
, &output
));
247 EXPECT_EQ(string16(), output
);
249 std::string output_ascii
;
250 for (size_t i
= 0; i
< arraysize(trim_cases_ascii
); ++i
) {
251 const trim_case_ascii
& value
= trim_cases_ascii
[i
];
252 EXPECT_EQ(value
.return_value
,
253 TrimWhitespace(value
.input
, value
.positions
, &output_ascii
));
254 EXPECT_EQ(value
.output
, output_ascii
);
// Wide-character fixtures for the CollapseWhitespace test: the input text, the
// boolean passed as the second argument, and the expected result.
// NOTE(review): the |trim| member was missing from the damaged listing and is
// restored from the bool initializers and the test's read of value.trim. The
// listing also skipped source lines inside the table (265-266 and 274-275), so
// rows may be missing here - restore them from the original file.
static const struct collapse_case {
  const wchar_t* input;
  const bool trim;
  const wchar_t* output;
} collapse_cases[] = {
  {L" Google Video ", false, L"Google Video"},
  {L"Google Video", false, L"Google Video"},
  {L"\t\rTest String\n", false, L"Test String"},
  {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
  {L" Test \n \t String ", false, L"Test String"},
  {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
  {L" Test String", false, L"Test String"},
  {L"Test String ", false, L"Test String"},
  {L"Test String", false, L"Test String"},
  {L" \r ", true, L""},
  {L"\nFoo", true, L"Foo"},
  {L"\r Foo ", true, L"Foo"},
  {L" Foo bar ", true, L"Foo bar"},
  {L" \tFoo bar \n", true, L"Foo bar"},
  {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
};
284 TEST(StringUtilTest
, CollapseWhitespace
) {
285 for (size_t i
= 0; i
< arraysize(collapse_cases
); ++i
) {
286 const collapse_case
& value
= collapse_cases
[i
];
287 EXPECT_EQ(WideToUTF16(value
.output
),
288 CollapseWhitespace(WideToUTF16(value
.input
), value
.trim
));
// Narrow-character fixtures for the CollapseWhitespaceASCII test: the input
// text, the boolean passed as the second argument, and the expected result.
// NOTE(review): all three member declarations were missing from the damaged
// listing; they are restored from the row initializers and from the test's
// reads of value.input / value.trim / value.output. The listing also skipped
// source lines inside the table (299-300 and 306-308), so rows may be missing -
// restore them from the original file.
static const struct collapse_case_ascii {
  const char* input;
  const bool trim;
  const char* output;
} collapse_cases_ascii[] = {
  {" Google Video ", false, "Google Video"},
  {"Google Video", false, "Google Video"},
  {"\t\rTest String\n", false, "Test String"},
  {" Test \n \t String ", false, "Test String"},
  {" Test String", false, "Test String"},
  {"Test String ", false, "Test String"},
  {"Test String", false, "Test String"},
  {"\nFoo", true, "Foo"},
  {"\r Foo ", true, "Foo"},
  {" Foo bar ", true, "Foo bar"},
  {" \tFoo bar \n", true, "Foo bar"},
  {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
};
316 TEST(StringUtilTest
, CollapseWhitespaceASCII
) {
317 for (size_t i
= 0; i
< arraysize(collapse_cases_ascii
); ++i
) {
318 const collapse_case_ascii
& value
= collapse_cases_ascii
[i
];
319 EXPECT_EQ(value
.output
, CollapseWhitespaceASCII(value
.input
, value
.trim
));
323 TEST(StringUtilTest
, IsStringUTF8
) {
324 EXPECT_TRUE(IsStringUTF8("abc"));
325 EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
326 EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
327 EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
328 EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
329 EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc")); // UTF-8 BOM
331 // surrogate code points
332 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
333 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
334 EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
336 // overlong sequences
337 EXPECT_FALSE(IsStringUTF8("\xc0\x80")); // U+0000
338 EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81")); // "AB"
339 EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80")); // U+0000
340 EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80")); // U+0080
341 EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf")); // U+07ff
342 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D")); // U+000D
343 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91")); // U+0091
344 EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80")); // U+0800
345 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf")); // U+FEFF (BOM)
346 EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf")); // U+003F
347 EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5")); // U+00A5
349 // Beyond U+10FFFF (the upper limit of Unicode codespace)
350 EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80")); // U+110000
351 EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf")); // 5 bytes
352 EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80")); // 6 bytes
354 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
355 EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
356 EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
357 EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
358 EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
360 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
361 EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe")); // U+FFFE)
362 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe")); // U+1FFFE
363 EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf")); // U+10FFFF
364 EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90")); // U+FDD0
365 EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf")); // U+FDEF
366 // Strings in legacy encodings. We can certainly make up strings
367 // in a legacy encoding that are valid in UTF-8, but in real data,
368 // most of them are invalid as UTF-8.
369 EXPECT_FALSE(IsStringUTF8("caf\xe9")); // cafe with U+00E9 in ISO-8859-1
370 EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2")); // U+AC00, U+AC001 in EUC-KR
371 EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e")); // U+4F60 U+597D in Big5
372 // "abc" with U+201[CD] in windows-125[0-8]
373 EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
374 // U+0639 U+064E U+0644 U+064E in ISO-8859-6
375 EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
376 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
377 EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
379 // Check that we support Embedded Nulls. The first uses the canonical UTF-8
380 // representation, and the second uses a 2-byte sequence. The second version
381 // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
382 // given codepoint must be used.
383 static const char kEmbeddedNull
[] = "embedded\0null";
384 EXPECT_TRUE(IsStringUTF8(
385 std::string(kEmbeddedNull
, sizeof(kEmbeddedNull
))));
386 EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
389 TEST(StringUtilTest
, IsStringASCII
) {
390 static char char_ascii
[] =
391 "0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF";
392 static char16 char16_ascii
[] = {
393 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', 'A',
394 'B', 'C', 'D', 'E', 'F', '0', '1', '2', '3', '4', '5', '6',
395 '7', '8', '9', '0', 'A', 'B', 'C', 'D', 'E', 'F', 0 };
397 // Test a variety of the fragment start positions and lengths in order to make
398 // sure that bit masking in IsStringASCII works correctly.
399 // Also, test that a non-ASCII character will be detected regardless of its
400 // position inside the string.
402 const size_t string_length
= arraysize(char_ascii
) - 1;
403 for (size_t offset
= 0; offset
< 8; ++offset
) {
404 for (size_t len
= 0, max_len
= string_length
- offset
; len
< max_len
;
406 EXPECT_TRUE(IsStringASCII(StringPiece(char_ascii
+ offset
, len
)));
407 for (size_t char_pos
= offset
; char_pos
< len
; ++char_pos
) {
408 char_ascii
[char_pos
] |= '\x80';
409 EXPECT_FALSE(IsStringASCII(StringPiece(char_ascii
+ offset
, len
)));
410 char_ascii
[char_pos
] &= ~'\x80';
417 const size_t string_length
= arraysize(char16_ascii
) - 1;
418 for (size_t offset
= 0; offset
< 4; ++offset
) {
419 for (size_t len
= 0, max_len
= string_length
- offset
; len
< max_len
;
421 EXPECT_TRUE(IsStringASCII(StringPiece16(char16_ascii
+ offset
, len
)));
422 for (size_t char_pos
= offset
; char_pos
< len
; ++char_pos
) {
423 char16_ascii
[char_pos
] |= 0x80;
425 IsStringASCII(StringPiece16(char16_ascii
+ offset
, len
)));
426 char16_ascii
[char_pos
] &= ~0x80;
427 // Also test when the upper half is non-zero.
428 char16_ascii
[char_pos
] |= 0x100;
430 IsStringASCII(StringPiece16(char16_ascii
+ offset
, len
)));
431 char16_ascii
[char_pos
] &= ~0x100;
438 TEST(StringUtilTest
, ConvertASCII
) {
439 static const char* char_cases
[] = {
442 "0123ABCDwxyz \a\b\t\r\n!+,.~"
445 static const wchar_t* const wchar_cases
[] = {
448 L
"0123ABCDwxyz \a\b\t\r\n!+,.~"
451 for (size_t i
= 0; i
< arraysize(char_cases
); ++i
) {
452 EXPECT_TRUE(IsStringASCII(char_cases
[i
]));
453 string16 utf16
= ASCIIToUTF16(char_cases
[i
]);
454 EXPECT_EQ(WideToUTF16(wchar_cases
[i
]), utf16
);
456 std::string ascii
= UTF16ToASCII(WideToUTF16(wchar_cases
[i
]));
457 EXPECT_EQ(char_cases
[i
], ascii
);
460 EXPECT_FALSE(IsStringASCII("Google \x80Video"));
462 // Convert empty strings.
465 EXPECT_EQ(empty
, UTF16ToASCII(empty16
));
466 EXPECT_EQ(empty16
, ASCIIToUTF16(empty
));
468 // Convert strings with an embedded NUL character.
469 const char chars_with_nul
[] = "test\0string";
470 const int length_with_nul
= arraysize(chars_with_nul
) - 1;
471 std::string
string_with_nul(chars_with_nul
, length_with_nul
);
472 std::wstring wide_with_nul
= ASCIIToWide(string_with_nul
);
473 EXPECT_EQ(static_cast<std::wstring::size_type
>(length_with_nul
),
474 wide_with_nul
.length());
475 std::string narrow_with_nul
= UTF16ToASCII(WideToUTF16(wide_with_nul
));
476 EXPECT_EQ(static_cast<std::string::size_type
>(length_with_nul
),
477 narrow_with_nul
.length());
478 EXPECT_EQ(0, string_with_nul
.compare(narrow_with_nul
));
481 TEST(StringUtilTest
, ToUpperASCII
) {
482 EXPECT_EQ('C', ToUpperASCII('C'));
483 EXPECT_EQ('C', ToUpperASCII('c'));
484 EXPECT_EQ('2', ToUpperASCII('2'));
486 EXPECT_EQ(L
'C', ToUpperASCII(L
'C'));
487 EXPECT_EQ(L
'C', ToUpperASCII(L
'c'));
488 EXPECT_EQ(L
'2', ToUpperASCII(L
'2'));
490 std::string
in_place_a("Cc2");
491 StringToUpperASCII(&in_place_a
);
492 EXPECT_EQ("CC2", in_place_a
);
494 std::wstring
in_place_w(L
"Cc2");
495 StringToUpperASCII(&in_place_w
);
496 EXPECT_EQ(L
"CC2", in_place_w
);
498 std::string
original_a("Cc2");
499 std::string upper_a
= StringToUpperASCII(original_a
);
500 EXPECT_EQ("CC2", upper_a
);
502 std::wstring
original_w(L
"Cc2");
503 std::wstring upper_w
= StringToUpperASCII(original_w
);
504 EXPECT_EQ(L
"CC2", upper_w
);
// Checks LowerCaseEqualsASCII for every row of a local table, once with the
// source converted to UTF-16 and once with the narrow source as-is.
// NOTE(review): this listing is damaged. The embedded numbers jump from 508 to
// 511 and from 511 to 517, so the struct's member declarations (the loop reads
// .src_a and .dst) and every table row are missing, as are the closing braces.
// Restore them from the original file before trying to build this block.
507 TEST(StringUtilTest
, LowerCaseEqualsASCII
) {
508 static const struct {
511 } lowercase_cases
[] = {
// Each row is expected to compare equal, via both overloads exercised below.
517 for (size_t i
= 0; i
< ARRAYSIZE_UNSAFE(lowercase_cases
); ++i
) {
518 EXPECT_TRUE(LowerCaseEqualsASCII(ASCIIToUTF16(lowercase_cases
[i
].src_a
),
519 lowercase_cases
[i
].dst
));
520 EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases
[i
].src_a
,
521 lowercase_cases
[i
].dst
));
525 TEST(StringUtilTest
, FormatBytesUnlocalized
) {
526 static const struct {
528 const char* expected
;
530 // Expected behavior: we show one post-decimal digit when we have
531 // under two pre-decimal digits, except in cases where it makes no
532 // sense (zero or bytes).
533 // Since we switch units once we cross the 1000 mark, this keeps
534 // the display of file sizes or bytes consistently around three
538 {1024*1024, "1.0 MB"},
539 {1024*1024*1024, "1.0 GB"},
540 {10LL*1024*1024*1024, "10.0 GB"},
541 {99LL*1024*1024*1024, "99.0 GB"},
542 {105LL*1024*1024*1024, "105 GB"},
543 {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"},
544 {~(1LL<<63), "8192 PB"},
546 {99*1024 + 103, "99.1 kB"},
547 {1024*1024 + 103, "1.0 MB"},
548 {1024*1024 + 205 * 1024, "1.2 MB"},
549 {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"},
550 {10LL*1024*1024*1024, "10.0 GB"},
551 {100LL*1024*1024*1024, "100 GB"},
554 for (size_t i
= 0; i
< ARRAYSIZE_UNSAFE(cases
); ++i
) {
555 EXPECT_EQ(ASCIIToUTF16(cases
[i
].expected
),
556 FormatBytesUnlocalized(cases
[i
].bytes
));
559 TEST(StringUtilTest
, ReplaceSubstringsAfterOffset
) {
560 static const struct {
562 string16::size_type start_offset
;
563 const char* find_this
;
564 const char* replace_with
;
565 const char* expected
;
567 {"aaa", 0, "a", "b", "bbb"},
568 {"abb", 0, "ab", "a", "ab"},
569 {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
570 {"Not found", 0, "x", "0", "Not found"},
571 {"Not found again", 5, "x", "0", "Not found again"},
572 {" Making it much longer ", 0, " ", "Four score and seven years ago",
573 "Four score and seven years agoMakingFour score and seven years agoit"
574 "Four score and seven years agomuchFour score and seven years agolonger"
575 "Four score and seven years ago"},
576 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
577 {"Replace me only me once", 9, "me ", "", "Replace me only once"},
578 {"abababab", 2, "ab", "c", "abccc"},
581 for (size_t i
= 0; i
< ARRAYSIZE_UNSAFE(cases
); i
++) {
582 string16 str
= ASCIIToUTF16(cases
[i
].str
);
583 ReplaceSubstringsAfterOffset(&str
, cases
[i
].start_offset
,
584 ASCIIToUTF16(cases
[i
].find_this
),
585 ASCIIToUTF16(cases
[i
].replace_with
));
586 EXPECT_EQ(ASCIIToUTF16(cases
[i
].expected
), str
);
590 TEST(StringUtilTest
, ReplaceFirstSubstringAfterOffset
) {
591 static const struct {
593 string16::size_type start_offset
;
594 const char* find_this
;
595 const char* replace_with
;
596 const char* expected
;
598 {"aaa", 0, "a", "b", "baa"},
599 {"abb", 0, "ab", "a", "ab"},
600 {"Removing some substrings inging", 0, "ing", "",
601 "Remov some substrings inging"},
602 {"Not found", 0, "x", "0", "Not found"},
603 {"Not found again", 5, "x", "0", "Not found again"},
604 {" Making it much longer ", 0, " ", "Four score and seven years ago",
605 "Four score and seven years agoMaking it much longer "},
606 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
607 {"Replace me only me once", 4, "me ", "", "Replace only me once"},
608 {"abababab", 2, "ab", "c", "abcabab"},
611 for (size_t i
= 0; i
< ARRAYSIZE_UNSAFE(cases
); i
++) {
612 string16 str
= ASCIIToUTF16(cases
[i
].str
);
613 ReplaceFirstSubstringAfterOffset(&str
, cases
[i
].start_offset
,
614 ASCIIToUTF16(cases
[i
].find_this
),
615 ASCIIToUTF16(cases
[i
].replace_with
));
616 EXPECT_EQ(ASCIIToUTF16(cases
[i
].expected
), str
);
620 TEST(StringUtilTest
, HexDigitToInt
) {
621 EXPECT_EQ(0, HexDigitToInt('0'));
622 EXPECT_EQ(1, HexDigitToInt('1'));
623 EXPECT_EQ(2, HexDigitToInt('2'));
624 EXPECT_EQ(3, HexDigitToInt('3'));
625 EXPECT_EQ(4, HexDigitToInt('4'));
626 EXPECT_EQ(5, HexDigitToInt('5'));
627 EXPECT_EQ(6, HexDigitToInt('6'));
628 EXPECT_EQ(7, HexDigitToInt('7'));
629 EXPECT_EQ(8, HexDigitToInt('8'));
630 EXPECT_EQ(9, HexDigitToInt('9'));
631 EXPECT_EQ(10, HexDigitToInt('A'));
632 EXPECT_EQ(11, HexDigitToInt('B'));
633 EXPECT_EQ(12, HexDigitToInt('C'));
634 EXPECT_EQ(13, HexDigitToInt('D'));
635 EXPECT_EQ(14, HexDigitToInt('E'));
636 EXPECT_EQ(15, HexDigitToInt('F'));
638 // Verify the lower case as well.
639 EXPECT_EQ(10, HexDigitToInt('a'));
640 EXPECT_EQ(11, HexDigitToInt('b'));
641 EXPECT_EQ(12, HexDigitToInt('c'));
642 EXPECT_EQ(13, HexDigitToInt('d'));
643 EXPECT_EQ(14, HexDigitToInt('e'));
644 EXPECT_EQ(15, HexDigitToInt('f'));
647 // This checks where we can use the assignment operator for a va_list. We need
648 // a way to do this since Visual C doesn't support va_copy, but assignment on
649 // va_list is not guaranteed to be a copy. See StringAppendVT which uses this
651 static void VariableArgsFunc(const char* format
, ...) {
653 va_start(org
, format
);
656 GG_VA_COPY(dup
, org
);
657 int i1
= va_arg(org
, int);
658 int j1
= va_arg(org
, int);
659 char* s1
= va_arg(org
, char*);
660 double d1
= va_arg(org
, double);
663 int i2
= va_arg(dup
, int);
664 int j2
= va_arg(dup
, int);
665 char* s2
= va_arg(dup
, char*);
666 double d2
= va_arg(dup
, double);
670 EXPECT_STREQ(s1
, s2
);
676 TEST(StringUtilTest
, VAList
) {
677 VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21);
681 template <typename STR
>
682 void TokenizeTest() {
686 size
= Tokenize(STR("This is a string"), STR(" "), &r
);
688 ASSERT_EQ(4U, r
.size());
689 EXPECT_EQ(r
[0], STR("This"));
690 EXPECT_EQ(r
[1], STR("is"));
691 EXPECT_EQ(r
[2], STR("a"));
692 EXPECT_EQ(r
[3], STR("string"));
695 size
= Tokenize(STR("one,two,three"), STR(","), &r
);
697 ASSERT_EQ(3U, r
.size());
698 EXPECT_EQ(r
[0], STR("one"));
699 EXPECT_EQ(r
[1], STR("two"));
700 EXPECT_EQ(r
[2], STR("three"));
703 size
= Tokenize(STR("one,two:three;four"), STR(",:"), &r
);
705 ASSERT_EQ(3U, r
.size());
706 EXPECT_EQ(r
[0], STR("one"));
707 EXPECT_EQ(r
[1], STR("two"));
708 EXPECT_EQ(r
[2], STR("three;four"));
711 size
= Tokenize(STR("one,two:three;four"), STR(";,:"), &r
);
713 ASSERT_EQ(4U, r
.size());
714 EXPECT_EQ(r
[0], STR("one"));
715 EXPECT_EQ(r
[1], STR("two"));
716 EXPECT_EQ(r
[2], STR("three"));
717 EXPECT_EQ(r
[3], STR("four"));
720 size
= Tokenize(STR("one, two, three"), STR(","), &r
);
722 ASSERT_EQ(3U, r
.size());
723 EXPECT_EQ(r
[0], STR("one"));
724 EXPECT_EQ(r
[1], STR(" two"));
725 EXPECT_EQ(r
[2], STR(" three"));
728 size
= Tokenize(STR("one, two, three, "), STR(","), &r
);
730 ASSERT_EQ(4U, r
.size());
731 EXPECT_EQ(r
[0], STR("one"));
732 EXPECT_EQ(r
[1], STR(" two"));
733 EXPECT_EQ(r
[2], STR(" three"));
734 EXPECT_EQ(r
[3], STR(" "));
737 size
= Tokenize(STR("one, two, three,"), STR(","), &r
);
739 ASSERT_EQ(3U, r
.size());
740 EXPECT_EQ(r
[0], STR("one"));
741 EXPECT_EQ(r
[1], STR(" two"));
742 EXPECT_EQ(r
[2], STR(" three"));
745 size
= Tokenize(STR(), STR(","), &r
);
747 ASSERT_EQ(0U, r
.size());
750 size
= Tokenize(STR(","), STR(","), &r
);
752 ASSERT_EQ(0U, r
.size());
755 size
= Tokenize(STR(",;:."), STR(".:;,"), &r
);
757 ASSERT_EQ(0U, r
.size());
760 size
= Tokenize(STR("\t\ta\t"), STR("\t"), &r
);
762 ASSERT_EQ(1U, r
.size());
763 EXPECT_EQ(r
[0], STR("a"));
766 size
= Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r
);
768 ASSERT_EQ(2U, r
.size());
769 EXPECT_EQ(r
[0], STR("\ta\t"));
770 EXPECT_EQ(r
[1], STR("b\tcc"));
774 TEST(StringUtilTest
, TokenizeStdString
) {
775 TokenizeTest
<std::string
>();
778 TEST(StringUtilTest
, TokenizeStringPiece
) {
779 TokenizeTest
<base::StringPiece
>();
782 // Test for JoinString
783 TEST(StringUtilTest
, JoinString
) {
784 std::vector
<std::string
> in
;
785 EXPECT_EQ("", JoinString(in
, ','));
788 EXPECT_EQ("a", JoinString(in
, ','));
792 EXPECT_EQ("a,b,c", JoinString(in
, ','));
794 in
.push_back(std::string());
795 EXPECT_EQ("a,b,c,", JoinString(in
, ','));
797 EXPECT_EQ("a|b|c|| ", JoinString(in
, '|'));
800 // Test for JoinString overloaded with std::string separator
801 TEST(StringUtilTest
, JoinStringWithString
) {
802 std::string
separator(", ");
803 std::vector
<std::string
> parts
;
804 EXPECT_EQ(std::string(), JoinString(parts
, separator
));
806 parts
.push_back("a");
807 EXPECT_EQ("a", JoinString(parts
, separator
));
809 parts
.push_back("b");
810 parts
.push_back("c");
811 EXPECT_EQ("a, b, c", JoinString(parts
, separator
));
813 parts
.push_back(std::string());
814 EXPECT_EQ("a, b, c, ", JoinString(parts
, separator
));
815 parts
.push_back(" ");
816 EXPECT_EQ("a|b|c|| ", JoinString(parts
, "|"));
819 // Test for JoinString overloaded with string16 separator
820 TEST(StringUtilTest
, JoinStringWithString16
) {
821 string16 separator
= ASCIIToUTF16(", ");
822 std::vector
<string16
> parts
;
823 EXPECT_EQ(string16(), JoinString(parts
, separator
));
825 parts
.push_back(ASCIIToUTF16("a"));
826 EXPECT_EQ(ASCIIToUTF16("a"), JoinString(parts
, separator
));
828 parts
.push_back(ASCIIToUTF16("b"));
829 parts
.push_back(ASCIIToUTF16("c"));
830 EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(parts
, separator
));
832 parts
.push_back(ASCIIToUTF16(""));
833 EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(parts
, separator
));
834 parts
.push_back(ASCIIToUTF16(" "));
835 EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(parts
, ASCIIToUTF16("|")));
838 TEST(StringUtilTest
, StartsWith
) {
839 EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true));
840 EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true));
841 EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false));
842 EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false));
843 EXPECT_FALSE(StartsWithASCII("java", "javascript", true));
844 EXPECT_FALSE(StartsWithASCII("java", "javascript", false));
845 EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", false));
846 EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", true));
847 EXPECT_TRUE(StartsWithASCII("java", std::string(), false));
848 EXPECT_TRUE(StartsWithASCII("java", std::string(), true));
850 EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"),
851 ASCIIToUTF16("javascript"), true));
852 EXPECT_FALSE(StartsWith(ASCIIToUTF16("JavaScript:url"),
853 ASCIIToUTF16("javascript"), true));
854 EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"),
855 ASCIIToUTF16("javascript"), false));
856 EXPECT_TRUE(StartsWith(ASCIIToUTF16("JavaScript:url"),
857 ASCIIToUTF16("javascript"), false));
858 EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"),
859 ASCIIToUTF16("javascript"), true));
860 EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"),
861 ASCIIToUTF16("javascript"), false));
862 EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"), false));
863 EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"), true));
864 EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(), false));
865 EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(), true));
868 TEST(StringUtilTest
, EndsWith
) {
869 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"),
870 ASCIIToUTF16(".plugin"), true));
871 EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.Plugin"),
872 ASCIIToUTF16(".plugin"), true));
873 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"),
874 ASCIIToUTF16(".plugin"), false));
875 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.Plugin"),
876 ASCIIToUTF16(".plugin"), false));
877 EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"), true));
878 EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"), false));
879 EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"),
880 ASCIIToUTF16(".plugin"), true));
881 EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"),
882 ASCIIToUTF16(".plugin"), false));
883 EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"), false));
884 EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"), true));
885 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(), false));
886 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(), true));
887 EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"),
888 ASCIIToUTF16(".plugin"), false));
889 EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"), ASCIIToUTF16(".plugin"), true));
890 EXPECT_TRUE(EndsWith(string16(), string16(), false));
891 EXPECT_TRUE(EndsWith(string16(), string16(), true));
// Checks the |offsets| vector associated with ReplaceStringPlaceholders() for
// a format string with two placeholders, once in $1/$2 order and once swapped.
// Two entries are expected in both cases. Presumably offsets[i] is the
// position in the formatted string at which substitution i+1 was inserted:
// the expected values 7 and 25 are where "1" and "2" land in
// "Hello, 1. Your number is 2." -- TODO confirm against the function's
// documentation.
894 TEST(StringUtilTest
, GetStringFWithOffsets
) {
895 std::vector
<string16
> subst
;
896 subst
.push_back(ASCIIToUTF16("1"));
897 subst
.push_back(ASCIIToUTF16("2"));
898 std::vector
<size_t> offsets
;
900 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
903 EXPECT_EQ(2U, offsets
.size());
904 EXPECT_EQ(7U, offsets
[0]);
905 EXPECT_EQ(25U, offsets
[1]);
908 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
911 EXPECT_EQ(2U, offsets
.size());
912 EXPECT_EQ(25U, offsets
[0]);
913 EXPECT_EQ(7U, offsets
[1]);
917 TEST(StringUtilTest
, ReplaceStringPlaceholdersTooFew
) {
918 // Test whether ReplaceStringPlaceholders works as expected when there
919 // are fewer substitutions than placeholders.
// Only three substitutions are supplied; the expected output shows $4, $5 and
// $6 expanding to nothing while the repeated $1..$3 are still substituted.
920 std::vector
<string16
> subst
;
921 subst
.push_back(ASCIIToUTF16("9a"));
922 subst
.push_back(ASCIIToUTF16("8b"));
923 subst
.push_back(ASCIIToUTF16("7c"));
926 ReplaceStringPlaceholders(
927 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst
, NULL
);
929 EXPECT_EQ(formatted
, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"));
// Substitutes the nine single-digit placeholders $1..$9 in a string16 format
// string and expects each one to be replaced by the corresponding entry of
// |subst| ($1 -> subst[0], ..., $9 -> subst[8]). NULL is passed where an
// offsets vector may be supplied.
932 TEST(StringUtilTest
, ReplaceStringPlaceholders
) {
933 std::vector
<string16
> subst
;
934 subst
.push_back(ASCIIToUTF16("9a"));
935 subst
.push_back(ASCIIToUTF16("8b"));
936 subst
.push_back(ASCIIToUTF16("7c"));
937 subst
.push_back(ASCIIToUTF16("6d"));
938 subst
.push_back(ASCIIToUTF16("5e"));
939 subst
.push_back(ASCIIToUTF16("4f"));
940 subst
.push_back(ASCIIToUTF16("3g"));
941 subst
.push_back(ASCIIToUTF16("2h"));
942 subst
.push_back(ASCIIToUTF16("1i"));
945 ReplaceStringPlaceholders(
946 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst
, NULL
);
948 EXPECT_EQ(formatted
, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"));
// Supplies fourteen substitutions and expects the two-digit placeholders
// $10..$14 to be replaced by subst[9]..subst[13]. The format string ends with
// a bare "$1", which is expected to resolve to the first substitution ("9a").
951 TEST(StringUtilTest
, ReplaceStringPlaceholdersMoreThan9Replacements
) {
952 std::vector
<string16
> subst
;
953 subst
.push_back(ASCIIToUTF16("9a"));
954 subst
.push_back(ASCIIToUTF16("8b"));
955 subst
.push_back(ASCIIToUTF16("7c"));
956 subst
.push_back(ASCIIToUTF16("6d"));
957 subst
.push_back(ASCIIToUTF16("5e"));
958 subst
.push_back(ASCIIToUTF16("4f"));
959 subst
.push_back(ASCIIToUTF16("3g"));
960 subst
.push_back(ASCIIToUTF16("2h"));
961 subst
.push_back(ASCIIToUTF16("1i"));
962 subst
.push_back(ASCIIToUTF16("0j"));
963 subst
.push_back(ASCIIToUTF16("-1k"));
964 subst
.push_back(ASCIIToUTF16("-2l"));
965 subst
.push_back(ASCIIToUTF16("-3m"));
966 subst
.push_back(ASCIIToUTF16("-4n"));
969 ReplaceStringPlaceholders(
970 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i,"
971 "$10j,$11k,$12l,$13m,$14n,$1"), subst
, NULL
);
973 EXPECT_EQ(formatted
, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,"
974 "1ii,0jj,-1kk,-2ll,-3mm,-4nn,9a"));
// Nine-placeholder substitution check using std::string substitutions and a
// narrow format string; the result is a std::string in which $1..$9 are
// expected to be replaced by subst[0]..subst[8].
977 TEST(StringUtilTest
, StdStringReplaceStringPlaceholders
) {
978 std::vector
<std::string
> subst
;
979 subst
.push_back("9a");
980 subst
.push_back("8b");
981 subst
.push_back("7c");
982 subst
.push_back("6d");
983 subst
.push_back("5e");
984 subst
.push_back("4f");
985 subst
.push_back("3g");
986 subst
.push_back("2h");
987 subst
.push_back("1i");
989 std::string formatted
=
990 ReplaceStringPlaceholders(
991 "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst
, NULL
);
993 EXPECT_EQ(formatted
, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii");
// Exercises the std::string overload of ReplaceStringPlaceholders() with runs
// of two, three and four consecutive '$' characters in front of a placeholder
// digit ("$$1", "$$$2", "$$$$3"), using three single-letter substitutions.
996 TEST(StringUtilTest
, ReplaceStringPlaceholdersConsecutiveDollarSigns
) {
997 std::vector
<std::string
> subst
;
998 subst
.push_back("a");
999 subst
.push_back("b");
1000 subst
.push_back("c");
1001 EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst
, NULL
),
// Exercises MatchPattern() with the '*' and '?' wildcards, a
// backslash-escaped '*', empty strings and empty patterns, UTF-8 input
// (including invalid sequences) and string16 input. Per the expectations
// below, matching is case-sensitive ("*.COM" does not match "www.msn.com")
// and "?" is expected to match the empty string.
1005 TEST(StringUtilTest
, MatchPatternTest
) {
1006 EXPECT_TRUE(MatchPattern("www.google.com", "*.com"));
1007 EXPECT_TRUE(MatchPattern("www.google.com", "*"));
1008 EXPECT_FALSE(MatchPattern("www.google.com", "www*.g*.org"));
1009 EXPECT_TRUE(MatchPattern("Hello", "H?l?o"));
1010 EXPECT_FALSE(MatchPattern("www.google.com", "http://*)"));
1011 EXPECT_FALSE(MatchPattern("www.msn.com", "*.COM"));
1012 EXPECT_TRUE(MatchPattern("Hello*1234", "He??o\\*1*"));
1013 EXPECT_FALSE(MatchPattern("", "*.*"));
1014 EXPECT_TRUE(MatchPattern("", "*"));
1015 EXPECT_TRUE(MatchPattern("", "?"));
1016 EXPECT_TRUE(MatchPattern("", ""));
1017 EXPECT_FALSE(MatchPattern("Hello", ""));
1018 EXPECT_TRUE(MatchPattern("Hello*", "Hello*"));
1019 // Stop after a certain recursion depth.
1020 EXPECT_FALSE(MatchPattern("123456789012345678", "?????????????????*"));
1022 // Test UTF8 matching.
1023 EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0", "*\xe2\x99\xa0"));
1024 EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0.", "heart: ?."));
1025 EXPECT_TRUE(MatchPattern("hearts: \xe2\x99\xa0\xe2\x99\xa0", "*"));
1026 // Invalid sequences should be handled as a single invalid character.
1027 EXPECT_TRUE(MatchPattern("invalid: \xef\xbf\xbe", "invalid: ?"));
1028 // If the pattern has invalid characters, it shouldn't match anything.
1029 EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80"));
1031 // Test UTF16 character matching.
1032 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"),
1033 UTF8ToUTF16("*.com")));
1034 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"),
1035 UTF8ToUTF16("He??o\\*1*")));
1037 // This test verifies that consecutive wildcards are collapsed into 1
1038 // wildcard (when this doesn't occur, MatchPattern reaches its maximum
1039 // recursion depth).
1040 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello"),
1041 UTF8ToUTF16("He********************************o")));
// Checks base::strlcpy() and base::wcslcpy() against destination buffers of
// various sizes. Every case expects the return value to be the length of the
// source string (7), whatever the destination size. The memcmp lengths in the
// truncation cases include the terminator, so a truncated copy is expected to
// be null-terminated.
1044 TEST(StringUtilTest
, LcpyTest
) {
1045 // Test the normal case where we fit in our buffer.
1049 EXPECT_EQ(7U, base::strlcpy(dst
, "abcdefg", arraysize(dst
)));
1050 EXPECT_EQ(0, memcmp(dst
, "abcdefg", 8));
1051 EXPECT_EQ(7U, base::wcslcpy(wdst
, L
"abcdefg", arraysize(wdst
)));
1052 EXPECT_EQ(0, memcmp(wdst
, L
"abcdefg", sizeof(wchar_t) * 8));
1055 // Test dst_size == 0, nothing should be written to |dst| and we should
1056 // have the equivalent of strlen(src).
1058 char dst
[2] = {1, 2};
1059 wchar_t wdst
[2] = {1, 2};
1060 EXPECT_EQ(7U, base::strlcpy(dst
, "abcdefg", 0));
1061 EXPECT_EQ(1, dst
[0]);
1062 EXPECT_EQ(2, dst
[1]);
1063 EXPECT_EQ(7U, base::wcslcpy(wdst
, L
"abcdefg", 0));
1064 EXPECT_EQ(static_cast<wchar_t>(1), wdst
[0]);
1065 EXPECT_EQ(static_cast<wchar_t>(2), wdst
[1]);
1068 // Test the case where we _just_ completely fit including the null.
1072 EXPECT_EQ(7U, base::strlcpy(dst
, "abcdefg", arraysize(dst
)));
1073 EXPECT_EQ(0, memcmp(dst
, "abcdefg", 8));
1074 EXPECT_EQ(7U, base::wcslcpy(wdst
, L
"abcdefg", arraysize(wdst
)));
1075 EXPECT_EQ(0, memcmp(wdst
, L
"abcdefg", sizeof(wchar_t) * 8));
1078 // Test the case where we are one smaller, so we can't fit the null.
1082 EXPECT_EQ(7U, base::strlcpy(dst
, "abcdefg", arraysize(dst
)));
1083 EXPECT_EQ(0, memcmp(dst
, "abcdef", 7));
1084 EXPECT_EQ(7U, base::wcslcpy(wdst
, L
"abcdefg", arraysize(wdst
)));
1085 EXPECT_EQ(0, memcmp(wdst
, L
"abcdef", sizeof(wchar_t) * 7));
1088 // Test the case where we are just too small.
1092 EXPECT_EQ(7U, base::strlcpy(dst
, "abcdefg", arraysize(dst
)));
1093 EXPECT_EQ(0, memcmp(dst
, "ab", 3));
1094 EXPECT_EQ(7U, base::wcslcpy(wdst
, L
"abcdefg", arraysize(wdst
)));
1095 EXPECT_EQ(0, memcmp(wdst
, L
"ab", sizeof(wchar_t) * 3));
// Table-driven check of base::IsWprintfFormatPortable(): each entry pairs a
// wide format string (|input|) with the expected result (|portable|), and the
// loop compares the function's return value against that expectation.
1099 TEST(StringUtilTest
, WprintfFormatPortabilityTest
) {
1100 static const struct {
1101 const wchar_t* input
;
1108 { L
"Hello, %s", false },
1113 { L
"%ls %s", false },
1114 { L
"%s %ls", false },
1115 { L
"%s %ls %s", false },
1117 { L
"%f %F", false },
1118 { L
"%d %D", false },
1119 { L
"%o %O", false },
1120 { L
"%u %U", false },
1121 { L
"%f %d %o %u", true },
1122 { L
"%-8d (%02.1f%)", true },
1123 { L
"% 10s", false },
1126 for (size_t i
= 0; i
< ARRAYSIZE_UNSAFE(cases
); ++i
)
1127 EXPECT_EQ(cases
[i
].portable
, base::IsWprintfFormatPortable(cases
[i
].input
));
// Verifies RemoveChars() when the input string is also the output string.
// The call is expected to return true when at least one character from
// |kRemoveChars| was removed, and false when nothing matched (leaving the
// string unchanged).
1130 TEST(StringUtilTest
, RemoveChars
) {
1131 const char* kRemoveChars
= "-/+*";
1132 std::string input
= "A-+bc/d!*";
1133 EXPECT_TRUE(RemoveChars(input
, kRemoveChars
, &input
));
1134 EXPECT_EQ("Abcd!", input
);
1136 // No characters match kRemoveChars.
1137 EXPECT_FALSE(RemoveChars(input
, kRemoveChars
, &input
));
1138 EXPECT_EQ("Abcd!", input
);
1142 EXPECT_FALSE(RemoveChars(input
, kRemoveChars
, &input
));
1143 EXPECT_EQ(std::string(), input
);
// Table-driven check of ReplaceChars(). Going by the table, every occurrence
// of any character listed in |replace_chars| is expected to be replaced by the
// whole |replace_with| string (which may be longer than one character), and
// the expected return value |result| is true only when at least one
// replacement took place.
1146 TEST(StringUtilTest
, ReplaceChars
) {
1149 const char* replace_chars
;
1150 const char* replace_with
;
1154 { "", "", "", "", false },
1155 { "test", "", "", "test", false },
1156 { "test", "", "!", "test", false },
1157 { "test", "z", "!", "test", false },
1158 { "test", "e", "!", "t!st", true },
1159 { "test", "e", "!?", "t!?st", true },
1160 { "test", "ez", "!", "t!st", true },
1161 { "test", "zed", "!?", "t!?st", true },
1162 { "test", "t", "!?", "!?es!?", true },
1163 { "test", "et", "!>", "!>!>s!>", true },
1164 { "test", "zest", "!", "!!!!", true },
1165 { "test", "szt", "!", "!e!!", true },
1166 { "test", "t", "test", "testestest", true },
1169 for (size_t i
= 0; i
< ARRAYSIZE_UNSAFE(cases
); ++i
) {
1171 bool result
= ReplaceChars(cases
[i
].input
,
1172 cases
[i
].replace_chars
,
1173 cases
[i
].replace_with
,
1175 EXPECT_EQ(cases
[i
].result
, result
);
1176 EXPECT_EQ(cases
[i
].output
, output
);
// Exercises ContainsOnlyChars() with an empty character set, sets of digits,
// kWhitespaceASCII (narrow strings) and kWhitespaceUTF16 (string16 inputs).
// An empty input is expected to yield true for every character set used here.
1180 TEST(StringUtilTest
, ContainsOnlyChars
) {
1181 // Providing an empty list of characters should return false but for the empty
1183 EXPECT_TRUE(ContainsOnlyChars(std::string(), std::string()));
1184 EXPECT_FALSE(ContainsOnlyChars("Hello", std::string()));
1186 EXPECT_TRUE(ContainsOnlyChars(std::string(), "1234"));
1187 EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
1188 EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
1189 EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
1190 EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));
1192 EXPECT_TRUE(ContainsOnlyChars(std::string(), kWhitespaceASCII
));
1193 EXPECT_TRUE(ContainsOnlyChars(" ", kWhitespaceASCII
));
1194 EXPECT_TRUE(ContainsOnlyChars("\t", kWhitespaceASCII
));
1195 EXPECT_TRUE(ContainsOnlyChars("\t \r \n ", kWhitespaceASCII
));
1196 EXPECT_FALSE(ContainsOnlyChars("a", kWhitespaceASCII
));
1197 EXPECT_FALSE(ContainsOnlyChars("\thello\r \n ", kWhitespaceASCII
));
1199 EXPECT_TRUE(ContainsOnlyChars(string16(), kWhitespaceUTF16
));
1200 EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16(" "), kWhitespaceUTF16
));
1201 EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t"), kWhitespaceUTF16
));
1202 EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t \r \n "), kWhitespaceUTF16
));
1203 EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("a"), kWhitespaceUTF16
));
1204 EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("\thello\r \n "),
// Test fixture providing the WritesCorrectly() helper. The helper asks
// WriteInto() for a buffer of |num_chars| + 1 characters, copies at most
// |num_chars| characters of "supercali" into it with strncpy, and then checks
// both the visible contents (up to the first \0) and that buffer.size()
// equals |num_chars|.
1208 class WriteIntoTest
: public testing::Test
{
1210 static void WritesCorrectly(size_t num_chars
) {
1212 char kOriginal
[] = "supercali";
1213 strncpy(WriteInto(&buffer
, num_chars
+ 1), kOriginal
, num_chars
);
1214 // Using std::string(buffer.c_str()) instead of |buffer| truncates the
1215 // string at the first \0.
1216 EXPECT_EQ(std::string(kOriginal
,
1217 std::min(num_chars
, arraysize(kOriginal
) - 1)),
1218 std::string(buffer
.c_str()));
1219 EXPECT_EQ(num_chars
, buffer
.size());
1223 TEST_F(WriteIntoTest
, WriteInto
) {
1224 // Validate that WriteInto reserves enough space and
1225 // sizes a string correctly.
1228 WritesCorrectly(5000);
1230 // Validate that WriteInto doesn't modify other strings
1231 // when using a Copy-on-Write implementation.
1232 const char kLive
[] = "live";
1233 const char kDead
[] = "dead";
1234 const std::string live
= kLive
;
1235 std::string dead
= live
;
1236 strncpy(WriteInto(&dead
, 5), kDead
, 4);
1237 EXPECT_EQ(kDead
, dead
);
1238 EXPECT_EQ(4u, dead
.size());
1239 EXPECT_EQ(kLive
, live
);
1240 EXPECT_EQ(4u, live
.size());