1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/strings/string_util.h"
12 #include "base/basictypes.h"
13 #include "base/strings/string16.h"
14 #include "base/strings/utf_string_conversions.h"
15 #include "testing/gmock/include/gmock/gmock.h"
16 #include "testing/gtest/include/gtest/gtest.h"
18 using ::testing::ElementsAre
;
22 static const struct trim_case
{
24 const TrimPositions positions
;
25 const wchar_t* output
;
26 const TrimPositions return_value
;
28 {L
" Google Video ", TRIM_LEADING
, L
"Google Video ", TRIM_LEADING
},
29 {L
" Google Video ", TRIM_TRAILING
, L
" Google Video", TRIM_TRAILING
},
30 {L
" Google Video ", TRIM_ALL
, L
"Google Video", TRIM_ALL
},
31 {L
"Google Video", TRIM_ALL
, L
"Google Video", TRIM_NONE
},
32 {L
"", TRIM_ALL
, L
"", TRIM_NONE
},
33 {L
" ", TRIM_LEADING
, L
"", TRIM_LEADING
},
34 {L
" ", TRIM_TRAILING
, L
"", TRIM_TRAILING
},
35 {L
" ", TRIM_ALL
, L
"", TRIM_ALL
},
36 {L
"\t\rTest String\n", TRIM_ALL
, L
"Test String", TRIM_ALL
},
37 {L
"\x2002Test String\x00A0\x3000", TRIM_ALL
, L
"Test String", TRIM_ALL
},
40 static const struct trim_case_ascii
{
42 const TrimPositions positions
;
44 const TrimPositions return_value
;
45 } trim_cases_ascii
[] = {
46 {" Google Video ", TRIM_LEADING
, "Google Video ", TRIM_LEADING
},
47 {" Google Video ", TRIM_TRAILING
, " Google Video", TRIM_TRAILING
},
48 {" Google Video ", TRIM_ALL
, "Google Video", TRIM_ALL
},
49 {"Google Video", TRIM_ALL
, "Google Video", TRIM_NONE
},
50 {"", TRIM_ALL
, "", TRIM_NONE
},
51 {" ", TRIM_LEADING
, "", TRIM_LEADING
},
52 {" ", TRIM_TRAILING
, "", TRIM_TRAILING
},
53 {" ", TRIM_ALL
, "", TRIM_ALL
},
54 {"\t\rTest String\n", TRIM_ALL
, "Test String", TRIM_ALL
},
59 // Helper used to test TruncateUTF8ToByteSize.
60 bool Truncated(const std::string
& input
,
61 const size_t byte_size
,
62 std::string
* output
) {
63 size_t prev
= input
.length();
64 TruncateUTF8ToByteSize(input
, byte_size
, output
);
65 return prev
!= output
->length();
70 TEST(StringUtilTest
, TruncateUTF8ToByteSize
) {
73 // Empty strings and invalid byte_size arguments
74 EXPECT_FALSE(Truncated(std::string(), 0, &output
));
75 EXPECT_EQ(output
, "");
76 EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output
));
77 EXPECT_EQ(output
, "");
78 EXPECT_FALSE(Truncated("\xe1\x80\xbf", static_cast<size_t>(-1), &output
));
79 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output
));
81 // Testing the truncation of valid UTF8 correctly
82 EXPECT_TRUE(Truncated("abc", 2, &output
));
83 EXPECT_EQ(output
, "ab");
84 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output
));
85 EXPECT_EQ(output
.compare("\xc2\x81"), 0);
86 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output
));
87 EXPECT_EQ(output
.compare("\xc2\x81"), 0);
88 EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output
));
89 EXPECT_EQ(output
.compare("\xc2\x81\xc2\x81"), 0);
92 const char array
[] = "\x00\x00\xc2\x81\xc2\x81";
93 const std::string
array_string(array
, arraysize(array
));
94 EXPECT_TRUE(Truncated(array_string
, 4, &output
));
95 EXPECT_EQ(output
.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
99 const char array
[] = "\x00\xc2\x81\xc2\x81";
100 const std::string
array_string(array
, arraysize(array
));
101 EXPECT_TRUE(Truncated(array_string
, 4, &output
));
102 EXPECT_EQ(output
.compare(std::string("\x00\xc2\x81", 3)), 0);
105 // Testing invalid UTF8
106 EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output
));
107 EXPECT_EQ(output
.compare(""), 0);
108 EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output
));
109 EXPECT_EQ(output
.compare(""), 0);
110 EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output
));
111 EXPECT_EQ(output
.compare(""), 0);
113 // Testing invalid UTF8 mixed with valid UTF8
114 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output
));
115 EXPECT_EQ(output
.compare("\xe1\x80\xbf"), 0);
116 EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output
));
117 EXPECT_EQ(output
.compare("\xf1\x80\xa0\xbf"), 0);
118 EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
120 EXPECT_EQ(output
.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
121 EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
123 EXPECT_EQ(output
.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
124 EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output
));
125 EXPECT_EQ(output
.compare("\xef\xbb\xbf" "abc"), 0);
127 // Overlong sequences
128 EXPECT_TRUE(Truncated("\xc0\x80", 2, &output
));
129 EXPECT_EQ(output
.compare(""), 0);
130 EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output
));
131 EXPECT_EQ(output
.compare(""), 0);
132 EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output
));
133 EXPECT_EQ(output
.compare(""), 0);
134 EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output
));
135 EXPECT_EQ(output
.compare(""), 0);
136 EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output
));
137 EXPECT_EQ(output
.compare(""), 0);
138 EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output
));
139 EXPECT_EQ(output
.compare(""), 0);
140 EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output
));
141 EXPECT_EQ(output
.compare(""), 0);
142 EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output
));
143 EXPECT_EQ(output
.compare(""), 0);
144 EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output
));
145 EXPECT_EQ(output
.compare(""), 0);
146 EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output
));
147 EXPECT_EQ(output
.compare(""), 0);
148 EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output
));
149 EXPECT_EQ(output
.compare(""), 0);
151 // Beyond U+10FFFF (the upper limit of Unicode codespace)
152 EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output
));
153 EXPECT_EQ(output
.compare(""), 0);
154 EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output
));
155 EXPECT_EQ(output
.compare(""), 0);
156 EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output
));
157 EXPECT_EQ(output
.compare(""), 0);
159 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
160 EXPECT_TRUE(Truncated("\xfe\xff", 2, &output
));
161 EXPECT_EQ(output
.compare(""), 0);
162 EXPECT_TRUE(Truncated("\xff\xfe", 2, &output
));
163 EXPECT_EQ(output
.compare(""), 0);
166 const char array
[] = "\x00\x00\xfe\xff";
167 const std::string
array_string(array
, arraysize(array
));
168 EXPECT_TRUE(Truncated(array_string
, 4, &output
));
169 EXPECT_EQ(output
.compare(std::string("\x00\x00", 2)), 0);
172 // Variants on the previous test
174 const char array
[] = "\xff\xfe\x00\x00";
175 const std::string
array_string(array
, 4);
176 EXPECT_FALSE(Truncated(array_string
, 4, &output
));
177 EXPECT_EQ(output
.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
180 const char array
[] = "\xff\x00\x00\xfe";
181 const std::string
array_string(array
, arraysize(array
));
182 EXPECT_TRUE(Truncated(array_string
, 4, &output
));
183 EXPECT_EQ(output
.compare(std::string("\xff\x00\x00", 3)), 0);
186 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
187 EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output
));
188 EXPECT_EQ(output
.compare(""), 0);
189 EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output
));
190 EXPECT_EQ(output
.compare(""), 0);
191 EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output
));
192 EXPECT_EQ(output
.compare(""), 0);
193 EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output
));
194 EXPECT_EQ(output
.compare(""), 0);
195 EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output
));
196 EXPECT_EQ(output
.compare(""), 0);
198 // Strings in legacy encodings that are valid in UTF-8, but
199 // are invalid as UTF-8 in real data.
200 EXPECT_TRUE(Truncated("caf\xe9", 4, &output
));
201 EXPECT_EQ(output
.compare("caf"), 0);
202 EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output
));
203 EXPECT_EQ(output
.compare(""), 0);
204 EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output
));
205 EXPECT_EQ(output
.compare("\xa7\x41\xa6\x6e"), 0);
206 EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
208 EXPECT_EQ(output
.compare("\xa7\x41\xa6\x6e"), 0);
210 // Testing using the same string as input and output.
211 EXPECT_FALSE(Truncated(output
, 4, &output
));
212 EXPECT_EQ(output
.compare("\xa7\x41\xa6\x6e"), 0);
213 EXPECT_TRUE(Truncated(output
, 3, &output
));
214 EXPECT_EQ(output
.compare("\xa7\x41"), 0);
216 // "abc" with U+201[CD] in windows-125[0-8]
217 EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output
));
218 EXPECT_EQ(output
.compare("\x93" "abc"), 0);
220 // U+0639 U+064E U+0644 U+064E in ISO-8859-6
221 EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output
));
222 EXPECT_EQ(output
.compare(""), 0);
224 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
225 EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output
));
226 EXPECT_EQ(output
.compare(""), 0);
229 TEST(StringUtilTest
, TrimWhitespace
) {
230 string16 output
; // Allow contents to carry over to next testcase
231 for (size_t i
= 0; i
< arraysize(trim_cases
); ++i
) {
232 const trim_case
& value
= trim_cases
[i
];
233 EXPECT_EQ(value
.return_value
,
234 TrimWhitespace(WideToUTF16(value
.input
), value
.positions
,
236 EXPECT_EQ(WideToUTF16(value
.output
), output
);
239 // Test that TrimWhitespace() can take the same string for input and output
240 output
= ASCIIToUTF16(" This is a test \r\n");
241 EXPECT_EQ(TRIM_ALL
, TrimWhitespace(output
, TRIM_ALL
, &output
));
242 EXPECT_EQ(ASCIIToUTF16("This is a test"), output
);
244 // Once more, but with a string of whitespace
245 output
= ASCIIToUTF16(" \r\n");
246 EXPECT_EQ(TRIM_ALL
, TrimWhitespace(output
, TRIM_ALL
, &output
));
247 EXPECT_EQ(string16(), output
);
249 std::string output_ascii
;
250 for (size_t i
= 0; i
< arraysize(trim_cases_ascii
); ++i
) {
251 const trim_case_ascii
& value
= trim_cases_ascii
[i
];
252 EXPECT_EQ(value
.return_value
,
253 TrimWhitespace(value
.input
, value
.positions
, &output_ascii
));
254 EXPECT_EQ(value
.output
, output_ascii
);
// Table of cases for the string16 CollapseWhitespace() test: the wide input,
// the |trim| flag handed to CollapseWhitespace(), and the expected result.
static const struct collapse_case {
  const wchar_t* input;
  const bool trim;
  const wchar_t* output;
} collapse_cases[] = {
  {L" Google Video ", false, L"Google Video"},
  {L"Google Video", false, L"Google Video"},
  {L"\t\rTest String\n", false, L"Test String"},
  {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
  {L" Test \n \t String ", false, L"Test String"},
  {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
  {L" Test String", false, L"Test String"},
  {L"Test String ", false, L"Test String"},
  {L"Test String", false, L"Test String"},
  {L" \r ", true, L""},
  {L"\nFoo", true, L"Foo"},
  {L"\r Foo ", true, L"Foo"},
  {L" Foo bar ", true, L"Foo bar"},
  {L" \tFoo bar \n", true, L"Foo bar"},
  {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
};
284 TEST(StringUtilTest
, CollapseWhitespace
) {
285 for (size_t i
= 0; i
< arraysize(collapse_cases
); ++i
) {
286 const collapse_case
& value
= collapse_cases
[i
];
287 EXPECT_EQ(WideToUTF16(value
.output
),
288 CollapseWhitespace(WideToUTF16(value
.input
), value
.trim
));
// Table of cases for the CollapseWhitespaceASCII() test: the narrow input,
// the |trim| flag handed to CollapseWhitespaceASCII(), and the expected
// result.
static const struct collapse_case_ascii {
  const char* input;
  const bool trim;
  const char* output;
} collapse_cases_ascii[] = {
  {" Google Video ", false, "Google Video"},
  {"Google Video", false, "Google Video"},
  {"\t\rTest String\n", false, "Test String"},
  {" Test \n \t String ", false, "Test String"},
  {" Test String", false, "Test String"},
  {"Test String ", false, "Test String"},
  {"Test String", false, "Test String"},
  {"\nFoo", true, "Foo"},
  {"\r Foo ", true, "Foo"},
  {" Foo bar ", true, "Foo bar"},
  {" \tFoo bar \n", true, "Foo bar"},
  {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
};
316 TEST(StringUtilTest
, CollapseWhitespaceASCII
) {
317 for (size_t i
= 0; i
< arraysize(collapse_cases_ascii
); ++i
) {
318 const collapse_case_ascii
& value
= collapse_cases_ascii
[i
];
319 EXPECT_EQ(value
.output
, CollapseWhitespaceASCII(value
.input
, value
.trim
));
323 TEST(StringUtilTest
, IsStringUTF8
) {
324 EXPECT_TRUE(IsStringUTF8("abc"));
325 EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
326 EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
327 EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
328 EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
329 EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc")); // UTF-8 BOM
331 // surrogate code points
332 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
333 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
334 EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
336 // overlong sequences
337 EXPECT_FALSE(IsStringUTF8("\xc0\x80")); // U+0000
338 EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81")); // "AB"
339 EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80")); // U+0000
340 EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80")); // U+0080
341 EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf")); // U+07ff
342 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D")); // U+000D
343 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91")); // U+0091
344 EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80")); // U+0800
345 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf")); // U+FEFF (BOM)
346 EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf")); // U+003F
347 EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5")); // U+00A5
349 // Beyond U+10FFFF (the upper limit of Unicode codespace)
350 EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80")); // U+110000
351 EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf")); // 5 bytes
352 EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80")); // 6 bytes
354 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
355 EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
356 EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
357 EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
358 EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
360 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
361 EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe")); // U+FFFE)
362 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe")); // U+1FFFE
363 EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf")); // U+10FFFF
364 EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90")); // U+FDD0
365 EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf")); // U+FDEF
366 // Strings in legacy encodings. We can certainly make up strings
367 // in a legacy encoding that are valid in UTF-8, but in real data,
368 // most of them are invalid as UTF-8.
369 EXPECT_FALSE(IsStringUTF8("caf\xe9")); // cafe with U+00E9 in ISO-8859-1
370 EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2")); // U+AC00, U+AC001 in EUC-KR
371 EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e")); // U+4F60 U+597D in Big5
372 // "abc" with U+201[CD] in windows-125[0-8]
373 EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
374 // U+0639 U+064E U+0644 U+064E in ISO-8859-6
375 EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
376 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
377 EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
379 // Check that we support Embedded Nulls. The first uses the canonical UTF-8
380 // representation, and the second uses a 2-byte sequence. The second version
381 // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
382 // given codepoint must be used.
383 static const char kEmbeddedNull
[] = "embedded\0null";
384 EXPECT_TRUE(IsStringUTF8(
385 std::string(kEmbeddedNull
, sizeof(kEmbeddedNull
))));
386 EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
389 TEST(StringUtilTest
, IsStringASCII
) {
390 static char char_ascii
[] =
391 "0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF";
392 static char16 char16_ascii
[] = {
393 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', 'A',
394 'B', 'C', 'D', 'E', 'F', '0', '1', '2', '3', '4', '5', '6',
395 '7', '8', '9', '0', 'A', 'B', 'C', 'D', 'E', 'F', 0 };
396 static std::wstring
wchar_ascii(
397 L
"0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF");
399 // Test a variety of the fragment start positions and lengths in order to make
400 // sure that bit masking in IsStringASCII works correctly.
401 // Also, test that a non-ASCII character will be detected regardless of its
402 // position inside the string.
404 const size_t string_length
= arraysize(char_ascii
) - 1;
405 for (size_t offset
= 0; offset
< 8; ++offset
) {
406 for (size_t len
= 0, max_len
= string_length
- offset
; len
< max_len
;
408 EXPECT_TRUE(IsStringASCII(StringPiece(char_ascii
+ offset
, len
)));
409 for (size_t char_pos
= offset
; char_pos
< len
; ++char_pos
) {
410 char_ascii
[char_pos
] |= '\x80';
411 EXPECT_FALSE(IsStringASCII(StringPiece(char_ascii
+ offset
, len
)));
412 char_ascii
[char_pos
] &= ~'\x80';
419 const size_t string_length
= arraysize(char16_ascii
) - 1;
420 for (size_t offset
= 0; offset
< 4; ++offset
) {
421 for (size_t len
= 0, max_len
= string_length
- offset
; len
< max_len
;
423 EXPECT_TRUE(IsStringASCII(StringPiece16(char16_ascii
+ offset
, len
)));
424 for (size_t char_pos
= offset
; char_pos
< len
; ++char_pos
) {
425 char16_ascii
[char_pos
] |= 0x80;
427 IsStringASCII(StringPiece16(char16_ascii
+ offset
, len
)));
428 char16_ascii
[char_pos
] &= ~0x80;
429 // Also test when the upper half is non-zero.
430 char16_ascii
[char_pos
] |= 0x100;
432 IsStringASCII(StringPiece16(char16_ascii
+ offset
, len
)));
433 char16_ascii
[char_pos
] &= ~0x100;
440 const size_t string_length
= wchar_ascii
.length();
441 for (size_t len
= 0; len
< string_length
; ++len
) {
442 EXPECT_TRUE(IsStringASCII(wchar_ascii
.substr(0, len
)));
443 for (size_t char_pos
= 0; char_pos
< len
; ++char_pos
) {
444 wchar_ascii
[char_pos
] |= 0x80;
446 IsStringASCII(wchar_ascii
.substr(0, len
)));
447 wchar_ascii
[char_pos
] &= ~0x80;
448 wchar_ascii
[char_pos
] |= 0x100;
450 IsStringASCII(wchar_ascii
.substr(0, len
)));
451 wchar_ascii
[char_pos
] &= ~0x100;
452 #if defined(WCHAR_T_IS_UTF32)
453 wchar_ascii
[char_pos
] |= 0x10000;
455 IsStringASCII(wchar_ascii
.substr(0, len
)));
456 wchar_ascii
[char_pos
] &= ~0x10000;
457 #endif // WCHAR_T_IS_UTF32
463 TEST(StringUtilTest
, ConvertASCII
) {
464 static const char* const char_cases
[] = {
467 "0123ABCDwxyz \a\b\t\r\n!+,.~"
470 static const wchar_t* const wchar_cases
[] = {
473 L
"0123ABCDwxyz \a\b\t\r\n!+,.~"
476 for (size_t i
= 0; i
< arraysize(char_cases
); ++i
) {
477 EXPECT_TRUE(IsStringASCII(char_cases
[i
]));
478 string16 utf16
= ASCIIToUTF16(char_cases
[i
]);
479 EXPECT_EQ(WideToUTF16(wchar_cases
[i
]), utf16
);
481 std::string ascii
= UTF16ToASCII(WideToUTF16(wchar_cases
[i
]));
482 EXPECT_EQ(char_cases
[i
], ascii
);
485 EXPECT_FALSE(IsStringASCII("Google \x80Video"));
487 // Convert empty strings.
490 EXPECT_EQ(empty
, UTF16ToASCII(empty16
));
491 EXPECT_EQ(empty16
, ASCIIToUTF16(empty
));
493 // Convert strings with an embedded NUL character.
494 const char chars_with_nul
[] = "test\0string";
495 const int length_with_nul
= arraysize(chars_with_nul
) - 1;
496 std::string
string_with_nul(chars_with_nul
, length_with_nul
);
497 std::wstring wide_with_nul
= ASCIIToWide(string_with_nul
);
498 EXPECT_EQ(static_cast<std::wstring::size_type
>(length_with_nul
),
499 wide_with_nul
.length());
500 std::string narrow_with_nul
= UTF16ToASCII(WideToUTF16(wide_with_nul
));
501 EXPECT_EQ(static_cast<std::string::size_type
>(length_with_nul
),
502 narrow_with_nul
.length());
503 EXPECT_EQ(0, string_with_nul
.compare(narrow_with_nul
));
506 TEST(StringUtilTest
, ToUpperASCII
) {
507 EXPECT_EQ('C', ToUpperASCII('C'));
508 EXPECT_EQ('C', ToUpperASCII('c'));
509 EXPECT_EQ('2', ToUpperASCII('2'));
511 EXPECT_EQ(L
'C', ToUpperASCII(L
'C'));
512 EXPECT_EQ(L
'C', ToUpperASCII(L
'c'));
513 EXPECT_EQ(L
'2', ToUpperASCII(L
'2'));
515 std::string
in_place_a("Cc2");
516 StringToUpperASCII(&in_place_a
);
517 EXPECT_EQ("CC2", in_place_a
);
519 std::wstring
in_place_w(L
"Cc2");
520 StringToUpperASCII(&in_place_w
);
521 EXPECT_EQ(L
"CC2", in_place_w
);
523 std::string
original_a("Cc2");
524 std::string upper_a
= StringToUpperASCII(original_a
);
525 EXPECT_EQ("CC2", upper_a
);
527 std::wstring
original_w(L
"Cc2");
528 std::wstring upper_w
= StringToUpperASCII(original_w
);
529 EXPECT_EQ(L
"CC2", upper_w
);
532 TEST(StringUtilTest
, LowerCaseEqualsASCII
) {
533 static const struct {
536 } lowercase_cases
[] = {
542 for (size_t i
= 0; i
< arraysize(lowercase_cases
); ++i
) {
543 EXPECT_TRUE(LowerCaseEqualsASCII(ASCIIToUTF16(lowercase_cases
[i
].src_a
),
544 lowercase_cases
[i
].dst
));
545 EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases
[i
].src_a
,
546 lowercase_cases
[i
].dst
));
550 TEST(StringUtilTest
, FormatBytesUnlocalized
) {
551 static const struct {
553 const char* expected
;
555 // Expected behavior: we show one post-decimal digit when we have
556 // under two pre-decimal digits, except in cases where it makes no
557 // sense (zero or bytes).
558 // Since we switch units once we cross the 1000 mark, this keeps
559 // the display of file sizes or bytes consistently around three
563 {1024*1024, "1.0 MB"},
564 {1024*1024*1024, "1.0 GB"},
565 {10LL*1024*1024*1024, "10.0 GB"},
566 {99LL*1024*1024*1024, "99.0 GB"},
567 {105LL*1024*1024*1024, "105 GB"},
568 {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"},
569 {~(1LL << 63), "8192 PB"},
571 {99*1024 + 103, "99.1 kB"},
572 {1024*1024 + 103, "1.0 MB"},
573 {1024*1024 + 205 * 1024, "1.2 MB"},
574 {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"},
575 {10LL*1024*1024*1024, "10.0 GB"},
576 {100LL*1024*1024*1024, "100 GB"},
579 for (size_t i
= 0; i
< arraysize(cases
); ++i
) {
580 EXPECT_EQ(ASCIIToUTF16(cases
[i
].expected
),
581 FormatBytesUnlocalized(cases
[i
].bytes
));
584 TEST(StringUtilTest
, ReplaceSubstringsAfterOffset
) {
585 static const struct {
587 string16::size_type start_offset
;
588 const char* find_this
;
589 const char* replace_with
;
590 const char* expected
;
592 {"aaa", 0, "a", "b", "bbb"},
593 {"abb", 0, "ab", "a", "ab"},
594 {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
595 {"Not found", 0, "x", "0", "Not found"},
596 {"Not found again", 5, "x", "0", "Not found again"},
597 {" Making it much longer ", 0, " ", "Four score and seven years ago",
598 "Four score and seven years agoMakingFour score and seven years agoit"
599 "Four score and seven years agomuchFour score and seven years agolonger"
600 "Four score and seven years ago"},
601 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
602 {"Replace me only me once", 9, "me ", "", "Replace me only once"},
603 {"abababab", 2, "ab", "c", "abccc"},
606 for (size_t i
= 0; i
< arraysize(cases
); i
++) {
607 string16 str
= ASCIIToUTF16(cases
[i
].str
);
608 ReplaceSubstringsAfterOffset(&str
, cases
[i
].start_offset
,
609 ASCIIToUTF16(cases
[i
].find_this
),
610 ASCIIToUTF16(cases
[i
].replace_with
));
611 EXPECT_EQ(ASCIIToUTF16(cases
[i
].expected
), str
);
615 TEST(StringUtilTest
, ReplaceFirstSubstringAfterOffset
) {
616 static const struct {
618 string16::size_type start_offset
;
619 const char* find_this
;
620 const char* replace_with
;
621 const char* expected
;
623 {"aaa", 0, "a", "b", "baa"},
624 {"abb", 0, "ab", "a", "ab"},
625 {"Removing some substrings inging", 0, "ing", "",
626 "Remov some substrings inging"},
627 {"Not found", 0, "x", "0", "Not found"},
628 {"Not found again", 5, "x", "0", "Not found again"},
629 {" Making it much longer ", 0, " ", "Four score and seven years ago",
630 "Four score and seven years agoMaking it much longer "},
631 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
632 {"Replace me only me once", 4, "me ", "", "Replace only me once"},
633 {"abababab", 2, "ab", "c", "abcabab"},
636 for (size_t i
= 0; i
< arraysize(cases
); i
++) {
637 string16 str
= ASCIIToUTF16(cases
[i
].str
);
638 ReplaceFirstSubstringAfterOffset(&str
, cases
[i
].start_offset
,
639 ASCIIToUTF16(cases
[i
].find_this
),
640 ASCIIToUTF16(cases
[i
].replace_with
));
641 EXPECT_EQ(ASCIIToUTF16(cases
[i
].expected
), str
);
645 TEST(StringUtilTest
, HexDigitToInt
) {
646 EXPECT_EQ(0, HexDigitToInt('0'));
647 EXPECT_EQ(1, HexDigitToInt('1'));
648 EXPECT_EQ(2, HexDigitToInt('2'));
649 EXPECT_EQ(3, HexDigitToInt('3'));
650 EXPECT_EQ(4, HexDigitToInt('4'));
651 EXPECT_EQ(5, HexDigitToInt('5'));
652 EXPECT_EQ(6, HexDigitToInt('6'));
653 EXPECT_EQ(7, HexDigitToInt('7'));
654 EXPECT_EQ(8, HexDigitToInt('8'));
655 EXPECT_EQ(9, HexDigitToInt('9'));
656 EXPECT_EQ(10, HexDigitToInt('A'));
657 EXPECT_EQ(11, HexDigitToInt('B'));
658 EXPECT_EQ(12, HexDigitToInt('C'));
659 EXPECT_EQ(13, HexDigitToInt('D'));
660 EXPECT_EQ(14, HexDigitToInt('E'));
661 EXPECT_EQ(15, HexDigitToInt('F'));
663 // Verify the lower case as well.
664 EXPECT_EQ(10, HexDigitToInt('a'));
665 EXPECT_EQ(11, HexDigitToInt('b'));
666 EXPECT_EQ(12, HexDigitToInt('c'));
667 EXPECT_EQ(13, HexDigitToInt('d'));
668 EXPECT_EQ(14, HexDigitToInt('e'));
669 EXPECT_EQ(15, HexDigitToInt('f'));
672 // This checks where we can use the assignment operator for a va_list. We need
673 // a way to do this since Visual C doesn't support va_copy, but assignment on
674 // va_list is not guaranteed to be a copy. See StringAppendVT which uses this
676 static void VariableArgsFunc(const char* format
, ...) {
678 va_start(org
, format
);
681 GG_VA_COPY(dup
, org
);
682 int i1
= va_arg(org
, int);
683 int j1
= va_arg(org
, int);
684 char* s1
= va_arg(org
, char*);
685 double d1
= va_arg(org
, double);
688 int i2
= va_arg(dup
, int);
689 int j2
= va_arg(dup
, int);
690 char* s2
= va_arg(dup
, char*);
691 double d2
= va_arg(dup
, double);
695 EXPECT_STREQ(s1
, s2
);
701 TEST(StringUtilTest
, VAList
) {
702 VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21);
706 template <typename STR
>
707 void TokenizeTest() {
711 size
= Tokenize(STR("This is a string"), STR(" "), &r
);
713 ASSERT_EQ(4U, r
.size());
714 EXPECT_EQ(r
[0], STR("This"));
715 EXPECT_EQ(r
[1], STR("is"));
716 EXPECT_EQ(r
[2], STR("a"));
717 EXPECT_EQ(r
[3], STR("string"));
720 size
= Tokenize(STR("one,two,three"), STR(","), &r
);
722 ASSERT_EQ(3U, r
.size());
723 EXPECT_EQ(r
[0], STR("one"));
724 EXPECT_EQ(r
[1], STR("two"));
725 EXPECT_EQ(r
[2], STR("three"));
728 size
= Tokenize(STR("one,two:three;four"), STR(",:"), &r
);
730 ASSERT_EQ(3U, r
.size());
731 EXPECT_EQ(r
[0], STR("one"));
732 EXPECT_EQ(r
[1], STR("two"));
733 EXPECT_EQ(r
[2], STR("three;four"));
736 size
= Tokenize(STR("one,two:three;four"), STR(";,:"), &r
);
738 ASSERT_EQ(4U, r
.size());
739 EXPECT_EQ(r
[0], STR("one"));
740 EXPECT_EQ(r
[1], STR("two"));
741 EXPECT_EQ(r
[2], STR("three"));
742 EXPECT_EQ(r
[3], STR("four"));
745 size
= Tokenize(STR("one, two, three"), STR(","), &r
);
747 ASSERT_EQ(3U, r
.size());
748 EXPECT_EQ(r
[0], STR("one"));
749 EXPECT_EQ(r
[1], STR(" two"));
750 EXPECT_EQ(r
[2], STR(" three"));
753 size
= Tokenize(STR("one, two, three, "), STR(","), &r
);
755 ASSERT_EQ(4U, r
.size());
756 EXPECT_EQ(r
[0], STR("one"));
757 EXPECT_EQ(r
[1], STR(" two"));
758 EXPECT_EQ(r
[2], STR(" three"));
759 EXPECT_EQ(r
[3], STR(" "));
762 size
= Tokenize(STR("one, two, three,"), STR(","), &r
);
764 ASSERT_EQ(3U, r
.size());
765 EXPECT_EQ(r
[0], STR("one"));
766 EXPECT_EQ(r
[1], STR(" two"));
767 EXPECT_EQ(r
[2], STR(" three"));
770 size
= Tokenize(STR(), STR(","), &r
);
772 ASSERT_EQ(0U, r
.size());
775 size
= Tokenize(STR(","), STR(","), &r
);
777 ASSERT_EQ(0U, r
.size());
780 size
= Tokenize(STR(",;:."), STR(".:;,"), &r
);
782 ASSERT_EQ(0U, r
.size());
785 size
= Tokenize(STR("\t\ta\t"), STR("\t"), &r
);
787 ASSERT_EQ(1U, r
.size());
788 EXPECT_EQ(r
[0], STR("a"));
791 size
= Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r
);
793 ASSERT_EQ(2U, r
.size());
794 EXPECT_EQ(r
[0], STR("\ta\t"));
795 EXPECT_EQ(r
[1], STR("b\tcc"));
799 TEST(StringUtilTest
, TokenizeStdString
) {
800 TokenizeTest
<std::string
>();
803 TEST(StringUtilTest
, TokenizeStringPiece
) {
804 TokenizeTest
<base::StringPiece
>();
807 // Test for JoinString
808 TEST(StringUtilTest
, JoinString
) {
809 std::vector
<std::string
> in
;
810 EXPECT_EQ("", JoinString(in
, ','));
813 EXPECT_EQ("a", JoinString(in
, ','));
817 EXPECT_EQ("a,b,c", JoinString(in
, ','));
819 in
.push_back(std::string());
820 EXPECT_EQ("a,b,c,", JoinString(in
, ','));
822 EXPECT_EQ("a|b|c|| ", JoinString(in
, '|'));
825 // Test for JoinString overloaded with std::string separator
826 TEST(StringUtilTest
, JoinStringWithString
) {
827 std::string
separator(", ");
828 std::vector
<std::string
> parts
;
829 EXPECT_EQ(std::string(), JoinString(parts
, separator
));
831 parts
.push_back("a");
832 EXPECT_EQ("a", JoinString(parts
, separator
));
834 parts
.push_back("b");
835 parts
.push_back("c");
836 EXPECT_EQ("a, b, c", JoinString(parts
, separator
));
838 parts
.push_back(std::string());
839 EXPECT_EQ("a, b, c, ", JoinString(parts
, separator
));
840 parts
.push_back(" ");
841 EXPECT_EQ("a|b|c|| ", JoinString(parts
, "|"));
844 // Test for JoinString overloaded with string16 separator
845 TEST(StringUtilTest
, JoinStringWithString16
) {
846 string16 separator
= ASCIIToUTF16(", ");
847 std::vector
<string16
> parts
;
848 EXPECT_EQ(string16(), JoinString(parts
, separator
));
850 parts
.push_back(ASCIIToUTF16("a"));
851 EXPECT_EQ(ASCIIToUTF16("a"), JoinString(parts
, separator
));
853 parts
.push_back(ASCIIToUTF16("b"));
854 parts
.push_back(ASCIIToUTF16("c"));
855 EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(parts
, separator
));
857 parts
.push_back(ASCIIToUTF16(""));
858 EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(parts
, separator
));
859 parts
.push_back(ASCIIToUTF16(" "));
860 EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(parts
, ASCIIToUTF16("|")));
863 TEST(StringUtilTest
, StartsWith
) {
864 EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true));
865 EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true));
866 EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false));
867 EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false));
868 EXPECT_FALSE(StartsWithASCII("java", "javascript", true));
869 EXPECT_FALSE(StartsWithASCII("java", "javascript", false));
870 EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", false));
871 EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", true));
872 EXPECT_TRUE(StartsWithASCII("java", std::string(), false));
873 EXPECT_TRUE(StartsWithASCII("java", std::string(), true));
875 EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"),
876 ASCIIToUTF16("javascript"), true));
877 EXPECT_FALSE(StartsWith(ASCIIToUTF16("JavaScript:url"),
878 ASCIIToUTF16("javascript"), true));
879 EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"),
880 ASCIIToUTF16("javascript"), false));
881 EXPECT_TRUE(StartsWith(ASCIIToUTF16("JavaScript:url"),
882 ASCIIToUTF16("javascript"), false));
883 EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"),
884 ASCIIToUTF16("javascript"), true));
885 EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"),
886 ASCIIToUTF16("javascript"), false));
887 EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"), false));
888 EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"), true));
889 EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(), false));
890 EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(), true));
893 TEST(StringUtilTest
, EndsWith
) {
894 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"),
895 ASCIIToUTF16(".plugin"), true));
896 EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.Plugin"),
897 ASCIIToUTF16(".plugin"), true));
898 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"),
899 ASCIIToUTF16(".plugin"), false));
900 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.Plugin"),
901 ASCIIToUTF16(".plugin"), false));
902 EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"), true));
903 EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"), false));
904 EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"),
905 ASCIIToUTF16(".plugin"), true));
906 EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"),
907 ASCIIToUTF16(".plugin"), false));
908 EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"), false));
909 EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"), true));
910 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(), false));
911 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(), true));
912 EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"),
913 ASCIIToUTF16(".plugin"), false));
914 EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"), ASCIIToUTF16(".plugin"), true));
915 EXPECT_TRUE(EndsWith(string16(), string16(), false));
916 EXPECT_TRUE(EndsWith(string16(), string16(), true));
// Checks the offsets expected in |offsets| after ReplaceStringPlaceholders()
// runs with two one-character substitutions ("1" and "2"). In both scenarios
// offsets[0] is expected to be the position of the $1 replacement and
// offsets[1] the position of the $2 replacement in the formatted text.
919 TEST(StringUtilTest
, GetStringFWithOffsets
) {
920 std::vector
<string16
> subst
;
921 subst
.push_back(ASCIIToUTF16("1"));
922 subst
.push_back(ASCIIToUTF16("2"));
923 std::vector
<size_t> offsets
;
// Placeholders in ascending order: $1 lands at index 7 ("Hello, " is seven
// characters long) and $2 at index 25.
925 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
928 EXPECT_EQ(2U, offsets
.size());
929 EXPECT_EQ(7U, offsets
[0]);
930 EXPECT_EQ(25U, offsets
[1]);
// Placeholders swapped in the format string: the expected offsets swap too.
933 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
936 EXPECT_EQ(2U, offsets
.size());
937 EXPECT_EQ(25U, offsets
[0]);
938 EXPECT_EQ(7U, offsets
[1]);
// Formats a string that references placeholders $1..$6 while only three
// substitutions are supplied.
942 TEST(StringUtilTest
, ReplaceStringPlaceholdersTooFew
) {
943 // Test whether ReplaceStringPlaceholders works as expected when there
944 // are fewer substitutions than placeholders.
945 std::vector
<string16
> subst
;
946 subst
.push_back(ASCIIToUTF16("9a"));
947 subst
.push_back(ASCIIToUTF16("8b"));
948 subst
.push_back(ASCIIToUTF16("7c"));
951 ReplaceStringPlaceholders(
952 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst
, NULL
);
// $4, $5 and $6 have no matching entry in |subst|; the expected text has
// nothing in their place, while $1..$3 are substituted on every occurrence.
954 EXPECT_EQ(formatted
, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"));
// Formats a string16 containing each of the placeholders $1..$9 exactly once,
// with nine substitutions supplied.
957 TEST(StringUtilTest
, ReplaceStringPlaceholders
) {
958 std::vector
<string16
> subst
;
959 subst
.push_back(ASCIIToUTF16("9a"));
960 subst
.push_back(ASCIIToUTF16("8b"));
961 subst
.push_back(ASCIIToUTF16("7c"));
962 subst
.push_back(ASCIIToUTF16("6d"));
963 subst
.push_back(ASCIIToUTF16("5e"));
964 subst
.push_back(ASCIIToUTF16("4f"));
965 subst
.push_back(ASCIIToUTF16("3g"));
966 subst
.push_back(ASCIIToUTF16("2h"));
967 subst
.push_back(ASCIIToUTF16("1i"));
970 ReplaceStringPlaceholders(
971 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst
, NULL
);
// Each $N is expected to be replaced by the N-th entry of |subst|, e.g. "$1a"
// becomes "9a" followed by the literal "a".
973 EXPECT_EQ(formatted
, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"));
// Formats a string16 that uses two-digit placeholders ($10..$14) in addition
// to $1..$9, with fourteen substitutions supplied.
976 TEST(StringUtilTest
, ReplaceStringPlaceholdersMoreThan9Replacements
) {
977 std::vector
<string16
> subst
;
978 subst
.push_back(ASCIIToUTF16("9a"));
979 subst
.push_back(ASCIIToUTF16("8b"));
980 subst
.push_back(ASCIIToUTF16("7c"));
981 subst
.push_back(ASCIIToUTF16("6d"));
982 subst
.push_back(ASCIIToUTF16("5e"));
983 subst
.push_back(ASCIIToUTF16("4f"));
984 subst
.push_back(ASCIIToUTF16("3g"));
985 subst
.push_back(ASCIIToUTF16("2h"));
986 subst
.push_back(ASCIIToUTF16("1i"));
987 subst
.push_back(ASCIIToUTF16("0j"));
988 subst
.push_back(ASCIIToUTF16("-1k"));
989 subst
.push_back(ASCIIToUTF16("-2l"));
990 subst
.push_back(ASCIIToUTF16("-3m"));
991 subst
.push_back(ASCIIToUTF16("-4n"));
994 ReplaceStringPlaceholders(
995 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i,"
996 "$10j,$11k,$12l,$13m,$14n,$1"), subst
, NULL
);
// "$10j" is expected to resolve to the tenth entry ("0j") followed by "j",
// not to "$1" followed by "0j"; the trailing "$1" still yields "9a".
998 EXPECT_EQ(formatted
, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,"
999 "1ii,0jj,-1kk,-2ll,-3mm,-4nn,9a"));
1002 TEST(StringUtilTest
, StdStringReplaceStringPlaceholders
) {
1003 std::vector
<std::string
> subst
;
1004 subst
.push_back("9a");
1005 subst
.push_back("8b");
1006 subst
.push_back("7c");
1007 subst
.push_back("6d");
1008 subst
.push_back("5e");
1009 subst
.push_back("4f");
1010 subst
.push_back("3g");
1011 subst
.push_back("2h");
1012 subst
.push_back("1i");
1014 std::string formatted
=
1015 ReplaceStringPlaceholders(
1016 "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst
, NULL
);
1018 EXPECT_EQ(formatted
, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii");
// Feeds ReplaceStringPlaceholders() a std::string format in which each
// placeholder digit is preceded by a run of two, three or four consecutive
// '$' characters, with three single-letter substitutions available.
1021 TEST(StringUtilTest
, ReplaceStringPlaceholdersConsecutiveDollarSigns
) {
1022 std::vector
<std::string
> subst
;
1023 subst
.push_back("a");
1024 subst
.push_back("b");
1025 subst
.push_back("c");
1026 EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst
, NULL
),
1030 TEST(StringUtilTest
, MatchPatternTest
) {
1031 EXPECT_TRUE(MatchPattern("www.google.com", "*.com"));
1032 EXPECT_TRUE(MatchPattern("www.google.com", "*"));
1033 EXPECT_FALSE(MatchPattern("www.google.com", "www*.g*.org"));
1034 EXPECT_TRUE(MatchPattern("Hello", "H?l?o"));
1035 EXPECT_FALSE(MatchPattern("www.google.com", "http://*)"));
1036 EXPECT_FALSE(MatchPattern("www.msn.com", "*.COM"));
1037 EXPECT_TRUE(MatchPattern("Hello*1234", "He??o\\*1*"));
1038 EXPECT_FALSE(MatchPattern("", "*.*"));
1039 EXPECT_TRUE(MatchPattern("", "*"));
1040 EXPECT_TRUE(MatchPattern("", "?"));
1041 EXPECT_TRUE(MatchPattern("", ""));
1042 EXPECT_FALSE(MatchPattern("Hello", ""));
1043 EXPECT_TRUE(MatchPattern("Hello*", "Hello*"));
1044 // Stop after a certain recursion depth.
1045 EXPECT_FALSE(MatchPattern("123456789012345678", "?????????????????*"));
1047 // Test UTF8 matching.
1048 EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0", "*\xe2\x99\xa0"));
1049 EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0.", "heart: ?."));
1050 EXPECT_TRUE(MatchPattern("hearts: \xe2\x99\xa0\xe2\x99\xa0", "*"));
1051 // Invalid sequences should be handled as a single invalid character.
1052 EXPECT_TRUE(MatchPattern("invalid: \xef\xbf\xbe", "invalid: ?"));
1053 // If the pattern has invalid characters, it shouldn't match anything.
1054 EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80"));
1056 // Test UTF16 character matching.
1057 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"),
1058 UTF8ToUTF16("*.com")));
1059 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"),
1060 UTF8ToUTF16("He??o\\*1*")));
1062 // This test verifies that consecutive wild cards are collapsed into 1
1063 // wildcard (when this doesn't occur, MatchPattern reaches it's maximum
1064 // recursion depth).
1065 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello"),
1066 UTF8ToUTF16("He********************************o")));
// Exercises base::strlcpy() and base::wcslcpy() with destination sizes that
// are roomy, zero, exactly large enough, one too small and far too small.
// Every call is expected to return 7, the length of the source "abcdefg",
// regardless of how much was actually copied.
1069 TEST(StringUtilTest
, LcpyTest
) {
1070 // Test the normal case where we fit in our buffer.
1074 EXPECT_EQ(7U, base::strlcpy(dst
, "abcdefg", arraysize(dst
)));
1075 EXPECT_EQ(0, memcmp(dst
, "abcdefg", 8));
1076 EXPECT_EQ(7U, base::wcslcpy(wdst
, L
"abcdefg", arraysize(wdst
)));
1077 EXPECT_EQ(0, memcmp(wdst
, L
"abcdefg", sizeof(wchar_t) * 8));
1080 // Test dst_size == 0, nothing should be written to |dst| and we should
1081 // have the equivalent of strlen(src).
1083 char dst
[2] = {1, 2};
1084 wchar_t wdst
[2] = {1, 2};
1085 EXPECT_EQ(7U, base::strlcpy(dst
, "abcdefg", 0));
1086 EXPECT_EQ(1, dst
[0]);
1087 EXPECT_EQ(2, dst
[1]);
1088 EXPECT_EQ(7U, base::wcslcpy(wdst
, L
"abcdefg", 0));
1089 EXPECT_EQ(static_cast<wchar_t>(1), wdst
[0]);
1090 EXPECT_EQ(static_cast<wchar_t>(2), wdst
[1]);
1093 // Test the case where we _just_ completely fit including the null.
1097 EXPECT_EQ(7U, base::strlcpy(dst
, "abcdefg", arraysize(dst
)));
1098 EXPECT_EQ(0, memcmp(dst
, "abcdefg", 8));
1099 EXPECT_EQ(7U, base::wcslcpy(wdst
, L
"abcdefg", arraysize(wdst
)));
1100 EXPECT_EQ(0, memcmp(wdst
, L
"abcdefg", sizeof(wchar_t) * 8));
1103 // Test the case where we are one smaller, so we can't fit the null.
// The comparisons below cover 7 elements of "abcdef", i.e. six characters
// plus the terminator, so the copy is expected to be truncated but still
// terminated.
1107 EXPECT_EQ(7U, base::strlcpy(dst
, "abcdefg", arraysize(dst
)));
1108 EXPECT_EQ(0, memcmp(dst
, "abcdef", 7));
1109 EXPECT_EQ(7U, base::wcslcpy(wdst
, L
"abcdefg", arraysize(wdst
)));
1110 EXPECT_EQ(0, memcmp(wdst
, L
"abcdef", sizeof(wchar_t) * 7));
1113 // Test the case where we are just too small.
1117 EXPECT_EQ(7U, base::strlcpy(dst
, "abcdefg", arraysize(dst
)));
1118 EXPECT_EQ(0, memcmp(dst
, "ab", 3));
1119 EXPECT_EQ(7U, base::wcslcpy(wdst
, L
"abcdefg", arraysize(wdst
)));
1120 EXPECT_EQ(0, memcmp(wdst
, L
"ab", sizeof(wchar_t) * 3));
// Table-driven check of base::IsWprintfFormatPortable(): each entry pairs a
// wide format string with the result the function is expected to return.
1124 TEST(StringUtilTest
, WprintfFormatPortabilityTest
) {
1125 static const struct {
1126 const wchar_t* input
;
1133 { L
"Hello, %s", false },
1138 { L
"%ls %s", false },
1139 { L
"%s %ls", false },
1140 { L
"%s %ls %s", false },
1142 { L
"%f %F", false },
1143 { L
"%d %D", false },
1144 { L
"%o %O", false },
1145 { L
"%u %U", false },
1146 { L
"%f %d %o %u", true },
1147 { L
"%-8d (%02.1f%)", true },
1148 { L
"% 10s", false },
// Run every table entry through the function and compare with |portable|.
1151 for (size_t i
= 0; i
< arraysize(cases
); ++i
)
1152 EXPECT_EQ(cases
[i
].portable
, base::IsWprintfFormatPortable(cases
[i
].input
));
// Exercises RemoveChars() with |input| used as both the source string and the
// output pointer. The return value is expected to be true when at least one
// character was removed and false otherwise.
1155 TEST(StringUtilTest
, RemoveChars
) {
1156 const char kRemoveChars
[] = "-/+*";
1157 std::string input
= "A-+bc/d!*";
// '-', '+', '/' and '*' are stripped; '!' is not in kRemoveChars and stays.
1158 EXPECT_TRUE(RemoveChars(input
, kRemoveChars
, &input
));
1159 EXPECT_EQ("Abcd!", input
);
1161 // No characters match kRemoveChars.
1162 EXPECT_FALSE(RemoveChars(input
, kRemoveChars
, &input
));
1163 EXPECT_EQ("Abcd!", input
);
1167 EXPECT_FALSE(RemoveChars(input
, kRemoveChars
, &input
));
1168 EXPECT_EQ(std::string(), input
);
// Table-driven check of ReplaceChars(). Each entry lists, in order: the input
// string, the set of characters to replace, the replacement text, the
// expected output and the expected return value. The expectations show that
// every matching character is replaced by the whole replacement text (e.g.
// "e" -> "!?") and that the return value is true only when something matched.
1171 TEST(StringUtilTest
, ReplaceChars
) {
1174 const char* replace_chars
;
1175 const char* replace_with
;
1179 { "", "", "", "", false },
1180 { "test", "", "", "test", false },
1181 { "test", "", "!", "test", false },
1182 { "test", "z", "!", "test", false },
1183 { "test", "e", "!", "t!st", true },
1184 { "test", "e", "!?", "t!?st", true },
1185 { "test", "ez", "!", "t!st", true },
1186 { "test", "zed", "!?", "t!?st", true },
1187 { "test", "t", "!?", "!?es!?", true },
1188 { "test", "et", "!>", "!>!>s!>", true },
1189 { "test", "zest", "!", "!!!!", true },
1190 { "test", "szt", "!", "!e!!", true },
1191 { "test", "t", "test", "testestest", true },
// Run each entry and check both the return value and the produced string.
1194 for (size_t i
= 0; i
< arraysize(cases
); ++i
) {
1196 bool result
= ReplaceChars(cases
[i
].input
,
1197 cases
[i
].replace_chars
,
1198 cases
[i
].replace_with
,
1200 EXPECT_EQ(cases
[i
].result
, result
);
1201 EXPECT_EQ(cases
[i
].output
, output
);
// Exercises ContainsOnlyChars() with an empty character set, a set of digits,
// and the kWhitespaceASCII / kWhitespaceUTF16 sets. An empty input string is
// expected to yield true for every character set used here.
1205 TEST(StringUtilTest
, ContainsOnlyChars
) {
1206 // Providing an empty list of characters should return false, except when the
// input string is itself empty.
1208 EXPECT_TRUE(ContainsOnlyChars(std::string(), std::string()));
1209 EXPECT_FALSE(ContainsOnlyChars("Hello", std::string()));
// The order of the allowed characters does not matter ("1234" vs. "4321").
1211 EXPECT_TRUE(ContainsOnlyChars(std::string(), "1234"));
1212 EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
1213 EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
1214 EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
1215 EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));
// Narrow strings against the ASCII whitespace set.
1217 EXPECT_TRUE(ContainsOnlyChars(std::string(), kWhitespaceASCII
));
1218 EXPECT_TRUE(ContainsOnlyChars(" ", kWhitespaceASCII
));
1219 EXPECT_TRUE(ContainsOnlyChars("\t", kWhitespaceASCII
));
1220 EXPECT_TRUE(ContainsOnlyChars("\t \r \n ", kWhitespaceASCII
));
1221 EXPECT_FALSE(ContainsOnlyChars("a", kWhitespaceASCII
));
1222 EXPECT_FALSE(ContainsOnlyChars("\thello\r \n ", kWhitespaceASCII
));
// The same inputs as string16 against the UTF-16 whitespace set.
1224 EXPECT_TRUE(ContainsOnlyChars(string16(), kWhitespaceUTF16
));
1225 EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16(" "), kWhitespaceUTF16
));
1226 EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t"), kWhitespaceUTF16
));
1227 EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t \r \n "), kWhitespaceUTF16
));
1228 EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("a"), kWhitespaceUTF16
));
1229 EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("\thello\r \n "),
// Test fixture for WriteInto(). WritesCorrectly() asks WriteInto() for
// |num_chars| + 1 elements of |buffer|, copies at most |num_chars| characters
// of "supercali" into the returned pointer with strncpy(), and then checks
// the buffer's content and size.
1233 class WriteIntoTest
: public testing::Test
{
1235 static void WritesCorrectly(size_t num_chars
) {
1237 char kOriginal
[] = "supercali";
1238 strncpy(WriteInto(&buffer
, num_chars
+ 1), kOriginal
, num_chars
);
1239 // Using std::string(buffer.c_str()) instead of |buffer| truncates the
1240 // string at the first \0.
// The expected text is the first min(num_chars, strlen(kOriginal))
// characters of kOriginal; arraysize() counts the terminator, hence the -1.
1241 EXPECT_EQ(std::string(kOriginal
,
1242 std::min(num_chars
, arraysize(kOriginal
) - 1)),
1243 std::string(buffer
.c_str()));
// The string's size is expected to be |num_chars|, one less than requested.
1244 EXPECT_EQ(num_chars
, buffer
.size());
// Runs the fixture's WritesCorrectly() check and then verifies that writing
// through WriteInto() into a copy of a string leaves the original untouched.
1248 TEST_F(WriteIntoTest
, WriteInto
) {
1249 // Validate that WriteInto reserves enough space and
1250 // sizes a string correctly.
1253 WritesCorrectly(5000);
1255 // Validate that WriteInto doesn't modify other strings
1256 // when using a Copy-on-Write implementation.
1257 const char kLive
[] = "live";
1258 const char kDead
[] = "dead";
1259 const std::string live
= kLive
;
// |dead| starts out as a copy of |live|, then is overwritten via WriteInto().
1260 std::string dead
= live
;
1261 strncpy(WriteInto(&dead
, 5), kDead
, 4);
1262 EXPECT_EQ(kDead
, dead
);
1263 EXPECT_EQ(4u, dead
.size());
// |live| must still hold its original contents and size.
1264 EXPECT_EQ(kLive
, live
);
1265 EXPECT_EQ(4u, live
.size());