base/strings/string_util_unittest.cc

   1 // Copyright 2013 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "base/strings/string_util.h"
   6
   7 #include <math.h>
   8 #include <stdarg.h>
   9
  10 #include <algorithm>
  11
  12 #include "base/basictypes.h"
  13 #include "base/strings/string16.h"
  14 #include "base/strings/utf_string_conversions.h"
  15 #include "testing/gmock/include/gmock/gmock.h"
  16 #include "testing/gtest/include/gtest/gtest.h"
  17
  18 using ::testing::ElementsAre;
  19
  20 namespace base {
  21
  22 static const struct trim_case {
  23   const wchar_t* input;
  24   const TrimPositions positions;
  25   const wchar_t* output;
  26   const TrimPositions return_value;
  27 } trim_cases[] = {
  28   {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
  29   {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
  30   {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
  31   {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
  32   {L"", TRIM_ALL, L"", TRIM_NONE},
  33   {L"  ", TRIM_LEADING, L"", TRIM_LEADING},
  34   {L"  ", TRIM_TRAILING, L"", TRIM_TRAILING},
  35   {L"  ", TRIM_ALL, L"", TRIM_ALL},
  36   {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
  37   {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
  38 };
  39
  40 static const struct trim_case_ascii {
  41   const char* input;
  42   const TrimPositions positions;
  43   const char* output;
  44   const TrimPositions return_value;
  45 } trim_cases_ascii[] = {
  46   {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
  47   {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
  48   {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
  49   {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
  50   {"", TRIM_ALL, "", TRIM_NONE},
  51   {"  ", TRIM_LEADING, "", TRIM_LEADING},
  52   {"  ", TRIM_TRAILING, "", TRIM_TRAILING},
  53   {"  ", TRIM_ALL, "", TRIM_ALL},
  54   {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
  55 };
  56
  57 namespace {
  58
  59 // Helper used to test TruncateUTF8ToByteSize.
  60 bool Truncated(const std::string& input, const size_t byte_size,
  61                std::string* output) {
  62     size_t prev = input.length();
  63     TruncateUTF8ToByteSize(input, byte_size, output);
  64     return prev != output->length();
  65 }
  66
  67 }  // namespace
  68
  69 TEST(StringUtilTest, TruncateUTF8ToByteSize) {
  70   std::string output;
  71
  72   // Empty strings and invalid byte_size arguments
  73   EXPECT_FALSE(Truncated(std::string(), 0, &output));
  74   EXPECT_EQ(output, "");
  75   EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
  76   EXPECT_EQ(output, "");
  77   EXPECT_FALSE(Truncated("\xe1\x80\xbf", -1, &output));
  78   EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));
  79
  80   // Testing the truncation of valid UTF8 correctly
  81   EXPECT_TRUE(Truncated("abc", 2, &output));
  82   EXPECT_EQ(output, "ab");
  83   EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
  84   EXPECT_EQ(output.compare("\xc2\x81"), 0);
  85   EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
  86   EXPECT_EQ(output.compare("\xc2\x81"), 0);
  87   EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
  88   EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);
  89
  90   {
  91     const char array[] = "\x00\x00\xc2\x81\xc2\x81";
  92     const std::string array_string(array, arraysize(array));
  93     EXPECT_TRUE(Truncated(array_string, 4, &output));
  94     EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
  95   }
  96
  97   {
  98     const char array[] = "\x00\xc2\x81\xc2\x81";
  99     const std::string array_string(array, arraysize(array));
 100     EXPECT_TRUE(Truncated(array_string, 4, &output));
 101     EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);
 102   }
 103
 104   // Testing invalid UTF8
 105   EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
 106   EXPECT_EQ(output.compare(""), 0);
 107   EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
 108   EXPECT_EQ(output.compare(""), 0);
 109   EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
 110   EXPECT_EQ(output.compare(""), 0);
 111
 112   // Testing invalid UTF8 mixed with valid UTF8
 113   EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
 114   EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);
 115   EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
 116   EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);
 117   EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
 118               10, &output));
 119   EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
 120   EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
 121               10, &output));
 122   EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
 123   EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
 124   EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);
 125
 126   // Overlong sequences
 127   EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
 128   EXPECT_EQ(output.compare(""), 0);
 129   EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
 130   EXPECT_EQ(output.compare(""), 0);
 131   EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
 132   EXPECT_EQ(output.compare(""), 0);
 133   EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
 134   EXPECT_EQ(output.compare(""), 0);
 135   EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
 136   EXPECT_EQ(output.compare(""), 0);
 137   EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
 138   EXPECT_EQ(output.compare(""), 0);
 139   EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
 140   EXPECT_EQ(output.compare(""), 0);
 141   EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
 142   EXPECT_EQ(output.compare(""), 0);
 143   EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
 144   EXPECT_EQ(output.compare(""), 0);
 145   EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
 146   EXPECT_EQ(output.compare(""), 0);
 147   EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
 148   EXPECT_EQ(output.compare(""), 0);
 149
 150   // Beyond U+10FFFF (the upper limit of Unicode codespace)
 151   EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
 152   EXPECT_EQ(output.compare(""), 0);
 153   EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
 154   EXPECT_EQ(output.compare(""), 0);
 155   EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
 156   EXPECT_EQ(output.compare(""), 0);
 157
 158   // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
 159   EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
 160   EXPECT_EQ(output.compare(""), 0);
 161   EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
 162   EXPECT_EQ(output.compare(""), 0);
 163
 164   {
 165     const char array[] = "\x00\x00\xfe\xff";
 166     const std::string array_string(array, arraysize(array));
 167     EXPECT_TRUE(Truncated(array_string, 4, &output));
 168     EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);
 169   }
 170
 171   // Variants on the previous test
 172   {
 173     const char array[] = "\xff\xfe\x00\x00";
 174     const std::string array_string(array, 4);
 175     EXPECT_FALSE(Truncated(array_string, 4, &output));
 176     EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
 177   }
 178   {
 179     const char array[] = "\xff\x00\x00\xfe";
 180     const std::string array_string(array, arraysize(array));
 181     EXPECT_TRUE(Truncated(array_string, 4, &output));
 182     EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);
 183   }
 184
 185   // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
 186   EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
 187   EXPECT_EQ(output.compare(""), 0);
 188   EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
 189   EXPECT_EQ(output.compare(""), 0);
 190   EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
 191   EXPECT_EQ(output.compare(""), 0);
 192   EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
 193   EXPECT_EQ(output.compare(""), 0);
 194   EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
 195   EXPECT_EQ(output.compare(""), 0);
 196
 197   // Strings in legacy encodings that are valid in UTF-8, but
 198   // are invalid as UTF-8 in real data.
 199   EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
 200   EXPECT_EQ(output.compare("caf"), 0);
 201   EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
 202   EXPECT_EQ(output.compare(""), 0);
 203   EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
 204   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
 205   EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
 206               &output));
 207   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
 208
 209   // Testing using the same string as input and output.
 210   EXPECT_FALSE(Truncated(output, 4, &output));
 211   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
 212   EXPECT_TRUE(Truncated(output, 3, &output));
 213   EXPECT_EQ(output.compare("\xa7\x41"), 0);
 214
 215   // "abc" with U+201[CD] in windows-125[0-8]
 216   EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
 217   EXPECT_EQ(output.compare("\x93" "abc"), 0);
 218
 219   // U+0639 U+064E U+0644 U+064E in ISO-8859-6
 220   EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
 221   EXPECT_EQ(output.compare(""), 0);
 222
 223   // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
 224   EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
 225   EXPECT_EQ(output.compare(""), 0);
 226 }
 227
 228 TEST(StringUtilTest, TrimWhitespace) {
 229   string16 output;  // Allow contents to carry over to next testcase
 230   for (size_t i = 0; i < arraysize(trim_cases); ++i) {
 231     const trim_case& value = trim_cases[i];
 232     EXPECT_EQ(value.return_value,
 233               TrimWhitespace(WideToUTF16(value.input), value.positions,
 234                              &output));
 235     EXPECT_EQ(WideToUTF16(value.output), output);
 236   }
 237
 238   // Test that TrimWhitespace() can take the same string for input and output
 239   output = ASCIIToUTF16("  This is a test \r\n");
 240   EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
 241   EXPECT_EQ(ASCIIToUTF16("This is a test"), output);
 242
 243   // Once more, but with a string of whitespace
 244   output = ASCIIToUTF16("  \r\n");
 245   EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
 246   EXPECT_EQ(string16(), output);
 247
 248   std::string output_ascii;
 249   for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) {
 250     const trim_case_ascii& value = trim_cases_ascii[i];
 251     EXPECT_EQ(value.return_value,
 252               TrimWhitespace(value.input, value.positions, &output_ascii));
 253     EXPECT_EQ(value.output, output_ascii);
 254   }
 255 }
 256
 257 static const struct collapse_case {
 258   const wchar_t* input;
 259   const bool trim;
 260   const wchar_t* output;
 261 } collapse_cases[] = {
 262   {L" Google Video ", false, L"Google Video"},
 263   {L"Google Video", false, L"Google Video"},
 264   {L"", false, L""},
 265   {L"  ", false, L""},
 266   {L"\t\rTest String\n", false, L"Test String"},
 267   {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
 268   {L"    Test     \n  \t String    ", false, L"Test String"},
 269   {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
 270   {L"   Test String", false, L"Test String"},
 271   {L"Test String    ", false, L"Test String"},
 272   {L"Test String", false, L"Test String"},
 273   {L"", true, L""},
 274   {L"\n", true, L""},
 275   {L"  \r  ", true, L""},
 276   {L"\nFoo", true, L"Foo"},
 277   {L"\r  Foo  ", true, L"Foo"},
 278   {L" Foo bar ", true, L"Foo bar"},
 279   {L"  \tFoo  bar  \n", true, L"Foo bar"},
 280   {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
 281 };
 282
 283 TEST(StringUtilTest, CollapseWhitespace) {
 284   for (size_t i = 0; i < arraysize(collapse_cases); ++i) {
 285     const collapse_case& value = collapse_cases[i];
 286     EXPECT_EQ(WideToUTF16(value.output),
 287               CollapseWhitespace(WideToUTF16(value.input), value.trim));
 288   }
 289 }
 290
 291 static const struct collapse_case_ascii {
 292   const char* input;
 293   const bool trim;
 294   const char* output;
 295 } collapse_cases_ascii[] = {
 296   {" Google Video ", false, "Google Video"},
 297   {"Google Video", false, "Google Video"},
 298   {"", false, ""},
 299   {"  ", false, ""},
 300   {"\t\rTest String\n", false, "Test String"},
 301   {"    Test     \n  \t String    ", false, "Test String"},
 302   {"   Test String", false, "Test String"},
 303   {"Test String    ", false, "Test String"},
 304   {"Test String", false, "Test String"},
 305   {"", true, ""},
 306   {"\n", true, ""},
 307   {"  \r  ", true, ""},
 308   {"\nFoo", true, "Foo"},
 309   {"\r  Foo  ", true, "Foo"},
 310   {" Foo bar ", true, "Foo bar"},
 311   {"  \tFoo  bar  \n", true, "Foo bar"},
 312   {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
 313 };
 314
 315 TEST(StringUtilTest, CollapseWhitespaceASCII) {
 316   for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) {
 317     const collapse_case_ascii& value = collapse_cases_ascii[i];
 318     EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim));
 319   }
 320 }
 321
 322 TEST(StringUtilTest, IsStringUTF8) {
 323   EXPECT_TRUE(IsStringUTF8("abc"));
 324   EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
 325   EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
 326   EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
 327   EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
 328   EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc"));  // UTF-8 BOM
 329
 330   // surrogate code points
 331   EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
 332   EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
 333   EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
 334
 335   // overlong sequences
 336   EXPECT_FALSE(IsStringUTF8("\xc0\x80"));  // U+0000
 337   EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81"));  // "AB"
 338   EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80"));  // U+0000
 339   EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80"));  // U+0080
 340   EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf"));  // U+07ff
 341   EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D"));  // U+000D
 342   EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91"));  // U+0091
 343   EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80"));  // U+0800
 344   EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf"));  // U+FEFF (BOM)
 345   EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf"));  // U+003F
 346   EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5"));  // U+00A5
 347
 348   // Beyond U+10FFFF (the upper limit of Unicode codespace)
 349   EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80"));  // U+110000
 350   EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf"));  // 5 bytes
 351   EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80"));  // 6 bytes
 352
 353   // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
 354   EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
 355   EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
 356   EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
 357   EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
 358
 359   // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
 360   EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe"));  // U+FFFE)
 361   EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe"));  // U+1FFFE
 362   EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf"));  // U+10FFFF
 363   EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90"));  // U+FDD0
 364   EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf"));  // U+FDEF
 365   // Strings in legacy encodings. We can certainly make up strings
 366   // in a legacy encoding that are valid in UTF-8, but in real data,
 367   // most of them are invalid as UTF-8.
 368   EXPECT_FALSE(IsStringUTF8("caf\xe9"));  // cafe with U+00E9 in ISO-8859-1
 369   EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2"));  // U+AC00, U+AC001 in EUC-KR
 370   EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e"));  // U+4F60 U+597D in Big5
 371   // "abc" with U+201[CD] in windows-125[0-8]
 372   EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
 373   // U+0639 U+064E U+0644 U+064E in ISO-8859-6
 374   EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
 375   // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
 376   EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
 377
 378   // Check that we support Embedded Nulls. The first uses the canonical UTF-8
 379   // representation, and the second uses a 2-byte sequence. The second version
 380   // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
 381   // given codepoint must be used.
 382   static const char kEmbeddedNull[] = "embedded\0null";
 383   EXPECT_TRUE(IsStringUTF8(
 384       std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));
 385   EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
 386 }
 387
 388 TEST(StringUtilTest, ConvertASCII) {
 389   static const char* char_cases[] = {
 390     "Google Video",
 391     "Hello, world\n",
 392     "0123ABCDwxyz \a\b\t\r\n!+,.~"
 393   };
 394
 395   static const wchar_t* const wchar_cases[] = {
 396     L"Google Video",
 397     L"Hello, world\n",
 398     L"0123ABCDwxyz \a\b\t\r\n!+,.~"
 399   };
 400
 401   for (size_t i = 0; i < arraysize(char_cases); ++i) {
 402     EXPECT_TRUE(IsStringASCII(char_cases[i]));
 403     string16 utf16 = ASCIIToUTF16(char_cases[i]);
 404     EXPECT_EQ(WideToUTF16(wchar_cases[i]), utf16);
 405
 406     std::string ascii = UTF16ToASCII(WideToUTF16(wchar_cases[i]));
 407     EXPECT_EQ(char_cases[i], ascii);
 408   }
 409
 410   EXPECT_FALSE(IsStringASCII("Google \x80Video"));
 411
 412   // Convert empty strings.
 413   string16 empty16;
 414   std::string empty;
 415   EXPECT_EQ(empty, UTF16ToASCII(empty16));
 416   EXPECT_EQ(empty16, ASCIIToUTF16(empty));
 417
 418   // Convert strings with an embedded NUL character.
 419   const char chars_with_nul[] = "test\0string";
 420   const int length_with_nul = arraysize(chars_with_nul) - 1;
 421   std::string string_with_nul(chars_with_nul, length_with_nul);
 422   std::wstring wide_with_nul = ASCIIToWide(string_with_nul);
 423   EXPECT_EQ(static_cast<std::wstring::size_type>(length_with_nul),
 424             wide_with_nul.length());
 425   std::string narrow_with_nul = UTF16ToASCII(WideToUTF16(wide_with_nul));
 426   EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul),
 427             narrow_with_nul.length());
 428   EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));
 429 }
 430
 431 TEST(StringUtilTest, ToUpperASCII) {
 432   EXPECT_EQ('C', ToUpperASCII('C'));
 433   EXPECT_EQ('C', ToUpperASCII('c'));
 434   EXPECT_EQ('2', ToUpperASCII('2'));
 435
 436   EXPECT_EQ(L'C', ToUpperASCII(L'C'));
 437   EXPECT_EQ(L'C', ToUpperASCII(L'c'));
 438   EXPECT_EQ(L'2', ToUpperASCII(L'2'));
 439
 440   std::string in_place_a("Cc2");
 441   StringToUpperASCII(&in_place_a);
 442   EXPECT_EQ("CC2", in_place_a);
 443
 444   std::wstring in_place_w(L"Cc2");
 445   StringToUpperASCII(&in_place_w);
 446   EXPECT_EQ(L"CC2", in_place_w);
 447
 448   std::string original_a("Cc2");
 449   std::string upper_a = StringToUpperASCII(original_a);
 450   EXPECT_EQ("CC2", upper_a);
 451
 452   std::wstring original_w(L"Cc2");
 453   std::wstring upper_w = StringToUpperASCII(original_w);
 454   EXPECT_EQ(L"CC2", upper_w);
 455 }
 456
 457 TEST(StringUtilTest, LowerCaseEqualsASCII) {
 458   static const struct {
 459     const char*    src_a;
 460     const char*    dst;
 461   } lowercase_cases[] = {
 462     { "FoO", "foo" },
 463     { "foo", "foo" },
 464     { "FOO", "foo" },
 465   };
 466
 467   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(lowercase_cases); ++i) {
 468     EXPECT_TRUE(LowerCaseEqualsASCII(ASCIIToUTF16(lowercase_cases[i].src_a),
 469                                      lowercase_cases[i].dst));
 470     EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a,
 471                                      lowercase_cases[i].dst));
 472   }
 473 }
 474
 475 TEST(StringUtilTest, FormatBytesUnlocalized) {
 476   static const struct {
 477     int64 bytes;
 478     const char* expected;
 479   } cases[] = {
 480     // Expected behavior: we show one post-decimal digit when we have
 481     // under two pre-decimal digits, except in cases where it makes no
 482     // sense (zero or bytes).
 483     // Since we switch units once we cross the 1000 mark, this keeps
 484     // the display of file sizes or bytes consistently around three
 485     // digits.
 486     {0, "0 B"},
 487     {512, "512 B"},
 488     {1024*1024, "1.0 MB"},
 489     {1024*1024*1024, "1.0 GB"},
 490     {10LL*1024*1024*1024, "10.0 GB"},
 491     {99LL*1024*1024*1024, "99.0 GB"},
 492     {105LL*1024*1024*1024, "105 GB"},
 493     {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"},
 494     {~(1LL<<63), "8192 PB"},
 495
 496     {99*1024 + 103, "99.1 kB"},
 497     {1024*1024 + 103, "1.0 MB"},
 498     {1024*1024 + 205 * 1024, "1.2 MB"},
 499     {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"},
 500     {10LL*1024*1024*1024, "10.0 GB"},
 501     {100LL*1024*1024*1024, "100 GB"},
 502   };
 503
 504   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
 505     EXPECT_EQ(ASCIIToUTF16(cases[i].expected),
 506               FormatBytesUnlocalized(cases[i].bytes));
 507   }
 508 }
 509 TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
 510   static const struct {
 511     const char* str;
 512     string16::size_type start_offset;
 513     const char* find_this;
 514     const char* replace_with;
 515     const char* expected;
 516   } cases[] = {
 517     {"aaa", 0, "a", "b", "bbb"},
 518     {"abb", 0, "ab", "a", "ab"},
 519     {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
 520     {"Not found", 0, "x", "0", "Not found"},
 521     {"Not found again", 5, "x", "0", "Not found again"},
 522     {" Making it much longer ", 0, " ", "Four score and seven years ago",
 523      "Four score and seven years agoMakingFour score and seven years agoit"
 524      "Four score and seven years agomuchFour score and seven years agolonger"
 525      "Four score and seven years ago"},
 526     {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
 527     {"Replace me only me once", 9, "me ", "", "Replace me only once"},
 528     {"abababab", 2, "ab", "c", "abccc"},
 529   };
 530
 531   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
 532     string16 str = ASCIIToUTF16(cases[i].str);
 533     ReplaceSubstringsAfterOffset(&str, cases[i].start_offset,
 534                                  ASCIIToUTF16(cases[i].find_this),
 535                                  ASCIIToUTF16(cases[i].replace_with));
 536     EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
 537   }
 538 }
 539
 540 TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
 541   static const struct {
 542     const char* str;
 543     string16::size_type start_offset;
 544     const char* find_this;
 545     const char* replace_with;
 546     const char* expected;
 547   } cases[] = {
 548     {"aaa", 0, "a", "b", "baa"},
 549     {"abb", 0, "ab", "a", "ab"},
 550     {"Removing some substrings inging", 0, "ing", "",
 551       "Remov some substrings inging"},
 552     {"Not found", 0, "x", "0", "Not found"},
 553     {"Not found again", 5, "x", "0", "Not found again"},
 554     {" Making it much longer ", 0, " ", "Four score and seven years ago",
 555      "Four score and seven years agoMaking it much longer "},
 556     {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
 557     {"Replace me only me once", 4, "me ", "", "Replace only me once"},
 558     {"abababab", 2, "ab", "c", "abcabab"},
 559   };
 560
 561   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
 562     string16 str = ASCIIToUTF16(cases[i].str);
 563     ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset,
 564                                      ASCIIToUTF16(cases[i].find_this),
 565                                      ASCIIToUTF16(cases[i].replace_with));
 566     EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
 567   }
 568 }
 569
 570 TEST(StringUtilTest, HexDigitToInt) {
 571   EXPECT_EQ(0, HexDigitToInt('0'));
 572   EXPECT_EQ(1, HexDigitToInt('1'));
 573   EXPECT_EQ(2, HexDigitToInt('2'));
 574   EXPECT_EQ(3, HexDigitToInt('3'));
 575   EXPECT_EQ(4, HexDigitToInt('4'));
 576   EXPECT_EQ(5, HexDigitToInt('5'));
 577   EXPECT_EQ(6, HexDigitToInt('6'));
 578   EXPECT_EQ(7, HexDigitToInt('7'));
 579   EXPECT_EQ(8, HexDigitToInt('8'));
 580   EXPECT_EQ(9, HexDigitToInt('9'));
 581   EXPECT_EQ(10, HexDigitToInt('A'));
 582   EXPECT_EQ(11, HexDigitToInt('B'));
 583   EXPECT_EQ(12, HexDigitToInt('C'));
 584   EXPECT_EQ(13, HexDigitToInt('D'));
 585   EXPECT_EQ(14, HexDigitToInt('E'));
 586   EXPECT_EQ(15, HexDigitToInt('F'));
 587
 588   // Verify the lower case as well.
 589   EXPECT_EQ(10, HexDigitToInt('a'));
 590   EXPECT_EQ(11, HexDigitToInt('b'));
 591   EXPECT_EQ(12, HexDigitToInt('c'));
 592   EXPECT_EQ(13, HexDigitToInt('d'));
 593   EXPECT_EQ(14, HexDigitToInt('e'));
 594   EXPECT_EQ(15, HexDigitToInt('f'));
 595 }
 596
 597 // This checks where we can use the assignment operator for a va_list. We need
 598 // a way to do this since Visual C doesn't support va_copy, but assignment on
 599 // va_list is not guaranteed to be a copy. See StringAppendVT which uses this
 600 // capability.
 601 static void VariableArgsFunc(const char* format, ...) {
 602   va_list org;
 603   va_start(org, format);
 604
 605   va_list dup;
 606   GG_VA_COPY(dup, org);
 607   int i1 = va_arg(org, int);
 608   int j1 = va_arg(org, int);
 609   char* s1 = va_arg(org, char*);
 610   double d1 = va_arg(org, double);
 611   va_end(org);
 612
 613   int i2 = va_arg(dup, int);
 614   int j2 = va_arg(dup, int);
 615   char* s2 = va_arg(dup, char*);
 616   double d2 = va_arg(dup, double);
 617
 618   EXPECT_EQ(i1, i2);
 619   EXPECT_EQ(j1, j2);
 620   EXPECT_STREQ(s1, s2);
 621   EXPECT_EQ(d1, d2);
 622
 623   va_end(dup);
 624 }
 625
 626 TEST(StringUtilTest, VAList) {
 627   VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21);
 628 }
 629
 630 // Test for Tokenize
 631 template <typename STR>
 632 void TokenizeTest() {
 633   std::vector<STR> r;
 634   size_t size;
 635
 636   size = Tokenize(STR("This is a string"), STR(" "), &r);
 637   EXPECT_EQ(4U, size);
 638   ASSERT_EQ(4U, r.size());
 639   EXPECT_EQ(r[0], STR("This"));
 640   EXPECT_EQ(r[1], STR("is"));
 641   EXPECT_EQ(r[2], STR("a"));
 642   EXPECT_EQ(r[3], STR("string"));
 643   r.clear();
 644
 645   size = Tokenize(STR("one,two,three"), STR(","), &r);
 646   EXPECT_EQ(3U, size);
 647   ASSERT_EQ(3U, r.size());
 648   EXPECT_EQ(r[0], STR("one"));
 649   EXPECT_EQ(r[1], STR("two"));
 650   EXPECT_EQ(r[2], STR("three"));
 651   r.clear();
 652
 653   size = Tokenize(STR("one,two:three;four"), STR(",:"), &r);
 654   EXPECT_EQ(3U, size);
 655   ASSERT_EQ(3U, r.size());
 656   EXPECT_EQ(r[0], STR("one"));
 657   EXPECT_EQ(r[1], STR("two"));
 658   EXPECT_EQ(r[2], STR("three;four"));
 659   r.clear();
 660
 661   size = Tokenize(STR("one,two:three;four"), STR(";,:"), &r);
 662   EXPECT_EQ(4U, size);
 663   ASSERT_EQ(4U, r.size());
 664   EXPECT_EQ(r[0], STR("one"));
 665   EXPECT_EQ(r[1], STR("two"));
 666   EXPECT_EQ(r[2], STR("three"));
 667   EXPECT_EQ(r[3], STR("four"));
 668   r.clear();
 669
 670   size = Tokenize(STR("one, two, three"), STR(","), &r);
 671   EXPECT_EQ(3U, size);
 672   ASSERT_EQ(3U, r.size());
 673   EXPECT_EQ(r[0], STR("one"));
 674   EXPECT_EQ(r[1], STR(" two"));
 675   EXPECT_EQ(r[2], STR(" three"));
 676   r.clear();
 677
 678   size = Tokenize(STR("one, two, three, "), STR(","), &r);
 679   EXPECT_EQ(4U, size);
 680   ASSERT_EQ(4U, r.size());
 681   EXPECT_EQ(r[0], STR("one"));
 682   EXPECT_EQ(r[1], STR(" two"));
 683   EXPECT_EQ(r[2], STR(" three"));
 684   EXPECT_EQ(r[3], STR(" "));
 685   r.clear();
 686
 687   size = Tokenize(STR("one, two, three,"), STR(","), &r);
 688   EXPECT_EQ(3U, size);
 689   ASSERT_EQ(3U, r.size());
 690   EXPECT_EQ(r[0], STR("one"));
 691   EXPECT_EQ(r[1], STR(" two"));
 692   EXPECT_EQ(r[2], STR(" three"));
 693   r.clear();
 694
 695   size = Tokenize(STR(), STR(","), &r);
 696   EXPECT_EQ(0U, size);
 697   ASSERT_EQ(0U, r.size());
 698   r.clear();
 699
 700   size = Tokenize(STR(","), STR(","), &r);
 701   EXPECT_EQ(0U, size);
 702   ASSERT_EQ(0U, r.size());
 703   r.clear();
 704
 705   size = Tokenize(STR(",;:."), STR(".:;,"), &r);
 706   EXPECT_EQ(0U, size);
 707   ASSERT_EQ(0U, r.size());
 708   r.clear();
 709
 710   size = Tokenize(STR("\t\ta\t"), STR("\t"), &r);
 711   EXPECT_EQ(1U, size);
 712   ASSERT_EQ(1U, r.size());
 713   EXPECT_EQ(r[0], STR("a"));
 714   r.clear();
 715
 716   size = Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r);
 717   EXPECT_EQ(2U, size);
 718   ASSERT_EQ(2U, r.size());
 719   EXPECT_EQ(r[0], STR("\ta\t"));
 720   EXPECT_EQ(r[1], STR("b\tcc"));
 721   r.clear();
 722 }
 723
 724 TEST(StringUtilTest, TokenizeStdString) {
 725   TokenizeTest<std::string>();
 726 }
 727
 728 TEST(StringUtilTest, TokenizeStringPiece) {
 729   TokenizeTest<base::StringPiece>();
 730 }
 731
 732 // Test for JoinString
 733 TEST(StringUtilTest, JoinString) {
 734   std::vector<std::string> in;
 735   EXPECT_EQ("", JoinString(in, ','));
 736
 737   in.push_back("a");
 738   EXPECT_EQ("a", JoinString(in, ','));
 739
 740   in.push_back("b");
 741   in.push_back("c");
 742   EXPECT_EQ("a,b,c", JoinString(in, ','));
 743
 744   in.push_back(std::string());
 745   EXPECT_EQ("a,b,c,", JoinString(in, ','));
 746   in.push_back(" ");
 747   EXPECT_EQ("a|b|c|| ", JoinString(in, '|'));
 748 }
 749
 750 // Test for JoinString overloaded with std::string separator
 751 TEST(StringUtilTest, JoinStringWithString) {
 752   std::string separator(", ");
 753   std::vector<std::string> parts;
 754   EXPECT_EQ(std::string(), JoinString(parts, separator));
 755
 756   parts.push_back("a");
 757   EXPECT_EQ("a", JoinString(parts, separator));
 758
 759   parts.push_back("b");
 760   parts.push_back("c");
 761   EXPECT_EQ("a, b, c", JoinString(parts, separator));
 762
 763   parts.push_back(std::string());
 764   EXPECT_EQ("a, b, c, ", JoinString(parts, separator));
 765   parts.push_back(" ");
 766   EXPECT_EQ("a|b|c|| ", JoinString(parts, "|"));
 767 }
 768
 769 // Test for JoinString overloaded with string16 separator
 770 TEST(StringUtilTest, JoinStringWithString16) {
 771   string16 separator = ASCIIToUTF16(", ");
 772   std::vector<string16> parts;
 773   EXPECT_EQ(string16(), JoinString(parts, separator));
 774
 775   parts.push_back(ASCIIToUTF16("a"));
 776   EXPECT_EQ(ASCIIToUTF16("a"), JoinString(parts, separator));
 777
 778   parts.push_back(ASCIIToUTF16("b"));
 779   parts.push_back(ASCIIToUTF16("c"));
 780   EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(parts, separator));
 781
 782   parts.push_back(ASCIIToUTF16(""));
 783   EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(parts, separator));
 784   parts.push_back(ASCIIToUTF16(" "));
 785   EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(parts, ASCIIToUTF16("|")));
 786 }
 787
 788 TEST(StringUtilTest, StartsWith) {
 789   EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true));
 790   EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true));
 791   EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false));
 792   EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false));
 793   EXPECT_FALSE(StartsWithASCII("java", "javascript", true));
 794   EXPECT_FALSE(StartsWithASCII("java", "javascript", false));
 795   EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", false));
 796   EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", true));
 797   EXPECT_TRUE(StartsWithASCII("java", std::string(), false));
 798   EXPECT_TRUE(StartsWithASCII("java", std::string(), true));
 799
 800   EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"),
 801                          ASCIIToUTF16("javascript"), true));
 802   EXPECT_FALSE(StartsWith(ASCIIToUTF16("JavaScript:url"),
 803                           ASCIIToUTF16("javascript"), true));
 804   EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"),
 805                          ASCIIToUTF16("javascript"), false));
 806   EXPECT_TRUE(StartsWith(ASCIIToUTF16("JavaScript:url"),
 807                          ASCIIToUTF16("javascript"), false));
 808   EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"),
 809                           ASCIIToUTF16("javascript"), true));
 810   EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"),
 811                           ASCIIToUTF16("javascript"), false));
 812   EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"), false));
 813   EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"), true));
 814   EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(), false));
 815   EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(), true));
 816 }
 817
 818 TEST(StringUtilTest, EndsWith) {
 819   EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"),
 820                        ASCIIToUTF16(".plugin"), true));
 821   EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.Plugin"),
 822                         ASCIIToUTF16(".plugin"), true));
 823   EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"),
 824                        ASCIIToUTF16(".plugin"), false));
 825   EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.Plugin"),
 826                        ASCIIToUTF16(".plugin"), false));
 827   EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"), true));
 828   EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"), false));
 829   EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"),
 830                         ASCIIToUTF16(".plugin"), true));
 831   EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"),
 832                         ASCIIToUTF16(".plugin"), false));
 833   EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"), false));
 834   EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"), true));
 835   EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(), false));
 836   EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(), true));
 837   EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"),
 838                        ASCIIToUTF16(".plugin"), false));
 839   EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"), ASCIIToUTF16(".plugin"), true));
 840   EXPECT_TRUE(EndsWith(string16(), string16(), false));
 841   EXPECT_TRUE(EndsWith(string16(), string16(), true));
 842 }
 843
 844 TEST(StringUtilTest, GetStringFWithOffsets) {
 845   std::vector<string16> subst;
 846   subst.push_back(ASCIIToUTF16("1"));
 847   subst.push_back(ASCIIToUTF16("2"));
 848   std::vector<size_t> offsets;
 849
 850   ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
 851                             subst,
 852                             &offsets);
 853   EXPECT_EQ(2U, offsets.size());
 854   EXPECT_EQ(7U, offsets[0]);
 855   EXPECT_EQ(25U, offsets[1]);
 856   offsets.clear();
 857
 858   ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
 859                             subst,
 860                             &offsets);
 861   EXPECT_EQ(2U, offsets.size());
 862   EXPECT_EQ(25U, offsets[0]);
 863   EXPECT_EQ(7U, offsets[1]);
 864   offsets.clear();
 865 }
 866
 867 TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
 868   // Test whether replacestringplaceholders works as expected when there
 869   // are fewer inputs than outputs.
 870   std::vector<string16> subst;
 871   subst.push_back(ASCIIToUTF16("9a"));
 872   subst.push_back(ASCIIToUTF16("8b"));
 873   subst.push_back(ASCIIToUTF16("7c"));
 874
 875   string16 formatted =
 876       ReplaceStringPlaceholders(
 877           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL);
 878
 879   EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"));
 880 }
 881
 882 TEST(StringUtilTest, ReplaceStringPlaceholders) {
 883   std::vector<string16> subst;
 884   subst.push_back(ASCIIToUTF16("9a"));
 885   subst.push_back(ASCIIToUTF16("8b"));
 886   subst.push_back(ASCIIToUTF16("7c"));
 887   subst.push_back(ASCIIToUTF16("6d"));
 888   subst.push_back(ASCIIToUTF16("5e"));
 889   subst.push_back(ASCIIToUTF16("4f"));
 890   subst.push_back(ASCIIToUTF16("3g"));
 891   subst.push_back(ASCIIToUTF16("2h"));
 892   subst.push_back(ASCIIToUTF16("1i"));
 893
 894   string16 formatted =
 895       ReplaceStringPlaceholders(
 896           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL);
 897
 898   EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"));
 899 }
 900
 901 TEST(StringUtilTest, ReplaceStringPlaceholdersMoreThan9Replacements) {
 902   std::vector<string16> subst;
 903   subst.push_back(ASCIIToUTF16("9a"));
 904   subst.push_back(ASCIIToUTF16("8b"));
 905   subst.push_back(ASCIIToUTF16("7c"));
 906   subst.push_back(ASCIIToUTF16("6d"));
 907   subst.push_back(ASCIIToUTF16("5e"));
 908   subst.push_back(ASCIIToUTF16("4f"));
 909   subst.push_back(ASCIIToUTF16("3g"));
 910   subst.push_back(ASCIIToUTF16("2h"));
 911   subst.push_back(ASCIIToUTF16("1i"));
 912   subst.push_back(ASCIIToUTF16("0j"));
 913   subst.push_back(ASCIIToUTF16("-1k"));
 914   subst.push_back(ASCIIToUTF16("-2l"));
 915   subst.push_back(ASCIIToUTF16("-3m"));
 916   subst.push_back(ASCIIToUTF16("-4n"));
 917
 918   string16 formatted =
 919       ReplaceStringPlaceholders(
 920           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i,"
 921                        "$10j,$11k,$12l,$13m,$14n,$1"), subst, NULL);
 922
 923   EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,"
 924                                     "1ii,0jj,-1kk,-2ll,-3mm,-4nn,9a"));
 925 }
 926
 927 TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
 928   std::vector<std::string> subst;
 929   subst.push_back("9a");
 930   subst.push_back("8b");
 931   subst.push_back("7c");
 932   subst.push_back("6d");
 933   subst.push_back("5e");
 934   subst.push_back("4f");
 935   subst.push_back("3g");
 936   subst.push_back("2h");
 937   subst.push_back("1i");
 938
 939   std::string formatted =
 940       ReplaceStringPlaceholders(
 941           "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL);
 942
 943   EXPECT_EQ(formatted, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii");
 944 }
 945
 946 TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
 947   std::vector<std::string> subst;
 948   subst.push_back("a");
 949   subst.push_back("b");
 950   subst.push_back("c");
 951   EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, NULL),
 952             "$1 $$2 $$$3");
 953 }
 954
 955 TEST(StringUtilTest, MatchPatternTest) {
 956   EXPECT_TRUE(MatchPattern("www.google.com", "*.com"));
 957   EXPECT_TRUE(MatchPattern("www.google.com", "*"));
 958   EXPECT_FALSE(MatchPattern("www.google.com", "www*.g*.org"));
 959   EXPECT_TRUE(MatchPattern("Hello", "H?l?o"));
 960   EXPECT_FALSE(MatchPattern("www.google.com", "http://*)"));
 961   EXPECT_FALSE(MatchPattern("www.msn.com", "*.COM"));
 962   EXPECT_TRUE(MatchPattern("Hello*1234", "He??o\\*1*"));
 963   EXPECT_FALSE(MatchPattern("", "*.*"));
 964   EXPECT_TRUE(MatchPattern("", "*"));
 965   EXPECT_TRUE(MatchPattern("", "?"));
 966   EXPECT_TRUE(MatchPattern("", ""));
 967   EXPECT_FALSE(MatchPattern("Hello", ""));
 968   EXPECT_TRUE(MatchPattern("Hello*", "Hello*"));
 969   // Stop after a certain recursion depth.
 970   EXPECT_FALSE(MatchPattern("123456789012345678", "?????????????????*"));
 971
 972   // Test UTF8 matching.
 973   EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0", "*\xe2\x99\xa0"));
 974   EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0.", "heart: ?."));
 975   EXPECT_TRUE(MatchPattern("hearts: \xe2\x99\xa0\xe2\x99\xa0", "*"));
 976   // Invalid sequences should be handled as a single invalid character.
 977   EXPECT_TRUE(MatchPattern("invalid: \xef\xbf\xbe", "invalid: ?"));
 978   // If the pattern has invalid characters, it shouldn't match anything.
 979   EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80"));
 980
 981   // Test UTF16 character matching.
 982   EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"),
 983                            UTF8ToUTF16("*.com")));
 984   EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"),
 985                            UTF8ToUTF16("He??o\\*1*")));
 986
 987   // This test verifies that consecutive wild cards are collapsed into 1
 988   // wildcard (when this doesn't occur, MatchPattern reaches it's maximum
 989   // recursion depth).
 990   EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello"),
 991                            UTF8ToUTF16("He********************************o")));
 992 }
 993
 994 TEST(StringUtilTest, LcpyTest) {
 995   // Test the normal case where we fit in our buffer.
 996   {
 997     char dst[10];
 998     wchar_t wdst[10];
 999     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1000     EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
1001     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1002     EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
1003   }
1004
1005   // Test dst_size == 0, nothing should be written to |dst| and we should
1006   // have the equivalent of strlen(src).
1007   {
1008     char dst[2] = {1, 2};
1009     wchar_t wdst[2] = {1, 2};
1010     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", 0));
1011     EXPECT_EQ(1, dst[0]);
1012     EXPECT_EQ(2, dst[1]);
1013     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", 0));
1014     EXPECT_EQ(static_cast<wchar_t>(1), wdst[0]);
1015     EXPECT_EQ(static_cast<wchar_t>(2), wdst[1]);
1016   }
1017
1018   // Test the case were we _just_ competely fit including the null.
1019   {
1020     char dst[8];
1021     wchar_t wdst[8];
1022     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1023     EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
1024     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1025     EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
1026   }
1027
1028   // Test the case were we we are one smaller, so we can't fit the null.
1029   {
1030     char dst[7];
1031     wchar_t wdst[7];
1032     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1033     EXPECT_EQ(0, memcmp(dst, "abcdef", 7));
1034     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1035     EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7));
1036   }
1037
1038   // Test the case were we are just too small.
1039   {
1040     char dst[3];
1041     wchar_t wdst[3];
1042     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1043     EXPECT_EQ(0, memcmp(dst, "ab", 3));
1044     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1045     EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3));
1046   }
1047 }
1048
1049 TEST(StringUtilTest, WprintfFormatPortabilityTest) {
1050   static const struct {
1051     const wchar_t* input;
1052     bool portable;
1053   } cases[] = {
1054     { L"%ls", true },
1055     { L"%s", false },
1056     { L"%S", false },
1057     { L"%lS", false },
1058     { L"Hello, %s", false },
1059     { L"%lc", true },
1060     { L"%c", false },
1061     { L"%C", false },
1062     { L"%lC", false },
1063     { L"%ls %s", false },
1064     { L"%s %ls", false },
1065     { L"%s %ls %s", false },
1066     { L"%f", true },
1067     { L"%f %F", false },
1068     { L"%d %D", false },
1069     { L"%o %O", false },
1070     { L"%u %U", false },
1071     { L"%f %d %o %u", true },
1072     { L"%-8d (%02.1f%)", true },
1073     { L"% 10s", false },
1074     { L"% 10ls", true }
1075   };
1076   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
1077     EXPECT_EQ(cases[i].portable, base::IsWprintfFormatPortable(cases[i].input));
1078 }
1079
1080 TEST(StringUtilTest, RemoveChars) {
1081   const char* kRemoveChars = "-/+*";
1082   std::string input = "A-+bc/d!*";
1083   EXPECT_TRUE(RemoveChars(input, kRemoveChars, &input));
1084   EXPECT_EQ("Abcd!", input);
1085
1086   // No characters match kRemoveChars.
1087   EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
1088   EXPECT_EQ("Abcd!", input);
1089
1090   // Empty string.
1091   input.clear();
1092   EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
1093   EXPECT_EQ(std::string(), input);
1094 }
1095
1096 TEST(StringUtilTest, ReplaceChars) {
1097   struct TestData {
1098     const char* input;
1099     const char* replace_chars;
1100     const char* replace_with;
1101     const char* output;
1102     bool result;
1103   } cases[] = {
1104     { "", "", "", "", false },
1105     { "test", "", "", "test", false },
1106     { "test", "", "!", "test", false },
1107     { "test", "z", "!", "test", false },
1108     { "test", "e", "!", "t!st", true },
1109     { "test", "e", "!?", "t!?st", true },
1110     { "test", "ez", "!", "t!st", true },
1111     { "test", "zed", "!?", "t!?st", true },
1112     { "test", "t", "!?", "!?es!?", true },
1113     { "test", "et", "!>", "!>!>s!>", true },
1114     { "test", "zest", "!", "!!!!", true },
1115     { "test", "szt", "!", "!e!!", true },
1116     { "test", "t", "test", "testestest", true },
1117   };
1118
1119   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
1120     std::string output;
1121     bool result = ReplaceChars(cases[i].input,
1122                                cases[i].replace_chars,
1123                                cases[i].replace_with,
1124                                &output);
1125     EXPECT_EQ(cases[i].result, result);
1126     EXPECT_EQ(cases[i].output, output);
1127   }
1128 }
1129
1130 TEST(StringUtilTest, ContainsOnlyChars) {
1131   // Providing an empty list of characters should return false but for the empty
1132   // string.
1133   EXPECT_TRUE(ContainsOnlyChars(std::string(), std::string()));
1134   EXPECT_FALSE(ContainsOnlyChars("Hello", std::string()));
1135
1136   EXPECT_TRUE(ContainsOnlyChars(std::string(), "1234"));
1137   EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
1138   EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
1139   EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
1140   EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));
1141
1142   EXPECT_TRUE(ContainsOnlyChars(std::string(), kWhitespaceASCII));
1143   EXPECT_TRUE(ContainsOnlyChars(" ", kWhitespaceASCII));
1144   EXPECT_TRUE(ContainsOnlyChars("\t", kWhitespaceASCII));
1145   EXPECT_TRUE(ContainsOnlyChars("\t \r \n  ", kWhitespaceASCII));
1146   EXPECT_FALSE(ContainsOnlyChars("a", kWhitespaceASCII));
1147   EXPECT_FALSE(ContainsOnlyChars("\thello\r \n  ", kWhitespaceASCII));
1148
1149   EXPECT_TRUE(ContainsOnlyChars(string16(), kWhitespaceUTF16));
1150   EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16(" "), kWhitespaceUTF16));
1151   EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t"), kWhitespaceUTF16));
1152   EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t \r \n  "), kWhitespaceUTF16));
1153   EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("a"), kWhitespaceUTF16));
1154   EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("\thello\r \n  "),
1155                                   kWhitespaceUTF16));
1156 }
1157
1158 class WriteIntoTest : public testing::Test {
1159  protected:
1160   static void WritesCorrectly(size_t num_chars) {
1161     std::string buffer;
1162     char kOriginal[] = "supercali";
1163     strncpy(WriteInto(&buffer, num_chars + 1), kOriginal, num_chars);
1164     // Using std::string(buffer.c_str()) instead of |buffer| truncates the
1165     // string at the first \0.
1166     EXPECT_EQ(std::string(kOriginal,
1167                           std::min(num_chars, arraysize(kOriginal) - 1)),
1168               std::string(buffer.c_str()));
1169     EXPECT_EQ(num_chars, buffer.size());
1170   }
1171 };
1172
1173 TEST_F(WriteIntoTest, WriteInto) {
1174   // Validate that WriteInto reserves enough space and
1175   // sizes a string correctly.
1176   WritesCorrectly(1);
1177   WritesCorrectly(2);
1178   WritesCorrectly(5000);
1179
1180   // Validate that WriteInto doesn't modify other strings
1181   // when using a Copy-on-Write implementation.
1182   const char kLive[] = "live";
1183   const char kDead[] = "dead";
1184   const std::string live = kLive;
1185   std::string dead = live;
1186   strncpy(WriteInto(&dead, 5), kDead, 4);
1187   EXPECT_EQ(kDead, dead);
1188   EXPECT_EQ(4u, dead.size());
1189   EXPECT_EQ(kLive, live);
1190   EXPECT_EQ(4u, live.size());
1191 }
1192
1193 }  // namespace base