base/string_util_unittest.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include <math.h>
   6 #include <stdarg.h>
   7
   8 #include <limits>
   9 #include <sstream>
  10
  11 #include "base/basictypes.h"
  12 #include "base/string16.h"
  13 #include "base/string_util.h"
  14 #include "base/utf_string_conversions.h"
  15 #include "testing/gmock/include/gmock/gmock.h"
  16 #include "testing/gtest/include/gtest/gtest.h"
  17
  18 using ::testing::ElementsAre;
  19
  20 namespace base {
  21
  22 static const struct trim_case {
  23   const wchar_t* input;
  24   const TrimPositions positions;
  25   const wchar_t* output;
  26   const TrimPositions return_value;
  27 } trim_cases[] = {
  28   {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
  29   {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
  30   {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
  31   {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
  32   {L"", TRIM_ALL, L"", TRIM_NONE},
  33   {L"  ", TRIM_LEADING, L"", TRIM_LEADING},
  34   {L"  ", TRIM_TRAILING, L"", TRIM_TRAILING},
  35   {L"  ", TRIM_ALL, L"", TRIM_ALL},
  36   {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
  37   {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
  38 };
  39
  40 static const struct trim_case_ascii {
  41   const char* input;
  42   const TrimPositions positions;
  43   const char* output;
  44   const TrimPositions return_value;
  45 } trim_cases_ascii[] = {
  46   {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
  47   {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
  48   {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
  49   {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
  50   {"", TRIM_ALL, "", TRIM_NONE},
  51   {"  ", TRIM_LEADING, "", TRIM_LEADING},
  52   {"  ", TRIM_TRAILING, "", TRIM_TRAILING},
  53   {"  ", TRIM_ALL, "", TRIM_ALL},
  54   {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
  55 };
  56
  57 namespace {
  58
  59 // Helper used to test TruncateUTF8ToByteSize.
  60 bool Truncated(const std::string& input, const size_t byte_size,
  61                std::string* output) {
  62     size_t prev = input.length();
  63     TruncateUTF8ToByteSize(input, byte_size, output);
  64     return prev != output->length();
  65 }
  66
  67 }  // namespace
  68
  69 TEST(StringUtilTest, TruncateUTF8ToByteSize) {
  70   std::string output;
  71
  72   // Empty strings and invalid byte_size arguments
  73   EXPECT_FALSE(Truncated("", 0, &output));
  74   EXPECT_EQ(output, "");
  75   EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
  76   EXPECT_EQ(output, "");
  77   EXPECT_FALSE(Truncated("\xe1\x80\xbf", -1, &output));
  78   EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));
  79
  80   // Testing the truncation of valid UTF8 correctly
  81   EXPECT_TRUE(Truncated("abc", 2, &output));
  82   EXPECT_EQ(output, "ab");
  83   EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
  84   EXPECT_EQ(output.compare("\xc2\x81"), 0);
  85   EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
  86   EXPECT_EQ(output.compare("\xc2\x81"), 0);
  87   EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
  88   EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);
  89
  90   {
  91     const char array[] = "\x00\x00\xc2\x81\xc2\x81";
  92     const std::string array_string(array, arraysize(array));
  93     EXPECT_TRUE(Truncated(array_string, 4, &output));
  94     EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
  95   }
  96
  97   {
  98     const char array[] = "\x00\xc2\x81\xc2\x81";
  99     const std::string array_string(array, arraysize(array));
 100     EXPECT_TRUE(Truncated(array_string, 4, &output));
 101     EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);
 102   }
 103
 104   // Testing invalid UTF8
 105   EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
 106   EXPECT_EQ(output.compare(""), 0);
 107   EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
 108   EXPECT_EQ(output.compare(""), 0);
 109   EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
 110   EXPECT_EQ(output.compare(""), 0);
 111
 112   // Testing invalid UTF8 mixed with valid UTF8
 113   EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
 114   EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);
 115   EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
 116   EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);
 117   EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
 118               10, &output));
 119   EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
 120   EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
 121               10, &output));
 122   EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
 123   EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
 124   EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);
 125
 126   // Overlong sequences
 127   EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
 128   EXPECT_EQ(output.compare(""), 0);
 129   EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
 130   EXPECT_EQ(output.compare(""), 0);
 131   EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
 132   EXPECT_EQ(output.compare(""), 0);
 133   EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
 134   EXPECT_EQ(output.compare(""), 0);
 135   EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
 136   EXPECT_EQ(output.compare(""), 0);
 137   EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
 138   EXPECT_EQ(output.compare(""), 0);
 139   EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
 140   EXPECT_EQ(output.compare(""), 0);
 141   EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
 142   EXPECT_EQ(output.compare(""), 0);
 143   EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
 144   EXPECT_EQ(output.compare(""), 0);
 145   EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
 146   EXPECT_EQ(output.compare(""), 0);
 147   EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
 148   EXPECT_EQ(output.compare(""), 0);
 149
 150   // Beyond U+10FFFF (the upper limit of Unicode codespace)
 151   EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
 152   EXPECT_EQ(output.compare(""), 0);
 153   EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
 154   EXPECT_EQ(output.compare(""), 0);
 155   EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
 156   EXPECT_EQ(output.compare(""), 0);
 157
 158   // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
 159   EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
 160   EXPECT_EQ(output.compare(""), 0);
 161   EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
 162   EXPECT_EQ(output.compare(""), 0);
 163
 164   {
 165     const char array[] = "\x00\x00\xfe\xff";
 166     const std::string array_string(array, arraysize(array));
 167     EXPECT_TRUE(Truncated(array_string, 4, &output));
 168     EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);
 169   }
 170
 171   // Variants on the previous test
 172   {
 173     const char array[] = "\xff\xfe\x00\x00";
 174     const std::string array_string(array, 4);
 175     EXPECT_FALSE(Truncated(array_string, 4, &output));
 176     EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
 177   }
 178   {
 179     const char array[] = "\xff\x00\x00\xfe";
 180     const std::string array_string(array, arraysize(array));
 181     EXPECT_TRUE(Truncated(array_string, 4, &output));
 182     EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);
 183   }
 184
 185   // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
 186   EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
 187   EXPECT_EQ(output.compare(""), 0);
 188   EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
 189   EXPECT_EQ(output.compare(""), 0);
 190   EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
 191   EXPECT_EQ(output.compare(""), 0);
 192   EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
 193   EXPECT_EQ(output.compare(""), 0);
 194   EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
 195   EXPECT_EQ(output.compare(""), 0);
 196
 197   // Strings in legacy encodings that are valid in UTF-8, but
 198   // are invalid as UTF-8 in real data.
 199   EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
 200   EXPECT_EQ(output.compare("caf"), 0);
 201   EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
 202   EXPECT_EQ(output.compare(""), 0);
 203   EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
 204   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
 205   EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
 206               &output));
 207   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
 208
 209   // Testing using the same string as input and output.
 210   EXPECT_FALSE(Truncated(output, 4, &output));
 211   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
 212   EXPECT_TRUE(Truncated(output, 3, &output));
 213   EXPECT_EQ(output.compare("\xa7\x41"), 0);
 214
 215   // "abc" with U+201[CD] in windows-125[0-8]
 216   EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
 217   EXPECT_EQ(output.compare("\x93" "abc"), 0);
 218
 219   // U+0639 U+064E U+0644 U+064E in ISO-8859-6
 220   EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
 221   EXPECT_EQ(output.compare(""), 0);
 222
 223   // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
 224   EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
 225   EXPECT_EQ(output.compare(""), 0);
 226 }
 227
 228 TEST(StringUtilTest, TrimWhitespace) {
 229   string16 output;  // Allow contents to carry over to next testcase
 230   for (size_t i = 0; i < arraysize(trim_cases); ++i) {
 231     const trim_case& value = trim_cases[i];
 232     EXPECT_EQ(value.return_value,
 233               TrimWhitespace(WideToUTF16(value.input), value.positions,
 234                              &output));
 235     EXPECT_EQ(WideToUTF16(value.output), output);
 236   }
 237
 238   // Test that TrimWhitespace() can take the same string for input and output
 239   output = ASCIIToUTF16("  This is a test \r\n");
 240   EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
 241   EXPECT_EQ(ASCIIToUTF16("This is a test"), output);
 242
 243   // Once more, but with a string of whitespace
 244   output = ASCIIToUTF16("  \r\n");
 245   EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
 246   EXPECT_EQ(string16(), output);
 247
 248   std::string output_ascii;
 249   for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) {
 250     const trim_case_ascii& value = trim_cases_ascii[i];
 251     EXPECT_EQ(value.return_value,
 252               TrimWhitespace(value.input, value.positions, &output_ascii));
 253     EXPECT_EQ(value.output, output_ascii);
 254   }
 255 }
 256
 257 static const struct collapse_case {
 258   const wchar_t* input;
 259   const bool trim;
 260   const wchar_t* output;
 261 } collapse_cases[] = {
 262   {L" Google Video ", false, L"Google Video"},
 263   {L"Google Video", false, L"Google Video"},
 264   {L"", false, L""},
 265   {L"  ", false, L""},
 266   {L"\t\rTest String\n", false, L"Test String"},
 267   {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
 268   {L"    Test     \n  \t String    ", false, L"Test String"},
 269   {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
 270   {L"   Test String", false, L"Test String"},
 271   {L"Test String    ", false, L"Test String"},
 272   {L"Test String", false, L"Test String"},
 273   {L"", true, L""},
 274   {L"\n", true, L""},
 275   {L"  \r  ", true, L""},
 276   {L"\nFoo", true, L"Foo"},
 277   {L"\r  Foo  ", true, L"Foo"},
 278   {L" Foo bar ", true, L"Foo bar"},
 279   {L"  \tFoo  bar  \n", true, L"Foo bar"},
 280   {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
 281 };
 282
 283 TEST(StringUtilTest, CollapseWhitespace) {
 284   for (size_t i = 0; i < arraysize(collapse_cases); ++i) {
 285     const collapse_case& value = collapse_cases[i];
 286     EXPECT_EQ(value.output, CollapseWhitespace(value.input, value.trim));
 287   }
 288 }
 289
 290 static const struct collapse_case_ascii {
 291   const char* input;
 292   const bool trim;
 293   const char* output;
 294 } collapse_cases_ascii[] = {
 295   {" Google Video ", false, "Google Video"},
 296   {"Google Video", false, "Google Video"},
 297   {"", false, ""},
 298   {"  ", false, ""},
 299   {"\t\rTest String\n", false, "Test String"},
 300   {"    Test     \n  \t String    ", false, "Test String"},
 301   {"   Test String", false, "Test String"},
 302   {"Test String    ", false, "Test String"},
 303   {"Test String", false, "Test String"},
 304   {"", true, ""},
 305   {"\n", true, ""},
 306   {"  \r  ", true, ""},
 307   {"\nFoo", true, "Foo"},
 308   {"\r  Foo  ", true, "Foo"},
 309   {" Foo bar ", true, "Foo bar"},
 310   {"  \tFoo  bar  \n", true, "Foo bar"},
 311   {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
 312 };
 313
 314 TEST(StringUtilTest, CollapseWhitespaceASCII) {
 315   for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) {
 316     const collapse_case_ascii& value = collapse_cases_ascii[i];
 317     EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim));
 318   }
 319 }
 320
 321 TEST(StringUtilTest, ContainsOnlyWhitespaceASCII) {
 322   EXPECT_TRUE(ContainsOnlyWhitespaceASCII(""));
 323   EXPECT_TRUE(ContainsOnlyWhitespaceASCII(" "));
 324   EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t"));
 325   EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t \r \n  "));
 326   EXPECT_FALSE(ContainsOnlyWhitespaceASCII("a"));
 327   EXPECT_FALSE(ContainsOnlyWhitespaceASCII("\thello\r \n  "));
 328 }
 329
 330 TEST(StringUtilTest, ContainsOnlyWhitespace) {
 331   EXPECT_TRUE(ContainsOnlyWhitespace(string16()));
 332   EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16(" ")));
 333   EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t")));
 334   EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t \r \n  ")));
 335   EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("a")));
 336   EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("\thello\r \n  ")));
 337 }
 338
 339 TEST(StringUtilTest, IsStringUTF8) {
 340   EXPECT_TRUE(IsStringUTF8("abc"));
 341   EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
 342   EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
 343   EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
 344   EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
 345   EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc"));  // UTF-8 BOM
 346
 347   // surrogate code points
 348   EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
 349   EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
 350   EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
 351
 352   // overlong sequences
 353   EXPECT_FALSE(IsStringUTF8("\xc0\x80"));  // U+0000
 354   EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81"));  // "AB"
 355   EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80"));  // U+0000
 356   EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80"));  // U+0080
 357   EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf"));  // U+07ff
 358   EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D"));  // U+000D
 359   EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91"));  // U+0091
 360   EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80"));  // U+0800
 361   EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf"));  // U+FEFF (BOM)
 362   EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf"));  // U+003F
 363   EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5"));  // U+00A5
 364
 365   // Beyond U+10FFFF (the upper limit of Unicode codespace)
 366   EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80"));  // U+110000
 367   EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf"));  // 5 bytes
 368   EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80"));  // 6 bytes
 369
 370   // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
 371   EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
 372   EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
 373   EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
 374   EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
 375
 376   // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
 377   EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe"));  // U+FFFE)
 378   EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe"));  // U+1FFFE
 379   EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf"));  // U+10FFFF
 380   EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90"));  // U+FDD0
 381   EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf"));  // U+FDEF
 382   // Strings in legacy encodings. We can certainly make up strings
 383   // in a legacy encoding that are valid in UTF-8, but in real data,
 384   // most of them are invalid as UTF-8.
 385   EXPECT_FALSE(IsStringUTF8("caf\xe9"));  // cafe with U+00E9 in ISO-8859-1
 386   EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2"));  // U+AC00, U+AC001 in EUC-KR
 387   EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e"));  // U+4F60 U+597D in Big5
 388   // "abc" with U+201[CD] in windows-125[0-8]
 389   EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
 390   // U+0639 U+064E U+0644 U+064E in ISO-8859-6
 391   EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
 392   // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
 393   EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
 394
 395   // Check that we support Embedded Nulls. The first uses the canonical UTF-8
 396   // representation, and the second uses a 2-byte sequence. The second version
 397   // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
 398   // given codepoint must be used.
 399   static const char kEmbeddedNull[] = "embedded\0null";
 400   EXPECT_TRUE(IsStringUTF8(
 401       std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));
 402   EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
 403 }
 404
 405 TEST(StringUtilTest, ConvertASCII) {
 406   static const char* char_cases[] = {
 407     "Google Video",
 408     "Hello, world\n",
 409     "0123ABCDwxyz \a\b\t\r\n!+,.~"
 410   };
 411
 412   static const wchar_t* const wchar_cases[] = {
 413     L"Google Video",
 414     L"Hello, world\n",
 415     L"0123ABCDwxyz \a\b\t\r\n!+,.~"
 416   };
 417
 418   for (size_t i = 0; i < arraysize(char_cases); ++i) {
 419     EXPECT_TRUE(IsStringASCII(char_cases[i]));
 420     std::wstring wide = ASCIIToWide(char_cases[i]);
 421     EXPECT_EQ(wchar_cases[i], wide);
 422
 423     EXPECT_TRUE(IsStringASCII(wchar_cases[i]));
 424     std::string ascii = WideToASCII(wchar_cases[i]);
 425     EXPECT_EQ(char_cases[i], ascii);
 426   }
 427
 428   EXPECT_FALSE(IsStringASCII("Google \x80Video"));
 429   EXPECT_FALSE(IsStringASCII(L"Google \x80Video"));
 430
 431   // Convert empty strings.
 432   std::wstring wempty;
 433   std::string empty;
 434   EXPECT_EQ(empty, WideToASCII(wempty));
 435   EXPECT_EQ(wempty, ASCIIToWide(empty));
 436
 437   // Convert strings with an embedded NUL character.
 438   const char chars_with_nul[] = "test\0string";
 439   const int length_with_nul = arraysize(chars_with_nul) - 1;
 440   std::string string_with_nul(chars_with_nul, length_with_nul);
 441   std::wstring wide_with_nul = ASCIIToWide(string_with_nul);
 442   EXPECT_EQ(static_cast<std::wstring::size_type>(length_with_nul),
 443             wide_with_nul.length());
 444   std::string narrow_with_nul = WideToASCII(wide_with_nul);
 445   EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul),
 446             narrow_with_nul.length());
 447   EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));
 448 }
 449
 450 TEST(StringUtilTest, ToUpperASCII) {
 451   EXPECT_EQ('C', ToUpperASCII('C'));
 452   EXPECT_EQ('C', ToUpperASCII('c'));
 453   EXPECT_EQ('2', ToUpperASCII('2'));
 454
 455   EXPECT_EQ(L'C', ToUpperASCII(L'C'));
 456   EXPECT_EQ(L'C', ToUpperASCII(L'c'));
 457   EXPECT_EQ(L'2', ToUpperASCII(L'2'));
 458
 459   std::string in_place_a("Cc2");
 460   StringToUpperASCII(&in_place_a);
 461   EXPECT_EQ("CC2", in_place_a);
 462
 463   std::wstring in_place_w(L"Cc2");
 464   StringToUpperASCII(&in_place_w);
 465   EXPECT_EQ(L"CC2", in_place_w);
 466
 467   std::string original_a("Cc2");
 468   std::string upper_a = StringToUpperASCII(original_a);
 469   EXPECT_EQ("CC2", upper_a);
 470
 471   std::wstring original_w(L"Cc2");
 472   std::wstring upper_w = StringToUpperASCII(original_w);
 473   EXPECT_EQ(L"CC2", upper_w);
 474 }
 475
 476 TEST(StringUtilTest, LowerCaseEqualsASCII) {
 477   static const struct {
 478     const wchar_t* src_w;
 479     const char*    src_a;
 480     const char*    dst;
 481   } lowercase_cases[] = {
 482     { L"FoO", "FoO", "foo" },
 483     { L"foo", "foo", "foo" },
 484     { L"FOO", "FOO", "foo" },
 485   };
 486
 487   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(lowercase_cases); ++i) {
 488     EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_w,
 489                                      lowercase_cases[i].dst));
 490     EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a,
 491                                      lowercase_cases[i].dst));
 492   }
 493 }
 494
 495 TEST(StringUtilTest, FormatBytesUnlocalized) {
 496   static const struct {
 497     int64 bytes;
 498     const char* expected;
 499   } cases[] = {
 500     // Expected behavior: we show one post-decimal digit when we have
 501     // under two pre-decimal digits, except in cases where it makes no
 502     // sense (zero or bytes).
 503     // Since we switch units once we cross the 1000 mark, this keeps
 504     // the display of file sizes or bytes consistently around three
 505     // digits.
 506     {0, "0 B"},
 507     {512, "512 B"},
 508     {1024*1024, "1.0 MB"},
 509     {1024*1024*1024, "1.0 GB"},
 510     {10LL*1024*1024*1024, "10.0 GB"},
 511     {99LL*1024*1024*1024, "99.0 GB"},
 512     {105LL*1024*1024*1024, "105 GB"},
 513     {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"},
 514     {~(1LL<<63), "8192 PB"},
 515
 516     {99*1024 + 103, "99.1 kB"},
 517     {1024*1024 + 103, "1.0 MB"},
 518     {1024*1024 + 205 * 1024, "1.2 MB"},
 519     {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"},
 520     {10LL*1024*1024*1024, "10.0 GB"},
 521     {100LL*1024*1024*1024, "100 GB"},
 522   };
 523
 524   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
 525     EXPECT_EQ(ASCIIToUTF16(cases[i].expected),
 526               FormatBytesUnlocalized(cases[i].bytes));
 527   }
 528 }
 529 TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
 530   static const struct {
 531     const char* str;
 532     string16::size_type start_offset;
 533     const char* find_this;
 534     const char* replace_with;
 535     const char* expected;
 536   } cases[] = {
 537     {"aaa", 0, "a", "b", "bbb"},
 538     {"abb", 0, "ab", "a", "ab"},
 539     {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
 540     {"Not found", 0, "x", "0", "Not found"},
 541     {"Not found again", 5, "x", "0", "Not found again"},
 542     {" Making it much longer ", 0, " ", "Four score and seven years ago",
 543      "Four score and seven years agoMakingFour score and seven years agoit"
 544      "Four score and seven years agomuchFour score and seven years agolonger"
 545      "Four score and seven years ago"},
 546     {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
 547     {"Replace me only me once", 9, "me ", "", "Replace me only once"},
 548     {"abababab", 2, "ab", "c", "abccc"},
 549   };
 550
 551   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
 552     string16 str = ASCIIToUTF16(cases[i].str);
 553     ReplaceSubstringsAfterOffset(&str, cases[i].start_offset,
 554                                  ASCIIToUTF16(cases[i].find_this),
 555                                  ASCIIToUTF16(cases[i].replace_with));
 556     EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
 557   }
 558 }
 559
 560 TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
 561   static const struct {
 562     const char* str;
 563     string16::size_type start_offset;
 564     const char* find_this;
 565     const char* replace_with;
 566     const char* expected;
 567   } cases[] = {
 568     {"aaa", 0, "a", "b", "baa"},
 569     {"abb", 0, "ab", "a", "ab"},
 570     {"Removing some substrings inging", 0, "ing", "",
 571       "Remov some substrings inging"},
 572     {"Not found", 0, "x", "0", "Not found"},
 573     {"Not found again", 5, "x", "0", "Not found again"},
 574     {" Making it much longer ", 0, " ", "Four score and seven years ago",
 575      "Four score and seven years agoMaking it much longer "},
 576     {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
 577     {"Replace me only me once", 4, "me ", "", "Replace only me once"},
 578     {"abababab", 2, "ab", "c", "abcabab"},
 579   };
 580
 581   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
 582     string16 str = ASCIIToUTF16(cases[i].str);
 583     ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset,
 584                                      ASCIIToUTF16(cases[i].find_this),
 585                                      ASCIIToUTF16(cases[i].replace_with));
 586     EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
 587   }
 588 }
 589
 590 TEST(StringUtilTest, HexDigitToInt) {
 591   EXPECT_EQ(0, HexDigitToInt('0'));
 592   EXPECT_EQ(1, HexDigitToInt('1'));
 593   EXPECT_EQ(2, HexDigitToInt('2'));
 594   EXPECT_EQ(3, HexDigitToInt('3'));
 595   EXPECT_EQ(4, HexDigitToInt('4'));
 596   EXPECT_EQ(5, HexDigitToInt('5'));
 597   EXPECT_EQ(6, HexDigitToInt('6'));
 598   EXPECT_EQ(7, HexDigitToInt('7'));
 599   EXPECT_EQ(8, HexDigitToInt('8'));
 600   EXPECT_EQ(9, HexDigitToInt('9'));
 601   EXPECT_EQ(10, HexDigitToInt('A'));
 602   EXPECT_EQ(11, HexDigitToInt('B'));
 603   EXPECT_EQ(12, HexDigitToInt('C'));
 604   EXPECT_EQ(13, HexDigitToInt('D'));
 605   EXPECT_EQ(14, HexDigitToInt('E'));
 606   EXPECT_EQ(15, HexDigitToInt('F'));
 607
 608   // Verify the lower case as well.
 609   EXPECT_EQ(10, HexDigitToInt('a'));
 610   EXPECT_EQ(11, HexDigitToInt('b'));
 611   EXPECT_EQ(12, HexDigitToInt('c'));
 612   EXPECT_EQ(13, HexDigitToInt('d'));
 613   EXPECT_EQ(14, HexDigitToInt('e'));
 614   EXPECT_EQ(15, HexDigitToInt('f'));
 615 }
 616
 617 // This checks where we can use the assignment operator for a va_list. We need
 618 // a way to do this since Visual C doesn't support va_copy, but assignment on
 619 // va_list is not guaranteed to be a copy. See StringAppendVT which uses this
 620 // capability.
 621 static void VariableArgsFunc(const char* format, ...) {
 622   va_list org;
 623   va_start(org, format);
 624
 625   va_list dup;
 626   GG_VA_COPY(dup, org);
 627   int i1 = va_arg(org, int);
 628   int j1 = va_arg(org, int);
 629   char* s1 = va_arg(org, char*);
 630   double d1 = va_arg(org, double);
 631   va_end(org);
 632
 633   int i2 = va_arg(dup, int);
 634   int j2 = va_arg(dup, int);
 635   char* s2 = va_arg(dup, char*);
 636   double d2 = va_arg(dup, double);
 637
 638   EXPECT_EQ(i1, i2);
 639   EXPECT_EQ(j1, j2);
 640   EXPECT_STREQ(s1, s2);
 641   EXPECT_EQ(d1, d2);
 642
 643   va_end(dup);
 644 }
 645
 646 TEST(StringUtilTest, VAList) {
 647   VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21);
 648 }
 649
 650 // Test for Tokenize
 651 template <typename STR>
 652 void TokenizeTest() {
 653   std::vector<STR> r;
 654   size_t size;
 655
 656   size = Tokenize(STR("This is a string"), STR(" "), &r);
 657   EXPECT_EQ(4U, size);
 658   ASSERT_EQ(4U, r.size());
 659   EXPECT_EQ(r[0], STR("This"));
 660   EXPECT_EQ(r[1], STR("is"));
 661   EXPECT_EQ(r[2], STR("a"));
 662   EXPECT_EQ(r[3], STR("string"));
 663   r.clear();
 664
 665   size = Tokenize(STR("one,two,three"), STR(","), &r);
 666   EXPECT_EQ(3U, size);
 667   ASSERT_EQ(3U, r.size());
 668   EXPECT_EQ(r[0], STR("one"));
 669   EXPECT_EQ(r[1], STR("two"));
 670   EXPECT_EQ(r[2], STR("three"));
 671   r.clear();
 672
 673   size = Tokenize(STR("one,two:three;four"), STR(",:"), &r);
 674   EXPECT_EQ(3U, size);
 675   ASSERT_EQ(3U, r.size());
 676   EXPECT_EQ(r[0], STR("one"));
 677   EXPECT_EQ(r[1], STR("two"));
 678   EXPECT_EQ(r[2], STR("three;four"));
 679   r.clear();
 680
 681   size = Tokenize(STR("one,two:three;four"), STR(";,:"), &r);
 682   EXPECT_EQ(4U, size);
 683   ASSERT_EQ(4U, r.size());
 684   EXPECT_EQ(r[0], STR("one"));
 685   EXPECT_EQ(r[1], STR("two"));
 686   EXPECT_EQ(r[2], STR("three"));
 687   EXPECT_EQ(r[3], STR("four"));
 688   r.clear();
 689
 690   size = Tokenize(STR("one, two, three"), STR(","), &r);
 691   EXPECT_EQ(3U, size);
 692   ASSERT_EQ(3U, r.size());
 693   EXPECT_EQ(r[0], STR("one"));
 694   EXPECT_EQ(r[1], STR(" two"));
 695   EXPECT_EQ(r[2], STR(" three"));
 696   r.clear();
 697
 698   size = Tokenize(STR("one, two, three, "), STR(","), &r);
 699   EXPECT_EQ(4U, size);
 700   ASSERT_EQ(4U, r.size());
 701   EXPECT_EQ(r[0], STR("one"));
 702   EXPECT_EQ(r[1], STR(" two"));
 703   EXPECT_EQ(r[2], STR(" three"));
 704   EXPECT_EQ(r[3], STR(" "));
 705   r.clear();
 706
 707   size = Tokenize(STR("one, two, three,"), STR(","), &r);
 708   EXPECT_EQ(3U, size);
 709   ASSERT_EQ(3U, r.size());
 710   EXPECT_EQ(r[0], STR("one"));
 711   EXPECT_EQ(r[1], STR(" two"));
 712   EXPECT_EQ(r[2], STR(" three"));
 713   r.clear();
 714
 715   size = Tokenize(STR(""), STR(","), &r);
 716   EXPECT_EQ(0U, size);
 717   ASSERT_EQ(0U, r.size());
 718   r.clear();
 719
 720   size = Tokenize(STR(","), STR(","), &r);
 721   EXPECT_EQ(0U, size);
 722   ASSERT_EQ(0U, r.size());
 723   r.clear();
 724
 725   size = Tokenize(STR(",;:."), STR(".:;,"), &r);
 726   EXPECT_EQ(0U, size);
 727   ASSERT_EQ(0U, r.size());
 728   r.clear();
 729
 730   size = Tokenize(STR("\t\ta\t"), STR("\t"), &r);
 731   EXPECT_EQ(1U, size);
 732   ASSERT_EQ(1U, r.size());
 733   EXPECT_EQ(r[0], STR("a"));
 734   r.clear();
 735
 736   size = Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r);
 737   EXPECT_EQ(2U, size);
 738   ASSERT_EQ(2U, r.size());
 739   EXPECT_EQ(r[0], STR("\ta\t"));
 740   EXPECT_EQ(r[1], STR("b\tcc"));
 741   r.clear();
 742 }
 743
 744 TEST(StringUtilTest, TokenizeStdString) {
 745   TokenizeTest<std::string>();
 746 }
 747
 748 TEST(StringUtilTest, TokenizeStringPiece) {
 749   TokenizeTest<base::StringPiece>();
 750 }
 751
 752 // Test for JoinString
 753 TEST(StringUtilTest, JoinString) {
 754   std::vector<std::string> in;
 755   EXPECT_EQ("", JoinString(in, ','));
 756
 757   in.push_back("a");
 758   EXPECT_EQ("a", JoinString(in, ','));
 759
 760   in.push_back("b");
 761   in.push_back("c");
 762   EXPECT_EQ("a,b,c", JoinString(in, ','));
 763
 764   in.push_back("");
 765   EXPECT_EQ("a,b,c,", JoinString(in, ','));
 766   in.push_back(" ");
 767   EXPECT_EQ("a|b|c|| ", JoinString(in, '|'));
 768 }
 769
 770 TEST(StringUtilTest, StartsWith) {
 771   EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true));
 772   EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true));
 773   EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false));
 774   EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false));
 775   EXPECT_FALSE(StartsWithASCII("java", "javascript", true));
 776   EXPECT_FALSE(StartsWithASCII("java", "javascript", false));
 777   EXPECT_FALSE(StartsWithASCII("", "javascript", false));
 778   EXPECT_FALSE(StartsWithASCII("", "javascript", true));
 779   EXPECT_TRUE(StartsWithASCII("java", "", false));
 780   EXPECT_TRUE(StartsWithASCII("java", "", true));
 781
 782   EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", true));
 783   EXPECT_FALSE(StartsWith(L"JavaScript:url", L"javascript", true));
 784   EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", false));
 785   EXPECT_TRUE(StartsWith(L"JavaScript:url", L"javascript", false));
 786   EXPECT_FALSE(StartsWith(L"java", L"javascript", true));
 787   EXPECT_FALSE(StartsWith(L"java", L"javascript", false));
 788   EXPECT_FALSE(StartsWith(L"", L"javascript", false));
 789   EXPECT_FALSE(StartsWith(L"", L"javascript", true));
 790   EXPECT_TRUE(StartsWith(L"java", L"", false));
 791   EXPECT_TRUE(StartsWith(L"java", L"", true));
 792 }
 793
 794 TEST(StringUtilTest, EndsWith) {
 795   EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", true));
 796   EXPECT_FALSE(EndsWith(L"Foo.Plugin", L".plugin", true));
 797   EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", false));
 798   EXPECT_TRUE(EndsWith(L"Foo.Plugin", L".plugin", false));
 799   EXPECT_FALSE(EndsWith(L".plug", L".plugin", true));
 800   EXPECT_FALSE(EndsWith(L".plug", L".plugin", false));
 801   EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", true));
 802   EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", false));
 803   EXPECT_FALSE(EndsWith(L"", L".plugin", false));
 804   EXPECT_FALSE(EndsWith(L"", L".plugin", true));
 805   EXPECT_TRUE(EndsWith(L"Foo.plugin", L"", false));
 806   EXPECT_TRUE(EndsWith(L"Foo.plugin", L"", true));
 807   EXPECT_TRUE(EndsWith(L".plugin", L".plugin", false));
 808   EXPECT_TRUE(EndsWith(L".plugin", L".plugin", true));
 809   EXPECT_TRUE(EndsWith(L"", L"", false));
 810   EXPECT_TRUE(EndsWith(L"", L"", true));
 811 }
 812
 813 TEST(StringUtilTest, GetStringFWithOffsets) {
 814   std::vector<string16> subst;
 815   subst.push_back(ASCIIToUTF16("1"));
 816   subst.push_back(ASCIIToUTF16("2"));
 817   std::vector<size_t> offsets;
 818
 819   ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
 820                             subst,
 821                             &offsets);
 822   EXPECT_EQ(2U, offsets.size());
 823   EXPECT_EQ(7U, offsets[0]);
 824   EXPECT_EQ(25U, offsets[1]);
 825   offsets.clear();
 826
 827   ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
 828                             subst,
 829                             &offsets);
 830   EXPECT_EQ(2U, offsets.size());
 831   EXPECT_EQ(25U, offsets[0]);
 832   EXPECT_EQ(7U, offsets[1]);
 833   offsets.clear();
 834 }
 835
 836 TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
 837   // Test whether replacestringplaceholders works as expected when there
 838   // are fewer inputs than outputs.
 839   std::vector<string16> subst;
 840   subst.push_back(ASCIIToUTF16("9a"));
 841   subst.push_back(ASCIIToUTF16("8b"));
 842   subst.push_back(ASCIIToUTF16("7c"));
 843
 844   string16 formatted =
 845       ReplaceStringPlaceholders(
 846           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL);
 847
 848   EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"));
 849 }
 850
 851 TEST(StringUtilTest, ReplaceStringPlaceholders) {
 852   std::vector<string16> subst;
 853   subst.push_back(ASCIIToUTF16("9a"));
 854   subst.push_back(ASCIIToUTF16("8b"));
 855   subst.push_back(ASCIIToUTF16("7c"));
 856   subst.push_back(ASCIIToUTF16("6d"));
 857   subst.push_back(ASCIIToUTF16("5e"));
 858   subst.push_back(ASCIIToUTF16("4f"));
 859   subst.push_back(ASCIIToUTF16("3g"));
 860   subst.push_back(ASCIIToUTF16("2h"));
 861   subst.push_back(ASCIIToUTF16("1i"));
 862
 863   string16 formatted =
 864       ReplaceStringPlaceholders(
 865           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL);
 866
 867   EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"));
 868 }
 869
 870 TEST(StringUtilTest, ReplaceStringPlaceholdersMoreThan9Replacements) {
 871   std::vector<string16> subst;
 872   subst.push_back(ASCIIToUTF16("9a"));
 873   subst.push_back(ASCIIToUTF16("8b"));
 874   subst.push_back(ASCIIToUTF16("7c"));
 875   subst.push_back(ASCIIToUTF16("6d"));
 876   subst.push_back(ASCIIToUTF16("5e"));
 877   subst.push_back(ASCIIToUTF16("4f"));
 878   subst.push_back(ASCIIToUTF16("3g"));
 879   subst.push_back(ASCIIToUTF16("2h"));
 880   subst.push_back(ASCIIToUTF16("1i"));
 881   subst.push_back(ASCIIToUTF16("0j"));
 882   subst.push_back(ASCIIToUTF16("-1k"));
 883   subst.push_back(ASCIIToUTF16("-2l"));
 884   subst.push_back(ASCIIToUTF16("-3m"));
 885   subst.push_back(ASCIIToUTF16("-4n"));
 886
 887   string16 formatted =
 888       ReplaceStringPlaceholders(
 889           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i,"
 890                        "$10j,$11k,$12l,$13m,$14n,$1"), subst, NULL);
 891
 892   EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,"
 893                                     "1ii,0jj,-1kk,-2ll,-3mm,-4nn,9a"));
 894 }
 895
 896 TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
 897   std::vector<std::string> subst;
 898   subst.push_back("9a");
 899   subst.push_back("8b");
 900   subst.push_back("7c");
 901   subst.push_back("6d");
 902   subst.push_back("5e");
 903   subst.push_back("4f");
 904   subst.push_back("3g");
 905   subst.push_back("2h");
 906   subst.push_back("1i");
 907
 908   std::string formatted =
 909       ReplaceStringPlaceholders(
 910           "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL);
 911
 912   EXPECT_EQ(formatted, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii");
 913 }
 914
 915 TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
 916   std::vector<std::string> subst;
 917   subst.push_back("a");
 918   subst.push_back("b");
 919   subst.push_back("c");
 920   EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, NULL),
 921             "$1 $$2 $$$3");
 922 }
 923
 924 TEST(StringUtilTest, MatchPatternTest) {
 925   EXPECT_TRUE(MatchPattern("www.google.com", "*.com"));
 926   EXPECT_TRUE(MatchPattern("www.google.com", "*"));
 927   EXPECT_FALSE(MatchPattern("www.google.com", "www*.g*.org"));
 928   EXPECT_TRUE(MatchPattern("Hello", "H?l?o"));
 929   EXPECT_FALSE(MatchPattern("www.google.com", "http://*)"));
 930   EXPECT_FALSE(MatchPattern("www.msn.com", "*.COM"));
 931   EXPECT_TRUE(MatchPattern("Hello*1234", "He??o\\*1*"));
 932   EXPECT_FALSE(MatchPattern("", "*.*"));
 933   EXPECT_TRUE(MatchPattern("", "*"));
 934   EXPECT_TRUE(MatchPattern("", "?"));
 935   EXPECT_TRUE(MatchPattern("", ""));
 936   EXPECT_FALSE(MatchPattern("Hello", ""));
 937   EXPECT_TRUE(MatchPattern("Hello*", "Hello*"));
 938   // Stop after a certain recursion depth.
 939   EXPECT_FALSE(MatchPattern("123456789012345678", "?????????????????*"));
 940
 941   // Test UTF8 matching.
 942   EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0", "*\xe2\x99\xa0"));
 943   EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0.", "heart: ?."));
 944   EXPECT_TRUE(MatchPattern("hearts: \xe2\x99\xa0\xe2\x99\xa0", "*"));
 945   // Invalid sequences should be handled as a single invalid character.
 946   EXPECT_TRUE(MatchPattern("invalid: \xef\xbf\xbe", "invalid: ?"));
 947   // If the pattern has invalid characters, it shouldn't match anything.
 948   EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80"));
 949
 950   // Test UTF16 character matching.
 951   EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"),
 952                            UTF8ToUTF16("*.com")));
 953   EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"),
 954                            UTF8ToUTF16("He??o\\*1*")));
 955
 956   // This test verifies that consecutive wild cards are collapsed into 1
 957   // wildcard (when this doesn't occur, MatchPattern reaches it's maximum
 958   // recursion depth).
 959   EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello"),
 960                            UTF8ToUTF16("He********************************o")));
 961 }
 962
 963 TEST(StringUtilTest, LcpyTest) {
 964   // Test the normal case where we fit in our buffer.
 965   {
 966     char dst[10];
 967     wchar_t wdst[10];
 968     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
 969     EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
 970     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
 971     EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
 972   }
 973
 974   // Test dst_size == 0, nothing should be written to |dst| and we should
 975   // have the equivalent of strlen(src).
 976   {
 977     char dst[2] = {1, 2};
 978     wchar_t wdst[2] = {1, 2};
 979     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", 0));
 980     EXPECT_EQ(1, dst[0]);
 981     EXPECT_EQ(2, dst[1]);
 982     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", 0));
 983 #if defined(WCHAR_T_IS_UNSIGNED)
 984     EXPECT_EQ(1U, wdst[0]);
 985     EXPECT_EQ(2U, wdst[1]);
 986 #else
 987     EXPECT_EQ(1, wdst[0]);
 988     EXPECT_EQ(2, wdst[1]);
 989 #endif
 990   }
 991
 992   // Test the case were we _just_ competely fit including the null.
 993   {
 994     char dst[8];
 995     wchar_t wdst[8];
 996     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
 997     EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
 998     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
 999     EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
1000   }
1001
1002   // Test the case were we we are one smaller, so we can't fit the null.
1003   {
1004     char dst[7];
1005     wchar_t wdst[7];
1006     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1007     EXPECT_EQ(0, memcmp(dst, "abcdef", 7));
1008     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1009     EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7));
1010   }
1011
1012   // Test the case were we are just too small.
1013   {
1014     char dst[3];
1015     wchar_t wdst[3];
1016     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1017     EXPECT_EQ(0, memcmp(dst, "ab", 3));
1018     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1019     EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3));
1020   }
1021 }
1022
1023 TEST(StringUtilTest, WprintfFormatPortabilityTest) {
1024   static const struct {
1025     const wchar_t* input;
1026     bool portable;
1027   } cases[] = {
1028     { L"%ls", true },
1029     { L"%s", false },
1030     { L"%S", false },
1031     { L"%lS", false },
1032     { L"Hello, %s", false },
1033     { L"%lc", true },
1034     { L"%c", false },
1035     { L"%C", false },
1036     { L"%lC", false },
1037     { L"%ls %s", false },
1038     { L"%s %ls", false },
1039     { L"%s %ls %s", false },
1040     { L"%f", true },
1041     { L"%f %F", false },
1042     { L"%d %D", false },
1043     { L"%o %O", false },
1044     { L"%u %U", false },
1045     { L"%f %d %o %u", true },
1046     { L"%-8d (%02.1f%)", true },
1047     { L"% 10s", false },
1048     { L"% 10ls", true }
1049   };
1050   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
1051     EXPECT_EQ(cases[i].portable, base::IsWprintfFormatPortable(cases[i].input));
1052 }
1053
1054 TEST(StringUtilTest, RemoveChars) {
1055   const char* kRemoveChars = "-/+*";
1056   std::string input = "A-+bc/d!*";
1057   EXPECT_TRUE(RemoveChars(input, kRemoveChars, &input));
1058   EXPECT_EQ("Abcd!", input);
1059
1060   // No characters match kRemoveChars.
1061   EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
1062   EXPECT_EQ("Abcd!", input);
1063
1064   // Empty string.
1065   input.clear();
1066   EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
1067   EXPECT_EQ(std::string(), input);
1068 }
1069
1070 TEST(StringUtilTest, ReplaceChars) {
1071   struct TestData {
1072     const char* input;
1073     const char* replace_chars;
1074     const char* replace_with;
1075     const char* output;
1076     bool result;
1077   } cases[] = {
1078     { "", "", "", "", false },
1079     { "test", "", "", "test", false },
1080     { "test", "", "!", "test", false },
1081     { "test", "z", "!", "test", false },
1082     { "test", "e", "!", "t!st", true },
1083     { "test", "e", "!?", "t!?st", true },
1084     { "test", "ez", "!", "t!st", true },
1085     { "test", "zed", "!?", "t!?st", true },
1086     { "test", "t", "!?", "!?es!?", true },
1087     { "test", "et", "!>", "!>!>s!>", true },
1088     { "test", "zest", "!", "!!!!", true },
1089     { "test", "szt", "!", "!e!!", true },
1090     { "test", "t", "test", "testestest", true },
1091   };
1092
1093   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
1094     std::string output;
1095     bool result = ReplaceChars(cases[i].input,
1096                                cases[i].replace_chars,
1097                                cases[i].replace_with,
1098                                &output);
1099     EXPECT_EQ(cases[i].result, result);
1100     EXPECT_EQ(cases[i].output, output);
1101   }
1102 }
1103
1104 TEST(StringUtilTest, ContainsOnlyChars) {
1105   // Providing an empty list of characters should return false but for the empty
1106   // string.
1107   EXPECT_TRUE(ContainsOnlyChars("", ""));
1108   EXPECT_FALSE(ContainsOnlyChars("Hello", ""));
1109
1110   EXPECT_TRUE(ContainsOnlyChars("", "1234"));
1111   EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
1112   EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
1113   EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
1114   EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));
1115 }
1116
1117 class WriteIntoTest : public testing::Test {
1118  protected:
1119   static void WritesCorrectly(size_t num_chars) {
1120     std::string buffer;
1121     char kOriginal[] = "supercali";
1122     strncpy(WriteInto(&buffer, num_chars + 1), kOriginal, num_chars);
1123     // Using std::string(buffer.c_str()) instead of |buffer| truncates the
1124     // string at the first \0.
1125     EXPECT_EQ(std::string(kOriginal,
1126                           std::min(num_chars, arraysize(kOriginal) - 1)),
1127               std::string(buffer.c_str()));
1128     EXPECT_EQ(num_chars, buffer.size());
1129   }
1130 };
1131
1132 TEST_F(WriteIntoTest, WriteInto) {
1133   // Validate that WriteInto reserves enough space and
1134   // sizes a string correctly.
1135   WritesCorrectly(1);
1136   WritesCorrectly(2);
1137   WritesCorrectly(5000);
1138
1139   // Validate that WriteInto doesn't modify other strings
1140   // when using a Copy-on-Write implementation.
1141   const char kLive[] = "live";
1142   const char kDead[] = "dead";
1143   const std::string live = kLive;
1144   std::string dead = live;
1145   strncpy(WriteInto(&dead, 5), kDead, 4);
1146   EXPECT_EQ(kDead, dead);
1147   EXPECT_EQ(4u, dead.size());
1148   EXPECT_EQ(kLive, live);
1149   EXPECT_EQ(4u, live.size());
1150 }
1151
1152 }  // namespace base