base/strings/string_util_unittest.cc

   1 // Copyright 2013 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "base/strings/string_util.h"
   6
   7 #include <math.h>
   8 #include <stdarg.h>
   9
  10 #include <algorithm>
  11
  12 #include "base/basictypes.h"
  13 #include "base/strings/string16.h"
  14 #include "base/strings/utf_string_conversions.h"
  15 #include "testing/gmock/include/gmock/gmock.h"
  16 #include "testing/gtest/include/gtest/gtest.h"
  17
  18 using ::testing::ElementsAre;
  19
  20 namespace base {
  21
  22 static const struct trim_case {
  23   const wchar_t* input;
  24   const TrimPositions positions;
  25   const wchar_t* output;
  26   const TrimPositions return_value;
  27 } trim_cases[] = {
  28   {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
  29   {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
  30   {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
  31   {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
  32   {L"", TRIM_ALL, L"", TRIM_NONE},
  33   {L"  ", TRIM_LEADING, L"", TRIM_LEADING},
  34   {L"  ", TRIM_TRAILING, L"", TRIM_TRAILING},
  35   {L"  ", TRIM_ALL, L"", TRIM_ALL},
  36   {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
  37   {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
  38 };
  39
  40 static const struct trim_case_ascii {
  41   const char* input;
  42   const TrimPositions positions;
  43   const char* output;
  44   const TrimPositions return_value;
  45 } trim_cases_ascii[] = {
  46   {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
  47   {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
  48   {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
  49   {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
  50   {"", TRIM_ALL, "", TRIM_NONE},
  51   {"  ", TRIM_LEADING, "", TRIM_LEADING},
  52   {"  ", TRIM_TRAILING, "", TRIM_TRAILING},
  53   {"  ", TRIM_ALL, "", TRIM_ALL},
  54   {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
  55 };
  56
  57 namespace {
  58
  59 // Helper used to test TruncateUTF8ToByteSize.
  60 bool Truncated(const std::string& input,
  61                const size_t byte_size,
  62                std::string* output) {
  63     size_t prev = input.length();
  64     TruncateUTF8ToByteSize(input, byte_size, output);
  65     return prev != output->length();
  66 }
  67
  68 }  // namespace
  69
  70 TEST(StringUtilTest, TruncateUTF8ToByteSize) {
  71   std::string output;
  72
  73   // Empty strings and invalid byte_size arguments
  74   EXPECT_FALSE(Truncated(std::string(), 0, &output));
  75   EXPECT_EQ(output, "");
  76   EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
  77   EXPECT_EQ(output, "");
  78   EXPECT_FALSE(Truncated("\xe1\x80\xbf", static_cast<size_t>(-1), &output));
  79   EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));
  80
  81   // Testing the truncation of valid UTF8 correctly
  82   EXPECT_TRUE(Truncated("abc", 2, &output));
  83   EXPECT_EQ(output, "ab");
  84   EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
  85   EXPECT_EQ(output.compare("\xc2\x81"), 0);
  86   EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
  87   EXPECT_EQ(output.compare("\xc2\x81"), 0);
  88   EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
  89   EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);
  90
  91   {
  92     const char array[] = "\x00\x00\xc2\x81\xc2\x81";
  93     const std::string array_string(array, arraysize(array));
  94     EXPECT_TRUE(Truncated(array_string, 4, &output));
  95     EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
  96   }
  97
  98   {
  99     const char array[] = "\x00\xc2\x81\xc2\x81";
 100     const std::string array_string(array, arraysize(array));
 101     EXPECT_TRUE(Truncated(array_string, 4, &output));
 102     EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);
 103   }
 104
 105   // Testing invalid UTF8
 106   EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
 107   EXPECT_EQ(output.compare(""), 0);
 108   EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
 109   EXPECT_EQ(output.compare(""), 0);
 110   EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
 111   EXPECT_EQ(output.compare(""), 0);
 112
 113   // Testing invalid UTF8 mixed with valid UTF8
 114   EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
 115   EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);
 116   EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
 117   EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);
 118   EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
 119               10, &output));
 120   EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
 121   EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
 122               10, &output));
 123   EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
 124   EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
 125   EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);
 126
 127   // Overlong sequences
 128   EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
 129   EXPECT_EQ(output.compare(""), 0);
 130   EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
 131   EXPECT_EQ(output.compare(""), 0);
 132   EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
 133   EXPECT_EQ(output.compare(""), 0);
 134   EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
 135   EXPECT_EQ(output.compare(""), 0);
 136   EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
 137   EXPECT_EQ(output.compare(""), 0);
 138   EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
 139   EXPECT_EQ(output.compare(""), 0);
 140   EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
 141   EXPECT_EQ(output.compare(""), 0);
 142   EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
 143   EXPECT_EQ(output.compare(""), 0);
 144   EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
 145   EXPECT_EQ(output.compare(""), 0);
 146   EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
 147   EXPECT_EQ(output.compare(""), 0);
 148   EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
 149   EXPECT_EQ(output.compare(""), 0);
 150
 151   // Beyond U+10FFFF (the upper limit of Unicode codespace)
 152   EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
 153   EXPECT_EQ(output.compare(""), 0);
 154   EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
 155   EXPECT_EQ(output.compare(""), 0);
 156   EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
 157   EXPECT_EQ(output.compare(""), 0);
 158
 159   // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
 160   EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
 161   EXPECT_EQ(output.compare(""), 0);
 162   EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
 163   EXPECT_EQ(output.compare(""), 0);
 164
 165   {
 166     const char array[] = "\x00\x00\xfe\xff";
 167     const std::string array_string(array, arraysize(array));
 168     EXPECT_TRUE(Truncated(array_string, 4, &output));
 169     EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);
 170   }
 171
 172   // Variants on the previous test
 173   {
 174     const char array[] = "\xff\xfe\x00\x00";
 175     const std::string array_string(array, 4);
 176     EXPECT_FALSE(Truncated(array_string, 4, &output));
 177     EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
 178   }
 179   {
 180     const char array[] = "\xff\x00\x00\xfe";
 181     const std::string array_string(array, arraysize(array));
 182     EXPECT_TRUE(Truncated(array_string, 4, &output));
 183     EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);
 184   }
 185
 186   // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
 187   EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
 188   EXPECT_EQ(output.compare(""), 0);
 189   EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
 190   EXPECT_EQ(output.compare(""), 0);
 191   EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
 192   EXPECT_EQ(output.compare(""), 0);
 193   EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
 194   EXPECT_EQ(output.compare(""), 0);
 195   EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
 196   EXPECT_EQ(output.compare(""), 0);
 197
 198   // Strings in legacy encodings that are valid in UTF-8, but
 199   // are invalid as UTF-8 in real data.
 200   EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
 201   EXPECT_EQ(output.compare("caf"), 0);
 202   EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
 203   EXPECT_EQ(output.compare(""), 0);
 204   EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
 205   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
 206   EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
 207               &output));
 208   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
 209
 210   // Testing using the same string as input and output.
 211   EXPECT_FALSE(Truncated(output, 4, &output));
 212   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
 213   EXPECT_TRUE(Truncated(output, 3, &output));
 214   EXPECT_EQ(output.compare("\xa7\x41"), 0);
 215
 216   // "abc" with U+201[CD] in windows-125[0-8]
 217   EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
 218   EXPECT_EQ(output.compare("\x93" "abc"), 0);
 219
 220   // U+0639 U+064E U+0644 U+064E in ISO-8859-6
 221   EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
 222   EXPECT_EQ(output.compare(""), 0);
 223
 224   // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
 225   EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
 226   EXPECT_EQ(output.compare(""), 0);
 227 }
 228
 229 TEST(StringUtilTest, TrimWhitespace) {
 230   string16 output;  // Allow contents to carry over to next testcase
 231   for (size_t i = 0; i < arraysize(trim_cases); ++i) {
 232     const trim_case& value = trim_cases[i];
 233     EXPECT_EQ(value.return_value,
 234               TrimWhitespace(WideToUTF16(value.input), value.positions,
 235                              &output));
 236     EXPECT_EQ(WideToUTF16(value.output), output);
 237   }
 238
 239   // Test that TrimWhitespace() can take the same string for input and output
 240   output = ASCIIToUTF16("  This is a test \r\n");
 241   EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
 242   EXPECT_EQ(ASCIIToUTF16("This is a test"), output);
 243
 244   // Once more, but with a string of whitespace
 245   output = ASCIIToUTF16("  \r\n");
 246   EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
 247   EXPECT_EQ(string16(), output);
 248
 249   std::string output_ascii;
 250   for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) {
 251     const trim_case_ascii& value = trim_cases_ascii[i];
 252     EXPECT_EQ(value.return_value,
 253               TrimWhitespace(value.input, value.positions, &output_ascii));
 254     EXPECT_EQ(value.output, output_ascii);
 255   }
 256 }
 257
 258 static const struct collapse_case {
 259   const wchar_t* input;
 260   const bool trim;
 261   const wchar_t* output;
 262 } collapse_cases[] = {
 263   {L" Google Video ", false, L"Google Video"},
 264   {L"Google Video", false, L"Google Video"},
 265   {L"", false, L""},
 266   {L"  ", false, L""},
 267   {L"\t\rTest String\n", false, L"Test String"},
 268   {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
 269   {L"    Test     \n  \t String    ", false, L"Test String"},
 270   {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
 271   {L"   Test String", false, L"Test String"},
 272   {L"Test String    ", false, L"Test String"},
 273   {L"Test String", false, L"Test String"},
 274   {L"", true, L""},
 275   {L"\n", true, L""},
 276   {L"  \r  ", true, L""},
 277   {L"\nFoo", true, L"Foo"},
 278   {L"\r  Foo  ", true, L"Foo"},
 279   {L" Foo bar ", true, L"Foo bar"},
 280   {L"  \tFoo  bar  \n", true, L"Foo bar"},
 281   {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
 282 };
 283
 284 TEST(StringUtilTest, CollapseWhitespace) {
 285   for (size_t i = 0; i < arraysize(collapse_cases); ++i) {
 286     const collapse_case& value = collapse_cases[i];
 287     EXPECT_EQ(WideToUTF16(value.output),
 288               CollapseWhitespace(WideToUTF16(value.input), value.trim));
 289   }
 290 }
 291
 292 static const struct collapse_case_ascii {
 293   const char* input;
 294   const bool trim;
 295   const char* output;
 296 } collapse_cases_ascii[] = {
 297   {" Google Video ", false, "Google Video"},
 298   {"Google Video", false, "Google Video"},
 299   {"", false, ""},
 300   {"  ", false, ""},
 301   {"\t\rTest String\n", false, "Test String"},
 302   {"    Test     \n  \t String    ", false, "Test String"},
 303   {"   Test String", false, "Test String"},
 304   {"Test String    ", false, "Test String"},
 305   {"Test String", false, "Test String"},
 306   {"", true, ""},
 307   {"\n", true, ""},
 308   {"  \r  ", true, ""},
 309   {"\nFoo", true, "Foo"},
 310   {"\r  Foo  ", true, "Foo"},
 311   {" Foo bar ", true, "Foo bar"},
 312   {"  \tFoo  bar  \n", true, "Foo bar"},
 313   {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
 314 };
 315
 316 TEST(StringUtilTest, CollapseWhitespaceASCII) {
 317   for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) {
 318     const collapse_case_ascii& value = collapse_cases_ascii[i];
 319     EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim));
 320   }
 321 }
 322
 323 TEST(StringUtilTest, IsStringUTF8) {
 324   EXPECT_TRUE(IsStringUTF8("abc"));
 325   EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
 326   EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
 327   EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
 328   EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
 329   EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc"));  // UTF-8 BOM
 330
 331   // surrogate code points
 332   EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
 333   EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
 334   EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
 335
 336   // overlong sequences
 337   EXPECT_FALSE(IsStringUTF8("\xc0\x80"));  // U+0000
 338   EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81"));  // "AB"
 339   EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80"));  // U+0000
 340   EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80"));  // U+0080
 341   EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf"));  // U+07ff
 342   EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D"));  // U+000D
 343   EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91"));  // U+0091
 344   EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80"));  // U+0800
 345   EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf"));  // U+FEFF (BOM)
 346   EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf"));  // U+003F
 347   EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5"));  // U+00A5
 348
 349   // Beyond U+10FFFF (the upper limit of Unicode codespace)
 350   EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80"));  // U+110000
 351   EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf"));  // 5 bytes
 352   EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80"));  // 6 bytes
 353
 354   // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
 355   EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
 356   EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
 357   EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
 358   EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
 359
 360   // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
 361   EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe"));  // U+FFFE)
 362   EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe"));  // U+1FFFE
 363   EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf"));  // U+10FFFF
 364   EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90"));  // U+FDD0
 365   EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf"));  // U+FDEF
 366   // Strings in legacy encodings. We can certainly make up strings
 367   // in a legacy encoding that are valid in UTF-8, but in real data,
 368   // most of them are invalid as UTF-8.
 369   EXPECT_FALSE(IsStringUTF8("caf\xe9"));  // cafe with U+00E9 in ISO-8859-1
 370   EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2"));  // U+AC00, U+AC001 in EUC-KR
 371   EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e"));  // U+4F60 U+597D in Big5
 372   // "abc" with U+201[CD] in windows-125[0-8]
 373   EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
 374   // U+0639 U+064E U+0644 U+064E in ISO-8859-6
 375   EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
 376   // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
 377   EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
 378
 379   // Check that we support Embedded Nulls. The first uses the canonical UTF-8
 380   // representation, and the second uses a 2-byte sequence. The second version
 381   // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
 382   // given codepoint must be used.
 383   static const char kEmbeddedNull[] = "embedded\0null";
 384   EXPECT_TRUE(IsStringUTF8(
 385       std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));
 386   EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
 387 }
 388
 389 TEST(StringUtilTest, IsStringASCII) {
 390   static char char_ascii[] =
 391       "0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF";
 392   static char16 char16_ascii[] = {
 393       '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', 'A',
 394       'B', 'C', 'D', 'E', 'F', '0', '1', '2', '3', '4', '5', '6',
 395       '7', '8', '9', '0', 'A', 'B', 'C', 'D', 'E', 'F', 0 };
 396   static std::wstring wchar_ascii(
 397       L"0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF");
 398
 399   // Test a variety of the fragment start positions and lengths in order to make
 400   // sure that bit masking in IsStringASCII works correctly.
 401   // Also, test that a non-ASCII character will be detected regardless of its
 402   // position inside the string.
 403   {
 404     const size_t string_length = arraysize(char_ascii) - 1;
 405     for (size_t offset = 0; offset < 8; ++offset) {
 406       for (size_t len = 0, max_len = string_length - offset; len < max_len;
 407            ++len) {
 408         EXPECT_TRUE(IsStringASCII(StringPiece(char_ascii + offset, len)));
 409         for (size_t char_pos = offset; char_pos < len; ++char_pos) {
 410           char_ascii[char_pos] |= '\x80';
 411           EXPECT_FALSE(IsStringASCII(StringPiece(char_ascii + offset, len)));
 412           char_ascii[char_pos] &= ~'\x80';
 413         }
 414       }
 415     }
 416   }
 417
 418   {
 419     const size_t string_length = arraysize(char16_ascii) - 1;
 420     for (size_t offset = 0; offset < 4; ++offset) {
 421       for (size_t len = 0, max_len = string_length - offset; len < max_len;
 422            ++len) {
 423         EXPECT_TRUE(IsStringASCII(StringPiece16(char16_ascii + offset, len)));
 424         for (size_t char_pos = offset; char_pos < len; ++char_pos) {
 425           char16_ascii[char_pos] |= 0x80;
 426           EXPECT_FALSE(
 427               IsStringASCII(StringPiece16(char16_ascii + offset, len)));
 428           char16_ascii[char_pos] &= ~0x80;
 429           // Also test when the upper half is non-zero.
 430           char16_ascii[char_pos] |= 0x100;
 431           EXPECT_FALSE(
 432               IsStringASCII(StringPiece16(char16_ascii + offset, len)));
 433           char16_ascii[char_pos] &= ~0x100;
 434         }
 435       }
 436     }
 437   }
 438
 439   {
 440     const size_t string_length = wchar_ascii.length();
 441     for (size_t len = 0; len < string_length; ++len) {
 442       EXPECT_TRUE(IsStringASCII(wchar_ascii.substr(0, len)));
 443       for (size_t char_pos = 0; char_pos < len; ++char_pos) {
 444         wchar_ascii[char_pos] |= 0x80;
 445         EXPECT_FALSE(
 446             IsStringASCII(wchar_ascii.substr(0, len)));
 447         wchar_ascii[char_pos] &= ~0x80;
 448         wchar_ascii[char_pos] |= 0x100;
 449         EXPECT_FALSE(
 450             IsStringASCII(wchar_ascii.substr(0, len)));
 451         wchar_ascii[char_pos] &= ~0x100;
 452 #if defined(WCHAR_T_IS_UTF32)
 453         wchar_ascii[char_pos] |= 0x10000;
 454         EXPECT_FALSE(
 455             IsStringASCII(wchar_ascii.substr(0, len)));
 456         wchar_ascii[char_pos] &= ~0x10000;
 457 #endif  // WCHAR_T_IS_UTF32
 458       }
 459     }
 460   }
 461 }
 462
 463 TEST(StringUtilTest, ConvertASCII) {
 464   static const char* const char_cases[] = {
 465     "Google Video",
 466     "Hello, world\n",
 467     "0123ABCDwxyz \a\b\t\r\n!+,.~"
 468   };
 469
 470   static const wchar_t* const wchar_cases[] = {
 471     L"Google Video",
 472     L"Hello, world\n",
 473     L"0123ABCDwxyz \a\b\t\r\n!+,.~"
 474   };
 475
 476   for (size_t i = 0; i < arraysize(char_cases); ++i) {
 477     EXPECT_TRUE(IsStringASCII(char_cases[i]));
 478     string16 utf16 = ASCIIToUTF16(char_cases[i]);
 479     EXPECT_EQ(WideToUTF16(wchar_cases[i]), utf16);
 480
 481     std::string ascii = UTF16ToASCII(WideToUTF16(wchar_cases[i]));
 482     EXPECT_EQ(char_cases[i], ascii);
 483   }
 484
 485   EXPECT_FALSE(IsStringASCII("Google \x80Video"));
 486
 487   // Convert empty strings.
 488   string16 empty16;
 489   std::string empty;
 490   EXPECT_EQ(empty, UTF16ToASCII(empty16));
 491   EXPECT_EQ(empty16, ASCIIToUTF16(empty));
 492
 493   // Convert strings with an embedded NUL character.
 494   const char chars_with_nul[] = "test\0string";
 495   const int length_with_nul = arraysize(chars_with_nul) - 1;
 496   std::string string_with_nul(chars_with_nul, length_with_nul);
 497   std::wstring wide_with_nul = ASCIIToWide(string_with_nul);
 498   EXPECT_EQ(static_cast<std::wstring::size_type>(length_with_nul),
 499             wide_with_nul.length());
 500   std::string narrow_with_nul = UTF16ToASCII(WideToUTF16(wide_with_nul));
 501   EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul),
 502             narrow_with_nul.length());
 503   EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));
 504 }
 505
 506 TEST(StringUtilTest, ToUpperASCII) {
 507   EXPECT_EQ('C', ToUpperASCII('C'));
 508   EXPECT_EQ('C', ToUpperASCII('c'));
 509   EXPECT_EQ('2', ToUpperASCII('2'));
 510
 511   EXPECT_EQ(L'C', ToUpperASCII(L'C'));
 512   EXPECT_EQ(L'C', ToUpperASCII(L'c'));
 513   EXPECT_EQ(L'2', ToUpperASCII(L'2'));
 514
 515   std::string in_place_a("Cc2");
 516   StringToUpperASCII(&in_place_a);
 517   EXPECT_EQ("CC2", in_place_a);
 518
 519   std::wstring in_place_w(L"Cc2");
 520   StringToUpperASCII(&in_place_w);
 521   EXPECT_EQ(L"CC2", in_place_w);
 522
 523   std::string original_a("Cc2");
 524   std::string upper_a = StringToUpperASCII(original_a);
 525   EXPECT_EQ("CC2", upper_a);
 526
 527   std::wstring original_w(L"Cc2");
 528   std::wstring upper_w = StringToUpperASCII(original_w);
 529   EXPECT_EQ(L"CC2", upper_w);
 530 }
 531
 532 TEST(StringUtilTest, LowerCaseEqualsASCII) {
 533   static const struct {
 534     const char*    src_a;
 535     const char*    dst;
 536   } lowercase_cases[] = {
 537     { "FoO", "foo" },
 538     { "foo", "foo" },
 539     { "FOO", "foo" },
 540   };
 541
 542   for (size_t i = 0; i < arraysize(lowercase_cases); ++i) {
 543     EXPECT_TRUE(LowerCaseEqualsASCII(ASCIIToUTF16(lowercase_cases[i].src_a),
 544                                      lowercase_cases[i].dst));
 545     EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a,
 546                                      lowercase_cases[i].dst));
 547   }
 548 }
 549
 550 TEST(StringUtilTest, FormatBytesUnlocalized) {
 551   static const struct {
 552     int64 bytes;
 553     const char* expected;
 554   } cases[] = {
 555     // Expected behavior: we show one post-decimal digit when we have
 556     // under two pre-decimal digits, except in cases where it makes no
 557     // sense (zero or bytes).
 558     // Since we switch units once we cross the 1000 mark, this keeps
 559     // the display of file sizes or bytes consistently around three
 560     // digits.
 561     {0, "0 B"},
 562     {512, "512 B"},
 563     {1024*1024, "1.0 MB"},
 564     {1024*1024*1024, "1.0 GB"},
 565     {10LL*1024*1024*1024, "10.0 GB"},
 566     {99LL*1024*1024*1024, "99.0 GB"},
 567     {105LL*1024*1024*1024, "105 GB"},
 568     {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"},
 569     {~(1LL << 63), "8192 PB"},
 570
 571     {99*1024 + 103, "99.1 kB"},
 572     {1024*1024 + 103, "1.0 MB"},
 573     {1024*1024 + 205 * 1024, "1.2 MB"},
 574     {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"},
 575     {10LL*1024*1024*1024, "10.0 GB"},
 576     {100LL*1024*1024*1024, "100 GB"},
 577   };
 578
 579   for (size_t i = 0; i < arraysize(cases); ++i) {
 580     EXPECT_EQ(ASCIIToUTF16(cases[i].expected),
 581               FormatBytesUnlocalized(cases[i].bytes));
 582   }
 583 }
 584 TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
 585   static const struct {
 586     const char* str;
 587     string16::size_type start_offset;
 588     const char* find_this;
 589     const char* replace_with;
 590     const char* expected;
 591   } cases[] = {
 592     {"aaa", 0, "a", "b", "bbb"},
 593     {"abb", 0, "ab", "a", "ab"},
 594     {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
 595     {"Not found", 0, "x", "0", "Not found"},
 596     {"Not found again", 5, "x", "0", "Not found again"},
 597     {" Making it much longer ", 0, " ", "Four score and seven years ago",
 598      "Four score and seven years agoMakingFour score and seven years agoit"
 599      "Four score and seven years agomuchFour score and seven years agolonger"
 600      "Four score and seven years ago"},
 601     {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
 602     {"Replace me only me once", 9, "me ", "", "Replace me only once"},
 603     {"abababab", 2, "ab", "c", "abccc"},
 604   };
 605
 606   for (size_t i = 0; i < arraysize(cases); i++) {
 607     string16 str = ASCIIToUTF16(cases[i].str);
 608     ReplaceSubstringsAfterOffset(&str, cases[i].start_offset,
 609                                  ASCIIToUTF16(cases[i].find_this),
 610                                  ASCIIToUTF16(cases[i].replace_with));
 611     EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
 612   }
 613 }
 614
 615 TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
 616   static const struct {
 617     const char* str;
 618     string16::size_type start_offset;
 619     const char* find_this;
 620     const char* replace_with;
 621     const char* expected;
 622   } cases[] = {
 623     {"aaa", 0, "a", "b", "baa"},
 624     {"abb", 0, "ab", "a", "ab"},
 625     {"Removing some substrings inging", 0, "ing", "",
 626       "Remov some substrings inging"},
 627     {"Not found", 0, "x", "0", "Not found"},
 628     {"Not found again", 5, "x", "0", "Not found again"},
 629     {" Making it much longer ", 0, " ", "Four score and seven years ago",
 630      "Four score and seven years agoMaking it much longer "},
 631     {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
 632     {"Replace me only me once", 4, "me ", "", "Replace only me once"},
 633     {"abababab", 2, "ab", "c", "abcabab"},
 634   };
 635
 636   for (size_t i = 0; i < arraysize(cases); i++) {
 637     string16 str = ASCIIToUTF16(cases[i].str);
 638     ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset,
 639                                      ASCIIToUTF16(cases[i].find_this),
 640                                      ASCIIToUTF16(cases[i].replace_with));
 641     EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
 642   }
 643 }
 644
 645 TEST(StringUtilTest, HexDigitToInt) {
 646   EXPECT_EQ(0, HexDigitToInt('0'));
 647   EXPECT_EQ(1, HexDigitToInt('1'));
 648   EXPECT_EQ(2, HexDigitToInt('2'));
 649   EXPECT_EQ(3, HexDigitToInt('3'));
 650   EXPECT_EQ(4, HexDigitToInt('4'));
 651   EXPECT_EQ(5, HexDigitToInt('5'));
 652   EXPECT_EQ(6, HexDigitToInt('6'));
 653   EXPECT_EQ(7, HexDigitToInt('7'));
 654   EXPECT_EQ(8, HexDigitToInt('8'));
 655   EXPECT_EQ(9, HexDigitToInt('9'));
 656   EXPECT_EQ(10, HexDigitToInt('A'));
 657   EXPECT_EQ(11, HexDigitToInt('B'));
 658   EXPECT_EQ(12, HexDigitToInt('C'));
 659   EXPECT_EQ(13, HexDigitToInt('D'));
 660   EXPECT_EQ(14, HexDigitToInt('E'));
 661   EXPECT_EQ(15, HexDigitToInt('F'));
 662
 663   // Verify the lower case as well.
 664   EXPECT_EQ(10, HexDigitToInt('a'));
 665   EXPECT_EQ(11, HexDigitToInt('b'));
 666   EXPECT_EQ(12, HexDigitToInt('c'));
 667   EXPECT_EQ(13, HexDigitToInt('d'));
 668   EXPECT_EQ(14, HexDigitToInt('e'));
 669   EXPECT_EQ(15, HexDigitToInt('f'));
 670 }
 671
 672 // This checks where we can use the assignment operator for a va_list. We need
 673 // a way to do this since Visual C doesn't support va_copy, but assignment on
 674 // va_list is not guaranteed to be a copy. See StringAppendVT which uses this
 675 // capability.
 676 static void VariableArgsFunc(const char* format, ...) {
 677   va_list org;
 678   va_start(org, format);
 679
 680   va_list dup;
 681   GG_VA_COPY(dup, org);
 682   int i1 = va_arg(org, int);
 683   int j1 = va_arg(org, int);
 684   char* s1 = va_arg(org, char*);
 685   double d1 = va_arg(org, double);
 686   va_end(org);
 687
 688   int i2 = va_arg(dup, int);
 689   int j2 = va_arg(dup, int);
 690   char* s2 = va_arg(dup, char*);
 691   double d2 = va_arg(dup, double);
 692
 693   EXPECT_EQ(i1, i2);
 694   EXPECT_EQ(j1, j2);
 695   EXPECT_STREQ(s1, s2);
 696   EXPECT_EQ(d1, d2);
 697
 698   va_end(dup);
 699 }
 700
 701 TEST(StringUtilTest, VAList) {
 702   VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21);
 703 }
 704
 705 // Test for Tokenize
 706 template <typename STR>
 707 void TokenizeTest() {
 708   std::vector<STR> r;
 709   size_t size;
 710
 711   size = Tokenize(STR("This is a string"), STR(" "), &r);
 712   EXPECT_EQ(4U, size);
 713   ASSERT_EQ(4U, r.size());
 714   EXPECT_EQ(r[0], STR("This"));
 715   EXPECT_EQ(r[1], STR("is"));
 716   EXPECT_EQ(r[2], STR("a"));
 717   EXPECT_EQ(r[3], STR("string"));
 718   r.clear();
 719
 720   size = Tokenize(STR("one,two,three"), STR(","), &r);
 721   EXPECT_EQ(3U, size);
 722   ASSERT_EQ(3U, r.size());
 723   EXPECT_EQ(r[0], STR("one"));
 724   EXPECT_EQ(r[1], STR("two"));
 725   EXPECT_EQ(r[2], STR("three"));
 726   r.clear();
 727
 728   size = Tokenize(STR("one,two:three;four"), STR(",:"), &r);
 729   EXPECT_EQ(3U, size);
 730   ASSERT_EQ(3U, r.size());
 731   EXPECT_EQ(r[0], STR("one"));
 732   EXPECT_EQ(r[1], STR("two"));
 733   EXPECT_EQ(r[2], STR("three;four"));
 734   r.clear();
 735
 736   size = Tokenize(STR("one,two:three;four"), STR(";,:"), &r);
 737   EXPECT_EQ(4U, size);
 738   ASSERT_EQ(4U, r.size());
 739   EXPECT_EQ(r[0], STR("one"));
 740   EXPECT_EQ(r[1], STR("two"));
 741   EXPECT_EQ(r[2], STR("three"));
 742   EXPECT_EQ(r[3], STR("four"));
 743   r.clear();
 744
 745   size = Tokenize(STR("one, two, three"), STR(","), &r);
 746   EXPECT_EQ(3U, size);
 747   ASSERT_EQ(3U, r.size());
 748   EXPECT_EQ(r[0], STR("one"));
 749   EXPECT_EQ(r[1], STR(" two"));
 750   EXPECT_EQ(r[2], STR(" three"));
 751   r.clear();
 752
 753   size = Tokenize(STR("one, two, three, "), STR(","), &r);
 754   EXPECT_EQ(4U, size);
 755   ASSERT_EQ(4U, r.size());
 756   EXPECT_EQ(r[0], STR("one"));
 757   EXPECT_EQ(r[1], STR(" two"));
 758   EXPECT_EQ(r[2], STR(" three"));
 759   EXPECT_EQ(r[3], STR(" "));
 760   r.clear();
 761
 762   size = Tokenize(STR("one, two, three,"), STR(","), &r);
 763   EXPECT_EQ(3U, size);
 764   ASSERT_EQ(3U, r.size());
 765   EXPECT_EQ(r[0], STR("one"));
 766   EXPECT_EQ(r[1], STR(" two"));
 767   EXPECT_EQ(r[2], STR(" three"));
 768   r.clear();
 769
 770   size = Tokenize(STR(), STR(","), &r);
 771   EXPECT_EQ(0U, size);
 772   ASSERT_EQ(0U, r.size());
 773   r.clear();
 774
 775   size = Tokenize(STR(","), STR(","), &r);
 776   EXPECT_EQ(0U, size);
 777   ASSERT_EQ(0U, r.size());
 778   r.clear();
 779
 780   size = Tokenize(STR(",;:."), STR(".:;,"), &r);
 781   EXPECT_EQ(0U, size);
 782   ASSERT_EQ(0U, r.size());
 783   r.clear();
 784
 785   size = Tokenize(STR("\t\ta\t"), STR("\t"), &r);
 786   EXPECT_EQ(1U, size);
 787   ASSERT_EQ(1U, r.size());
 788   EXPECT_EQ(r[0], STR("a"));
 789   r.clear();
 790
 791   size = Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r);
 792   EXPECT_EQ(2U, size);
 793   ASSERT_EQ(2U, r.size());
 794   EXPECT_EQ(r[0], STR("\ta\t"));
 795   EXPECT_EQ(r[1], STR("b\tcc"));
 796   r.clear();
 797 }
 798
 799 TEST(StringUtilTest, TokenizeStdString) {
 800   TokenizeTest<std::string>();
 801 }
 802
 803 TEST(StringUtilTest, TokenizeStringPiece) {
 804   TokenizeTest<base::StringPiece>();
 805 }
 806
 807 // Test for JoinString
 808 TEST(StringUtilTest, JoinString) {
 809   std::vector<std::string> in;
 810   EXPECT_EQ("", JoinString(in, ','));
 811
 812   in.push_back("a");
 813   EXPECT_EQ("a", JoinString(in, ','));
 814
 815   in.push_back("b");
 816   in.push_back("c");
 817   EXPECT_EQ("a,b,c", JoinString(in, ','));
 818
 819   in.push_back(std::string());
 820   EXPECT_EQ("a,b,c,", JoinString(in, ','));
 821   in.push_back(" ");
 822   EXPECT_EQ("a|b|c|| ", JoinString(in, '|'));
 823 }
 824
 825 // Test for JoinString overloaded with std::string separator
 826 TEST(StringUtilTest, JoinStringWithString) {
 827   std::string separator(", ");
 828   std::vector<std::string> parts;
 829   EXPECT_EQ(std::string(), JoinString(parts, separator));
 830
 831   parts.push_back("a");
 832   EXPECT_EQ("a", JoinString(parts, separator));
 833
 834   parts.push_back("b");
 835   parts.push_back("c");
 836   EXPECT_EQ("a, b, c", JoinString(parts, separator));
 837
 838   parts.push_back(std::string());
 839   EXPECT_EQ("a, b, c, ", JoinString(parts, separator));
 840   parts.push_back(" ");
 841   EXPECT_EQ("a|b|c|| ", JoinString(parts, "|"));
 842 }
 843
 844 // Test for JoinString overloaded with string16 separator
 845 TEST(StringUtilTest, JoinStringWithString16) {
 846   string16 separator = ASCIIToUTF16(", ");
 847   std::vector<string16> parts;
 848   EXPECT_EQ(string16(), JoinString(parts, separator));
 849
 850   parts.push_back(ASCIIToUTF16("a"));
 851   EXPECT_EQ(ASCIIToUTF16("a"), JoinString(parts, separator));
 852
 853   parts.push_back(ASCIIToUTF16("b"));
 854   parts.push_back(ASCIIToUTF16("c"));
 855   EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(parts, separator));
 856
 857   parts.push_back(ASCIIToUTF16(""));
 858   EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(parts, separator));
 859   parts.push_back(ASCIIToUTF16(" "));
 860   EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(parts, ASCIIToUTF16("|")));
 861 }
 862
 863 TEST(StringUtilTest, StartsWith) {
 864   EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true));
 865   EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true));
 866   EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false));
 867   EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false));
 868   EXPECT_FALSE(StartsWithASCII("java", "javascript", true));
 869   EXPECT_FALSE(StartsWithASCII("java", "javascript", false));
 870   EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", false));
 871   EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", true));
 872   EXPECT_TRUE(StartsWithASCII("java", std::string(), false));
 873   EXPECT_TRUE(StartsWithASCII("java", std::string(), true));
 874
 875   EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"),
 876                          ASCIIToUTF16("javascript"), true));
 877   EXPECT_FALSE(StartsWith(ASCIIToUTF16("JavaScript:url"),
 878                           ASCIIToUTF16("javascript"), true));
 879   EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"),
 880                          ASCIIToUTF16("javascript"), false));
 881   EXPECT_TRUE(StartsWith(ASCIIToUTF16("JavaScript:url"),
 882                          ASCIIToUTF16("javascript"), false));
 883   EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"),
 884                           ASCIIToUTF16("javascript"), true));
 885   EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"),
 886                           ASCIIToUTF16("javascript"), false));
 887   EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"), false));
 888   EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"), true));
 889   EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(), false));
 890   EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(), true));
 891 }
 892
 893 TEST(StringUtilTest, EndsWith) {
 894   EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"),
 895                        ASCIIToUTF16(".plugin"), true));
 896   EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.Plugin"),
 897                         ASCIIToUTF16(".plugin"), true));
 898   EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"),
 899                        ASCIIToUTF16(".plugin"), false));
 900   EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.Plugin"),
 901                        ASCIIToUTF16(".plugin"), false));
 902   EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"), true));
 903   EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"), false));
 904   EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"),
 905                         ASCIIToUTF16(".plugin"), true));
 906   EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"),
 907                         ASCIIToUTF16(".plugin"), false));
 908   EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"), false));
 909   EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"), true));
 910   EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(), false));
 911   EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(), true));
 912   EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"),
 913                        ASCIIToUTF16(".plugin"), false));
 914   EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"), ASCIIToUTF16(".plugin"), true));
 915   EXPECT_TRUE(EndsWith(string16(), string16(), false));
 916   EXPECT_TRUE(EndsWith(string16(), string16(), true));
 917 }
 918
 919 TEST(StringUtilTest, GetStringFWithOffsets) {
 920   std::vector<string16> subst;
 921   subst.push_back(ASCIIToUTF16("1"));
 922   subst.push_back(ASCIIToUTF16("2"));
 923   std::vector<size_t> offsets;
 924
 925   ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
 926                             subst,
 927                             &offsets);
 928   EXPECT_EQ(2U, offsets.size());
 929   EXPECT_EQ(7U, offsets[0]);
 930   EXPECT_EQ(25U, offsets[1]);
 931   offsets.clear();
 932
 933   ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
 934                             subst,
 935                             &offsets);
 936   EXPECT_EQ(2U, offsets.size());
 937   EXPECT_EQ(25U, offsets[0]);
 938   EXPECT_EQ(7U, offsets[1]);
 939   offsets.clear();
 940 }
 941
 942 TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
 943   // Test whether replacestringplaceholders works as expected when there
 944   // are fewer inputs than outputs.
 945   std::vector<string16> subst;
 946   subst.push_back(ASCIIToUTF16("9a"));
 947   subst.push_back(ASCIIToUTF16("8b"));
 948   subst.push_back(ASCIIToUTF16("7c"));
 949
 950   string16 formatted =
 951       ReplaceStringPlaceholders(
 952           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL);
 953
 954   EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"));
 955 }
 956
 957 TEST(StringUtilTest, ReplaceStringPlaceholders) {
 958   std::vector<string16> subst;
 959   subst.push_back(ASCIIToUTF16("9a"));
 960   subst.push_back(ASCIIToUTF16("8b"));
 961   subst.push_back(ASCIIToUTF16("7c"));
 962   subst.push_back(ASCIIToUTF16("6d"));
 963   subst.push_back(ASCIIToUTF16("5e"));
 964   subst.push_back(ASCIIToUTF16("4f"));
 965   subst.push_back(ASCIIToUTF16("3g"));
 966   subst.push_back(ASCIIToUTF16("2h"));
 967   subst.push_back(ASCIIToUTF16("1i"));
 968
 969   string16 formatted =
 970       ReplaceStringPlaceholders(
 971           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL);
 972
 973   EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"));
 974 }
 975
 976 TEST(StringUtilTest, ReplaceStringPlaceholdersMoreThan9Replacements) {
 977   std::vector<string16> subst;
 978   subst.push_back(ASCIIToUTF16("9a"));
 979   subst.push_back(ASCIIToUTF16("8b"));
 980   subst.push_back(ASCIIToUTF16("7c"));
 981   subst.push_back(ASCIIToUTF16("6d"));
 982   subst.push_back(ASCIIToUTF16("5e"));
 983   subst.push_back(ASCIIToUTF16("4f"));
 984   subst.push_back(ASCIIToUTF16("3g"));
 985   subst.push_back(ASCIIToUTF16("2h"));
 986   subst.push_back(ASCIIToUTF16("1i"));
 987   subst.push_back(ASCIIToUTF16("0j"));
 988   subst.push_back(ASCIIToUTF16("-1k"));
 989   subst.push_back(ASCIIToUTF16("-2l"));
 990   subst.push_back(ASCIIToUTF16("-3m"));
 991   subst.push_back(ASCIIToUTF16("-4n"));
 992
 993   string16 formatted =
 994       ReplaceStringPlaceholders(
 995           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i,"
 996                        "$10j,$11k,$12l,$13m,$14n,$1"), subst, NULL);
 997
 998   EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,"
 999                                     "1ii,0jj,-1kk,-2ll,-3mm,-4nn,9a"));
1000 }
1001
1002 TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
1003   std::vector<std::string> subst;
1004   subst.push_back("9a");
1005   subst.push_back("8b");
1006   subst.push_back("7c");
1007   subst.push_back("6d");
1008   subst.push_back("5e");
1009   subst.push_back("4f");
1010   subst.push_back("3g");
1011   subst.push_back("2h");
1012   subst.push_back("1i");
1013
1014   std::string formatted =
1015       ReplaceStringPlaceholders(
1016           "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL);
1017
1018   EXPECT_EQ(formatted, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii");
1019 }
1020
1021 TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
1022   std::vector<std::string> subst;
1023   subst.push_back("a");
1024   subst.push_back("b");
1025   subst.push_back("c");
1026   EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, NULL),
1027             "$1 $$2 $$$3");
1028 }
1029
1030 TEST(StringUtilTest, MatchPatternTest) {
1031   EXPECT_TRUE(MatchPattern("www.google.com", "*.com"));
1032   EXPECT_TRUE(MatchPattern("www.google.com", "*"));
1033   EXPECT_FALSE(MatchPattern("www.google.com", "www*.g*.org"));
1034   EXPECT_TRUE(MatchPattern("Hello", "H?l?o"));
1035   EXPECT_FALSE(MatchPattern("www.google.com", "http://*)"));
1036   EXPECT_FALSE(MatchPattern("www.msn.com", "*.COM"));
1037   EXPECT_TRUE(MatchPattern("Hello*1234", "He??o\\*1*"));
1038   EXPECT_FALSE(MatchPattern("", "*.*"));
1039   EXPECT_TRUE(MatchPattern("", "*"));
1040   EXPECT_TRUE(MatchPattern("", "?"));
1041   EXPECT_TRUE(MatchPattern("", ""));
1042   EXPECT_FALSE(MatchPattern("Hello", ""));
1043   EXPECT_TRUE(MatchPattern("Hello*", "Hello*"));
1044   // Stop after a certain recursion depth.
1045   EXPECT_FALSE(MatchPattern("123456789012345678", "?????????????????*"));
1046
1047   // Test UTF8 matching.
1048   EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0", "*\xe2\x99\xa0"));
1049   EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0.", "heart: ?."));
1050   EXPECT_TRUE(MatchPattern("hearts: \xe2\x99\xa0\xe2\x99\xa0", "*"));
1051   // Invalid sequences should be handled as a single invalid character.
1052   EXPECT_TRUE(MatchPattern("invalid: \xef\xbf\xbe", "invalid: ?"));
1053   // If the pattern has invalid characters, it shouldn't match anything.
1054   EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80"));
1055
1056   // Test UTF16 character matching.
1057   EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"),
1058                            UTF8ToUTF16("*.com")));
1059   EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"),
1060                            UTF8ToUTF16("He??o\\*1*")));
1061
1062   // This test verifies that consecutive wild cards are collapsed into 1
1063   // wildcard (when this doesn't occur, MatchPattern reaches it's maximum
1064   // recursion depth).
1065   EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello"),
1066                            UTF8ToUTF16("He********************************o")));
1067 }
1068
1069 TEST(StringUtilTest, LcpyTest) {
1070   // Test the normal case where we fit in our buffer.
1071   {
1072     char dst[10];
1073     wchar_t wdst[10];
1074     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1075     EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
1076     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1077     EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
1078   }
1079
1080   // Test dst_size == 0, nothing should be written to |dst| and we should
1081   // have the equivalent of strlen(src).
1082   {
1083     char dst[2] = {1, 2};
1084     wchar_t wdst[2] = {1, 2};
1085     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", 0));
1086     EXPECT_EQ(1, dst[0]);
1087     EXPECT_EQ(2, dst[1]);
1088     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", 0));
1089     EXPECT_EQ(static_cast<wchar_t>(1), wdst[0]);
1090     EXPECT_EQ(static_cast<wchar_t>(2), wdst[1]);
1091   }
1092
1093   // Test the case were we _just_ competely fit including the null.
1094   {
1095     char dst[8];
1096     wchar_t wdst[8];
1097     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1098     EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
1099     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1100     EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
1101   }
1102
1103   // Test the case were we we are one smaller, so we can't fit the null.
1104   {
1105     char dst[7];
1106     wchar_t wdst[7];
1107     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1108     EXPECT_EQ(0, memcmp(dst, "abcdef", 7));
1109     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1110     EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7));
1111   }
1112
1113   // Test the case were we are just too small.
1114   {
1115     char dst[3];
1116     wchar_t wdst[3];
1117     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1118     EXPECT_EQ(0, memcmp(dst, "ab", 3));
1119     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1120     EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3));
1121   }
1122 }
1123
1124 TEST(StringUtilTest, WprintfFormatPortabilityTest) {
1125   static const struct {
1126     const wchar_t* input;
1127     bool portable;
1128   } cases[] = {
1129     { L"%ls", true },
1130     { L"%s", false },
1131     { L"%S", false },
1132     { L"%lS", false },
1133     { L"Hello, %s", false },
1134     { L"%lc", true },
1135     { L"%c", false },
1136     { L"%C", false },
1137     { L"%lC", false },
1138     { L"%ls %s", false },
1139     { L"%s %ls", false },
1140     { L"%s %ls %s", false },
1141     { L"%f", true },
1142     { L"%f %F", false },
1143     { L"%d %D", false },
1144     { L"%o %O", false },
1145     { L"%u %U", false },
1146     { L"%f %d %o %u", true },
1147     { L"%-8d (%02.1f%)", true },
1148     { L"% 10s", false },
1149     { L"% 10ls", true }
1150   };
1151   for (size_t i = 0; i < arraysize(cases); ++i)
1152     EXPECT_EQ(cases[i].portable, base::IsWprintfFormatPortable(cases[i].input));
1153 }
1154
1155 TEST(StringUtilTest, RemoveChars) {
1156   const char kRemoveChars[] = "-/+*";
1157   std::string input = "A-+bc/d!*";
1158   EXPECT_TRUE(RemoveChars(input, kRemoveChars, &input));
1159   EXPECT_EQ("Abcd!", input);
1160
1161   // No characters match kRemoveChars.
1162   EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
1163   EXPECT_EQ("Abcd!", input);
1164
1165   // Empty string.
1166   input.clear();
1167   EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
1168   EXPECT_EQ(std::string(), input);
1169 }
1170
1171 TEST(StringUtilTest, ReplaceChars) {
1172   struct TestData {
1173     const char* input;
1174     const char* replace_chars;
1175     const char* replace_with;
1176     const char* output;
1177     bool result;
1178   } cases[] = {
1179     { "", "", "", "", false },
1180     { "test", "", "", "test", false },
1181     { "test", "", "!", "test", false },
1182     { "test", "z", "!", "test", false },
1183     { "test", "e", "!", "t!st", true },
1184     { "test", "e", "!?", "t!?st", true },
1185     { "test", "ez", "!", "t!st", true },
1186     { "test", "zed", "!?", "t!?st", true },
1187     { "test", "t", "!?", "!?es!?", true },
1188     { "test", "et", "!>", "!>!>s!>", true },
1189     { "test", "zest", "!", "!!!!", true },
1190     { "test", "szt", "!", "!e!!", true },
1191     { "test", "t", "test", "testestest", true },
1192   };
1193
1194   for (size_t i = 0; i < arraysize(cases); ++i) {
1195     std::string output;
1196     bool result = ReplaceChars(cases[i].input,
1197                                cases[i].replace_chars,
1198                                cases[i].replace_with,
1199                                &output);
1200     EXPECT_EQ(cases[i].result, result);
1201     EXPECT_EQ(cases[i].output, output);
1202   }
1203 }
1204
1205 TEST(StringUtilTest, ContainsOnlyChars) {
1206   // Providing an empty list of characters should return false but for the empty
1207   // string.
1208   EXPECT_TRUE(ContainsOnlyChars(std::string(), std::string()));
1209   EXPECT_FALSE(ContainsOnlyChars("Hello", std::string()));
1210
1211   EXPECT_TRUE(ContainsOnlyChars(std::string(), "1234"));
1212   EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
1213   EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
1214   EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
1215   EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));
1216
1217   EXPECT_TRUE(ContainsOnlyChars(std::string(), kWhitespaceASCII));
1218   EXPECT_TRUE(ContainsOnlyChars(" ", kWhitespaceASCII));
1219   EXPECT_TRUE(ContainsOnlyChars("\t", kWhitespaceASCII));
1220   EXPECT_TRUE(ContainsOnlyChars("\t \r \n  ", kWhitespaceASCII));
1221   EXPECT_FALSE(ContainsOnlyChars("a", kWhitespaceASCII));
1222   EXPECT_FALSE(ContainsOnlyChars("\thello\r \n  ", kWhitespaceASCII));
1223
1224   EXPECT_TRUE(ContainsOnlyChars(string16(), kWhitespaceUTF16));
1225   EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16(" "), kWhitespaceUTF16));
1226   EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t"), kWhitespaceUTF16));
1227   EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t \r \n  "), kWhitespaceUTF16));
1228   EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("a"), kWhitespaceUTF16));
1229   EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("\thello\r \n  "),
1230                                   kWhitespaceUTF16));
1231 }
1232
1233 class WriteIntoTest : public testing::Test {
1234  protected:
1235   static void WritesCorrectly(size_t num_chars) {
1236     std::string buffer;
1237     char kOriginal[] = "supercali";
1238     strncpy(WriteInto(&buffer, num_chars + 1), kOriginal, num_chars);
1239     // Using std::string(buffer.c_str()) instead of |buffer| truncates the
1240     // string at the first \0.
1241     EXPECT_EQ(std::string(kOriginal,
1242                           std::min(num_chars, arraysize(kOriginal) - 1)),
1243               std::string(buffer.c_str()));
1244     EXPECT_EQ(num_chars, buffer.size());
1245   }
1246 };
1247
1248 TEST_F(WriteIntoTest, WriteInto) {
1249   // Validate that WriteInto reserves enough space and
1250   // sizes a string correctly.
1251   WritesCorrectly(1);
1252   WritesCorrectly(2);
1253   WritesCorrectly(5000);
1254
1255   // Validate that WriteInto doesn't modify other strings
1256   // when using a Copy-on-Write implementation.
1257   const char kLive[] = "live";
1258   const char kDead[] = "dead";
1259   const std::string live = kLive;
1260   std::string dead = live;
1261   strncpy(WriteInto(&dead, 5), kDead, 4);
1262   EXPECT_EQ(kDead, dead);
1263   EXPECT_EQ(4u, dead.size());
1264   EXPECT_EQ(kLive, live);
1265   EXPECT_EQ(4u, live.size());
1266 }
1267
1268 }  // namespace base