base/strings/string_util_unittest.cc

   1 // Copyright 2013 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "base/strings/string_util.h"
   6
   7 #include <math.h>
   8 #include <stdarg.h>
   9
  10 #include <algorithm>
  11
  12 #include "base/basictypes.h"
  13 #include "base/strings/string16.h"
  14 #include "base/strings/utf_string_conversions.h"
  15 #include "testing/gmock/include/gmock/gmock.h"
  16 #include "testing/gtest/include/gtest/gtest.h"
  17
  18 using ::testing::ElementsAre;
  19
  20 namespace base {
  21
  22 static const struct trim_case {
  23   const wchar_t* input;
  24   const TrimPositions positions;
  25   const wchar_t* output;
  26   const TrimPositions return_value;
  27 } trim_cases[] = {
  28   {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
  29   {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
  30   {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
  31   {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
  32   {L"", TRIM_ALL, L"", TRIM_NONE},
  33   {L"  ", TRIM_LEADING, L"", TRIM_LEADING},
  34   {L"  ", TRIM_TRAILING, L"", TRIM_TRAILING},
  35   {L"  ", TRIM_ALL, L"", TRIM_ALL},
  36   {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
  37   {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
  38 };
  39
  40 static const struct trim_case_ascii {
  41   const char* input;
  42   const TrimPositions positions;
  43   const char* output;
  44   const TrimPositions return_value;
  45 } trim_cases_ascii[] = {
  46   {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
  47   {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
  48   {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
  49   {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
  50   {"", TRIM_ALL, "", TRIM_NONE},
  51   {"  ", TRIM_LEADING, "", TRIM_LEADING},
  52   {"  ", TRIM_TRAILING, "", TRIM_TRAILING},
  53   {"  ", TRIM_ALL, "", TRIM_ALL},
  54   {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
  55 };
  56
  57 namespace {
  58
  59 // Helper used to test TruncateUTF8ToByteSize.
  60 bool Truncated(const std::string& input,
  61                const size_t byte_size,
  62                std::string* output) {
  63     size_t prev = input.length();
  64     TruncateUTF8ToByteSize(input, byte_size, output);
  65     return prev != output->length();
  66 }
  67
  68 }  // namespace
  69
  70 TEST(StringUtilTest, TruncateUTF8ToByteSize) {
  71   std::string output;
  72
  73   // Empty strings and invalid byte_size arguments
  74   EXPECT_FALSE(Truncated(std::string(), 0, &output));
  75   EXPECT_EQ(output, "");
  76   EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
  77   EXPECT_EQ(output, "");
  78   EXPECT_FALSE(Truncated("\xe1\x80\xbf", static_cast<size_t>(-1), &output));
  79   EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));
  80
  81   // Testing the truncation of valid UTF8 correctly
  82   EXPECT_TRUE(Truncated("abc", 2, &output));
  83   EXPECT_EQ(output, "ab");
  84   EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
  85   EXPECT_EQ(output.compare("\xc2\x81"), 0);
  86   EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
  87   EXPECT_EQ(output.compare("\xc2\x81"), 0);
  88   EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
  89   EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);
  90
  91   {
  92     const char array[] = "\x00\x00\xc2\x81\xc2\x81";
  93     const std::string array_string(array, arraysize(array));
  94     EXPECT_TRUE(Truncated(array_string, 4, &output));
  95     EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
  96   }
  97
  98   {
  99     const char array[] = "\x00\xc2\x81\xc2\x81";
 100     const std::string array_string(array, arraysize(array));
 101     EXPECT_TRUE(Truncated(array_string, 4, &output));
 102     EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);
 103   }
 104
 105   // Testing invalid UTF8
 106   EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
 107   EXPECT_EQ(output.compare(""), 0);
 108   EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
 109   EXPECT_EQ(output.compare(""), 0);
 110   EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
 111   EXPECT_EQ(output.compare(""), 0);
 112
 113   // Testing invalid UTF8 mixed with valid UTF8
 114   EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
 115   EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);
 116   EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
 117   EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);
 118   EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
 119               10, &output));
 120   EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
 121   EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
 122               10, &output));
 123   EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
 124   EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
 125   EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);
 126
 127   // Overlong sequences
 128   EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
 129   EXPECT_EQ(output.compare(""), 0);
 130   EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
 131   EXPECT_EQ(output.compare(""), 0);
 132   EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
 133   EXPECT_EQ(output.compare(""), 0);
 134   EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
 135   EXPECT_EQ(output.compare(""), 0);
 136   EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
 137   EXPECT_EQ(output.compare(""), 0);
 138   EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
 139   EXPECT_EQ(output.compare(""), 0);
 140   EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
 141   EXPECT_EQ(output.compare(""), 0);
 142   EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
 143   EXPECT_EQ(output.compare(""), 0);
 144   EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
 145   EXPECT_EQ(output.compare(""), 0);
 146   EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
 147   EXPECT_EQ(output.compare(""), 0);
 148   EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
 149   EXPECT_EQ(output.compare(""), 0);
 150
 151   // Beyond U+10FFFF (the upper limit of Unicode codespace)
 152   EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
 153   EXPECT_EQ(output.compare(""), 0);
 154   EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
 155   EXPECT_EQ(output.compare(""), 0);
 156   EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
 157   EXPECT_EQ(output.compare(""), 0);
 158
 159   // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
 160   EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
 161   EXPECT_EQ(output.compare(""), 0);
 162   EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
 163   EXPECT_EQ(output.compare(""), 0);
 164
 165   {
 166     const char array[] = "\x00\x00\xfe\xff";
 167     const std::string array_string(array, arraysize(array));
 168     EXPECT_TRUE(Truncated(array_string, 4, &output));
 169     EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);
 170   }
 171
 172   // Variants on the previous test
 173   {
 174     const char array[] = "\xff\xfe\x00\x00";
 175     const std::string array_string(array, 4);
 176     EXPECT_FALSE(Truncated(array_string, 4, &output));
 177     EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
 178   }
 179   {
 180     const char array[] = "\xff\x00\x00\xfe";
 181     const std::string array_string(array, arraysize(array));
 182     EXPECT_TRUE(Truncated(array_string, 4, &output));
 183     EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);
 184   }
 185
 186   // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
 187   EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
 188   EXPECT_EQ(output.compare(""), 0);
 189   EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
 190   EXPECT_EQ(output.compare(""), 0);
 191   EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
 192   EXPECT_EQ(output.compare(""), 0);
 193   EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
 194   EXPECT_EQ(output.compare(""), 0);
 195   EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
 196   EXPECT_EQ(output.compare(""), 0);
 197
 198   // Strings in legacy encodings that are valid in UTF-8, but
 199   // are invalid as UTF-8 in real data.
 200   EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
 201   EXPECT_EQ(output.compare("caf"), 0);
 202   EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
 203   EXPECT_EQ(output.compare(""), 0);
 204   EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
 205   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
 206   EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
 207               &output));
 208   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
 209
 210   // Testing using the same string as input and output.
 211   EXPECT_FALSE(Truncated(output, 4, &output));
 212   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
 213   EXPECT_TRUE(Truncated(output, 3, &output));
 214   EXPECT_EQ(output.compare("\xa7\x41"), 0);
 215
 216   // "abc" with U+201[CD] in windows-125[0-8]
 217   EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
 218   EXPECT_EQ(output.compare("\x93" "abc"), 0);
 219
 220   // U+0639 U+064E U+0644 U+064E in ISO-8859-6
 221   EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
 222   EXPECT_EQ(output.compare(""), 0);
 223
 224   // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
 225   EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
 226   EXPECT_EQ(output.compare(""), 0);
 227 }
 228
 229 TEST(StringUtilTest, TrimWhitespace) {
 230   string16 output;  // Allow contents to carry over to next testcase
 231   for (size_t i = 0; i < arraysize(trim_cases); ++i) {
 232     const trim_case& value = trim_cases[i];
 233     EXPECT_EQ(value.return_value,
 234               TrimWhitespace(WideToUTF16(value.input), value.positions,
 235                              &output));
 236     EXPECT_EQ(WideToUTF16(value.output), output);
 237   }
 238
 239   // Test that TrimWhitespace() can take the same string for input and output
 240   output = ASCIIToUTF16("  This is a test \r\n");
 241   EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
 242   EXPECT_EQ(ASCIIToUTF16("This is a test"), output);
 243
 244   // Once more, but with a string of whitespace
 245   output = ASCIIToUTF16("  \r\n");
 246   EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
 247   EXPECT_EQ(string16(), output);
 248
 249   std::string output_ascii;
 250   for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) {
 251     const trim_case_ascii& value = trim_cases_ascii[i];
 252     EXPECT_EQ(value.return_value,
 253               TrimWhitespace(value.input, value.positions, &output_ascii));
 254     EXPECT_EQ(value.output, output_ascii);
 255   }
 256 }
 257
 258 static const struct collapse_case {
 259   const wchar_t* input;
 260   const bool trim;
 261   const wchar_t* output;
 262 } collapse_cases[] = {
 263   {L" Google Video ", false, L"Google Video"},
 264   {L"Google Video", false, L"Google Video"},
 265   {L"", false, L""},
 266   {L"  ", false, L""},
 267   {L"\t\rTest String\n", false, L"Test String"},
 268   {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
 269   {L"    Test     \n  \t String    ", false, L"Test String"},
 270   {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
 271   {L"   Test String", false, L"Test String"},
 272   {L"Test String    ", false, L"Test String"},
 273   {L"Test String", false, L"Test String"},
 274   {L"", true, L""},
 275   {L"\n", true, L""},
 276   {L"  \r  ", true, L""},
 277   {L"\nFoo", true, L"Foo"},
 278   {L"\r  Foo  ", true, L"Foo"},
 279   {L" Foo bar ", true, L"Foo bar"},
 280   {L"  \tFoo  bar  \n", true, L"Foo bar"},
 281   {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
 282 };
 283
 284 TEST(StringUtilTest, CollapseWhitespace) {
 285   for (size_t i = 0; i < arraysize(collapse_cases); ++i) {
 286     const collapse_case& value = collapse_cases[i];
 287     EXPECT_EQ(WideToUTF16(value.output),
 288               CollapseWhitespace(WideToUTF16(value.input), value.trim));
 289   }
 290 }
 291
 292 static const struct collapse_case_ascii {
 293   const char* input;
 294   const bool trim;
 295   const char* output;
 296 } collapse_cases_ascii[] = {
 297   {" Google Video ", false, "Google Video"},
 298   {"Google Video", false, "Google Video"},
 299   {"", false, ""},
 300   {"  ", false, ""},
 301   {"\t\rTest String\n", false, "Test String"},
 302   {"    Test     \n  \t String    ", false, "Test String"},
 303   {"   Test String", false, "Test String"},
 304   {"Test String    ", false, "Test String"},
 305   {"Test String", false, "Test String"},
 306   {"", true, ""},
 307   {"\n", true, ""},
 308   {"  \r  ", true, ""},
 309   {"\nFoo", true, "Foo"},
 310   {"\r  Foo  ", true, "Foo"},
 311   {" Foo bar ", true, "Foo bar"},
 312   {"  \tFoo  bar  \n", true, "Foo bar"},
 313   {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
 314 };
 315
 316 TEST(StringUtilTest, CollapseWhitespaceASCII) {
 317   for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) {
 318     const collapse_case_ascii& value = collapse_cases_ascii[i];
 319     EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim));
 320   }
 321 }
 322
 323 TEST(StringUtilTest, IsStringUTF8) {
 324   EXPECT_TRUE(IsStringUTF8("abc"));
 325   EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
 326   EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
 327   EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
 328   EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
 329   EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc"));  // UTF-8 BOM
 330
 331   // surrogate code points
 332   EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
 333   EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
 334   EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
 335
 336   // overlong sequences
 337   EXPECT_FALSE(IsStringUTF8("\xc0\x80"));  // U+0000
 338   EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81"));  // "AB"
 339   EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80"));  // U+0000
 340   EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80"));  // U+0080
 341   EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf"));  // U+07ff
 342   EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D"));  // U+000D
 343   EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91"));  // U+0091
 344   EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80"));  // U+0800
 345   EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf"));  // U+FEFF (BOM)
 346   EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf"));  // U+003F
 347   EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5"));  // U+00A5
 348
 349   // Beyond U+10FFFF (the upper limit of Unicode codespace)
 350   EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80"));  // U+110000
 351   EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf"));  // 5 bytes
 352   EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80"));  // 6 bytes
 353
 354   // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
 355   EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
 356   EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
 357   EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
 358   EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
 359
 360   // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
 361   EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe"));  // U+FFFE)
 362   EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe"));  // U+1FFFE
 363   EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf"));  // U+10FFFF
 364   EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90"));  // U+FDD0
 365   EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf"));  // U+FDEF
 366   // Strings in legacy encodings. We can certainly make up strings
 367   // in a legacy encoding that are valid in UTF-8, but in real data,
 368   // most of them are invalid as UTF-8.
 369   EXPECT_FALSE(IsStringUTF8("caf\xe9"));  // cafe with U+00E9 in ISO-8859-1
 370   EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2"));  // U+AC00, U+AC001 in EUC-KR
 371   EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e"));  // U+4F60 U+597D in Big5
 372   // "abc" with U+201[CD] in windows-125[0-8]
 373   EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
 374   // U+0639 U+064E U+0644 U+064E in ISO-8859-6
 375   EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
 376   // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
 377   EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
 378
 379   // Check that we support Embedded Nulls. The first uses the canonical UTF-8
 380   // representation, and the second uses a 2-byte sequence. The second version
 381   // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
 382   // given codepoint must be used.
 383   static const char kEmbeddedNull[] = "embedded\0null";
 384   EXPECT_TRUE(IsStringUTF8(
 385       std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));
 386   EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
 387 }
 388
 389 TEST(StringUtilTest, IsStringASCII) {
 390   static char char_ascii[] =
 391       "0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF";
 392   static char16 char16_ascii[] = {
 393       '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', 'A',
 394       'B', 'C', 'D', 'E', 'F', '0', '1', '2', '3', '4', '5', '6',
 395       '7', '8', '9', '0', 'A', 'B', 'C', 'D', 'E', 'F', 0 };
 396
 397   // Test a variety of the fragment start positions and lengths in order to make
 398   // sure that bit masking in IsStringASCII works correctly.
 399   // Also, test that a non-ASCII character will be detected regardless of its
 400   // position inside the string.
 401   {
 402     const size_t string_length = arraysize(char_ascii) - 1;
 403     for (size_t offset = 0; offset < 8; ++offset) {
 404       for (size_t len = 0, max_len = string_length - offset; len < max_len;
 405            ++len) {
 406         EXPECT_TRUE(IsStringASCII(StringPiece(char_ascii + offset, len)));
 407         for (size_t char_pos = offset; char_pos < len; ++char_pos) {
 408           char_ascii[char_pos] |= '\x80';
 409           EXPECT_FALSE(IsStringASCII(StringPiece(char_ascii + offset, len)));
 410           char_ascii[char_pos] &= ~'\x80';
 411         }
 412       }
 413     }
 414   }
 415
 416   {
 417     const size_t string_length = arraysize(char16_ascii) - 1;
 418     for (size_t offset = 0; offset < 4; ++offset) {
 419       for (size_t len = 0, max_len = string_length - offset; len < max_len;
 420            ++len) {
 421         EXPECT_TRUE(IsStringASCII(StringPiece16(char16_ascii + offset, len)));
 422         for (size_t char_pos = offset; char_pos < len; ++char_pos) {
 423           char16_ascii[char_pos] |= 0x80;
 424           EXPECT_FALSE(
 425               IsStringASCII(StringPiece16(char16_ascii + offset, len)));
 426           char16_ascii[char_pos] &= ~0x80;
 427           // Also test when the upper half is non-zero.
 428           char16_ascii[char_pos] |= 0x100;
 429           EXPECT_FALSE(
 430               IsStringASCII(StringPiece16(char16_ascii + offset, len)));
 431           char16_ascii[char_pos] &= ~0x100;
 432         }
 433       }
 434     }
 435   }
 436 }
 437
 438 TEST(StringUtilTest, ConvertASCII) {
 439   static const char* char_cases[] = {
 440     "Google Video",
 441     "Hello, world\n",
 442     "0123ABCDwxyz \a\b\t\r\n!+,.~"
 443   };
 444
 445   static const wchar_t* const wchar_cases[] = {
 446     L"Google Video",
 447     L"Hello, world\n",
 448     L"0123ABCDwxyz \a\b\t\r\n!+,.~"
 449   };
 450
 451   for (size_t i = 0; i < arraysize(char_cases); ++i) {
 452     EXPECT_TRUE(IsStringASCII(char_cases[i]));
 453     string16 utf16 = ASCIIToUTF16(char_cases[i]);
 454     EXPECT_EQ(WideToUTF16(wchar_cases[i]), utf16);
 455
 456     std::string ascii = UTF16ToASCII(WideToUTF16(wchar_cases[i]));
 457     EXPECT_EQ(char_cases[i], ascii);
 458   }
 459
 460   EXPECT_FALSE(IsStringASCII("Google \x80Video"));
 461
 462   // Convert empty strings.
 463   string16 empty16;
 464   std::string empty;
 465   EXPECT_EQ(empty, UTF16ToASCII(empty16));
 466   EXPECT_EQ(empty16, ASCIIToUTF16(empty));
 467
 468   // Convert strings with an embedded NUL character.
 469   const char chars_with_nul[] = "test\0string";
 470   const int length_with_nul = arraysize(chars_with_nul) - 1;
 471   std::string string_with_nul(chars_with_nul, length_with_nul);
 472   std::wstring wide_with_nul = ASCIIToWide(string_with_nul);
 473   EXPECT_EQ(static_cast<std::wstring::size_type>(length_with_nul),
 474             wide_with_nul.length());
 475   std::string narrow_with_nul = UTF16ToASCII(WideToUTF16(wide_with_nul));
 476   EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul),
 477             narrow_with_nul.length());
 478   EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));
 479 }
 480
 481 TEST(StringUtilTest, ToUpperASCII) {
 482   EXPECT_EQ('C', ToUpperASCII('C'));
 483   EXPECT_EQ('C', ToUpperASCII('c'));
 484   EXPECT_EQ('2', ToUpperASCII('2'));
 485
 486   EXPECT_EQ(L'C', ToUpperASCII(L'C'));
 487   EXPECT_EQ(L'C', ToUpperASCII(L'c'));
 488   EXPECT_EQ(L'2', ToUpperASCII(L'2'));
 489
 490   std::string in_place_a("Cc2");
 491   StringToUpperASCII(&in_place_a);
 492   EXPECT_EQ("CC2", in_place_a);
 493
 494   std::wstring in_place_w(L"Cc2");
 495   StringToUpperASCII(&in_place_w);
 496   EXPECT_EQ(L"CC2", in_place_w);
 497
 498   std::string original_a("Cc2");
 499   std::string upper_a = StringToUpperASCII(original_a);
 500   EXPECT_EQ("CC2", upper_a);
 501
 502   std::wstring original_w(L"Cc2");
 503   std::wstring upper_w = StringToUpperASCII(original_w);
 504   EXPECT_EQ(L"CC2", upper_w);
 505 }
 506
 507 TEST(StringUtilTest, LowerCaseEqualsASCII) {
 508   static const struct {
 509     const char*    src_a;
 510     const char*    dst;
 511   } lowercase_cases[] = {
 512     { "FoO", "foo" },
 513     { "foo", "foo" },
 514     { "FOO", "foo" },
 515   };
 516
 517   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(lowercase_cases); ++i) {
 518     EXPECT_TRUE(LowerCaseEqualsASCII(ASCIIToUTF16(lowercase_cases[i].src_a),
 519                                      lowercase_cases[i].dst));
 520     EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a,
 521                                      lowercase_cases[i].dst));
 522   }
 523 }
 524
 525 TEST(StringUtilTest, FormatBytesUnlocalized) {
 526   static const struct {
 527     int64 bytes;
 528     const char* expected;
 529   } cases[] = {
 530     // Expected behavior: we show one post-decimal digit when we have
 531     // under two pre-decimal digits, except in cases where it makes no
 532     // sense (zero or bytes).
 533     // Since we switch units once we cross the 1000 mark, this keeps
 534     // the display of file sizes or bytes consistently around three
 535     // digits.
 536     {0, "0 B"},
 537     {512, "512 B"},
 538     {1024*1024, "1.0 MB"},
 539     {1024*1024*1024, "1.0 GB"},
 540     {10LL*1024*1024*1024, "10.0 GB"},
 541     {99LL*1024*1024*1024, "99.0 GB"},
 542     {105LL*1024*1024*1024, "105 GB"},
 543     {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"},
 544     {~(1LL<<63), "8192 PB"},
 545
 546     {99*1024 + 103, "99.1 kB"},
 547     {1024*1024 + 103, "1.0 MB"},
 548     {1024*1024 + 205 * 1024, "1.2 MB"},
 549     {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"},
 550     {10LL*1024*1024*1024, "10.0 GB"},
 551     {100LL*1024*1024*1024, "100 GB"},
 552   };
 553
 554   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
 555     EXPECT_EQ(ASCIIToUTF16(cases[i].expected),
 556               FormatBytesUnlocalized(cases[i].bytes));
 557   }
 558 }
 559 TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
 560   static const struct {
 561     const char* str;
 562     string16::size_type start_offset;
 563     const char* find_this;
 564     const char* replace_with;
 565     const char* expected;
 566   } cases[] = {
 567     {"aaa", 0, "a", "b", "bbb"},
 568     {"abb", 0, "ab", "a", "ab"},
 569     {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
 570     {"Not found", 0, "x", "0", "Not found"},
 571     {"Not found again", 5, "x", "0", "Not found again"},
 572     {" Making it much longer ", 0, " ", "Four score and seven years ago",
 573      "Four score and seven years agoMakingFour score and seven years agoit"
 574      "Four score and seven years agomuchFour score and seven years agolonger"
 575      "Four score and seven years ago"},
 576     {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
 577     {"Replace me only me once", 9, "me ", "", "Replace me only once"},
 578     {"abababab", 2, "ab", "c", "abccc"},
 579   };
 580
 581   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
 582     string16 str = ASCIIToUTF16(cases[i].str);
 583     ReplaceSubstringsAfterOffset(&str, cases[i].start_offset,
 584                                  ASCIIToUTF16(cases[i].find_this),
 585                                  ASCIIToUTF16(cases[i].replace_with));
 586     EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
 587   }
 588 }
 589
 590 TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
 591   static const struct {
 592     const char* str;
 593     string16::size_type start_offset;
 594     const char* find_this;
 595     const char* replace_with;
 596     const char* expected;
 597   } cases[] = {
 598     {"aaa", 0, "a", "b", "baa"},
 599     {"abb", 0, "ab", "a", "ab"},
 600     {"Removing some substrings inging", 0, "ing", "",
 601       "Remov some substrings inging"},
 602     {"Not found", 0, "x", "0", "Not found"},
 603     {"Not found again", 5, "x", "0", "Not found again"},
 604     {" Making it much longer ", 0, " ", "Four score and seven years ago",
 605      "Four score and seven years agoMaking it much longer "},
 606     {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
 607     {"Replace me only me once", 4, "me ", "", "Replace only me once"},
 608     {"abababab", 2, "ab", "c", "abcabab"},
 609   };
 610
 611   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
 612     string16 str = ASCIIToUTF16(cases[i].str);
 613     ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset,
 614                                      ASCIIToUTF16(cases[i].find_this),
 615                                      ASCIIToUTF16(cases[i].replace_with));
 616     EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
 617   }
 618 }
 619
 620 TEST(StringUtilTest, HexDigitToInt) {
 621   EXPECT_EQ(0, HexDigitToInt('0'));
 622   EXPECT_EQ(1, HexDigitToInt('1'));
 623   EXPECT_EQ(2, HexDigitToInt('2'));
 624   EXPECT_EQ(3, HexDigitToInt('3'));
 625   EXPECT_EQ(4, HexDigitToInt('4'));
 626   EXPECT_EQ(5, HexDigitToInt('5'));
 627   EXPECT_EQ(6, HexDigitToInt('6'));
 628   EXPECT_EQ(7, HexDigitToInt('7'));
 629   EXPECT_EQ(8, HexDigitToInt('8'));
 630   EXPECT_EQ(9, HexDigitToInt('9'));
 631   EXPECT_EQ(10, HexDigitToInt('A'));
 632   EXPECT_EQ(11, HexDigitToInt('B'));
 633   EXPECT_EQ(12, HexDigitToInt('C'));
 634   EXPECT_EQ(13, HexDigitToInt('D'));
 635   EXPECT_EQ(14, HexDigitToInt('E'));
 636   EXPECT_EQ(15, HexDigitToInt('F'));
 637
 638   // Verify the lower case as well.
 639   EXPECT_EQ(10, HexDigitToInt('a'));
 640   EXPECT_EQ(11, HexDigitToInt('b'));
 641   EXPECT_EQ(12, HexDigitToInt('c'));
 642   EXPECT_EQ(13, HexDigitToInt('d'));
 643   EXPECT_EQ(14, HexDigitToInt('e'));
 644   EXPECT_EQ(15, HexDigitToInt('f'));
 645 }
 646
 647 // This checks where we can use the assignment operator for a va_list. We need
 648 // a way to do this since Visual C doesn't support va_copy, but assignment on
 649 // va_list is not guaranteed to be a copy. See StringAppendVT which uses this
 650 // capability.
 651 static void VariableArgsFunc(const char* format, ...) {
 652   va_list org;
 653   va_start(org, format);
 654
 655   va_list dup;
 656   GG_VA_COPY(dup, org);
 657   int i1 = va_arg(org, int);
 658   int j1 = va_arg(org, int);
 659   char* s1 = va_arg(org, char*);
 660   double d1 = va_arg(org, double);
 661   va_end(org);
 662
 663   int i2 = va_arg(dup, int);
 664   int j2 = va_arg(dup, int);
 665   char* s2 = va_arg(dup, char*);
 666   double d2 = va_arg(dup, double);
 667
 668   EXPECT_EQ(i1, i2);
 669   EXPECT_EQ(j1, j2);
 670   EXPECT_STREQ(s1, s2);
 671   EXPECT_EQ(d1, d2);
 672
 673   va_end(dup);
 674 }
 675
 676 TEST(StringUtilTest, VAList) {
 677   VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21);
 678 }
 679
 680 // Test for Tokenize
 681 template <typename STR>
 682 void TokenizeTest() {
 683   std::vector<STR> r;
 684   size_t size;
 685
 686   size = Tokenize(STR("This is a string"), STR(" "), &r);
 687   EXPECT_EQ(4U, size);
 688   ASSERT_EQ(4U, r.size());
 689   EXPECT_EQ(r[0], STR("This"));
 690   EXPECT_EQ(r[1], STR("is"));
 691   EXPECT_EQ(r[2], STR("a"));
 692   EXPECT_EQ(r[3], STR("string"));
 693   r.clear();
 694
 695   size = Tokenize(STR("one,two,three"), STR(","), &r);
 696   EXPECT_EQ(3U, size);
 697   ASSERT_EQ(3U, r.size());
 698   EXPECT_EQ(r[0], STR("one"));
 699   EXPECT_EQ(r[1], STR("two"));
 700   EXPECT_EQ(r[2], STR("three"));
 701   r.clear();
 702
 703   size = Tokenize(STR("one,two:three;four"), STR(",:"), &r);
 704   EXPECT_EQ(3U, size);
 705   ASSERT_EQ(3U, r.size());
 706   EXPECT_EQ(r[0], STR("one"));
 707   EXPECT_EQ(r[1], STR("two"));
 708   EXPECT_EQ(r[2], STR("three;four"));
 709   r.clear();
 710
 711   size = Tokenize(STR("one,two:three;four"), STR(";,:"), &r);
 712   EXPECT_EQ(4U, size);
 713   ASSERT_EQ(4U, r.size());
 714   EXPECT_EQ(r[0], STR("one"));
 715   EXPECT_EQ(r[1], STR("two"));
 716   EXPECT_EQ(r[2], STR("three"));
 717   EXPECT_EQ(r[3], STR("four"));
 718   r.clear();
 719
 720   size = Tokenize(STR("one, two, three"), STR(","), &r);
 721   EXPECT_EQ(3U, size);
 722   ASSERT_EQ(3U, r.size());
 723   EXPECT_EQ(r[0], STR("one"));
 724   EXPECT_EQ(r[1], STR(" two"));
 725   EXPECT_EQ(r[2], STR(" three"));
 726   r.clear();
 727
 728   size = Tokenize(STR("one, two, three, "), STR(","), &r);
 729   EXPECT_EQ(4U, size);
 730   ASSERT_EQ(4U, r.size());
 731   EXPECT_EQ(r[0], STR("one"));
 732   EXPECT_EQ(r[1], STR(" two"));
 733   EXPECT_EQ(r[2], STR(" three"));
 734   EXPECT_EQ(r[3], STR(" "));
 735   r.clear();
 736
 737   size = Tokenize(STR("one, two, three,"), STR(","), &r);
 738   EXPECT_EQ(3U, size);
 739   ASSERT_EQ(3U, r.size());
 740   EXPECT_EQ(r[0], STR("one"));
 741   EXPECT_EQ(r[1], STR(" two"));
 742   EXPECT_EQ(r[2], STR(" three"));
 743   r.clear();
 744
 745   size = Tokenize(STR(), STR(","), &r);
 746   EXPECT_EQ(0U, size);
 747   ASSERT_EQ(0U, r.size());
 748   r.clear();
 749
 750   size = Tokenize(STR(","), STR(","), &r);
 751   EXPECT_EQ(0U, size);
 752   ASSERT_EQ(0U, r.size());
 753   r.clear();
 754
 755   size = Tokenize(STR(",;:."), STR(".:;,"), &r);
 756   EXPECT_EQ(0U, size);
 757   ASSERT_EQ(0U, r.size());
 758   r.clear();
 759
 760   size = Tokenize(STR("\t\ta\t"), STR("\t"), &r);
 761   EXPECT_EQ(1U, size);
 762   ASSERT_EQ(1U, r.size());
 763   EXPECT_EQ(r[0], STR("a"));
 764   r.clear();
 765
 766   size = Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r);
 767   EXPECT_EQ(2U, size);
 768   ASSERT_EQ(2U, r.size());
 769   EXPECT_EQ(r[0], STR("\ta\t"));
 770   EXPECT_EQ(r[1], STR("b\tcc"));
 771   r.clear();
 772 }
 773
 774 TEST(StringUtilTest, TokenizeStdString) {
 775   TokenizeTest<std::string>();
 776 }
 777
 778 TEST(StringUtilTest, TokenizeStringPiece) {
 779   TokenizeTest<base::StringPiece>();
 780 }
 781
 782 // Test for JoinString
 783 TEST(StringUtilTest, JoinString) {
 784   std::vector<std::string> in;
 785   EXPECT_EQ("", JoinString(in, ','));
 786
 787   in.push_back("a");
 788   EXPECT_EQ("a", JoinString(in, ','));
 789
 790   in.push_back("b");
 791   in.push_back("c");
 792   EXPECT_EQ("a,b,c", JoinString(in, ','));
 793
 794   in.push_back(std::string());
 795   EXPECT_EQ("a,b,c,", JoinString(in, ','));
 796   in.push_back(" ");
 797   EXPECT_EQ("a|b|c|| ", JoinString(in, '|'));
 798 }
 799
 800 // Test for JoinString overloaded with std::string separator
 801 TEST(StringUtilTest, JoinStringWithString) {
 802   std::string separator(", ");
 803   std::vector<std::string> parts;
 804   EXPECT_EQ(std::string(), JoinString(parts, separator));
 805
 806   parts.push_back("a");
 807   EXPECT_EQ("a", JoinString(parts, separator));
 808
 809   parts.push_back("b");
 810   parts.push_back("c");
 811   EXPECT_EQ("a, b, c", JoinString(parts, separator));
 812
 813   parts.push_back(std::string());
 814   EXPECT_EQ("a, b, c, ", JoinString(parts, separator));
 815   parts.push_back(" ");
 816   EXPECT_EQ("a|b|c|| ", JoinString(parts, "|"));
 817 }
 818
 819 // Test for JoinString overloaded with string16 separator
 820 TEST(StringUtilTest, JoinStringWithString16) {
 821   string16 separator = ASCIIToUTF16(", ");
 822   std::vector<string16> parts;
 823   EXPECT_EQ(string16(), JoinString(parts, separator));
 824
 825   parts.push_back(ASCIIToUTF16("a"));
 826   EXPECT_EQ(ASCIIToUTF16("a"), JoinString(parts, separator));
 827
 828   parts.push_back(ASCIIToUTF16("b"));
 829   parts.push_back(ASCIIToUTF16("c"));
 830   EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(parts, separator));
 831
 832   parts.push_back(ASCIIToUTF16(""));
 833   EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(parts, separator));
 834   parts.push_back(ASCIIToUTF16(" "));
 835   EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(parts, ASCIIToUTF16("|")));
 836 }
 837
 838 TEST(StringUtilTest, StartsWith) {
 839   EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true));
 840   EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true));
 841   EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false));
 842   EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false));
 843   EXPECT_FALSE(StartsWithASCII("java", "javascript", true));
 844   EXPECT_FALSE(StartsWithASCII("java", "javascript", false));
 845   EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", false));
 846   EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", true));
 847   EXPECT_TRUE(StartsWithASCII("java", std::string(), false));
 848   EXPECT_TRUE(StartsWithASCII("java", std::string(), true));
 849
 850   EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"),
 851                          ASCIIToUTF16("javascript"), true));
 852   EXPECT_FALSE(StartsWith(ASCIIToUTF16("JavaScript:url"),
 853                           ASCIIToUTF16("javascript"), true));
 854   EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"),
 855                          ASCIIToUTF16("javascript"), false));
 856   EXPECT_TRUE(StartsWith(ASCIIToUTF16("JavaScript:url"),
 857                          ASCIIToUTF16("javascript"), false));
 858   EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"),
 859                           ASCIIToUTF16("javascript"), true));
 860   EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"),
 861                           ASCIIToUTF16("javascript"), false));
 862   EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"), false));
 863   EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"), true));
 864   EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(), false));
 865   EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(), true));
 866 }
 867
 868 TEST(StringUtilTest, EndsWith) {
 869   EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"),
 870                        ASCIIToUTF16(".plugin"), true));
 871   EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.Plugin"),
 872                         ASCIIToUTF16(".plugin"), true));
 873   EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"),
 874                        ASCIIToUTF16(".plugin"), false));
 875   EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.Plugin"),
 876                        ASCIIToUTF16(".plugin"), false));
 877   EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"), true));
 878   EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"), false));
 879   EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"),
 880                         ASCIIToUTF16(".plugin"), true));
 881   EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"),
 882                         ASCIIToUTF16(".plugin"), false));
 883   EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"), false));
 884   EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"), true));
 885   EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(), false));
 886   EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(), true));
 887   EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"),
 888                        ASCIIToUTF16(".plugin"), false));
 889   EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"), ASCIIToUTF16(".plugin"), true));
 890   EXPECT_TRUE(EndsWith(string16(), string16(), false));
 891   EXPECT_TRUE(EndsWith(string16(), string16(), true));
 892 }
 893
 894 TEST(StringUtilTest, GetStringFWithOffsets) {
 895   std::vector<string16> subst;
 896   subst.push_back(ASCIIToUTF16("1"));
 897   subst.push_back(ASCIIToUTF16("2"));
 898   std::vector<size_t> offsets;
 899
 900   ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
 901                             subst,
 902                             &offsets);
 903   EXPECT_EQ(2U, offsets.size());
 904   EXPECT_EQ(7U, offsets[0]);
 905   EXPECT_EQ(25U, offsets[1]);
 906   offsets.clear();
 907
 908   ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
 909                             subst,
 910                             &offsets);
 911   EXPECT_EQ(2U, offsets.size());
 912   EXPECT_EQ(25U, offsets[0]);
 913   EXPECT_EQ(7U, offsets[1]);
 914   offsets.clear();
 915 }
 916
 917 TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
 918   // Test whether replacestringplaceholders works as expected when there
 919   // are fewer inputs than outputs.
 920   std::vector<string16> subst;
 921   subst.push_back(ASCIIToUTF16("9a"));
 922   subst.push_back(ASCIIToUTF16("8b"));
 923   subst.push_back(ASCIIToUTF16("7c"));
 924
 925   string16 formatted =
 926       ReplaceStringPlaceholders(
 927           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL);
 928
 929   EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"));
 930 }
 931
 932 TEST(StringUtilTest, ReplaceStringPlaceholders) {
 933   std::vector<string16> subst;
 934   subst.push_back(ASCIIToUTF16("9a"));
 935   subst.push_back(ASCIIToUTF16("8b"));
 936   subst.push_back(ASCIIToUTF16("7c"));
 937   subst.push_back(ASCIIToUTF16("6d"));
 938   subst.push_back(ASCIIToUTF16("5e"));
 939   subst.push_back(ASCIIToUTF16("4f"));
 940   subst.push_back(ASCIIToUTF16("3g"));
 941   subst.push_back(ASCIIToUTF16("2h"));
 942   subst.push_back(ASCIIToUTF16("1i"));
 943
 944   string16 formatted =
 945       ReplaceStringPlaceholders(
 946           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL);
 947
 948   EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"));
 949 }
 950
 951 TEST(StringUtilTest, ReplaceStringPlaceholdersMoreThan9Replacements) {
 952   std::vector<string16> subst;
 953   subst.push_back(ASCIIToUTF16("9a"));
 954   subst.push_back(ASCIIToUTF16("8b"));
 955   subst.push_back(ASCIIToUTF16("7c"));
 956   subst.push_back(ASCIIToUTF16("6d"));
 957   subst.push_back(ASCIIToUTF16("5e"));
 958   subst.push_back(ASCIIToUTF16("4f"));
 959   subst.push_back(ASCIIToUTF16("3g"));
 960   subst.push_back(ASCIIToUTF16("2h"));
 961   subst.push_back(ASCIIToUTF16("1i"));
 962   subst.push_back(ASCIIToUTF16("0j"));
 963   subst.push_back(ASCIIToUTF16("-1k"));
 964   subst.push_back(ASCIIToUTF16("-2l"));
 965   subst.push_back(ASCIIToUTF16("-3m"));
 966   subst.push_back(ASCIIToUTF16("-4n"));
 967
 968   string16 formatted =
 969       ReplaceStringPlaceholders(
 970           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i,"
 971                        "$10j,$11k,$12l,$13m,$14n,$1"), subst, NULL);
 972
 973   EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,"
 974                                     "1ii,0jj,-1kk,-2ll,-3mm,-4nn,9a"));
 975 }
 976
 977 TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
 978   std::vector<std::string> subst;
 979   subst.push_back("9a");
 980   subst.push_back("8b");
 981   subst.push_back("7c");
 982   subst.push_back("6d");
 983   subst.push_back("5e");
 984   subst.push_back("4f");
 985   subst.push_back("3g");
 986   subst.push_back("2h");
 987   subst.push_back("1i");
 988
 989   std::string formatted =
 990       ReplaceStringPlaceholders(
 991           "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL);
 992
 993   EXPECT_EQ(formatted, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii");
 994 }
 995
 996 TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
 997   std::vector<std::string> subst;
 998   subst.push_back("a");
 999   subst.push_back("b");
1000   subst.push_back("c");
1001   EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, NULL),
1002             "$1 $$2 $$$3");
1003 }
1004
1005 TEST(StringUtilTest, MatchPatternTest) {
1006   EXPECT_TRUE(MatchPattern("www.google.com", "*.com"));
1007   EXPECT_TRUE(MatchPattern("www.google.com", "*"));
1008   EXPECT_FALSE(MatchPattern("www.google.com", "www*.g*.org"));
1009   EXPECT_TRUE(MatchPattern("Hello", "H?l?o"));
1010   EXPECT_FALSE(MatchPattern("www.google.com", "http://*)"));
1011   EXPECT_FALSE(MatchPattern("www.msn.com", "*.COM"));
1012   EXPECT_TRUE(MatchPattern("Hello*1234", "He??o\\*1*"));
1013   EXPECT_FALSE(MatchPattern("", "*.*"));
1014   EXPECT_TRUE(MatchPattern("", "*"));
1015   EXPECT_TRUE(MatchPattern("", "?"));
1016   EXPECT_TRUE(MatchPattern("", ""));
1017   EXPECT_FALSE(MatchPattern("Hello", ""));
1018   EXPECT_TRUE(MatchPattern("Hello*", "Hello*"));
1019   // Stop after a certain recursion depth.
1020   EXPECT_FALSE(MatchPattern("123456789012345678", "?????????????????*"));
1021
1022   // Test UTF8 matching.
1023   EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0", "*\xe2\x99\xa0"));
1024   EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0.", "heart: ?."));
1025   EXPECT_TRUE(MatchPattern("hearts: \xe2\x99\xa0\xe2\x99\xa0", "*"));
1026   // Invalid sequences should be handled as a single invalid character.
1027   EXPECT_TRUE(MatchPattern("invalid: \xef\xbf\xbe", "invalid: ?"));
1028   // If the pattern has invalid characters, it shouldn't match anything.
1029   EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80"));
1030
1031   // Test UTF16 character matching.
1032   EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"),
1033                            UTF8ToUTF16("*.com")));
1034   EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"),
1035                            UTF8ToUTF16("He??o\\*1*")));
1036
1037   // This test verifies that consecutive wild cards are collapsed into 1
1038   // wildcard (when this doesn't occur, MatchPattern reaches it's maximum
1039   // recursion depth).
1040   EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello"),
1041                            UTF8ToUTF16("He********************************o")));
1042 }
1043
1044 TEST(StringUtilTest, LcpyTest) {
1045   // Test the normal case where we fit in our buffer.
1046   {
1047     char dst[10];
1048     wchar_t wdst[10];
1049     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1050     EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
1051     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1052     EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
1053   }
1054
1055   // Test dst_size == 0, nothing should be written to |dst| and we should
1056   // have the equivalent of strlen(src).
1057   {
1058     char dst[2] = {1, 2};
1059     wchar_t wdst[2] = {1, 2};
1060     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", 0));
1061     EXPECT_EQ(1, dst[0]);
1062     EXPECT_EQ(2, dst[1]);
1063     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", 0));
1064     EXPECT_EQ(static_cast<wchar_t>(1), wdst[0]);
1065     EXPECT_EQ(static_cast<wchar_t>(2), wdst[1]);
1066   }
1067
1068   // Test the case were we _just_ competely fit including the null.
1069   {
1070     char dst[8];
1071     wchar_t wdst[8];
1072     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1073     EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
1074     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1075     EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
1076   }
1077
1078   // Test the case were we we are one smaller, so we can't fit the null.
1079   {
1080     char dst[7];
1081     wchar_t wdst[7];
1082     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1083     EXPECT_EQ(0, memcmp(dst, "abcdef", 7));
1084     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1085     EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7));
1086   }
1087
1088   // Test the case were we are just too small.
1089   {
1090     char dst[3];
1091     wchar_t wdst[3];
1092     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1093     EXPECT_EQ(0, memcmp(dst, "ab", 3));
1094     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1095     EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3));
1096   }
1097 }
1098
1099 TEST(StringUtilTest, WprintfFormatPortabilityTest) {
1100   static const struct {
1101     const wchar_t* input;
1102     bool portable;
1103   } cases[] = {
1104     { L"%ls", true },
1105     { L"%s", false },
1106     { L"%S", false },
1107     { L"%lS", false },
1108     { L"Hello, %s", false },
1109     { L"%lc", true },
1110     { L"%c", false },
1111     { L"%C", false },
1112     { L"%lC", false },
1113     { L"%ls %s", false },
1114     { L"%s %ls", false },
1115     { L"%s %ls %s", false },
1116     { L"%f", true },
1117     { L"%f %F", false },
1118     { L"%d %D", false },
1119     { L"%o %O", false },
1120     { L"%u %U", false },
1121     { L"%f %d %o %u", true },
1122     { L"%-8d (%02.1f%)", true },
1123     { L"% 10s", false },
1124     { L"% 10ls", true }
1125   };
1126   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
1127     EXPECT_EQ(cases[i].portable, base::IsWprintfFormatPortable(cases[i].input));
1128 }
1129
1130 TEST(StringUtilTest, RemoveChars) {
1131   const char* kRemoveChars = "-/+*";
1132   std::string input = "A-+bc/d!*";
1133   EXPECT_TRUE(RemoveChars(input, kRemoveChars, &input));
1134   EXPECT_EQ("Abcd!", input);
1135
1136   // No characters match kRemoveChars.
1137   EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
1138   EXPECT_EQ("Abcd!", input);
1139
1140   // Empty string.
1141   input.clear();
1142   EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
1143   EXPECT_EQ(std::string(), input);
1144 }
1145
1146 TEST(StringUtilTest, ReplaceChars) {
1147   struct TestData {
1148     const char* input;
1149     const char* replace_chars;
1150     const char* replace_with;
1151     const char* output;
1152     bool result;
1153   } cases[] = {
1154     { "", "", "", "", false },
1155     { "test", "", "", "test", false },
1156     { "test", "", "!", "test", false },
1157     { "test", "z", "!", "test", false },
1158     { "test", "e", "!", "t!st", true },
1159     { "test", "e", "!?", "t!?st", true },
1160     { "test", "ez", "!", "t!st", true },
1161     { "test", "zed", "!?", "t!?st", true },
1162     { "test", "t", "!?", "!?es!?", true },
1163     { "test", "et", "!>", "!>!>s!>", true },
1164     { "test", "zest", "!", "!!!!", true },
1165     { "test", "szt", "!", "!e!!", true },
1166     { "test", "t", "test", "testestest", true },
1167   };
1168
1169   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
1170     std::string output;
1171     bool result = ReplaceChars(cases[i].input,
1172                                cases[i].replace_chars,
1173                                cases[i].replace_with,
1174                                &output);
1175     EXPECT_EQ(cases[i].result, result);
1176     EXPECT_EQ(cases[i].output, output);
1177   }
1178 }
1179
1180 TEST(StringUtilTest, ContainsOnlyChars) {
1181   // Providing an empty list of characters should return false but for the empty
1182   // string.
1183   EXPECT_TRUE(ContainsOnlyChars(std::string(), std::string()));
1184   EXPECT_FALSE(ContainsOnlyChars("Hello", std::string()));
1185
1186   EXPECT_TRUE(ContainsOnlyChars(std::string(), "1234"));
1187   EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
1188   EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
1189   EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
1190   EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));
1191
1192   EXPECT_TRUE(ContainsOnlyChars(std::string(), kWhitespaceASCII));
1193   EXPECT_TRUE(ContainsOnlyChars(" ", kWhitespaceASCII));
1194   EXPECT_TRUE(ContainsOnlyChars("\t", kWhitespaceASCII));
1195   EXPECT_TRUE(ContainsOnlyChars("\t \r \n  ", kWhitespaceASCII));
1196   EXPECT_FALSE(ContainsOnlyChars("a", kWhitespaceASCII));
1197   EXPECT_FALSE(ContainsOnlyChars("\thello\r \n  ", kWhitespaceASCII));
1198
1199   EXPECT_TRUE(ContainsOnlyChars(string16(), kWhitespaceUTF16));
1200   EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16(" "), kWhitespaceUTF16));
1201   EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t"), kWhitespaceUTF16));
1202   EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t \r \n  "), kWhitespaceUTF16));
1203   EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("a"), kWhitespaceUTF16));
1204   EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("\thello\r \n  "),
1205                                   kWhitespaceUTF16));
1206 }
1207
1208 class WriteIntoTest : public testing::Test {
1209  protected:
1210   static void WritesCorrectly(size_t num_chars) {
1211     std::string buffer;
1212     char kOriginal[] = "supercali";
1213     strncpy(WriteInto(&buffer, num_chars + 1), kOriginal, num_chars);
1214     // Using std::string(buffer.c_str()) instead of |buffer| truncates the
1215     // string at the first \0.
1216     EXPECT_EQ(std::string(kOriginal,
1217                           std::min(num_chars, arraysize(kOriginal) - 1)),
1218               std::string(buffer.c_str()));
1219     EXPECT_EQ(num_chars, buffer.size());
1220   }
1221 };
1222
1223 TEST_F(WriteIntoTest, WriteInto) {
1224   // Validate that WriteInto reserves enough space and
1225   // sizes a string correctly.
1226   WritesCorrectly(1);
1227   WritesCorrectly(2);
1228   WritesCorrectly(5000);
1229
1230   // Validate that WriteInto doesn't modify other strings
1231   // when using a Copy-on-Write implementation.
1232   const char kLive[] = "live";
1233   const char kDead[] = "dead";
1234   const std::string live = kLive;
1235   std::string dead = live;
1236   strncpy(WriteInto(&dead, 5), kDead, 4);
1237   EXPECT_EQ(kDead, dead);
1238   EXPECT_EQ(4u, dead.size());
1239   EXPECT_EQ(kLive, live);
1240   EXPECT_EQ(4u, live.size());
1241 }
1242
1243 }  // namespace base