base/strings/string_util.cc

   1 // Copyright 2013 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "base/strings/string_util.h"
   6
   7 #include <ctype.h>
   8 #include <errno.h>
   9 #include <math.h>
  10 #include <stdarg.h>
  11 #include <stdio.h>
  12 #include <stdlib.h>
  13 #include <string.h>
  14 #include <time.h>
  15 #include <wchar.h>
  16 #include <wctype.h>
  17
  18 #include <algorithm>
  19 #include <vector>
  20
  21 #include "base/basictypes.h"
  22 #include "base/logging.h"
  23 #include "base/memory/singleton.h"
  24 #include "base/strings/string_split.h"
  25 #include "base/strings/utf_string_conversion_utils.h"
  26 #include "base/strings/utf_string_conversions.h"
  27 #include "base/third_party/icu/icu_utf.h"
  28 #include "build/build_config.h"
  29
  30 namespace base {
  31
  32 namespace {
  33
  34 // Force the singleton used by EmptyString[16] to be a unique type. This
  35 // prevents other code that might accidentally use Singleton<string> from
  36 // getting our internal one.
  37 struct EmptyStrings {
  38   EmptyStrings() {}
  39   const std::string s;
  40   const string16 s16;
  41
  42   static EmptyStrings* GetInstance() {
  43     return Singleton<EmptyStrings>::get();
  44   }
  45 };
  46
  47 // Used by ReplaceStringPlaceholders to track the position in the string of
  48 // replaced parameters.
  49 struct ReplacementOffset {
  50   ReplacementOffset(uintptr_t parameter, size_t offset)
  51       : parameter(parameter),
  52         offset(offset) {}
  53
  54   // Index of the parameter.
  55   uintptr_t parameter;
  56
  57   // Starting position in the string.
  58   size_t offset;
  59 };
  60
  61 static bool CompareParameter(const ReplacementOffset& elem1,
  62                              const ReplacementOffset& elem2) {
  63   return elem1.parameter < elem2.parameter;
  64 }
  65
  66 // Assuming that a pointer is the size of a "machine word", then
  67 // uintptr_t is an integer type that is also a machine word.
  68 typedef uintptr_t MachineWord;
  69 const uintptr_t kMachineWordAlignmentMask = sizeof(MachineWord) - 1;
  70
  71 inline bool IsAlignedToMachineWord(const void* pointer) {
  72   return !(reinterpret_cast<MachineWord>(pointer) & kMachineWordAlignmentMask);
  73 }
  74
  75 template<typename T> inline T* AlignToMachineWord(T* pointer) {
  76   return reinterpret_cast<T*>(reinterpret_cast<MachineWord>(pointer) &
  77                               ~kMachineWordAlignmentMask);
  78 }
  79
  80 template<size_t size, typename CharacterType> struct NonASCIIMask;
  81 template<> struct NonASCIIMask<4, char16> {
  82     static inline uint32_t value() { return 0xFF80FF80U; }
  83 };
  84 template<> struct NonASCIIMask<4, char> {
  85     static inline uint32_t value() { return 0x80808080U; }
  86 };
  87 template<> struct NonASCIIMask<8, char16> {
  88     static inline uint64_t value() { return 0xFF80FF80FF80FF80ULL; }
  89 };
  90 template<> struct NonASCIIMask<8, char> {
  91     static inline uint64_t value() { return 0x8080808080808080ULL; }
  92 };
  93 #if defined(WCHAR_T_IS_UTF32)
  94 template<> struct NonASCIIMask<4, wchar_t> {
  95     static inline uint32_t value() { return 0xFFFFFF80U; }
  96 };
  97 template<> struct NonASCIIMask<8, wchar_t> {
  98     static inline uint64_t value() { return 0xFFFFFF80FFFFFF80ULL; }
  99 };
 100 #endif  // WCHAR_T_IS_UTF32
 101
 102 // DO NOT USE. http://crbug.com/24917
 103 //
 104 // tolower() will given incorrect results for non-ASCII characters. Use the
 105 // ASCII version, base::i18n::ToLower, or base::i18n::FoldCase. This is here
 106 // for backwards-compat for StartsWith until such calls can be updated.
 107 struct CaseInsensitiveCompareDeprecated {
 108  public:
 109   bool operator()(char16 x, char16 y) const {
 110     return tolower(x) == tolower(y);
 111   }
 112 };
 113
 114 }  // namespace
 115
 116 bool IsWprintfFormatPortable(const wchar_t* format) {
 117   for (const wchar_t* position = format; *position != '\0'; ++position) {
 118     if (*position == '%') {
 119       bool in_specification = true;
 120       bool modifier_l = false;
 121       while (in_specification) {
 122         // Eat up characters until reaching a known specifier.
 123         if (*++position == '\0') {
 124           // The format string ended in the middle of a specification.  Call
 125           // it portable because no unportable specifications were found.  The
 126           // string is equally broken on all platforms.
 127           return true;
 128         }
 129
 130         if (*position == 'l') {
 131           // 'l' is the only thing that can save the 's' and 'c' specifiers.
 132           modifier_l = true;
 133         } else if (((*position == 's' || *position == 'c') && !modifier_l) ||
 134                    *position == 'S' || *position == 'C' || *position == 'F' ||
 135                    *position == 'D' || *position == 'O' || *position == 'U') {
 136           // Not portable.
 137           return false;
 138         }
 139
 140         if (wcschr(L"diouxXeEfgGaAcspn%", *position)) {
 141           // Portable, keep scanning the rest of the format string.
 142           in_specification = false;
 143         }
 144       }
 145     }
 146   }
 147
 148   return true;
 149 }
 150
 151 namespace {
 152
 153 template<typename StringType>
 154 StringType ToLowerASCIIImpl(BasicStringPiece<StringType> str) {
 155   StringType ret;
 156   ret.reserve(str.size());
 157   for (size_t i = 0; i < str.size(); i++)
 158     ret.push_back(ToLowerASCII(str[i]));
 159   return ret;
 160 }
 161
 162 template<typename StringType>
 163 StringType ToUpperASCIIImpl(BasicStringPiece<StringType> str) {
 164   StringType ret;
 165   ret.reserve(str.size());
 166   for (size_t i = 0; i < str.size(); i++)
 167     ret.push_back(ToUpperASCII(str[i]));
 168   return ret;
 169 }
 170
 171 }  // namespace
 172
 173 std::string ToLowerASCII(StringPiece str) {
 174   return ToLowerASCIIImpl<std::string>(str);
 175 }
 176
 177 string16 ToLowerASCII(StringPiece16 str) {
 178   return ToLowerASCIIImpl<string16>(str);
 179 }
 180
 181 std::string ToUpperASCII(StringPiece str) {
 182   return ToUpperASCIIImpl<std::string>(str);
 183 }
 184
 185 string16 ToUpperASCII(StringPiece16 str) {
 186   return ToUpperASCIIImpl<string16>(str);
 187 }
 188
 189 template<class StringType>
 190 int CompareCaseInsensitiveASCIIT(BasicStringPiece<StringType> a,
 191                                  BasicStringPiece<StringType> b) {
 192   // Find the first characters that aren't equal and compare them.  If the end
 193   // of one of the strings is found before a nonequal character, the lengths
 194   // of the strings are compared.
 195   size_t i = 0;
 196   while (i < a.length() && i < b.length()) {
 197     typename StringType::value_type lower_a = ToLowerASCII(a[i]);
 198     typename StringType::value_type lower_b = ToLowerASCII(b[i]);
 199     if (lower_a < lower_b)
 200       return -1;
 201     if (lower_a > lower_b)
 202       return 1;
 203     i++;
 204   }
 205
 206   // End of one string hit before finding a different character. Expect the
 207   // common case to be "strings equal" at this point so check that first.
 208   if (a.length() == b.length())
 209     return 0;
 210
 211   if (a.length() < b.length())
 212     return -1;
 213   return 1;
 214 }
 215
 216 int CompareCaseInsensitiveASCII(StringPiece a, StringPiece b) {
 217   return CompareCaseInsensitiveASCIIT<std::string>(a, b);
 218 }
 219
 220 int CompareCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b) {
 221   return CompareCaseInsensitiveASCIIT<string16>(a, b);
 222 }
 223
 224 bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b) {
 225   if (a.length() != b.length())
 226     return false;
 227   return CompareCaseInsensitiveASCIIT<std::string>(a, b) == 0;
 228 }
 229
 230 bool EqualsCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b) {
 231   if (a.length() != b.length())
 232     return false;
 233   return CompareCaseInsensitiveASCIIT<string16>(a, b) == 0;
 234 }
 235
 236 const std::string& EmptyString() {
 237   return EmptyStrings::GetInstance()->s;
 238 }
 239
 240 const string16& EmptyString16() {
 241   return EmptyStrings::GetInstance()->s16;
 242 }
 243
 244 template<typename STR>
 245 bool ReplaceCharsT(const STR& input,
 246                    const STR& replace_chars,
 247                    const STR& replace_with,
 248                    STR* output) {
 249   bool removed = false;
 250   size_t replace_length = replace_with.length();
 251
 252   *output = input;
 253
 254   size_t found = output->find_first_of(replace_chars);
 255   while (found != STR::npos) {
 256     removed = true;
 257     output->replace(found, 1, replace_with);
 258     found = output->find_first_of(replace_chars, found + replace_length);
 259   }
 260
 261   return removed;
 262 }
 263
 264 bool ReplaceChars(const string16& input,
 265                   const StringPiece16& replace_chars,
 266                   const string16& replace_with,
 267                   string16* output) {
 268   return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output);
 269 }
 270
 271 bool ReplaceChars(const std::string& input,
 272                   const StringPiece& replace_chars,
 273                   const std::string& replace_with,
 274                   std::string* output) {
 275   return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output);
 276 }
 277
 278 bool RemoveChars(const string16& input,
 279                  const StringPiece16& remove_chars,
 280                  string16* output) {
 281   return ReplaceChars(input, remove_chars.as_string(), string16(), output);
 282 }
 283
 284 bool RemoveChars(const std::string& input,
 285                  const StringPiece& remove_chars,
 286                  std::string* output) {
 287   return ReplaceChars(input, remove_chars.as_string(), std::string(), output);
 288 }
 289
 290 template<typename Str>
 291 TrimPositions TrimStringT(const Str& input,
 292                           BasicStringPiece<Str> trim_chars,
 293                           TrimPositions positions,
 294                           Str* output) {
 295   // Find the edges of leading/trailing whitespace as desired. Need to use
 296   // a StringPiece version of input to be able to call find* on it with the
 297   // StringPiece version of trim_chars (normally the trim_chars will be a
 298   // constant so avoid making a copy).
 299   BasicStringPiece<Str> input_piece(input);
 300   const size_t last_char = input.length() - 1;
 301   const size_t first_good_char = (positions & TRIM_LEADING) ?
 302       input_piece.find_first_not_of(trim_chars) : 0;
 303   const size_t last_good_char = (positions & TRIM_TRAILING) ?
 304       input_piece.find_last_not_of(trim_chars) : last_char;
 305
 306   // When the string was all trimmed, report that we stripped off characters
 307   // from whichever position the caller was interested in. For empty input, we
 308   // stripped no characters, but we still need to clear |output|.
 309   if (input.empty() ||
 310       (first_good_char == Str::npos) || (last_good_char == Str::npos)) {
 311     bool input_was_empty = input.empty();  // in case output == &input
 312     output->clear();
 313     return input_was_empty ? TRIM_NONE : positions;
 314   }
 315
 316   // Trim.
 317   *output =
 318       input.substr(first_good_char, last_good_char - first_good_char + 1);
 319
 320   // Return where we trimmed from.
 321   return static_cast<TrimPositions>(
 322       ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |
 323       ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
 324 }
 325
 326 bool TrimString(const string16& input,
 327                 StringPiece16 trim_chars,
 328                 string16* output) {
 329   return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
 330 }
 331
 332 bool TrimString(const std::string& input,
 333                 StringPiece trim_chars,
 334                 std::string* output) {
 335   return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
 336 }
 337
 338 template<typename Str>
 339 BasicStringPiece<Str> TrimStringPieceT(BasicStringPiece<Str> input,
 340                                        BasicStringPiece<Str> trim_chars,
 341                                        TrimPositions positions) {
 342   size_t begin = (positions & TRIM_LEADING) ?
 343       input.find_first_not_of(trim_chars) : 0;
 344   size_t end = (positions & TRIM_TRAILING) ?
 345       input.find_last_not_of(trim_chars) + 1 : input.size();
 346   return input.substr(begin, end - begin);
 347 }
 348
 349 StringPiece16 TrimString(StringPiece16 input,
 350                          const StringPiece16& trim_chars,
 351                          TrimPositions positions) {
 352   return TrimStringPieceT(input, trim_chars, positions);
 353 }
 354
 355 StringPiece TrimString(StringPiece input,
 356                        const StringPiece& trim_chars,
 357                        TrimPositions positions) {
 358   return TrimStringPieceT(input, trim_chars, positions);
 359 }
 360
 361 void TruncateUTF8ToByteSize(const std::string& input,
 362                             const size_t byte_size,
 363                             std::string* output) {
 364   DCHECK(output);
 365   if (byte_size > input.length()) {
 366     *output = input;
 367     return;
 368   }
 369   DCHECK_LE(byte_size, static_cast<uint32>(kint32max));
 370   // Note: This cast is necessary because CBU8_NEXT uses int32s.
 371   int32 truncation_length = static_cast<int32>(byte_size);
 372   int32 char_index = truncation_length - 1;
 373   const char* data = input.data();
 374
 375   // Using CBU8, we will move backwards from the truncation point
 376   // to the beginning of the string looking for a valid UTF8
 377   // character.  Once a full UTF8 character is found, we will
 378   // truncate the string to the end of that character.
 379   while (char_index >= 0) {
 380     int32 prev = char_index;
 381     base_icu::UChar32 code_point = 0;
 382     CBU8_NEXT(data, char_index, truncation_length, code_point);
 383     if (!IsValidCharacter(code_point) ||
 384         !IsValidCodepoint(code_point)) {
 385       char_index = prev - 1;
 386     } else {
 387       break;
 388     }
 389   }
 390
 391   if (char_index >= 0 )
 392     *output = input.substr(0, char_index);
 393   else
 394     output->clear();
 395 }
 396
 397 TrimPositions TrimWhitespace(const string16& input,
 398                              TrimPositions positions,
 399                              string16* output) {
 400   return TrimStringT(input, StringPiece16(kWhitespaceUTF16), positions, output);
 401 }
 402
 403 StringPiece16 TrimWhitespaceASCII(StringPiece16 input,
 404                                   TrimPositions positions) {
 405   return TrimStringPieceT(input, StringPiece16(kWhitespaceUTF16), positions);
 406 }
 407
 408 TrimPositions TrimWhitespaceASCII(const std::string& input,
 409                                   TrimPositions positions,
 410                                   std::string* output) {
 411   return TrimStringT(input, StringPiece(kWhitespaceASCII), positions, output);
 412 }
 413
 414 StringPiece TrimWhitespaceASCII(StringPiece input, TrimPositions positions) {
 415   return TrimStringPieceT(input, StringPiece(kWhitespaceASCII), positions);
 416 }
 417
 418 // This function is only for backward-compatibility.
 419 // To be removed when all callers are updated.
 420 TrimPositions TrimWhitespace(const std::string& input,
 421                              TrimPositions positions,
 422                              std::string* output) {
 423   return TrimWhitespaceASCII(input, positions, output);
 424 }
 425
 426 template<typename STR>
 427 STR CollapseWhitespaceT(const STR& text,
 428                         bool trim_sequences_with_line_breaks) {
 429   STR result;
 430   result.resize(text.size());
 431
 432   // Set flags to pretend we're already in a trimmed whitespace sequence, so we
 433   // will trim any leading whitespace.
 434   bool in_whitespace = true;
 435   bool already_trimmed = true;
 436
 437   int chars_written = 0;
 438   for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) {
 439     if (IsUnicodeWhitespace(*i)) {
 440       if (!in_whitespace) {
 441         // Reduce all whitespace sequences to a single space.
 442         in_whitespace = true;
 443         result[chars_written++] = L' ';
 444       }
 445       if (trim_sequences_with_line_breaks && !already_trimmed &&
 446           ((*i == '\n') || (*i == '\r'))) {
 447         // Whitespace sequences containing CR or LF are eliminated entirely.
 448         already_trimmed = true;
 449         --chars_written;
 450       }
 451     } else {
 452       // Non-whitespace chracters are copied straight across.
 453       in_whitespace = false;
 454       already_trimmed = false;
 455       result[chars_written++] = *i;
 456     }
 457   }
 458
 459   if (in_whitespace && !already_trimmed) {
 460     // Any trailing whitespace is eliminated.
 461     --chars_written;
 462   }
 463
 464   result.resize(chars_written);
 465   return result;
 466 }
 467
 468 string16 CollapseWhitespace(const string16& text,
 469                             bool trim_sequences_with_line_breaks) {
 470   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
 471 }
 472
 473 std::string CollapseWhitespaceASCII(const std::string& text,
 474                                     bool trim_sequences_with_line_breaks) {
 475   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
 476 }
 477
 478 bool ContainsOnlyChars(const StringPiece& input,
 479                        const StringPiece& characters) {
 480   return input.find_first_not_of(characters) == StringPiece::npos;
 481 }
 482
 483 bool ContainsOnlyChars(const StringPiece16& input,
 484                        const StringPiece16& characters) {
 485   return input.find_first_not_of(characters) == StringPiece16::npos;
 486 }
 487
 488 template <class Char>
 489 inline bool DoIsStringASCII(const Char* characters, size_t length) {
 490   MachineWord all_char_bits = 0;
 491   const Char* end = characters + length;
 492
 493   // Prologue: align the input.
 494   while (!IsAlignedToMachineWord(characters) && characters != end) {
 495     all_char_bits |= *characters;
 496     ++characters;
 497   }
 498
 499   // Compare the values of CPU word size.
 500   const Char* word_end = AlignToMachineWord(end);
 501   const size_t loop_increment = sizeof(MachineWord) / sizeof(Char);
 502   while (characters < word_end) {
 503     all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters));
 504     characters += loop_increment;
 505   }
 506
 507   // Process the remaining bytes.
 508   while (characters != end) {
 509     all_char_bits |= *characters;
 510     ++characters;
 511   }
 512
 513   MachineWord non_ascii_bit_mask =
 514       NonASCIIMask<sizeof(MachineWord), Char>::value();
 515   return !(all_char_bits & non_ascii_bit_mask);
 516 }
 517
 518 bool IsStringASCII(const StringPiece& str) {
 519   return DoIsStringASCII(str.data(), str.length());
 520 }
 521
 522 bool IsStringASCII(const StringPiece16& str) {
 523   return DoIsStringASCII(str.data(), str.length());
 524 }
 525
 526 bool IsStringASCII(const string16& str) {
 527   return DoIsStringASCII(str.data(), str.length());
 528 }
 529
 530 #if defined(WCHAR_T_IS_UTF32)
 531 bool IsStringASCII(const std::wstring& str) {
 532   return DoIsStringASCII(str.data(), str.length());
 533 }
 534 #endif
 535
 536 bool IsStringUTF8(const StringPiece& str) {
 537   const char *src = str.data();
 538   int32 src_len = static_cast<int32>(str.length());
 539   int32 char_index = 0;
 540
 541   while (char_index < src_len) {
 542     int32 code_point;
 543     CBU8_NEXT(src, char_index, src_len, code_point);
 544     if (!IsValidCharacter(code_point))
 545       return false;
 546   }
 547   return true;
 548 }
 549
 550 // Implementation note: Normally this function will be called with a hardcoded
 551 // constant for the lowercase_ascii parameter. Constructing a StringPiece from
 552 // a C constant requires running strlen, so the result will be two passes
 553 // through the buffers, one to file the length of lowercase_ascii, and one to
 554 // compare each letter.
 555 //
 556 // This function could have taken a const char* to avoid this and only do one
 557 // pass through the string. But the strlen is faster than the case-insensitive
 558 // compares and lets us early-exit in the case that the strings are different
 559 // lengths (will often be the case for non-matches). So whether one approach or
 560 // the other will be faster depends on the case.
 561 //
 562 // The hardcoded strings are typically very short so it doesn't matter, and the
 563 // string piece gives additional flexibility for the caller (doesn't have to be
 564 // null terminated) so we choose the StringPiece route.
 565 template<typename Str>
 566 static inline bool DoLowerCaseEqualsASCII(BasicStringPiece<Str> str,
 567                                           StringPiece lowercase_ascii) {
 568   if (str.size() != lowercase_ascii.size())
 569     return false;
 570   for (size_t i = 0; i < str.size(); i++) {
 571     if (ToLowerASCII(str[i]) != lowercase_ascii[i])
 572       return false;
 573   }
 574   return true;
 575 }
 576
 577 bool LowerCaseEqualsASCII(StringPiece str, StringPiece lowercase_ascii) {
 578   return DoLowerCaseEqualsASCII<std::string>(str, lowercase_ascii);
 579 }
 580
 581 bool LowerCaseEqualsASCII(StringPiece16 str, StringPiece lowercase_ascii) {
 582   return DoLowerCaseEqualsASCII<string16>(str, lowercase_ascii);
 583 }
 584
 585 bool EqualsASCII(StringPiece16 str, StringPiece ascii) {
 586   if (str.length() != ascii.length())
 587     return false;
 588   return std::equal(ascii.begin(), ascii.end(), str.begin());
 589 }
 590
 591 template<typename Str>
 592 bool StartsWithT(BasicStringPiece<Str> str,
 593                  BasicStringPiece<Str> search_for,
 594                  CompareCase case_sensitivity) {
 595   if (search_for.size() > str.size())
 596     return false;
 597
 598   BasicStringPiece<Str> source = str.substr(0, search_for.size());
 599
 600   switch (case_sensitivity) {
 601     case CompareCase::SENSITIVE:
 602       return source == search_for;
 603
 604     case CompareCase::INSENSITIVE_ASCII:
 605       return std::equal(
 606           search_for.begin(), search_for.end(),
 607           source.begin(),
 608           CaseInsensitiveCompareASCII<typename Str::value_type>());
 609
 610     default:
 611       NOTREACHED();
 612       return false;
 613   }
 614 }
 615
 616 bool StartsWith(StringPiece str,
 617                 StringPiece search_for,
 618                 CompareCase case_sensitivity) {
 619   return StartsWithT<std::string>(str, search_for, case_sensitivity);
 620 }
 621
 622 bool StartsWith(StringPiece16 str,
 623                 StringPiece16 search_for,
 624                 CompareCase case_sensitivity) {
 625   return StartsWithT<string16>(str, search_for, case_sensitivity);
 626 }
 627
 628 bool StartsWith(const string16& str,
 629                 const string16& search,
 630                 bool case_sensitive) {
 631   if (!case_sensitive) {
 632     // This function was originally written using the current locale functions
 633     // for case-insensitive comparisons. Emulate this behavior until callers
 634     // can be converted either to use the case-insensitive ASCII one (most
 635     // callers) or ICU functions in base_i18n.
 636     if (search.size() > str.size())
 637       return false;
 638     return std::equal(search.begin(), search.end(), str.begin(),
 639                       CaseInsensitiveCompareDeprecated());
 640   }
 641   return StartsWith(StringPiece16(str), StringPiece16(search),
 642                     CompareCase::SENSITIVE);
 643 }
 644
 645 template <typename Str>
 646 bool EndsWithT(BasicStringPiece<Str> str,
 647                BasicStringPiece<Str> search_for,
 648                CompareCase case_sensitivity) {
 649   if (search_for.size() > str.size())
 650     return false;
 651
 652   BasicStringPiece<Str> source = str.substr(str.size() - search_for.size(),
 653                                             search_for.size());
 654
 655   switch (case_sensitivity) {
 656     case CompareCase::SENSITIVE:
 657       return source == search_for;
 658
 659     case CompareCase::INSENSITIVE_ASCII:
 660       return std::equal(
 661           source.begin(), source.end(),
 662           search_for.begin(),
 663           CaseInsensitiveCompareASCII<typename Str::value_type>());
 664
 665     default:
 666       NOTREACHED();
 667       return false;
 668   }
 669 }
 670
 671 bool EndsWith(StringPiece str,
 672               StringPiece search_for,
 673               CompareCase case_sensitivity) {
 674   return EndsWithT<std::string>(str, search_for, case_sensitivity);
 675 }
 676
 677 bool EndsWith(StringPiece16 str,
 678               StringPiece16 search_for,
 679                           CompareCase case_sensitivity) {
 680   return EndsWithT<string16>(str, search_for, case_sensitivity);
 681 }
 682
 683 bool EndsWith(const string16& str,
 684               const string16& search,
 685               bool case_sensitive) {
 686   if (!case_sensitive) {
 687     // This function was originally written using the current locale functions
 688     // for case-insensitive comparisons. Emulate this behavior until callers
 689     // can be converted either to use the case-insensitive ASCII one (most
 690     // callers) or ICU functions in base_i18n.
 691     if (search.size() > str.size())
 692       return false;
 693     return std::equal(search.begin(), search.end(),
 694                       str.begin() + (str.size() - search.size()),
 695                       CaseInsensitiveCompareDeprecated());
 696   }
 697   return EndsWith(StringPiece16(str), StringPiece16(search),
 698                   CompareCase::SENSITIVE);
 699 }
 700
 701 char HexDigitToInt(wchar_t c) {
 702   DCHECK(IsHexDigit(c));
 703   if (c >= '0' && c <= '9')
 704     return static_cast<char>(c - '0');
 705   if (c >= 'A' && c <= 'F')
 706     return static_cast<char>(c - 'A' + 10);
 707   if (c >= 'a' && c <= 'f')
 708     return static_cast<char>(c - 'a' + 10);
 709   return 0;
 710 }
 711
 712 static const char* const kByteStringsUnlocalized[] = {
 713   " B",
 714   " kB",
 715   " MB",
 716   " GB",
 717   " TB",
 718   " PB"
 719 };
 720
 721 string16 FormatBytesUnlocalized(int64 bytes) {
 722   double unit_amount = static_cast<double>(bytes);
 723   size_t dimension = 0;
 724   const int kKilo = 1024;
 725   while (unit_amount >= kKilo &&
 726          dimension < arraysize(kByteStringsUnlocalized) - 1) {
 727     unit_amount /= kKilo;
 728     dimension++;
 729   }
 730
 731   char buf[64];
 732   if (bytes != 0 && dimension > 0 && unit_amount < 100) {
 733     base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount,
 734                    kByteStringsUnlocalized[dimension]);
 735   } else {
 736     base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount,
 737                    kByteStringsUnlocalized[dimension]);
 738   }
 739
 740   return ASCIIToUTF16(buf);
 741 }
 742
 743 // Runs in O(n) time in the length of |str|.
 744 template<class StringType>
 745 void DoReplaceSubstringsAfterOffset(StringType* str,
 746                                     size_t offset,
 747                                     BasicStringPiece<StringType> find_this,
 748                                     BasicStringPiece<StringType> replace_with,
 749                                     bool replace_all) {
 750   DCHECK(!find_this.empty());
 751
 752   // If the find string doesn't appear, there's nothing to do.
 753   offset = str->find(find_this.data(), offset, find_this.size());
 754   if (offset == StringType::npos)
 755     return;
 756
 757   // If we're only replacing one instance, there's no need to do anything
 758   // complicated.
 759   size_t find_length = find_this.length();
 760   if (!replace_all) {
 761     str->replace(offset, find_length, replace_with.data(), replace_with.size());
 762     return;
 763   }
 764
 765   // If the find and replace strings are the same length, we can simply use
 766   // replace() on each instance, and finish the entire operation in O(n) time.
 767   size_t replace_length = replace_with.length();
 768   if (find_length == replace_length) {
 769     do {
 770       str->replace(offset, find_length,
 771                    replace_with.data(), replace_with.size());
 772       offset = str->find(find_this.data(), offset + replace_length,
 773                          find_this.size());
 774     } while (offset != StringType::npos);
 775     return;
 776   }
 777
 778   // Since the find and replace strings aren't the same length, a loop like the
 779   // one above would be O(n^2) in the worst case, as replace() will shift the
 780   // entire remaining string each time.  We need to be more clever to keep
 781   // things O(n).
 782   //
 783   // If we're shortening the string, we can alternate replacements with shifting
 784   // forward the intervening characters using memmove().
 785   size_t str_length = str->length();
 786   if (find_length > replace_length) {
 787     size_t write_offset = offset;
 788     do {
 789       if (replace_length) {
 790         str->replace(write_offset, replace_length,
 791                      replace_with.data(), replace_with.size());
 792         write_offset += replace_length;
 793       }
 794       size_t read_offset = offset + find_length;
 795       offset = std::min(
 796           str->find(find_this.data(), read_offset, find_this.size()),
 797           str_length);
 798       size_t length = offset - read_offset;
 799       if (length) {
 800         memmove(&(*str)[write_offset], &(*str)[read_offset],
 801                 length * sizeof(typename StringType::value_type));
 802         write_offset += length;
 803       }
 804     } while (offset < str_length);
 805     str->resize(write_offset);
 806     return;
 807   }
 808
 809   // We're lengthening the string.  We can use alternating replacements and
 810   // memmove() calls like above, but we need to precalculate the final string
 811   // length and then expand from back-to-front to avoid overwriting the string
 812   // as we're reading it, needing to shift, or having to copy to a second string
 813   // temporarily.
 814   size_t first_match = offset;
 815
 816   // First, calculate the final length and resize the string.
 817   size_t final_length = str_length;
 818   size_t expansion = replace_length - find_length;
 819   size_t current_match;
 820   do {
 821     final_length += expansion;
 822     // Minor optimization: save this offset into |current_match|, so that on
 823     // exit from the loop, |current_match| will point at the last instance of
 824     // the find string, and we won't need to find() it again immediately.
 825     current_match = offset;
 826     offset = str->find(find_this.data(), offset + find_length,
 827                        find_this.size());
 828   } while (offset != StringType::npos);
 829   str->resize(final_length);
 830
 831   // Now do the replacement loop, working backwards through the string.
 832   for (size_t prev_match = str_length, write_offset = final_length; ;
 833        current_match = str->rfind(find_this.data(), current_match - 1,
 834                                   find_this.size())) {
 835     size_t read_offset = current_match + find_length;
 836     size_t length = prev_match - read_offset;
 837     if (length) {
 838       write_offset -= length;
 839       memmove(&(*str)[write_offset], &(*str)[read_offset],
 840               length * sizeof(typename StringType::value_type));
 841     }
 842     write_offset -= replace_length;
 843     str->replace(write_offset, replace_length,
 844                  replace_with.data(), replace_with.size());
 845     if (current_match == first_match)
 846       return;
 847     prev_match = current_match;
 848   }
 849 }
 850
 851 void ReplaceFirstSubstringAfterOffset(string16* str,
 852                                       size_t start_offset,
 853                                       StringPiece16 find_this,
 854                                       StringPiece16 replace_with) {
 855   DoReplaceSubstringsAfterOffset<string16>(
 856       str, start_offset, find_this, replace_with, false);  // Replace first.
 857 }
 858
 859 void ReplaceFirstSubstringAfterOffset(std::string* str,
 860                                       size_t start_offset,
 861                                       StringPiece find_this,
 862                                       StringPiece replace_with) {
 863   DoReplaceSubstringsAfterOffset<std::string>(
 864       str, start_offset, find_this, replace_with, false);  // Replace first.
 865 }
 866
 867 void ReplaceSubstringsAfterOffset(string16* str,
 868                                   size_t start_offset,
 869                                   StringPiece16 find_this,
 870                                   StringPiece16 replace_with) {
 871   DoReplaceSubstringsAfterOffset<string16>(
 872       str, start_offset, find_this, replace_with, true);  // Replace all.
 873 }
 874
 875 void ReplaceSubstringsAfterOffset(std::string* str,
 876                                   size_t start_offset,
 877                                   StringPiece find_this,
 878                                   StringPiece replace_with) {
 879   DoReplaceSubstringsAfterOffset<std::string>(
 880       str, start_offset, find_this, replace_with, true);  // Replace all.
 881 }
 882
 883 template <class string_type>
 884 inline typename string_type::value_type* WriteIntoT(string_type* str,
 885                                                     size_t length_with_null) {
 886   DCHECK_GT(length_with_null, 1u);
 887   str->reserve(length_with_null);
 888   str->resize(length_with_null - 1);
 889   return &((*str)[0]);
 890 }
 891
 892 char* WriteInto(std::string* str, size_t length_with_null) {
 893   return WriteIntoT(str, length_with_null);
 894 }
 895
 896 char16* WriteInto(string16* str, size_t length_with_null) {
 897   return WriteIntoT(str, length_with_null);
 898 }
 899
 900 template<typename STR>
 901 static STR JoinStringT(const std::vector<STR>& parts,
 902                        BasicStringPiece<STR> sep) {
 903   if (parts.empty())
 904     return STR();
 905
 906   STR result(parts[0]);
 907   auto iter = parts.begin();
 908   ++iter;
 909
 910   for (; iter != parts.end(); ++iter) {
 911     sep.AppendToString(&result);
 912     result += *iter;
 913   }
 914
 915   return result;
 916 }
 917
 918 std::string JoinString(const std::vector<std::string>& parts,
 919                        StringPiece separator) {
 920   return JoinStringT(parts, separator);
 921 }
 922
 923 string16 JoinString(const std::vector<string16>& parts,
 924                     StringPiece16 separator) {
 925   return JoinStringT(parts, separator);
 926 }
 927
 928 template<class FormatStringType, class OutStringType>
 929 OutStringType DoReplaceStringPlaceholders(
 930     const FormatStringType& format_string,
 931     const std::vector<OutStringType>& subst,
 932     std::vector<size_t>* offsets) {
 933   size_t substitutions = subst.size();
 934
 935   size_t sub_length = 0;
 936   for (const auto& cur : subst)
 937     sub_length += cur.length();
 938
 939   OutStringType formatted;
 940   formatted.reserve(format_string.length() + sub_length);
 941
 942   std::vector<ReplacementOffset> r_offsets;
 943   for (auto i = format_string.begin(); i != format_string.end(); ++i) {
 944     if ('$' == *i) {
 945       if (i + 1 != format_string.end()) {
 946         ++i;
 947         DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i;
 948         if ('$' == *i) {
 949           while (i != format_string.end() && '$' == *i) {
 950             formatted.push_back('$');
 951             ++i;
 952           }
 953           --i;
 954         } else {
 955           uintptr_t index = 0;
 956           while (i != format_string.end() && '0' <= *i && *i <= '9') {
 957             index *= 10;
 958             index += *i - '0';
 959             ++i;
 960           }
 961           --i;
 962           index -= 1;
 963           if (offsets) {
 964             ReplacementOffset r_offset(index,
 965                 static_cast<int>(formatted.size()));
 966             r_offsets.insert(std::lower_bound(r_offsets.begin(),
 967                                               r_offsets.end(),
 968                                               r_offset,
 969                                               &CompareParameter),
 970                              r_offset);
 971           }
 972           if (index < substitutions)
 973             formatted.append(subst.at(index));
 974         }
 975       }
 976     } else {
 977       formatted.push_back(*i);
 978     }
 979   }
 980   if (offsets) {
 981     for (const auto& cur : r_offsets)
 982       offsets->push_back(cur.offset);
 983   }
 984   return formatted;
 985 }
 986
 987 string16 ReplaceStringPlaceholders(const string16& format_string,
 988                                    const std::vector<string16>& subst,
 989                                    std::vector<size_t>* offsets) {
 990   return DoReplaceStringPlaceholders(format_string, subst, offsets);
 991 }
 992
 993 std::string ReplaceStringPlaceholders(const StringPiece& format_string,
 994                                       const std::vector<std::string>& subst,
 995                                       std::vector<size_t>* offsets) {
 996   return DoReplaceStringPlaceholders(format_string, subst, offsets);
 997 }
 998
 999 string16 ReplaceStringPlaceholders(const string16& format_string,
1000                                    const string16& a,
1001                                    size_t* offset) {
1002   std::vector<size_t> offsets;
1003   std::vector<string16> subst;
1004   subst.push_back(a);
1005   string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);
1006
1007   DCHECK_EQ(1U, offsets.size());
1008   if (offset)
1009     *offset = offsets[0];
1010   return result;
1011 }
1012
1013 // The following code is compatible with the OpenBSD lcpy interface.  See:
1014 //   http://www.gratisoft.us/todd/papers/strlcpy.html
1015 //   ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
1016
1017 namespace {
1018
1019 template <typename CHAR>
1020 size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
1021   for (size_t i = 0; i < dst_size; ++i) {
1022     if ((dst[i] = src[i]) == 0)  // We hit and copied the terminating NULL.
1023       return i;
1024   }
1025
1026   // We were left off at dst_size.  We over copied 1 byte.  Null terminate.
1027   if (dst_size != 0)
1028     dst[dst_size - 1] = 0;
1029
1030   // Count the rest of the |src|, and return it's length in characters.
1031   while (src[dst_size]) ++dst_size;
1032   return dst_size;
1033 }
1034
1035 }  // namespace
1036
1037 size_t strlcpy(char* dst, const char* src, size_t dst_size) {
1038   return lcpyT<char>(dst, src, dst_size);
1039 }
1040 size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
1041   return lcpyT<wchar_t>(dst, src, dst_size);
1042 }
1043
1044 }  // namespace base