base/strings/string_util.cc

   1 // Copyright 2013 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "base/strings/string_util.h"
   6
   7 #include <ctype.h>
   8 #include <errno.h>
   9 #include <math.h>
  10 #include <stdarg.h>
  11 #include <stdio.h>
  12 #include <stdlib.h>
  13 #include <string.h>
  14 #include <time.h>
  15 #include <wchar.h>
  16 #include <wctype.h>
  17
  18 #include <algorithm>
  19 #include <vector>
  20
  21 #include "base/basictypes.h"
  22 #include "base/logging.h"
  23 #include "base/memory/singleton.h"
  24 #include "base/strings/utf_string_conversion_utils.h"
  25 #include "base/strings/utf_string_conversions.h"
  26 #include "base/third_party/icu/icu_utf.h"
  27 #include "build/build_config.h"
  28
  29 // Remove when this entire file is in the base namespace.
  30 using base::char16;
  31 using base::string16;
  32
  33 namespace {
  34
// Holder for the empty std::string and string16 instances handed out by
// EmptyString() and EmptyString16(). Wrapping them in a dedicated struct forces
// the singleton to be a unique type, which prevents other code that might
// accidentally use Singleton<string> from getting our internal one.
struct EmptyStrings {
  EmptyStrings() {}
  // Default-constructed (empty) 8-bit string.
  const std::string s;
  // Default-constructed (empty) 16-bit string.
  const string16 s16;

  // Returns the instance owned by Singleton<EmptyStrings>.
  static EmptyStrings* GetInstance() {
    return Singleton<EmptyStrings>::get();
  }
};
  47
// Used by ReplaceStringPlaceholders to track the position in the string of
// replaced parameters. Pairs a placeholder's parameter index with the offset
// in the output at which its replacement starts.
struct ReplacementOffset {
  // Stores |parameter| and |offset| verbatim.
  ReplacementOffset(uintptr_t parameter, size_t offset)
      : parameter(parameter),
        offset(offset) {}

  // Index of the parameter.
  uintptr_t parameter;

  // Starting position in the string.
  size_t offset;
};
  61
// Strict-weak ordering for ReplacementOffset that looks only at the parameter
// index; returns true when |elem1|'s parameter is smaller than |elem2|'s.
static bool CompareParameter(const ReplacementOffset& elem1,
                             const ReplacementOffset& elem2) {
  return elem1.parameter < elem2.parameter;
}
  66
// Assuming that a pointer is the size of a "machine word", then
// uintptr_t is an integer type that is also a machine word.
typedef uintptr_t MachineWord;
// Low address bits that must all be zero for a pointer to be aligned to
// sizeof(MachineWord).
const uintptr_t kMachineWordAlignmentMask = sizeof(MachineWord) - 1;
  71
// Returns true when none of the bits selected by kMachineWordAlignmentMask are
// set in the address of |pointer|, i.e. the address is a multiple of
// sizeof(MachineWord).
inline bool IsAlignedToMachineWord(const void* pointer) {
  return !(reinterpret_cast<MachineWord>(pointer) & kMachineWordAlignmentMask);
}
  75
// Rounds |pointer| down to the nearest machine-word boundary by clearing the
// bits selected by kMachineWordAlignmentMask in its address.
template<typename T> inline T* AlignToMachineWord(T* pointer) {
  return reinterpret_cast<T*>(reinterpret_cast<MachineWord>(pointer) &
                              ~kMachineWordAlignmentMask);
}
  80
// NonASCIIMask<size, CharacterType>::value() yields a |size|-byte mask with a
// bit set at every position that is outside the 7-bit range of a
// CharacterType packed into the word: the top bit of each byte for char, and
// the top nine bits of each 16-bit unit for char16. Only the 4- and 8-byte
// word sizes are specialized.
template<size_t size, typename CharacterType> struct NonASCIIMask;
// 32-bit word holding two char16 units.
template<> struct NonASCIIMask<4, base::char16> {
    static inline uint32_t value() { return 0xFF80FF80U; }
};
// 32-bit word holding four chars.
template<> struct NonASCIIMask<4, char> {
    static inline uint32_t value() { return 0x80808080U; }
};
// 64-bit word holding four char16 units.
template<> struct NonASCIIMask<8, base::char16> {
    static inline uint64_t value() { return 0xFF80FF80FF80FF80ULL; }
};
// 64-bit word holding eight chars.
template<> struct NonASCIIMask<8, char> {
    static inline uint64_t value() { return 0x8080808080808080ULL; }
};
  94
  95 }  // namespace
  96
  97 namespace base {
  98
  99 bool IsWprintfFormatPortable(const wchar_t* format) {
 100   for (const wchar_t* position = format; *position != '\0'; ++position) {
 101     if (*position == '%') {
 102       bool in_specification = true;
 103       bool modifier_l = false;
 104       while (in_specification) {
 105         // Eat up characters until reaching a known specifier.
 106         if (*++position == '\0') {
 107           // The format string ended in the middle of a specification.  Call
 108           // it portable because no unportable specifications were found.  The
 109           // string is equally broken on all platforms.
 110           return true;
 111         }
 112
 113         if (*position == 'l') {
 114           // 'l' is the only thing that can save the 's' and 'c' specifiers.
 115           modifier_l = true;
 116         } else if (((*position == 's' || *position == 'c') && !modifier_l) ||
 117                    *position == 'S' || *position == 'C' || *position == 'F' ||
 118                    *position == 'D' || *position == 'O' || *position == 'U') {
 119           // Not portable.
 120           return false;
 121         }
 122
 123         if (wcschr(L"diouxXeEfgGaAcspn%", *position)) {
 124           // Portable, keep scanning the rest of the format string.
 125           in_specification = false;
 126         }
 127       }
 128     }
 129   }
 130
 131   return true;
 132 }
 133
// Returns a reference to the empty std::string stored in the EmptyStrings
// singleton.
const std::string& EmptyString() {
  return EmptyStrings::GetInstance()->s;
}
 137
// Returns a reference to the empty string16 stored in the EmptyStrings
// singleton.
const string16& EmptyString16() {
  return EmptyStrings::GetInstance()->s16;
}
 141
 142 template<typename STR>
 143 bool ReplaceCharsT(const STR& input,
 144                    const STR& replace_chars,
 145                    const STR& replace_with,
 146                    STR* output) {
 147   bool removed = false;
 148   size_t replace_length = replace_with.length();
 149
 150   *output = input;
 151
 152   size_t found = output->find_first_of(replace_chars);
 153   while (found != STR::npos) {
 154     removed = true;
 155     output->replace(found, 1, replace_with);
 156     found = output->find_first_of(replace_chars, found + replace_length);
 157   }
 158
 159   return removed;
 160 }
 161
// 16-bit front-end for ReplaceCharsT: converts |replace_chars| to a string16
// and forwards. Returns ReplaceCharsT's result (true if anything was
// replaced).
bool ReplaceChars(const string16& input,
                  const base::StringPiece16& replace_chars,
                  const string16& replace_with,
                  string16* output) {
  return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output);
}

// 8-bit front-end for ReplaceCharsT: converts |replace_chars| to a
// std::string and forwards. Returns ReplaceCharsT's result (true if anything
// was replaced).
bool ReplaceChars(const std::string& input,
                  const base::StringPiece& replace_chars,
                  const std::string& replace_with,
                  std::string* output) {
  return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output);
}
 175
// Removes every character of |remove_chars| from |input|, writing the result
// to |output|. Implemented as ReplaceChars with an empty replacement; returns
// that call's result.
bool RemoveChars(const string16& input,
                 const base::StringPiece16& remove_chars,
                 string16* output) {
  return ReplaceChars(input, remove_chars.as_string(), string16(), output);
}

// 8-bit variant of the above: ReplaceChars with an empty std::string
// replacement.
bool RemoveChars(const std::string& input,
                 const base::StringPiece& remove_chars,
                 std::string* output) {
  return ReplaceChars(input, remove_chars.as_string(), std::string(), output);
}
 187
 188 template<typename STR>
 189 TrimPositions TrimStringT(const STR& input,
 190                           const STR& trim_chars,
 191                           TrimPositions positions,
 192                           STR* output) {
 193   // Find the edges of leading/trailing whitespace as desired.
 194   const size_t last_char = input.length() - 1;
 195   const size_t first_good_char = (positions & TRIM_LEADING) ?
 196       input.find_first_not_of(trim_chars) : 0;
 197   const size_t last_good_char = (positions & TRIM_TRAILING) ?
 198       input.find_last_not_of(trim_chars) : last_char;
 199
 200   // When the string was all whitespace, report that we stripped off whitespace
 201   // from whichever position the caller was interested in.  For empty input, we
 202   // stripped no whitespace, but we still need to clear |output|.
 203   if (input.empty() ||
 204       (first_good_char == STR::npos) || (last_good_char == STR::npos)) {
 205     bool input_was_empty = input.empty();  // in case output == &input
 206     output->clear();
 207     return input_was_empty ? TRIM_NONE : positions;
 208   }
 209
 210   // Trim the whitespace.
 211   *output =
 212       input.substr(first_good_char, last_good_char - first_good_char + 1);
 213
 214   // Return where we trimmed from.
 215   return static_cast<TrimPositions>(
 216       ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |
 217       ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
 218 }
 219
// Trims characters in |trim_chars| from both ends of |input| into |output|
// via TrimStringT with TRIM_ALL. Returns true unless TrimStringT reports
// TRIM_NONE.
bool TrimString(const string16& input,
                const base::StringPiece16& trim_chars,
                string16* output) {
  return TrimStringT(input, trim_chars.as_string(), TRIM_ALL, output) !=
      TRIM_NONE;
}

// 8-bit variant of the above.
bool TrimString(const std::string& input,
                const base::StringPiece& trim_chars,
                std::string* output) {
  return TrimStringT(input, trim_chars.as_string(), TRIM_ALL, output) !=
      TRIM_NONE;
}
 233
// Writes to |output| a prefix of |input| that is at most |byte_size| bytes
// long and that ends on the boundary of a character accepted by both
// IsValidCharacter and IsValidCodepoint. When |byte_size| exceeds the length
// of |input|, |input| is copied unchanged. When no acceptable character is
// found within the first |byte_size| bytes, |output| is cleared.
void TruncateUTF8ToByteSize(const std::string& input,
                            const size_t byte_size,
                            std::string* output) {
  DCHECK(output);
  if (byte_size > input.length()) {
    *output = input;
    return;
  }
  DCHECK_LE(byte_size, static_cast<uint32>(kint32max));
  // Note: This cast is necessary because CBU8_NEXT uses int32s.
  int32 truncation_length = static_cast<int32>(byte_size);
  int32 char_index = truncation_length - 1;
  const char* data = input.data();

  // Using CBU8, we will move backwards from the truncation point
  // to the beginning of the string looking for a valid UTF8
  // character.  Once a full UTF8 character is found, we will
  // truncate the string to the end of that character.
  while (char_index >= 0) {
    // Remember the candidate start, since CBU8_NEXT modifies |char_index|.
    int32 prev = char_index;
    base_icu::UChar32 code_point = 0;
    CBU8_NEXT(data, char_index, truncation_length, code_point);
    if (!IsValidCharacter(code_point) ||
        !IsValidCodepoint(code_point)) {
      // Not a usable character starting at |prev|; retry one byte earlier.
      char_index = prev - 1;
    } else {
      // |char_index| is left where CBU8_NEXT put it and is used as the
      // truncation length below.
      break;
    }
  }

  if (char_index >= 0 )
    *output = input.substr(0, char_index);
  else
    output->clear();
}
 269
// Trims the characters listed in kWhitespaceUTF16 from the ends of |input|
// selected by |positions|, storing the result in |output|. Returns
// TrimStringT's result.
TrimPositions TrimWhitespace(const string16& input,
                             TrimPositions positions,
                             string16* output) {
  return TrimStringT(input, base::string16(kWhitespaceUTF16), positions,
                     output);
}
 276
// Trims the characters listed in kWhitespaceASCII from the ends of |input|
// selected by |positions|, storing the result in |output|. Returns
// TrimStringT's result.
TrimPositions TrimWhitespaceASCII(const std::string& input,
                                  TrimPositions positions,
                                  std::string* output) {
  return TrimStringT(input, std::string(kWhitespaceASCII), positions, output);
}
 282
// This function is only for backward-compatibility.
// To be removed when all callers are updated.
// It simply forwards to TrimWhitespaceASCII.
TrimPositions TrimWhitespace(const std::string& input,
                             TrimPositions positions,
                             std::string* output) {
  return TrimWhitespaceASCII(input, positions, output);
}
 290
 291 template<typename STR>
 292 STR CollapseWhitespaceT(const STR& text,
 293                         bool trim_sequences_with_line_breaks) {
 294   STR result;
 295   result.resize(text.size());
 296
 297   // Set flags to pretend we're already in a trimmed whitespace sequence, so we
 298   // will trim any leading whitespace.
 299   bool in_whitespace = true;
 300   bool already_trimmed = true;
 301
 302   int chars_written = 0;
 303   for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) {
 304     if (IsWhitespace(*i)) {
 305       if (!in_whitespace) {
 306         // Reduce all whitespace sequences to a single space.
 307         in_whitespace = true;
 308         result[chars_written++] = L' ';
 309       }
 310       if (trim_sequences_with_line_breaks && !already_trimmed &&
 311           ((*i == '\n') || (*i == '\r'))) {
 312         // Whitespace sequences containing CR or LF are eliminated entirely.
 313         already_trimmed = true;
 314         --chars_written;
 315       }
 316     } else {
 317       // Non-whitespace chracters are copied straight across.
 318       in_whitespace = false;
 319       already_trimmed = false;
 320       result[chars_written++] = *i;
 321     }
 322   }
 323
 324   if (in_whitespace && !already_trimmed) {
 325     // Any trailing whitespace is eliminated.
 326     --chars_written;
 327   }
 328
 329   result.resize(chars_written);
 330   return result;
 331 }
 332
// string16 front-end for CollapseWhitespaceT; forwards both arguments and
// returns its result.
string16 CollapseWhitespace(const string16& text,
                            bool trim_sequences_with_line_breaks) {
  return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
}
 337
// std::string front-end for CollapseWhitespaceT; forwards both arguments and
// returns its result.
std::string CollapseWhitespaceASCII(const std::string& text,
                                    bool trim_sequences_with_line_breaks) {
  return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
}
 342
// Returns true when |input| has no character outside of |characters|, i.e.
// find_first_not_of() finds nothing.
bool ContainsOnlyChars(const StringPiece& input,
                       const StringPiece& characters) {
  return input.find_first_not_of(characters) == StringPiece::npos;
}

// 16-bit variant of the above.
bool ContainsOnlyChars(const StringPiece16& input,
                       const StringPiece16& characters) {
  return input.find_first_not_of(characters) == StringPiece16::npos;
}
 352
// Returns true if none of the |length| characters starting at |characters|
// has a bit set outside the 7-bit ASCII range. Rather than testing each
// character, the function ORs all of them together (a machine word at a time
// where alignment allows) and tests the accumulated bits once against
// NonASCIIMask.
template <class Char>
inline bool DoIsStringASCII(const Char* characters, size_t length) {
  MachineWord all_char_bits = 0;
  const Char* end = characters + length;

  // Prologue: align the input. Characters are accumulated one at a time until
  // |characters| is word-aligned or the input is exhausted.
  while (!IsAlignedToMachineWord(characters) && characters != end) {
    all_char_bits |= *characters;
    ++characters;
  }

  // Accumulate whole machine words up to the last word boundary at or before
  // |end|.
  const Char* word_end = AlignToMachineWord(end);
  const size_t loop_increment = sizeof(MachineWord) / sizeof(Char);
  while (characters < word_end) {
    all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters));
    characters += loop_increment;
  }

  // Process the remaining characters that did not fill a whole word.
  while (characters != end) {
    all_char_bits |= *characters;
    ++characters;
  }

  // Any surviving bit under the mask means some character was not ASCII.
  MachineWord non_ascii_bit_mask =
      NonASCIIMask<sizeof(MachineWord), Char>::value();
  return !(all_char_bits & non_ascii_bit_mask);
}
 382
// Front-ends for DoIsStringASCII. Each passes the string's data pointer and
// length and returns the helper's result.
bool IsStringASCII(const StringPiece& str) {
  return DoIsStringASCII(str.data(), str.length());
}

bool IsStringASCII(const StringPiece16& str) {
  return DoIsStringASCII(str.data(), str.length());
}

bool IsStringASCII(const string16& str) {
  return DoIsStringASCII(str.data(), str.length());
}
 394
// Decodes |str| one code point at a time with CBU8_NEXT and returns false as
// soon as a decoded value fails IsValidCharacter(). Returns true if the whole
// string was consumed without such a failure (including for empty input).
// Note that the length is narrowed to int32 for the benefit of CBU8_NEXT.
bool IsStringUTF8(const std::string& str) {
  const char *src = str.data();
  int32 src_len = static_cast<int32>(str.length());
  int32 char_index = 0;

  while (char_index < src_len) {
    int32 code_point;
    // Advances |char_index| and stores the decoded value in |code_point|.
    CBU8_NEXT(src, char_index, src_len, code_point);
    if (!IsValidCharacter(code_point))
      return false;
  }
  return true;
}
 408
 409 }  // namespace base
 410
 411 template<typename Iter>
 412 static inline bool DoLowerCaseEqualsASCII(Iter a_begin,
 413                                           Iter a_end,
 414                                           const char* b) {
 415   for (Iter it = a_begin; it != a_end; ++it, ++b) {
 416     if (!*b || base::ToLowerASCII(*it) != *b)
 417       return false;
 418   }
 419   return *b == 0;
 420 }
 421
// Front-ends for LowerCaseEqualsASCII. Each overload hands its character
// range and |b| to DoLowerCaseEqualsASCII and returns that result.

// Whole std::string.
bool LowerCaseEqualsASCII(const std::string& a, const char* b) {
  return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
}

// Whole string16.
bool LowerCaseEqualsASCII(const string16& a, const char* b) {
  return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
}

// std::string iterator range [a_begin, a_end).
bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,
                          std::string::const_iterator a_end,
                          const char* b) {
  return DoLowerCaseEqualsASCII(a_begin, a_end, b);
}

// string16 iterator range [a_begin, a_end).
bool LowerCaseEqualsASCII(string16::const_iterator a_begin,
                          string16::const_iterator a_end,
                          const char* b) {
  return DoLowerCaseEqualsASCII(a_begin, a_end, b);
}
 442
 443 // TODO(port): Resolve wchar_t/iterator issues that require OS_ANDROID here.
 444 #if !defined(OS_ANDROID)
// Raw char pointer range [a_begin, a_end); forwards to
// DoLowerCaseEqualsASCII.
bool LowerCaseEqualsASCII(const char* a_begin,
                          const char* a_end,
                          const char* b) {
  return DoLowerCaseEqualsASCII(a_begin, a_end, b);
}

// Raw char16 pointer range [a_begin, a_end); forwards to
// DoLowerCaseEqualsASCII.
bool LowerCaseEqualsASCII(const char16* a_begin,
                          const char16* a_end,
                          const char* b) {
  return DoLowerCaseEqualsASCII(a_begin, a_end, b);
}
 456
 457 #endif  // !defined(OS_ANDROID)
 458
// Returns true when |a| and |b| have the same length and each character of
// |b| compares equal to the character of |a| at the same position.
bool EqualsASCII(const string16& a, const base::StringPiece& b) {
  // Checked first so std::equal never reads past the end of |a|.
  if (a.length() != b.length())
    return false;
  return std::equal(b.begin(), b.end(), a.begin());
}
 464
// Returns true if |str| begins with |search|. With |case_sensitive| the
// prefix is compared using std::string::compare; otherwise the first
// search.length() characters are compared using base::strncasecmp.
bool StartsWithASCII(const std::string& str,
                     const std::string& search,
                     bool case_sensitive) {
  if (case_sensitive)
    return str.compare(0, search.length(), search) == 0;
  else
    return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0;
}
 473
 474 template <typename STR>
 475 bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) {
 476   if (case_sensitive) {
 477     return str.compare(0, search.length(), search) == 0;
 478   } else {
 479     if (search.size() > str.size())
 480       return false;
 481     return std::equal(search.begin(), search.end(), str.begin(),
 482                       base::CaseInsensitiveCompare<typename STR::value_type>());
 483   }
 484 }
 485
// string16 front-end for StartsWithT; forwards all arguments and returns its
// result.
bool StartsWith(const string16& str, const string16& search,
                bool case_sensitive) {
  return StartsWithT(str, search, case_sensitive);
}
 490
 491 template <typename STR>
 492 bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) {
 493   size_t str_length = str.length();
 494   size_t search_length = search.length();
 495   if (search_length > str_length)
 496     return false;
 497   if (case_sensitive)
 498     return str.compare(str_length - search_length, search_length, search) == 0;
 499   return std::equal(search.begin(), search.end(),
 500                     str.begin() + (str_length - search_length),
 501                     base::CaseInsensitiveCompare<typename STR::value_type>());
 502 }
 503
// std::string front-end for EndsWithT; forwards all arguments and returns its
// result.
bool EndsWith(const std::string& str, const std::string& search,
              bool case_sensitive) {
  return EndsWithT(str, search, case_sensitive);
}

// string16 front-end for EndsWithT; forwards all arguments and returns its
// result.
bool EndsWith(const string16& str, const string16& search,
              bool case_sensitive) {
  return EndsWithT(str, search, case_sensitive);
}
 513
// Unit suffixes used by FormatBytesUnlocalized, indexed by how many times the
// byte count was divided by 1024. Each entry carries its own leading space.
static const char* const kByteStringsUnlocalized[] = {
  " B",
  " kB",
  " MB",
  " GB",
  " TB",
  " PB"
};
 522
 523 string16 FormatBytesUnlocalized(int64 bytes) {
 524   double unit_amount = static_cast<double>(bytes);
 525   size_t dimension = 0;
 526   const int kKilo = 1024;
 527   while (unit_amount >= kKilo &&
 528          dimension < arraysize(kByteStringsUnlocalized) - 1) {
 529     unit_amount /= kKilo;
 530     dimension++;
 531   }
 532
 533   char buf[64];
 534   if (bytes != 0 && dimension > 0 && unit_amount < 100) {
 535     base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount,
 536                    kByteStringsUnlocalized[dimension]);
 537   } else {
 538     base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount,
 539                    kByteStringsUnlocalized[dimension]);
 540   }
 541
 542   return base::ASCIIToUTF16(buf);
 543 }
 544
 545 template<class StringType>
 546 void DoReplaceSubstringsAfterOffset(StringType* str,
 547                                     size_t start_offset,
 548                                     const StringType& find_this,
 549                                     const StringType& replace_with,
 550                                     bool replace_all) {
 551   if ((start_offset == StringType::npos) || (start_offset >= str->length()))
 552     return;
 553
 554   DCHECK(!find_this.empty());
 555   for (size_t offs(str->find(find_this, start_offset));
 556       offs != StringType::npos; offs = str->find(find_this, offs)) {
 557     str->replace(offs, find_this.length(), replace_with);
 558     offs += replace_with.length();
 559
 560     if (!replace_all)
 561       break;
 562   }
 563 }
 564
// Replaces only the first occurrence of |find_this| found at or after
// |start_offset| in |*str| with |replace_with|, by calling
// DoReplaceSubstringsAfterOffset with replace_all == false.
void ReplaceFirstSubstringAfterOffset(string16* str,
                                      size_t start_offset,
                                      const string16& find_this,
                                      const string16& replace_with) {
  DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
                                 false);  // replace first instance
}

// std::string variant of the above.
void ReplaceFirstSubstringAfterOffset(std::string* str,
                                      size_t start_offset,
                                      const std::string& find_this,
                                      const std::string& replace_with) {
  DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
                                 false);  // replace first instance
}
 580
// Replaces every occurrence of |find_this| found at or after |start_offset|
// in |*str| with |replace_with|, by calling DoReplaceSubstringsAfterOffset
// with replace_all == true.
void ReplaceSubstringsAfterOffset(string16* str,
                                  size_t start_offset,
                                  const string16& find_this,
                                  const string16& replace_with) {
  DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
                                 true);  // replace all instances
}

// std::string variant of the above.
void ReplaceSubstringsAfterOffset(std::string* str,
                                  size_t start_offset,
                                  const std::string& find_this,
                                  const std::string& replace_with) {
  DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
                                 true);  // replace all instances
}
 596
 597
 598 template<typename STR>
 599 static size_t TokenizeT(const STR& str,
 600                         const STR& delimiters,
 601                         std::vector<STR>* tokens) {
 602   tokens->clear();
 603
 604   size_t start = str.find_first_not_of(delimiters);
 605   while (start != STR::npos) {
 606     size_t end = str.find_first_of(delimiters, start + 1);
 607     if (end == STR::npos) {
 608       tokens->push_back(str.substr(start));
 609       break;
 610     } else {
 611       tokens->push_back(str.substr(start, end - start));
 612       start = str.find_first_not_of(delimiters, end + 1);
 613     }
 614   }
 615
 616   return tokens->size();
 617 }
 618
// Front-ends for TokenizeT. Each forwards |str|, |delimiters| and |tokens|
// and returns the number of tokens TokenizeT reports.

// string16 variant.
size_t Tokenize(const string16& str,
                const string16& delimiters,
                std::vector<string16>* tokens) {
  return TokenizeT(str, delimiters, tokens);
}

// std::string variant.
size_t Tokenize(const std::string& str,
                const std::string& delimiters,
                std::vector<std::string>* tokens) {
  return TokenizeT(str, delimiters, tokens);
}

// StringPiece variant.
size_t Tokenize(const base::StringPiece& str,
                const base::StringPiece& delimiters,
                std::vector<base::StringPiece>* tokens) {
  return TokenizeT(str, delimiters, tokens);
}
 636
 637 template<typename STR>
 638 static STR JoinStringT(const std::vector<STR>& parts, const STR& sep) {
 639   if (parts.empty())
 640     return STR();
 641
 642   STR result(parts[0]);
 643   typename std::vector<STR>::const_iterator iter = parts.begin();
 644   ++iter;
 645
 646   for (; iter != parts.end(); ++iter) {
 647     result += sep;
 648     result += *iter;
 649   }
 650
 651   return result;
 652 }
 653
// Front-ends for JoinStringT.

// Joins |parts| with the single character |sep|.
std::string JoinString(const std::vector<std::string>& parts, char sep) {
  return JoinStringT(parts, std::string(1, sep));
}

// Joins |parts| with the single 16-bit character |sep|.
string16 JoinString(const std::vector<string16>& parts, char16 sep) {
  return JoinStringT(parts, string16(1, sep));
}

// Joins |parts| with the string |separator|.
std::string JoinString(const std::vector<std::string>& parts,
                       const std::string& separator) {
  return JoinStringT(parts, separator);
}

// Joins |parts| with the 16-bit string |separator|.
string16 JoinString(const std::vector<string16>& parts,
                    const string16& separator) {
  return JoinStringT(parts, separator);
}
 671
 672 template<class FormatStringType, class OutStringType>
 673 OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string,
 674     const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) {
 675   size_t substitutions = subst.size();
 676
 677   size_t sub_length = 0;
 678   for (typename std::vector<OutStringType>::const_iterator iter = subst.begin();
 679        iter != subst.end(); ++iter) {
 680     sub_length += iter->length();
 681   }
 682
 683   OutStringType formatted;
 684   formatted.reserve(format_string.length() + sub_length);
 685
 686   std::vector<ReplacementOffset> r_offsets;
 687   for (typename FormatStringType::const_iterator i = format_string.begin();
 688        i != format_string.end(); ++i) {
 689     if ('$' == *i) {
 690       if (i + 1 != format_string.end()) {
 691         ++i;
 692         DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i;
 693         if ('$' == *i) {
 694           while (i != format_string.end() && '$' == *i) {
 695             formatted.push_back('$');
 696             ++i;
 697           }
 698           --i;
 699         } else {
 700           uintptr_t index = 0;
 701           while (i != format_string.end() && '0' <= *i && *i <= '9') {
 702             index *= 10;
 703             index += *i - '0';
 704             ++i;
 705           }
 706           --i;
 707           index -= 1;
 708           if (offsets) {
 709             ReplacementOffset r_offset(index,
 710                 static_cast<int>(formatted.size()));
 711             r_offsets.insert(std::lower_bound(r_offsets.begin(),
 712                                               r_offsets.end(),
 713                                               r_offset,
 714                                               &CompareParameter),
 715                              r_offset);
 716           }
 717           if (index < substitutions)
 718             formatted.append(subst.at(index));
 719         }
 720       }
 721     } else {
 722       formatted.push_back(*i);
 723     }
 724   }
 725   if (offsets) {
 726     for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin();
 727          i != r_offsets.end(); ++i) {
 728       offsets->push_back(i->offset);
 729     }
 730   }
 731   return formatted;
 732 }
 733
// string16 front-end for DoReplaceStringPlaceholders; forwards all arguments
// and returns the formatted string.
string16 ReplaceStringPlaceholders(const string16& format_string,
                                   const std::vector<string16>& subst,
                                   std::vector<size_t>* offsets) {
  return DoReplaceStringPlaceholders(format_string, subst, offsets);
}

// 8-bit front-end for DoReplaceStringPlaceholders; the format is a
// StringPiece and the result a std::string.
std::string ReplaceStringPlaceholders(const base::StringPiece& format_string,
                                      const std::vector<std::string>& subst,
                                      std::vector<size_t>* offsets) {
  return DoReplaceStringPlaceholders(format_string, subst, offsets);
}
 745
// Convenience overload for a single substitution: expands |format_string|
// with |a| as the only replacement and, if |offset| is non-NULL, stores the
// offset of that replacement in |*offset|.
//
// The format is expected to contain exactly one placeholder (DCHECKed).
// NOTE(review): when DCHECKs are compiled out and the format has no
// placeholder, |offsets| is empty and offsets[0] is read out of bounds --
// confirm callers always honor the precondition.
string16 ReplaceStringPlaceholders(const string16& format_string,
                                   const string16& a,
                                   size_t* offset) {
  std::vector<size_t> offsets;
  std::vector<string16> subst;
  subst.push_back(a);
  string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);

  DCHECK_EQ(1U, offsets.size());
  if (offset)
    *offset = offsets[0];
  return result;
}
 759
// Returns true if |character| is one of the two pattern wildcards, '*' or
// '?'.
static bool IsWildcard(base_icu::UChar32 character) {
  return character == '*' || character == '?';
}
 763
// Move the strings pointers to the point where they start to differ.
// Advances |*pattern| and |*string| in lock-step while the characters read by
// |next| are equal, stopping at the first unescaped wildcard in the pattern,
// at the first mismatch, or when either input is exhausted. A backslash in the
// pattern escapes the character that follows it, so an escaped wildcard is
// compared literally.
template <typename CHAR, typename NEXT>
static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,
                         const CHAR** string, const CHAR* string_end,
                         NEXT next) {
  // Position of a pending backslash, or NULL when the current pattern
  // character is not escaped.
  const CHAR* escape = NULL;
  while (*pattern != pattern_end && *string != string_end) {
    if (!escape && IsWildcard(**pattern)) {
      // We don't want to match wildcard here, except if it's escaped.
      return;
    }

    // Check if the escapement char is found. If so, skip it and move to the
    // next character.
    if (!escape && **pattern == '\\') {
      escape = *pattern;
      next(pattern, pattern_end);
      continue;
    }

    // Check if the chars match, if so, increment the ptrs. The reads are done
    // on copies so that nothing is consumed on a mismatch.
    const CHAR* pattern_next = *pattern;
    const CHAR* string_next = *string;
    base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
    if (pattern_char == next(&string_next, string_end) &&
        pattern_char != CBU_SENTINEL) {
      *pattern = pattern_next;
      *string = string_next;
    } else {
      // Uh oh, it did not match, we are done. If the last char was an
      // escapement, that means that it was an error to advance the ptr here,
      // let's put it back where it was. This also means that the MatchPattern
      // function will return false because if we can't match an escape char
      // here, then no one will.
      if (escape) {
        *pattern = escape;
      }
      return;
    }

    // The escaped character (if any) has been consumed.
    escape = NULL;
  }
}
 807
 808 template <typename CHAR, typename NEXT>
 809 static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
 810   while (*pattern != end) {
 811     if (!IsWildcard(**pattern))
 812       return;
 813     next(pattern, end);
 814   }
 815 }
 816
// Recursive matcher behind MatchPattern. Returns true if [eval, eval_end)
// matches [pattern, pattern_end), where '*' matches any run of characters
// (including none) and '?' matches zero or one character. |next| reads one
// character from a range and advances the pointer. |depth| is the current
// recursion depth; once it exceeds kMaxDepth the match is abandoned and false
// is returned.
template <typename CHAR, typename NEXT>
static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
                          const CHAR* pattern, const CHAR* pattern_end,
                          int depth,
                          NEXT next) {
  const int kMaxDepth = 16;
  if (depth > kMaxDepth)
    return false;

  // Eat all the matching chars.
  EatSameChars(&pattern, pattern_end, &eval, eval_end, next);

  // If the string is empty, then the pattern must be empty too, or contains
  // only wildcards.
  if (eval == eval_end) {
    EatWildcard(&pattern, pattern_end, next);
    return pattern == pattern_end;
  }

  // Pattern is empty but not string, this is not a match.
  if (pattern == pattern_end)
    return false;

  // If this is a question mark, then we need to compare the rest with
  // the current string or the string with one character eaten.
  const CHAR* next_pattern = pattern;
  next(&next_pattern, pattern_end);
  if (pattern[0] == '?') {
    // '?' matching nothing.
    if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
                      depth + 1, next))
      return true;
    // '?' matching exactly one character.
    const CHAR* next_eval = eval;
    next(&next_eval, eval_end);
    if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
                      depth + 1, next))
      return true;
  }

  // This is a *, try to match all the possible substrings with the remainder
  // of the pattern.
  if (pattern[0] == '*') {
    // Collapse duplicate wild cards (********** into *) so that the
    // method does not recurse unnecessarily. http://crbug.com/52839
    EatWildcard(&next_pattern, pattern_end, next);

    // Try the rest of the pattern at every remaining position of |eval|. Note
    // that |eval| is advanced one CHAR unit at a time here, not via |next|.
    while (eval != eval_end) {
      if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
                        depth + 1, next))
        return true;
      eval++;
    }

    // We reached the end of the string, let see if the pattern contains only
    // wildcards.
    if (eval == eval_end) {
      EatWildcard(&pattern, pattern_end, next);
      if (pattern != pattern_end)
        return false;
      return true;
    }
  }

  return false;
}
 881
// Functor that reads one character from the UTF-8 range [*p, end) with
// CBU8_NEXT, advances |*p| by the offset the macro produced, and returns the
// value the macro stored.
struct NextCharUTF8 {
  base_icu::UChar32 operator()(const char** p, const char* end) {
    base_icu::UChar32 c;
    int offset = 0;
    CBU8_NEXT(*p, offset, end - *p, c);
    *p += offset;
    return c;
  }
};
 891
// Functor that reads one character from the UTF-16 range [*p, end) with
// CBU16_NEXT, advances |*p| by the offset the macro produced, and returns the
// value the macro stored.
struct NextCharUTF16 {
  base_icu::UChar32 operator()(const char16** p, const char16* end) {
    base_icu::UChar32 c;
    int offset = 0;
    CBU16_NEXT(*p, offset, end - *p, c);
    *p += offset;
    return c;
  }
};
 901
// Returns true if |eval| matches the wildcard |pattern|. Runs MatchPatternT
// over the raw byte ranges, starting at depth 0 and reading characters with
// NextCharUTF8.
bool MatchPattern(const base::StringPiece& eval,
                  const base::StringPiece& pattern) {
  return MatchPatternT(eval.data(), eval.data() + eval.size(),
                       pattern.data(), pattern.data() + pattern.size(),
                       0, NextCharUTF8());
}

// 16-bit variant of the above, reading characters with NextCharUTF16.
bool MatchPattern(const string16& eval, const string16& pattern) {
  return MatchPatternT(eval.c_str(), eval.c_str() + eval.size(),
                       pattern.c_str(), pattern.c_str() + pattern.size(),
                       0, NextCharUTF16());
}
 914
 915 // The following code is compatible with the OpenBSD lcpy interface.  See:
 916 //   http://www.gratisoft.us/todd/papers/strlcpy.html
 917 //   ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
 918
 919 namespace {
 920
 921 template <typename CHAR>
 922 size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
 923   for (size_t i = 0; i < dst_size; ++i) {
 924     if ((dst[i] = src[i]) == 0)  // We hit and copied the terminating NULL.
 925       return i;
 926   }
 927
 928   // We were left off at dst_size.  We over copied 1 byte.  Null terminate.
 929   if (dst_size != 0)
 930     dst[dst_size - 1] = 0;
 931
 932   // Count the rest of the |src|, and return it's length in characters.
 933   while (src[dst_size]) ++dst_size;
 934   return dst_size;
 935 }
 936
 937 }  // namespace
 938
// char instantiation of lcpyT: bounded, NUL-terminating copy of |src| into
// |dst| that returns the length of |src|.
size_t base::strlcpy(char* dst, const char* src, size_t dst_size) {
  return lcpyT<char>(dst, src, dst_size);
}
// wchar_t instantiation of lcpyT: bounded, NUL-terminating copy of |src| into
// |dst| that returns the length of |src| in wide characters.
size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
  return lcpyT<wchar_t>(dst, src, dst_size);
}