base/strings/string_util.cc

   1 // Copyright 2013 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "base/strings/string_util.h"
   6
   7 #include <ctype.h>
   8 #include <errno.h>
   9 #include <math.h>
  10 #include <stdarg.h>
  11 #include <stdio.h>
  12 #include <stdlib.h>
  13 #include <string.h>
  14 #include <time.h>
  15 #include <wchar.h>
  16 #include <wctype.h>
  17
  18 #include <algorithm>
  19 #include <vector>
  20
  21 #include "base/basictypes.h"
  22 #include "base/logging.h"
  23 #include "base/memory/singleton.h"
  24 #include "base/strings/utf_string_conversion_utils.h"
  25 #include "base/strings/utf_string_conversions.h"
  26 #include "base/third_party/icu/icu_utf.h"
  27 #include "build/build_config.h"
  28
  29 // Remove when this entire file is in the base namespace.
  30 using base::char16;
  31 using base::string16;
  32
  33 namespace {
  34
// Holder for the empty std::string and string16 objects handed out by
// EmptyString() and EmptyString16().
//
// Force the singleton used by EmptyString[16] to be a unique type. This
// prevents other code that might accidentally use Singleton<string> from
// getting our internal one.
struct EmptyStrings {
  EmptyStrings() {}
  const std::string s;  // Default-constructed (empty) 8-bit string.
  const string16 s16;  // Default-constructed (empty) 16-bit string.

  // Returns the instance managed by Singleton<EmptyStrings>.
  static EmptyStrings* GetInstance() {
    return Singleton<EmptyStrings>::get();
  }
};
  47
  48 // Used by ReplaceStringPlaceholders to track the position in the string of
  49 // replaced parameters.
  50 struct ReplacementOffset {
  51   ReplacementOffset(uintptr_t parameter, size_t offset)
  52       : parameter(parameter),
  53         offset(offset) {}
  54
  55   // Index of the parameter.
  56   uintptr_t parameter;
  57
  58   // Starting position in the string.
  59   size_t offset;
  60 };
  61
// Orders ReplacementOffset records by their |parameter| index only; the
// |offset| field does not take part in the comparison. Returns true when
// |elem1| sorts before |elem2|.
static bool CompareParameter(const ReplacementOffset& elem1,
                             const ReplacementOffset& elem2) {
  return elem1.parameter < elem2.parameter;
}
  66
// Assuming that a pointer is the size of a "machine word", then
// uintptr_t is an integer type that is also a machine word.
typedef uintptr_t MachineWord;
// Mask of the low-order address bits that are zero when an address is a
// multiple of sizeof(MachineWord) (this assumes that size is a power of two).
const uintptr_t kMachineWordAlignmentMask = sizeof(MachineWord) - 1;
  71
  72 inline bool IsAlignedToMachineWord(const void* pointer) {
  73   return !(reinterpret_cast<MachineWord>(pointer) & kMachineWordAlignmentMask);
  74 }
  75
  76 template<typename T> inline T* AlignToMachineWord(T* pointer) {
  77   return reinterpret_cast<T*>(reinterpret_cast<MachineWord>(pointer) &
  78                               ~kMachineWordAlignmentMask);
  79 }
  80
// NonASCIIMask<size, CharacterType>::value() yields a |size|-byte integer
// constant with, for every CharacterType-sized lane, all bits set except the
// low seven. ANDing it with a word built from characters is non-zero when some
// character in that word has a bit outside the 7-bit ASCII range.
// Only the combinations specialized below are defined.
template<size_t size, typename CharacterType> struct NonASCIIMask;
// 4-byte word holding two 16-bit characters.
template<> struct NonASCIIMask<4, base::char16> {
    static inline uint32_t value() { return 0xFF80FF80U; }
};
// 4-byte word holding four 8-bit characters.
template<> struct NonASCIIMask<4, char> {
    static inline uint32_t value() { return 0x80808080U; }
};
// 8-byte word holding four 16-bit characters.
template<> struct NonASCIIMask<8, base::char16> {
    static inline uint64_t value() { return 0xFF80FF80FF80FF80ULL; }
};
// 8-byte word holding eight 8-bit characters.
template<> struct NonASCIIMask<8, char> {
    static inline uint64_t value() { return 0x8080808080808080ULL; }
};
#if defined(WCHAR_T_IS_UTF32)
// 4-byte word holding one 32-bit wchar_t.
template<> struct NonASCIIMask<4, wchar_t> {
    static inline uint32_t value() { return 0xFFFFFF80U; }
};
// 8-byte word holding two 32-bit wchar_t values.
template<> struct NonASCIIMask<8, wchar_t> {
    static inline uint64_t value() { return 0xFFFFFF80FFFFFF80ULL; }
};
#endif  // WCHAR_T_IS_UTF32
 102
 103 }  // namespace
 104
 105 namespace base {
 106
 107 bool IsWprintfFormatPortable(const wchar_t* format) {
 108   for (const wchar_t* position = format; *position != '\0'; ++position) {
 109     if (*position == '%') {
 110       bool in_specification = true;
 111       bool modifier_l = false;
 112       while (in_specification) {
 113         // Eat up characters until reaching a known specifier.
 114         if (*++position == '\0') {
 115           // The format string ended in the middle of a specification.  Call
 116           // it portable because no unportable specifications were found.  The
 117           // string is equally broken on all platforms.
 118           return true;
 119         }
 120
 121         if (*position == 'l') {
 122           // 'l' is the only thing that can save the 's' and 'c' specifiers.
 123           modifier_l = true;
 124         } else if (((*position == 's' || *position == 'c') && !modifier_l) ||
 125                    *position == 'S' || *position == 'C' || *position == 'F' ||
 126                    *position == 'D' || *position == 'O' || *position == 'U') {
 127           // Not portable.
 128           return false;
 129         }
 130
 131         if (wcschr(L"diouxXeEfgGaAcspn%", *position)) {
 132           // Portable, keep scanning the rest of the format string.
 133           in_specification = false;
 134         }
 135       }
 136     }
 137   }
 138
 139   return true;
 140 }
 141
// Returns a reference to the empty std::string owned by the EmptyStrings
// singleton.
const std::string& EmptyString() {
  return EmptyStrings::GetInstance()->s;
}
 145
// Returns a reference to the empty string16 owned by the EmptyStrings
// singleton.
const string16& EmptyString16() {
  return EmptyStrings::GetInstance()->s16;
}
 149
 150 template<typename STR>
 151 bool ReplaceCharsT(const STR& input,
 152                    const STR& replace_chars,
 153                    const STR& replace_with,
 154                    STR* output) {
 155   bool removed = false;
 156   size_t replace_length = replace_with.length();
 157
 158   *output = input;
 159
 160   size_t found = output->find_first_of(replace_chars);
 161   while (found != STR::npos) {
 162     removed = true;
 163     output->replace(found, 1, replace_with);
 164     found = output->find_first_of(replace_chars, found + replace_length);
 165   }
 166
 167   return removed;
 168 }
 169
// 16-bit front-end for ReplaceCharsT: replaces every character of |input|
// found in |replace_chars| with |replace_with| and stores the result in
// |output|. Returns true if anything was replaced.
bool ReplaceChars(const string16& input,
                  const base::StringPiece16& replace_chars,
                  const string16& replace_with,
                  string16* output) {
  return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output);
}
 176
// 8-bit front-end for ReplaceCharsT: replaces every character of |input|
// found in |replace_chars| with |replace_with| and stores the result in
// |output|. Returns true if anything was replaced.
bool ReplaceChars(const std::string& input,
                  const base::StringPiece& replace_chars,
                  const std::string& replace_with,
                  std::string* output) {
  return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output);
}
 183
 184 bool RemoveChars(const string16& input,
 185                  const base::StringPiece16& remove_chars,
 186                  string16* output) {
 187   return ReplaceChars(input, remove_chars.as_string(), string16(), output);
 188 }
 189
 190 bool RemoveChars(const std::string& input,
 191                  const base::StringPiece& remove_chars,
 192                  std::string* output) {
 193   return ReplaceChars(input, remove_chars.as_string(), std::string(), output);
 194 }
 195
// Strips the characters listed in |trim_chars| from the ends of |input|
// selected by |positions| (TRIM_LEADING and/or TRIM_TRAILING) and stores the
// result in |output|. |output| is allowed to point at |input|.
//
// Returns the ends from which something was actually removed. When a requested
// search finds no character to keep, |output| is cleared and |positions| is
// returned as-is; for an empty |input| the result is TRIM_NONE.
template<typename STR>
TrimPositions TrimStringT(const STR& input,
                          const STR& trim_chars,
                          TrimPositions positions,
                          STR* output) {
  // Find the edges of leading/trailing whitespace as desired.
  // For an empty |input|, |last_char| wraps around; the input.empty() test
  // below returns before that value can influence the result.
  const size_t last_char = input.length() - 1;
  const size_t first_good_char = (positions & TRIM_LEADING) ?
      input.find_first_not_of(trim_chars) : 0;
  const size_t last_good_char = (positions & TRIM_TRAILING) ?
      input.find_last_not_of(trim_chars) : last_char;

  // When the string was all whitespace, report that we stripped off whitespace
  // from whichever position the caller was interested in.  For empty input, we
  // stripped no whitespace, but we still need to clear |output|.
  if (input.empty() ||
      (first_good_char == STR::npos) || (last_good_char == STR::npos)) {
    bool input_was_empty = input.empty();  // in case output == &input
    output->clear();
    return input_was_empty ? TRIM_NONE : positions;
  }

  // Trim the whitespace.
  *output =
      input.substr(first_good_char, last_good_char - first_good_char + 1);

  // Return where we trimmed from.
  // The indices were computed before |output| was written, so they are still
  // meaningful even if |output| aliases |input|.
  return static_cast<TrimPositions>(
      ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |
      ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
}
 227
// Removes the characters in |trim_chars| from both ends of |input| (TRIM_ALL)
// and stores the result in |output|. Returns true unless TrimStringT reports
// TRIM_NONE.
bool TrimString(const string16& input,
                const base::StringPiece16& trim_chars,
                string16* output) {
  return TrimStringT(input, trim_chars.as_string(), TRIM_ALL, output) !=
      TRIM_NONE;
}
 234
// Removes the characters in |trim_chars| from both ends of |input| (TRIM_ALL)
// and stores the result in |output|. Returns true unless TrimStringT reports
// TRIM_NONE.
bool TrimString(const std::string& input,
                const base::StringPiece& trim_chars,
                std::string* output) {
  return TrimStringT(input, trim_chars.as_string(), TRIM_ALL, output) !=
      TRIM_NONE;
}
 241
// Writes to |output| a prefix of |input| that is no longer than |byte_size|
// bytes and ends at the end of a complete, valid UTF-8 character.
//
// If |byte_size| is greater than the length of |input|, |input| is copied
// unchanged. If no acceptable character is found while scanning backwards
// from the truncation point, |output| is cleared. |output| must not be NULL.
void TruncateUTF8ToByteSize(const std::string& input,
                            const size_t byte_size,
                            std::string* output) {
  DCHECK(output);
  if (byte_size > input.length()) {
    *output = input;
    return;
  }
  DCHECK_LE(byte_size, static_cast<uint32>(kint32max));
  // Note: This cast is necessary because CBU8_NEXT uses int32s.
  int32 truncation_length = static_cast<int32>(byte_size);
  int32 char_index = truncation_length - 1;
  const char* data = input.data();

  // Using CBU8, we will move backwards from the truncation point
  // to the beginning of the string looking for a valid UTF8
  // character.  Once a full UTF8 character is found, we will
  // truncate the string to the end of that character.
  while (char_index >= 0) {
    // Remember where this attempt started; CBU8_NEXT updates |char_index|.
    int32 prev = char_index;
    base_icu::UChar32 code_point = 0;
    CBU8_NEXT(data, char_index, truncation_length, code_point);
    if (!IsValidCharacter(code_point) ||
        !IsValidCodepoint(code_point)) {
      // Nothing usable starts at |prev|; retry one byte earlier.
      char_index = prev - 1;
    } else {
      // |char_index| is now used below as the length of the prefix to keep.
      break;
    }
  }

  if (char_index >= 0 )
    *output = input.substr(0, char_index);
  else
    output->clear();
}
 277
// Trims the characters in kWhitespaceUTF16 from the ends of |input| selected
// by |positions|, storing the result in |output|. Returns the ends that were
// actually trimmed (see TrimStringT).
TrimPositions TrimWhitespace(const string16& input,
                             TrimPositions positions,
                             string16* output) {
  return TrimStringT(input, base::string16(kWhitespaceUTF16), positions,
                     output);
}
 284
// Trims the characters in kWhitespaceASCII from the ends of |input| selected
// by |positions|, storing the result in |output|. Returns the ends that were
// actually trimmed (see TrimStringT).
TrimPositions TrimWhitespaceASCII(const std::string& input,
                                  TrimPositions positions,
                                  std::string* output) {
  return TrimStringT(input, std::string(kWhitespaceASCII), positions, output);
}
 290
// This function is only for backward-compatibility.
// To be removed when all callers are updated.
// It simply forwards to TrimWhitespaceASCII.
TrimPositions TrimWhitespace(const std::string& input,
                             TrimPositions positions,
                             std::string* output) {
  return TrimWhitespaceASCII(input, positions, output);
}
 298
 299 template<typename STR>
 300 STR CollapseWhitespaceT(const STR& text,
 301                         bool trim_sequences_with_line_breaks) {
 302   STR result;
 303   result.resize(text.size());
 304
 305   // Set flags to pretend we're already in a trimmed whitespace sequence, so we
 306   // will trim any leading whitespace.
 307   bool in_whitespace = true;
 308   bool already_trimmed = true;
 309
 310   int chars_written = 0;
 311   for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) {
 312     if (IsWhitespace(*i)) {
 313       if (!in_whitespace) {
 314         // Reduce all whitespace sequences to a single space.
 315         in_whitespace = true;
 316         result[chars_written++] = L' ';
 317       }
 318       if (trim_sequences_with_line_breaks && !already_trimmed &&
 319           ((*i == '\n') || (*i == '\r'))) {
 320         // Whitespace sequences containing CR or LF are eliminated entirely.
 321         already_trimmed = true;
 322         --chars_written;
 323       }
 324     } else {
 325       // Non-whitespace chracters are copied straight across.
 326       in_whitespace = false;
 327       already_trimmed = false;
 328       result[chars_written++] = *i;
 329     }
 330   }
 331
 332   if (in_whitespace && !already_trimmed) {
 333     // Any trailing whitespace is eliminated.
 334     --chars_written;
 335   }
 336
 337   result.resize(chars_written);
 338   return result;
 339 }
 340
// string16 front-end for CollapseWhitespaceT; see that template for the
// meaning of |trim_sequences_with_line_breaks|.
string16 CollapseWhitespace(const string16& text,
                            bool trim_sequences_with_line_breaks) {
  return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
}
 345
// std::string front-end for CollapseWhitespaceT; see that template for the
// meaning of |trim_sequences_with_line_breaks|.
std::string CollapseWhitespaceASCII(const std::string& text,
                                    bool trim_sequences_with_line_breaks) {
  return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
}
 350
// Returns true if |input| has no character outside the set |characters|,
// i.e. find_first_not_of() finds nothing.
bool ContainsOnlyChars(const StringPiece& input,
                       const StringPiece& characters) {
  return input.find_first_not_of(characters) == StringPiece::npos;
}
 355
// Returns true if |input| has no character outside the set |characters|,
// i.e. find_first_not_of() finds nothing.
bool ContainsOnlyChars(const StringPiece16& input,
                       const StringPiece16& characters) {
  return input.find_first_not_of(characters) == StringPiece16::npos;
}
 360
// Returns true if none of the |length| characters starting at |characters|
// has a bit set outside the low seven bits.
//
// Rather than testing each character, all characters are ORed together into
// one machine word (a word at a time for the aligned middle section) and the
// accumulated bits are checked once against NonASCIIMask at the end.
template <class Char>
inline bool DoIsStringASCII(const Char* characters, size_t length) {
  MachineWord all_char_bits = 0;
  const Char* end = characters + length;

  // Prologue: align the input.
  // Consumes one character at a time until the pointer is word-aligned or the
  // input is exhausted.
  while (!IsAlignedToMachineWord(characters) && characters != end) {
    all_char_bits |= *characters;
    ++characters;
  }

  // Compare the values of CPU word size.
  // |word_end| is |end| rounded down to a word boundary, so the loop only
  // reads whole words that lie inside the input.
  const Char* word_end = AlignToMachineWord(end);
  const size_t loop_increment = sizeof(MachineWord) / sizeof(Char);
  while (characters < word_end) {
    all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters));
    characters += loop_increment;
  }

  // Process the remaining bytes.
  while (characters != end) {
    all_char_bits |= *characters;
    ++characters;
  }

  MachineWord non_ascii_bit_mask =
      NonASCIIMask<sizeof(MachineWord), Char>::value();
  return !(all_char_bits & non_ascii_bit_mask);
}
 390
// Returns true if every character of |str| is 7-bit ASCII (see
// DoIsStringASCII).
bool IsStringASCII(const StringPiece& str) {
  return DoIsStringASCII(str.data(), str.length());
}
 394
// Returns true if every character of |str| is 7-bit ASCII (see
// DoIsStringASCII).
bool IsStringASCII(const StringPiece16& str) {
  return DoIsStringASCII(str.data(), str.length());
}
 398
// Returns true if every character of |str| is 7-bit ASCII (see
// DoIsStringASCII).
bool IsStringASCII(const string16& str) {
  return DoIsStringASCII(str.data(), str.length());
}
 402
#if defined(WCHAR_T_IS_UTF32)
// std::wstring overload, compiled only when WCHAR_T_IS_UTF32 is defined.
// Returns true if every character of |str| is 7-bit ASCII (see
// DoIsStringASCII).
bool IsStringASCII(const std::wstring& str) {
  return DoIsStringASCII(str.data(), str.length());
}
#endif
 408
// Returns true if |str| decodes, from start to end, into code points that all
// satisfy IsValidCharacter(). An empty string returns true.
bool IsStringUTF8(const std::string& str) {
  const char *src = str.data();
  // CBU8_NEXT is driven with int32 indices, hence the cast of the length.
  int32 src_len = static_cast<int32>(str.length());
  int32 char_index = 0;

  while (char_index < src_len) {
    int32 code_point;
    // Decodes the character at |char_index| into |code_point|; the loop relies
    // on the macro moving |char_index| forward.
    CBU8_NEXT(src, char_index, src_len, code_point);
    if (!IsValidCharacter(code_point))
      return false;
  }
  return true;
}
 422
 423 }  // namespace base
 424
 425 template<typename Iter>
 426 static inline bool DoLowerCaseEqualsASCII(Iter a_begin,
 427                                           Iter a_end,
 428                                           const char* b) {
 429   for (Iter it = a_begin; it != a_end; ++it, ++b) {
 430     if (!*b || base::ToLowerASCII(*it) != *b)
 431       return false;
 432   }
 433   return *b == 0;
 434 }
 435
// Front-ends for LowerCaseEqualsASCII.
// Each returns true if the lower-cased (ASCII) form of |a| equals |b|; see
// DoLowerCaseEqualsASCII.
bool LowerCaseEqualsASCII(const std::string& a, const char* b) {
  return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
}
 440
// string16 overload: true if the lower-cased (ASCII) form of |a| equals |b|.
bool LowerCaseEqualsASCII(const string16& a, const char* b) {
  return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
}
 444
// Iterator-range overload: true if the lower-cased (ASCII) form of
// [a_begin, a_end) equals |b|.
bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,
                          std::string::const_iterator a_end,
                          const char* b) {
  return DoLowerCaseEqualsASCII(a_begin, a_end, b);
}
 450
// Iterator-range overload for string16: true if the lower-cased (ASCII) form
// of [a_begin, a_end) equals |b|.
bool LowerCaseEqualsASCII(string16::const_iterator a_begin,
                          string16::const_iterator a_end,
                          const char* b) {
  return DoLowerCaseEqualsASCII(a_begin, a_end, b);
}
 456
 457 // TODO(port): Resolve wchar_t/iterator issues that require OS_ANDROID here.
 458 #if !defined(OS_ANDROID)
// Pointer-range overload: true if the lower-cased (ASCII) form of
// [a_begin, a_end) equals |b|.
bool LowerCaseEqualsASCII(const char* a_begin,
                          const char* a_end,
                          const char* b) {
  return DoLowerCaseEqualsASCII(a_begin, a_end, b);
}
 464
// Pointer-range overload for char16: true if the lower-cased (ASCII) form of
// [a_begin, a_end) equals |b|.
bool LowerCaseEqualsASCII(const char16* a_begin,
                          const char16* a_end,
                          const char* b) {
  return DoLowerCaseEqualsASCII(a_begin, a_end, b);
}
 470
 471 #endif  // !defined(OS_ANDROID)
 472
 473 bool EqualsASCII(const string16& a, const base::StringPiece& b) {
 474   if (a.length() != b.length())
 475     return false;
 476   return std::equal(b.begin(), b.end(), a.begin());
 477 }
 478
 479 bool StartsWithASCII(const std::string& str,
 480                      const std::string& search,
 481                      bool case_sensitive) {
 482   if (case_sensitive)
 483     return str.compare(0, search.length(), search) == 0;
 484   else
 485     return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0;
 486 }
 487
 488 template <typename STR>
 489 bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) {
 490   if (case_sensitive) {
 491     return str.compare(0, search.length(), search) == 0;
 492   } else {
 493     if (search.size() > str.size())
 494       return false;
 495     return std::equal(search.begin(), search.end(), str.begin(),
 496                       base::CaseInsensitiveCompare<typename STR::value_type>());
 497   }
 498 }
 499
// string16 front-end for StartsWithT: true if |str| begins with |search|.
bool StartsWith(const string16& str, const string16& search,
                bool case_sensitive) {
  return StartsWithT(str, search, case_sensitive);
}
 504
 505 template <typename STR>
 506 bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) {
 507   size_t str_length = str.length();
 508   size_t search_length = search.length();
 509   if (search_length > str_length)
 510     return false;
 511   if (case_sensitive)
 512     return str.compare(str_length - search_length, search_length, search) == 0;
 513   return std::equal(search.begin(), search.end(),
 514                     str.begin() + (str_length - search_length),
 515                     base::CaseInsensitiveCompare<typename STR::value_type>());
 516 }
 517
// std::string front-end for EndsWithT: true if |str| ends with |search|.
bool EndsWith(const std::string& str, const std::string& search,
              bool case_sensitive) {
  return EndsWithT(str, search, case_sensitive);
}
 522
// string16 front-end for EndsWithT: true if |str| ends with |search|.
bool EndsWith(const string16& str, const string16& search,
              bool case_sensitive) {
  return EndsWithT(str, search, case_sensitive);
}
 527
// Unit suffixes used by FormatBytesUnlocalized, indexed by how many times the
// byte count was divided by 1024. Each entry includes its leading space.
static const char* const kByteStringsUnlocalized[] = {
  " B",
  " kB",
  " MB",
  " GB",
  " TB",
  " PB"
};
 536
 537 string16 FormatBytesUnlocalized(int64 bytes) {
 538   double unit_amount = static_cast<double>(bytes);
 539   size_t dimension = 0;
 540   const int kKilo = 1024;
 541   while (unit_amount >= kKilo &&
 542          dimension < arraysize(kByteStringsUnlocalized) - 1) {
 543     unit_amount /= kKilo;
 544     dimension++;
 545   }
 546
 547   char buf[64];
 548   if (bytes != 0 && dimension > 0 && unit_amount < 100) {
 549     base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount,
 550                    kByteStringsUnlocalized[dimension]);
 551   } else {
 552     base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount,
 553                    kByteStringsUnlocalized[dimension]);
 554   }
 555
 556   return base::ASCIIToUTF16(buf);
 557 }
 558
 559 template<class StringType>
 560 void DoReplaceSubstringsAfterOffset(StringType* str,
 561                                     size_t start_offset,
 562                                     const StringType& find_this,
 563                                     const StringType& replace_with,
 564                                     bool replace_all) {
 565   if ((start_offset == StringType::npos) || (start_offset >= str->length()))
 566     return;
 567
 568   DCHECK(!find_this.empty());
 569   for (size_t offs(str->find(find_this, start_offset));
 570       offs != StringType::npos; offs = str->find(find_this, offs)) {
 571     str->replace(offs, find_this.length(), replace_with);
 572     offs += replace_with.length();
 573
 574     if (!replace_all)
 575       break;
 576   }
 577 }
 578
// Replaces the first occurrence of |find_this| at or after |start_offset| in
// |*str| with |replace_with|.
void ReplaceFirstSubstringAfterOffset(string16* str,
                                      size_t start_offset,
                                      const string16& find_this,
                                      const string16& replace_with) {
  DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
                                 false);  // replace first instance
}
 586
// Replaces the first occurrence of |find_this| at or after |start_offset| in
// |*str| with |replace_with|.
void ReplaceFirstSubstringAfterOffset(std::string* str,
                                      size_t start_offset,
                                      const std::string& find_this,
                                      const std::string& replace_with) {
  DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
                                 false);  // replace first instance
}
 594
// Replaces every occurrence of |find_this| at or after |start_offset| in
// |*str| with |replace_with|.
void ReplaceSubstringsAfterOffset(string16* str,
                                  size_t start_offset,
                                  const string16& find_this,
                                  const string16& replace_with) {
  DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
                                 true);  // replace all instances
}
 602
// Replaces every occurrence of |find_this| at or after |start_offset| in
// |*str| with |replace_with|.
void ReplaceSubstringsAfterOffset(std::string* str,
                                  size_t start_offset,
                                  const std::string& find_this,
                                  const std::string& replace_with) {
  DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
                                 true);  // replace all instances
}
 610
 611
 612 template<typename STR>
 613 static size_t TokenizeT(const STR& str,
 614                         const STR& delimiters,
 615                         std::vector<STR>* tokens) {
 616   tokens->clear();
 617
 618   size_t start = str.find_first_not_of(delimiters);
 619   while (start != STR::npos) {
 620     size_t end = str.find_first_of(delimiters, start + 1);
 621     if (end == STR::npos) {
 622       tokens->push_back(str.substr(start));
 623       break;
 624     } else {
 625       tokens->push_back(str.substr(start, end - start));
 626       start = str.find_first_not_of(delimiters, end + 1);
 627     }
 628   }
 629
 630   return tokens->size();
 631 }
 632
// string16 front-end for TokenizeT: splits |str| at any character in
// |delimiters| into |*tokens| and returns the number of tokens.
size_t Tokenize(const string16& str,
                const string16& delimiters,
                std::vector<string16>* tokens) {
  return TokenizeT(str, delimiters, tokens);
}
 638
// std::string front-end for TokenizeT: splits |str| at any character in
// |delimiters| into |*tokens| and returns the number of tokens.
size_t Tokenize(const std::string& str,
                const std::string& delimiters,
                std::vector<std::string>* tokens) {
  return TokenizeT(str, delimiters, tokens);
}
 644
// StringPiece front-end for TokenizeT: splits |str| at any character in
// |delimiters| into |*tokens| and returns the number of tokens.
size_t Tokenize(const base::StringPiece& str,
                const base::StringPiece& delimiters,
                std::vector<base::StringPiece>* tokens) {
  return TokenizeT(str, delimiters, tokens);
}
 650
 651 template<typename STR>
 652 static STR JoinStringT(const std::vector<STR>& parts, const STR& sep) {
 653   if (parts.empty())
 654     return STR();
 655
 656   STR result(parts[0]);
 657   typename std::vector<STR>::const_iterator iter = parts.begin();
 658   ++iter;
 659
 660   for (; iter != parts.end(); ++iter) {
 661     result += sep;
 662     result += *iter;
 663   }
 664
 665   return result;
 666 }
 667
// Joins |parts| with the single character |sep| between adjacent elements.
std::string JoinString(const std::vector<std::string>& parts, char sep) {
  return JoinStringT(parts, std::string(1, sep));
}
 671
// Joins |parts| with the single character |sep| between adjacent elements.
string16 JoinString(const std::vector<string16>& parts, char16 sep) {
  return JoinStringT(parts, string16(1, sep));
}
 675
// Joins |parts| with the string |separator| between adjacent elements.
std::string JoinString(const std::vector<std::string>& parts,
                       const std::string& separator) {
  return JoinStringT(parts, separator);
}
 680
// Joins |parts| with the string |separator| between adjacent elements.
string16 JoinString(const std::vector<string16>& parts,
                    const string16& separator) {
  return JoinStringT(parts, separator);
}
 685
 686 template<class FormatStringType, class OutStringType>
 687 OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string,
 688     const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) {
 689   size_t substitutions = subst.size();
 690
 691   size_t sub_length = 0;
 692   for (typename std::vector<OutStringType>::const_iterator iter = subst.begin();
 693        iter != subst.end(); ++iter) {
 694     sub_length += iter->length();
 695   }
 696
 697   OutStringType formatted;
 698   formatted.reserve(format_string.length() + sub_length);
 699
 700   std::vector<ReplacementOffset> r_offsets;
 701   for (typename FormatStringType::const_iterator i = format_string.begin();
 702        i != format_string.end(); ++i) {
 703     if ('$' == *i) {
 704       if (i + 1 != format_string.end()) {
 705         ++i;
 706         DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i;
 707         if ('$' == *i) {
 708           while (i != format_string.end() && '$' == *i) {
 709             formatted.push_back('$');
 710             ++i;
 711           }
 712           --i;
 713         } else {
 714           uintptr_t index = 0;
 715           while (i != format_string.end() && '0' <= *i && *i <= '9') {
 716             index *= 10;
 717             index += *i - '0';
 718             ++i;
 719           }
 720           --i;
 721           index -= 1;
 722           if (offsets) {
 723             ReplacementOffset r_offset(index,
 724                 static_cast<int>(formatted.size()));
 725             r_offsets.insert(std::lower_bound(r_offsets.begin(),
 726                                               r_offsets.end(),
 727                                               r_offset,
 728                                               &CompareParameter),
 729                              r_offset);
 730           }
 731           if (index < substitutions)
 732             formatted.append(subst.at(index));
 733         }
 734       }
 735     } else {
 736       formatted.push_back(*i);
 737     }
 738   }
 739   if (offsets) {
 740     for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin();
 741          i != r_offsets.end(); ++i) {
 742       offsets->push_back(i->offset);
 743     }
 744   }
 745   return formatted;
 746 }
 747
// string16 front-end for DoReplaceStringPlaceholders: expands the "$N"
// placeholders of |format_string| using |subst|; if |offsets| is non-NULL it
// receives the positions of the expansions.
string16 ReplaceStringPlaceholders(const string16& format_string,
                                   const std::vector<string16>& subst,
                                   std::vector<size_t>* offsets) {
  return DoReplaceStringPlaceholders(format_string, subst, offsets);
}
 753
// 8-bit front-end for DoReplaceStringPlaceholders: expands the "$N"
// placeholders of |format_string| using |subst|; if |offsets| is non-NULL it
// receives the positions of the expansions.
std::string ReplaceStringPlaceholders(const base::StringPiece& format_string,
                                      const std::vector<std::string>& subst,
                                      std::vector<size_t>* offsets) {
  return DoReplaceStringPlaceholders(format_string, subst, offsets);
}
 759
 760 string16 ReplaceStringPlaceholders(const string16& format_string,
 761                                    const string16& a,
 762                                    size_t* offset) {
 763   std::vector<size_t> offsets;
 764   std::vector<string16> subst;
 765   subst.push_back(a);
 766   string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);
 767
 768   DCHECK_EQ(1U, offsets.size());
 769   if (offset)
 770     *offset = offsets[0];
 771   return result;
 772 }
 773
// Returns true if |character| is one of the pattern wildcards, '*' or '?'.
static bool IsWildcard(base_icu::UChar32 character) {
  return character == '*' || character == '?';
}
 777
// Move the strings pointers to the point where they start to differ.
//
// Advances |*pattern| and |*string| in lock step while they decode to the
// same character, stopping at an unescaped wildcard in the pattern, at the
// first mismatch, or when either input is exhausted. A backslash in the
// pattern escapes the character that follows it. |next| decodes one character
// at the given position and advances the pointer passed to it.
template <typename CHAR, typename NEXT>
static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,
                         const CHAR** string, const CHAR* string_end,
                         NEXT next) {
  // Non-NULL while the previous pattern character was an unconsumed backslash;
  // points at that backslash so the pattern can be rewound to it.
  const CHAR* escape = NULL;
  while (*pattern != pattern_end && *string != string_end) {
    if (!escape && IsWildcard(**pattern)) {
      // We don't want to match wildcard here, except if it's escaped.
      return;
    }

    // Check if the escapement char is found. If so, skip it and move to the
    // next character.
    if (!escape && **pattern == '\\') {
      escape = *pattern;
      next(pattern, pattern_end);
      continue;
    }

    // Check if the chars match, if so, increment the ptrs.
    // Decoding is done on copies so that nothing moves on a mismatch.
    const CHAR* pattern_next = *pattern;
    const CHAR* string_next = *string;
    base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
    if (pattern_char == next(&string_next, string_end) &&
        pattern_char != CBU_SENTINEL) {
      *pattern = pattern_next;
      *string = string_next;
    } else {
      // Uh oh, it did not match, we are done. If the last char was an
      // escapement, that means that it was an error to advance the ptr here,
      // let's put it back where it was. This also mean that the MatchPattern
      // function will return false because if we can't match an escape char
      // here, then no one will.
      if (escape) {
        *pattern = escape;
      }
      return;
    }

    escape = NULL;
  }
}
 821
 822 template <typename CHAR, typename NEXT>
 823 static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
 824   while (*pattern != end) {
 825     if (!IsWildcard(**pattern))
 826       return;
 827     next(pattern, end);
 828   }
 829 }
 830
// Recursive matcher behind MatchPattern. Returns true if the string
// [eval, eval_end) matches the pattern [pattern, pattern_end), where '*' and
// '?' are wildcards and a backslash escapes the following character.
//
// |depth| is the current recursion level; once it exceeds kMaxDepth the
// function gives up and returns false. |next| decodes one character at a
// position and advances the pointer passed to it.
template <typename CHAR, typename NEXT>
static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
                          const CHAR* pattern, const CHAR* pattern_end,
                          int depth,
                          NEXT next) {
  // Bounds the recursion; deeper matches are reported as failures.
  const int kMaxDepth = 16;
  if (depth > kMaxDepth)
    return false;

  // Eat all the matching chars.
  EatSameChars(&pattern, pattern_end, &eval, eval_end, next);

  // If the string is empty, then the pattern must be empty too, or contains
  // only wildcards.
  if (eval == eval_end) {
    EatWildcard(&pattern, pattern_end, next);
    return pattern == pattern_end;
  }

  // Pattern is empty but not string, this is not a match.
  if (pattern == pattern_end)
    return false;

  // If this is a question mark, then we need to compare the rest with
  // the current string or the string with one character eaten.
  // |next_pattern| is the pattern with its first character consumed.
  const CHAR* next_pattern = pattern;
  next(&next_pattern, pattern_end);
  if (pattern[0] == '?') {
    if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
                      depth + 1, next))
      return true;
    const CHAR* next_eval = eval;
    next(&next_eval, eval_end);
    if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
                      depth + 1, next))
      return true;
  }

  // This is a *, try to match all the possible substrings with the remainder
  // of the pattern.
  if (pattern[0] == '*') {
    // Collapse duplicate wild cards (********** into *) so that the
    // method does not recurse unnecessarily. http://crbug.com/52839
    EatWildcard(&next_pattern, pattern_end, next);

    // Note that |eval| is advanced one CHAR at a time here, not via |next|.
    while (eval != eval_end) {
      if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
                        depth + 1, next))
        return true;
      eval++;
    }

    // We reached the end of the string, let see if the pattern contains only
    // wildcards.
    if (eval == eval_end) {
      EatWildcard(&pattern, pattern_end, next);
      if (pattern != pattern_end)
        return false;
      return true;
    }
  }

  return false;
}
 895
// Functor used by MatchPattern for 8-bit strings. Reads one character at |*p|
// (bounded by |end|) with CBU8_NEXT, advances |*p| by the offset the macro
// reports, and returns the value the macro stored in |c|.
struct NextCharUTF8 {
  base_icu::UChar32 operator()(const char** p, const char* end) {
    base_icu::UChar32 c;
    int offset = 0;
    CBU8_NEXT(*p, offset, end - *p, c);
    *p += offset;
    return c;
  }
};
 905
// Functor used by MatchPattern for 16-bit strings. Reads one character at
// |*p| (bounded by |end|) with CBU16_NEXT, advances |*p| by the offset the
// macro reports, and returns the value the macro stored in |c|.
struct NextCharUTF16 {
  base_icu::UChar32 operator()(const char16** p, const char16* end) {
    base_icu::UChar32 c;
    int offset = 0;
    CBU16_NEXT(*p, offset, end - *p, c);
    *p += offset;
    return c;
  }
};
 915
// Returns true if |eval| matches |pattern|, where '*' and '?' in the pattern
// are wildcards and a backslash escapes the next character. Characters are
// stepped over with NextCharUTF8; recursion starts at depth 0.
bool MatchPattern(const base::StringPiece& eval,
                  const base::StringPiece& pattern) {
  return MatchPatternT(eval.data(), eval.data() + eval.size(),
                       pattern.data(), pattern.data() + pattern.size(),
                       0, NextCharUTF8());
}
 922
// Returns true if |eval| matches |pattern|, where '*' and '?' in the pattern
// are wildcards and a backslash escapes the next character. Characters are
// stepped over with NextCharUTF16; recursion starts at depth 0.
bool MatchPattern(const string16& eval, const string16& pattern) {
  return MatchPatternT(eval.c_str(), eval.c_str() + eval.size(),
                       pattern.c_str(), pattern.c_str() + pattern.size(),
                       0, NextCharUTF16());
}
 928
 929 // The following code is compatible with the OpenBSD lcpy interface.  See:
 930 //   http://www.gratisoft.us/todd/papers/strlcpy.html
 931 //   ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
 932
 933 namespace {
 934
 935 template <typename CHAR>
 936 size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
 937   for (size_t i = 0; i < dst_size; ++i) {
 938     if ((dst[i] = src[i]) == 0)  // We hit and copied the terminating NULL.
 939       return i;
 940   }
 941
 942   // We were left off at dst_size.  We over copied 1 byte.  Null terminate.
 943   if (dst_size != 0)
 944     dst[dst_size - 1] = 0;
 945
 946   // Count the rest of the |src|, and return it's length in characters.
 947   while (src[dst_size]) ++dst_size;
 948   return dst_size;
 949 }
 950
 951 }  // namespace
 952
// char version of lcpyT: copies |src| into |dst| (capacity |dst_size|
// characters) and returns the length of |src|.
size_t base::strlcpy(char* dst, const char* src, size_t dst_size) {
  return lcpyT<char>(dst, src, dst_size);
}
// wchar_t version of lcpyT: copies |src| into |dst| (capacity |dst_size|
// characters) and returns the length of |src|.
size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
  return lcpyT<wchar_t>(dst, src, dst_size);
}