base/strings/string_util.cc

   1 // Copyright 2013 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "base/strings/string_util.h"
   6
   7 #include <ctype.h>
   8 #include <errno.h>
   9 #include <math.h>
  10 #include <stdarg.h>
  11 #include <stdio.h>
  12 #include <stdlib.h>
  13 #include <string.h>
  14 #include <time.h>
  15 #include <wchar.h>
  16 #include <wctype.h>
  17
  18 #include <algorithm>
  19 #include <vector>
  20
  21 #include "base/basictypes.h"
  22 #include "base/logging.h"
  23 #include "base/memory/singleton.h"
  24 #include "base/strings/utf_string_conversion_utils.h"
  25 #include "base/strings/utf_string_conversions.h"
  26 #include "base/third_party/icu/icu_utf.h"
  27 #include "build/build_config.h"
  28
  29 // Remove when this entire file is in the base namespace.
  30 using base::char16;
  31 using base::string16;
  32
  33 namespace {
  34
  35 // Force the singleton used by Empty[W]String[16] to be a unique type. This
  36 // prevents other code that might accidentally use Singleton<string> from
  37 // getting our internal one.
  38 struct EmptyStrings {
  39   EmptyStrings() {}
  40   const std::string s;
  41   const std::wstring ws;
  42   const string16 s16;
  43
  44   static EmptyStrings* GetInstance() {
  45     return Singleton<EmptyStrings>::get();
  46   }
  47 };
  48
  49 // Used by ReplaceStringPlaceholders to track the position in the string of
  50 // replaced parameters.
  51 struct ReplacementOffset {
  52   ReplacementOffset(uintptr_t parameter, size_t offset)
  53       : parameter(parameter),
  54         offset(offset) {}
  55
  56   // Index of the parameter.
  57   uintptr_t parameter;
  58
  59   // Starting position in the string.
  60   size_t offset;
  61 };
  62
  63 static bool CompareParameter(const ReplacementOffset& elem1,
  64                              const ReplacementOffset& elem2) {
  65   return elem1.parameter < elem2.parameter;
  66 }
  67
  68 }  // namespace
  69
  70 namespace base {
  71
  72 bool IsWprintfFormatPortable(const wchar_t* format) {
  73   for (const wchar_t* position = format; *position != '\0'; ++position) {
  74     if (*position == '%') {
  75       bool in_specification = true;
  76       bool modifier_l = false;
  77       while (in_specification) {
  78         // Eat up characters until reaching a known specifier.
  79         if (*++position == '\0') {
  80           // The format string ended in the middle of a specification.  Call
  81           // it portable because no unportable specifications were found.  The
  82           // string is equally broken on all platforms.
  83           return true;
  84         }
  85
  86         if (*position == 'l') {
  87           // 'l' is the only thing that can save the 's' and 'c' specifiers.
  88           modifier_l = true;
  89         } else if (((*position == 's' || *position == 'c') && !modifier_l) ||
  90                    *position == 'S' || *position == 'C' || *position == 'F' ||
  91                    *position == 'D' || *position == 'O' || *position == 'U') {
  92           // Not portable.
  93           return false;
  94         }
  95
  96         if (wcschr(L"diouxXeEfgGaAcspn%", *position)) {
  97           // Portable, keep scanning the rest of the format string.
  98           in_specification = false;
  99         }
 100       }
 101     }
 102   }
 103
 104   return true;
 105 }
 106
 107 const std::string& EmptyString() {
 108   return EmptyStrings::GetInstance()->s;
 109 }
 110
 111 const std::wstring& EmptyWString() {
 112   return EmptyStrings::GetInstance()->ws;
 113 }
 114
 115 const string16& EmptyString16() {
 116   return EmptyStrings::GetInstance()->s16;
 117 }
 118
 119 template<typename STR>
 120 bool ReplaceCharsT(const STR& input,
 121                    const typename STR::value_type replace_chars[],
 122                    const STR& replace_with,
 123                    STR* output) {
 124   bool removed = false;
 125   size_t replace_length = replace_with.length();
 126
 127   *output = input;
 128
 129   size_t found = output->find_first_of(replace_chars);
 130   while (found != STR::npos) {
 131     removed = true;
 132     output->replace(found, 1, replace_with);
 133     found = output->find_first_of(replace_chars, found + replace_length);
 134   }
 135
 136   return removed;
 137 }
 138
 139 bool ReplaceChars(const string16& input,
 140                   const char16 replace_chars[],
 141                   const string16& replace_with,
 142                   string16* output) {
 143   return ReplaceCharsT(input, replace_chars, replace_with, output);
 144 }
 145
 146 bool ReplaceChars(const std::string& input,
 147                   const char replace_chars[],
 148                   const std::string& replace_with,
 149                   std::string* output) {
 150   return ReplaceCharsT(input, replace_chars, replace_with, output);
 151 }
 152
 153 bool RemoveChars(const string16& input,
 154                  const char16 remove_chars[],
 155                  string16* output) {
 156   return ReplaceChars(input, remove_chars, string16(), output);
 157 }
 158
 159 bool RemoveChars(const std::string& input,
 160                  const char remove_chars[],
 161                  std::string* output) {
 162   return ReplaceChars(input, remove_chars, std::string(), output);
 163 }
 164
 165 template<typename STR>
 166 TrimPositions TrimStringT(const STR& input,
 167                           const typename STR::value_type trim_chars[],
 168                           TrimPositions positions,
 169                           STR* output) {
 170   // Find the edges of leading/trailing whitespace as desired.
 171   const typename STR::size_type last_char = input.length() - 1;
 172   const typename STR::size_type first_good_char = (positions & TRIM_LEADING) ?
 173       input.find_first_not_of(trim_chars) : 0;
 174   const typename STR::size_type last_good_char = (positions & TRIM_TRAILING) ?
 175       input.find_last_not_of(trim_chars) : last_char;
 176
 177   // When the string was all whitespace, report that we stripped off whitespace
 178   // from whichever position the caller was interested in.  For empty input, we
 179   // stripped no whitespace, but we still need to clear |output|.
 180   if (input.empty() ||
 181       (first_good_char == STR::npos) || (last_good_char == STR::npos)) {
 182     bool input_was_empty = input.empty();  // in case output == &input
 183     output->clear();
 184     return input_was_empty ? TRIM_NONE : positions;
 185   }
 186
 187   // Trim the whitespace.
 188   *output =
 189       input.substr(first_good_char, last_good_char - first_good_char + 1);
 190
 191   // Return where we trimmed from.
 192   return static_cast<TrimPositions>(
 193       ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |
 194       ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
 195 }
 196
 197 bool TrimString(const string16& input,
 198                 const char16 trim_chars[],
 199                 string16* output) {
 200   return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
 201 }
 202
 203 bool TrimString(const std::string& input,
 204                 const char trim_chars[],
 205                 std::string* output) {
 206   return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
 207 }
 208
 209 void TruncateUTF8ToByteSize(const std::string& input,
 210                             const size_t byte_size,
 211                             std::string* output) {
 212   DCHECK(output);
 213   if (byte_size > input.length()) {
 214     *output = input;
 215     return;
 216   }
 217   DCHECK_LE(byte_size, static_cast<uint32>(kint32max));
 218   // Note: This cast is necessary because CBU8_NEXT uses int32s.
 219   int32 truncation_length = static_cast<int32>(byte_size);
 220   int32 char_index = truncation_length - 1;
 221   const char* data = input.data();
 222
 223   // Using CBU8, we will move backwards from the truncation point
 224   // to the beginning of the string looking for a valid UTF8
 225   // character.  Once a full UTF8 character is found, we will
 226   // truncate the string to the end of that character.
 227   while (char_index >= 0) {
 228     int32 prev = char_index;
 229     uint32 code_point = 0;
 230     CBU8_NEXT(data, char_index, truncation_length, code_point);
 231     if (!IsValidCharacter(code_point) ||
 232         !IsValidCodepoint(code_point)) {
 233       char_index = prev - 1;
 234     } else {
 235       break;
 236     }
 237   }
 238
 239   if (char_index >= 0 )
 240     *output = input.substr(0, char_index);
 241   else
 242     output->clear();
 243 }
 244
 245 TrimPositions TrimWhitespace(const string16& input,
 246                              TrimPositions positions,
 247                              string16* output) {
 248   return TrimStringT(input, kWhitespaceUTF16, positions, output);
 249 }
 250
 251 TrimPositions TrimWhitespaceASCII(const std::string& input,
 252                                   TrimPositions positions,
 253                                   std::string* output) {
 254   return TrimStringT(input, kWhitespaceASCII, positions, output);
 255 }
 256
 257 // This function is only for backward-compatibility.
 258 // To be removed when all callers are updated.
 259 TrimPositions TrimWhitespace(const std::string& input,
 260                              TrimPositions positions,
 261                              std::string* output) {
 262   return TrimWhitespaceASCII(input, positions, output);
 263 }
 264
 265 template<typename STR>
 266 STR CollapseWhitespaceT(const STR& text,
 267                         bool trim_sequences_with_line_breaks) {
 268   STR result;
 269   result.resize(text.size());
 270
 271   // Set flags to pretend we're already in a trimmed whitespace sequence, so we
 272   // will trim any leading whitespace.
 273   bool in_whitespace = true;
 274   bool already_trimmed = true;
 275
 276   int chars_written = 0;
 277   for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) {
 278     if (IsWhitespace(*i)) {
 279       if (!in_whitespace) {
 280         // Reduce all whitespace sequences to a single space.
 281         in_whitespace = true;
 282         result[chars_written++] = L' ';
 283       }
 284       if (trim_sequences_with_line_breaks && !already_trimmed &&
 285           ((*i == '\n') || (*i == '\r'))) {
 286         // Whitespace sequences containing CR or LF are eliminated entirely.
 287         already_trimmed = true;
 288         --chars_written;
 289       }
 290     } else {
 291       // Non-whitespace chracters are copied straight across.
 292       in_whitespace = false;
 293       already_trimmed = false;
 294       result[chars_written++] = *i;
 295     }
 296   }
 297
 298   if (in_whitespace && !already_trimmed) {
 299     // Any trailing whitespace is eliminated.
 300     --chars_written;
 301   }
 302
 303   result.resize(chars_written);
 304   return result;
 305 }
 306
 307 string16 CollapseWhitespace(const string16& text,
 308                             bool trim_sequences_with_line_breaks) {
 309   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
 310 }
 311
 312 std::string CollapseWhitespaceASCII(const std::string& text,
 313                                     bool trim_sequences_with_line_breaks) {
 314   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
 315 }
 316
 317 bool ContainsOnlyChars(const StringPiece& input,
 318                        const StringPiece& characters) {
 319   return input.find_first_not_of(characters) == StringPiece::npos;
 320 }
 321
 322 bool ContainsOnlyChars(const StringPiece16& input,
 323                        const StringPiece16& characters) {
 324   return input.find_first_not_of(characters) == StringPiece16::npos;
 325 }
 326
 327 template<class STR>
 328 static bool DoIsStringASCII(const STR& str) {
 329   for (size_t i = 0; i < str.length(); i++) {
 330     typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i];
 331     if (c > 0x7F)
 332       return false;
 333   }
 334   return true;
 335 }
 336
 337 bool IsStringASCII(const StringPiece& str) {
 338   return DoIsStringASCII(str);
 339 }
 340
 341 bool IsStringASCII(const string16& str) {
 342   return DoIsStringASCII(str);
 343 }
 344
 345 bool IsStringUTF8(const std::string& str) {
 346   const char *src = str.data();
 347   int32 src_len = static_cast<int32>(str.length());
 348   int32 char_index = 0;
 349
 350   while (char_index < src_len) {
 351     int32 code_point;
 352     CBU8_NEXT(src, char_index, src_len, code_point);
 353     if (!IsValidCharacter(code_point))
 354       return false;
 355   }
 356   return true;
 357 }
 358
 359 }  // namespace base
 360
 361 template<typename Iter>
 362 static inline bool DoLowerCaseEqualsASCII(Iter a_begin,
 363                                           Iter a_end,
 364                                           const char* b) {
 365   for (Iter it = a_begin; it != a_end; ++it, ++b) {
 366     if (!*b || base::ToLowerASCII(*it) != *b)
 367       return false;
 368   }
 369   return *b == 0;
 370 }
 371
 372 // Front-ends for LowerCaseEqualsASCII.
 373 bool LowerCaseEqualsASCII(const std::string& a, const char* b) {
 374   return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
 375 }
 376
 377 bool LowerCaseEqualsASCII(const string16& a, const char* b) {
 378   return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
 379 }
 380
 381 bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,
 382                           std::string::const_iterator a_end,
 383                           const char* b) {
 384   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
 385 }
 386
 387 bool LowerCaseEqualsASCII(string16::const_iterator a_begin,
 388                           string16::const_iterator a_end,
 389                           const char* b) {
 390   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
 391 }
 392
 393 // TODO(port): Resolve wchar_t/iterator issues that require OS_ANDROID here.
 394 #if !defined(OS_ANDROID)
 395 bool LowerCaseEqualsASCII(const char* a_begin,
 396                           const char* a_end,
 397                           const char* b) {
 398   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
 399 }
 400
 401 bool LowerCaseEqualsASCII(const char16* a_begin,
 402                           const char16* a_end,
 403                           const char* b) {
 404   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
 405 }
 406
 407 #endif  // !defined(OS_ANDROID)
 408
 409 bool EqualsASCII(const string16& a, const base::StringPiece& b) {
 410   if (a.length() != b.length())
 411     return false;
 412   return std::equal(b.begin(), b.end(), a.begin());
 413 }
 414
 415 bool StartsWithASCII(const std::string& str,
 416                      const std::string& search,
 417                      bool case_sensitive) {
 418   if (case_sensitive)
 419     return str.compare(0, search.length(), search) == 0;
 420   else
 421     return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0;
 422 }
 423
 424 template <typename STR>
 425 bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) {
 426   if (case_sensitive) {
 427     return str.compare(0, search.length(), search) == 0;
 428   } else {
 429     if (search.size() > str.size())
 430       return false;
 431     return std::equal(search.begin(), search.end(), str.begin(),
 432                       base::CaseInsensitiveCompare<typename STR::value_type>());
 433   }
 434 }
 435
 436 bool StartsWith(const string16& str, const string16& search,
 437                 bool case_sensitive) {
 438   return StartsWithT(str, search, case_sensitive);
 439 }
 440
 441 template <typename STR>
 442 bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) {
 443   typename STR::size_type str_length = str.length();
 444   typename STR::size_type search_length = search.length();
 445   if (search_length > str_length)
 446     return false;
 447   if (case_sensitive) {
 448     return str.compare(str_length - search_length, search_length, search) == 0;
 449   } else {
 450     return std::equal(search.begin(), search.end(),
 451                       str.begin() + (str_length - search_length),
 452                       base::CaseInsensitiveCompare<typename STR::value_type>());
 453   }
 454 }
 455
 456 bool EndsWith(const std::string& str, const std::string& search,
 457               bool case_sensitive) {
 458   return EndsWithT(str, search, case_sensitive);
 459 }
 460
 461 bool EndsWith(const string16& str, const string16& search,
 462               bool case_sensitive) {
 463   return EndsWithT(str, search, case_sensitive);
 464 }
 465
 466 static const char* const kByteStringsUnlocalized[] = {
 467   " B",
 468   " kB",
 469   " MB",
 470   " GB",
 471   " TB",
 472   " PB"
 473 };
 474
 475 string16 FormatBytesUnlocalized(int64 bytes) {
 476   double unit_amount = static_cast<double>(bytes);
 477   size_t dimension = 0;
 478   const int kKilo = 1024;
 479   while (unit_amount >= kKilo &&
 480          dimension < arraysize(kByteStringsUnlocalized) - 1) {
 481     unit_amount /= kKilo;
 482     dimension++;
 483   }
 484
 485   char buf[64];
 486   if (bytes != 0 && dimension > 0 && unit_amount < 100) {
 487     base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount,
 488                    kByteStringsUnlocalized[dimension]);
 489   } else {
 490     base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount,
 491                    kByteStringsUnlocalized[dimension]);
 492   }
 493
 494   return base::ASCIIToUTF16(buf);
 495 }
 496
 497 template<class StringType>
 498 void DoReplaceSubstringsAfterOffset(StringType* str,
 499                                     typename StringType::size_type start_offset,
 500                                     const StringType& find_this,
 501                                     const StringType& replace_with,
 502                                     bool replace_all) {
 503   if ((start_offset == StringType::npos) || (start_offset >= str->length()))
 504     return;
 505
 506   DCHECK(!find_this.empty());
 507   for (typename StringType::size_type offs(str->find(find_this, start_offset));
 508       offs != StringType::npos; offs = str->find(find_this, offs)) {
 509     str->replace(offs, find_this.length(), replace_with);
 510     offs += replace_with.length();
 511
 512     if (!replace_all)
 513       break;
 514   }
 515 }
 516
 517 void ReplaceFirstSubstringAfterOffset(string16* str,
 518                                       string16::size_type start_offset,
 519                                       const string16& find_this,
 520                                       const string16& replace_with) {
 521   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
 522                                  false);  // replace first instance
 523 }
 524
 525 void ReplaceFirstSubstringAfterOffset(std::string* str,
 526                                       std::string::size_type start_offset,
 527                                       const std::string& find_this,
 528                                       const std::string& replace_with) {
 529   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
 530                                  false);  // replace first instance
 531 }
 532
 533 void ReplaceSubstringsAfterOffset(string16* str,
 534                                   string16::size_type start_offset,
 535                                   const string16& find_this,
 536                                   const string16& replace_with) {
 537   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
 538                                  true);  // replace all instances
 539 }
 540
 541 void ReplaceSubstringsAfterOffset(std::string* str,
 542                                   std::string::size_type start_offset,
 543                                   const std::string& find_this,
 544                                   const std::string& replace_with) {
 545   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
 546                                  true);  // replace all instances
 547 }
 548
 549
 550 template<typename STR>
 551 static size_t TokenizeT(const STR& str,
 552                         const STR& delimiters,
 553                         std::vector<STR>* tokens) {
 554   tokens->clear();
 555
 556   typename STR::size_type start = str.find_first_not_of(delimiters);
 557   while (start != STR::npos) {
 558     typename STR::size_type end = str.find_first_of(delimiters, start + 1);
 559     if (end == STR::npos) {
 560       tokens->push_back(str.substr(start));
 561       break;
 562     } else {
 563       tokens->push_back(str.substr(start, end - start));
 564       start = str.find_first_not_of(delimiters, end + 1);
 565     }
 566   }
 567
 568   return tokens->size();
 569 }
 570
 571 size_t Tokenize(const string16& str,
 572                 const string16& delimiters,
 573                 std::vector<string16>* tokens) {
 574   return TokenizeT(str, delimiters, tokens);
 575 }
 576
 577 size_t Tokenize(const std::string& str,
 578                 const std::string& delimiters,
 579                 std::vector<std::string>* tokens) {
 580   return TokenizeT(str, delimiters, tokens);
 581 }
 582
 583 size_t Tokenize(const base::StringPiece& str,
 584                 const base::StringPiece& delimiters,
 585                 std::vector<base::StringPiece>* tokens) {
 586   return TokenizeT(str, delimiters, tokens);
 587 }
 588
 589 template<typename STR>
 590 static STR JoinStringT(const std::vector<STR>& parts, const STR& sep) {
 591   if (parts.empty())
 592     return STR();
 593
 594   STR result(parts[0]);
 595   typename std::vector<STR>::const_iterator iter = parts.begin();
 596   ++iter;
 597
 598   for (; iter != parts.end(); ++iter) {
 599     result += sep;
 600     result += *iter;
 601   }
 602
 603   return result;
 604 }
 605
 606 std::string JoinString(const std::vector<std::string>& parts, char sep) {
 607   return JoinStringT(parts, std::string(1, sep));
 608 }
 609
 610 string16 JoinString(const std::vector<string16>& parts, char16 sep) {
 611   return JoinStringT(parts, string16(1, sep));
 612 }
 613
 614 std::string JoinString(const std::vector<std::string>& parts,
 615                        const std::string& separator) {
 616   return JoinStringT(parts, separator);
 617 }
 618
 619 string16 JoinString(const std::vector<string16>& parts,
 620                     const string16& separator) {
 621   return JoinStringT(parts, separator);
 622 }
 623
 624 template<class FormatStringType, class OutStringType>
 625 OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string,
 626     const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) {
 627   size_t substitutions = subst.size();
 628
 629   size_t sub_length = 0;
 630   for (typename std::vector<OutStringType>::const_iterator iter = subst.begin();
 631        iter != subst.end(); ++iter) {
 632     sub_length += iter->length();
 633   }
 634
 635   OutStringType formatted;
 636   formatted.reserve(format_string.length() + sub_length);
 637
 638   std::vector<ReplacementOffset> r_offsets;
 639   for (typename FormatStringType::const_iterator i = format_string.begin();
 640        i != format_string.end(); ++i) {
 641     if ('$' == *i) {
 642       if (i + 1 != format_string.end()) {
 643         ++i;
 644         DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i;
 645         if ('$' == *i) {
 646           while (i != format_string.end() && '$' == *i) {
 647             formatted.push_back('$');
 648             ++i;
 649           }
 650           --i;
 651         } else {
 652           uintptr_t index = 0;
 653           while (i != format_string.end() && '0' <= *i && *i <= '9') {
 654             index *= 10;
 655             index += *i - '0';
 656             ++i;
 657           }
 658           --i;
 659           index -= 1;
 660           if (offsets) {
 661             ReplacementOffset r_offset(index,
 662                 static_cast<int>(formatted.size()));
 663             r_offsets.insert(std::lower_bound(r_offsets.begin(),
 664                                               r_offsets.end(),
 665                                               r_offset,
 666                                               &CompareParameter),
 667                              r_offset);
 668           }
 669           if (index < substitutions)
 670             formatted.append(subst.at(index));
 671         }
 672       }
 673     } else {
 674       formatted.push_back(*i);
 675     }
 676   }
 677   if (offsets) {
 678     for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin();
 679          i != r_offsets.end(); ++i) {
 680       offsets->push_back(i->offset);
 681     }
 682   }
 683   return formatted;
 684 }
 685
 686 string16 ReplaceStringPlaceholders(const string16& format_string,
 687                                    const std::vector<string16>& subst,
 688                                    std::vector<size_t>* offsets) {
 689   return DoReplaceStringPlaceholders(format_string, subst, offsets);
 690 }
 691
 692 std::string ReplaceStringPlaceholders(const base::StringPiece& format_string,
 693                                       const std::vector<std::string>& subst,
 694                                       std::vector<size_t>* offsets) {
 695   return DoReplaceStringPlaceholders(format_string, subst, offsets);
 696 }
 697
 698 string16 ReplaceStringPlaceholders(const string16& format_string,
 699                                    const string16& a,
 700                                    size_t* offset) {
 701   std::vector<size_t> offsets;
 702   std::vector<string16> subst;
 703   subst.push_back(a);
 704   string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);
 705
 706   DCHECK_EQ(1U, offsets.size());
 707   if (offset)
 708     *offset = offsets[0];
 709   return result;
 710 }
 711
 712 static bool IsWildcard(base_icu::UChar32 character) {
 713   return character == '*' || character == '?';
 714 }
 715
 716 // Move the strings pointers to the point where they start to differ.
 717 template <typename CHAR, typename NEXT>
 718 static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,
 719                          const CHAR** string, const CHAR* string_end,
 720                          NEXT next) {
 721   const CHAR* escape = NULL;
 722   while (*pattern != pattern_end && *string != string_end) {
 723     if (!escape && IsWildcard(**pattern)) {
 724       // We don't want to match wildcard here, except if it's escaped.
 725       return;
 726     }
 727
 728     // Check if the escapement char is found. If so, skip it and move to the
 729     // next character.
 730     if (!escape && **pattern == '\\') {
 731       escape = *pattern;
 732       next(pattern, pattern_end);
 733       continue;
 734     }
 735
 736     // Check if the chars match, if so, increment the ptrs.
 737     const CHAR* pattern_next = *pattern;
 738     const CHAR* string_next = *string;
 739     base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
 740     if (pattern_char == next(&string_next, string_end) &&
 741         pattern_char != (base_icu::UChar32) CBU_SENTINEL) {
 742       *pattern = pattern_next;
 743       *string = string_next;
 744     } else {
 745       // Uh ho, it did not match, we are done. If the last char was an
 746       // escapement, that means that it was an error to advance the ptr here,
 747       // let's put it back where it was. This also mean that the MatchPattern
 748       // function will return false because if we can't match an escape char
 749       // here, then no one will.
 750       if (escape) {
 751         *pattern = escape;
 752       }
 753       return;
 754     }
 755
 756     escape = NULL;
 757   }
 758 }
 759
 760 template <typename CHAR, typename NEXT>
 761 static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
 762   while (*pattern != end) {
 763     if (!IsWildcard(**pattern))
 764       return;
 765     next(pattern, end);
 766   }
 767 }
 768
 769 template <typename CHAR, typename NEXT>
 770 static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
 771                           const CHAR* pattern, const CHAR* pattern_end,
 772                           int depth,
 773                           NEXT next) {
 774   const int kMaxDepth = 16;
 775   if (depth > kMaxDepth)
 776     return false;
 777
 778   // Eat all the matching chars.
 779   EatSameChars(&pattern, pattern_end, &eval, eval_end, next);
 780
 781   // If the string is empty, then the pattern must be empty too, or contains
 782   // only wildcards.
 783   if (eval == eval_end) {
 784     EatWildcard(&pattern, pattern_end, next);
 785     return pattern == pattern_end;
 786   }
 787
 788   // Pattern is empty but not string, this is not a match.
 789   if (pattern == pattern_end)
 790     return false;
 791
 792   // If this is a question mark, then we need to compare the rest with
 793   // the current string or the string with one character eaten.
 794   const CHAR* next_pattern = pattern;
 795   next(&next_pattern, pattern_end);
 796   if (pattern[0] == '?') {
 797     if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
 798                       depth + 1, next))
 799       return true;
 800     const CHAR* next_eval = eval;
 801     next(&next_eval, eval_end);
 802     if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
 803                       depth + 1, next))
 804       return true;
 805   }
 806
 807   // This is a *, try to match all the possible substrings with the remainder
 808   // of the pattern.
 809   if (pattern[0] == '*') {
 810     // Collapse duplicate wild cards (********** into *) so that the
 811     // method does not recurse unnecessarily. http://crbug.com/52839
 812     EatWildcard(&next_pattern, pattern_end, next);
 813
 814     while (eval != eval_end) {
 815       if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
 816                         depth + 1, next))
 817         return true;
 818       eval++;
 819     }
 820
 821     // We reached the end of the string, let see if the pattern contains only
 822     // wildcards.
 823     if (eval == eval_end) {
 824       EatWildcard(&pattern, pattern_end, next);
 825       if (pattern != pattern_end)
 826         return false;
 827       return true;
 828     }
 829   }
 830
 831   return false;
 832 }
 833
 834 struct NextCharUTF8 {
 835   base_icu::UChar32 operator()(const char** p, const char* end) {
 836     base_icu::UChar32 c;
 837     int offset = 0;
 838     CBU8_NEXT(*p, offset, end - *p, c);
 839     *p += offset;
 840     return c;
 841   }
 842 };
 843
 844 struct NextCharUTF16 {
 845   base_icu::UChar32 operator()(const char16** p, const char16* end) {
 846     base_icu::UChar32 c;
 847     int offset = 0;
 848     CBU16_NEXT(*p, offset, end - *p, c);
 849     *p += offset;
 850     return c;
 851   }
 852 };
 853
 854 bool MatchPattern(const base::StringPiece& eval,
 855                   const base::StringPiece& pattern) {
 856   return MatchPatternT(eval.data(), eval.data() + eval.size(),
 857                        pattern.data(), pattern.data() + pattern.size(),
 858                        0, NextCharUTF8());
 859 }
 860
 861 bool MatchPattern(const string16& eval, const string16& pattern) {
 862   return MatchPatternT(eval.c_str(), eval.c_str() + eval.size(),
 863                        pattern.c_str(), pattern.c_str() + pattern.size(),
 864                        0, NextCharUTF16());
 865 }
 866
 867 // The following code is compatible with the OpenBSD lcpy interface.  See:
 868 //   http://www.gratisoft.us/todd/papers/strlcpy.html
 869 //   ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
 870
 871 namespace {
 872
 873 template <typename CHAR>
 874 size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
 875   for (size_t i = 0; i < dst_size; ++i) {
 876     if ((dst[i] = src[i]) == 0)  // We hit and copied the terminating NULL.
 877       return i;
 878   }
 879
 880   // We were left off at dst_size.  We over copied 1 byte.  Null terminate.
 881   if (dst_size != 0)
 882     dst[dst_size - 1] = 0;
 883
 884   // Count the rest of the |src|, and return it's length in characters.
 885   while (src[dst_size]) ++dst_size;
 886   return dst_size;
 887 }
 888
 889 }  // namespace
 890
 891 size_t base::strlcpy(char* dst, const char* src, size_t dst_size) {
 892   return lcpyT<char>(dst, src, dst_size);
 893 }
 894 size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
 895   return lcpyT<wchar_t>(dst, src, dst_size);
 896 }