base/strings/string_util.cc

   1 // Copyright 2013 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "base/strings/string_util.h"
   6
   7 #include <ctype.h>
   8 #include <errno.h>
   9 #include <math.h>
  10 #include <stdarg.h>
  11 #include <stdio.h>
  12 #include <stdlib.h>
  13 #include <string.h>
  14 #include <time.h>
  15 #include <wchar.h>
  16 #include <wctype.h>
  17
  18 #include <algorithm>
  19 #include <vector>
  20
  21 #include "base/basictypes.h"
  22 #include "base/logging.h"
  23 #include "base/memory/singleton.h"
  24 #include "base/strings/utf_string_conversion_utils.h"
  25 #include "base/strings/utf_string_conversions.h"
  26 #include "base/third_party/icu/icu_utf.h"
  27 #include "build/build_config.h"
  28
  29 // Remove when this entire file is in the base namespace.
  30 using base::char16;
  31 using base::string16;
  32
  33 namespace {
  34
  35 // Force the singleton used by EmptyString[16] to be a unique type. This
  36 // prevents other code that might accidentally use Singleton<string> from
  37 // getting our internal one.
  38 struct EmptyStrings {
  39   EmptyStrings() {}
  40   const std::string s;
  41   const string16 s16;
  42
  43   static EmptyStrings* GetInstance() {
  44     return Singleton<EmptyStrings>::get();
  45   }
  46 };
  47
  48 // Used by ReplaceStringPlaceholders to track the position in the string of
  49 // replaced parameters.
  50 struct ReplacementOffset {
  51   ReplacementOffset(uintptr_t parameter, size_t offset)
  52       : parameter(parameter),
  53         offset(offset) {}
  54
  55   // Index of the parameter.
  56   uintptr_t parameter;
  57
  58   // Starting position in the string.
  59   size_t offset;
  60 };
  61
  62 static bool CompareParameter(const ReplacementOffset& elem1,
  63                              const ReplacementOffset& elem2) {
  64   return elem1.parameter < elem2.parameter;
  65 }
  66
  67 }  // namespace
  68
  69 namespace base {
  70
  71 bool IsWprintfFormatPortable(const wchar_t* format) {
  72   for (const wchar_t* position = format; *position != '\0'; ++position) {
  73     if (*position == '%') {
  74       bool in_specification = true;
  75       bool modifier_l = false;
  76       while (in_specification) {
  77         // Eat up characters until reaching a known specifier.
  78         if (*++position == '\0') {
  79           // The format string ended in the middle of a specification.  Call
  80           // it portable because no unportable specifications were found.  The
  81           // string is equally broken on all platforms.
  82           return true;
  83         }
  84
  85         if (*position == 'l') {
  86           // 'l' is the only thing that can save the 's' and 'c' specifiers.
  87           modifier_l = true;
  88         } else if (((*position == 's' || *position == 'c') && !modifier_l) ||
  89                    *position == 'S' || *position == 'C' || *position == 'F' ||
  90                    *position == 'D' || *position == 'O' || *position == 'U') {
  91           // Not portable.
  92           return false;
  93         }
  94
  95         if (wcschr(L"diouxXeEfgGaAcspn%", *position)) {
  96           // Portable, keep scanning the rest of the format string.
  97           in_specification = false;
  98         }
  99       }
 100     }
 101   }
 102
 103   return true;
 104 }
 105
 106 const std::string& EmptyString() {
 107   return EmptyStrings::GetInstance()->s;
 108 }
 109
 110 const string16& EmptyString16() {
 111   return EmptyStrings::GetInstance()->s16;
 112 }
 113
 114 template<typename STR>
 115 bool ReplaceCharsT(const STR& input,
 116                    const STR& replace_chars,
 117                    const STR& replace_with,
 118                    STR* output) {
 119   bool removed = false;
 120   size_t replace_length = replace_with.length();
 121
 122   *output = input;
 123
 124   size_t found = output->find_first_of(replace_chars);
 125   while (found != STR::npos) {
 126     removed = true;
 127     output->replace(found, 1, replace_with);
 128     found = output->find_first_of(replace_chars, found + replace_length);
 129   }
 130
 131   return removed;
 132 }
 133
 134 bool ReplaceChars(const string16& input,
 135                   const base::StringPiece16& replace_chars,
 136                   const string16& replace_with,
 137                   string16* output) {
 138   return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output);
 139 }
 140
 141 bool ReplaceChars(const std::string& input,
 142                   const base::StringPiece& replace_chars,
 143                   const std::string& replace_with,
 144                   std::string* output) {
 145   return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output);
 146 }
 147
 148 bool RemoveChars(const string16& input,
 149                  const base::StringPiece16& remove_chars,
 150                  string16* output) {
 151   return ReplaceChars(input, remove_chars.as_string(), string16(), output);
 152 }
 153
 154 bool RemoveChars(const std::string& input,
 155                  const base::StringPiece& remove_chars,
 156                  std::string* output) {
 157   return ReplaceChars(input, remove_chars.as_string(), std::string(), output);
 158 }
 159
 160 template<typename STR>
 161 TrimPositions TrimStringT(const STR& input,
 162                           const STR& trim_chars,
 163                           TrimPositions positions,
 164                           STR* output) {
 165   // Find the edges of leading/trailing whitespace as desired.
 166   const size_t last_char = input.length() - 1;
 167   const size_t first_good_char = (positions & TRIM_LEADING) ?
 168       input.find_first_not_of(trim_chars) : 0;
 169   const size_t last_good_char = (positions & TRIM_TRAILING) ?
 170       input.find_last_not_of(trim_chars) : last_char;
 171
 172   // When the string was all whitespace, report that we stripped off whitespace
 173   // from whichever position the caller was interested in.  For empty input, we
 174   // stripped no whitespace, but we still need to clear |output|.
 175   if (input.empty() ||
 176       (first_good_char == STR::npos) || (last_good_char == STR::npos)) {
 177     bool input_was_empty = input.empty();  // in case output == &input
 178     output->clear();
 179     return input_was_empty ? TRIM_NONE : positions;
 180   }
 181
 182   // Trim the whitespace.
 183   *output =
 184       input.substr(first_good_char, last_good_char - first_good_char + 1);
 185
 186   // Return where we trimmed from.
 187   return static_cast<TrimPositions>(
 188       ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |
 189       ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
 190 }
 191
 192 bool TrimString(const string16& input,
 193                 const base::StringPiece16& trim_chars,
 194                 string16* output) {
 195   return TrimStringT(input, trim_chars.as_string(), TRIM_ALL, output) !=
 196       TRIM_NONE;
 197 }
 198
 199 bool TrimString(const std::string& input,
 200                 const base::StringPiece& trim_chars,
 201                 std::string* output) {
 202   return TrimStringT(input, trim_chars.as_string(), TRIM_ALL, output) !=
 203       TRIM_NONE;
 204 }
 205
 206 void TruncateUTF8ToByteSize(const std::string& input,
 207                             const size_t byte_size,
 208                             std::string* output) {
 209   DCHECK(output);
 210   if (byte_size > input.length()) {
 211     *output = input;
 212     return;
 213   }
 214   DCHECK_LE(byte_size, static_cast<uint32>(kint32max));
 215   // Note: This cast is necessary because CBU8_NEXT uses int32s.
 216   int32 truncation_length = static_cast<int32>(byte_size);
 217   int32 char_index = truncation_length - 1;
 218   const char* data = input.data();
 219
 220   // Using CBU8, we will move backwards from the truncation point
 221   // to the beginning of the string looking for a valid UTF8
 222   // character.  Once a full UTF8 character is found, we will
 223   // truncate the string to the end of that character.
 224   while (char_index >= 0) {
 225     int32 prev = char_index;
 226     base_icu::UChar32 code_point = 0;
 227     CBU8_NEXT(data, char_index, truncation_length, code_point);
 228     if (!IsValidCharacter(code_point) ||
 229         !IsValidCodepoint(code_point)) {
 230       char_index = prev - 1;
 231     } else {
 232       break;
 233     }
 234   }
 235
 236   if (char_index >= 0 )
 237     *output = input.substr(0, char_index);
 238   else
 239     output->clear();
 240 }
 241
 242 TrimPositions TrimWhitespace(const string16& input,
 243                              TrimPositions positions,
 244                              string16* output) {
 245   return TrimStringT(input, base::string16(kWhitespaceUTF16), positions,
 246                      output);
 247 }
 248
 249 TrimPositions TrimWhitespaceASCII(const std::string& input,
 250                                   TrimPositions positions,
 251                                   std::string* output) {
 252   return TrimStringT(input, std::string(kWhitespaceASCII), positions, output);
 253 }
 254
 255 // This function is only for backward-compatibility.
 256 // To be removed when all callers are updated.
 257 TrimPositions TrimWhitespace(const std::string& input,
 258                              TrimPositions positions,
 259                              std::string* output) {
 260   return TrimWhitespaceASCII(input, positions, output);
 261 }
 262
 263 template<typename STR>
 264 STR CollapseWhitespaceT(const STR& text,
 265                         bool trim_sequences_with_line_breaks) {
 266   STR result;
 267   result.resize(text.size());
 268
 269   // Set flags to pretend we're already in a trimmed whitespace sequence, so we
 270   // will trim any leading whitespace.
 271   bool in_whitespace = true;
 272   bool already_trimmed = true;
 273
 274   int chars_written = 0;
 275   for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) {
 276     if (IsWhitespace(*i)) {
 277       if (!in_whitespace) {
 278         // Reduce all whitespace sequences to a single space.
 279         in_whitespace = true;
 280         result[chars_written++] = L' ';
 281       }
 282       if (trim_sequences_with_line_breaks && !already_trimmed &&
 283           ((*i == '\n') || (*i == '\r'))) {
 284         // Whitespace sequences containing CR or LF are eliminated entirely.
 285         already_trimmed = true;
 286         --chars_written;
 287       }
 288     } else {
 289       // Non-whitespace chracters are copied straight across.
 290       in_whitespace = false;
 291       already_trimmed = false;
 292       result[chars_written++] = *i;
 293     }
 294   }
 295
 296   if (in_whitespace && !already_trimmed) {
 297     // Any trailing whitespace is eliminated.
 298     --chars_written;
 299   }
 300
 301   result.resize(chars_written);
 302   return result;
 303 }
 304
 305 string16 CollapseWhitespace(const string16& text,
 306                             bool trim_sequences_with_line_breaks) {
 307   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
 308 }
 309
 310 std::string CollapseWhitespaceASCII(const std::string& text,
 311                                     bool trim_sequences_with_line_breaks) {
 312   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
 313 }
 314
 315 bool ContainsOnlyChars(const StringPiece& input,
 316                        const StringPiece& characters) {
 317   return input.find_first_not_of(characters) == StringPiece::npos;
 318 }
 319
 320 bool ContainsOnlyChars(const StringPiece16& input,
 321                        const StringPiece16& characters) {
 322   return input.find_first_not_of(characters) == StringPiece16::npos;
 323 }
 324
 325 template<class STR>
 326 static bool DoIsStringASCII(const STR& str) {
 327   for (size_t i = 0; i < str.length(); i++) {
 328     typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i];
 329     if (c > 0x7F)
 330       return false;
 331   }
 332   return true;
 333 }
 334
 335 bool IsStringASCII(const StringPiece& str) {
 336   return DoIsStringASCII(str);
 337 }
 338
 339 bool IsStringASCII(const string16& str) {
 340   return DoIsStringASCII(str);
 341 }
 342
 343 bool IsStringUTF8(const std::string& str) {
 344   const char *src = str.data();
 345   int32 src_len = static_cast<int32>(str.length());
 346   int32 char_index = 0;
 347
 348   while (char_index < src_len) {
 349     int32 code_point;
 350     CBU8_NEXT(src, char_index, src_len, code_point);
 351     if (!IsValidCharacter(code_point))
 352       return false;
 353   }
 354   return true;
 355 }
 356
 357 }  // namespace base
 358
 359 template<typename Iter>
 360 static inline bool DoLowerCaseEqualsASCII(Iter a_begin,
 361                                           Iter a_end,
 362                                           const char* b) {
 363   for (Iter it = a_begin; it != a_end; ++it, ++b) {
 364     if (!*b || base::ToLowerASCII(*it) != *b)
 365       return false;
 366   }
 367   return *b == 0;
 368 }
 369
 370 // Front-ends for LowerCaseEqualsASCII.
 371 bool LowerCaseEqualsASCII(const std::string& a, const char* b) {
 372   return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
 373 }
 374
 375 bool LowerCaseEqualsASCII(const string16& a, const char* b) {
 376   return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
 377 }
 378
 379 bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,
 380                           std::string::const_iterator a_end,
 381                           const char* b) {
 382   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
 383 }
 384
 385 bool LowerCaseEqualsASCII(string16::const_iterator a_begin,
 386                           string16::const_iterator a_end,
 387                           const char* b) {
 388   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
 389 }
 390
 391 // TODO(port): Resolve wchar_t/iterator issues that require OS_ANDROID here.
 392 #if !defined(OS_ANDROID)
 393 bool LowerCaseEqualsASCII(const char* a_begin,
 394                           const char* a_end,
 395                           const char* b) {
 396   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
 397 }
 398
 399 bool LowerCaseEqualsASCII(const char16* a_begin,
 400                           const char16* a_end,
 401                           const char* b) {
 402   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
 403 }
 404
 405 #endif  // !defined(OS_ANDROID)
 406
 407 bool EqualsASCII(const string16& a, const base::StringPiece& b) {
 408   if (a.length() != b.length())
 409     return false;
 410   return std::equal(b.begin(), b.end(), a.begin());
 411 }
 412
 413 bool StartsWithASCII(const std::string& str,
 414                      const std::string& search,
 415                      bool case_sensitive) {
 416   if (case_sensitive)
 417     return str.compare(0, search.length(), search) == 0;
 418   else
 419     return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0;
 420 }
 421
 422 template <typename STR>
 423 bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) {
 424   if (case_sensitive) {
 425     return str.compare(0, search.length(), search) == 0;
 426   } else {
 427     if (search.size() > str.size())
 428       return false;
 429     return std::equal(search.begin(), search.end(), str.begin(),
 430                       base::CaseInsensitiveCompare<typename STR::value_type>());
 431   }
 432 }
 433
 434 bool StartsWith(const string16& str, const string16& search,
 435                 bool case_sensitive) {
 436   return StartsWithT(str, search, case_sensitive);
 437 }
 438
 439 template <typename STR>
 440 bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) {
 441   size_t str_length = str.length();
 442   size_t search_length = search.length();
 443   if (search_length > str_length)
 444     return false;
 445   if (case_sensitive)
 446     return str.compare(str_length - search_length, search_length, search) == 0;
 447   return std::equal(search.begin(), search.end(),
 448                     str.begin() + (str_length - search_length),
 449                     base::CaseInsensitiveCompare<typename STR::value_type>());
 450 }
 451
 452 bool EndsWith(const std::string& str, const std::string& search,
 453               bool case_sensitive) {
 454   return EndsWithT(str, search, case_sensitive);
 455 }
 456
 457 bool EndsWith(const string16& str, const string16& search,
 458               bool case_sensitive) {
 459   return EndsWithT(str, search, case_sensitive);
 460 }
 461
 462 static const char* const kByteStringsUnlocalized[] = {
 463   " B",
 464   " kB",
 465   " MB",
 466   " GB",
 467   " TB",
 468   " PB"
 469 };
 470
 471 string16 FormatBytesUnlocalized(int64 bytes) {
 472   double unit_amount = static_cast<double>(bytes);
 473   size_t dimension = 0;
 474   const int kKilo = 1024;
 475   while (unit_amount >= kKilo &&
 476          dimension < arraysize(kByteStringsUnlocalized) - 1) {
 477     unit_amount /= kKilo;
 478     dimension++;
 479   }
 480
 481   char buf[64];
 482   if (bytes != 0 && dimension > 0 && unit_amount < 100) {
 483     base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount,
 484                    kByteStringsUnlocalized[dimension]);
 485   } else {
 486     base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount,
 487                    kByteStringsUnlocalized[dimension]);
 488   }
 489
 490   return base::ASCIIToUTF16(buf);
 491 }
 492
 493 template<class StringType>
 494 void DoReplaceSubstringsAfterOffset(StringType* str,
 495                                     size_t start_offset,
 496                                     const StringType& find_this,
 497                                     const StringType& replace_with,
 498                                     bool replace_all) {
 499   if ((start_offset == StringType::npos) || (start_offset >= str->length()))
 500     return;
 501
 502   DCHECK(!find_this.empty());
 503   for (size_t offs(str->find(find_this, start_offset));
 504       offs != StringType::npos; offs = str->find(find_this, offs)) {
 505     str->replace(offs, find_this.length(), replace_with);
 506     offs += replace_with.length();
 507
 508     if (!replace_all)
 509       break;
 510   }
 511 }
 512
 513 void ReplaceFirstSubstringAfterOffset(string16* str,
 514                                       size_t start_offset,
 515                                       const string16& find_this,
 516                                       const string16& replace_with) {
 517   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
 518                                  false);  // replace first instance
 519 }
 520
 521 void ReplaceFirstSubstringAfterOffset(std::string* str,
 522                                       size_t start_offset,
 523                                       const std::string& find_this,
 524                                       const std::string& replace_with) {
 525   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
 526                                  false);  // replace first instance
 527 }
 528
 529 void ReplaceSubstringsAfterOffset(string16* str,
 530                                   size_t start_offset,
 531                                   const string16& find_this,
 532                                   const string16& replace_with) {
 533   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
 534                                  true);  // replace all instances
 535 }
 536
 537 void ReplaceSubstringsAfterOffset(std::string* str,
 538                                   size_t start_offset,
 539                                   const std::string& find_this,
 540                                   const std::string& replace_with) {
 541   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
 542                                  true);  // replace all instances
 543 }
 544
 545
 546 template<typename STR>
 547 static size_t TokenizeT(const STR& str,
 548                         const STR& delimiters,
 549                         std::vector<STR>* tokens) {
 550   tokens->clear();
 551
 552   size_t start = str.find_first_not_of(delimiters);
 553   while (start != STR::npos) {
 554     size_t end = str.find_first_of(delimiters, start + 1);
 555     if (end == STR::npos) {
 556       tokens->push_back(str.substr(start));
 557       break;
 558     } else {
 559       tokens->push_back(str.substr(start, end - start));
 560       start = str.find_first_not_of(delimiters, end + 1);
 561     }
 562   }
 563
 564   return tokens->size();
 565 }
 566
 567 size_t Tokenize(const string16& str,
 568                 const string16& delimiters,
 569                 std::vector<string16>* tokens) {
 570   return TokenizeT(str, delimiters, tokens);
 571 }
 572
 573 size_t Tokenize(const std::string& str,
 574                 const std::string& delimiters,
 575                 std::vector<std::string>* tokens) {
 576   return TokenizeT(str, delimiters, tokens);
 577 }
 578
 579 size_t Tokenize(const base::StringPiece& str,
 580                 const base::StringPiece& delimiters,
 581                 std::vector<base::StringPiece>* tokens) {
 582   return TokenizeT(str, delimiters, tokens);
 583 }
 584
 585 template<typename STR>
 586 static STR JoinStringT(const std::vector<STR>& parts, const STR& sep) {
 587   if (parts.empty())
 588     return STR();
 589
 590   STR result(parts[0]);
 591   typename std::vector<STR>::const_iterator iter = parts.begin();
 592   ++iter;
 593
 594   for (; iter != parts.end(); ++iter) {
 595     result += sep;
 596     result += *iter;
 597   }
 598
 599   return result;
 600 }
 601
 602 std::string JoinString(const std::vector<std::string>& parts, char sep) {
 603   return JoinStringT(parts, std::string(1, sep));
 604 }
 605
 606 string16 JoinString(const std::vector<string16>& parts, char16 sep) {
 607   return JoinStringT(parts, string16(1, sep));
 608 }
 609
 610 std::string JoinString(const std::vector<std::string>& parts,
 611                        const std::string& separator) {
 612   return JoinStringT(parts, separator);
 613 }
 614
 615 string16 JoinString(const std::vector<string16>& parts,
 616                     const string16& separator) {
 617   return JoinStringT(parts, separator);
 618 }
 619
 620 template<class FormatStringType, class OutStringType>
 621 OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string,
 622     const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) {
 623   size_t substitutions = subst.size();
 624
 625   size_t sub_length = 0;
 626   for (typename std::vector<OutStringType>::const_iterator iter = subst.begin();
 627        iter != subst.end(); ++iter) {
 628     sub_length += iter->length();
 629   }
 630
 631   OutStringType formatted;
 632   formatted.reserve(format_string.length() + sub_length);
 633
 634   std::vector<ReplacementOffset> r_offsets;
 635   for (typename FormatStringType::const_iterator i = format_string.begin();
 636        i != format_string.end(); ++i) {
 637     if ('$' == *i) {
 638       if (i + 1 != format_string.end()) {
 639         ++i;
 640         DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i;
 641         if ('$' == *i) {
 642           while (i != format_string.end() && '$' == *i) {
 643             formatted.push_back('$');
 644             ++i;
 645           }
 646           --i;
 647         } else {
 648           uintptr_t index = 0;
 649           while (i != format_string.end() && '0' <= *i && *i <= '9') {
 650             index *= 10;
 651             index += *i - '0';
 652             ++i;
 653           }
 654           --i;
 655           index -= 1;
 656           if (offsets) {
 657             ReplacementOffset r_offset(index,
 658                 static_cast<int>(formatted.size()));
 659             r_offsets.insert(std::lower_bound(r_offsets.begin(),
 660                                               r_offsets.end(),
 661                                               r_offset,
 662                                               &CompareParameter),
 663                              r_offset);
 664           }
 665           if (index < substitutions)
 666             formatted.append(subst.at(index));
 667         }
 668       }
 669     } else {
 670       formatted.push_back(*i);
 671     }
 672   }
 673   if (offsets) {
 674     for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin();
 675          i != r_offsets.end(); ++i) {
 676       offsets->push_back(i->offset);
 677     }
 678   }
 679   return formatted;
 680 }
 681
 682 string16 ReplaceStringPlaceholders(const string16& format_string,
 683                                    const std::vector<string16>& subst,
 684                                    std::vector<size_t>* offsets) {
 685   return DoReplaceStringPlaceholders(format_string, subst, offsets);
 686 }
 687
 688 std::string ReplaceStringPlaceholders(const base::StringPiece& format_string,
 689                                       const std::vector<std::string>& subst,
 690                                       std::vector<size_t>* offsets) {
 691   return DoReplaceStringPlaceholders(format_string, subst, offsets);
 692 }
 693
 694 string16 ReplaceStringPlaceholders(const string16& format_string,
 695                                    const string16& a,
 696                                    size_t* offset) {
 697   std::vector<size_t> offsets;
 698   std::vector<string16> subst;
 699   subst.push_back(a);
 700   string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);
 701
 702   DCHECK_EQ(1U, offsets.size());
 703   if (offset)
 704     *offset = offsets[0];
 705   return result;
 706 }
 707
 708 static bool IsWildcard(base_icu::UChar32 character) {
 709   return character == '*' || character == '?';
 710 }
 711
// Advances |*pattern| and |*string| in lock-step to the first point where they
// differ, or to the first unescaped wildcard in the pattern.
//
// |next| is a functor that decodes the character at the pointer it is given,
// advances that pointer past it, and returns the decoded value. A backslash in
// the pattern escapes the character that follows it, so an escaped '*' or '?'
// is compared literally. Stops as soon as either range is exhausted.
template <typename CHAR, typename NEXT>
static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,
                         const CHAR** string, const CHAR* string_end,
                         NEXT next) {
  // Position of a pending backslash, or NULL when the current pattern
  // character is not escaped.
  const CHAR* escape = NULL;
  while (*pattern != pattern_end && *string != string_end) {
    if (!escape && IsWildcard(**pattern)) {
      // We don't want to match wildcard here, except if it's escaped.
      return;
    }

    // Check if the escapement char is found. If so, skip it and move to the
    // next character.
    if (!escape && **pattern == '\\') {
      escape = *pattern;
      next(pattern, pattern_end);
      continue;
    }

    // Check if the chars match, if so, increment the ptrs.
    // Decoding is done on copies so the real pointers only move on a match.
    const CHAR* pattern_next = *pattern;
    const CHAR* string_next = *string;
    base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
    if (pattern_char == next(&string_next, string_end) &&
        pattern_char != CBU_SENTINEL) {
      *pattern = pattern_next;
      *string = string_next;
    } else {
      // Uh oh, it did not match, we are done. If the last char was an
      // escapement, that means that it was an error to advance the ptr here,
      // let's put it back where it was. This also means that the MatchPattern
      // function will return false because if we can't match an escape char
      // here, then no one will.
      if (escape) {
        *pattern = escape;
      }
      return;
    }

    // The escape only applies to the single character just consumed.
    escape = NULL;
  }
}
 755
 756 template <typename CHAR, typename NEXT>
 757 static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
 758   while (*pattern != end) {
 759     if (!IsWildcard(**pattern))
 760       return;
 761     next(pattern, end);
 762   }
 763 }
 764
// Recursive matcher behind MatchPattern. Returns true when the range
// [eval, eval_end) matches the pattern [pattern, pattern_end).
//
// In the pattern, '?' matches zero or one character and '*' matches any
// number of characters; EatSameChars handles literal text and backslash
// escapes. |next| decodes and steps over one character. |depth| is the
// current recursion depth: once it exceeds kMaxDepth the match is abandoned
// and false is returned.
template <typename CHAR, typename NEXT>
static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
                          const CHAR* pattern, const CHAR* pattern_end,
                          int depth,
                          NEXT next) {
  const int kMaxDepth = 16;
  if (depth > kMaxDepth)
    return false;

  // Eat all the matching chars.
  EatSameChars(&pattern, pattern_end, &eval, eval_end, next);

  // If the string is empty, then the pattern must be empty too, or contains
  // only wildcards.
  if (eval == eval_end) {
    EatWildcard(&pattern, pattern_end, next);
    return pattern == pattern_end;
  }

  // Pattern is empty but not string, this is not a match.
  if (pattern == pattern_end)
    return false;

  // If this is a question mark, then we need to compare the rest with
  // the current string or the string with one character eaten.
  const CHAR* next_pattern = pattern;
  next(&next_pattern, pattern_end);
  if (pattern[0] == '?') {
    // First try '?' matching nothing...
    if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
                      depth + 1, next))
      return true;
    // ...then try '?' consuming exactly one character.
    const CHAR* next_eval = eval;
    next(&next_eval, eval_end);
    if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
                      depth + 1, next))
      return true;
  }

  // This is a *, try to match all the possible substrings with the remainder
  // of the pattern.
  if (pattern[0] == '*') {
    // Collapse duplicate wild cards (********** into *) so that the
    // method does not recurse unnecessarily. http://crbug.com/52839
    EatWildcard(&next_pattern, pattern_end, next);

    // NOTE(review): |eval| advances by one CHAR unit here rather than through
    // |next|, so for multi-unit characters some attempts start inside a
    // character - confirm this is intended.
    while (eval != eval_end) {
      if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
                        depth + 1, next))
        return true;
      eval++;
    }

    // We reached the end of the string, let see if the pattern contains only
    // wildcards.
    if (eval == eval_end) {
      EatWildcard(&pattern, pattern_end, next);
      if (pattern != pattern_end)
        return false;
      return true;
    }
  }

  return false;
}
 829
 830 struct NextCharUTF8 {
 831   base_icu::UChar32 operator()(const char** p, const char* end) {
 832     base_icu::UChar32 c;
 833     int offset = 0;
 834     CBU8_NEXT(*p, offset, end - *p, c);
 835     *p += offset;
 836     return c;
 837   }
 838 };
 839
 840 struct NextCharUTF16 {
 841   base_icu::UChar32 operator()(const char16** p, const char16* end) {
 842     base_icu::UChar32 c;
 843     int offset = 0;
 844     CBU16_NEXT(*p, offset, end - *p, c);
 845     *p += offset;
 846     return c;
 847   }
 848 };
 849
 850 bool MatchPattern(const base::StringPiece& eval,
 851                   const base::StringPiece& pattern) {
 852   return MatchPatternT(eval.data(), eval.data() + eval.size(),
 853                        pattern.data(), pattern.data() + pattern.size(),
 854                        0, NextCharUTF8());
 855 }
 856
 857 bool MatchPattern(const string16& eval, const string16& pattern) {
 858   return MatchPatternT(eval.c_str(), eval.c_str() + eval.size(),
 859                        pattern.c_str(), pattern.c_str() + pattern.size(),
 860                        0, NextCharUTF16());
 861 }
 862
 863 // The following code is compatible with the OpenBSD lcpy interface.  See:
 864 //   http://www.gratisoft.us/todd/papers/strlcpy.html
 865 //   ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
 866
 867 namespace {
 868
 869 template <typename CHAR>
 870 size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
 871   for (size_t i = 0; i < dst_size; ++i) {
 872     if ((dst[i] = src[i]) == 0)  // We hit and copied the terminating NULL.
 873       return i;
 874   }
 875
 876   // We were left off at dst_size.  We over copied 1 byte.  Null terminate.
 877   if (dst_size != 0)
 878     dst[dst_size - 1] = 0;
 879
 880   // Count the rest of the |src|, and return it's length in characters.
 881   while (src[dst_size]) ++dst_size;
 882   return dst_size;
 883 }
 884
 885 }  // namespace
 886
 887 size_t base::strlcpy(char* dst, const char* src, size_t dst_size) {
 888   return lcpyT<char>(dst, src, dst_size);
 889 }
 890 size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
 891   return lcpyT<wchar_t>(dst, src, dst_size);
 892 }