[Easy Unlock] Fix a DCHECK: Load localized string correctly.
[chromium-blink-merge.git] / base / strings / string_util.cc
blob65eeacb34a33bf973136eb12f4a525739fac99f0
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/strings/string_util.h"
7 #include <ctype.h>
8 #include <errno.h>
9 #include <math.h>
10 #include <stdarg.h>
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <time.h>
15 #include <wchar.h>
16 #include <wctype.h>
18 #include <algorithm>
19 #include <vector>
21 #include "base/basictypes.h"
22 #include "base/logging.h"
23 #include "base/memory/singleton.h"
24 #include "base/strings/utf_string_conversion_utils.h"
25 #include "base/strings/utf_string_conversions.h"
26 #include "base/third_party/icu/icu_utf.h"
27 #include "build/build_config.h"
29 // Remove when this entire file is in the base namespace.
30 using base::char16;
31 using base::string16;
33 namespace {
35 // Force the singleton used by EmptyString[16] to be a unique type. This
36 // prevents other code that might accidentally use Singleton<string> from
37 // getting our internal one.
38 struct EmptyStrings {
39 EmptyStrings() {}
40 const std::string s;
41 const string16 s16;
43 static EmptyStrings* GetInstance() {
44 return Singleton<EmptyStrings>::get();
// Bookkeeping record used by ReplaceStringPlaceholders: remembers which
// parameter was substituted and where in the output string it was placed.
struct ReplacementOffset {
  ReplacementOffset(uintptr_t parameter, size_t offset)
      : parameter(parameter), offset(offset) {}

  uintptr_t parameter;  // Index of the parameter.
  size_t offset;        // Starting position in the string.
};
62 static bool CompareParameter(const ReplacementOffset& elem1,
63 const ReplacementOffset& elem2) {
64 return elem1.parameter < elem2.parameter;
// We take a pointer to be one "machine word" wide, which makes uintptr_t an
// integer type of machine-word size as well. The mask below covers the low
// address bits, i.e. the ones that are non-zero for a pointer that is not
// aligned to a machine word.
typedef uintptr_t MachineWord;
const MachineWord kMachineWordAlignmentMask = sizeof(MachineWord) - 1;
72 inline bool IsAlignedToMachineWord(const void* pointer) {
73 return !(reinterpret_cast<MachineWord>(pointer) & kMachineWordAlignmentMask);
76 template<typename T> inline T* AlignToMachineWord(T* pointer) {
77 return reinterpret_cast<T*>(reinterpret_cast<MachineWord>(pointer) &
78 ~kMachineWordAlignmentMask);
81 template<size_t size, typename CharacterType> struct NonASCIIMask;
82 template<> struct NonASCIIMask<4, base::char16> {
83 static inline uint32_t value() { return 0xFF80FF80U; }
85 template<> struct NonASCIIMask<4, char> {
86 static inline uint32_t value() { return 0x80808080U; }
88 template<> struct NonASCIIMask<8, base::char16> {
89 static inline uint64_t value() { return 0xFF80FF80FF80FF80ULL; }
91 template<> struct NonASCIIMask<8, char> {
92 static inline uint64_t value() { return 0x8080808080808080ULL; }
95 } // namespace
97 namespace base {
// Scans a wide printf-style |format| string and reports whether every
// conversion specification in it is considered portable.
//
// Returns false as soon as it sees, inside a specification, an 's' or 'c' that
// was not preceded by an 'l' modifier, or any of 'S', 'C', 'F', 'D', 'O', 'U'.
// Returns true otherwise, including when the string ends in the middle of a
// specification: nothing unportable was found, and such a string is equally
// broken on all platforms.
bool IsWprintfFormatPortable(const wchar_t* format) {
  const wchar_t* cursor = format;
  while (*cursor != '\0') {
    if (*cursor == '%') {
      bool seen_l = false;
      // Consume the specification one character at a time until a known
      // conversion character terminates it.
      for (;;) {
        ++cursor;
        const wchar_t ch = *cursor;
        if (ch == '\0')
          return true;  // Truncated specification; see function comment.

        if (ch == 'l') {
          // Only an 'l' modifier can rescue a following 's' or 'c'.
          seen_l = true;
        } else {
          const bool needs_l = (ch == 's' || ch == 'c');
          if ((needs_l && !seen_l) ||
              ch == 'S' || ch == 'C' || ch == 'F' ||
              ch == 'D' || ch == 'O' || ch == 'U') {
            return false;  // Not portable.
          }
        }

        if (wcschr(L"diouxXeEfgGaAcspn%", ch))
          break;  // Portable specifier; go back to scanning plain text.
      }
    }
    ++cursor;
  }

  return true;
}
134 const std::string& EmptyString() {
135 return EmptyStrings::GetInstance()->s;
138 const string16& EmptyString16() {
139 return EmptyStrings::GetInstance()->s16;
// Copies |input| into |output| and then replaces every character that appears
// in |replace_chars| with the whole of |replace_with|. Text inserted by a
// replacement is skipped over and never re-examined. Returns true if at least
// one character was replaced.
template<typename STR>
bool ReplaceCharsT(const STR& input,
                   const STR& replace_chars,
                   const STR& replace_with,
                   STR* output) {
  // Measured before |output| is written, matching the original ordering.
  const size_t step = replace_with.length();

  *output = input;

  bool replaced_any = false;
  for (size_t pos = output->find_first_of(replace_chars);
       pos != STR::npos;
       pos = output->find_first_of(replace_chars, pos + step)) {
    output->replace(pos, 1, replace_with);
    replaced_any = true;
  }

  return replaced_any;
}
162 bool ReplaceChars(const string16& input,
163 const base::StringPiece16& replace_chars,
164 const string16& replace_with,
165 string16* output) {
166 return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output);
169 bool ReplaceChars(const std::string& input,
170 const base::StringPiece& replace_chars,
171 const std::string& replace_with,
172 std::string* output) {
173 return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output);
176 bool RemoveChars(const string16& input,
177 const base::StringPiece16& remove_chars,
178 string16* output) {
179 return ReplaceChars(input, remove_chars.as_string(), string16(), output);
182 bool RemoveChars(const std::string& input,
183 const base::StringPiece& remove_chars,
184 std::string* output) {
185 return ReplaceChars(input, remove_chars.as_string(), std::string(), output);
188 template<typename STR>
189 TrimPositions TrimStringT(const STR& input,
190 const STR& trim_chars,
191 TrimPositions positions,
192 STR* output) {
193 // Find the edges of leading/trailing whitespace as desired.
194 const size_t last_char = input.length() - 1;
195 const size_t first_good_char = (positions & TRIM_LEADING) ?
196 input.find_first_not_of(trim_chars) : 0;
197 const size_t last_good_char = (positions & TRIM_TRAILING) ?
198 input.find_last_not_of(trim_chars) : last_char;
200 // When the string was all whitespace, report that we stripped off whitespace
201 // from whichever position the caller was interested in. For empty input, we
202 // stripped no whitespace, but we still need to clear |output|.
203 if (input.empty() ||
204 (first_good_char == STR::npos) || (last_good_char == STR::npos)) {
205 bool input_was_empty = input.empty(); // in case output == &input
206 output->clear();
207 return input_was_empty ? TRIM_NONE : positions;
210 // Trim the whitespace.
211 *output =
212 input.substr(first_good_char, last_good_char - first_good_char + 1);
214 // Return where we trimmed from.
215 return static_cast<TrimPositions>(
216 ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |
217 ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
220 bool TrimString(const string16& input,
221 const base::StringPiece16& trim_chars,
222 string16* output) {
223 return TrimStringT(input, trim_chars.as_string(), TRIM_ALL, output) !=
224 TRIM_NONE;
227 bool TrimString(const std::string& input,
228 const base::StringPiece& trim_chars,
229 std::string* output) {
230 return TrimStringT(input, trim_chars.as_string(), TRIM_ALL, output) !=
231 TRIM_NONE;
234 void TruncateUTF8ToByteSize(const std::string& input,
235 const size_t byte_size,
236 std::string* output) {
237 DCHECK(output);
238 if (byte_size > input.length()) {
239 *output = input;
240 return;
242 DCHECK_LE(byte_size, static_cast<uint32>(kint32max));
243 // Note: This cast is necessary because CBU8_NEXT uses int32s.
244 int32 truncation_length = static_cast<int32>(byte_size);
245 int32 char_index = truncation_length - 1;
246 const char* data = input.data();
248 // Using CBU8, we will move backwards from the truncation point
249 // to the beginning of the string looking for a valid UTF8
250 // character. Once a full UTF8 character is found, we will
251 // truncate the string to the end of that character.
252 while (char_index >= 0) {
253 int32 prev = char_index;
254 base_icu::UChar32 code_point = 0;
255 CBU8_NEXT(data, char_index, truncation_length, code_point);
256 if (!IsValidCharacter(code_point) ||
257 !IsValidCodepoint(code_point)) {
258 char_index = prev - 1;
259 } else {
260 break;
264 if (char_index >= 0 )
265 *output = input.substr(0, char_index);
266 else
267 output->clear();
270 TrimPositions TrimWhitespace(const string16& input,
271 TrimPositions positions,
272 string16* output) {
273 return TrimStringT(input, base::string16(kWhitespaceUTF16), positions,
274 output);
277 TrimPositions TrimWhitespaceASCII(const std::string& input,
278 TrimPositions positions,
279 std::string* output) {
280 return TrimStringT(input, std::string(kWhitespaceASCII), positions, output);
283 // This function is only for backward-compatibility.
284 // To be removed when all callers are updated.
285 TrimPositions TrimWhitespace(const std::string& input,
286 TrimPositions positions,
287 std::string* output) {
288 return TrimWhitespaceASCII(input, positions, output);
// Returns a copy of |text| in which every run of whitespace (as decided by
// IsWhitespace) is reduced to a single space, and leading and trailing
// whitespace is dropped. When |trim_sequences_with_line_breaks| is true, a
// whitespace run that contains a CR or LF is removed entirely instead of being
// replaced by a space.
template<typename STR>
STR CollapseWhitespaceT(const STR& text,
                        bool trim_sequences_with_line_breaks) {
  // The result can never be longer than the input, so size it up front and
  // shrink it at the end.
  STR result;
  result.resize(text.size());

  // Set flags to pretend we're already in a trimmed whitespace sequence, so we
  // will trim any leading whitespace.
  bool in_whitespace = true;
  bool already_trimmed = true;

  // Index of the next character to write. Kept as size_t (rather than int) so
  // it matches the string's size type and cannot overflow on very long input.
  // Every decrement below undoes a space written earlier in the same
  // whitespace run, so the value never drops below zero.
  size_t chars_written = 0;
  for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) {
    if (IsWhitespace(*i)) {
      if (!in_whitespace) {
        // Reduce all whitespace sequences to a single space.
        in_whitespace = true;
        result[chars_written++] = ' ';
      }
      if (trim_sequences_with_line_breaks && !already_trimmed &&
          ((*i == '\n') || (*i == '\r'))) {
        // Whitespace sequences containing CR or LF are eliminated entirely.
        already_trimmed = true;
        --chars_written;
      }
    } else {
      // Non-whitespace characters are copied straight across.
      in_whitespace = false;
      already_trimmed = false;
      result[chars_written++] = *i;
    }
  }

  if (in_whitespace && !already_trimmed) {
    // Any trailing whitespace is eliminated.
    --chars_written;
  }

  result.resize(chars_written);
  return result;
}
333 string16 CollapseWhitespace(const string16& text,
334 bool trim_sequences_with_line_breaks) {
335 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
338 std::string CollapseWhitespaceASCII(const std::string& text,
339 bool trim_sequences_with_line_breaks) {
340 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
343 bool ContainsOnlyChars(const StringPiece& input,
344 const StringPiece& characters) {
345 return input.find_first_not_of(characters) == StringPiece::npos;
348 bool ContainsOnlyChars(const StringPiece16& input,
349 const StringPiece16& characters) {
350 return input.find_first_not_of(characters) == StringPiece16::npos;
353 template <class Char>
354 inline bool DoIsStringASCII(const Char* characters, size_t length) {
355 MachineWord all_char_bits = 0;
356 const Char* end = characters + length;
358 // Prologue: align the input.
359 while (!IsAlignedToMachineWord(characters) && characters != end) {
360 all_char_bits |= *characters;
361 ++characters;
364 // Compare the values of CPU word size.
365 const Char* word_end = AlignToMachineWord(end);
366 const size_t loop_increment = sizeof(MachineWord) / sizeof(Char);
367 while (characters < word_end) {
368 all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters));
369 characters += loop_increment;
372 // Process the remaining bytes.
373 while (characters != end) {
374 all_char_bits |= *characters;
375 ++characters;
378 MachineWord non_ascii_bit_mask =
379 NonASCIIMask<sizeof(MachineWord), Char>::value();
380 return !(all_char_bits & non_ascii_bit_mask);
383 bool IsStringASCII(const StringPiece& str) {
384 return DoIsStringASCII(str.data(), str.length());
387 bool IsStringASCII(const StringPiece16& str) {
388 return DoIsStringASCII(str.data(), str.length());
391 bool IsStringASCII(const string16& str) {
392 return DoIsStringASCII(str.data(), str.length());
395 bool IsStringUTF8(const std::string& str) {
396 const char *src = str.data();
397 int32 src_len = static_cast<int32>(str.length());
398 int32 char_index = 0;
400 while (char_index < src_len) {
401 int32 code_point;
402 CBU8_NEXT(src, char_index, src_len, code_point);
403 if (!IsValidCharacter(code_point))
404 return false;
406 return true;
409 } // namespace base
411 template<typename Iter>
412 static inline bool DoLowerCaseEqualsASCII(Iter a_begin,
413 Iter a_end,
414 const char* b) {
415 for (Iter it = a_begin; it != a_end; ++it, ++b) {
416 if (!*b || base::ToLowerASCII(*it) != *b)
417 return false;
419 return *b == 0;
422 // Front-ends for LowerCaseEqualsASCII.
423 bool LowerCaseEqualsASCII(const std::string& a, const char* b) {
424 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
427 bool LowerCaseEqualsASCII(const string16& a, const char* b) {
428 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
431 bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,
432 std::string::const_iterator a_end,
433 const char* b) {
434 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
437 bool LowerCaseEqualsASCII(string16::const_iterator a_begin,
438 string16::const_iterator a_end,
439 const char* b) {
440 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
443 // TODO(port): Resolve wchar_t/iterator issues that require OS_ANDROID here.
444 #if !defined(OS_ANDROID)
445 bool LowerCaseEqualsASCII(const char* a_begin,
446 const char* a_end,
447 const char* b) {
448 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
451 bool LowerCaseEqualsASCII(const char16* a_begin,
452 const char16* a_end,
453 const char* b) {
454 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
457 #endif // !defined(OS_ANDROID)
459 bool EqualsASCII(const string16& a, const base::StringPiece& b) {
460 if (a.length() != b.length())
461 return false;
462 return std::equal(b.begin(), b.end(), a.begin());
465 bool StartsWithASCII(const std::string& str,
466 const std::string& search,
467 bool case_sensitive) {
468 if (case_sensitive)
469 return str.compare(0, search.length(), search) == 0;
470 else
471 return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0;
474 template <typename STR>
475 bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) {
476 if (case_sensitive) {
477 return str.compare(0, search.length(), search) == 0;
478 } else {
479 if (search.size() > str.size())
480 return false;
481 return std::equal(search.begin(), search.end(), str.begin(),
482 base::CaseInsensitiveCompare<typename STR::value_type>());
486 bool StartsWith(const string16& str, const string16& search,
487 bool case_sensitive) {
488 return StartsWithT(str, search, case_sensitive);
491 template <typename STR>
492 bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) {
493 size_t str_length = str.length();
494 size_t search_length = search.length();
495 if (search_length > str_length)
496 return false;
497 if (case_sensitive)
498 return str.compare(str_length - search_length, search_length, search) == 0;
499 return std::equal(search.begin(), search.end(),
500 str.begin() + (str_length - search_length),
501 base::CaseInsensitiveCompare<typename STR::value_type>());
504 bool EndsWith(const std::string& str, const std::string& search,
505 bool case_sensitive) {
506 return EndsWithT(str, search, case_sensitive);
509 bool EndsWith(const string16& str, const string16& search,
510 bool case_sensitive) {
511 return EndsWithT(str, search, case_sensitive);
// Unit suffixes used by FormatBytesUnlocalized, smallest unit first. Each
// entry already carries the space that separates it from the number.
static const char* const kByteStringsUnlocalized[] = {
  " B", " kB", " MB",
  " GB", " TB", " PB"
};
523 string16 FormatBytesUnlocalized(int64 bytes) {
524 double unit_amount = static_cast<double>(bytes);
525 size_t dimension = 0;
526 const int kKilo = 1024;
527 while (unit_amount >= kKilo &&
528 dimension < arraysize(kByteStringsUnlocalized) - 1) {
529 unit_amount /= kKilo;
530 dimension++;
533 char buf[64];
534 if (bytes != 0 && dimension > 0 && unit_amount < 100) {
535 base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount,
536 kByteStringsUnlocalized[dimension]);
537 } else {
538 base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount,
539 kByteStringsUnlocalized[dimension]);
542 return base::ASCIIToUTF16(buf);
// Replaces occurrences of |find_this| in |*str| with |replace_with|, starting
// the search at |start_offset|. With |replace_all| false only the first
// occurrence is replaced. Text inserted by a replacement is not searched
// again.
//
// Does nothing when |start_offset| is npos or lies at/after the end of the
// string. |find_this| must not be empty: this is DCHECKed, and an empty
// |find_this| is otherwise ignored (no replacement is made).
template<class StringType>
void DoReplaceSubstringsAfterOffset(StringType* str,
                                    size_t start_offset,
                                    const StringType& find_this,
                                    const StringType& replace_with,
                                    bool replace_all) {
  if ((start_offset == StringType::npos) || (start_offset >= str->length()))
    return;

  DCHECK(!find_this.empty());
  // Belt and braces for builds in which the DCHECK above is not active: an
  // empty search string is found at every offset, so the replace-all loop
  // below would never terminate.
  if (find_this.empty())
    return;

  for (size_t offs(str->find(find_this, start_offset));
       offs != StringType::npos; offs = str->find(find_this, offs)) {
    str->replace(offs, find_this.length(), replace_with);
    // Resume searching just past the text that was inserted.
    offs += replace_with.length();

    if (!replace_all)
      break;
  }
}
565 void ReplaceFirstSubstringAfterOffset(string16* str,
566 size_t start_offset,
567 const string16& find_this,
568 const string16& replace_with) {
569 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
570 false); // replace first instance
573 void ReplaceFirstSubstringAfterOffset(std::string* str,
574 size_t start_offset,
575 const std::string& find_this,
576 const std::string& replace_with) {
577 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
578 false); // replace first instance
581 void ReplaceSubstringsAfterOffset(string16* str,
582 size_t start_offset,
583 const string16& find_this,
584 const string16& replace_with) {
585 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
586 true); // replace all instances
589 void ReplaceSubstringsAfterOffset(std::string* str,
590 size_t start_offset,
591 const std::string& find_this,
592 const std::string& replace_with) {
593 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
594 true); // replace all instances
// Splits |str| at any character contained in |delimiters| and stores the
// pieces in |tokens|, which is cleared first. Runs of delimiters count as a
// single separator, so no empty tokens are produced. Returns the number of
// tokens stored.
template<typename STR>
static size_t TokenizeT(const STR& str,
                        const STR& delimiters,
                        std::vector<STR>* tokens) {
  tokens->clear();

  for (size_t begin = str.find_first_not_of(delimiters);
       begin != STR::npos;) {
    const size_t stop = str.find_first_of(delimiters, begin + 1);
    if (stop == STR::npos) {
      // Last token: runs to the end of the string.
      tokens->push_back(str.substr(begin));
      break;
    }
    tokens->push_back(str.substr(begin, stop - begin));
    begin = str.find_first_not_of(delimiters, stop + 1);
  }

  return tokens->size();
}
619 size_t Tokenize(const string16& str,
620 const string16& delimiters,
621 std::vector<string16>* tokens) {
622 return TokenizeT(str, delimiters, tokens);
625 size_t Tokenize(const std::string& str,
626 const std::string& delimiters,
627 std::vector<std::string>* tokens) {
628 return TokenizeT(str, delimiters, tokens);
631 size_t Tokenize(const base::StringPiece& str,
632 const base::StringPiece& delimiters,
633 std::vector<base::StringPiece>* tokens) {
634 return TokenizeT(str, delimiters, tokens);
// Concatenates the elements of |parts|, inserting |sep| between consecutive
// elements. Returns an empty string when |parts| is empty.
template<typename STR>
static STR JoinStringT(const std::vector<STR>& parts, const STR& sep) {
  typename std::vector<STR>::const_iterator it = parts.begin();
  if (it == parts.end())
    return STR();

  // Start with the first element, then append "separator + element" for each
  // of the remaining ones.
  STR joined(*it);
  for (++it; it != parts.end(); ++it) {
    joined += sep;
    joined += *it;
  }

  return joined;
}
654 std::string JoinString(const std::vector<std::string>& parts, char sep) {
655 return JoinStringT(parts, std::string(1, sep));
658 string16 JoinString(const std::vector<string16>& parts, char16 sep) {
659 return JoinStringT(parts, string16(1, sep));
662 std::string JoinString(const std::vector<std::string>& parts,
663 const std::string& separator) {
664 return JoinStringT(parts, separator);
667 string16 JoinString(const std::vector<string16>& parts,
668 const string16& separator) {
669 return JoinStringT(parts, separator);
672 template<class FormatStringType, class OutStringType>
673 OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string,
674 const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) {
675 size_t substitutions = subst.size();
677 size_t sub_length = 0;
678 for (typename std::vector<OutStringType>::const_iterator iter = subst.begin();
679 iter != subst.end(); ++iter) {
680 sub_length += iter->length();
683 OutStringType formatted;
684 formatted.reserve(format_string.length() + sub_length);
686 std::vector<ReplacementOffset> r_offsets;
687 for (typename FormatStringType::const_iterator i = format_string.begin();
688 i != format_string.end(); ++i) {
689 if ('$' == *i) {
690 if (i + 1 != format_string.end()) {
691 ++i;
692 DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i;
693 if ('$' == *i) {
694 while (i != format_string.end() && '$' == *i) {
695 formatted.push_back('$');
696 ++i;
698 --i;
699 } else {
700 uintptr_t index = 0;
701 while (i != format_string.end() && '0' <= *i && *i <= '9') {
702 index *= 10;
703 index += *i - '0';
704 ++i;
706 --i;
707 index -= 1;
708 if (offsets) {
709 ReplacementOffset r_offset(index,
710 static_cast<int>(formatted.size()));
711 r_offsets.insert(std::lower_bound(r_offsets.begin(),
712 r_offsets.end(),
713 r_offset,
714 &CompareParameter),
715 r_offset);
717 if (index < substitutions)
718 formatted.append(subst.at(index));
721 } else {
722 formatted.push_back(*i);
725 if (offsets) {
726 for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin();
727 i != r_offsets.end(); ++i) {
728 offsets->push_back(i->offset);
731 return formatted;
734 string16 ReplaceStringPlaceholders(const string16& format_string,
735 const std::vector<string16>& subst,
736 std::vector<size_t>* offsets) {
737 return DoReplaceStringPlaceholders(format_string, subst, offsets);
740 std::string ReplaceStringPlaceholders(const base::StringPiece& format_string,
741 const std::vector<std::string>& subst,
742 std::vector<size_t>* offsets) {
743 return DoReplaceStringPlaceholders(format_string, subst, offsets);
746 string16 ReplaceStringPlaceholders(const string16& format_string,
747 const string16& a,
748 size_t* offset) {
749 std::vector<size_t> offsets;
750 std::vector<string16> subst;
751 subst.push_back(a);
752 string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);
754 DCHECK_EQ(1U, offsets.size());
755 if (offset)
756 *offset = offsets[0];
757 return result;
760 static bool IsWildcard(base_icu::UChar32 character) {
761 return character == '*' || character == '?';
764 // Move the strings pointers to the point where they start to differ.
765 template <typename CHAR, typename NEXT>
766 static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,
767 const CHAR** string, const CHAR* string_end,
768 NEXT next) {
769 const CHAR* escape = NULL;
770 while (*pattern != pattern_end && *string != string_end) {
771 if (!escape && IsWildcard(**pattern)) {
772 // We don't want to match wildcard here, except if it's escaped.
773 return;
776 // Check if the escapement char is found. If so, skip it and move to the
777 // next character.
778 if (!escape && **pattern == '\\') {
779 escape = *pattern;
780 next(pattern, pattern_end);
781 continue;
784 // Check if the chars match, if so, increment the ptrs.
785 const CHAR* pattern_next = *pattern;
786 const CHAR* string_next = *string;
787 base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
788 if (pattern_char == next(&string_next, string_end) &&
789 pattern_char != CBU_SENTINEL) {
790 *pattern = pattern_next;
791 *string = string_next;
792 } else {
793 // Uh oh, it did not match, we are done. If the last char was an
794 // escapement, that means that it was an error to advance the ptr here,
795 // let's put it back where it was. This also mean that the MatchPattern
796 // function will return false because if we can't match an escape char
797 // here, then no one will.
798 if (escape) {
799 *pattern = escape;
801 return;
804 escape = NULL;
// Advances |*pattern| past any run of wildcard characters ('*' or '?'),
// stopping at the first non-wildcard or at |end|.
template <typename CHAR, typename NEXT>
static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
  while (*pattern != end && IsWildcard(**pattern))
    next(pattern, end);
}
// Recursive wildcard matcher. Returns true if [eval, eval_end) matches
// [pattern, pattern_end), where '*' matches any run of characters and '?'
// matches either nothing or one character. |next| decodes one character and
// advances the pointer it is given. Recursion is abandoned (returning false)
// once |depth| exceeds 16.
template <typename CHAR, typename NEXT>
static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
                          const CHAR* pattern, const CHAR* pattern_end,
                          int depth,
                          NEXT next) {
  const int kMaxDepth = 16;
  if (depth > kMaxDepth)
    return false;

  // Skip the literal prefix the two have in common.
  EatSameChars(&pattern, pattern_end, &eval, eval_end, next);

  // Input exhausted: it is a match only if what remains of the pattern is
  // nothing, or nothing but wildcards.
  if (eval == eval_end) {
    EatWildcard(&pattern, pattern_end, next);
    return pattern == pattern_end;
  }

  // Input left over but no pattern left to match it against.
  if (pattern == pattern_end)
    return false;

  // Position of the pattern just after its current character.
  const CHAR* pattern_rest = pattern;
  next(&pattern_rest, pattern_end);

  // '?': try the rest of the pattern against the input as it stands, and
  // then against the input with one character consumed.
  if (pattern[0] == '?') {
    if (MatchPatternT(eval, eval_end, pattern_rest, pattern_end,
                      depth + 1, next)) {
      return true;
    }
    const CHAR* eval_rest = eval;
    next(&eval_rest, eval_end);
    if (MatchPatternT(eval_rest, eval_end, pattern_rest, pattern_end,
                      depth + 1, next)) {
      return true;
    }
  }

  // '*': try the rest of the pattern against every suffix of the input.
  if (pattern[0] == '*') {
    // Collapse duplicate wild cards (********** into *) so that the
    // method does not recurse unnecessarily. http://crbug.com/52839
    EatWildcard(&pattern_rest, pattern_end, next);

    while (eval != eval_end) {
      if (MatchPatternT(eval, eval_end, pattern_rest, pattern_end,
                        depth + 1, next)) {
        return true;
      }
      eval++;
    }

    // The input has been used up; the match succeeds only if the pattern
    // consists of wildcards from here on.
    if (eval == eval_end) {
      EatWildcard(&pattern, pattern_end, next);
      return pattern == pattern_end;
    }
  }

  return false;
}
882 struct NextCharUTF8 {
883 base_icu::UChar32 operator()(const char** p, const char* end) {
884 base_icu::UChar32 c;
885 int offset = 0;
886 CBU8_NEXT(*p, offset, end - *p, c);
887 *p += offset;
888 return c;
892 struct NextCharUTF16 {
893 base_icu::UChar32 operator()(const char16** p, const char16* end) {
894 base_icu::UChar32 c;
895 int offset = 0;
896 CBU16_NEXT(*p, offset, end - *p, c);
897 *p += offset;
898 return c;
902 bool MatchPattern(const base::StringPiece& eval,
903 const base::StringPiece& pattern) {
904 return MatchPatternT(eval.data(), eval.data() + eval.size(),
905 pattern.data(), pattern.data() + pattern.size(),
906 0, NextCharUTF8());
909 bool MatchPattern(const string16& eval, const string16& pattern) {
910 return MatchPatternT(eval.c_str(), eval.c_str() + eval.size(),
911 pattern.c_str(), pattern.c_str() + pattern.size(),
912 0, NextCharUTF16());
915 // The following code is compatible with the OpenBSD lcpy interface. See:
916 // http://www.gratisoft.us/todd/papers/strlcpy.html
917 // ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
919 namespace {
// Copies the NUL-terminated string |src| into |dst|, which has room for
// |dst_size| characters. When |dst_size| is non-zero the result is always
// NUL-terminated, truncating if necessary. Returns the length of |src| in
// characters (not counting the terminator), so a return value >= |dst_size|
// tells the caller that truncation took place.
template <typename CHAR>
size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
  size_t i = 0;
  while (i < dst_size) {
    const CHAR c = src[i];
    dst[i] = c;
    if (c == 0)  // The terminator was reached and copied; done.
      return i;
    ++i;
  }

  // |dst| is full and its last slot holds a character of |src| rather than a
  // terminator. Overwrite it with one.
  if (dst_size != 0)
    dst[dst_size - 1] = 0;

  // Measure what is left of |src| and return its length in characters.
  size_t src_length = dst_size;
  while (src[src_length] != 0)
    ++src_length;
  return src_length;
}
937 } // namespace
939 size_t base::strlcpy(char* dst, const char* src, size_t dst_size) {
940 return lcpyT<char>(dst, src, dst_size);
942 size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
943 return lcpyT<wchar_t>(dst, src, dst_size);