Blink roll 174234:174238
[chromium-blink-merge.git] / base / strings / string_util.cc
blob0adb98959fc0ea335e4bcb2bd408709455441627
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/strings/string_util.h"
7 #include <ctype.h>
8 #include <errno.h>
9 #include <math.h>
10 #include <stdarg.h>
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <time.h>
15 #include <wchar.h>
16 #include <wctype.h>
18 #include <algorithm>
19 #include <vector>
21 #include "base/basictypes.h"
22 #include "base/logging.h"
23 #include "base/memory/singleton.h"
24 #include "base/strings/utf_string_conversion_utils.h"
25 #include "base/strings/utf_string_conversions.h"
26 #include "base/third_party/icu/icu_utf.h"
27 #include "build/build_config.h"
29 // Remove when this entire file is in the base namespace.
30 using base::char16;
31 using base::string16;
33 namespace {
35 // Force the singleton used by Empty[W]String[16] to be a unique type. This
36 // prevents other code that might accidentally use Singleton<string> from
37 // getting our internal one.
38 struct EmptyStrings {
39 EmptyStrings() {}
40 const std::string s;
41 const std::wstring ws;
42 const string16 s16;
44 static EmptyStrings* GetInstance() {
45 return Singleton<EmptyStrings>::get();
49 // Used by ReplaceStringPlaceholders to track the position in the string of
50 // replaced parameters.
51 struct ReplacementOffset {
52 ReplacementOffset(uintptr_t parameter, size_t offset)
53 : parameter(parameter),
54 offset(offset) {}
56 // Index of the parameter.
57 uintptr_t parameter;
59 // Starting position in the string.
60 size_t offset;
63 static bool CompareParameter(const ReplacementOffset& elem1,
64 const ReplacementOffset& elem2) {
65 return elem1.parameter < elem2.parameter;
68 } // namespace
70 namespace base {
72 bool IsWprintfFormatPortable(const wchar_t* format) {
73 for (const wchar_t* position = format; *position != '\0'; ++position) {
74 if (*position == '%') {
75 bool in_specification = true;
76 bool modifier_l = false;
77 while (in_specification) {
78 // Eat up characters until reaching a known specifier.
79 if (*++position == '\0') {
80 // The format string ended in the middle of a specification. Call
81 // it portable because no unportable specifications were found. The
82 // string is equally broken on all platforms.
83 return true;
86 if (*position == 'l') {
87 // 'l' is the only thing that can save the 's' and 'c' specifiers.
88 modifier_l = true;
89 } else if (((*position == 's' || *position == 'c') && !modifier_l) ||
90 *position == 'S' || *position == 'C' || *position == 'F' ||
91 *position == 'D' || *position == 'O' || *position == 'U') {
92 // Not portable.
93 return false;
96 if (wcschr(L"diouxXeEfgGaAcspn%", *position)) {
97 // Portable, keep scanning the rest of the format string.
98 in_specification = false;
104 return true;
107 const std::string& EmptyString() {
108 return EmptyStrings::GetInstance()->s;
111 const std::wstring& EmptyWString() {
112 return EmptyStrings::GetInstance()->ws;
115 const string16& EmptyString16() {
116 return EmptyStrings::GetInstance()->s16;
119 template<typename STR>
120 bool ReplaceCharsT(const STR& input,
121 const typename STR::value_type replace_chars[],
122 const STR& replace_with,
123 STR* output) {
124 bool removed = false;
125 size_t replace_length = replace_with.length();
127 *output = input;
129 size_t found = output->find_first_of(replace_chars);
130 while (found != STR::npos) {
131 removed = true;
132 output->replace(found, 1, replace_with);
133 found = output->find_first_of(replace_chars, found + replace_length);
136 return removed;
139 bool ReplaceChars(const string16& input,
140 const char16 replace_chars[],
141 const string16& replace_with,
142 string16* output) {
143 return ReplaceCharsT(input, replace_chars, replace_with, output);
146 bool ReplaceChars(const std::string& input,
147 const char replace_chars[],
148 const std::string& replace_with,
149 std::string* output) {
150 return ReplaceCharsT(input, replace_chars, replace_with, output);
153 bool RemoveChars(const string16& input,
154 const char16 remove_chars[],
155 string16* output) {
156 return ReplaceChars(input, remove_chars, string16(), output);
159 bool RemoveChars(const std::string& input,
160 const char remove_chars[],
161 std::string* output) {
162 return ReplaceChars(input, remove_chars, std::string(), output);
165 template<typename STR>
166 TrimPositions TrimStringT(const STR& input,
167 const typename STR::value_type trim_chars[],
168 TrimPositions positions,
169 STR* output) {
170 // Find the edges of leading/trailing whitespace as desired.
171 const typename STR::size_type last_char = input.length() - 1;
172 const typename STR::size_type first_good_char = (positions & TRIM_LEADING) ?
173 input.find_first_not_of(trim_chars) : 0;
174 const typename STR::size_type last_good_char = (positions & TRIM_TRAILING) ?
175 input.find_last_not_of(trim_chars) : last_char;
177 // When the string was all whitespace, report that we stripped off whitespace
178 // from whichever position the caller was interested in. For empty input, we
179 // stripped no whitespace, but we still need to clear |output|.
180 if (input.empty() ||
181 (first_good_char == STR::npos) || (last_good_char == STR::npos)) {
182 bool input_was_empty = input.empty(); // in case output == &input
183 output->clear();
184 return input_was_empty ? TRIM_NONE : positions;
187 // Trim the whitespace.
188 *output =
189 input.substr(first_good_char, last_good_char - first_good_char + 1);
191 // Return where we trimmed from.
192 return static_cast<TrimPositions>(
193 ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |
194 ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
197 bool TrimString(const string16& input,
198 const char16 trim_chars[],
199 string16* output) {
200 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
203 bool TrimString(const std::string& input,
204 const char trim_chars[],
205 std::string* output) {
206 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
209 void TruncateUTF8ToByteSize(const std::string& input,
210 const size_t byte_size,
211 std::string* output) {
212 DCHECK(output);
213 if (byte_size > input.length()) {
214 *output = input;
215 return;
217 DCHECK_LE(byte_size, static_cast<uint32>(kint32max));
218 // Note: This cast is necessary because CBU8_NEXT uses int32s.
219 int32 truncation_length = static_cast<int32>(byte_size);
220 int32 char_index = truncation_length - 1;
221 const char* data = input.data();
223 // Using CBU8, we will move backwards from the truncation point
224 // to the beginning of the string looking for a valid UTF8
225 // character. Once a full UTF8 character is found, we will
226 // truncate the string to the end of that character.
227 while (char_index >= 0) {
228 int32 prev = char_index;
229 uint32 code_point = 0;
230 CBU8_NEXT(data, char_index, truncation_length, code_point);
231 if (!IsValidCharacter(code_point) ||
232 !IsValidCodepoint(code_point)) {
233 char_index = prev - 1;
234 } else {
235 break;
239 if (char_index >= 0 )
240 *output = input.substr(0, char_index);
241 else
242 output->clear();
245 TrimPositions TrimWhitespace(const string16& input,
246 TrimPositions positions,
247 string16* output) {
248 return TrimStringT(input, kWhitespaceUTF16, positions, output);
251 TrimPositions TrimWhitespaceASCII(const std::string& input,
252 TrimPositions positions,
253 std::string* output) {
254 return TrimStringT(input, kWhitespaceASCII, positions, output);
257 // This function is only for backward-compatibility.
258 // To be removed when all callers are updated.
259 TrimPositions TrimWhitespace(const std::string& input,
260 TrimPositions positions,
261 std::string* output) {
262 return TrimWhitespaceASCII(input, positions, output);
265 template<typename STR>
266 STR CollapseWhitespaceT(const STR& text,
267 bool trim_sequences_with_line_breaks) {
268 STR result;
269 result.resize(text.size());
271 // Set flags to pretend we're already in a trimmed whitespace sequence, so we
272 // will trim any leading whitespace.
273 bool in_whitespace = true;
274 bool already_trimmed = true;
276 int chars_written = 0;
277 for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) {
278 if (IsWhitespace(*i)) {
279 if (!in_whitespace) {
280 // Reduce all whitespace sequences to a single space.
281 in_whitespace = true;
282 result[chars_written++] = L' ';
284 if (trim_sequences_with_line_breaks && !already_trimmed &&
285 ((*i == '\n') || (*i == '\r'))) {
286 // Whitespace sequences containing CR or LF are eliminated entirely.
287 already_trimmed = true;
288 --chars_written;
290 } else {
291 // Non-whitespace chracters are copied straight across.
292 in_whitespace = false;
293 already_trimmed = false;
294 result[chars_written++] = *i;
298 if (in_whitespace && !already_trimmed) {
299 // Any trailing whitespace is eliminated.
300 --chars_written;
303 result.resize(chars_written);
304 return result;
307 string16 CollapseWhitespace(const string16& text,
308 bool trim_sequences_with_line_breaks) {
309 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
312 std::string CollapseWhitespaceASCII(const std::string& text,
313 bool trim_sequences_with_line_breaks) {
314 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
317 bool ContainsOnlyChars(const StringPiece& input,
318 const StringPiece& characters) {
319 return input.find_first_not_of(characters) == StringPiece::npos;
322 bool ContainsOnlyChars(const StringPiece16& input,
323 const StringPiece16& characters) {
324 return input.find_first_not_of(characters) == StringPiece16::npos;
327 template<class STR>
328 static bool DoIsStringASCII(const STR& str) {
329 for (size_t i = 0; i < str.length(); i++) {
330 typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i];
331 if (c > 0x7F)
332 return false;
334 return true;
337 bool IsStringASCII(const StringPiece& str) {
338 return DoIsStringASCII(str);
341 bool IsStringASCII(const string16& str) {
342 return DoIsStringASCII(str);
345 bool IsStringUTF8(const std::string& str) {
346 const char *src = str.data();
347 int32 src_len = static_cast<int32>(str.length());
348 int32 char_index = 0;
350 while (char_index < src_len) {
351 int32 code_point;
352 CBU8_NEXT(src, char_index, src_len, code_point);
353 if (!IsValidCharacter(code_point))
354 return false;
356 return true;
359 } // namespace base
361 template<typename Iter>
362 static inline bool DoLowerCaseEqualsASCII(Iter a_begin,
363 Iter a_end,
364 const char* b) {
365 for (Iter it = a_begin; it != a_end; ++it, ++b) {
366 if (!*b || base::ToLowerASCII(*it) != *b)
367 return false;
369 return *b == 0;
372 // Front-ends for LowerCaseEqualsASCII.
373 bool LowerCaseEqualsASCII(const std::string& a, const char* b) {
374 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
377 bool LowerCaseEqualsASCII(const string16& a, const char* b) {
378 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
381 bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,
382 std::string::const_iterator a_end,
383 const char* b) {
384 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
387 bool LowerCaseEqualsASCII(string16::const_iterator a_begin,
388 string16::const_iterator a_end,
389 const char* b) {
390 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
393 // TODO(port): Resolve wchar_t/iterator issues that require OS_ANDROID here.
394 #if !defined(OS_ANDROID)
395 bool LowerCaseEqualsASCII(const char* a_begin,
396 const char* a_end,
397 const char* b) {
398 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
401 bool LowerCaseEqualsASCII(const char16* a_begin,
402 const char16* a_end,
403 const char* b) {
404 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
407 #endif // !defined(OS_ANDROID)
409 bool EqualsASCII(const string16& a, const base::StringPiece& b) {
410 if (a.length() != b.length())
411 return false;
412 return std::equal(b.begin(), b.end(), a.begin());
415 bool StartsWithASCII(const std::string& str,
416 const std::string& search,
417 bool case_sensitive) {
418 if (case_sensitive)
419 return str.compare(0, search.length(), search) == 0;
420 else
421 return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0;
424 template <typename STR>
425 bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) {
426 if (case_sensitive) {
427 return str.compare(0, search.length(), search) == 0;
428 } else {
429 if (search.size() > str.size())
430 return false;
431 return std::equal(search.begin(), search.end(), str.begin(),
432 base::CaseInsensitiveCompare<typename STR::value_type>());
436 bool StartsWith(const string16& str, const string16& search,
437 bool case_sensitive) {
438 return StartsWithT(str, search, case_sensitive);
441 template <typename STR>
442 bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) {
443 typename STR::size_type str_length = str.length();
444 typename STR::size_type search_length = search.length();
445 if (search_length > str_length)
446 return false;
447 if (case_sensitive) {
448 return str.compare(str_length - search_length, search_length, search) == 0;
449 } else {
450 return std::equal(search.begin(), search.end(),
451 str.begin() + (str_length - search_length),
452 base::CaseInsensitiveCompare<typename STR::value_type>());
456 bool EndsWith(const std::string& str, const std::string& search,
457 bool case_sensitive) {
458 return EndsWithT(str, search, case_sensitive);
461 bool EndsWith(const string16& str, const string16& search,
462 bool case_sensitive) {
463 return EndsWithT(str, search, case_sensitive);
466 static const char* const kByteStringsUnlocalized[] = {
467 " B",
468 " kB",
469 " MB",
470 " GB",
471 " TB",
472 " PB"
475 string16 FormatBytesUnlocalized(int64 bytes) {
476 double unit_amount = static_cast<double>(bytes);
477 size_t dimension = 0;
478 const int kKilo = 1024;
479 while (unit_amount >= kKilo &&
480 dimension < arraysize(kByteStringsUnlocalized) - 1) {
481 unit_amount /= kKilo;
482 dimension++;
485 char buf[64];
486 if (bytes != 0 && dimension > 0 && unit_amount < 100) {
487 base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount,
488 kByteStringsUnlocalized[dimension]);
489 } else {
490 base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount,
491 kByteStringsUnlocalized[dimension]);
494 return base::ASCIIToUTF16(buf);
497 template<class StringType>
498 void DoReplaceSubstringsAfterOffset(StringType* str,
499 typename StringType::size_type start_offset,
500 const StringType& find_this,
501 const StringType& replace_with,
502 bool replace_all) {
503 if ((start_offset == StringType::npos) || (start_offset >= str->length()))
504 return;
506 DCHECK(!find_this.empty());
507 for (typename StringType::size_type offs(str->find(find_this, start_offset));
508 offs != StringType::npos; offs = str->find(find_this, offs)) {
509 str->replace(offs, find_this.length(), replace_with);
510 offs += replace_with.length();
512 if (!replace_all)
513 break;
517 void ReplaceFirstSubstringAfterOffset(string16* str,
518 string16::size_type start_offset,
519 const string16& find_this,
520 const string16& replace_with) {
521 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
522 false); // replace first instance
525 void ReplaceFirstSubstringAfterOffset(std::string* str,
526 std::string::size_type start_offset,
527 const std::string& find_this,
528 const std::string& replace_with) {
529 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
530 false); // replace first instance
533 void ReplaceSubstringsAfterOffset(string16* str,
534 string16::size_type start_offset,
535 const string16& find_this,
536 const string16& replace_with) {
537 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
538 true); // replace all instances
541 void ReplaceSubstringsAfterOffset(std::string* str,
542 std::string::size_type start_offset,
543 const std::string& find_this,
544 const std::string& replace_with) {
545 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
546 true); // replace all instances
550 template<typename STR>
551 static size_t TokenizeT(const STR& str,
552 const STR& delimiters,
553 std::vector<STR>* tokens) {
554 tokens->clear();
556 typename STR::size_type start = str.find_first_not_of(delimiters);
557 while (start != STR::npos) {
558 typename STR::size_type end = str.find_first_of(delimiters, start + 1);
559 if (end == STR::npos) {
560 tokens->push_back(str.substr(start));
561 break;
562 } else {
563 tokens->push_back(str.substr(start, end - start));
564 start = str.find_first_not_of(delimiters, end + 1);
568 return tokens->size();
571 size_t Tokenize(const string16& str,
572 const string16& delimiters,
573 std::vector<string16>* tokens) {
574 return TokenizeT(str, delimiters, tokens);
577 size_t Tokenize(const std::string& str,
578 const std::string& delimiters,
579 std::vector<std::string>* tokens) {
580 return TokenizeT(str, delimiters, tokens);
583 size_t Tokenize(const base::StringPiece& str,
584 const base::StringPiece& delimiters,
585 std::vector<base::StringPiece>* tokens) {
586 return TokenizeT(str, delimiters, tokens);
589 template<typename STR>
590 static STR JoinStringT(const std::vector<STR>& parts, const STR& sep) {
591 if (parts.empty())
592 return STR();
594 STR result(parts[0]);
595 typename std::vector<STR>::const_iterator iter = parts.begin();
596 ++iter;
598 for (; iter != parts.end(); ++iter) {
599 result += sep;
600 result += *iter;
603 return result;
606 std::string JoinString(const std::vector<std::string>& parts, char sep) {
607 return JoinStringT(parts, std::string(1, sep));
610 string16 JoinString(const std::vector<string16>& parts, char16 sep) {
611 return JoinStringT(parts, string16(1, sep));
614 std::string JoinString(const std::vector<std::string>& parts,
615 const std::string& separator) {
616 return JoinStringT(parts, separator);
619 string16 JoinString(const std::vector<string16>& parts,
620 const string16& separator) {
621 return JoinStringT(parts, separator);
624 template<class FormatStringType, class OutStringType>
625 OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string,
626 const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) {
627 size_t substitutions = subst.size();
629 size_t sub_length = 0;
630 for (typename std::vector<OutStringType>::const_iterator iter = subst.begin();
631 iter != subst.end(); ++iter) {
632 sub_length += iter->length();
635 OutStringType formatted;
636 formatted.reserve(format_string.length() + sub_length);
638 std::vector<ReplacementOffset> r_offsets;
639 for (typename FormatStringType::const_iterator i = format_string.begin();
640 i != format_string.end(); ++i) {
641 if ('$' == *i) {
642 if (i + 1 != format_string.end()) {
643 ++i;
644 DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i;
645 if ('$' == *i) {
646 while (i != format_string.end() && '$' == *i) {
647 formatted.push_back('$');
648 ++i;
650 --i;
651 } else {
652 uintptr_t index = 0;
653 while (i != format_string.end() && '0' <= *i && *i <= '9') {
654 index *= 10;
655 index += *i - '0';
656 ++i;
658 --i;
659 index -= 1;
660 if (offsets) {
661 ReplacementOffset r_offset(index,
662 static_cast<int>(formatted.size()));
663 r_offsets.insert(std::lower_bound(r_offsets.begin(),
664 r_offsets.end(),
665 r_offset,
666 &CompareParameter),
667 r_offset);
669 if (index < substitutions)
670 formatted.append(subst.at(index));
673 } else {
674 formatted.push_back(*i);
677 if (offsets) {
678 for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin();
679 i != r_offsets.end(); ++i) {
680 offsets->push_back(i->offset);
683 return formatted;
686 string16 ReplaceStringPlaceholders(const string16& format_string,
687 const std::vector<string16>& subst,
688 std::vector<size_t>* offsets) {
689 return DoReplaceStringPlaceholders(format_string, subst, offsets);
692 std::string ReplaceStringPlaceholders(const base::StringPiece& format_string,
693 const std::vector<std::string>& subst,
694 std::vector<size_t>* offsets) {
695 return DoReplaceStringPlaceholders(format_string, subst, offsets);
698 string16 ReplaceStringPlaceholders(const string16& format_string,
699 const string16& a,
700 size_t* offset) {
701 std::vector<size_t> offsets;
702 std::vector<string16> subst;
703 subst.push_back(a);
704 string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);
706 DCHECK_EQ(1U, offsets.size());
707 if (offset)
708 *offset = offsets[0];
709 return result;
712 static bool IsWildcard(base_icu::UChar32 character) {
713 return character == '*' || character == '?';
716 // Move the strings pointers to the point where they start to differ.
717 template <typename CHAR, typename NEXT>
718 static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,
719 const CHAR** string, const CHAR* string_end,
720 NEXT next) {
721 const CHAR* escape = NULL;
722 while (*pattern != pattern_end && *string != string_end) {
723 if (!escape && IsWildcard(**pattern)) {
724 // We don't want to match wildcard here, except if it's escaped.
725 return;
728 // Check if the escapement char is found. If so, skip it and move to the
729 // next character.
730 if (!escape && **pattern == '\\') {
731 escape = *pattern;
732 next(pattern, pattern_end);
733 continue;
736 // Check if the chars match, if so, increment the ptrs.
737 const CHAR* pattern_next = *pattern;
738 const CHAR* string_next = *string;
739 base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
740 if (pattern_char == next(&string_next, string_end) &&
741 pattern_char != (base_icu::UChar32) CBU_SENTINEL) {
742 *pattern = pattern_next;
743 *string = string_next;
744 } else {
745 // Uh ho, it did not match, we are done. If the last char was an
746 // escapement, that means that it was an error to advance the ptr here,
747 // let's put it back where it was. This also mean that the MatchPattern
748 // function will return false because if we can't match an escape char
749 // here, then no one will.
750 if (escape) {
751 *pattern = escape;
753 return;
756 escape = NULL;
760 template <typename CHAR, typename NEXT>
761 static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
762 while (*pattern != end) {
763 if (!IsWildcard(**pattern))
764 return;
765 next(pattern, end);
769 template <typename CHAR, typename NEXT>
770 static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
771 const CHAR* pattern, const CHAR* pattern_end,
772 int depth,
773 NEXT next) {
774 const int kMaxDepth = 16;
775 if (depth > kMaxDepth)
776 return false;
778 // Eat all the matching chars.
779 EatSameChars(&pattern, pattern_end, &eval, eval_end, next);
781 // If the string is empty, then the pattern must be empty too, or contains
782 // only wildcards.
783 if (eval == eval_end) {
784 EatWildcard(&pattern, pattern_end, next);
785 return pattern == pattern_end;
788 // Pattern is empty but not string, this is not a match.
789 if (pattern == pattern_end)
790 return false;
792 // If this is a question mark, then we need to compare the rest with
793 // the current string or the string with one character eaten.
794 const CHAR* next_pattern = pattern;
795 next(&next_pattern, pattern_end);
796 if (pattern[0] == '?') {
797 if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
798 depth + 1, next))
799 return true;
800 const CHAR* next_eval = eval;
801 next(&next_eval, eval_end);
802 if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
803 depth + 1, next))
804 return true;
807 // This is a *, try to match all the possible substrings with the remainder
808 // of the pattern.
809 if (pattern[0] == '*') {
810 // Collapse duplicate wild cards (********** into *) so that the
811 // method does not recurse unnecessarily. http://crbug.com/52839
812 EatWildcard(&next_pattern, pattern_end, next);
814 while (eval != eval_end) {
815 if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
816 depth + 1, next))
817 return true;
818 eval++;
821 // We reached the end of the string, let see if the pattern contains only
822 // wildcards.
823 if (eval == eval_end) {
824 EatWildcard(&pattern, pattern_end, next);
825 if (pattern != pattern_end)
826 return false;
827 return true;
831 return false;
834 struct NextCharUTF8 {
835 base_icu::UChar32 operator()(const char** p, const char* end) {
836 base_icu::UChar32 c;
837 int offset = 0;
838 CBU8_NEXT(*p, offset, end - *p, c);
839 *p += offset;
840 return c;
844 struct NextCharUTF16 {
845 base_icu::UChar32 operator()(const char16** p, const char16* end) {
846 base_icu::UChar32 c;
847 int offset = 0;
848 CBU16_NEXT(*p, offset, end - *p, c);
849 *p += offset;
850 return c;
854 bool MatchPattern(const base::StringPiece& eval,
855 const base::StringPiece& pattern) {
856 return MatchPatternT(eval.data(), eval.data() + eval.size(),
857 pattern.data(), pattern.data() + pattern.size(),
858 0, NextCharUTF8());
861 bool MatchPattern(const string16& eval, const string16& pattern) {
862 return MatchPatternT(eval.c_str(), eval.c_str() + eval.size(),
863 pattern.c_str(), pattern.c_str() + pattern.size(),
864 0, NextCharUTF16());
867 // The following code is compatible with the OpenBSD lcpy interface. See:
868 // http://www.gratisoft.us/todd/papers/strlcpy.html
869 // ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
871 namespace {
873 template <typename CHAR>
874 size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
875 for (size_t i = 0; i < dst_size; ++i) {
876 if ((dst[i] = src[i]) == 0) // We hit and copied the terminating NULL.
877 return i;
880 // We were left off at dst_size. We over copied 1 byte. Null terminate.
881 if (dst_size != 0)
882 dst[dst_size - 1] = 0;
884 // Count the rest of the |src|, and return it's length in characters.
885 while (src[dst_size]) ++dst_size;
886 return dst_size;
889 } // namespace
891 size_t base::strlcpy(char* dst, const char* src, size_t dst_size) {
892 return lcpyT<char>(dst, src, dst_size);
894 size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
895 return lcpyT<wchar_t>(dst, src, dst_size);