1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/strings/string_util.h"
21 #include "base/basictypes.h"
22 #include "base/logging.h"
23 #include "base/memory/singleton.h"
24 #include "base/strings/utf_string_conversion_utils.h"
25 #include "base/strings/utf_string_conversions.h"
26 #include "base/third_party/icu/icu_utf.h"
27 #include "build/build_config.h"
29 // Remove when this entire file is in the base namespace.
35 // Force the singleton used by EmptyString[16] to be a unique type. This
36 // prevents other code that might accidentally use Singleton<string> from
37 // getting our internal one.
43 static EmptyStrings
* GetInstance() {
44 return Singleton
<EmptyStrings
>::get();
// Used by ReplaceStringPlaceholders to track the position in the string of
// replaced parameters.
struct ReplacementOffset {
  // Records that placeholder number |parameter| was written at |offset|.
  ReplacementOffset(uintptr_t parameter, size_t offset)
      : parameter(parameter),
        offset(offset) {}

  // Index of the parameter.
  uintptr_t parameter;

  // Starting position in the string.
  size_t offset;
};

// Ordering predicate for ReplacementOffset: returns true when |elem1| has a
// strictly smaller parameter index than |elem2|. The |offset| field is not
// consulted, so two entries with the same parameter compare as equivalent.
static bool CompareParameter(const ReplacementOffset& elem1,
                             const ReplacementOffset& elem2) {
  return elem1.parameter < elem2.parameter;
}
// Assuming that a pointer is the size of a "machine word", then
// uintptr_t is an integer type that is also a machine word.
typedef uintptr_t MachineWord;

// Low-order address bits that must all be zero for an address to be a
// multiple of sizeof(MachineWord).
const uintptr_t kMachineWordAlignmentMask = sizeof(MachineWord) - 1;

// Returns true when the address held in |pointer| has none of the bits in
// kMachineWordAlignmentMask set.
inline bool IsAlignedToMachineWord(const void* pointer) {
  return !(reinterpret_cast<MachineWord>(pointer) & kMachineWordAlignmentMask);
}

// Returns |pointer| with the bits in kMachineWordAlignmentMask cleared, i.e.
// rounded down; an address with those bits already clear is returned
// unchanged.
template<typename T> inline T* AlignToMachineWord(T* pointer) {
  return reinterpret_cast<T*>(reinterpret_cast<MachineWord>(pointer) &
                              ~kMachineWordAlignmentMask);
}
81 template<size_t size
, typename CharacterType
> struct NonASCIIMask
;
82 template<> struct NonASCIIMask
<4, base::char16
> {
83 static inline uint32_t value() { return 0xFF80FF80U
; }
85 template<> struct NonASCIIMask
<4, char> {
86 static inline uint32_t value() { return 0x80808080U
; }
88 template<> struct NonASCIIMask
<8, base::char16
> {
89 static inline uint64_t value() { return 0xFF80FF80FF80FF80ULL
; }
91 template<> struct NonASCIIMask
<8, char> {
92 static inline uint64_t value() { return 0x8080808080808080ULL
; }
94 #if defined(WCHAR_T_IS_UTF32)
95 template<> struct NonASCIIMask
<4, wchar_t> {
96 static inline uint32_t value() { return 0xFFFFFF80U
; }
98 template<> struct NonASCIIMask
<8, wchar_t> {
99 static inline uint64_t value() { return 0xFFFFFF80FFFFFF80ULL
; }
101 #endif // WCHAR_T_IS_UTF32
// Scans the wide printf-style |format| string and reports whether every
// conversion specification in it is one this function treats as portable.
//
// Returns false as soon as a specification contains 's' or 'c' that was not
// preceded (within the same specification) by an 'l' modifier, or any of
// 'S', 'C', 'F', 'D', 'O', 'U'. Returns true otherwise, including when the
// string ends in the middle of a specification.
bool IsWprintfFormatPortable(const wchar_t* format) {
  for (const wchar_t* position = format; *position != '\0'; ++position) {
    if (*position == '%') {
      bool in_specification = true;
      bool modifier_l = false;
      while (in_specification) {
        // Eat up characters until reaching a known specifier.
        if (*++position == '\0') {
          // The format string ended in the middle of a specification.  Call
          // it portable because no unportable specifications were found.  The
          // string is equally broken on all platforms.
          return true;
        }

        if (*position == 'l') {
          // 'l' is the only thing that can save the 's' and 'c' specifiers.
          modifier_l = true;
        } else if (((*position == 's' || *position == 'c') && !modifier_l) ||
                   *position == 'S' || *position == 'C' || *position == 'F' ||
                   *position == 'D' || *position == 'O' || *position == 'U') {
          // Not portable.
          return false;
        }

        if (wcschr(L"diouxXeEfgGaAcspn%", *position)) {
          // Portable, keep scanning the rest of the format string.
          in_specification = false;
        }
      }
    }
  }

  return true;
}
142 const std::string
& EmptyString() {
143 return EmptyStrings::GetInstance()->s
;
146 const string16
& EmptyString16() {
147 return EmptyStrings::GetInstance()->s16
;
// Copies |input| into |*output| and then replaces every character of
// |*output| that appears in |replace_chars| with the whole of
// |replace_with| (which may be empty, removing the character).
//
// Scanning resumes just past each inserted replacement, so characters that
// come from |replace_with| itself are never replaced again.
//
// Returns true if at least one character was replaced.
template<typename STR>
bool ReplaceCharsT(const STR& input,
                   const STR& replace_chars,
                   const STR& replace_with,
                   STR* output) {
  bool removed = false;
  size_t replace_length = replace_with.length();

  *output = input;

  size_t found = output->find_first_of(replace_chars);
  while (found != STR::npos) {
    removed = true;
    output->replace(found, 1, replace_with);
    // Skip over the text just inserted before searching again.
    found = output->find_first_of(replace_chars, found + replace_length);
  }

  return removed;
}
170 bool ReplaceChars(const string16
& input
,
171 const base::StringPiece16
& replace_chars
,
172 const string16
& replace_with
,
174 return ReplaceCharsT(input
, replace_chars
.as_string(), replace_with
, output
);
177 bool ReplaceChars(const std::string
& input
,
178 const base::StringPiece
& replace_chars
,
179 const std::string
& replace_with
,
180 std::string
* output
) {
181 return ReplaceCharsT(input
, replace_chars
.as_string(), replace_with
, output
);
184 bool RemoveChars(const string16
& input
,
185 const base::StringPiece16
& remove_chars
,
187 return ReplaceChars(input
, remove_chars
.as_string(), string16(), output
);
190 bool RemoveChars(const std::string
& input
,
191 const base::StringPiece
& remove_chars
,
192 std::string
* output
) {
193 return ReplaceChars(input
, remove_chars
.as_string(), std::string(), output
);
196 template<typename STR
>
197 TrimPositions
TrimStringT(const STR
& input
,
198 const STR
& trim_chars
,
199 TrimPositions positions
,
201 // Find the edges of leading/trailing whitespace as desired.
202 const size_t last_char
= input
.length() - 1;
203 const size_t first_good_char
= (positions
& TRIM_LEADING
) ?
204 input
.find_first_not_of(trim_chars
) : 0;
205 const size_t last_good_char
= (positions
& TRIM_TRAILING
) ?
206 input
.find_last_not_of(trim_chars
) : last_char
;
208 // When the string was all whitespace, report that we stripped off whitespace
209 // from whichever position the caller was interested in. For empty input, we
210 // stripped no whitespace, but we still need to clear |output|.
212 (first_good_char
== STR::npos
) || (last_good_char
== STR::npos
)) {
213 bool input_was_empty
= input
.empty(); // in case output == &input
215 return input_was_empty
? TRIM_NONE
: positions
;
218 // Trim the whitespace.
220 input
.substr(first_good_char
, last_good_char
- first_good_char
+ 1);
222 // Return where we trimmed from.
223 return static_cast<TrimPositions
>(
224 ((first_good_char
== 0) ? TRIM_NONE
: TRIM_LEADING
) |
225 ((last_good_char
== last_char
) ? TRIM_NONE
: TRIM_TRAILING
));
228 bool TrimString(const string16
& input
,
229 const base::StringPiece16
& trim_chars
,
231 return TrimStringT(input
, trim_chars
.as_string(), TRIM_ALL
, output
) !=
235 bool TrimString(const std::string
& input
,
236 const base::StringPiece
& trim_chars
,
237 std::string
* output
) {
238 return TrimStringT(input
, trim_chars
.as_string(), TRIM_ALL
, output
) !=
242 void TruncateUTF8ToByteSize(const std::string
& input
,
243 const size_t byte_size
,
244 std::string
* output
) {
246 if (byte_size
> input
.length()) {
250 DCHECK_LE(byte_size
, static_cast<uint32
>(kint32max
));
251 // Note: This cast is necessary because CBU8_NEXT uses int32s.
252 int32 truncation_length
= static_cast<int32
>(byte_size
);
253 int32 char_index
= truncation_length
- 1;
254 const char* data
= input
.data();
256 // Using CBU8, we will move backwards from the truncation point
257 // to the beginning of the string looking for a valid UTF8
258 // character. Once a full UTF8 character is found, we will
259 // truncate the string to the end of that character.
260 while (char_index
>= 0) {
261 int32 prev
= char_index
;
262 base_icu::UChar32 code_point
= 0;
263 CBU8_NEXT(data
, char_index
, truncation_length
, code_point
);
264 if (!IsValidCharacter(code_point
) ||
265 !IsValidCodepoint(code_point
)) {
266 char_index
= prev
- 1;
272 if (char_index
>= 0 )
273 *output
= input
.substr(0, char_index
);
278 TrimPositions
TrimWhitespace(const string16
& input
,
279 TrimPositions positions
,
281 return TrimStringT(input
, base::string16(kWhitespaceUTF16
), positions
,
285 TrimPositions
TrimWhitespaceASCII(const std::string
& input
,
286 TrimPositions positions
,
287 std::string
* output
) {
288 return TrimStringT(input
, std::string(kWhitespaceASCII
), positions
, output
);
291 // This function is only for backward-compatibility.
292 // To be removed when all callers are updated.
293 TrimPositions
TrimWhitespace(const std::string
& input
,
294 TrimPositions positions
,
295 std::string
* output
) {
296 return TrimWhitespaceASCII(input
, positions
, output
);
299 template<typename STR
>
300 STR
CollapseWhitespaceT(const STR
& text
,
301 bool trim_sequences_with_line_breaks
) {
303 result
.resize(text
.size());
305 // Set flags to pretend we're already in a trimmed whitespace sequence, so we
306 // will trim any leading whitespace.
307 bool in_whitespace
= true;
308 bool already_trimmed
= true;
310 int chars_written
= 0;
311 for (typename
STR::const_iterator
i(text
.begin()); i
!= text
.end(); ++i
) {
312 if (IsWhitespace(*i
)) {
313 if (!in_whitespace
) {
314 // Reduce all whitespace sequences to a single space.
315 in_whitespace
= true;
316 result
[chars_written
++] = L
' ';
318 if (trim_sequences_with_line_breaks
&& !already_trimmed
&&
319 ((*i
== '\n') || (*i
== '\r'))) {
320 // Whitespace sequences containing CR or LF are eliminated entirely.
321 already_trimmed
= true;
325 // Non-whitespace characters are copied straight across.
326 in_whitespace
= false;
327 already_trimmed
= false;
328 result
[chars_written
++] = *i
;
332 if (in_whitespace
&& !already_trimmed
) {
333 // Any trailing whitespace is eliminated.
337 result
.resize(chars_written
);
341 string16
CollapseWhitespace(const string16
& text
,
342 bool trim_sequences_with_line_breaks
) {
343 return CollapseWhitespaceT(text
, trim_sequences_with_line_breaks
);
346 std::string
CollapseWhitespaceASCII(const std::string
& text
,
347 bool trim_sequences_with_line_breaks
) {
348 return CollapseWhitespaceT(text
, trim_sequences_with_line_breaks
);
351 bool ContainsOnlyChars(const StringPiece
& input
,
352 const StringPiece
& characters
) {
353 return input
.find_first_not_of(characters
) == StringPiece::npos
;
356 bool ContainsOnlyChars(const StringPiece16
& input
,
357 const StringPiece16
& characters
) {
358 return input
.find_first_not_of(characters
) == StringPiece16::npos
;
361 template <class Char
>
362 inline bool DoIsStringASCII(const Char
* characters
, size_t length
) {
363 MachineWord all_char_bits
= 0;
364 const Char
* end
= characters
+ length
;
366 // Prologue: align the input.
367 while (!IsAlignedToMachineWord(characters
) && characters
!= end
) {
368 all_char_bits
|= *characters
;
372 // Compare the values of CPU word size.
373 const Char
* word_end
= AlignToMachineWord(end
);
374 const size_t loop_increment
= sizeof(MachineWord
) / sizeof(Char
);
375 while (characters
< word_end
) {
376 all_char_bits
|= *(reinterpret_cast<const MachineWord
*>(characters
));
377 characters
+= loop_increment
;
380 // Process the remaining bytes.
381 while (characters
!= end
) {
382 all_char_bits
|= *characters
;
386 MachineWord non_ascii_bit_mask
=
387 NonASCIIMask
<sizeof(MachineWord
), Char
>::value();
388 return !(all_char_bits
& non_ascii_bit_mask
);
391 bool IsStringASCII(const StringPiece
& str
) {
392 return DoIsStringASCII(str
.data(), str
.length());
395 bool IsStringASCII(const StringPiece16
& str
) {
396 return DoIsStringASCII(str
.data(), str
.length());
399 bool IsStringASCII(const string16
& str
) {
400 return DoIsStringASCII(str
.data(), str
.length());
403 #if defined(WCHAR_T_IS_UTF32)
404 bool IsStringASCII(const std::wstring
& str
) {
405 return DoIsStringASCII(str
.data(), str
.length());
409 bool IsStringUTF8(const StringPiece
& str
) {
410 const char *src
= str
.data();
411 int32 src_len
= static_cast<int32
>(str
.length());
412 int32 char_index
= 0;
414 while (char_index
< src_len
) {
416 CBU8_NEXT(src
, char_index
, src_len
, code_point
);
417 if (!IsValidCharacter(code_point
))
425 template<typename Iter
>
426 static inline bool DoLowerCaseEqualsASCII(Iter a_begin
,
429 for (Iter it
= a_begin
; it
!= a_end
; ++it
, ++b
) {
430 if (!*b
|| base::ToLowerASCII(*it
) != *b
)
436 // Front-ends for LowerCaseEqualsASCII.
437 bool LowerCaseEqualsASCII(const std::string
& a
, const char* b
) {
438 return DoLowerCaseEqualsASCII(a
.begin(), a
.end(), b
);
441 bool LowerCaseEqualsASCII(const string16
& a
, const char* b
) {
442 return DoLowerCaseEqualsASCII(a
.begin(), a
.end(), b
);
445 bool LowerCaseEqualsASCII(std::string::const_iterator a_begin
,
446 std::string::const_iterator a_end
,
448 return DoLowerCaseEqualsASCII(a_begin
, a_end
, b
);
451 bool LowerCaseEqualsASCII(string16::const_iterator a_begin
,
452 string16::const_iterator a_end
,
454 return DoLowerCaseEqualsASCII(a_begin
, a_end
, b
);
457 // TODO(port): Resolve wchar_t/iterator issues that require OS_ANDROID here.
458 #if !defined(OS_ANDROID)
459 bool LowerCaseEqualsASCII(const char* a_begin
,
462 return DoLowerCaseEqualsASCII(a_begin
, a_end
, b
);
465 bool LowerCaseEqualsASCII(const char16
* a_begin
,
468 return DoLowerCaseEqualsASCII(a_begin
, a_end
, b
);
471 #endif // !defined(OS_ANDROID)
473 bool EqualsASCII(const string16
& a
, const base::StringPiece
& b
) {
474 if (a
.length() != b
.length())
476 return std::equal(b
.begin(), b
.end(), a
.begin());
479 bool StartsWithASCII(const std::string
& str
,
480 const std::string
& search
,
481 bool case_sensitive
) {
483 return str
.compare(0, search
.length(), search
) == 0;
485 return base::strncasecmp(str
.c_str(), search
.c_str(), search
.length()) == 0;
488 template <typename STR
>
489 bool StartsWithT(const STR
& str
, const STR
& search
, bool case_sensitive
) {
490 if (case_sensitive
) {
491 return str
.compare(0, search
.length(), search
) == 0;
493 if (search
.size() > str
.size())
495 return std::equal(search
.begin(), search
.end(), str
.begin(),
496 base::CaseInsensitiveCompare
<typename
STR::value_type
>());
500 bool StartsWith(const string16
& str
, const string16
& search
,
501 bool case_sensitive
) {
502 return StartsWithT(str
, search
, case_sensitive
);
505 template <typename STR
>
506 bool EndsWithT(const STR
& str
, const STR
& search
, bool case_sensitive
) {
507 size_t str_length
= str
.length();
508 size_t search_length
= search
.length();
509 if (search_length
> str_length
)
512 return str
.compare(str_length
- search_length
, search_length
, search
) == 0;
513 return std::equal(search
.begin(), search
.end(),
514 str
.begin() + (str_length
- search_length
),
515 base::CaseInsensitiveCompare
<typename
STR::value_type
>());
518 bool EndsWith(const std::string
& str
, const std::string
& search
,
519 bool case_sensitive
) {
520 return EndsWithT(str
, search
, case_sensitive
);
523 bool EndsWith(const string16
& str
, const string16
& search
,
524 bool case_sensitive
) {
525 return EndsWithT(str
, search
, case_sensitive
);
528 static const char* const kByteStringsUnlocalized
[] = {
537 string16
FormatBytesUnlocalized(int64 bytes
) {
538 double unit_amount
= static_cast<double>(bytes
);
539 size_t dimension
= 0;
540 const int kKilo
= 1024;
541 while (unit_amount
>= kKilo
&&
542 dimension
< arraysize(kByteStringsUnlocalized
) - 1) {
543 unit_amount
/= kKilo
;
548 if (bytes
!= 0 && dimension
> 0 && unit_amount
< 100) {
549 base::snprintf(buf
, arraysize(buf
), "%.1lf%s", unit_amount
,
550 kByteStringsUnlocalized
[dimension
]);
552 base::snprintf(buf
, arraysize(buf
), "%.0lf%s", unit_amount
,
553 kByteStringsUnlocalized
[dimension
]);
556 return base::ASCIIToUTF16(buf
);
559 // Runs in O(n) time in the length of |str|.
560 template<class StringType
>
561 void DoReplaceSubstringsAfterOffset(StringType
* str
,
563 const StringType
& find_this
,
564 const StringType
& replace_with
,
566 DCHECK(!find_this
.empty());
568 // If the find string doesn't appear, there's nothing to do.
569 offset
= str
->find(find_this
, offset
);
570 if (offset
== StringType::npos
)
573 // If we're only replacing one instance, there's no need to do anything
575 size_t find_length
= find_this
.length();
577 str
->replace(offset
, find_length
, replace_with
);
581 // If the find and replace strings are the same length, we can simply use
582 // replace() on each instance, and finish the entire operation in O(n) time.
583 size_t replace_length
= replace_with
.length();
584 if (find_length
== replace_length
) {
586 str
->replace(offset
, find_length
, replace_with
);
587 offset
= str
->find(find_this
, offset
+ replace_length
);
588 } while (offset
!= StringType::npos
);
592 // Since the find and replace strings aren't the same length, a loop like the
593 // one above would be O(n^2) in the worst case, as replace() will shift the
594 // entire remaining string each time. We need to be more clever to keep
597 // If we're shortening the string, we can alternate replacements with shifting
598 // forward the intervening characters using memmove().
599 size_t str_length
= str
->length();
600 if (find_length
> replace_length
) {
601 size_t write_offset
= offset
;
603 if (replace_length
) {
604 str
->replace(write_offset
, replace_length
, replace_with
);
605 write_offset
+= replace_length
;
607 size_t read_offset
= offset
+ find_length
;
608 offset
= std::min(str
->find(find_this
, read_offset
), str_length
);
609 size_t length
= offset
- read_offset
;
611 memmove(&(*str
)[write_offset
], &(*str
)[read_offset
],
612 length
* sizeof(typename
StringType::value_type
));
613 write_offset
+= length
;
615 } while (offset
< str_length
);
616 str
->resize(write_offset
);
620 // We're lengthening the string. We can use alternating replacements and
621 // memmove() calls like above, but we need to precalculate the final string
622 // length and then expand from back-to-front to avoid overwriting the string
623 // as we're reading it, needing to shift, or having to copy to a second string
625 size_t first_match
= offset
;
627 // First, calculate the final length and resize the string.
628 size_t final_length
= str_length
;
629 size_t expansion
= replace_length
- find_length
;
630 size_t current_match
;
632 final_length
+= expansion
;
633 // Minor optimization: save this offset into |current_match|, so that on
634 // exit from the loop, |current_match| will point at the last instance of
635 // the find string, and we won't need to find() it again immediately.
636 current_match
= offset
;
637 offset
= str
->find(find_this
, offset
+ find_length
);
638 } while (offset
!= StringType::npos
);
639 str
->resize(final_length
);
641 // Now do the replacement loop, working backwards through the string.
642 for (size_t prev_match
= str_length
, write_offset
= final_length
; ;
643 current_match
= str
->rfind(find_this
, current_match
- 1)) {
644 size_t read_offset
= current_match
+ find_length
;
645 size_t length
= prev_match
- read_offset
;
647 write_offset
-= length
;
648 memmove(&(*str
)[write_offset
], &(*str
)[read_offset
],
649 length
* sizeof(typename
StringType::value_type
));
651 write_offset
-= replace_length
;
652 str
->replace(write_offset
, replace_length
, replace_with
);
653 if (current_match
== first_match
)
655 prev_match
= current_match
;
659 void ReplaceFirstSubstringAfterOffset(string16
* str
,
661 const string16
& find_this
,
662 const string16
& replace_with
) {
663 DoReplaceSubstringsAfterOffset(str
, start_offset
, find_this
, replace_with
,
664 false); // replace first instance
667 void ReplaceFirstSubstringAfterOffset(std::string
* str
,
669 const std::string
& find_this
,
670 const std::string
& replace_with
) {
671 DoReplaceSubstringsAfterOffset(str
, start_offset
, find_this
, replace_with
,
672 false); // replace first instance
675 void ReplaceSubstringsAfterOffset(string16
* str
,
677 const string16
& find_this
,
678 const string16
& replace_with
) {
679 DoReplaceSubstringsAfterOffset(str
, start_offset
, find_this
, replace_with
,
680 true); // replace all instances
683 void ReplaceSubstringsAfterOffset(std::string
* str
,
685 const std::string
& find_this
,
686 const std::string
& replace_with
) {
687 DoReplaceSubstringsAfterOffset(str
, start_offset
, find_this
, replace_with
,
688 true); // replace all instances
// Splits |str| into the maximal runs of characters that are not in
// |delimiters| and stores them, in order, in |*tokens|. Any previous contents
// of |*tokens| are discarded. Consecutive delimiters do not produce empty
// tokens, and leading/trailing delimiters are ignored.
//
// Returns the number of tokens stored.
template<typename STR>
static size_t TokenizeT(const STR& str,
                        const STR& delimiters,
                        std::vector<STR>* tokens) {
  tokens->clear();

  size_t start = str.find_first_not_of(delimiters);
  while (start != STR::npos) {
    size_t end = str.find_first_of(delimiters, start + 1);
    if (end == STR::npos) {
      // The final token runs to the end of the string.
      tokens->push_back(str.substr(start));
      break;
    } else {
      tokens->push_back(str.substr(start, end - start));
      start = str.find_first_not_of(delimiters, end + 1);
    }
  }

  return tokens->size();
}
713 size_t Tokenize(const string16
& str
,
714 const string16
& delimiters
,
715 std::vector
<string16
>* tokens
) {
716 return TokenizeT(str
, delimiters
, tokens
);
719 size_t Tokenize(const std::string
& str
,
720 const std::string
& delimiters
,
721 std::vector
<std::string
>* tokens
) {
722 return TokenizeT(str
, delimiters
, tokens
);
725 size_t Tokenize(const base::StringPiece
& str
,
726 const base::StringPiece
& delimiters
,
727 std::vector
<base::StringPiece
>* tokens
) {
728 return TokenizeT(str
, delimiters
, tokens
);
// Concatenates the elements of |parts| in order, inserting |sep| between each
// adjacent pair. Returns an empty string when |parts| is empty; empty elements
// are kept, so they still contribute a separator.
template<typename STR>
static STR JoinStringT(const std::vector<STR>& parts, const STR& sep) {
  if (parts.empty())
    return STR();

  // Seed the result with the first element, then append the rest, each
  // preceded by the separator.
  STR result(parts[0]);
  typename std::vector<STR>::const_iterator iter = parts.begin();
  ++iter;

  for (; iter != parts.end(); ++iter) {
    result += sep;
    result += *iter;
  }

  return result;
}
748 std::string
JoinString(const std::vector
<std::string
>& parts
, char sep
) {
749 return JoinStringT(parts
, std::string(1, sep
));
752 string16
JoinString(const std::vector
<string16
>& parts
, char16 sep
) {
753 return JoinStringT(parts
, string16(1, sep
));
756 std::string
JoinString(const std::vector
<std::string
>& parts
,
757 const std::string
& separator
) {
758 return JoinStringT(parts
, separator
);
761 string16
JoinString(const std::vector
<string16
>& parts
,
762 const string16
& separator
) {
763 return JoinStringT(parts
, separator
);
766 template<class FormatStringType
, class OutStringType
>
767 OutStringType
DoReplaceStringPlaceholders(const FormatStringType
& format_string
,
768 const std::vector
<OutStringType
>& subst
, std::vector
<size_t>* offsets
) {
769 size_t substitutions
= subst
.size();
771 size_t sub_length
= 0;
772 for (typename
std::vector
<OutStringType
>::const_iterator iter
= subst
.begin();
773 iter
!= subst
.end(); ++iter
) {
774 sub_length
+= iter
->length();
777 OutStringType formatted
;
778 formatted
.reserve(format_string
.length() + sub_length
);
780 std::vector
<ReplacementOffset
> r_offsets
;
781 for (typename
FormatStringType::const_iterator i
= format_string
.begin();
782 i
!= format_string
.end(); ++i
) {
784 if (i
+ 1 != format_string
.end()) {
786 DCHECK('$' == *i
|| '1' <= *i
) << "Invalid placeholder: " << *i
;
788 while (i
!= format_string
.end() && '$' == *i
) {
789 formatted
.push_back('$');
795 while (i
!= format_string
.end() && '0' <= *i
&& *i
<= '9') {
803 ReplacementOffset
r_offset(index
,
804 static_cast<int>(formatted
.size()));
805 r_offsets
.insert(std::lower_bound(r_offsets
.begin(),
811 if (index
< substitutions
)
812 formatted
.append(subst
.at(index
));
816 formatted
.push_back(*i
);
820 for (std::vector
<ReplacementOffset
>::const_iterator i
= r_offsets
.begin();
821 i
!= r_offsets
.end(); ++i
) {
822 offsets
->push_back(i
->offset
);
828 string16
ReplaceStringPlaceholders(const string16
& format_string
,
829 const std::vector
<string16
>& subst
,
830 std::vector
<size_t>* offsets
) {
831 return DoReplaceStringPlaceholders(format_string
, subst
, offsets
);
834 std::string
ReplaceStringPlaceholders(const base::StringPiece
& format_string
,
835 const std::vector
<std::string
>& subst
,
836 std::vector
<size_t>* offsets
) {
837 return DoReplaceStringPlaceholders(format_string
, subst
, offsets
);
840 string16
ReplaceStringPlaceholders(const string16
& format_string
,
843 std::vector
<size_t> offsets
;
844 std::vector
<string16
> subst
;
846 string16 result
= ReplaceStringPlaceholders(format_string
, subst
, &offsets
);
848 DCHECK_EQ(1U, offsets
.size());
850 *offset
= offsets
[0];
854 static bool IsWildcard(base_icu::UChar32 character
) {
855 return character
== '*' || character
== '?';
858 // Move the strings pointers to the point where they start to differ.
859 template <typename CHAR
, typename NEXT
>
860 static void EatSameChars(const CHAR
** pattern
, const CHAR
* pattern_end
,
861 const CHAR
** string
, const CHAR
* string_end
,
863 const CHAR
* escape
= NULL
;
864 while (*pattern
!= pattern_end
&& *string
!= string_end
) {
865 if (!escape
&& IsWildcard(**pattern
)) {
866 // We don't want to match wildcard here, except if it's escaped.
870 // Check if the escapement char is found. If so, skip it and move to the
872 if (!escape
&& **pattern
== '\\') {
874 next(pattern
, pattern_end
);
878 // Check if the chars match, if so, increment the ptrs.
879 const CHAR
* pattern_next
= *pattern
;
880 const CHAR
* string_next
= *string
;
881 base_icu::UChar32 pattern_char
= next(&pattern_next
, pattern_end
);
882 if (pattern_char
== next(&string_next
, string_end
) &&
883 pattern_char
!= CBU_SENTINEL
) {
884 *pattern
= pattern_next
;
885 *string
= string_next
;
887 // Uh oh, it did not match, we are done. If the last char was an
888 // escapement, that means that it was an error to advance the ptr here,
889 // let's put it back where it was. This also means that the MatchPattern
890 // function will return false because if we can't match an escape char
891 // here, then no one will.
902 template <typename CHAR
, typename NEXT
>
903 static void EatWildcard(const CHAR
** pattern
, const CHAR
* end
, NEXT next
) {
904 while (*pattern
!= end
) {
905 if (!IsWildcard(**pattern
))
911 template <typename CHAR
, typename NEXT
>
912 static bool MatchPatternT(const CHAR
* eval
, const CHAR
* eval_end
,
913 const CHAR
* pattern
, const CHAR
* pattern_end
,
916 const int kMaxDepth
= 16;
917 if (depth
> kMaxDepth
)
920 // Eat all the matching chars.
921 EatSameChars(&pattern
, pattern_end
, &eval
, eval_end
, next
);
923 // If the string is empty, then the pattern must be empty too, or contains
925 if (eval
== eval_end
) {
926 EatWildcard(&pattern
, pattern_end
, next
);
927 return pattern
== pattern_end
;
930 // Pattern is empty but not string, this is not a match.
931 if (pattern
== pattern_end
)
934 // If this is a question mark, then we need to compare the rest with
935 // the current string or the string with one character eaten.
936 const CHAR
* next_pattern
= pattern
;
937 next(&next_pattern
, pattern_end
);
938 if (pattern
[0] == '?') {
939 if (MatchPatternT(eval
, eval_end
, next_pattern
, pattern_end
,
942 const CHAR
* next_eval
= eval
;
943 next(&next_eval
, eval_end
);
944 if (MatchPatternT(next_eval
, eval_end
, next_pattern
, pattern_end
,
949 // This is a *, try to match all the possible substrings with the remainder
951 if (pattern
[0] == '*') {
952 // Collapse duplicate wild cards (********** into *) so that the
953 // method does not recurse unnecessarily. http://crbug.com/52839
954 EatWildcard(&next_pattern
, pattern_end
, next
);
956 while (eval
!= eval_end
) {
957 if (MatchPatternT(eval
, eval_end
, next_pattern
, pattern_end
,
963 // We reached the end of the string, let's see if the pattern contains only
965 if (eval
== eval_end
) {
966 EatWildcard(&pattern
, pattern_end
, next
);
967 if (pattern
!= pattern_end
)
976 struct NextCharUTF8
{
977 base_icu::UChar32
operator()(const char** p
, const char* end
) {
980 CBU8_NEXT(*p
, offset
, end
- *p
, c
);
986 struct NextCharUTF16
{
987 base_icu::UChar32
operator()(const char16
** p
, const char16
* end
) {
990 CBU16_NEXT(*p
, offset
, end
- *p
, c
);
996 bool MatchPattern(const base::StringPiece
& eval
,
997 const base::StringPiece
& pattern
) {
998 return MatchPatternT(eval
.data(), eval
.data() + eval
.size(),
999 pattern
.data(), pattern
.data() + pattern
.size(),
1003 bool MatchPattern(const string16
& eval
, const string16
& pattern
) {
1004 return MatchPatternT(eval
.c_str(), eval
.c_str() + eval
.size(),
1005 pattern
.c_str(), pattern
.c_str() + pattern
.size(),
1006 0, NextCharUTF16());
1009 // The following code is compatible with the OpenBSD lcpy interface. See:
1010 // http://www.gratisoft.us/todd/papers/strlcpy.html
1011 // ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
// Copies the zero-terminated string |src| into |dst|, writing at most
// |dst_size| characters including the terminator. When |dst_size| is non-zero
// the result in |dst| is always zero-terminated, truncating if necessary; when
// |dst_size| is zero nothing is written to |dst|.
//
// Returns the length of |src| in characters (not counting the terminator), so
// a return value >= |dst_size| tells the caller that truncation occurred.
template <typename CHAR>
size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
  for (size_t i = 0; i < dst_size; ++i) {
    if ((dst[i] = src[i]) == 0)  // We hit and copied the terminating NULL.
      return i;
  }

  // We were left off at dst_size.  We over copied 1 byte.  Null terminate.
  if (dst_size != 0)
    dst[dst_size - 1] = 0;

  // Count the rest of the |src|, and return its length in characters.
  while (src[dst_size]) ++dst_size;
  return dst_size;
}
1033 size_t base::strlcpy(char* dst
, const char* src
, size_t dst_size
) {
1034 return lcpyT
<char>(dst
, src
, dst_size
);
1036 size_t base::wcslcpy(wchar_t* dst
, const wchar_t* src
, size_t dst_size
) {
1037 return lcpyT
<wchar_t>(dst
, src
, dst_size
);