1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/string_util.h"
7 #include "build/build_config.h"
23 #include "base/basictypes.h"
24 #include "base/logging.h"
25 #include "base/memory/singleton.h"
26 #include "base/third_party/dmg_fp/dmg_fp.h"
27 #include "base/utf_string_conversion_utils.h"
28 #include "base/utf_string_conversions.h"
29 #include "base/third_party/icu/icu_utf.h"
33 // Force the singleton used by Empty[W]String[16] to be a unique type. This
34 // prevents other code that might accidentally use Singleton<string> from
35 // getting our internal one.
39 const std::wstring ws
;
42 static EmptyStrings
* GetInstance() {
43 return Singleton
<EmptyStrings
>::get();
47 // Used by ReplaceStringPlaceholders to track the position in the string of
48 // replaced parameters.
49 struct ReplacementOffset
{
50 ReplacementOffset(uintptr_t parameter
, size_t offset
)
51 : parameter(parameter
),
54 // Index of the parameter.
57 // Starting position in the string.
61 static bool CompareParameter(const ReplacementOffset
& elem1
,
62 const ReplacementOffset
& elem2
) {
63 return elem1
.parameter
< elem2
.parameter
;
// Returns true if |format| contains only wprintf conversion specifications
// that this function treats as portable.
//
// A specification is rejected when it is one of:
//   - %s or %c without a preceding 'l' length modifier, or
//   - %S, %C, %F, %D, %O or %U.
// A format string that ends in the middle of a specification is reported as
// portable: no unportable specification was found, and the string is equally
// broken on every platform.
bool IsWprintfFormatPortable(const wchar_t* format) {
  for (const wchar_t* position = format; *position != '\0'; ++position) {
    if (*position != '%')
      continue;

    bool in_specification = true;
    bool modifier_l = false;
    while (in_specification) {
      // Eat up characters until reaching a known specifier.
      if (*++position == '\0') {
        // The format string ended in the middle of a specification. Call it
        // portable because no unportable specifications were found.
        return true;
      }

      if (*position == 'l') {
        // 'l' is the only thing that can save the 's' and 'c' specifiers.
        modifier_l = true;
      } else if (((*position == 's' || *position == 'c') && !modifier_l) ||
                 *position == 'S' || *position == 'C' || *position == 'F' ||
                 *position == 'D' || *position == 'O' || *position == 'U') {
        // Not portable.
        return false;
      }

      if (wcschr(L"diouxXeEfgGaAcspn%", *position)) {
        // Portable, keep scanning the rest of the format string.
        in_specification = false;
      }
    }
  }

  return true;
}
108 const std::string
& EmptyString() {
109 return EmptyStrings::GetInstance()->s
;
112 const std::wstring
& EmptyWString() {
113 return EmptyStrings::GetInstance()->ws
;
116 const string16
& EmptyString16() {
117 return EmptyStrings::GetInstance()->s16
;
120 #define WHITESPACE_UNICODE \
121 0x0009, /* <control-0009> to <control-000D> */ \
126 0x0020, /* Space */ \
127 0x0085, /* <control-0085> */ \
128 0x00A0, /* No-Break Space */ \
129 0x1680, /* Ogham Space Mark */ \
130 0x180E, /* Mongolian Vowel Separator */ \
131 0x2000, /* En Quad to Hair Space */ \
142 0x200C, /* Zero Width Non-Joiner */ \
143 0x2028, /* Line Separator */ \
144 0x2029, /* Paragraph Separator */ \
145 0x202F, /* Narrow No-Break Space */ \
146 0x205F, /* Medium Mathematical Space */ \
147 0x3000, /* Ideographic Space */ \
150 const wchar_t kWhitespaceWide
[] = {
153 const char16 kWhitespaceUTF16
[] = {
156 const char kWhitespaceASCII
[] = {
157 0x09, // <control-0009> to <control-000D>
166 const char kUtf8ByteOrderMark
[] = "\xEF\xBB\xBF";
// Writes |input| to |*output| with every character that occurs in the
// NUL-terminated set |remove_chars| dropped. Returns true if at least one
// character was dropped. |output| may point at |input|: the filtered text is
// built in a temporary and swapped in only after |input| has been fully read.
template<typename STR>
bool RemoveCharsT(const STR& input,
                  const typename STR::value_type remove_chars[],
                  STR* output) {
  const STR unwanted(remove_chars);

  // Single pass over |input|; avoids shifting the tail of the string once per
  // removed character.
  STR kept;
  kept.reserve(input.size());
  for (typename STR::const_iterator it = input.begin();
       it != input.end(); ++it) {
    if (unwanted.find(*it) == STR::npos)
      kept.push_back(*it);
  }

  // Compute the result before the swap, in case |output| aliases |input|.
  const bool removed = kept.size() != input.size();
  output->swap(kept);
  return removed;
}
187 bool RemoveChars(const std::wstring
& input
,
188 const wchar_t remove_chars
[],
189 std::wstring
* output
) {
190 return RemoveCharsT(input
, remove_chars
, output
);
193 #if !defined(WCHAR_T_IS_UTF16)
194 bool RemoveChars(const string16
& input
,
195 const char16 remove_chars
[],
197 return RemoveCharsT(input
, remove_chars
, output
);
201 bool RemoveChars(const std::string
& input
,
202 const char remove_chars
[],
203 std::string
* output
) {
204 return RemoveCharsT(input
, remove_chars
, output
);
207 template<typename STR
>
208 TrimPositions
TrimStringT(const STR
& input
,
209 const typename
STR::value_type trim_chars
[],
210 TrimPositions positions
,
212 // Find the edges of leading/trailing whitespace as desired.
213 const typename
STR::size_type last_char
= input
.length() - 1;
214 const typename
STR::size_type first_good_char
= (positions
& TRIM_LEADING
) ?
215 input
.find_first_not_of(trim_chars
) : 0;
216 const typename
STR::size_type last_good_char
= (positions
& TRIM_TRAILING
) ?
217 input
.find_last_not_of(trim_chars
) : last_char
;
219 // When the string was all whitespace, report that we stripped off whitespace
220 // from whichever position the caller was interested in. For empty input, we
221 // stripped no whitespace, but we still need to clear |output|.
223 (first_good_char
== STR::npos
) || (last_good_char
== STR::npos
)) {
224 bool input_was_empty
= input
.empty(); // in case output == &input
226 return input_was_empty
? TRIM_NONE
: positions
;
229 // Trim the whitespace.
231 input
.substr(first_good_char
, last_good_char
- first_good_char
+ 1);
233 // Return where we trimmed from.
234 return static_cast<TrimPositions
>(
235 ((first_good_char
== 0) ? TRIM_NONE
: TRIM_LEADING
) |
236 ((last_good_char
== last_char
) ? TRIM_NONE
: TRIM_TRAILING
));
239 bool TrimString(const std::wstring
& input
,
240 const wchar_t trim_chars
[],
241 std::wstring
* output
) {
242 return TrimStringT(input
, trim_chars
, TRIM_ALL
, output
) != TRIM_NONE
;
245 #if !defined(WCHAR_T_IS_UTF16)
246 bool TrimString(const string16
& input
,
247 const char16 trim_chars
[],
249 return TrimStringT(input
, trim_chars
, TRIM_ALL
, output
) != TRIM_NONE
;
253 bool TrimString(const std::string
& input
,
254 const char trim_chars
[],
255 std::string
* output
) {
256 return TrimStringT(input
, trim_chars
, TRIM_ALL
, output
) != TRIM_NONE
;
259 void TruncateUTF8ToByteSize(const std::string
& input
,
260 const size_t byte_size
,
261 std::string
* output
) {
263 if (byte_size
> input
.length()) {
267 DCHECK_LE(byte_size
, static_cast<uint32
>(kint32max
));
268 // Note: This cast is necessary because CBU8_NEXT uses int32s.
269 int32 truncation_length
= static_cast<int32
>(byte_size
);
270 int32 char_index
= truncation_length
- 1;
271 const char* data
= input
.data();
273 // Using CBU8, we will move backwards from the truncation point
274 // to the beginning of the string looking for a valid UTF8
275 // character. Once a full UTF8 character is found, we will
276 // truncate the string to the end of that character.
277 while (char_index
>= 0) {
278 int32 prev
= char_index
;
279 uint32 code_point
= 0;
280 CBU8_NEXT(data
, char_index
, truncation_length
, code_point
);
281 if (!base::IsValidCharacter(code_point
) ||
282 !base::IsValidCodepoint(code_point
)) {
283 char_index
= prev
- 1;
289 if (char_index
>= 0 )
290 *output
= input
.substr(0, char_index
);
295 TrimPositions
TrimWhitespace(const std::wstring
& input
,
296 TrimPositions positions
,
297 std::wstring
* output
) {
298 return TrimStringT(input
, kWhitespaceWide
, positions
, output
);
301 #if !defined(WCHAR_T_IS_UTF16)
302 TrimPositions
TrimWhitespace(const string16
& input
,
303 TrimPositions positions
,
305 return TrimStringT(input
, kWhitespaceUTF16
, positions
, output
);
309 TrimPositions
TrimWhitespaceASCII(const std::string
& input
,
310 TrimPositions positions
,
311 std::string
* output
) {
312 return TrimStringT(input
, kWhitespaceASCII
, positions
, output
);
315 // This function is only for backward-compatibility.
316 // To be removed when all callers are updated.
317 TrimPositions
TrimWhitespace(const std::string
& input
,
318 TrimPositions positions
,
319 std::string
* output
) {
320 return TrimWhitespaceASCII(input
, positions
, output
);
323 template<typename STR
>
324 STR
CollapseWhitespaceT(const STR
& text
,
325 bool trim_sequences_with_line_breaks
) {
327 result
.resize(text
.size());
329 // Set flags to pretend we're already in a trimmed whitespace sequence, so we
330 // will trim any leading whitespace.
331 bool in_whitespace
= true;
332 bool already_trimmed
= true;
334 int chars_written
= 0;
335 for (typename
STR::const_iterator
i(text
.begin()); i
!= text
.end(); ++i
) {
336 if (IsWhitespace(*i
)) {
337 if (!in_whitespace
) {
338 // Reduce all whitespace sequences to a single space.
339 in_whitespace
= true;
340 result
[chars_written
++] = L
' ';
342 if (trim_sequences_with_line_breaks
&& !already_trimmed
&&
343 ((*i
== '\n') || (*i
== '\r'))) {
344 // Whitespace sequences containing CR or LF are eliminated entirely.
345 already_trimmed
= true;
349 // Non-whitespace characters are copied straight across.
350 in_whitespace
= false;
351 already_trimmed
= false;
352 result
[chars_written
++] = *i
;
356 if (in_whitespace
&& !already_trimmed
) {
357 // Any trailing whitespace is eliminated.
361 result
.resize(chars_written
);
365 std::wstring
CollapseWhitespace(const std::wstring
& text
,
366 bool trim_sequences_with_line_breaks
) {
367 return CollapseWhitespaceT(text
, trim_sequences_with_line_breaks
);
370 #if !defined(WCHAR_T_IS_UTF16)
371 string16
CollapseWhitespace(const string16
& text
,
372 bool trim_sequences_with_line_breaks
) {
373 return CollapseWhitespaceT(text
, trim_sequences_with_line_breaks
);
377 std::string
CollapseWhitespaceASCII(const std::string
& text
,
378 bool trim_sequences_with_line_breaks
) {
379 return CollapseWhitespaceT(text
, trim_sequences_with_line_breaks
);
382 bool ContainsOnlyWhitespaceASCII(const std::string
& str
) {
383 for (std::string::const_iterator
i(str
.begin()); i
!= str
.end(); ++i
) {
384 if (!IsAsciiWhitespace(*i
))
390 bool ContainsOnlyWhitespace(const string16
& str
) {
391 for (string16::const_iterator
i(str
.begin()); i
!= str
.end(); ++i
) {
392 if (!IsWhitespace(*i
))
// Returns true if every character of |input| also occurs in |characters|.
// An empty |input| yields true; a non-empty |input| with an empty
// |characters| yields false.
template<typename STR>
static bool ContainsOnlyCharsT(const STR& input, const STR& characters) {
  // |input| qualifies exactly when it has no character outside |characters|.
  return input.find_first_not_of(characters) == STR::npos;
}
408 bool ContainsOnlyChars(const std::wstring
& input
,
409 const std::wstring
& characters
) {
410 return ContainsOnlyCharsT(input
, characters
);
413 #if !defined(WCHAR_T_IS_UTF16)
414 bool ContainsOnlyChars(const string16
& input
, const string16
& characters
) {
415 return ContainsOnlyCharsT(input
, characters
);
419 bool ContainsOnlyChars(const std::string
& input
,
420 const std::string
& characters
) {
421 return ContainsOnlyCharsT(input
, characters
);
424 std::string
WideToASCII(const std::wstring
& wide
) {
425 DCHECK(IsStringASCII(wide
)) << wide
;
426 return std::string(wide
.begin(), wide
.end());
429 std::string
UTF16ToASCII(const string16
& utf16
) {
430 DCHECK(IsStringASCII(utf16
)) << utf16
;
431 return std::string(utf16
.begin(), utf16
.end());
// Converts |wide| to Latin-1 and stores the result in |*latin1|.
//
// Latin1 is just the low range of Unicode, so each code unit can be copied
// directly. Returns false if any element of |wide| lies outside [0, 255]; in
// that case |*latin1| is left empty. On success returns true and |*latin1|
// holds one byte per element of |wide|.
bool WideToLatin1(const std::wstring& wide, std::string* latin1) {
  std::string output;
  output.resize(wide.size());
  latin1->clear();
  for (size_t i = 0; i < wide.size(); i++) {
    // Compare as unsigned so that negative values, possible where wchar_t is
    // a signed type, are rejected rather than truncated.
    if (static_cast<unsigned long>(wide[i]) > 255)
      return false;
    output[i] = static_cast<char>(wide[i]);
  }
  latin1->swap(output);
  return true;
}
449 static bool DoIsStringASCII(const STR
& str
) {
450 for (size_t i
= 0; i
< str
.length(); i
++) {
451 typename ToUnsigned
<typename
STR::value_type
>::Unsigned c
= str
[i
];
458 bool IsStringASCII(const std::wstring
& str
) {
459 return DoIsStringASCII(str
);
462 #if !defined(WCHAR_T_IS_UTF16)
463 bool IsStringASCII(const string16
& str
) {
464 return DoIsStringASCII(str
);
468 bool IsStringASCII(const base::StringPiece
& str
) {
469 return DoIsStringASCII(str
);
472 bool IsStringUTF8(const std::string
& str
) {
473 const char *src
= str
.data();
474 int32 src_len
= static_cast<int32
>(str
.length());
475 int32 char_index
= 0;
477 while (char_index
< src_len
) {
479 CBU8_NEXT(src
, char_index
, src_len
, code_point
);
480 if (!base::IsValidCharacter(code_point
))
486 template<typename Iter
>
487 static inline bool DoLowerCaseEqualsASCII(Iter a_begin
,
490 for (Iter it
= a_begin
; it
!= a_end
; ++it
, ++b
) {
491 if (!*b
|| base::ToLowerASCII(*it
) != *b
)
497 // Front-ends for LowerCaseEqualsASCII.
498 bool LowerCaseEqualsASCII(const std::string
& a
, const char* b
) {
499 return DoLowerCaseEqualsASCII(a
.begin(), a
.end(), b
);
502 bool LowerCaseEqualsASCII(const std::wstring
& a
, const char* b
) {
503 return DoLowerCaseEqualsASCII(a
.begin(), a
.end(), b
);
506 #if !defined(WCHAR_T_IS_UTF16)
507 bool LowerCaseEqualsASCII(const string16
& a
, const char* b
) {
508 return DoLowerCaseEqualsASCII(a
.begin(), a
.end(), b
);
512 bool LowerCaseEqualsASCII(std::string::const_iterator a_begin
,
513 std::string::const_iterator a_end
,
515 return DoLowerCaseEqualsASCII(a_begin
, a_end
, b
);
518 bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin
,
519 std::wstring::const_iterator a_end
,
521 return DoLowerCaseEqualsASCII(a_begin
, a_end
, b
);
524 #if !defined(WCHAR_T_IS_UTF16)
525 bool LowerCaseEqualsASCII(string16::const_iterator a_begin
,
526 string16::const_iterator a_end
,
528 return DoLowerCaseEqualsASCII(a_begin
, a_end
, b
);
532 bool LowerCaseEqualsASCII(const char* a_begin
,
535 return DoLowerCaseEqualsASCII(a_begin
, a_end
, b
);
538 bool LowerCaseEqualsASCII(const wchar_t* a_begin
,
539 const wchar_t* a_end
,
541 return DoLowerCaseEqualsASCII(a_begin
, a_end
, b
);
544 #if !defined(WCHAR_T_IS_UTF16)
545 bool LowerCaseEqualsASCII(const char16
* a_begin
,
548 return DoLowerCaseEqualsASCII(a_begin
, a_end
, b
);
552 bool EqualsASCII(const string16
& a
, const base::StringPiece
& b
) {
553 if (a
.length() != b
.length())
555 return std::equal(b
.begin(), b
.end(), a
.begin());
558 bool StartsWithASCII(const std::string
& str
,
559 const std::string
& search
,
560 bool case_sensitive
) {
562 return str
.compare(0, search
.length(), search
) == 0;
564 return base::strncasecmp(str
.c_str(), search
.c_str(), search
.length()) == 0;
567 template <typename STR
>
568 bool StartsWithT(const STR
& str
, const STR
& search
, bool case_sensitive
) {
569 if (case_sensitive
) {
570 return str
.compare(0, search
.length(), search
) == 0;
572 if (search
.size() > str
.size())
574 return std::equal(search
.begin(), search
.end(), str
.begin(),
575 base::CaseInsensitiveCompare
<typename
STR::value_type
>());
579 bool StartsWith(const std::wstring
& str
, const std::wstring
& search
,
580 bool case_sensitive
) {
581 return StartsWithT(str
, search
, case_sensitive
);
584 #if !defined(WCHAR_T_IS_UTF16)
585 bool StartsWith(const string16
& str
, const string16
& search
,
586 bool case_sensitive
) {
587 return StartsWithT(str
, search
, case_sensitive
);
591 template <typename STR
>
592 bool EndsWithT(const STR
& str
, const STR
& search
, bool case_sensitive
) {
593 typename
STR::size_type str_length
= str
.length();
594 typename
STR::size_type search_length
= search
.length();
595 if (search_length
> str_length
)
597 if (case_sensitive
) {
598 return str
.compare(str_length
- search_length
, search_length
, search
) == 0;
600 return std::equal(search
.begin(), search
.end(),
601 str
.begin() + (str_length
- search_length
),
602 base::CaseInsensitiveCompare
<typename
STR::value_type
>());
606 bool EndsWith(const std::string
& str
, const std::string
& search
,
607 bool case_sensitive
) {
608 return EndsWithT(str
, search
, case_sensitive
);
611 bool EndsWith(const std::wstring
& str
, const std::wstring
& search
,
612 bool case_sensitive
) {
613 return EndsWithT(str
, search
, case_sensitive
);
616 #if !defined(WCHAR_T_IS_UTF16)
617 bool EndsWith(const string16
& str
, const string16
& search
,
618 bool case_sensitive
) {
619 return EndsWithT(str
, search
, case_sensitive
);
623 DataUnits
GetByteDisplayUnits(int64 bytes
) {
624 // The byte thresholds at which we display amounts. A byte count is displayed
625 // in unit U when kUnitThresholds[U] <= bytes < kUnitThresholds[U+1].
626 // This must match the DataUnits enum.
627 static const int64 kUnitThresholds
[] = {
628 0, // DATA_UNITS_BYTE,
629 3*1024, // DATA_UNITS_KIBIBYTE,
630 2*1024*1024, // DATA_UNITS_MEBIBYTE,
631 1024*1024*1024 // DATA_UNITS_GIBIBYTE,
635 NOTREACHED() << "Negative bytes value";
636 return DATA_UNITS_BYTE
;
639 int unit_index
= arraysize(kUnitThresholds
);
640 while (--unit_index
> 0) {
641 if (bytes
>= kUnitThresholds
[unit_index
])
645 DCHECK(unit_index
>= DATA_UNITS_BYTE
&& unit_index
<= DATA_UNITS_GIBIBYTE
);
646 return DataUnits(unit_index
);
649 // TODO(mpcomplete): deal with locale
650 // Byte suffixes. This must match the DataUnits enum.
651 static const char* const kByteStrings
[] = {
658 static const char* const kSpeedStrings
[] = {
665 string16
FormatBytesInternal(int64 bytes
,
668 const char* const* suffix
) {
670 NOTREACHED() << "Negative bytes value";
674 DCHECK(units
>= DATA_UNITS_BYTE
&& units
<= DATA_UNITS_GIBIBYTE
);
676 // Put the quantity in the right units.
677 double unit_amount
= static_cast<double>(bytes
);
678 for (int i
= 0; i
< units
; ++i
)
679 unit_amount
/= 1024.0;
682 if (bytes
!= 0 && units
!= DATA_UNITS_BYTE
&& unit_amount
< 100)
683 base::snprintf(buf
, arraysize(buf
), "%.1lf", unit_amount
);
685 base::snprintf(buf
, arraysize(buf
), "%.0lf", unit_amount
);
687 std::string
ret(buf
);
690 ret
+= suffix
[units
];
693 return ASCIIToUTF16(ret
);
696 string16
FormatBytes(int64 bytes
, DataUnits units
, bool show_units
) {
697 return FormatBytesInternal(bytes
, units
, show_units
, kByteStrings
);
700 string16
FormatSpeed(int64 bytes
, DataUnits units
, bool show_units
) {
701 return FormatBytesInternal(bytes
, units
, show_units
, kSpeedStrings
);
// Replaces occurrences of |find_this| in |*str| with |replace_with|, starting
// the search at |start_offset|. When |replace_all| is false only the first
// occurrence at or after |start_offset| is replaced; otherwise every
// occurrence is. The search resumes after the inserted text, so occurrences
// of |find_this| inside |replace_with| are not replaced again.
//
// Does nothing when |start_offset| is npos or not inside |*str|, or when
// |find_this| is empty.
template<class StringType>
void DoReplaceSubstringsAfterOffset(StringType* str,
                                    typename StringType::size_type start_offset,
                                    const StringType& find_this,
                                    const StringType& replace_with,
                                    bool replace_all) {
  if ((start_offset == StringType::npos) || (start_offset >= str->length()))
    return;

  DCHECK(!find_this.empty());
  // An empty needle matches at every position; with |replace_all| set the
  // loop below would never terminate, so bail out in release builds too.
  if (find_this.empty())
    return;

  for (typename StringType::size_type offs(str->find(find_this, start_offset));
       offs != StringType::npos; offs = str->find(find_this, offs)) {
    str->replace(offs, find_this.length(), replace_with);
    offs += replace_with.length();

    if (!replace_all)
      break;
  }
}
724 void ReplaceFirstSubstringAfterOffset(string16
* str
,
725 string16::size_type start_offset
,
726 const string16
& find_this
,
727 const string16
& replace_with
) {
728 DoReplaceSubstringsAfterOffset(str
, start_offset
, find_this
, replace_with
,
729 false); // replace first instance
732 void ReplaceFirstSubstringAfterOffset(std::string
* str
,
733 std::string::size_type start_offset
,
734 const std::string
& find_this
,
735 const std::string
& replace_with
) {
736 DoReplaceSubstringsAfterOffset(str
, start_offset
, find_this
, replace_with
,
737 false); // replace first instance
740 void ReplaceSubstringsAfterOffset(string16
* str
,
741 string16::size_type start_offset
,
742 const string16
& find_this
,
743 const string16
& replace_with
) {
744 DoReplaceSubstringsAfterOffset(str
, start_offset
, find_this
, replace_with
,
745 true); // replace all instances
748 void ReplaceSubstringsAfterOffset(std::string
* str
,
749 std::string::size_type start_offset
,
750 const std::string
& find_this
,
751 const std::string
& replace_with
) {
752 DoReplaceSubstringsAfterOffset(str
, start_offset
, find_this
, replace_with
,
753 true); // replace all instances
// Splits |str| into the maximal runs of characters that are not in
// |delimiters| and stores them, in order, in |*tokens|. Consecutive, leading
// and trailing delimiters produce no empty tokens. Any previous contents of
// |*tokens| are discarded. Returns the number of tokens stored.
template<typename STR>
static size_t TokenizeT(const STR& str,
                        const STR& delimiters,
                        std::vector<STR>* tokens) {
  tokens->clear();

  typename STR::size_type start = str.find_first_not_of(delimiters);
  while (start != STR::npos) {
    // |start| is a token character, so the token's end lies past it.
    typename STR::size_type end = str.find_first_of(delimiters, start + 1);
    if (end == STR::npos) {
      // Last token: runs to the end of |str|.
      tokens->push_back(str.substr(start));
      break;
    }
    tokens->push_back(str.substr(start, end - start));
    start = str.find_first_not_of(delimiters, end + 1);
  }

  return tokens->size();
}
778 size_t Tokenize(const std::wstring
& str
,
779 const std::wstring
& delimiters
,
780 std::vector
<std::wstring
>* tokens
) {
781 return TokenizeT(str
, delimiters
, tokens
);
784 #if !defined(WCHAR_T_IS_UTF16)
785 size_t Tokenize(const string16
& str
,
786 const string16
& delimiters
,
787 std::vector
<string16
>* tokens
) {
788 return TokenizeT(str
, delimiters
, tokens
);
792 size_t Tokenize(const std::string
& str
,
793 const std::string
& delimiters
,
794 std::vector
<std::string
>* tokens
) {
795 return TokenizeT(str
, delimiters
, tokens
);
798 size_t Tokenize(const base::StringPiece
& str
,
799 const base::StringPiece
& delimiters
,
800 std::vector
<base::StringPiece
>* tokens
) {
801 return TokenizeT(str
, delimiters
, tokens
);
// Concatenates the elements of |parts|, placing a single |sep| between each
// pair of neighbours. Returns an empty string when |parts| is empty; empty
// elements are kept, so {"", ""} joined with ',' yields ",".
template<typename STR>
static STR JoinStringT(const std::vector<STR>& parts,
                       typename STR::value_type sep) {
  // parts[0] below must not be evaluated for an empty vector.
  if (parts.empty())
    return STR();

  STR result(parts[0]);
  typename std::vector<STR>::const_iterator iter = parts.begin();
  ++iter;  // The first element is already in |result|.

  for (; iter != parts.end(); ++iter) {
    result += sep;
    result += *iter;
  }

  return result;
}
822 std::string
JoinString(const std::vector
<std::string
>& parts
, char sep
) {
823 return JoinStringT(parts
, sep
);
826 string16
JoinString(const std::vector
<string16
>& parts
, char16 sep
) {
827 return JoinStringT(parts
, sep
);
830 template<class FormatStringType
, class OutStringType
>
831 OutStringType
DoReplaceStringPlaceholders(const FormatStringType
& format_string
,
832 const std::vector
<OutStringType
>& subst
, std::vector
<size_t>* offsets
) {
833 size_t substitutions
= subst
.size();
834 DCHECK(substitutions
< 10);
836 size_t sub_length
= 0;
837 for (typename
std::vector
<OutStringType
>::const_iterator iter
= subst
.begin();
838 iter
!= subst
.end(); ++iter
) {
839 sub_length
+= iter
->length();
842 OutStringType formatted
;
843 formatted
.reserve(format_string
.length() + sub_length
);
845 std::vector
<ReplacementOffset
> r_offsets
;
846 for (typename
FormatStringType::const_iterator i
= format_string
.begin();
847 i
!= format_string
.end(); ++i
) {
849 if (i
+ 1 != format_string
.end()) {
851 DCHECK('$' == *i
|| '1' <= *i
) << "Invalid placeholder: " << *i
;
853 while (i
!= format_string
.end() && '$' == *i
) {
854 formatted
.push_back('$');
859 uintptr_t index
= *i
- '1';
861 ReplacementOffset
r_offset(index
,
862 static_cast<int>(formatted
.size()));
863 r_offsets
.insert(std::lower_bound(r_offsets
.begin(),
869 if (index
< substitutions
)
870 formatted
.append(subst
.at(index
));
874 formatted
.push_back(*i
);
878 for (std::vector
<ReplacementOffset
>::const_iterator i
= r_offsets
.begin();
879 i
!= r_offsets
.end(); ++i
) {
880 offsets
->push_back(i
->offset
);
886 string16
ReplaceStringPlaceholders(const string16
& format_string
,
887 const std::vector
<string16
>& subst
,
888 std::vector
<size_t>* offsets
) {
889 return DoReplaceStringPlaceholders(format_string
, subst
, offsets
);
892 std::string
ReplaceStringPlaceholders(const base::StringPiece
& format_string
,
893 const std::vector
<std::string
>& subst
,
894 std::vector
<size_t>* offsets
) {
895 return DoReplaceStringPlaceholders(format_string
, subst
, offsets
);
898 string16
ReplaceStringPlaceholders(const string16
& format_string
,
901 std::vector
<size_t> offsets
;
902 std::vector
<string16
> subst
;
904 string16 result
= ReplaceStringPlaceholders(format_string
, subst
, &offsets
);
906 DCHECK(offsets
.size() == 1);
908 *offset
= offsets
[0];
913 static bool IsWildcard(base_icu::UChar32 character
) {
914 return character
== '*' || character
== '?';
917 // Move the strings pointers to the point where they start to differ.
918 template <typename CHAR
, typename NEXT
>
919 static void EatSameChars(const CHAR
** pattern
, const CHAR
* pattern_end
,
920 const CHAR
** string
, const CHAR
* string_end
,
922 const CHAR
* escape
= NULL
;
923 while (*pattern
!= pattern_end
&& *string
!= string_end
) {
924 if (!escape
&& IsWildcard(**pattern
)) {
925 // We don't want to match wildcard here, except if it's escaped.
929 // Check if the escapement char is found. If so, skip it and move to the
931 if (!escape
&& **pattern
== '\\') {
933 next(pattern
, pattern_end
);
937 // Check if the chars match, if so, increment the ptrs.
938 const CHAR
* pattern_next
= *pattern
;
939 const CHAR
* string_next
= *string
;
940 base_icu::UChar32 pattern_char
= next(&pattern_next
, pattern_end
);
941 if (pattern_char
== next(&string_next
, string_end
) &&
942 pattern_char
!= (base_icu::UChar32
) CBU_SENTINEL
) {
943 *pattern
= pattern_next
;
944 *string
= string_next
;
946 // Uh oh, it did not match, we are done. If the last char was an
947 // escapement, that means that it was an error to advance the ptr here,
948 // let's put it back where it was. This also means that the MatchPattern
949 // function will return false because if we can't match an escape char
950 // here, then no one will.
961 template <typename CHAR
, typename NEXT
>
962 static void EatWildcard(const CHAR
** pattern
, const CHAR
* end
, NEXT next
) {
963 while (*pattern
!= end
) {
964 if (!IsWildcard(**pattern
))
970 template <typename CHAR
, typename NEXT
>
971 static bool MatchPatternT(const CHAR
* eval
, const CHAR
* eval_end
,
972 const CHAR
* pattern
, const CHAR
* pattern_end
,
975 const int kMaxDepth
= 16;
976 if (depth
> kMaxDepth
)
979 // Eat all the matching chars.
980 EatSameChars(&pattern
, pattern_end
, &eval
, eval_end
, next
);
982 // If the string is empty, then the pattern must be empty too, or contains
984 if (eval
== eval_end
) {
985 EatWildcard(&pattern
, pattern_end
, next
);
986 return pattern
== pattern_end
;
989 // Pattern is empty but not string, this is not a match.
990 if (pattern
== pattern_end
)
993 // If this is a question mark, then we need to compare the rest with
994 // the current string or the string with one character eaten.
995 const CHAR
* next_pattern
= pattern
;
996 next(&next_pattern
, pattern_end
);
997 if (pattern
[0] == '?') {
998 if (MatchPatternT(eval
, eval_end
, next_pattern
, pattern_end
,
1001 const CHAR
* next_eval
= eval
;
1002 next(&next_eval
, eval_end
);
1003 if (MatchPatternT(next_eval
, eval_end
, next_pattern
, pattern_end
,
1008 // This is a *, try to match all the possible substrings with the remainder
1010 if (pattern
[0] == '*') {
1011 // Collapse duplicate wild cards (********** into *) so that the
1012 // method does not recurse unnecessarily. http://crbug.com/52839
1013 EatWildcard(&next_pattern
, pattern_end
, next
);
1015 while (eval
!= eval_end
) {
1016 if (MatchPatternT(eval
, eval_end
, next_pattern
, pattern_end
,
1022 // We reached the end of the string, let see if the pattern contains only
1024 if (eval
== eval_end
) {
1025 EatWildcard(&pattern
, pattern_end
, next
);
1026 if (pattern
!= pattern_end
)
1035 struct NextCharUTF8
{
1036 base_icu::UChar32
operator()(const char** p
, const char* end
) {
1037 base_icu::UChar32 c
;
1039 CBU8_NEXT(*p
, offset
, end
- *p
, c
);
1045 struct NextCharUTF16
{
1046 base_icu::UChar32
operator()(const char16
** p
, const char16
* end
) {
1047 base_icu::UChar32 c
;
1049 CBU16_NEXT(*p
, offset
, end
- *p
, c
);
1055 bool MatchPattern(const base::StringPiece
& eval
,
1056 const base::StringPiece
& pattern
) {
1057 return MatchPatternT(eval
.data(), eval
.data() + eval
.size(),
1058 pattern
.data(), pattern
.data() + pattern
.size(),
1062 bool MatchPattern(const string16
& eval
, const string16
& pattern
) {
1063 return MatchPatternT(eval
.c_str(), eval
.c_str() + eval
.size(),
1064 pattern
.c_str(), pattern
.c_str() + pattern
.size(),
1065 0, NextCharUTF16());
1068 // The following code is compatible with the OpenBSD lcpy interface. See:
1069 // http://www.gratisoft.us/todd/papers/strlcpy.html
1070 // ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
// Copies the NUL-terminated string |src| into |dst|, writing at most
// |dst_size| characters including the terminator. When |dst_size| is
// non-zero the result is always NUL-terminated, truncating |src| if needed.
// When |dst_size| is zero nothing is written to |dst|.
//
// Returns the length of |src| in characters, not counting the terminator. A
// return value >= |dst_size| therefore means the copy was truncated.
template <typename CHAR>
size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
  for (size_t i = 0; i < dst_size; ++i) {
    if ((dst[i] = src[i]) == 0)  // We hit and copied the terminating NULL.
      return i;
  }

  // We were left off at dst_size. We over copied 1 character. Null terminate,
  // unless there is no room in |dst| at all.
  if (dst_size != 0)
    dst[dst_size - 1] = 0;

  // Count the rest of the |src|, and return its length in characters.
  while (src[dst_size])
    ++dst_size;
  return dst_size;
}
1092 size_t base::strlcpy(char* dst
, const char* src
, size_t dst_size
) {
1093 return lcpyT
<char>(dst
, src
, dst_size
);
1095 size_t base::wcslcpy(wchar_t* dst
, const wchar_t* src
, size_t dst_size
) {
1096 return lcpyT
<wchar_t>(dst
, src
, dst_size
);