1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/strings/string_util.h"
21 #include "base/basictypes.h"
22 #include "base/logging.h"
23 #include "base/memory/singleton.h"
24 #include "base/strings/string_split.h"
25 #include "base/strings/utf_string_conversion_utils.h"
26 #include "base/strings/utf_string_conversions.h"
27 #include "base/third_party/icu/icu_utf.h"
28 #include "build/build_config.h"
34 // Force the singleton used by EmptyString[16] to be a unique type. This
35 // prevents other code that might accidentally use Singleton<string> from
36 // getting our internal one.
42 static EmptyStrings
* GetInstance() {
43 return Singleton
<EmptyStrings
>::get();
47 // Used by ReplaceStringPlaceholders to track the position in the string of
48 // replaced parameters.
49 struct ReplacementOffset
{
50 ReplacementOffset(uintptr_t parameter
, size_t offset
)
51 : parameter(parameter
),
54 // Index of the parameter.
57 // Starting position in the string.
61 static bool CompareParameter(const ReplacementOffset
& elem1
,
62 const ReplacementOffset
& elem2
) {
63 return elem1
.parameter
< elem2
.parameter
;
66 // Assuming that a pointer is the size of a "machine word", then
67 // uintptr_t is an integer type that is also a machine word.
68 typedef uintptr_t MachineWord
;
69 const uintptr_t kMachineWordAlignmentMask
= sizeof(MachineWord
) - 1;
71 inline bool IsAlignedToMachineWord(const void* pointer
) {
72 return !(reinterpret_cast<MachineWord
>(pointer
) & kMachineWordAlignmentMask
);
75 template<typename T
> inline T
* AlignToMachineWord(T
* pointer
) {
76 return reinterpret_cast<T
*>(reinterpret_cast<MachineWord
>(pointer
) &
77 ~kMachineWordAlignmentMask
);
80 template<size_t size
, typename CharacterType
> struct NonASCIIMask
;
81 template<> struct NonASCIIMask
<4, char16
> {
82 static inline uint32_t value() { return 0xFF80FF80U
; }
84 template<> struct NonASCIIMask
<4, char> {
85 static inline uint32_t value() { return 0x80808080U
; }
87 template<> struct NonASCIIMask
<8, char16
> {
88 static inline uint64_t value() { return 0xFF80FF80FF80FF80ULL
; }
90 template<> struct NonASCIIMask
<8, char> {
91 static inline uint64_t value() { return 0x8080808080808080ULL
; }
93 #if defined(WCHAR_T_IS_UTF32)
94 template<> struct NonASCIIMask
<4, wchar_t> {
95 static inline uint32_t value() { return 0xFFFFFF80U
; }
97 template<> struct NonASCIIMask
<8, wchar_t> {
98 static inline uint64_t value() { return 0xFFFFFF80FFFFFF80ULL
; }
100 #endif // WCHAR_T_IS_UTF32
102 // DO NOT USE. http://crbug.com/24917
104 // tolower() will given incorrect results for non-ASCII characters. Use the
105 // ASCII version, base::i18n::ToLower, or base::i18n::FoldCase. This is here
106 // for backwards-compat for StartsWith until such calls can be updated.
107 struct CaseInsensitiveCompareDeprecated
{
109 bool operator()(char16 x
, char16 y
) const {
110 return tolower(x
) == tolower(y
);
116 bool IsWprintfFormatPortable(const wchar_t* format
) {
117 for (const wchar_t* position
= format
; *position
!= '\0'; ++position
) {
118 if (*position
== '%') {
119 bool in_specification
= true;
120 bool modifier_l
= false;
121 while (in_specification
) {
122 // Eat up characters until reaching a known specifier.
123 if (*++position
== '\0') {
124 // The format string ended in the middle of a specification. Call
125 // it portable because no unportable specifications were found. The
126 // string is equally broken on all platforms.
130 if (*position
== 'l') {
131 // 'l' is the only thing that can save the 's' and 'c' specifiers.
133 } else if (((*position
== 's' || *position
== 'c') && !modifier_l
) ||
134 *position
== 'S' || *position
== 'C' || *position
== 'F' ||
135 *position
== 'D' || *position
== 'O' || *position
== 'U') {
140 if (wcschr(L
"diouxXeEfgGaAcspn%", *position
)) {
141 // Portable, keep scanning the rest of the format string.
142 in_specification
= false;
153 template<typename StringType
>
154 StringType
ToLowerASCIIImpl(BasicStringPiece
<StringType
> str
) {
156 ret
.reserve(str
.size());
157 for (size_t i
= 0; i
< str
.size(); i
++)
158 ret
.push_back(ToLowerASCII(str
[i
]));
162 template<typename StringType
>
163 StringType
ToUpperASCIIImpl(BasicStringPiece
<StringType
> str
) {
165 ret
.reserve(str
.size());
166 for (size_t i
= 0; i
< str
.size(); i
++)
167 ret
.push_back(ToUpperASCII(str
[i
]));
173 std::string
ToLowerASCII(StringPiece str
) {
174 return ToLowerASCIIImpl
<std::string
>(str
);
177 string16
ToLowerASCII(StringPiece16 str
) {
178 return ToLowerASCIIImpl
<string16
>(str
);
181 std::string
ToUpperASCII(StringPiece str
) {
182 return ToUpperASCIIImpl
<std::string
>(str
);
185 string16
ToUpperASCII(StringPiece16 str
) {
186 return ToUpperASCIIImpl
<string16
>(str
);
189 template<class StringType
>
190 int CompareCaseInsensitiveASCIIT(BasicStringPiece
<StringType
> a
,
191 BasicStringPiece
<StringType
> b
) {
192 // Find the first characters that aren't equal and compare them. If the end
193 // of one of the strings is found before a nonequal character, the lengths
194 // of the strings are compared.
196 while (i
< a
.length() && i
< b
.length()) {
197 typename
StringType::value_type lower_a
= ToLowerASCII(a
[i
]);
198 typename
StringType::value_type lower_b
= ToLowerASCII(b
[i
]);
199 if (lower_a
< lower_b
)
201 if (lower_a
> lower_b
)
206 // End of one string hit before finding a different character. Expect the
207 // common case to be "strings equal" at this point so check that first.
208 if (a
.length() == b
.length())
211 if (a
.length() < b
.length())
216 int CompareCaseInsensitiveASCII(StringPiece a
, StringPiece b
) {
217 return CompareCaseInsensitiveASCIIT
<std::string
>(a
, b
);
220 int CompareCaseInsensitiveASCII(StringPiece16 a
, StringPiece16 b
) {
221 return CompareCaseInsensitiveASCIIT
<string16
>(a
, b
);
224 bool EqualsCaseInsensitiveASCII(StringPiece a
, StringPiece b
) {
225 if (a
.length() != b
.length())
227 return CompareCaseInsensitiveASCIIT
<std::string
>(a
, b
) == 0;
230 bool EqualsCaseInsensitiveASCII(StringPiece16 a
, StringPiece16 b
) {
231 if (a
.length() != b
.length())
233 return CompareCaseInsensitiveASCIIT
<string16
>(a
, b
) == 0;
236 const std::string
& EmptyString() {
237 return EmptyStrings::GetInstance()->s
;
240 const string16
& EmptyString16() {
241 return EmptyStrings::GetInstance()->s16
;
244 template<typename STR
>
245 bool ReplaceCharsT(const STR
& input
,
246 const STR
& replace_chars
,
247 const STR
& replace_with
,
249 bool removed
= false;
250 size_t replace_length
= replace_with
.length();
254 size_t found
= output
->find_first_of(replace_chars
);
255 while (found
!= STR::npos
) {
257 output
->replace(found
, 1, replace_with
);
258 found
= output
->find_first_of(replace_chars
, found
+ replace_length
);
264 bool ReplaceChars(const string16
& input
,
265 const StringPiece16
& replace_chars
,
266 const string16
& replace_with
,
268 return ReplaceCharsT(input
, replace_chars
.as_string(), replace_with
, output
);
271 bool ReplaceChars(const std::string
& input
,
272 const StringPiece
& replace_chars
,
273 const std::string
& replace_with
,
274 std::string
* output
) {
275 return ReplaceCharsT(input
, replace_chars
.as_string(), replace_with
, output
);
278 bool RemoveChars(const string16
& input
,
279 const StringPiece16
& remove_chars
,
281 return ReplaceChars(input
, remove_chars
.as_string(), string16(), output
);
284 bool RemoveChars(const std::string
& input
,
285 const StringPiece
& remove_chars
,
286 std::string
* output
) {
287 return ReplaceChars(input
, remove_chars
.as_string(), std::string(), output
);
290 template<typename Str
>
291 TrimPositions
TrimStringT(const Str
& input
,
292 BasicStringPiece
<Str
> trim_chars
,
293 TrimPositions positions
,
295 // Find the edges of leading/trailing whitespace as desired. Need to use
296 // a StringPiece version of input to be able to call find* on it with the
297 // StringPiece version of trim_chars (normally the trim_chars will be a
298 // constant so avoid making a copy).
299 BasicStringPiece
<Str
> input_piece(input
);
300 const size_t last_char
= input
.length() - 1;
301 const size_t first_good_char
= (positions
& TRIM_LEADING
) ?
302 input_piece
.find_first_not_of(trim_chars
) : 0;
303 const size_t last_good_char
= (positions
& TRIM_TRAILING
) ?
304 input_piece
.find_last_not_of(trim_chars
) : last_char
;
306 // When the string was all trimmed, report that we stripped off characters
307 // from whichever position the caller was interested in. For empty input, we
308 // stripped no characters, but we still need to clear |output|.
310 (first_good_char
== Str::npos
) || (last_good_char
== Str::npos
)) {
311 bool input_was_empty
= input
.empty(); // in case output == &input
313 return input_was_empty
? TRIM_NONE
: positions
;
318 input
.substr(first_good_char
, last_good_char
- first_good_char
+ 1);
320 // Return where we trimmed from.
321 return static_cast<TrimPositions
>(
322 ((first_good_char
== 0) ? TRIM_NONE
: TRIM_LEADING
) |
323 ((last_good_char
== last_char
) ? TRIM_NONE
: TRIM_TRAILING
));
326 bool TrimString(const string16
& input
,
327 StringPiece16 trim_chars
,
329 return TrimStringT(input
, trim_chars
, TRIM_ALL
, output
) != TRIM_NONE
;
332 bool TrimString(const std::string
& input
,
333 StringPiece trim_chars
,
334 std::string
* output
) {
335 return TrimStringT(input
, trim_chars
, TRIM_ALL
, output
) != TRIM_NONE
;
338 template<typename Str
>
339 BasicStringPiece
<Str
> TrimStringPieceT(BasicStringPiece
<Str
> input
,
340 BasicStringPiece
<Str
> trim_chars
,
341 TrimPositions positions
) {
342 size_t begin
= (positions
& TRIM_LEADING
) ?
343 input
.find_first_not_of(trim_chars
) : 0;
344 size_t end
= (positions
& TRIM_TRAILING
) ?
345 input
.find_last_not_of(trim_chars
) + 1 : input
.size();
346 return input
.substr(begin
, end
- begin
);
349 StringPiece16
TrimString(StringPiece16 input
,
350 const StringPiece16
& trim_chars
,
351 TrimPositions positions
) {
352 return TrimStringPieceT(input
, trim_chars
, positions
);
355 StringPiece
TrimString(StringPiece input
,
356 const StringPiece
& trim_chars
,
357 TrimPositions positions
) {
358 return TrimStringPieceT(input
, trim_chars
, positions
);
361 void TruncateUTF8ToByteSize(const std::string
& input
,
362 const size_t byte_size
,
363 std::string
* output
) {
365 if (byte_size
> input
.length()) {
369 DCHECK_LE(byte_size
, static_cast<uint32
>(kint32max
));
370 // Note: This cast is necessary because CBU8_NEXT uses int32s.
371 int32 truncation_length
= static_cast<int32
>(byte_size
);
372 int32 char_index
= truncation_length
- 1;
373 const char* data
= input
.data();
375 // Using CBU8, we will move backwards from the truncation point
376 // to the beginning of the string looking for a valid UTF8
377 // character. Once a full UTF8 character is found, we will
378 // truncate the string to the end of that character.
379 while (char_index
>= 0) {
380 int32 prev
= char_index
;
381 base_icu::UChar32 code_point
= 0;
382 CBU8_NEXT(data
, char_index
, truncation_length
, code_point
);
383 if (!IsValidCharacter(code_point
) ||
384 !IsValidCodepoint(code_point
)) {
385 char_index
= prev
- 1;
391 if (char_index
>= 0 )
392 *output
= input
.substr(0, char_index
);
397 TrimPositions
TrimWhitespace(const string16
& input
,
398 TrimPositions positions
,
400 return TrimStringT(input
, StringPiece16(kWhitespaceUTF16
), positions
, output
);
403 StringPiece16
TrimWhitespaceASCII(StringPiece16 input
,
404 TrimPositions positions
) {
405 return TrimStringPieceT(input
, StringPiece16(kWhitespaceUTF16
), positions
);
408 TrimPositions
TrimWhitespaceASCII(const std::string
& input
,
409 TrimPositions positions
,
410 std::string
* output
) {
411 return TrimStringT(input
, StringPiece(kWhitespaceASCII
), positions
, output
);
414 StringPiece
TrimWhitespaceASCII(StringPiece input
, TrimPositions positions
) {
415 return TrimStringPieceT(input
, StringPiece(kWhitespaceASCII
), positions
);
418 // This function is only for backward-compatibility.
419 // To be removed when all callers are updated.
420 TrimPositions
TrimWhitespace(const std::string
& input
,
421 TrimPositions positions
,
422 std::string
* output
) {
423 return TrimWhitespaceASCII(input
, positions
, output
);
426 template<typename STR
>
427 STR
CollapseWhitespaceT(const STR
& text
,
428 bool trim_sequences_with_line_breaks
) {
430 result
.resize(text
.size());
432 // Set flags to pretend we're already in a trimmed whitespace sequence, so we
433 // will trim any leading whitespace.
434 bool in_whitespace
= true;
435 bool already_trimmed
= true;
437 int chars_written
= 0;
438 for (typename
STR::const_iterator
i(text
.begin()); i
!= text
.end(); ++i
) {
439 if (IsUnicodeWhitespace(*i
)) {
440 if (!in_whitespace
) {
441 // Reduce all whitespace sequences to a single space.
442 in_whitespace
= true;
443 result
[chars_written
++] = L
' ';
445 if (trim_sequences_with_line_breaks
&& !already_trimmed
&&
446 ((*i
== '\n') || (*i
== '\r'))) {
447 // Whitespace sequences containing CR or LF are eliminated entirely.
448 already_trimmed
= true;
452 // Non-whitespace chracters are copied straight across.
453 in_whitespace
= false;
454 already_trimmed
= false;
455 result
[chars_written
++] = *i
;
459 if (in_whitespace
&& !already_trimmed
) {
460 // Any trailing whitespace is eliminated.
464 result
.resize(chars_written
);
468 string16
CollapseWhitespace(const string16
& text
,
469 bool trim_sequences_with_line_breaks
) {
470 return CollapseWhitespaceT(text
, trim_sequences_with_line_breaks
);
473 std::string
CollapseWhitespaceASCII(const std::string
& text
,
474 bool trim_sequences_with_line_breaks
) {
475 return CollapseWhitespaceT(text
, trim_sequences_with_line_breaks
);
478 bool ContainsOnlyChars(const StringPiece
& input
,
479 const StringPiece
& characters
) {
480 return input
.find_first_not_of(characters
) == StringPiece::npos
;
483 bool ContainsOnlyChars(const StringPiece16
& input
,
484 const StringPiece16
& characters
) {
485 return input
.find_first_not_of(characters
) == StringPiece16::npos
;
488 template <class Char
>
489 inline bool DoIsStringASCII(const Char
* characters
, size_t length
) {
490 MachineWord all_char_bits
= 0;
491 const Char
* end
= characters
+ length
;
493 // Prologue: align the input.
494 while (!IsAlignedToMachineWord(characters
) && characters
!= end
) {
495 all_char_bits
|= *characters
;
499 // Compare the values of CPU word size.
500 const Char
* word_end
= AlignToMachineWord(end
);
501 const size_t loop_increment
= sizeof(MachineWord
) / sizeof(Char
);
502 while (characters
< word_end
) {
503 all_char_bits
|= *(reinterpret_cast<const MachineWord
*>(characters
));
504 characters
+= loop_increment
;
507 // Process the remaining bytes.
508 while (characters
!= end
) {
509 all_char_bits
|= *characters
;
513 MachineWord non_ascii_bit_mask
=
514 NonASCIIMask
<sizeof(MachineWord
), Char
>::value();
515 return !(all_char_bits
& non_ascii_bit_mask
);
518 bool IsStringASCII(const StringPiece
& str
) {
519 return DoIsStringASCII(str
.data(), str
.length());
522 bool IsStringASCII(const StringPiece16
& str
) {
523 return DoIsStringASCII(str
.data(), str
.length());
526 bool IsStringASCII(const string16
& str
) {
527 return DoIsStringASCII(str
.data(), str
.length());
530 #if defined(WCHAR_T_IS_UTF32)
531 bool IsStringASCII(const std::wstring
& str
) {
532 return DoIsStringASCII(str
.data(), str
.length());
536 bool IsStringUTF8(const StringPiece
& str
) {
537 const char *src
= str
.data();
538 int32 src_len
= static_cast<int32
>(str
.length());
539 int32 char_index
= 0;
541 while (char_index
< src_len
) {
543 CBU8_NEXT(src
, char_index
, src_len
, code_point
);
544 if (!IsValidCharacter(code_point
))
550 // Implementation note: Normally this function will be called with a hardcoded
551 // constant for the lowercase_ascii parameter. Constructing a StringPiece from
552 // a C constant requires running strlen, so the result will be two passes
553 // through the buffers, one to file the length of lowercase_ascii, and one to
554 // compare each letter.
556 // This function could have taken a const char* to avoid this and only do one
557 // pass through the string. But the strlen is faster than the case-insensitive
558 // compares and lets us early-exit in the case that the strings are different
559 // lengths (will often be the case for non-matches). So whether one approach or
560 // the other will be faster depends on the case.
562 // The hardcoded strings are typically very short so it doesn't matter, and the
563 // string piece gives additional flexibility for the caller (doesn't have to be
564 // null terminated) so we choose the StringPiece route.
565 template<typename Str
>
566 static inline bool DoLowerCaseEqualsASCII(BasicStringPiece
<Str
> str
,
567 StringPiece lowercase_ascii
) {
568 if (str
.size() != lowercase_ascii
.size())
570 for (size_t i
= 0; i
< str
.size(); i
++) {
571 if (ToLowerASCII(str
[i
]) != lowercase_ascii
[i
])
577 bool LowerCaseEqualsASCII(StringPiece str
, StringPiece lowercase_ascii
) {
578 return DoLowerCaseEqualsASCII
<std::string
>(str
, lowercase_ascii
);
581 bool LowerCaseEqualsASCII(StringPiece16 str
, StringPiece lowercase_ascii
) {
582 return DoLowerCaseEqualsASCII
<string16
>(str
, lowercase_ascii
);
585 bool EqualsASCII(StringPiece16 str
, StringPiece ascii
) {
586 if (str
.length() != ascii
.length())
588 return std::equal(ascii
.begin(), ascii
.end(), str
.begin());
591 template<typename Str
>
592 bool StartsWithT(BasicStringPiece
<Str
> str
,
593 BasicStringPiece
<Str
> search_for
,
594 CompareCase case_sensitivity
) {
595 if (search_for
.size() > str
.size())
598 BasicStringPiece
<Str
> source
= str
.substr(0, search_for
.size());
600 switch (case_sensitivity
) {
601 case CompareCase::SENSITIVE
:
602 return source
== search_for
;
604 case CompareCase::INSENSITIVE_ASCII
:
606 search_for
.begin(), search_for
.end(),
608 CaseInsensitiveCompareASCII
<typename
Str::value_type
>());
616 bool StartsWith(StringPiece str
,
617 StringPiece search_for
,
618 CompareCase case_sensitivity
) {
619 return StartsWithT
<std::string
>(str
, search_for
, case_sensitivity
);
622 bool StartsWith(StringPiece16 str
,
623 StringPiece16 search_for
,
624 CompareCase case_sensitivity
) {
625 return StartsWithT
<string16
>(str
, search_for
, case_sensitivity
);
628 bool StartsWith(const string16
& str
,
629 const string16
& search
,
630 bool case_sensitive
) {
631 if (!case_sensitive
) {
632 // This function was originally written using the current locale functions
633 // for case-insensitive comparisons. Emulate this behavior until callers
634 // can be converted either to use the case-insensitive ASCII one (most
635 // callers) or ICU functions in base_i18n.
636 if (search
.size() > str
.size())
638 return std::equal(search
.begin(), search
.end(), str
.begin(),
639 CaseInsensitiveCompareDeprecated());
641 return StartsWith(StringPiece16(str
), StringPiece16(search
),
642 CompareCase::SENSITIVE
);
645 template <typename Str
>
646 bool EndsWithT(BasicStringPiece
<Str
> str
,
647 BasicStringPiece
<Str
> search_for
,
648 CompareCase case_sensitivity
) {
649 if (search_for
.size() > str
.size())
652 BasicStringPiece
<Str
> source
= str
.substr(str
.size() - search_for
.size(),
655 switch (case_sensitivity
) {
656 case CompareCase::SENSITIVE
:
657 return source
== search_for
;
659 case CompareCase::INSENSITIVE_ASCII
:
661 source
.begin(), source
.end(),
663 CaseInsensitiveCompareASCII
<typename
Str::value_type
>());
671 bool EndsWith(StringPiece str
,
672 StringPiece search_for
,
673 CompareCase case_sensitivity
) {
674 return EndsWithT
<std::string
>(str
, search_for
, case_sensitivity
);
677 bool EndsWith(StringPiece16 str
,
678 StringPiece16 search_for
,
679 CompareCase case_sensitivity
) {
680 return EndsWithT
<string16
>(str
, search_for
, case_sensitivity
);
683 bool EndsWith(const string16
& str
,
684 const string16
& search
,
685 bool case_sensitive
) {
686 if (!case_sensitive
) {
687 // This function was originally written using the current locale functions
688 // for case-insensitive comparisons. Emulate this behavior until callers
689 // can be converted either to use the case-insensitive ASCII one (most
690 // callers) or ICU functions in base_i18n.
691 if (search
.size() > str
.size())
693 return std::equal(search
.begin(), search
.end(),
694 str
.begin() + (str
.size() - search
.size()),
695 CaseInsensitiveCompareDeprecated());
697 return EndsWith(StringPiece16(str
), StringPiece16(search
),
698 CompareCase::SENSITIVE
);
701 char HexDigitToInt(wchar_t c
) {
702 DCHECK(IsHexDigit(c
));
703 if (c
>= '0' && c
<= '9')
704 return static_cast<char>(c
- '0');
705 if (c
>= 'A' && c
<= 'F')
706 return static_cast<char>(c
- 'A' + 10);
707 if (c
>= 'a' && c
<= 'f')
708 return static_cast<char>(c
- 'a' + 10);
712 static const char* const kByteStringsUnlocalized
[] = {
721 string16
FormatBytesUnlocalized(int64 bytes
) {
722 double unit_amount
= static_cast<double>(bytes
);
723 size_t dimension
= 0;
724 const int kKilo
= 1024;
725 while (unit_amount
>= kKilo
&&
726 dimension
< arraysize(kByteStringsUnlocalized
) - 1) {
727 unit_amount
/= kKilo
;
732 if (bytes
!= 0 && dimension
> 0 && unit_amount
< 100) {
733 base::snprintf(buf
, arraysize(buf
), "%.1lf%s", unit_amount
,
734 kByteStringsUnlocalized
[dimension
]);
736 base::snprintf(buf
, arraysize(buf
), "%.0lf%s", unit_amount
,
737 kByteStringsUnlocalized
[dimension
]);
740 return ASCIIToUTF16(buf
);
743 // Runs in O(n) time in the length of |str|.
744 template<class StringType
>
745 void DoReplaceSubstringsAfterOffset(StringType
* str
,
747 BasicStringPiece
<StringType
> find_this
,
748 BasicStringPiece
<StringType
> replace_with
,
750 DCHECK(!find_this
.empty());
752 // If the find string doesn't appear, there's nothing to do.
753 offset
= str
->find(find_this
.data(), offset
, find_this
.size());
754 if (offset
== StringType::npos
)
757 // If we're only replacing one instance, there's no need to do anything
759 size_t find_length
= find_this
.length();
761 str
->replace(offset
, find_length
, replace_with
.data(), replace_with
.size());
765 // If the find and replace strings are the same length, we can simply use
766 // replace() on each instance, and finish the entire operation in O(n) time.
767 size_t replace_length
= replace_with
.length();
768 if (find_length
== replace_length
) {
770 str
->replace(offset
, find_length
,
771 replace_with
.data(), replace_with
.size());
772 offset
= str
->find(find_this
.data(), offset
+ replace_length
,
774 } while (offset
!= StringType::npos
);
778 // Since the find and replace strings aren't the same length, a loop like the
779 // one above would be O(n^2) in the worst case, as replace() will shift the
780 // entire remaining string each time. We need to be more clever to keep
783 // If we're shortening the string, we can alternate replacements with shifting
784 // forward the intervening characters using memmove().
785 size_t str_length
= str
->length();
786 if (find_length
> replace_length
) {
787 size_t write_offset
= offset
;
789 if (replace_length
) {
790 str
->replace(write_offset
, replace_length
,
791 replace_with
.data(), replace_with
.size());
792 write_offset
+= replace_length
;
794 size_t read_offset
= offset
+ find_length
;
796 str
->find(find_this
.data(), read_offset
, find_this
.size()),
798 size_t length
= offset
- read_offset
;
800 memmove(&(*str
)[write_offset
], &(*str
)[read_offset
],
801 length
* sizeof(typename
StringType::value_type
));
802 write_offset
+= length
;
804 } while (offset
< str_length
);
805 str
->resize(write_offset
);
809 // We're lengthening the string. We can use alternating replacements and
810 // memmove() calls like above, but we need to precalculate the final string
811 // length and then expand from back-to-front to avoid overwriting the string
812 // as we're reading it, needing to shift, or having to copy to a second string
814 size_t first_match
= offset
;
816 // First, calculate the final length and resize the string.
817 size_t final_length
= str_length
;
818 size_t expansion
= replace_length
- find_length
;
819 size_t current_match
;
821 final_length
+= expansion
;
822 // Minor optimization: save this offset into |current_match|, so that on
823 // exit from the loop, |current_match| will point at the last instance of
824 // the find string, and we won't need to find() it again immediately.
825 current_match
= offset
;
826 offset
= str
->find(find_this
.data(), offset
+ find_length
,
828 } while (offset
!= StringType::npos
);
829 str
->resize(final_length
);
831 // Now do the replacement loop, working backwards through the string.
832 for (size_t prev_match
= str_length
, write_offset
= final_length
; ;
833 current_match
= str
->rfind(find_this
.data(), current_match
- 1,
835 size_t read_offset
= current_match
+ find_length
;
836 size_t length
= prev_match
- read_offset
;
838 write_offset
-= length
;
839 memmove(&(*str
)[write_offset
], &(*str
)[read_offset
],
840 length
* sizeof(typename
StringType::value_type
));
842 write_offset
-= replace_length
;
843 str
->replace(write_offset
, replace_length
,
844 replace_with
.data(), replace_with
.size());
845 if (current_match
== first_match
)
847 prev_match
= current_match
;
851 void ReplaceFirstSubstringAfterOffset(string16
* str
,
853 StringPiece16 find_this
,
854 StringPiece16 replace_with
) {
855 DoReplaceSubstringsAfterOffset
<string16
>(
856 str
, start_offset
, find_this
, replace_with
, false); // Replace first.
859 void ReplaceFirstSubstringAfterOffset(std::string
* str
,
861 StringPiece find_this
,
862 StringPiece replace_with
) {
863 DoReplaceSubstringsAfterOffset
<std::string
>(
864 str
, start_offset
, find_this
, replace_with
, false); // Replace first.
867 void ReplaceSubstringsAfterOffset(string16
* str
,
869 StringPiece16 find_this
,
870 StringPiece16 replace_with
) {
871 DoReplaceSubstringsAfterOffset
<string16
>(
872 str
, start_offset
, find_this
, replace_with
, true); // Replace all.
875 void ReplaceSubstringsAfterOffset(std::string
* str
,
877 StringPiece find_this
,
878 StringPiece replace_with
) {
879 DoReplaceSubstringsAfterOffset
<std::string
>(
880 str
, start_offset
, find_this
, replace_with
, true); // Replace all.
883 template <class string_type
>
884 inline typename
string_type::value_type
* WriteIntoT(string_type
* str
,
885 size_t length_with_null
) {
886 DCHECK_GT(length_with_null
, 1u);
887 str
->reserve(length_with_null
);
888 str
->resize(length_with_null
- 1);
892 char* WriteInto(std::string
* str
, size_t length_with_null
) {
893 return WriteIntoT(str
, length_with_null
);
896 char16
* WriteInto(string16
* str
, size_t length_with_null
) {
897 return WriteIntoT(str
, length_with_null
);
900 template<typename STR
>
901 static STR
JoinStringT(const std::vector
<STR
>& parts
,
902 BasicStringPiece
<STR
> sep
) {
906 STR
result(parts
[0]);
907 auto iter
= parts
.begin();
910 for (; iter
!= parts
.end(); ++iter
) {
911 sep
.AppendToString(&result
);
918 std::string
JoinString(const std::vector
<std::string
>& parts
,
919 StringPiece separator
) {
920 return JoinStringT(parts
, separator
);
923 string16
JoinString(const std::vector
<string16
>& parts
,
924 StringPiece16 separator
) {
925 return JoinStringT(parts
, separator
);
928 template<class FormatStringType
, class OutStringType
>
929 OutStringType
DoReplaceStringPlaceholders(
930 const FormatStringType
& format_string
,
931 const std::vector
<OutStringType
>& subst
,
932 std::vector
<size_t>* offsets
) {
933 size_t substitutions
= subst
.size();
935 size_t sub_length
= 0;
936 for (const auto& cur
: subst
)
937 sub_length
+= cur
.length();
939 OutStringType formatted
;
940 formatted
.reserve(format_string
.length() + sub_length
);
942 std::vector
<ReplacementOffset
> r_offsets
;
943 for (auto i
= format_string
.begin(); i
!= format_string
.end(); ++i
) {
945 if (i
+ 1 != format_string
.end()) {
947 DCHECK('$' == *i
|| '1' <= *i
) << "Invalid placeholder: " << *i
;
949 while (i
!= format_string
.end() && '$' == *i
) {
950 formatted
.push_back('$');
956 while (i
!= format_string
.end() && '0' <= *i
&& *i
<= '9') {
964 ReplacementOffset
r_offset(index
,
965 static_cast<int>(formatted
.size()));
966 r_offsets
.insert(std::lower_bound(r_offsets
.begin(),
972 if (index
< substitutions
)
973 formatted
.append(subst
.at(index
));
977 formatted
.push_back(*i
);
981 for (const auto& cur
: r_offsets
)
982 offsets
->push_back(cur
.offset
);
987 string16
ReplaceStringPlaceholders(const string16
& format_string
,
988 const std::vector
<string16
>& subst
,
989 std::vector
<size_t>* offsets
) {
990 return DoReplaceStringPlaceholders(format_string
, subst
, offsets
);
993 std::string
ReplaceStringPlaceholders(const StringPiece
& format_string
,
994 const std::vector
<std::string
>& subst
,
995 std::vector
<size_t>* offsets
) {
996 return DoReplaceStringPlaceholders(format_string
, subst
, offsets
);
999 string16
ReplaceStringPlaceholders(const string16
& format_string
,
1002 std::vector
<size_t> offsets
;
1003 std::vector
<string16
> subst
;
1005 string16 result
= ReplaceStringPlaceholders(format_string
, subst
, &offsets
);
1007 DCHECK_EQ(1U, offsets
.size());
1009 *offset
= offsets
[0];
1013 // The following code is compatible with the OpenBSD lcpy interface. See:
1014 // http://www.gratisoft.us/todd/papers/strlcpy.html
1015 // ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
1019 template <typename CHAR
>
1020 size_t lcpyT(CHAR
* dst
, const CHAR
* src
, size_t dst_size
) {
1021 for (size_t i
= 0; i
< dst_size
; ++i
) {
1022 if ((dst
[i
] = src
[i
]) == 0) // We hit and copied the terminating NULL.
1026 // We were left off at dst_size. We over copied 1 byte. Null terminate.
1028 dst
[dst_size
- 1] = 0;
1030 // Count the rest of the |src|, and return it's length in characters.
1031 while (src
[dst_size
]) ++dst_size
;
1037 size_t strlcpy(char* dst
, const char* src
, size_t dst_size
) {
1038 return lcpyT
<char>(dst
, src
, dst_size
);
1040 size_t wcslcpy(wchar_t* dst
, const wchar_t* src
, size_t dst_size
) {
1041 return lcpyT
<wchar_t>(dst
, src
, dst_size
);