[cros] Don't succeed if user has default profile picturewq
[chromium-blink-merge.git] / base / string_util.cc
blob4757a85a0e6df0c79a70682005473ac9b6206463
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/string_util.h"
7 #include "build/build_config.h"
9 #include <ctype.h>
10 #include <errno.h>
11 #include <math.h>
12 #include <stdarg.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <time.h>
17 #include <wchar.h>
18 #include <wctype.h>
20 #include <algorithm>
21 #include <vector>
23 #include "base/basictypes.h"
24 #include "base/logging.h"
25 #include "base/memory/singleton.h"
26 #include "base/third_party/dmg_fp/dmg_fp.h"
27 #include "base/utf_string_conversion_utils.h"
28 #include "base/utf_string_conversions.h"
29 #include "base/third_party/icu/icu_utf.h"
31 namespace {
33 // Force the singleton used by Empty[W]String[16] to be a unique type. This
34 // prevents other code that might accidentally use Singleton<string> from
35 // getting our internal one.
36 struct EmptyStrings {
37 EmptyStrings() {}
38 const std::string s;
39 const std::wstring ws;
40 const string16 s16;
42 static EmptyStrings* GetInstance() {
43 return Singleton<EmptyStrings>::get();
// Used by ReplaceStringPlaceholders to track the position in the string of
// replaced parameters.
struct ReplacementOffset {
  // Records that |parameter| was substituted starting at |offset|.
  ReplacementOffset(uintptr_t parameter, size_t offset)
      : parameter(parameter),
        offset(offset) {}

  // Index of the parameter.
  uintptr_t parameter;

  // Starting position in the string.
  size_t offset;
};
61 static bool CompareParameter(const ReplacementOffset& elem1,
62 const ReplacementOffset& elem2) {
63 return elem1.parameter < elem2.parameter;
66 } // namespace
68 namespace base {
// Scans |format| and returns false as soon as it finds a conversion
// specification this function treats as unportable: 's' or 'c' without a
// preceding 'l' modifier, or any of 'S', 'C', 'F', 'D', 'O', 'U'. Returns true
// otherwise, including when the string ends in the middle of a specification.
bool IsWprintfFormatPortable(const wchar_t* format) {
  for (const wchar_t* position = format; *position != '\0'; ++position) {
    if (*position == '%') {
      bool in_specification = true;
      bool modifier_l = false;
      while (in_specification) {
        // Eat up characters until reaching a known specifier.
        if (*++position == '\0') {
          // The format string ended in the middle of a specification.  Call
          // it portable because no unportable specifications were found.  The
          // string is equally broken on all platforms.
          return true;
        }

        if (*position == 'l') {
          // 'l' is the only thing that can save the 's' and 'c' specifiers.
          modifier_l = true;
        } else if (((*position == 's' || *position == 'c') && !modifier_l) ||
                   *position == 'S' || *position == 'C' || *position == 'F' ||
                   *position == 'D' || *position == 'O' || *position == 'U') {
          // Not portable.
          return false;
        }

        if (wcschr(L"diouxXeEfgGaAcspn%", *position)) {
          // Portable, keep scanning the rest of the format string.
          in_specification = false;
        }
      }
    }
  }

  return true;
}
105 } // namespace base
108 const std::string& EmptyString() {
109 return EmptyStrings::GetInstance()->s;
112 const std::wstring& EmptyWString() {
113 return EmptyStrings::GetInstance()->ws;
116 const string16& EmptyString16() {
117 return EmptyStrings::GetInstance()->s16;
// Comma-separated list of the code points treated as whitespace, intended to
// be expanded inside an array initializer. The list ends with a 0 so the
// resulting array can be used as a NUL-terminated character set.
#define WHITESPACE_UNICODE \
  0x0009, /* <control-0009> to <control-000D> */ \
  0x000A,                                        \
  0x000B,                                        \
  0x000C,                                        \
  0x000D,                                        \
  0x0020, /* Space */                            \
  0x0085, /* <control-0085> */                   \
  0x00A0, /* No-Break Space */                   \
  0x1680, /* Ogham Space Mark */                 \
  0x180E, /* Mongolian Vowel Separator */        \
  0x2000, /* En Quad to Hair Space */            \
  0x2001,                                        \
  0x2002,                                        \
  0x2003,                                        \
  0x2004,                                        \
  0x2005,                                        \
  0x2006,                                        \
  0x2007,                                        \
  0x2008,                                        \
  0x2009,                                        \
  0x200A,                                        \
  0x200C, /* Zero Width Non-Joiner */            \
  0x2028, /* Line Separator */                   \
  0x2029, /* Paragraph Separator */              \
  0x202F, /* Narrow No-Break Space */            \
  0x205F, /* Medium Mathematical Space */        \
  0x3000, /* Ideographic Space */                \
  0
150 const wchar_t kWhitespaceWide[] = {
151 WHITESPACE_UNICODE
153 const char16 kWhitespaceUTF16[] = {
154 WHITESPACE_UNICODE
// ASCII whitespace character set. The trailing 0 makes the array usable as a
// NUL-terminated string, which is how TrimWhitespaceASCII hands it to the
// std::string search functions.
const char kWhitespaceASCII[] = {
  0x09,    // <control-0009> to <control-000D>
  0x0A,
  0x0B,
  0x0C,
  0x0D,
  0x20,    // Space
  0
};
// The three bytes EF BB BF, followed by the string literal's implicit NUL.
const char kUtf8ByteOrderMark[] = "\xEF\xBB\xBF";
// Copies |input| to |output| with every character that appears in the
// NUL-terminated set |remove_chars| dropped. Returns true if at least one
// character was removed. |output| may point at |input|.
//
// The result is built in one pass into a scratch string instead of erasing
// characters one at a time, which avoids shifting the tail on every removal.
template<typename STR>
bool RemoveCharsT(const STR& input,
                  const typename STR::value_type remove_chars[],
                  STR* output) {
  const STR unwanted(remove_chars);

  STR kept;
  kept.reserve(input.size());
  for (typename STR::const_iterator it = input.begin(); it != input.end();
       ++it) {
    if (unwanted.find(*it) == STR::npos)
      kept.push_back(*it);
  }

  const bool removed = kept.size() != input.size();
  // Swap only after |input| has been fully read, in case output == &input.
  output->swap(kept);
  return removed;
}
187 bool RemoveChars(const string16& input,
188 const char16 remove_chars[],
189 string16* output) {
190 return RemoveCharsT(input, remove_chars, output);
193 bool RemoveChars(const std::string& input,
194 const char remove_chars[],
195 std::string* output) {
196 return RemoveCharsT(input, remove_chars, output);
199 template<typename STR>
200 TrimPositions TrimStringT(const STR& input,
201 const typename STR::value_type trim_chars[],
202 TrimPositions positions,
203 STR* output) {
204 // Find the edges of leading/trailing whitespace as desired.
205 const typename STR::size_type last_char = input.length() - 1;
206 const typename STR::size_type first_good_char = (positions & TRIM_LEADING) ?
207 input.find_first_not_of(trim_chars) : 0;
208 const typename STR::size_type last_good_char = (positions & TRIM_TRAILING) ?
209 input.find_last_not_of(trim_chars) : last_char;
211 // When the string was all whitespace, report that we stripped off whitespace
212 // from whichever position the caller was interested in. For empty input, we
213 // stripped no whitespace, but we still need to clear |output|.
214 if (input.empty() ||
215 (first_good_char == STR::npos) || (last_good_char == STR::npos)) {
216 bool input_was_empty = input.empty(); // in case output == &input
217 output->clear();
218 return input_was_empty ? TRIM_NONE : positions;
221 // Trim the whitespace.
222 *output =
223 input.substr(first_good_char, last_good_char - first_good_char + 1);
225 // Return where we trimmed from.
226 return static_cast<TrimPositions>(
227 ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |
228 ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
231 bool TrimString(const std::wstring& input,
232 const wchar_t trim_chars[],
233 std::wstring* output) {
234 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
237 #if !defined(WCHAR_T_IS_UTF16)
238 bool TrimString(const string16& input,
239 const char16 trim_chars[],
240 string16* output) {
241 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
243 #endif
245 bool TrimString(const std::string& input,
246 const char trim_chars[],
247 std::string* output) {
248 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
251 void TruncateUTF8ToByteSize(const std::string& input,
252 const size_t byte_size,
253 std::string* output) {
254 DCHECK(output);
255 if (byte_size > input.length()) {
256 *output = input;
257 return;
259 DCHECK_LE(byte_size, static_cast<uint32>(kint32max));
260 // Note: This cast is necessary because CBU8_NEXT uses int32s.
261 int32 truncation_length = static_cast<int32>(byte_size);
262 int32 char_index = truncation_length - 1;
263 const char* data = input.data();
265 // Using CBU8, we will move backwards from the truncation point
266 // to the beginning of the string looking for a valid UTF8
267 // character. Once a full UTF8 character is found, we will
268 // truncate the string to the end of that character.
269 while (char_index >= 0) {
270 int32 prev = char_index;
271 uint32 code_point = 0;
272 CBU8_NEXT(data, char_index, truncation_length, code_point);
273 if (!base::IsValidCharacter(code_point) ||
274 !base::IsValidCodepoint(code_point)) {
275 char_index = prev - 1;
276 } else {
277 break;
281 if (char_index >= 0 )
282 *output = input.substr(0, char_index);
283 else
284 output->clear();
287 TrimPositions TrimWhitespace(const string16& input,
288 TrimPositions positions,
289 string16* output) {
290 return TrimStringT(input, kWhitespaceUTF16, positions, output);
293 TrimPositions TrimWhitespaceASCII(const std::string& input,
294 TrimPositions positions,
295 std::string* output) {
296 return TrimStringT(input, kWhitespaceASCII, positions, output);
299 // This function is only for backward-compatibility.
300 // To be removed when all callers are updated.
301 TrimPositions TrimWhitespace(const std::string& input,
302 TrimPositions positions,
303 std::string* output) {
304 return TrimWhitespaceASCII(input, positions, output);
// Returns a copy of |text| with leading and trailing whitespace removed and
// every interior whitespace run reduced to a single space. When
// |trim_sequences_with_line_breaks| is true, a run containing CR or LF is
// removed entirely instead.
template<typename STR>
STR CollapseWhitespaceT(const STR& text,
                        bool trim_sequences_with_line_breaks) {
  STR result;
  result.resize(text.size());

  // Set flags to pretend we're already in a trimmed whitespace sequence, so we
  // will trim any leading whitespace.
  bool in_whitespace = true;
  bool already_trimmed = true;

  // Write cursor into |result|. It is only decremented right after a space
  // has been written, so it never drops below zero.
  typename STR::size_type chars_written = 0;
  for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) {
    if (IsWhitespace(*i)) {
      if (!in_whitespace) {
        // Reduce all whitespace sequences to a single space.
        in_whitespace = true;
        result[chars_written++] = L' ';
      }
      if (trim_sequences_with_line_breaks && !already_trimmed &&
          ((*i == '\n') || (*i == '\r'))) {
        // Whitespace sequences containing CR or LF are eliminated entirely.
        already_trimmed = true;
        --chars_written;
      }
    } else {
      // Non-whitespace characters are copied straight across.
      in_whitespace = false;
      already_trimmed = false;
      result[chars_written++] = *i;
    }
  }

  if (in_whitespace && !already_trimmed) {
    // Any trailing whitespace is eliminated.
    --chars_written;
  }

  result.resize(chars_written);
  return result;
}
349 std::wstring CollapseWhitespace(const std::wstring& text,
350 bool trim_sequences_with_line_breaks) {
351 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
354 #if !defined(WCHAR_T_IS_UTF16)
355 string16 CollapseWhitespace(const string16& text,
356 bool trim_sequences_with_line_breaks) {
357 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
359 #endif
361 std::string CollapseWhitespaceASCII(const std::string& text,
362 bool trim_sequences_with_line_breaks) {
363 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
366 bool ContainsOnlyWhitespaceASCII(const std::string& str) {
367 for (std::string::const_iterator i(str.begin()); i != str.end(); ++i) {
368 if (!IsAsciiWhitespace(*i))
369 return false;
371 return true;
374 bool ContainsOnlyWhitespace(const string16& str) {
375 for (string16::const_iterator i(str.begin()); i != str.end(); ++i) {
376 if (!IsWhitespace(*i))
377 return false;
379 return true;
// Returns true if every character of |input| also occurs in |characters|.
// An empty |input| yields true.
template<typename STR>
static bool ContainsOnlyCharsT(const STR& input, const STR& characters) {
  for (typename STR::const_iterator iter = input.begin();
       iter != input.end(); ++iter) {
    if (characters.find(*iter) == STR::npos)
      return false;
  }
  return true;
}
392 bool ContainsOnlyChars(const std::wstring& input,
393 const std::wstring& characters) {
394 return ContainsOnlyCharsT(input, characters);
397 #if !defined(WCHAR_T_IS_UTF16)
398 bool ContainsOnlyChars(const string16& input, const string16& characters) {
399 return ContainsOnlyCharsT(input, characters);
401 #endif
403 bool ContainsOnlyChars(const std::string& input,
404 const std::string& characters) {
405 return ContainsOnlyCharsT(input, characters);
408 std::string WideToASCII(const std::wstring& wide) {
409 DCHECK(IsStringASCII(wide)) << wide;
410 return std::string(wide.begin(), wide.end());
413 std::string UTF16ToASCII(const string16& utf16) {
414 DCHECK(IsStringASCII(utf16)) << utf16;
415 return std::string(utf16.begin(), utf16.end());
// Latin1 is just the low range of Unicode, so we can copy directly to convert.
//
// Converts |wide| into |latin1|. Returns false, leaving |latin1| empty, if any
// character is outside the range 0..255. |latin1| is always cleared first.
bool WideToLatin1(const std::wstring& wide, std::string* latin1) {
  std::string output;
  output.resize(wide.size());
  latin1->clear();
  for (size_t i = 0; i < wide.size(); i++) {
    // Compare as unsigned: where wchar_t is a signed type, a negative value
    // would otherwise pass a plain "> 255" test and be silently truncated.
    if (static_cast<unsigned long>(wide[i]) > 255UL)
      return false;
    output[i] = static_cast<char>(wide[i]);
  }
  latin1->swap(output);
  return true;
}
432 template<class STR>
433 static bool DoIsStringASCII(const STR& str) {
434 for (size_t i = 0; i < str.length(); i++) {
435 typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i];
436 if (c > 0x7F)
437 return false;
439 return true;
442 bool IsStringASCII(const std::wstring& str) {
443 return DoIsStringASCII(str);
446 #if !defined(WCHAR_T_IS_UTF16)
447 bool IsStringASCII(const string16& str) {
448 return DoIsStringASCII(str);
450 #endif
452 bool IsStringASCII(const base::StringPiece& str) {
453 return DoIsStringASCII(str);
456 bool IsStringUTF8(const std::string& str) {
457 const char *src = str.data();
458 int32 src_len = static_cast<int32>(str.length());
459 int32 char_index = 0;
461 while (char_index < src_len) {
462 int32 code_point;
463 CBU8_NEXT(src, char_index, src_len, code_point);
464 if (!base::IsValidCharacter(code_point))
465 return false;
467 return true;
470 template<typename Iter>
471 static inline bool DoLowerCaseEqualsASCII(Iter a_begin,
472 Iter a_end,
473 const char* b) {
474 for (Iter it = a_begin; it != a_end; ++it, ++b) {
475 if (!*b || base::ToLowerASCII(*it) != *b)
476 return false;
478 return *b == 0;
481 // Front-ends for LowerCaseEqualsASCII.
482 bool LowerCaseEqualsASCII(const std::string& a, const char* b) {
483 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
486 bool LowerCaseEqualsASCII(const std::wstring& a, const char* b) {
487 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
490 #if !defined(WCHAR_T_IS_UTF16)
491 bool LowerCaseEqualsASCII(const string16& a, const char* b) {
492 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
494 #endif
496 bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,
497 std::string::const_iterator a_end,
498 const char* b) {
499 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
502 bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin,
503 std::wstring::const_iterator a_end,
504 const char* b) {
505 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
508 #if !defined(WCHAR_T_IS_UTF16)
509 bool LowerCaseEqualsASCII(string16::const_iterator a_begin,
510 string16::const_iterator a_end,
511 const char* b) {
512 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
514 #endif
516 // TODO(port): Resolve wchar_t/iterator issues that require OS_ANDROID here.
517 #if !defined(OS_ANDROID)
518 bool LowerCaseEqualsASCII(const char* a_begin,
519 const char* a_end,
520 const char* b) {
521 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
524 bool LowerCaseEqualsASCII(const wchar_t* a_begin,
525 const wchar_t* a_end,
526 const char* b) {
527 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
530 #if !defined(WCHAR_T_IS_UTF16)
531 bool LowerCaseEqualsASCII(const char16* a_begin,
532 const char16* a_end,
533 const char* b) {
534 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
536 #endif
538 #endif // !defined(OS_ANDROID)
540 bool EqualsASCII(const string16& a, const base::StringPiece& b) {
541 if (a.length() != b.length())
542 return false;
543 return std::equal(b.begin(), b.end(), a.begin());
546 bool StartsWithASCII(const std::string& str,
547 const std::string& search,
548 bool case_sensitive) {
549 if (case_sensitive)
550 return str.compare(0, search.length(), search) == 0;
551 else
552 return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0;
555 template <typename STR>
556 bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) {
557 if (case_sensitive) {
558 return str.compare(0, search.length(), search) == 0;
559 } else {
560 if (search.size() > str.size())
561 return false;
562 return std::equal(search.begin(), search.end(), str.begin(),
563 base::CaseInsensitiveCompare<typename STR::value_type>());
567 bool StartsWith(const std::wstring& str, const std::wstring& search,
568 bool case_sensitive) {
569 return StartsWithT(str, search, case_sensitive);
572 #if !defined(WCHAR_T_IS_UTF16)
573 bool StartsWith(const string16& str, const string16& search,
574 bool case_sensitive) {
575 return StartsWithT(str, search, case_sensitive);
577 #endif
579 template <typename STR>
580 bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) {
581 typename STR::size_type str_length = str.length();
582 typename STR::size_type search_length = search.length();
583 if (search_length > str_length)
584 return false;
585 if (case_sensitive) {
586 return str.compare(str_length - search_length, search_length, search) == 0;
587 } else {
588 return std::equal(search.begin(), search.end(),
589 str.begin() + (str_length - search_length),
590 base::CaseInsensitiveCompare<typename STR::value_type>());
594 bool EndsWith(const std::string& str, const std::string& search,
595 bool case_sensitive) {
596 return EndsWithT(str, search, case_sensitive);
599 bool EndsWith(const std::wstring& str, const std::wstring& search,
600 bool case_sensitive) {
601 return EndsWithT(str, search, case_sensitive);
604 #if !defined(WCHAR_T_IS_UTF16)
605 bool EndsWith(const string16& str, const string16& search,
606 bool case_sensitive) {
607 return EndsWithT(str, search, case_sensitive);
609 #endif
// Unit suffixes used by FormatBytesUnlocalized, indexed by how many times the
// byte count has been divided by 1024.
static const char* const kByteStringsUnlocalized[] = {
  " B",
  " kB",
  " MB",
  " GB",
  " TB",
  " PB"
};
620 string16 FormatBytesUnlocalized(int64 bytes) {
621 double unit_amount = static_cast<double>(bytes);
622 size_t dimension = 0;
623 const int kKilo = 1024;
624 while (unit_amount >= kKilo &&
625 dimension < arraysize(kByteStringsUnlocalized) - 1) {
626 unit_amount /= kKilo;
627 dimension++;
630 char buf[64];
631 if (bytes != 0 && dimension > 0 && unit_amount < 100) {
632 base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount,
633 kByteStringsUnlocalized[dimension]);
634 } else {
635 base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount,
636 kByteStringsUnlocalized[dimension]);
639 return ASCIIToUTF16(buf);
// Replaces occurrences of |find_this| in |*str| with |replace_with|, starting
// the search at |start_offset|. Replaces only the first occurrence unless
// |replace_all| is true. Does nothing if |start_offset| is npos or not inside
// the string, or if |find_this| is empty.
template<class StringType>
void DoReplaceSubstringsAfterOffset(StringType* str,
                                    typename StringType::size_type start_offset,
                                    const StringType& find_this,
                                    const StringType& replace_with,
                                    bool replace_all) {
  if ((start_offset == StringType::npos) || (start_offset >= str->length()))
    return;

  DCHECK(!find_this.empty());
  // An empty needle is found at every offset, so with |replace_all| the loop
  // below would never terminate when the DCHECK is compiled out.
  if (find_this.empty())
    return;

  for (typename StringType::size_type offs(str->find(find_this, start_offset));
      offs != StringType::npos; offs = str->find(find_this, offs)) {
    str->replace(offs, find_this.length(), replace_with);
    // Resume after the inserted text so it is never searched again.
    offs += replace_with.length();

    if (!replace_all)
      break;
  }
}
662 void ReplaceFirstSubstringAfterOffset(string16* str,
663 string16::size_type start_offset,
664 const string16& find_this,
665 const string16& replace_with) {
666 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
667 false); // replace first instance
670 void ReplaceFirstSubstringAfterOffset(std::string* str,
671 std::string::size_type start_offset,
672 const std::string& find_this,
673 const std::string& replace_with) {
674 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
675 false); // replace first instance
678 void ReplaceSubstringsAfterOffset(string16* str,
679 string16::size_type start_offset,
680 const string16& find_this,
681 const string16& replace_with) {
682 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
683 true); // replace all instances
686 void ReplaceSubstringsAfterOffset(std::string* str,
687 std::string::size_type start_offset,
688 const std::string& find_this,
689 const std::string& replace_with) {
690 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
691 true); // replace all instances
// Splits |str| at any character contained in |delimiters| and stores the
// non-empty pieces in |tokens|, replacing its previous contents. Runs of
// delimiters produce no empty tokens. Returns the number of tokens.
template<typename STR>
static size_t TokenizeT(const STR& str,
                        const STR& delimiters,
                        std::vector<STR>* tokens) {
  tokens->clear();

  typename STR::size_type start = str.find_first_not_of(delimiters);
  while (start != STR::npos) {
    // |start| is known to be a non-delimiter, so search from the next one.
    typename STR::size_type end = str.find_first_of(delimiters, start + 1);
    if (end == STR::npos) {
      tokens->push_back(str.substr(start));
      break;
    } else {
      tokens->push_back(str.substr(start, end - start));
      start = str.find_first_not_of(delimiters, end + 1);
    }
  }

  return tokens->size();
}
716 size_t Tokenize(const std::wstring& str,
717 const std::wstring& delimiters,
718 std::vector<std::wstring>* tokens) {
719 return TokenizeT(str, delimiters, tokens);
722 #if !defined(WCHAR_T_IS_UTF16)
723 size_t Tokenize(const string16& str,
724 const string16& delimiters,
725 std::vector<string16>* tokens) {
726 return TokenizeT(str, delimiters, tokens);
728 #endif
730 size_t Tokenize(const std::string& str,
731 const std::string& delimiters,
732 std::vector<std::string>* tokens) {
733 return TokenizeT(str, delimiters, tokens);
736 size_t Tokenize(const base::StringPiece& str,
737 const base::StringPiece& delimiters,
738 std::vector<base::StringPiece>* tokens) {
739 return TokenizeT(str, delimiters, tokens);
// Concatenates |parts| with |sep| inserted between consecutive elements.
// Returns an empty string when |parts| is empty.
template<typename STR>
static STR JoinStringT(const std::vector<STR>& parts,
                       typename STR::value_type sep) {
  if (parts.empty())
    return STR();

  // Seed the result with the first part so the loop only has to prepend the
  // separator to the remaining ones.
  STR result(parts[0]);
  typename std::vector<STR>::const_iterator iter = parts.begin();
  ++iter;

  for (; iter != parts.end(); ++iter) {
    result += sep;
    result += *iter;
  }

  return result;
}
760 std::string JoinString(const std::vector<std::string>& parts, char sep) {
761 return JoinStringT(parts, sep);
764 string16 JoinString(const std::vector<string16>& parts, char16 sep) {
765 return JoinStringT(parts, sep);
768 template<class FormatStringType, class OutStringType>
769 OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string,
770 const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) {
771 size_t substitutions = subst.size();
773 size_t sub_length = 0;
774 for (typename std::vector<OutStringType>::const_iterator iter = subst.begin();
775 iter != subst.end(); ++iter) {
776 sub_length += iter->length();
779 OutStringType formatted;
780 formatted.reserve(format_string.length() + sub_length);
782 std::vector<ReplacementOffset> r_offsets;
783 for (typename FormatStringType::const_iterator i = format_string.begin();
784 i != format_string.end(); ++i) {
785 if ('$' == *i) {
786 if (i + 1 != format_string.end()) {
787 ++i;
788 DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i;
789 if ('$' == *i) {
790 while (i != format_string.end() && '$' == *i) {
791 formatted.push_back('$');
792 ++i;
794 --i;
795 } else {
796 uintptr_t index = 0;
797 while (i != format_string.end() && '0' <= *i && *i <= '9') {
798 index *= 10;
799 index += *i - '0';
800 ++i;
802 --i;
803 index -= 1;
804 if (offsets) {
805 ReplacementOffset r_offset(index,
806 static_cast<int>(formatted.size()));
807 r_offsets.insert(std::lower_bound(r_offsets.begin(),
808 r_offsets.end(),
809 r_offset,
810 &CompareParameter),
811 r_offset);
813 if (index < substitutions)
814 formatted.append(subst.at(index));
817 } else {
818 formatted.push_back(*i);
821 if (offsets) {
822 for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin();
823 i != r_offsets.end(); ++i) {
824 offsets->push_back(i->offset);
827 return formatted;
830 string16 ReplaceStringPlaceholders(const string16& format_string,
831 const std::vector<string16>& subst,
832 std::vector<size_t>* offsets) {
833 return DoReplaceStringPlaceholders(format_string, subst, offsets);
836 std::string ReplaceStringPlaceholders(const base::StringPiece& format_string,
837 const std::vector<std::string>& subst,
838 std::vector<size_t>* offsets) {
839 return DoReplaceStringPlaceholders(format_string, subst, offsets);
842 string16 ReplaceStringPlaceholders(const string16& format_string,
843 const string16& a,
844 size_t* offset) {
845 std::vector<size_t> offsets;
846 std::vector<string16> subst;
847 subst.push_back(a);
848 string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);
850 DCHECK(offsets.size() == 1);
851 if (offset) {
852 *offset = offsets[0];
854 return result;
857 static bool IsWildcard(base_icu::UChar32 character) {
858 return character == '*' || character == '?';
861 // Move the strings pointers to the point where they start to differ.
862 template <typename CHAR, typename NEXT>
863 static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,
864 const CHAR** string, const CHAR* string_end,
865 NEXT next) {
866 const CHAR* escape = NULL;
867 while (*pattern != pattern_end && *string != string_end) {
868 if (!escape && IsWildcard(**pattern)) {
869 // We don't want to match wildcard here, except if it's escaped.
870 return;
873 // Check if the escapement char is found. If so, skip it and move to the
874 // next character.
875 if (!escape && **pattern == '\\') {
876 escape = *pattern;
877 next(pattern, pattern_end);
878 continue;
881 // Check if the chars match, if so, increment the ptrs.
882 const CHAR* pattern_next = *pattern;
883 const CHAR* string_next = *string;
884 base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
885 if (pattern_char == next(&string_next, string_end) &&
886 pattern_char != (base_icu::UChar32) CBU_SENTINEL) {
887 *pattern = pattern_next;
888 *string = string_next;
889 } else {
890 // Uh ho, it did not match, we are done. If the last char was an
891 // escapement, that means that it was an error to advance the ptr here,
892 // let's put it back where it was. This also mean that the MatchPattern
893 // function will return false because if we can't match an escape char
894 // here, then no one will.
895 if (escape) {
896 *pattern = escape;
898 return;
901 escape = NULL;
// Advances |*pattern| past any leading run of wildcard characters ('*' or
// '?'), stopping at |end| or at the first non-wildcard.
template <typename CHAR, typename NEXT>
static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
  while (*pattern != end) {
    if (!IsWildcard(**pattern))
      return;
    next(pattern, end);
  }
}
// Recursive wildcard matcher. Returns true if [eval, eval_end) matches
// [pattern, pattern_end), where '*' matches any run of characters and '?'
// matches zero or one character. |next| decodes and steps over one character.
// Recursion is capped: once |depth| exceeds kMaxDepth the result is false.
template <typename CHAR, typename NEXT>
static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
                          const CHAR* pattern, const CHAR* pattern_end,
                          int depth,
                          NEXT next) {
  const int kMaxDepth = 16;
  if (depth > kMaxDepth)
    return false;

  // Eat all the matching chars.
  EatSameChars(&pattern, pattern_end, &eval, eval_end, next);

  // If the string is empty, then the pattern must be empty too, or contains
  // only wildcards.
  if (eval == eval_end) {
    EatWildcard(&pattern, pattern_end, next);
    return pattern == pattern_end;
  }

  // Pattern is empty but not string, this is not a match.
  if (pattern == pattern_end)
    return false;

  // If this is a question mark, then we need to compare the rest with
  // the current string or the string with one character eaten.
  const CHAR* next_pattern = pattern;
  next(&next_pattern, pattern_end);
  if (pattern[0] == '?') {
    if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
                      depth + 1, next))
      return true;
    const CHAR* next_eval = eval;
    next(&next_eval, eval_end);
    if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
                      depth + 1, next))
      return true;
  }

  // This is a *, try to match all the possible substrings with the remainder
  // of the pattern.
  if (pattern[0] == '*') {
    // Collapse duplicate wild cards (********** into *) so that the
    // method does not recurse unnecessarily. http://crbug.com/52839
    EatWildcard(&next_pattern, pattern_end, next);

    while (eval != eval_end) {
      if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
                        depth + 1, next))
        return true;
      eval++;
    }

    // We reached the end of the string, let see if the pattern contains only
    // wildcards.
    if (eval == eval_end) {
      EatWildcard(&pattern, pattern_end, next);
      if (pattern != pattern_end)
        return false;
      return true;
    }
  }

  return false;
}
979 struct NextCharUTF8 {
980 base_icu::UChar32 operator()(const char** p, const char* end) {
981 base_icu::UChar32 c;
982 int offset = 0;
983 CBU8_NEXT(*p, offset, end - *p, c);
984 *p += offset;
985 return c;
989 struct NextCharUTF16 {
990 base_icu::UChar32 operator()(const char16** p, const char16* end) {
991 base_icu::UChar32 c;
992 int offset = 0;
993 CBU16_NEXT(*p, offset, end - *p, c);
994 *p += offset;
995 return c;
999 bool MatchPattern(const base::StringPiece& eval,
1000 const base::StringPiece& pattern) {
1001 return MatchPatternT(eval.data(), eval.data() + eval.size(),
1002 pattern.data(), pattern.data() + pattern.size(),
1003 0, NextCharUTF8());
1006 bool MatchPattern(const string16& eval, const string16& pattern) {
1007 return MatchPatternT(eval.c_str(), eval.c_str() + eval.size(),
1008 pattern.c_str(), pattern.c_str() + pattern.size(),
1009 0, NextCharUTF16());
1012 // The following code is compatible with the OpenBSD lcpy interface. See:
1013 // http://www.gratisoft.us/todd/papers/strlcpy.html
1014 // ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
1016 namespace {
// Copies the NUL-terminated |src| into |dst|, writing at most |dst_size|
// characters including the terminator. If |dst_size| is non-zero the result
// is always NUL-terminated. Returns the length of |src| in characters, so a
// return value >= |dst_size| means the copy was truncated.
template <typename CHAR>
size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
  for (size_t i = 0; i < dst_size; ++i) {
    if ((dst[i] = src[i]) == 0)  // We hit and copied the terminating NULL.
      return i;
  }

  // We were left off at dst_size.  We over copied 1 byte.  Null terminate.
  if (dst_size != 0)
    dst[dst_size - 1] = 0;

  // Count the rest of the |src|, and return its length in characters.
  while (src[dst_size]) ++dst_size;
  return dst_size;
}
1034 } // namespace
1036 size_t base::strlcpy(char* dst, const char* src, size_t dst_size) {
1037 return lcpyT<char>(dst, src, dst_size);
1039 size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
1040 return lcpyT<wchar_t>(dst, src, dst_size);