Encode method and error message inside LevelDB::Status message
[chromium-blink-merge.git] / base / string_util.cc
blob91a1c4caf1d1bde19faee4539dadba7177591841
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/string_util.h"
7 #include "build/build_config.h"
9 #include <ctype.h>
10 #include <errno.h>
11 #include <math.h>
12 #include <stdarg.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <time.h>
17 #include <wchar.h>
18 #include <wctype.h>
20 #include <algorithm>
21 #include <vector>
23 #include "base/basictypes.h"
24 #include "base/logging.h"
25 #include "base/memory/singleton.h"
26 #include "base/strings/utf_string_conversion_utils.h"
27 #include "base/utf_string_conversions.h"
28 #include "base/third_party/icu/icu_utf.h"
30 namespace {
32 // Force the singleton used by Empty[W]String[16] to be a unique type. This
33 // prevents other code that might accidentally use Singleton<string> from
34 // getting our internal one.
35 struct EmptyStrings {
36 EmptyStrings() {}
37 const std::string s;
38 const std::wstring ws;
39 const string16 s16;
41 static EmptyStrings* GetInstance() {
42 return Singleton<EmptyStrings>::get();
// Used by ReplaceStringPlaceholders to track the position in the string of
// replaced parameters.
struct ReplacementOffset {
  ReplacementOffset(uintptr_t parameter, size_t offset)
      : parameter(parameter),
        offset(offset) {}

  // Index of the parameter.
  uintptr_t parameter;

  // Starting position in the string.
  size_t offset;
};

// Strict-weak ordering on ReplacementOffset by parameter index only; the
// offset field does not take part in the comparison.
static bool CompareParameter(const ReplacementOffset& elem1,
                             const ReplacementOffset& elem2) {
  return elem1.parameter < elem2.parameter;
}
65 } // namespace
67 namespace base {
// Scans a wide printf-style |format| string and returns false as soon as it
// finds a conversion that this function treats as non-portable:
//   - 's' or 'c' without a preceding 'l' modifier,
//   - any of 'S', 'C', 'F', 'D', 'O', 'U'.
// Returns true otherwise, including when the string ends in the middle of a
// conversion specification.
bool IsWprintfFormatPortable(const wchar_t* format) {
  for (const wchar_t* position = format; *position != '\0'; ++position) {
    if (*position == '%') {
      bool in_specification = true;
      bool modifier_l = false;
      while (in_specification) {
        // Eat up characters until reaching a known specifier.
        if (*++position == '\0') {
          // The format string ended in the middle of a specification.  Call
          // it portable because no unportable specifications were found.  The
          // string is equally broken on all platforms.
          return true;
        }

        if (*position == 'l') {
          // 'l' is the only thing that can save the 's' and 'c' specifiers.
          modifier_l = true;
        } else if (((*position == 's' || *position == 'c') && !modifier_l) ||
                   *position == 'S' || *position == 'C' || *position == 'F' ||
                   *position == 'D' || *position == 'O' || *position == 'U') {
          // Not portable.
          return false;
        }

        if (wcschr(L"diouxXeEfgGaAcspn%", *position)) {
          // Portable, keep scanning the rest of the format string.
          in_specification = false;
        }
      }
    }
  }

  return true;
}
104 } // namespace base
107 const std::string& EmptyString() {
108 return EmptyStrings::GetInstance()->s;
111 const std::wstring& EmptyWString() {
112 return EmptyStrings::GetInstance()->ws;
115 const string16& EmptyString16() {
116 return EmptyStrings::GetInstance()->s16;
// Comma-separated list of the code points treated as whitespace, followed by
// a terminating 0 so that arrays initialised from this macro can be passed to
// APIs that expect a NUL-terminated character string.
#define WHITESPACE_UNICODE \
  0x0009, /* <control-0009> to <control-000D> */ \
  0x000A,                                        \
  0x000B,                                        \
  0x000C,                                        \
  0x000D,                                        \
  0x0020, /* Space */                            \
  0x0085, /* <control-0085> */                   \
  0x00A0, /* No-Break Space */                   \
  0x1680, /* Ogham Space Mark */                 \
  0x180E, /* Mongolian Vowel Separator */        \
  0x2000, /* En Quad to Hair Space */            \
  0x2001,                                        \
  0x2002,                                        \
  0x2003,                                        \
  0x2004,                                        \
  0x2005,                                        \
  0x2006,                                        \
  0x2007,                                        \
  0x2008,                                        \
  0x2009,                                        \
  0x200A,                                        \
  0x200C, /* Zero Width Non-Joiner */            \
  0x2028, /* Line Separator */                   \
  0x2029, /* Paragraph Separator */              \
  0x202F, /* Narrow No-Break Space */            \
  0x205F, /* Medium Mathematical Space */        \
  0x3000, /* Ideographic Space */                \
  0
149 const wchar_t kWhitespaceWide[] = {
150 WHITESPACE_UNICODE
152 const char16 kWhitespaceUTF16[] = {
153 WHITESPACE_UNICODE
// ASCII whitespace characters, NUL-terminated so the array can be used as a
// C string (e.g. as the character set for find_first_not_of).
const char kWhitespaceASCII[] = {
  0x09,    // <control-0009> to <control-000D>
  0x0A,
  0x0B,
  0x0C,
  0x0D,
  0x20,    // Space
  0
};

// The three-byte UTF-8 encoding of U+FEFF (byte order mark).
const char kUtf8ByteOrderMark[] = "\xEF\xBB\xBF";
// Copies |input| into |output| and replaces every character that appears in
// the NUL-terminated set |replace_chars| with the string |replace_with|.
// Returns true if at least one replacement was made.
template<typename STR>
bool ReplaceCharsT(const STR& input,
                   const typename STR::value_type replace_chars[],
                   const STR& replace_with,
                   STR* output) {
  bool removed = false;
  size_t replace_length = replace_with.length();

  *output = input;

  size_t found = output->find_first_of(replace_chars);
  while (found != STR::npos) {
    removed = true;
    output->replace(found, 1, replace_with);
    // Resume after the inserted text so that characters of |replace_with|
    // are never themselves replaced.
    found = output->find_first_of(replace_chars, found + replace_length);
  }

  return removed;
}
187 bool ReplaceChars(const string16& input,
188 const char16 replace_chars[],
189 const string16& replace_with,
190 string16* output) {
191 return ReplaceCharsT(input, replace_chars, replace_with, output);
194 bool ReplaceChars(const std::string& input,
195 const char replace_chars[],
196 const std::string& replace_with,
197 std::string* output) {
198 return ReplaceCharsT(input, replace_chars, replace_with, output);
201 bool RemoveChars(const string16& input,
202 const char16 remove_chars[],
203 string16* output) {
204 return ReplaceChars(input, remove_chars, string16(), output);
207 bool RemoveChars(const std::string& input,
208 const char remove_chars[],
209 std::string* output) {
210 return ReplaceChars(input, remove_chars, std::string(), output);
213 template<typename STR>
214 TrimPositions TrimStringT(const STR& input,
215 const typename STR::value_type trim_chars[],
216 TrimPositions positions,
217 STR* output) {
218 // Find the edges of leading/trailing whitespace as desired.
219 const typename STR::size_type last_char = input.length() - 1;
220 const typename STR::size_type first_good_char = (positions & TRIM_LEADING) ?
221 input.find_first_not_of(trim_chars) : 0;
222 const typename STR::size_type last_good_char = (positions & TRIM_TRAILING) ?
223 input.find_last_not_of(trim_chars) : last_char;
225 // When the string was all whitespace, report that we stripped off whitespace
226 // from whichever position the caller was interested in. For empty input, we
227 // stripped no whitespace, but we still need to clear |output|.
228 if (input.empty() ||
229 (first_good_char == STR::npos) || (last_good_char == STR::npos)) {
230 bool input_was_empty = input.empty(); // in case output == &input
231 output->clear();
232 return input_was_empty ? TRIM_NONE : positions;
235 // Trim the whitespace.
236 *output =
237 input.substr(first_good_char, last_good_char - first_good_char + 1);
239 // Return where we trimmed from.
240 return static_cast<TrimPositions>(
241 ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |
242 ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
245 bool TrimString(const std::wstring& input,
246 const wchar_t trim_chars[],
247 std::wstring* output) {
248 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
251 #if !defined(WCHAR_T_IS_UTF16)
252 bool TrimString(const string16& input,
253 const char16 trim_chars[],
254 string16* output) {
255 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
257 #endif
259 bool TrimString(const std::string& input,
260 const char trim_chars[],
261 std::string* output) {
262 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
265 void TruncateUTF8ToByteSize(const std::string& input,
266 const size_t byte_size,
267 std::string* output) {
268 DCHECK(output);
269 if (byte_size > input.length()) {
270 *output = input;
271 return;
273 DCHECK_LE(byte_size, static_cast<uint32>(kint32max));
274 // Note: This cast is necessary because CBU8_NEXT uses int32s.
275 int32 truncation_length = static_cast<int32>(byte_size);
276 int32 char_index = truncation_length - 1;
277 const char* data = input.data();
279 // Using CBU8, we will move backwards from the truncation point
280 // to the beginning of the string looking for a valid UTF8
281 // character. Once a full UTF8 character is found, we will
282 // truncate the string to the end of that character.
283 while (char_index >= 0) {
284 int32 prev = char_index;
285 uint32 code_point = 0;
286 CBU8_NEXT(data, char_index, truncation_length, code_point);
287 if (!base::IsValidCharacter(code_point) ||
288 !base::IsValidCodepoint(code_point)) {
289 char_index = prev - 1;
290 } else {
291 break;
295 if (char_index >= 0 )
296 *output = input.substr(0, char_index);
297 else
298 output->clear();
301 TrimPositions TrimWhitespace(const string16& input,
302 TrimPositions positions,
303 string16* output) {
304 return TrimStringT(input, kWhitespaceUTF16, positions, output);
307 TrimPositions TrimWhitespaceASCII(const std::string& input,
308 TrimPositions positions,
309 std::string* output) {
310 return TrimStringT(input, kWhitespaceASCII, positions, output);
313 // This function is only for backward-compatibility.
314 // To be removed when all callers are updated.
315 TrimPositions TrimWhitespace(const std::string& input,
316 TrimPositions positions,
317 std::string* output) {
318 return TrimWhitespaceASCII(input, positions, output);
// Returns a copy of |text| in which leading and trailing whitespace is
// dropped and every interior run of whitespace is reduced to a single space.
// When |trim_sequences_with_line_breaks| is true, an interior run that
// contains '\n' or '\r' is removed entirely instead.
template<typename STR>
STR CollapseWhitespaceT(const STR& text,
                        bool trim_sequences_with_line_breaks) {
  STR result;
  result.resize(text.size());

  // Set flags to pretend we're already in a trimmed whitespace sequence, so we
  // will trim any leading whitespace.
  bool in_whitespace = true;
  bool already_trimmed = true;

  // Number of characters written to |result| so far. It is only decremented
  // directly after a space has been written, so it never goes below zero.
  size_t chars_written = 0;
  for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) {
    if (IsWhitespace(*i)) {
      if (!in_whitespace) {
        // Reduce all whitespace sequences to a single space.
        in_whitespace = true;
        result[chars_written++] = L' ';
      }
      if (trim_sequences_with_line_breaks && !already_trimmed &&
          ((*i == '\n') || (*i == '\r'))) {
        // Whitespace sequences containing CR or LF are eliminated entirely.
        already_trimmed = true;
        --chars_written;
      }
    } else {
      // Non-whitespace characters are copied straight across.
      in_whitespace = false;
      already_trimmed = false;
      result[chars_written++] = *i;
    }
  }

  if (in_whitespace && !already_trimmed) {
    // Any trailing whitespace is eliminated.
    --chars_written;
  }

  result.resize(chars_written);
  return result;
}
363 std::wstring CollapseWhitespace(const std::wstring& text,
364 bool trim_sequences_with_line_breaks) {
365 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
368 #if !defined(WCHAR_T_IS_UTF16)
369 string16 CollapseWhitespace(const string16& text,
370 bool trim_sequences_with_line_breaks) {
371 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
373 #endif
375 std::string CollapseWhitespaceASCII(const std::string& text,
376 bool trim_sequences_with_line_breaks) {
377 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
380 bool ContainsOnlyWhitespaceASCII(const std::string& str) {
381 for (std::string::const_iterator i(str.begin()); i != str.end(); ++i) {
382 if (!IsAsciiWhitespace(*i))
383 return false;
385 return true;
388 bool ContainsOnlyWhitespace(const string16& str) {
389 return str.find_first_not_of(kWhitespaceUTF16) == string16::npos;
// Returns true if every character of |input| also occurs in |characters|.
// An empty |input| yields true.
template<typename STR>
static bool ContainsOnlyCharsT(const STR& input, const STR& characters) {
  for (typename STR::const_iterator iter = input.begin();
       iter != input.end(); ++iter) {
    if (characters.find(*iter) == STR::npos)
      return false;
  }
  return true;
}
402 bool ContainsOnlyChars(const std::wstring& input,
403 const std::wstring& characters) {
404 return ContainsOnlyCharsT(input, characters);
407 #if !defined(WCHAR_T_IS_UTF16)
408 bool ContainsOnlyChars(const string16& input, const string16& characters) {
409 return ContainsOnlyCharsT(input, characters);
411 #endif
413 bool ContainsOnlyChars(const std::string& input,
414 const std::string& characters) {
415 return ContainsOnlyCharsT(input, characters);
418 std::string WideToASCII(const std::wstring& wide) {
419 DCHECK(IsStringASCII(wide)) << wide;
420 return std::string(wide.begin(), wide.end());
423 std::string UTF16ToASCII(const string16& utf16) {
424 DCHECK(IsStringASCII(utf16)) << utf16;
425 return std::string(utf16.begin(), utf16.end());
428 // Latin1 is just the low range of Unicode, so we can copy directly to convert.
429 bool WideToLatin1(const std::wstring& wide, std::string* latin1) {
430 std::string output;
431 output.resize(wide.size());
432 latin1->clear();
433 for (size_t i = 0; i < wide.size(); i++) {
434 if (wide[i] > 255)
435 return false;
436 output[i] = static_cast<char>(wide[i]);
438 latin1->swap(output);
439 return true;
442 template<class STR>
443 static bool DoIsStringASCII(const STR& str) {
444 for (size_t i = 0; i < str.length(); i++) {
445 typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i];
446 if (c > 0x7F)
447 return false;
449 return true;
452 bool IsStringASCII(const std::wstring& str) {
453 return DoIsStringASCII(str);
456 #if !defined(WCHAR_T_IS_UTF16)
457 bool IsStringASCII(const string16& str) {
458 return DoIsStringASCII(str);
460 #endif
462 bool IsStringASCII(const base::StringPiece& str) {
463 return DoIsStringASCII(str);
466 bool IsStringUTF8(const std::string& str) {
467 const char *src = str.data();
468 int32 src_len = static_cast<int32>(str.length());
469 int32 char_index = 0;
471 while (char_index < src_len) {
472 int32 code_point;
473 CBU8_NEXT(src, char_index, src_len, code_point);
474 if (!base::IsValidCharacter(code_point))
475 return false;
477 return true;
480 template<typename Iter>
481 static inline bool DoLowerCaseEqualsASCII(Iter a_begin,
482 Iter a_end,
483 const char* b) {
484 for (Iter it = a_begin; it != a_end; ++it, ++b) {
485 if (!*b || base::ToLowerASCII(*it) != *b)
486 return false;
488 return *b == 0;
491 // Front-ends for LowerCaseEqualsASCII.
492 bool LowerCaseEqualsASCII(const std::string& a, const char* b) {
493 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
496 bool LowerCaseEqualsASCII(const std::wstring& a, const char* b) {
497 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
500 #if !defined(WCHAR_T_IS_UTF16)
501 bool LowerCaseEqualsASCII(const string16& a, const char* b) {
502 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
504 #endif
506 bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,
507 std::string::const_iterator a_end,
508 const char* b) {
509 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
512 bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin,
513 std::wstring::const_iterator a_end,
514 const char* b) {
515 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
518 #if !defined(WCHAR_T_IS_UTF16)
519 bool LowerCaseEqualsASCII(string16::const_iterator a_begin,
520 string16::const_iterator a_end,
521 const char* b) {
522 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
524 #endif
526 // TODO(port): Resolve wchar_t/iterator issues that require OS_ANDROID here.
527 #if !defined(OS_ANDROID)
528 bool LowerCaseEqualsASCII(const char* a_begin,
529 const char* a_end,
530 const char* b) {
531 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
534 bool LowerCaseEqualsASCII(const wchar_t* a_begin,
535 const wchar_t* a_end,
536 const char* b) {
537 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
540 #if !defined(WCHAR_T_IS_UTF16)
541 bool LowerCaseEqualsASCII(const char16* a_begin,
542 const char16* a_end,
543 const char* b) {
544 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
546 #endif
548 #endif // !defined(OS_ANDROID)
550 bool EqualsASCII(const string16& a, const base::StringPiece& b) {
551 if (a.length() != b.length())
552 return false;
553 return std::equal(b.begin(), b.end(), a.begin());
556 bool StartsWithASCII(const std::string& str,
557 const std::string& search,
558 bool case_sensitive) {
559 if (case_sensitive)
560 return str.compare(0, search.length(), search) == 0;
561 else
562 return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0;
565 template <typename STR>
566 bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) {
567 if (case_sensitive) {
568 return str.compare(0, search.length(), search) == 0;
569 } else {
570 if (search.size() > str.size())
571 return false;
572 return std::equal(search.begin(), search.end(), str.begin(),
573 base::CaseInsensitiveCompare<typename STR::value_type>());
577 bool StartsWith(const std::wstring& str, const std::wstring& search,
578 bool case_sensitive) {
579 return StartsWithT(str, search, case_sensitive);
582 #if !defined(WCHAR_T_IS_UTF16)
583 bool StartsWith(const string16& str, const string16& search,
584 bool case_sensitive) {
585 return StartsWithT(str, search, case_sensitive);
587 #endif
589 template <typename STR>
590 bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) {
591 typename STR::size_type str_length = str.length();
592 typename STR::size_type search_length = search.length();
593 if (search_length > str_length)
594 return false;
595 if (case_sensitive) {
596 return str.compare(str_length - search_length, search_length, search) == 0;
597 } else {
598 return std::equal(search.begin(), search.end(),
599 str.begin() + (str_length - search_length),
600 base::CaseInsensitiveCompare<typename STR::value_type>());
604 bool EndsWith(const std::string& str, const std::string& search,
605 bool case_sensitive) {
606 return EndsWithT(str, search, case_sensitive);
609 bool EndsWith(const std::wstring& str, const std::wstring& search,
610 bool case_sensitive) {
611 return EndsWithT(str, search, case_sensitive);
614 #if !defined(WCHAR_T_IS_UTF16)
615 bool EndsWith(const string16& str, const string16& search,
616 bool case_sensitive) {
617 return EndsWithT(str, search, case_sensitive);
619 #endif
// Unit suffixes used by FormatBytesUnlocalized, indexed by the number of
// times the byte count was divided by 1024.
static const char* const kByteStringsUnlocalized[] = {
  " B",
  " kB",
  " MB",
  " GB",
  " TB",
  " PB"
};
630 string16 FormatBytesUnlocalized(int64 bytes) {
631 double unit_amount = static_cast<double>(bytes);
632 size_t dimension = 0;
633 const int kKilo = 1024;
634 while (unit_amount >= kKilo &&
635 dimension < arraysize(kByteStringsUnlocalized) - 1) {
636 unit_amount /= kKilo;
637 dimension++;
640 char buf[64];
641 if (bytes != 0 && dimension > 0 && unit_amount < 100) {
642 base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount,
643 kByteStringsUnlocalized[dimension]);
644 } else {
645 base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount,
646 kByteStringsUnlocalized[dimension]);
649 return ASCIIToUTF16(buf);
// Replaces occurrences of |find_this| in |*str| with |replace_with|, starting
// the search at |start_offset|. Replaces only the first occurrence unless
// |replace_all| is true. Does nothing if |start_offset| is npos or not inside
// the string, or if |find_this| is empty.
template<class StringType>
void DoReplaceSubstringsAfterOffset(StringType* str,
                                    typename StringType::size_type start_offset,
                                    const StringType& find_this,
                                    const StringType& replace_with,
                                    bool replace_all) {
  if ((start_offset == StringType::npos) || (start_offset >= str->length()))
    return;

  DCHECK(!find_this.empty());
  // An empty search string matches at every offset, so the loop below would
  // never terminate when |replace_all| is set. Bail out if the DCHECK above
  // did not stop us.
  if (find_this.empty())
    return;

  for (typename StringType::size_type offs(str->find(find_this, start_offset));
      offs != StringType::npos; offs = str->find(find_this, offs)) {
    str->replace(offs, find_this.length(), replace_with);
    // Continue after the inserted text so it is not searched again.
    offs += replace_with.length();

    if (!replace_all)
      break;
  }
}
672 void ReplaceFirstSubstringAfterOffset(string16* str,
673 string16::size_type start_offset,
674 const string16& find_this,
675 const string16& replace_with) {
676 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
677 false); // replace first instance
680 void ReplaceFirstSubstringAfterOffset(std::string* str,
681 std::string::size_type start_offset,
682 const std::string& find_this,
683 const std::string& replace_with) {
684 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
685 false); // replace first instance
688 void ReplaceSubstringsAfterOffset(string16* str,
689 string16::size_type start_offset,
690 const string16& find_this,
691 const string16& replace_with) {
692 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
693 true); // replace all instances
696 void ReplaceSubstringsAfterOffset(std::string* str,
697 std::string::size_type start_offset,
698 const std::string& find_this,
699 const std::string& replace_with) {
700 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
701 true); // replace all instances
// Splits |str| at any character contained in |delimiters| and stores the
// non-empty pieces in |tokens| (which is cleared first). Consecutive
// delimiters produce no empty tokens. Returns the number of tokens.
template<typename STR>
static size_t TokenizeT(const STR& str,
                        const STR& delimiters,
                        std::vector<STR>* tokens) {
  tokens->clear();

  typename STR::size_type start = str.find_first_not_of(delimiters);
  while (start != STR::npos) {
    typename STR::size_type end = str.find_first_of(delimiters, start + 1);
    if (end == STR::npos) {
      // Last token runs to the end of the string.
      tokens->push_back(str.substr(start));
      break;
    } else {
      tokens->push_back(str.substr(start, end - start));
      start = str.find_first_not_of(delimiters, end + 1);
    }
  }

  return tokens->size();
}
726 size_t Tokenize(const std::wstring& str,
727 const std::wstring& delimiters,
728 std::vector<std::wstring>* tokens) {
729 return TokenizeT(str, delimiters, tokens);
732 #if !defined(WCHAR_T_IS_UTF16)
733 size_t Tokenize(const string16& str,
734 const string16& delimiters,
735 std::vector<string16>* tokens) {
736 return TokenizeT(str, delimiters, tokens);
738 #endif
740 size_t Tokenize(const std::string& str,
741 const std::string& delimiters,
742 std::vector<std::string>* tokens) {
743 return TokenizeT(str, delimiters, tokens);
746 size_t Tokenize(const base::StringPiece& str,
747 const base::StringPiece& delimiters,
748 std::vector<base::StringPiece>* tokens) {
749 return TokenizeT(str, delimiters, tokens);
// Concatenates |parts|, inserting |sep| between consecutive elements.
// Returns an empty string when |parts| is empty.
template<typename STR>
static STR JoinStringT(const std::vector<STR>& parts, const STR& sep) {
  if (parts.empty())
    return STR();

  // Start with the first element, then append "sep + element" for the rest.
  STR result(parts[0]);
  typename std::vector<STR>::const_iterator iter = parts.begin();
  ++iter;

  for (; iter != parts.end(); ++iter) {
    result += sep;
    result += *iter;
  }

  return result;
}
769 std::string JoinString(const std::vector<std::string>& parts, char sep) {
770 return JoinStringT(parts, std::string(1, sep));
773 string16 JoinString(const std::vector<string16>& parts, char16 sep) {
774 return JoinStringT(parts, string16(1, sep));
777 std::string JoinString(const std::vector<std::string>& parts,
778 const std::string& separator) {
779 return JoinStringT(parts, separator);
782 string16 JoinString(const std::vector<string16>& parts,
783 const string16& separator) {
784 return JoinStringT(parts, separator);
787 template<class FormatStringType, class OutStringType>
788 OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string,
789 const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) {
790 size_t substitutions = subst.size();
792 size_t sub_length = 0;
793 for (typename std::vector<OutStringType>::const_iterator iter = subst.begin();
794 iter != subst.end(); ++iter) {
795 sub_length += iter->length();
798 OutStringType formatted;
799 formatted.reserve(format_string.length() + sub_length);
801 std::vector<ReplacementOffset> r_offsets;
802 for (typename FormatStringType::const_iterator i = format_string.begin();
803 i != format_string.end(); ++i) {
804 if ('$' == *i) {
805 if (i + 1 != format_string.end()) {
806 ++i;
807 DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i;
808 if ('$' == *i) {
809 while (i != format_string.end() && '$' == *i) {
810 formatted.push_back('$');
811 ++i;
813 --i;
814 } else {
815 uintptr_t index = 0;
816 while (i != format_string.end() && '0' <= *i && *i <= '9') {
817 index *= 10;
818 index += *i - '0';
819 ++i;
821 --i;
822 index -= 1;
823 if (offsets) {
824 ReplacementOffset r_offset(index,
825 static_cast<int>(formatted.size()));
826 r_offsets.insert(std::lower_bound(r_offsets.begin(),
827 r_offsets.end(),
828 r_offset,
829 &CompareParameter),
830 r_offset);
832 if (index < substitutions)
833 formatted.append(subst.at(index));
836 } else {
837 formatted.push_back(*i);
840 if (offsets) {
841 for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin();
842 i != r_offsets.end(); ++i) {
843 offsets->push_back(i->offset);
846 return formatted;
849 string16 ReplaceStringPlaceholders(const string16& format_string,
850 const std::vector<string16>& subst,
851 std::vector<size_t>* offsets) {
852 return DoReplaceStringPlaceholders(format_string, subst, offsets);
855 std::string ReplaceStringPlaceholders(const base::StringPiece& format_string,
856 const std::vector<std::string>& subst,
857 std::vector<size_t>* offsets) {
858 return DoReplaceStringPlaceholders(format_string, subst, offsets);
861 string16 ReplaceStringPlaceholders(const string16& format_string,
862 const string16& a,
863 size_t* offset) {
864 std::vector<size_t> offsets;
865 std::vector<string16> subst;
866 subst.push_back(a);
867 string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);
869 DCHECK(offsets.size() == 1);
870 if (offset) {
871 *offset = offsets[0];
873 return result;
876 static bool IsWildcard(base_icu::UChar32 character) {
877 return character == '*' || character == '?';
880 // Move the strings pointers to the point where they start to differ.
881 template <typename CHAR, typename NEXT>
882 static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,
883 const CHAR** string, const CHAR* string_end,
884 NEXT next) {
885 const CHAR* escape = NULL;
886 while (*pattern != pattern_end && *string != string_end) {
887 if (!escape && IsWildcard(**pattern)) {
888 // We don't want to match wildcard here, except if it's escaped.
889 return;
892 // Check if the escapement char is found. If so, skip it and move to the
893 // next character.
894 if (!escape && **pattern == '\\') {
895 escape = *pattern;
896 next(pattern, pattern_end);
897 continue;
900 // Check if the chars match, if so, increment the ptrs.
901 const CHAR* pattern_next = *pattern;
902 const CHAR* string_next = *string;
903 base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
904 if (pattern_char == next(&string_next, string_end) &&
905 pattern_char != (base_icu::UChar32) CBU_SENTINEL) {
906 *pattern = pattern_next;
907 *string = string_next;
908 } else {
909 // Uh ho, it did not match, we are done. If the last char was an
910 // escapement, that means that it was an error to advance the ptr here,
911 // let's put it back where it was. This also mean that the MatchPattern
912 // function will return false because if we can't match an escape char
913 // here, then no one will.
914 if (escape) {
915 *pattern = escape;
917 return;
920 escape = NULL;
// Advances |*pattern| past any leading run of wildcard characters, stopping
// at the first non-wildcard or at |end|.
template <typename CHAR, typename NEXT>
static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
  while (*pattern != end) {
    if (!IsWildcard(**pattern))
      return;
    next(pattern, end);
  }
}
// Recursive wildcard matcher. Returns true if the range [eval, eval_end)
// matches [pattern, pattern_end), where '?' matches zero or one character and
// '*' matches any run of characters. |next| decodes and steps over one
// character. Recursion is limited to kMaxDepth levels; deeper patterns are
// reported as non-matching.
template <typename CHAR, typename NEXT>
static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
                          const CHAR* pattern, const CHAR* pattern_end,
                          int depth,
                          NEXT next) {
  const int kMaxDepth = 16;
  if (depth > kMaxDepth)
    return false;

  // Eat all the matching chars.
  EatSameChars(&pattern, pattern_end, &eval, eval_end, next);

  // If the string is empty, then the pattern must be empty too, or contains
  // only wildcards.
  if (eval == eval_end) {
    EatWildcard(&pattern, pattern_end, next);
    return pattern == pattern_end;
  }

  // Pattern is empty but not string, this is not a match.
  if (pattern == pattern_end)
    return false;

  // If this is a question mark, then we need to compare the rest with
  // the current string or the string with one character eaten.
  const CHAR* next_pattern = pattern;
  next(&next_pattern, pattern_end);
  if (pattern[0] == '?') {
    if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
                      depth + 1, next))
      return true;
    const CHAR* next_eval = eval;
    next(&next_eval, eval_end);
    if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
                      depth + 1, next))
      return true;
  }

  // This is a *, try to match all the possible substrings with the remainder
  // of the pattern.
  if (pattern[0] == '*') {
    // Collapse duplicate wild cards (********** into *) so that the
    // method does not recurse unnecessarily. http://crbug.com/52839
    EatWildcard(&next_pattern, pattern_end, next);

    while (eval != eval_end) {
      if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
                        depth + 1, next))
        return true;
      // NOTE(review): this steps by one code unit rather than one decoded
      // character (unlike the '?' branch, which uses |next|) - confirm that
      // this is intended.
      eval++;
    }

    // We reached the end of the string, let see if the pattern contains only
    // wildcards.
    if (eval == eval_end) {
      EatWildcard(&pattern, pattern_end, next);
      if (pattern != pattern_end)
        return false;
      return true;
    }
  }

  return false;
}
998 struct NextCharUTF8 {
999 base_icu::UChar32 operator()(const char** p, const char* end) {
1000 base_icu::UChar32 c;
1001 int offset = 0;
1002 CBU8_NEXT(*p, offset, end - *p, c);
1003 *p += offset;
1004 return c;
1008 struct NextCharUTF16 {
1009 base_icu::UChar32 operator()(const char16** p, const char16* end) {
1010 base_icu::UChar32 c;
1011 int offset = 0;
1012 CBU16_NEXT(*p, offset, end - *p, c);
1013 *p += offset;
1014 return c;
1018 bool MatchPattern(const base::StringPiece& eval,
1019 const base::StringPiece& pattern) {
1020 return MatchPatternT(eval.data(), eval.data() + eval.size(),
1021 pattern.data(), pattern.data() + pattern.size(),
1022 0, NextCharUTF8());
1025 bool MatchPattern(const string16& eval, const string16& pattern) {
1026 return MatchPatternT(eval.c_str(), eval.c_str() + eval.size(),
1027 pattern.c_str(), pattern.c_str() + pattern.size(),
1028 0, NextCharUTF16());
1031 // The following code is compatible with the OpenBSD lcpy interface. See:
1032 // http://www.gratisoft.us/todd/papers/strlcpy.html
1033 // ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
1035 namespace {
// Copies the NUL-terminated string |src| into |dst|, writing at most
// |dst_size| characters including the terminator, so |dst| is always
// terminated when |dst_size| is non-zero. Returns the length of |src| in
// characters; a return value >= |dst_size| means the copy was truncated.
template <typename CHAR>
size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
  for (size_t i = 0; i < dst_size; ++i) {
    if ((dst[i] = src[i]) == 0)  // We hit and copied the terminating NULL.
      return i;
  }

  // We were left off at dst_size.  We over copied 1 byte.  Null terminate.
  if (dst_size != 0)
    dst[dst_size - 1] = 0;

  // Count the rest of the |src|, and return its length in characters.
  while (src[dst_size]) ++dst_size;
  return dst_size;
}
1053 } // namespace
1055 size_t base::strlcpy(char* dst, const char* src, size_t dst_size) {
1056 return lcpyT<char>(dst, src, dst_size);
1058 size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
1059 return lcpyT<wchar_t>(dst, src, dst_size);