Add ICU message format support
[chromium-blink-merge.git] / base / strings / pattern.cc
blobaf30aab86da39f3947598b6d3d8d1e9af2d24a44
1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/strings/pattern.h"
7 #include "base/third_party/icu/icu_utf.h"
9 namespace base {
11 namespace {
13 static bool IsWildcard(base_icu::UChar32 character) {
14 return character == '*' || character == '?';
17 // Move the strings pointers to the point where they start to differ.
18 template <typename CHAR, typename NEXT>
19 static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,
20 const CHAR** string, const CHAR* string_end,
21 NEXT next) {
22 const CHAR* escape = NULL;
23 while (*pattern != pattern_end && *string != string_end) {
24 if (!escape && IsWildcard(**pattern)) {
25 // We don't want to match wildcard here, except if it's escaped.
26 return;
29 // Check if the escapement char is found. If so, skip it and move to the
30 // next character.
31 if (!escape && **pattern == '\\') {
32 escape = *pattern;
33 next(pattern, pattern_end);
34 continue;
37 // Check if the chars match, if so, increment the ptrs.
38 const CHAR* pattern_next = *pattern;
39 const CHAR* string_next = *string;
40 base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
41 if (pattern_char == next(&string_next, string_end) &&
42 pattern_char != CBU_SENTINEL) {
43 *pattern = pattern_next;
44 *string = string_next;
45 } else {
46 // Uh oh, it did not match, we are done. If the last char was an
47 // escapement, that means that it was an error to advance the ptr here,
48 // let's put it back where it was. This also mean that the MatchPattern
49 // function will return false because if we can't match an escape char
50 // here, then no one will.
51 if (escape) {
52 *pattern = escape;
54 return;
57 escape = NULL;
61 template <typename CHAR, typename NEXT>
62 static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
63 while (*pattern != end) {
64 if (!IsWildcard(**pattern))
65 return;
66 next(pattern, end);
70 template <typename CHAR, typename NEXT>
71 static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
72 const CHAR* pattern, const CHAR* pattern_end,
73 int depth,
74 NEXT next) {
75 const int kMaxDepth = 16;
76 if (depth > kMaxDepth)
77 return false;
79 // Eat all the matching chars.
80 EatSameChars(&pattern, pattern_end, &eval, eval_end, next);
82 // If the string is empty, then the pattern must be empty too, or contains
83 // only wildcards.
84 if (eval == eval_end) {
85 EatWildcard(&pattern, pattern_end, next);
86 return pattern == pattern_end;
89 // Pattern is empty but not string, this is not a match.
90 if (pattern == pattern_end)
91 return false;
93 // If this is a question mark, then we need to compare the rest with
94 // the current string or the string with one character eaten.
95 const CHAR* next_pattern = pattern;
96 next(&next_pattern, pattern_end);
97 if (pattern[0] == '?') {
98 if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
99 depth + 1, next))
100 return true;
101 const CHAR* next_eval = eval;
102 next(&next_eval, eval_end);
103 if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
104 depth + 1, next))
105 return true;
108 // This is a *, try to match all the possible substrings with the remainder
109 // of the pattern.
110 if (pattern[0] == '*') {
111 // Collapse duplicate wild cards (********** into *) so that the
112 // method does not recurse unnecessarily. http://crbug.com/52839
113 EatWildcard(&next_pattern, pattern_end, next);
115 while (eval != eval_end) {
116 if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
117 depth + 1, next))
118 return true;
119 eval++;
122 // We reached the end of the string, let see if the pattern contains only
123 // wildcards.
124 if (eval == eval_end) {
125 EatWildcard(&pattern, pattern_end, next);
126 if (pattern != pattern_end)
127 return false;
128 return true;
132 return false;
135 struct NextCharUTF8 {
136 base_icu::UChar32 operator()(const char** p, const char* end) {
137 base_icu::UChar32 c;
138 int offset = 0;
139 CBU8_NEXT(*p, offset, end - *p, c);
140 *p += offset;
141 return c;
145 struct NextCharUTF16 {
146 base_icu::UChar32 operator()(const char16** p, const char16* end) {
147 base_icu::UChar32 c;
148 int offset = 0;
149 CBU16_NEXT(*p, offset, end - *p, c);
150 *p += offset;
151 return c;
155 } // namespace
157 bool MatchPattern(const StringPiece& eval, const StringPiece& pattern) {
158 return MatchPatternT(eval.data(), eval.data() + eval.size(),
159 pattern.data(), pattern.data() + pattern.size(),
160 0, NextCharUTF8());
163 bool MatchPattern(const StringPiece16& eval, const StringPiece16& pattern) {
164 return MatchPatternT(eval.data(), eval.data() + eval.size(),
165 pattern.data(), pattern.data() + pattern.size(),
166 0, NextCharUTF16());
169 } // namespace base