1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/strings/pattern.h"
7 #include "base/third_party/icu/icu_utf.h"
13 static bool IsWildcard(base_icu::UChar32 character
) {
14 return character
== '*' || character
== '?';
17 // Move the strings pointers to the point where they start to differ.
18 template <typename CHAR
, typename NEXT
>
19 static void EatSameChars(const CHAR
** pattern
, const CHAR
* pattern_end
,
20 const CHAR
** string
, const CHAR
* string_end
,
22 const CHAR
* escape
= NULL
;
23 while (*pattern
!= pattern_end
&& *string
!= string_end
) {
24 if (!escape
&& IsWildcard(**pattern
)) {
25 // We don't want to match wildcard here, except if it's escaped.
29 // Check if the escapement char is found. If so, skip it and move to the
31 if (!escape
&& **pattern
== '\\') {
33 next(pattern
, pattern_end
);
37 // Check if the chars match, if so, increment the ptrs.
38 const CHAR
* pattern_next
= *pattern
;
39 const CHAR
* string_next
= *string
;
40 base_icu::UChar32 pattern_char
= next(&pattern_next
, pattern_end
);
41 if (pattern_char
== next(&string_next
, string_end
) &&
42 pattern_char
!= CBU_SENTINEL
) {
43 *pattern
= pattern_next
;
44 *string
= string_next
;
46 // Uh oh, it did not match, we are done. If the last char was an
47 // escapement, that means that it was an error to advance the ptr here,
48 // let's put it back where it was. This also mean that the MatchPattern
49 // function will return false because if we can't match an escape char
50 // here, then no one will.
61 template <typename CHAR
, typename NEXT
>
62 static void EatWildcard(const CHAR
** pattern
, const CHAR
* end
, NEXT next
) {
63 while (*pattern
!= end
) {
64 if (!IsWildcard(**pattern
))
70 template <typename CHAR
, typename NEXT
>
71 static bool MatchPatternT(const CHAR
* eval
, const CHAR
* eval_end
,
72 const CHAR
* pattern
, const CHAR
* pattern_end
,
75 const int kMaxDepth
= 16;
76 if (depth
> kMaxDepth
)
79 // Eat all the matching chars.
80 EatSameChars(&pattern
, pattern_end
, &eval
, eval_end
, next
);
82 // If the string is empty, then the pattern must be empty too, or contains
84 if (eval
== eval_end
) {
85 EatWildcard(&pattern
, pattern_end
, next
);
86 return pattern
== pattern_end
;
89 // Pattern is empty but not string, this is not a match.
90 if (pattern
== pattern_end
)
93 // If this is a question mark, then we need to compare the rest with
94 // the current string or the string with one character eaten.
95 const CHAR
* next_pattern
= pattern
;
96 next(&next_pattern
, pattern_end
);
97 if (pattern
[0] == '?') {
98 if (MatchPatternT(eval
, eval_end
, next_pattern
, pattern_end
,
101 const CHAR
* next_eval
= eval
;
102 next(&next_eval
, eval_end
);
103 if (MatchPatternT(next_eval
, eval_end
, next_pattern
, pattern_end
,
108 // This is a *, try to match all the possible substrings with the remainder
110 if (pattern
[0] == '*') {
111 // Collapse duplicate wild cards (********** into *) so that the
112 // method does not recurse unnecessarily. http://crbug.com/52839
113 EatWildcard(&next_pattern
, pattern_end
, next
);
115 while (eval
!= eval_end
) {
116 if (MatchPatternT(eval
, eval_end
, next_pattern
, pattern_end
,
122 // We reached the end of the string, let see if the pattern contains only
124 if (eval
== eval_end
) {
125 EatWildcard(&pattern
, pattern_end
, next
);
126 if (pattern
!= pattern_end
)
135 struct NextCharUTF8
{
136 base_icu::UChar32
operator()(const char** p
, const char* end
) {
139 CBU8_NEXT(*p
, offset
, end
- *p
, c
);
145 struct NextCharUTF16
{
146 base_icu::UChar32
operator()(const char16
** p
, const char16
* end
) {
149 CBU16_NEXT(*p
, offset
, end
- *p
, c
);
157 bool MatchPattern(const StringPiece
& eval
, const StringPiece
& pattern
) {
158 return MatchPatternT(eval
.data(), eval
.data() + eval
.size(),
159 pattern
.data(), pattern
.data() + pattern
.size(),
163 bool MatchPattern(const StringPiece16
& eval
, const StringPiece16
& pattern
) {
164 return MatchPatternT(eval
.data(), eval
.data() + eval
.size(),
165 pattern
.data(), pattern
.data() + pattern
.size(),