1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "tools/gn/pattern.h"
7 #include "tools/gn/value.h"
9 const char kPattern_Help
[] =
11 " Patterns are VERY limited regular expressions that are used in\n"
14 " Patterns must match the entire input string to be counted as a match.\n"
15 " In regular expression parlance, there is an implicit \"^...$\"\n"
16 " surrounding your input. If you want to match a substring, you need to\n"
17 " use wildcards at the beginning and end.\n"
19 " There are only two special tokens understood by the pattern matcher.\n"
20 " Everything else is a literal.\n"
22 " * Matches zero or more of any character. It does not depend on the\n"
23 " preceeding character (in regular expression parlance it is\n"
24 " equivalent to \".*\").\n"
26 " \\b Matches a path boundary. This will match the beginning or end of\n"
27 " a string, or a slash.\n"
32 " Matches a string containing \"asdf\" anywhere.\n"
35 " Matches only the exact string \"asdf\".\n"
38 " Matches strings ending in the literal \".cc\".\n"
41 " Matches \"win/foo\" and \"foo/win/bar.cc\" but not \"iwin/foo\".\n";
45 void ParsePattern(const std::string
& s
, std::vector
<Pattern::Subrange
>* out
) {
46 // Set when the last subrange is a literal so we can just append when we
47 // find another literal.
48 Pattern::Subrange
* last_literal
= NULL
;
50 for (size_t i
= 0; i
< s
.size(); i
++) {
52 // Don't allow two **.
53 if (out
->size() == 0 ||
54 (*out
)[out
->size() - 1].type
!= Pattern::Subrange::ANYTHING
)
55 out
->push_back(Pattern::Subrange(Pattern::Subrange::ANYTHING
));
57 } else if (s
[i
] == '\\') {
58 if (i
< s
.size() - 1 && s
[i
+ 1] == 'b') {
59 // "\b" means path boundary.
61 out
->push_back(Pattern::Subrange(Pattern::Subrange::PATH_BOUNDARY
));
64 // Backslash + anything else means that literal char.
66 out
->push_back(Pattern::Subrange(Pattern::Subrange::LITERAL
));
67 last_literal
= &(*out
)[out
->size() - 1];
69 if (i
< s
.size() - 1) {
71 last_literal
->literal
.push_back(s
[i
]);
73 // Single backslash at end, use literal backslash.
74 last_literal
->literal
.push_back('\\');
79 out
->push_back(Pattern::Subrange(Pattern::Subrange::LITERAL
));
80 last_literal
= &(*out
)[out
->size() - 1];
82 last_literal
->literal
.push_back(s
[i
]);
89 Pattern::Pattern(const std::string
& s
) {
90 ParsePattern(s
, &subranges_
);
92 (subranges_
.size() == 2 &&
93 subranges_
[0].type
== Subrange::ANYTHING
&&
94 subranges_
[1].type
== Subrange::LITERAL
);
100 bool Pattern::MatchesString(const std::string
& s
) const {
101 // Empty pattern matches only empty string.
102 if (subranges_
.empty())
106 const std::string
& suffix
= subranges_
[1].literal
;
107 if (suffix
.size() > s
.size())
108 return false; // Too short.
109 return s
.compare(s
.size() - suffix
.size(), suffix
.size(), suffix
) == 0;
112 return RecursiveMatch(s
, 0, 0, true);
115 // We assume the number of ranges is small so recursive is always reasonable.
116 // Could be optimized to only be recursive for *.
117 bool Pattern::RecursiveMatch(const std::string
& s
,
119 size_t subrange_index
,
120 bool allow_implicit_path_boundary
) const {
121 if (subrange_index
>= subranges_
.size()) {
122 // Hit the end of our subranges, the text should also be at the end for a
124 return begin_char
== s
.size();
127 const Subrange
& sr
= subranges_
[subrange_index
];
129 case Subrange::LITERAL
: {
130 if (s
.size() - begin_char
< sr
.literal
.size())
131 return false; // Not enough room.
132 if (s
.compare(begin_char
, sr
.literal
.size(), sr
.literal
) != 0)
133 return false; // Literal doesn't match.
135 // Recursively check the next one.
136 return RecursiveMatch(s
, begin_char
+ sr
.literal
.size(),
137 subrange_index
+ 1, true);
140 case Subrange::PATH_BOUNDARY
: {
141 // When we can accept an implicit path boundary, we have to check both
142 // a match of the literal and the implicit one.
143 if (allow_implicit_path_boundary
&&
144 (begin_char
== 0 || begin_char
== s
.size())) {
145 // At implicit path boundary, see if the rest of the pattern matches.
146 if (RecursiveMatch(s
, begin_char
, subrange_index
+ 1, false))
150 // Check for a literal "/".
151 if (begin_char
< s
.size() && s
[begin_char
] == '/') {
152 // At explicit boundary, see if the rest of the pattern matches.
153 if (RecursiveMatch(s
, begin_char
+ 1, subrange_index
+ 1, true))
159 case Subrange::ANYTHING
: {
160 if (subrange_index
== subranges_
.size() - 1)
161 return true; // * at the end, consider it matching.
163 size_t min_next_size
= sr
.MinSize();
165 // We don't care about exactly what matched as long as there was a match,
166 // so we can do this front-to-back. If we needed the match, we would
167 // normally want "*" to be greedy so would work backwards.
168 for (size_t i
= begin_char
; i
< s
.size() - min_next_size
; i
++) {
169 // Note: this could probably be faster by detecting the type of the
170 // next match in advance and checking for a match in this loop rather
171 // than doing a full recursive call for each character.
172 if (RecursiveMatch(s
, i
, subrange_index
+ 1, true))
185 PatternList::PatternList() {
188 PatternList::~PatternList() {
191 void PatternList::SetFromValue(const Value
& v
, Err
* err
) {
194 if (v
.type() != Value::LIST
) {
195 *err
= Err(v
.origin(), "This value must be a list.");
199 const std::vector
<Value
>& list
= v
.list_value();
200 for (size_t i
= 0; i
< list
.size(); i
++) {
201 if (!list
[i
].VerifyTypeIs(Value::STRING
, err
))
203 patterns_
.push_back(Pattern(list
[i
].string_value()));
207 bool PatternList::MatchesString(const std::string
& s
) const {
208 for (size_t i
= 0; i
< patterns_
.size(); i
++) {
209 if (patterns_
[i
].MatchesString(s
))
215 bool PatternList::MatchesValue(const Value
& v
) const {
216 if (v
.type() == Value::STRING
)
217 return MatchesString(v
.string_value());