1 //===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements a glob pattern matcher.
11 //===----------------------------------------------------------------------===//
13 #include "llvm/Support/GlobPattern.h"
14 #include "llvm/ADT/ArrayRef.h"
15 #include "llvm/ADT/Optional.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/Support/Errc.h"
// Returns true if S contains at least one of the characters '?', '*', '['
// or '\' (the backslash is written "\\" in the literal below).
// NOTE(review): the closing brace of this function is not visible in this
// view of the file.
21 static bool hasWildcard(StringRef S
) {
22 return S
.find_first_of("?*[\\") != StringRef::npos
;
25 // Expands character ranges and returns a bitmap.
26 // For example, "a-cf-hz" is expanded to "abcfghz".
//
// The bitmap is a 256-entry BitVector indexed by the character value cast to
// uint8_t; a set bit marks a character that is part of the expanded set.
// In the visible code, Original is used only to build the error message.
// On failure a StringError carrying errc::invalid_argument is returned.
//
// NOTE(review): the definitions of Start and End, the condition guarding the
// error return, and the header of the final loop are not visible in this
// view; confirm the details below against the full file.
27 static Expected
<BitVector
> expand(StringRef S
, StringRef Original
) {
// Start with all 256 bits cleared.
28 BitVector
BV(256, false);
38 // If it doesn't start with something like X-Y,
39 // consume the first character and proceed.
46 // It must be in the form of X-Y.
47 // Validate it and then interpret the range.
// Error path: the message is "invalid glob pattern: " followed by Original.
49 return make_error
<StringError
>("invalid glob pattern: " + Original
,
50 errc::invalid_argument
);
// Set the bit of every character from Start to End, inclusive. The counter is
// an int, presumably so that ++C cannot wrap around when End is the largest
// 8-bit value -- TODO confirm the types of Start and End.
52 for (int C
= Start
; C
<= End
; ++C
)
53 BV
[(uint8_t)C
] = true;
// Set the bit of a single character C. Presumably this handles the
// characters left over once no further X-Y range can be formed; the enclosing
// loop is not visible here.
58 BV
[(uint8_t)C
] = true;
62 // This is a scanner for the glob pattern.
63 // A glob pattern token is one of "*", "?", "\", "[<chars>]", "[^<chars>]"
64 // (which is a negative form of "[<chars>]"), "[!<chars>]" (which is
65 // equivalent to "[^<chars>]"), or a non-meta character.
66 // This function returns the first token in S.
//
// S is taken by reference; in the character-class branch below it is
// reassigned to the text following the closing ']'. Original is used in the
// error message and forwarded to expand().
// On a malformed character class a StringError carrying
// errc::invalid_argument is returned.
//
// NOTE(review): the dispatch on the first character of S (including its case
// labels) is not visible in this view, so which branch handles which token is
// inferred from the existing comments only.
67 static Expected
<BitVector
> scan(StringRef
&S
, StringRef Original
) {
71 // '*' is represented by an empty bitvector.
72 // All other bitvectors are 256-bit long.
// A bitvector with all 256 bits set, i.e. every character value is accepted.
// Presumably this is the '?' token -- the selecting condition is not visible.
76 return BitVector(256, true);
78 // ']' is allowed as the first character of a character class. '[]' is
79 // invalid. So, just skip the first character.
// The search for the closing ']' therefore starts at index 2.
80 size_t End
= S
.find(']', 2);
// No closing ']' was found: the pattern is malformed.
81 if (End
== StringRef::npos
)
82 return make_error
<StringError
>("invalid glob pattern: " + Original
,
83 errc::invalid_argument
);
// Chars is the text strictly between S[0] and the closing ']'.
85 StringRef Chars
= S
.substr(1, End
- 1);
// Advance S past the closing ']'.
86 S
= S
.substr(End
+ 1);
// A leading '^' or '!' marks the negated form; the marker itself is dropped
// before the remaining characters are expanded.
87 if (Chars
.startswith("^") || Chars
.startswith("!")) {
88 Expected
<BitVector
> BV
= expand(Chars
.substr(1), Original
);
// Propagate a failure reported by expand().
// NOTE(review): the check guarding this return and the code producing the
// negated bitmap are not visible in this view.
90 return BV
.takeError();
// Non-negated class: the expanded bitmap is the token.
93 return expand(Chars
, Original
);
96 // Eat this character and fall through below to treat it like a non-meta
// Bitmap accepting exactly one character, the current S[0].
101 BitVector
BV(256, false);
102 BV
[(uint8_t)S
[0]] = true;
// Builds a GlobPattern from the pattern string S.
//
// In the visible code three shapes of pattern are distinguished:
//   * "foo*" with an unescaped trailing '*' and no other metacharacter:
//     the text before the '*' is stored in Pat.Prefix;
//   * "*foo" with no other metacharacter: the text after the '*' is stored in
//     Pat.Suffix;
//   * anything else: the pattern is tokenized with scan() and each token is
//     appended to Pat.Tokens.
// If scan() fails, its error is returned to the caller.
//
// NOTE(review): the declaration of Pat, the body of the no-wildcard branch,
// the header of the tokenizing loop and the successful return statements are
// not visible in this view.
108 Expected
<GlobPattern
> GlobPattern::create(StringRef S
) {
111 // S doesn't contain any metacharacter,
112 // so the regular string comparison should work.
113 if (!hasWildcard(S
)) {
118 // S is something like "foo*", and the '*' is not escaped. We can use
// The second test rejects a trailing "\*" (an escaped star); the third
// requires that nothing before the final '*' is a metacharacter.
120 if (S
.endswith("*") && !S
.endswith("\\*") && !hasWildcard(S
.drop_back())) {
121 Pat
.Prefix
= S
.drop_back();
125 // S is something like "*foo". We can use endswith().
126 if (S
.startswith("*") && !hasWildcard(S
.drop_front())) {
127 Pat
.Suffix
= S
.drop_front();
131 // Otherwise, we need to do real glob pattern matching.
132 // Parse the pattern now.
// Keep the unmodified pattern for error messages; scan() takes S by
// reference.
133 StringRef Original
= S
;
135 Expected
<BitVector
> BV
= scan(S
, Original
);
// Propagate a scanning failure to the caller.
137 return BV
.takeError();
// Append the scanned token to the token list.
138 Pat
.Tokens
.push_back(*BV
);
// Tests the string S against this pattern.
//
// Three results are visible: S.startswith(*Prefix), S.endswith(*Suffix), and
// the general token matcher matchOne(Tokens, S).
// NOTE(review): the conditions that select between these returns are not
// visible in this view; presumably each of the first two is taken when the
// corresponding member was set by create() -- confirm against the full file.
143 bool GlobPattern::match(StringRef S
) const {
147 return S
.startswith(*Prefix
);
149 return S
.endswith(*Suffix
);
150 return matchOne(Tokens
, S
);
153 // Runs glob pattern Pats against string S.
//
// Pats is a sequence of token bitmaps. A bitmap of size 0 is treated as '*'
// (see the test below); any other bitmap is indexed by the next character of
// S cast to uint8_t.
// NOTE(review): the enclosing loop header and all return statements are not
// visible in this view, so the exact termination conditions cannot be
// confirmed from here.
154 bool GlobPattern::matchOne(ArrayRef
<BitVector
> Pats
, StringRef S
) const {
159 // If Pats[0] is '*', try to match Pats[1..] against all possible
160 // tail strings of S to see at least one pattern succeeds.
161 if (Pats
[0].size() == 0) {
// Drop the '*' token; the remaining tokens are matched against tails of S.
162 Pats
= Pats
.slice(1);
164 // Fast path. If a pattern is '*', it matches anything.
// Try every tail S.substr(I) for I in [0, S.size()), recursing on each.
166 for (size_t I
= 0, E
= S
.size(); I
< E
; ++I
)
167 if (matchOne(Pats
, S
.substr(I
)))
172 // If Pats[0] is not '*', it must consume one character.
// The test is true when S is exhausted or its first character is not in the
// set described by Pats[0].
173 if (S
.empty() || !Pats
[0][(uint8_t)S
[0]])
// Move on to the next token.
175 Pats
= Pats
.slice(1);