1 //===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements a glob pattern matcher.
11 //===----------------------------------------------------------------------===//
13 #include "llvm/Support/GlobPattern.h"
14 #include "llvm/ADT/StringRef.h"
15 #include "llvm/Support/Errc.h"
19 // Expands character ranges and returns a bitmap.
20 // For example, "a-cf-hz" is expanded to "abcfghz".
21 static Expected
<BitVector
> expand(StringRef S
, StringRef Original
) {
22 BitVector
BV(256, false);
32 // If it doesn't start with something like X-Y,
33 // consume the first character and proceed.
40 // It must be in the form of X-Y.
41 // Validate it and then interpret the range.
43 return make_error
<StringError
>("invalid glob pattern: " + Original
,
44 errc::invalid_argument
);
46 for (int C
= Start
; C
<= End
; ++C
)
47 BV
[(uint8_t)C
] = true;
52 BV
[(uint8_t)C
] = true;
56 // Identify brace expansions in S and return the list of patterns they expand
58 static Expected
<SmallVector
<std::string
, 1>>
59 parseBraceExpansions(StringRef S
, std::optional
<size_t> MaxSubPatterns
) {
60 SmallVector
<std::string
> SubPatterns
= {S
.str()};
61 if (!MaxSubPatterns
|| !S
.contains('{'))
62 return std::move(SubPatterns
);
64 struct BraceExpansion
{
67 SmallVector
<StringRef
, 2> Terms
;
69 SmallVector
<BraceExpansion
, 0> BraceExpansions
;
71 BraceExpansion
*CurrentBE
= nullptr;
73 for (size_t I
= 0, E
= S
.size(); I
!= E
; ++I
) {
75 I
= S
.find(']', I
+ 2);
76 if (I
== std::string::npos
)
77 return make_error
<StringError
>("invalid glob pattern, unmatched '['",
78 errc::invalid_argument
);
79 } else if (S
[I
] == '{') {
81 return make_error
<StringError
>(
82 "nested brace expansions are not supported",
83 errc::invalid_argument
);
84 CurrentBE
= &BraceExpansions
.emplace_back();
87 } else if (S
[I
] == ',') {
90 CurrentBE
->Terms
.push_back(S
.substr(TermBegin
, I
- TermBegin
));
92 } else if (S
[I
] == '}') {
95 if (CurrentBE
->Terms
.empty())
96 return make_error
<StringError
>(
97 "empty or singleton brace expansions are not supported",
98 errc::invalid_argument
);
99 CurrentBE
->Terms
.push_back(S
.substr(TermBegin
, I
- TermBegin
));
100 CurrentBE
->Length
= I
- CurrentBE
->Start
+ 1;
102 } else if (S
[I
] == '\\') {
104 return make_error
<StringError
>("invalid glob pattern, stray '\\'",
105 errc::invalid_argument
);
109 return make_error
<StringError
>("incomplete brace expansion",
110 errc::invalid_argument
);
112 size_t NumSubPatterns
= 1;
113 for (auto &BE
: BraceExpansions
) {
114 if (NumSubPatterns
> std::numeric_limits
<size_t>::max() / BE
.Terms
.size()) {
115 NumSubPatterns
= std::numeric_limits
<size_t>::max();
118 NumSubPatterns
*= BE
.Terms
.size();
120 if (NumSubPatterns
> *MaxSubPatterns
)
121 return make_error
<StringError
>("too many brace expansions",
122 errc::invalid_argument
);
123 // Replace brace expansions in reverse order so that we don't invalidate
124 // earlier start indices
125 for (auto &BE
: reverse(BraceExpansions
)) {
126 SmallVector
<std::string
> OrigSubPatterns
;
127 std::swap(SubPatterns
, OrigSubPatterns
);
128 for (StringRef Term
: BE
.Terms
)
129 for (StringRef Orig
: OrigSubPatterns
)
130 SubPatterns
.emplace_back(Orig
).replace(BE
.Start
, BE
.Length
, Term
);
132 return std::move(SubPatterns
);
135 Expected
<GlobPattern
>
136 GlobPattern::create(StringRef S
, std::optional
<size_t> MaxSubPatterns
) {
139 // Store the prefix that does not contain any metacharacter.
140 size_t PrefixSize
= S
.find_first_of("?*[{\\");
141 Pat
.Prefix
= S
.substr(0, PrefixSize
);
142 if (PrefixSize
== std::string::npos
)
144 S
= S
.substr(PrefixSize
);
146 SmallVector
<std::string
, 1> SubPats
;
147 if (auto Err
= parseBraceExpansions(S
, MaxSubPatterns
).moveInto(SubPats
))
148 return std::move(Err
);
149 for (StringRef SubPat
: SubPats
) {
150 auto SubGlobOrErr
= SubGlobPattern::create(SubPat
);
152 return SubGlobOrErr
.takeError();
153 Pat
.SubGlobs
.push_back(*SubGlobOrErr
);
159 Expected
<GlobPattern::SubGlobPattern
>
160 GlobPattern::SubGlobPattern::create(StringRef S
) {
164 Pat
.Pat
.assign(S
.begin(), S
.end());
165 for (size_t I
= 0, E
= S
.size(); I
!= E
; ++I
) {
167 // ']' is allowed as the first character of a character class. '[]' is
168 // invalid. So, just skip the first character.
170 size_t J
= S
.find(']', I
+ 1);
171 if (J
== StringRef::npos
)
172 return make_error
<StringError
>("invalid glob pattern, unmatched '['",
173 errc::invalid_argument
);
174 StringRef Chars
= S
.substr(I
, J
- I
);
175 bool Invert
= S
[I
] == '^' || S
[I
] == '!';
176 Expected
<BitVector
> BV
=
177 Invert
? expand(Chars
.substr(1), S
) : expand(Chars
, S
);
179 return BV
.takeError();
182 Pat
.Brackets
.push_back(Bracket
{J
+ 1, std::move(*BV
)});
184 } else if (S
[I
] == '\\') {
186 return make_error
<StringError
>("invalid glob pattern, stray '\\'",
187 errc::invalid_argument
);
193 bool GlobPattern::match(StringRef S
) const {
194 if (!S
.consume_front(Prefix
))
196 if (SubGlobs
.empty() && S
.empty())
198 for (auto &Glob
: SubGlobs
)
204 // Factor the pattern into segments split by '*'. The segment is matched
205 // sequentianlly by finding the first occurrence past the end of the previous
207 bool GlobPattern::SubGlobPattern::match(StringRef Str
) const {
208 const char *P
= Pat
.data(), *SegmentBegin
= nullptr, *S
= Str
.data(),
210 const char *const PEnd
= P
+ Pat
.size(), *const End
= S
+ Str
.size();
211 size_t B
= 0, SavedB
= 0;
215 else if (*P
== '*') {
216 // The non-* substring on the left of '*' matches the tail of S. Save the
217 // positions to be used by backtracking if we see a mismatch later.
222 } else if (*P
== '[') {
223 if (Brackets
[B
].Bytes
[uint8_t(*S
)]) {
224 P
= Pat
.data() + Brackets
[B
++].NextOffset
;
228 } else if (*P
== '\\') {
234 } else if (*P
== *S
|| *P
== '?') {
241 // We have seen a '*'. Backtrack to the saved positions. Shift the S
242 // position to probe the next starting position in the segment.
247 // All bytes in Str have been matched. Return true if the rest part of Pat is
248 // empty or contains only '*'.
249 return getPat().find_first_not_of('*', P
- Pat
.data()) == std::string::npos
;