[x86] fix assert with horizontal math + broadcast of vector (PR43402)
[llvm-core.git] / lib / Support / GlobPattern.cpp
blob6011be86d77fba053e9b27672e13a3eb1158dd88
1 //===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a glob pattern matcher.
11 //===----------------------------------------------------------------------===//
13 #include "llvm/Support/GlobPattern.h"
14 #include "llvm/ADT/ArrayRef.h"
15 #include "llvm/ADT/Optional.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/Support/Errc.h"
19 using namespace llvm;
21 static bool hasWildcard(StringRef S) {
22 return S.find_first_of("?*[") != StringRef::npos;
25 // Expands character ranges and returns a bitmap.
26 // For example, "a-cf-hz" is expanded to "abcfghz".
27 static Expected<BitVector> expand(StringRef S, StringRef Original) {
28 BitVector BV(256, false);
30 // Expand X-Y.
31 for (;;) {
32 if (S.size() < 3)
33 break;
35 uint8_t Start = S[0];
36 uint8_t End = S[2];
38 // If it doesn't start with something like X-Y,
39 // consume the first character and proceed.
40 if (S[1] != '-') {
41 BV[Start] = true;
42 S = S.substr(1);
43 continue;
46 // It must be in the form of X-Y.
47 // Validate it and then interpret the range.
48 if (Start > End)
49 return make_error<StringError>("invalid glob pattern: " + Original,
50 errc::invalid_argument);
52 for (int C = Start; C <= End; ++C)
53 BV[(uint8_t)C] = true;
54 S = S.substr(3);
57 for (char C : S)
58 BV[(uint8_t)C] = true;
59 return BV;
62 // This is a scanner for the glob pattern.
63 // A glob pattern token is one of "*", "?", "[<chars>]", "[^<chars>]"
64 // (which is a negative form of "[<chars>]"), or a non-meta character.
65 // This function returns the first token in S.
66 static Expected<BitVector> scan(StringRef &S, StringRef Original) {
67 switch (S[0]) {
68 case '*':
69 S = S.substr(1);
70 // '*' is represented by an empty bitvector.
71 // All other bitvectors are 256-bit long.
72 return BitVector();
73 case '?':
74 S = S.substr(1);
75 return BitVector(256, true);
76 case '[': {
77 size_t End = S.find(']', 1);
78 if (End == StringRef::npos)
79 return make_error<StringError>("invalid glob pattern: " + Original,
80 errc::invalid_argument);
82 StringRef Chars = S.substr(1, End - 1);
83 S = S.substr(End + 1);
84 if (Chars.startswith("^")) {
85 Expected<BitVector> BV = expand(Chars.substr(1), Original);
86 if (!BV)
87 return BV.takeError();
88 return BV->flip();
90 return expand(Chars, Original);
92 default:
93 BitVector BV(256, false);
94 BV[(uint8_t)S[0]] = true;
95 S = S.substr(1);
96 return BV;
100 Expected<GlobPattern> GlobPattern::create(StringRef S) {
101 GlobPattern Pat;
103 // S doesn't contain any metacharacter,
104 // so the regular string comparison should work.
105 if (!hasWildcard(S)) {
106 Pat.Exact = S;
107 return Pat;
110 // S is something like "foo*". We can use startswith().
111 if (S.endswith("*") && !hasWildcard(S.drop_back())) {
112 Pat.Prefix = S.drop_back();
113 return Pat;
116 // S is something like "*foo". We can use endswith().
117 if (S.startswith("*") && !hasWildcard(S.drop_front())) {
118 Pat.Suffix = S.drop_front();
119 return Pat;
122 // Otherwise, we need to do real glob pattern matching.
123 // Parse the pattern now.
124 StringRef Original = S;
125 while (!S.empty()) {
126 Expected<BitVector> BV = scan(S, Original);
127 if (!BV)
128 return BV.takeError();
129 Pat.Tokens.push_back(*BV);
131 return Pat;
134 bool GlobPattern::match(StringRef S) const {
135 if (Exact)
136 return S == *Exact;
137 if (Prefix)
138 return S.startswith(*Prefix);
139 if (Suffix)
140 return S.endswith(*Suffix);
141 return matchOne(Tokens, S);
144 // Runs glob pattern Pats against string S.
145 bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const {
146 for (;;) {
147 if (Pats.empty())
148 return S.empty();
150 // If Pats[0] is '*', try to match Pats[1..] against all possible
151 // tail strings of S to see at least one pattern succeeds.
152 if (Pats[0].size() == 0) {
153 Pats = Pats.slice(1);
154 if (Pats.empty())
155 // Fast path. If a pattern is '*', it matches anything.
156 return true;
157 for (size_t I = 0, E = S.size(); I < E; ++I)
158 if (matchOne(Pats, S.substr(I)))
159 return true;
160 return false;
163 // If Pats[0] is not '*', it must consume one character.
164 if (S.empty() || !Pats[0][(uint8_t)S[0]])
165 return false;
166 Pats = Pats.slice(1);
167 S = S.substr(1);