1 //===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "clang/Tooling/Transformer/Parsing.h"
10 #include "clang/AST/Expr.h"
11 #include "clang/ASTMatchers/ASTMatchFinder.h"
12 #include "clang/Basic/CharInfo.h"
13 #include "clang/Basic/SourceLocation.h"
14 #include "clang/Lex/Lexer.h"
15 #include "clang/Tooling/Transformer/RangeSelector.h"
16 #include "clang/Tooling/Transformer/SourceCode.h"
17 #include "llvm/ADT/StringMap.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/Support/Errc.h"
20 #include "llvm/Support/Error.h"
26 using namespace clang
;
27 using namespace transformer
;
29 // FIXME: This implementation is entirely separate from that of the AST
30 // matchers. Given the similarity of the languages and uses of the two parsers,
31 // the two should share a common parsing infrastructure, as should other
32 // Transformer types. We intend to unify this implementation soon to share as
33 // much as possible with the AST Matchers parsing.
38 template <typename
... Ts
> using RangeSelectorOp
= RangeSelector (*)(Ts
...);
41 // The remaining input to be processed.
43 // The original input. Not modified during parsing; only for reference in
// error reporting (`makeParseError` derives the error position and excerpt
// from it).
45 StringRef OriginalInput
;
48 // Represents an intermediate result returned by a parsing function: the parser
// state reached so far together with the value produced, which the parsing
// functions in this file read back as `State` and `Value`. Functions
49 // that don't generate values should instantiate this with `std::nullopt_t`
// and store `std::nullopt` as the value (see `parseChar`).
50 template <typename ResultType
> struct ParseProgress
{
52 // Intermediate result generated by the Parser.
56 template <typename T
> using ExpectedProgress
= llvm::Expected
<ParseProgress
<T
>>;
57 template <typename T
> using ParseFunction
= ExpectedProgress
<T
> (*)(ParseState
);
// Error produced when the input cannot be parsed. It records the position at
// which parsing failed, a description of the failure and an excerpt of the
// input; instances are created by `makeParseError`.
59 class ParseError
: public llvm::ErrorInfo
<ParseError
> {
61 // Required field for all ErrorInfo derivatives.
// Stores the error position, the message and the input excerpt. Both strings
// are taken by value and moved into the corresponding members.
64 ParseError(size_t Pos
, std::string ErrorMsg
, std::string InputExcerpt
)
65 : Pos(Pos
), ErrorMsg(std::move(ErrorMsg
)),
66 Excerpt(std::move(InputExcerpt
)) {}
// Writes a human-readable description of the error to \c OS, beginning with
// the error position and the message.
68 void log(llvm::raw_ostream
&OS
) const override
{
69 OS
<< "parse error at position (" << Pos
<< "): " << ErrorMsg
// Reports that this error cannot be converted to a std::error_code.
73 std::error_code
convertToErrorCode() const override
{
74 return llvm::inconvertibleErrorCode();
77 // Position of the error in the input string.
80 // Excerpt of the input starting at the error position.
// Returns the table that maps selector names to `RangeSelector` factories
// taking a single string argument. `parseRangeSelectorImpl` parses the
// argument of these selectors with `parseStringId`.
87 static const llvm::StringMap
<RangeSelectorOp
<std::string
>> &
88 getUnaryStringSelectors() {
// Function-local static: the map is built on first use and shared by all
// later calls.
89 static const llvm::StringMap
<RangeSelectorOp
<std::string
>> M
= {
92 {"statement", statement
},
93 {"statements", statements
},
95 {"callArgs", callArgs
},
96 {"elseBranch", elseBranch
},
97 {"initListElements", initListElements
}};
// Returns the table that maps selector names to `RangeSelector` factories
// taking a single `RangeSelector` argument. `parseRangeSelectorImpl` parses
// the argument of these selectors by recursing into itself.
101 static const llvm::StringMap
<RangeSelectorOp
<RangeSelector
>> &
102 getUnaryRangeSelectors() {
// Function-local static: the map is built on first use and shared by all
// later calls.
103 static const llvm::StringMap
<RangeSelectorOp
<RangeSelector
>> M
= {
104 {"before", before
}, {"after", after
}, {"expansion", expansion
}};
// Returns the table that maps selector names to `RangeSelector` factories
// taking two string arguments. `parseRangeSelectorImpl` parses both arguments
// of these selectors with `parseStringId`.
108 static const llvm::StringMap
<RangeSelectorOp
<std::string
, std::string
>> &
109 getBinaryStringSelectors() {
// Function-local static: the map is built on first use and shared by all
// later calls.
110 static const llvm::StringMap
<RangeSelectorOp
<std::string
, std::string
>> M
= {
111 {"encloseNodes", encloseNodes
}};
// Returns the table that maps selector names to `RangeSelector` factories
// taking two `RangeSelector` arguments. `parseRangeSelectorImpl` parses both
// arguments of these selectors by recursing into itself.
115 static const llvm::StringMap
<RangeSelectorOp
<RangeSelector
, RangeSelector
>> &
116 getBinaryRangeSelectors() {
// Function-local static: the map is built on first use and shared by all
// later calls.
117 static const llvm::StringMap
<RangeSelectorOp
<RangeSelector
, RangeSelector
>>
118 M
= {{"enclose", enclose
}, {"between", between
}};
// Looks up \c Key in \c Map and reports the outcome as a std::optional.
// Callers in this file test the result for presence and dereference it to
// obtain the mapped element.
// NOTE(review): presumably yields std::nullopt when the key is absent; the
// handling of the lookup result is not visible in this excerpt -- confirm.
122 template <typename Element
>
123 std::optional
<Element
> findOptional(const llvm::StringMap
<Element
> &Map
,
124 llvm::StringRef Key
) {
125 auto it
= Map
.find(Key
);
// Convenience constructor for `ParseProgress`: pairs the parser state
// \c State with the produced result, letting `ResultType` be deduced at the
// call site.
131 template <typename ResultType
>
132 ParseProgress
<ResultType
> makeParseProgress(ParseState State
,
// The result is moved, not copied, into the returned object.
134 return ParseProgress
<ResultType
>{State
, std::move(Result
)};
137 static llvm::Error
makeParseError(const ParseState
&S
, std::string ErrorMsg
) {
138 size_t Pos
= S
.OriginalInput
.size() - S
.Input
.size();
139 return llvm::make_error
<ParseError
>(Pos
, std::move(ErrorMsg
),
140 S
.OriginalInput
.substr(Pos
, 20).str());
143 // Returns a new ParseState that advances \c S by \c N characters.
// \c S is taken by value, so the caller's state is left untouched; only the
// remaining input of the copy is shortened.
144 static ParseState
advance(ParseState S
, size_t N
) {
145 S
.Input
= S
.Input
.drop_front(N
);
149 static StringRef
consumeWhitespace(StringRef S
) {
150 return S
.drop_while([](char c
) { return isASCII(c
) && isWhitespace(c
); });
153 // Parses a single expected character \c c from \c State, skipping preceding
154 // whitespace. Error if the expected character isn't found.
155 static ExpectedProgress
<std::nullopt_t
> parseChar(char c
, ParseState State
) {
156 State
.Input
= consumeWhitespace(State
.Input
);
157 if (State
.Input
.empty() || State
.Input
.front() != c
)
158 return makeParseError(State
,
159 ("expected char not found: " + llvm::Twine(c
)).str());
160 return makeParseProgress(advance(State
, 1), std::nullopt
);
163 // Parses an identifier "token" -- handles preceding whitespace.
// The identifier is the longest prefix of ASCII identifier-continue
// characters. On success, returns it as a string together with the state
// advanced past it.
164 static ExpectedProgress
<std::string
> parseId(ParseState State
) {
165 State
.Input
= consumeWhitespace(State
.Input
);
166 auto Id
= State
.Input
.take_while(
167 [](char c
) { return isASCII(c
) && isAsciiIdentifierContinue(c
); });
// NOTE(review): the condition guarding this error is not visible in this
// excerpt; presumably it fires when no identifier characters were found.
169 return makeParseError(State
, "failed to parse name");
170 return makeParseProgress(advance(State
, Id
.size()), Id
.str());
173 // For consistency with the AST matcher parser and C++ code, node ids are
174 // written as strings. However, we do not support escaping in the string.
// Parses a double-quoted string, skipping preceding whitespace, and returns
// the text between the quotes together with the state advanced past the
// closing quote.
175 static ExpectedProgress
<std::string
> parseStringId(ParseState State
) {
176 State
.Input
= consumeWhitespace(State
.Input
);
177 if (State
.Input
.empty())
178 return makeParseError(State
, "unexpected end of input");
// The string must start with a double quote, which is consumed here.
179 if (!State
.Input
.consume_front("\""))
180 return makeParseError(
182 "expecting string, but encountered other character or end of input");
// Everything up to (not including) the next double quote is the id.
184 StringRef Id
= State
.Input
.take_until([](char c
) { return c
== '"'; });
// If the id spans all of the remaining input, no closing quote was found.
185 if (State
.Input
.size() == Id
.size())
186 return makeParseError(State
, "unterminated string");
187 // Advance past the trailing quote as well.
188 return makeParseProgress(advance(State
, Id
.size() + 1), Id
.str());
191 // Parses a single element surrounded by parens. `Op` is applied to the parsed
192 // result to create the result of this function call.
// `ParseElement` parses the element between the parens. An error from any of
// the three steps (open paren, element, close paren) is returned to the
// caller via `takeError()`.
193 template <typename T
>
194 ExpectedProgress
<RangeSelector
> parseSingle(ParseFunction
<T
> ParseElement
,
195 RangeSelectorOp
<T
> Op
,
// Opening paren.
197 auto P
= parseChar('(', State
);
199 return P
.takeError();
// The element itself, parsed from the state just after the opening paren.
201 auto E
= ParseElement(P
->State
);
203 return E
.takeError();
// Closing paren, parsed from the state just after the element.
205 P
= parseChar(')', E
->State
);
207 return P
.takeError();
// The resulting state is the one after the closing paren; the parsed value is
// moved into `Op`.
209 return makeParseProgress(P
->State
, Op(std::move(E
->Value
)));
212 // Parses a pair of elements surrounded by parens and separated by comma. `Op`
213 // is applied to the parsed results to create the result of this function call.
// Both elements are parsed with the same `ParseElement` function. An error
// from any step (open paren, left element, comma, right element, close paren)
// is returned to the caller via `takeError()`.
214 template <typename T
>
215 ExpectedProgress
<RangeSelector
> parsePair(ParseFunction
<T
> ParseElement
,
216 RangeSelectorOp
<T
, T
> Op
,
// Opening paren.
218 auto P
= parseChar('(', State
);
220 return P
.takeError();
// Left element.
222 auto Left
= ParseElement(P
->State
);
224 return Left
.takeError();
// Separating comma.
226 P
= parseChar(',', Left
->State
);
228 return P
.takeError();
// Right element.
230 auto Right
= ParseElement(P
->State
);
232 return Right
.takeError();
// Closing paren.
234 P
= parseChar(')', Right
->State
);
236 return P
.takeError();
// The resulting state is the one after the closing paren; both parsed values
// are moved into `Op`.
238 return makeParseProgress(P
->State
,
239 Op(std::move(Left
->Value
), std::move(Right
->Value
)));
242 // Parses a range selector expression: a selector name followed by its
243 // parenthesized argument(s). Returns the RangeSelector built by the named
244 // operator on success and an error if the input fails to parse.
// The name is looked up in each selector table in turn; the table in which it
// is found determines how many arguments are parsed and whether they are
// quoted strings or nested range selectors (parsed recursively).
245 static ExpectedProgress
<RangeSelector
>
246 parseRangeSelectorImpl(ParseState State
) {
247 auto Id
= parseId(State
);
249 return Id
.takeError();
251 std::string OpName
= std::move(Id
->Value
);
// One string argument.
252 if (auto Op
= findOptional(getUnaryStringSelectors(), OpName
))
253 return parseSingle(parseStringId
, *Op
, Id
->State
);
// One nested range-selector argument.
255 if (auto Op
= findOptional(getUnaryRangeSelectors(), OpName
))
256 return parseSingle(parseRangeSelectorImpl
, *Op
, Id
->State
);
// Two string arguments.
258 if (auto Op
= findOptional(getBinaryStringSelectors(), OpName
))
259 return parsePair(parseStringId
, *Op
, Id
->State
);
// Two nested range-selector arguments.
261 if (auto Op
= findOptional(getBinaryRangeSelectors(), OpName
))
262 return parsePair(parseRangeSelectorImpl
, *Op
, Id
->State
);
// No table knows the name. The error is reported against the incoming
// `State`, i.e. the position before the name was consumed.
264 return makeParseError(State
, "unknown selector name: " + OpName
);
// Parses \c Input as a single range selector expression. Whitespace after the
// selector is accepted; any other trailing input is reported as a parse
// error. Returns the parsed RangeSelector on success.
267 Expected
<RangeSelector
> transformer::parseRangeSelector(llvm::StringRef Input
) {
// Initially nothing has been consumed, so both fields of the state start out
// as the full input.
268 ParseState State
= {Input
, Input
};
269 ExpectedProgress
<RangeSelector
> Result
= parseRangeSelectorImpl(State
);
271 return Result
.takeError();
272 State
= Result
->State
;
273 // Discard any potentially trailing whitespace.
274 State
.Input
= consumeWhitespace(State
.Input
);
// The whole input must have been consumed for the parse to succeed.
275 if (State
.Input
.empty())
276 return Result
->Value
;
277 return makeParseError(State
, "unexpected input after selector");