[memprof] Update YAML traits for writer purposes (#118720)
[llvm-project.git] / clang / lib / ASTMatchers / Dynamic / Parser.cpp
blob6a16c2184fcfb14b6dc76ac1ac53e635b89a2111
1 //===- Parser.cpp - Matcher expression parser -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// Recursive parser implementation for the matcher expression grammar.
11 ///
12 //===----------------------------------------------------------------------===//
14 #include "clang/ASTMatchers/Dynamic/Parser.h"
15 #include "clang/ASTMatchers/ASTMatchersInternal.h"
16 #include "clang/ASTMatchers/Dynamic/Diagnostics.h"
17 #include "clang/ASTMatchers/Dynamic/Registry.h"
18 #include "clang/Basic/CharInfo.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Support/ErrorHandling.h"
21 #include "llvm/Support/ManagedStatic.h"
22 #include <algorithm>
23 #include <cassert>
24 #include <cerrno>
25 #include <cstddef>
26 #include <cstdlib>
27 #include <optional>
28 #include <string>
29 #include <utility>
30 #include <vector>
32 namespace clang {
33 namespace ast_matchers {
34 namespace dynamic {
36 /// Simple structure to hold information for one token from the parser.
37 struct Parser::TokenInfo {
38 /// Different possible tokens.
39 enum TokenKind {
40 TK_Eof,
41 TK_NewLine,
42 TK_OpenParen,
43 TK_CloseParen,
44 TK_Comma,
45 TK_Period,
46 TK_Literal,
47 TK_Ident,
48 TK_InvalidChar,
49 TK_Error,
50 TK_CodeCompletion
53 /// Some known identifiers.
54 static const char* const ID_Bind;
55 static const char *const ID_With;
57 TokenInfo() = default;
59 StringRef Text;
60 TokenKind Kind = TK_Eof;
61 SourceRange Range;
62 VariantValue Value;
65 const char* const Parser::TokenInfo::ID_Bind = "bind";
66 const char *const Parser::TokenInfo::ID_With = "with";
68 /// Simple tokenizer for the parser.
69 class Parser::CodeTokenizer {
70 public:
71 explicit CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error)
72 : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error) {
73 NextToken = getNextToken();
76 CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error,
77 unsigned CodeCompletionOffset)
78 : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error),
79 CodeCompletionLocation(MatcherCode.data() + CodeCompletionOffset) {
80 NextToken = getNextToken();
83 /// Returns but doesn't consume the next token.
84 const TokenInfo &peekNextToken() const { return NextToken; }
86 /// Consumes and returns the next token.
87 TokenInfo consumeNextToken() {
88 TokenInfo ThisToken = NextToken;
89 NextToken = getNextToken();
90 return ThisToken;
93 TokenInfo SkipNewlines() {
94 while (NextToken.Kind == TokenInfo::TK_NewLine)
95 NextToken = getNextToken();
96 return NextToken;
99 TokenInfo consumeNextTokenIgnoreNewlines() {
100 SkipNewlines();
101 if (NextToken.Kind == TokenInfo::TK_Eof)
102 return NextToken;
103 return consumeNextToken();
106 TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; }
108 private:
109 TokenInfo getNextToken() {
110 consumeWhitespace();
111 TokenInfo Result;
112 Result.Range.Start = currentLocation();
114 if (CodeCompletionLocation && CodeCompletionLocation <= Code.data()) {
115 Result.Kind = TokenInfo::TK_CodeCompletion;
116 Result.Text = StringRef(CodeCompletionLocation, 0);
117 CodeCompletionLocation = nullptr;
118 return Result;
121 if (Code.empty()) {
122 Result.Kind = TokenInfo::TK_Eof;
123 Result.Text = "";
124 return Result;
127 switch (Code[0]) {
128 case '#':
129 Code = Code.drop_until([](char c) { return c == '\n'; });
130 return getNextToken();
131 case ',':
132 Result.Kind = TokenInfo::TK_Comma;
133 Result.Text = Code.substr(0, 1);
134 Code = Code.drop_front();
135 break;
136 case '.':
137 Result.Kind = TokenInfo::TK_Period;
138 Result.Text = Code.substr(0, 1);
139 Code = Code.drop_front();
140 break;
141 case '\n':
142 ++Line;
143 StartOfLine = Code.drop_front();
144 Result.Kind = TokenInfo::TK_NewLine;
145 Result.Text = Code.substr(0, 1);
146 Code = Code.drop_front();
147 break;
148 case '(':
149 Result.Kind = TokenInfo::TK_OpenParen;
150 Result.Text = Code.substr(0, 1);
151 Code = Code.drop_front();
152 break;
153 case ')':
154 Result.Kind = TokenInfo::TK_CloseParen;
155 Result.Text = Code.substr(0, 1);
156 Code = Code.drop_front();
157 break;
159 case '"':
160 case '\'':
161 // Parse a string literal.
162 consumeStringLiteral(&Result);
163 break;
165 case '0': case '1': case '2': case '3': case '4':
166 case '5': case '6': case '7': case '8': case '9':
167 // Parse an unsigned and float literal.
168 consumeNumberLiteral(&Result);
169 break;
171 default:
172 if (isAlphanumeric(Code[0])) {
173 // Parse an identifier
174 size_t TokenLength = 1;
175 while (true) {
176 // A code completion location in/immediately after an identifier will
177 // cause the portion of the identifier before the code completion
178 // location to become a code completion token.
179 if (CodeCompletionLocation == Code.data() + TokenLength) {
180 CodeCompletionLocation = nullptr;
181 Result.Kind = TokenInfo::TK_CodeCompletion;
182 Result.Text = Code.substr(0, TokenLength);
183 Code = Code.drop_front(TokenLength);
184 return Result;
186 if (TokenLength == Code.size() || !isAlphanumeric(Code[TokenLength]))
187 break;
188 ++TokenLength;
190 if (TokenLength == 4 && Code.starts_with("true")) {
191 Result.Kind = TokenInfo::TK_Literal;
192 Result.Value = true;
193 } else if (TokenLength == 5 && Code.starts_with("false")) {
194 Result.Kind = TokenInfo::TK_Literal;
195 Result.Value = false;
196 } else {
197 Result.Kind = TokenInfo::TK_Ident;
198 Result.Text = Code.substr(0, TokenLength);
200 Code = Code.drop_front(TokenLength);
201 } else {
202 Result.Kind = TokenInfo::TK_InvalidChar;
203 Result.Text = Code.substr(0, 1);
204 Code = Code.drop_front(1);
206 break;
209 Result.Range.End = currentLocation();
210 return Result;
213 /// Consume an unsigned and float literal.
214 void consumeNumberLiteral(TokenInfo *Result) {
215 bool isFloatingLiteral = false;
216 unsigned Length = 1;
217 if (Code.size() > 1) {
218 // Consume the 'x' or 'b' radix modifier, if present.
219 switch (toLowercase(Code[1])) {
220 case 'x': case 'b': Length = 2;
223 while (Length < Code.size() && isHexDigit(Code[Length]))
224 ++Length;
226 // Try to recognize a floating point literal.
227 while (Length < Code.size()) {
228 char c = Code[Length];
229 if (c == '-' || c == '+' || c == '.' || isHexDigit(c)) {
230 isFloatingLiteral = true;
231 Length++;
232 } else {
233 break;
237 Result->Text = Code.substr(0, Length);
238 Code = Code.drop_front(Length);
240 if (isFloatingLiteral) {
241 char *end;
242 errno = 0;
243 std::string Text = Result->Text.str();
244 double doubleValue = strtod(Text.c_str(), &end);
245 if (*end == 0 && errno == 0) {
246 Result->Kind = TokenInfo::TK_Literal;
247 Result->Value = doubleValue;
248 return;
250 } else {
251 unsigned Value;
252 if (!Result->Text.getAsInteger(0, Value)) {
253 Result->Kind = TokenInfo::TK_Literal;
254 Result->Value = Value;
255 return;
259 SourceRange Range;
260 Range.Start = Result->Range.Start;
261 Range.End = currentLocation();
262 Error->addError(Range, Error->ET_ParserNumberError) << Result->Text;
263 Result->Kind = TokenInfo::TK_Error;
266 /// Consume a string literal.
268 /// \c Code must be positioned at the start of the literal (the opening
269 /// quote). Consumed until it finds the same closing quote character.
270 void consumeStringLiteral(TokenInfo *Result) {
271 bool InEscape = false;
272 const char Marker = Code[0];
273 for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) {
274 if (InEscape) {
275 InEscape = false;
276 continue;
278 if (Code[Length] == '\\') {
279 InEscape = true;
280 continue;
282 if (Code[Length] == Marker) {
283 Result->Kind = TokenInfo::TK_Literal;
284 Result->Text = Code.substr(0, Length + 1);
285 Result->Value = Code.substr(1, Length - 1);
286 Code = Code.drop_front(Length + 1);
287 return;
291 StringRef ErrorText = Code;
292 Code = Code.drop_front(Code.size());
293 SourceRange Range;
294 Range.Start = Result->Range.Start;
295 Range.End = currentLocation();
296 Error->addError(Range, Error->ET_ParserStringError) << ErrorText;
297 Result->Kind = TokenInfo::TK_Error;
300 /// Consume all leading whitespace from \c Code.
301 void consumeWhitespace() {
302 // Don't trim newlines.
303 Code = Code.ltrim(" \t\v\f\r");
306 SourceLocation currentLocation() {
307 SourceLocation Location;
308 Location.Line = Line;
309 Location.Column = Code.data() - StartOfLine.data() + 1;
310 return Location;
313 StringRef &Code;
314 StringRef StartOfLine;
315 unsigned Line = 1;
316 Diagnostics *Error;
317 TokenInfo NextToken;
318 const char *CodeCompletionLocation = nullptr;
321 Parser::Sema::~Sema() = default;
323 std::vector<ArgKind> Parser::Sema::getAcceptedCompletionTypes(
324 llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
325 return {};
328 std::vector<MatcherCompletion>
329 Parser::Sema::getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) {
330 return {};
333 struct Parser::ScopedContextEntry {
334 Parser *P;
336 ScopedContextEntry(Parser *P, MatcherCtor C) : P(P) {
337 P->ContextStack.push_back(std::make_pair(C, 0u));
340 ~ScopedContextEntry() {
341 P->ContextStack.pop_back();
344 void nextArg() {
345 ++P->ContextStack.back().second;
349 /// Parse expressions that start with an identifier.
351 /// This function can parse named values and matchers.
352 /// In case of failure it will try to determine the user's intent to give
353 /// an appropriate error message.
354 bool Parser::parseIdentifierPrefixImpl(VariantValue *Value) {
355 const TokenInfo NameToken = Tokenizer->consumeNextToken();
357 if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) {
358 // Parse as a named value.
359 if (const VariantValue NamedValue =
360 NamedValues ? NamedValues->lookup(NameToken.Text)
361 : VariantValue()) {
363 if (Tokenizer->nextTokenKind() != TokenInfo::TK_Period) {
364 *Value = NamedValue;
365 return true;
368 std::string BindID;
369 Tokenizer->consumeNextToken();
370 TokenInfo ChainCallToken = Tokenizer->consumeNextToken();
371 if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) {
372 addCompletion(ChainCallToken, MatcherCompletion("bind(\"", "bind", 1));
373 return false;
376 if (ChainCallToken.Kind != TokenInfo::TK_Ident ||
377 (ChainCallToken.Text != TokenInfo::ID_Bind &&
378 ChainCallToken.Text != TokenInfo::ID_With)) {
379 Error->addError(ChainCallToken.Range,
380 Error->ET_ParserMalformedChainedExpr);
381 return false;
383 if (ChainCallToken.Text == TokenInfo::ID_With) {
385 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
386 NameToken.Text, NameToken.Range);
388 Error->addError(ChainCallToken.Range,
389 Error->ET_RegistryMatcherNoWithSupport);
390 return false;
392 if (!parseBindID(BindID))
393 return false;
395 assert(NamedValue.isMatcher());
396 std::optional<DynTypedMatcher> Result =
397 NamedValue.getMatcher().getSingleMatcher();
398 if (Result) {
399 std::optional<DynTypedMatcher> Bound = Result->tryBind(BindID);
400 if (Bound) {
401 *Value = VariantMatcher::SingleMatcher(*Bound);
402 return true;
405 return false;
408 if (Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine) {
409 Error->addError(Tokenizer->peekNextToken().Range,
410 Error->ET_ParserNoOpenParen)
411 << "NewLine";
412 return false;
415 // If the syntax is correct and the name is not a matcher either, report
416 // unknown named value.
417 if ((Tokenizer->nextTokenKind() == TokenInfo::TK_Comma ||
418 Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen ||
419 Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine ||
420 Tokenizer->nextTokenKind() == TokenInfo::TK_Eof) &&
421 !S->lookupMatcherCtor(NameToken.Text)) {
422 Error->addError(NameToken.Range, Error->ET_RegistryValueNotFound)
423 << NameToken.Text;
424 return false;
426 // Otherwise, fallback to the matcher parser.
429 Tokenizer->SkipNewlines();
431 assert(NameToken.Kind == TokenInfo::TK_Ident);
432 TokenInfo OpenToken = Tokenizer->consumeNextToken();
433 if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
434 Error->addError(OpenToken.Range, Error->ET_ParserNoOpenParen)
435 << OpenToken.Text;
436 return false;
439 std::optional<MatcherCtor> Ctor = S->lookupMatcherCtor(NameToken.Text);
441 // Parse as a matcher expression.
442 return parseMatcherExpressionImpl(NameToken, OpenToken, Ctor, Value);
445 bool Parser::parseBindID(std::string &BindID) {
446 // Parse the parenthesized argument to .bind("foo")
447 const TokenInfo OpenToken = Tokenizer->consumeNextToken();
448 const TokenInfo IDToken = Tokenizer->consumeNextTokenIgnoreNewlines();
449 const TokenInfo CloseToken = Tokenizer->consumeNextTokenIgnoreNewlines();
451 // TODO: We could use different error codes for each/some to be more
452 // explicit about the syntax error.
453 if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
454 Error->addError(OpenToken.Range, Error->ET_ParserMalformedBindExpr);
455 return false;
457 if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) {
458 Error->addError(IDToken.Range, Error->ET_ParserMalformedBindExpr);
459 return false;
461 if (CloseToken.Kind != TokenInfo::TK_CloseParen) {
462 Error->addError(CloseToken.Range, Error->ET_ParserMalformedBindExpr);
463 return false;
465 BindID = IDToken.Value.getString();
466 return true;
469 bool Parser::parseMatcherBuilder(MatcherCtor Ctor, const TokenInfo &NameToken,
470 const TokenInfo &OpenToken,
471 VariantValue *Value) {
472 std::vector<ParserValue> Args;
473 TokenInfo EndToken;
475 Tokenizer->SkipNewlines();
478 ScopedContextEntry SCE(this, Ctor);
480 while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
481 if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
482 // End of args.
483 EndToken = Tokenizer->consumeNextToken();
484 break;
486 if (!Args.empty()) {
487 // We must find a , token to continue.
488 TokenInfo CommaToken = Tokenizer->consumeNextToken();
489 if (CommaToken.Kind != TokenInfo::TK_Comma) {
490 Error->addError(CommaToken.Range, Error->ET_ParserNoComma)
491 << CommaToken.Text;
492 return false;
496 Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
497 NameToken.Text, NameToken.Range,
498 Args.size() + 1);
499 ParserValue ArgValue;
500 Tokenizer->SkipNewlines();
502 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_CodeCompletion) {
503 addExpressionCompletions();
504 return false;
507 TokenInfo NodeMatcherToken = Tokenizer->consumeNextToken();
509 if (NodeMatcherToken.Kind != TokenInfo::TK_Ident) {
510 Error->addError(NameToken.Range, Error->ET_ParserFailedToBuildMatcher)
511 << NameToken.Text;
512 return false;
515 ArgValue.Text = NodeMatcherToken.Text;
516 ArgValue.Range = NodeMatcherToken.Range;
518 std::optional<MatcherCtor> MappedMatcher =
519 S->lookupMatcherCtor(ArgValue.Text);
521 if (!MappedMatcher) {
522 Error->addError(NodeMatcherToken.Range,
523 Error->ET_RegistryMatcherNotFound)
524 << NodeMatcherToken.Text;
525 return false;
528 ASTNodeKind NK = S->nodeMatcherType(*MappedMatcher);
530 if (NK.isNone()) {
531 Error->addError(NodeMatcherToken.Range,
532 Error->ET_RegistryNonNodeMatcher)
533 << NodeMatcherToken.Text;
534 return false;
537 ArgValue.Value = NK;
539 Tokenizer->SkipNewlines();
540 Args.push_back(ArgValue);
542 SCE.nextArg();
546 if (EndToken.Kind == TokenInfo::TK_Eof) {
547 Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen);
548 return false;
551 internal::MatcherDescriptorPtr BuiltCtor =
552 S->buildMatcherCtor(Ctor, NameToken.Range, Args, Error);
554 if (!BuiltCtor.get()) {
555 Error->addError(NameToken.Range, Error->ET_ParserFailedToBuildMatcher)
556 << NameToken.Text;
557 return false;
560 std::string BindID;
561 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
562 Tokenizer->consumeNextToken();
563 TokenInfo ChainCallToken = Tokenizer->consumeNextToken();
564 if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) {
565 addCompletion(ChainCallToken, MatcherCompletion("bind(\"", "bind", 1));
566 addCompletion(ChainCallToken, MatcherCompletion("with(", "with", 1));
567 return false;
569 if (ChainCallToken.Kind != TokenInfo::TK_Ident ||
570 (ChainCallToken.Text != TokenInfo::ID_Bind &&
571 ChainCallToken.Text != TokenInfo::ID_With)) {
572 Error->addError(ChainCallToken.Range,
573 Error->ET_ParserMalformedChainedExpr);
574 return false;
576 if (ChainCallToken.Text == TokenInfo::ID_Bind) {
577 if (!parseBindID(BindID))
578 return false;
579 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
580 NameToken.Text, NameToken.Range);
581 SourceRange MatcherRange = NameToken.Range;
582 MatcherRange.End = ChainCallToken.Range.End;
583 VariantMatcher Result = S->actOnMatcherExpression(
584 BuiltCtor.get(), MatcherRange, BindID, {}, Error);
585 if (Result.isNull())
586 return false;
588 *Value = Result;
589 return true;
590 } else if (ChainCallToken.Text == TokenInfo::ID_With) {
591 Tokenizer->SkipNewlines();
593 if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) {
594 StringRef ErrTxt = Tokenizer->nextTokenKind() == TokenInfo::TK_Eof
595 ? StringRef("EOF")
596 : Tokenizer->peekNextToken().Text;
597 Error->addError(Tokenizer->peekNextToken().Range,
598 Error->ET_ParserNoOpenParen)
599 << ErrTxt;
600 return false;
603 TokenInfo WithOpenToken = Tokenizer->consumeNextToken();
605 return parseMatcherExpressionImpl(NameToken, WithOpenToken,
606 BuiltCtor.get(), Value);
610 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
611 NameToken.Text, NameToken.Range);
612 SourceRange MatcherRange = NameToken.Range;
613 MatcherRange.End = EndToken.Range.End;
614 VariantMatcher Result = S->actOnMatcherExpression(
615 BuiltCtor.get(), MatcherRange, BindID, {}, Error);
616 if (Result.isNull())
617 return false;
619 *Value = Result;
620 return true;
623 /// Parse and validate a matcher expression.
624 /// \return \c true on success, in which case \c Value has the matcher parsed.
625 /// If the input is malformed, or some argument has an error, it
626 /// returns \c false.
627 bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken,
628 const TokenInfo &OpenToken,
629 std::optional<MatcherCtor> Ctor,
630 VariantValue *Value) {
631 if (!Ctor) {
632 Error->addError(NameToken.Range, Error->ET_RegistryMatcherNotFound)
633 << NameToken.Text;
634 // Do not return here. We need to continue to give completion suggestions.
637 if (Ctor && *Ctor && S->isBuilderMatcher(*Ctor))
638 return parseMatcherBuilder(*Ctor, NameToken, OpenToken, Value);
640 std::vector<ParserValue> Args;
641 TokenInfo EndToken;
643 Tokenizer->SkipNewlines();
646 ScopedContextEntry SCE(this, Ctor.value_or(nullptr));
648 while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
649 if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
650 // End of args.
651 EndToken = Tokenizer->consumeNextToken();
652 break;
654 if (!Args.empty()) {
655 // We must find a , token to continue.
656 const TokenInfo CommaToken = Tokenizer->consumeNextToken();
657 if (CommaToken.Kind != TokenInfo::TK_Comma) {
658 Error->addError(CommaToken.Range, Error->ET_ParserNoComma)
659 << CommaToken.Text;
660 return false;
664 Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
665 NameToken.Text, NameToken.Range,
666 Args.size() + 1);
667 ParserValue ArgValue;
668 Tokenizer->SkipNewlines();
669 ArgValue.Text = Tokenizer->peekNextToken().Text;
670 ArgValue.Range = Tokenizer->peekNextToken().Range;
671 if (!parseExpressionImpl(&ArgValue.Value)) {
672 return false;
675 Tokenizer->SkipNewlines();
676 Args.push_back(ArgValue);
677 SCE.nextArg();
681 if (EndToken.Kind == TokenInfo::TK_Eof) {
682 Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen);
683 return false;
686 std::string BindID;
687 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
688 Tokenizer->consumeNextToken();
689 TokenInfo ChainCallToken = Tokenizer->consumeNextToken();
690 if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) {
691 addCompletion(ChainCallToken, MatcherCompletion("bind(\"", "bind", 1));
692 return false;
695 if (ChainCallToken.Kind != TokenInfo::TK_Ident) {
696 Error->addError(ChainCallToken.Range,
697 Error->ET_ParserMalformedChainedExpr);
698 return false;
700 if (ChainCallToken.Text == TokenInfo::ID_With) {
702 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
703 NameToken.Text, NameToken.Range);
705 Error->addError(ChainCallToken.Range,
706 Error->ET_RegistryMatcherNoWithSupport);
707 return false;
709 if (ChainCallToken.Text != TokenInfo::ID_Bind) {
710 Error->addError(ChainCallToken.Range,
711 Error->ET_ParserMalformedChainedExpr);
712 return false;
714 if (!parseBindID(BindID))
715 return false;
718 if (!Ctor)
719 return false;
721 // Merge the start and end infos.
722 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
723 NameToken.Text, NameToken.Range);
724 SourceRange MatcherRange = NameToken.Range;
725 MatcherRange.End = EndToken.Range.End;
726 VariantMatcher Result = S->actOnMatcherExpression(
727 *Ctor, MatcherRange, BindID, Args, Error);
728 if (Result.isNull()) return false;
730 *Value = Result;
731 return true;
734 // If the prefix of this completion matches the completion token, add it to
735 // Completions minus the prefix.
736 void Parser::addCompletion(const TokenInfo &CompToken,
737 const MatcherCompletion& Completion) {
738 if (StringRef(Completion.TypedText).starts_with(CompToken.Text) &&
739 Completion.Specificity > 0) {
740 Completions.emplace_back(Completion.TypedText.substr(CompToken.Text.size()),
741 Completion.MatcherDecl, Completion.Specificity);
745 std::vector<MatcherCompletion> Parser::getNamedValueCompletions(
746 ArrayRef<ArgKind> AcceptedTypes) {
747 if (!NamedValues) return std::vector<MatcherCompletion>();
748 std::vector<MatcherCompletion> Result;
749 for (const auto &Entry : *NamedValues) {
750 unsigned Specificity;
751 if (Entry.getValue().isConvertibleTo(AcceptedTypes, &Specificity)) {
752 std::string Decl =
753 (Entry.getValue().getTypeAsString() + " " + Entry.getKey()).str();
754 Result.emplace_back(Entry.getKey(), Decl, Specificity);
757 return Result;
760 void Parser::addExpressionCompletions() {
761 const TokenInfo CompToken = Tokenizer->consumeNextTokenIgnoreNewlines();
762 assert(CompToken.Kind == TokenInfo::TK_CodeCompletion);
764 // We cannot complete code if there is an invalid element on the context
765 // stack.
766 for (ContextStackTy::iterator I = ContextStack.begin(),
767 E = ContextStack.end();
768 I != E; ++I) {
769 if (!I->first)
770 return;
773 auto AcceptedTypes = S->getAcceptedCompletionTypes(ContextStack);
774 for (const auto &Completion : S->getMatcherCompletions(AcceptedTypes)) {
775 addCompletion(CompToken, Completion);
778 for (const auto &Completion : getNamedValueCompletions(AcceptedTypes)) {
779 addCompletion(CompToken, Completion);
783 /// Parse an <Expression>
784 bool Parser::parseExpressionImpl(VariantValue *Value) {
785 switch (Tokenizer->nextTokenKind()) {
786 case TokenInfo::TK_Literal:
787 *Value = Tokenizer->consumeNextToken().Value;
788 return true;
790 case TokenInfo::TK_Ident:
791 return parseIdentifierPrefixImpl(Value);
793 case TokenInfo::TK_CodeCompletion:
794 addExpressionCompletions();
795 return false;
797 case TokenInfo::TK_Eof:
798 Error->addError(Tokenizer->consumeNextToken().Range,
799 Error->ET_ParserNoCode);
800 return false;
802 case TokenInfo::TK_Error:
803 // This error was already reported by the tokenizer.
804 return false;
805 case TokenInfo::TK_NewLine:
806 case TokenInfo::TK_OpenParen:
807 case TokenInfo::TK_CloseParen:
808 case TokenInfo::TK_Comma:
809 case TokenInfo::TK_Period:
810 case TokenInfo::TK_InvalidChar:
811 const TokenInfo Token = Tokenizer->consumeNextToken();
812 Error->addError(Token.Range, Error->ET_ParserInvalidToken)
813 << (Token.Kind == TokenInfo::TK_NewLine ? "NewLine" : Token.Text);
814 return false;
817 llvm_unreachable("Unknown token kind.");
820 static llvm::ManagedStatic<Parser::RegistrySema> DefaultRegistrySema;
822 Parser::Parser(CodeTokenizer *Tokenizer, Sema *S,
823 const NamedValueMap *NamedValues, Diagnostics *Error)
824 : Tokenizer(Tokenizer), S(S ? S : &*DefaultRegistrySema),
825 NamedValues(NamedValues), Error(Error) {}
827 Parser::RegistrySema::~RegistrySema() = default;
829 std::optional<MatcherCtor>
830 Parser::RegistrySema::lookupMatcherCtor(StringRef MatcherName) {
831 return Registry::lookupMatcherCtor(MatcherName);
834 VariantMatcher Parser::RegistrySema::actOnMatcherExpression(
835 MatcherCtor Ctor, SourceRange NameRange, StringRef BindID,
836 ArrayRef<ParserValue> Args, Diagnostics *Error) {
837 if (BindID.empty()) {
838 return Registry::constructMatcher(Ctor, NameRange, Args, Error);
839 } else {
840 return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args,
841 Error);
845 std::vector<ArgKind> Parser::RegistrySema::getAcceptedCompletionTypes(
846 ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
847 return Registry::getAcceptedCompletionTypes(Context);
850 std::vector<MatcherCompletion> Parser::RegistrySema::getMatcherCompletions(
851 ArrayRef<ArgKind> AcceptedTypes) {
852 return Registry::getMatcherCompletions(AcceptedTypes);
855 bool Parser::RegistrySema::isBuilderMatcher(MatcherCtor Ctor) const {
856 return Registry::isBuilderMatcher(Ctor);
859 ASTNodeKind Parser::RegistrySema::nodeMatcherType(MatcherCtor Ctor) const {
860 return Registry::nodeMatcherType(Ctor);
863 internal::MatcherDescriptorPtr
864 Parser::RegistrySema::buildMatcherCtor(MatcherCtor Ctor, SourceRange NameRange,
865 ArrayRef<ParserValue> Args,
866 Diagnostics *Error) const {
867 return Registry::buildMatcherCtor(Ctor, NameRange, Args, Error);
870 bool Parser::parseExpression(StringRef &Code, Sema *S,
871 const NamedValueMap *NamedValues,
872 VariantValue *Value, Diagnostics *Error) {
873 CodeTokenizer Tokenizer(Code, Error);
874 if (!Parser(&Tokenizer, S, NamedValues, Error).parseExpressionImpl(Value))
875 return false;
876 auto NT = Tokenizer.peekNextToken();
877 if (NT.Kind != TokenInfo::TK_Eof && NT.Kind != TokenInfo::TK_NewLine) {
878 Error->addError(Tokenizer.peekNextToken().Range,
879 Error->ET_ParserTrailingCode);
880 return false;
882 return true;
885 std::vector<MatcherCompletion>
886 Parser::completeExpression(StringRef &Code, unsigned CompletionOffset, Sema *S,
887 const NamedValueMap *NamedValues) {
888 Diagnostics Error;
889 CodeTokenizer Tokenizer(Code, &Error, CompletionOffset);
890 Parser P(&Tokenizer, S, NamedValues, &Error);
891 VariantValue Dummy;
892 P.parseExpressionImpl(&Dummy);
894 // Sort by specificity, then by name.
895 llvm::sort(P.Completions,
896 [](const MatcherCompletion &A, const MatcherCompletion &B) {
897 if (A.Specificity != B.Specificity)
898 return A.Specificity > B.Specificity;
899 return A.TypedText < B.TypedText;
902 return P.Completions;
905 std::optional<DynTypedMatcher>
906 Parser::parseMatcherExpression(StringRef &Code, Sema *S,
907 const NamedValueMap *NamedValues,
908 Diagnostics *Error) {
909 VariantValue Value;
910 if (!parseExpression(Code, S, NamedValues, &Value, Error))
911 return std::nullopt;
912 if (!Value.isMatcher()) {
913 Error->addError(SourceRange(), Error->ET_ParserNotAMatcher);
914 return std::nullopt;
916 std::optional<DynTypedMatcher> Result = Value.getMatcher().getSingleMatcher();
917 if (!Result) {
918 Error->addError(SourceRange(), Error->ET_ParserOverloadedType)
919 << Value.getTypeAsString();
921 return Result;
924 } // namespace dynamic
925 } // namespace ast_matchers
926 } // namespace clang