[AMDGPU][AsmParser][NFC] Translate parsed MIMG instructions to MCInsts automatically.
[llvm-project.git] / clang-tools-extra / clang-tidy / utils / LexerUtils.cpp
blob95e0255b37fd6a9db0c85b19ede679a9c5982be3
1 //===--- LexerUtils.cpp - clang-tidy---------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "LexerUtils.h"
10 #include "clang/AST/AST.h"
11 #include "clang/Basic/SourceManager.h"
12 #include <optional>
14 namespace clang::tidy::utils::lexer {
16 Token getPreviousToken(SourceLocation Location, const SourceManager &SM,
17 const LangOptions &LangOpts, bool SkipComments) {
18 Token Token;
19 Token.setKind(tok::unknown);
21 Location = Location.getLocWithOffset(-1);
22 if (Location.isInvalid())
23 return Token;
25 auto StartOfFile = SM.getLocForStartOfFile(SM.getFileID(Location));
26 while (Location != StartOfFile) {
27 Location = Lexer::GetBeginningOfToken(Location, SM, LangOpts);
28 if (!Lexer::getRawToken(Location, Token, SM, LangOpts) &&
29 (!SkipComments || !Token.is(tok::comment))) {
30 break;
32 Location = Location.getLocWithOffset(-1);
34 return Token;
37 SourceLocation findPreviousTokenStart(SourceLocation Start,
38 const SourceManager &SM,
39 const LangOptions &LangOpts) {
40 if (Start.isInvalid() || Start.isMacroID())
41 return SourceLocation();
43 SourceLocation BeforeStart = Start.getLocWithOffset(-1);
44 if (BeforeStart.isInvalid() || BeforeStart.isMacroID())
45 return SourceLocation();
47 return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts);
50 SourceLocation findPreviousTokenKind(SourceLocation Start,
51 const SourceManager &SM,
52 const LangOptions &LangOpts,
53 tok::TokenKind TK) {
54 if (Start.isInvalid() || Start.isMacroID())
55 return SourceLocation();
57 while (true) {
58 SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts);
59 if (L.isInvalid() || L.isMacroID())
60 return SourceLocation();
62 Token T;
63 if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true))
64 return SourceLocation();
66 if (T.is(TK))
67 return T.getLocation();
69 Start = L;
73 SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM,
74 const LangOptions &LangOpts) {
75 return findNextAnyTokenKind(Start, SM, LangOpts, tok::comma, tok::semi);
78 std::optional<Token>
79 findNextTokenIncludingComments(SourceLocation Start, const SourceManager &SM,
80 const LangOptions &LangOpts) {
81 // `Lexer::findNextToken` will ignore comment
82 if (Start.isMacroID())
83 return std::nullopt;
84 Start = Lexer::getLocForEndOfToken(Start, 0, SM, LangOpts);
85 // Break down the source location.
86 std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Start);
87 bool InvalidTemp = false;
88 StringRef File = SM.getBufferData(LocInfo.first, &InvalidTemp);
89 if (InvalidTemp)
90 return std::nullopt;
91 // Lex from the start of the given location.
92 Lexer L(SM.getLocForStartOfFile(LocInfo.first), LangOpts, File.begin(),
93 File.data() + LocInfo.second, File.end());
94 L.SetCommentRetentionState(true);
95 // Find the token.
96 Token Tok;
97 L.LexFromRawLexer(Tok);
98 return Tok;
101 std::optional<Token>
102 findNextTokenSkippingComments(SourceLocation Start, const SourceManager &SM,
103 const LangOptions &LangOpts) {
104 while (Start.isValid()) {
105 std::optional<Token> CurrentToken =
106 Lexer::findNextToken(Start, SM, LangOpts);
107 if (!CurrentToken || !CurrentToken->is(tok::comment))
108 return CurrentToken;
110 Start = CurrentToken->getLocation();
113 return std::nullopt;
116 bool rangeContainsExpansionsOrDirectives(SourceRange Range,
117 const SourceManager &SM,
118 const LangOptions &LangOpts) {
119 assert(Range.isValid() && "Invalid Range for relexing provided");
120 SourceLocation Loc = Range.getBegin();
122 while (Loc <= Range.getEnd()) {
123 if (Loc.isMacroID())
124 return true;
126 std::optional<Token> Tok = Lexer::findNextToken(Loc, SM, LangOpts);
128 if (!Tok)
129 return true;
131 if (Tok->is(tok::hash))
132 return true;
134 Loc = Tok->getLocation();
137 return false;
140 std::optional<Token> getQualifyingToken(tok::TokenKind TK,
141 CharSourceRange Range,
142 const ASTContext &Context,
143 const SourceManager &SM) {
144 assert((TK == tok::kw_const || TK == tok::kw_volatile ||
145 TK == tok::kw_restrict) &&
146 "TK is not a qualifier keyword");
147 std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Range.getBegin());
148 StringRef File = SM.getBufferData(LocInfo.first);
149 Lexer RawLexer(SM.getLocForStartOfFile(LocInfo.first), Context.getLangOpts(),
150 File.begin(), File.data() + LocInfo.second, File.end());
151 std::optional<Token> LastMatchBeforeTemplate;
152 std::optional<Token> LastMatchAfterTemplate;
153 bool SawTemplate = false;
154 Token Tok;
155 while (!RawLexer.LexFromRawLexer(Tok) &&
156 Range.getEnd() != Tok.getLocation() &&
157 !SM.isBeforeInTranslationUnit(Range.getEnd(), Tok.getLocation())) {
158 if (Tok.is(tok::raw_identifier)) {
159 IdentifierInfo &Info = Context.Idents.get(
160 StringRef(SM.getCharacterData(Tok.getLocation()), Tok.getLength()));
161 Tok.setIdentifierInfo(&Info);
162 Tok.setKind(Info.getTokenID());
164 if (Tok.is(tok::less))
165 SawTemplate = true;
166 else if (Tok.isOneOf(tok::greater, tok::greatergreater))
167 LastMatchAfterTemplate = std::nullopt;
168 else if (Tok.is(TK)) {
169 if (SawTemplate)
170 LastMatchAfterTemplate = Tok;
171 else
172 LastMatchBeforeTemplate = Tok;
175 return LastMatchAfterTemplate != std::nullopt ? LastMatchAfterTemplate
176 : LastMatchBeforeTemplate;
179 static bool breakAndReturnEnd(const Stmt &S) {
180 return isa<CompoundStmt, DeclStmt, NullStmt>(S);
183 static bool breakAndReturnEndPlus1Token(const Stmt &S) {
184 return isa<Expr, DoStmt, ReturnStmt, BreakStmt, ContinueStmt, GotoStmt,
185 SEHLeaveStmt>(S);
188 // Given a Stmt which does not include it's semicolon this method returns the
189 // SourceLocation of the semicolon.
190 static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc,
191 const SourceManager &SM,
192 const LangOptions &LangOpts) {
194 if (EndLoc.isMacroID()) {
195 // Assuming EndLoc points to a function call foo within macro F.
196 // This method is supposed to return location of the semicolon within
197 // those macro arguments:
198 // F ( foo() ; )
199 // ^ EndLoc ^ SpellingLoc ^ next token of SpellingLoc
200 const SourceLocation SpellingLoc = SM.getSpellingLoc(EndLoc);
201 std::optional<Token> NextTok =
202 findNextTokenSkippingComments(SpellingLoc, SM, LangOpts);
204 // Was the next token found successfully?
205 // All macro issues are simply resolved by ensuring it's a semicolon.
206 if (NextTok && NextTok->is(tok::TokenKind::semi)) {
207 // Ideally this would return `F` with spelling location `;` (NextTok)
208 // following the example above. For now simply return NextTok location.
209 return NextTok->getLocation();
212 // Fallthrough to 'normal handling'.
213 // F ( foo() ) ;
214 // ^ EndLoc ^ SpellingLoc ) ^ next token of EndLoc
217 std::optional<Token> NextTok =
218 findNextTokenSkippingComments(EndLoc, SM, LangOpts);
220 // Testing for semicolon again avoids some issues with macros.
221 if (NextTok && NextTok->is(tok::TokenKind::semi))
222 return NextTok->getLocation();
224 return SourceLocation();
227 SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM,
228 const LangOptions &LangOpts) {
230 const Stmt *LastChild = &S;
231 while (!LastChild->children().empty() && !breakAndReturnEnd(*LastChild) &&
232 !breakAndReturnEndPlus1Token(*LastChild)) {
233 for (const Stmt *Child : LastChild->children())
234 LastChild = Child;
237 if (!breakAndReturnEnd(*LastChild) && breakAndReturnEndPlus1Token(*LastChild))
238 return getSemicolonAfterStmtEndLoc(S.getEndLoc(), SM, LangOpts);
240 return S.getEndLoc();
243 SourceLocation getLocationForNoexceptSpecifier(const FunctionDecl *FuncDecl,
244 const SourceManager &SM) {
245 if (!FuncDecl)
246 return {};
248 const LangOptions &LangOpts = FuncDecl->getLangOpts();
250 if (FuncDecl->getNumParams() == 0) {
251 // Start at the beginning of the function declaration, and find the closing
252 // parenthesis after which we would place the noexcept specifier.
253 Token CurrentToken;
254 SourceLocation CurrentLocation = FuncDecl->getBeginLoc();
255 while (!Lexer::getRawToken(CurrentLocation, CurrentToken, SM, LangOpts,
256 true)) {
257 if (CurrentToken.is(tok::r_paren))
258 return CurrentLocation.getLocWithOffset(1);
260 CurrentLocation = CurrentToken.getEndLoc();
263 // Failed to find the closing parenthesis, so just return an invalid
264 // SourceLocation.
265 return {};
268 // FunctionDecl with parameters
269 const SourceLocation NoexceptLoc =
270 FuncDecl->getParamDecl(FuncDecl->getNumParams() - 1)->getEndLoc();
271 if (NoexceptLoc.isValid())
272 return Lexer::findLocationAfterToken(
273 NoexceptLoc, tok::r_paren, SM, LangOpts,
274 /*SkipTrailingWhitespaceAndNewLine=*/true);
276 return {};
279 } // namespace clang::tidy::utils::lexer