Reapply "[lldb][dwarf] Compute fully qualified names on simplified template names...
[llvm-project.git] / clang-tools-extra / clang-tidy / utils / LexerUtils.cpp
blob92c3e0ed7894e1eaef71911cb0a37dc00923ff66
1 //===--- LexerUtils.cpp - clang-tidy---------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "LexerUtils.h"
10 #include "clang/AST/AST.h"
11 #include "clang/Basic/SourceManager.h"
12 #include <optional>
13 #include <utility>
15 namespace clang::tidy::utils::lexer {
17 std::pair<Token, SourceLocation>
18 getPreviousTokenAndStart(SourceLocation Location, const SourceManager &SM,
19 const LangOptions &LangOpts, bool SkipComments) {
20 Token Token;
21 Token.setKind(tok::unknown);
23 Location = Location.getLocWithOffset(-1);
24 if (Location.isInvalid())
25 return {Token, Location};
27 const auto StartOfFile = SM.getLocForStartOfFile(SM.getFileID(Location));
28 while (Location != StartOfFile) {
29 Location = Lexer::GetBeginningOfToken(Location, SM, LangOpts);
30 if (!Lexer::getRawToken(Location, Token, SM, LangOpts) &&
31 (!SkipComments || !Token.is(tok::comment))) {
32 break;
34 if (Location == StartOfFile)
35 return {Token, Location};
36 Location = Location.getLocWithOffset(-1);
38 return {Token, Location};
41 Token getPreviousToken(SourceLocation Location, const SourceManager &SM,
42 const LangOptions &LangOpts, bool SkipComments) {
43 auto [Token, Start] =
44 getPreviousTokenAndStart(Location, SM, LangOpts, SkipComments);
45 return Token;
48 SourceLocation findPreviousTokenStart(SourceLocation Start,
49 const SourceManager &SM,
50 const LangOptions &LangOpts) {
51 if (Start.isInvalid() || Start.isMacroID())
52 return {};
54 SourceLocation BeforeStart = Start.getLocWithOffset(-1);
55 if (BeforeStart.isInvalid() || BeforeStart.isMacroID())
56 return {};
58 return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts);
61 SourceLocation findPreviousTokenKind(SourceLocation Start,
62 const SourceManager &SM,
63 const LangOptions &LangOpts,
64 tok::TokenKind TK) {
65 if (Start.isInvalid() || Start.isMacroID())
66 return {};
68 while (true) {
69 SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts);
70 if (L.isInvalid() || L.isMacroID())
71 return {};
73 Token T;
74 if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true))
75 return {};
77 if (T.is(TK))
78 return T.getLocation();
80 Start = L;
84 SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM,
85 const LangOptions &LangOpts) {
86 return findNextAnyTokenKind(Start, SM, LangOpts, tok::comma, tok::semi);
89 std::optional<Token>
90 findNextTokenIncludingComments(SourceLocation Start, const SourceManager &SM,
91 const LangOptions &LangOpts) {
92 // `Lexer::findNextToken` will ignore comment
93 if (Start.isMacroID())
94 return std::nullopt;
95 Start = Lexer::getLocForEndOfToken(Start, 0, SM, LangOpts);
96 // Break down the source location.
97 std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Start);
98 bool InvalidTemp = false;
99 StringRef File = SM.getBufferData(LocInfo.first, &InvalidTemp);
100 if (InvalidTemp)
101 return std::nullopt;
102 // Lex from the start of the given location.
103 Lexer L(SM.getLocForStartOfFile(LocInfo.first), LangOpts, File.begin(),
104 File.data() + LocInfo.second, File.end());
105 L.SetCommentRetentionState(true);
106 // Find the token.
107 Token Tok;
108 L.LexFromRawLexer(Tok);
109 return Tok;
112 std::optional<Token>
113 findNextTokenSkippingComments(SourceLocation Start, const SourceManager &SM,
114 const LangOptions &LangOpts) {
115 while (Start.isValid()) {
116 std::optional<Token> CurrentToken =
117 Lexer::findNextToken(Start, SM, LangOpts);
118 if (!CurrentToken || !CurrentToken->is(tok::comment))
119 return CurrentToken;
121 Start = CurrentToken->getLocation();
124 return std::nullopt;
127 bool rangeContainsExpansionsOrDirectives(SourceRange Range,
128 const SourceManager &SM,
129 const LangOptions &LangOpts) {
130 assert(Range.isValid() && "Invalid Range for relexing provided");
131 SourceLocation Loc = Range.getBegin();
133 while (Loc <= Range.getEnd()) {
134 if (Loc.isMacroID())
135 return true;
137 std::optional<Token> Tok = Lexer::findNextToken(Loc, SM, LangOpts);
139 if (!Tok)
140 return true;
142 if (Tok->is(tok::hash))
143 return true;
145 Loc = Tok->getLocation();
148 return false;
151 std::optional<Token> getQualifyingToken(tok::TokenKind TK,
152 CharSourceRange Range,
153 const ASTContext &Context,
154 const SourceManager &SM) {
155 assert((TK == tok::kw_const || TK == tok::kw_volatile ||
156 TK == tok::kw_restrict) &&
157 "TK is not a qualifier keyword");
158 std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Range.getBegin());
159 StringRef File = SM.getBufferData(LocInfo.first);
160 Lexer RawLexer(SM.getLocForStartOfFile(LocInfo.first), Context.getLangOpts(),
161 File.begin(), File.data() + LocInfo.second, File.end());
162 std::optional<Token> LastMatchBeforeTemplate;
163 std::optional<Token> LastMatchAfterTemplate;
164 bool SawTemplate = false;
165 Token Tok;
166 while (!RawLexer.LexFromRawLexer(Tok) &&
167 Range.getEnd() != Tok.getLocation() &&
168 !SM.isBeforeInTranslationUnit(Range.getEnd(), Tok.getLocation())) {
169 if (Tok.is(tok::raw_identifier)) {
170 IdentifierInfo &Info = Context.Idents.get(
171 StringRef(SM.getCharacterData(Tok.getLocation()), Tok.getLength()));
172 Tok.setIdentifierInfo(&Info);
173 Tok.setKind(Info.getTokenID());
175 if (Tok.is(tok::less))
176 SawTemplate = true;
177 else if (Tok.isOneOf(tok::greater, tok::greatergreater))
178 LastMatchAfterTemplate = std::nullopt;
179 else if (Tok.is(TK)) {
180 if (SawTemplate)
181 LastMatchAfterTemplate = Tok;
182 else
183 LastMatchBeforeTemplate = Tok;
186 return LastMatchAfterTemplate != std::nullopt ? LastMatchAfterTemplate
187 : LastMatchBeforeTemplate;
190 static bool breakAndReturnEnd(const Stmt &S) {
191 return isa<CompoundStmt, DeclStmt, NullStmt>(S);
194 static bool breakAndReturnEndPlus1Token(const Stmt &S) {
195 return isa<Expr, DoStmt, ReturnStmt, BreakStmt, ContinueStmt, GotoStmt,
196 SEHLeaveStmt>(S);
199 // Given a Stmt which does not include it's semicolon this method returns the
200 // SourceLocation of the semicolon.
201 static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc,
202 const SourceManager &SM,
203 const LangOptions &LangOpts) {
205 if (EndLoc.isMacroID()) {
206 // Assuming EndLoc points to a function call foo within macro F.
207 // This method is supposed to return location of the semicolon within
208 // those macro arguments:
209 // F ( foo() ; )
210 // ^ EndLoc ^ SpellingLoc ^ next token of SpellingLoc
211 const SourceLocation SpellingLoc = SM.getSpellingLoc(EndLoc);
212 std::optional<Token> NextTok =
213 findNextTokenSkippingComments(SpellingLoc, SM, LangOpts);
215 // Was the next token found successfully?
216 // All macro issues are simply resolved by ensuring it's a semicolon.
217 if (NextTok && NextTok->is(tok::TokenKind::semi)) {
218 // Ideally this would return `F` with spelling location `;` (NextTok)
219 // following the example above. For now simply return NextTok location.
220 return NextTok->getLocation();
223 // Fallthrough to 'normal handling'.
224 // F ( foo() ) ;
225 // ^ EndLoc ^ SpellingLoc ) ^ next token of EndLoc
228 std::optional<Token> NextTok =
229 findNextTokenSkippingComments(EndLoc, SM, LangOpts);
231 // Testing for semicolon again avoids some issues with macros.
232 if (NextTok && NextTok->is(tok::TokenKind::semi))
233 return NextTok->getLocation();
235 return {};
238 SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM,
239 const LangOptions &LangOpts) {
241 const Stmt *LastChild = &S;
242 while (!LastChild->children().empty() && !breakAndReturnEnd(*LastChild) &&
243 !breakAndReturnEndPlus1Token(*LastChild)) {
244 for (const Stmt *Child : LastChild->children())
245 LastChild = Child;
248 if (!breakAndReturnEnd(*LastChild) && breakAndReturnEndPlus1Token(*LastChild))
249 return getSemicolonAfterStmtEndLoc(S.getEndLoc(), SM, LangOpts);
251 return S.getEndLoc();
254 SourceLocation getLocationForNoexceptSpecifier(const FunctionDecl *FuncDecl,
255 const SourceManager &SM) {
256 if (!FuncDecl)
257 return {};
259 const LangOptions &LangOpts = FuncDecl->getLangOpts();
261 if (FuncDecl->getNumParams() == 0) {
262 // Start at the beginning of the function declaration, and find the closing
263 // parenthesis after which we would place the noexcept specifier.
264 Token CurrentToken;
265 SourceLocation CurrentLocation = FuncDecl->getBeginLoc();
266 while (!Lexer::getRawToken(CurrentLocation, CurrentToken, SM, LangOpts,
267 true)) {
268 if (CurrentToken.is(tok::r_paren))
269 return CurrentLocation.getLocWithOffset(1);
271 CurrentLocation = CurrentToken.getEndLoc();
274 // Failed to find the closing parenthesis, so just return an invalid
275 // SourceLocation.
276 return {};
279 // FunctionDecl with parameters
280 const SourceLocation NoexceptLoc =
281 FuncDecl->getParamDecl(FuncDecl->getNumParams() - 1)->getEndLoc();
282 if (NoexceptLoc.isValid())
283 return Lexer::findLocationAfterToken(
284 NoexceptLoc, tok::r_paren, SM, LangOpts,
285 /*SkipTrailingWhitespaceAndNewLine=*/true);
287 return {};
290 } // namespace clang::tidy::utils::lexer