1 //===--- LexerUtils.cpp - clang-tidy---------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "LexerUtils.h"
10 #include "clang/AST/AST.h"
11 #include "clang/Basic/SourceManager.h"
14 namespace clang::tidy::utils::lexer
{
// Searches backwards from the character just before Location for a token,
// raw-lexing one candidate per step and never moving past the start of the
// file that contains Location. With SkipComments set, a comment token does not
// satisfy the acceptance test. The token's kind starts out as tok::unknown.
// NOTE(review): presumably the Token filled in here is what gets returned, so
// callers would see tok::unknown when nothing is found -- confirm.
16 Token
getPreviousToken(SourceLocation Location
, const SourceManager
&SM
,
17 const LangOptions
&LangOpts
, bool SkipComments
) {
19 Token
.setKind(tok::unknown
);
// Step back onto the last character before the given location.
21 Location
= Location
.getLocWithOffset(-1);
22 if (Location
.isInvalid())
// Lower bound of the backwards scan: start of the file holding Location.
25 auto StartOfFile
= SM
.getLocForStartOfFile(SM
.getFileID(Location
));
26 while (Location
!= StartOfFile
) {
// Snap to the first character of whatever token covers Location.
27 Location
= Lexer::GetBeginningOfToken(Location
, SM
, LangOpts
);
// Accept the token when raw lexing did not report an error (a false result)
// and it is either not a comment or comments are not being skipped.
28 if (!Lexer::getRawToken(Location
, Token
, SM
, LangOpts
) &&
29 (!SkipComments
|| !Token
.is(tok::comment
))) {
// Otherwise move one character further back and try again.
32 Location
= Location
.getLocWithOffset(-1);
// Returns the beginning of the token that covers the character immediately
// before Start. An empty (default-constructed) SourceLocation is returned when
// Start, or the location one character before it, is invalid or is a macro
// location.
37 SourceLocation
findPreviousTokenStart(SourceLocation Start
,
38 const SourceManager
&SM
,
39 const LangOptions
&LangOpts
) {
40 if (Start
.isInvalid() || Start
.isMacroID())
41 return SourceLocation();
// Move one character back so that the lookup lands in the preceding text.
43 SourceLocation BeforeStart
= Start
.getLocWithOffset(-1);
44 if (BeforeStart
.isInvalid() || BeforeStart
.isMacroID())
45 return SourceLocation();
47 return Lexer::GetBeginningOfToken(BeforeStart
, SM
, LangOpts
);
// Looks backwards from Start using findPreviousTokenStart, raw-lexes the token
// found there and returns that token's location. An empty SourceLocation is
// returned when Start or the previous token start is invalid or a macro
// location, or when raw lexing reports an error.
// NOTE(review): going by the name, the search is presumably restricted to a
// requested token kind and repeated until it matches -- confirm against the
// full parameter list and body.
50 SourceLocation
findPreviousTokenKind(SourceLocation Start
,
51 const SourceManager
&SM
,
52 const LangOptions
&LangOpts
,
54 if (Start
.isInvalid() || Start
.isMacroID())
55 return SourceLocation();
// Beginning of the token that precedes Start.
58 SourceLocation L
= findPreviousTokenStart(Start
, SM
, LangOpts
);
59 if (L
.isInvalid() || L
.isMacroID())
60 return SourceLocation();
// A true result from getRawToken is treated as a lexing failure here.
63 if (Lexer::getRawToken(L
, T
, SM
, LangOpts
, /*IgnoreWhiteSpace=*/true))
64 return SourceLocation();
67 return T
.getLocation();
// Thin wrapper over findNextAnyTokenKind that searches from Start for either
// of the two token kinds tok::comma and tok::semi and returns whatever
// location that helper yields.
73 SourceLocation
findNextTerminator(SourceLocation Start
, const SourceManager
&SM
,
74 const LangOptions
&LangOpts
) {
75 return findNextAnyTokenKind(Start
, SM
, LangOpts
, tok::comma
, tok::semi
);
// Raw-lexes the token that follows the token at Start. A Lexer is built by
// hand over the file buffer with comment retention switched on, so a comment
// is produced as a token instead of being skipped.
// NOTE(review): the handling of a macro Start and of a failed buffer lookup
// (InvalidTemp) is presumably an early exit -- confirm.
79 findNextTokenIncludingComments(SourceLocation Start
, const SourceManager
&SM
,
80 const LangOptions
&LangOpts
) {
81 // `Lexer::findNextToken` ignores comments, hence the manual lexer below.
82 if (Start
.isMacroID())
// Advance to the end of the token that begins at Start.
84 Start
= Lexer::getLocForEndOfToken(Start
, 0, SM
, LangOpts
);
85 // Break down the source location.
86 std::pair
<FileID
, unsigned> LocInfo
= SM
.getDecomposedLoc(Start
);
// InvalidTemp receives the failure flag from getBufferData.
87 bool InvalidTemp
= false;
88 StringRef File
= SM
.getBufferData(LocInfo
.first
, &InvalidTemp
);
91 // Lex from the start of the given location.
// The lexer covers the whole buffer but starts at the decomposed offset.
92 Lexer
L(SM
.getLocForStartOfFile(LocInfo
.first
), LangOpts
, File
.begin(),
93 File
.data() + LocInfo
.second
, File
.end());
// Keep comments as tokens.
94 L
.SetCommentRetentionState(true);
97 L
.LexFromRawLexer(Tok
);
// Repeatedly asks Lexer::findNextToken for the token after Start for as long
// as Start is valid, moving Start onto each comment token that is found so
// that the next query looks past it.
// NOTE(review): presumably the first result that is empty or not a comment is
// returned to the caller -- confirm.
102 findNextTokenSkippingComments(SourceLocation Start
, const SourceManager
&SM
,
103 const LangOptions
&LangOpts
) {
104 while (Start
.isValid()) {
105 std::optional
<Token
> CurrentToken
=
106 Lexer::findNextToken(Start
, SM
, LangOpts
);
// The search is over when no token was produced or it is not a comment.
107 if (!CurrentToken
|| !CurrentToken
->is(tok::comment
))
// A comment was found: continue the search from its location.
110 Start
= CurrentToken
->getLocation();
// Walks the tokens of Range from its beginning for as long as the current
// location does not exceed Range.getEnd(), fetching each following token with
// Lexer::findNextToken and testing it for tok::hash. Range must be valid; this
// is asserted.
// NOTE(review): going by the name, the function presumably reports true when
// a macro expansion or a preprocessor directive is met -- confirm.
116 bool rangeContainsExpansionsOrDirectives(SourceRange Range
,
117 const SourceManager
&SM
,
118 const LangOptions
&LangOpts
) {
119 assert(Range
.isValid() && "Invalid Range for relexing provided");
120 SourceLocation Loc
= Range
.getBegin();
122 while (Loc
<= Range
.getEnd()) {
126 std::optional
<Token
> Tok
= Lexer::findNextToken(Loc
, SM
, LangOpts
);
// A '#' token is singled out here.
131 if (Tok
->is(tok::hash
))
// Continue the walk from the token just examined.
134 Loc
= Tok
->getLocation();
// Raw-lexes the text of Range looking for the qualifier keyword TK, which is
// asserted to be one of kw_const, kw_volatile or kw_restrict. Matches are
// tracked in two slots, LastMatchBeforeTemplate and LastMatchAfterTemplate;
// the latter is cleared whenever a '>' or '>>' token is seen. The result is
// LastMatchAfterTemplate when it holds a token and LastMatchBeforeTemplate
// otherwise, so the returned optional is empty when neither slot was filled.
140 std::optional
<Token
> getQualifyingToken(tok::TokenKind TK
,
141 CharSourceRange Range
,
142 const ASTContext
&Context
,
143 const SourceManager
&SM
) {
144 assert((TK
== tok::kw_const
|| TK
== tok::kw_volatile
||
145 TK
== tok::kw_restrict
) &&
146 "TK is not a qualifier keyword");
// Set up a raw lexer over the file buffer, starting at the beginning of Range.
147 std::pair
<FileID
, unsigned> LocInfo
= SM
.getDecomposedLoc(Range
.getBegin());
148 StringRef File
= SM
.getBufferData(LocInfo
.first
);
149 Lexer
RawLexer(SM
.getLocForStartOfFile(LocInfo
.first
), Context
.getLangOpts(),
150 File
.begin(), File
.data() + LocInfo
.second
, File
.end());
151 std::optional
<Token
> LastMatchBeforeTemplate
;
152 std::optional
<Token
> LastMatchAfterTemplate
;
153 bool SawTemplate
= false;
// Keep lexing while the lexer returns false and the token lies strictly
// before the end of Range.
155 while (!RawLexer
.LexFromRawLexer(Tok
) &&
156 Range
.getEnd() != Tok
.getLocation() &&
157 !SM
.isBeforeInTranslationUnit(Range
.getEnd(), Tok
.getLocation())) {
// The token kind of a raw identifier is replaced by the one recorded in the
// identifier table, so that it can be compared against TK below.
158 if (Tok
.is(tok::raw_identifier
)) {
159 IdentifierInfo
&Info
= Context
.Idents
.get(
160 StringRef(SM
.getCharacterData(Tok
.getLocation()), Tok
.getLength()));
161 Tok
.setIdentifierInfo(&Info
);
162 Tok
.setKind(Info
.getTokenID());
// NOTE(review): presumably a '<' marks the start of template arguments and
// sets SawTemplate -- confirm.
164 if (Tok
.is(tok::less
))
// A closing '>' or '>>' discards any match recorded after the template start.
166 else if (Tok
.isOneOf(tok::greater
, tok::greatergreater
))
167 LastMatchAfterTemplate
= std::nullopt
;
168 else if (Tok
.is(TK
)) {
170 LastMatchAfterTemplate
= Tok
;
172 LastMatchBeforeTemplate
= Tok
;
// Prefer the match recorded after the template, if there is one.
175 return LastMatchAfterTemplate
!= std::nullopt
? LastMatchAfterTemplate
176 : LastMatchBeforeTemplate
;
// Returns true when S is a CompoundStmt, a DeclStmt or a NullStmt.
// getUnifiedEndLoc stops descending into children at such a statement and
// does not look for a trailing semicolon after it.
179 static bool breakAndReturnEnd(const Stmt
&S
) {
180 return isa
<CompoundStmt
, DeclStmt
, NullStmt
>(S
);
// Returns true when S is one of the statement classes listed in the isa<>
// check, which begins with Expr, DoStmt, ReturnStmt, BreakStmt, ContinueStmt
// and GotoStmt. getUnifiedEndLoc looks up the semicolon after the statement
// end for these classes.
183 static bool breakAndReturnEndPlus1Token(const Stmt
&S
) {
184 return isa
<Expr
, DoStmt
, ReturnStmt
, BreakStmt
, ContinueStmt
, GotoStmt
,
188 // Given a Stmt which does not include its semicolon this method returns the
189 // SourceLocation of the semicolon.
// The next token after EndLoc is looked up with comments skipped. When it is
// not a semicolon, an empty SourceLocation is returned. For an EndLoc inside a
// macro, the lookup is first attempted from the spelling location of EndLoc.
190 static SourceLocation
getSemicolonAfterStmtEndLoc(const SourceLocation
&EndLoc
,
191 const SourceManager
&SM
,
192 const LangOptions
&LangOpts
) {
194 if (EndLoc
.isMacroID()) {
195 // Assuming EndLoc points to a function call foo within macro F.
196 // This method is supposed to return location of the semicolon within
197 // those macro arguments:
199 // ^ EndLoc ^ SpellingLoc ^ next token of SpellingLoc
200 const SourceLocation SpellingLoc
= SM
.getSpellingLoc(EndLoc
);
201 std::optional
<Token
> NextTok
=
202 findNextTokenSkippingComments(SpellingLoc
, SM
, LangOpts
);
204 // Was the next token found successfully?
205 // All macro issues are simply resolved by ensuring it's a semicolon.
206 if (NextTok
&& NextTok
->is(tok::TokenKind::semi
)) {
207 // Ideally this would return `F` with spelling location `;` (NextTok)
208 // following the example above. For now simply return NextTok location.
209 return NextTok
->getLocation();
212 // Fallthrough to 'normal handling'.
214 // ^ EndLoc ^ SpellingLoc ) ^ next token of EndLoc
// Normal handling: look at the token that follows EndLoc itself.
217 std::optional
<Token
> NextTok
=
218 findNextTokenSkippingComments(EndLoc
, SM
, LangOpts
);
220 // Testing for semicolon again avoids some issues with macros.
221 if (NextTok
&& NextTok
->is(tok::TokenKind::semi
))
222 return NextTok
->getLocation();
// No semicolon follows: signal that with an empty location.
224 return SourceLocation();
// Computes an end location for S. Starting from S itself, the statement tree
// is descended through children until a statement has no children or is
// matched by breakAndReturnEnd or breakAndReturnEndPlus1Token. When the
// statement reached is matched only by breakAndReturnEndPlus1Token, the
// location of the semicolon after S.getEndLoc() is returned (this may be an
// empty location if no semicolon follows). Otherwise S.getEndLoc() is
// returned as is.
// NOTE(review): the child loop presumably leaves LastChild pointing at the
// last child of each level -- confirm.
227 SourceLocation
getUnifiedEndLoc(const Stmt
&S
, const SourceManager
&SM
,
228 const LangOptions
&LangOpts
) {
230 const Stmt
*LastChild
= &S
;
231 while (!LastChild
->children().empty() && !breakAndReturnEnd(*LastChild
) &&
232 !breakAndReturnEndPlus1Token(*LastChild
)) {
233 for (const Stmt
*Child
: LastChild
->children())
// The semicolon is looked up only when the statement reached is matched by
// breakAndReturnEndPlus1Token and not by breakAndReturnEnd.
237 if (!breakAndReturnEnd(*LastChild
) && breakAndReturnEndPlus1Token(*LastChild
))
238 return getSemicolonAfterStmtEndLoc(S
.getEndLoc(), SM
, LangOpts
);
240 return S
.getEndLoc();
// Computes the location at which a noexcept specifier would be placed on
// FuncDecl, using the language options taken from the declaration.
//  - Without parameters: the declaration is raw-lexed token by token from its
//    beginning; at the first r_paren token the location one character past
//    CurrentLocation is returned.
//  - With parameters: the end location of the last parameter is taken and,
//    when valid, Lexer::findLocationAfterToken is asked for the location after
//    the r_paren that follows it, skipping trailing whitespace and newline.
// NOTE(review): the remaining paths presumably return an invalid location --
// confirm.
243 SourceLocation
getLocationForNoexceptSpecifier(const FunctionDecl
*FuncDecl
,
244 const SourceManager
&SM
) {
248 const LangOptions
&LangOpts
= FuncDecl
->getLangOpts();
250 if (FuncDecl
->getNumParams() == 0) {
251 // Start at the beginning of the function declaration, and find the closing
252 // parenthesis after which we would place the noexcept specifier.
254 SourceLocation CurrentLocation
= FuncDecl
->getBeginLoc();
// Lex for as long as getRawToken returns false.
255 while (!Lexer::getRawToken(CurrentLocation
, CurrentToken
, SM
, LangOpts
,
257 if (CurrentToken
.is(tok::r_paren
))
258 return CurrentLocation
.getLocWithOffset(1);
// Not the closing parenthesis yet: continue from the end of this token.
260 CurrentLocation
= CurrentToken
.getEndLoc();
263 // Failed to find the closing parenthesis, so just return an invalid
268 // FunctionDecl with parameters
269 const SourceLocation NoexceptLoc
=
270 FuncDecl
->getParamDecl(FuncDecl
->getNumParams() - 1)->getEndLoc();
271 if (NoexceptLoc
.isValid())
272 return Lexer::findLocationAfterToken(
273 NoexceptLoc
, tok::r_paren
, SM
, LangOpts
,
274 /*SkipTrailingWhitespaceAndNewLine=*/true);
279 } // namespace clang::tidy::utils::lexer