1 //===--- LexerUtils.cpp - clang-tidy---------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "LexerUtils.h"
10 #include "clang/AST/AST.h"
11 #include "clang/Basic/SourceManager.h"
15 namespace clang::tidy::utils::lexer
{
17 std::pair
<Token
, SourceLocation
>
18 getPreviousTokenAndStart(SourceLocation Location
, const SourceManager
&SM
,
19 const LangOptions
&LangOpts
, bool SkipComments
) {
21 Token
.setKind(tok::unknown
);
23 Location
= Location
.getLocWithOffset(-1);
24 if (Location
.isInvalid())
25 return {Token
, Location
};
27 auto StartOfFile
= SM
.getLocForStartOfFile(SM
.getFileID(Location
));
28 while (Location
!= StartOfFile
) {
29 Location
= Lexer::GetBeginningOfToken(Location
, SM
, LangOpts
);
30 if (!Lexer::getRawToken(Location
, Token
, SM
, LangOpts
) &&
31 (!SkipComments
|| !Token
.is(tok::comment
))) {
34 Location
= Location
.getLocWithOffset(-1);
36 return {Token
, Location
};
39 Token
getPreviousToken(SourceLocation Location
, const SourceManager
&SM
,
40 const LangOptions
&LangOpts
, bool SkipComments
) {
42 getPreviousTokenAndStart(Location
, SM
, LangOpts
, SkipComments
);
46 SourceLocation
findPreviousTokenStart(SourceLocation Start
,
47 const SourceManager
&SM
,
48 const LangOptions
&LangOpts
) {
49 if (Start
.isInvalid() || Start
.isMacroID())
52 SourceLocation BeforeStart
= Start
.getLocWithOffset(-1);
53 if (BeforeStart
.isInvalid() || BeforeStart
.isMacroID())
56 return Lexer::GetBeginningOfToken(BeforeStart
, SM
, LangOpts
);
59 SourceLocation
findPreviousTokenKind(SourceLocation Start
,
60 const SourceManager
&SM
,
61 const LangOptions
&LangOpts
,
63 if (Start
.isInvalid() || Start
.isMacroID())
67 SourceLocation L
= findPreviousTokenStart(Start
, SM
, LangOpts
);
68 if (L
.isInvalid() || L
.isMacroID())
72 if (Lexer::getRawToken(L
, T
, SM
, LangOpts
, /*IgnoreWhiteSpace=*/true))
76 return T
.getLocation();
82 SourceLocation
findNextTerminator(SourceLocation Start
, const SourceManager
&SM
,
83 const LangOptions
&LangOpts
) {
84 return findNextAnyTokenKind(Start
, SM
, LangOpts
, tok::comma
, tok::semi
);
88 findNextTokenIncludingComments(SourceLocation Start
, const SourceManager
&SM
,
89 const LangOptions
&LangOpts
) {
90 // `Lexer::findNextToken` will ignore comment
91 if (Start
.isMacroID())
93 Start
= Lexer::getLocForEndOfToken(Start
, 0, SM
, LangOpts
);
94 // Break down the source location.
95 std::pair
<FileID
, unsigned> LocInfo
= SM
.getDecomposedLoc(Start
);
96 bool InvalidTemp
= false;
97 StringRef File
= SM
.getBufferData(LocInfo
.first
, &InvalidTemp
);
100 // Lex from the start of the given location.
101 Lexer
L(SM
.getLocForStartOfFile(LocInfo
.first
), LangOpts
, File
.begin(),
102 File
.data() + LocInfo
.second
, File
.end());
103 L
.SetCommentRetentionState(true);
106 L
.LexFromRawLexer(Tok
);
111 findNextTokenSkippingComments(SourceLocation Start
, const SourceManager
&SM
,
112 const LangOptions
&LangOpts
) {
113 while (Start
.isValid()) {
114 std::optional
<Token
> CurrentToken
=
115 Lexer::findNextToken(Start
, SM
, LangOpts
);
116 if (!CurrentToken
|| !CurrentToken
->is(tok::comment
))
119 Start
= CurrentToken
->getLocation();
125 bool rangeContainsExpansionsOrDirectives(SourceRange Range
,
126 const SourceManager
&SM
,
127 const LangOptions
&LangOpts
) {
128 assert(Range
.isValid() && "Invalid Range for relexing provided");
129 SourceLocation Loc
= Range
.getBegin();
131 while (Loc
<= Range
.getEnd()) {
135 std::optional
<Token
> Tok
= Lexer::findNextToken(Loc
, SM
, LangOpts
);
140 if (Tok
->is(tok::hash
))
143 Loc
= Tok
->getLocation();
149 std::optional
<Token
> getQualifyingToken(tok::TokenKind TK
,
150 CharSourceRange Range
,
151 const ASTContext
&Context
,
152 const SourceManager
&SM
) {
153 assert((TK
== tok::kw_const
|| TK
== tok::kw_volatile
||
154 TK
== tok::kw_restrict
) &&
155 "TK is not a qualifier keyword");
156 std::pair
<FileID
, unsigned> LocInfo
= SM
.getDecomposedLoc(Range
.getBegin());
157 StringRef File
= SM
.getBufferData(LocInfo
.first
);
158 Lexer
RawLexer(SM
.getLocForStartOfFile(LocInfo
.first
), Context
.getLangOpts(),
159 File
.begin(), File
.data() + LocInfo
.second
, File
.end());
160 std::optional
<Token
> LastMatchBeforeTemplate
;
161 std::optional
<Token
> LastMatchAfterTemplate
;
162 bool SawTemplate
= false;
164 while (!RawLexer
.LexFromRawLexer(Tok
) &&
165 Range
.getEnd() != Tok
.getLocation() &&
166 !SM
.isBeforeInTranslationUnit(Range
.getEnd(), Tok
.getLocation())) {
167 if (Tok
.is(tok::raw_identifier
)) {
168 IdentifierInfo
&Info
= Context
.Idents
.get(
169 StringRef(SM
.getCharacterData(Tok
.getLocation()), Tok
.getLength()));
170 Tok
.setIdentifierInfo(&Info
);
171 Tok
.setKind(Info
.getTokenID());
173 if (Tok
.is(tok::less
))
175 else if (Tok
.isOneOf(tok::greater
, tok::greatergreater
))
176 LastMatchAfterTemplate
= std::nullopt
;
177 else if (Tok
.is(TK
)) {
179 LastMatchAfterTemplate
= Tok
;
181 LastMatchBeforeTemplate
= Tok
;
184 return LastMatchAfterTemplate
!= std::nullopt
? LastMatchAfterTemplate
185 : LastMatchBeforeTemplate
;
188 static bool breakAndReturnEnd(const Stmt
&S
) {
189 return isa
<CompoundStmt
, DeclStmt
, NullStmt
>(S
);
192 static bool breakAndReturnEndPlus1Token(const Stmt
&S
) {
193 return isa
<Expr
, DoStmt
, ReturnStmt
, BreakStmt
, ContinueStmt
, GotoStmt
,
197 // Given a Stmt which does not include it's semicolon this method returns the
198 // SourceLocation of the semicolon.
199 static SourceLocation
getSemicolonAfterStmtEndLoc(const SourceLocation
&EndLoc
,
200 const SourceManager
&SM
,
201 const LangOptions
&LangOpts
) {
203 if (EndLoc
.isMacroID()) {
204 // Assuming EndLoc points to a function call foo within macro F.
205 // This method is supposed to return location of the semicolon within
206 // those macro arguments:
208 // ^ EndLoc ^ SpellingLoc ^ next token of SpellingLoc
209 const SourceLocation SpellingLoc
= SM
.getSpellingLoc(EndLoc
);
210 std::optional
<Token
> NextTok
=
211 findNextTokenSkippingComments(SpellingLoc
, SM
, LangOpts
);
213 // Was the next token found successfully?
214 // All macro issues are simply resolved by ensuring it's a semicolon.
215 if (NextTok
&& NextTok
->is(tok::TokenKind::semi
)) {
216 // Ideally this would return `F` with spelling location `;` (NextTok)
217 // following the example above. For now simply return NextTok location.
218 return NextTok
->getLocation();
221 // Fallthrough to 'normal handling'.
223 // ^ EndLoc ^ SpellingLoc ) ^ next token of EndLoc
226 std::optional
<Token
> NextTok
=
227 findNextTokenSkippingComments(EndLoc
, SM
, LangOpts
);
229 // Testing for semicolon again avoids some issues with macros.
230 if (NextTok
&& NextTok
->is(tok::TokenKind::semi
))
231 return NextTok
->getLocation();
236 SourceLocation
getUnifiedEndLoc(const Stmt
&S
, const SourceManager
&SM
,
237 const LangOptions
&LangOpts
) {
239 const Stmt
*LastChild
= &S
;
240 while (!LastChild
->children().empty() && !breakAndReturnEnd(*LastChild
) &&
241 !breakAndReturnEndPlus1Token(*LastChild
)) {
242 for (const Stmt
*Child
: LastChild
->children())
246 if (!breakAndReturnEnd(*LastChild
) && breakAndReturnEndPlus1Token(*LastChild
))
247 return getSemicolonAfterStmtEndLoc(S
.getEndLoc(), SM
, LangOpts
);
249 return S
.getEndLoc();
252 SourceLocation
getLocationForNoexceptSpecifier(const FunctionDecl
*FuncDecl
,
253 const SourceManager
&SM
) {
257 const LangOptions
&LangOpts
= FuncDecl
->getLangOpts();
259 if (FuncDecl
->getNumParams() == 0) {
260 // Start at the beginning of the function declaration, and find the closing
261 // parenthesis after which we would place the noexcept specifier.
263 SourceLocation CurrentLocation
= FuncDecl
->getBeginLoc();
264 while (!Lexer::getRawToken(CurrentLocation
, CurrentToken
, SM
, LangOpts
,
266 if (CurrentToken
.is(tok::r_paren
))
267 return CurrentLocation
.getLocWithOffset(1);
269 CurrentLocation
= CurrentToken
.getEndLoc();
272 // Failed to find the closing parenthesis, so just return an invalid
277 // FunctionDecl with parameters
278 const SourceLocation NoexceptLoc
=
279 FuncDecl
->getParamDecl(FuncDecl
->getNumParams() - 1)->getEndLoc();
280 if (NoexceptLoc
.isValid())
281 return Lexer::findLocationAfterToken(
282 NoexceptLoc
, tok::r_paren
, SM
, LangOpts
,
283 /*SkipTrailingWhitespaceAndNewLine=*/true);
288 } // namespace clang::tidy::utils::lexer