//===--- LexerUtils.cpp - clang-tidy---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "LexerUtils.h"
#include "clang/AST/AST.h"
#include "clang/Basic/SourceManager.h"
#include <optional>
#include <utility>
15 namespace clang::tidy::utils::lexer
{
17 std::pair
<Token
, SourceLocation
>
18 getPreviousTokenAndStart(SourceLocation Location
, const SourceManager
&SM
,
19 const LangOptions
&LangOpts
, bool SkipComments
) {
21 Token
.setKind(tok::unknown
);
23 Location
= Location
.getLocWithOffset(-1);
24 if (Location
.isInvalid())
25 return {Token
, Location
};
27 const auto StartOfFile
= SM
.getLocForStartOfFile(SM
.getFileID(Location
));
28 while (Location
!= StartOfFile
) {
29 Location
= Lexer::GetBeginningOfToken(Location
, SM
, LangOpts
);
30 if (!Lexer::getRawToken(Location
, Token
, SM
, LangOpts
) &&
31 (!SkipComments
|| !Token
.is(tok::comment
))) {
34 if (Location
== StartOfFile
)
35 return {Token
, Location
};
36 Location
= Location
.getLocWithOffset(-1);
38 return {Token
, Location
};
41 Token
getPreviousToken(SourceLocation Location
, const SourceManager
&SM
,
42 const LangOptions
&LangOpts
, bool SkipComments
) {
44 getPreviousTokenAndStart(Location
, SM
, LangOpts
, SkipComments
);
48 SourceLocation
findPreviousTokenStart(SourceLocation Start
,
49 const SourceManager
&SM
,
50 const LangOptions
&LangOpts
) {
51 if (Start
.isInvalid() || Start
.isMacroID())
54 SourceLocation BeforeStart
= Start
.getLocWithOffset(-1);
55 if (BeforeStart
.isInvalid() || BeforeStart
.isMacroID())
58 return Lexer::GetBeginningOfToken(BeforeStart
, SM
, LangOpts
);
61 SourceLocation
findPreviousTokenKind(SourceLocation Start
,
62 const SourceManager
&SM
,
63 const LangOptions
&LangOpts
,
65 if (Start
.isInvalid() || Start
.isMacroID())
69 SourceLocation L
= findPreviousTokenStart(Start
, SM
, LangOpts
);
70 if (L
.isInvalid() || L
.isMacroID())
74 if (Lexer::getRawToken(L
, T
, SM
, LangOpts
, /*IgnoreWhiteSpace=*/true))
78 return T
.getLocation();
84 SourceLocation
findNextTerminator(SourceLocation Start
, const SourceManager
&SM
,
85 const LangOptions
&LangOpts
) {
86 return findNextAnyTokenKind(Start
, SM
, LangOpts
, tok::comma
, tok::semi
);
90 findNextTokenIncludingComments(SourceLocation Start
, const SourceManager
&SM
,
91 const LangOptions
&LangOpts
) {
92 // `Lexer::findNextToken` will ignore comment
93 if (Start
.isMacroID())
95 Start
= Lexer::getLocForEndOfToken(Start
, 0, SM
, LangOpts
);
96 // Break down the source location.
97 std::pair
<FileID
, unsigned> LocInfo
= SM
.getDecomposedLoc(Start
);
98 bool InvalidTemp
= false;
99 StringRef File
= SM
.getBufferData(LocInfo
.first
, &InvalidTemp
);
102 // Lex from the start of the given location.
103 Lexer
L(SM
.getLocForStartOfFile(LocInfo
.first
), LangOpts
, File
.begin(),
104 File
.data() + LocInfo
.second
, File
.end());
105 L
.SetCommentRetentionState(true);
108 L
.LexFromRawLexer(Tok
);
113 findNextTokenSkippingComments(SourceLocation Start
, const SourceManager
&SM
,
114 const LangOptions
&LangOpts
) {
115 while (Start
.isValid()) {
116 std::optional
<Token
> CurrentToken
=
117 Lexer::findNextToken(Start
, SM
, LangOpts
);
118 if (!CurrentToken
|| !CurrentToken
->is(tok::comment
))
121 Start
= CurrentToken
->getLocation();
127 bool rangeContainsExpansionsOrDirectives(SourceRange Range
,
128 const SourceManager
&SM
,
129 const LangOptions
&LangOpts
) {
130 assert(Range
.isValid() && "Invalid Range for relexing provided");
131 SourceLocation Loc
= Range
.getBegin();
133 while (Loc
<= Range
.getEnd()) {
137 std::optional
<Token
> Tok
= Lexer::findNextToken(Loc
, SM
, LangOpts
);
142 if (Tok
->is(tok::hash
))
145 Loc
= Tok
->getLocation();
151 std::optional
<Token
> getQualifyingToken(tok::TokenKind TK
,
152 CharSourceRange Range
,
153 const ASTContext
&Context
,
154 const SourceManager
&SM
) {
155 assert((TK
== tok::kw_const
|| TK
== tok::kw_volatile
||
156 TK
== tok::kw_restrict
) &&
157 "TK is not a qualifier keyword");
158 std::pair
<FileID
, unsigned> LocInfo
= SM
.getDecomposedLoc(Range
.getBegin());
159 StringRef File
= SM
.getBufferData(LocInfo
.first
);
160 Lexer
RawLexer(SM
.getLocForStartOfFile(LocInfo
.first
), Context
.getLangOpts(),
161 File
.begin(), File
.data() + LocInfo
.second
, File
.end());
162 std::optional
<Token
> LastMatchBeforeTemplate
;
163 std::optional
<Token
> LastMatchAfterTemplate
;
164 bool SawTemplate
= false;
166 while (!RawLexer
.LexFromRawLexer(Tok
) &&
167 Range
.getEnd() != Tok
.getLocation() &&
168 !SM
.isBeforeInTranslationUnit(Range
.getEnd(), Tok
.getLocation())) {
169 if (Tok
.is(tok::raw_identifier
)) {
170 IdentifierInfo
&Info
= Context
.Idents
.get(
171 StringRef(SM
.getCharacterData(Tok
.getLocation()), Tok
.getLength()));
172 Tok
.setIdentifierInfo(&Info
);
173 Tok
.setKind(Info
.getTokenID());
175 if (Tok
.is(tok::less
))
177 else if (Tok
.isOneOf(tok::greater
, tok::greatergreater
))
178 LastMatchAfterTemplate
= std::nullopt
;
179 else if (Tok
.is(TK
)) {
181 LastMatchAfterTemplate
= Tok
;
183 LastMatchBeforeTemplate
= Tok
;
186 return LastMatchAfterTemplate
!= std::nullopt
? LastMatchAfterTemplate
187 : LastMatchBeforeTemplate
;
190 static bool breakAndReturnEnd(const Stmt
&S
) {
191 return isa
<CompoundStmt
, DeclStmt
, NullStmt
>(S
);
194 static bool breakAndReturnEndPlus1Token(const Stmt
&S
) {
195 return isa
<Expr
, DoStmt
, ReturnStmt
, BreakStmt
, ContinueStmt
, GotoStmt
,
199 // Given a Stmt which does not include it's semicolon this method returns the
200 // SourceLocation of the semicolon.
201 static SourceLocation
getSemicolonAfterStmtEndLoc(const SourceLocation
&EndLoc
,
202 const SourceManager
&SM
,
203 const LangOptions
&LangOpts
) {
205 if (EndLoc
.isMacroID()) {
206 // Assuming EndLoc points to a function call foo within macro F.
207 // This method is supposed to return location of the semicolon within
208 // those macro arguments:
210 // ^ EndLoc ^ SpellingLoc ^ next token of SpellingLoc
211 const SourceLocation SpellingLoc
= SM
.getSpellingLoc(EndLoc
);
212 std::optional
<Token
> NextTok
=
213 findNextTokenSkippingComments(SpellingLoc
, SM
, LangOpts
);
215 // Was the next token found successfully?
216 // All macro issues are simply resolved by ensuring it's a semicolon.
217 if (NextTok
&& NextTok
->is(tok::TokenKind::semi
)) {
218 // Ideally this would return `F` with spelling location `;` (NextTok)
219 // following the example above. For now simply return NextTok location.
220 return NextTok
->getLocation();
223 // Fallthrough to 'normal handling'.
225 // ^ EndLoc ^ SpellingLoc ) ^ next token of EndLoc
228 std::optional
<Token
> NextTok
=
229 findNextTokenSkippingComments(EndLoc
, SM
, LangOpts
);
231 // Testing for semicolon again avoids some issues with macros.
232 if (NextTok
&& NextTok
->is(tok::TokenKind::semi
))
233 return NextTok
->getLocation();
238 SourceLocation
getUnifiedEndLoc(const Stmt
&S
, const SourceManager
&SM
,
239 const LangOptions
&LangOpts
) {
241 const Stmt
*LastChild
= &S
;
242 while (!LastChild
->children().empty() && !breakAndReturnEnd(*LastChild
) &&
243 !breakAndReturnEndPlus1Token(*LastChild
)) {
244 for (const Stmt
*Child
: LastChild
->children())
248 if (!breakAndReturnEnd(*LastChild
) && breakAndReturnEndPlus1Token(*LastChild
))
249 return getSemicolonAfterStmtEndLoc(S
.getEndLoc(), SM
, LangOpts
);
251 return S
.getEndLoc();
254 SourceLocation
getLocationForNoexceptSpecifier(const FunctionDecl
*FuncDecl
,
255 const SourceManager
&SM
) {
259 const LangOptions
&LangOpts
= FuncDecl
->getLangOpts();
261 if (FuncDecl
->getNumParams() == 0) {
262 // Start at the beginning of the function declaration, and find the closing
263 // parenthesis after which we would place the noexcept specifier.
265 SourceLocation CurrentLocation
= FuncDecl
->getBeginLoc();
266 while (!Lexer::getRawToken(CurrentLocation
, CurrentToken
, SM
, LangOpts
,
268 if (CurrentToken
.is(tok::r_paren
))
269 return CurrentLocation
.getLocWithOffset(1);
271 CurrentLocation
= CurrentToken
.getEndLoc();
274 // Failed to find the closing parenthesis, so just return an invalid
279 // FunctionDecl with parameters
280 const SourceLocation NoexceptLoc
=
281 FuncDecl
->getParamDecl(FuncDecl
->getNumParams() - 1)->getEndLoc();
282 if (NoexceptLoc
.isValid())
283 return Lexer::findLocationAfterToken(
284 NoexceptLoc
, tok::r_paren
, SM
, LangOpts
,
285 /*SkipTrailingWhitespaceAndNewLine=*/true);
290 } // namespace clang::tidy::utils::lexer