1 //===--- FormatTokenSource.h - Format C++ code ------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file defines the \c FormatTokenSource interface, which provides a token
11 /// stream as well as the ability to manipulate the token stream.
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H
16 #define LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H
18 #include "UnwrappedLineParser.h"
20 #define DEBUG_TYPE "format-token-source"
25 // Navigate a token stream.
27 // Enables traversal of a token stream, resetting the position in a token
28 // stream, as well as inserting new tokens.
// Every token-stream operation declared below is pure virtual and has to be
// supplied by a concrete source; getNextNonComment() is the only non-virtual
// helper and is built on top of the virtual interface.
29 class FormatTokenSource
{
// Virtual destructor with an empty body.
31 virtual ~FormatTokenSource() {}
33 // Returns the next token in the token stream.
34 virtual FormatToken
*getNextToken() = 0;
36 // Returns the token preceding the token returned by the last call to
37 // getNextToken() in the token stream, or nullptr if no such token exists.
39 // Must not be called directly at the position directly after insertTokens().
41 virtual FormatToken
*getPreviousToken() = 0;
43 // Returns the token that would be returned by the next call to
// getNextToken().
// NOTE(review): SkipComment presumably makes the peek skip over comment
// tokens (see the comment loop in IndexedTokenSource) - confirm.
45 virtual FormatToken
*peekNextToken(bool SkipComment
= false) = 0;
47 // Returns whether we are at the end of the file.
48 // This can be different from whether getNextToken() returned an eof token
49 // when the FormatTokenSource is a view on a part of the token stream.
50 virtual bool isEOF() = 0;
52 // Gets the current position in the token stream, to be used by setPosition().
54 // Note that the value of the position is not meaningful, and specifically
55 // should not be used to get relative token positions.
56 virtual unsigned getPosition() = 0;
58 // Resets the token stream to the state it was in when getPosition() returned
59 // Position, and return the token at that position in the stream.
60 virtual FormatToken
*setPosition(unsigned Position
) = 0;
62 // Insert the given tokens before the current position.
63 // Returns the first token in \c Tokens.
64 // The next returned token will be the second token in \c Tokens.
65 // Requires the last token in Tokens to be EOF; once the EOF token is reached,
66 // the next token will be the last token returned by getNextToken();
68 // For example, given the token sequence 'a1 a2':
69 // getNextToken() -> a1
70 // insertTokens('b1 b2') -> b1
71 // getNextToken() -> b2
72 // getNextToken() -> a1
73 // getNextToken() -> a2
74 virtual FormatToken
*insertTokens(ArrayRef
<FormatToken
*> Tokens
) = 0;
// Keeps looping for as long as the current token Tok is a comment.
// The result is marked [[nodiscard]], so callers are expected to use it.
// NOTE(review): the loop head and the statement that assigns Tok are not
// visible here; presumably Tok is fetched with getNextToken() on every
// iteration and the first non-comment token is returned - confirm.
76 [[nodiscard
]] FormatToken
*getNextNonComment() {
81 } while (Tok
->is(tok::comment
));
// FormatTokenSource implementation that walks a token list by index.
// Position is the index of the current token; -1 is the initial/reset value.
// Inserted tokens are appended to the end of Tokens and linked back into the
// stream through the Jumps table.
86 class IndexedTokenSource
: public FormatTokenSource
{
// Initializes the Tokens member from the given list and starts at
// Position -1.
88 IndexedTokenSource(ArrayRef
<FormatToken
*> Tokens
)
89 : Tokens(Tokens
), Position(-1) {}
// Advances to the successor position and returns the token stored there.
// Once positioned on an eof token, the stream no longer advances: the same
// token at Position is returned again.
91 FormatToken
*getNextToken() override
{
92 if (Position
>= 0 && isEOF()) {
94 llvm::dbgs() << "Next ";
97 return Tokens
[Position
];
// Not at EOF: move on, following a registered jump if there is one.
99 Position
= successor(Position
);
101 llvm::dbgs() << "Next ";
104 return Tokens
[Position
];
// Returns the token stored at index Position - 1, or nullptr when Position
// is not positive. Asserts that this neighbouring token is not an eof token.
107 FormatToken
*getPreviousToken() override
{
108 assert(Position
<= 0 || Tokens
[Position
- 1]->isNot(tok::eof
));
109 return Position
> 0 ? Tokens
[Position
- 1] : nullptr;
// Looks at the upcoming token without modifying Position.
// NOTE(review): the conditions guarding the early return and the comment
// skipping loop are not visible here; presumably the early return applies at
// EOF and the loop only runs when SkipComment is set - confirm.
112 FormatToken
*peekNextToken(bool SkipComment
= false) override
{
114 return Tokens
[Position
];
115 int Next
= successor(Position
);
// Step over comment tokens, following jumps the same way getNextToken() does.
117 while (Tokens
[Next
]->is(tok::comment
))
118 Next
= successor(Next
);
120 llvm::dbgs() << "Peeking ";
// Position -1 is never EOF; otherwise EOF means that the token at Position
// is an eof token.
126 bool isEOF() override
{
127 return Position
== -1 ? false : Tokens
[Position
]->is(tok::eof
);
// Logs the current position under LLVM_DEBUG and asserts that it is
// non-negative.
// NOTE(review): the return statement is not visible here; presumably
// Position is returned - confirm.
130 unsigned getPosition() override
{
131 LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position
<< "\n");
132 assert(Position
>= 0);
// Logs the requested position under LLVM_DEBUG and returns the token at
// Position.
// NOTE(review): the assignment of P to Position is not visible here;
// presumably it happens before the return - confirm.
136 FormatToken
*setPosition(unsigned P
) override
{
137 LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P
<< "\n");
139 return Tokens
[Position
];
// Appends New to the end of Tokens and registers a jump from the last
// appended token back to the current Position.
// Asserts that a token has already been returned (Position != -1) and that
// the last token of New is an eof token.
142 FormatToken
*insertTokens(ArrayRef
<FormatToken
*> New
) override
{
143 assert(Position
!= -1);
144 assert((*New
.rbegin())->Tok
.is(tok::eof
));
// Next is the index at which the first appended token will be stored.
145 int Next
= Tokens
.size();
146 Tokens
.append(New
.begin(), New
.end());
148 llvm::dbgs() << "Inserting:\n";
149 for (int I
= Next
, E
= Tokens
.size(); I
!= E
; ++I
)
151 llvm::dbgs() << " Jump from: " << (Tokens
.size() - 1) << " -> "
// Reaching the last appended token (the eof required above) continues the
// stream at the position that was current when the tokens were inserted.
154 Jumps
[Tokens
.size() - 1] = Position
;
157 llvm::dbgs() << "At inserted token ";
// NOTE(review): presumably Position has been moved to Next by this point, so
// that the first inserted token is returned - confirm.
160 return Tokens
[Position
];
// Puts Position back to -1. Tokens and Jumps are left untouched.
163 void reset() { Position
= -1; }
// Computes the position that follows Current: Current + 1, unless a jump is
// registered for that position.
// NOTE(review): the statement that redirects Next to the jump target and the
// return are not visible here; the assert presumably checks that a jump
// target is not itself the source of another jump - confirm.
166 int successor(int Current
) const {
167 int Next
= Current
+ 1;
168 auto it
= Jumps
.find(Next
);
169 if (it
!= Jumps
.end()) {
171 assert(!Jumps
.contains(Next
));
// Debug helper: writes the index, token kind name, token text and whether
// MacroCtx is set for the token at Position to llvm::dbgs(), prefixed by
// Indent.
176 void dbgToken(int Position
, StringRef Indent
= "") {
177 FormatToken
*Tok
= Tokens
[Position
];
178 llvm::dbgs() << Indent
<< "[" << Position
179 << "] Token: " << Tok
->Tok
.getName() << " / " << Tok
->TokenText
180 << ", Macro: " << !!Tok
->MacroCtx
<< "\n";
// Token storage: the tokens passed to the constructor, followed by any
// tokens appended through insertTokens().
183 SmallVector
<FormatToken
*> Tokens
;
186 // Maps from position a to position b, so that when we reach a, the token
187 // stream continues at position b instead.
188 llvm::DenseMap
<int, int> Jumps
;
// Scoped FormatTokenSource wrapper used while a macro / preprocessor
// directive is parsed. It remembers the token source and line level that
// were active at construction, forwards most queries to that source, and
// restores the saved state in its destructor.
191 class ScopedMacroState
: public FormatTokenSource
{
// Saves Line.Level and the current TokenSource, prepares FakeEOF as an eof
// token and marks Line as being inside a preprocessor directive.
// NOTE(review): TokenSource is held by reference; presumably it is pointed
// at this object in a statement not visible here - confirm.
193 ScopedMacroState(UnwrappedLine
&Line
, FormatTokenSource
*&TokenSource
,
194 FormatToken
*&ResetToken
)
195 : Line(Line
), TokenSource(TokenSource
), ResetToken(ResetToken
),
196 PreviousLineLevel(Line
.Level
), PreviousTokenSource(TokenSource
),
197 Token(nullptr), PreviousToken(nullptr) {
198 FakeEOF
.Tok
.startToken();
199 FakeEOF
.Tok
.setKind(tok::eof
);
202 Line
.InPPDirective
= true;
203 // InMacroBody gets set after the `#define x` part.
// Undoes the constructor: reinstates the saved token source, clears the
// preprocessor flags on Line and restores the saved line level.
206 ~ScopedMacroState() override
{
207 TokenSource
= PreviousTokenSource
;
209 Line
.InPPDirective
= false;
210 Line
.InMacroBody
= false;
211 Line
.Level
= PreviousLineLevel
;
// Shifts Token into PreviousToken and fetches the next token from the
// wrapped source.
// NOTE(review): the return statements are not visible here; presumably
// FakeEOF is returned once the end of the macro is reached - confirm.
214 FormatToken
*getNextToken() override
{
215 // The \c UnwrappedLineParser guards against this by never calling
216 // \c getNextToken() after it has encountered the first eof token.
218 PreviousToken
= Token
;
219 Token
= PreviousTokenSource
->getNextToken();
// Forwards to the wrapped source.
225 FormatToken
*getPreviousToken() override
{
226 return PreviousTokenSource
->getPreviousToken();
// Forwards to the wrapped source.
// NOTE(review): a statement before this return is not visible here - confirm
// whether the end-of-macro case is handled first.
229 FormatToken
*peekNextToken(bool SkipComment
) override
{
232 return PreviousTokenSource
->peekNextToken(SkipComment
);
// Forwards to the wrapped source.
235 bool isEOF() override
{ return PreviousTokenSource
->isEOF(); }
// Forwards to the wrapped source.
237 unsigned getPosition() override
{ return PreviousTokenSource
->getPosition(); }
// Repositions the wrapped source, takes the token it returns as the new
// current Token and forgets PreviousToken.
239 FormatToken
*setPosition(unsigned Position
) override
{
240 PreviousToken
= nullptr;
241 Token
= PreviousTokenSource
->setPosition(Position
);
// Not supported by this source: reaching this is treated as a programming
// error via llvm_unreachable.
245 FormatToken
*insertTokens(ArrayRef
<FormatToken
*> Tokens
) override
{
246 llvm_unreachable("Cannot insert tokens while parsing a macro.");
// True when there is a current Token, it has an unescaped newline, and it
// does not continue a line comment started by PreviousToken.
// NOTE(review): the signature of the function containing this return is not
// visible here; presumably it is the private end-of-macro check - confirm.
252 return Token
&& Token
->HasUnescapedNewline
&&
253 !continuesLineComment(*Token
, PreviousToken
,
254 /*MinColumnToken=*/PreviousToken
);
// Reference to the caller's token-source pointer; the destructor writes
// PreviousTokenSource back through it.
259 FormatTokenSource
*&TokenSource
;
// Reference to a caller-owned token pointer, bound in the constructor.
260 FormatToken
*&ResetToken
;
// Line.Level at construction time, restored by the destructor.
261 unsigned PreviousLineLevel
;
// The token source that was active at construction; all forwarding goes here.
262 FormatTokenSource
*PreviousTokenSource
;
// The token that was current before the most recent getNextToken() call;
// cleared by setPosition().
265 FormatToken
*PreviousToken
;
268 } // namespace format