//===--- FormatTokenSource.h - Format C++ code ------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file defines the \c FormatTokenSource interface, which provides a token
/// stream as well as the ability to manipulate the token stream.
///
//===----------------------------------------------------------------------===//
15 #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H
16 #define LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H
18 #include "FormatToken.h"
19 #include "UnwrappedLineParser.h"
20 #include "llvm/ADT/DenseMap.h"
23 #define DEBUG_TYPE "format-token-source"
28 // Navigate a token stream.
30 // Enables traversal of a token stream, resetting the position in a token
31 // stream, as well as inserting new tokens.
32 class FormatTokenSource
{
34 virtual ~FormatTokenSource() {}
36 // Returns the next token in the token stream.
37 virtual FormatToken
*getNextToken() = 0;
39 // Returns the token preceding the token returned by the last call to
40 // getNextToken() in the token stream, or nullptr if no such token exists.
42 // Must not be called directly at the position directly after insertTokens()
44 virtual FormatToken
*getPreviousToken() = 0;
46 // Returns the token that would be returned by the next call to
48 virtual FormatToken
*peekNextToken(bool SkipComment
= false) = 0;
50 // Returns whether we are at the end of the file.
51 // This can be different from whether getNextToken() returned an eof token
52 // when the FormatTokenSource is a view on a part of the token stream.
53 virtual bool isEOF() = 0;
55 // Gets the current position in the token stream, to be used by setPosition().
57 // Note that the value of the position is not meaningful, and specifically
58 // should not be used to get relative token positions.
59 virtual unsigned getPosition() = 0;
61 // Resets the token stream to the state it was in when getPosition() returned
62 // Position, and return the token at that position in the stream.
63 virtual FormatToken
*setPosition(unsigned Position
) = 0;
65 // Insert the given tokens before the current position.
66 // Returns the first token in \c Tokens.
67 // The next returned token will be the second token in \c Tokens.
68 // Requires the last token in Tokens to be EOF; once the EOF token is reached,
69 // the next token will be the last token returned by getNextToken();
71 // For example, given the token sequence 'a1 a2':
72 // getNextToken() -> a1
73 // insertTokens('b1 b2') -> b1
74 // getNextToken() -> b2
75 // getNextToken() -> a1
76 // getNextToken() -> a2
77 virtual FormatToken
*insertTokens(ArrayRef
<FormatToken
*> Tokens
) = 0;
80 class IndexedTokenSource
: public FormatTokenSource
{
82 IndexedTokenSource(ArrayRef
<FormatToken
*> Tokens
)
83 : Tokens(Tokens
), Position(-1) {}
85 FormatToken
*getNextToken() override
{
86 if (Position
>= 0 && isEOF()) {
88 llvm::dbgs() << "Next ";
91 return Tokens
[Position
];
93 Position
= successor(Position
);
95 llvm::dbgs() << "Next ";
98 return Tokens
[Position
];
101 FormatToken
*getPreviousToken() override
{
102 assert(Position
<= 0 || Tokens
[Position
- 1]->isNot(tok::eof
));
103 return Position
> 0 ? Tokens
[Position
- 1] : nullptr;
106 FormatToken
*peekNextToken(bool SkipComment
= false) override
{
108 return Tokens
[Position
];
109 int Next
= successor(Position
);
111 while (Tokens
[Next
]->is(tok::comment
))
112 Next
= successor(Next
);
114 llvm::dbgs() << "Peeking ";
120 bool isEOF() override
{
121 return Position
== -1 ? false : Tokens
[Position
]->is(tok::eof
);
124 unsigned getPosition() override
{
125 LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position
<< "\n");
126 assert(Position
>= 0);
130 FormatToken
*setPosition(unsigned P
) override
{
131 LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P
<< "\n");
133 return Tokens
[Position
];
136 FormatToken
*insertTokens(ArrayRef
<FormatToken
*> New
) override
{
137 assert(Position
!= -1);
138 assert((*New
.rbegin())->Tok
.is(tok::eof
));
139 int Next
= Tokens
.size();
140 Tokens
.append(New
.begin(), New
.end());
142 llvm::dbgs() << "Inserting:\n";
143 for (int I
= Next
, E
= Tokens
.size(); I
!= E
; ++I
)
145 llvm::dbgs() << " Jump from: " << (Tokens
.size() - 1) << " -> "
148 Jumps
[Tokens
.size() - 1] = Position
;
151 llvm::dbgs() << "At inserted token ";
154 return Tokens
[Position
];
157 void reset() { Position
= -1; }
160 int successor(int Current
) const {
161 int Next
= Current
+ 1;
162 auto it
= Jumps
.find(Next
);
163 if (it
!= Jumps
.end()) {
165 assert(!Jumps
.contains(Next
));
170 void dbgToken(int Position
, llvm::StringRef Indent
= "") {
171 FormatToken
*Tok
= Tokens
[Position
];
172 llvm::dbgs() << Indent
<< "[" << Position
173 << "] Token: " << Tok
->Tok
.getName() << " / " << Tok
->TokenText
174 << ", Macro: " << !!Tok
->MacroCtx
<< "\n";
177 SmallVector
<FormatToken
*> Tokens
;
180 // Maps from position a to position b, so that when we reach a, the token
181 // stream continues at position b instead.
182 llvm::DenseMap
<int, int> Jumps
;
185 class ScopedMacroState
: public FormatTokenSource
{
187 ScopedMacroState(UnwrappedLine
&Line
, FormatTokenSource
*&TokenSource
,
188 FormatToken
*&ResetToken
)
189 : Line(Line
), TokenSource(TokenSource
), ResetToken(ResetToken
),
190 PreviousLineLevel(Line
.Level
), PreviousTokenSource(TokenSource
),
191 Token(nullptr), PreviousToken(nullptr) {
192 FakeEOF
.Tok
.startToken();
193 FakeEOF
.Tok
.setKind(tok::eof
);
196 Line
.InPPDirective
= true;
197 // InMacroBody gets set after the `#define x` part.
200 ~ScopedMacroState() override
{
201 TokenSource
= PreviousTokenSource
;
203 Line
.InPPDirective
= false;
204 Line
.InMacroBody
= false;
205 Line
.Level
= PreviousLineLevel
;
208 FormatToken
*getNextToken() override
{
209 // The \c UnwrappedLineParser guards against this by never calling
210 // \c getNextToken() after it has encountered the first eof token.
212 PreviousToken
= Token
;
213 Token
= PreviousTokenSource
->getNextToken();
219 FormatToken
*getPreviousToken() override
{
220 return PreviousTokenSource
->getPreviousToken();
223 FormatToken
*peekNextToken(bool SkipComment
) override
{
226 return PreviousTokenSource
->peekNextToken(SkipComment
);
229 bool isEOF() override
{ return PreviousTokenSource
->isEOF(); }
231 unsigned getPosition() override
{ return PreviousTokenSource
->getPosition(); }
233 FormatToken
*setPosition(unsigned Position
) override
{
234 PreviousToken
= nullptr;
235 Token
= PreviousTokenSource
->setPosition(Position
);
239 FormatToken
*insertTokens(ArrayRef
<FormatToken
*> Tokens
) override
{
240 llvm_unreachable("Cannot insert tokens while parsing a macro.");
246 return Token
&& Token
->HasUnescapedNewline
&&
247 !continuesLineComment(*Token
, PreviousToken
,
248 /*MinColumnToken=*/PreviousToken
);
253 FormatTokenSource
*&TokenSource
;
254 FormatToken
*&ResetToken
;
255 unsigned PreviousLineLevel
;
256 FormatTokenSource
*PreviousTokenSource
;
259 FormatToken
*PreviousToken
;
262 } // namespace format