1 //===--- MacroCallReconstructor.cpp - Format C++ code -----------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// This file contains the implementation of MacroCallReconstructor, which fits
12 /// a reconstructed macro call to a parsed set of UnwrappedLines.
14 //===----------------------------------------------------------------------===//
18 #include "UnwrappedLineParser.h"
19 #include "clang/Basic/TokenKinds.h"
20 #include "llvm/ADT/DenseSet.h"
21 #include "llvm/Support/Debug.h"
24 #define DEBUG_TYPE "format-reconstruct"
29 // Call \p Call for each token in the unwrapped line given, passing
30 // the token, its parent and whether it is the first token in the line.
// The children of each node are visited recursively right after the node
// itself, with that node's token passed as their parent.
// NOTE(review): `T` and `First` are used below, but their declarations are not
// among the lines visible here - confirm against the complete file.
32 void forEachToken(const UnwrappedLine
&Line
, const T
&Call
,
33 FormatToken
*Parent
= nullptr) {
35 for (const auto &N
: Line
.Tokens
) {
36 Call(N
.Tok
, Parent
, First
);
// Recurse into the child lines; the current token becomes their parent.
38 for (const auto &Child
: N
.Children
)
39 forEachToken(Child
, Call
, N
.Tok
);
// Sets up a reconstructor: stores Level, initializes IdToReconstructed from
// ActiveExpansions, seeds Result with one default-constructed LineNode and
// makes Result the first entry of ActiveReconstructedLines.
43 MacroCallReconstructor::MacroCallReconstructor(
45 const llvm::DenseMap
<FormatToken
*, std::unique_ptr
<UnwrappedLine
>>
47 : Level(Level
), IdToReconstructed(ActiveExpansions
) {
// The single default-constructed node is the root under which lines are added.
48 Result
.Tokens
.push_back(std::make_unique
<LineNode
>());
49 ActiveReconstructedLines
.push_back(&Result
);
52 void MacroCallReconstructor::addLine(const UnwrappedLine
&Line
) {
53 assert(State
!= Finalized
);
54 LLVM_DEBUG(llvm::dbgs() << "MCR: new line...\n");
55 forEachToken(Line
, [&](FormatToken
*Token
, FormatToken
*Parent
, bool First
) {
56 add(Token
, Parent
, First
);
58 assert(InProgress
|| finished());
// Produces the reconstructed UnwrappedLine. The function is rvalue-qualified
// (`&&`), so it can only be invoked on an expiring reconstructor.
// It expects Result to hold exactly one node with exactly one child line and
// converts that child via createUnwrappedLine at the stored Level.
// NOTE(review): `Final` is asserted on below, but its declaration is not among
// the lines visible here - confirm against the complete file.
61 UnwrappedLine
MacroCallReconstructor::takeResult() && {
63 assert(Result
.Tokens
.size() == 1 &&
64 Result
.Tokens
.front()->Children
.size() == 1);
66 createUnwrappedLine(*Result
.Tokens
.front()->Children
.front(), Level
);
// The converted line must contain at least one token.
67 assert(!Final
.Tokens
.empty());
71 // Reconstruct the position of the next \p Token, given its parent \p
72 // ExpandedParent in the incoming unwrapped line. \p First specifies whether it
73 // is the first token in a given unwrapped line.
// Visible steps: optionally advance the active reconstruction up to Token,
// adjust the stack of active lines via prepareParent, then handle Token
// depending on whether it carries a macro context.
74 void MacroCallReconstructor::add(FormatToken
*Token
,
75 FormatToken
*ExpandedParent
, bool First
) {
77 llvm::dbgs() << "MCR: Token: " << Token
->TokenText
<< ", Parent: "
78 << (ExpandedParent
? ExpandedParent
->TokenText
: "<null>")
79 << ", First: " << First
<< "\n");
80 // In order to be able to find the correct parent in the reconstructed token
81 // stream, we need to continue the last open reconstruction until we find the
82 // given token if it is part of the reconstructed token stream.
84 // Note that hidden tokens can be part of the reconstructed stream in nested
87 // #define C(x, y) x y
91 // The outer macro call will be C(a, {b}), and the hidden token '}' can be
92 // found in the reconstructed token stream of that expansion level.
93 // In the expanded token stream
95 // 'b' is a child of '{'. We need to continue the open expansion of the ','
96 // in the call of 'C' in order to correctly set the ',' as the parent of '{',
97 // so we later set the spelled token 'b' as a child of the ','.
// Continue the active call only if an expansion is open and the token has a
// macro context that is either not hidden, or whose expansion depth differs
// from the number of currently active expansions.
98 if (!ActiveExpansions
.empty() && Token
->MacroCtx
&&
99 (Token
->MacroCtx
->Role
!= MR_Hidden
||
100 ActiveExpansions
.size() != Token
->MacroCtx
->ExpandedFrom
.size())) {
// NOTE(review): the statement guarded by this condition is not visible here.
101 if (/*PassedMacroComma = */ reconstructActiveCallUntil(Token
))
105 prepareParent(ExpandedParent
, First
);
107 if (Token
->MacroCtx
) {
108 // If this token was generated by a macro call, add the reconstructed
109 // equivalent of the token.
112 // Otherwise, we add it to the current line.
117 // Adjusts the stack of active reconstructed lines so we're ready to push
118 // tokens. The tokens to be pushed are children of ExpandedParent in the
122 // - creating a new line, if the parent is on the active line
123 // - popping active lines, if the parent is further up the stack
126 // ActiveReconstructedLines.back() is the line that has \p ExpandedParent or its
127 // reconstructed replacement token as a parent (when possible) - that is, the
128 // last token in \c ActiveReconstructedLines[ActiveReconstructedLines.size()-2]
129 // is the parent of ActiveReconstructedLines.back() in the reconstructed
131 void MacroCallReconstructor::prepareParent(FormatToken
*ExpandedParent
,
134 llvm::dbgs() << "ParentMap:\n";
137 // We want to find the parent in the new unwrapped line, where the expanded
138 // parent might have been replaced during reconstruction.
139 FormatToken
*Parent
= getParentInResult(ExpandedParent
);
140 LLVM_DEBUG(llvm::dbgs() << "MCR: New parent: "
141 << (Parent
? Parent
->TokenText
: "<null>") << "\n");
// Stays null unless a macro call is currently open.
143 FormatToken
*OpenMacroParent
= nullptr;
144 if (!MacroCallStructure
.empty()) {
145 // Inside a macro expansion, it is possible to lose track of the correct
146 // parent - either because it is already popped, for example because it was
147 // in a different macro argument (e.g. M({, })), or when we work on invalid
149 // Thus, we use the innermost macro call's parent as the parent at which
150 // we stop; this allows us to stay within the macro expansion and keeps
151 // any problems confined to the extent of the macro call.
153 getParentInResult(MacroCallStructure
.back().MacroCallLParen
);
154 LLVM_DEBUG(llvm::dbgs()
155 << "MacroCallLParen: "
156 << MacroCallStructure
.back().MacroCallLParen
->TokenText
157 << ", OpenMacroParent: "
158 << (OpenMacroParent
? OpenMacroParent
->TokenText
: "<null>")
// Second operand of the branch condition: the active line is non-empty and its
// last token is the (mapped) parent.
162 (!ActiveReconstructedLines
.back()->Tokens
.empty() &&
163 Parent
== ActiveReconstructedLines
.back()->Tokens
.back()->Tok
)) {
164 // If we are at the first token in a new line, we want to also
165 // create a new line in the resulting reconstructed unwrapped line.
// Pop active lines while the top line is empty, or while its last token is
// neither Parent nor the token compared against on the following line.
166 while (ActiveReconstructedLines
.back()->Tokens
.empty() ||
167 (Parent
!= ActiveReconstructedLines
.back()->Tokens
.back()->Tok
&&
168 ActiveReconstructedLines
.back()->Tokens
.back()->Tok
!=
170 ActiveReconstructedLines
.pop_back();
171 assert(!ActiveReconstructedLines
.empty());
173 assert(!ActiveReconstructedLines
.empty());
// Attach a fresh child line to the last token of the active line and make that
// child the new active line.
174 ActiveReconstructedLines
.back()->Tokens
.back()->Children
.push_back(
175 std::make_unique
<ReconstructedLine
>());
176 ActiveReconstructedLines
.push_back(
177 &*ActiveReconstructedLines
.back()->Tokens
.back()->Children
.back());
178 } else if (parentLine().Tokens
.back()->Tok
!= Parent
) {
179 // If we're not the first token in a new line, pop lines until we find
180 // the child of \c Parent in the stack.
// The loop also stops when the parent line's last token is null or equals
// OpenMacroParent.
181 while (Parent
!= parentLine().Tokens
.back()->Tok
&&
182 parentLine().Tokens
.back()->Tok
&&
183 parentLine().Tokens
.back()->Tok
!= OpenMacroParent
) {
184 ActiveReconstructedLines
.pop_back();
185 assert(!ActiveReconstructedLines
.empty());
188 assert(!ActiveReconstructedLines
.empty());
191 // For a given \p Parent in the incoming expanded token stream, find the
192 // corresponding parent in the output.
// The lookup goes through SpelledParentToReconstructedParent; the loop below
// repeats the lookup for as long as a mapping is found.
// NOTE(review): the loop body and the return statements are not among the
// lines visible here - confirm against the complete file.
193 FormatToken
*MacroCallReconstructor::getParentInResult(FormatToken
*Parent
) {
194 FormatToken
*Mapped
= SpelledParentToReconstructedParent
.lookup(Parent
);
197 for (; Mapped
; Mapped
= SpelledParentToReconstructedParent
.lookup(Parent
))
199 // If we use a different token than the parent in the expanded token stream
200 // as parent, mark it as a special parent, so the formatting code knows it
201 // needs to have its children formatted.
202 Parent
->MacroParent
= true;
206 // Reconstruct a \p Token that was expanded from a macro call.
// Requires Token->MacroCtx to be set. Handles, in order: the start of
// expansions, the token itself (visible or hidden), and the end of expansions.
207 void MacroCallReconstructor::reconstruct(FormatToken
*Token
) {
208 assert(Token
->MacroCtx
);
209 // A single token can be the only result of a macro call:
210 // Given: #define ID(x, y) ;
211 // And the call: ID(<some>, <tokens>)
212 // ';' in the expanded stream will reconstruct all of ID(<some>, <tokens>).
213 if (Token
->MacroCtx
->StartOfExpansion
) {
214 startReconstruction(Token
);
215 // If the order of tokens in the expanded token stream is not the
216 // same as the order of tokens in the reconstructed stream, we need
217 // to reconstruct tokens that arrive later in the stream.
218 if (Token
->MacroCtx
->Role
!= MR_Hidden
)
219 reconstructActiveCallUntil(Token
);
221 assert(!ActiveExpansions
.empty());
// Only act on the token if the innermost active expansion still has spelled
// tokens left.
222 if (ActiveExpansions
.back().SpelledI
!= ActiveExpansions
.back().SpelledE
) {
223 assert(ActiveExpansions
.size() == Token
->MacroCtx
->ExpandedFrom
.size());
224 if (Token
->MacroCtx
->Role
!= MR_Hidden
) {
225 // The current token in the reconstructed token stream must be the token
226 // we're looking for - we either arrive here after startReconstruction,
227 // which initiates the stream to the first token, or after
228 // reconstructActiveCallUntil skipped until the expected token in the
229 // reconstructed stream at the start of add(...).
230 assert(ActiveExpansions
.back().SpelledI
->Tok
== Token
);
231 processNextReconstructed();
232 } else if (!currentLine()->Tokens
.empty()) {
233 // Map all hidden tokens to the last visible token in the output.
234 // If the hidden token is a parent, we'll use the last visible
235 // token as the parent of the hidden token's children.
236 SpelledParentToReconstructedParent
[Token
] =
237 currentLine()->Tokens
.back()->Tok
;
// Otherwise walk the active lines from innermost to outermost and map the
// hidden token to the last token of a non-empty line.
239 for (auto I
= ActiveReconstructedLines
.rbegin(),
240 E
= ActiveReconstructedLines
.rend();
242 if (!(*I
)->Tokens
.empty()) {
243 SpelledParentToReconstructedParent
[Token
] = (*I
)->Tokens
.back()->Tok
;
249 if (Token
->MacroCtx
->EndOfExpansion
)
250 endReconstruction(Token
);
253 // Given a \p Token that starts an expansion, reconstruct the beginning of the
255 // For example, given: #define ID(x) x
256 // And the call: ID(int a)
// Requires Token->MacroCtx with a non-empty ExpandedFrom stack that is at
// least as deep as the stack of currently active expansions.
258 void MacroCallReconstructor::startReconstruction(FormatToken
*Token
) {
259 assert(Token
->MacroCtx
);
260 assert(!Token
->MacroCtx
->ExpandedFrom
.empty());
261 assert(ActiveExpansions
.size() <= Token
->MacroCtx
->ExpandedFrom
.size());
263 // Check that the token's reconstruction stack matches our current
264 // reconstruction stack.
265 for (size_t I
= 0; I
< ActiveExpansions
.size(); ++I
) {
// ExpandedFrom is indexed from the back, as it is ordered inside-to-outside.
266 assert(ActiveExpansions
[I
].ID
==
268 ->ExpandedFrom
[Token
->MacroCtx
->ExpandedFrom
.size() - 1 - I
]);
271 // Start reconstruction for all calls for which this token is the first token
272 // generated by the call.
273 // Note that the token's expanded from stack is inside-to-outside, and the
274 // expansions for which this token is not the first are the outermost ones.
275 ArrayRef
<FormatToken
*> StartedMacros
=
276 ArrayRef(Token
->MacroCtx
->ExpandedFrom
)
277 .drop_back(ActiveExpansions
.size());
278 assert(StartedMacros
.size() == Token
->MacroCtx
->StartOfExpansion
);
279 // We reconstruct macro calls outside-to-inside.
280 for (FormatToken
*ID
: llvm::reverse(StartedMacros
)) {
281 // We found a macro call to be reconstructed; the next time our
282 // reconstruction stack is empty we know we finished a reconstruction.
286 // Put the reconstructed macro call's token into our reconstruction stack.
287 auto IU
= IdToReconstructed
.find(ID
);
288 assert(IU
!= IdToReconstructed
.end());
289 ActiveExpansions
.push_back(
290 {ID
, IU
->second
->Tokens
.begin(), IU
->second
->Tokens
.end()});
291 // Process the macro call's identifier.
292 processNextReconstructed();
// NOTE(review): the statement guarded by the following end-of-stream check is
// not visible here.
293 if (ActiveExpansions
.back().SpelledI
== ActiveExpansions
.back().SpelledE
)
295 if (ActiveExpansions
.back().SpelledI
->Tok
->is(tok::l_paren
)) {
296 // Process the optional opening parenthesis.
297 processNextReconstructed();
302 // Add all tokens in the reconstruction stream to the output until we find the
304 bool MacroCallReconstructor::reconstructActiveCallUntil(FormatToken
*Token
) {
305 assert(!ActiveExpansions
.empty());
306 bool PassedMacroComma
= false;
307 // FIXME: If Token was already expanded earlier, due to
308 // a change in order, we will not find it, but need to
310 while (ActiveExpansions
.back().SpelledI
!= ActiveExpansions
.back().SpelledE
&&
311 ActiveExpansions
.back().SpelledI
->Tok
!= Token
) {
312 PassedMacroComma
= processNextReconstructed() || PassedMacroComma
;
314 return PassedMacroComma
;
317 // End all reconstructions for which \p Token is the final token.
// Requires Token->MacroCtx and at least EndOfExpansion active expansions; one
// active expansion is popped per loop iteration.
318 void MacroCallReconstructor::endReconstruction(FormatToken
*Token
) {
319 assert(Token
->MacroCtx
&&
320 (ActiveExpansions
.size() >= Token
->MacroCtx
->EndOfExpansion
));
321 for (size_t I
= 0; I
< Token
->MacroCtx
->EndOfExpansion
; ++I
) {
323 // Check all remaining tokens but the final closing parenthesis and
324 // optional trailing comment were already reconstructed at an inner
326 for (auto T
= ActiveExpansions
.back().SpelledI
;
327 T
!= ActiveExpansions
.back().SpelledE
; ++T
) {
// Note: this local `Token` shadows the function parameter of the same name
// for the remainder of this inner loop body.
328 FormatToken
*Token
= T
->Tok
;
329 bool ClosingParen
= (std::next(T
) == ActiveExpansions
.back().SpelledE
||
330 std::next(T
)->Tok
->isTrailingComment()) &&
331 !Token
->MacroCtx
&& Token
->is(tok::r_paren
);
332 bool TrailingComment
= Token
->isTrailingComment();
335 (ActiveExpansions
.size() < Token
->MacroCtx
->ExpandedFrom
.size());
336 if (!ClosingParen
&& !TrailingComment
&& !PreviousLevel
)
337 llvm::dbgs() << "At token: " << Token
->TokenText
<< "\n";
338 // In addition to the following cases, we can also run into this
339 // when a macro call had more arguments than expected; in that case,
340 // the comma and the remaining tokens in the macro call will
341 // potentially end up in the line when we finish the expansion.
342 // FIXME: Add the information which arguments are unused, and assert
343 // one of the cases below plus reconstructed macro argument tokens.
344 // assert(ClosingParen || TrailingComment || PreviousLevel);
347 // Handle the remaining open tokens:
348 // - expand the closing parenthesis, if it exists, including an optional
350 // - handle tokens that were already reconstructed at an inner expansion
352 // - handle tokens when a macro call had more than the expected number of
353 // arguments, i.e. when #define M(x) is called as M(a, b, c) we'll end
354 // up with the sequence ", b, c)" being open at the end of the
355 // reconstruction; we want to gracefully handle that case
357 // FIXME: See the above debug-check for what we will need to do to be
358 // able to assert this.
// T and SpelledI advance in lockstep: processNextReconstructed() increments
// SpelledI once per call, and the loop increments T once per iteration.
359 for (auto T
= ActiveExpansions
.back().SpelledI
;
360 T
!= ActiveExpansions
.back().SpelledE
; ++T
) {
361 processNextReconstructed();
363 ActiveExpansions
.pop_back();
// Debug helper: writes the chains stored in SpelledParentToReconstructedParent
// to llvm::dbgs(), one chain per output line, in the form "a -> b -> c".
367 void MacroCallReconstructor::debugParentMap() const {
// Collect every token that appears as a mapping target.
368 llvm::DenseSet
<FormatToken
*> Values
;
369 for (const auto &P
: SpelledParentToReconstructedParent
)
370 Values
.insert(P
.second
);
372 for (const auto &P
: SpelledParentToReconstructedParent
) {
// NOTE(review): the statement guarded by this membership test is not visible
// here.
373 if (Values
.contains(P
.first
))
375 llvm::dbgs() << (P
.first
? P
.first
->TokenText
: "<null>");
// Follow the mapping from P.first until a token without a mapping is reached.
376 for (auto I
= SpelledParentToReconstructedParent
.find(P
.first
),
377 E
= SpelledParentToReconstructedParent
.end();
378 I
!= E
; I
= SpelledParentToReconstructedParent
.find(I
->second
)) {
379 llvm::dbgs() << " -> " << (I
->second
? I
->second
->TokenText
: "<null>");
381 llvm::dbgs() << "\n";
385 // If visible, add the next token of the reconstructed token sequence to the
386 // output. Returns whether reconstruction passed a comma that is part of a
388 bool MacroCallReconstructor::processNextReconstructed() {
// Fetch the next spelled token of the innermost expansion and advance past it.
389 FormatToken
*Token
= ActiveExpansions
.back().SpelledI
->Tok
;
390 ++ActiveExpansions
.back().SpelledI
;
391 if (Token
->MacroCtx
) {
392 // Skip tokens that are not part of the macro call.
393 if (Token
->MacroCtx
->Role
== MR_Hidden
)
395 // Skip tokens we already expanded during an inner reconstruction.
396 // For example, given: #define ID(x) {x}
397 // And the call: ID(ID(f))
398 // We get two reconstructions:
401 // We reconstruct f during the first reconstruction, and skip it during the
402 // second reconstruction.
403 if (ActiveExpansions
.size() < Token
->MacroCtx
->ExpandedFrom
.size())
406 // Tokens that do not have a macro context are tokens that are part of the
407 // macro call but have not taken part in expansion.
408 if (!Token
->MacroCtx
) {
409 // Put the parentheses and commas of a macro call into the same line;
410 // if the arguments produce new unwrapped lines, they will become children
411 // of the corresponding opening parenthesis or comma tokens in the
412 // reconstructed call.
413 if (Token
->is(tok::l_paren
)) {
// Record the current line, the last token of the parent line and the l_paren
// as the state of the newly opened macro call.
414 MacroCallStructure
.push_back(MacroCallState(
415 currentLine(), parentLine().Tokens
.back()->Tok
, Token
));
416 // All tokens that are children of the previous line's last token in the
417 // reconstructed token stream will now be children of the l_paren token.
418 // For example, for the line containing the macro calls:
419 // auto x = ID({ID(2)});
420 // We will build up a map <null> -> ( -> ( with the first and second
421 // l_paren of the macro call respectively. New lines that come in with a
422 // <null> parent will then become children of the l_paren token of the
423 // currently innermost macro call.
424 SpelledParentToReconstructedParent
[MacroCallStructure
.back()
425 .ParentLastToken
] = Token
;
427 prepareParent(Token
, /*NewLine=*/true);
428 Token
->MacroParent
= true;
431 if (!MacroCallStructure
.empty()) {
432 if (Token
->is(tok::comma
)) {
433 // Make new lines inside the next argument children of the comma token.
434 SpelledParentToReconstructedParent
435 [MacroCallStructure
.back().Line
->Tokens
.back()->Tok
] = Token
;
436 Token
->MacroParent
= true;
437 appendToken(Token
, MacroCallStructure
.back().Line
);
438 prepareParent(Token
, /*NewLine=*/true);
441 if (Token
->is(tok::r_paren
)) {
// Closing the call: append the r_paren to the call's line, drop the mapping
// that was installed for the call's l_paren and pop the call state.
442 appendToken(Token
, MacroCallStructure
.back().Line
);
443 SpelledParentToReconstructedParent
.erase(
444 MacroCallStructure
.back().ParentLastToken
);
445 MacroCallStructure
.pop_back();
450 // Note that any tokens that are tagged with MR_None have been passed as
451 // arguments to the macro that have not been expanded, for example:
452 // Given: #define ID(x) x
453 // When calling: ID(a, b)
454 // 'b' will be part of the reconstructed token stream, but tagged MR_None.
455 // Given that erroring out in this case would be disruptive, we continue
456 // pushing the (unformatted) token.
457 // FIXME: This can lead to unfortunate formatting decisions - give the user
458 // a hint that their macro definition is broken.
// Chains the per-line results together: every line after the first is moved
// to become the single child of the last token of the line before it, and the
// top-level node is left with exactly one child line.
463 void MacroCallReconstructor::finalize() {
465 assert(State
!= Finalized
&& finished());
469 // We created corresponding unwrapped lines for each incoming line as children
470 // of the toplevel null token.
471 assert(Result
.Tokens
.size() == 1 && !Result
.Tokens
.front()->Children
.empty());
473 llvm::dbgs() << "Finalizing reconstructed lines:\n";
477 // The first line becomes the top level line in the resulting unwrapped line.
478 LineNode
&Top
= *Result
.Tokens
.front();
479 auto *I
= Top
.Children
.begin();
480 // Every subsequent line will become a child of the last token in the previous
481 // line, which is the token prior to the first token in the line.
482 LineNode
*Last
= (*I
)->Tokens
.back().get();
484 for (auto *E
= Top
.Children
.end(); I
!= E
; ++I
) {
485 assert(Last
->Children
.empty());
486 Last
->Children
.push_back(std::move(*I
));
488 // Mark the previous line's last token as generated by a macro expansion
489 // so the formatting algorithm can take that into account.
490 Last
->Tok
->MacroParent
= true;
// Continue the chain from the last token of the line that was just attached.
492 Last
= Last
->Children
.back()->Tokens
.back().get();
// Drop the moved-from entries; only the first line remains a direct child.
494 Top
.Children
.resize(1);
497 void MacroCallReconstructor::appendToken(FormatToken
*Token
,
498 ReconstructedLine
*L
) {
499 L
= L
? L
: currentLine();
500 LLVM_DEBUG(llvm::dbgs() << "-> " << Token
->TokenText
<< "\n");
501 L
->Tokens
.push_back(std::make_unique
<LineNode
>(Token
));
// Recursively converts the reconstructed \p Line into an UnwrappedLine whose
// Level is set to the given level; child lines are converted at Level + 1.
// If a node ends up with exactly one child line and its token is an l_paren
// or a comma, that child's tokens are spliced onto the end of the result line
// and the node's child list is cleared.
505 MacroCallReconstructor::createUnwrappedLine(const ReconstructedLine
&Line
,
507 UnwrappedLine Result
;
508 Result
.Level
= Level
;
509 for (const auto &N
: Line
.Tokens
) {
510 Result
.Tokens
.push_back(N
->Tok
);
511 UnwrappedLineNode
&Current
= Result
.Tokens
.back();
512 for (const auto &Child
: N
->Children
) {
// NOTE(review): the statement guarded by this emptiness check is not visible
// here.
513 if (Child
->Tokens
.empty())
515 Current
.Children
.push_back(createUnwrappedLine(*Child
, Level
+ 1));
517 if (Current
.Children
.size() == 1 &&
518 Current
.Tok
->isOneOf(tok::l_paren
, tok::comma
)) {
519 Result
.Tokens
.splice(Result
.Tokens
.end(),
520 Current
.Children
.front().Tokens
);
521 Current
.Children
.clear();
// Debug helper: writes the token texts of \p Line to llvm::dbgs(), recursing
// into child lines with Level + 1.
// NOTE(review): the bodies of the two `for (int i ...)` loops are not among
// the lines visible here; presumably they emit indentation - confirm.
527 void MacroCallReconstructor::debug(const ReconstructedLine
&Line
, int Level
) {
528 for (int i
= 0; i
< Level
; ++i
)
530 for (const auto &N
: Line
.Tokens
) {
534 llvm::dbgs() << N
->Tok
->TokenText
<< " ";
// Each child line starts on a new output line.
535 for (const auto &Child
: N
->Children
) {
536 llvm::dbgs() << "\n";
537 debug(*Child
, Level
+ 1);
538 for (int i
= 0; i
< Level
; ++i
)
542 llvm::dbgs() << "\n";
545 MacroCallReconstructor::ReconstructedLine
&
546 MacroCallReconstructor::parentLine() {
547 return **std::prev(std::prev(ActiveReconstructedLines
.end()));
550 MacroCallReconstructor::ReconstructedLine
*
551 MacroCallReconstructor::currentLine() {
552 return ActiveReconstructedLines
.back();
// Records the state of an open macro call: the line the call is placed on,
// the last token of the parent line, and the call's opening parenthesis.
// \p MacroCallLParen must be an l_paren token.
555 MacroCallReconstructor::MacroCallState::MacroCallState(
556 MacroCallReconstructor::ReconstructedLine
*Line
,
557 FormatToken
*ParentLastToken
, FormatToken
*MacroCallLParen
)
558 : Line(Line
), ParentLastToken(ParentLastToken
),
559 MacroCallLParen(MacroCallLParen
) {
561 llvm::dbgs() << "ParentLastToken: "
562 << (ParentLastToken
? ParentLastToken
->TokenText
: "<null>")
565 assert(MacroCallLParen
->is(tok::l_paren
));
568 } // namespace format