1 //===--- MacroCallReconstructor.cpp - Format C++ code -----------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// This file contains the implementation of MacroCallReconstructor, which fits
12 /// an reconstructed macro call to a parsed set of UnwrappedLines.
14 //===----------------------------------------------------------------------===//
18 #include "UnwrappedLineParser.h"
19 #include "clang/Basic/TokenKinds.h"
20 #include "llvm/ADT/DenseSet.h"
21 #include "llvm/Support/Debug.h"
24 #define DEBUG_TYPE "format-reconstruct"
29 // Call \p Call for each token in the unwrapped line given, passing
30 // the token, its parent and whether it is the first token in the line.
32 void forEachToken(const UnwrappedLine
&Line
, const T
&Call
,
33 FormatToken
*Parent
= nullptr) {
35 for (const auto &N
: Line
.Tokens
) {
36 Call(N
.Tok
, Parent
, First
);
38 for (const auto &Child
: N
.Children
)
39 forEachToken(Child
, Call
, N
.Tok
);
43 MacroCallReconstructor::MacroCallReconstructor(
45 const llvm::DenseMap
<FormatToken
*, std::unique_ptr
<UnwrappedLine
>>
47 : Level(Level
), IdToReconstructed(ActiveExpansions
) {
48 Result
.Tokens
.push_back(std::make_unique
<LineNode
>());
49 ActiveReconstructedLines
.push_back(&Result
);
52 void MacroCallReconstructor::addLine(const UnwrappedLine
&Line
) {
53 assert(State
!= Finalized
);
54 LLVM_DEBUG(llvm::dbgs() << "MCR: new line...\n");
55 forEachToken(Line
, [&](FormatToken
*Token
, FormatToken
*Parent
, bool First
) {
56 add(Token
, Parent
, First
);
58 assert(InProgress
|| finished());
61 UnwrappedLine
MacroCallReconstructor::takeResult() && {
63 assert(Result
.Tokens
.size() == 1 &&
64 Result
.Tokens
.front()->Children
.size() == 1);
66 createUnwrappedLine(*Result
.Tokens
.front()->Children
.front(), Level
);
67 assert(!Final
.Tokens
.empty());
71 // Reconstruct the position of the next \p Token, given its parent \p
72 // ExpandedParent in the incoming unwrapped line. \p First specifies whether it
73 // is the first token in a given unwrapped line.
74 void MacroCallReconstructor::add(FormatToken
*Token
,
75 FormatToken
*ExpandedParent
, bool First
) {
77 llvm::dbgs() << "MCR: Token: " << Token
->TokenText
<< ", Parent: "
78 << (ExpandedParent
? ExpandedParent
->TokenText
: "<null>")
79 << ", First: " << First
<< "\n");
80 // In order to be able to find the correct parent in the reconstructed token
81 // stream, we need to continue the last open reconstruction until we find the
82 // given token if it is part of the reconstructed token stream.
84 // Note that hidden tokens can be part of the reconstructed stream in nested
87 // #define C(x, y) x y
91 // The outer macro call will be C(a, {b}), and the hidden token '}' can be
92 // found in the reconstructed token stream of that expansion level.
93 // In the expanded token stream
95 // 'b' is a child of '{'. We need to continue the open expansion of the ','
96 // in the call of 'C' in order to correctly set the ',' as the parent of '{',
97 // so we later set the spelled token 'b' as a child of the ','.
98 if (!ActiveExpansions
.empty() && Token
->MacroCtx
&&
99 (Token
->MacroCtx
->Role
!= MR_Hidden
||
100 ActiveExpansions
.size() != Token
->MacroCtx
->ExpandedFrom
.size())) {
101 if (/*PassedMacroComma = */ reconstructActiveCallUntil(Token
))
105 prepareParent(ExpandedParent
, First
);
107 if (Token
->MacroCtx
) {
108 // If this token was generated by a macro call, add the reconstructed
109 // equivalent of the token.
112 // Otherwise, we add it to the current line.
117 // Adjusts the stack of active reconstructed lines so we're ready to push
118 // tokens. The tokens to be pushed are children of ExpandedParent in the
122 // - creating a new line, if the parent is on the active line
123 // - popping active lines, if the parent is further up the stack
126 // ActiveReconstructedLines.back() is the line that has \p ExpandedParent or its
127 // reconstructed replacement token as a parent (when possible) - that is, the
128 // last token in \c ActiveReconstructedLines[ActiveReconstructedLines.size()-2]
129 // is the parent of ActiveReconstructedLines.back() in the reconstructed
131 void MacroCallReconstructor::prepareParent(FormatToken
*ExpandedParent
,
134 llvm::dbgs() << "ParentMap:\n";
137 // We want to find the parent in the new unwrapped line, where the expanded
138 // parent might have been replaced during reconstruction.
139 FormatToken
*Parent
= getParentInResult(ExpandedParent
);
140 LLVM_DEBUG(llvm::dbgs() << "MCR: New parent: "
141 << (Parent
? Parent
->TokenText
: "<null>") << "\n");
143 FormatToken
*OpenMacroParent
= nullptr;
144 if (!MacroCallStructure
.empty()) {
145 // Inside a macro expansion, it is possible to lose track of the correct
146 // parent - either because it is already popped, for example because it was
147 // in a different macro argument (e.g. M({, })), or when we work on invalid
149 // Thus, we use the innermost macro call's parent as the parent at which
150 // we stop; this allows us to stay within the macro expansion and keeps
151 // any problems confined to the extent of the macro call.
153 getParentInResult(MacroCallStructure
.back().MacroCallLParen
);
154 LLVM_DEBUG(llvm::dbgs()
155 << "MacroCallLParen: "
156 << MacroCallStructure
.back().MacroCallLParen
->TokenText
157 << ", OpenMacroParent: "
158 << (OpenMacroParent
? OpenMacroParent
->TokenText
: "<null>")
162 (!ActiveReconstructedLines
.back()->Tokens
.empty() &&
163 Parent
== ActiveReconstructedLines
.back()->Tokens
.back()->Tok
)) {
164 // If we are at the first token in a new line, we want to also
165 // create a new line in the resulting reconstructed unwrapped line.
166 while (ActiveReconstructedLines
.back()->Tokens
.empty() ||
167 (Parent
!= ActiveReconstructedLines
.back()->Tokens
.back()->Tok
&&
168 ActiveReconstructedLines
.back()->Tokens
.back()->Tok
!=
170 ActiveReconstructedLines
.pop_back();
171 assert(!ActiveReconstructedLines
.empty());
173 assert(!ActiveReconstructedLines
.empty());
174 ActiveReconstructedLines
.back()->Tokens
.back()->Children
.push_back(
175 std::make_unique
<ReconstructedLine
>());
176 ActiveReconstructedLines
.push_back(
177 &*ActiveReconstructedLines
.back()->Tokens
.back()->Children
.back());
178 } else if (parentLine().Tokens
.back()->Tok
!= Parent
) {
179 // If we're not the first token in a new line, pop lines until we find
180 // the child of \c Parent in the stack.
181 while (Parent
!= parentLine().Tokens
.back()->Tok
&&
182 parentLine().Tokens
.back()->Tok
&&
183 parentLine().Tokens
.back()->Tok
!= OpenMacroParent
) {
184 ActiveReconstructedLines
.pop_back();
185 assert(!ActiveReconstructedLines
.empty());
188 assert(!ActiveReconstructedLines
.empty());
191 // For a given \p Parent in the incoming expanded token stream, find the
192 // corresponding parent in the output.
193 FormatToken
*MacroCallReconstructor::getParentInResult(FormatToken
*Parent
) {
194 FormatToken
*Mapped
= SpelledParentToReconstructedParent
.lookup(Parent
);
197 for (; Mapped
; Mapped
= SpelledParentToReconstructedParent
.lookup(Parent
))
199 // If we use a different token than the parent in the expanded token stream
200 // as parent, mark it as a special parent, so the formatting code knows it
201 // needs to have its children formatted.
202 Parent
->MacroParent
= true;
206 // Reconstruct a \p Token that was expanded from a macro call.
207 void MacroCallReconstructor::reconstruct(FormatToken
*Token
) {
208 assert(Token
->MacroCtx
);
209 // A single token can be the only result of a macro call:
210 // Given: #define ID(x, y) ;
211 // And the call: ID(<some>, <tokens>)
212 // ';' in the expanded stream will reconstruct all of ID(<some>, <tokens>).
213 if (Token
->MacroCtx
->StartOfExpansion
) {
214 startReconstruction(Token
);
215 // If the order of tokens in the expanded token stream is not the
216 // same as the order of tokens in the reconstructed stream, we need
217 // to reconstruct tokens that arrive later in the stream.
218 if (Token
->MacroCtx
->Role
!= MR_Hidden
)
219 reconstructActiveCallUntil(Token
);
221 assert(!ActiveExpansions
.empty());
222 if (ActiveExpansions
.back().SpelledI
!= ActiveExpansions
.back().SpelledE
) {
223 assert(ActiveExpansions
.size() == Token
->MacroCtx
->ExpandedFrom
.size());
224 if (Token
->MacroCtx
->Role
!= MR_Hidden
) {
225 // The current token in the reconstructed token stream must be the token
226 // we're looking for - we either arrive here after startReconstruction,
227 // which initiates the stream to the first token, or after
228 // continueReconstructionUntil skipped until the expected token in the
229 // reconstructed stream at the start of add(...).
230 assert(ActiveExpansions
.back().SpelledI
->Tok
== Token
);
231 processNextReconstructed();
232 } else if (!currentLine()->Tokens
.empty()) {
233 // Map all hidden tokens to the last visible token in the output.
234 // If the hidden token is a parent, we'll use the last visible
235 // token as the parent of the hidden token's children.
236 SpelledParentToReconstructedParent
[Token
] =
237 currentLine()->Tokens
.back()->Tok
;
239 for (auto I
= ActiveReconstructedLines
.rbegin(),
240 E
= ActiveReconstructedLines
.rend();
242 if (!(*I
)->Tokens
.empty()) {
243 SpelledParentToReconstructedParent
[Token
] = (*I
)->Tokens
.back()->Tok
;
249 if (Token
->MacroCtx
->EndOfExpansion
)
250 endReconstruction(Token
);
253 // Given a \p Token that starts an expansion, reconstruct the beginning of the
255 // For example, given: #define ID(x) x
256 // And the call: ID(int a)
258 void MacroCallReconstructor::startReconstruction(FormatToken
*Token
) {
259 assert(Token
->MacroCtx
);
260 assert(!Token
->MacroCtx
->ExpandedFrom
.empty());
261 assert(ActiveExpansions
.size() <= Token
->MacroCtx
->ExpandedFrom
.size());
263 // Check that the token's reconstruction stack matches our current
264 // reconstruction stack.
265 for (size_t I
= 0; I
< ActiveExpansions
.size(); ++I
) {
266 assert(ActiveExpansions
[I
].ID
==
268 ->ExpandedFrom
[Token
->MacroCtx
->ExpandedFrom
.size() - 1 - I
]);
271 // Start reconstruction for all calls for which this token is the first token
272 // generated by the call.
273 // Note that the token's expanded from stack is inside-to-outside, and the
274 // expansions for which this token is not the first are the outermost ones.
275 ArrayRef
<FormatToken
*> StartedMacros
=
276 ArrayRef(Token
->MacroCtx
->ExpandedFrom
)
277 .drop_back(ActiveExpansions
.size());
278 assert(StartedMacros
.size() == Token
->MacroCtx
->StartOfExpansion
);
279 // We reconstruct macro calls outside-to-inside.
280 for (FormatToken
*ID
: llvm::reverse(StartedMacros
)) {
281 // We found a macro call to be reconstructed; the next time our
282 // reconstruction stack is empty we know we finished an reconstruction.
286 // Put the reconstructed macro call's token into our reconstruction stack.
287 auto IU
= IdToReconstructed
.find(ID
);
288 assert(IU
!= IdToReconstructed
.end());
289 ActiveExpansions
.push_back(
290 {ID
, IU
->second
->Tokens
.begin(), IU
->second
->Tokens
.end()});
291 // Process the macro call's identifier.
292 processNextReconstructed();
293 if (ActiveExpansions
.back().SpelledI
== ActiveExpansions
.back().SpelledE
)
295 if (ActiveExpansions
.back().SpelledI
->Tok
->is(tok::l_paren
)) {
296 // Process the optional opening parenthesis.
297 processNextReconstructed();
302 // Add all tokens in the reconstruction stream to the output until we find the
304 bool MacroCallReconstructor::reconstructActiveCallUntil(FormatToken
*Token
) {
305 assert(!ActiveExpansions
.empty());
306 bool PassedMacroComma
= false;
307 // FIXME: If Token was already expanded earlier, due to
308 // a change in order, we will not find it, but need to
310 while (ActiveExpansions
.back().SpelledI
!= ActiveExpansions
.back().SpelledE
&&
311 ActiveExpansions
.back().SpelledI
->Tok
!= Token
) {
312 PassedMacroComma
= processNextReconstructed() || PassedMacroComma
;
314 return PassedMacroComma
;
317 // End all reconstructions for which \p Token is the final token.
318 void MacroCallReconstructor::endReconstruction(FormatToken
*Token
) {
319 assert(Token
->MacroCtx
&&
320 (ActiveExpansions
.size() >= Token
->MacroCtx
->EndOfExpansion
));
321 for (size_t I
= 0; I
< Token
->MacroCtx
->EndOfExpansion
; ++I
) {
323 // Check all remaining tokens but the final closing parenthesis and optional
324 // trailing comment were already reconstructed at an inner expansion level.
325 for (auto T
= ActiveExpansions
.back().SpelledI
;
326 T
!= ActiveExpansions
.back().SpelledE
; ++T
) {
327 FormatToken
*Token
= T
->Tok
;
328 bool ClosingParen
= (std::next(T
) == ActiveExpansions
.back().SpelledE
||
329 std::next(T
)->Tok
->isTrailingComment()) &&
330 !Token
->MacroCtx
&& Token
->is(tok::r_paren
);
331 bool TrailingComment
= Token
->isTrailingComment();
334 (ActiveExpansions
.size() < Token
->MacroCtx
->ExpandedFrom
.size());
335 if (!ClosingParen
&& !TrailingComment
&& !PreviousLevel
)
336 llvm::dbgs() << "At token: " << Token
->TokenText
<< "\n";
337 // In addition to the following cases, we can also run into this
338 // when a macro call had more arguments than expected; in that case,
339 // the comma and the remaining tokens in the macro call will potentially
340 // end up in the line when we finish the expansion.
341 // FIXME: Add the information which arguments are unused, and assert
342 // one of the cases below plus reconstructed macro argument tokens.
343 // assert(ClosingParen || TrailingComment || PreviousLevel);
346 // Handle the remaining open tokens:
347 // - expand the closing parenthesis, if it exists, including an optional
349 // - handle tokens that were already reconstructed at an inner expansion
351 // - handle tokens when a macro call had more than the expected number of
352 // arguments, i.e. when #define M(x) is called as M(a, b, c) we'll end
353 // up with the sequence ", b, c)" being open at the end of the
354 // reconstruction; we want to gracefully handle that case
356 // FIXME: See the above debug-check for what we will need to do to be
357 // able to assert this.
358 for (auto T
= ActiveExpansions
.back().SpelledI
;
359 T
!= ActiveExpansions
.back().SpelledE
; ++T
) {
360 processNextReconstructed();
362 ActiveExpansions
.pop_back();
366 void MacroCallReconstructor::debugParentMap() const {
367 llvm::DenseSet
<FormatToken
*> Values
;
368 for (const auto &P
: SpelledParentToReconstructedParent
)
369 Values
.insert(P
.second
);
371 for (const auto &P
: SpelledParentToReconstructedParent
) {
372 if (Values
.contains(P
.first
))
374 llvm::dbgs() << (P
.first
? P
.first
->TokenText
: "<null>");
375 for (auto I
= SpelledParentToReconstructedParent
.find(P
.first
),
376 E
= SpelledParentToReconstructedParent
.end();
377 I
!= E
; I
= SpelledParentToReconstructedParent
.find(I
->second
)) {
378 llvm::dbgs() << " -> " << (I
->second
? I
->second
->TokenText
: "<null>");
380 llvm::dbgs() << "\n";
384 // If visible, add the next token of the reconstructed token sequence to the
385 // output. Returns whether reconstruction passed a comma that is part of a
387 bool MacroCallReconstructor::processNextReconstructed() {
388 FormatToken
*Token
= ActiveExpansions
.back().SpelledI
->Tok
;
389 ++ActiveExpansions
.back().SpelledI
;
390 if (Token
->MacroCtx
) {
391 // Skip tokens that are not part of the macro call.
392 if (Token
->MacroCtx
->Role
== MR_Hidden
)
394 // Skip tokens we already expanded during an inner reconstruction.
395 // For example, given: #define ID(x) {x}
396 // And the call: ID(ID(f))
397 // We get two reconstructions:
400 // We reconstruct f during the first reconstruction, and skip it during the
401 // second reconstruction.
402 if (ActiveExpansions
.size() < Token
->MacroCtx
->ExpandedFrom
.size())
405 // Tokens that do not have a macro context are tokens in that are part of the
406 // macro call that have not taken part in expansion.
407 if (!Token
->MacroCtx
) {
408 // Put the parentheses and commas of a macro call into the same line;
409 // if the arguments produce new unwrapped lines, they will become children
410 // of the corresponding opening parenthesis or comma tokens in the
411 // reconstructed call.
412 if (Token
->is(tok::l_paren
)) {
413 MacroCallStructure
.push_back(MacroCallState(
414 currentLine(), parentLine().Tokens
.back()->Tok
, Token
));
415 // All tokens that are children of the previous line's last token in the
416 // reconstructed token stream will now be children of the l_paren token.
417 // For example, for the line containing the macro calls:
418 // auto x = ID({ID(2)});
419 // We will build up a map <null> -> ( -> ( with the first and second
420 // l_paren of the macro call respectively. New lines that come in with a
421 // <null> parent will then become children of the l_paren token of the
422 // currently innermost macro call.
423 SpelledParentToReconstructedParent
[MacroCallStructure
.back()
424 .ParentLastToken
] = Token
;
426 prepareParent(Token
, /*NewLine=*/true);
427 Token
->MacroParent
= true;
430 if (!MacroCallStructure
.empty()) {
431 if (Token
->is(tok::comma
)) {
432 // Make new lines inside the next argument children of the comma token.
433 SpelledParentToReconstructedParent
434 [MacroCallStructure
.back().Line
->Tokens
.back()->Tok
] = Token
;
435 Token
->MacroParent
= true;
436 appendToken(Token
, MacroCallStructure
.back().Line
);
437 prepareParent(Token
, /*NewLine=*/true);
440 if (Token
->is(tok::r_paren
)) {
441 appendToken(Token
, MacroCallStructure
.back().Line
);
442 SpelledParentToReconstructedParent
.erase(
443 MacroCallStructure
.back().ParentLastToken
);
444 MacroCallStructure
.pop_back();
449 // Note that any tokens that are tagged with MR_None have been passed as
450 // arguments to the macro that have not been expanded, for example:
451 // Given: #define ID(X) x
452 // When calling: ID(a, b)
453 // 'b' will be part of the reconstructed token stream, but tagged MR_None.
454 // Given that erroring out in this case would be disruptive, we continue
455 // pushing the (unformatted) token.
456 // FIXME: This can lead to unfortunate formatting decisions - give the user
457 // a hint that their macro definition is broken.
462 void MacroCallReconstructor::finalize() {
464 assert(State
!= Finalized
&& finished());
468 // We created corresponding unwrapped lines for each incoming line as children
469 // the the toplevel null token.
470 assert(Result
.Tokens
.size() == 1 && !Result
.Tokens
.front()->Children
.empty());
472 llvm::dbgs() << "Finalizing reconstructed lines:\n";
476 // The first line becomes the top level line in the resulting unwrapped line.
477 LineNode
&Top
= *Result
.Tokens
.front();
478 auto *I
= Top
.Children
.begin();
479 // Every subsequent line will become a child of the last token in the previous
480 // line, which is the token prior to the first token in the line.
481 LineNode
*Last
= (*I
)->Tokens
.back().get();
483 for (auto *E
= Top
.Children
.end(); I
!= E
; ++I
) {
484 assert(Last
->Children
.empty());
485 Last
->Children
.push_back(std::move(*I
));
487 // Mark the previous line's last token as generated by a macro expansion
488 // so the formatting algorithm can take that into account.
489 Last
->Tok
->MacroParent
= true;
491 Last
= Last
->Children
.back()->Tokens
.back().get();
493 Top
.Children
.resize(1);
496 void MacroCallReconstructor::appendToken(FormatToken
*Token
,
497 ReconstructedLine
*L
) {
498 L
= L
? L
: currentLine();
499 LLVM_DEBUG(llvm::dbgs() << "-> " << Token
->TokenText
<< "\n");
500 L
->Tokens
.push_back(std::make_unique
<LineNode
>(Token
));
504 MacroCallReconstructor::createUnwrappedLine(const ReconstructedLine
&Line
,
506 UnwrappedLine Result
;
507 Result
.Level
= Level
;
508 for (const auto &N
: Line
.Tokens
) {
509 Result
.Tokens
.push_back(N
->Tok
);
510 UnwrappedLineNode
&Current
= Result
.Tokens
.back();
511 for (const auto &Child
: N
->Children
) {
512 if (Child
->Tokens
.empty())
514 Current
.Children
.push_back(createUnwrappedLine(*Child
, Level
+ 1));
516 if (Current
.Children
.size() == 1 &&
517 Current
.Tok
->isOneOf(tok::l_paren
, tok::comma
)) {
518 Result
.Tokens
.splice(Result
.Tokens
.end(),
519 Current
.Children
.front().Tokens
);
520 Current
.Children
.clear();
526 void MacroCallReconstructor::debug(const ReconstructedLine
&Line
, int Level
) {
527 for (int i
= 0; i
< Level
; ++i
)
529 for (const auto &N
: Line
.Tokens
) {
533 llvm::dbgs() << N
->Tok
->TokenText
<< " ";
534 for (const auto &Child
: N
->Children
) {
535 llvm::dbgs() << "\n";
536 debug(*Child
, Level
+ 1);
537 for (int i
= 0; i
< Level
; ++i
)
541 llvm::dbgs() << "\n";
544 MacroCallReconstructor::ReconstructedLine
&
545 MacroCallReconstructor::parentLine() {
546 return **std::prev(std::prev(ActiveReconstructedLines
.end()));
549 MacroCallReconstructor::ReconstructedLine
*
550 MacroCallReconstructor::currentLine() {
551 return ActiveReconstructedLines
.back();
554 MacroCallReconstructor::MacroCallState::MacroCallState(
555 MacroCallReconstructor::ReconstructedLine
*Line
,
556 FormatToken
*ParentLastToken
, FormatToken
*MacroCallLParen
)
557 : Line(Line
), ParentLastToken(ParentLastToken
),
558 MacroCallLParen(MacroCallLParen
) {
560 llvm::dbgs() << "ParentLastToken: "
561 << (ParentLastToken
? ParentLastToken
->TokenText
: "<null>")
564 assert(MacroCallLParen
->is(tok::l_paren
));
567 } // namespace format