[DFAJumpThreading] Remove incoming StartBlock from all phis when unfolding select...
[llvm-project.git] / clang / lib / Format / MacroCallReconstructor.cpp
blobcbdd1683c54d1abeebf580aef3664fe0740d1155
1 //===--- MacroCallReconstructor.cpp - Format C++ code -----------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// This file contains the implementation of MacroCallReconstructor, which fits
12 /// an reconstructed macro call to a parsed set of UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
16 #include "Macros.h"
18 #include "UnwrappedLineParser.h"
19 #include "clang/Basic/TokenKinds.h"
20 #include "llvm/ADT/DenseSet.h"
21 #include "llvm/Support/Debug.h"
22 #include <cassert>
24 #define DEBUG_TYPE "format-reconstruct"
26 namespace clang {
27 namespace format {
29 // Call \p Call for each token in the unwrapped line given, passing
30 // the token, its parent and whether it is the first token in the line.
31 template <typename T>
32 void forEachToken(const UnwrappedLine &Line, const T &Call,
33 FormatToken *Parent = nullptr) {
34 bool First = true;
35 for (const auto &N : Line.Tokens) {
36 Call(N.Tok, Parent, First);
37 First = false;
38 for (const auto &Child : N.Children)
39 forEachToken(Child, Call, N.Tok);
43 MacroCallReconstructor::MacroCallReconstructor(
44 unsigned Level,
45 const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
46 &ActiveExpansions)
47 : Level(Level), IdToReconstructed(ActiveExpansions) {
48 Result.Tokens.push_back(std::make_unique<LineNode>());
49 ActiveReconstructedLines.push_back(&Result);
52 void MacroCallReconstructor::addLine(const UnwrappedLine &Line) {
53 assert(State != Finalized);
54 LLVM_DEBUG(llvm::dbgs() << "MCR: new line...\n");
55 forEachToken(Line, [&](FormatToken *Token, FormatToken *Parent, bool First) {
56 add(Token, Parent, First);
57 });
58 assert(InProgress || finished());
61 UnwrappedLine MacroCallReconstructor::takeResult() && {
62 finalize();
63 assert(Result.Tokens.size() == 1 &&
64 Result.Tokens.front()->Children.size() == 1);
65 UnwrappedLine Final =
66 createUnwrappedLine(*Result.Tokens.front()->Children.front(), Level);
67 assert(!Final.Tokens.empty());
68 return Final;
71 // Reconstruct the position of the next \p Token, given its parent \p
72 // ExpandedParent in the incoming unwrapped line. \p First specifies whether it
73 // is the first token in a given unwrapped line.
74 void MacroCallReconstructor::add(FormatToken *Token,
75 FormatToken *ExpandedParent, bool First) {
76 LLVM_DEBUG(
77 llvm::dbgs() << "MCR: Token: " << Token->TokenText << ", Parent: "
78 << (ExpandedParent ? ExpandedParent->TokenText : "<null>")
79 << ", First: " << First << "\n");
80 // In order to be able to find the correct parent in the reconstructed token
81 // stream, we need to continue the last open reconstruction until we find the
82 // given token if it is part of the reconstructed token stream.
84 // Note that hidden tokens can be part of the reconstructed stream in nested
85 // macro calls.
86 // For example, given
87 // #define C(x, y) x y
88 // #define B(x) {x}
89 // And the call:
90 // C(a, B(b))
91 // The outer macro call will be C(a, {b}), and the hidden token '}' can be
92 // found in the reconstructed token stream of that expansion level.
93 // In the expanded token stream
94 // a {b}
95 // 'b' is a child of '{'. We need to continue the open expansion of the ','
96 // in the call of 'C' in order to correctly set the ',' as the parent of '{',
97 // so we later set the spelled token 'b' as a child of the ','.
98 if (!ActiveExpansions.empty() && Token->MacroCtx &&
99 (Token->MacroCtx->Role != MR_Hidden ||
100 ActiveExpansions.size() != Token->MacroCtx->ExpandedFrom.size())) {
101 if (/*PassedMacroComma = */ reconstructActiveCallUntil(Token))
102 First = true;
105 prepareParent(ExpandedParent, First);
107 if (Token->MacroCtx) {
108 // If this token was generated by a macro call, add the reconstructed
109 // equivalent of the token.
110 reconstruct(Token);
111 } else {
112 // Otherwise, we add it to the current line.
113 appendToken(Token);
117 // Adjusts the stack of active reconstructed lines so we're ready to push
118 // tokens. The tokens to be pushed are children of ExpandedParent in the
119 // expanded code.
121 // This may entail:
122 // - creating a new line, if the parent is on the active line
123 // - popping active lines, if the parent is further up the stack
125 // Postcondition:
126 // ActiveReconstructedLines.back() is the line that has \p ExpandedParent or its
127 // reconstructed replacement token as a parent (when possible) - that is, the
128 // last token in \c ActiveReconstructedLines[ActiveReconstructedLines.size()-2]
129 // is the parent of ActiveReconstructedLines.back() in the reconstructed
130 // unwrapped line.
131 void MacroCallReconstructor::prepareParent(FormatToken *ExpandedParent,
132 bool NewLine) {
133 LLVM_DEBUG({
134 llvm::dbgs() << "ParentMap:\n";
135 debugParentMap();
137 // We want to find the parent in the new unwrapped line, where the expanded
138 // parent might have been replaced during reconstruction.
139 FormatToken *Parent = getParentInResult(ExpandedParent);
140 LLVM_DEBUG(llvm::dbgs() << "MCR: New parent: "
141 << (Parent ? Parent->TokenText : "<null>") << "\n");
143 FormatToken *OpenMacroParent = nullptr;
144 if (!MacroCallStructure.empty()) {
145 // Inside a macro expansion, it is possible to lose track of the correct
146 // parent - either because it is already popped, for example because it was
147 // in a different macro argument (e.g. M({, })), or when we work on invalid
148 // code.
149 // Thus, we use the innermost macro call's parent as the parent at which
150 // we stop; this allows us to stay within the macro expansion and keeps
151 // any problems confined to the extent of the macro call.
152 OpenMacroParent =
153 getParentInResult(MacroCallStructure.back().MacroCallLParen);
154 LLVM_DEBUG(llvm::dbgs()
155 << "MacroCallLParen: "
156 << MacroCallStructure.back().MacroCallLParen->TokenText
157 << ", OpenMacroParent: "
158 << (OpenMacroParent ? OpenMacroParent->TokenText : "<null>")
159 << "\n");
161 if (NewLine ||
162 (!ActiveReconstructedLines.back()->Tokens.empty() &&
163 Parent == ActiveReconstructedLines.back()->Tokens.back()->Tok)) {
164 // If we are at the first token in a new line, we want to also
165 // create a new line in the resulting reconstructed unwrapped line.
166 while (ActiveReconstructedLines.back()->Tokens.empty() ||
167 (Parent != ActiveReconstructedLines.back()->Tokens.back()->Tok &&
168 ActiveReconstructedLines.back()->Tokens.back()->Tok !=
169 OpenMacroParent)) {
170 ActiveReconstructedLines.pop_back();
171 assert(!ActiveReconstructedLines.empty());
173 assert(!ActiveReconstructedLines.empty());
174 ActiveReconstructedLines.back()->Tokens.back()->Children.push_back(
175 std::make_unique<ReconstructedLine>());
176 ActiveReconstructedLines.push_back(
177 &*ActiveReconstructedLines.back()->Tokens.back()->Children.back());
178 } else if (parentLine().Tokens.back()->Tok != Parent) {
179 // If we're not the first token in a new line, pop lines until we find
180 // the child of \c Parent in the stack.
181 while (Parent != parentLine().Tokens.back()->Tok &&
182 parentLine().Tokens.back()->Tok &&
183 parentLine().Tokens.back()->Tok != OpenMacroParent) {
184 ActiveReconstructedLines.pop_back();
185 assert(!ActiveReconstructedLines.empty());
188 assert(!ActiveReconstructedLines.empty());
191 // For a given \p Parent in the incoming expanded token stream, find the
192 // corresponding parent in the output.
193 FormatToken *MacroCallReconstructor::getParentInResult(FormatToken *Parent) {
194 FormatToken *Mapped = SpelledParentToReconstructedParent.lookup(Parent);
195 if (!Mapped)
196 return Parent;
197 for (; Mapped; Mapped = SpelledParentToReconstructedParent.lookup(Parent))
198 Parent = Mapped;
199 // If we use a different token than the parent in the expanded token stream
200 // as parent, mark it as a special parent, so the formatting code knows it
201 // needs to have its children formatted.
202 Parent->MacroParent = true;
203 return Parent;
206 // Reconstruct a \p Token that was expanded from a macro call.
207 void MacroCallReconstructor::reconstruct(FormatToken *Token) {
208 assert(Token->MacroCtx);
209 // A single token can be the only result of a macro call:
210 // Given: #define ID(x, y) ;
211 // And the call: ID(<some>, <tokens>)
212 // ';' in the expanded stream will reconstruct all of ID(<some>, <tokens>).
213 if (Token->MacroCtx->StartOfExpansion) {
214 startReconstruction(Token);
215 // If the order of tokens in the expanded token stream is not the
216 // same as the order of tokens in the reconstructed stream, we need
217 // to reconstruct tokens that arrive later in the stream.
218 if (Token->MacroCtx->Role != MR_Hidden)
219 reconstructActiveCallUntil(Token);
221 assert(!ActiveExpansions.empty());
222 if (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE) {
223 assert(ActiveExpansions.size() == Token->MacroCtx->ExpandedFrom.size());
224 if (Token->MacroCtx->Role != MR_Hidden) {
225 // The current token in the reconstructed token stream must be the token
226 // we're looking for - we either arrive here after startReconstruction,
227 // which initiates the stream to the first token, or after
228 // continueReconstructionUntil skipped until the expected token in the
229 // reconstructed stream at the start of add(...).
230 assert(ActiveExpansions.back().SpelledI->Tok == Token);
231 processNextReconstructed();
232 } else if (!currentLine()->Tokens.empty()) {
233 // Map all hidden tokens to the last visible token in the output.
234 // If the hidden token is a parent, we'll use the last visible
235 // token as the parent of the hidden token's children.
236 SpelledParentToReconstructedParent[Token] =
237 currentLine()->Tokens.back()->Tok;
238 } else {
239 for (auto I = ActiveReconstructedLines.rbegin(),
240 E = ActiveReconstructedLines.rend();
241 I != E; ++I) {
242 if (!(*I)->Tokens.empty()) {
243 SpelledParentToReconstructedParent[Token] = (*I)->Tokens.back()->Tok;
244 break;
249 if (Token->MacroCtx->EndOfExpansion)
250 endReconstruction(Token);
253 // Given a \p Token that starts an expansion, reconstruct the beginning of the
254 // macro call.
255 // For example, given: #define ID(x) x
256 // And the call: ID(int a)
257 // Reconstructs: ID(
258 void MacroCallReconstructor::startReconstruction(FormatToken *Token) {
259 assert(Token->MacroCtx);
260 assert(!Token->MacroCtx->ExpandedFrom.empty());
261 assert(ActiveExpansions.size() <= Token->MacroCtx->ExpandedFrom.size());
262 #ifndef NDEBUG
263 // Check that the token's reconstruction stack matches our current
264 // reconstruction stack.
265 for (size_t I = 0; I < ActiveExpansions.size(); ++I) {
266 assert(ActiveExpansions[I].ID ==
267 Token->MacroCtx
268 ->ExpandedFrom[Token->MacroCtx->ExpandedFrom.size() - 1 - I]);
270 #endif
271 // Start reconstruction for all calls for which this token is the first token
272 // generated by the call.
273 // Note that the token's expanded from stack is inside-to-outside, and the
274 // expansions for which this token is not the first are the outermost ones.
275 ArrayRef<FormatToken *> StartedMacros =
276 ArrayRef(Token->MacroCtx->ExpandedFrom)
277 .drop_back(ActiveExpansions.size());
278 assert(StartedMacros.size() == Token->MacroCtx->StartOfExpansion);
279 // We reconstruct macro calls outside-to-inside.
280 for (FormatToken *ID : llvm::reverse(StartedMacros)) {
281 // We found a macro call to be reconstructed; the next time our
282 // reconstruction stack is empty we know we finished an reconstruction.
283 #ifndef NDEBUG
284 State = InProgress;
285 #endif
286 // Put the reconstructed macro call's token into our reconstruction stack.
287 auto IU = IdToReconstructed.find(ID);
288 assert(IU != IdToReconstructed.end());
289 ActiveExpansions.push_back(
290 {ID, IU->second->Tokens.begin(), IU->second->Tokens.end()});
291 // Process the macro call's identifier.
292 processNextReconstructed();
293 if (ActiveExpansions.back().SpelledI == ActiveExpansions.back().SpelledE)
294 continue;
295 if (ActiveExpansions.back().SpelledI->Tok->is(tok::l_paren)) {
296 // Process the optional opening parenthesis.
297 processNextReconstructed();
302 // Add all tokens in the reconstruction stream to the output until we find the
303 // given \p Token.
304 bool MacroCallReconstructor::reconstructActiveCallUntil(FormatToken *Token) {
305 assert(!ActiveExpansions.empty());
306 bool PassedMacroComma = false;
307 // FIXME: If Token was already expanded earlier, due to
308 // a change in order, we will not find it, but need to
309 // skip it.
310 while (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE &&
311 ActiveExpansions.back().SpelledI->Tok != Token) {
312 PassedMacroComma = processNextReconstructed() || PassedMacroComma;
314 return PassedMacroComma;
317 // End all reconstructions for which \p Token is the final token.
318 void MacroCallReconstructor::endReconstruction(FormatToken *Token) {
319 assert(Token->MacroCtx &&
320 (ActiveExpansions.size() >= Token->MacroCtx->EndOfExpansion));
321 for (size_t I = 0; I < Token->MacroCtx->EndOfExpansion; ++I) {
322 LLVM_DEBUG([&] {
323 // Check all remaining tokens but the final closing parenthesis and
324 // optional trailing comment were already reconstructed at an inner
325 // expansion level.
326 for (auto T = ActiveExpansions.back().SpelledI;
327 T != ActiveExpansions.back().SpelledE; ++T) {
328 FormatToken *Token = T->Tok;
329 bool ClosingParen = (std::next(T) == ActiveExpansions.back().SpelledE ||
330 std::next(T)->Tok->isTrailingComment()) &&
331 !Token->MacroCtx && Token->is(tok::r_paren);
332 bool TrailingComment = Token->isTrailingComment();
333 bool PreviousLevel =
334 Token->MacroCtx &&
335 (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size());
336 if (!ClosingParen && !TrailingComment && !PreviousLevel)
337 llvm::dbgs() << "At token: " << Token->TokenText << "\n";
338 // In addition to the following cases, we can also run into this
339 // when a macro call had more arguments than expected; in that case,
340 // the comma and the remaining tokens in the macro call will
341 // potentially end up in the line when we finish the expansion.
342 // FIXME: Add the information which arguments are unused, and assert
343 // one of the cases below plus reconstructed macro argument tokens.
344 // assert(ClosingParen || TrailingComment || PreviousLevel);
346 }());
347 // Handle the remaining open tokens:
348 // - expand the closing parenthesis, if it exists, including an optional
349 // trailing comment
350 // - handle tokens that were already reconstructed at an inner expansion
351 // level
352 // - handle tokens when a macro call had more than the expected number of
353 // arguments, i.e. when #define M(x) is called as M(a, b, c) we'll end
354 // up with the sequence ", b, c)" being open at the end of the
355 // reconstruction; we want to gracefully handle that case
357 // FIXME: See the above debug-check for what we will need to do to be
358 // able to assert this.
359 for (auto T = ActiveExpansions.back().SpelledI;
360 T != ActiveExpansions.back().SpelledE; ++T) {
361 processNextReconstructed();
363 ActiveExpansions.pop_back();
367 void MacroCallReconstructor::debugParentMap() const {
368 llvm::DenseSet<FormatToken *> Values;
369 for (const auto &P : SpelledParentToReconstructedParent)
370 Values.insert(P.second);
372 for (const auto &P : SpelledParentToReconstructedParent) {
373 if (Values.contains(P.first))
374 continue;
375 llvm::dbgs() << (P.first ? P.first->TokenText : "<null>");
376 for (auto I = SpelledParentToReconstructedParent.find(P.first),
377 E = SpelledParentToReconstructedParent.end();
378 I != E; I = SpelledParentToReconstructedParent.find(I->second)) {
379 llvm::dbgs() << " -> " << (I->second ? I->second->TokenText : "<null>");
381 llvm::dbgs() << "\n";
385 // If visible, add the next token of the reconstructed token sequence to the
386 // output. Returns whether reconstruction passed a comma that is part of a
387 // macro call.
388 bool MacroCallReconstructor::processNextReconstructed() {
389 FormatToken *Token = ActiveExpansions.back().SpelledI->Tok;
390 ++ActiveExpansions.back().SpelledI;
391 if (Token->MacroCtx) {
392 // Skip tokens that are not part of the macro call.
393 if (Token->MacroCtx->Role == MR_Hidden)
394 return false;
395 // Skip tokens we already expanded during an inner reconstruction.
396 // For example, given: #define ID(x) {x}
397 // And the call: ID(ID(f))
398 // We get two reconstructions:
399 // ID(f) -> {f}
400 // ID({f}) -> {{f}}
401 // We reconstruct f during the first reconstruction, and skip it during the
402 // second reconstruction.
403 if (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size())
404 return false;
406 // Tokens that do not have a macro context are tokens in that are part of the
407 // macro call that have not taken part in expansion.
408 if (!Token->MacroCtx) {
409 // Put the parentheses and commas of a macro call into the same line;
410 // if the arguments produce new unwrapped lines, they will become children
411 // of the corresponding opening parenthesis or comma tokens in the
412 // reconstructed call.
413 if (Token->is(tok::l_paren)) {
414 MacroCallStructure.push_back(MacroCallState(
415 currentLine(), parentLine().Tokens.back()->Tok, Token));
416 // All tokens that are children of the previous line's last token in the
417 // reconstructed token stream will now be children of the l_paren token.
418 // For example, for the line containing the macro calls:
419 // auto x = ID({ID(2)});
420 // We will build up a map <null> -> ( -> ( with the first and second
421 // l_paren of the macro call respectively. New lines that come in with a
422 // <null> parent will then become children of the l_paren token of the
423 // currently innermost macro call.
424 SpelledParentToReconstructedParent[MacroCallStructure.back()
425 .ParentLastToken] = Token;
426 appendToken(Token);
427 prepareParent(Token, /*NewLine=*/true);
428 Token->MacroParent = true;
429 return false;
431 if (!MacroCallStructure.empty()) {
432 if (Token->is(tok::comma)) {
433 // Make new lines inside the next argument children of the comma token.
434 SpelledParentToReconstructedParent
435 [MacroCallStructure.back().Line->Tokens.back()->Tok] = Token;
436 Token->MacroParent = true;
437 appendToken(Token, MacroCallStructure.back().Line);
438 prepareParent(Token, /*NewLine=*/true);
439 return true;
441 if (Token->is(tok::r_paren)) {
442 appendToken(Token, MacroCallStructure.back().Line);
443 SpelledParentToReconstructedParent.erase(
444 MacroCallStructure.back().ParentLastToken);
445 MacroCallStructure.pop_back();
446 return false;
450 // Note that any tokens that are tagged with MR_None have been passed as
451 // arguments to the macro that have not been expanded, for example:
452 // Given: #define ID(X) x
453 // When calling: ID(a, b)
454 // 'b' will be part of the reconstructed token stream, but tagged MR_None.
455 // Given that erroring out in this case would be disruptive, we continue
456 // pushing the (unformatted) token.
457 // FIXME: This can lead to unfortunate formatting decisions - give the user
458 // a hint that their macro definition is broken.
459 appendToken(Token);
460 return false;
463 void MacroCallReconstructor::finalize() {
464 #ifndef NDEBUG
465 assert(State != Finalized && finished());
466 State = Finalized;
467 #endif
469 // We created corresponding unwrapped lines for each incoming line as children
470 // the the toplevel null token.
471 assert(Result.Tokens.size() == 1 && !Result.Tokens.front()->Children.empty());
472 LLVM_DEBUG({
473 llvm::dbgs() << "Finalizing reconstructed lines:\n";
474 debug(Result, 0);
477 // The first line becomes the top level line in the resulting unwrapped line.
478 LineNode &Top = *Result.Tokens.front();
479 auto *I = Top.Children.begin();
480 // Every subsequent line will become a child of the last token in the previous
481 // line, which is the token prior to the first token in the line.
482 LineNode *Last = (*I)->Tokens.back().get();
483 ++I;
484 for (auto *E = Top.Children.end(); I != E; ++I) {
485 assert(Last->Children.empty());
486 Last->Children.push_back(std::move(*I));
488 // Mark the previous line's last token as generated by a macro expansion
489 // so the formatting algorithm can take that into account.
490 Last->Tok->MacroParent = true;
492 Last = Last->Children.back()->Tokens.back().get();
494 Top.Children.resize(1);
497 void MacroCallReconstructor::appendToken(FormatToken *Token,
498 ReconstructedLine *L) {
499 L = L ? L : currentLine();
500 LLVM_DEBUG(llvm::dbgs() << "-> " << Token->TokenText << "\n");
501 L->Tokens.push_back(std::make_unique<LineNode>(Token));
504 UnwrappedLine
505 MacroCallReconstructor::createUnwrappedLine(const ReconstructedLine &Line,
506 int Level) {
507 UnwrappedLine Result;
508 Result.Level = Level;
509 for (const auto &N : Line.Tokens) {
510 Result.Tokens.push_back(N->Tok);
511 UnwrappedLineNode &Current = Result.Tokens.back();
512 for (const auto &Child : N->Children) {
513 if (Child->Tokens.empty())
514 continue;
515 Current.Children.push_back(createUnwrappedLine(*Child, Level + 1));
517 if (Current.Children.size() == 1 &&
518 Current.Tok->isOneOf(tok::l_paren, tok::comma)) {
519 Result.Tokens.splice(Result.Tokens.end(),
520 Current.Children.front().Tokens);
521 Current.Children.clear();
524 return Result;
527 void MacroCallReconstructor::debug(const ReconstructedLine &Line, int Level) {
528 for (int i = 0; i < Level; ++i)
529 llvm::dbgs() << " ";
530 for (const auto &N : Line.Tokens) {
531 if (!N)
532 continue;
533 if (N->Tok)
534 llvm::dbgs() << N->Tok->TokenText << " ";
535 for (const auto &Child : N->Children) {
536 llvm::dbgs() << "\n";
537 debug(*Child, Level + 1);
538 for (int i = 0; i < Level; ++i)
539 llvm::dbgs() << " ";
542 llvm::dbgs() << "\n";
545 MacroCallReconstructor::ReconstructedLine &
546 MacroCallReconstructor::parentLine() {
547 return **std::prev(std::prev(ActiveReconstructedLines.end()));
550 MacroCallReconstructor::ReconstructedLine *
551 MacroCallReconstructor::currentLine() {
552 return ActiveReconstructedLines.back();
555 MacroCallReconstructor::MacroCallState::MacroCallState(
556 MacroCallReconstructor::ReconstructedLine *Line,
557 FormatToken *ParentLastToken, FormatToken *MacroCallLParen)
558 : Line(Line), ParentLastToken(ParentLastToken),
559 MacroCallLParen(MacroCallLParen) {
560 LLVM_DEBUG(
561 llvm::dbgs() << "ParentLastToken: "
562 << (ParentLastToken ? ParentLastToken->TokenText : "<null>")
563 << "\n");
565 assert(MacroCallLParen->is(tok::l_paren));
568 } // namespace format
569 } // namespace clang