[AMDGPU][AsmParser][NFC] Get rid of custom default operand handlers.
[llvm-project.git] / clang / lib / Format / MacroCallReconstructor.cpp
blob1c295477a922ef2f189e09ae7f93c2c4fe4c8d27
1 //===--- MacroCallReconstructor.cpp - Format C++ code -----------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// This file contains the implementation of MacroCallReconstructor, which fits
12 /// an reconstructed macro call to a parsed set of UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
16 #include "Macros.h"
18 #include "UnwrappedLineParser.h"
19 #include "clang/Basic/TokenKinds.h"
20 #include "llvm/ADT/DenseSet.h"
21 #include "llvm/Support/Debug.h"
22 #include <cassert>
24 #define DEBUG_TYPE "format-reconstruct"
26 namespace clang {
27 namespace format {
29 // Call \p Call for each token in the unwrapped line given, passing
30 // the token, its parent and whether it is the first token in the line.
31 template <typename T>
32 void forEachToken(const UnwrappedLine &Line, const T &Call,
33 FormatToken *Parent = nullptr) {
34 bool First = true;
35 for (const auto &N : Line.Tokens) {
36 Call(N.Tok, Parent, First);
37 First = false;
38 for (const auto &Child : N.Children)
39 forEachToken(Child, Call, N.Tok);
43 MacroCallReconstructor::MacroCallReconstructor(
44 unsigned Level,
45 const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
46 &ActiveExpansions)
47 : Level(Level), IdToReconstructed(ActiveExpansions) {
48 Result.Tokens.push_back(std::make_unique<LineNode>());
49 ActiveReconstructedLines.push_back(&Result);
52 void MacroCallReconstructor::addLine(const UnwrappedLine &Line) {
53 assert(State != Finalized);
54 LLVM_DEBUG(llvm::dbgs() << "MCR: new line...\n");
55 forEachToken(Line, [&](FormatToken *Token, FormatToken *Parent, bool First) {
56 add(Token, Parent, First);
57 });
58 assert(InProgress || finished());
61 UnwrappedLine MacroCallReconstructor::takeResult() && {
62 finalize();
63 assert(Result.Tokens.size() == 1 &&
64 Result.Tokens.front()->Children.size() == 1);
65 UnwrappedLine Final =
66 createUnwrappedLine(*Result.Tokens.front()->Children.front(), Level);
67 assert(!Final.Tokens.empty());
68 return Final;
71 // Reconstruct the position of the next \p Token, given its parent \p
72 // ExpandedParent in the incoming unwrapped line. \p First specifies whether it
73 // is the first token in a given unwrapped line.
74 void MacroCallReconstructor::add(FormatToken *Token,
75 FormatToken *ExpandedParent, bool First) {
76 LLVM_DEBUG(
77 llvm::dbgs() << "MCR: Token: " << Token->TokenText << ", Parent: "
78 << (ExpandedParent ? ExpandedParent->TokenText : "<null>")
79 << ", First: " << First << "\n");
80 // In order to be able to find the correct parent in the reconstructed token
81 // stream, we need to continue the last open reconstruction until we find the
82 // given token if it is part of the reconstructed token stream.
84 // Note that hidden tokens can be part of the reconstructed stream in nested
85 // macro calls.
86 // For example, given
87 // #define C(x, y) x y
88 // #define B(x) {x}
89 // And the call:
90 // C(a, B(b))
91 // The outer macro call will be C(a, {b}), and the hidden token '}' can be
92 // found in the reconstructed token stream of that expansion level.
93 // In the expanded token stream
94 // a {b}
95 // 'b' is a child of '{'. We need to continue the open expansion of the ','
96 // in the call of 'C' in order to correctly set the ',' as the parent of '{',
97 // so we later set the spelled token 'b' as a child of the ','.
98 if (!ActiveExpansions.empty() && Token->MacroCtx &&
99 (Token->MacroCtx->Role != MR_Hidden ||
100 ActiveExpansions.size() != Token->MacroCtx->ExpandedFrom.size())) {
101 if (/*PassedMacroComma = */ reconstructActiveCallUntil(Token))
102 First = true;
105 prepareParent(ExpandedParent, First);
107 if (Token->MacroCtx) {
108 // If this token was generated by a macro call, add the reconstructed
109 // equivalent of the token.
110 reconstruct(Token);
111 } else {
112 // Otherwise, we add it to the current line.
113 appendToken(Token);
117 // Adjusts the stack of active reconstructed lines so we're ready to push
118 // tokens. The tokens to be pushed are children of ExpandedParent in the
119 // expanded code.
121 // This may entail:
122 // - creating a new line, if the parent is on the active line
123 // - popping active lines, if the parent is further up the stack
125 // Postcondition:
126 // ActiveReconstructedLines.back() is the line that has \p ExpandedParent or its
127 // reconstructed replacement token as a parent (when possible) - that is, the
128 // last token in \c ActiveReconstructedLines[ActiveReconstructedLines.size()-2]
129 // is the parent of ActiveReconstructedLines.back() in the reconstructed
130 // unwrapped line.
131 void MacroCallReconstructor::prepareParent(FormatToken *ExpandedParent,
132 bool NewLine) {
133 LLVM_DEBUG({
134 llvm::dbgs() << "ParentMap:\n";
135 debugParentMap();
137 // We want to find the parent in the new unwrapped line, where the expanded
138 // parent might have been replaced during reconstruction.
139 FormatToken *Parent = getParentInResult(ExpandedParent);
140 LLVM_DEBUG(llvm::dbgs() << "MCR: New parent: "
141 << (Parent ? Parent->TokenText : "<null>") << "\n");
143 FormatToken *OpenMacroParent = nullptr;
144 if (!MacroCallStructure.empty()) {
145 // Inside a macro expansion, it is possible to lose track of the correct
146 // parent - either because it is already popped, for example because it was
147 // in a different macro argument (e.g. M({, })), or when we work on invalid
148 // code.
149 // Thus, we use the innermost macro call's parent as the parent at which
150 // we stop; this allows us to stay within the macro expansion and keeps
151 // any problems confined to the extent of the macro call.
152 OpenMacroParent =
153 getParentInResult(MacroCallStructure.back().MacroCallLParen);
154 LLVM_DEBUG(llvm::dbgs()
155 << "MacroCallLParen: "
156 << MacroCallStructure.back().MacroCallLParen->TokenText
157 << ", OpenMacroParent: "
158 << (OpenMacroParent ? OpenMacroParent->TokenText : "<null>")
159 << "\n");
161 if (NewLine ||
162 (!ActiveReconstructedLines.back()->Tokens.empty() &&
163 Parent == ActiveReconstructedLines.back()->Tokens.back()->Tok)) {
164 // If we are at the first token in a new line, we want to also
165 // create a new line in the resulting reconstructed unwrapped line.
166 while (ActiveReconstructedLines.back()->Tokens.empty() ||
167 (Parent != ActiveReconstructedLines.back()->Tokens.back()->Tok &&
168 ActiveReconstructedLines.back()->Tokens.back()->Tok !=
169 OpenMacroParent)) {
170 ActiveReconstructedLines.pop_back();
171 assert(!ActiveReconstructedLines.empty());
173 assert(!ActiveReconstructedLines.empty());
174 ActiveReconstructedLines.back()->Tokens.back()->Children.push_back(
175 std::make_unique<ReconstructedLine>());
176 ActiveReconstructedLines.push_back(
177 &*ActiveReconstructedLines.back()->Tokens.back()->Children.back());
178 } else if (parentLine().Tokens.back()->Tok != Parent) {
179 // If we're not the first token in a new line, pop lines until we find
180 // the child of \c Parent in the stack.
181 while (Parent != parentLine().Tokens.back()->Tok &&
182 parentLine().Tokens.back()->Tok &&
183 parentLine().Tokens.back()->Tok != OpenMacroParent) {
184 ActiveReconstructedLines.pop_back();
185 assert(!ActiveReconstructedLines.empty());
188 assert(!ActiveReconstructedLines.empty());
191 // For a given \p Parent in the incoming expanded token stream, find the
192 // corresponding parent in the output.
193 FormatToken *MacroCallReconstructor::getParentInResult(FormatToken *Parent) {
194 FormatToken *Mapped = SpelledParentToReconstructedParent.lookup(Parent);
195 if (!Mapped)
196 return Parent;
197 for (; Mapped; Mapped = SpelledParentToReconstructedParent.lookup(Parent))
198 Parent = Mapped;
199 // If we use a different token than the parent in the expanded token stream
200 // as parent, mark it as a special parent, so the formatting code knows it
201 // needs to have its children formatted.
202 Parent->MacroParent = true;
203 return Parent;
206 // Reconstruct a \p Token that was expanded from a macro call.
207 void MacroCallReconstructor::reconstruct(FormatToken *Token) {
208 assert(Token->MacroCtx);
209 // A single token can be the only result of a macro call:
210 // Given: #define ID(x, y) ;
211 // And the call: ID(<some>, <tokens>)
212 // ';' in the expanded stream will reconstruct all of ID(<some>, <tokens>).
213 if (Token->MacroCtx->StartOfExpansion) {
214 startReconstruction(Token);
215 // If the order of tokens in the expanded token stream is not the
216 // same as the order of tokens in the reconstructed stream, we need
217 // to reconstruct tokens that arrive later in the stream.
218 if (Token->MacroCtx->Role != MR_Hidden)
219 reconstructActiveCallUntil(Token);
221 assert(!ActiveExpansions.empty());
222 if (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE) {
223 assert(ActiveExpansions.size() == Token->MacroCtx->ExpandedFrom.size());
224 if (Token->MacroCtx->Role != MR_Hidden) {
225 // The current token in the reconstructed token stream must be the token
226 // we're looking for - we either arrive here after startReconstruction,
227 // which initiates the stream to the first token, or after
228 // continueReconstructionUntil skipped until the expected token in the
229 // reconstructed stream at the start of add(...).
230 assert(ActiveExpansions.back().SpelledI->Tok == Token);
231 processNextReconstructed();
232 } else if (!currentLine()->Tokens.empty()) {
233 // Map all hidden tokens to the last visible token in the output.
234 // If the hidden token is a parent, we'll use the last visible
235 // token as the parent of the hidden token's children.
236 SpelledParentToReconstructedParent[Token] =
237 currentLine()->Tokens.back()->Tok;
238 } else {
239 for (auto I = ActiveReconstructedLines.rbegin(),
240 E = ActiveReconstructedLines.rend();
241 I != E; ++I) {
242 if (!(*I)->Tokens.empty()) {
243 SpelledParentToReconstructedParent[Token] = (*I)->Tokens.back()->Tok;
244 break;
249 if (Token->MacroCtx->EndOfExpansion)
250 endReconstruction(Token);
253 // Given a \p Token that starts an expansion, reconstruct the beginning of the
254 // macro call.
255 // For example, given: #define ID(x) x
256 // And the call: ID(int a)
257 // Reconstructs: ID(
258 void MacroCallReconstructor::startReconstruction(FormatToken *Token) {
259 assert(Token->MacroCtx);
260 assert(!Token->MacroCtx->ExpandedFrom.empty());
261 assert(ActiveExpansions.size() <= Token->MacroCtx->ExpandedFrom.size());
262 #ifndef NDEBUG
263 // Check that the token's reconstruction stack matches our current
264 // reconstruction stack.
265 for (size_t I = 0; I < ActiveExpansions.size(); ++I) {
266 assert(ActiveExpansions[I].ID ==
267 Token->MacroCtx
268 ->ExpandedFrom[Token->MacroCtx->ExpandedFrom.size() - 1 - I]);
270 #endif
271 // Start reconstruction for all calls for which this token is the first token
272 // generated by the call.
273 // Note that the token's expanded from stack is inside-to-outside, and the
274 // expansions for which this token is not the first are the outermost ones.
275 ArrayRef<FormatToken *> StartedMacros =
276 ArrayRef(Token->MacroCtx->ExpandedFrom)
277 .drop_back(ActiveExpansions.size());
278 assert(StartedMacros.size() == Token->MacroCtx->StartOfExpansion);
279 // We reconstruct macro calls outside-to-inside.
280 for (FormatToken *ID : llvm::reverse(StartedMacros)) {
281 // We found a macro call to be reconstructed; the next time our
282 // reconstruction stack is empty we know we finished an reconstruction.
283 #ifndef NDEBUG
284 State = InProgress;
285 #endif
286 // Put the reconstructed macro call's token into our reconstruction stack.
287 auto IU = IdToReconstructed.find(ID);
288 assert(IU != IdToReconstructed.end());
289 ActiveExpansions.push_back(
290 {ID, IU->second->Tokens.begin(), IU->second->Tokens.end()});
291 // Process the macro call's identifier.
292 processNextReconstructed();
293 if (ActiveExpansions.back().SpelledI == ActiveExpansions.back().SpelledE)
294 continue;
295 if (ActiveExpansions.back().SpelledI->Tok->is(tok::l_paren)) {
296 // Process the optional opening parenthesis.
297 processNextReconstructed();
302 // Add all tokens in the reconstruction stream to the output until we find the
303 // given \p Token.
304 bool MacroCallReconstructor::reconstructActiveCallUntil(FormatToken *Token) {
305 assert(!ActiveExpansions.empty());
306 bool PassedMacroComma = false;
307 // FIXME: If Token was already expanded earlier, due to
308 // a change in order, we will not find it, but need to
309 // skip it.
310 while (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE &&
311 ActiveExpansions.back().SpelledI->Tok != Token) {
312 PassedMacroComma = processNextReconstructed() || PassedMacroComma;
314 return PassedMacroComma;
317 // End all reconstructions for which \p Token is the final token.
318 void MacroCallReconstructor::endReconstruction(FormatToken *Token) {
319 assert(Token->MacroCtx &&
320 (ActiveExpansions.size() >= Token->MacroCtx->EndOfExpansion));
321 for (size_t I = 0; I < Token->MacroCtx->EndOfExpansion; ++I) {
322 #ifndef NDEBUG
323 // Check all remaining tokens but the final closing parenthesis and optional
324 // trailing comment were already reconstructed at an inner expansion level.
325 for (auto T = ActiveExpansions.back().SpelledI;
326 T != ActiveExpansions.back().SpelledE; ++T) {
327 FormatToken *Token = T->Tok;
328 bool ClosingParen = (std::next(T) == ActiveExpansions.back().SpelledE ||
329 std::next(T)->Tok->isTrailingComment()) &&
330 !Token->MacroCtx && Token->is(tok::r_paren);
331 bool TrailingComment = Token->isTrailingComment();
332 bool PreviousLevel =
333 Token->MacroCtx &&
334 (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size());
335 if (!ClosingParen && !TrailingComment && !PreviousLevel)
336 llvm::dbgs() << "At token: " << Token->TokenText << "\n";
337 // In addition to the following cases, we can also run into this
338 // when a macro call had more arguments than expected; in that case,
339 // the comma and the remaining tokens in the macro call will potentially
340 // end up in the line when we finish the expansion.
341 // FIXME: Add the information which arguments are unused, and assert
342 // one of the cases below plus reconstructed macro argument tokens.
343 // assert(ClosingParen || TrailingComment || PreviousLevel);
345 #endif
346 // Handle the remaining open tokens:
347 // - expand the closing parenthesis, if it exists, including an optional
348 // trailing comment
349 // - handle tokens that were already reconstructed at an inner expansion
350 // level
351 // - handle tokens when a macro call had more than the expected number of
352 // arguments, i.e. when #define M(x) is called as M(a, b, c) we'll end
353 // up with the sequence ", b, c)" being open at the end of the
354 // reconstruction; we want to gracefully handle that case
356 // FIXME: See the above debug-check for what we will need to do to be
357 // able to assert this.
358 for (auto T = ActiveExpansions.back().SpelledI;
359 T != ActiveExpansions.back().SpelledE; ++T) {
360 processNextReconstructed();
362 ActiveExpansions.pop_back();
366 void MacroCallReconstructor::debugParentMap() const {
367 llvm::DenseSet<FormatToken *> Values;
368 for (const auto &P : SpelledParentToReconstructedParent)
369 Values.insert(P.second);
371 for (const auto &P : SpelledParentToReconstructedParent) {
372 if (Values.contains(P.first))
373 continue;
374 llvm::dbgs() << (P.first ? P.first->TokenText : "<null>");
375 for (auto I = SpelledParentToReconstructedParent.find(P.first),
376 E = SpelledParentToReconstructedParent.end();
377 I != E; I = SpelledParentToReconstructedParent.find(I->second)) {
378 llvm::dbgs() << " -> " << (I->second ? I->second->TokenText : "<null>");
380 llvm::dbgs() << "\n";
384 // If visible, add the next token of the reconstructed token sequence to the
385 // output. Returns whether reconstruction passed a comma that is part of a
386 // macro call.
387 bool MacroCallReconstructor::processNextReconstructed() {
388 FormatToken *Token = ActiveExpansions.back().SpelledI->Tok;
389 ++ActiveExpansions.back().SpelledI;
390 if (Token->MacroCtx) {
391 // Skip tokens that are not part of the macro call.
392 if (Token->MacroCtx->Role == MR_Hidden)
393 return false;
394 // Skip tokens we already expanded during an inner reconstruction.
395 // For example, given: #define ID(x) {x}
396 // And the call: ID(ID(f))
397 // We get two reconstructions:
398 // ID(f) -> {f}
399 // ID({f}) -> {{f}}
400 // We reconstruct f during the first reconstruction, and skip it during the
401 // second reconstruction.
402 if (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size())
403 return false;
405 // Tokens that do not have a macro context are tokens in that are part of the
406 // macro call that have not taken part in expansion.
407 if (!Token->MacroCtx) {
408 // Put the parentheses and commas of a macro call into the same line;
409 // if the arguments produce new unwrapped lines, they will become children
410 // of the corresponding opening parenthesis or comma tokens in the
411 // reconstructed call.
412 if (Token->is(tok::l_paren)) {
413 MacroCallStructure.push_back(MacroCallState(
414 currentLine(), parentLine().Tokens.back()->Tok, Token));
415 // All tokens that are children of the previous line's last token in the
416 // reconstructed token stream will now be children of the l_paren token.
417 // For example, for the line containing the macro calls:
418 // auto x = ID({ID(2)});
419 // We will build up a map <null> -> ( -> ( with the first and second
420 // l_paren of the macro call respectively. New lines that come in with a
421 // <null> parent will then become children of the l_paren token of the
422 // currently innermost macro call.
423 SpelledParentToReconstructedParent[MacroCallStructure.back()
424 .ParentLastToken] = Token;
425 appendToken(Token);
426 prepareParent(Token, /*NewLine=*/true);
427 Token->MacroParent = true;
428 return false;
430 if (!MacroCallStructure.empty()) {
431 if (Token->is(tok::comma)) {
432 // Make new lines inside the next argument children of the comma token.
433 SpelledParentToReconstructedParent
434 [MacroCallStructure.back().Line->Tokens.back()->Tok] = Token;
435 Token->MacroParent = true;
436 appendToken(Token, MacroCallStructure.back().Line);
437 prepareParent(Token, /*NewLine=*/true);
438 return true;
440 if (Token->is(tok::r_paren)) {
441 appendToken(Token, MacroCallStructure.back().Line);
442 SpelledParentToReconstructedParent.erase(
443 MacroCallStructure.back().ParentLastToken);
444 MacroCallStructure.pop_back();
445 return false;
449 // Note that any tokens that are tagged with MR_None have been passed as
450 // arguments to the macro that have not been expanded, for example:
451 // Given: #define ID(X) x
452 // When calling: ID(a, b)
453 // 'b' will be part of the reconstructed token stream, but tagged MR_None.
454 // Given that erroring out in this case would be disruptive, we continue
455 // pushing the (unformatted) token.
456 // FIXME: This can lead to unfortunate formatting decisions - give the user
457 // a hint that their macro definition is broken.
458 appendToken(Token);
459 return false;
462 void MacroCallReconstructor::finalize() {
463 #ifndef NDEBUG
464 assert(State != Finalized && finished());
465 State = Finalized;
466 #endif
468 // We created corresponding unwrapped lines for each incoming line as children
469 // the the toplevel null token.
470 assert(Result.Tokens.size() == 1 && !Result.Tokens.front()->Children.empty());
471 LLVM_DEBUG({
472 llvm::dbgs() << "Finalizing reconstructed lines:\n";
473 debug(Result, 0);
476 // The first line becomes the top level line in the resulting unwrapped line.
477 LineNode &Top = *Result.Tokens.front();
478 auto *I = Top.Children.begin();
479 // Every subsequent line will become a child of the last token in the previous
480 // line, which is the token prior to the first token in the line.
481 LineNode *Last = (*I)->Tokens.back().get();
482 ++I;
483 for (auto *E = Top.Children.end(); I != E; ++I) {
484 assert(Last->Children.empty());
485 Last->Children.push_back(std::move(*I));
487 // Mark the previous line's last token as generated by a macro expansion
488 // so the formatting algorithm can take that into account.
489 Last->Tok->MacroParent = true;
491 Last = Last->Children.back()->Tokens.back().get();
493 Top.Children.resize(1);
496 void MacroCallReconstructor::appendToken(FormatToken *Token,
497 ReconstructedLine *L) {
498 L = L ? L : currentLine();
499 LLVM_DEBUG(llvm::dbgs() << "-> " << Token->TokenText << "\n");
500 L->Tokens.push_back(std::make_unique<LineNode>(Token));
503 UnwrappedLine
504 MacroCallReconstructor::createUnwrappedLine(const ReconstructedLine &Line,
505 int Level) {
506 UnwrappedLine Result;
507 Result.Level = Level;
508 for (const auto &N : Line.Tokens) {
509 Result.Tokens.push_back(N->Tok);
510 UnwrappedLineNode &Current = Result.Tokens.back();
511 for (const auto &Child : N->Children) {
512 if (Child->Tokens.empty())
513 continue;
514 Current.Children.push_back(createUnwrappedLine(*Child, Level + 1));
516 if (Current.Children.size() == 1 &&
517 Current.Tok->isOneOf(tok::l_paren, tok::comma)) {
518 Result.Tokens.splice(Result.Tokens.end(),
519 Current.Children.front().Tokens);
520 Current.Children.clear();
523 return Result;
526 void MacroCallReconstructor::debug(const ReconstructedLine &Line, int Level) {
527 for (int i = 0; i < Level; ++i)
528 llvm::dbgs() << " ";
529 for (const auto &N : Line.Tokens) {
530 if (!N)
531 continue;
532 if (N->Tok)
533 llvm::dbgs() << N->Tok->TokenText << " ";
534 for (const auto &Child : N->Children) {
535 llvm::dbgs() << "\n";
536 debug(*Child, Level + 1);
537 for (int i = 0; i < Level; ++i)
538 llvm::dbgs() << " ";
541 llvm::dbgs() << "\n";
544 MacroCallReconstructor::ReconstructedLine &
545 MacroCallReconstructor::parentLine() {
546 return **std::prev(std::prev(ActiveReconstructedLines.end()));
549 MacroCallReconstructor::ReconstructedLine *
550 MacroCallReconstructor::currentLine() {
551 return ActiveReconstructedLines.back();
554 MacroCallReconstructor::MacroCallState::MacroCallState(
555 MacroCallReconstructor::ReconstructedLine *Line,
556 FormatToken *ParentLastToken, FormatToken *MacroCallLParen)
557 : Line(Line), ParentLastToken(ParentLastToken),
558 MacroCallLParen(MacroCallLParen) {
559 LLVM_DEBUG(
560 llvm::dbgs() << "ParentLastToken: "
561 << (ParentLastToken ? ParentLastToken->TokenText : "<null>")
562 << "\n");
564 assert(MacroCallLParen->is(tok::l_paren));
567 } // namespace format
568 } // namespace clang