[DFAJumpThreading] Remove incoming StartBlock from all phis when unfolding select...
[llvm-project.git] / clang / lib / Format / UnwrappedLineParser.cpp
blob018bc6c165485e2a4e44167e587a31ae19448321
1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "FormatTokenLexer.h"
18 #include "FormatTokenSource.h"
19 #include "Macros.h"
20 #include "TokenAnnotator.h"
21 #include "clang/Basic/TokenKinds.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/raw_os_ostream.h"
26 #include "llvm/Support/raw_ostream.h"
28 #include <algorithm>
29 #include <utility>
31 #define DEBUG_TYPE "format-parser"
33 namespace clang {
34 namespace format {
36 namespace {
38 void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
39 StringRef Prefix = "", bool PrintText = false) {
40 OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
41 << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
42 bool NewLine = false;
43 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
44 E = Line.Tokens.end();
45 I != E; ++I) {
46 if (NewLine) {
47 OS << Prefix;
48 NewLine = false;
50 OS << I->Tok->Tok.getName() << "["
51 << "T=" << (unsigned)I->Tok->getType()
52 << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
53 << "\"] ";
54 for (SmallVectorImpl<UnwrappedLine>::const_iterator
55 CI = I->Children.begin(),
56 CE = I->Children.end();
57 CI != CE; ++CI) {
58 OS << "\n";
59 printLine(OS, *CI, (Prefix + " ").str());
60 NewLine = true;
63 if (!NewLine)
64 OS << "\n";
67 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
68 printLine(llvm::dbgs(), Line);
71 class ScopedDeclarationState {
72 public:
73 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
74 bool MustBeDeclaration)
75 : Line(Line), Stack(Stack) {
76 Line.MustBeDeclaration = MustBeDeclaration;
77 Stack.push_back(MustBeDeclaration);
79 ~ScopedDeclarationState() {
80 Stack.pop_back();
81 if (!Stack.empty())
82 Line.MustBeDeclaration = Stack.back();
83 else
84 Line.MustBeDeclaration = true;
87 private:
88 UnwrappedLine &Line;
89 llvm::BitVector &Stack;
92 } // end anonymous namespace
94 class ScopedLineState {
95 public:
96 ScopedLineState(UnwrappedLineParser &Parser,
97 bool SwitchToPreprocessorLines = false)
98 : Parser(Parser), OriginalLines(Parser.CurrentLines) {
99 if (SwitchToPreprocessorLines)
100 Parser.CurrentLines = &Parser.PreprocessorDirectives;
101 else if (!Parser.Line->Tokens.empty())
102 Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
103 PreBlockLine = std::move(Parser.Line);
104 Parser.Line = std::make_unique<UnwrappedLine>();
105 Parser.Line->Level = PreBlockLine->Level;
106 Parser.Line->PPLevel = PreBlockLine->PPLevel;
107 Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
108 Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
111 ~ScopedLineState() {
112 if (!Parser.Line->Tokens.empty())
113 Parser.addUnwrappedLine();
114 assert(Parser.Line->Tokens.empty());
115 Parser.Line = std::move(PreBlockLine);
116 if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
117 Parser.MustBreakBeforeNextToken = true;
118 Parser.CurrentLines = OriginalLines;
121 private:
122 UnwrappedLineParser &Parser;
124 std::unique_ptr<UnwrappedLine> PreBlockLine;
125 SmallVectorImpl<UnwrappedLine> *OriginalLines;
128 class CompoundStatementIndenter {
129 public:
130 CompoundStatementIndenter(UnwrappedLineParser *Parser,
131 const FormatStyle &Style, unsigned &LineLevel)
132 : CompoundStatementIndenter(Parser, LineLevel,
133 Style.BraceWrapping.AfterControlStatement,
134 Style.BraceWrapping.IndentBraces) {}
135 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
136 bool WrapBrace, bool IndentBrace)
137 : LineLevel(LineLevel), OldLineLevel(LineLevel) {
138 if (WrapBrace)
139 Parser->addUnwrappedLine();
140 if (IndentBrace)
141 ++LineLevel;
143 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
145 private:
146 unsigned &LineLevel;
147 unsigned OldLineLevel;
150 UnwrappedLineParser::UnwrappedLineParser(
151 SourceManager &SourceMgr, const FormatStyle &Style,
152 const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
153 ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
154 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
155 IdentifierTable &IdentTable)
156 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
157 CurrentLines(&Lines), Style(Style), Keywords(Keywords),
158 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
159 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
160 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
161 ? IG_Rejected
162 : IG_Inited),
163 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
164 Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {}
166 void UnwrappedLineParser::reset() {
167 PPBranchLevel = -1;
168 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
169 ? IG_Rejected
170 : IG_Inited;
171 IncludeGuardToken = nullptr;
172 Line.reset(new UnwrappedLine);
173 CommentsBeforeNextToken.clear();
174 FormatTok = nullptr;
175 MustBreakBeforeNextToken = false;
176 IsDecltypeAutoFunction = false;
177 PreprocessorDirectives.clear();
178 CurrentLines = &Lines;
179 DeclarationScopeStack.clear();
180 NestedTooDeep.clear();
181 NestedLambdas.clear();
182 PPStack.clear();
183 Line->FirstStartColumn = FirstStartColumn;
185 if (!Unexpanded.empty())
186 for (FormatToken *Token : AllTokens)
187 Token->MacroCtx.reset();
188 CurrentExpandedLines.clear();
189 ExpandedLines.clear();
190 Unexpanded.clear();
191 InExpansion = false;
192 Reconstruct.reset();
195 void UnwrappedLineParser::parse() {
196 IndexedTokenSource TokenSource(AllTokens);
197 Line->FirstStartColumn = FirstStartColumn;
198 do {
199 LLVM_DEBUG(llvm::dbgs() << "----\n");
200 reset();
201 Tokens = &TokenSource;
202 TokenSource.reset();
204 readToken();
205 parseFile();
207 // If we found an include guard then all preprocessor directives (other than
208 // the guard) are over-indented by one.
209 if (IncludeGuard == IG_Found) {
210 for (auto &Line : Lines)
211 if (Line.InPPDirective && Line.Level > 0)
212 --Line.Level;
215 // Create line with eof token.
216 assert(eof());
217 pushToken(FormatTok);
218 addUnwrappedLine();
220 // In a first run, format everything with the lines containing macro calls
221 // replaced by the expansion.
222 if (!ExpandedLines.empty()) {
223 LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
224 for (const auto &Line : Lines) {
225 if (!Line.Tokens.empty()) {
226 auto it = ExpandedLines.find(Line.Tokens.begin()->Tok);
227 if (it != ExpandedLines.end()) {
228 for (const auto &Expanded : it->second) {
229 LLVM_DEBUG(printDebugInfo(Expanded));
230 Callback.consumeUnwrappedLine(Expanded);
232 continue;
235 LLVM_DEBUG(printDebugInfo(Line));
236 Callback.consumeUnwrappedLine(Line);
238 Callback.finishRun();
241 LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
242 for (const UnwrappedLine &Line : Lines) {
243 LLVM_DEBUG(printDebugInfo(Line));
244 Callback.consumeUnwrappedLine(Line);
246 Callback.finishRun();
247 Lines.clear();
248 while (!PPLevelBranchIndex.empty() &&
249 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
250 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
251 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
253 if (!PPLevelBranchIndex.empty()) {
254 ++PPLevelBranchIndex.back();
255 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
256 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
258 } while (!PPLevelBranchIndex.empty());
261 void UnwrappedLineParser::parseFile() {
262 // The top-level context in a file always has declarations, except for pre-
263 // processor directives and JavaScript files.
264 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
265 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
266 MustBeDeclaration);
267 if (Style.Language == FormatStyle::LK_TextProto)
268 parseBracedList();
269 else
270 parseLevel();
271 // Make sure to format the remaining tokens.
273 // LK_TextProto is special since its top-level is parsed as the body of a
274 // braced list, which does not necessarily have natural line separators such
275 // as a semicolon. Comments after the last entry that have been determined to
276 // not belong to that line, as in:
277 // key: value
278 // // endfile comment
279 // do not have a chance to be put on a line of their own until this point.
280 // Here we add this newline before end-of-file comments.
281 if (Style.Language == FormatStyle::LK_TextProto &&
282 !CommentsBeforeNextToken.empty()) {
283 addUnwrappedLine();
285 flushComments(true);
286 addUnwrappedLine();
289 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
290 do {
291 switch (FormatTok->Tok.getKind()) {
292 case tok::l_brace:
293 return;
294 default:
295 if (FormatTok->is(Keywords.kw_where)) {
296 addUnwrappedLine();
297 nextToken();
298 parseCSharpGenericTypeConstraint();
299 break;
301 nextToken();
302 break;
304 } while (!eof());
307 void UnwrappedLineParser::parseCSharpAttribute() {
308 int UnpairedSquareBrackets = 1;
309 do {
310 switch (FormatTok->Tok.getKind()) {
311 case tok::r_square:
312 nextToken();
313 --UnpairedSquareBrackets;
314 if (UnpairedSquareBrackets == 0) {
315 addUnwrappedLine();
316 return;
318 break;
319 case tok::l_square:
320 ++UnpairedSquareBrackets;
321 nextToken();
322 break;
323 default:
324 nextToken();
325 break;
327 } while (!eof());
330 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
331 if (!Lines.empty() && Lines.back().InPPDirective)
332 return true;
334 const FormatToken *Previous = Tokens->getPreviousToken();
335 return Previous && Previous->is(tok::comment) &&
336 (Previous->IsMultiline || Previous->NewlinesBefore > 0);
339 /// \brief Parses a level: a run of structural elements that ends at end of
/// file or, when \p OpeningBrace is given, at the closing brace (or macro
/// block end) belonging to it. That closing token is not consumed.
340 /// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
341 /// \param IfKind The \p if statement kind in the level.
342 /// \param IfLeftBrace The left brace of the \p if block in the level.
343 /// \returns true if a simple block of if/else/for/while, or false otherwise.
344 /// (A simple block has a single statement.)
345 bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
346 IfStmtKind *IfKind,
347 FormatToken **IfLeftBrace) {
348 const bool InRequiresExpression =
349 OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
// Sampled once on entry. It is always true when RemoveBracesLLVM is off, so
// the "simple block" result below can only be true with that option enabled.
350 const bool IsPrecededByCommentOrPPDirective =
351 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
352 FormatToken *IfLBrace = nullptr;
353 bool HasDoWhile = false;
354 bool HasLabel = false;
355 unsigned StatementCount = 0;
356 bool SwitchLabelEncountered = false;
358 do {
359 if (FormatTok->isAttribute()) {
360 nextToken();
361 continue;
// Macro block begin/end tokens are dispatched as if they were braces.
363 tok::TokenKind kind = FormatTok->Tok.getKind();
364 if (FormatTok->getType() == TT_MacroBlockBegin)
365 kind = tok::l_brace;
366 else if (FormatTok->getType() == TT_MacroBlockEnd)
367 kind = tok::r_brace;
// Parses one structural element and counts it as a statement. Once a
// do-while or a label has been recorded, nullptr is passed for that flag so
// it is not written again.
369 auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile,
370 &HasLabel, &StatementCount] {
371 parseStructuralElement(OpeningBrace, IfKind, &IfLBrace,
372 HasDoWhile ? nullptr : &HasDoWhile,
373 HasLabel ? nullptr : &HasLabel);
374 ++StatementCount;
375 assert(StatementCount > 0 && "StatementCount overflow!");
378 switch (kind) {
379 case tok::comment:
380 nextToken();
381 addUnwrappedLine();
382 break;
383 case tok::l_brace:
384 if (InRequiresExpression) {
385 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
386 } else if (FormatTok->Previous &&
387 FormatTok->Previous->ClosesRequiresClause) {
388 // We need the 'default' case here to correctly parse a function
389 // l_brace.
390 ParseDefault();
391 continue;
393 if (!InRequiresExpression && FormatTok->isNot(TT_MacroBlockBegin) &&
394 tryToParseBracedList()) {
395 continue;
397 parseBlock();
398 ++StatementCount;
399 assert(StatementCount > 0 && "StatementCount overflow!");
400 addUnwrappedLine();
401 break;
402 case tok::r_brace:
// With an opening brace this token ends the level. The result is true only
// for a control-statement or else block (RemoveBracesLLVM on, not in a PP
// directive) that holds exactly one statement, no label, no do-while, and
// has no comment or PP directive before or directly after it.
403 if (OpeningBrace) {
404 if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
405 !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
406 return false;
408 if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
409 HasDoWhile || IsPrecededByCommentOrPPDirective ||
410 precededByCommentOrPPDirective()) {
411 return false;
413 const FormatToken *Next = Tokens->peekNextToken();
414 if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
415 return false;
416 if (IfLeftBrace)
417 *IfLeftBrace = IfLBrace;
418 return true;
// No opening brace at this level: consume the closing brace as a line of its
// own and keep going.
420 nextToken();
421 addUnwrappedLine();
422 break;
423 case tok::kw_default: {
// Look past any comments to see whether a ':' follows, then rewind the token
// source to where it was.
424 unsigned StoredPosition = Tokens->getPosition();
425 FormatToken *Next;
426 do {
427 Next = Tokens->getNextToken();
428 assert(Next);
429 } while (Next->is(tok::comment));
430 FormatTok = Tokens->setPosition(StoredPosition);
431 if (Next->isNot(tok::colon)) {
432 // default not followed by ':' is not a case label; treat it like
433 // an identifier.
434 parseStructuralElement();
435 break;
437 // Else, if it is 'default:', fall through to the case handling.
438 [[fallthrough]];
440 case tok::kw_case:
441 if (Style.isProto() || Style.isVerilog() ||
442 (Style.isJavaScript() && Line->MustBeDeclaration)) {
443 // Proto: there are no switch/case statements
444 // Verilog: Case labels don't have this word. We handle case
445 // labels including default in TokenAnnotator.
446 // JavaScript: A 'case: string' style field declaration.
447 ParseDefault();
448 break;
// Only the first label seen in this level may raise the line level.
450 if (!SwitchLabelEncountered &&
451 (Style.IndentCaseLabels ||
452 (Line->InPPDirective && Line->Level == 1))) {
453 ++Line->Level;
455 SwitchLabelEncountered = true;
456 parseStructuralElement();
457 break;
458 case tok::l_square:
459 if (Style.isCSharp()) {
460 nextToken();
461 parseCSharpAttribute();
462 break;
464 if (handleCppAttributes())
465 break;
466 [[fallthrough]];
467 default:
468 ParseDefault();
469 break;
471 } while (!eof());
473 return false;
// Scans ahead from the current '{' and sets the block kind (BK_Block or
// BK_BracedInit) of each opening brace and its matching closing brace, based
// mostly on the token that follows the closing brace. The token source is
// rewound to its starting position before returning.
// When ExpectClassBody is true, a ';' after the '}' that closes the outermost
// brace is not taken as a sign of a braced list.
476 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
477 // We'll parse forward through the tokens until we hit
478 // a closing brace or eof - note that getNextToken() will
479 // parse macros, so this will magically work inside macro
480 // definitions, too.
481 unsigned StoredPosition = Tokens->getPosition();
482 FormatToken *Tok = FormatTok;
483 const FormatToken *PrevTok = Tok->Previous;
484 // Keep a stack of positions of lbrace tokens. We will
485 // update information about whether an lbrace starts a
486 // braced init list or a different block during the loop.
487 struct StackEntry {
488 FormatToken *Tok;
489 const FormatToken *PrevTok;
491 SmallVector<StackEntry, 8> LBraceStack;
492 assert(Tok->is(tok::l_brace));
493 do {
494 // Get next non-comment, non-preprocessor token.
495 FormatToken *NextTok;
496 do {
497 NextTok = Tokens->getNextToken();
498 } while (NextTok->is(tok::comment));
// Skip a preprocessor directive: drop the token after '#', then continue
// until a non-comment token that starts a new line (or eof) is reached.
499 while (NextTok->is(tok::hash)) {
500 NextTok = Tokens->getNextToken();
501 do {
502 NextTok = Tokens->getNextToken();
503 } while (NextTok->is(tok::comment) ||
504 (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof)));
507 switch (Tok->Tok.getKind()) {
508 case tok::l_brace:
509 if (Style.isJavaScript() && PrevTok) {
510 if (PrevTok->isOneOf(tok::colon, tok::less)) {
511 // A ':' indicates this code is in a type, or a braced list
512 // following a label in an object literal ({a: {b: 1}}).
513 // A '<' could be an object used in a comparison, but that is nonsense
514 // code (can never return true), so more likely it is a generic type
515 // argument (`X<{a: string; b: number}>`).
516 // The code below could be confused by semicolons between the
517 // individual members in a type member list, which would normally
518 // trigger BK_Block. In both cases, this must be parsed as an inline
519 // braced init.
520 Tok->setBlockKind(BK_BracedInit);
521 } else if (PrevTok->is(tok::r_paren)) {
522 // `) { }` can only occur in function or method declarations in JS.
523 Tok->setBlockKind(BK_Block);
525 } else {
526 Tok->setBlockKind(BK_Unknown);
528 LBraceStack.push_back({Tok, PrevTok});
529 break;
530 case tok::r_brace:
531 if (LBraceStack.empty())
532 break;
// Only braces still marked BK_Unknown are classified here; the heuristics
// below are OR-ed together in order.
533 if (LBraceStack.back().Tok->is(BK_Unknown)) {
534 bool ProbablyBracedList = false;
535 if (Style.Language == FormatStyle::LK_Proto) {
536 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
537 } else {
538 // Skip NextTok over preprocessor lines, otherwise we may not
539 // properly diagnose the block as a braced initializer
540 // if the comma separator appears after the pp directive.
541 while (NextTok->is(tok::hash)) {
542 ScopedMacroState MacroState(*Line, Tokens, NextTok);
543 do {
544 NextTok = Tokens->getNextToken();
545 } while (NextTok->isNot(tok::eof));
548 // Using OriginalColumn to distinguish between ObjC methods and
549 // binary operators is a bit hacky.
550 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
551 NextTok->OriginalColumn == 0;
553 // Try to detect a braced list. Note that regardless how we mark inner
554 // braces here, we will overwrite the BlockKind later if we parse a
555 // braced list (where all blocks inside are by default braced lists),
556 // or when we explicitly detect blocks (for example while parsing
557 // lambdas).
559 // If we already marked the opening brace as braced list, the closing
560 // must also be part of it.
561 ProbablyBracedList = LBraceStack.back().Tok->is(TT_BracedListLBrace);
563 ProbablyBracedList = ProbablyBracedList ||
564 (Style.isJavaScript() &&
565 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
566 Keywords.kw_as));
567 ProbablyBracedList = ProbablyBracedList ||
568 (Style.isCpp() && NextTok->is(tok::l_paren));
570 // If there is a comma, semicolon or right paren after the closing
571 // brace, we assume this is a braced initializer list.
572 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
573 // braced list in JS.
574 ProbablyBracedList =
575 ProbablyBracedList ||
576 NextTok->isOneOf(tok::comma, tok::period, tok::colon,
577 tok::r_paren, tok::r_square, tok::ellipsis);
579 // Distinguish between braced list in a constructor initializer list
580 // followed by constructor body, or just adjacent blocks.
581 ProbablyBracedList =
582 ProbablyBracedList ||
583 (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok &&
584 LBraceStack.back().PrevTok->isOneOf(tok::identifier,
585 tok::greater));
587 ProbablyBracedList =
588 ProbablyBracedList ||
589 (NextTok->is(tok::identifier) &&
590 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));
// A ';' after the '}' counts as a braced list unless a class body is
// expected and this '}' closes the outermost tracked brace.
592 ProbablyBracedList = ProbablyBracedList ||
593 (NextTok->is(tok::semi) &&
594 (!ExpectClassBody || LBraceStack.size() != 1));
596 ProbablyBracedList =
597 ProbablyBracedList ||
598 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
600 if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
601 // We can have an array subscript after a braced init
602 // list, but C++11 attributes are expected after blocks.
603 NextTok = Tokens->getNextToken();
604 ProbablyBracedList = NextTok->isNot(tok::l_square);
// The closing brace and its opening brace always get the same kind.
607 if (ProbablyBracedList) {
608 Tok->setBlockKind(BK_BracedInit);
609 LBraceStack.back().Tok->setBlockKind(BK_BracedInit);
610 } else {
611 Tok->setBlockKind(BK_Block);
612 LBraceStack.back().Tok->setBlockKind(BK_Block);
615 LBraceStack.pop_back();
616 break;
617 case tok::identifier:
618 if (Tok->isNot(TT_StatementMacro))
619 break;
620 [[fallthrough]];
// Any of these tokens inside a still-unknown brace marks it as a block.
621 case tok::at:
622 case tok::semi:
623 case tok::kw_if:
624 case tok::kw_while:
625 case tok::kw_for:
626 case tok::kw_switch:
627 case tok::kw_try:
628 case tok::kw___try:
629 if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown))
630 LBraceStack.back().Tok->setBlockKind(BK_Block);
631 break;
632 default:
633 break;
635 PrevTok = Tok;
636 Tok = NextTok;
637 } while (Tok->isNot(tok::eof) && !LBraceStack.empty());
639 // Assume other blocks for all unclosed opening braces.
640 for (const auto &Entry : LBraceStack)
641 if (Entry.Tok->is(BK_Unknown))
642 Entry.Tok->setBlockKind(BK_Block);
644 FormatTok = Tokens->setPosition(StoredPosition);
647 // Sets the token type of the directly previous right brace.
648 void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) {
649 if (auto Prev = FormatTok->getPreviousNonComment();
650 Prev && Prev->is(tok::r_brace)) {
651 Prev->setFinalizedType(Type);
/// Mixes the std::hash of \p v into \p seed in place.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  // The mix term is computed from the old seed before it is XOR-ed in.
  const std::size_t mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= mixed;
}
661 size_t UnwrappedLineParser::computePPHash() const {
662 size_t h = 0;
663 for (const auto &i : PPStack) {
664 hash_combine(h, size_t(i.Kind));
665 hash_combine(h, i.Line);
667 return h;
670 // Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
671 // is not null, subtracts its length (plus the preceding space) when computing
672 // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
673 // running the token annotator on it so that we can restore them afterward.
674 bool UnwrappedLineParser::mightFitOnOneLine(
675 UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
// A column limit of 0 means there is no limit to exceed.
676 const auto ColumnLimit = Style.ColumnLimit;
677 if (ColumnLimit == 0)
678 return true;
680 auto &Tokens = ParsedLine.Tokens;
681 assert(!Tokens.empty());
683 const auto *LastToken = Tokens.back().Tok;
684 assert(LastToken);
686 SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
// Snapshot every token (heap-allocated copy) and its children so that the
// annotator's modifications can be undone below.
688 int Index = 0;
689 for (const auto &Token : Tokens) {
690 assert(Token.Tok);
691 auto &SavedToken = SavedTokens[Index++];
692 SavedToken.Tok = new FormatToken;
693 SavedToken.Tok->copyFrom(*Token.Tok);
// NOTE(review): Token is a const reference, so this std::move presumably
// selects a copy rather than a move - confirm that is intended.
694 SavedToken.Children = std::move(Token.Children);
697 AnnotatedLine Line(ParsedLine);
698 assert(Line.Last == LastToken);
700 TokenAnnotator Annotator(Style, Keywords);
701 Annotator.annotate(Line);
702 Annotator.calculateFormattingInformation(Line);
704 auto Length = LastToken->TotalLength;
705 if (OpeningBrace) {
706 assert(OpeningBrace != Tokens.front().Tok);
// If the brace's TotalLength is exactly ColumnLimit more than its
// predecessor's, that ColumnLimit is taken back out of the length.
707 if (auto Prev = OpeningBrace->Previous;
708 Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
709 Length -= ColumnLimit;
711 Length -= OpeningBrace->TokenText.size() + 1;
// A leading '}' is subtracted from the length as well, plus one column.
714 if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) {
715 assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
716 Length -= FirstToken->TokenText.size() + 1;
// Restore the tokens from the snapshot and free the copies.
719 Index = 0;
720 for (auto &Token : Tokens) {
721 const auto &SavedToken = SavedTokens[Index++];
722 Token.Tok->copyFrom(*SavedToken.Tok);
// NOTE(review): SavedToken is a const reference, so this std::move presumably
// copies as well - confirm.
723 Token.Children = std::move(SavedToken.Children);
724 delete SavedToken.Tok;
727 // If these change, PPLevel needs to be used to get the correct indentation.
728 assert(!Line.InMacroBody);
729 assert(!Line.InPPDirective);
730 return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
// Parses a block that starts at the current token, which must be a '{', a
// macro block begin, or (in Verilog) a begin or hierarchy token, through its
// closing token.
// \param MustBeDeclaration Declaration state pushed for the block's contents.
// \param AddLevels Number of levels added to the line level for the contents.
// \param MunchSemi Whether a ';' directly after the block is consumed too.
// \param KeepBraces When true, the braces are only linked as a removable pair
//        if the contained if block has a non-optional left brace.
// \param IfKind Forwarded to parseLevel().
// \param UnindentWhitesmithsBraces Lowers the line level by one after the
//        opening line has been emitted.
// \returns The if-block left brace reported by parseLevel() (may be null);
//          nullptr when the line level exceeds 300.
733 FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
734 unsigned AddLevels, bool MunchSemi,
735 bool KeepBraces,
736 IfStmtKind *IfKind,
737 bool UnindentWhitesmithsBraces) {
738 auto HandleVerilogBlockLabel = [this]() {
739 // ":" name
740 if (Style.isVerilog() && FormatTok->is(tok::colon)) {
741 nextToken();
742 if (Keywords.isVerilogIdentifier(*FormatTok))
743 nextToken();
747 // Whether this is a Verilog-specific block that has a special header like a
748 // module.
749 const bool VerilogHierarchy =
750 Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok);
751 assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
752 (Style.isVerilog() &&
753 (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
754 "'{' or macro block token expected");
// Tok keeps pointing at the opening token; FormatTok moves on.
755 FormatToken *Tok = FormatTok;
756 const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
757 auto Index = CurrentLines->size();
758 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
759 FormatTok->setBlockKind(BK_Block);
761 // For Whitesmiths mode, jump to the next level prior to skipping over the
762 // braces.
763 if (!VerilogHierarchy && AddLevels > 0 &&
764 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
765 ++Line->Level;
// Hash of the preprocessor stack at block start; compared with the hash at
// block end before the opening and closing lines are linked.
768 size_t PPStartHash = computePPHash();
770 const unsigned InitialLevel = Line->Level;
771 if (VerilogHierarchy) {
772 AddLevels += parseVerilogHierarchyHeader();
773 } else {
774 nextToken(/*LevelDifference=*/AddLevels);
775 HandleVerilogBlockLabel();
778 // Bail out if there are too many levels. Otherwise, the stack might overflow.
779 if (Line->Level > 300)
780 return nullptr;
782 if (MacroBlock && FormatTok->is(tok::l_paren))
783 parseParens();
785 size_t NbPreprocessorDirectives =
786 !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
787 addUnwrappedLine();
788 size_t OpeningLineIndex =
789 CurrentLines->empty()
790 ? (UnwrappedLine::kInvalidIndex)
791 : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
793 // Whitesmiths is weird here. The brace needs to be indented for the namespace
794 // block, but the block itself may not be indented depending on the style
795 // settings. This allows the format to back up one level in those cases.
796 if (UnindentWhitesmithsBraces)
797 --Line->Level;
799 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
800 MustBeDeclaration);
801 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
802 Line->Level += AddLevels;
804 FormatToken *IfLBrace = nullptr;
805 const bool SimpleBlock = parseLevel(Tok, IfKind, &IfLBrace);
807 if (eof())
808 return IfLBrace;
// The level ended on something other than the expected closing token:
// restore the level, mark the current token as a block and give up.
810 if (MacroBlock ? FormatTok->isNot(TT_MacroBlockEnd)
811 : FormatTok->isNot(tok::r_brace)) {
812 Line->Level = InitialLevel;
813 FormatTok->setBlockKind(BK_Block);
814 return IfLBrace;
817 if (FormatTok->is(tok::r_brace) && Tok->is(TT_NamespaceLBrace))
818 FormatTok->setFinalizedType(TT_NamespaceRBrace);
820 const bool IsFunctionRBrace =
821 FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace);
// Decides whether the braces of this block are a removable pair. Everything
// is captured by copy, so the changes to Index and Tok made inside stay
// local to the lambda.
823 auto RemoveBraces = [=]() mutable {
824 if (!SimpleBlock)
825 return false;
826 assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
827 assert(FormatTok->is(tok::r_brace));
828 const bool WrappedOpeningBrace = !Tok->Previous;
829 if (WrappedOpeningBrace && FollowedByComment)
830 return false;
831 const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
832 if (KeepBraces && !HasRequiredIfBraces)
833 return false;
834 if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
835 const FormatToken *Previous = Tokens->getPreviousToken();
836 assert(Previous);
837 if (Previous->is(tok::r_brace) && !Previous->Optional)
838 return false;
840 assert(!CurrentLines->empty());
841 auto &LastLine = CurrentLines->back();
842 if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
843 return false;
844 if (Tok->is(TT_ElseLBrace))
845 return true;
846 if (WrappedOpeningBrace) {
847 assert(Index > 0);
848 --Index; // The line above the wrapped l_brace.
849 Tok = nullptr;
851 return mightFitOnOneLine((*CurrentLines)[Index], Tok);
// Link the opening and closing brace to each other.
853 if (RemoveBraces()) {
854 Tok->MatchingParen = FormatTok;
855 FormatTok->MatchingParen = Tok;
858 size_t PPEndHash = computePPHash();
860 // Munch the closing brace.
861 nextToken(/*LevelDifference=*/-AddLevels);
863 // When this is a function block and there is an unnecessary semicolon
864 // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
865 // it later).
866 if (Style.RemoveSemicolon && IsFunctionRBrace) {
867 while (FormatTok->is(tok::semi)) {
868 FormatTok->Optional = true;
869 nextToken();
873 HandleVerilogBlockLabel();
875 if (MacroBlock && FormatTok->is(tok::l_paren))
876 parseParens();
878 Line->Level = InitialLevel;
880 if (FormatTok->is(tok::kw_noexcept)) {
881 // A noexcept in a requires expression.
882 nextToken();
885 if (FormatTok->is(tok::arrow)) {
886 // Following the } or noexcept we can find a trailing return type arrow
887 // as part of an implicit conversion constraint.
888 nextToken();
889 parseStructuralElement();
892 if (MunchSemi && FormatTok->is(tok::semi))
893 nextToken();
// Opening and closing lines are only linked when the preprocessor stack
// hashed the same at both ends of the block.
895 if (PPStartHash == PPEndHash) {
896 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
897 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
898 // Update the opening line to add the forward reference as well
899 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
900 CurrentLines->size() - 1;
904 return IfLBrace;
907 static bool isGoogScope(const UnwrappedLine &Line) {
908 // FIXME: Closure-library specific stuff should not be hard-coded but be
909 // configurable.
910 if (Line.Tokens.size() < 4)
911 return false;
912 auto I = Line.Tokens.begin();
913 if (I->Tok->TokenText != "goog")
914 return false;
915 ++I;
916 if (I->Tok->isNot(tok::period))
917 return false;
918 ++I;
919 if (I->Tok->TokenText != "scope")
920 return false;
921 ++I;
922 return I->Tok->is(tok::l_paren);
925 static bool isIIFE(const UnwrappedLine &Line,
926 const AdditionalKeywords &Keywords) {
927 // Look for the start of an immediately invoked anonymous function.
928 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
929 // This is commonly done in JavaScript to create a new, anonymous scope.
930 // Example: (function() { ... })()
931 if (Line.Tokens.size() < 3)
932 return false;
933 auto I = Line.Tokens.begin();
934 if (I->Tok->isNot(tok::l_paren))
935 return false;
936 ++I;
937 if (I->Tok->isNot(Keywords.kw_function))
938 return false;
939 ++I;
940 return I->Tok->is(tok::l_paren);
943 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
944 const FormatToken &InitialToken) {
945 tok::TokenKind Kind = InitialToken.Tok.getKind();
946 if (InitialToken.is(TT_NamespaceMacro))
947 Kind = tok::kw_namespace;
949 switch (Kind) {
950 case tok::kw_namespace:
951 return Style.BraceWrapping.AfterNamespace;
952 case tok::kw_class:
953 return Style.BraceWrapping.AfterClass;
954 case tok::kw_union:
955 return Style.BraceWrapping.AfterUnion;
956 case tok::kw_struct:
957 return Style.BraceWrapping.AfterStruct;
958 case tok::kw_enum:
959 return Style.BraceWrapping.AfterEnum;
960 default:
961 return false;
965 void UnwrappedLineParser::parseChildBlock() {
966 assert(FormatTok->is(tok::l_brace));
967 FormatTok->setBlockKind(BK_Block);
968 const FormatToken *OpeningBrace = FormatTok;
969 nextToken();
971 bool SkipIndent = (Style.isJavaScript() &&
972 (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
973 ScopedLineState LineState(*this);
974 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
975 /*MustBeDeclaration=*/false);
976 Line->Level += SkipIndent ? 0 : 1;
977 parseLevel(OpeningBrace);
978 flushComments(isOnNewLine(*FormatTok));
979 Line->Level -= SkipIndent ? 0 : 1;
981 nextToken();
984 void UnwrappedLineParser::parsePPDirective() {
985 assert(FormatTok->is(tok::hash) && "'#' expected");
986 ScopedMacroState MacroState(*Line, Tokens, FormatTok);
988 nextToken();
990 if (!FormatTok->Tok.getIdentifierInfo()) {
991 parsePPUnknown();
992 return;
995 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
996 case tok::pp_define:
997 parsePPDefine();
998 return;
999 case tok::pp_if:
1000 parsePPIf(/*IfDef=*/false);
1001 break;
1002 case tok::pp_ifdef:
1003 case tok::pp_ifndef:
1004 parsePPIf(/*IfDef=*/true);
1005 break;
1006 case tok::pp_else:
1007 case tok::pp_elifdef:
1008 case tok::pp_elifndef:
1009 case tok::pp_elif:
1010 parsePPElse();
1011 break;
1012 case tok::pp_endif:
1013 parsePPEndIf();
1014 break;
1015 case tok::pp_pragma:
1016 parsePPPragma();
1017 break;
1018 default:
1019 parsePPUnknown();
1020 break;
1024 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1025 size_t Line = CurrentLines->size();
1026 if (CurrentLines == &PreprocessorDirectives)
1027 Line += Lines.size();
1029 if (Unreachable ||
1030 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1031 PPStack.push_back({PP_Unreachable, Line});
1032 } else {
1033 PPStack.push_back({PP_Conditional, Line});
1037 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1038 ++PPBranchLevel;
1039 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1040 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1041 PPLevelBranchIndex.push_back(0);
1042 PPLevelBranchCount.push_back(0);
1044 PPChainBranchIndex.push(Unreachable ? -1 : 0);
1045 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1046 conditionalCompilationCondition(Unreachable || Skip);
1049 void UnwrappedLineParser::conditionalCompilationAlternative() {
1050 if (!PPStack.empty())
1051 PPStack.pop_back();
1052 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1053 if (!PPChainBranchIndex.empty())
1054 ++PPChainBranchIndex.top();
1055 conditionalCompilationCondition(
1056 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1057 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1060 void UnwrappedLineParser::conditionalCompilationEnd() {
1061 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1062 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1063 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1064 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1066 // Guard against #endif's without #if.
1067 if (PPBranchLevel > -1)
1068 --PPBranchLevel;
1069 if (!PPChainBranchIndex.empty())
1070 PPChainBranchIndex.pop();
1071 if (!PPStack.empty())
1072 PPStack.pop_back();
1075 void UnwrappedLineParser::parsePPIf(bool IfDef) {
1076 bool IfNDef = FormatTok->is(tok::pp_ifndef);
1077 nextToken();
1078 bool Unreachable = false;
1079 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1080 Unreachable = true;
1081 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1082 Unreachable = true;
1083 conditionalCompilationStart(Unreachable);
1084 FormatToken *IfCondition = FormatTok;
1085 // If there's a #ifndef on the first line, and the only lines before it are
1086 // comments, it could be an include guard.
1087 bool MaybeIncludeGuard = IfNDef;
1088 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1089 for (auto &Line : Lines) {
1090 if (Line.Tokens.front().Tok->isNot(tok::comment)) {
1091 MaybeIncludeGuard = false;
1092 IncludeGuard = IG_Rejected;
1093 break;
1097 --PPBranchLevel;
1098 parsePPUnknown();
1099 ++PPBranchLevel;
1100 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1101 IncludeGuard = IG_IfNdefed;
1102 IncludeGuardToken = IfCondition;
1106 void UnwrappedLineParser::parsePPElse() {
1107 // If a potential include guard has an #else, it's not an include guard.
1108 if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1109 IncludeGuard = IG_Rejected;
1110 // Don't crash when there is an #else without an #if.
1111 assert(PPBranchLevel >= -1);
1112 if (PPBranchLevel == -1)
1113 conditionalCompilationStart(/*Unreachable=*/true);
1114 conditionalCompilationAlternative();
1115 --PPBranchLevel;
1116 parsePPUnknown();
1117 ++PPBranchLevel;
1120 void UnwrappedLineParser::parsePPEndIf() {
1121 conditionalCompilationEnd();
1122 parsePPUnknown();
1123 // If the #endif of a potential include guard is the last thing in the file,
1124 // then we found an include guard.
1125 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1126 Style.IndentPPDirectives != FormatStyle::PPDIS_None) {
1127 IncludeGuard = IG_Found;
1131 void UnwrappedLineParser::parsePPDefine() {
1132 nextToken();
1134 if (!FormatTok->Tok.getIdentifierInfo()) {
1135 IncludeGuard = IG_Rejected;
1136 IncludeGuardToken = nullptr;
1137 parsePPUnknown();
1138 return;
1141 if (IncludeGuard == IG_IfNdefed &&
1142 IncludeGuardToken->TokenText == FormatTok->TokenText) {
1143 IncludeGuard = IG_Defined;
1144 IncludeGuardToken = nullptr;
1145 for (auto &Line : Lines) {
1146 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1147 IncludeGuard = IG_Rejected;
1148 break;
1153 // In the context of a define, even keywords should be treated as normal
1154 // identifiers. Setting the kind to identifier is not enough, because we need
1155 // to treat additional keywords like __except as well, which are already
1156 // identifiers. Setting the identifier info to null interferes with include
1157 // guard processing above, and changes preprocessing nesting.
1158 FormatTok->Tok.setKind(tok::identifier);
1159 FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1160 nextToken();
1161 if (FormatTok->Tok.getKind() == tok::l_paren &&
1162 !FormatTok->hasWhitespaceBefore()) {
1163 parseParens();
1165 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1166 Line->Level += PPBranchLevel + 1;
1167 addUnwrappedLine();
1168 ++Line->Level;
1170 Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
1171 assert((int)Line->PPLevel >= 0);
1172 Line->InMacroBody = true;
1174 // Errors during a preprocessor directive can only affect the layout of the
1175 // preprocessor directive, and thus we ignore them. An alternative approach
1176 // would be to use the same approach we use on the file level (no
1177 // re-indentation if there was a structural error) within the macro
1178 // definition.
1179 parseFile();
1182 void UnwrappedLineParser::parsePPPragma() {
1183 Line->InPragmaDirective = true;
1184 parsePPUnknown();
1187 void UnwrappedLineParser::parsePPUnknown() {
1188 do {
1189 nextToken();
1190 } while (!eof());
1191 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1192 Line->Level += PPBranchLevel + 1;
1193 addUnwrappedLine();
1196 // Here we exclude certain tokens that are not usually the first token in an
1197 // unwrapped line. This is used in attempt to distinguish macro calls without
1198 // trailing semicolons from other constructs split to several lines.
1199 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1200 // Semicolon can be a null-statement, l_square can be a start of a macro or
1201 // a C++11 attribute, but this doesn't seem to be common.
1202 assert(Tok.isNot(TT_AttributeSquare));
1203 return !Tok.isOneOf(tok::semi, tok::l_brace,
1204 // Tokens that can only be used as binary operators and a
1205 // part of overloaded operator names.
1206 tok::period, tok::periodstar, tok::arrow, tok::arrowstar,
1207 tok::less, tok::greater, tok::slash, tok::percent,
1208 tok::lessless, tok::greatergreater, tok::equal,
1209 tok::plusequal, tok::minusequal, tok::starequal,
1210 tok::slashequal, tok::percentequal, tok::ampequal,
1211 tok::pipeequal, tok::caretequal, tok::greatergreaterequal,
1212 tok::lesslessequal,
1213 // Colon is used in labels, base class lists, initializer
1214 // lists, range-based for loops, ternary operator, but
1215 // should never be the first token in an unwrapped line.
1216 tok::colon,
1217 // 'noexcept' is a trailing annotation.
1218 tok::kw_noexcept);
1221 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1222 const FormatToken *FormatTok) {
1223 // FIXME: This returns true for C/C++ keywords like 'struct'.
1224 return FormatTok->is(tok::identifier) &&
1225 (!FormatTok->Tok.getIdentifierInfo() ||
1226 !FormatTok->isOneOf(
1227 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1228 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1229 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1230 Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1231 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1232 Keywords.kw_instanceof, Keywords.kw_interface,
1233 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1236 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1237 const FormatToken *FormatTok) {
1238 return FormatTok->Tok.isLiteral() ||
1239 FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1240 mustBeJSIdent(Keywords, FormatTok);
1243 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1244 // when encountered after a value (see mustBeJSIdentOrValue).
1245 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1246 const FormatToken *FormatTok) {
1247 return FormatTok->isOneOf(
1248 tok::kw_return, Keywords.kw_yield,
1249 // conditionals
1250 tok::kw_if, tok::kw_else,
1251 // loops
1252 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1253 // switch/case
1254 tok::kw_switch, tok::kw_case,
1255 // exceptions
1256 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1257 // declaration
1258 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1259 Keywords.kw_async, Keywords.kw_function,
1260 // import/export
1261 Keywords.kw_import, tok::kw_export);
1264 // Checks whether a token is a type in K&R C (aka C78).
1265 static bool isC78Type(const FormatToken &Tok) {
1266 return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1267 tok::kw_unsigned, tok::kw_float, tok::kw_double,
1268 tok::identifier);
1271 // This function checks whether a token starts the first parameter declaration
1272 // in a K&R C (aka C78) function definition, e.g.:
1273 // int f(a, b)
1274 // short a, b;
1275 // {
1276 // return a + b;
1277 // }
1278 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1279 const FormatToken *FuncName) {
1280 assert(Tok);
1281 assert(Next);
1282 assert(FuncName);
1284 if (FuncName->isNot(tok::identifier))
1285 return false;
1287 const FormatToken *Prev = FuncName->Previous;
1288 if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1289 return false;
1291 if (!isC78Type(*Tok) &&
1292 !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
1293 return false;
1296 if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1297 return false;
1299 Tok = Tok->Previous;
1300 if (!Tok || Tok->isNot(tok::r_paren))
1301 return false;
1303 Tok = Tok->Previous;
1304 if (!Tok || Tok->isNot(tok::identifier))
1305 return false;
1307 return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1310 bool UnwrappedLineParser::parseModuleImport() {
1311 assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
1313 if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
1314 !Token->Tok.getIdentifierInfo() &&
1315 !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) {
1316 return false;
1319 nextToken();
1320 while (!eof()) {
1321 if (FormatTok->is(tok::colon)) {
1322 FormatTok->setFinalizedType(TT_ModulePartitionColon);
1324 // Handle import <foo/bar.h> as we would an include statement.
1325 else if (FormatTok->is(tok::less)) {
1326 nextToken();
1327 while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1328 // Mark tokens up to the trailing line comments as implicit string
1329 // literals.
1330 if (FormatTok->isNot(tok::comment) &&
1331 !FormatTok->TokenText.startswith("//")) {
1332 FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1334 nextToken();
1337 if (FormatTok->is(tok::semi)) {
1338 nextToken();
1339 break;
1341 nextToken();
1344 addUnwrappedLine();
1345 return true;
1348 // readTokenWithJavaScriptASI reads the next token and terminates the current
1349 // line if JavaScript Automatic Semicolon Insertion must
1350 // happen between the current token and the next token.
1352 // This method is conservative - it cannot cover all edge cases of JavaScript,
1353 // but only aims to correctly handle certain well known cases. It *must not*
1354 // return true in speculative cases.
1355 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1356 FormatToken *Previous = FormatTok;
1357 readToken();
1358 FormatToken *Next = FormatTok;
1360 bool IsOnSameLine =
1361 CommentsBeforeNextToken.empty()
1362 ? Next->NewlinesBefore == 0
1363 : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1364 if (IsOnSameLine)
1365 return;
1367 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1368 bool PreviousStartsTemplateExpr =
1369 Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1370 if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1371 // If the line contains an '@' sign, the previous token might be an
1372 // annotation, which can precede another identifier/value.
1373 bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1374 return LineNode.Tok->is(tok::at);
1376 if (HasAt)
1377 return;
1379 if (Next->is(tok::exclaim) && PreviousMustBeValue)
1380 return addUnwrappedLine();
1381 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1382 bool NextEndsTemplateExpr =
1383 Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1384 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1385 (PreviousMustBeValue ||
1386 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1387 tok::minusminus))) {
1388 return addUnwrappedLine();
1390 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1391 isJSDeclOrStmt(Keywords, Next)) {
1392 return addUnwrappedLine();
// Parses one structural element starting at the current token. The function
// works in two phases: a first switch handles tokens that only make sense at
// the beginning of a line, then a loop consumes the remaining tokens until
// one of the cases ends the element or eof() is reached.
//
// OpeningBrace: may be null. It is used here to detect the body of a requires
//   expression and to skip the K&R parameter-declaration check.
// IfKind: forwarded to parseIfThenElse().
// IfLeftBrace: when non-null, receives the token returned by
//   parseIfThenElse() for an `if` statement.
// HasDoWhile: when non-null, set to true after a do-while loop was parsed.
// HasLabel: when non-null, set to true after a goto label was parsed.
1396 void UnwrappedLineParser::parseStructuralElement(
1397 const FormatToken *OpeningBrace, IfStmtKind *IfKind,
1398 FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
// TableGen: `include` optionally followed by a string literal forms a line of
// its own.
1399 if (Style.Language == FormatStyle::LK_TableGen &&
1400 FormatTok->is(tok::pp_include)) {
1401 nextToken();
1402 if (FormatTok->is(tok::string_literal))
1403 nextToken();
1404 addUnwrappedLine();
1405 return;
1408 if (Style.isCpp()) {
// Consume leading attributes for as long as handleCppAttributes() succeeds.
1409 while (FormatTok->is(tok::l_square) && handleCppAttributes()) {
1411 } else if (Style.isVerilog()) {
1412 if (Keywords.isVerilogStructuredProcedure(*FormatTok)) {
1413 parseForOrWhileLoop(/*HasParens=*/false);
1414 return;
1416 if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) {
1417 parseForOrWhileLoop();
1418 return;
1420 if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
1421 Keywords.kw_assume, Keywords.kw_cover)) {
1422 parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
1423 return;
1426 // Skip things that can exist before keywords like 'if' and 'case'.
1427 while (true) {
1428 if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique,
1429 Keywords.kw_unique0)) {
1430 nextToken();
1431 } else if (FormatTok->is(tok::l_paren) &&
1432 Tokens->peekNextToken()->is(tok::star)) {
1433 parseParens();
1434 } else {
1435 break;
1440 // Tokens that only make sense at the beginning of a line.
1441 switch (FormatTok->Tok.getKind()) {
1442 case tok::kw_asm:
1443 nextToken();
1444 if (FormatTok->is(tok::l_brace)) {
1445 FormatTok->setFinalizedType(TT_InlineASMBrace);
1446 nextToken();
// Every token inside the braces is marked as finalized; the closing brace
// ends the line.
1447 while (FormatTok && !eof()) {
1448 if (FormatTok->is(tok::r_brace)) {
1449 FormatTok->setFinalizedType(TT_InlineASMBrace);
1450 nextToken();
1451 addUnwrappedLine();
1452 break;
1454 FormatTok->Finalized = true;
1455 nextToken();
1458 break;
1459 case tok::kw_namespace:
1460 parseNamespace();
1461 return;
1462 case tok::kw_public:
1463 case tok::kw_protected:
1464 case tok::kw_private:
1465 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1466 Style.isCSharp()) {
1467 nextToken();
1468 } else {
1469 parseAccessSpecifier();
1471 return;
1472 case tok::kw_if: {
1473 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1474 // field/method declaration.
1475 break;
1477 FormatToken *Tok = parseIfThenElse(IfKind);
1478 if (IfLeftBrace)
1479 *IfLeftBrace = Tok;
1480 return;
1482 case tok::kw_for:
1483 case tok::kw_while:
1484 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1485 // field/method declaration.
1486 break;
1488 parseForOrWhileLoop();
1489 return;
1490 case tok::kw_do:
1491 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1492 // field/method declaration.
1493 break;
1495 parseDoWhile();
1496 if (HasDoWhile)
1497 *HasDoWhile = true;
1498 return;
1499 case tok::kw_switch:
1500 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1501 // 'switch: string' field declaration.
1502 break;
1504 parseSwitch();
1505 return;
1506 case tok::kw_default:
1507 // In Verilog default along with other labels are handled in the next loop.
1508 if (Style.isVerilog())
1509 break;
1510 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1511 // 'default: string' field declaration.
1512 break;
1514 nextToken();
1515 if (FormatTok->is(tok::colon)) {
1516 FormatTok->setFinalizedType(TT_CaseLabelColon);
1517 parseLabel();
1518 return;
1520 // e.g. "default void f() {}" in a Java interface.
1521 break;
1522 case tok::kw_case:
1523 // Proto: there are no switch/case statements.
1524 if (Style.isProto()) {
1525 nextToken();
1526 return;
1528 if (Style.isVerilog()) {
1529 parseBlock();
1530 addUnwrappedLine();
1531 return;
1533 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1534 // 'case: string' field declaration.
1535 nextToken();
1536 break;
1538 parseCaseLabel();
1539 return;
1540 case tok::kw_try:
1541 case tok::kw___try:
1542 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1543 // field/method declaration.
1544 break;
1546 parseTryCatch();
1547 return;
1548 case tok::kw_extern:
1549 nextToken();
1550 if (Style.isVerilog()) {
1551 // In Verilog an extern module declaration looks like the start of a
1552 // module, but there is no body and no endmodule, so it is handled separately.
1553 if (Keywords.isVerilogHierarchy(*FormatTok)) {
1554 parseVerilogHierarchyHeader();
1555 return;
1557 } else if (FormatTok->is(tok::string_literal)) {
1558 nextToken();
1559 if (FormatTok->is(tok::l_brace)) {
1560 if (Style.BraceWrapping.AfterExternBlock)
1561 addUnwrappedLine();
1562 // Either we indent or for backwards compatibility we follow the
1563 // AfterExternBlock style.
1564 unsigned AddLevels =
1565 (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1566 (Style.BraceWrapping.AfterExternBlock &&
1567 Style.IndentExternBlock ==
1568 FormatStyle::IEBS_AfterExternBlock)
1569 ? 1u
1570 : 0u;
1571 parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1572 addUnwrappedLine();
1573 return;
1576 break;
1577 case tok::kw_export:
1578 if (Style.isJavaScript()) {
1579 parseJavaScriptEs6ImportExport();
1580 return;
1582 if (Style.isCpp()) {
1583 nextToken();
1584 if (FormatTok->is(tok::kw_namespace)) {
1585 parseNamespace();
1586 return;
1588 if (FormatTok->is(Keywords.kw_import) && parseModuleImport())
1589 return;
1591 break;
1592 case tok::kw_inline:
1593 nextToken();
1594 if (FormatTok->is(tok::kw_namespace)) {
1595 parseNamespace();
1596 return;
1598 break;
1599 case tok::identifier:
1600 if (FormatTok->is(TT_ForEachMacro)) {
1601 parseForOrWhileLoop();
1602 return;
1604 if (FormatTok->is(TT_MacroBlockBegin)) {
1605 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1606 /*MunchSemi=*/false);
1607 return;
1609 if (FormatTok->is(Keywords.kw_import)) {
1610 if (Style.isJavaScript()) {
1611 parseJavaScriptEs6ImportExport();
1612 return;
1614 if (Style.Language == FormatStyle::LK_Proto) {
1615 nextToken();
1616 if (FormatTok->is(tok::kw_public))
1617 nextToken();
1618 if (FormatTok->isNot(tok::string_literal))
1619 return;
1620 nextToken();
1621 if (FormatTok->is(tok::semi))
1622 nextToken();
1623 addUnwrappedLine();
1624 return;
1626 if (Style.isCpp() && parseModuleImport())
1627 return;
1629 if (Style.isCpp() &&
1630 FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1631 Keywords.kw_slots, Keywords.kw_qslots)) {
1632 nextToken();
1633 if (FormatTok->is(tok::colon)) {
1634 nextToken();
1635 addUnwrappedLine();
1636 return;
1639 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1640 parseStatementMacro();
1641 return;
1643 if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1644 parseNamespace();
1645 return;
1647 // In Verilog labels can be any expression, so we don't do them here.
1648 if (!Style.isVerilog() && Tokens->peekNextToken()->is(tok::colon) &&
1649 !Line->MustBeDeclaration) {
1650 nextToken();
1651 Line->Tokens.begin()->Tok->MustBreakBefore = true;
1652 FormatTok->setFinalizedType(TT_GotoLabelColon);
1653 parseLabel(!Style.IndentGotoLabels);
1654 if (HasLabel)
1655 *HasLabel = true;
1656 return;
1658 // In all other cases, parse the declaration.
1659 break;
1660 default:
1661 break;
// Second phase: consume tokens one at a time until a case below returns or
// eof() is reached.
1664 const bool InRequiresExpression =
1665 OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
1666 do {
// Remember the token before the current one; the enum and l_paren cases below
// inspect it after further tokens may have been consumed.
1667 const FormatToken *Previous = FormatTok->Previous;
1668 switch (FormatTok->Tok.getKind()) {
1669 case tok::at:
1670 nextToken();
1671 if (FormatTok->is(tok::l_brace)) {
1672 nextToken();
1673 parseBracedList();
1674 break;
1675 } else if (Style.Language == FormatStyle::LK_Java &&
1676 FormatTok->is(Keywords.kw_interface)) {
1677 nextToken();
1678 break;
1680 switch (FormatTok->Tok.getObjCKeywordID()) {
1681 case tok::objc_public:
1682 case tok::objc_protected:
1683 case tok::objc_package:
1684 case tok::objc_private:
1685 return parseAccessSpecifier();
1686 case tok::objc_interface:
1687 case tok::objc_implementation:
1688 return parseObjCInterfaceOrImplementation();
1689 case tok::objc_protocol:
1690 if (parseObjCProtocol())
1691 return;
1692 break;
1693 case tok::objc_end:
1694 return; // Handled by the caller.
1695 case tok::objc_optional:
1696 case tok::objc_required:
1697 nextToken();
1698 addUnwrappedLine();
1699 return;
1700 case tok::objc_autoreleasepool:
1701 nextToken();
1702 if (FormatTok->is(tok::l_brace)) {
1703 if (Style.BraceWrapping.AfterControlStatement ==
1704 FormatStyle::BWACS_Always) {
1705 addUnwrappedLine();
1707 parseBlock();
1709 addUnwrappedLine();
1710 return;
1711 case tok::objc_synchronized:
1712 nextToken();
1713 if (FormatTok->is(tok::l_paren)) {
1714 // Skip synchronization object
1715 parseParens();
1717 if (FormatTok->is(tok::l_brace)) {
1718 if (Style.BraceWrapping.AfterControlStatement ==
1719 FormatStyle::BWACS_Always) {
1720 addUnwrappedLine();
1722 parseBlock();
1724 addUnwrappedLine();
1725 return;
1726 case tok::objc_try:
1727 // This branch isn't strictly necessary (the kw_try case below would
1728 // do this too after the tok::at is parsed above). But be explicit.
1729 parseTryCatch();
1730 return;
1731 default:
1732 break;
1734 break;
1735 case tok::kw_requires: {
1736 if (Style.isCpp()) {
1737 bool ParsedClause = parseRequires();
1738 if (ParsedClause)
1739 return;
1740 } else {
1741 nextToken();
1743 break;
1745 case tok::kw_enum:
1746 // Ignore if this is part of "template <enum ...".
1747 if (Previous && Previous->is(tok::less)) {
1748 nextToken();
1749 break;
1752 // parseEnum falls through and does not yet add an unwrapped line as an
1753 // enum definition can start a structural element.
1754 if (!parseEnum())
1755 break;
1756 // This only applies to C++ and Verilog.
1757 if (!Style.isCpp() && !Style.isVerilog()) {
1758 addUnwrappedLine();
1759 return;
1761 break;
1762 case tok::kw_typedef:
1763 nextToken();
1764 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1765 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1766 Keywords.kw_CF_CLOSED_ENUM,
1767 Keywords.kw_NS_CLOSED_ENUM)) {
1768 parseEnum();
1770 break;
1771 case tok::kw_class:
1772 if (Style.isVerilog()) {
1773 parseBlock();
1774 addUnwrappedLine();
1775 return;
1777 [[fallthrough]];
1778 case tok::kw_struct:
1779 case tok::kw_union:
1780 if (parseStructLike())
1781 return;
1782 break;
1783 case tok::kw_decltype:
1784 nextToken();
1785 if (FormatTok->is(tok::l_paren)) {
1786 parseParens();
1787 assert(FormatTok->Previous);
1788 if (FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto,
1789 tok::l_paren)) {
1790 Line->SeenDecltypeAuto = true;
1793 break;
1794 case tok::period:
1795 nextToken();
1796 // In Java, classes have an implicit static member "class".
1797 if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1798 FormatTok->is(tok::kw_class)) {
1799 nextToken();
1801 if (Style.isJavaScript() && FormatTok &&
1802 FormatTok->Tok.getIdentifierInfo()) {
1803 // JavaScript only has pseudo keywords, all keywords are allowed to
1804 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1805 nextToken();
1807 break;
1808 case tok::semi:
1809 nextToken();
1810 addUnwrappedLine();
1811 return;
1812 case tok::r_brace:
1813 addUnwrappedLine();
1814 return;
1815 case tok::l_paren: {
1816 parseParens();
1817 // Break the unwrapped line if a K&R C function definition has a parameter
1818 // declaration.
1819 if (OpeningBrace || !Style.isCpp() || !Previous || eof())
1820 break;
1821 if (isC78ParameterDecl(FormatTok,
1822 Tokens->peekNextToken(/*SkipComment=*/true),
1823 Previous)) {
1824 addUnwrappedLine();
1825 return;
1827 break;
1829 case tok::kw_operator:
1830 nextToken();
1831 if (FormatTok->isBinaryOperator())
1832 nextToken();
1833 break;
1834 case tok::caret:
1835 nextToken();
1836 // Block return type.
1837 if (FormatTok->Tok.isAnyIdentifier() ||
1838 FormatTok->isSimpleTypeSpecifier()) {
1839 nextToken();
1840 // Return types: pointers are ok too.
1841 while (FormatTok->is(tok::star))
1842 nextToken();
1844 // Block argument list.
1845 if (FormatTok->is(tok::l_paren))
1846 parseParens();
1847 // Block body.
1848 if (FormatTok->is(tok::l_brace))
1849 parseChildBlock();
1850 break;
1851 case tok::l_brace:
1852 if (InRequiresExpression)
1853 FormatTok->setFinalizedType(TT_BracedListLBrace);
1854 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1855 IsDecltypeAutoFunction = Line->SeenDecltypeAuto;
1856 // A block outside of parentheses must be the last part of a
1857 // structural element.
1858 // FIXME: Figure out cases where this is not true, and add projections
1859 // for them (the one we know is missing are lambdas).
1860 if (Style.Language == FormatStyle::LK_Java &&
1861 Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1862 // If necessary, we could set the type to something different than
1863 // TT_FunctionLBrace.
1864 if (Style.BraceWrapping.AfterControlStatement ==
1865 FormatStyle::BWACS_Always) {
1866 addUnwrappedLine();
1868 } else if (Style.BraceWrapping.AfterFunction) {
1869 addUnwrappedLine();
1871 FormatTok->setFinalizedType(TT_FunctionLBrace);
1872 parseBlock();
1873 IsDecltypeAutoFunction = false;
1874 addUnwrappedLine();
1875 return;
1877 // Otherwise this was a braced init list, and the structural
1878 // element continues.
1879 break;
1880 case tok::kw_try:
1881 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1882 // field/method declaration.
1883 nextToken();
1884 break;
1886 // We arrive here when parsing function-try blocks.
1887 if (Style.BraceWrapping.AfterFunction)
1888 addUnwrappedLine();
1889 parseTryCatch();
1890 return;
1891 case tok::identifier: {
1892 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1893 Line->MustBeDeclaration) {
1894 addUnwrappedLine();
1895 parseCSharpGenericTypeConstraint();
1896 break;
1898 if (FormatTok->is(TT_MacroBlockEnd)) {
1899 addUnwrappedLine();
1900 return;
1903 // Function declarations (as opposed to function expressions) are parsed
1904 // on their own unwrapped line by continuing this loop. Function
1905 // expressions (functions that are not on their own line) must not create
1906 // a new unwrapped line, so they are special cased below.
1907 size_t TokenCount = Line->Tokens.size();
1908 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1909 (TokenCount > 1 ||
1910 (TokenCount == 1 &&
1911 Line->Tokens.front().Tok->isNot(Keywords.kw_async)))) {
1912 tryToParseJSFunction();
1913 break;
1915 if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1916 FormatTok->is(Keywords.kw_interface)) {
1917 if (Style.isJavaScript()) {
1918 // In JavaScript/TypeScript, "interface" can be used as a standalone
1919 // identifier, e.g. in `var interface = 1;`. If "interface" is
1920 // followed by another identifier, it is very likely to be an actual
1921 // interface declaration.
// Look one token ahead, then restore the stream position.
1922 unsigned StoredPosition = Tokens->getPosition();
1923 FormatToken *Next = Tokens->getNextToken();
1924 FormatTok = Tokens->setPosition(StoredPosition);
1925 if (!mustBeJSIdent(Keywords, Next)) {
1926 nextToken();
1927 break;
1930 parseRecord();
1931 addUnwrappedLine();
1932 return;
1935 if (Style.isVerilog()) {
1936 if (FormatTok->is(Keywords.kw_table)) {
1937 parseVerilogTable();
1938 return;
1940 if (Keywords.isVerilogBegin(*FormatTok) ||
1941 Keywords.isVerilogHierarchy(*FormatTok)) {
1942 parseBlock();
1943 addUnwrappedLine();
1944 return;
1948 if (!Style.isCpp() && FormatTok->is(Keywords.kw_interface)) {
1949 if (parseStructLike())
1950 return;
1951 break;
1954 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1955 parseStatementMacro();
1956 return;
1959 // See if the following token should start a new unwrapped line.
1960 StringRef Text = FormatTok->TokenText;
1962 FormatToken *PreviousToken = FormatTok;
1963 nextToken();
1965 // JS doesn't have macros, and within classes colons indicate fields, not
1966 // labels.
1967 if (Style.isJavaScript())
1968 break;
// True when, ignoring leading comments (and leading '#' tokens in Verilog),
// the current line holds exactly one token.
1970 auto OneTokenSoFar = [&]() {
1971 auto I = Line->Tokens.begin(), E = Line->Tokens.end();
1972 while (I != E && I->Tok->is(tok::comment))
1973 ++I;
1974 if (Style.isVerilog())
1975 while (I != E && I->Tok->is(tok::hash))
1976 ++I;
1977 return I != E && (++I == E);
1979 if (OneTokenSoFar()) {
1980 // Recognize function-like macro usages without trailing semicolon as
1981 // well as free-standing macros like Q_OBJECT.
1982 bool FunctionLike = FormatTok->is(tok::l_paren);
1983 if (FunctionLike)
1984 parseParens();
1986 bool FollowedByNewline =
1987 CommentsBeforeNextToken.empty()
1988 ? FormatTok->NewlinesBefore > 0
1989 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
// Only all-uppercase names that are at least five characters long or are
// followed by parentheses are treated as macros.
1991 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1992 tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
1993 if (PreviousToken->isNot(TT_UntouchableMacroFunc))
1994 PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
1995 addUnwrappedLine();
1996 return;
1999 break;
2001 case tok::equal:
2002 if ((Style.isJavaScript() || Style.isCSharp()) &&
2003 FormatTok->is(TT_FatArrow)) {
2004 tryToParseChildBlock();
2005 break;
2008 nextToken();
2009 if (FormatTok->is(tok::l_brace)) {
2010 // Block kind should probably be set to BK_BracedInit for any language.
2011 // C# needs this change to ensure that array initialisers and object
2012 // initialisers are indented the same way.
2013 if (Style.isCSharp())
2014 FormatTok->setBlockKind(BK_BracedInit);
2015 nextToken();
2016 parseBracedList();
2017 } else if (Style.Language == FormatStyle::LK_Proto &&
2018 FormatTok->is(tok::less)) {
2019 nextToken();
2020 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2021 /*ClosingBraceKind=*/tok::greater);
2023 break;
2024 case tok::l_square:
2025 parseSquare();
2026 break;
2027 case tok::kw_new:
2028 parseNew();
2029 break;
2030 case tok::kw_case:
2031 // Proto: there are no switch/case statements.
2032 if (Style.isProto()) {
2033 nextToken();
2034 return;
2036 // In Verilog switch is called case.
2037 if (Style.isVerilog()) {
2038 parseBlock();
2039 addUnwrappedLine();
2040 return;
2042 if (Style.isJavaScript() && Line->MustBeDeclaration) {
2043 // 'case: string' field declaration.
2044 nextToken();
2045 break;
2047 parseCaseLabel();
2048 break;
2049 case tok::kw_default:
2050 nextToken();
2051 if (Style.isVerilog()) {
2052 if (FormatTok->is(tok::colon)) {
2053 // The label will be handled in the next iteration.
2054 break;
2056 if (FormatTok->is(Keywords.kw_clocking)) {
2057 // A default clocking block.
2058 parseBlock();
2059 addUnwrappedLine();
2060 return;
2062 parseVerilogCaseLabel();
2063 return;
2065 break;
2066 case tok::colon:
2067 nextToken();
2068 if (Style.isVerilog()) {
2069 parseVerilogCaseLabel();
2070 return;
2072 break;
2073 default:
2074 nextToken();
2075 break;
2077 } while (!eof());
2080 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2081 assert(FormatTok->is(tok::l_brace));
2082 if (!Style.isCSharp())
2083 return false;
2084 // See if it's a property accessor.
2085 if (FormatTok->Previous->isNot(tok::identifier))
2086 return false;
2088 // See if we are inside a property accessor.
2090 // Record the current tokenPosition so that we can advance and
2091 // reset the current token. `Next` is not set yet so we need
2092 // another way to advance along the token stream.
2093 unsigned int StoredPosition = Tokens->getPosition();
2094 FormatToken *Tok = Tokens->getNextToken();
2096 // A trivial property accessor is of the form:
2097 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2098 // Track these as they do not require line breaks to be introduced.
2099 bool HasSpecialAccessor = false;
2100 bool IsTrivialPropertyAccessor = true;
2101 while (!eof()) {
2102 if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
2103 tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
2104 Keywords.kw_init, Keywords.kw_set)) {
2105 if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
2106 HasSpecialAccessor = true;
2107 Tok = Tokens->getNextToken();
2108 continue;
2110 if (Tok->isNot(tok::r_brace))
2111 IsTrivialPropertyAccessor = false;
2112 break;
2115 if (!HasSpecialAccessor) {
2116 Tokens->setPosition(StoredPosition);
2117 return false;
2120 // Try to parse the property accessor:
2121 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2122 Tokens->setPosition(StoredPosition);
2123 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2124 addUnwrappedLine();
2125 nextToken();
2126 do {
2127 switch (FormatTok->Tok.getKind()) {
2128 case tok::r_brace:
2129 nextToken();
2130 if (FormatTok->is(tok::equal)) {
2131 while (!eof() && FormatTok->isNot(tok::semi))
2132 nextToken();
2133 nextToken();
2135 addUnwrappedLine();
2136 return true;
2137 case tok::l_brace:
2138 ++Line->Level;
2139 parseBlock(/*MustBeDeclaration=*/true);
2140 addUnwrappedLine();
2141 --Line->Level;
2142 break;
2143 case tok::equal:
2144 if (FormatTok->is(TT_FatArrow)) {
2145 ++Line->Level;
2146 do {
2147 nextToken();
2148 } while (!eof() && FormatTok->isNot(tok::semi));
2149 nextToken();
2150 addUnwrappedLine();
2151 --Line->Level;
2152 break;
2154 nextToken();
2155 break;
2156 default:
2157 if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
2158 Keywords.kw_set) &&
2159 !IsTrivialPropertyAccessor) {
2160 // Non-trivial get/set needs to be on its own line.
2161 addUnwrappedLine();
2163 nextToken();
2165 } while (!eof());
2167 // Unreachable for well-formed code (paired '{' and '}').
2168 return true;
2171 bool UnwrappedLineParser::tryToParseLambda() {
2172 assert(FormatTok->is(tok::l_square));
2173 if (!Style.isCpp()) {
2174 nextToken();
2175 return false;
2177 FormatToken &LSquare = *FormatTok;
2178 if (!tryToParseLambdaIntroducer())
2179 return false;
2181 bool SeenArrow = false;
2182 bool InTemplateParameterList = false;
2184 while (FormatTok->isNot(tok::l_brace)) {
2185 if (FormatTok->isSimpleTypeSpecifier()) {
2186 nextToken();
2187 continue;
2189 switch (FormatTok->Tok.getKind()) {
2190 case tok::l_brace:
2191 break;
2192 case tok::l_paren:
2193 parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
2194 break;
2195 case tok::l_square:
2196 parseSquare();
2197 break;
2198 case tok::less:
2199 assert(FormatTok->Previous);
2200 if (FormatTok->Previous->is(tok::r_square))
2201 InTemplateParameterList = true;
2202 nextToken();
2203 break;
2204 case tok::kw_auto:
2205 case tok::kw_class:
2206 case tok::kw_template:
2207 case tok::kw_typename:
2208 case tok::amp:
2209 case tok::star:
2210 case tok::kw_const:
2211 case tok::kw_constexpr:
2212 case tok::kw_consteval:
2213 case tok::comma:
2214 case tok::greater:
2215 case tok::identifier:
2216 case tok::numeric_constant:
2217 case tok::coloncolon:
2218 case tok::kw_mutable:
2219 case tok::kw_noexcept:
2220 case tok::kw_static:
2221 nextToken();
2222 break;
2223 // Specialization of a template with an integer parameter can contain
2224 // arithmetic, logical, comparison and ternary operators.
2226 // FIXME: This also accepts sequences of operators that are not in the scope
2227 // of a template argument list.
2229 // In a C++ lambda a template type can only occur after an arrow. We use
2230 // this as an heuristic to distinguish between Objective-C expressions
2231 // followed by an `a->b` expression, such as:
2232 // ([obj func:arg] + a->b)
2233 // Otherwise the code below would parse as a lambda.
2234 case tok::plus:
2235 case tok::minus:
2236 case tok::exclaim:
2237 case tok::tilde:
2238 case tok::slash:
2239 case tok::percent:
2240 case tok::lessless:
2241 case tok::pipe:
2242 case tok::pipepipe:
2243 case tok::ampamp:
2244 case tok::caret:
2245 case tok::equalequal:
2246 case tok::exclaimequal:
2247 case tok::greaterequal:
2248 case tok::lessequal:
2249 case tok::question:
2250 case tok::colon:
2251 case tok::ellipsis:
2252 case tok::kw_true:
2253 case tok::kw_false:
2254 if (SeenArrow || InTemplateParameterList) {
2255 nextToken();
2256 break;
2258 return true;
2259 case tok::arrow:
2260 // This might or might not actually be a lambda arrow (this could be an
2261 // ObjC method invocation followed by a dereferencing arrow). We might
2262 // reset this back to TT_Unknown in TokenAnnotator.
2263 FormatTok->setFinalizedType(TT_TrailingReturnArrow);
2264 SeenArrow = true;
2265 nextToken();
2266 break;
2267 case tok::kw_requires: {
2268 auto *RequiresToken = FormatTok;
2269 nextToken();
2270 parseRequiresClause(RequiresToken);
2271 break;
2273 case tok::equal:
2274 if (!InTemplateParameterList)
2275 return true;
2276 nextToken();
2277 break;
2278 default:
2279 return true;
2283 FormatTok->setFinalizedType(TT_LambdaLBrace);
2284 LSquare.setFinalizedType(TT_LambdaLSquare);
2286 NestedLambdas.push_back(Line->SeenDecltypeAuto);
2287 parseChildBlock();
2288 assert(!NestedLambdas.empty());
2289 NestedLambdas.pop_back();
2291 return true;
2294 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2295 const FormatToken *Previous = FormatTok->Previous;
2296 const FormatToken *LeftSquare = FormatTok;
2297 nextToken();
2298 if ((Previous && ((Previous->Tok.getIdentifierInfo() &&
2299 !Previous->isOneOf(tok::kw_return, tok::kw_co_await,
2300 tok::kw_co_yield, tok::kw_co_return)) ||
2301 Previous->closesScope())) ||
2302 LeftSquare->isCppStructuredBinding(Style)) {
2303 return false;
2305 if (FormatTok->is(tok::l_square))
2306 return false;
2307 if (FormatTok->is(tok::r_square)) {
2308 const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
2309 if (Next->is(tok::greater))
2310 return false;
2312 parseSquare(/*LambdaIntroducer=*/true);
2313 return true;
2316 void UnwrappedLineParser::tryToParseJSFunction() {
2317 assert(FormatTok->is(Keywords.kw_function));
2318 if (FormatTok->is(Keywords.kw_async))
2319 nextToken();
2320 // Consume "function".
2321 nextToken();
2323 // Consume * (generator function). Treat it like C++'s overloaded operators.
2324 if (FormatTok->is(tok::star)) {
2325 FormatTok->setFinalizedType(TT_OverloadedOperator);
2326 nextToken();
2329 // Consume function name.
2330 if (FormatTok->is(tok::identifier))
2331 nextToken();
2333 if (FormatTok->isNot(tok::l_paren))
2334 return;
2336 // Parse formal parameter list.
2337 parseParens();
2339 if (FormatTok->is(tok::colon)) {
2340 // Parse a type definition.
2341 nextToken();
2343 // Eat the type declaration. For braced inline object types, balance braces,
2344 // otherwise just parse until finding an l_brace for the function body.
2345 if (FormatTok->is(tok::l_brace))
2346 tryToParseBracedList();
2347 else
2348 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2349 nextToken();
2352 if (FormatTok->is(tok::semi))
2353 return;
2355 parseChildBlock();
2358 bool UnwrappedLineParser::tryToParseBracedList() {
2359 if (FormatTok->is(BK_Unknown))
2360 calculateBraceTypes();
2361 assert(FormatTok->isNot(BK_Unknown));
2362 if (FormatTok->is(BK_Block))
2363 return false;
2364 nextToken();
2365 parseBracedList();
2366 return true;
2369 bool UnwrappedLineParser::tryToParseChildBlock() {
2370 assert(Style.isJavaScript() || Style.isCSharp());
2371 assert(FormatTok->is(TT_FatArrow));
2372 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2373 // They always start an expression or a child block if followed by a curly
2374 // brace.
2375 nextToken();
2376 if (FormatTok->isNot(tok::l_brace))
2377 return false;
2378 parseChildBlock();
2379 return true;
2382 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
2383 bool IsEnum,
2384 tok::TokenKind ClosingBraceKind) {
2385 bool HasError = false;
2387 // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2388 // replace this by using parseAssignmentExpression() inside.
2389 do {
2390 if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2391 tryToParseChildBlock()) {
2392 continue;
2394 if (Style.isJavaScript()) {
2395 if (FormatTok->is(Keywords.kw_function)) {
2396 tryToParseJSFunction();
2397 continue;
2399 if (FormatTok->is(tok::l_brace)) {
2400 // Could be a method inside of a braced list `{a() { return 1; }}`.
2401 if (tryToParseBracedList())
2402 continue;
2403 parseChildBlock();
2406 if (FormatTok->Tok.getKind() == ClosingBraceKind) {
2407 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2408 addUnwrappedLine();
2409 nextToken();
2410 return !HasError;
2412 switch (FormatTok->Tok.getKind()) {
2413 case tok::l_square:
2414 if (Style.isCSharp())
2415 parseSquare();
2416 else
2417 tryToParseLambda();
2418 break;
2419 case tok::l_paren:
2420 parseParens();
2421 // JavaScript can just have free standing methods and getters/setters in
2422 // object literals. Detect them by a "{" following ")".
2423 if (Style.isJavaScript()) {
2424 if (FormatTok->is(tok::l_brace))
2425 parseChildBlock();
2426 break;
2428 break;
2429 case tok::l_brace:
2430 // Assume there are no blocks inside a braced init list apart
2431 // from the ones we explicitly parse out (like lambdas).
2432 FormatTok->setBlockKind(BK_BracedInit);
2433 nextToken();
2434 parseBracedList();
2435 break;
2436 case tok::less:
2437 if (Style.Language == FormatStyle::LK_Proto ||
2438 ClosingBraceKind == tok::greater) {
2439 nextToken();
2440 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2441 /*ClosingBraceKind=*/tok::greater);
2442 } else {
2443 nextToken();
2445 break;
2446 case tok::semi:
2447 // JavaScript (or more precisely TypeScript) can have semicolons in braced
2448 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2449 // used for error recovery if we have otherwise determined that this is
2450 // a braced list.
2451 if (Style.isJavaScript()) {
2452 nextToken();
2453 break;
2455 HasError = true;
2456 if (!ContinueOnSemicolons)
2457 return !HasError;
2458 nextToken();
2459 break;
2460 case tok::comma:
2461 nextToken();
2462 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2463 addUnwrappedLine();
2464 break;
2465 default:
2466 nextToken();
2467 break;
2469 } while (!eof());
2470 return false;
2473 /// \brief Parses a pair of parentheses (and everything between them).
2474 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2475 /// double ampersands. This applies for all nested scopes as well.
2477 /// Returns whether there is a `=` token between the parentheses.
2478 bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2479 assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2480 auto *LeftParen = FormatTok;
2481 bool SeenEqual = false;
2482 const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace);
2483 nextToken();
2484 do {
2485 switch (FormatTok->Tok.getKind()) {
2486 case tok::l_paren:
2487 if (parseParens(AmpAmpTokenType))
2488 SeenEqual = true;
2489 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2490 parseChildBlock();
2491 break;
2492 case tok::r_paren:
2493 if (!MightBeStmtExpr &&
2494 Style.RemoveParentheses > FormatStyle::RPS_Leave) {
2495 const auto *Prev = LeftParen->Previous;
2496 const auto *Next = Tokens->peekNextToken();
2497 const bool DoubleParens =
2498 Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren);
2499 const auto *PrevPrev = Prev ? Prev->getPreviousNonComment() : nullptr;
2500 const bool Blacklisted =
2501 PrevPrev &&
2502 (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) ||
2503 (SeenEqual &&
2504 (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) ||
2505 PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if))));
2506 const bool ReturnParens =
2507 Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement &&
2508 ((NestedLambdas.empty() && !IsDecltypeAutoFunction) ||
2509 (!NestedLambdas.empty() && !NestedLambdas.back())) &&
2510 Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next &&
2511 Next->is(tok::semi);
2512 if ((DoubleParens && !Blacklisted) || ReturnParens) {
2513 LeftParen->Optional = true;
2514 FormatTok->Optional = true;
2517 nextToken();
2518 return SeenEqual;
2519 case tok::r_brace:
2520 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2521 return SeenEqual;
2522 case tok::l_square:
2523 tryToParseLambda();
2524 break;
2525 case tok::l_brace:
2526 if (!tryToParseBracedList())
2527 parseChildBlock();
2528 break;
2529 case tok::at:
2530 nextToken();
2531 if (FormatTok->is(tok::l_brace)) {
2532 nextToken();
2533 parseBracedList();
2535 break;
2536 case tok::equal:
2537 SeenEqual = true;
2538 if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2539 tryToParseChildBlock();
2540 else
2541 nextToken();
2542 break;
2543 case tok::kw_class:
2544 if (Style.isJavaScript())
2545 parseRecord(/*ParseAsExpr=*/true);
2546 else
2547 nextToken();
2548 break;
2549 case tok::identifier:
2550 if (Style.isJavaScript() && (FormatTok->is(Keywords.kw_function)))
2551 tryToParseJSFunction();
2552 else
2553 nextToken();
2554 break;
2555 case tok::kw_requires: {
2556 auto RequiresToken = FormatTok;
2557 nextToken();
2558 parseRequiresExpression(RequiresToken);
2559 break;
2561 case tok::ampamp:
2562 if (AmpAmpTokenType != TT_Unknown)
2563 FormatTok->setFinalizedType(AmpAmpTokenType);
2564 [[fallthrough]];
2565 default:
2566 nextToken();
2567 break;
2569 } while (!eof());
2570 return SeenEqual;
2573 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2574 if (!LambdaIntroducer) {
2575 assert(FormatTok->is(tok::l_square) && "'[' expected.");
2576 if (tryToParseLambda())
2577 return;
2579 do {
2580 switch (FormatTok->Tok.getKind()) {
2581 case tok::l_paren:
2582 parseParens();
2583 break;
2584 case tok::r_square:
2585 nextToken();
2586 return;
2587 case tok::r_brace:
2588 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2589 return;
2590 case tok::l_square:
2591 parseSquare();
2592 break;
2593 case tok::l_brace: {
2594 if (!tryToParseBracedList())
2595 parseChildBlock();
2596 break;
2598 case tok::at:
2599 nextToken();
2600 if (FormatTok->is(tok::l_brace)) {
2601 nextToken();
2602 parseBracedList();
2604 break;
2605 default:
2606 nextToken();
2607 break;
2609 } while (!eof());
2612 void UnwrappedLineParser::keepAncestorBraces() {
2613 if (!Style.RemoveBracesLLVM)
2614 return;
2616 const int MaxNestingLevels = 2;
2617 const int Size = NestedTooDeep.size();
2618 if (Size >= MaxNestingLevels)
2619 NestedTooDeep[Size - MaxNestingLevels] = true;
2620 NestedTooDeep.push_back(false);
2623 static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2624 for (const auto &Token : llvm::reverse(Line.Tokens))
2625 if (Token.Tok->isNot(tok::comment))
2626 return Token.Tok;
2628 return nullptr;
2631 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2632 FormatToken *Tok = nullptr;
2634 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2635 PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) {
2636 Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never
2637 ? getLastNonComment(*Line)
2638 : Line->Tokens.back().Tok;
2639 assert(Tok);
2640 if (Tok->BraceCount < 0) {
2641 assert(Tok->BraceCount == -1);
2642 Tok = nullptr;
2643 } else {
2644 Tok->BraceCount = -1;
2648 addUnwrappedLine();
2649 ++Line->Level;
2650 parseStructuralElement();
2652 if (Tok) {
2653 assert(!Line->InPPDirective);
2654 Tok = nullptr;
2655 for (const auto &L : llvm::reverse(*CurrentLines)) {
2656 if (!L.InPPDirective && getLastNonComment(L)) {
2657 Tok = L.Tokens.back().Tok;
2658 break;
2661 assert(Tok);
2662 ++Tok->BraceCount;
2665 if (CheckEOF && eof())
2666 addUnwrappedLine();
2668 --Line->Level;
2671 static void markOptionalBraces(FormatToken *LeftBrace) {
2672 if (!LeftBrace)
2673 return;
2675 assert(LeftBrace->is(tok::l_brace));
2677 FormatToken *RightBrace = LeftBrace->MatchingParen;
2678 if (!RightBrace) {
2679 assert(!LeftBrace->Optional);
2680 return;
2683 assert(RightBrace->is(tok::r_brace));
2684 assert(RightBrace->MatchingParen == LeftBrace);
2685 assert(LeftBrace->Optional == RightBrace->Optional);
2687 LeftBrace->Optional = true;
2688 RightBrace->Optional = true;
2691 void UnwrappedLineParser::handleAttributes() {
2692 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2693 if (FormatTok->isAttribute())
2694 nextToken();
2695 else if (FormatTok->is(tok::l_square))
2696 handleCppAttributes();
2699 bool UnwrappedLineParser::handleCppAttributes() {
2700 // Handle [[likely]] / [[unlikely]] attributes.
2701 assert(FormatTok->is(tok::l_square));
2702 if (!tryToParseSimpleAttribute())
2703 return false;
2704 parseSquare();
2705 return true;
2708 /// Returns whether \c Tok begins a block.
2709 bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
2710 // FIXME: rename the function or make
2711 // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2712 return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
2713 : Tok.is(tok::l_brace);
2716 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2717 bool KeepBraces,
2718 bool IsVerilogAssert) {
2719 assert((FormatTok->is(tok::kw_if) ||
2720 (Style.isVerilog() &&
2721 FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
2722 Keywords.kw_assume, Keywords.kw_cover))) &&
2723 "'if' expected");
2724 nextToken();
2726 if (IsVerilogAssert) {
2727 // Handle `assert #0` and `assert final`.
2728 if (FormatTok->is(Keywords.kw_verilogHash)) {
2729 nextToken();
2730 if (FormatTok->is(tok::numeric_constant))
2731 nextToken();
2732 } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property,
2733 Keywords.kw_sequence)) {
2734 nextToken();
2738 // Handle `if !consteval`.
2739 if (FormatTok->is(tok::exclaim))
2740 nextToken();
2742 bool KeepIfBraces = true;
2743 if (FormatTok->is(tok::kw_consteval)) {
2744 nextToken();
2745 } else {
2746 KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
2747 if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
2748 nextToken();
2749 if (FormatTok->is(tok::l_paren)) {
2750 FormatTok->setFinalizedType(TT_ConditionLParen);
2751 parseParens();
2754 handleAttributes();
2755 // The then action is optional in Verilog assert statements.
2756 if (IsVerilogAssert && FormatTok->is(tok::semi)) {
2757 nextToken();
2758 addUnwrappedLine();
2759 return nullptr;
2762 bool NeedsUnwrappedLine = false;
2763 keepAncestorBraces();
2765 FormatToken *IfLeftBrace = nullptr;
2766 IfStmtKind IfBlockKind = IfStmtKind::NotIf;
2768 if (isBlockBegin(*FormatTok)) {
2769 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
2770 IfLeftBrace = FormatTok;
2771 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2772 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2773 /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
2774 setPreviousRBraceType(TT_ControlStatementRBrace);
2775 if (Style.BraceWrapping.BeforeElse)
2776 addUnwrappedLine();
2777 else
2778 NeedsUnwrappedLine = true;
2779 } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) {
2780 addUnwrappedLine();
2781 } else {
2782 parseUnbracedBody();
2785 if (Style.RemoveBracesLLVM) {
2786 assert(!NestedTooDeep.empty());
2787 KeepIfBraces = KeepIfBraces ||
2788 (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
2789 NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
2790 IfBlockKind == IfStmtKind::IfElseIf;
2793 bool KeepElseBraces = KeepIfBraces;
2794 FormatToken *ElseLeftBrace = nullptr;
2795 IfStmtKind Kind = IfStmtKind::IfOnly;
2797 if (FormatTok->is(tok::kw_else)) {
2798 if (Style.RemoveBracesLLVM) {
2799 NestedTooDeep.back() = false;
2800 Kind = IfStmtKind::IfElse;
2802 nextToken();
2803 handleAttributes();
2804 if (isBlockBegin(*FormatTok)) {
2805 const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
2806 FormatTok->setFinalizedType(TT_ElseLBrace);
2807 ElseLeftBrace = FormatTok;
2808 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2809 IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
2810 FormatToken *IfLBrace =
2811 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2812 /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
2813 setPreviousRBraceType(TT_ElseRBrace);
2814 if (FormatTok->is(tok::kw_else)) {
2815 KeepElseBraces = KeepElseBraces ||
2816 ElseBlockKind == IfStmtKind::IfOnly ||
2817 ElseBlockKind == IfStmtKind::IfElseIf;
2818 } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
2819 KeepElseBraces = true;
2820 assert(ElseLeftBrace->MatchingParen);
2821 markOptionalBraces(ElseLeftBrace);
2823 addUnwrappedLine();
2824 } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) {
2825 const FormatToken *Previous = Tokens->getPreviousToken();
2826 assert(Previous);
2827 const bool IsPrecededByComment = Previous->is(tok::comment);
2828 if (IsPrecededByComment) {
2829 addUnwrappedLine();
2830 ++Line->Level;
2832 bool TooDeep = true;
2833 if (Style.RemoveBracesLLVM) {
2834 Kind = IfStmtKind::IfElseIf;
2835 TooDeep = NestedTooDeep.pop_back_val();
2837 ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
2838 if (Style.RemoveBracesLLVM)
2839 NestedTooDeep.push_back(TooDeep);
2840 if (IsPrecededByComment)
2841 --Line->Level;
2842 } else {
2843 parseUnbracedBody(/*CheckEOF=*/true);
2845 } else {
2846 KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
2847 if (NeedsUnwrappedLine)
2848 addUnwrappedLine();
2851 if (!Style.RemoveBracesLLVM)
2852 return nullptr;
2854 assert(!NestedTooDeep.empty());
2855 KeepElseBraces = KeepElseBraces ||
2856 (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
2857 NestedTooDeep.back();
2859 NestedTooDeep.pop_back();
2861 if (!KeepIfBraces && !KeepElseBraces) {
2862 markOptionalBraces(IfLeftBrace);
2863 markOptionalBraces(ElseLeftBrace);
2864 } else if (IfLeftBrace) {
2865 FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
2866 if (IfRightBrace) {
2867 assert(IfRightBrace->MatchingParen == IfLeftBrace);
2868 assert(!IfLeftBrace->Optional);
2869 assert(!IfRightBrace->Optional);
2870 IfLeftBrace->MatchingParen = nullptr;
2871 IfRightBrace->MatchingParen = nullptr;
2875 if (IfKind)
2876 *IfKind = Kind;
2878 return IfLeftBrace;
2881 void UnwrappedLineParser::parseTryCatch() {
2882 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2883 nextToken();
2884 bool NeedsUnwrappedLine = false;
2885 if (FormatTok->is(tok::colon)) {
2886 // We are in a function try block, what comes is an initializer list.
2887 nextToken();
2889 // In case identifiers were removed by clang-tidy, what might follow is
2890 // multiple commas in sequence - before the first identifier.
2891 while (FormatTok->is(tok::comma))
2892 nextToken();
2894 while (FormatTok->is(tok::identifier)) {
2895 nextToken();
2896 if (FormatTok->is(tok::l_paren))
2897 parseParens();
2898 if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2899 FormatTok->is(tok::l_brace)) {
2900 do {
2901 nextToken();
2902 } while (FormatTok->isNot(tok::r_brace));
2903 nextToken();
2906 // In case identifiers were removed by clang-tidy, what might follow is
2907 // multiple commas in sequence - after the first identifier.
2908 while (FormatTok->is(tok::comma))
2909 nextToken();
2912 // Parse try with resource.
2913 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2914 parseParens();
2916 keepAncestorBraces();
2918 if (FormatTok->is(tok::l_brace)) {
2919 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2920 parseBlock();
2921 if (Style.BraceWrapping.BeforeCatch)
2922 addUnwrappedLine();
2923 else
2924 NeedsUnwrappedLine = true;
2925 } else if (FormatTok->isNot(tok::kw_catch)) {
2926 // The C++ standard requires a compound-statement after a try.
2927 // If there's none, we try to assume there's a structuralElement
2928 // and try to continue.
2929 addUnwrappedLine();
2930 ++Line->Level;
2931 parseStructuralElement();
2932 --Line->Level;
2934 while (true) {
2935 if (FormatTok->is(tok::at))
2936 nextToken();
2937 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2938 tok::kw___finally) ||
2939 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2940 FormatTok->is(Keywords.kw_finally)) ||
2941 (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
2942 FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
2943 break;
2945 nextToken();
2946 while (FormatTok->isNot(tok::l_brace)) {
2947 if (FormatTok->is(tok::l_paren)) {
2948 parseParens();
2949 continue;
2951 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2952 if (Style.RemoveBracesLLVM)
2953 NestedTooDeep.pop_back();
2954 return;
2956 nextToken();
2958 NeedsUnwrappedLine = false;
2959 Line->MustBeDeclaration = false;
2960 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2961 parseBlock();
2962 if (Style.BraceWrapping.BeforeCatch)
2963 addUnwrappedLine();
2964 else
2965 NeedsUnwrappedLine = true;
2968 if (Style.RemoveBracesLLVM)
2969 NestedTooDeep.pop_back();
2971 if (NeedsUnwrappedLine)
2972 addUnwrappedLine();
2975 void UnwrappedLineParser::parseNamespace() {
2976 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2977 "'namespace' expected");
2979 const FormatToken &InitialToken = *FormatTok;
2980 nextToken();
2981 if (InitialToken.is(TT_NamespaceMacro)) {
2982 parseParens();
2983 } else {
2984 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2985 tok::l_square, tok::period, tok::l_paren) ||
2986 (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
2987 if (FormatTok->is(tok::l_square))
2988 parseSquare();
2989 else if (FormatTok->is(tok::l_paren))
2990 parseParens();
2991 else
2992 nextToken();
2995 if (FormatTok->is(tok::l_brace)) {
2996 FormatTok->setFinalizedType(TT_NamespaceLBrace);
2998 if (ShouldBreakBeforeBrace(Style, InitialToken))
2999 addUnwrappedLine();
3001 unsigned AddLevels =
3002 Style.NamespaceIndentation == FormatStyle::NI_All ||
3003 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
3004 DeclarationScopeStack.size() > 1)
3005 ? 1u
3006 : 0u;
3007 bool ManageWhitesmithsBraces =
3008 AddLevels == 0u &&
3009 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3011 // If we're in Whitesmiths mode, indent the brace if we're not indenting
3012 // the whole block.
3013 if (ManageWhitesmithsBraces)
3014 ++Line->Level;
3016 // Munch the semicolon after a namespace. This is more common than one would
3017 // think. Putting the semicolon into its own line is very ugly.
3018 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
3019 /*KeepBraces=*/true, /*IfKind=*/nullptr,
3020 ManageWhitesmithsBraces);
3022 addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
3024 if (ManageWhitesmithsBraces)
3025 --Line->Level;
3027 // FIXME: Add error handling.
3030 void UnwrappedLineParser::parseNew() {
3031 assert(FormatTok->is(tok::kw_new) && "'new' expected");
3032 nextToken();
3034 if (Style.isCSharp()) {
3035 do {
3036 // Handle constructor invocation, e.g. `new(field: value)`.
3037 if (FormatTok->is(tok::l_paren))
3038 parseParens();
3040 // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
3041 if (FormatTok->is(tok::l_brace))
3042 parseBracedList();
3044 if (FormatTok->isOneOf(tok::semi, tok::comma))
3045 return;
3047 nextToken();
3048 } while (!eof());
3051 if (Style.Language != FormatStyle::LK_Java)
3052 return;
3054 // In Java, we can parse everything up to the parens, which aren't optional.
3055 do {
3056 // There should not be a ;, { or } before the new's open paren.
3057 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
3058 return;
3060 // Consume the parens.
3061 if (FormatTok->is(tok::l_paren)) {
3062 parseParens();
3064 // If there is a class body of an anonymous class, consume that as child.
3065 if (FormatTok->is(tok::l_brace))
3066 parseChildBlock();
3067 return;
3069 nextToken();
3070 } while (!eof());
3073 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
3074 keepAncestorBraces();
3076 if (isBlockBegin(*FormatTok)) {
3077 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3078 FormatToken *LeftBrace = FormatTok;
3079 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3080 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3081 /*MunchSemi=*/true, KeepBraces);
3082 setPreviousRBraceType(TT_ControlStatementRBrace);
3083 if (!KeepBraces) {
3084 assert(!NestedTooDeep.empty());
3085 if (!NestedTooDeep.back())
3086 markOptionalBraces(LeftBrace);
3088 if (WrapRightBrace)
3089 addUnwrappedLine();
3090 } else {
3091 parseUnbracedBody();
3094 if (!KeepBraces)
3095 NestedTooDeep.pop_back();
3098 void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
3099 assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
3100 (Style.isVerilog() &&
3101 FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
3102 Keywords.kw_always_ff, Keywords.kw_always_latch,
3103 Keywords.kw_final, Keywords.kw_initial,
3104 Keywords.kw_foreach, Keywords.kw_forever,
3105 Keywords.kw_repeat))) &&
3106 "'for', 'while' or foreach macro expected");
3107 const bool KeepBraces = !Style.RemoveBracesLLVM ||
3108 !FormatTok->isOneOf(tok::kw_for, tok::kw_while);
3110 nextToken();
3111 // JS' for await ( ...
3112 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
3113 nextToken();
3114 if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
3115 nextToken();
3116 if (HasParens && FormatTok->is(tok::l_paren)) {
3117 // The type is only set for Verilog basically because we were afraid to
3118 // change the existing behavior for loops. See the discussion on D121756 for
3119 // details.
3120 if (Style.isVerilog())
3121 FormatTok->setFinalizedType(TT_ConditionLParen);
3122 parseParens();
3125 if (Style.isVerilog()) {
3126 // Event control.
3127 parseVerilogSensitivityList();
3128 } else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(tok::semi) &&
3129 Tokens->getPreviousToken()->is(tok::r_paren)) {
3130 nextToken();
3131 addUnwrappedLine();
3132 return;
3135 handleAttributes();
3136 parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
3139 void UnwrappedLineParser::parseDoWhile() {
3140 assert(FormatTok->is(tok::kw_do) && "'do' expected");
3141 nextToken();
3143 parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
3145 // FIXME: Add error handling.
3146 if (FormatTok->isNot(tok::kw_while)) {
3147 addUnwrappedLine();
3148 return;
3151 FormatTok->setFinalizedType(TT_DoWhile);
3153 // If in Whitesmiths mode, the line with the while() needs to be indented
3154 // to the same level as the block.
3155 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
3156 ++Line->Level;
3158 nextToken();
3159 parseStructuralElement();
3162 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
3163 nextToken();
3164 unsigned OldLineLevel = Line->Level;
3165 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
3166 --Line->Level;
3167 if (LeftAlignLabel)
3168 Line->Level = 0;
3170 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
3171 FormatTok->is(tok::l_brace)) {
3173 CompoundStatementIndenter Indenter(this, Line->Level,
3174 Style.BraceWrapping.AfterCaseLabel,
3175 Style.BraceWrapping.IndentBraces);
3176 parseBlock();
3177 if (FormatTok->is(tok::kw_break)) {
3178 if (Style.BraceWrapping.AfterControlStatement ==
3179 FormatStyle::BWACS_Always) {
3180 addUnwrappedLine();
3181 if (!Style.IndentCaseBlocks &&
3182 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
3183 ++Line->Level;
3186 parseStructuralElement();
3188 addUnwrappedLine();
3189 } else {
3190 if (FormatTok->is(tok::semi))
3191 nextToken();
3192 addUnwrappedLine();
3194 Line->Level = OldLineLevel;
3195 if (FormatTok->isNot(tok::l_brace)) {
3196 parseStructuralElement();
3197 addUnwrappedLine();
3201 void UnwrappedLineParser::parseCaseLabel() {
3202 assert(FormatTok->is(tok::kw_case) && "'case' expected");
3204 // FIXME: fix handling of complex expressions here.
3205 do {
3206 nextToken();
3207 if (FormatTok->is(tok::colon)) {
3208 FormatTok->setFinalizedType(TT_CaseLabelColon);
3209 break;
3211 } while (!eof());
3212 parseLabel();
3215 void UnwrappedLineParser::parseSwitch() {
3216 assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3217 nextToken();
3218 if (FormatTok->is(tok::l_paren))
3219 parseParens();
3221 keepAncestorBraces();
3223 if (FormatTok->is(tok::l_brace)) {
3224 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3225 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3226 parseBlock();
3227 setPreviousRBraceType(TT_ControlStatementRBrace);
3228 addUnwrappedLine();
3229 } else {
3230 addUnwrappedLine();
3231 ++Line->Level;
3232 parseStructuralElement();
3233 --Line->Level;
3236 if (Style.RemoveBracesLLVM)
3237 NestedTooDeep.pop_back();
3240 // Operators that can follow a C variable.
3241 static bool isCOperatorFollowingVar(tok::TokenKind kind) {
3242 switch (kind) {
3243 case tok::ampamp:
3244 case tok::ampequal:
3245 case tok::arrow:
3246 case tok::caret:
3247 case tok::caretequal:
3248 case tok::comma:
3249 case tok::ellipsis:
3250 case tok::equal:
3251 case tok::equalequal:
3252 case tok::exclaim:
3253 case tok::exclaimequal:
3254 case tok::greater:
3255 case tok::greaterequal:
3256 case tok::greatergreater:
3257 case tok::greatergreaterequal:
3258 case tok::l_paren:
3259 case tok::l_square:
3260 case tok::less:
3261 case tok::lessequal:
3262 case tok::lessless:
3263 case tok::lesslessequal:
3264 case tok::minus:
3265 case tok::minusequal:
3266 case tok::minusminus:
3267 case tok::percent:
3268 case tok::percentequal:
3269 case tok::period:
3270 case tok::pipe:
3271 case tok::pipeequal:
3272 case tok::pipepipe:
3273 case tok::plus:
3274 case tok::plusequal:
3275 case tok::plusplus:
3276 case tok::question:
3277 case tok::r_brace:
3278 case tok::r_paren:
3279 case tok::r_square:
3280 case tok::semi:
3281 case tok::slash:
3282 case tok::slashequal:
3283 case tok::star:
3284 case tok::starequal:
3285 return true;
3286 default:
3287 return false;
3291 void UnwrappedLineParser::parseAccessSpecifier() {
3292 FormatToken *AccessSpecifierCandidate = FormatTok;
3293 nextToken();
3294 // Understand Qt's slots.
3295 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
3296 nextToken();
3297 // Otherwise, we don't know what it is, and we'd better keep the next token.
3298 if (FormatTok->is(tok::colon)) {
3299 nextToken();
3300 addUnwrappedLine();
3301 } else if (FormatTok->isNot(tok::coloncolon) &&
3302 !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
3303 // Not a variable name nor namespace name.
3304 addUnwrappedLine();
3305 } else if (AccessSpecifierCandidate) {
3306 // Consider the access specifier to be a C identifier.
3307 AccessSpecifierCandidate->Tok.setKind(tok::identifier);
3311 /// \brief Parses a requires, decides if it is a clause or an expression.
3312 /// \pre The current token has to be the requires keyword.
3313 /// \returns true if it parsed a clause.
3314 bool clang::format::UnwrappedLineParser::parseRequires() {
3315 assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3316 auto RequiresToken = FormatTok;
3318 // We try to guess if it is a requires clause, or a requires expression. For
3319 // that we first consume the keyword and check the next token.
3320 nextToken();
3322 switch (FormatTok->Tok.getKind()) {
3323 case tok::l_brace:
3324 // This can only be an expression, never a clause.
3325 parseRequiresExpression(RequiresToken);
3326 return false;
3327 case tok::l_paren:
3328 // Clauses and expression can start with a paren, it's unclear what we have.
3329 break;
3330 default:
3331 // All other tokens can only be a clause.
3332 parseRequiresClause(RequiresToken);
3333 return true;
3336 // Looking forward we would have to decide if there are function declaration
3337 // like arguments to the requires expression:
3338 // requires (T t) {
3339 // Or there is a constraint expression for the requires clause:
3340 // requires (C<T> && ...
3342 // But first let's look behind.
3343 auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
3345 if (!PreviousNonComment ||
3346 PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
3347 // If there is no token, or an expression left brace, we are a requires
3348 // clause within a requires expression.
3349 parseRequiresClause(RequiresToken);
3350 return true;
3353 switch (PreviousNonComment->Tok.getKind()) {
3354 case tok::greater:
3355 case tok::r_paren:
3356 case tok::kw_noexcept:
3357 case tok::kw_const:
3358 // This is a requires clause.
3359 parseRequiresClause(RequiresToken);
3360 return true;
3361 case tok::amp:
3362 case tok::ampamp: {
3363 // This can be either:
3364 // if (... && requires (T t) ...)
3365 // Or
3366 // void member(...) && requires (C<T> ...
3367 // We check the one token before that for a const:
3368 // void member(...) const && requires (C<T> ...
3369 auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3370 if (PrevPrev && PrevPrev->is(tok::kw_const)) {
3371 parseRequiresClause(RequiresToken);
3372 return true;
3374 break;
3376 default:
3377 if (PreviousNonComment->isTypeOrIdentifier()) {
3378 // This is a requires clause.
3379 parseRequiresClause(RequiresToken);
3380 return true;
3382 // It's an expression.
3383 parseRequiresExpression(RequiresToken);
3384 return false;
3387 // Now we look forward and try to check if the paren content is a parameter
3388 // list. The parameters can be cv-qualified and contain references or
3389 // pointers.
3390 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3391 // of stuff: typename, const, *, &, &&, ::, identifiers.
3393 unsigned StoredPosition = Tokens->getPosition();
3394 FormatToken *NextToken = Tokens->getNextToken();
3395 int Lookahead = 0;
3396 auto PeekNext = [&Lookahead, &NextToken, this] {
3397 ++Lookahead;
3398 NextToken = Tokens->getNextToken();
3401 bool FoundType = false;
3402 bool LastWasColonColon = false;
3403 int OpenAngles = 0;
3405 for (; Lookahead < 50; PeekNext()) {
3406 switch (NextToken->Tok.getKind()) {
3407 case tok::kw_volatile:
3408 case tok::kw_const:
3409 case tok::comma:
3410 if (OpenAngles == 0) {
3411 FormatTok = Tokens->setPosition(StoredPosition);
3412 parseRequiresExpression(RequiresToken);
3413 return false;
3415 break;
3416 case tok::r_paren:
3417 case tok::pipepipe:
3418 FormatTok = Tokens->setPosition(StoredPosition);
3419 parseRequiresClause(RequiresToken);
3420 return true;
3421 case tok::eof:
3422 // Break out of the loop.
3423 Lookahead = 50;
3424 break;
3425 case tok::coloncolon:
3426 LastWasColonColon = true;
3427 break;
3428 case tok::identifier:
3429 if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3430 FormatTok = Tokens->setPosition(StoredPosition);
3431 parseRequiresExpression(RequiresToken);
3432 return false;
3434 FoundType = true;
3435 LastWasColonColon = false;
3436 break;
3437 case tok::less:
3438 ++OpenAngles;
3439 break;
3440 case tok::greater:
3441 --OpenAngles;
3442 break;
3443 default:
3444 if (NextToken->isSimpleTypeSpecifier()) {
3445 FormatTok = Tokens->setPosition(StoredPosition);
3446 parseRequiresExpression(RequiresToken);
3447 return false;
3449 break;
3452 // This seems to be a complicated expression, just assume it's a clause.
3453 FormatTok = Tokens->setPosition(StoredPosition);
3454 parseRequiresClause(RequiresToken);
3455 return true;
3458 /// \brief Parses a requires clause.
3459 /// \param RequiresToken The requires keyword token, which starts this clause.
3460 /// \pre We need to be on the next token after the requires keyword.
3461 /// \sa parseRequiresExpression
3463 /// Returns if it either has finished parsing the clause, or it detects, that
3464 /// the clause is incorrect.
3465 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3466 assert(FormatTok->getPreviousNonComment() == RequiresToken);
3467 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3469 // If there is no previous token, we are within a requires expression,
3470 // otherwise we will always have the template or function declaration in front
3471 // of it.
3472 bool InRequiresExpression =
3473 !RequiresToken->Previous ||
3474 RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3476 RequiresToken->setFinalizedType(InRequiresExpression
3477 ? TT_RequiresClauseInARequiresExpression
3478 : TT_RequiresClause);
3480 // NOTE: parseConstraintExpression is only ever called from this function.
3481 // It could be inlined into here.
3482 parseConstraintExpression();
3484 if (!InRequiresExpression)
3485 FormatTok->Previous->ClosesRequiresClause = true;
3488 /// \brief Parses a requires expression.
3489 /// \param RequiresToken The requires keyword token, which starts this clause.
3490 /// \pre We need to be on the next token after the requires keyword.
3491 /// \sa parseRequiresClause
3493 /// Returns if it either has finished parsing the expression, or it detects,
3494 /// that the expression is incorrect.
3495 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3496 assert(FormatTok->getPreviousNonComment() == RequiresToken);
3497 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3499 RequiresToken->setFinalizedType(TT_RequiresExpression);
3501 if (FormatTok->is(tok::l_paren)) {
3502 FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3503 parseParens();
3506 if (FormatTok->is(tok::l_brace)) {
3507 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3508 parseChildBlock();
3512 /// \brief Parses a constraint expression.
3514 /// This is the body of a requires clause. It returns, when the parsing is
3515 /// complete, or the expression is incorrect.
3516 void UnwrappedLineParser::parseConstraintExpression() {
3517 // The special handling for lambdas is needed since tryToParseLambda() eats a
3518 // token and if a requires expression is the last part of a requires clause
3519 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3520 // not set on the correct token. Thus we need to be aware if we even expect a
3521 // lambda to be possible.
3522 // template <typename T> requires requires { ... } [[nodiscard]] ...;
3523 bool LambdaNextTimeAllowed = true;
3525 // Within lambda declarations, it is permitted to put a requires clause after
3526 // its template parameter list, which would place the requires clause right
3527 // before the parentheses of the parameters of the lambda declaration. Thus,
3528 // we track if we expect to see grouping parentheses at all.
3529 // Without this check, `requires foo<T> (T t)` in the below example would be
3530 // seen as the whole requires clause, accidentally eating the parameters of
3531 // the lambda.
3532 // [&]<typename T> requires foo<T> (T t) { ... };
3533 bool TopLevelParensAllowed = true;
3535 do {
3536 bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);
3538 switch (FormatTok->Tok.getKind()) {
3539 case tok::kw_requires: {
3540 auto RequiresToken = FormatTok;
3541 nextToken();
3542 parseRequiresExpression(RequiresToken);
3543 break;
3546 case tok::l_paren:
3547 if (!TopLevelParensAllowed)
3548 return;
3549 parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
3550 TopLevelParensAllowed = false;
3551 break;
3553 case tok::l_square:
3554 if (!LambdaThisTimeAllowed || !tryToParseLambda())
3555 return;
3556 break;
3558 case tok::kw_const:
3559 case tok::semi:
3560 case tok::kw_class:
3561 case tok::kw_struct:
3562 case tok::kw_union:
3563 return;
3565 case tok::l_brace:
3566 // Potential function body.
3567 return;
3569 case tok::ampamp:
3570 case tok::pipepipe:
3571 FormatTok->setFinalizedType(TT_BinaryOperator);
3572 nextToken();
3573 LambdaNextTimeAllowed = true;
3574 TopLevelParensAllowed = true;
3575 break;
3577 case tok::comma:
3578 case tok::comment:
3579 LambdaNextTimeAllowed = LambdaThisTimeAllowed;
3580 nextToken();
3581 break;
3583 case tok::kw_sizeof:
3584 case tok::greater:
3585 case tok::greaterequal:
3586 case tok::greatergreater:
3587 case tok::less:
3588 case tok::lessequal:
3589 case tok::lessless:
3590 case tok::equalequal:
3591 case tok::exclaim:
3592 case tok::exclaimequal:
3593 case tok::plus:
3594 case tok::minus:
3595 case tok::star:
3596 case tok::slash:
3597 LambdaNextTimeAllowed = true;
3598 TopLevelParensAllowed = true;
3599 // Just eat them.
3600 nextToken();
3601 break;
3603 case tok::numeric_constant:
3604 case tok::coloncolon:
3605 case tok::kw_true:
3606 case tok::kw_false:
3607 TopLevelParensAllowed = false;
3608 // Just eat them.
3609 nextToken();
3610 break;
3612 case tok::kw_static_cast:
3613 case tok::kw_const_cast:
3614 case tok::kw_reinterpret_cast:
3615 case tok::kw_dynamic_cast:
3616 nextToken();
3617 if (FormatTok->isNot(tok::less))
3618 return;
3620 nextToken();
3621 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
3622 /*ClosingBraceKind=*/tok::greater);
3623 break;
3625 default:
3626 if (!FormatTok->Tok.getIdentifierInfo()) {
3627 // Identifiers are part of the default case, we check for more then
3628 // tok::identifier to handle builtin type traits.
3629 return;
3632 // We need to differentiate identifiers for a template deduction guide,
3633 // variables, or function return types (the constraint expression has
3634 // ended before that), and basically all other cases. But it's easier to
3635 // check the other way around.
3636 assert(FormatTok->Previous);
3637 switch (FormatTok->Previous->Tok.getKind()) {
3638 case tok::coloncolon: // Nested identifier.
3639 case tok::ampamp: // Start of a function or variable for the
3640 case tok::pipepipe: // constraint expression. (binary)
3641 case tok::exclaim: // The same as above, but unary.
3642 case tok::kw_requires: // Initial identifier of a requires clause.
3643 case tok::equal: // Initial identifier of a concept declaration.
3644 break;
3645 default:
3646 return;
3649 // Read identifier with optional template declaration.
3650 nextToken();
3651 if (FormatTok->is(tok::less)) {
3652 nextToken();
3653 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
3654 /*ClosingBraceKind=*/tok::greater);
3656 TopLevelParensAllowed = false;
3657 break;
3659 } while (!eof());
3662 bool UnwrappedLineParser::parseEnum() {
3663 const FormatToken &InitialToken = *FormatTok;
3665 // Won't be 'enum' for NS_ENUMs.
3666 if (FormatTok->is(tok::kw_enum))
3667 nextToken();
3669 // In TypeScript, "enum" can also be used as property name, e.g. in interface
3670 // declarations. An "enum" keyword followed by a colon would be a syntax
3671 // error and thus assume it is just an identifier.
3672 if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
3673 return false;
3675 // In protobuf, "enum" can be used as a field name.
3676 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
3677 return false;
3679 // Eat up enum class ...
3680 if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
3681 nextToken();
3683 while (FormatTok->Tok.getIdentifierInfo() ||
3684 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
3685 tok::greater, tok::comma, tok::question,
3686 tok::l_square, tok::r_square)) {
3687 if (Style.isVerilog()) {
3688 FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
3689 nextToken();
3690 // In Verilog the base type can have dimensions.
3691 while (FormatTok->is(tok::l_square))
3692 parseSquare();
3693 } else {
3694 nextToken();
3696 // We can have macros or attributes in between 'enum' and the enum name.
3697 if (FormatTok->is(tok::l_paren))
3698 parseParens();
3699 assert(FormatTok->isNot(TT_AttributeSquare));
3700 if (FormatTok->is(tok::identifier)) {
3701 nextToken();
3702 // If there are two identifiers in a row, this is likely an elaborate
3703 // return type. In Java, this can be "implements", etc.
3704 if (Style.isCpp() && FormatTok->is(tok::identifier))
3705 return false;
3709 // Just a declaration or something is wrong.
3710 if (FormatTok->isNot(tok::l_brace))
3711 return true;
3712 FormatTok->setFinalizedType(TT_EnumLBrace);
3713 FormatTok->setBlockKind(BK_Block);
3715 if (Style.Language == FormatStyle::LK_Java) {
3716 // Java enums are different.
3717 parseJavaEnumBody();
3718 return true;
3720 if (Style.Language == FormatStyle::LK_Proto) {
3721 parseBlock(/*MustBeDeclaration=*/true);
3722 return true;
3725 if (!Style.AllowShortEnumsOnASingleLine &&
3726 ShouldBreakBeforeBrace(Style, InitialToken)) {
3727 addUnwrappedLine();
3729 // Parse enum body.
3730 nextToken();
3731 if (!Style.AllowShortEnumsOnASingleLine) {
3732 addUnwrappedLine();
3733 Line->Level += 1;
3735 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
3736 /*IsEnum=*/true);
3737 if (!Style.AllowShortEnumsOnASingleLine)
3738 Line->Level -= 1;
3739 if (HasError) {
3740 if (FormatTok->is(tok::semi))
3741 nextToken();
3742 addUnwrappedLine();
3744 setPreviousRBraceType(TT_EnumRBrace);
3745 return true;
3747 // There is no addUnwrappedLine() here so that we fall through to parsing a
3748 // structural element afterwards. Thus, in "enum A {} n, m;",
3749 // "} n, m;" will end up in one unwrapped line.
3752 bool UnwrappedLineParser::parseStructLike() {
3753 // parseRecord falls through and does not yet add an unwrapped line as a
3754 // record declaration or definition can start a structural element.
3755 parseRecord();
3756 // This does not apply to Java, JavaScript and C#.
3757 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3758 Style.isCSharp()) {
3759 if (FormatTok->is(tok::semi))
3760 nextToken();
3761 addUnwrappedLine();
3762 return true;
3764 return false;
3767 namespace {
3768 // A class used to set and restore the Token position when peeking
3769 // ahead in the token source.
3770 class ScopedTokenPosition {
3771 unsigned StoredPosition;
3772 FormatTokenSource *Tokens;
3774 public:
3775 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3776 assert(Tokens && "Tokens expected to not be null");
3777 StoredPosition = Tokens->getPosition();
3780 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3782 } // namespace
3784 // Look to see if we have [[ by looking ahead, if
3785 // its not then rewind to the original position.
3786 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3787 ScopedTokenPosition AutoPosition(Tokens);
3788 FormatToken *Tok = Tokens->getNextToken();
3789 // We already read the first [ check for the second.
3790 if (Tok->isNot(tok::l_square))
3791 return false;
3792 // Double check that the attribute is just something
3793 // fairly simple.
3794 while (Tok->isNot(tok::eof)) {
3795 if (Tok->is(tok::r_square))
3796 break;
3797 Tok = Tokens->getNextToken();
3799 if (Tok->is(tok::eof))
3800 return false;
3801 Tok = Tokens->getNextToken();
3802 if (Tok->isNot(tok::r_square))
3803 return false;
3804 Tok = Tokens->getNextToken();
3805 if (Tok->is(tok::semi))
3806 return false;
3807 return true;
3810 void UnwrappedLineParser::parseJavaEnumBody() {
3811 assert(FormatTok->is(tok::l_brace));
3812 const FormatToken *OpeningBrace = FormatTok;
3814 // Determine whether the enum is simple, i.e. does not have a semicolon or
3815 // constants with class bodies. Simple enums can be formatted like braced
3816 // lists, contracted to a single line, etc.
3817 unsigned StoredPosition = Tokens->getPosition();
3818 bool IsSimple = true;
3819 FormatToken *Tok = Tokens->getNextToken();
3820 while (Tok->isNot(tok::eof)) {
3821 if (Tok->is(tok::r_brace))
3822 break;
3823 if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3824 IsSimple = false;
3825 break;
3827 // FIXME: This will also mark enums with braces in the arguments to enum
3828 // constants as "not simple". This is probably fine in practice, though.
3829 Tok = Tokens->getNextToken();
3831 FormatTok = Tokens->setPosition(StoredPosition);
3833 if (IsSimple) {
3834 nextToken();
3835 parseBracedList();
3836 addUnwrappedLine();
3837 return;
3840 // Parse the body of a more complex enum.
3841 // First add a line for everything up to the "{".
3842 nextToken();
3843 addUnwrappedLine();
3844 ++Line->Level;
3846 // Parse the enum constants.
3847 while (!eof()) {
3848 if (FormatTok->is(tok::l_brace)) {
3849 // Parse the constant's class body.
3850 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3851 /*MunchSemi=*/false);
3852 } else if (FormatTok->is(tok::l_paren)) {
3853 parseParens();
3854 } else if (FormatTok->is(tok::comma)) {
3855 nextToken();
3856 addUnwrappedLine();
3857 } else if (FormatTok->is(tok::semi)) {
3858 nextToken();
3859 addUnwrappedLine();
3860 break;
3861 } else if (FormatTok->is(tok::r_brace)) {
3862 addUnwrappedLine();
3863 break;
3864 } else {
3865 nextToken();
3869 // Parse the class body after the enum's ";" if any.
3870 parseLevel(OpeningBrace);
3871 nextToken();
3872 --Line->Level;
3873 addUnwrappedLine();
3876 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
3877 const FormatToken &InitialToken = *FormatTok;
3878 nextToken();
3880 // The actual identifier can be a nested name specifier, and in macros
3881 // it is often token-pasted.
3882 // An [[attribute]] can be before the identifier.
3883 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
3884 tok::kw_alignas, tok::l_square) ||
3885 FormatTok->isAttribute() ||
3886 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3887 FormatTok->isOneOf(tok::period, tok::comma))) {
3888 if (Style.isJavaScript() &&
3889 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
3890 // JavaScript/TypeScript supports inline object types in
3891 // extends/implements positions:
3892 // class Foo implements {bar: number} { }
3893 nextToken();
3894 if (FormatTok->is(tok::l_brace)) {
3895 tryToParseBracedList();
3896 continue;
3899 if (FormatTok->is(tok::l_square) && handleCppAttributes())
3900 continue;
3901 bool IsNonMacroIdentifier =
3902 FormatTok->is(tok::identifier) &&
3903 FormatTok->TokenText != FormatTok->TokenText.upper();
3904 nextToken();
3905 // We can have macros in between 'class' and the class name.
3906 if (!IsNonMacroIdentifier && FormatTok->is(tok::l_paren))
3907 parseParens();
3910 // Note that parsing away template declarations here leads to incorrectly
3911 // accepting function declarations as record declarations.
3912 // In general, we cannot solve this problem. Consider:
3913 // class A<int> B() {}
3914 // which can be a function definition or a class definition when B() is a
3915 // macro. If we find enough real-world cases where this is a problem, we
3916 // can parse for the 'template' keyword in the beginning of the statement,
3917 // and thus rule out the record production in case there is no template
3918 // (this would still leave us with an ambiguity between template function
3919 // and class declarations).
3920 if (FormatTok->isOneOf(tok::colon, tok::less)) {
3921 do {
3922 if (FormatTok->is(tok::l_brace)) {
3923 calculateBraceTypes(/*ExpectClassBody=*/true);
3924 if (!tryToParseBracedList())
3925 break;
3927 if (FormatTok->is(tok::l_square)) {
3928 FormatToken *Previous = FormatTok->Previous;
3929 if (!Previous ||
3930 !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
3931 // Don't try parsing a lambda if we had a closing parenthesis before,
3932 // it was probably a pointer to an array: int (*)[].
3933 if (!tryToParseLambda())
3934 continue;
3935 } else {
3936 parseSquare();
3937 continue;
3940 if (FormatTok->is(tok::semi))
3941 return;
3942 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
3943 addUnwrappedLine();
3944 nextToken();
3945 parseCSharpGenericTypeConstraint();
3946 break;
3948 nextToken();
3949 } while (!eof());
3952 auto GetBraceTypes =
3953 [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> {
3954 switch (RecordTok.Tok.getKind()) {
3955 case tok::kw_class:
3956 return {TT_ClassLBrace, TT_ClassRBrace};
3957 case tok::kw_struct:
3958 return {TT_StructLBrace, TT_StructRBrace};
3959 case tok::kw_union:
3960 return {TT_UnionLBrace, TT_UnionRBrace};
3961 default:
3962 // Useful for e.g. interface.
3963 return {TT_RecordLBrace, TT_RecordRBrace};
3966 if (FormatTok->is(tok::l_brace)) {
3967 auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken);
3968 FormatTok->setFinalizedType(OpenBraceType);
3969 if (ParseAsExpr) {
3970 parseChildBlock();
3971 } else {
3972 if (ShouldBreakBeforeBrace(Style, InitialToken))
3973 addUnwrappedLine();
3975 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
3976 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
3978 setPreviousRBraceType(ClosingBraceType);
3980 // There is no addUnwrappedLine() here so that we fall through to parsing a
3981 // structural element afterwards. Thus, in "class A {} n, m;",
3982 // "} n, m;" will end up in one unwrapped line.
3985 void UnwrappedLineParser::parseObjCMethod() {
3986 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
3987 "'(' or identifier expected.");
3988 do {
3989 if (FormatTok->is(tok::semi)) {
3990 nextToken();
3991 addUnwrappedLine();
3992 return;
3993 } else if (FormatTok->is(tok::l_brace)) {
3994 if (Style.BraceWrapping.AfterFunction)
3995 addUnwrappedLine();
3996 parseBlock();
3997 addUnwrappedLine();
3998 return;
3999 } else {
4000 nextToken();
4002 } while (!eof());
4005 void UnwrappedLineParser::parseObjCProtocolList() {
4006 assert(FormatTok->is(tok::less) && "'<' expected.");
4007 do {
4008 nextToken();
4009 // Early exit in case someone forgot a close angle.
4010 if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4011 FormatTok->isObjCAtKeyword(tok::objc_end)) {
4012 return;
4014 } while (!eof() && FormatTok->isNot(tok::greater));
4015 nextToken(); // Skip '>'.
4018 void UnwrappedLineParser::parseObjCUntilAtEnd() {
4019 do {
4020 if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
4021 nextToken();
4022 addUnwrappedLine();
4023 break;
4025 if (FormatTok->is(tok::l_brace)) {
4026 parseBlock();
4027 // In ObjC interfaces, nothing should be following the "}".
4028 addUnwrappedLine();
4029 } else if (FormatTok->is(tok::r_brace)) {
4030 // Ignore stray "}". parseStructuralElement doesn't consume them.
4031 nextToken();
4032 addUnwrappedLine();
4033 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
4034 nextToken();
4035 parseObjCMethod();
4036 } else {
4037 parseStructuralElement();
4039 } while (!eof());
4042 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
4043 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
4044 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
4045 nextToken();
4046 nextToken(); // interface name
4048 // @interface can be followed by a lightweight generic
4049 // specialization list, then either a base class or a category.
4050 if (FormatTok->is(tok::less))
4051 parseObjCLightweightGenerics();
4052 if (FormatTok->is(tok::colon)) {
4053 nextToken();
4054 nextToken(); // base class name
4055 // The base class can also have lightweight generics applied to it.
4056 if (FormatTok->is(tok::less))
4057 parseObjCLightweightGenerics();
4058 } else if (FormatTok->is(tok::l_paren)) {
4059 // Skip category, if present.
4060 parseParens();
4063 if (FormatTok->is(tok::less))
4064 parseObjCProtocolList();
4066 if (FormatTok->is(tok::l_brace)) {
4067 if (Style.BraceWrapping.AfterObjCDeclaration)
4068 addUnwrappedLine();
4069 parseBlock(/*MustBeDeclaration=*/true);
4072 // With instance variables, this puts '}' on its own line. Without instance
4073 // variables, this ends the @interface line.
4074 addUnwrappedLine();
4076 parseObjCUntilAtEnd();
4079 void UnwrappedLineParser::parseObjCLightweightGenerics() {
4080 assert(FormatTok->is(tok::less));
4081 // Unlike protocol lists, generic parameterizations support
4082 // nested angles:
4084 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4085 // NSObject <NSCopying, NSSecureCoding>
4087 // so we need to count how many open angles we have left.
4088 unsigned NumOpenAngles = 1;
4089 do {
4090 nextToken();
4091 // Early exit in case someone forgot a close angle.
4092 if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4093 FormatTok->isObjCAtKeyword(tok::objc_end)) {
4094 break;
4096 if (FormatTok->is(tok::less)) {
4097 ++NumOpenAngles;
4098 } else if (FormatTok->is(tok::greater)) {
4099 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
4100 --NumOpenAngles;
4102 } while (!eof() && NumOpenAngles != 0);
4103 nextToken(); // Skip '>'.
4106 // Returns true for the declaration/definition form of @protocol,
4107 // false for the expression form.
4108 bool UnwrappedLineParser::parseObjCProtocol() {
4109 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
4110 nextToken();
4112 if (FormatTok->is(tok::l_paren)) {
4113 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4114 return false;
4117 // The definition/declaration form,
4118 // @protocol Foo
4119 // - (int)someMethod;
4120 // @end
4122 nextToken(); // protocol name
4124 if (FormatTok->is(tok::less))
4125 parseObjCProtocolList();
4127 // Check for protocol declaration.
4128 if (FormatTok->is(tok::semi)) {
4129 nextToken();
4130 addUnwrappedLine();
4131 return true;
4134 addUnwrappedLine();
4135 parseObjCUntilAtEnd();
4136 return true;
4139 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
4140 bool IsImport = FormatTok->is(Keywords.kw_import);
4141 assert(IsImport || FormatTok->is(tok::kw_export));
4142 nextToken();
4144 // Consume the "default" in "export default class/function".
4145 if (FormatTok->is(tok::kw_default))
4146 nextToken();
4148 // Consume "async function", "function" and "default function", so that these
4149 // get parsed as free-standing JS functions, i.e. do not require a trailing
4150 // semicolon.
4151 if (FormatTok->is(Keywords.kw_async))
4152 nextToken();
4153 if (FormatTok->is(Keywords.kw_function)) {
4154 nextToken();
4155 return;
4158 // For imports, `export *`, `export {...}`, consume the rest of the line up
4159 // to the terminating `;`. For everything else, just return and continue
4160 // parsing the structural element, i.e. the declaration or expression for
4161 // `export default`.
4162 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
4163 !FormatTok->isStringLiteral() &&
4164 !(FormatTok->is(Keywords.kw_type) &&
4165 Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) {
4166 return;
4169 while (!eof()) {
4170 if (FormatTok->is(tok::semi))
4171 return;
4172 if (Line->Tokens.empty()) {
4173 // Common issue: Automatic Semicolon Insertion wrapped the line, so the
4174 // import statement should terminate.
4175 return;
4177 if (FormatTok->is(tok::l_brace)) {
4178 FormatTok->setBlockKind(BK_Block);
4179 nextToken();
4180 parseBracedList();
4181 } else {
4182 nextToken();
4187 void UnwrappedLineParser::parseStatementMacro() {
4188 nextToken();
4189 if (FormatTok->is(tok::l_paren))
4190 parseParens();
4191 if (FormatTok->is(tok::semi))
4192 nextToken();
4193 addUnwrappedLine();
4196 void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4197 // consume things like a::`b.c[d:e] or a::*
4198 while (true) {
4199 if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar,
4200 tok::coloncolon, tok::hash) ||
4201 Keywords.isVerilogIdentifier(*FormatTok)) {
4202 nextToken();
4203 } else if (FormatTok->is(tok::l_square)) {
4204 parseSquare();
4205 } else {
4206 break;
4211 void UnwrappedLineParser::parseVerilogSensitivityList() {
4212 if (FormatTok->isNot(tok::at))
4213 return;
4214 nextToken();
4215 // A block event expression has 2 at signs.
4216 if (FormatTok->is(tok::at))
4217 nextToken();
4218 switch (FormatTok->Tok.getKind()) {
4219 case tok::star:
4220 nextToken();
4221 break;
4222 case tok::l_paren:
4223 parseParens();
4224 break;
4225 default:
4226 parseVerilogHierarchyIdentifier();
4227 break;
4231 unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
4232 unsigned AddLevels = 0;
4234 if (FormatTok->is(Keywords.kw_clocking)) {
4235 nextToken();
4236 if (Keywords.isVerilogIdentifier(*FormatTok))
4237 nextToken();
4238 parseVerilogSensitivityList();
4239 if (FormatTok->is(tok::semi))
4240 nextToken();
4241 } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex,
4242 Keywords.kw_casez, Keywords.kw_randcase,
4243 Keywords.kw_randsequence)) {
4244 if (Style.IndentCaseLabels)
4245 AddLevels++;
4246 nextToken();
4247 if (FormatTok->is(tok::l_paren)) {
4248 FormatTok->setFinalizedType(TT_ConditionLParen);
4249 parseParens();
4251 if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches))
4252 nextToken();
4253 // The case header has no semicolon.
4254 } else {
4255 // "module" etc.
4256 nextToken();
4257 // all the words like the name of the module and specifiers like
4258 // "automatic" and the width of function return type
4259 while (true) {
4260 if (FormatTok->is(tok::l_square)) {
4261 auto Prev = FormatTok->getPreviousNonComment();
4262 if (Prev && Keywords.isVerilogIdentifier(*Prev))
4263 Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
4264 parseSquare();
4265 } else if (Keywords.isVerilogIdentifier(*FormatTok) ||
4266 FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) {
4267 nextToken();
4268 } else {
4269 break;
4273 auto NewLine = [this]() {
4274 addUnwrappedLine();
4275 Line->IsContinuation = true;
4278 // package imports
4279 while (FormatTok->is(Keywords.kw_import)) {
4280 NewLine();
4281 nextToken();
4282 parseVerilogHierarchyIdentifier();
4283 if (FormatTok->is(tok::semi))
4284 nextToken();
4287 // parameters and ports
4288 if (FormatTok->is(Keywords.kw_verilogHash)) {
4289 NewLine();
4290 nextToken();
4291 if (FormatTok->is(tok::l_paren)) {
4292 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4293 parseParens();
4296 if (FormatTok->is(tok::l_paren)) {
4297 NewLine();
4298 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4299 parseParens();
4302 // extends and implements
4303 if (FormatTok->is(Keywords.kw_extends)) {
4304 NewLine();
4305 nextToken();
4306 parseVerilogHierarchyIdentifier();
4307 if (FormatTok->is(tok::l_paren))
4308 parseParens();
4310 if (FormatTok->is(Keywords.kw_implements)) {
4311 NewLine();
4312 do {
4313 nextToken();
4314 parseVerilogHierarchyIdentifier();
4315 } while (FormatTok->is(tok::comma));
4318 // Coverage event for cover groups.
4319 if (FormatTok->is(tok::at)) {
4320 NewLine();
4321 parseVerilogSensitivityList();
4324 if (FormatTok->is(tok::semi))
4325 nextToken(/*LevelDifference=*/1);
4326 addUnwrappedLine();
4329 return AddLevels;
4332 void UnwrappedLineParser::parseVerilogTable() {
4333 assert(FormatTok->is(Keywords.kw_table));
4334 nextToken(/*LevelDifference=*/1);
4335 addUnwrappedLine();
4337 auto InitialLevel = Line->Level++;
4338 while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) {
4339 FormatToken *Tok = FormatTok;
4340 nextToken();
4341 if (Tok->is(tok::semi))
4342 addUnwrappedLine();
4343 else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus))
4344 Tok->setFinalizedType(TT_VerilogTableItem);
4346 Line->Level = InitialLevel;
4347 nextToken(/*LevelDifference=*/-1);
4348 addUnwrappedLine();
4351 void UnwrappedLineParser::parseVerilogCaseLabel() {
4352 // The label will get unindented in AnnotatingParser. If there are no leading
4353 // spaces, indent the rest here so that things inside the block will be
4354 // indented relative to things outside. We don't use parseLabel because we
4355 // don't know whether this colon is a label or a ternary expression at this
4356 // point.
4357 auto OrigLevel = Line->Level;
4358 auto FirstLine = CurrentLines->size();
4359 if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
4360 ++Line->Level;
4361 else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok))
4362 --Line->Level;
4363 parseStructuralElement();
4364 // Restore the indentation in both the new line and the line that has the
4365 // label.
4366 if (CurrentLines->size() > FirstLine)
4367 (*CurrentLines)[FirstLine].Level = OrigLevel;
4368 Line->Level = OrigLevel;
4371 bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
4372 for (const auto &N : Line.Tokens) {
4373 if (N.Tok->MacroCtx)
4374 return true;
4375 for (const UnwrappedLine &Child : N.Children)
4376 if (containsExpansion(Child))
4377 return true;
4379 return false;
4382 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
4383 if (Line->Tokens.empty())
4384 return;
4385 LLVM_DEBUG({
4386 if (!parsingPPDirective()) {
4387 llvm::dbgs() << "Adding unwrapped line:\n";
4388 printDebugInfo(*Line);
4392 // If this line closes a block when in Whitesmiths mode, remember that
4393 // information so that the level can be decreased after the line is added.
4394 // This has to happen after the addition of the line since the line itself
4395 // needs to be indented.
4396 bool ClosesWhitesmithsBlock =
4397 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
4398 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
4400 // If the current line was expanded from a macro call, we use it to
4401 // reconstruct an unwrapped line from the structure of the expanded unwrapped
4402 // line and the unexpanded token stream.
4403 if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) {
4404 if (!Reconstruct)
4405 Reconstruct.emplace(Line->Level, Unexpanded);
4406 Reconstruct->addLine(*Line);
4408 // While the reconstructed unexpanded lines are stored in the normal
4409 // flow of lines, the expanded lines are stored on the side to be analyzed
4410 // in an extra step.
4411 CurrentExpandedLines.push_back(std::move(*Line));
4413 if (Reconstruct->finished()) {
4414 UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
4415 assert(!Reconstructed.Tokens.empty() &&
4416 "Reconstructed must at least contain the macro identifier.");
4417 assert(!parsingPPDirective());
4418 LLVM_DEBUG({
4419 llvm::dbgs() << "Adding unexpanded line:\n";
4420 printDebugInfo(Reconstructed);
4422 ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines;
4423 Lines.push_back(std::move(Reconstructed));
4424 CurrentExpandedLines.clear();
4425 Reconstruct.reset();
4427 } else {
4428 // At the top level we only get here when no unexpansion is going on, or
4429 // when conditional formatting led to unfinished macro reconstructions.
4430 assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0);
4431 CurrentLines->push_back(std::move(*Line));
4433 Line->Tokens.clear();
4434 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
4435 Line->FirstStartColumn = 0;
4436 Line->IsContinuation = false;
4437 Line->SeenDecltypeAuto = false;
4439 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
4440 --Line->Level;
4441 if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
4442 CurrentLines->append(
4443 std::make_move_iterator(PreprocessorDirectives.begin()),
4444 std::make_move_iterator(PreprocessorDirectives.end()));
4445 PreprocessorDirectives.clear();
4447 // Disconnect the current token from the last token on the previous line.
4448 FormatTok->Previous = nullptr;
4451 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
4453 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4454 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4455 FormatTok.NewlinesBefore > 0;
4458 // Checks if \p FormatTok is a line comment that continues the line comment
4459 // section on \p Line.
4460 static bool
4461 continuesLineCommentSection(const FormatToken &FormatTok,
4462 const UnwrappedLine &Line,
4463 const llvm::Regex &CommentPragmasRegex) {
4464 if (Line.Tokens.empty())
4465 return false;
4467 StringRef IndentContent = FormatTok.TokenText;
4468 if (FormatTok.TokenText.startswith("//") ||
4469 FormatTok.TokenText.startswith("/*")) {
4470 IndentContent = FormatTok.TokenText.substr(2);
4472 if (CommentPragmasRegex.match(IndentContent))
4473 return false;
4475 // If Line starts with a line comment, then FormatTok continues the comment
4476 // section if its original column is greater or equal to the original start
4477 // column of the line.
4479 // Define the min column token of a line as follows: if a line ends in '{' or
4480 // contains a '{' followed by a line comment, then the min column token is
4481 // that '{'. Otherwise, the min column token of the line is the first token of
4482 // the line.
4484 // If Line starts with a token other than a line comment, then FormatTok
4485 // continues the comment section if its original column is greater than the
4486 // original start column of the min column token of the line.
4488 // For example, the second line comment continues the first in these cases:
4490 // // first line
4491 // // second line
4493 // and:
4495 // // first line
4496 // // second line
4498 // and:
4500 // int i; // first line
4501 // // second line
4503 // and:
4505 // do { // first line
4506 // // second line
4507 // int i;
4508 // } while (true);
4510 // and:
4512 // enum {
4513 // a, // first line
4514 // // second line
4515 // b
4516 // };
4518 // The second line comment doesn't continue the first in these cases:
4520 // // first line
4521 // // second line
4523 // and:
4525 // int i; // first line
4526 // // second line
4528 // and:
4530 // do { // first line
4531 // // second line
4532 // int i;
4533 // } while (true);
4535 // and:
4537 // enum {
4538 // a, // first line
4539 // // second line
4540 // };
4541 const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4543 // Scan for '{//'. If found, use the column of '{' as a min column for line
4544 // comment section continuation.
4545 const FormatToken *PreviousToken = nullptr;
4546 for (const UnwrappedLineNode &Node : Line.Tokens) {
4547 if (PreviousToken && PreviousToken->is(tok::l_brace) &&
4548 isLineComment(*Node.Tok)) {
4549 MinColumnToken = PreviousToken;
4550 break;
4552 PreviousToken = Node.Tok;
4554 // Grab the last newline preceding a token in this unwrapped line.
4555 if (Node.Tok->NewlinesBefore > 0)
4556 MinColumnToken = Node.Tok;
4558 if (PreviousToken && PreviousToken->is(tok::l_brace))
4559 MinColumnToken = PreviousToken;
4561 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4562 MinColumnToken);
4565 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4566 bool JustComments = Line->Tokens.empty();
4567 for (FormatToken *Tok : CommentsBeforeNextToken) {
4568 // Line comments that belong to the same line comment section are put on the
4569 // same line since later we might want to reflow content between them.
4570 // Additional fine-grained breaking of line comment sections is controlled
4571 // by the class BreakableLineCommentSection in case it is desirable to keep
4572 // several line comment sections in the same unwrapped line.
4574 // FIXME: Consider putting separate line comment sections as children to the
4575 // unwrapped line instead.
4576 Tok->ContinuesLineCommentSection =
4577 continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
4578 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4579 addUnwrappedLine();
4580 pushToken(Tok);
4582 if (NewlineBeforeNext && JustComments)
4583 addUnwrappedLine();
4584 CommentsBeforeNextToken.clear();
4587 void UnwrappedLineParser::nextToken(int LevelDifference) {
4588 if (eof())
4589 return;
4590 flushComments(isOnNewLine(*FormatTok));
4591 pushToken(FormatTok);
4592 FormatToken *Previous = FormatTok;
4593 if (!Style.isJavaScript())
4594 readToken(LevelDifference);
4595 else
4596 readTokenWithJavaScriptASI();
4597 FormatTok->Previous = Previous;
4598 if (Style.isVerilog()) {
4599 // Blocks in Verilog can have `begin` and `end` instead of braces. For
4600 // keywords like `begin`, we can't treat them the same as left braces
4601 // because some contexts require one of them. For example structs use
4602 // braces and if blocks use keywords, and a left brace can occur in an if
4603 // statement, but it is not a block. For keywords like `end`, we simply
4604 // treat them the same as right braces.
4605 if (Keywords.isVerilogEnd(*FormatTok))
4606 FormatTok->Tok.setKind(tok::r_brace);
4610 void UnwrappedLineParser::distributeComments(
4611 const SmallVectorImpl<FormatToken *> &Comments,
4612 const FormatToken *NextTok) {
4613 // Whether or not a line comment token continues a line is controlled by
4614 // the method continuesLineCommentSection, with the following caveat:
4616 // Define a trail of Comments to be a nonempty proper postfix of Comments such
4617 // that each comment line from the trail is aligned with the next token, if
4618 // the next token exists. If a trail exists, the beginning of the maximal
4619 // trail is marked as a start of a new comment section.
4621 // For example in this code:
4623 // int a; // line about a
4624 // // line 1 about b
4625 // // line 2 about b
4626 // int b;
4628 // the two lines about b form a maximal trail, so there are two sections, the
4629 // first one consisting of the single comment "// line about a" and the
4630 // second one consisting of the next two comments.
4631 if (Comments.empty())
4632 return;
4633 bool ShouldPushCommentsInCurrentLine = true;
4634 bool HasTrailAlignedWithNextToken = false;
4635 unsigned StartOfTrailAlignedWithNextToken = 0;
4636 if (NextTok) {
4637 // We are skipping the first element intentionally.
4638 for (unsigned i = Comments.size() - 1; i > 0; --i) {
4639 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4640 HasTrailAlignedWithNextToken = true;
4641 StartOfTrailAlignedWithNextToken = i;
4645 for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4646 FormatToken *FormatTok = Comments[i];
4647 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4648 FormatTok->ContinuesLineCommentSection = false;
4649 } else {
4650 FormatTok->ContinuesLineCommentSection =
4651 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4653 if (!FormatTok->ContinuesLineCommentSection &&
4654 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
4655 ShouldPushCommentsInCurrentLine = false;
4657 if (ShouldPushCommentsInCurrentLine)
4658 pushToken(FormatTok);
4659 else
4660 CommentsBeforeNextToken.push_back(FormatTok);
4664 void UnwrappedLineParser::readToken(int LevelDifference) {
4665 SmallVector<FormatToken *, 1> Comments;
4666 bool PreviousWasComment = false;
4667 bool FirstNonCommentOnLine = false;
4668 do {
4669 FormatTok = Tokens->getNextToken();
4670 assert(FormatTok);
4671 while (FormatTok->getType() == TT_ConflictStart ||
4672 FormatTok->getType() == TT_ConflictEnd ||
4673 FormatTok->getType() == TT_ConflictAlternative) {
4674 if (FormatTok->getType() == TT_ConflictStart)
4675 conditionalCompilationStart(/*Unreachable=*/false);
4676 else if (FormatTok->getType() == TT_ConflictAlternative)
4677 conditionalCompilationAlternative();
4678 else if (FormatTok->getType() == TT_ConflictEnd)
4679 conditionalCompilationEnd();
4680 FormatTok = Tokens->getNextToken();
4681 FormatTok->MustBreakBefore = true;
4684 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4685 const FormatToken &Tok,
4686 bool PreviousWasComment) {
4687 auto IsFirstOnLine = [](const FormatToken &Tok) {
4688 return Tok.HasUnescapedNewline || Tok.IsFirst;
4691 // Consider preprocessor directives preceded by block comments as first
4692 // on line.
4693 if (PreviousWasComment)
4694 return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4695 return IsFirstOnLine(Tok);
4698 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4699 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4700 PreviousWasComment = FormatTok->is(tok::comment);
4702 while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
4703 (!Style.isVerilog() ||
4704 Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) &&
4705 FirstNonCommentOnLine) {
4706 distributeComments(Comments, FormatTok);
4707 Comments.clear();
4708 // If there is an unfinished unwrapped line, we flush the preprocessor
4709 // directives only after that unwrapped line was finished later.
4710 bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4711 ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4712 assert((LevelDifference >= 0 ||
4713 static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4714 "LevelDifference makes Line->Level negative");
4715 Line->Level += LevelDifference;
4716 // Comments stored before the preprocessor directive need to be output
4717 // before the preprocessor directive, at the same level as the
4718 // preprocessor directive, as we consider them to apply to the directive.
4719 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
4720 PPBranchLevel > 0) {
4721 Line->Level += PPBranchLevel;
4723 flushComments(isOnNewLine(*FormatTok));
4724 parsePPDirective();
4725 PreviousWasComment = FormatTok->is(tok::comment);
4726 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4727 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4730 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4731 !Line->InPPDirective) {
4732 continue;
4735 if (FormatTok->is(tok::identifier) &&
4736 Macros.defined(FormatTok->TokenText) &&
4737 // FIXME: Allow expanding macros in preprocessor directives.
4738 !Line->InPPDirective) {
4739 FormatToken *ID = FormatTok;
4740 unsigned Position = Tokens->getPosition();
4742 // To correctly parse the code, we need to replace the tokens of the macro
4743 // call with its expansion.
4744 auto PreCall = std::move(Line);
4745 Line.reset(new UnwrappedLine);
4746 bool OldInExpansion = InExpansion;
4747 InExpansion = true;
4748 // We parse the macro call into a new line.
4749 auto Args = parseMacroCall();
4750 InExpansion = OldInExpansion;
4751 assert(Line->Tokens.front().Tok == ID);
4752 // And remember the unexpanded macro call tokens.
4753 auto UnexpandedLine = std::move(Line);
4754 // Reset to the old line.
4755 Line = std::move(PreCall);
4757 LLVM_DEBUG({
4758 llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
4759 if (Args) {
4760 llvm::dbgs() << "(";
4761 for (const auto &Arg : Args.value())
4762 for (const auto &T : Arg)
4763 llvm::dbgs() << T->TokenText << " ";
4764 llvm::dbgs() << ")";
4766 llvm::dbgs() << "\n";
4768 if (Macros.objectLike(ID->TokenText) && Args &&
4769 !Macros.hasArity(ID->TokenText, Args->size())) {
4770 // The macro is either
4771 // - object-like, but we got argumnets, or
4772 // - overloaded to be both object-like and function-like, but none of
4773 // the function-like arities match the number of arguments.
4774 // Thus, expand as object-like macro.
4775 LLVM_DEBUG(llvm::dbgs()
4776 << "Macro \"" << ID->TokenText
4777 << "\" not overloaded for arity " << Args->size()
4778 << "or not function-like, using object-like overload.");
4779 Args.reset();
4780 UnexpandedLine->Tokens.resize(1);
4781 Tokens->setPosition(Position);
4782 nextToken();
4783 assert(!Args && Macros.objectLike(ID->TokenText));
4785 if ((!Args && Macros.objectLike(ID->TokenText)) ||
4786 (Args && Macros.hasArity(ID->TokenText, Args->size()))) {
4787 // Next, we insert the expanded tokens in the token stream at the
4788 // current position, and continue parsing.
4789 Unexpanded[ID] = std::move(UnexpandedLine);
4790 SmallVector<FormatToken *, 8> Expansion =
4791 Macros.expand(ID, std::move(Args));
4792 if (!Expansion.empty())
4793 FormatTok = Tokens->insertTokens(Expansion);
4795 LLVM_DEBUG({
4796 llvm::dbgs() << "Expanded: ";
4797 for (const auto &T : Expansion)
4798 llvm::dbgs() << T->TokenText << " ";
4799 llvm::dbgs() << "\n";
4801 } else {
4802 LLVM_DEBUG({
4803 llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
4804 << "\", because it was used ";
4805 if (Args)
4806 llvm::dbgs() << "with " << Args->size();
4807 else
4808 llvm::dbgs() << "without";
4809 llvm::dbgs() << " arguments, which doesn't match any definition.\n";
4811 Tokens->setPosition(Position);
4812 FormatTok = ID;
4816 if (FormatTok->isNot(tok::comment)) {
4817 distributeComments(Comments, FormatTok);
4818 Comments.clear();
4819 return;
4822 Comments.push_back(FormatTok);
4823 } while (!eof());
4825 distributeComments(Comments, nullptr);
4826 Comments.clear();
4829 namespace {
4830 template <typename Iterator>
4831 void pushTokens(Iterator Begin, Iterator End,
4832 llvm::SmallVectorImpl<FormatToken *> &Into) {
4833 for (auto I = Begin; I != End; ++I) {
4834 Into.push_back(I->Tok);
4835 for (const auto &Child : I->Children)
4836 pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
4839 } // namespace
4841 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
4842 UnwrappedLineParser::parseMacroCall() {
4843 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
4844 assert(Line->Tokens.empty());
4845 nextToken();
4846 if (FormatTok->isNot(tok::l_paren))
4847 return Args;
4848 unsigned Position = Tokens->getPosition();
4849 FormatToken *Tok = FormatTok;
4850 nextToken();
4851 Args.emplace();
4852 auto ArgStart = std::prev(Line->Tokens.end());
4854 int Parens = 0;
4855 do {
4856 switch (FormatTok->Tok.getKind()) {
4857 case tok::l_paren:
4858 ++Parens;
4859 nextToken();
4860 break;
4861 case tok::r_paren: {
4862 if (Parens > 0) {
4863 --Parens;
4864 nextToken();
4865 break;
4867 Args->push_back({});
4868 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
4869 nextToken();
4870 return Args;
4872 case tok::comma: {
4873 if (Parens > 0) {
4874 nextToken();
4875 break;
4877 Args->push_back({});
4878 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
4879 nextToken();
4880 ArgStart = std::prev(Line->Tokens.end());
4881 break;
4883 default:
4884 nextToken();
4885 break;
4887 } while (!eof());
4888 Line->Tokens.resize(1);
4889 Tokens->setPosition(Position);
4890 FormatTok = Tok;
4891 return {};
4894 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
4895 Line->Tokens.push_back(UnwrappedLineNode(Tok));
4896 if (MustBreakBeforeNextToken) {
4897 Line->Tokens.back().Tok->MustBreakBefore = true;
4898 MustBreakBeforeNextToken = false;
4902 } // end namespace format
4903 } // end namespace clang