clang/lib/Format/UnwrappedLineParser.cpp

   1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 ///
   9 /// \file
  10 /// This file contains the implementation of the UnwrappedLineParser,
  11 /// which turns a stream of tokens into UnwrappedLines.
  12 ///
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "UnwrappedLineParser.h"
  16 #include "FormatToken.h"
  17 #include "FormatTokenLexer.h"
  18 #include "FormatTokenSource.h"
  19 #include "Macros.h"
  20 #include "TokenAnnotator.h"
  21 #include "clang/Basic/TokenKinds.h"
  22 #include "llvm/ADT/STLExtras.h"
  23 #include "llvm/ADT/StringRef.h"
  24 #include "llvm/Support/Debug.h"
  25 #include "llvm/Support/raw_os_ostream.h"
  26 #include "llvm/Support/raw_ostream.h"
  27
  28 #include <algorithm>
  29 #include <utility>
  30
  31 #define DEBUG_TYPE "format-parser"
  32
  33 namespace clang {
  34 namespace format {
  35
  36 namespace {
  37
  38 void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
  39                StringRef Prefix = "", bool PrintText = false) {
  40   OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
  41      << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
  42   bool NewLine = false;
  43   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
  44                                                     E = Line.Tokens.end();
  45        I != E; ++I) {
  46     if (NewLine) {
  47       OS << Prefix;
  48       NewLine = false;
  49     }
  50     OS << I->Tok->Tok.getName() << "[" << "T=" << (unsigned)I->Tok->getType()
  51        << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
  52        << "\"] ";
  53     for (SmallVectorImpl<UnwrappedLine>::const_iterator
  54              CI = I->Children.begin(),
  55              CE = I->Children.end();
  56          CI != CE; ++CI) {
  57       OS << "\n";
  58       printLine(OS, *CI, (Prefix + "  ").str());
  59       NewLine = true;
  60     }
  61   }
  62   if (!NewLine)
  63     OS << "\n";
  64 }
  65
  66 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
  67   printLine(llvm::dbgs(), Line);
  68 }
  69
  70 class ScopedDeclarationState {
  71 public:
  72   ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
  73                          bool MustBeDeclaration)
  74       : Line(Line), Stack(Stack) {
  75     Line.MustBeDeclaration = MustBeDeclaration;
  76     Stack.push_back(MustBeDeclaration);
  77   }
  78   ~ScopedDeclarationState() {
  79     Stack.pop_back();
  80     if (!Stack.empty())
  81       Line.MustBeDeclaration = Stack.back();
  82     else
  83       Line.MustBeDeclaration = true;
  84   }
  85
  86 private:
  87   UnwrappedLine &Line;
  88   llvm::BitVector &Stack;
  89 };
  90
  91 } // end anonymous namespace
  92
  93 class ScopedLineState {
  94 public:
  95   ScopedLineState(UnwrappedLineParser &Parser,
  96                   bool SwitchToPreprocessorLines = false)
  97       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
  98     if (SwitchToPreprocessorLines)
  99       Parser.CurrentLines = &Parser.PreprocessorDirectives;
 100     else if (!Parser.Line->Tokens.empty())
 101       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
 102     PreBlockLine = std::move(Parser.Line);
 103     Parser.Line = std::make_unique<UnwrappedLine>();
 104     Parser.Line->Level = PreBlockLine->Level;
 105     Parser.Line->PPLevel = PreBlockLine->PPLevel;
 106     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
 107     Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
 108   }
 109
 110   ~ScopedLineState() {
 111     if (!Parser.Line->Tokens.empty())
 112       Parser.addUnwrappedLine();
 113     assert(Parser.Line->Tokens.empty());
 114     Parser.Line = std::move(PreBlockLine);
 115     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
 116       Parser.MustBreakBeforeNextToken = true;
 117     Parser.CurrentLines = OriginalLines;
 118   }
 119
 120 private:
 121   UnwrappedLineParser &Parser;
 122
 123   std::unique_ptr<UnwrappedLine> PreBlockLine;
 124   SmallVectorImpl<UnwrappedLine> *OriginalLines;
 125 };
 126
 127 class CompoundStatementIndenter {
 128 public:
 129   CompoundStatementIndenter(UnwrappedLineParser *Parser,
 130                             const FormatStyle &Style, unsigned &LineLevel)
 131       : CompoundStatementIndenter(Parser, LineLevel,
 132                                   Style.BraceWrapping.AfterControlStatement,
 133                                   Style.BraceWrapping.IndentBraces) {}
 134   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
 135                             bool WrapBrace, bool IndentBrace)
 136       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
 137     if (WrapBrace)
 138       Parser->addUnwrappedLine();
 139     if (IndentBrace)
 140       ++LineLevel;
 141   }
 142   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
 143
 144 private:
 145   unsigned &LineLevel;
 146   unsigned OldLineLevel;
 147 };
 148
 149 UnwrappedLineParser::UnwrappedLineParser(
 150     SourceManager &SourceMgr, const FormatStyle &Style,
 151     const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
 152     ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
 153     llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
 154     IdentifierTable &IdentTable)
 155     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
 156       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
 157       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
 158       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
 159       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
 160                        ? IG_Rejected
 161                        : IG_Inited),
 162       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
 163       Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {}
 164
 165 void UnwrappedLineParser::reset() {
 166   PPBranchLevel = -1;
 167   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
 168                      ? IG_Rejected
 169                      : IG_Inited;
 170   IncludeGuardToken = nullptr;
 171   Line.reset(new UnwrappedLine);
 172   CommentsBeforeNextToken.clear();
 173   FormatTok = nullptr;
 174   MustBreakBeforeNextToken = false;
 175   IsDecltypeAutoFunction = false;
 176   PreprocessorDirectives.clear();
 177   CurrentLines = &Lines;
 178   DeclarationScopeStack.clear();
 179   NestedTooDeep.clear();
 180   NestedLambdas.clear();
 181   PPStack.clear();
 182   Line->FirstStartColumn = FirstStartColumn;
 183
 184   if (!Unexpanded.empty())
 185     for (FormatToken *Token : AllTokens)
 186       Token->MacroCtx.reset();
 187   CurrentExpandedLines.clear();
 188   ExpandedLines.clear();
 189   Unexpanded.clear();
 190   InExpansion = false;
 191   Reconstruct.reset();
 192 }
 193
 194 void UnwrappedLineParser::parse() {
 195   IndexedTokenSource TokenSource(AllTokens);
 196   Line->FirstStartColumn = FirstStartColumn;
 197   do {
 198     LLVM_DEBUG(llvm::dbgs() << "----\n");
 199     reset();
 200     Tokens = &TokenSource;
 201     TokenSource.reset();
 202
 203     readToken();
 204     parseFile();
 205
 206     // If we found an include guard then all preprocessor directives (other than
 207     // the guard) are over-indented by one.
 208     if (IncludeGuard == IG_Found) {
 209       for (auto &Line : Lines)
 210         if (Line.InPPDirective && Line.Level > 0)
 211           --Line.Level;
 212     }
 213
 214     // Create line with eof token.
 215     assert(eof());
 216     pushToken(FormatTok);
 217     addUnwrappedLine();
 218
 219     // In a first run, format everything with the lines containing macro calls
 220     // replaced by the expansion.
 221     if (!ExpandedLines.empty()) {
 222       LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
 223       for (const auto &Line : Lines) {
 224         if (!Line.Tokens.empty()) {
 225           auto it = ExpandedLines.find(Line.Tokens.begin()->Tok);
 226           if (it != ExpandedLines.end()) {
 227             for (const auto &Expanded : it->second) {
 228               LLVM_DEBUG(printDebugInfo(Expanded));
 229               Callback.consumeUnwrappedLine(Expanded);
 230             }
 231             continue;
 232           }
 233         }
 234         LLVM_DEBUG(printDebugInfo(Line));
 235         Callback.consumeUnwrappedLine(Line);
 236       }
 237       Callback.finishRun();
 238     }
 239
 240     LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
 241     for (const UnwrappedLine &Line : Lines) {
 242       LLVM_DEBUG(printDebugInfo(Line));
 243       Callback.consumeUnwrappedLine(Line);
 244     }
 245     Callback.finishRun();
 246     Lines.clear();
 247     while (!PPLevelBranchIndex.empty() &&
 248            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
 249       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
 250       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
 251     }
 252     if (!PPLevelBranchIndex.empty()) {
 253       ++PPLevelBranchIndex.back();
 254       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
 255       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
 256     }
 257   } while (!PPLevelBranchIndex.empty());
 258 }
 259
 260 void UnwrappedLineParser::parseFile() {
 261   // The top-level context in a file always has declarations, except for pre-
 262   // processor directives and JavaScript files.
 263   bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
 264   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 265                                           MustBeDeclaration);
 266   if (Style.Language == FormatStyle::LK_TextProto)
 267     parseBracedList();
 268   else
 269     parseLevel();
 270   // Make sure to format the remaining tokens.
 271   //
 272   // LK_TextProto is special since its top-level is parsed as the body of a
 273   // braced list, which does not necessarily have natural line separators such
 274   // as a semicolon. Comments after the last entry that have been determined to
 275   // not belong to that line, as in:
 276   //   key: value
 277   //   // endfile comment
 278   // do not have a chance to be put on a line of their own until this point.
 279   // Here we add this newline before end-of-file comments.
 280   if (Style.Language == FormatStyle::LK_TextProto &&
 281       !CommentsBeforeNextToken.empty()) {
 282     addUnwrappedLine();
 283   }
 284   flushComments(true);
 285   addUnwrappedLine();
 286 }
 287
 288 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
 289   do {
 290     switch (FormatTok->Tok.getKind()) {
 291     case tok::l_brace:
 292       return;
 293     default:
 294       if (FormatTok->is(Keywords.kw_where)) {
 295         addUnwrappedLine();
 296         nextToken();
 297         parseCSharpGenericTypeConstraint();
 298         break;
 299       }
 300       nextToken();
 301       break;
 302     }
 303   } while (!eof());
 304 }
 305
 306 void UnwrappedLineParser::parseCSharpAttribute() {
 307   int UnpairedSquareBrackets = 1;
 308   do {
 309     switch (FormatTok->Tok.getKind()) {
 310     case tok::r_square:
 311       nextToken();
 312       --UnpairedSquareBrackets;
 313       if (UnpairedSquareBrackets == 0) {
 314         addUnwrappedLine();
 315         return;
 316       }
 317       break;
 318     case tok::l_square:
 319       ++UnpairedSquareBrackets;
 320       nextToken();
 321       break;
 322     default:
 323       nextToken();
 324       break;
 325     }
 326   } while (!eof());
 327 }
 328
 329 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
 330   if (!Lines.empty() && Lines.back().InPPDirective)
 331     return true;
 332
 333   const FormatToken *Previous = Tokens->getPreviousToken();
 334   return Previous && Previous->is(tok::comment) &&
 335          (Previous->IsMultiline || Previous->NewlinesBefore > 0);
 336 }
 337
 338 /// \brief Parses the structural elements of one nesting level until end of
     /// file or, when \p OpeningBrace is given, until the closing brace of that
     /// level (which is left unconsumed). Without \p OpeningBrace a closing brace
     /// is consumed and parsing continues.
 339 /// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
 340 /// \param IfKind The \p if statement kind in the level. Forwarded to
     /// parseStructuralElement().
 341 /// \param IfLeftBrace The left brace of the \p if block in the level. Only
     /// written (if non-null) when this function returns true.
 342 /// \returns true if a simple block of if/else/for/while, or false otherwise.
 343 /// (A simple block has a single statement.) True is only possible when
     /// Style.RemoveBracesLLVM is set and the closing brace of a control-statement
     /// or else block was reached.
 344 bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
 345                                      IfStmtKind *IfKind,
 346                                      FormatToken **IfLeftBrace) {
 347   const bool InRequiresExpression =
 348       OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
     // Evaluated once on entry; true whenever brace removal is disabled, which
     // makes the simple-block check below fail early.
 349   const bool IsPrecededByCommentOrPPDirective =
 350       !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
 351   FormatToken *IfLBrace = nullptr;
 352   bool HasDoWhile = false;
 353   bool HasLabel = false;
 354   unsigned StatementCount = 0;
 355   bool SwitchLabelEncountered = false;
 356
 357   do {
 358     if (FormatTok->isAttribute()) {
 359       nextToken();
 360       continue;
 361     }
     // Macro block begin/end tokens are dispatched like '{' and '}'.
 362     tok::TokenKind kind = FormatTok->Tok.getKind();
 363     if (FormatTok->getType() == TT_MacroBlockBegin)
 364       kind = tok::l_brace;
 365     else if (FormatTok->getType() == TT_MacroBlockEnd)
 366       kind = tok::r_brace;
 367
     // Parses one structural element and counts it as a statement. Once a
     // do-while or a label has been seen, nullptr is passed for the respective
     // out-parameter so the flag is not written again.
 368     auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile,
 369                          &HasLabel, &StatementCount] {
 370       parseStructuralElement(OpeningBrace, IfKind, &IfLBrace,
 371                              HasDoWhile ? nullptr : &HasDoWhile,
 372                              HasLabel ? nullptr : &HasLabel);
 373       ++StatementCount;
 374       assert(StatementCount > 0 && "StatementCount overflow!");
 375     };
 376
 377     switch (kind) {
 378     case tok::comment:
 379       nextToken();
 380       addUnwrappedLine();
 381       break;
 382     case tok::l_brace:
 383       if (InRequiresExpression) {
 384         FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
 385       } else if (FormatTok->Previous &&
 386                  FormatTok->Previous->ClosesRequiresClause) {
 387         // We need the 'default' case here to correctly parse a function
 388         // l_brace.
 389         ParseDefault();
 390         continue;
 391       }
 392       if (!InRequiresExpression && FormatTok->isNot(TT_MacroBlockBegin) &&
 393           tryToParseBracedList()) {
 394         continue;
 395       }
 396       parseBlock();
 397       ++StatementCount;
 398       assert(StatementCount > 0 && "StatementCount overflow!");
 399       addUnwrappedLine();
 400       break;
 401     case tok::r_brace:
 402       if (OpeningBrace) {
     // The level ends here; the brace itself is not consumed. Everything below
     // only decides whether the block counts as "simple".
 403         if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
 404             !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
 405           return false;
 406         }
     // isNot(tok::r_brace) can hold here because a TT_MacroBlockEnd token is
     // also dispatched to this case.
 407         if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
 408             HasDoWhile || IsPrecededByCommentOrPPDirective ||
 409             precededByCommentOrPPDirective()) {
 410           return false;
 411         }
     // A comment on the same line directly after the closing brace also
     // disqualifies the block.
 412         const FormatToken *Next = Tokens->peekNextToken();
 413         if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
 414           return false;
 415         if (IfLeftBrace)
 416           *IfLeftBrace = IfLBrace;
 417         return true;
 418       }
 419       nextToken();
 420       addUnwrappedLine();
 421       break;
 422     case tok::kw_default: {
     // Peek past any comments at the token following 'default', then rewind
     // to the stored position.
 423       unsigned StoredPosition = Tokens->getPosition();
 424       FormatToken *Next;
 425       do {
 426         Next = Tokens->getNextToken();
 427         assert(Next);
 428       } while (Next->is(tok::comment));
 429       FormatTok = Tokens->setPosition(StoredPosition);
 430       if (Next->isNot(tok::colon)) {
 431         // default not followed by ':' is not a case label; treat it like
 432         // an identifier.
 433         parseStructuralElement();
 434         break;
 435       }
 436       // Else, if it is 'default:', fall through to the case handling.
 437       [[fallthrough]];
 438     }
 439     case tok::kw_case:
 440       if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() ||
 441           (Style.isJavaScript() && Line->MustBeDeclaration)) {
 442         // Proto: there are no switch/case statements
 443         // Verilog: Case labels don't have this word. We handle case
 444         // labels including default in TokenAnnotator.
 445         // JavaScript: A 'case: string' style field declaration.
 446         ParseDefault();
 447         break;
 448       }
     // Only the first case label seen in this level can raise the line level.
 449       if (!SwitchLabelEncountered &&
 450           (Style.IndentCaseLabels ||
 451            (Line->InPPDirective && Line->Level == 1))) {
 452         ++Line->Level;
 453       }
 454       SwitchLabelEncountered = true;
 455       parseStructuralElement();
 456       break;
 457     case tok::l_square:
 458       if (Style.isCSharp()) {
 459         nextToken();
 460         parseCSharpAttribute();
 461         break;
 462       }
 463       if (handleCppAttributes())
 464         break;
 465       [[fallthrough]];
 466     default:
 467       ParseDefault();
 468       break;
 469     }
 470   } while (!eof());
 471
 472   return false;
 473 }
 474
     /// Looks ahead from the current '{' and assigns a block kind (BK_Block or
     /// BK_BracedInit) to it and to the braces nested inside it, based on the
     /// tokens that precede and follow each pair. The token position is restored
     /// before returning.
     /// \param ExpectClassBody If true, a ';' directly after the closing brace of
     /// the outermost pair does not by itself mark that pair as a braced list.
 475 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
 476   // We'll parse forward through the tokens until we hit
 477   // a closing brace or eof - note that getNextToken() will
 478   // parse macros, so this will magically work inside macro
 479   // definitions, too.
 480   unsigned StoredPosition = Tokens->getPosition();
 481   FormatToken *Tok = FormatTok;
 482   const FormatToken *PrevTok = Tok->Previous;
 483   // Keep a stack of positions of lbrace tokens. We will
 484   // update information about whether an lbrace starts a
 485   // braced init list or a different block during the loop.
 486   struct StackEntry {
 487     FormatToken *Tok;
 488     const FormatToken *PrevTok;
 489   };
 490   SmallVector<StackEntry, 8> LBraceStack;
 491   assert(Tok->is(tok::l_brace));
 492   do {
 493     // Get next non-comment, non-preprocessor token.
 494     FormatToken *NextTok;
 495     do {
 496       NextTok = Tokens->getNextToken();
 497     } while (NextTok->is(tok::comment));
     // Outside macro bodies, skip a '#' directive: advance to the first
     // non-comment token that has a newline before it, or to eof.
 498     while (NextTok->is(tok::hash) && !Line->InMacroBody) {
 499       NextTok = Tokens->getNextToken();
 500       do {
 501         NextTok = Tokens->getNextToken();
 502       } while (NextTok->is(tok::comment) ||
 503                (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof)));
 504     }
 505
 506     switch (Tok->Tok.getKind()) {
 507     case tok::l_brace:
 508       if (Style.isJavaScript() && PrevTok) {
 509         if (PrevTok->isOneOf(tok::colon, tok::less)) {
 510           // A ':' indicates this code is in a type, or a braced list
 511           // following a label in an object literal ({a: {b: 1}}).
 512           // A '<' could be an object used in a comparison, but that is nonsense
 513           // code (can never return true), so more likely it is a generic type
 514           // argument (`X<{a: string; b: number}>`).
 515           // The code below could be confused by semicolons between the
 516           // individual members in a type member list, which would normally
 517           // trigger BK_Block. In both cases, this must be parsed as an inline
 518           // braced init.
 519           Tok->setBlockKind(BK_BracedInit);
 520         } else if (PrevTok->is(tok::r_paren)) {
 521           // `) { }` can only occur in function or method declarations in JS.
 522           Tok->setBlockKind(BK_Block);
 523         }
 524       } else {
 525         Tok->setBlockKind(BK_Unknown);
 526       }
 527       LBraceStack.push_back({Tok, PrevTok});
 528       break;
 529     case tok::r_brace:
 530       if (LBraceStack.empty())
 531         break;
     // Only decide here if the matching '{' has not been classified yet.
 532       if (LBraceStack.back().Tok->is(BK_Unknown)) {
 533         bool ProbablyBracedList = false;
 534         if (Style.Language == FormatStyle::LK_Proto) {
 535           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
 536         } else {
 537           // Skip NextTok over preprocessor lines, otherwise we may not
 538           // properly diagnose the block as a braced initializer
 539           // if the comma separator appears after the pp directive.
 540           while (NextTok->is(tok::hash)) {
 541             ScopedMacroState MacroState(*Line, Tokens, NextTok);
 542             do {
 543               NextTok = Tokens->getNextToken();
 544             } while (NextTok->isNot(tok::eof));
 545           }
 546
 547           // Using OriginalColumn to distinguish between ObjC methods and
 548           // binary operators is a bit hacky.
 549           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
 550                                   NextTok->OriginalColumn == 0;
 551
 552           // Try to detect a braced list. Note that regardless how we mark inner
 553           // braces here, we will overwrite the BlockKind later if we parse a
 554           // braced list (where all blocks inside are by default braced lists),
 555           // or when we explicitly detect blocks (for example while parsing
 556           // lambdas).
 557
 558           // If we already marked the opening brace as braced list, the closing
 559           // must also be part of it.
 560           ProbablyBracedList = LBraceStack.back().Tok->is(TT_BracedListLBrace);
 561
 562           ProbablyBracedList = ProbablyBracedList ||
 563                                (Style.isJavaScript() &&
 564                                 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
 565                                                  Keywords.kw_as));
 566           ProbablyBracedList = ProbablyBracedList ||
 567                                (Style.isCpp() && NextTok->is(tok::l_paren));
 568
 569           // If there is a comma, period, colon, right paren, right square
 570           // bracket or ellipsis after the closing brace, we assume this is a
 571           // braced initializer list (a semicolon is handled separately below).
 572           // FIXME: Some of these do not apply to JS.
 573           ProbablyBracedList =
 574               ProbablyBracedList ||
 575               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
 576                                tok::r_paren, tok::r_square, tok::ellipsis);
 577
 578           // Distinguish between braced list in a constructor initializer list
 579           // followed by constructor body, or just adjacent blocks.
 580           ProbablyBracedList =
 581               ProbablyBracedList ||
 582               (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok &&
 583                LBraceStack.back().PrevTok->isOneOf(tok::identifier,
 584                                                    tok::greater));
 585
     // Here PrevTok is the token before the closing brace, not the token before
     // the matching opening brace.
 586           ProbablyBracedList =
 587               ProbablyBracedList ||
 588               (NextTok->is(tok::identifier) &&
 589                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));
 590
 591           ProbablyBracedList = ProbablyBracedList ||
 592                                (NextTok->is(tok::semi) &&
 593                                 (!ExpectClassBody || LBraceStack.size() != 1));
 594
 595           ProbablyBracedList =
 596               ProbablyBracedList ||
 597               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
 598
     // Note that this check overwrites the result accumulated above.
 599           if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
 600             // We can have an array subscript after a braced init
 601             // list, but C++11 attributes are expected after blocks.
 602             NextTok = Tokens->getNextToken();
 603             ProbablyBracedList = NextTok->isNot(tok::l_square);
 604           }
 605         }
 606         if (ProbablyBracedList) {
 607           Tok->setBlockKind(BK_BracedInit);
 608           LBraceStack.back().Tok->setBlockKind(BK_BracedInit);
 609         } else {
 610           Tok->setBlockKind(BK_Block);
 611           LBraceStack.back().Tok->setBlockKind(BK_Block);
 612         }
 613       }
 614       LBraceStack.pop_back();
 615       break;
 616     case tok::identifier:
 617       if (Tok->isNot(TT_StatementMacro))
 618         break;
 619       [[fallthrough]];
     // Any of these tokens inside a still-unclassified brace pair marks the
     // innermost open brace as a block.
 620     case tok::at:
 621     case tok::semi:
 622     case tok::kw_if:
 623     case tok::kw_while:
 624     case tok::kw_for:
 625     case tok::kw_switch:
 626     case tok::kw_try:
 627     case tok::kw___try:
 628       if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown))
 629         LBraceStack.back().Tok->setBlockKind(BK_Block);
 630       break;
 631     default:
 632       break;
 633     }
 634     PrevTok = Tok;
 635     Tok = NextTok;
 636   } while (Tok->isNot(tok::eof) && !LBraceStack.empty());
 637
 638   // Assume other blocks for all unclosed opening braces.
 639   for (const auto &Entry : LBraceStack)
 640     if (Entry.Tok->is(BK_Unknown))
 641       Entry.Tok->setBlockKind(BK_Block);
 642
     // Rewind to where the look-ahead started.
 643   FormatTok = Tokens->setPosition(StoredPosition);
 644 }
 645
 646 // Sets the token type of the directly previous right brace.
 647 void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) {
 648   if (auto Prev = FormatTok->getPreviousNonComment();
 649       Prev && Prev->is(tok::r_brace)) {
 650     Prev->setFinalizedType(Type);
 651   }
 652 }
 653
 654 template <class T>
 655 static inline void hash_combine(std::size_t &seed, const T &v) {
 656   std::hash<T> hasher;
 657   seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
 658 }
 659
 660 size_t UnwrappedLineParser::computePPHash() const {
 661   size_t h = 0;
 662   for (const auto &i : PPStack) {
 663     hash_combine(h, size_t(i.Kind));
 664     hash_combine(h, i.Line);
 665   }
 666   return h;
 667 }
 668
 669 // Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
 670 // is not null, subtracts its length (plus the preceding space) when computing
 671 // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
 672 // running the token annotator on it so that we can restore them afterward.
     // Always returns true when Style.ColumnLimit is 0; otherwise returns whether
     // the indentation (Level * IndentWidth) plus the adjusted length is within
     // ColumnLimit.
 673 bool UnwrappedLineParser::mightFitOnOneLine(
 674     UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
 675   const auto ColumnLimit = Style.ColumnLimit;
 676   if (ColumnLimit == 0)
 677     return true;
 678
 679   auto &Tokens = ParsedLine.Tokens;
 680   assert(!Tokens.empty());
 681
 682   const auto *LastToken = Tokens.back().Tok;
 683   assert(LastToken);
 684
 685   SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
 686
     // Snapshot every token (heap-allocated copy) together with its children.
     // NOTE(review): Token is a const reference here, so std::move yields a
     // const rvalue and the children are presumably copied rather than moved.
     // Confirm this is intended before changing it: a real move would leave
     // Token.Children empty while the line is annotated below.
 687   int Index = 0;
 688   for (const auto &Token : Tokens) {
 689     assert(Token.Tok);
 690     auto &SavedToken = SavedTokens[Index++];
 691     SavedToken.Tok = new FormatToken;
 692     SavedToken.Tok->copyFrom(*Token.Tok);
 693     SavedToken.Children = std::move(Token.Children);
 694   }
 695
 696   AnnotatedLine Line(ParsedLine);
 697   assert(Line.Last == LastToken);
 698
 699   TokenAnnotator Annotator(Style, Keywords);
 700   Annotator.annotate(Line);
 701   Annotator.calculateFormattingInformation(Line);
 702
     // TotalLength of the last token is taken as the length of the whole line.
 703   auto Length = LastToken->TotalLength;
 704   if (OpeningBrace) {
 705     assert(OpeningBrace != Tokens.front().Tok);
     // NOTE(review): presumably undoes a ColumnLimit-sized jump in TotalLength
     // that the annotator recorded between Prev and the brace; confirm against
     // TokenAnnotator.
 706     if (auto Prev = OpeningBrace->Previous;
 707         Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
 708       Length -= ColumnLimit;
 709     }
 710     Length -= OpeningBrace->TokenText.size() + 1;
 711   }
 712
     // A leading '}' is discounted as well (its text plus one).
 713   if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) {
 714     assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
 715     Length -= FirstToken->TokenText.size() + 1;
 716   }
 717
     // Restore the tokens from the snapshot and free the temporary copies.
     // NOTE(review): SavedToken is a const reference, so this std::move is
     // presumably a copy as well.
 718   Index = 0;
 719   for (auto &Token : Tokens) {
 720     const auto &SavedToken = SavedTokens[Index++];
 721     Token.Tok->copyFrom(*SavedToken.Tok);
 722     Token.Children = std::move(SavedToken.Children);
 723     delete SavedToken.Tok;
 724   }
 725
 726   // If these ever change, PPLevel must be used to get the correct indentation.
 727   assert(!Line.InMacroBody);
 728   assert(!Line.InPPDirective);
 729   return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
 730 }
 731
 732 FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
 733                                              unsigned AddLevels, bool MunchSemi,
 734                                              bool KeepBraces,
 735                                              IfStmtKind *IfKind,
 736                                              bool UnindentWhitesmithsBraces) {
 737   auto HandleVerilogBlockLabel = [this]() {
 738     // ":" name
 739     if (Style.isVerilog() && FormatTok->is(tok::colon)) {
 740       nextToken();
 741       if (Keywords.isVerilogIdentifier(*FormatTok))
 742         nextToken();
 743     }
 744   };
 745
 746   // Whether this is a Verilog-specific block that has a special header like a
 747   // module.
 748   const bool VerilogHierarchy =
 749       Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok);
 750   assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
 751           (Style.isVerilog() &&
 752            (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
 753          "'{' or macro block token expected");
 754   FormatToken *Tok = FormatTok;
 755   const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
 756   auto Index = CurrentLines->size();
 757   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
 758   FormatTok->setBlockKind(BK_Block);
 759
 760   // For Whitesmiths mode, jump to the next level prior to skipping over the
 761   // braces.
 762   if (!VerilogHierarchy && AddLevels > 0 &&
 763       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
 764     ++Line->Level;
 765   }
 766
 767   size_t PPStartHash = computePPHash();
 768
 769   const unsigned InitialLevel = Line->Level;
 770   if (VerilogHierarchy) {
 771     AddLevels += parseVerilogHierarchyHeader();
 772   } else {
 773     nextToken(/*LevelDifference=*/AddLevels);
 774     HandleVerilogBlockLabel();
 775   }
 776
 777   // Bail out if there are too many levels. Otherwise, the stack might overflow.
 778   if (Line->Level > 300)
 779     return nullptr;
 780
 781   if (MacroBlock && FormatTok->is(tok::l_paren))
 782     parseParens();
 783
 784   size_t NbPreprocessorDirectives =
 785       !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
 786   addUnwrappedLine();
 787   size_t OpeningLineIndex =
 788       CurrentLines->empty()
 789           ? (UnwrappedLine::kInvalidIndex)
 790           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
 791
 792   // Whitesmiths is weird here. The brace needs to be indented for the namespace
 793   // block, but the block itself may not be indented depending on the style
 794   // settings. This allows the format to back up one level in those cases.
 795   if (UnindentWhitesmithsBraces)
 796     --Line->Level;
 797
 798   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 799                                           MustBeDeclaration);
 800   if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
 801     Line->Level += AddLevels;
 802
 803   FormatToken *IfLBrace = nullptr;
 804   const bool SimpleBlock = parseLevel(Tok, IfKind, &IfLBrace);
 805
 806   if (eof())
 807     return IfLBrace;
 808
 809   if (MacroBlock ? FormatTok->isNot(TT_MacroBlockEnd)
 810                  : FormatTok->isNot(tok::r_brace)) {
 811     Line->Level = InitialLevel;
 812     FormatTok->setBlockKind(BK_Block);
 813     return IfLBrace;
 814   }
 815
 816   if (FormatTok->is(tok::r_brace) && Tok->is(TT_NamespaceLBrace))
 817     FormatTok->setFinalizedType(TT_NamespaceRBrace);
 818
 819   const bool IsFunctionRBrace =
 820       FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace);
 821
 822   auto RemoveBraces = [=]() mutable {
 823     if (!SimpleBlock)
 824       return false;
 825     assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
 826     assert(FormatTok->is(tok::r_brace));
 827     const bool WrappedOpeningBrace = !Tok->Previous;
 828     if (WrappedOpeningBrace && FollowedByComment)
 829       return false;
 830     const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
 831     if (KeepBraces && !HasRequiredIfBraces)
 832       return false;
 833     if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
 834       const FormatToken *Previous = Tokens->getPreviousToken();
 835       assert(Previous);
 836       if (Previous->is(tok::r_brace) && !Previous->Optional)
 837         return false;
 838     }
 839     assert(!CurrentLines->empty());
 840     auto &LastLine = CurrentLines->back();
 841     if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
 842       return false;
 843     if (Tok->is(TT_ElseLBrace))
 844       return true;
 845     if (WrappedOpeningBrace) {
 846       assert(Index > 0);
 847       --Index; // The line above the wrapped l_brace.
 848       Tok = nullptr;
 849     }
 850     return mightFitOnOneLine((*CurrentLines)[Index], Tok);
 851   };
 852   if (RemoveBraces()) {
 853     Tok->MatchingParen = FormatTok;
 854     FormatTok->MatchingParen = Tok;
 855   }
 856
 857   size_t PPEndHash = computePPHash();
 858
 859   // Munch the closing brace.
 860   nextToken(/*LevelDifference=*/-AddLevels);
 861
 862   // When this is a function block and there is an unnecessary semicolon
 863   // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
 864   // it later).
 865   if (Style.RemoveSemicolon && IsFunctionRBrace) {
 866     while (FormatTok->is(tok::semi)) {
 867       FormatTok->Optional = true;
 868       nextToken();
 869     }
 870   }
 871
 872   HandleVerilogBlockLabel();
 873
 874   if (MacroBlock && FormatTok->is(tok::l_paren))
 875     parseParens();
 876
 877   Line->Level = InitialLevel;
 878
 879   if (FormatTok->is(tok::kw_noexcept)) {
 880     // A noexcept in a requires expression.
 881     nextToken();
 882   }
 883
 884   if (FormatTok->is(tok::arrow)) {
 885     // Following the } or noexcept we can find a trailing return type arrow
 886     // as part of an implicit conversion constraint.
 887     nextToken();
 888     parseStructuralElement();
 889   }
 890
 891   if (MunchSemi && FormatTok->is(tok::semi))
 892     nextToken();
 893
 894   if (PPStartHash == PPEndHash) {
 895     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
 896     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
 897       // Update the opening line to add the forward reference as well
 898       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
 899           CurrentLines->size() - 1;
 900     }
 901   }
 902
 903   return IfLBrace;
 904 }
 905
 906 static bool isGoogScope(const UnwrappedLine &Line) {
 907   // FIXME: Closure-library specific stuff should not be hard-coded but be
 908   // configurable.
 909   if (Line.Tokens.size() < 4)
 910     return false;
 911   auto I = Line.Tokens.begin();
 912   if (I->Tok->TokenText != "goog")
 913     return false;
 914   ++I;
 915   if (I->Tok->isNot(tok::period))
 916     return false;
 917   ++I;
 918   if (I->Tok->TokenText != "scope")
 919     return false;
 920   ++I;
 921   return I->Tok->is(tok::l_paren);
 922 }
 923
 924 static bool isIIFE(const UnwrappedLine &Line,
 925                    const AdditionalKeywords &Keywords) {
 926   // Look for the start of an immediately invoked anonymous function.
 927   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
 928   // This is commonly done in JavaScript to create a new, anonymous scope.
 929   // Example: (function() { ... })()
 930   if (Line.Tokens.size() < 3)
 931     return false;
 932   auto I = Line.Tokens.begin();
 933   if (I->Tok->isNot(tok::l_paren))
 934     return false;
 935   ++I;
 936   if (I->Tok->isNot(Keywords.kw_function))
 937     return false;
 938   ++I;
 939   return I->Tok->is(tok::l_paren);
 940 }
 941
 942 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
 943                                    const FormatToken &InitialToken) {
 944   tok::TokenKind Kind = InitialToken.Tok.getKind();
 945   if (InitialToken.is(TT_NamespaceMacro))
 946     Kind = tok::kw_namespace;
 947
 948   switch (Kind) {
 949   case tok::kw_namespace:
 950     return Style.BraceWrapping.AfterNamespace;
 951   case tok::kw_class:
 952     return Style.BraceWrapping.AfterClass;
 953   case tok::kw_union:
 954     return Style.BraceWrapping.AfterUnion;
 955   case tok::kw_struct:
 956     return Style.BraceWrapping.AfterStruct;
 957   case tok::kw_enum:
 958     return Style.BraceWrapping.AfterEnum;
 959   default:
 960     return false;
 961   }
 962 }
 963
 964 void UnwrappedLineParser::parseChildBlock() {
 965   assert(FormatTok->is(tok::l_brace));
 966   FormatTok->setBlockKind(BK_Block);
 967   const FormatToken *OpeningBrace = FormatTok;
 968   nextToken();
 969   {
 970     bool SkipIndent = (Style.isJavaScript() &&
 971                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
 972     ScopedLineState LineState(*this);
 973     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 974                                             /*MustBeDeclaration=*/false);
 975     Line->Level += SkipIndent ? 0 : 1;
 976     parseLevel(OpeningBrace);
 977     flushComments(isOnNewLine(*FormatTok));
 978     Line->Level -= SkipIndent ? 0 : 1;
 979   }
 980   nextToken();
 981 }
 982
 983 void UnwrappedLineParser::parsePPDirective() {
 984   assert(FormatTok->is(tok::hash) && "'#' expected");
 985   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
 986
 987   nextToken();
 988
 989   if (!FormatTok->Tok.getIdentifierInfo()) {
 990     parsePPUnknown();
 991     return;
 992   }
 993
 994   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
 995   case tok::pp_define:
 996     parsePPDefine();
 997     return;
 998   case tok::pp_if:
 999     parsePPIf(/*IfDef=*/false);
1000     break;
1001   case tok::pp_ifdef:
1002   case tok::pp_ifndef:
1003     parsePPIf(/*IfDef=*/true);
1004     break;
1005   case tok::pp_else:
1006   case tok::pp_elifdef:
1007   case tok::pp_elifndef:
1008   case tok::pp_elif:
1009     parsePPElse();
1010     break;
1011   case tok::pp_endif:
1012     parsePPEndIf();
1013     break;
1014   case tok::pp_pragma:
1015     parsePPPragma();
1016     break;
1017   default:
1018     parsePPUnknown();
1019     break;
1020   }
1021 }
1022
1023 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1024   size_t Line = CurrentLines->size();
1025   if (CurrentLines == &PreprocessorDirectives)
1026     Line += Lines.size();
1027
1028   if (Unreachable ||
1029       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1030     PPStack.push_back({PP_Unreachable, Line});
1031   } else {
1032     PPStack.push_back({PP_Conditional, Line});
1033   }
1034 }
1035
1036 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1037   ++PPBranchLevel;
1038   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1039   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1040     PPLevelBranchIndex.push_back(0);
1041     PPLevelBranchCount.push_back(0);
1042   }
1043   PPChainBranchIndex.push(Unreachable ? -1 : 0);
1044   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1045   conditionalCompilationCondition(Unreachable || Skip);
1046 }
1047
1048 void UnwrappedLineParser::conditionalCompilationAlternative() {
1049   if (!PPStack.empty())
1050     PPStack.pop_back();
1051   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1052   if (!PPChainBranchIndex.empty())
1053     ++PPChainBranchIndex.top();
1054   conditionalCompilationCondition(
1055       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1056       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1057 }
1058
1059 void UnwrappedLineParser::conditionalCompilationEnd() {
1060   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1061   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1062     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1063       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1064   }
1065   // Guard against #endif's without #if.
1066   if (PPBranchLevel > -1)
1067     --PPBranchLevel;
1068   if (!PPChainBranchIndex.empty())
1069     PPChainBranchIndex.pop();
1070   if (!PPStack.empty())
1071     PPStack.pop_back();
1072 }
1073
1074 void UnwrappedLineParser::parsePPIf(bool IfDef) {
1075   bool IfNDef = FormatTok->is(tok::pp_ifndef);
1076   nextToken();
1077   bool Unreachable = false;
1078   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1079     Unreachable = true;
1080   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1081     Unreachable = true;
1082   conditionalCompilationStart(Unreachable);
1083   FormatToken *IfCondition = FormatTok;
1084   // If there's a #ifndef on the first line, and the only lines before it are
1085   // comments, it could be an include guard.
1086   bool MaybeIncludeGuard = IfNDef;
1087   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1088     for (auto &Line : Lines) {
1089       if (Line.Tokens.front().Tok->isNot(tok::comment)) {
1090         MaybeIncludeGuard = false;
1091         IncludeGuard = IG_Rejected;
1092         break;
1093       }
1094     }
1095   }
1096   --PPBranchLevel;
1097   parsePPUnknown();
1098   ++PPBranchLevel;
1099   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1100     IncludeGuard = IG_IfNdefed;
1101     IncludeGuardToken = IfCondition;
1102   }
1103 }
1104
1105 void UnwrappedLineParser::parsePPElse() {
1106   // If a potential include guard has an #else, it's not an include guard.
1107   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1108     IncludeGuard = IG_Rejected;
1109   // Don't crash when there is an #else without an #if.
1110   assert(PPBranchLevel >= -1);
1111   if (PPBranchLevel == -1)
1112     conditionalCompilationStart(/*Unreachable=*/true);
1113   conditionalCompilationAlternative();
1114   --PPBranchLevel;
1115   parsePPUnknown();
1116   ++PPBranchLevel;
1117 }
1118
1119 void UnwrappedLineParser::parsePPEndIf() {
1120   conditionalCompilationEnd();
1121   parsePPUnknown();
1122   // If the #endif of a potential include guard is the last thing in the file,
1123   // then we found an include guard.
1124   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1125       Style.IndentPPDirectives != FormatStyle::PPDIS_None) {
1126     IncludeGuard = IG_Found;
1127   }
1128 }
1129
1130 void UnwrappedLineParser::parsePPDefine() {
1131   nextToken();
1132
1133   if (!FormatTok->Tok.getIdentifierInfo()) {
1134     IncludeGuard = IG_Rejected;
1135     IncludeGuardToken = nullptr;
1136     parsePPUnknown();
1137     return;
1138   }
1139
1140   if (IncludeGuard == IG_IfNdefed &&
1141       IncludeGuardToken->TokenText == FormatTok->TokenText) {
1142     IncludeGuard = IG_Defined;
1143     IncludeGuardToken = nullptr;
1144     for (auto &Line : Lines) {
1145       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1146         IncludeGuard = IG_Rejected;
1147         break;
1148       }
1149     }
1150   }
1151
1152   // In the context of a define, even keywords should be treated as normal
1153   // identifiers. Setting the kind to identifier is not enough, because we need
1154   // to treat additional keywords like __except as well, which are already
1155   // identifiers. Setting the identifier info to null interferes with include
1156   // guard processing above, and changes preprocessing nesting.
1157   FormatTok->Tok.setKind(tok::identifier);
1158   FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1159   nextToken();
1160   if (FormatTok->Tok.getKind() == tok::l_paren &&
1161       !FormatTok->hasWhitespaceBefore()) {
1162     parseParens();
1163   }
1164   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1165     Line->Level += PPBranchLevel + 1;
1166   addUnwrappedLine();
1167   ++Line->Level;
1168
1169   Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
1170   assert((int)Line->PPLevel >= 0);
1171   Line->InMacroBody = true;
1172
1173   if (FormatTok->is(tok::identifier) &&
1174       Tokens->peekNextToken()->is(tok::colon)) {
1175     nextToken();
1176     nextToken();
1177   }
1178
1179   // Errors during a preprocessor directive can only affect the layout of the
1180   // preprocessor directive, and thus we ignore them. An alternative approach
1181   // would be to use the same approach we use on the file level (no
1182   // re-indentation if there was a structural error) within the macro
1183   // definition.
1184   parseFile();
1185 }
1186
1187 void UnwrappedLineParser::parsePPPragma() {
1188   Line->InPragmaDirective = true;
1189   parsePPUnknown();
1190 }
1191
1192 void UnwrappedLineParser::parsePPUnknown() {
1193   do {
1194     nextToken();
1195   } while (!eof());
1196   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1197     Line->Level += PPBranchLevel + 1;
1198   addUnwrappedLine();
1199 }
1200
1201 // Here we exclude certain tokens that are not usually the first token in an
1202 // unwrapped line. This is used in attempt to distinguish macro calls without
1203 // trailing semicolons from other constructs split to several lines.
1204 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1205   // Semicolon can be a null-statement, l_square can be a start of a macro or
1206   // a C++11 attribute, but this doesn't seem to be common.
1207   assert(Tok.isNot(TT_AttributeSquare));
1208   return !Tok.isOneOf(tok::semi, tok::l_brace,
1209                       // Tokens that can only be used as binary operators and a
1210                       // part of overloaded operator names.
1211                       tok::period, tok::periodstar, tok::arrow, tok::arrowstar,
1212                       tok::less, tok::greater, tok::slash, tok::percent,
1213                       tok::lessless, tok::greatergreater, tok::equal,
1214                       tok::plusequal, tok::minusequal, tok::starequal,
1215                       tok::slashequal, tok::percentequal, tok::ampequal,
1216                       tok::pipeequal, tok::caretequal, tok::greatergreaterequal,
1217                       tok::lesslessequal,
1218                       // Colon is used in labels, base class lists, initializer
1219                       // lists, range-based for loops, ternary operator, but
1220                       // should never be the first token in an unwrapped line.
1221                       tok::colon,
1222                       // 'noexcept' is a trailing annotation.
1223                       tok::kw_noexcept);
1224 }
1225
1226 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1227                           const FormatToken *FormatTok) {
1228   // FIXME: This returns true for C/C++ keywords like 'struct'.
1229   return FormatTok->is(tok::identifier) &&
1230          (!FormatTok->Tok.getIdentifierInfo() ||
1231           !FormatTok->isOneOf(
1232               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1233               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1234               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1235               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1236               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1237               Keywords.kw_instanceof, Keywords.kw_interface,
1238               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1239 }
1240
1241 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1242                                  const FormatToken *FormatTok) {
1243   return FormatTok->Tok.isLiteral() ||
1244          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1245          mustBeJSIdent(Keywords, FormatTok);
1246 }
1247
1248 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1249 // when encountered after a value (see mustBeJSIdentOrValue).
1250 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1251                            const FormatToken *FormatTok) {
1252   return FormatTok->isOneOf(
1253       tok::kw_return, Keywords.kw_yield,
1254       // conditionals
1255       tok::kw_if, tok::kw_else,
1256       // loops
1257       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1258       // switch/case
1259       tok::kw_switch, tok::kw_case,
1260       // exceptions
1261       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1262       // declaration
1263       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1264       Keywords.kw_async, Keywords.kw_function,
1265       // import/export
1266       Keywords.kw_import, tok::kw_export);
1267 }
1268
1269 // Checks whether a token is a type in K&R C (aka C78).
1270 static bool isC78Type(const FormatToken &Tok) {
1271   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1272                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1273                      tok::identifier);
1274 }
1275
1276 // This function checks whether a token starts the first parameter declaration
1277 // in a K&R C (aka C78) function definition, e.g.:
1278 //   int f(a, b)
1279 //   short a, b;
1280 //   {
1281 //      return a + b;
1282 //   }
1283 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1284                                const FormatToken *FuncName) {
1285   assert(Tok);
1286   assert(Next);
1287   assert(FuncName);
1288
1289   if (FuncName->isNot(tok::identifier))
1290     return false;
1291
1292   const FormatToken *Prev = FuncName->Previous;
1293   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1294     return false;
1295
1296   if (!isC78Type(*Tok) &&
1297       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
1298     return false;
1299   }
1300
1301   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1302     return false;
1303
1304   Tok = Tok->Previous;
1305   if (!Tok || Tok->isNot(tok::r_paren))
1306     return false;
1307
1308   Tok = Tok->Previous;
1309   if (!Tok || Tok->isNot(tok::identifier))
1310     return false;
1311
1312   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1313 }
1314
1315 bool UnwrappedLineParser::parseModuleImport() {
1316   assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
1317
1318   if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
1319       !Token->Tok.getIdentifierInfo() &&
1320       !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) {
1321     return false;
1322   }
1323
1324   nextToken();
1325   while (!eof()) {
1326     if (FormatTok->is(tok::colon)) {
1327       FormatTok->setFinalizedType(TT_ModulePartitionColon);
1328     }
1329     // Handle import <foo/bar.h> as we would an include statement.
1330     else if (FormatTok->is(tok::less)) {
1331       nextToken();
1332       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1333         // Mark tokens up to the trailing line comments as implicit string
1334         // literals.
1335         if (FormatTok->isNot(tok::comment) &&
1336             !FormatTok->TokenText.starts_with("//")) {
1337           FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1338         }
1339         nextToken();
1340       }
1341     }
1342     if (FormatTok->is(tok::semi)) {
1343       nextToken();
1344       break;
1345     }
1346     nextToken();
1347   }
1348
1349   addUnwrappedLine();
1350   return true;
1351 }
1352
1353 // readTokenWithJavaScriptASI reads the next token and terminates the current
1354 // line if JavaScript Automatic Semicolon Insertion must
1355 // happen between the current token and the next token.
1356 //
1357 // This method is conservative - it cannot cover all edge cases of JavaScript,
1358 // but only aims to correctly handle certain well known cases. It *must not*
1359 // return true in speculative cases.
1360 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1361   FormatToken *Previous = FormatTok;
1362   readToken();
1363   FormatToken *Next = FormatTok;
1364
1365   bool IsOnSameLine =
1366       CommentsBeforeNextToken.empty()
1367           ? Next->NewlinesBefore == 0
1368           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1369   if (IsOnSameLine)
1370     return;
1371
1372   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1373   bool PreviousStartsTemplateExpr =
1374       Previous->is(TT_TemplateString) && Previous->TokenText.ends_with("${");
1375   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1376     // If the line contains an '@' sign, the previous token might be an
1377     // annotation, which can precede another identifier/value.
1378     bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1379       return LineNode.Tok->is(tok::at);
1380     });
1381     if (HasAt)
1382       return;
1383   }
1384   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1385     return addUnwrappedLine();
1386   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1387   bool NextEndsTemplateExpr =
1388       Next->is(TT_TemplateString) && Next->TokenText.starts_with("}");
1389   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1390       (PreviousMustBeValue ||
1391        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1392                          tok::minusminus))) {
1393     return addUnwrappedLine();
1394   }
1395   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1396       isJSDeclOrStmt(Keywords, Next)) {
1397     return addUnwrappedLine();
1398   }
1399 }
1400
1401 void UnwrappedLineParser::parseStructuralElement(
1402     const FormatToken *OpeningBrace, IfStmtKind *IfKind,
1403     FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1404   if (Style.Language == FormatStyle::LK_TableGen &&
1405       FormatTok->is(tok::pp_include)) {
1406     nextToken();
1407     if (FormatTok->is(tok::string_literal))
1408       nextToken();
1409     addUnwrappedLine();
1410     return;
1411   }
1412
1413   if (Style.isCpp()) {
1414     while (FormatTok->is(tok::l_square) && handleCppAttributes()) {
1415     }
1416   } else if (Style.isVerilog()) {
1417     if (Keywords.isVerilogStructuredProcedure(*FormatTok)) {
1418       parseForOrWhileLoop(/*HasParens=*/false);
1419       return;
1420     }
1421     if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) {
1422       parseForOrWhileLoop();
1423       return;
1424     }
1425     if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
1426                            Keywords.kw_assume, Keywords.kw_cover)) {
1427       parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
1428       return;
1429     }
1430
1431     // Skip things that can exist before keywords like 'if' and 'case'.
1432     while (true) {
1433       if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique,
1434                              Keywords.kw_unique0)) {
1435         nextToken();
1436       } else if (FormatTok->is(tok::l_paren) &&
1437                  Tokens->peekNextToken()->is(tok::star)) {
1438         parseParens();
1439       } else {
1440         break;
1441       }
1442     }
1443   }
1444
1445   // Tokens that only make sense at the beginning of a line.
1446   switch (FormatTok->Tok.getKind()) {
1447   case tok::kw_asm:
1448     nextToken();
1449     if (FormatTok->is(tok::l_brace)) {
1450       FormatTok->setFinalizedType(TT_InlineASMBrace);
1451       nextToken();
1452       while (FormatTok && !eof()) {
1453         if (FormatTok->is(tok::r_brace)) {
1454           FormatTok->setFinalizedType(TT_InlineASMBrace);
1455           nextToken();
1456           addUnwrappedLine();
1457           break;
1458         }
1459         FormatTok->Finalized = true;
1460         nextToken();
1461       }
1462     }
1463     break;
1464   case tok::kw_namespace:
1465     parseNamespace();
1466     return;
1467   case tok::kw_public:
1468   case tok::kw_protected:
1469   case tok::kw_private:
1470     if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1471         Style.isCSharp()) {
1472       nextToken();
1473     } else {
1474       parseAccessSpecifier();
1475     }
1476     return;
1477   case tok::kw_if: {
1478     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1479       // field/method declaration.
1480       break;
1481     }
1482     FormatToken *Tok = parseIfThenElse(IfKind);
1483     if (IfLeftBrace)
1484       *IfLeftBrace = Tok;
1485     return;
1486   }
1487   case tok::kw_for:
1488   case tok::kw_while:
1489     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1490       // field/method declaration.
1491       break;
1492     }
1493     parseForOrWhileLoop();
1494     return;
1495   case tok::kw_do:
1496     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1497       // field/method declaration.
1498       break;
1499     }
1500     parseDoWhile();
1501     if (HasDoWhile)
1502       *HasDoWhile = true;
1503     return;
1504   case tok::kw_switch:
1505     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1506       // 'switch: string' field declaration.
1507       break;
1508     }
1509     parseSwitch();
1510     return;
1511   case tok::kw_default:
1512     // In Verilog default along with other labels are handled in the next loop.
1513     if (Style.isVerilog())
1514       break;
1515     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1516       // 'default: string' field declaration.
1517       break;
1518     }
1519     nextToken();
1520     if (FormatTok->is(tok::colon)) {
1521       FormatTok->setFinalizedType(TT_CaseLabelColon);
1522       parseLabel();
1523       return;
1524     }
1525     // e.g. "default void f() {}" in a Java interface.
1526     break;
1527   case tok::kw_case:
1528     // Proto: there are no switch/case statements.
1529     if (Style.Language == FormatStyle::LK_Proto) {
1530       nextToken();
1531       return;
1532     }
1533     if (Style.isVerilog()) {
1534       parseBlock();
1535       addUnwrappedLine();
1536       return;
1537     }
1538     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1539       // 'case: string' field declaration.
1540       nextToken();
1541       break;
1542     }
1543     parseCaseLabel();
1544     return;
1545   case tok::kw_try:
1546   case tok::kw___try:
1547     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1548       // field/method declaration.
1549       break;
1550     }
1551     parseTryCatch();
1552     return;
1553   case tok::kw_extern:
1554     nextToken();
1555     if (Style.isVerilog()) {
1556       // In Verilog and extern module declaration looks like a start of module.
1557       // But there is no body and endmodule. So we handle it separately.
1558       if (Keywords.isVerilogHierarchy(*FormatTok)) {
1559         parseVerilogHierarchyHeader();
1560         return;
1561       }
1562     } else if (FormatTok->is(tok::string_literal)) {
1563       nextToken();
1564       if (FormatTok->is(tok::l_brace)) {
1565         if (Style.BraceWrapping.AfterExternBlock)
1566           addUnwrappedLine();
1567         // Either we indent or for backwards compatibility we follow the
1568         // AfterExternBlock style.
1569         unsigned AddLevels =
1570             (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1571                     (Style.BraceWrapping.AfterExternBlock &&
1572                      Style.IndentExternBlock ==
1573                          FormatStyle::IEBS_AfterExternBlock)
1574                 ? 1u
1575                 : 0u;
1576         parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1577         addUnwrappedLine();
1578         return;
1579       }
1580     }
1581     break;
1582   case tok::kw_export:
1583     if (Style.isJavaScript()) {
1584       parseJavaScriptEs6ImportExport();
1585       return;
1586     }
1587     if (Style.isCpp()) {
1588       nextToken();
1589       if (FormatTok->is(tok::kw_namespace)) {
1590         parseNamespace();
1591         return;
1592       }
1593       if (FormatTok->is(Keywords.kw_import) && parseModuleImport())
1594         return;
1595     }
1596     break;
1597   case tok::kw_inline:
1598     nextToken();
1599     if (FormatTok->is(tok::kw_namespace)) {
1600       parseNamespace();
1601       return;
1602     }
1603     break;
1604   case tok::identifier:
1605     if (FormatTok->is(TT_ForEachMacro)) {
1606       parseForOrWhileLoop();
1607       return;
1608     }
1609     if (FormatTok->is(TT_MacroBlockBegin)) {
1610       parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1611                  /*MunchSemi=*/false);
1612       return;
1613     }
1614     if (FormatTok->is(Keywords.kw_import)) {
1615       if (Style.isJavaScript()) {
1616         parseJavaScriptEs6ImportExport();
1617         return;
1618       }
1619       if (Style.Language == FormatStyle::LK_Proto) {
1620         nextToken();
1621         if (FormatTok->is(tok::kw_public))
1622           nextToken();
1623         if (FormatTok->isNot(tok::string_literal))
1624           return;
1625         nextToken();
1626         if (FormatTok->is(tok::semi))
1627           nextToken();
1628         addUnwrappedLine();
1629         return;
1630       }
1631       if (Style.isCpp() && parseModuleImport())
1632         return;
1633     }
1634     if (Style.isCpp() &&
1635         FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1636                            Keywords.kw_slots, Keywords.kw_qslots)) {
1637       nextToken();
1638       if (FormatTok->is(tok::colon)) {
1639         nextToken();
1640         addUnwrappedLine();
1641         return;
1642       }
1643     }
1644     if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1645       parseStatementMacro();
1646       return;
1647     }
1648     if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1649       parseNamespace();
1650       return;
1651     }
1652     // In Verilog labels can be any expression, so we don't do them here.
1653     if (!Style.isVerilog() && Tokens->peekNextToken()->is(tok::colon) &&
1654         !Line->MustBeDeclaration) {
1655       nextToken();
1656       Line->Tokens.begin()->Tok->MustBreakBefore = true;
1657       FormatTok->setFinalizedType(TT_GotoLabelColon);
1658       parseLabel(!Style.IndentGotoLabels);
1659       if (HasLabel)
1660         *HasLabel = true;
1661       return;
1662     }
1663     // In all other cases, parse the declaration.
1664     break;
1665   default:
1666     break;
1667   }
1668
1669   const bool InRequiresExpression =
1670       OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
1671   do {
1672     const FormatToken *Previous = FormatTok->Previous;
1673     switch (FormatTok->Tok.getKind()) {
1674     case tok::at:
1675       nextToken();
1676       if (FormatTok->is(tok::l_brace)) {
1677         nextToken();
1678         parseBracedList();
1679         break;
1680       } else if (Style.Language == FormatStyle::LK_Java &&
1681                  FormatTok->is(Keywords.kw_interface)) {
1682         nextToken();
1683         break;
1684       }
1685       switch (FormatTok->Tok.getObjCKeywordID()) {
1686       case tok::objc_public:
1687       case tok::objc_protected:
1688       case tok::objc_package:
1689       case tok::objc_private:
1690         return parseAccessSpecifier();
1691       case tok::objc_interface:
1692       case tok::objc_implementation:
1693         return parseObjCInterfaceOrImplementation();
1694       case tok::objc_protocol:
1695         if (parseObjCProtocol())
1696           return;
1697         break;
1698       case tok::objc_end:
1699         return; // Handled by the caller.
1700       case tok::objc_optional:
1701       case tok::objc_required:
1702         nextToken();
1703         addUnwrappedLine();
1704         return;
1705       case tok::objc_autoreleasepool:
1706         nextToken();
1707         if (FormatTok->is(tok::l_brace)) {
1708           if (Style.BraceWrapping.AfterControlStatement ==
1709               FormatStyle::BWACS_Always) {
1710             addUnwrappedLine();
1711           }
1712           parseBlock();
1713         }
1714         addUnwrappedLine();
1715         return;
1716       case tok::objc_synchronized:
1717         nextToken();
1718         if (FormatTok->is(tok::l_paren)) {
1719           // Skip synchronization object
1720           parseParens();
1721         }
1722         if (FormatTok->is(tok::l_brace)) {
1723           if (Style.BraceWrapping.AfterControlStatement ==
1724               FormatStyle::BWACS_Always) {
1725             addUnwrappedLine();
1726           }
1727           parseBlock();
1728         }
1729         addUnwrappedLine();
1730         return;
1731       case tok::objc_try:
1732         // This branch isn't strictly necessary (the kw_try case below would
1733         // do this too after the tok::at is parsed above).  But be explicit.
1734         parseTryCatch();
1735         return;
1736       default:
1737         break;
1738       }
1739       break;
1740     case tok::kw_requires: {
1741       if (Style.isCpp()) {
1742         bool ParsedClause = parseRequires();
1743         if (ParsedClause)
1744           return;
1745       } else {
1746         nextToken();
1747       }
1748       break;
1749     }
1750     case tok::kw_enum:
1751       // Ignore if this is part of "template <enum ...".
1752       if (Previous && Previous->is(tok::less)) {
1753         nextToken();
1754         break;
1755       }
1756
1757       // parseEnum falls through and does not yet add an unwrapped line as an
1758       // enum definition can start a structural element.
1759       if (!parseEnum())
1760         break;
1761       // This only applies to C++ and Verilog.
1762       if (!Style.isCpp() && !Style.isVerilog()) {
1763         addUnwrappedLine();
1764         return;
1765       }
1766       break;
1767     case tok::kw_typedef:
1768       nextToken();
1769       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1770                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1771                              Keywords.kw_CF_CLOSED_ENUM,
1772                              Keywords.kw_NS_CLOSED_ENUM)) {
1773         parseEnum();
1774       }
1775       break;
1776     case tok::kw_class:
1777       if (Style.isVerilog()) {
1778         parseBlock();
1779         addUnwrappedLine();
1780         return;
1781       }
1782       [[fallthrough]];
1783     case tok::kw_struct:
1784     case tok::kw_union:
1785       if (parseStructLike())
1786         return;
1787       break;
1788     case tok::kw_decltype:
1789       nextToken();
1790       if (FormatTok->is(tok::l_paren)) {
1791         parseParens();
1792         assert(FormatTok->Previous);
1793         if (FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto,
1794                                               tok::l_paren)) {
1795           Line->SeenDecltypeAuto = true;
1796         }
1797       }
1798       break;
1799     case tok::period:
1800       nextToken();
1801       // In Java, classes have an implicit static member "class".
1802       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1803           FormatTok->is(tok::kw_class)) {
1804         nextToken();
1805       }
1806       if (Style.isJavaScript() && FormatTok &&
1807           FormatTok->Tok.getIdentifierInfo()) {
1808         // JavaScript only has pseudo keywords, all keywords are allowed to
1809         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1810         nextToken();
1811       }
1812       break;
1813     case tok::semi:
1814       nextToken();
1815       addUnwrappedLine();
1816       return;
1817     case tok::r_brace:
1818       addUnwrappedLine();
1819       return;
1820     case tok::l_paren: {
1821       parseParens();
1822       // Break the unwrapped line if a K&R C function definition has a parameter
1823       // declaration.
1824       if (OpeningBrace || !Style.isCpp() || !Previous || eof())
1825         break;
1826       if (isC78ParameterDecl(FormatTok,
1827                              Tokens->peekNextToken(/*SkipComment=*/true),
1828                              Previous)) {
1829         addUnwrappedLine();
1830         return;
1831       }
1832       break;
1833     }
1834     case tok::kw_operator:
1835       nextToken();
1836       if (FormatTok->isBinaryOperator())
1837         nextToken();
1838       break;
1839     case tok::caret:
1840       nextToken();
1841       // Block return type.
1842       if (FormatTok->Tok.isAnyIdentifier() ||
1843           FormatTok->isSimpleTypeSpecifier()) {
1844         nextToken();
1845         // Return types: pointers are ok too.
1846         while (FormatTok->is(tok::star))
1847           nextToken();
1848       }
1849       // Block argument list.
1850       if (FormatTok->is(tok::l_paren))
1851         parseParens();
1852       // Block body.
1853       if (FormatTok->is(tok::l_brace))
1854         parseChildBlock();
1855       break;
1856     case tok::l_brace:
1857       if (InRequiresExpression)
1858         FormatTok->setFinalizedType(TT_BracedListLBrace);
1859       if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1860         IsDecltypeAutoFunction = Line->SeenDecltypeAuto;
1861         // A block outside of parentheses must be the last part of a
1862         // structural element.
1863         // FIXME: Figure out cases where this is not true, and add projections
1864         // for them (the one we know is missing are lambdas).
1865         if (Style.Language == FormatStyle::LK_Java &&
1866             Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1867           // If necessary, we could set the type to something different than
1868           // TT_FunctionLBrace.
1869           if (Style.BraceWrapping.AfterControlStatement ==
1870               FormatStyle::BWACS_Always) {
1871             addUnwrappedLine();
1872           }
1873         } else if (Style.BraceWrapping.AfterFunction) {
1874           addUnwrappedLine();
1875         }
1876         FormatTok->setFinalizedType(TT_FunctionLBrace);
1877         parseBlock();
1878         IsDecltypeAutoFunction = false;
1879         addUnwrappedLine();
1880         return;
1881       }
1882       // Otherwise this was a braced init list, and the structural
1883       // element continues.
1884       break;
1885     case tok::kw_try:
1886       if (Style.isJavaScript() && Line->MustBeDeclaration) {
1887         // field/method declaration.
1888         nextToken();
1889         break;
1890       }
1891       // We arrive here when parsing function-try blocks.
1892       if (Style.BraceWrapping.AfterFunction)
1893         addUnwrappedLine();
1894       parseTryCatch();
1895       return;
1896     case tok::identifier: {
1897       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1898           Line->MustBeDeclaration) {
1899         addUnwrappedLine();
1900         parseCSharpGenericTypeConstraint();
1901         break;
1902       }
1903       if (FormatTok->is(TT_MacroBlockEnd)) {
1904         addUnwrappedLine();
1905         return;
1906       }
1907
1908       // Function declarations (as opposed to function expressions) are parsed
1909       // on their own unwrapped line by continuing this loop. Function
1910       // expressions (functions that are not on their own line) must not create
1911       // a new unwrapped line, so they are special cased below.
1912       size_t TokenCount = Line->Tokens.size();
1913       if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1914           (TokenCount > 1 ||
1915            (TokenCount == 1 &&
1916             Line->Tokens.front().Tok->isNot(Keywords.kw_async)))) {
1917         tryToParseJSFunction();
1918         break;
1919       }
1920       if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1921           FormatTok->is(Keywords.kw_interface)) {
1922         if (Style.isJavaScript()) {
1923           // In JavaScript/TypeScript, "interface" can be used as a standalone
1924           // identifier, e.g. in `var interface = 1;`. If "interface" is
1925           // followed by another identifier, it is very like to be an actual
1926           // interface declaration.
1927           unsigned StoredPosition = Tokens->getPosition();
1928           FormatToken *Next = Tokens->getNextToken();
1929           FormatTok = Tokens->setPosition(StoredPosition);
1930           if (!mustBeJSIdent(Keywords, Next)) {
1931             nextToken();
1932             break;
1933           }
1934         }
1935         parseRecord();
1936         addUnwrappedLine();
1937         return;
1938       }
1939
1940       if (Style.isVerilog()) {
1941         if (FormatTok->is(Keywords.kw_table)) {
1942           parseVerilogTable();
1943           return;
1944         }
1945         if (Keywords.isVerilogBegin(*FormatTok) ||
1946             Keywords.isVerilogHierarchy(*FormatTok)) {
1947           parseBlock();
1948           addUnwrappedLine();
1949           return;
1950         }
1951       }
1952
1953       if (!Style.isCpp() && FormatTok->is(Keywords.kw_interface)) {
1954         if (parseStructLike())
1955           return;
1956         break;
1957       }
1958
1959       if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1960         parseStatementMacro();
1961         return;
1962       }
1963
1964       // See if the following token should start a new unwrapped line.
1965       StringRef Text = FormatTok->TokenText;
1966
1967       FormatToken *PreviousToken = FormatTok;
1968       nextToken();
1969
1970       // JS doesn't have macros, and within classes colons indicate fields, not
1971       // labels.
1972       if (Style.isJavaScript())
1973         break;
1974
1975       auto OneTokenSoFar = [&]() {
1976         auto I = Line->Tokens.begin(), E = Line->Tokens.end();
1977         while (I != E && I->Tok->is(tok::comment))
1978           ++I;
1979         if (Style.isVerilog())
1980           while (I != E && I->Tok->is(tok::hash))
1981             ++I;
1982         return I != E && (++I == E);
1983       };
1984       if (OneTokenSoFar()) {
1985         // Recognize function-like macro usages without trailing semicolon as
1986         // well as free-standing macros like Q_OBJECT.
1987         bool FunctionLike = FormatTok->is(tok::l_paren);
1988         if (FunctionLike)
1989           parseParens();
1990
1991         bool FollowedByNewline =
1992             CommentsBeforeNextToken.empty()
1993                 ? FormatTok->NewlinesBefore > 0
1994                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1995
1996         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1997             tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
1998           if (PreviousToken->isNot(TT_UntouchableMacroFunc))
1999             PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
2000           addUnwrappedLine();
2001           return;
2002         }
2003       }
2004       break;
2005     }
2006     case tok::equal:
2007       if ((Style.isJavaScript() || Style.isCSharp()) &&
2008           FormatTok->is(TT_FatArrow)) {
2009         tryToParseChildBlock();
2010         break;
2011       }
2012
2013       nextToken();
2014       if (FormatTok->is(tok::l_brace)) {
2015         // Block kind should probably be set to BK_BracedInit for any language.
2016         // C# needs this change to ensure that array initialisers and object
2017         // initialisers are indented the same way.
2018         if (Style.isCSharp())
2019           FormatTok->setBlockKind(BK_BracedInit);
2020         nextToken();
2021         parseBracedList();
2022       } else if (Style.Language == FormatStyle::LK_Proto &&
2023                  FormatTok->is(tok::less)) {
2024         nextToken();
2025         parseBracedList(/*IsAngleBracket=*/true);
2026       }
2027       break;
2028     case tok::l_square:
2029       parseSquare();
2030       break;
2031     case tok::kw_new:
2032       parseNew();
2033       break;
2034     case tok::kw_case:
2035       // Proto: there are no switch/case statements.
2036       if (Style.Language == FormatStyle::LK_Proto) {
2037         nextToken();
2038         return;
2039       }
2040       // In Verilog switch is called case.
2041       if (Style.isVerilog()) {
2042         parseBlock();
2043         addUnwrappedLine();
2044         return;
2045       }
2046       if (Style.isJavaScript() && Line->MustBeDeclaration) {
2047         // 'case: string' field declaration.
2048         nextToken();
2049         break;
2050       }
2051       parseCaseLabel();
2052       break;
2053     case tok::kw_default:
2054       nextToken();
2055       if (Style.isVerilog()) {
2056         if (FormatTok->is(tok::colon)) {
2057           // The label will be handled in the next iteration.
2058           break;
2059         }
2060         if (FormatTok->is(Keywords.kw_clocking)) {
2061           // A default clocking block.
2062           parseBlock();
2063           addUnwrappedLine();
2064           return;
2065         }
2066         parseVerilogCaseLabel();
2067         return;
2068       }
2069       break;
2070     case tok::colon:
2071       nextToken();
2072       if (Style.isVerilog()) {
2073         parseVerilogCaseLabel();
2074         return;
2075       }
2076       break;
2077     default:
2078       nextToken();
2079       break;
2080     }
2081   } while (!eof());
2082 }
2083
2084 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2085   assert(FormatTok->is(tok::l_brace));
2086   if (!Style.isCSharp())
2087     return false;
2088   // See if it's a property accessor.
2089   if (FormatTok->Previous->isNot(tok::identifier))
2090     return false;
2091
2092   // See if we are inside a property accessor.
2093   //
2094   // Record the current tokenPosition so that we can advance and
2095   // reset the current token. `Next` is not set yet so we need
2096   // another way to advance along the token stream.
2097   unsigned int StoredPosition = Tokens->getPosition();
2098   FormatToken *Tok = Tokens->getNextToken();
2099
2100   // A trivial property accessor is of the form:
2101   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2102   // Track these as they do not require line breaks to be introduced.
2103   bool HasSpecialAccessor = false;
2104   bool IsTrivialPropertyAccessor = true;
2105   while (!eof()) {
2106     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
2107                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
2108                      Keywords.kw_init, Keywords.kw_set)) {
2109       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
2110         HasSpecialAccessor = true;
2111       Tok = Tokens->getNextToken();
2112       continue;
2113     }
2114     if (Tok->isNot(tok::r_brace))
2115       IsTrivialPropertyAccessor = false;
2116     break;
2117   }
2118
2119   if (!HasSpecialAccessor) {
2120     Tokens->setPosition(StoredPosition);
2121     return false;
2122   }
2123
2124   // Try to parse the property accessor:
2125   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2126   Tokens->setPosition(StoredPosition);
2127   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2128     addUnwrappedLine();
2129   nextToken();
2130   do {
2131     switch (FormatTok->Tok.getKind()) {
2132     case tok::r_brace:
2133       nextToken();
2134       if (FormatTok->is(tok::equal)) {
2135         while (!eof() && FormatTok->isNot(tok::semi))
2136           nextToken();
2137         nextToken();
2138       }
2139       addUnwrappedLine();
2140       return true;
2141     case tok::l_brace:
2142       ++Line->Level;
2143       parseBlock(/*MustBeDeclaration=*/true);
2144       addUnwrappedLine();
2145       --Line->Level;
2146       break;
2147     case tok::equal:
2148       if (FormatTok->is(TT_FatArrow)) {
2149         ++Line->Level;
2150         do {
2151           nextToken();
2152         } while (!eof() && FormatTok->isNot(tok::semi));
2153         nextToken();
2154         addUnwrappedLine();
2155         --Line->Level;
2156         break;
2157       }
2158       nextToken();
2159       break;
2160     default:
2161       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
2162                              Keywords.kw_set) &&
2163           !IsTrivialPropertyAccessor) {
2164         // Non-trivial get/set needs to be on its own line.
2165         addUnwrappedLine();
2166       }
2167       nextToken();
2168     }
2169   } while (!eof());
2170
2171   // Unreachable for well-formed code (paired '{' and '}').
2172   return true;
2173 }
2174
2175 bool UnwrappedLineParser::tryToParseLambda() {
2176   assert(FormatTok->is(tok::l_square));
2177   if (!Style.isCpp()) {
2178     nextToken();
2179     return false;
2180   }
2181   FormatToken &LSquare = *FormatTok;
2182   if (!tryToParseLambdaIntroducer())
2183     return false;
2184
2185   bool SeenArrow = false;
2186   bool InTemplateParameterList = false;
2187
2188   while (FormatTok->isNot(tok::l_brace)) {
2189     if (FormatTok->isSimpleTypeSpecifier()) {
2190       nextToken();
2191       continue;
2192     }
2193     switch (FormatTok->Tok.getKind()) {
2194     case tok::l_brace:
2195       break;
2196     case tok::l_paren:
2197       parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
2198       break;
2199     case tok::l_square:
2200       parseSquare();
2201       break;
2202     case tok::less:
2203       assert(FormatTok->Previous);
2204       if (FormatTok->Previous->is(tok::r_square))
2205         InTemplateParameterList = true;
2206       nextToken();
2207       break;
2208     case tok::kw_auto:
2209     case tok::kw_class:
2210     case tok::kw_template:
2211     case tok::kw_typename:
2212     case tok::amp:
2213     case tok::star:
2214     case tok::kw_const:
2215     case tok::kw_constexpr:
2216     case tok::kw_consteval:
2217     case tok::comma:
2218     case tok::greater:
2219     case tok::identifier:
2220     case tok::numeric_constant:
2221     case tok::coloncolon:
2222     case tok::kw_mutable:
2223     case tok::kw_noexcept:
2224     case tok::kw_static:
2225       nextToken();
2226       break;
2227     // Specialization of a template with an integer parameter can contain
2228     // arithmetic, logical, comparison and ternary operators.
2229     //
2230     // FIXME: This also accepts sequences of operators that are not in the scope
2231     // of a template argument list.
2232     //
2233     // In a C++ lambda a template type can only occur after an arrow. We use
2234     // this as an heuristic to distinguish between Objective-C expressions
2235     // followed by an `a->b` expression, such as:
2236     // ([obj func:arg] + a->b)
2237     // Otherwise the code below would parse as a lambda.
2238     case tok::plus:
2239     case tok::minus:
2240     case tok::exclaim:
2241     case tok::tilde:
2242     case tok::slash:
2243     case tok::percent:
2244     case tok::lessless:
2245     case tok::pipe:
2246     case tok::pipepipe:
2247     case tok::ampamp:
2248     case tok::caret:
2249     case tok::equalequal:
2250     case tok::exclaimequal:
2251     case tok::greaterequal:
2252     case tok::lessequal:
2253     case tok::question:
2254     case tok::colon:
2255     case tok::ellipsis:
2256     case tok::kw_true:
2257     case tok::kw_false:
2258       if (SeenArrow || InTemplateParameterList) {
2259         nextToken();
2260         break;
2261       }
2262       return true;
2263     case tok::arrow:
2264       // This might or might not actually be a lambda arrow (this could be an
2265       // ObjC method invocation followed by a dereferencing arrow). We might
2266       // reset this back to TT_Unknown in TokenAnnotator.
2267       FormatTok->setFinalizedType(TT_TrailingReturnArrow);
2268       SeenArrow = true;
2269       nextToken();
2270       break;
2271     case tok::kw_requires: {
2272       auto *RequiresToken = FormatTok;
2273       nextToken();
2274       parseRequiresClause(RequiresToken);
2275       break;
2276     }
2277     case tok::equal:
2278       if (!InTemplateParameterList)
2279         return true;
2280       nextToken();
2281       break;
2282     default:
2283       return true;
2284     }
2285   }
2286
2287   FormatTok->setFinalizedType(TT_LambdaLBrace);
2288   LSquare.setFinalizedType(TT_LambdaLSquare);
2289
2290   NestedLambdas.push_back(Line->SeenDecltypeAuto);
2291   parseChildBlock();
2292   assert(!NestedLambdas.empty());
2293   NestedLambdas.pop_back();
2294
2295   return true;
2296 }
2297
2298 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2299   const FormatToken *Previous = FormatTok->Previous;
2300   const FormatToken *LeftSquare = FormatTok;
2301   nextToken();
2302   if ((Previous && ((Previous->Tok.getIdentifierInfo() &&
2303                      !Previous->isOneOf(tok::kw_return, tok::kw_co_await,
2304                                         tok::kw_co_yield, tok::kw_co_return)) ||
2305                     Previous->closesScope())) ||
2306       LeftSquare->isCppStructuredBinding(Style)) {
2307     return false;
2308   }
2309   if (FormatTok->is(tok::l_square))
2310     return false;
2311   if (FormatTok->is(tok::r_square)) {
2312     const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
2313     if (Next->is(tok::greater))
2314       return false;
2315   }
2316   parseSquare(/*LambdaIntroducer=*/true);
2317   return true;
2318 }
2319
2320 void UnwrappedLineParser::tryToParseJSFunction() {
2321   assert(FormatTok->is(Keywords.kw_function));
2322   if (FormatTok->is(Keywords.kw_async))
2323     nextToken();
2324   // Consume "function".
2325   nextToken();
2326
2327   // Consume * (generator function). Treat it like C++'s overloaded operators.
2328   if (FormatTok->is(tok::star)) {
2329     FormatTok->setFinalizedType(TT_OverloadedOperator);
2330     nextToken();
2331   }
2332
2333   // Consume function name.
2334   if (FormatTok->is(tok::identifier))
2335     nextToken();
2336
2337   if (FormatTok->isNot(tok::l_paren))
2338     return;
2339
2340   // Parse formal parameter list.
2341   parseParens();
2342
2343   if (FormatTok->is(tok::colon)) {
2344     // Parse a type definition.
2345     nextToken();
2346
2347     // Eat the type declaration. For braced inline object types, balance braces,
2348     // otherwise just parse until finding an l_brace for the function body.
2349     if (FormatTok->is(tok::l_brace))
2350       tryToParseBracedList();
2351     else
2352       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2353         nextToken();
2354   }
2355
2356   if (FormatTok->is(tok::semi))
2357     return;
2358
2359   parseChildBlock();
2360 }
2361
2362 bool UnwrappedLineParser::tryToParseBracedList() {
2363   if (FormatTok->is(BK_Unknown))
2364     calculateBraceTypes();
2365   assert(FormatTok->isNot(BK_Unknown));
2366   if (FormatTok->is(BK_Block))
2367     return false;
2368   nextToken();
2369   parseBracedList();
2370   return true;
2371 }
2372
2373 bool UnwrappedLineParser::tryToParseChildBlock() {
2374   assert(Style.isJavaScript() || Style.isCSharp());
2375   assert(FormatTok->is(TT_FatArrow));
2376   // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2377   // They always start an expression or a child block if followed by a curly
2378   // brace.
2379   nextToken();
2380   if (FormatTok->isNot(tok::l_brace))
2381     return false;
2382   parseChildBlock();
2383   return true;
2384 }
2385
2386 bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) {
2387   bool HasError = false;
2388
2389   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2390   // replace this by using parseAssignmentExpression() inside.
2391   do {
2392     if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2393         tryToParseChildBlock()) {
2394       continue;
2395     }
2396     if (Style.isJavaScript()) {
2397       if (FormatTok->is(Keywords.kw_function)) {
2398         tryToParseJSFunction();
2399         continue;
2400       }
2401       if (FormatTok->is(tok::l_brace)) {
2402         // Could be a method inside of a braced list `{a() { return 1; }}`.
2403         if (tryToParseBracedList())
2404           continue;
2405         parseChildBlock();
2406       }
2407     }
2408     if (FormatTok->is(IsAngleBracket ? tok::greater : tok::r_brace)) {
2409       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2410         addUnwrappedLine();
2411       nextToken();
2412       return !HasError;
2413     }
2414     switch (FormatTok->Tok.getKind()) {
2415     case tok::l_square:
2416       if (Style.isCSharp())
2417         parseSquare();
2418       else
2419         tryToParseLambda();
2420       break;
2421     case tok::l_paren:
2422       parseParens();
2423       // JavaScript can just have free standing methods and getters/setters in
2424       // object literals. Detect them by a "{" following ")".
2425       if (Style.isJavaScript()) {
2426         if (FormatTok->is(tok::l_brace))
2427           parseChildBlock();
2428         break;
2429       }
2430       break;
2431     case tok::l_brace:
2432       // Assume there are no blocks inside a braced init list apart
2433       // from the ones we explicitly parse out (like lambdas).
2434       FormatTok->setBlockKind(BK_BracedInit);
2435       nextToken();
2436       parseBracedList();
2437       break;
2438     case tok::less:
2439       nextToken();
2440       if (IsAngleBracket)
2441         parseBracedList(/*IsAngleBracket=*/true);
2442       break;
2443     case tok::semi:
2444       // JavaScript (or more precisely TypeScript) can have semicolons in braced
2445       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2446       // used for error recovery if we have otherwise determined that this is
2447       // a braced list.
2448       if (Style.isJavaScript()) {
2449         nextToken();
2450         break;
2451       }
2452       HasError = true;
2453       if (!IsEnum)
2454         return false;
2455       nextToken();
2456       break;
2457     case tok::comma:
2458       nextToken();
2459       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2460         addUnwrappedLine();
2461       break;
2462     default:
2463       nextToken();
2464       break;
2465     }
2466   } while (!eof());
2467   return false;
2468 }
2469
2470 /// \brief Parses a pair of parentheses (and everything between them).
2471 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2472 /// double ampersands. This applies for all nested scopes as well.
2473 ///
2474 /// Returns whether there is a `=` token between the parentheses.
2475 bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2476   assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2477   auto *LeftParen = FormatTok;
2478   bool SeenEqual = false;
2479   const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace);
2480   nextToken();
2481   do {
2482     switch (FormatTok->Tok.getKind()) {
2483     case tok::l_paren:
2484       if (parseParens(AmpAmpTokenType))
2485         SeenEqual = true;
2486       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2487         parseChildBlock();
2488       break;
2489     case tok::r_paren:
2490       if (!MightBeStmtExpr &&
2491           Style.RemoveParentheses > FormatStyle::RPS_Leave) {
2492         const auto *Prev = LeftParen->Previous;
2493         const auto *Next = Tokens->peekNextToken();
2494         const bool DoubleParens =
2495             Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren);
2496         const auto *PrevPrev = Prev ? Prev->getPreviousNonComment() : nullptr;
2497         const bool Blacklisted =
2498             PrevPrev &&
2499             (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) ||
2500              (SeenEqual &&
2501               (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) ||
2502                PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if))));
2503         const bool ReturnParens =
2504             Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement &&
2505             ((NestedLambdas.empty() && !IsDecltypeAutoFunction) ||
2506              (!NestedLambdas.empty() && !NestedLambdas.back())) &&
2507             Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next &&
2508             Next->is(tok::semi);
2509         if ((DoubleParens && !Blacklisted) || ReturnParens) {
2510           LeftParen->Optional = true;
2511           FormatTok->Optional = true;
2512         }
2513       }
2514       nextToken();
2515       return SeenEqual;
2516     case tok::r_brace:
2517       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2518       return SeenEqual;
2519     case tok::l_square:
2520       tryToParseLambda();
2521       break;
2522     case tok::l_brace:
2523       if (!tryToParseBracedList())
2524         parseChildBlock();
2525       break;
2526     case tok::at:
2527       nextToken();
2528       if (FormatTok->is(tok::l_brace)) {
2529         nextToken();
2530         parseBracedList();
2531       }
2532       break;
2533     case tok::equal:
2534       SeenEqual = true;
2535       if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2536         tryToParseChildBlock();
2537       else
2538         nextToken();
2539       break;
2540     case tok::kw_class:
2541       if (Style.isJavaScript())
2542         parseRecord(/*ParseAsExpr=*/true);
2543       else
2544         nextToken();
2545       break;
2546     case tok::identifier:
2547       if (Style.isJavaScript() && (FormatTok->is(Keywords.kw_function)))
2548         tryToParseJSFunction();
2549       else
2550         nextToken();
2551       break;
2552     case tok::kw_requires: {
2553       auto RequiresToken = FormatTok;
2554       nextToken();
2555       parseRequiresExpression(RequiresToken);
2556       break;
2557     }
2558     case tok::ampamp:
2559       if (AmpAmpTokenType != TT_Unknown)
2560         FormatTok->setFinalizedType(AmpAmpTokenType);
2561       [[fallthrough]];
2562     default:
2563       nextToken();
2564       break;
2565     }
2566   } while (!eof());
2567   return SeenEqual;
2568 }
2569
2570 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2571   if (!LambdaIntroducer) {
2572     assert(FormatTok->is(tok::l_square) && "'[' expected.");
2573     if (tryToParseLambda())
2574       return;
2575   }
2576   do {
2577     switch (FormatTok->Tok.getKind()) {
2578     case tok::l_paren:
2579       parseParens();
2580       break;
2581     case tok::r_square:
2582       nextToken();
2583       return;
2584     case tok::r_brace:
2585       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2586       return;
2587     case tok::l_square:
2588       parseSquare();
2589       break;
2590     case tok::l_brace: {
2591       if (!tryToParseBracedList())
2592         parseChildBlock();
2593       break;
2594     }
2595     case tok::at:
2596       nextToken();
2597       if (FormatTok->is(tok::l_brace)) {
2598         nextToken();
2599         parseBracedList();
2600       }
2601       break;
2602     default:
2603       nextToken();
2604       break;
2605     }
2606   } while (!eof());
2607 }
2608
2609 void UnwrappedLineParser::keepAncestorBraces() {
2610   if (!Style.RemoveBracesLLVM)
2611     return;
2612
2613   const int MaxNestingLevels = 2;
2614   const int Size = NestedTooDeep.size();
2615   if (Size >= MaxNestingLevels)
2616     NestedTooDeep[Size - MaxNestingLevels] = true;
2617   NestedTooDeep.push_back(false);
2618 }
2619
2620 static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2621   for (const auto &Token : llvm::reverse(Line.Tokens))
2622     if (Token.Tok->isNot(tok::comment))
2623       return Token.Tok;
2624
2625   return nullptr;
2626 }
2627
2628 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2629   FormatToken *Tok = nullptr;
2630
2631   if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2632       PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) {
2633     Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never
2634               ? getLastNonComment(*Line)
2635               : Line->Tokens.back().Tok;
2636     assert(Tok);
2637     if (Tok->BraceCount < 0) {
2638       assert(Tok->BraceCount == -1);
2639       Tok = nullptr;
2640     } else {
2641       Tok->BraceCount = -1;
2642     }
2643   }
2644
2645   addUnwrappedLine();
2646   ++Line->Level;
2647   parseStructuralElement();
2648
2649   if (Tok) {
2650     assert(!Line->InPPDirective);
2651     Tok = nullptr;
2652     for (const auto &L : llvm::reverse(*CurrentLines)) {
2653       if (!L.InPPDirective && getLastNonComment(L)) {
2654         Tok = L.Tokens.back().Tok;
2655         break;
2656       }
2657     }
2658     assert(Tok);
2659     ++Tok->BraceCount;
2660   }
2661
2662   if (CheckEOF && eof())
2663     addUnwrappedLine();
2664
2665   --Line->Level;
2666 }
2667
2668 static void markOptionalBraces(FormatToken *LeftBrace) {
2669   if (!LeftBrace)
2670     return;
2671
2672   assert(LeftBrace->is(tok::l_brace));
2673
2674   FormatToken *RightBrace = LeftBrace->MatchingParen;
2675   if (!RightBrace) {
2676     assert(!LeftBrace->Optional);
2677     return;
2678   }
2679
2680   assert(RightBrace->is(tok::r_brace));
2681   assert(RightBrace->MatchingParen == LeftBrace);
2682   assert(LeftBrace->Optional == RightBrace->Optional);
2683
2684   LeftBrace->Optional = true;
2685   RightBrace->Optional = true;
2686 }
2687
2688 void UnwrappedLineParser::handleAttributes() {
2689   // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2690   if (FormatTok->isAttribute())
2691     nextToken();
2692   else if (FormatTok->is(tok::l_square))
2693     handleCppAttributes();
2694 }
2695
2696 bool UnwrappedLineParser::handleCppAttributes() {
2697   // Handle [[likely]] / [[unlikely]] attributes.
2698   assert(FormatTok->is(tok::l_square));
2699   if (!tryToParseSimpleAttribute())
2700     return false;
2701   parseSquare();
2702   return true;
2703 }
2704
2705 /// Returns whether \c Tok begins a block.
2706 bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
2707   // FIXME: rename the function or make
2708   // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2709   return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
2710                            : Tok.is(tok::l_brace);
2711 }
2712
/// Parses an `if` statement (or, for Verilog, a `restrict` / `assert` /
/// `assume` / `cover` statement) together with its optional `else` branch.
///
/// When Style.RemoveBracesLLVM is enabled this also decides whether the braces
/// of the if and else blocks may be dropped and marks them accordingly.
///
/// \param IfKind If non-null and Style.RemoveBracesLLVM is enabled, receives
/// the shape of the parsed statement: IfOnly, IfElse or IfElseIf. It is not
/// written on the early-return paths.
/// \param KeepBraces If true, the braces of this statement are never marked
/// optional.
/// \param IsVerilogAssert Enables the Verilog assertion forms (`assert #0`,
/// `assert final`, a missing then-action, ...).
/// \returns nullptr if Style.RemoveBracesLLVM is disabled or a Verilog
/// assertion without an action was parsed; otherwise the left brace of the if
/// block, which is nullptr when the body was not a block.
FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
                                                  bool KeepBraces,
                                                  bool IsVerilogAssert) {
  assert((FormatTok->is(tok::kw_if) ||
          (Style.isVerilog() &&
           FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
                              Keywords.kw_assume, Keywords.kw_cover))) &&
         "'if' expected");
  nextToken();

  if (IsVerilogAssert) {
    // Handle `assert #0` and `assert final`.
    if (FormatTok->is(Keywords.kw_verilogHash)) {
      nextToken();
      if (FormatTok->is(tok::numeric_constant))
        nextToken();
    } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property,
                                  Keywords.kw_sequence)) {
      nextToken();
    }
  }

  // Handle `if !consteval`.
  if (FormatTok->is(tok::exclaim))
    nextToken();

  // Braces of an `if consteval` are always kept. For every other form they
  // are kept unless brace removal is enabled and the caller did not ask to
  // keep them.
  bool KeepIfBraces = true;
  if (FormatTok->is(tok::kw_consteval)) {
    nextToken();
  } else {
    KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
    // Skip `constexpr` or a single identifier in front of the condition.
    if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
      nextToken();
    if (FormatTok->is(tok::l_paren)) {
      FormatTok->setFinalizedType(TT_ConditionLParen);
      parseParens();
    }
  }
  handleAttributes();
  // The then action is optional in Verilog assert statements.
  if (IsVerilogAssert && FormatTok->is(tok::semi)) {
    nextToken();
    addUnwrappedLine();
    return nullptr;
  }

  // Set when the line after the if block still has to be flushed, i.e. the
  // block was parsed and Style.BraceWrapping.BeforeElse is off.
  bool NeedsUnwrappedLine = false;
  keepAncestorBraces();

  FormatToken *IfLeftBrace = nullptr;
  IfStmtKind IfBlockKind = IfStmtKind::NotIf;

  // Parse the then-branch: a block, nothing at all (Verilog assertion directly
  // followed by `else`), or a single unbraced statement.
  if (isBlockBegin(*FormatTok)) {
    FormatTok->setFinalizedType(TT_ControlStatementLBrace);
    IfLeftBrace = FormatTok;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
               /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
    setPreviousRBraceType(TT_ControlStatementRBrace);
    if (Style.BraceWrapping.BeforeElse)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) {
    addUnwrappedLine();
  } else {
    parseUnbracedBody();
  }

  // The if braces must also stay when the block has no matching brace
  // recorded, when this statement was flagged as nested too deep, or when the
  // block reported an `if` without `else` or an `if ... else if` chain.
  if (Style.RemoveBracesLLVM) {
    assert(!NestedTooDeep.empty());
    KeepIfBraces = KeepIfBraces ||
                   (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
                   NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
                   IfBlockKind == IfStmtKind::IfElseIf;
  }

  bool KeepElseBraces = KeepIfBraces;
  FormatToken *ElseLeftBrace = nullptr;
  IfStmtKind Kind = IfStmtKind::IfOnly;

  if (FormatTok->is(tok::kw_else)) {
    if (Style.RemoveBracesLLVM) {
      // Start the else branch with a clean "too deep" flag.
      NestedTooDeep.back() = false;
      Kind = IfStmtKind::IfElse;
    }
    nextToken();
    handleAttributes();
    if (isBlockBegin(*FormatTok)) {
      // Remember whether the else block starts with an `if`, i.e. `else { if`.
      const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
      FormatTok->setFinalizedType(TT_ElseLBrace);
      ElseLeftBrace = FormatTok;
      CompoundStatementIndenter Indenter(this, Style, Line->Level);
      IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
      FormatToken *IfLBrace =
          parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                     /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
      setPreviousRBraceType(TT_ElseRBrace);
      if (FormatTok->is(tok::kw_else)) {
        // Another `else` follows this else block.
        KeepElseBraces = KeepElseBraces ||
                         ElseBlockKind == IfStmtKind::IfOnly ||
                         ElseBlockKind == IfStmtKind::IfElseIf;
      } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
        // The nested `if` keeps its own braces: mark the braces of the else
        // block optional right away and exclude both branches from the
        // marking done at the end of this function.
        KeepElseBraces = true;
        assert(ElseLeftBrace->MatchingParen);
        markOptionalBraces(ElseLeftBrace);
      }
      addUnwrappedLine();
    } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) {
      // `else if`: parse the nested if statement recursively.
      const FormatToken *Previous = Tokens->getPreviousToken();
      assert(Previous);
      const bool IsPrecededByComment = Previous->is(tok::comment);
      // A comment between `else` and `if` puts the `if` on its own line, one
      // level deeper.
      if (IsPrecededByComment) {
        addUnwrappedLine();
        ++Line->Level;
      }
      bool TooDeep = true;
      // Take this statement's entry off the nesting stack for the duration of
      // the recursive call and restore it afterwards.
      if (Style.RemoveBracesLLVM) {
        Kind = IfStmtKind::IfElseIf;
        TooDeep = NestedTooDeep.pop_back_val();
      }
      ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
      if (Style.RemoveBracesLLVM)
        NestedTooDeep.push_back(TooDeep);
      if (IsPrecededByComment)
        --Line->Level;
    } else {
      parseUnbracedBody(/*CheckEOF=*/true);
    }
  } else {
    // No else branch: keep the braces if the if block reported an if-else.
    KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
    if (NeedsUnwrappedLine)
      addUnwrappedLine();
  }

  // Everything below is brace-removal bookkeeping only.
  if (!Style.RemoveBracesLLVM)
    return nullptr;

  assert(!NestedTooDeep.empty());
  KeepElseBraces = KeepElseBraces ||
                   (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
                   NestedTooDeep.back();

  NestedTooDeep.pop_back();

  if (!KeepIfBraces && !KeepElseBraces) {
    markOptionalBraces(IfLeftBrace);
    markOptionalBraces(ElseLeftBrace);
  } else if (IfLeftBrace) {
    // The braces stay. Unlink the pair; the checks above treat a missing
    // MatchingParen as a reason to keep braces.
    FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
    if (IfRightBrace) {
      assert(IfRightBrace->MatchingParen == IfLeftBrace);
      assert(!IfLeftBrace->Optional);
      assert(!IfRightBrace->Optional);
      IfLeftBrace->MatchingParen = nullptr;
      IfRightBrace->MatchingParen = nullptr;
    }
  }

  if (IfKind)
    *IfKind = Kind;

  return IfLeftBrace;
}
2877
2878 void UnwrappedLineParser::parseTryCatch() {
2879   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2880   nextToken();
2881   bool NeedsUnwrappedLine = false;
2882   if (FormatTok->is(tok::colon)) {
2883     // We are in a function try block, what comes is an initializer list.
2884     nextToken();
2885
2886     // In case identifiers were removed by clang-tidy, what might follow is
2887     // multiple commas in sequence - before the first identifier.
2888     while (FormatTok->is(tok::comma))
2889       nextToken();
2890
2891     while (FormatTok->is(tok::identifier)) {
2892       nextToken();
2893       if (FormatTok->is(tok::l_paren))
2894         parseParens();
2895       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2896           FormatTok->is(tok::l_brace)) {
2897         do {
2898           nextToken();
2899         } while (FormatTok->isNot(tok::r_brace));
2900         nextToken();
2901       }
2902
2903       // In case identifiers were removed by clang-tidy, what might follow is
2904       // multiple commas in sequence - after the first identifier.
2905       while (FormatTok->is(tok::comma))
2906         nextToken();
2907     }
2908   }
2909   // Parse try with resource.
2910   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2911     parseParens();
2912
2913   keepAncestorBraces();
2914
2915   if (FormatTok->is(tok::l_brace)) {
2916     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2917     parseBlock();
2918     if (Style.BraceWrapping.BeforeCatch)
2919       addUnwrappedLine();
2920     else
2921       NeedsUnwrappedLine = true;
2922   } else if (FormatTok->isNot(tok::kw_catch)) {
2923     // The C++ standard requires a compound-statement after a try.
2924     // If there's none, we try to assume there's a structuralElement
2925     // and try to continue.
2926     addUnwrappedLine();
2927     ++Line->Level;
2928     parseStructuralElement();
2929     --Line->Level;
2930   }
2931   while (true) {
2932     if (FormatTok->is(tok::at))
2933       nextToken();
2934     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2935                              tok::kw___finally) ||
2936           ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2937            FormatTok->is(Keywords.kw_finally)) ||
2938           (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
2939            FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
2940       break;
2941     }
2942     nextToken();
2943     while (FormatTok->isNot(tok::l_brace)) {
2944       if (FormatTok->is(tok::l_paren)) {
2945         parseParens();
2946         continue;
2947       }
2948       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2949         if (Style.RemoveBracesLLVM)
2950           NestedTooDeep.pop_back();
2951         return;
2952       }
2953       nextToken();
2954     }
2955     NeedsUnwrappedLine = false;
2956     Line->MustBeDeclaration = false;
2957     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2958     parseBlock();
2959     if (Style.BraceWrapping.BeforeCatch)
2960       addUnwrappedLine();
2961     else
2962       NeedsUnwrappedLine = true;
2963   }
2964
2965   if (Style.RemoveBracesLLVM)
2966     NestedTooDeep.pop_back();
2967
2968   if (NeedsUnwrappedLine)
2969     addUnwrappedLine();
2970 }
2971
2972 void UnwrappedLineParser::parseNamespace() {
2973   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2974          "'namespace' expected");
2975
2976   const FormatToken &InitialToken = *FormatTok;
2977   nextToken();
2978   if (InitialToken.is(TT_NamespaceMacro)) {
2979     parseParens();
2980   } else {
2981     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2982                               tok::l_square, tok::period, tok::l_paren) ||
2983            (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
2984       if (FormatTok->is(tok::l_square))
2985         parseSquare();
2986       else if (FormatTok->is(tok::l_paren))
2987         parseParens();
2988       else
2989         nextToken();
2990     }
2991   }
2992   if (FormatTok->is(tok::l_brace)) {
2993     FormatTok->setFinalizedType(TT_NamespaceLBrace);
2994
2995     if (ShouldBreakBeforeBrace(Style, InitialToken))
2996       addUnwrappedLine();
2997
2998     unsigned AddLevels =
2999         Style.NamespaceIndentation == FormatStyle::NI_All ||
3000                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
3001                  DeclarationScopeStack.size() > 1)
3002             ? 1u
3003             : 0u;
3004     bool ManageWhitesmithsBraces =
3005         AddLevels == 0u &&
3006         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3007
3008     // If we're in Whitesmiths mode, indent the brace if we're not indenting
3009     // the whole block.
3010     if (ManageWhitesmithsBraces)
3011       ++Line->Level;
3012
3013     // Munch the semicolon after a namespace. This is more common than one would
3014     // think. Putting the semicolon into its own line is very ugly.
3015     parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
3016                /*KeepBraces=*/true, /*IfKind=*/nullptr,
3017                ManageWhitesmithsBraces);
3018
3019     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
3020
3021     if (ManageWhitesmithsBraces)
3022       --Line->Level;
3023   }
3024   // FIXME: Add error handling.
3025 }
3026
3027 void UnwrappedLineParser::parseNew() {
3028   assert(FormatTok->is(tok::kw_new) && "'new' expected");
3029   nextToken();
3030
3031   if (Style.isCSharp()) {
3032     do {
3033       // Handle constructor invocation, e.g. `new(field: value)`.
3034       if (FormatTok->is(tok::l_paren))
3035         parseParens();
3036
3037       // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
3038       if (FormatTok->is(tok::l_brace))
3039         parseBracedList();
3040
3041       if (FormatTok->isOneOf(tok::semi, tok::comma))
3042         return;
3043
3044       nextToken();
3045     } while (!eof());
3046   }
3047
3048   if (Style.Language != FormatStyle::LK_Java)
3049     return;
3050
3051   // In Java, we can parse everything up to the parens, which aren't optional.
3052   do {
3053     // There should not be a ;, { or } before the new's open paren.
3054     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
3055       return;
3056
3057     // Consume the parens.
3058     if (FormatTok->is(tok::l_paren)) {
3059       parseParens();
3060
3061       // If there is a class body of an anonymous class, consume that as child.
3062       if (FormatTok->is(tok::l_brace))
3063         parseChildBlock();
3064       return;
3065     }
3066     nextToken();
3067   } while (!eof());
3068 }
3069
3070 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
3071   keepAncestorBraces();
3072
3073   if (isBlockBegin(*FormatTok)) {
3074     FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3075     FormatToken *LeftBrace = FormatTok;
3076     CompoundStatementIndenter Indenter(this, Style, Line->Level);
3077     parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3078                /*MunchSemi=*/true, KeepBraces);
3079     setPreviousRBraceType(TT_ControlStatementRBrace);
3080     if (!KeepBraces) {
3081       assert(!NestedTooDeep.empty());
3082       if (!NestedTooDeep.back())
3083         markOptionalBraces(LeftBrace);
3084     }
3085     if (WrapRightBrace)
3086       addUnwrappedLine();
3087   } else {
3088     parseUnbracedBody();
3089   }
3090
3091   if (!KeepBraces)
3092     NestedTooDeep.pop_back();
3093 }
3094
3095 void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
3096   assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
3097           (Style.isVerilog() &&
3098            FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
3099                               Keywords.kw_always_ff, Keywords.kw_always_latch,
3100                               Keywords.kw_final, Keywords.kw_initial,
3101                               Keywords.kw_foreach, Keywords.kw_forever,
3102                               Keywords.kw_repeat))) &&
3103          "'for', 'while' or foreach macro expected");
3104   const bool KeepBraces = !Style.RemoveBracesLLVM ||
3105                           !FormatTok->isOneOf(tok::kw_for, tok::kw_while);
3106
3107   nextToken();
3108   // JS' for await ( ...
3109   if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
3110     nextToken();
3111   if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
3112     nextToken();
3113   if (HasParens && FormatTok->is(tok::l_paren)) {
3114     // The type is only set for Verilog basically because we were afraid to
3115     // change the existing behavior for loops. See the discussion on D121756 for
3116     // details.
3117     if (Style.isVerilog())
3118       FormatTok->setFinalizedType(TT_ConditionLParen);
3119     parseParens();
3120   }
3121
3122   if (Style.isVerilog()) {
3123     // Event control.
3124     parseVerilogSensitivityList();
3125   } else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(tok::semi) &&
3126              Tokens->getPreviousToken()->is(tok::r_paren)) {
3127     nextToken();
3128     addUnwrappedLine();
3129     return;
3130   }
3131
3132   handleAttributes();
3133   parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
3134 }
3135
3136 void UnwrappedLineParser::parseDoWhile() {
3137   assert(FormatTok->is(tok::kw_do) && "'do' expected");
3138   nextToken();
3139
3140   parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
3141
3142   // FIXME: Add error handling.
3143   if (FormatTok->isNot(tok::kw_while)) {
3144     addUnwrappedLine();
3145     return;
3146   }
3147
3148   FormatTok->setFinalizedType(TT_DoWhile);
3149
3150   // If in Whitesmiths mode, the line with the while() needs to be indented
3151   // to the same level as the block.
3152   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
3153     ++Line->Level;
3154
3155   nextToken();
3156   parseStructuralElement();
3157 }
3158
3159 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
3160   nextToken();
3161   unsigned OldLineLevel = Line->Level;
3162   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
3163     --Line->Level;
3164   if (LeftAlignLabel)
3165     Line->Level = 0;
3166
3167   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
3168       FormatTok->is(tok::l_brace)) {
3169
3170     CompoundStatementIndenter Indenter(this, Line->Level,
3171                                        Style.BraceWrapping.AfterCaseLabel,
3172                                        Style.BraceWrapping.IndentBraces);
3173     parseBlock();
3174     if (FormatTok->is(tok::kw_break)) {
3175       if (Style.BraceWrapping.AfterControlStatement ==
3176           FormatStyle::BWACS_Always) {
3177         addUnwrappedLine();
3178         if (!Style.IndentCaseBlocks &&
3179             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
3180           ++Line->Level;
3181         }
3182       }
3183       parseStructuralElement();
3184     }
3185     addUnwrappedLine();
3186   } else {
3187     if (FormatTok->is(tok::semi))
3188       nextToken();
3189     addUnwrappedLine();
3190   }
3191   Line->Level = OldLineLevel;
3192   if (FormatTok->isNot(tok::l_brace)) {
3193     parseStructuralElement();
3194     addUnwrappedLine();
3195   }
3196 }
3197
3198 void UnwrappedLineParser::parseCaseLabel() {
3199   assert(FormatTok->is(tok::kw_case) && "'case' expected");
3200
3201   // FIXME: fix handling of complex expressions here.
3202   do {
3203     nextToken();
3204     if (FormatTok->is(tok::colon)) {
3205       FormatTok->setFinalizedType(TT_CaseLabelColon);
3206       break;
3207     }
3208   } while (!eof());
3209   parseLabel();
3210 }
3211
3212 void UnwrappedLineParser::parseSwitch() {
3213   assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3214   nextToken();
3215   if (FormatTok->is(tok::l_paren))
3216     parseParens();
3217
3218   keepAncestorBraces();
3219
3220   if (FormatTok->is(tok::l_brace)) {
3221     CompoundStatementIndenter Indenter(this, Style, Line->Level);
3222     FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3223     parseBlock();
3224     setPreviousRBraceType(TT_ControlStatementRBrace);
3225     addUnwrappedLine();
3226   } else {
3227     addUnwrappedLine();
3228     ++Line->Level;
3229     parseStructuralElement();
3230     --Line->Level;
3231   }
3232
3233   if (Style.RemoveBracesLLVM)
3234     NestedTooDeep.pop_back();
3235 }
3236
3237 // Operators that can follow a C variable.
3238 static bool isCOperatorFollowingVar(tok::TokenKind kind) {
3239   switch (kind) {
3240   case tok::ampamp:
3241   case tok::ampequal:
3242   case tok::arrow:
3243   case tok::caret:
3244   case tok::caretequal:
3245   case tok::comma:
3246   case tok::ellipsis:
3247   case tok::equal:
3248   case tok::equalequal:
3249   case tok::exclaim:
3250   case tok::exclaimequal:
3251   case tok::greater:
3252   case tok::greaterequal:
3253   case tok::greatergreater:
3254   case tok::greatergreaterequal:
3255   case tok::l_paren:
3256   case tok::l_square:
3257   case tok::less:
3258   case tok::lessequal:
3259   case tok::lessless:
3260   case tok::lesslessequal:
3261   case tok::minus:
3262   case tok::minusequal:
3263   case tok::minusminus:
3264   case tok::percent:
3265   case tok::percentequal:
3266   case tok::period:
3267   case tok::pipe:
3268   case tok::pipeequal:
3269   case tok::pipepipe:
3270   case tok::plus:
3271   case tok::plusequal:
3272   case tok::plusplus:
3273   case tok::question:
3274   case tok::r_brace:
3275   case tok::r_paren:
3276   case tok::r_square:
3277   case tok::semi:
3278   case tok::slash:
3279   case tok::slashequal:
3280   case tok::star:
3281   case tok::starequal:
3282     return true;
3283   default:
3284     return false;
3285   }
3286 }
3287
3288 void UnwrappedLineParser::parseAccessSpecifier() {
3289   FormatToken *AccessSpecifierCandidate = FormatTok;
3290   nextToken();
3291   // Understand Qt's slots.
3292   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
3293     nextToken();
3294   // Otherwise, we don't know what it is, and we'd better keep the next token.
3295   if (FormatTok->is(tok::colon)) {
3296     nextToken();
3297     addUnwrappedLine();
3298   } else if (FormatTok->isNot(tok::coloncolon) &&
3299              !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
3300     // Not a variable name nor namespace name.
3301     addUnwrappedLine();
3302   } else if (AccessSpecifierCandidate) {
3303     // Consider the access specifier to be a C identifier.
3304     AccessSpecifierCandidate->Tok.setKind(tok::identifier);
3305   }
3306 }
3307
/// \brief Parses a requires, decides if it is a clause or an expression.
///
/// The decision is a heuristic: it looks at the token after `requires`, then
/// at the token(s) in front of it, and finally scans ahead through a bounded
/// number of tokens inside the parentheses looking for something that reads
/// like a parameter declaration.
/// \pre The current token has to be the requires keyword.
/// \returns true if it parsed a clause, false if it parsed an expression.
bool clang::format::UnwrappedLineParser::parseRequires() {
  assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
  auto RequiresToken = FormatTok;

  // We try to guess if it is a requires clause, or a requires expression. For
  // that we first consume the keyword and check the next token.
  nextToken();

  switch (FormatTok->Tok.getKind()) {
  case tok::l_brace:
    // This can only be an expression, never a clause.
    parseRequiresExpression(RequiresToken);
    return false;
  case tok::l_paren:
    // Clauses and expression can start with a paren, it's unclear what we have.
    break;
  default:
    // All other tokens can only be a clause.
    parseRequiresClause(RequiresToken);
    return true;
  }

  // Looking forward we would have to decide if there are function declaration
  // like arguments to the requires expression:
  // requires (T t) {
  // Or there is a constraint expression for the requires clause:
  // requires (C<T> && ...

  // But first let's look behind.
  auto *PreviousNonComment = RequiresToken->getPreviousNonComment();

  if (!PreviousNonComment ||
      PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
    // If there is no token, or an expression left brace, we are a requires
    // clause within a requires expression.
    parseRequiresClause(RequiresToken);
    return true;
  }

  switch (PreviousNonComment->Tok.getKind()) {
  case tok::greater:
  case tok::r_paren:
  case tok::kw_noexcept:
  case tok::kw_const:
    // This is a requires clause.
    parseRequiresClause(RequiresToken);
    return true;
  case tok::amp:
  case tok::ampamp: {
    // This can be either:
    // if (... && requires (T t) ...)
    // Or
    // void member(...) && requires (C<T> ...
    // We check the one token before that for a const:
    // void member(...) const && requires (C<T> ...
    auto PrevPrev = PreviousNonComment->getPreviousNonComment();
    if (PrevPrev && PrevPrev->is(tok::kw_const)) {
      parseRequiresClause(RequiresToken);
      return true;
    }
    // Still undecided: fall through to the lookahead below.
    break;
  }
  default:
    if (PreviousNonComment->isTypeOrIdentifier()) {
      // This is a requires clause.
      parseRequiresClause(RequiresToken);
      return true;
    }
    // It's an expression.
    parseRequiresExpression(RequiresToken);
    return false;
  }

  // Now we look forward and try to check if the paren content is a parameter
  // list. The parameters can be cv-qualified and contain references or
  // pointers.
  // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
  // of stuff: typename, const, *, &, &&, ::, identifiers.

  // Remember where the lookahead starts; every exit below rewinds the token
  // source to this position before the real parse.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *NextToken = Tokens->getNextToken();
  int Lookahead = 0;
  // Advances the lookahead by one token and counts it.
  auto PeekNext = [&Lookahead, &NextToken, this] {
    ++Lookahead;
    NextToken = Tokens->getNextToken();
  };

  // FoundType: an identifier has been seen, so a following identifier may be a
  // parameter name. LastWasColonColon: the previous token was `::`, so the
  // next identifier continues a qualified name. OpenAngles: nesting depth of
  // '<' ... '>'.
  bool FoundType = false;
  bool LastWasColonColon = false;
  int OpenAngles = 0;

  // Inspect at most 50 tokens.
  for (; Lookahead < 50; PeekNext()) {
    switch (NextToken->Tok.getKind()) {
    case tok::kw_volatile:
    case tok::kw_const:
    case tok::comma:
      // A cv-qualifier or a comma outside of angle brackets is taken as a
      // sign of a parameter list.
      if (OpenAngles == 0) {
        FormatTok = Tokens->setPosition(StoredPosition);
        parseRequiresExpression(RequiresToken);
        return false;
      }
      break;
    case tok::r_paren:
    case tok::pipepipe:
      // Reaching ')' or '||' without having seen a parameter: a clause.
      FormatTok = Tokens->setPosition(StoredPosition);
      parseRequiresClause(RequiresToken);
      return true;
    case tok::eof:
      // Break out of the loop.
      Lookahead = 50;
      break;
    case tok::coloncolon:
      LastWasColonColon = true;
      break;
    case tok::identifier:
      // A second identifier that does not continue a qualified name and is
      // not inside angle brackets: TYPE NAME, i.e. a parameter.
      if (FoundType && !LastWasColonColon && OpenAngles == 0) {
        FormatTok = Tokens->setPosition(StoredPosition);
        parseRequiresExpression(RequiresToken);
        return false;
      }
      FoundType = true;
      LastWasColonColon = false;
      break;
    case tok::less:
      ++OpenAngles;
      break;
    case tok::greater:
      --OpenAngles;
      break;
    default:
      // A simple type specifier is taken as the start of a parameter.
      if (NextToken->isSimpleTypeSpecifier()) {
        FormatTok = Tokens->setPosition(StoredPosition);
        parseRequiresExpression(RequiresToken);
        return false;
      }
      break;
    }
  }
  // This seems to be a complicated expression, just assume it's a clause.
  FormatTok = Tokens->setPosition(StoredPosition);
  parseRequiresClause(RequiresToken);
  return true;
}
3454
3455 /// \brief Parses a requires clause.
3456 /// \param RequiresToken The requires keyword token, which starts this clause.
3457 /// \pre We need to be on the next token after the requires keyword.
3458 /// \sa parseRequiresExpression
3459 ///
3460 /// Returns if it either has finished parsing the clause, or it detects, that
3461 /// the clause is incorrect.
3462 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3463   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3464   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3465
3466   // If there is no previous token, we are within a requires expression,
3467   // otherwise we will always have the template or function declaration in front
3468   // of it.
3469   bool InRequiresExpression =
3470       !RequiresToken->Previous ||
3471       RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3472
3473   RequiresToken->setFinalizedType(InRequiresExpression
3474                                       ? TT_RequiresClauseInARequiresExpression
3475                                       : TT_RequiresClause);
3476
3477   // NOTE: parseConstraintExpression is only ever called from this function.
3478   // It could be inlined into here.
3479   parseConstraintExpression();
3480
3481   if (!InRequiresExpression)
3482     FormatTok->Previous->ClosesRequiresClause = true;
3483 }
3484
3485 /// \brief Parses a requires expression.
3486 /// \param RequiresToken The requires keyword token, which starts this clause.
3487 /// \pre We need to be on the next token after the requires keyword.
3488 /// \sa parseRequiresClause
3489 ///
3490 /// Returns if it either has finished parsing the expression, or it detects,
3491 /// that the expression is incorrect.
3492 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3493   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3494   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3495
3496   RequiresToken->setFinalizedType(TT_RequiresExpression);
3497
3498   if (FormatTok->is(tok::l_paren)) {
3499     FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3500     parseParens();
3501   }
3502
3503   if (FormatTok->is(tok::l_brace)) {
3504     FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3505     parseChildBlock();
3506   }
3507 }
3508
/// \brief Parses a constraint expression.
///
/// This is the body of a requires clause. The function consumes tokens in a
/// loop, dispatching on the kind of the current token, and returns when it
/// reaches a token that cannot continue the expression (parsing is complete
/// or the expression is incorrect) or when the end of the input is reached.
/// The token that stopped the parse is not consumed.
void UnwrappedLineParser::parseConstraintExpression() {
  // The special handling for lambdas is needed since tryToParseLambda() eats a
  // token and if a requires expression is the last part of a requires clause
  // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
  // not set on the correct token. Thus we need to be aware if we even expect a
  // lambda to be possible.
  // template <typename T> requires requires { ... } [[nodiscard]] ...;
  bool LambdaNextTimeAllowed = true;

  // Within lambda declarations, it is permitted to put a requires clause after
  // its template parameter list, which would place the requires clause right
  // before the parentheses of the parameters of the lambda declaration. Thus,
  // we track if we expect to see grouping parentheses at all.
  // Without this check, `requires foo<T> (T t)` in the below example would be
  // seen as the whole requires clause, accidentally eating the parameters of
  // the lambda.
  // [&]<typename T> requires foo<T> (T t) { ... };
  bool TopLevelParensAllowed = true;

  do {
    // The flag is reset on every iteration, so a lambda is only accepted on
    // the first iteration or directly after a case that sets the flag again.
    bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);

    switch (FormatTok->Tok.getKind()) {
    case tok::kw_requires: {
      // A nested requires expression, e.g. `requires requires { ... }`.
      auto RequiresToken = FormatTok;
      nextToken();
      parseRequiresExpression(RequiresToken);
      break;
    }

    case tok::l_paren:
      // Parentheses are only part of the constraint where grouping
      // parentheses are expected; otherwise they belong to what follows.
      if (!TopLevelParensAllowed)
        return;
      parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
      TopLevelParensAllowed = false;
      break;

    case tok::l_square:
      // Either a lambda that is part of the constraint, or something else
      // (e.g. an attribute) that ends the constraint expression.
      if (!LambdaThisTimeAllowed || !tryToParseLambda())
        return;
      break;

    case tok::kw_const:
    case tok::semi:
    case tok::kw_class:
    case tok::kw_struct:
    case tok::kw_union:
      // These tokens end the constraint expression.
      return;

    case tok::l_brace:
      // Potential function body.
      return;

    case tok::ampamp:
    case tok::pipepipe:
      // Logical operators join constraints; a new operand follows, so both a
      // lambda and grouping parentheses are possible again.
      FormatTok->setFinalizedType(TT_BinaryOperator);
      nextToken();
      LambdaNextTimeAllowed = true;
      TopLevelParensAllowed = true;
      break;

    case tok::comma:
    case tok::comment:
      // Carry the lambda permission over unchanged to the next token.
      LambdaNextTimeAllowed = LambdaThisTimeAllowed;
      nextToken();
      break;

    case tok::kw_sizeof:
    case tok::greater:
    case tok::greaterequal:
    case tok::greatergreater:
    case tok::less:
    case tok::lessequal:
    case tok::lessless:
    case tok::equalequal:
    case tok::exclaim:
    case tok::exclaimequal:
    case tok::plus:
    case tok::minus:
    case tok::star:
    case tok::slash:
      // Operators (and sizeof) are followed by an operand, so a lambda or
      // grouping parentheses may come next.
      LambdaNextTimeAllowed = true;
      TopLevelParensAllowed = true;
      // Just eat them.
      nextToken();
      break;

    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_true:
    case tok::kw_false:
      // Parentheses following one of these are not grouping parentheses.
      TopLevelParensAllowed = false;
      // Just eat them.
      nextToken();
      break;

    case tok::kw_static_cast:
    case tok::kw_const_cast:
    case tok::kw_reinterpret_cast:
    case tok::kw_dynamic_cast:
      // A named cast must be followed by its template argument list.
      nextToken();
      if (FormatTok->isNot(tok::less))
        return;

      nextToken();
      parseBracedList(/*IsAngleBracket=*/true);
      break;

    default:
      if (!FormatTok->Tok.getIdentifierInfo()) {
        // Identifiers are part of the default case, we check for more than
        // tok::identifier to handle builtin type traits.
        return;
      }

      // We need to differentiate identifiers for a template deduction guide,
      // variables, or function return types (the constraint expression has
      // ended before that), and basically all other cases. But it's easier to
      // check the other way around.
      assert(FormatTok->Previous);
      switch (FormatTok->Previous->Tok.getKind()) {
      case tok::coloncolon:  // Nested identifier.
      case tok::ampamp:      // Start of a function or variable for the
      case tok::pipepipe:    // constraint expression. (binary)
      case tok::exclaim:     // The same as above, but unary.
      case tok::kw_requires: // Initial identifier of a requires clause.
      case tok::equal:       // Initial identifier of a concept declaration.
        break;
      default:
        // Any other predecessor means the identifier starts whatever follows
        // the constraint expression.
        return;
      }

      // Read identifier with optional template declaration.
      nextToken();
      if (FormatTok->is(tok::less)) {
        nextToken();
        parseBracedList(/*IsAngleBracket=*/true);
      }
      TopLevelParensAllowed = false;
      break;
    }
  } while (!eof());
}
3656
/// Parses an enum declaration or definition starting at the current token.
///
/// Returns false when the tokens turn out not to form an enum: `enum` used as
/// a property name in JavaScript/TypeScript, as a field name in protobuf, or
/// two identifiers in a row in C++. Returns true in all other cases, including
/// when no enum body follows.
bool UnwrappedLineParser::parseEnum() {
  // Remembered for the brace-wrapping decision further down.
  const FormatToken &InitialToken = *FormatTok;

  // Won't be 'enum' for NS_ENUMs.
  if (FormatTok->is(tok::kw_enum))
    nextToken();

  // In TypeScript, "enum" can also be used as property name, e.g. in interface
  // declarations. An "enum" keyword followed by a colon would be a syntax
  // error and thus assume it is just an identifier.
  if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
    return false;

  // In protobuf, "enum" can be used as a field name.
  if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
    return false;

  // Eat up enum class ...
  if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
    nextToken();

  // Skip everything between the keyword(s) and the enum body: the name, a
  // base type, template arguments and the like.
  while (FormatTok->Tok.getIdentifierInfo() ||
         FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
                            tok::greater, tok::comma, tok::question,
                            tok::l_square, tok::r_square)) {
    if (Style.isVerilog()) {
      FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
      nextToken();
      // In Verilog the base type can have dimensions.
      while (FormatTok->is(tok::l_square))
        parseSquare();
    } else {
      nextToken();
    }
    // We can have macros or attributes in between 'enum' and the enum name.
    if (FormatTok->is(tok::l_paren))
      parseParens();
    assert(FormatTok->isNot(TT_AttributeSquare));
    if (FormatTok->is(tok::identifier)) {
      nextToken();
      // If there are two identifiers in a row, this is likely an elaborated
      // return type. In Java, this can be "implements", etc.
      if (Style.isCpp() && FormatTok->is(tok::identifier))
        return false;
    }
  }

  // Just a declaration or something is wrong.
  if (FormatTok->isNot(tok::l_brace))
    return true;
  FormatTok->setFinalizedType(TT_EnumLBrace);
  FormatTok->setBlockKind(BK_Block);

  if (Style.Language == FormatStyle::LK_Java) {
    // Java enums are different.
    parseJavaEnumBody();
    return true;
  }
  if (Style.Language == FormatStyle::LK_Proto) {
    // Protobuf enum bodies are parsed as a regular declaration block.
    parseBlock(/*MustBeDeclaration=*/true);
    return true;
  }

  // Put the opening brace on a line of its own if the style asks for it.
  if (!Style.AllowShortEnumsOnASingleLine &&
      ShouldBreakBeforeBrace(Style, InitialToken)) {
    addUnwrappedLine();
  }
  // Parse enum body.
  nextToken();
  if (!Style.AllowShortEnumsOnASingleLine) {
    // The enumerators start a new line that is indented by one level.
    addUnwrappedLine();
    Line->Level += 1;
  }
  bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true);
  if (!Style.AllowShortEnumsOnASingleLine)
    Line->Level -= 1;
  if (HasError) {
    // The braced list could not be parsed: end the line here, taking a
    // directly following semicolon along.
    if (FormatTok->is(tok::semi))
      nextToken();
    addUnwrappedLine();
  }
  setPreviousRBraceType(TT_EnumRBrace);
  return true;

  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "enum A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
3745
3746 bool UnwrappedLineParser::parseStructLike() {
3747   // parseRecord falls through and does not yet add an unwrapped line as a
3748   // record declaration or definition can start a structural element.
3749   parseRecord();
3750   // This does not apply to Java, JavaScript and C#.
3751   if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3752       Style.isCSharp()) {
3753     if (FormatTok->is(tok::semi))
3754       nextToken();
3755     addUnwrappedLine();
3756     return true;
3757   }
3758   return false;
3759 }
3760
3761 namespace {
3762 // A class used to set and restore the Token position when peeking
3763 // ahead in the token source.
3764 class ScopedTokenPosition {
3765   unsigned StoredPosition;
3766   FormatTokenSource *Tokens;
3767
3768 public:
3769   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3770     assert(Tokens && "Tokens expected to not be null");
3771     StoredPosition = Tokens->getPosition();
3772   }
3773
3774   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3775 };
3776 } // namespace
3777
3778 // Look to see if we have [[ by looking ahead, if
3779 // its not then rewind to the original position.
3780 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3781   ScopedTokenPosition AutoPosition(Tokens);
3782   FormatToken *Tok = Tokens->getNextToken();
3783   // We already read the first [ check for the second.
3784   if (Tok->isNot(tok::l_square))
3785     return false;
3786   // Double check that the attribute is just something
3787   // fairly simple.
3788   while (Tok->isNot(tok::eof)) {
3789     if (Tok->is(tok::r_square))
3790       break;
3791     Tok = Tokens->getNextToken();
3792   }
3793   if (Tok->is(tok::eof))
3794     return false;
3795   Tok = Tokens->getNextToken();
3796   if (Tok->isNot(tok::r_square))
3797     return false;
3798   Tok = Tokens->getNextToken();
3799   if (Tok->is(tok::semi))
3800     return false;
3801   return true;
3802 }
3803
3804 void UnwrappedLineParser::parseJavaEnumBody() {
3805   assert(FormatTok->is(tok::l_brace));
3806   const FormatToken *OpeningBrace = FormatTok;
3807
3808   // Determine whether the enum is simple, i.e. does not have a semicolon or
3809   // constants with class bodies. Simple enums can be formatted like braced
3810   // lists, contracted to a single line, etc.
3811   unsigned StoredPosition = Tokens->getPosition();
3812   bool IsSimple = true;
3813   FormatToken *Tok = Tokens->getNextToken();
3814   while (Tok->isNot(tok::eof)) {
3815     if (Tok->is(tok::r_brace))
3816       break;
3817     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3818       IsSimple = false;
3819       break;
3820     }
3821     // FIXME: This will also mark enums with braces in the arguments to enum
3822     // constants as "not simple". This is probably fine in practice, though.
3823     Tok = Tokens->getNextToken();
3824   }
3825   FormatTok = Tokens->setPosition(StoredPosition);
3826
3827   if (IsSimple) {
3828     nextToken();
3829     parseBracedList();
3830     addUnwrappedLine();
3831     return;
3832   }
3833
3834   // Parse the body of a more complex enum.
3835   // First add a line for everything up to the "{".
3836   nextToken();
3837   addUnwrappedLine();
3838   ++Line->Level;
3839
3840   // Parse the enum constants.
3841   while (!eof()) {
3842     if (FormatTok->is(tok::l_brace)) {
3843       // Parse the constant's class body.
3844       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3845                  /*MunchSemi=*/false);
3846     } else if (FormatTok->is(tok::l_paren)) {
3847       parseParens();
3848     } else if (FormatTok->is(tok::comma)) {
3849       nextToken();
3850       addUnwrappedLine();
3851     } else if (FormatTok->is(tok::semi)) {
3852       nextToken();
3853       addUnwrappedLine();
3854       break;
3855     } else if (FormatTok->is(tok::r_brace)) {
3856       addUnwrappedLine();
3857       break;
3858     } else {
3859       nextToken();
3860     }
3861   }
3862
3863   // Parse the class body after the enum's ";" if any.
3864   parseLevel(OpeningBrace);
3865   nextToken();
3866   --Line->Level;
3867   addUnwrappedLine();
3868 }
3869
/// Parses a record (class, struct, union or similar) starting at its keyword.
///
/// Consumes the keyword, the name with attributes and macros around it, and
/// an optional base clause or template argument list. If a body in braces
/// follows, its braces are annotated and the body is parsed.
///
/// \param ParseAsExpr If true, the body is parsed with parseChildBlock()
/// instead of parseBlock().
void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
  // The record keyword; it decides how the braces are annotated below.
  const FormatToken &InitialToken = *FormatTok;
  nextToken();

  // The actual identifier can be a nested name specifier, and in macros
  // it is often token-pasted.
  // An [[attribute]] can be before the identifier.
  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
                            tok::kw_alignas, tok::l_square) ||
         FormatTok->isAttribute() ||
         ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
          FormatTok->isOneOf(tok::period, tok::comma))) {
    if (Style.isJavaScript() &&
        FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
      // JavaScript/TypeScript supports inline object types in
      // extends/implements positions:
      //     class Foo implements {bar: number} { }
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        tryToParseBracedList();
        continue;
      }
    }
    if (FormatTok->is(tok::l_square) && handleCppAttributes())
      continue;
    // An identifier that is not written entirely in upper case is not treated
    // as a macro, so parentheses after it are not consumed below.
    bool IsNonMacroIdentifier =
        FormatTok->is(tok::identifier) &&
        FormatTok->TokenText != FormatTok->TokenText.upper();
    nextToken();
    // We can have macros in between 'class' and the class name.
    if (!IsNonMacroIdentifier && FormatTok->is(tok::l_paren))
      parseParens();
  }

  // Note that parsing away template declarations here leads to incorrectly
  // accepting function declarations as record declarations.
  // In general, we cannot solve this problem. Consider:
  // class A<int> B() {}
  // which can be a function definition or a class definition when B() is a
  // macro. If we find enough real-world cases where this is a problem, we
  // can parse for the 'template' keyword in the beginning of the statement,
  // and thus rule out the record production in case there is no template
  // (this would still leave us with an ambiguity between template function
  // and class declarations).
  if (FormatTok->isOneOf(tok::colon, tok::less)) {
    // Skip the base clause / template arguments up to the body or a ';'.
    do {
      if (FormatTok->is(tok::l_brace)) {
        calculateBraceTypes(/*ExpectClassBody=*/true);
        // A brace that is not a braced list is taken to be the record body.
        if (!tryToParseBracedList())
          break;
      }
      if (FormatTok->is(tok::l_square)) {
        FormatToken *Previous = FormatTok->Previous;
        if (!Previous ||
            !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
          // Don't try parsing a lambda if we had a closing parenthesis before,
          // it was probably a pointer to an array: int (*)[].
          if (!tryToParseLambda())
            continue;
        } else {
          parseSquare();
          continue;
        }
      }
      // A semicolon means there is no body, e.g. a forward declaration.
      if (FormatTok->is(tok::semi))
        return;
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
        // A C# generic type constraint starts a new line.
        addUnwrappedLine();
        nextToken();
        parseCSharpGenericTypeConstraint();
        break;
      }
      nextToken();
    } while (!eof());
  }

  // Maps the record keyword to the token types for its opening and closing
  // brace.
  auto GetBraceTypes =
      [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> {
    switch (RecordTok.Tok.getKind()) {
    case tok::kw_class:
      return {TT_ClassLBrace, TT_ClassRBrace};
    case tok::kw_struct:
      return {TT_StructLBrace, TT_StructRBrace};
    case tok::kw_union:
      return {TT_UnionLBrace, TT_UnionRBrace};
    default:
      // Useful for e.g. interface.
      return {TT_RecordLBrace, TT_RecordRBrace};
    }
  };
  if (FormatTok->is(tok::l_brace)) {
    auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken);
    FormatTok->setFinalizedType(OpenBraceType);
    if (ParseAsExpr) {
      parseChildBlock();
    } else {
      if (ShouldBreakBeforeBrace(Style, InitialToken))
        addUnwrappedLine();

      // Members get an extra indentation level when access modifiers are
      // indented as well.
      unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
      parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
    }
    setPreviousRBraceType(ClosingBraceType);
  }
  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "class A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
3978
3979 void UnwrappedLineParser::parseObjCMethod() {
3980   assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
3981          "'(' or identifier expected.");
3982   do {
3983     if (FormatTok->is(tok::semi)) {
3984       nextToken();
3985       addUnwrappedLine();
3986       return;
3987     } else if (FormatTok->is(tok::l_brace)) {
3988       if (Style.BraceWrapping.AfterFunction)
3989         addUnwrappedLine();
3990       parseBlock();
3991       addUnwrappedLine();
3992       return;
3993     } else {
3994       nextToken();
3995     }
3996   } while (!eof());
3997 }
3998
3999 void UnwrappedLineParser::parseObjCProtocolList() {
4000   assert(FormatTok->is(tok::less) && "'<' expected.");
4001   do {
4002     nextToken();
4003     // Early exit in case someone forgot a close angle.
4004     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4005         FormatTok->isObjCAtKeyword(tok::objc_end)) {
4006       return;
4007     }
4008   } while (!eof() && FormatTok->isNot(tok::greater));
4009   nextToken(); // Skip '>'.
4010 }
4011
4012 void UnwrappedLineParser::parseObjCUntilAtEnd() {
4013   do {
4014     if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
4015       nextToken();
4016       addUnwrappedLine();
4017       break;
4018     }
4019     if (FormatTok->is(tok::l_brace)) {
4020       parseBlock();
4021       // In ObjC interfaces, nothing should be following the "}".
4022       addUnwrappedLine();
4023     } else if (FormatTok->is(tok::r_brace)) {
4024       // Ignore stray "}". parseStructuralElement doesn't consume them.
4025       nextToken();
4026       addUnwrappedLine();
4027     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
4028       nextToken();
4029       parseObjCMethod();
4030     } else {
4031       parseStructuralElement();
4032     }
4033   } while (!eof());
4034 }
4035
4036 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
4037   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
4038          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
4039   nextToken();
4040   nextToken(); // interface name
4041
4042   // @interface can be followed by a lightweight generic
4043   // specialization list, then either a base class or a category.
4044   if (FormatTok->is(tok::less))
4045     parseObjCLightweightGenerics();
4046   if (FormatTok->is(tok::colon)) {
4047     nextToken();
4048     nextToken(); // base class name
4049     // The base class can also have lightweight generics applied to it.
4050     if (FormatTok->is(tok::less))
4051       parseObjCLightweightGenerics();
4052   } else if (FormatTok->is(tok::l_paren)) {
4053     // Skip category, if present.
4054     parseParens();
4055   }
4056
4057   if (FormatTok->is(tok::less))
4058     parseObjCProtocolList();
4059
4060   if (FormatTok->is(tok::l_brace)) {
4061     if (Style.BraceWrapping.AfterObjCDeclaration)
4062       addUnwrappedLine();
4063     parseBlock(/*MustBeDeclaration=*/true);
4064   }
4065
4066   // With instance variables, this puts '}' on its own line.  Without instance
4067   // variables, this ends the @interface line.
4068   addUnwrappedLine();
4069
4070   parseObjCUntilAtEnd();
4071 }
4072
4073 void UnwrappedLineParser::parseObjCLightweightGenerics() {
4074   assert(FormatTok->is(tok::less));
4075   // Unlike protocol lists, generic parameterizations support
4076   // nested angles:
4077   //
4078   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4079   //     NSObject <NSCopying, NSSecureCoding>
4080   //
4081   // so we need to count how many open angles we have left.
4082   unsigned NumOpenAngles = 1;
4083   do {
4084     nextToken();
4085     // Early exit in case someone forgot a close angle.
4086     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4087         FormatTok->isObjCAtKeyword(tok::objc_end)) {
4088       break;
4089     }
4090     if (FormatTok->is(tok::less)) {
4091       ++NumOpenAngles;
4092     } else if (FormatTok->is(tok::greater)) {
4093       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
4094       --NumOpenAngles;
4095     }
4096   } while (!eof() && NumOpenAngles != 0);
4097   nextToken(); // Skip '>'.
4098 }
4099
4100 // Returns true for the declaration/definition form of @protocol,
4101 // false for the expression form.
4102 bool UnwrappedLineParser::parseObjCProtocol() {
4103   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
4104   nextToken();
4105
4106   if (FormatTok->is(tok::l_paren)) {
4107     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4108     return false;
4109   }
4110
4111   // The definition/declaration form,
4112   // @protocol Foo
4113   // - (int)someMethod;
4114   // @end
4115
4116   nextToken(); // protocol name
4117
4118   if (FormatTok->is(tok::less))
4119     parseObjCProtocolList();
4120
4121   // Check for protocol declaration.
4122   if (FormatTok->is(tok::semi)) {
4123     nextToken();
4124     addUnwrappedLine();
4125     return true;
4126   }
4127
4128   addUnwrappedLine();
4129   parseObjCUntilAtEnd();
4130   return true;
4131 }
4132
4133 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
4134   bool IsImport = FormatTok->is(Keywords.kw_import);
4135   assert(IsImport || FormatTok->is(tok::kw_export));
4136   nextToken();
4137
4138   // Consume the "default" in "export default class/function".
4139   if (FormatTok->is(tok::kw_default))
4140     nextToken();
4141
4142   // Consume "async function", "function" and "default function", so that these
4143   // get parsed as free-standing JS functions, i.e. do not require a trailing
4144   // semicolon.
4145   if (FormatTok->is(Keywords.kw_async))
4146     nextToken();
4147   if (FormatTok->is(Keywords.kw_function)) {
4148     nextToken();
4149     return;
4150   }
4151
4152   // For imports, `export *`, `export {...}`, consume the rest of the line up
4153   // to the terminating `;`. For everything else, just return and continue
4154   // parsing the structural element, i.e. the declaration or expression for
4155   // `export default`.
4156   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
4157       !FormatTok->isStringLiteral() &&
4158       !(FormatTok->is(Keywords.kw_type) &&
4159         Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) {
4160     return;
4161   }
4162
4163   while (!eof()) {
4164     if (FormatTok->is(tok::semi))
4165       return;
4166     if (Line->Tokens.empty()) {
4167       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
4168       // import statement should terminate.
4169       return;
4170     }
4171     if (FormatTok->is(tok::l_brace)) {
4172       FormatTok->setBlockKind(BK_Block);
4173       nextToken();
4174       parseBracedList();
4175     } else {
4176       nextToken();
4177     }
4178   }
4179 }
4180
4181 void UnwrappedLineParser::parseStatementMacro() {
4182   nextToken();
4183   if (FormatTok->is(tok::l_paren))
4184     parseParens();
4185   if (FormatTok->is(tok::semi))
4186     nextToken();
4187   addUnwrappedLine();
4188 }
4189
4190 void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4191   // consume things like a::`b.c[d:e] or a::*
4192   while (true) {
4193     if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar,
4194                            tok::coloncolon, tok::hash) ||
4195         Keywords.isVerilogIdentifier(*FormatTok)) {
4196       nextToken();
4197     } else if (FormatTok->is(tok::l_square)) {
4198       parseSquare();
4199     } else {
4200       break;
4201     }
4202   }
4203 }
4204
4205 void UnwrappedLineParser::parseVerilogSensitivityList() {
4206   if (FormatTok->isNot(tok::at))
4207     return;
4208   nextToken();
4209   // A block event expression has 2 at signs.
4210   if (FormatTok->is(tok::at))
4211     nextToken();
4212   switch (FormatTok->Tok.getKind()) {
4213   case tok::star:
4214     nextToken();
4215     break;
4216   case tok::l_paren:
4217     parseParens();
4218     break;
4219   default:
4220     parseVerilogHierarchyIdentifier();
4221     break;
4222   }
4223 }
4224
/// Parses the header of a Verilog hierarchy block starting at its keyword.
///
/// Three kinds of headers are distinguished: a clocking block, a case-like
/// statement, and everything else ("module" etc.).
///
/// \returns The number of additional indentation levels: 1 for a case-like
/// header when Style.IndentCaseLabels is set, 0 otherwise.
unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
  unsigned AddLevels = 0;

  if (FormatTok->is(Keywords.kw_clocking)) {
    // The keyword, an optional name, an optional event and an optional ';'.
    nextToken();
    if (Keywords.isVerilogIdentifier(*FormatTok))
      nextToken();
    parseVerilogSensitivityList();
    if (FormatTok->is(tok::semi))
      nextToken();
  } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex,
                                Keywords.kw_casez, Keywords.kw_randcase,
                                Keywords.kw_randsequence)) {
    if (Style.IndentCaseLabels)
      AddLevels++;
    nextToken();
    // The parenthesized expression after the keyword is marked as a
    // condition.
    if (FormatTok->is(tok::l_paren)) {
      FormatTok->setFinalizedType(TT_ConditionLParen);
      parseParens();
    }
    if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches))
      nextToken();
    // The case header has no semicolon.
  } else {
    // "module" etc.
    nextToken();
    // all the words like the name of the module and specifiers like
    // "automatic" and the width of function return type
    while (true) {
      if (FormatTok->is(tok::l_square)) {
        // An identifier directly in front of the dimension is marked as a
        // dimensioned type name.
        auto Prev = FormatTok->getPreviousNonComment();
        if (Prev && Keywords.isVerilogIdentifier(*Prev))
          Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
        parseSquare();
      } else if (Keywords.isVerilogIdentifier(*FormatTok) ||
                 FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) {
        nextToken();
      } else {
        break;
      }
    }

    // Ends the current unwrapped line and marks the line that follows as a
    // continuation.
    auto NewLine = [this]() {
      addUnwrappedLine();
      Line->IsContinuation = true;
    };

    // package imports
    while (FormatTok->is(Keywords.kw_import)) {
      NewLine();
      nextToken();
      parseVerilogHierarchyIdentifier();
      if (FormatTok->is(tok::semi))
        nextToken();
    }

    // parameters and ports
    if (FormatTok->is(Keywords.kw_verilogHash)) {
      NewLine();
      nextToken();
      if (FormatTok->is(tok::l_paren)) {
        FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
        parseParens();
      }
    }
    if (FormatTok->is(tok::l_paren)) {
      NewLine();
      FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
      parseParens();
    }

    // extends and implements
    if (FormatTok->is(Keywords.kw_extends)) {
      NewLine();
      nextToken();
      parseVerilogHierarchyIdentifier();
      if (FormatTok->is(tok::l_paren))
        parseParens();
    }
    if (FormatTok->is(Keywords.kw_implements)) {
      NewLine();
      // A comma-separated list of names; each iteration consumes the keyword
      // or the comma and then one name.
      do {
        nextToken();
        parseVerilogHierarchyIdentifier();
      } while (FormatTok->is(tok::comma));
    }

    // Coverage event for cover groups.
    if (FormatTok->is(tok::at)) {
      NewLine();
      parseVerilogSensitivityList();
    }

    // The ';' that ends the header, followed by the end of the header line.
    if (FormatTok->is(tok::semi))
      nextToken(/*LevelDifference=*/1);
    addUnwrappedLine();
  }

  return AddLevels;
}
4325
4326 void UnwrappedLineParser::parseVerilogTable() {
4327   assert(FormatTok->is(Keywords.kw_table));
4328   nextToken(/*LevelDifference=*/1);
4329   addUnwrappedLine();
4330
4331   auto InitialLevel = Line->Level++;
4332   while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) {
4333     FormatToken *Tok = FormatTok;
4334     nextToken();
4335     if (Tok->is(tok::semi))
4336       addUnwrappedLine();
4337     else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus))
4338       Tok->setFinalizedType(TT_VerilogTableItem);
4339   }
4340   Line->Level = InitialLevel;
4341   nextToken(/*LevelDifference=*/-1);
4342   addUnwrappedLine();
4343 }
4344
4345 void UnwrappedLineParser::parseVerilogCaseLabel() {
4346   // The label will get unindented in AnnotatingParser. If there are no leading
4347   // spaces, indent the rest here so that things inside the block will be
4348   // indented relative to things outside. We don't use parseLabel because we
4349   // don't know whether this colon is a label or a ternary expression at this
4350   // point.
4351   auto OrigLevel = Line->Level;
4352   auto FirstLine = CurrentLines->size();
4353   if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
4354     ++Line->Level;
4355   else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok))
4356     --Line->Level;
4357   parseStructuralElement();
4358   // Restore the indentation in both the new line and the line that has the
4359   // label.
4360   if (CurrentLines->size() > FirstLine)
4361     (*CurrentLines)[FirstLine].Level = OrigLevel;
4362   Line->Level = OrigLevel;
4363 }
4364
4365 bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
4366   for (const auto &N : Line.Tokens) {
4367     if (N.Tok->MacroCtx)
4368       return true;
4369     for (const UnwrappedLine &Child : N.Children)
4370       if (containsExpansion(Child))
4371         return true;
4372   }
4373   return false;
4374 }
4375
4376 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
4377   if (Line->Tokens.empty())
4378     return;
4379   LLVM_DEBUG({
4380     if (!parsingPPDirective()) {
4381       llvm::dbgs() << "Adding unwrapped line:\n";
4382       printDebugInfo(*Line);
4383     }
4384   });
4385
4386   // If this line closes a block when in Whitesmiths mode, remember that
4387   // information so that the level can be decreased after the line is added.
4388   // This has to happen after the addition of the line since the line itself
4389   // needs to be indented.
4390   bool ClosesWhitesmithsBlock =
4391       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
4392       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
4393
4394   // If the current line was expanded from a macro call, we use it to
4395   // reconstruct an unwrapped line from the structure of the expanded unwrapped
4396   // line and the unexpanded token stream.
4397   if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) {
4398     if (!Reconstruct)
4399       Reconstruct.emplace(Line->Level, Unexpanded);
4400     Reconstruct->addLine(*Line);
4401
4402     // While the reconstructed unexpanded lines are stored in the normal
4403     // flow of lines, the expanded lines are stored on the side to be analyzed
4404     // in an extra step.
4405     CurrentExpandedLines.push_back(std::move(*Line));
4406
4407     if (Reconstruct->finished()) {
4408       UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
4409       assert(!Reconstructed.Tokens.empty() &&
4410              "Reconstructed must at least contain the macro identifier.");
4411       assert(!parsingPPDirective());
4412       LLVM_DEBUG({
4413         llvm::dbgs() << "Adding unexpanded line:\n";
4414         printDebugInfo(Reconstructed);
4415       });
4416       ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines;
4417       Lines.push_back(std::move(Reconstructed));
4418       CurrentExpandedLines.clear();
4419       Reconstruct.reset();
4420     }
4421   } else {
4422     // At the top level we only get here when no unexpansion is going on, or
4423     // when conditional formatting led to unfinished macro reconstructions.
4424     assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0);
4425     CurrentLines->push_back(std::move(*Line));
4426   }
4427   Line->Tokens.clear();
4428   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
4429   Line->FirstStartColumn = 0;
4430   Line->IsContinuation = false;
4431   Line->SeenDecltypeAuto = false;
4432
4433   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
4434     --Line->Level;
4435   if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
4436     CurrentLines->append(
4437         std::make_move_iterator(PreprocessorDirectives.begin()),
4438         std::make_move_iterator(PreprocessorDirectives.end()));
4439     PreprocessorDirectives.clear();
4440   }
4441   // Disconnect the current token from the last token on the previous line.
4442   FormatTok->Previous = nullptr;
4443 }
4444
4445 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
4446
4447 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4448   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4449          FormatTok.NewlinesBefore > 0;
4450 }
4451
4452 // Checks if \p FormatTok is a line comment that continues the line comment
4453 // section on \p Line.
4454 static bool
4455 continuesLineCommentSection(const FormatToken &FormatTok,
4456                             const UnwrappedLine &Line,
4457                             const llvm::Regex &CommentPragmasRegex) {
4458   if (Line.Tokens.empty())
4459     return false;
4460
4461   StringRef IndentContent = FormatTok.TokenText;
4462   if (FormatTok.TokenText.starts_with("//") ||
4463       FormatTok.TokenText.starts_with("/*")) {
4464     IndentContent = FormatTok.TokenText.substr(2);
4465   }
4466   if (CommentPragmasRegex.match(IndentContent))
4467     return false;
4468
4469   // If Line starts with a line comment, then FormatTok continues the comment
4470   // section if its original column is greater or equal to the original start
4471   // column of the line.
4472   //
4473   // Define the min column token of a line as follows: if a line ends in '{' or
4474   // contains a '{' followed by a line comment, then the min column token is
4475   // that '{'. Otherwise, the min column token of the line is the first token of
4476   // the line.
4477   //
4478   // If Line starts with a token other than a line comment, then FormatTok
4479   // continues the comment section if its original column is greater than the
4480   // original start column of the min column token of the line.
4481   //
4482   // For example, the second line comment continues the first in these cases:
4483   //
4484   // // first line
4485   // // second line
4486   //
4487   // and:
4488   //
4489   // // first line
4490   //  // second line
4491   //
4492   // and:
4493   //
4494   // int i; // first line
4495   //  // second line
4496   //
4497   // and:
4498   //
4499   // do { // first line
4500   //      // second line
4501   //   int i;
4502   // } while (true);
4503   //
4504   // and:
4505   //
4506   // enum {
4507   //   a, // first line
4508   //    // second line
4509   //   b
4510   // };
4511   //
4512   // The second line comment doesn't continue the first in these cases:
4513   //
4514   //   // first line
4515   //  // second line
4516   //
4517   // and:
4518   //
4519   // int i; // first line
4520   // // second line
4521   //
4522   // and:
4523   //
4524   // do { // first line
4525   //   // second line
4526   //   int i;
4527   // } while (true);
4528   //
4529   // and:
4530   //
4531   // enum {
4532   //   a, // first line
4533   //   // second line
4534   // };
4535   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4536
4537   // Scan for '{//'. If found, use the column of '{' as a min column for line
4538   // comment section continuation.
4539   const FormatToken *PreviousToken = nullptr;
4540   for (const UnwrappedLineNode &Node : Line.Tokens) {
4541     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
4542         isLineComment(*Node.Tok)) {
4543       MinColumnToken = PreviousToken;
4544       break;
4545     }
4546     PreviousToken = Node.Tok;
4547
4548     // Grab the last newline preceding a token in this unwrapped line.
4549     if (Node.Tok->NewlinesBefore > 0)
4550       MinColumnToken = Node.Tok;
4551   }
4552   if (PreviousToken && PreviousToken->is(tok::l_brace))
4553     MinColumnToken = PreviousToken;
4554
4555   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4556                               MinColumnToken);
4557 }
4558
4559 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4560   bool JustComments = Line->Tokens.empty();
4561   for (FormatToken *Tok : CommentsBeforeNextToken) {
4562     // Line comments that belong to the same line comment section are put on the
4563     // same line since later we might want to reflow content between them.
4564     // Additional fine-grained breaking of line comment sections is controlled
4565     // by the class BreakableLineCommentSection in case it is desirable to keep
4566     // several line comment sections in the same unwrapped line.
4567     //
4568     // FIXME: Consider putting separate line comment sections as children to the
4569     // unwrapped line instead.
4570     Tok->ContinuesLineCommentSection =
4571         continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
4572     if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4573       addUnwrappedLine();
4574     pushToken(Tok);
4575   }
4576   if (NewlineBeforeNext && JustComments)
4577     addUnwrappedLine();
4578   CommentsBeforeNextToken.clear();
4579 }
4580
4581 void UnwrappedLineParser::nextToken(int LevelDifference) {
4582   if (eof())
4583     return;
4584   flushComments(isOnNewLine(*FormatTok));
4585   pushToken(FormatTok);
4586   FormatToken *Previous = FormatTok;
4587   if (!Style.isJavaScript())
4588     readToken(LevelDifference);
4589   else
4590     readTokenWithJavaScriptASI();
4591   FormatTok->Previous = Previous;
4592   if (Style.isVerilog()) {
4593     // Blocks in Verilog can have `begin` and `end` instead of braces.  For
4594     // keywords like `begin`, we can't treat them the same as left braces
4595     // because some contexts require one of them.  For example structs use
4596     // braces and if blocks use keywords, and a left brace can occur in an if
4597     // statement, but it is not a block.  For keywords like `end`, we simply
4598     // treat them the same as right braces.
4599     if (Keywords.isVerilogEnd(*FormatTok))
4600       FormatTok->Tok.setKind(tok::r_brace);
4601   }
4602 }
4603
4604 void UnwrappedLineParser::distributeComments(
4605     const SmallVectorImpl<FormatToken *> &Comments,
4606     const FormatToken *NextTok) {
4607   // Whether or not a line comment token continues a line is controlled by
4608   // the method continuesLineCommentSection, with the following caveat:
4609   //
4610   // Define a trail of Comments to be a nonempty proper postfix of Comments such
4611   // that each comment line from the trail is aligned with the next token, if
4612   // the next token exists. If a trail exists, the beginning of the maximal
4613   // trail is marked as a start of a new comment section.
4614   //
4615   // For example in this code:
4616   //
4617   // int a; // line about a
4618   //   // line 1 about b
4619   //   // line 2 about b
4620   //   int b;
4621   //
4622   // the two lines about b form a maximal trail, so there are two sections, the
4623   // first one consisting of the single comment "// line about a" and the
4624   // second one consisting of the next two comments.
4625   if (Comments.empty())
4626     return;
4627   bool ShouldPushCommentsInCurrentLine = true;
4628   bool HasTrailAlignedWithNextToken = false;
4629   unsigned StartOfTrailAlignedWithNextToken = 0;
4630   if (NextTok) {
4631     // We are skipping the first element intentionally.
4632     for (unsigned i = Comments.size() - 1; i > 0; --i) {
4633       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4634         HasTrailAlignedWithNextToken = true;
4635         StartOfTrailAlignedWithNextToken = i;
4636       }
4637     }
4638   }
4639   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4640     FormatToken *FormatTok = Comments[i];
4641     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4642       FormatTok->ContinuesLineCommentSection = false;
4643     } else {
4644       FormatTok->ContinuesLineCommentSection =
4645           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4646     }
4647     if (!FormatTok->ContinuesLineCommentSection &&
4648         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
4649       ShouldPushCommentsInCurrentLine = false;
4650     }
4651     if (ShouldPushCommentsInCurrentLine)
4652       pushToken(FormatTok);
4653     else
4654       CommentsBeforeNextToken.push_back(FormatTok);
4655   }
4656 }
4657
4658 void UnwrappedLineParser::readToken(int LevelDifference) {
4659   SmallVector<FormatToken *, 1> Comments;
4660   bool PreviousWasComment = false;
4661   bool FirstNonCommentOnLine = false;
4662   do {
4663     FormatTok = Tokens->getNextToken();
4664     assert(FormatTok);
4665     while (FormatTok->getType() == TT_ConflictStart ||
4666            FormatTok->getType() == TT_ConflictEnd ||
4667            FormatTok->getType() == TT_ConflictAlternative) {
4668       if (FormatTok->getType() == TT_ConflictStart)
4669         conditionalCompilationStart(/*Unreachable=*/false);
4670       else if (FormatTok->getType() == TT_ConflictAlternative)
4671         conditionalCompilationAlternative();
4672       else if (FormatTok->getType() == TT_ConflictEnd)
4673         conditionalCompilationEnd();
4674       FormatTok = Tokens->getNextToken();
4675       FormatTok->MustBreakBefore = true;
4676     }
4677
4678     auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4679                                       const FormatToken &Tok,
4680                                       bool PreviousWasComment) {
4681       auto IsFirstOnLine = [](const FormatToken &Tok) {
4682         return Tok.HasUnescapedNewline || Tok.IsFirst;
4683       };
4684
4685       // Consider preprocessor directives preceded by block comments as first
4686       // on line.
4687       if (PreviousWasComment)
4688         return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4689       return IsFirstOnLine(Tok);
4690     };
4691
4692     FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4693         FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4694     PreviousWasComment = FormatTok->is(tok::comment);
4695
4696     while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
4697            (!Style.isVerilog() ||
4698             Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) &&
4699            FirstNonCommentOnLine) {
4700       distributeComments(Comments, FormatTok);
4701       Comments.clear();
4702       // If there is an unfinished unwrapped line, we flush the preprocessor
4703       // directives only after that unwrapped line was finished later.
4704       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4705       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4706       assert((LevelDifference >= 0 ||
4707               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4708              "LevelDifference makes Line->Level negative");
4709       Line->Level += LevelDifference;
4710       // Comments stored before the preprocessor directive need to be output
4711       // before the preprocessor directive, at the same level as the
4712       // preprocessor directive, as we consider them to apply to the directive.
4713       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
4714           PPBranchLevel > 0) {
4715         Line->Level += PPBranchLevel;
4716       }
4717       flushComments(isOnNewLine(*FormatTok));
4718       parsePPDirective();
4719       PreviousWasComment = FormatTok->is(tok::comment);
4720       FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4721           FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4722     }
4723
4724     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4725         !Line->InPPDirective) {
4726       continue;
4727     }
4728
4729     if (FormatTok->is(tok::identifier) &&
4730         Macros.defined(FormatTok->TokenText) &&
4731         // FIXME: Allow expanding macros in preprocessor directives.
4732         !Line->InPPDirective) {
4733       FormatToken *ID = FormatTok;
4734       unsigned Position = Tokens->getPosition();
4735
4736       // To correctly parse the code, we need to replace the tokens of the macro
4737       // call with its expansion.
4738       auto PreCall = std::move(Line);
4739       Line.reset(new UnwrappedLine);
4740       bool OldInExpansion = InExpansion;
4741       InExpansion = true;
4742       // We parse the macro call into a new line.
4743       auto Args = parseMacroCall();
4744       InExpansion = OldInExpansion;
4745       assert(Line->Tokens.front().Tok == ID);
4746       // And remember the unexpanded macro call tokens.
4747       auto UnexpandedLine = std::move(Line);
4748       // Reset to the old line.
4749       Line = std::move(PreCall);
4750
4751       LLVM_DEBUG({
4752         llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
4753         if (Args) {
4754           llvm::dbgs() << "(";
4755           for (const auto &Arg : Args.value())
4756             for (const auto &T : Arg)
4757               llvm::dbgs() << T->TokenText << " ";
4758           llvm::dbgs() << ")";
4759         }
4760         llvm::dbgs() << "\n";
4761       });
4762       if (Macros.objectLike(ID->TokenText) && Args &&
4763           !Macros.hasArity(ID->TokenText, Args->size())) {
4764         // The macro is either
4765         // - object-like, but we got argumnets, or
4766         // - overloaded to be both object-like and function-like, but none of
4767         //   the function-like arities match the number of arguments.
4768         // Thus, expand as object-like macro.
4769         LLVM_DEBUG(llvm::dbgs()
4770                    << "Macro \"" << ID->TokenText
4771                    << "\" not overloaded for arity " << Args->size()
4772                    << "or not function-like, using object-like overload.");
4773         Args.reset();
4774         UnexpandedLine->Tokens.resize(1);
4775         Tokens->setPosition(Position);
4776         nextToken();
4777         assert(!Args && Macros.objectLike(ID->TokenText));
4778       }
4779       if ((!Args && Macros.objectLike(ID->TokenText)) ||
4780           (Args && Macros.hasArity(ID->TokenText, Args->size()))) {
4781         // Next, we insert the expanded tokens in the token stream at the
4782         // current position, and continue parsing.
4783         Unexpanded[ID] = std::move(UnexpandedLine);
4784         SmallVector<FormatToken *, 8> Expansion =
4785             Macros.expand(ID, std::move(Args));
4786         if (!Expansion.empty())
4787           FormatTok = Tokens->insertTokens(Expansion);
4788
4789         LLVM_DEBUG({
4790           llvm::dbgs() << "Expanded: ";
4791           for (const auto &T : Expansion)
4792             llvm::dbgs() << T->TokenText << " ";
4793           llvm::dbgs() << "\n";
4794         });
4795       } else {
4796         LLVM_DEBUG({
4797           llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
4798                        << "\", because it was used ";
4799           if (Args)
4800             llvm::dbgs() << "with " << Args->size();
4801           else
4802             llvm::dbgs() << "without";
4803           llvm::dbgs() << " arguments, which doesn't match any definition.\n";
4804         });
4805         Tokens->setPosition(Position);
4806         FormatTok = ID;
4807       }
4808     }
4809
4810     if (FormatTok->isNot(tok::comment)) {
4811       distributeComments(Comments, FormatTok);
4812       Comments.clear();
4813       return;
4814     }
4815
4816     Comments.push_back(FormatTok);
4817   } while (!eof());
4818
4819   distributeComments(Comments, nullptr);
4820   Comments.clear();
4821 }
4822
4823 namespace {
4824 template <typename Iterator>
4825 void pushTokens(Iterator Begin, Iterator End,
4826                 llvm::SmallVectorImpl<FormatToken *> &Into) {
4827   for (auto I = Begin; I != End; ++I) {
4828     Into.push_back(I->Tok);
4829     for (const auto &Child : I->Children)
4830       pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
4831   }
4832 }
4833 } // namespace
4834
4835 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
4836 UnwrappedLineParser::parseMacroCall() {
4837   std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
4838   assert(Line->Tokens.empty());
4839   nextToken();
4840   if (FormatTok->isNot(tok::l_paren))
4841     return Args;
4842   unsigned Position = Tokens->getPosition();
4843   FormatToken *Tok = FormatTok;
4844   nextToken();
4845   Args.emplace();
4846   auto ArgStart = std::prev(Line->Tokens.end());
4847
4848   int Parens = 0;
4849   do {
4850     switch (FormatTok->Tok.getKind()) {
4851     case tok::l_paren:
4852       ++Parens;
4853       nextToken();
4854       break;
4855     case tok::r_paren: {
4856       if (Parens > 0) {
4857         --Parens;
4858         nextToken();
4859         break;
4860       }
4861       Args->push_back({});
4862       pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
4863       nextToken();
4864       return Args;
4865     }
4866     case tok::comma: {
4867       if (Parens > 0) {
4868         nextToken();
4869         break;
4870       }
4871       Args->push_back({});
4872       pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
4873       nextToken();
4874       ArgStart = std::prev(Line->Tokens.end());
4875       break;
4876     }
4877     default:
4878       nextToken();
4879       break;
4880     }
4881   } while (!eof());
4882   Line->Tokens.resize(1);
4883   Tokens->setPosition(Position);
4884   FormatTok = Tok;
4885   return {};
4886 }
4887
4888 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
4889   Line->Tokens.push_back(UnwrappedLineNode(Tok));
4890   if (MustBreakBeforeNextToken) {
4891     Line->Tokens.back().Tok->MustBreakBefore = true;
4892     MustBreakBeforeNextToken = false;
4893   }
4894 }
4895
4896 } // end namespace format
4897 } // end namespace clang