clang/lib/Format/UnwrappedLineParser.cpp

   1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 ///
   9 /// \file
  10 /// This file contains the implementation of the UnwrappedLineParser,
  11 /// which turns a stream of tokens into UnwrappedLines.
  12 ///
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "UnwrappedLineParser.h"
  16 #include "FormatToken.h"
  17 #include "FormatTokenLexer.h"
  18 #include "FormatTokenSource.h"
  19 #include "Macros.h"
  20 #include "TokenAnnotator.h"
  21 #include "clang/Basic/TokenKinds.h"
  22 #include "llvm/ADT/STLExtras.h"
  23 #include "llvm/ADT/StringRef.h"
  24 #include "llvm/Support/Debug.h"
  25 #include "llvm/Support/raw_os_ostream.h"
  26 #include "llvm/Support/raw_ostream.h"
  27
  28 #include <algorithm>
  29 #include <utility>
  30
  31 #define DEBUG_TYPE "format-parser"
  32
  33 namespace clang {
  34 namespace format {
  35
  36 namespace {
  37
  38 void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
  39                StringRef Prefix = "", bool PrintText = false) {
  40   OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
  41      << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
  42   bool NewLine = false;
  43   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
  44                                                     E = Line.Tokens.end();
  45        I != E; ++I) {
  46     if (NewLine) {
  47       OS << Prefix;
  48       NewLine = false;
  49     }
  50     OS << I->Tok->Tok.getName() << "["
  51        << "T=" << (unsigned)I->Tok->getType()
  52        << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
  53        << "\"] ";
  54     for (SmallVectorImpl<UnwrappedLine>::const_iterator
  55              CI = I->Children.begin(),
  56              CE = I->Children.end();
  57          CI != CE; ++CI) {
  58       OS << "\n";
  59       printLine(OS, *CI, (Prefix + "  ").str());
  60       NewLine = true;
  61     }
  62   }
  63   if (!NewLine)
  64     OS << "\n";
  65 }
  66
  67 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
  68   printLine(llvm::dbgs(), Line);
  69 }
  70
  71 class ScopedDeclarationState {
  72 public:
  73   ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
  74                          bool MustBeDeclaration)
  75       : Line(Line), Stack(Stack) {
  76     Line.MustBeDeclaration = MustBeDeclaration;
  77     Stack.push_back(MustBeDeclaration);
  78   }
  79   ~ScopedDeclarationState() {
  80     Stack.pop_back();
  81     if (!Stack.empty())
  82       Line.MustBeDeclaration = Stack.back();
  83     else
  84       Line.MustBeDeclaration = true;
  85   }
  86
  87 private:
  88   UnwrappedLine &Line;
  89   llvm::BitVector &Stack;
  90 };
  91
  92 } // end anonymous namespace
  93
  94 class ScopedLineState {
  95 public:
  96   ScopedLineState(UnwrappedLineParser &Parser,
  97                   bool SwitchToPreprocessorLines = false)
  98       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
  99     if (SwitchToPreprocessorLines)
 100       Parser.CurrentLines = &Parser.PreprocessorDirectives;
 101     else if (!Parser.Line->Tokens.empty())
 102       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
 103     PreBlockLine = std::move(Parser.Line);
 104     Parser.Line = std::make_unique<UnwrappedLine>();
 105     Parser.Line->Level = PreBlockLine->Level;
 106     Parser.Line->PPLevel = PreBlockLine->PPLevel;
 107     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
 108     Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
 109   }
 110
 111   ~ScopedLineState() {
 112     if (!Parser.Line->Tokens.empty())
 113       Parser.addUnwrappedLine();
 114     assert(Parser.Line->Tokens.empty());
 115     Parser.Line = std::move(PreBlockLine);
 116     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
 117       Parser.MustBreakBeforeNextToken = true;
 118     Parser.CurrentLines = OriginalLines;
 119   }
 120
 121 private:
 122   UnwrappedLineParser &Parser;
 123
 124   std::unique_ptr<UnwrappedLine> PreBlockLine;
 125   SmallVectorImpl<UnwrappedLine> *OriginalLines;
 126 };
 127
 128 class CompoundStatementIndenter {
 129 public:
 130   CompoundStatementIndenter(UnwrappedLineParser *Parser,
 131                             const FormatStyle &Style, unsigned &LineLevel)
 132       : CompoundStatementIndenter(Parser, LineLevel,
 133                                   Style.BraceWrapping.AfterControlStatement,
 134                                   Style.BraceWrapping.IndentBraces) {}
 135   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
 136                             bool WrapBrace, bool IndentBrace)
 137       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
 138     if (WrapBrace)
 139       Parser->addUnwrappedLine();
 140     if (IndentBrace)
 141       ++LineLevel;
 142   }
 143   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
 144
 145 private:
 146   unsigned &LineLevel;
 147   unsigned OldLineLevel;
 148 };
 149
 150 UnwrappedLineParser::UnwrappedLineParser(
 151     SourceManager &SourceMgr, const FormatStyle &Style,
 152     const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
 153     ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
 154     llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
 155     IdentifierTable &IdentTable)
 156     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
 157       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
 158       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
 159       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
 160       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
 161                        ? IG_Rejected
 162                        : IG_Inited),
 163       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
 164       Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {}
 165
 166 void UnwrappedLineParser::reset() {
 167   PPBranchLevel = -1;
 168   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
 169                      ? IG_Rejected
 170                      : IG_Inited;
 171   IncludeGuardToken = nullptr;
 172   Line.reset(new UnwrappedLine);
 173   CommentsBeforeNextToken.clear();
 174   FormatTok = nullptr;
 175   MustBreakBeforeNextToken = false;
 176   PreprocessorDirectives.clear();
 177   CurrentLines = &Lines;
 178   DeclarationScopeStack.clear();
 179   NestedTooDeep.clear();
 180   PPStack.clear();
 181   Line->FirstStartColumn = FirstStartColumn;
 182
 183   if (!Unexpanded.empty())
 184     for (FormatToken *Token : AllTokens)
 185       Token->MacroCtx.reset();
 186   CurrentExpandedLines.clear();
 187   ExpandedLines.clear();
 188   Unexpanded.clear();
 189   InExpansion = false;
 190   Reconstruct.reset();
 191 }
 192
 193 void UnwrappedLineParser::parse() {
 194   IndexedTokenSource TokenSource(AllTokens);
 195   Line->FirstStartColumn = FirstStartColumn;
 196   do {
 197     LLVM_DEBUG(llvm::dbgs() << "----\n");
 198     reset();
 199     Tokens = &TokenSource;
 200     TokenSource.reset();
 201
 202     readToken();
 203     parseFile();
 204
 205     // If we found an include guard then all preprocessor directives (other than
 206     // the guard) are over-indented by one.
 207     if (IncludeGuard == IG_Found) {
 208       for (auto &Line : Lines)
 209         if (Line.InPPDirective && Line.Level > 0)
 210           --Line.Level;
 211     }
 212
 213     // Create line with eof token.
 214     assert(FormatTok->is(tok::eof));
 215     pushToken(FormatTok);
 216     addUnwrappedLine();
 217
 218     // In a first run, format everything with the lines containing macro calls
 219     // replaced by the expansion.
 220     if (!ExpandedLines.empty()) {
 221       LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
 222       for (const auto &Line : Lines) {
 223         if (!Line.Tokens.empty()) {
 224           auto it = ExpandedLines.find(Line.Tokens.begin()->Tok);
 225           if (it != ExpandedLines.end()) {
 226             for (const auto &Expanded : it->second) {
 227               LLVM_DEBUG(printDebugInfo(Expanded));
 228               Callback.consumeUnwrappedLine(Expanded);
 229             }
 230             continue;
 231           }
 232         }
 233         LLVM_DEBUG(printDebugInfo(Line));
 234         Callback.consumeUnwrappedLine(Line);
 235       }
 236       Callback.finishRun();
 237     }
 238
 239     LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
 240     for (const UnwrappedLine &Line : Lines) {
 241       LLVM_DEBUG(printDebugInfo(Line));
 242       Callback.consumeUnwrappedLine(Line);
 243     }
 244     Callback.finishRun();
 245     Lines.clear();
 246     while (!PPLevelBranchIndex.empty() &&
 247            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
 248       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
 249       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
 250     }
 251     if (!PPLevelBranchIndex.empty()) {
 252       ++PPLevelBranchIndex.back();
 253       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
 254       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
 255     }
 256   } while (!PPLevelBranchIndex.empty());
 257 }
 258
 259 void UnwrappedLineParser::parseFile() {
 260   // The top-level context in a file always has declarations, except for pre-
 261   // processor directives and JavaScript files.
 262   bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
 263   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 264                                           MustBeDeclaration);
 265   if (Style.Language == FormatStyle::LK_TextProto)
 266     parseBracedList();
 267   else
 268     parseLevel();
 269   // Make sure to format the remaining tokens.
 270   //
 271   // LK_TextProto is special since its top-level is parsed as the body of a
 272   // braced list, which does not necessarily have natural line separators such
 273   // as a semicolon. Comments after the last entry that have been determined to
 274   // not belong to that line, as in:
 275   //   key: value
 276   //   // endfile comment
 277   // do not have a chance to be put on a line of their own until this point.
 278   // Here we add this newline before end-of-file comments.
 279   if (Style.Language == FormatStyle::LK_TextProto &&
 280       !CommentsBeforeNextToken.empty()) {
 281     addUnwrappedLine();
 282   }
 283   flushComments(true);
 284   addUnwrappedLine();
 285 }
 286
 287 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
 288   do {
 289     switch (FormatTok->Tok.getKind()) {
 290     case tok::l_brace:
 291       return;
 292     default:
 293       if (FormatTok->is(Keywords.kw_where)) {
 294         addUnwrappedLine();
 295         nextToken();
 296         parseCSharpGenericTypeConstraint();
 297         break;
 298       }
 299       nextToken();
 300       break;
 301     }
 302   } while (!eof());
 303 }
 304
 305 void UnwrappedLineParser::parseCSharpAttribute() {
 306   int UnpairedSquareBrackets = 1;
 307   do {
 308     switch (FormatTok->Tok.getKind()) {
 309     case tok::r_square:
 310       nextToken();
 311       --UnpairedSquareBrackets;
 312       if (UnpairedSquareBrackets == 0) {
 313         addUnwrappedLine();
 314         return;
 315       }
 316       break;
 317     case tok::l_square:
 318       ++UnpairedSquareBrackets;
 319       nextToken();
 320       break;
 321     default:
 322       nextToken();
 323       break;
 324     }
 325   } while (!eof());
 326 }
 327
 328 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
 329   if (!Lines.empty() && Lines.back().InPPDirective)
 330     return true;
 331
 332   const FormatToken *Previous = Tokens->getPreviousToken();
 333   return Previous && Previous->is(tok::comment) &&
 334          (Previous->IsMultiline || Previous->NewlinesBefore > 0);
 335 }
 336
 337 /// \brief Parses a level, that is ???.
 338 /// \param OpeningBrace Opening brace (\p nullptr if absent) of that level
 339 /// \param CanContainBracedList If the content can contain (at any level) a
 340 /// braced list.
 341 /// \param NextLBracesType The type for left brace found in this level.
 342 /// \param IfKind The \p if statement kind in the level.
 343 /// \param IfLeftBrace The left brace of the \p if block in the level.
 344 /// \returns true if a simple block of if/else/for/while, or false otherwise.
 345 /// (A simple block has a single statement.)
 346 bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
 347                                      bool CanContainBracedList,
 348                                      TokenType NextLBracesType,
 349                                      IfStmtKind *IfKind,
 350                                      FormatToken **IfLeftBrace) {
 351   auto NextLevelLBracesType = NextLBracesType == TT_CompoundRequirementLBrace
 352                                   ? TT_BracedListLBrace
 353                                   : TT_Unknown;
 354   const bool IsPrecededByCommentOrPPDirective =
 355       !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
 356   FormatToken *IfLBrace = nullptr;
 357   bool HasDoWhile = false;
 358   bool HasLabel = false;
 359   unsigned StatementCount = 0;
 360   bool SwitchLabelEncountered = false;
 361
 362   do {
 363     if (FormatTok->getType() == TT_AttributeMacro) {
 364       nextToken();
 365       continue;
 366     }
 367     tok::TokenKind kind = FormatTok->Tok.getKind();
 368     if (FormatTok->getType() == TT_MacroBlockBegin)
 369       kind = tok::l_brace;
 370     else if (FormatTok->getType() == TT_MacroBlockEnd)
 371       kind = tok::r_brace;
 372
 373     auto ParseDefault = [this, OpeningBrace, NextLevelLBracesType, IfKind,
 374                          &IfLBrace, &HasDoWhile, &HasLabel, &StatementCount] {
 375       parseStructuralElement(!OpeningBrace, NextLevelLBracesType, IfKind,
 376                              &IfLBrace, HasDoWhile ? nullptr : &HasDoWhile,
 377                              HasLabel ? nullptr : &HasLabel);
 378       ++StatementCount;
 379       assert(StatementCount > 0 && "StatementCount overflow!");
 380     };
 381
 382     switch (kind) {
 383     case tok::comment:
 384       nextToken();
 385       addUnwrappedLine();
 386       break;
 387     case tok::l_brace:
 388       if (NextLBracesType != TT_Unknown) {
 389         FormatTok->setFinalizedType(NextLBracesType);
 390       } else if (FormatTok->Previous &&
 391                  FormatTok->Previous->ClosesRequiresClause) {
 392         // We need the 'default' case here to correctly parse a function
 393         // l_brace.
 394         ParseDefault();
 395         continue;
 396       }
 397       if (CanContainBracedList && !FormatTok->is(TT_MacroBlockBegin) &&
 398           tryToParseBracedList()) {
 399         continue;
 400       }
 401       parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
 402                  /*MunchSemi=*/true, /*KeepBraces=*/true, /*IfKind=*/nullptr,
 403                  /*UnindentWhitesmithsBraces=*/false, CanContainBracedList,
 404                  NextLBracesType);
 405       ++StatementCount;
 406       assert(StatementCount > 0 && "StatementCount overflow!");
 407       addUnwrappedLine();
 408       break;
 409     case tok::r_brace:
 410       if (OpeningBrace) {
 411         if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
 412             !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
 413           return false;
 414         }
 415         if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
 416             HasDoWhile || IsPrecededByCommentOrPPDirective ||
 417             precededByCommentOrPPDirective()) {
 418           return false;
 419         }
 420         const FormatToken *Next = Tokens->peekNextToken();
 421         if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
 422           return false;
 423         if (IfLeftBrace)
 424           *IfLeftBrace = IfLBrace;
 425         return true;
 426       }
 427       nextToken();
 428       addUnwrappedLine();
 429       break;
 430     case tok::kw_default: {
 431       unsigned StoredPosition = Tokens->getPosition();
 432       FormatToken *Next;
 433       do {
 434         Next = Tokens->getNextToken();
 435         assert(Next);
 436       } while (Next->is(tok::comment));
 437       FormatTok = Tokens->setPosition(StoredPosition);
 438       if (Next->isNot(tok::colon)) {
 439         // default not followed by ':' is not a case label; treat it like
 440         // an identifier.
 441         parseStructuralElement();
 442         break;
 443       }
 444       // Else, if it is 'default:', fall through to the case handling.
 445       [[fallthrough]];
 446     }
 447     case tok::kw_case:
 448       if (Style.isProto() || Style.isVerilog() ||
 449           (Style.isJavaScript() && Line->MustBeDeclaration)) {
 450         // Proto: there are no switch/case statements
 451         // Verilog: Case labels don't have this word. We handle case
 452         // labels including default in TokenAnnotator.
 453         // JavaScript: A 'case: string' style field declaration.
 454         ParseDefault();
 455         break;
 456       }
 457       if (!SwitchLabelEncountered &&
 458           (Style.IndentCaseLabels ||
 459            (Line->InPPDirective && Line->Level == 1))) {
 460         ++Line->Level;
 461       }
 462       SwitchLabelEncountered = true;
 463       parseStructuralElement();
 464       break;
 465     case tok::l_square:
 466       if (Style.isCSharp()) {
 467         nextToken();
 468         parseCSharpAttribute();
 469         break;
 470       }
 471       if (handleCppAttributes())
 472         break;
 473       [[fallthrough]];
 474     default:
 475       ParseDefault();
 476       break;
 477     }
 478   } while (!eof());
 479
 480   return false;
 481 }
 482
 483 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
 484   // We'll parse forward through the tokens until we hit
 485   // a closing brace or eof - note that getNextToken() will
 486   // parse macros, so this will magically work inside macro
 487   // definitions, too.
 488   unsigned StoredPosition = Tokens->getPosition();
 489   FormatToken *Tok = FormatTok;
 490   const FormatToken *PrevTok = Tok->Previous;
 491   // Keep a stack of positions of lbrace tokens. We will
 492   // update information about whether an lbrace starts a
 493   // braced init list or a different block during the loop.
 494   struct StackEntry {
 495     FormatToken *Tok;
 496     const FormatToken *PrevTok;
 497   };
 498   SmallVector<StackEntry, 8> LBraceStack;
 499   assert(Tok->is(tok::l_brace));
 500   do {
 501     // Get next non-comment token.
 502     FormatToken *NextTok;
 503     do {
 504       NextTok = Tokens->getNextToken();
 505     } while (NextTok->is(tok::comment));
 506
 507     switch (Tok->Tok.getKind()) {
 508     case tok::l_brace:
 509       if (Style.isJavaScript() && PrevTok) {
 510         if (PrevTok->isOneOf(tok::colon, tok::less)) {
 511           // A ':' indicates this code is in a type, or a braced list
 512           // following a label in an object literal ({a: {b: 1}}).
 513           // A '<' could be an object used in a comparison, but that is nonsense
 514           // code (can never return true), so more likely it is a generic type
 515           // argument (`X<{a: string; b: number}>`).
 516           // The code below could be confused by semicolons between the
 517           // individual members in a type member list, which would normally
 518           // trigger BK_Block. In both cases, this must be parsed as an inline
 519           // braced init.
 520           Tok->setBlockKind(BK_BracedInit);
 521         } else if (PrevTok->is(tok::r_paren)) {
 522           // `) { }` can only occur in function or method declarations in JS.
 523           Tok->setBlockKind(BK_Block);
 524         }
 525       } else {
 526         Tok->setBlockKind(BK_Unknown);
 527       }
 528       LBraceStack.push_back({Tok, PrevTok});
 529       break;
 530     case tok::r_brace:
 531       if (LBraceStack.empty())
 532         break;
 533       if (LBraceStack.back().Tok->is(BK_Unknown)) {
 534         bool ProbablyBracedList = false;
 535         if (Style.Language == FormatStyle::LK_Proto) {
 536           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
 537         } else {
 538           // Skip NextTok over preprocessor lines, otherwise we may not
 539           // properly diagnose the block as a braced intializer
 540           // if the comma separator appears after the pp directive.
 541           while (NextTok->is(tok::hash)) {
 542             ScopedMacroState MacroState(*Line, Tokens, NextTok);
 543             do {
 544               NextTok = Tokens->getNextToken();
 545             } while (NextTok->isNot(tok::eof));
 546           }
 547
 548           // Using OriginalColumn to distinguish between ObjC methods and
 549           // binary operators is a bit hacky.
 550           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
 551                                   NextTok->OriginalColumn == 0;
 552
 553           // Try to detect a braced list. Note that regardless how we mark inner
 554           // braces here, we will overwrite the BlockKind later if we parse a
 555           // braced list (where all blocks inside are by default braced lists),
 556           // or when we explicitly detect blocks (for example while parsing
 557           // lambdas).
 558
 559           // If we already marked the opening brace as braced list, the closing
 560           // must also be part of it.
 561           ProbablyBracedList = LBraceStack.back().Tok->is(TT_BracedListLBrace);
 562
 563           ProbablyBracedList = ProbablyBracedList ||
 564                                (Style.isJavaScript() &&
 565                                 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
 566                                                  Keywords.kw_as));
 567           ProbablyBracedList = ProbablyBracedList ||
 568                                (Style.isCpp() && NextTok->is(tok::l_paren));
 569
 570           // If there is a comma, semicolon or right paren after the closing
 571           // brace, we assume this is a braced initializer list.
 572           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
 573           // braced list in JS.
 574           ProbablyBracedList =
 575               ProbablyBracedList ||
 576               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
 577                                tok::r_paren, tok::r_square, tok::ellipsis);
 578
 579           // Distinguish between braced list in a constructor initializer list
 580           // followed by constructor body, or just adjacent blocks.
 581           ProbablyBracedList =
 582               ProbablyBracedList ||
 583               (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok &&
 584                LBraceStack.back().PrevTok->is(tok::identifier));
 585
 586           ProbablyBracedList =
 587               ProbablyBracedList ||
 588               (NextTok->is(tok::identifier) &&
 589                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));
 590
 591           ProbablyBracedList = ProbablyBracedList ||
 592                                (NextTok->is(tok::semi) &&
 593                                 (!ExpectClassBody || LBraceStack.size() != 1));
 594
 595           ProbablyBracedList =
 596               ProbablyBracedList ||
 597               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
 598
 599           if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
 600             // We can have an array subscript after a braced init
 601             // list, but C++11 attributes are expected after blocks.
 602             NextTok = Tokens->getNextToken();
 603             ProbablyBracedList = NextTok->isNot(tok::l_square);
 604           }
 605         }
 606         if (ProbablyBracedList) {
 607           Tok->setBlockKind(BK_BracedInit);
 608           LBraceStack.back().Tok->setBlockKind(BK_BracedInit);
 609         } else {
 610           Tok->setBlockKind(BK_Block);
 611           LBraceStack.back().Tok->setBlockKind(BK_Block);
 612         }
 613       }
 614       LBraceStack.pop_back();
 615       break;
 616     case tok::identifier:
 617       if (!Tok->is(TT_StatementMacro))
 618         break;
 619       [[fallthrough]];
 620     case tok::at:
 621     case tok::semi:
 622     case tok::kw_if:
 623     case tok::kw_while:
 624     case tok::kw_for:
 625     case tok::kw_switch:
 626     case tok::kw_try:
 627     case tok::kw___try:
 628       if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown))
 629         LBraceStack.back().Tok->setBlockKind(BK_Block);
 630       break;
 631     default:
 632       break;
 633     }
 634     PrevTok = Tok;
 635     Tok = NextTok;
 636   } while (Tok->isNot(tok::eof) && !LBraceStack.empty());
 637
 638   // Assume other blocks for all unclosed opening braces.
 639   for (const auto &Entry : LBraceStack)
 640     if (Entry.Tok->is(BK_Unknown))
 641       Entry.Tok->setBlockKind(BK_Block);
 642
 643   FormatTok = Tokens->setPosition(StoredPosition);
 644 }
 645
 646 template <class T>
 647 static inline void hash_combine(std::size_t &seed, const T &v) {
 648   std::hash<T> hasher;
 649   seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
 650 }
 651
 652 size_t UnwrappedLineParser::computePPHash() const {
 653   size_t h = 0;
 654   for (const auto &i : PPStack) {
 655     hash_combine(h, size_t(i.Kind));
 656     hash_combine(h, i.Line);
 657   }
 658   return h;
 659 }
 660
 661 // Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
 662 // is not null, subtracts its length (plus the preceding space) when computing
 663 // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
 664 // running the token annotator on it so that we can restore them afterward.
 665 bool UnwrappedLineParser::mightFitOnOneLine(
 666     UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
 667   const auto ColumnLimit = Style.ColumnLimit;
 668   if (ColumnLimit == 0)
 669     return true;
 670
 671   auto &Tokens = ParsedLine.Tokens;
 672   assert(!Tokens.empty());
 673
 674   const auto *LastToken = Tokens.back().Tok;
 675   assert(LastToken);
 676
 677   SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
 678
 679   int Index = 0;
 680   for (const auto &Token : Tokens) {
 681     assert(Token.Tok);
 682     auto &SavedToken = SavedTokens[Index++];
 683     SavedToken.Tok = new FormatToken;
 684     SavedToken.Tok->copyFrom(*Token.Tok);
 685     SavedToken.Children = std::move(Token.Children);
 686   }
 687
 688   AnnotatedLine Line(ParsedLine);
 689   assert(Line.Last == LastToken);
 690
 691   TokenAnnotator Annotator(Style, Keywords);
 692   Annotator.annotate(Line);
 693   Annotator.calculateFormattingInformation(Line);
 694
 695   auto Length = LastToken->TotalLength;
 696   if (OpeningBrace) {
 697     assert(OpeningBrace != Tokens.front().Tok);
 698     if (auto Prev = OpeningBrace->Previous;
 699         Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
 700       Length -= ColumnLimit;
 701     }
 702     Length -= OpeningBrace->TokenText.size() + 1;
 703   }
 704
 705   if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) {
 706     assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
 707     Length -= FirstToken->TokenText.size() + 1;
 708   }
 709
 710   Index = 0;
 711   for (auto &Token : Tokens) {
 712     const auto &SavedToken = SavedTokens[Index++];
 713     Token.Tok->copyFrom(*SavedToken.Tok);
 714     Token.Children = std::move(SavedToken.Children);
 715     delete SavedToken.Tok;
 716   }
 717
 718   // If these change PPLevel needs to be used for get correct indentation.
 719   assert(!Line.InMacroBody);
 720   assert(!Line.InPPDirective);
 721   return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
 722 }
 723
 724 FormatToken *UnwrappedLineParser::parseBlock(
 725     bool MustBeDeclaration, unsigned AddLevels, bool MunchSemi, bool KeepBraces,
 726     IfStmtKind *IfKind, bool UnindentWhitesmithsBraces,
 727     bool CanContainBracedList, TokenType NextLBracesType) {
 728   auto HandleVerilogBlockLabel = [this]() {
 729     // ":" name
 730     if (Style.isVerilog() && FormatTok->is(tok::colon)) {
 731       nextToken();
 732       if (Keywords.isVerilogIdentifier(*FormatTok))
 733         nextToken();
 734     }
 735   };
 736
 737   // Whether this is a Verilog-specific block that has a special header like a
 738   // module.
 739   const bool VerilogHierarchy =
 740       Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok);
 741   assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
 742           (Style.isVerilog() &&
 743            (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
 744          "'{' or macro block token expected");
 745   FormatToken *Tok = FormatTok;
 746   const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
 747   auto Index = CurrentLines->size();
 748   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
 749   FormatTok->setBlockKind(BK_Block);
 750
 751   // For Whitesmiths mode, jump to the next level prior to skipping over the
 752   // braces.
 753   if (!VerilogHierarchy && AddLevels > 0 &&
 754       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
 755     ++Line->Level;
 756   }
 757
 758   size_t PPStartHash = computePPHash();
 759
 760   const unsigned InitialLevel = Line->Level;
 761   if (VerilogHierarchy) {
 762     AddLevels += parseVerilogHierarchyHeader();
 763   } else {
 764     nextToken(/*LevelDifference=*/AddLevels);
 765     HandleVerilogBlockLabel();
 766   }
 767
 768   // Bail out if there are too many levels. Otherwise, the stack might overflow.
 769   if (Line->Level > 300)
 770     return nullptr;
 771
 772   if (MacroBlock && FormatTok->is(tok::l_paren))
 773     parseParens();
 774
 775   size_t NbPreprocessorDirectives =
 776       !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
 777   addUnwrappedLine();
 778   size_t OpeningLineIndex =
 779       CurrentLines->empty()
 780           ? (UnwrappedLine::kInvalidIndex)
 781           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
 782
 783   // Whitesmiths is weird here. The brace needs to be indented for the namespace
 784   // block, but the block itself may not be indented depending on the style
 785   // settings. This allows the format to back up one level in those cases.
 786   if (UnindentWhitesmithsBraces)
 787     --Line->Level;
 788
 789   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 790                                           MustBeDeclaration);
 791   if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
 792     Line->Level += AddLevels;
 793
 794   FormatToken *IfLBrace = nullptr;
 795   const bool SimpleBlock =
 796       parseLevel(Tok, CanContainBracedList, NextLBracesType, IfKind, &IfLBrace);
 797
 798   if (eof())
 799     return IfLBrace;
 800
 801   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
 802                  : !FormatTok->is(tok::r_brace)) {
 803     Line->Level = InitialLevel;
 804     FormatTok->setBlockKind(BK_Block);
 805     return IfLBrace;
 806   }
 807
 808   const bool IsFunctionRBrace =
 809       FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace);
 810
 811   auto RemoveBraces = [=]() mutable {
 812     if (!SimpleBlock)
 813       return false;
 814     assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
 815     assert(FormatTok->is(tok::r_brace));
 816     const bool WrappedOpeningBrace = !Tok->Previous;
 817     if (WrappedOpeningBrace && FollowedByComment)
 818       return false;
 819     const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
 820     if (KeepBraces && !HasRequiredIfBraces)
 821       return false;
 822     if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
 823       const FormatToken *Previous = Tokens->getPreviousToken();
 824       assert(Previous);
 825       if (Previous->is(tok::r_brace) && !Previous->Optional)
 826         return false;
 827     }
 828     assert(!CurrentLines->empty());
 829     auto &LastLine = CurrentLines->back();
 830     if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
 831       return false;
 832     if (Tok->is(TT_ElseLBrace))
 833       return true;
 834     if (WrappedOpeningBrace) {
 835       assert(Index > 0);
 836       --Index; // The line above the wrapped l_brace.
 837       Tok = nullptr;
 838     }
 839     return mightFitOnOneLine((*CurrentLines)[Index], Tok);
 840   };
 841   if (RemoveBraces()) {
 842     Tok->MatchingParen = FormatTok;
 843     FormatTok->MatchingParen = Tok;
 844   }
 845
 846   size_t PPEndHash = computePPHash();
 847
 848   // Munch the closing brace.
 849   nextToken(/*LevelDifference=*/-AddLevels);
 850
 851   // When this is a function block and there is an unnecessary semicolon
 852   // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
 853   // it later).
 854   if (Style.RemoveSemicolon && IsFunctionRBrace) {
 855     while (FormatTok->is(tok::semi)) {
 856       FormatTok->Optional = true;
 857       nextToken();
 858     }
 859   }
 860
 861   HandleVerilogBlockLabel();
 862
 863   if (MacroBlock && FormatTok->is(tok::l_paren))
 864     parseParens();
 865
 866   Line->Level = InitialLevel;
 867
 868   if (FormatTok->is(tok::kw_noexcept)) {
 869     // A noexcept in a requires expression.
 870     nextToken();
 871   }
 872
 873   if (FormatTok->is(tok::arrow)) {
 874     // Following the } or noexcept we can find a trailing return type arrow
 875     // as part of an implicit conversion constraint.
 876     nextToken();
 877     parseStructuralElement();
 878   }
 879
 880   if (MunchSemi && FormatTok->is(tok::semi))
 881     nextToken();
 882
 883   if (PPStartHash == PPEndHash) {
 884     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
 885     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
 886       // Update the opening line to add the forward reference as well
 887       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
 888           CurrentLines->size() - 1;
 889     }
 890   }
 891
 892   return IfLBrace;
 893 }
 894
 895 static bool isGoogScope(const UnwrappedLine &Line) {
 896   // FIXME: Closure-library specific stuff should not be hard-coded but be
 897   // configurable.
 898   if (Line.Tokens.size() < 4)
 899     return false;
 900   auto I = Line.Tokens.begin();
 901   if (I->Tok->TokenText != "goog")
 902     return false;
 903   ++I;
 904   if (I->Tok->isNot(tok::period))
 905     return false;
 906   ++I;
 907   if (I->Tok->TokenText != "scope")
 908     return false;
 909   ++I;
 910   return I->Tok->is(tok::l_paren);
 911 }
 912
 913 static bool isIIFE(const UnwrappedLine &Line,
 914                    const AdditionalKeywords &Keywords) {
 915   // Look for the start of an immediately invoked anonymous function.
 916   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
 917   // This is commonly done in JavaScript to create a new, anonymous scope.
 918   // Example: (function() { ... })()
 919   if (Line.Tokens.size() < 3)
 920     return false;
 921   auto I = Line.Tokens.begin();
 922   if (I->Tok->isNot(tok::l_paren))
 923     return false;
 924   ++I;
 925   if (I->Tok->isNot(Keywords.kw_function))
 926     return false;
 927   ++I;
 928   return I->Tok->is(tok::l_paren);
 929 }
 930
 931 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
 932                                    const FormatToken &InitialToken) {
 933   tok::TokenKind Kind = InitialToken.Tok.getKind();
 934   if (InitialToken.is(TT_NamespaceMacro))
 935     Kind = tok::kw_namespace;
 936
 937   switch (Kind) {
 938   case tok::kw_namespace:
 939     return Style.BraceWrapping.AfterNamespace;
 940   case tok::kw_class:
 941     return Style.BraceWrapping.AfterClass;
 942   case tok::kw_union:
 943     return Style.BraceWrapping.AfterUnion;
 944   case tok::kw_struct:
 945     return Style.BraceWrapping.AfterStruct;
 946   case tok::kw_enum:
 947     return Style.BraceWrapping.AfterEnum;
 948   default:
 949     return false;
 950   }
 951 }
 952
 953 void UnwrappedLineParser::parseChildBlock(
 954     bool CanContainBracedList, clang::format::TokenType NextLBracesType) {
 955   assert(FormatTok->is(tok::l_brace));
 956   FormatTok->setBlockKind(BK_Block);
 957   const FormatToken *OpeningBrace = FormatTok;
 958   nextToken();
 959   {
 960     bool SkipIndent = (Style.isJavaScript() &&
 961                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
 962     ScopedLineState LineState(*this);
 963     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 964                                             /*MustBeDeclaration=*/false);
 965     Line->Level += SkipIndent ? 0 : 1;
 966     parseLevel(OpeningBrace, CanContainBracedList, NextLBracesType);
 967     flushComments(isOnNewLine(*FormatTok));
 968     Line->Level -= SkipIndent ? 0 : 1;
 969   }
 970   nextToken();
 971 }
 972
 973 void UnwrappedLineParser::parsePPDirective() {
 974   assert(FormatTok->is(tok::hash) && "'#' expected");
 975   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
 976
 977   nextToken();
 978
 979   if (!FormatTok->Tok.getIdentifierInfo()) {
 980     parsePPUnknown();
 981     return;
 982   }
 983
 984   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
 985   case tok::pp_define:
 986     parsePPDefine();
 987     return;
 988   case tok::pp_if:
 989     parsePPIf(/*IfDef=*/false);
 990     break;
 991   case tok::pp_ifdef:
 992   case tok::pp_ifndef:
 993     parsePPIf(/*IfDef=*/true);
 994     break;
 995   case tok::pp_else:
 996   case tok::pp_elifdef:
 997   case tok::pp_elifndef:
 998   case tok::pp_elif:
 999     parsePPElse();
1000     break;
1001   case tok::pp_endif:
1002     parsePPEndIf();
1003     break;
1004   case tok::pp_pragma:
1005     parsePPPragma();
1006     break;
1007   default:
1008     parsePPUnknown();
1009     break;
1010   }
1011 }
1012
1013 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1014   size_t Line = CurrentLines->size();
1015   if (CurrentLines == &PreprocessorDirectives)
1016     Line += Lines.size();
1017
1018   if (Unreachable ||
1019       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1020     PPStack.push_back({PP_Unreachable, Line});
1021   } else {
1022     PPStack.push_back({PP_Conditional, Line});
1023   }
1024 }
1025
1026 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1027   ++PPBranchLevel;
1028   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1029   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1030     PPLevelBranchIndex.push_back(0);
1031     PPLevelBranchCount.push_back(0);
1032   }
1033   PPChainBranchIndex.push(Unreachable ? -1 : 0);
1034   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1035   conditionalCompilationCondition(Unreachable || Skip);
1036 }
1037
1038 void UnwrappedLineParser::conditionalCompilationAlternative() {
1039   if (!PPStack.empty())
1040     PPStack.pop_back();
1041   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1042   if (!PPChainBranchIndex.empty())
1043     ++PPChainBranchIndex.top();
1044   conditionalCompilationCondition(
1045       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1046       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1047 }
1048
1049 void UnwrappedLineParser::conditionalCompilationEnd() {
1050   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1051   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1052     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1053       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1054   }
1055   // Guard against #endif's without #if.
1056   if (PPBranchLevel > -1)
1057     --PPBranchLevel;
1058   if (!PPChainBranchIndex.empty())
1059     PPChainBranchIndex.pop();
1060   if (!PPStack.empty())
1061     PPStack.pop_back();
1062 }
1063
1064 void UnwrappedLineParser::parsePPIf(bool IfDef) {
1065   bool IfNDef = FormatTok->is(tok::pp_ifndef);
1066   nextToken();
1067   bool Unreachable = false;
1068   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1069     Unreachable = true;
1070   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1071     Unreachable = true;
1072   conditionalCompilationStart(Unreachable);
1073   FormatToken *IfCondition = FormatTok;
1074   // If there's a #ifndef on the first line, and the only lines before it are
1075   // comments, it could be an include guard.
1076   bool MaybeIncludeGuard = IfNDef;
1077   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1078     for (auto &Line : Lines) {
1079       if (!Line.Tokens.front().Tok->is(tok::comment)) {
1080         MaybeIncludeGuard = false;
1081         IncludeGuard = IG_Rejected;
1082         break;
1083       }
1084     }
1085   }
1086   --PPBranchLevel;
1087   parsePPUnknown();
1088   ++PPBranchLevel;
1089   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1090     IncludeGuard = IG_IfNdefed;
1091     IncludeGuardToken = IfCondition;
1092   }
1093 }
1094
1095 void UnwrappedLineParser::parsePPElse() {
1096   // If a potential include guard has an #else, it's not an include guard.
1097   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1098     IncludeGuard = IG_Rejected;
1099   // Don't crash when there is an #else without an #if.
1100   assert(PPBranchLevel >= -1);
1101   if (PPBranchLevel == -1)
1102     conditionalCompilationStart(/*Unreachable=*/true);
1103   conditionalCompilationAlternative();
1104   --PPBranchLevel;
1105   parsePPUnknown();
1106   ++PPBranchLevel;
1107 }
1108
1109 void UnwrappedLineParser::parsePPEndIf() {
1110   conditionalCompilationEnd();
1111   parsePPUnknown();
1112   // If the #endif of a potential include guard is the last thing in the file,
1113   // then we found an include guard.
1114   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1115       Style.IndentPPDirectives != FormatStyle::PPDIS_None) {
1116     IncludeGuard = IG_Found;
1117   }
1118 }
1119
1120 void UnwrappedLineParser::parsePPDefine() {
1121   nextToken();
1122
1123   if (!FormatTok->Tok.getIdentifierInfo()) {
1124     IncludeGuard = IG_Rejected;
1125     IncludeGuardToken = nullptr;
1126     parsePPUnknown();
1127     return;
1128   }
1129
1130   if (IncludeGuard == IG_IfNdefed &&
1131       IncludeGuardToken->TokenText == FormatTok->TokenText) {
1132     IncludeGuard = IG_Defined;
1133     IncludeGuardToken = nullptr;
1134     for (auto &Line : Lines) {
1135       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1136         IncludeGuard = IG_Rejected;
1137         break;
1138       }
1139     }
1140   }
1141
1142   // In the context of a define, even keywords should be treated as normal
1143   // identifiers. Setting the kind to identifier is not enough, because we need
1144   // to treat additional keywords like __except as well, which are already
1145   // identifiers. Setting the identifier info to null interferes with include
1146   // guard processing above, and changes preprocessing nesting.
1147   FormatTok->Tok.setKind(tok::identifier);
1148   FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1149   nextToken();
1150   if (FormatTok->Tok.getKind() == tok::l_paren &&
1151       !FormatTok->hasWhitespaceBefore()) {
1152     parseParens();
1153   }
1154   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1155     Line->Level += PPBranchLevel + 1;
1156   addUnwrappedLine();
1157   ++Line->Level;
1158
1159   Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
1160   assert((int)Line->PPLevel >= 0);
1161   Line->InMacroBody = true;
1162
1163   // Errors during a preprocessor directive can only affect the layout of the
1164   // preprocessor directive, and thus we ignore them. An alternative approach
1165   // would be to use the same approach we use on the file level (no
1166   // re-indentation if there was a structural error) within the macro
1167   // definition.
1168   parseFile();
1169 }
1170
// Parses a #pragma directive: flags the current line as a pragma directive and
// then consumes the rest of the directive like any unknown directive.
void UnwrappedLineParser::parsePPPragma() {
  // The flag must be set before parsePPUnknown() emits the line.
  Line->InPragmaDirective = true;
  parsePPUnknown();
}
1175
1176 void UnwrappedLineParser::parsePPUnknown() {
1177   do {
1178     nextToken();
1179   } while (!eof());
1180   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1181     Line->Level += PPBranchLevel + 1;
1182   addUnwrappedLine();
1183 }
1184
1185 // Here we exclude certain tokens that are not usually the first token in an
1186 // unwrapped line. This is used in attempt to distinguish macro calls without
1187 // trailing semicolons from other constructs split to several lines.
1188 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1189   // Semicolon can be a null-statement, l_square can be a start of a macro or
1190   // a C++11 attribute, but this doesn't seem to be common.
1191   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
1192          Tok.isNot(TT_AttributeSquare) &&
1193          // Tokens that can only be used as binary operators and a part of
1194          // overloaded operator names.
1195          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
1196          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
1197          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
1198          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
1199          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
1200          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
1201          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
1202          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
1203          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
1204          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
1205          Tok.isNot(tok::lesslessequal) &&
1206          // Colon is used in labels, base class lists, initializer lists,
1207          // range-based for loops, ternary operator, but should never be the
1208          // first token in an unwrapped line.
1209          Tok.isNot(tok::colon) &&
1210          // 'noexcept' is a trailing annotation.
1211          Tok.isNot(tok::kw_noexcept);
1212 }
1213
1214 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1215                           const FormatToken *FormatTok) {
1216   // FIXME: This returns true for C/C++ keywords like 'struct'.
1217   return FormatTok->is(tok::identifier) &&
1218          (!FormatTok->Tok.getIdentifierInfo() ||
1219           !FormatTok->isOneOf(
1220               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1221               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1222               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1223               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1224               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1225               Keywords.kw_instanceof, Keywords.kw_interface,
1226               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1227 }
1228
1229 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1230                                  const FormatToken *FormatTok) {
1231   return FormatTok->Tok.isLiteral() ||
1232          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1233          mustBeJSIdent(Keywords, FormatTok);
1234 }
1235
1236 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1237 // when encountered after a value (see mustBeJSIdentOrValue).
1238 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1239                            const FormatToken *FormatTok) {
1240   return FormatTok->isOneOf(
1241       tok::kw_return, Keywords.kw_yield,
1242       // conditionals
1243       tok::kw_if, tok::kw_else,
1244       // loops
1245       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1246       // switch/case
1247       tok::kw_switch, tok::kw_case,
1248       // exceptions
1249       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1250       // declaration
1251       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1252       Keywords.kw_async, Keywords.kw_function,
1253       // import/export
1254       Keywords.kw_import, tok::kw_export);
1255 }
1256
1257 // Checks whether a token is a type in K&R C (aka C78).
1258 static bool isC78Type(const FormatToken &Tok) {
1259   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1260                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1261                      tok::identifier);
1262 }
1263
1264 // This function checks whether a token starts the first parameter declaration
1265 // in a K&R C (aka C78) function definition, e.g.:
1266 //   int f(a, b)
1267 //   short a, b;
1268 //   {
1269 //      return a + b;
1270 //   }
1271 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1272                                const FormatToken *FuncName) {
1273   assert(Tok);
1274   assert(Next);
1275   assert(FuncName);
1276
1277   if (FuncName->isNot(tok::identifier))
1278     return false;
1279
1280   const FormatToken *Prev = FuncName->Previous;
1281   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1282     return false;
1283
1284   if (!isC78Type(*Tok) &&
1285       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
1286     return false;
1287   }
1288
1289   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1290     return false;
1291
1292   Tok = Tok->Previous;
1293   if (!Tok || Tok->isNot(tok::r_paren))
1294     return false;
1295
1296   Tok = Tok->Previous;
1297   if (!Tok || Tok->isNot(tok::identifier))
1298     return false;
1299
1300   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1301 }
1302
1303 bool UnwrappedLineParser::parseModuleImport() {
1304   assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
1305
1306   if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
1307       !Token->Tok.getIdentifierInfo() &&
1308       !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) {
1309     return false;
1310   }
1311
1312   nextToken();
1313   while (!eof()) {
1314     if (FormatTok->is(tok::colon)) {
1315       FormatTok->setFinalizedType(TT_ModulePartitionColon);
1316     }
1317     // Handle import <foo/bar.h> as we would an include statement.
1318     else if (FormatTok->is(tok::less)) {
1319       nextToken();
1320       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1321         // Mark tokens up to the trailing line comments as implicit string
1322         // literals.
1323         if (FormatTok->isNot(tok::comment) &&
1324             !FormatTok->TokenText.startswith("//")) {
1325           FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1326         }
1327         nextToken();
1328       }
1329     }
1330     if (FormatTok->is(tok::semi)) {
1331       nextToken();
1332       break;
1333     }
1334     nextToken();
1335   }
1336
1337   addUnwrappedLine();
1338   return true;
1339 }
1340
1341 // readTokenWithJavaScriptASI reads the next token and terminates the current
1342 // line if JavaScript Automatic Semicolon Insertion must
1343 // happen between the current token and the next token.
1344 //
1345 // This method is conservative - it cannot cover all edge cases of JavaScript,
1346 // but only aims to correctly handle certain well known cases. It *must not*
1347 // return true in speculative cases.
1348 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1349   FormatToken *Previous = FormatTok;
1350   readToken();
1351   FormatToken *Next = FormatTok;
1352
1353   bool IsOnSameLine =
1354       CommentsBeforeNextToken.empty()
1355           ? Next->NewlinesBefore == 0
1356           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1357   if (IsOnSameLine)
1358     return;
1359
1360   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1361   bool PreviousStartsTemplateExpr =
1362       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1363   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1364     // If the line contains an '@' sign, the previous token might be an
1365     // annotation, which can precede another identifier/value.
1366     bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1367       return LineNode.Tok->is(tok::at);
1368     });
1369     if (HasAt)
1370       return;
1371   }
1372   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1373     return addUnwrappedLine();
1374   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1375   bool NextEndsTemplateExpr =
1376       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1377   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1378       (PreviousMustBeValue ||
1379        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1380                          tok::minusminus))) {
1381     return addUnwrappedLine();
1382   }
1383   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1384       isJSDeclOrStmt(Keywords, Next)) {
1385     return addUnwrappedLine();
1386   }
1387 }
1388
1389 void UnwrappedLineParser::parseStructuralElement(
1390     bool IsTopLevel, TokenType NextLBracesType, IfStmtKind *IfKind,
1391     FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1392   if (Style.Language == FormatStyle::LK_TableGen &&
1393       FormatTok->is(tok::pp_include)) {
1394     nextToken();
1395     if (FormatTok->is(tok::string_literal))
1396       nextToken();
1397     addUnwrappedLine();
1398     return;
1399   }
1400
1401   if (Style.isVerilog()) {
1402     if (Keywords.isVerilogStructuredProcedure(*FormatTok)) {
1403       parseForOrWhileLoop(/*HasParens=*/false);
1404       return;
1405     }
1406     if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) {
1407       parseForOrWhileLoop();
1408       return;
1409     }
1410     if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
1411                            Keywords.kw_assume, Keywords.kw_cover)) {
1412       parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
1413       return;
1414     }
1415
1416     // Skip things that can exist before keywords like 'if' and 'case'.
1417     while (true) {
1418       if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique,
1419                              Keywords.kw_unique0)) {
1420         nextToken();
1421       } else if (FormatTok->is(tok::l_paren) &&
1422                  Tokens->peekNextToken()->is(tok::star)) {
1423         parseParens();
1424       } else {
1425         break;
1426       }
1427     }
1428   }
1429
1430   // Tokens that only make sense at the beginning of a line.
1431   switch (FormatTok->Tok.getKind()) {
1432   case tok::kw_asm:
1433     nextToken();
1434     if (FormatTok->is(tok::l_brace)) {
1435       FormatTok->setFinalizedType(TT_InlineASMBrace);
1436       nextToken();
1437       while (FormatTok && !eof()) {
1438         if (FormatTok->is(tok::r_brace)) {
1439           FormatTok->setFinalizedType(TT_InlineASMBrace);
1440           nextToken();
1441           addUnwrappedLine();
1442           break;
1443         }
1444         FormatTok->Finalized = true;
1445         nextToken();
1446       }
1447     }
1448     break;
1449   case tok::kw_namespace:
1450     parseNamespace();
1451     return;
1452   case tok::kw_public:
1453   case tok::kw_protected:
1454   case tok::kw_private:
1455     if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1456         Style.isCSharp()) {
1457       nextToken();
1458     } else {
1459       parseAccessSpecifier();
1460     }
1461     return;
1462   case tok::kw_if: {
1463     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1464       // field/method declaration.
1465       break;
1466     }
1467     FormatToken *Tok = parseIfThenElse(IfKind);
1468     if (IfLeftBrace)
1469       *IfLeftBrace = Tok;
1470     return;
1471   }
1472   case tok::kw_for:
1473   case tok::kw_while:
1474     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1475       // field/method declaration.
1476       break;
1477     }
1478     parseForOrWhileLoop();
1479     return;
1480   case tok::kw_do:
1481     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1482       // field/method declaration.
1483       break;
1484     }
1485     parseDoWhile();
1486     if (HasDoWhile)
1487       *HasDoWhile = true;
1488     return;
1489   case tok::kw_switch:
1490     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1491       // 'switch: string' field declaration.
1492       break;
1493     }
1494     parseSwitch();
1495     return;
1496   case tok::kw_default:
1497     // In Verilog default along with other labels are handled in the next loop.
1498     if (Style.isVerilog())
1499       break;
1500     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1501       // 'default: string' field declaration.
1502       break;
1503     }
1504     nextToken();
1505     if (FormatTok->is(tok::colon)) {
1506       FormatTok->setFinalizedType(TT_CaseLabelColon);
1507       parseLabel();
1508       return;
1509     }
1510     // e.g. "default void f() {}" in a Java interface.
1511     break;
1512   case tok::kw_case:
1513     // Proto: there are no switch/case statements.
1514     if (Style.isProto()) {
1515       nextToken();
1516       return;
1517     }
1518     if (Style.isVerilog()) {
1519       parseBlock();
1520       addUnwrappedLine();
1521       return;
1522     }
1523     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1524       // 'case: string' field declaration.
1525       nextToken();
1526       break;
1527     }
1528     parseCaseLabel();
1529     return;
1530   case tok::kw_try:
1531   case tok::kw___try:
1532     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1533       // field/method declaration.
1534       break;
1535     }
1536     parseTryCatch();
1537     return;
1538   case tok::kw_extern:
1539     nextToken();
1540     if (Style.isVerilog()) {
1541       // In Verilog and extern module declaration looks like a start of module.
1542       // But there is no body and endmodule. So we handle it separately.
1543       if (Keywords.isVerilogHierarchy(*FormatTok)) {
1544         parseVerilogHierarchyHeader();
1545         return;
1546       }
1547     } else if (FormatTok->is(tok::string_literal)) {
1548       nextToken();
1549       if (FormatTok->is(tok::l_brace)) {
1550         if (Style.BraceWrapping.AfterExternBlock)
1551           addUnwrappedLine();
1552         // Either we indent or for backwards compatibility we follow the
1553         // AfterExternBlock style.
1554         unsigned AddLevels =
1555             (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1556                     (Style.BraceWrapping.AfterExternBlock &&
1557                      Style.IndentExternBlock ==
1558                          FormatStyle::IEBS_AfterExternBlock)
1559                 ? 1u
1560                 : 0u;
1561         parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1562         addUnwrappedLine();
1563         return;
1564       }
1565     }
1566     break;
1567   case tok::kw_export:
1568     if (Style.isJavaScript()) {
1569       parseJavaScriptEs6ImportExport();
1570       return;
1571     }
1572     if (Style.isCpp()) {
1573       nextToken();
1574       if (FormatTok->is(tok::kw_namespace)) {
1575         parseNamespace();
1576         return;
1577       }
1578       if (FormatTok->is(Keywords.kw_import) && parseModuleImport())
1579         return;
1580     }
1581     break;
1582   case tok::kw_inline:
1583     nextToken();
1584     if (FormatTok->is(tok::kw_namespace)) {
1585       parseNamespace();
1586       return;
1587     }
1588     break;
1589   case tok::identifier:
1590     if (FormatTok->is(TT_ForEachMacro)) {
1591       parseForOrWhileLoop();
1592       return;
1593     }
1594     if (FormatTok->is(TT_MacroBlockBegin)) {
1595       parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1596                  /*MunchSemi=*/false);
1597       return;
1598     }
1599     if (FormatTok->is(Keywords.kw_import)) {
1600       if (Style.isJavaScript()) {
1601         parseJavaScriptEs6ImportExport();
1602         return;
1603       }
1604       if (Style.Language == FormatStyle::LK_Proto) {
1605         nextToken();
1606         if (FormatTok->is(tok::kw_public))
1607           nextToken();
1608         if (!FormatTok->is(tok::string_literal))
1609           return;
1610         nextToken();
1611         if (FormatTok->is(tok::semi))
1612           nextToken();
1613         addUnwrappedLine();
1614         return;
1615       }
1616       if (Style.isCpp() && parseModuleImport())
1617         return;
1618     }
1619     if (Style.isCpp() &&
1620         FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1621                            Keywords.kw_slots, Keywords.kw_qslots)) {
1622       nextToken();
1623       if (FormatTok->is(tok::colon)) {
1624         nextToken();
1625         addUnwrappedLine();
1626         return;
1627       }
1628     }
1629     if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1630       parseStatementMacro();
1631       return;
1632     }
1633     if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1634       parseNamespace();
1635       return;
1636     }
1637     // In all other cases, parse the declaration.
1638     break;
1639   default:
1640     break;
1641   }
1642   do {
1643     const FormatToken *Previous = FormatTok->Previous;
1644     switch (FormatTok->Tok.getKind()) {
1645     case tok::at:
1646       nextToken();
1647       if (FormatTok->is(tok::l_brace)) {
1648         nextToken();
1649         parseBracedList();
1650         break;
1651       } else if (Style.Language == FormatStyle::LK_Java &&
1652                  FormatTok->is(Keywords.kw_interface)) {
1653         nextToken();
1654         break;
1655       }
1656       switch (FormatTok->Tok.getObjCKeywordID()) {
1657       case tok::objc_public:
1658       case tok::objc_protected:
1659       case tok::objc_package:
1660       case tok::objc_private:
1661         return parseAccessSpecifier();
1662       case tok::objc_interface:
1663       case tok::objc_implementation:
1664         return parseObjCInterfaceOrImplementation();
1665       case tok::objc_protocol:
1666         if (parseObjCProtocol())
1667           return;
1668         break;
1669       case tok::objc_end:
1670         return; // Handled by the caller.
1671       case tok::objc_optional:
1672       case tok::objc_required:
1673         nextToken();
1674         addUnwrappedLine();
1675         return;
1676       case tok::objc_autoreleasepool:
1677         nextToken();
1678         if (FormatTok->is(tok::l_brace)) {
1679           if (Style.BraceWrapping.AfterControlStatement ==
1680               FormatStyle::BWACS_Always) {
1681             addUnwrappedLine();
1682           }
1683           parseBlock();
1684         }
1685         addUnwrappedLine();
1686         return;
1687       case tok::objc_synchronized:
1688         nextToken();
1689         if (FormatTok->is(tok::l_paren)) {
1690           // Skip synchronization object
1691           parseParens();
1692         }
1693         if (FormatTok->is(tok::l_brace)) {
1694           if (Style.BraceWrapping.AfterControlStatement ==
1695               FormatStyle::BWACS_Always) {
1696             addUnwrappedLine();
1697           }
1698           parseBlock();
1699         }
1700         addUnwrappedLine();
1701         return;
1702       case tok::objc_try:
1703         // This branch isn't strictly necessary (the kw_try case below would
1704         // do this too after the tok::at is parsed above).  But be explicit.
1705         parseTryCatch();
1706         return;
1707       default:
1708         break;
1709       }
1710       break;
1711     case tok::kw_requires: {
1712       if (Style.isCpp()) {
1713         bool ParsedClause = parseRequires();
1714         if (ParsedClause)
1715           return;
1716       } else {
1717         nextToken();
1718       }
1719       break;
1720     }
1721     case tok::kw_enum:
1722       // Ignore if this is part of "template <enum ...".
1723       if (Previous && Previous->is(tok::less)) {
1724         nextToken();
1725         break;
1726       }
1727
1728       // parseEnum falls through and does not yet add an unwrapped line as an
1729       // enum definition can start a structural element.
1730       if (!parseEnum())
1731         break;
1732       // This only applies to C++ and Verilog.
1733       if (!Style.isCpp() && !Style.isVerilog()) {
1734         addUnwrappedLine();
1735         return;
1736       }
1737       break;
1738     case tok::kw_typedef:
1739       nextToken();
1740       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1741                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1742                              Keywords.kw_CF_CLOSED_ENUM,
1743                              Keywords.kw_NS_CLOSED_ENUM)) {
1744         parseEnum();
1745       }
1746       break;
1747     case tok::kw_class:
1748       if (Style.isVerilog()) {
1749         parseBlock();
1750         addUnwrappedLine();
1751         return;
1752       }
1753       [[fallthrough]];
1754     case tok::kw_struct:
1755     case tok::kw_union:
1756       if (parseStructLike())
1757         return;
1758       break;
1759     case tok::period:
1760       nextToken();
1761       // In Java, classes have an implicit static member "class".
1762       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1763           FormatTok->is(tok::kw_class)) {
1764         nextToken();
1765       }
1766       if (Style.isJavaScript() && FormatTok &&
1767           FormatTok->Tok.getIdentifierInfo()) {
1768         // JavaScript only has pseudo keywords, all keywords are allowed to
1769         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1770         nextToken();
1771       }
1772       break;
1773     case tok::semi:
1774       nextToken();
1775       addUnwrappedLine();
1776       return;
1777     case tok::r_brace:
1778       addUnwrappedLine();
1779       return;
1780     case tok::l_paren: {
1781       parseParens();
1782       // Break the unwrapped line if a K&R C function definition has a parameter
1783       // declaration.
1784       if (!IsTopLevel || !Style.isCpp() || !Previous || eof())
1785         break;
1786       if (isC78ParameterDecl(FormatTok,
1787                              Tokens->peekNextToken(/*SkipComment=*/true),
1788                              Previous)) {
1789         addUnwrappedLine();
1790         return;
1791       }
1792       break;
1793     }
1794     case tok::kw_operator:
1795       nextToken();
1796       if (FormatTok->isBinaryOperator())
1797         nextToken();
1798       break;
1799     case tok::caret:
1800       nextToken();
1801       if (FormatTok->Tok.isAnyIdentifier() ||
1802           FormatTok->isSimpleTypeSpecifier()) {
1803         nextToken();
1804       }
1805       if (FormatTok->is(tok::l_paren))
1806         parseParens();
1807       if (FormatTok->is(tok::l_brace))
1808         parseChildBlock();
1809       break;
1810     case tok::l_brace:
1811       if (NextLBracesType != TT_Unknown)
1812         FormatTok->setFinalizedType(NextLBracesType);
1813       if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1814         // A block outside of parentheses must be the last part of a
1815         // structural element.
1816         // FIXME: Figure out cases where this is not true, and add projections
1817         // for them (the one we know is missing are lambdas).
1818         if (Style.Language == FormatStyle::LK_Java &&
1819             Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1820           // If necessary, we could set the type to something different than
1821           // TT_FunctionLBrace.
1822           if (Style.BraceWrapping.AfterControlStatement ==
1823               FormatStyle::BWACS_Always) {
1824             addUnwrappedLine();
1825           }
1826         } else if (Style.BraceWrapping.AfterFunction) {
1827           addUnwrappedLine();
1828         }
1829         FormatTok->setFinalizedType(TT_FunctionLBrace);
1830         parseBlock();
1831         addUnwrappedLine();
1832         return;
1833       }
1834       // Otherwise this was a braced init list, and the structural
1835       // element continues.
1836       break;
1837     case tok::kw_try:
1838       if (Style.isJavaScript() && Line->MustBeDeclaration) {
1839         // field/method declaration.
1840         nextToken();
1841         break;
1842       }
1843       // We arrive here when parsing function-try blocks.
1844       if (Style.BraceWrapping.AfterFunction)
1845         addUnwrappedLine();
1846       parseTryCatch();
1847       return;
1848     case tok::identifier: {
1849       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1850           Line->MustBeDeclaration) {
1851         addUnwrappedLine();
1852         parseCSharpGenericTypeConstraint();
1853         break;
1854       }
1855       if (FormatTok->is(TT_MacroBlockEnd)) {
1856         addUnwrappedLine();
1857         return;
1858       }
1859
1860       // Function declarations (as opposed to function expressions) are parsed
1861       // on their own unwrapped line by continuing this loop. Function
1862       // expressions (functions that are not on their own line) must not create
1863       // a new unwrapped line, so they are special cased below.
1864       size_t TokenCount = Line->Tokens.size();
1865       if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1866           (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1867                                                      Keywords.kw_async)))) {
1868         tryToParseJSFunction();
1869         break;
1870       }
1871       if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1872           FormatTok->is(Keywords.kw_interface)) {
1873         if (Style.isJavaScript()) {
1874           // In JavaScript/TypeScript, "interface" can be used as a standalone
1875           // identifier, e.g. in `var interface = 1;`. If "interface" is
1876           // followed by another identifier, it is very like to be an actual
1877           // interface declaration.
1878           unsigned StoredPosition = Tokens->getPosition();
1879           FormatToken *Next = Tokens->getNextToken();
1880           FormatTok = Tokens->setPosition(StoredPosition);
1881           if (!mustBeJSIdent(Keywords, Next)) {
1882             nextToken();
1883             break;
1884           }
1885         }
1886         parseRecord();
1887         addUnwrappedLine();
1888         return;
1889       }
1890
1891       if (Style.isVerilog()) {
1892         if (FormatTok->is(Keywords.kw_table)) {
1893           parseVerilogTable();
1894           return;
1895         }
1896         if (Keywords.isVerilogBegin(*FormatTok) ||
1897             Keywords.isVerilogHierarchy(*FormatTok)) {
1898           parseBlock();
1899           addUnwrappedLine();
1900           return;
1901         }
1902       }
1903
1904       if (!Style.isCpp() && FormatTok->is(Keywords.kw_interface)) {
1905         if (parseStructLike())
1906           return;
1907         break;
1908       }
1909
1910       if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1911         parseStatementMacro();
1912         return;
1913       }
1914
1915       // See if the following token should start a new unwrapped line.
1916       StringRef Text = FormatTok->TokenText;
1917
1918       FormatToken *PreviousToken = FormatTok;
1919       nextToken();
1920
1921       // JS doesn't have macros, and within classes colons indicate fields, not
1922       // labels.
1923       if (Style.isJavaScript())
1924         break;
1925
1926       auto OneTokenSoFar = [&]() {
1927         auto I = Line->Tokens.begin(), E = Line->Tokens.end();
1928         while (I != E && I->Tok->is(tok::comment))
1929           ++I;
1930         while (I != E && Style.isVerilog() && I->Tok->is(tok::hash))
1931           ++I;
1932         return I != E && (++I == E);
1933       };
1934       if (OneTokenSoFar()) {
1935         // In Verilog labels can be any expression, so we don't do them here.
1936         if (!Style.isVerilog() && FormatTok->is(tok::colon) &&
1937             !Line->MustBeDeclaration) {
1938           Line->Tokens.begin()->Tok->MustBreakBefore = true;
1939           FormatTok->setFinalizedType(TT_GotoLabelColon);
1940           parseLabel(!Style.IndentGotoLabels);
1941           if (HasLabel)
1942             *HasLabel = true;
1943           return;
1944         }
1945         // Recognize function-like macro usages without trailing semicolon as
1946         // well as free-standing macros like Q_OBJECT.
1947         bool FunctionLike = FormatTok->is(tok::l_paren);
1948         if (FunctionLike)
1949           parseParens();
1950
1951         bool FollowedByNewline =
1952             CommentsBeforeNextToken.empty()
1953                 ? FormatTok->NewlinesBefore > 0
1954                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1955
1956         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1957             tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
1958           if (PreviousToken->isNot(TT_UntouchableMacroFunc))
1959             PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
1960           addUnwrappedLine();
1961           return;
1962         }
1963       }
1964       break;
1965     }
1966     case tok::equal:
1967       if ((Style.isJavaScript() || Style.isCSharp()) &&
1968           FormatTok->is(TT_FatArrow)) {
1969         tryToParseChildBlock();
1970         break;
1971       }
1972
1973       nextToken();
1974       if (FormatTok->is(tok::l_brace)) {
1975         // Block kind should probably be set to BK_BracedInit for any language.
1976         // C# needs this change to ensure that array initialisers and object
1977         // initialisers are indented the same way.
1978         if (Style.isCSharp())
1979           FormatTok->setBlockKind(BK_BracedInit);
1980         nextToken();
1981         parseBracedList();
1982       } else if (Style.Language == FormatStyle::LK_Proto &&
1983                  FormatTok->is(tok::less)) {
1984         nextToken();
1985         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1986                         /*ClosingBraceKind=*/tok::greater);
1987       }
1988       break;
1989     case tok::l_square:
1990       parseSquare();
1991       break;
1992     case tok::kw_new:
1993       parseNew();
1994       break;
1995     case tok::kw_case:
1996       // Proto: there are no switch/case statements.
1997       if (Style.isProto()) {
1998         nextToken();
1999         return;
2000       }
2001       // In Verilog switch is called case.
2002       if (Style.isVerilog()) {
2003         parseBlock();
2004         addUnwrappedLine();
2005         return;
2006       }
2007       if (Style.isJavaScript() && Line->MustBeDeclaration) {
2008         // 'case: string' field declaration.
2009         nextToken();
2010         break;
2011       }
2012       parseCaseLabel();
2013       break;
2014     case tok::kw_default:
2015       nextToken();
2016       if (Style.isVerilog()) {
2017         if (FormatTok->is(tok::colon)) {
2018           // The label will be handled in the next iteration.
2019           break;
2020         }
2021         if (FormatTok->is(Keywords.kw_clocking)) {
2022           // A default clocking block.
2023           parseBlock();
2024           addUnwrappedLine();
2025           return;
2026         }
2027         parseVerilogCaseLabel();
2028         return;
2029       }
2030       break;
2031     case tok::colon:
2032       nextToken();
2033       if (Style.isVerilog()) {
2034         parseVerilogCaseLabel();
2035         return;
2036       }
2037       break;
2038     default:
2039       nextToken();
2040       break;
2041     }
2042   } while (!eof());
2043 }
2044
2045 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2046   assert(FormatTok->is(tok::l_brace));
2047   if (!Style.isCSharp())
2048     return false;
2049   // See if it's a property accessor.
2050   if (FormatTok->Previous->isNot(tok::identifier))
2051     return false;
2052
2053   // See if we are inside a property accessor.
2054   //
2055   // Record the current tokenPosition so that we can advance and
2056   // reset the current token. `Next` is not set yet so we need
2057   // another way to advance along the token stream.
2058   unsigned int StoredPosition = Tokens->getPosition();
2059   FormatToken *Tok = Tokens->getNextToken();
2060
2061   // A trivial property accessor is of the form:
2062   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2063   // Track these as they do not require line breaks to be introduced.
2064   bool HasSpecialAccessor = false;
2065   bool IsTrivialPropertyAccessor = true;
2066   while (!eof()) {
2067     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
2068                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
2069                      Keywords.kw_init, Keywords.kw_set)) {
2070       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
2071         HasSpecialAccessor = true;
2072       Tok = Tokens->getNextToken();
2073       continue;
2074     }
2075     if (Tok->isNot(tok::r_brace))
2076       IsTrivialPropertyAccessor = false;
2077     break;
2078   }
2079
2080   if (!HasSpecialAccessor) {
2081     Tokens->setPosition(StoredPosition);
2082     return false;
2083   }
2084
2085   // Try to parse the property accessor:
2086   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2087   Tokens->setPosition(StoredPosition);
2088   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2089     addUnwrappedLine();
2090   nextToken();
2091   do {
2092     switch (FormatTok->Tok.getKind()) {
2093     case tok::r_brace:
2094       nextToken();
2095       if (FormatTok->is(tok::equal)) {
2096         while (!eof() && FormatTok->isNot(tok::semi))
2097           nextToken();
2098         nextToken();
2099       }
2100       addUnwrappedLine();
2101       return true;
2102     case tok::l_brace:
2103       ++Line->Level;
2104       parseBlock(/*MustBeDeclaration=*/true);
2105       addUnwrappedLine();
2106       --Line->Level;
2107       break;
2108     case tok::equal:
2109       if (FormatTok->is(TT_FatArrow)) {
2110         ++Line->Level;
2111         do {
2112           nextToken();
2113         } while (!eof() && FormatTok->isNot(tok::semi));
2114         nextToken();
2115         addUnwrappedLine();
2116         --Line->Level;
2117         break;
2118       }
2119       nextToken();
2120       break;
2121     default:
2122       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
2123                              Keywords.kw_set) &&
2124           !IsTrivialPropertyAccessor) {
2125         // Non-trivial get/set needs to be on its own line.
2126         addUnwrappedLine();
2127       }
2128       nextToken();
2129     }
2130   } while (!eof());
2131
2132   // Unreachable for well-formed code (paired '{' and '}').
2133   return true;
2134 }
2135
/// Tries to parse a C++ lambda that starts at the current '['.
///
/// For non-C++ styles only the '[' is consumed. Otherwise the introducer is
/// parsed via tryToParseLambdaIntroducer() and the tokens up to the body's
/// '{' are scanned; only when that '{' is reached are the brace and the '['
/// finalized as TT_LambdaLBrace / TT_LambdaLSquare and the body parsed as a
/// child block.
///
/// \returns false if only the '[' was consumed (non-C++ style, or the
/// introducer check failed); true once the introducer has been parsed, whether
/// or not the tokens after it turned out to form a lambda.
bool UnwrappedLineParser::tryToParseLambda() {
  assert(FormatTok->is(tok::l_square));
  if (!Style.isCpp()) {
    nextToken();
    return false;
  }
  // Remember the '[' so its type can be finalized once the body is found.
  FormatToken &LSquare = *FormatTok;
  if (!tryToParseLambdaIntroducer())
    return false;

  bool SeenArrow = false;
  bool InTemplateParameterList = false;

  // Walk over everything that may appear between the introducer and the body.
  while (FormatTok->isNot(tok::l_brace)) {
    if (FormatTok->isSimpleTypeSpecifier()) {
      nextToken();
      continue;
    }
    switch (FormatTok->Tok.getKind()) {
    // Not reachable: the loop condition already excludes l_brace and the
    // token has not changed since it was checked.
    case tok::l_brace:
      break;
    case tok::l_paren:
      parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::less:
      assert(FormatTok->Previous);
      // A '<' directly after the introducer's ']' opens an explicit template
      // parameter list.
      if (FormatTok->Previous->is(tok::r_square))
        InTemplateParameterList = true;
      nextToken();
      break;
    case tok::kw_auto:
    case tok::kw_class:
    case tok::kw_template:
    case tok::kw_typename:
    case tok::amp:
    case tok::star:
    case tok::kw_const:
    case tok::kw_constexpr:
    case tok::kw_consteval:
    case tok::comma:
    case tok::greater:
    case tok::identifier:
    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_mutable:
    case tok::kw_noexcept:
    case tok::kw_static:
      nextToken();
      break;
    // Specialization of a template with an integer parameter can contain
    // arithmetic, logical, comparison and ternary operators.
    //
    // FIXME: This also accepts sequences of operators that are not in the scope
    // of a template argument list.
    //
    // In a C++ lambda a template type can only occur after an arrow. We use
    // this as a heuristic to distinguish between Objective-C expressions
    // followed by an `a->b` expression, such as:
    // ([obj func:arg] + a->b)
    // Otherwise the code below would parse as a lambda.
    //
    // FIXME: This heuristic is incorrect for C++20 generic lambdas with
    // explicit template lists: []<bool b = true && false>(U &&u){}
    case tok::plus:
    case tok::minus:
    case tok::exclaim:
    case tok::tilde:
    case tok::slash:
    case tok::percent:
    case tok::lessless:
    case tok::pipe:
    case tok::pipepipe:
    case tok::ampamp:
    case tok::caret:
    case tok::equalequal:
    case tok::exclaimequal:
    case tok::greaterequal:
    case tok::lessequal:
    case tok::question:
    case tok::colon:
    case tok::ellipsis:
    case tok::kw_true:
    case tok::kw_false:
      if (SeenArrow || InTemplateParameterList) {
        nextToken();
        break;
      }
      // Not a lambda after all; stop without marking the brackets.
      return true;
    case tok::arrow:
      // This might or might not actually be a lambda arrow (this could be an
      // ObjC method invocation followed by a dereferencing arrow). We might
      // reset this back to TT_Unknown in TokenAnnotator.
      FormatTok->setFinalizedType(TT_LambdaArrow);
      SeenArrow = true;
      nextToken();
      break;
    case tok::kw_requires: {
      auto *RequiresToken = FormatTok;
      nextToken();
      parseRequiresClause(RequiresToken);
      break;
    }
    default:
      // Any other token means this is not a lambda; stop without marking the
      // brackets.
      return true;
    }
  }
  // The body's '{' was reached: record the lambda and parse its body.
  FormatTok->setFinalizedType(TT_LambdaLBrace);
  LSquare.setFinalizedType(TT_LambdaLSquare);
  parseChildBlock();
  return true;
}
2250
2251 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2252   const FormatToken *Previous = FormatTok->Previous;
2253   const FormatToken *LeftSquare = FormatTok;
2254   nextToken();
2255   if ((Previous && ((Previous->Tok.getIdentifierInfo() &&
2256                      !Previous->isOneOf(tok::kw_return, tok::kw_co_await,
2257                                         tok::kw_co_yield, tok::kw_co_return)) ||
2258                     Previous->closesScope())) ||
2259       LeftSquare->isCppStructuredBinding(Style)) {
2260     return false;
2261   }
2262   if (FormatTok->is(tok::l_square))
2263     return false;
2264   if (FormatTok->is(tok::r_square)) {
2265     const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
2266     if (Next->is(tok::greater))
2267       return false;
2268   }
2269   parseSquare(/*LambdaIntroducer=*/true);
2270   return true;
2271 }
2272
2273 void UnwrappedLineParser::tryToParseJSFunction() {
2274   assert(FormatTok->is(Keywords.kw_function) ||
2275          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
2276   if (FormatTok->is(Keywords.kw_async))
2277     nextToken();
2278   // Consume "function".
2279   nextToken();
2280
2281   // Consume * (generator function). Treat it like C++'s overloaded operators.
2282   if (FormatTok->is(tok::star)) {
2283     FormatTok->setFinalizedType(TT_OverloadedOperator);
2284     nextToken();
2285   }
2286
2287   // Consume function name.
2288   if (FormatTok->is(tok::identifier))
2289     nextToken();
2290
2291   if (FormatTok->isNot(tok::l_paren))
2292     return;
2293
2294   // Parse formal parameter list.
2295   parseParens();
2296
2297   if (FormatTok->is(tok::colon)) {
2298     // Parse a type definition.
2299     nextToken();
2300
2301     // Eat the type declaration. For braced inline object types, balance braces,
2302     // otherwise just parse until finding an l_brace for the function body.
2303     if (FormatTok->is(tok::l_brace))
2304       tryToParseBracedList();
2305     else
2306       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2307         nextToken();
2308   }
2309
2310   if (FormatTok->is(tok::semi))
2311     return;
2312
2313   parseChildBlock();
2314 }
2315
2316 bool UnwrappedLineParser::tryToParseBracedList() {
2317   if (FormatTok->is(BK_Unknown))
2318     calculateBraceTypes();
2319   assert(FormatTok->isNot(BK_Unknown));
2320   if (FormatTok->is(BK_Block))
2321     return false;
2322   nextToken();
2323   parseBracedList();
2324   return true;
2325 }
2326
2327 bool UnwrappedLineParser::tryToParseChildBlock() {
2328   assert(Style.isJavaScript() || Style.isCSharp());
2329   assert(FormatTok->is(TT_FatArrow));
2330   // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2331   // They always start an expression or a child block if followed by a curly
2332   // brace.
2333   nextToken();
2334   if (FormatTok->isNot(tok::l_brace))
2335     return false;
2336   parseChildBlock();
2337   return true;
2338 }
2339
2340 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
2341                                           bool IsEnum,
2342                                           tok::TokenKind ClosingBraceKind) {
2343   bool HasError = false;
2344
2345   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2346   // replace this by using parseAssignmentExpression() inside.
2347   do {
2348     if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2349         tryToParseChildBlock()) {
2350       continue;
2351     }
2352     if (Style.isJavaScript()) {
2353       if (FormatTok->is(Keywords.kw_function) ||
2354           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
2355         tryToParseJSFunction();
2356         continue;
2357       }
2358       if (FormatTok->is(tok::l_brace)) {
2359         // Could be a method inside of a braced list `{a() { return 1; }}`.
2360         if (tryToParseBracedList())
2361           continue;
2362         parseChildBlock();
2363       }
2364     }
2365     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
2366       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2367         addUnwrappedLine();
2368       nextToken();
2369       return !HasError;
2370     }
2371     switch (FormatTok->Tok.getKind()) {
2372     case tok::l_square:
2373       if (Style.isCSharp())
2374         parseSquare();
2375       else
2376         tryToParseLambda();
2377       break;
2378     case tok::l_paren:
2379       parseParens();
2380       // JavaScript can just have free standing methods and getters/setters in
2381       // object literals. Detect them by a "{" following ")".
2382       if (Style.isJavaScript()) {
2383         if (FormatTok->is(tok::l_brace))
2384           parseChildBlock();
2385         break;
2386       }
2387       break;
2388     case tok::l_brace:
2389       // Assume there are no blocks inside a braced init list apart
2390       // from the ones we explicitly parse out (like lambdas).
2391       FormatTok->setBlockKind(BK_BracedInit);
2392       nextToken();
2393       parseBracedList();
2394       break;
2395     case tok::less:
2396       if (Style.Language == FormatStyle::LK_Proto ||
2397           ClosingBraceKind == tok::greater) {
2398         nextToken();
2399         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2400                         /*ClosingBraceKind=*/tok::greater);
2401       } else {
2402         nextToken();
2403       }
2404       break;
2405     case tok::semi:
2406       // JavaScript (or more precisely TypeScript) can have semicolons in braced
2407       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2408       // used for error recovery if we have otherwise determined that this is
2409       // a braced list.
2410       if (Style.isJavaScript()) {
2411         nextToken();
2412         break;
2413       }
2414       HasError = true;
2415       if (!ContinueOnSemicolons)
2416         return !HasError;
2417       nextToken();
2418       break;
2419     case tok::comma:
2420       nextToken();
2421       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2422         addUnwrappedLine();
2423       break;
2424     default:
2425       nextToken();
2426       break;
2427     }
2428   } while (!eof());
2429   return false;
2430 }
2431
2432 /// \brief Parses a pair of parentheses (and everything between them).
2433 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2434 /// double ampersands. This only counts for the current parens scope.
2435 void UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2436   assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2437   nextToken();
2438   do {
2439     switch (FormatTok->Tok.getKind()) {
2440     case tok::l_paren:
2441       parseParens();
2442       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2443         parseChildBlock();
2444       break;
2445     case tok::r_paren:
2446       nextToken();
2447       return;
2448     case tok::r_brace:
2449       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2450       return;
2451     case tok::l_square:
2452       tryToParseLambda();
2453       break;
2454     case tok::l_brace:
2455       if (!tryToParseBracedList())
2456         parseChildBlock();
2457       break;
2458     case tok::at:
2459       nextToken();
2460       if (FormatTok->is(tok::l_brace)) {
2461         nextToken();
2462         parseBracedList();
2463       }
2464       break;
2465     case tok::equal:
2466       if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2467         tryToParseChildBlock();
2468       else
2469         nextToken();
2470       break;
2471     case tok::kw_class:
2472       if (Style.isJavaScript())
2473         parseRecord(/*ParseAsExpr=*/true);
2474       else
2475         nextToken();
2476       break;
2477     case tok::identifier:
2478       if (Style.isJavaScript() &&
2479           (FormatTok->is(Keywords.kw_function) ||
2480            FormatTok->startsSequence(Keywords.kw_async,
2481                                      Keywords.kw_function))) {
2482         tryToParseJSFunction();
2483       } else {
2484         nextToken();
2485       }
2486       break;
2487     case tok::kw_requires: {
2488       auto RequiresToken = FormatTok;
2489       nextToken();
2490       parseRequiresExpression(RequiresToken);
2491       break;
2492     }
2493     case tok::ampamp:
2494       if (AmpAmpTokenType != TT_Unknown)
2495         FormatTok->setFinalizedType(AmpAmpTokenType);
2496       [[fallthrough]];
2497     default:
2498       nextToken();
2499       break;
2500     }
2501   } while (!eof());
2502 }
2503
2504 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2505   if (!LambdaIntroducer) {
2506     assert(FormatTok->is(tok::l_square) && "'[' expected.");
2507     if (tryToParseLambda())
2508       return;
2509   }
2510   do {
2511     switch (FormatTok->Tok.getKind()) {
2512     case tok::l_paren:
2513       parseParens();
2514       break;
2515     case tok::r_square:
2516       nextToken();
2517       return;
2518     case tok::r_brace:
2519       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2520       return;
2521     case tok::l_square:
2522       parseSquare();
2523       break;
2524     case tok::l_brace: {
2525       if (!tryToParseBracedList())
2526         parseChildBlock();
2527       break;
2528     }
2529     case tok::at:
2530       nextToken();
2531       if (FormatTok->is(tok::l_brace)) {
2532         nextToken();
2533         parseBracedList();
2534       }
2535       break;
2536     default:
2537       nextToken();
2538       break;
2539     }
2540   } while (!eof());
2541 }
2542
2543 void UnwrappedLineParser::keepAncestorBraces() {
2544   if (!Style.RemoveBracesLLVM)
2545     return;
2546
2547   const int MaxNestingLevels = 2;
2548   const int Size = NestedTooDeep.size();
2549   if (Size >= MaxNestingLevels)
2550     NestedTooDeep[Size - MaxNestingLevels] = true;
2551   NestedTooDeep.push_back(false);
2552 }
2553
2554 static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2555   for (const auto &Token : llvm::reverse(Line.Tokens))
2556     if (Token.Tok->isNot(tok::comment))
2557       return Token.Tok;
2558
2559   return nullptr;
2560 }
2561
2562 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2563   FormatToken *Tok = nullptr;
2564
2565   if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2566       PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) {
2567     Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never
2568               ? getLastNonComment(*Line)
2569               : Line->Tokens.back().Tok;
2570     assert(Tok);
2571     if (Tok->BraceCount < 0) {
2572       assert(Tok->BraceCount == -1);
2573       Tok = nullptr;
2574     } else {
2575       Tok->BraceCount = -1;
2576     }
2577   }
2578
2579   addUnwrappedLine();
2580   ++Line->Level;
2581   parseStructuralElement();
2582
2583   if (Tok) {
2584     assert(!Line->InPPDirective);
2585     Tok = nullptr;
2586     for (const auto &L : llvm::reverse(*CurrentLines)) {
2587       if (!L.InPPDirective && getLastNonComment(L)) {
2588         Tok = L.Tokens.back().Tok;
2589         break;
2590       }
2591     }
2592     assert(Tok);
2593     ++Tok->BraceCount;
2594   }
2595
2596   if (CheckEOF && eof())
2597     addUnwrappedLine();
2598
2599   --Line->Level;
2600 }
2601
2602 static void markOptionalBraces(FormatToken *LeftBrace) {
2603   if (!LeftBrace)
2604     return;
2605
2606   assert(LeftBrace->is(tok::l_brace));
2607
2608   FormatToken *RightBrace = LeftBrace->MatchingParen;
2609   if (!RightBrace) {
2610     assert(!LeftBrace->Optional);
2611     return;
2612   }
2613
2614   assert(RightBrace->is(tok::r_brace));
2615   assert(RightBrace->MatchingParen == LeftBrace);
2616   assert(LeftBrace->Optional == RightBrace->Optional);
2617
2618   LeftBrace->Optional = true;
2619   RightBrace->Optional = true;
2620 }
2621
2622 void UnwrappedLineParser::handleAttributes() {
2623   // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2624   if (FormatTok->is(TT_AttributeMacro))
2625     nextToken();
2626   if (FormatTok->is(tok::l_square))
2627     handleCppAttributes();
2628 }
2629
2630 bool UnwrappedLineParser::handleCppAttributes() {
2631   // Handle [[likely]] / [[unlikely]] attributes.
2632   assert(FormatTok->is(tok::l_square));
2633   if (!tryToParseSimpleAttribute())
2634     return false;
2635   parseSquare();
2636   return true;
2637 }
2638
2639 /// Returns whether \c Tok begins a block.
2640 bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
2641   // FIXME: rename the function or make
2642   // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2643   return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
2644                            : Tok.is(tok::l_brace);
2645 }
2646
2647 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2648                                                   bool KeepBraces,
2649                                                   bool IsVerilogAssert) {
2650   assert((FormatTok->is(tok::kw_if) ||
2651           (Style.isVerilog() &&
2652            FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
2653                               Keywords.kw_assume, Keywords.kw_cover))) &&
2654          "'if' expected");
2655   nextToken();
2656
2657   if (IsVerilogAssert) {
2658     // Handle `assert #0` and `assert final`.
2659     if (FormatTok->is(Keywords.kw_verilogHash)) {
2660       nextToken();
2661       if (FormatTok->is(tok::numeric_constant))
2662         nextToken();
2663     } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property,
2664                                   Keywords.kw_sequence)) {
2665       nextToken();
2666     }
2667   }
2668
2669   // Handle `if !consteval`.
2670   if (FormatTok->is(tok::exclaim))
2671     nextToken();
2672
2673   bool KeepIfBraces = true;
2674   if (FormatTok->is(tok::kw_consteval)) {
2675     nextToken();
2676   } else {
2677     KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
2678     if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
2679       nextToken();
2680     if (FormatTok->is(tok::l_paren)) {
2681       FormatTok->setFinalizedType(TT_ConditionLParen);
2682       parseParens();
2683     }
2684   }
2685   handleAttributes();
2686   // The then action is optional in Verilog assert statements.
2687   if (IsVerilogAssert && FormatTok->is(tok::semi)) {
2688     nextToken();
2689     addUnwrappedLine();
2690     return nullptr;
2691   }
2692
2693   bool NeedsUnwrappedLine = false;
2694   keepAncestorBraces();
2695
2696   FormatToken *IfLeftBrace = nullptr;
2697   IfStmtKind IfBlockKind = IfStmtKind::NotIf;
2698
2699   if (isBlockBegin(*FormatTok)) {
2700     FormatTok->setFinalizedType(TT_ControlStatementLBrace);
2701     IfLeftBrace = FormatTok;
2702     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2703     parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2704                /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
2705     if (Style.BraceWrapping.BeforeElse)
2706       addUnwrappedLine();
2707     else
2708       NeedsUnwrappedLine = true;
2709   } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) {
2710     addUnwrappedLine();
2711   } else {
2712     parseUnbracedBody();
2713   }
2714
2715   if (Style.RemoveBracesLLVM) {
2716     assert(!NestedTooDeep.empty());
2717     KeepIfBraces = KeepIfBraces ||
2718                    (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
2719                    NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
2720                    IfBlockKind == IfStmtKind::IfElseIf;
2721   }
2722
2723   bool KeepElseBraces = KeepIfBraces;
2724   FormatToken *ElseLeftBrace = nullptr;
2725   IfStmtKind Kind = IfStmtKind::IfOnly;
2726
2727   if (FormatTok->is(tok::kw_else)) {
2728     if (Style.RemoveBracesLLVM) {
2729       NestedTooDeep.back() = false;
2730       Kind = IfStmtKind::IfElse;
2731     }
2732     nextToken();
2733     handleAttributes();
2734     if (isBlockBegin(*FormatTok)) {
2735       const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
2736       FormatTok->setFinalizedType(TT_ElseLBrace);
2737       ElseLeftBrace = FormatTok;
2738       CompoundStatementIndenter Indenter(this, Style, Line->Level);
2739       IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
2740       FormatToken *IfLBrace =
2741           parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2742                      /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
2743       if (FormatTok->is(tok::kw_else)) {
2744         KeepElseBraces = KeepElseBraces ||
2745                          ElseBlockKind == IfStmtKind::IfOnly ||
2746                          ElseBlockKind == IfStmtKind::IfElseIf;
2747       } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
2748         KeepElseBraces = true;
2749         assert(ElseLeftBrace->MatchingParen);
2750         markOptionalBraces(ElseLeftBrace);
2751       }
2752       addUnwrappedLine();
2753     } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) {
2754       const FormatToken *Previous = Tokens->getPreviousToken();
2755       assert(Previous);
2756       const bool IsPrecededByComment = Previous->is(tok::comment);
2757       if (IsPrecededByComment) {
2758         addUnwrappedLine();
2759         ++Line->Level;
2760       }
2761       bool TooDeep = true;
2762       if (Style.RemoveBracesLLVM) {
2763         Kind = IfStmtKind::IfElseIf;
2764         TooDeep = NestedTooDeep.pop_back_val();
2765       }
2766       ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
2767       if (Style.RemoveBracesLLVM)
2768         NestedTooDeep.push_back(TooDeep);
2769       if (IsPrecededByComment)
2770         --Line->Level;
2771     } else {
2772       parseUnbracedBody(/*CheckEOF=*/true);
2773     }
2774   } else {
2775     KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
2776     if (NeedsUnwrappedLine)
2777       addUnwrappedLine();
2778   }
2779
2780   if (!Style.RemoveBracesLLVM)
2781     return nullptr;
2782
2783   assert(!NestedTooDeep.empty());
2784   KeepElseBraces = KeepElseBraces ||
2785                    (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
2786                    NestedTooDeep.back();
2787
2788   NestedTooDeep.pop_back();
2789
2790   if (!KeepIfBraces && !KeepElseBraces) {
2791     markOptionalBraces(IfLeftBrace);
2792     markOptionalBraces(ElseLeftBrace);
2793   } else if (IfLeftBrace) {
2794     FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
2795     if (IfRightBrace) {
2796       assert(IfRightBrace->MatchingParen == IfLeftBrace);
2797       assert(!IfLeftBrace->Optional);
2798       assert(!IfRightBrace->Optional);
2799       IfLeftBrace->MatchingParen = nullptr;
2800       IfRightBrace->MatchingParen = nullptr;
2801     }
2802   }
2803
2804   if (IfKind)
2805     *IfKind = Kind;
2806
2807   return IfLeftBrace;
2808 }
2809
2810 void UnwrappedLineParser::parseTryCatch() {
2811   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2812   nextToken();
2813   bool NeedsUnwrappedLine = false;
2814   if (FormatTok->is(tok::colon)) {
2815     // We are in a function try block, what comes is an initializer list.
2816     nextToken();
2817
2818     // In case identifiers were removed by clang-tidy, what might follow is
2819     // multiple commas in sequence - before the first identifier.
2820     while (FormatTok->is(tok::comma))
2821       nextToken();
2822
2823     while (FormatTok->is(tok::identifier)) {
2824       nextToken();
2825       if (FormatTok->is(tok::l_paren))
2826         parseParens();
2827       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2828           FormatTok->is(tok::l_brace)) {
2829         do {
2830           nextToken();
2831         } while (!FormatTok->is(tok::r_brace));
2832         nextToken();
2833       }
2834
2835       // In case identifiers were removed by clang-tidy, what might follow is
2836       // multiple commas in sequence - after the first identifier.
2837       while (FormatTok->is(tok::comma))
2838         nextToken();
2839     }
2840   }
2841   // Parse try with resource.
2842   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2843     parseParens();
2844
2845   keepAncestorBraces();
2846
2847   if (FormatTok->is(tok::l_brace)) {
2848     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2849     parseBlock();
2850     if (Style.BraceWrapping.BeforeCatch)
2851       addUnwrappedLine();
2852     else
2853       NeedsUnwrappedLine = true;
2854   } else if (!FormatTok->is(tok::kw_catch)) {
2855     // The C++ standard requires a compound-statement after a try.
2856     // If there's none, we try to assume there's a structuralElement
2857     // and try to continue.
2858     addUnwrappedLine();
2859     ++Line->Level;
2860     parseStructuralElement();
2861     --Line->Level;
2862   }
2863   while (true) {
2864     if (FormatTok->is(tok::at))
2865       nextToken();
2866     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2867                              tok::kw___finally) ||
2868           ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2869            FormatTok->is(Keywords.kw_finally)) ||
2870           (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
2871            FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
2872       break;
2873     }
2874     nextToken();
2875     while (FormatTok->isNot(tok::l_brace)) {
2876       if (FormatTok->is(tok::l_paren)) {
2877         parseParens();
2878         continue;
2879       }
2880       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2881         if (Style.RemoveBracesLLVM)
2882           NestedTooDeep.pop_back();
2883         return;
2884       }
2885       nextToken();
2886     }
2887     NeedsUnwrappedLine = false;
2888     Line->MustBeDeclaration = false;
2889     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2890     parseBlock();
2891     if (Style.BraceWrapping.BeforeCatch)
2892       addUnwrappedLine();
2893     else
2894       NeedsUnwrappedLine = true;
2895   }
2896
2897   if (Style.RemoveBracesLLVM)
2898     NestedTooDeep.pop_back();
2899
2900   if (NeedsUnwrappedLine)
2901     addUnwrappedLine();
2902 }
2903
2904 void UnwrappedLineParser::parseNamespace() {
2905   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2906          "'namespace' expected");
2907
2908   const FormatToken &InitialToken = *FormatTok;
2909   nextToken();
2910   if (InitialToken.is(TT_NamespaceMacro)) {
2911     parseParens();
2912   } else {
2913     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2914                               tok::l_square, tok::period, tok::l_paren) ||
2915            (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
2916       if (FormatTok->is(tok::l_square))
2917         parseSquare();
2918       else if (FormatTok->is(tok::l_paren))
2919         parseParens();
2920       else
2921         nextToken();
2922     }
2923   }
2924   if (FormatTok->is(tok::l_brace)) {
2925     if (ShouldBreakBeforeBrace(Style, InitialToken))
2926       addUnwrappedLine();
2927
2928     unsigned AddLevels =
2929         Style.NamespaceIndentation == FormatStyle::NI_All ||
2930                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2931                  DeclarationScopeStack.size() > 1)
2932             ? 1u
2933             : 0u;
2934     bool ManageWhitesmithsBraces =
2935         AddLevels == 0u &&
2936         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2937
2938     // If we're in Whitesmiths mode, indent the brace if we're not indenting
2939     // the whole block.
2940     if (ManageWhitesmithsBraces)
2941       ++Line->Level;
2942
2943     parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
2944                /*KeepBraces=*/true, /*IfKind=*/nullptr,
2945                ManageWhitesmithsBraces);
2946
2947     // Munch the semicolon after a namespace. This is more common than one would
2948     // think. Putting the semicolon into its own line is very ugly.
2949     if (FormatTok->is(tok::semi))
2950       nextToken();
2951
2952     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2953
2954     if (ManageWhitesmithsBraces)
2955       --Line->Level;
2956   }
2957   // FIXME: Add error handling.
2958 }
2959
2960 void UnwrappedLineParser::parseNew() {
2961   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2962   nextToken();
2963
2964   if (Style.isCSharp()) {
2965     do {
2966       // Handle constructor invocation, e.g. `new(field: value)`.
2967       if (FormatTok->is(tok::l_paren))
2968         parseParens();
2969
2970       // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
2971       if (FormatTok->is(tok::l_brace))
2972         parseBracedList();
2973
2974       if (FormatTok->isOneOf(tok::semi, tok::comma))
2975         return;
2976
2977       nextToken();
2978     } while (!eof());
2979   }
2980
2981   if (Style.Language != FormatStyle::LK_Java)
2982     return;
2983
2984   // In Java, we can parse everything up to the parens, which aren't optional.
2985   do {
2986     // There should not be a ;, { or } before the new's open paren.
2987     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2988       return;
2989
2990     // Consume the parens.
2991     if (FormatTok->is(tok::l_paren)) {
2992       parseParens();
2993
2994       // If there is a class body of an anonymous class, consume that as child.
2995       if (FormatTok->is(tok::l_brace))
2996         parseChildBlock();
2997       return;
2998     }
2999     nextToken();
3000   } while (!eof());
3001 }
3002
3003 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
3004   keepAncestorBraces();
3005
3006   if (isBlockBegin(*FormatTok)) {
3007     if (!KeepBraces)
3008       FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3009     FormatToken *LeftBrace = FormatTok;
3010     CompoundStatementIndenter Indenter(this, Style, Line->Level);
3011     parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3012                /*MunchSemi=*/true, KeepBraces);
3013     if (!KeepBraces) {
3014       assert(!NestedTooDeep.empty());
3015       if (!NestedTooDeep.back())
3016         markOptionalBraces(LeftBrace);
3017     }
3018     if (WrapRightBrace)
3019       addUnwrappedLine();
3020   } else {
3021     parseUnbracedBody();
3022   }
3023
3024   if (!KeepBraces)
3025     NestedTooDeep.pop_back();
3026 }
3027
3028 void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
3029   assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
3030           (Style.isVerilog() &&
3031            FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
3032                               Keywords.kw_always_ff, Keywords.kw_always_latch,
3033                               Keywords.kw_final, Keywords.kw_initial,
3034                               Keywords.kw_foreach, Keywords.kw_forever,
3035                               Keywords.kw_repeat))) &&
3036          "'for', 'while' or foreach macro expected");
3037   const bool KeepBraces = !Style.RemoveBracesLLVM ||
3038                           !FormatTok->isOneOf(tok::kw_for, tok::kw_while);
3039
3040   nextToken();
3041   // JS' for await ( ...
3042   if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
3043     nextToken();
3044   if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
3045     nextToken();
3046   if (HasParens && FormatTok->is(tok::l_paren)) {
3047     // The type is only set for Verilog basically because we were afraid to
3048     // change the existing behavior for loops. See the discussion on D121756 for
3049     // details.
3050     if (Style.isVerilog())
3051       FormatTok->setFinalizedType(TT_ConditionLParen);
3052     parseParens();
3053   }
3054   // Event control.
3055   if (Style.isVerilog())
3056     parseVerilogSensitivityList();
3057
3058   handleAttributes();
3059   parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
3060 }
3061
3062 void UnwrappedLineParser::parseDoWhile() {
3063   assert(FormatTok->is(tok::kw_do) && "'do' expected");
3064   nextToken();
3065
3066   parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
3067
3068   // FIXME: Add error handling.
3069   if (!FormatTok->is(tok::kw_while)) {
3070     addUnwrappedLine();
3071     return;
3072   }
3073
3074   // If in Whitesmiths mode, the line with the while() needs to be indented
3075   // to the same level as the block.
3076   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
3077     ++Line->Level;
3078
3079   nextToken();
3080   parseStructuralElement();
3081 }
3082
3083 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
3084   nextToken();
3085   unsigned OldLineLevel = Line->Level;
3086   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
3087     --Line->Level;
3088   if (LeftAlignLabel)
3089     Line->Level = 0;
3090
3091   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
3092       FormatTok->is(tok::l_brace)) {
3093
3094     CompoundStatementIndenter Indenter(this, Line->Level,
3095                                        Style.BraceWrapping.AfterCaseLabel,
3096                                        Style.BraceWrapping.IndentBraces);
3097     parseBlock();
3098     if (FormatTok->is(tok::kw_break)) {
3099       if (Style.BraceWrapping.AfterControlStatement ==
3100           FormatStyle::BWACS_Always) {
3101         addUnwrappedLine();
3102         if (!Style.IndentCaseBlocks &&
3103             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
3104           ++Line->Level;
3105         }
3106       }
3107       parseStructuralElement();
3108     }
3109     addUnwrappedLine();
3110   } else {
3111     if (FormatTok->is(tok::semi))
3112       nextToken();
3113     addUnwrappedLine();
3114   }
3115   Line->Level = OldLineLevel;
3116   if (FormatTok->isNot(tok::l_brace)) {
3117     parseStructuralElement();
3118     addUnwrappedLine();
3119   }
3120 }
3121
3122 void UnwrappedLineParser::parseCaseLabel() {
3123   assert(FormatTok->is(tok::kw_case) && "'case' expected");
3124
3125   // FIXME: fix handling of complex expressions here.
3126   do {
3127     nextToken();
3128     if (FormatTok->is(tok::colon)) {
3129       FormatTok->setFinalizedType(TT_CaseLabelColon);
3130       break;
3131     }
3132   } while (!eof());
3133   parseLabel();
3134 }
3135
3136 void UnwrappedLineParser::parseSwitch() {
3137   assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3138   nextToken();
3139   if (FormatTok->is(tok::l_paren))
3140     parseParens();
3141
3142   keepAncestorBraces();
3143
3144   if (FormatTok->is(tok::l_brace)) {
3145     CompoundStatementIndenter Indenter(this, Style, Line->Level);
3146     parseBlock();
3147     addUnwrappedLine();
3148   } else {
3149     addUnwrappedLine();
3150     ++Line->Level;
3151     parseStructuralElement();
3152     --Line->Level;
3153   }
3154
3155   if (Style.RemoveBracesLLVM)
3156     NestedTooDeep.pop_back();
3157 }
3158
3159 // Operators that can follow a C variable.
3160 static bool isCOperatorFollowingVar(tok::TokenKind kind) {
3161   switch (kind) {
3162   case tok::ampamp:
3163   case tok::ampequal:
3164   case tok::arrow:
3165   case tok::caret:
3166   case tok::caretequal:
3167   case tok::comma:
3168   case tok::ellipsis:
3169   case tok::equal:
3170   case tok::equalequal:
3171   case tok::exclaim:
3172   case tok::exclaimequal:
3173   case tok::greater:
3174   case tok::greaterequal:
3175   case tok::greatergreater:
3176   case tok::greatergreaterequal:
3177   case tok::l_paren:
3178   case tok::l_square:
3179   case tok::less:
3180   case tok::lessequal:
3181   case tok::lessless:
3182   case tok::lesslessequal:
3183   case tok::minus:
3184   case tok::minusequal:
3185   case tok::minusminus:
3186   case tok::percent:
3187   case tok::percentequal:
3188   case tok::period:
3189   case tok::pipe:
3190   case tok::pipeequal:
3191   case tok::pipepipe:
3192   case tok::plus:
3193   case tok::plusequal:
3194   case tok::plusplus:
3195   case tok::question:
3196   case tok::r_brace:
3197   case tok::r_paren:
3198   case tok::r_square:
3199   case tok::semi:
3200   case tok::slash:
3201   case tok::slashequal:
3202   case tok::star:
3203   case tok::starequal:
3204     return true;
3205   default:
3206     return false;
3207   }
3208 }
3209
3210 void UnwrappedLineParser::parseAccessSpecifier() {
3211   FormatToken *AccessSpecifierCandidate = FormatTok;
3212   nextToken();
3213   // Understand Qt's slots.
3214   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
3215     nextToken();
3216   // Otherwise, we don't know what it is, and we'd better keep the next token.
3217   if (FormatTok->is(tok::colon)) {
3218     nextToken();
3219     addUnwrappedLine();
3220   } else if (!FormatTok->is(tok::coloncolon) &&
3221              !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
3222     // Not a variable name nor namespace name.
3223     addUnwrappedLine();
3224   } else if (AccessSpecifierCandidate) {
3225     // Consider the access specifier to be a C identifier.
3226     AccessSpecifierCandidate->Tok.setKind(tok::identifier);
3227   }
3228 }
3229
3230 /// \brief Parses a requires, decides if it is a clause or an expression.
3231 /// \pre The current token has to be the requires keyword.
3232 /// \returns true if it parsed a clause.
3233 bool clang::format::UnwrappedLineParser::parseRequires() {
3234   assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3235   auto RequiresToken = FormatTok;
3236
3237   // We try to guess if it is a requires clause, or a requires expression. For
3238   // that we first consume the keyword and check the next token.
3239   nextToken();
3240
3241   switch (FormatTok->Tok.getKind()) {
3242   case tok::l_brace:
3243     // This can only be an expression, never a clause.
3244     parseRequiresExpression(RequiresToken);
3245     return false;
3246   case tok::l_paren:
3247     // Clauses and expression can start with a paren, it's unclear what we have.
3248     break;
3249   default:
3250     // All other tokens can only be a clause.
3251     parseRequiresClause(RequiresToken);
3252     return true;
3253   }
3254
3255   // Looking forward we would have to decide if there are function declaration
3256   // like arguments to the requires expression:
3257   // requires (T t) {
3258   // Or there is a constraint expression for the requires clause:
3259   // requires (C<T> && ...
3260
3261   // But first let's look behind.
3262   auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
3263
3264   if (!PreviousNonComment ||
3265       PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
3266     // If there is no token, or an expression left brace, we are a requires
3267     // clause within a requires expression.
3268     parseRequiresClause(RequiresToken);
3269     return true;
3270   }
3271
3272   switch (PreviousNonComment->Tok.getKind()) {
3273   case tok::greater:
3274   case tok::r_paren:
3275   case tok::kw_noexcept:
3276   case tok::kw_const:
3277     // This is a requires clause.
3278     parseRequiresClause(RequiresToken);
3279     return true;
3280   case tok::amp:
3281   case tok::ampamp: {
3282     // This can be either:
3283     // if (... && requires (T t) ...)
3284     // Or
3285     // void member(...) && requires (C<T> ...
3286     // We check the one token before that for a const:
3287     // void member(...) const && requires (C<T> ...
3288     auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3289     if (PrevPrev && PrevPrev->is(tok::kw_const)) {
3290       parseRequiresClause(RequiresToken);
3291       return true;
3292     }
3293     break;
3294   }
3295   default:
3296     if (PreviousNonComment->isTypeOrIdentifier()) {
3297       // This is a requires clause.
3298       parseRequiresClause(RequiresToken);
3299       return true;
3300     }
3301     // It's an expression.
3302     parseRequiresExpression(RequiresToken);
3303     return false;
3304   }
3305
3306   // Now we look forward and try to check if the paren content is a parameter
3307   // list. The parameters can be cv-qualified and contain references or
3308   // pointers.
3309   // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3310   // of stuff: typename, const, *, &, &&, ::, identifiers.
3311
3312   unsigned StoredPosition = Tokens->getPosition();
3313   FormatToken *NextToken = Tokens->getNextToken();
3314   int Lookahead = 0;
3315   auto PeekNext = [&Lookahead, &NextToken, this] {
3316     ++Lookahead;
3317     NextToken = Tokens->getNextToken();
3318   };
3319
3320   bool FoundType = false;
3321   bool LastWasColonColon = false;
3322   int OpenAngles = 0;
3323
3324   for (; Lookahead < 50; PeekNext()) {
3325     switch (NextToken->Tok.getKind()) {
3326     case tok::kw_volatile:
3327     case tok::kw_const:
3328     case tok::comma:
3329       FormatTok = Tokens->setPosition(StoredPosition);
3330       parseRequiresExpression(RequiresToken);
3331       return false;
3332     case tok::r_paren:
3333     case tok::pipepipe:
3334       FormatTok = Tokens->setPosition(StoredPosition);
3335       parseRequiresClause(RequiresToken);
3336       return true;
3337     case tok::eof:
3338       // Break out of the loop.
3339       Lookahead = 50;
3340       break;
3341     case tok::coloncolon:
3342       LastWasColonColon = true;
3343       break;
3344     case tok::identifier:
3345       if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3346         FormatTok = Tokens->setPosition(StoredPosition);
3347         parseRequiresExpression(RequiresToken);
3348         return false;
3349       }
3350       FoundType = true;
3351       LastWasColonColon = false;
3352       break;
3353     case tok::less:
3354       ++OpenAngles;
3355       break;
3356     case tok::greater:
3357       --OpenAngles;
3358       break;
3359     default:
3360       if (NextToken->isSimpleTypeSpecifier()) {
3361         FormatTok = Tokens->setPosition(StoredPosition);
3362         parseRequiresExpression(RequiresToken);
3363         return false;
3364       }
3365       break;
3366     }
3367   }
3368   // This seems to be a complicated expression, just assume it's a clause.
3369   FormatTok = Tokens->setPosition(StoredPosition);
3370   parseRequiresClause(RequiresToken);
3371   return true;
3372 }
3373
3374 /// \brief Parses a requires clause.
3375 /// \param RequiresToken The requires keyword token, which starts this clause.
3376 /// \pre We need to be on the next token after the requires keyword.
3377 /// \sa parseRequiresExpression
3378 ///
3379 /// Returns if it either has finished parsing the clause, or it detects, that
3380 /// the clause is incorrect.
3381 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3382   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3383   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3384
3385   // If there is no previous token, we are within a requires expression,
3386   // otherwise we will always have the template or function declaration in front
3387   // of it.
3388   bool InRequiresExpression =
3389       !RequiresToken->Previous ||
3390       RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3391
3392   RequiresToken->setFinalizedType(InRequiresExpression
3393                                       ? TT_RequiresClauseInARequiresExpression
3394                                       : TT_RequiresClause);
3395
3396   // NOTE: parseConstraintExpression is only ever called from this function.
3397   // It could be inlined into here.
3398   parseConstraintExpression();
3399
3400   if (!InRequiresExpression)
3401     FormatTok->Previous->ClosesRequiresClause = true;
3402 }
3403
3404 /// \brief Parses a requires expression.
3405 /// \param RequiresToken The requires keyword token, which starts this clause.
3406 /// \pre We need to be on the next token after the requires keyword.
3407 /// \sa parseRequiresClause
3408 ///
3409 /// Returns if it either has finished parsing the expression, or it detects,
3410 /// that the expression is incorrect.
3411 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3412   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3413   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3414
3415   RequiresToken->setFinalizedType(TT_RequiresExpression);
3416
3417   if (FormatTok->is(tok::l_paren)) {
3418     FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3419     parseParens();
3420   }
3421
3422   if (FormatTok->is(tok::l_brace)) {
3423     FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3424     parseChildBlock(/*CanContainBracedList=*/false,
3425                     /*NextLBracesType=*/TT_CompoundRequirementLBrace);
3426   }
3427 }
3428
/// \brief Parses a constraint expression.
///
/// This is the body of a requires clause. It returns when the parsing is
/// complete, or the expression is incorrect.
///
/// Each loop iteration dispatches on the kind of the current token and either
/// consumes a piece of the expression or returns because the token is not
/// treated as part of it. The loop also ends when eof is reached.
void UnwrappedLineParser::parseConstraintExpression() {
  // The special handling for lambdas is needed since tryToParseLambda() eats a
  // token and if a requires expression is the last part of a requires clause
  // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
  // not set on the correct token. Thus we need to be aware if we even expect a
  // lambda to be possible.
  // template <typename T> requires requires { ... } [[nodiscard]] ...;
  bool LambdaNextTimeAllowed = true;

  // Within lambda declarations, it is permitted to put a requires clause after
  // its template parameter list, which would place the requires clause right
  // before the parentheses of the parameters of the lambda declaration. Thus,
  // we track if we expect to see grouping parentheses at all.
  // Without this check, `requires foo<T> (T t)` in the below example would be
  // seen as the whole requires clause, accidentally eating the parameters of
  // the lambda.
  // [&]<typename T> requires foo<T> (T t) { ... };
  bool TopLevelParensAllowed = true;

  do {
    // A lambda is only allowed in this iteration if the previous iteration
    // (or the initial state) enabled it; the flag is reset for the next
    // iteration unless a case below sets it again.
    bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);

    switch (FormatTok->Tok.getKind()) {
    case tok::kw_requires: {
      // A nested requires expression.
      auto RequiresToken = FormatTok;
      nextToken();
      parseRequiresExpression(RequiresToken);
      break;
    }

    case tok::l_paren:
      if (!TopLevelParensAllowed)
        return;
      parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
      TopLevelParensAllowed = false;
      break;

    case tok::l_square:
      // Either a lambda (when one is allowed here) or the end of the
      // expression, e.g. a following attribute.
      if (!LambdaThisTimeAllowed || !tryToParseLambda())
        return;
      break;

    case tok::kw_const:
    case tok::semi:
    case tok::kw_class:
    case tok::kw_struct:
    case tok::kw_union:
      return;

    case tok::l_brace:
      // Potential function body.
      return;

    case tok::ampamp:
    case tok::pipepipe:
      FormatTok->setFinalizedType(TT_BinaryOperator);
      nextToken();
      // A new operand follows, so both a lambda and grouping parentheses are
      // possible again.
      LambdaNextTimeAllowed = true;
      TopLevelParensAllowed = true;
      break;

    case tok::comma:
    case tok::comment:
      // Carry the lambda permission over unchanged to the next iteration.
      LambdaNextTimeAllowed = LambdaThisTimeAllowed;
      nextToken();
      break;

    case tok::kw_sizeof:
    case tok::greater:
    case tok::greaterequal:
    case tok::greatergreater:
    case tok::less:
    case tok::lessequal:
    case tok::lessless:
    case tok::equalequal:
    case tok::exclaim:
    case tok::exclaimequal:
    case tok::plus:
    case tok::minus:
    case tok::star:
    case tok::slash:
      LambdaNextTimeAllowed = true;
      TopLevelParensAllowed = true;
      // Just eat them.
      nextToken();
      break;

    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_true:
    case tok::kw_false:
      TopLevelParensAllowed = false;
      // Just eat them.
      nextToken();
      break;

    case tok::kw_static_cast:
    case tok::kw_const_cast:
    case tok::kw_reinterpret_cast:
    case tok::kw_dynamic_cast:
      nextToken();
      if (!FormatTok->is(tok::less))
        return;

      // Parse the template argument of the cast up to the closing '>'.
      nextToken();
      parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                      /*ClosingBraceKind=*/tok::greater);
      break;

    default:
      if (!FormatTok->Tok.getIdentifierInfo()) {
        // Identifiers are part of the default case, we check for more than
        // tok::identifier to handle builtin type traits.
        return;
      }

      // We need to differentiate identifiers for a template deduction guide,
      // variables, or function return types (the constraint expression has
      // ended before that), and basically all other cases. But it's easier to
      // check the other way around.
      assert(FormatTok->Previous);
      switch (FormatTok->Previous->Tok.getKind()) {
      case tok::coloncolon:  // Nested identifier.
      case tok::ampamp:      // Start of a function or variable for the
      case tok::pipepipe:    // constraint expression. (binary)
      case tok::exclaim:     // The same as above, but unary.
      case tok::kw_requires: // Initial identifier of a requires clause.
      case tok::equal:       // Initial identifier of a concept declaration.
        break;
      default:
        return;
      }

      // Read identifier with optional template declaration.
      nextToken();
      if (FormatTok->is(tok::less)) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      TopLevelParensAllowed = false;
      break;
    }
  } while (!eof());
}
3578
/// Parses an enum declaration or definition starting at the current token.
///
/// \returns false if the tokens turn out not to be an enum (a JavaScript
/// property named "enum", a protobuf field named "enum", or, in C++, two
/// consecutive identifiers); true otherwise, including for a declaration
/// without a body.
bool UnwrappedLineParser::parseEnum() {
  const FormatToken &InitialToken = *FormatTok;

  // Won't be 'enum' for NS_ENUMs.
  if (FormatTok->is(tok::kw_enum))
    nextToken();

  // In TypeScript, "enum" can also be used as property name, e.g. in interface
  // declarations. An "enum" keyword followed by a colon would be a syntax
  // error and thus assume it is just an identifier.
  if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
    return false;

  // In protobuf, "enum" can be used as a field name.
  if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
    return false;

  // Eat up enum class ...
  if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
    nextToken();

  // Consume everything between the enum keyword(s) and the body: the name,
  // an underlying type, macros and attributes.
  while (FormatTok->Tok.getIdentifierInfo() ||
         FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
                            tok::greater, tok::comma, tok::question,
                            tok::l_square, tok::r_square)) {
    if (Style.isVerilog()) {
      FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
      nextToken();
      // In Verilog the base type can have dimensions.
      while (FormatTok->is(tok::l_square))
        parseSquare();
    } else {
      nextToken();
    }
    // We can have macros or attributes in between 'enum' and the enum name.
    if (FormatTok->is(tok::l_paren))
      parseParens();
    if (FormatTok->is(TT_AttributeSquare)) {
      parseSquare();
      // Consume the closing TT_AttributeSquare.
      if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
        nextToken();
    }
    if (FormatTok->is(tok::identifier)) {
      nextToken();
      // If there are two identifiers in a row, this is likely an elaborate
      // return type. In Java, this can be "implements", etc.
      if (Style.isCpp() && FormatTok->is(tok::identifier))
        return false;
    }
  }

  // Just a declaration or something is wrong.
  if (FormatTok->isNot(tok::l_brace))
    return true;
  FormatTok->setFinalizedType(TT_EnumLBrace);
  FormatTok->setBlockKind(BK_Block);

  if (Style.Language == FormatStyle::LK_Java) {
    // Java enums are different.
    parseJavaEnumBody();
    return true;
  }
  if (Style.Language == FormatStyle::LK_Proto) {
    // Protobuf enum bodies are parsed as ordinary declaration blocks.
    parseBlock(/*MustBeDeclaration=*/true);
    return true;
  }

  if (!Style.AllowShortEnumsOnASingleLine &&
      ShouldBreakBeforeBrace(Style, InitialToken)) {
    addUnwrappedLine();
  }
  // Parse enum body.
  nextToken();
  if (!Style.AllowShortEnumsOnASingleLine) {
    // Put the enumerators on their own lines, one level deeper.
    addUnwrappedLine();
    Line->Level += 1;
  }
  bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
                                   /*IsEnum=*/true);
  if (!Style.AllowShortEnumsOnASingleLine)
    Line->Level -= 1;
  if (HasError) {
    // The braced list did not parse cleanly; end the line here.
    if (FormatTok->is(tok::semi))
      nextToken();
    addUnwrappedLine();
  }
  return true;

  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "enum A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
3672
3673 bool UnwrappedLineParser::parseStructLike() {
3674   // parseRecord falls through and does not yet add an unwrapped line as a
3675   // record declaration or definition can start a structural element.
3676   parseRecord();
3677   // This does not apply to Java, JavaScript and C#.
3678   if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3679       Style.isCSharp()) {
3680     if (FormatTok->is(tok::semi))
3681       nextToken();
3682     addUnwrappedLine();
3683     return true;
3684   }
3685   return false;
3686 }
3687
3688 namespace {
3689 // A class used to set and restore the Token position when peeking
3690 // ahead in the token source.
3691 class ScopedTokenPosition {
3692   unsigned StoredPosition;
3693   FormatTokenSource *Tokens;
3694
3695 public:
3696   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3697     assert(Tokens && "Tokens expected to not be null");
3698     StoredPosition = Tokens->getPosition();
3699   }
3700
3701   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3702 };
3703 } // namespace
3704
3705 // Look to see if we have [[ by looking ahead, if
3706 // its not then rewind to the original position.
3707 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3708   ScopedTokenPosition AutoPosition(Tokens);
3709   FormatToken *Tok = Tokens->getNextToken();
3710   // We already read the first [ check for the second.
3711   if (!Tok->is(tok::l_square))
3712     return false;
3713   // Double check that the attribute is just something
3714   // fairly simple.
3715   while (Tok->isNot(tok::eof)) {
3716     if (Tok->is(tok::r_square))
3717       break;
3718     Tok = Tokens->getNextToken();
3719   }
3720   if (Tok->is(tok::eof))
3721     return false;
3722   Tok = Tokens->getNextToken();
3723   if (!Tok->is(tok::r_square))
3724     return false;
3725   Tok = Tokens->getNextToken();
3726   if (Tok->is(tok::semi))
3727     return false;
3728   return true;
3729 }
3730
3731 void UnwrappedLineParser::parseJavaEnumBody() {
3732   assert(FormatTok->is(tok::l_brace));
3733   const FormatToken *OpeningBrace = FormatTok;
3734
3735   // Determine whether the enum is simple, i.e. does not have a semicolon or
3736   // constants with class bodies. Simple enums can be formatted like braced
3737   // lists, contracted to a single line, etc.
3738   unsigned StoredPosition = Tokens->getPosition();
3739   bool IsSimple = true;
3740   FormatToken *Tok = Tokens->getNextToken();
3741   while (!Tok->is(tok::eof)) {
3742     if (Tok->is(tok::r_brace))
3743       break;
3744     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3745       IsSimple = false;
3746       break;
3747     }
3748     // FIXME: This will also mark enums with braces in the arguments to enum
3749     // constants as "not simple". This is probably fine in practice, though.
3750     Tok = Tokens->getNextToken();
3751   }
3752   FormatTok = Tokens->setPosition(StoredPosition);
3753
3754   if (IsSimple) {
3755     nextToken();
3756     parseBracedList();
3757     addUnwrappedLine();
3758     return;
3759   }
3760
3761   // Parse the body of a more complex enum.
3762   // First add a line for everything up to the "{".
3763   nextToken();
3764   addUnwrappedLine();
3765   ++Line->Level;
3766
3767   // Parse the enum constants.
3768   while (!eof()) {
3769     if (FormatTok->is(tok::l_brace)) {
3770       // Parse the constant's class body.
3771       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3772                  /*MunchSemi=*/false);
3773     } else if (FormatTok->is(tok::l_paren)) {
3774       parseParens();
3775     } else if (FormatTok->is(tok::comma)) {
3776       nextToken();
3777       addUnwrappedLine();
3778     } else if (FormatTok->is(tok::semi)) {
3779       nextToken();
3780       addUnwrappedLine();
3781       break;
3782     } else if (FormatTok->is(tok::r_brace)) {
3783       addUnwrappedLine();
3784       break;
3785     } else {
3786       nextToken();
3787     }
3788   }
3789
3790   // Parse the class body after the enum's ";" if any.
3791   parseLevel(OpeningBrace);
3792   nextToken();
3793   --Line->Level;
3794   addUnwrappedLine();
3795 }
3796
/// Parses a record (class, struct, union, or similar) starting at its
/// introducing keyword: the name with optional attributes and macros, then an
/// optional base/template part, then the body if one follows.
///
/// \param ParseAsExpr if true, the body is parsed with parseChildBlock();
/// otherwise it is parsed with parseBlock(), optionally preceded by a line
/// break as decided by ShouldBreakBeforeBrace().
void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
  const FormatToken &InitialToken = *FormatTok;
  nextToken();

  // The actual identifier can be a nested name specifier, and in macros
  // it is often token-pasted.
  // An [[attribute]] can be before the identifier.
  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
                            tok::kw___attribute, tok::kw___declspec,
                            tok::kw_alignas, tok::l_square) ||
         ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
          FormatTok->isOneOf(tok::period, tok::comma))) {
    if (Style.isJavaScript() &&
        FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
      // JavaScript/TypeScript supports inline object types in
      // extends/implements positions:
      //     class Foo implements {bar: number} { }
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        tryToParseBracedList();
        continue;
      }
    }
    if (FormatTok->is(tok::l_square) && handleCppAttributes())
      continue;
    // An identifier that is not entirely upper case is assumed not to be a
    // macro, so a following '(' is not parsed as macro arguments.
    bool IsNonMacroIdentifier =
        FormatTok->is(tok::identifier) &&
        FormatTok->TokenText != FormatTok->TokenText.upper();
    nextToken();
    // We can have macros in between 'class' and the class name.
    if (!IsNonMacroIdentifier && FormatTok->is(tok::l_paren))
      parseParens();
  }

  // Note that parsing away template declarations here leads to incorrectly
  // accepting function declarations as record declarations.
  // In general, we cannot solve this problem. Consider:
  // class A<int> B() {}
  // which can be a function definition or a class definition when B() is a
  // macro. If we find enough real-world cases where this is a problem, we
  // can parse for the 'template' keyword in the beginning of the statement,
  // and thus rule out the record production in case there is no template
  // (this would still leave us with an ambiguity between template function
  // and class declarations).
  if (FormatTok->isOneOf(tok::colon, tok::less)) {
    do {
      if (FormatTok->is(tok::l_brace)) {
        calculateBraceTypes(/*ExpectClassBody=*/true);
        // A '{' that is not a braced list ends this part; it is handled as
        // the record body below.
        if (!tryToParseBracedList())
          break;
      }
      if (FormatTok->is(tok::l_square)) {
        FormatToken *Previous = FormatTok->Previous;
        if (!Previous ||
            !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
          // Don't try parsing a lambda if we had a closing parenthesis before,
          // it was probably a pointer to an array: int (*)[].
          if (!tryToParseLambda())
            continue;
        } else {
          parseSquare();
          continue;
        }
      }
      // A ';' here means there is no body to parse.
      if (FormatTok->is(tok::semi))
        return;
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
        addUnwrappedLine();
        nextToken();
        parseCSharpGenericTypeConstraint();
        break;
      }
      nextToken();
    } while (!eof());
  }

  // Maps the keyword that introduced the record to the token type assigned to
  // its opening brace.
  auto GetBraceType = [](const FormatToken &RecordTok) {
    switch (RecordTok.Tok.getKind()) {
    case tok::kw_class:
      return TT_ClassLBrace;
    case tok::kw_struct:
      return TT_StructLBrace;
    case tok::kw_union:
      return TT_UnionLBrace;
    default:
      // Useful for e.g. interface.
      return TT_RecordLBrace;
    }
  };
  if (FormatTok->is(tok::l_brace)) {
    FormatTok->setFinalizedType(GetBraceType(InitialToken));
    if (ParseAsExpr) {
      parseChildBlock();
    } else {
      if (ShouldBreakBeforeBrace(Style, InitialToken))
        addUnwrappedLine();

      // With IndentAccessModifiers the body is indented by two levels instead
      // of one.
      unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
      parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
    }
  }
  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "class A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
3902
3903 void UnwrappedLineParser::parseObjCMethod() {
3904   assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
3905          "'(' or identifier expected.");
3906   do {
3907     if (FormatTok->is(tok::semi)) {
3908       nextToken();
3909       addUnwrappedLine();
3910       return;
3911     } else if (FormatTok->is(tok::l_brace)) {
3912       if (Style.BraceWrapping.AfterFunction)
3913         addUnwrappedLine();
3914       parseBlock();
3915       addUnwrappedLine();
3916       return;
3917     } else {
3918       nextToken();
3919     }
3920   } while (!eof());
3921 }
3922
3923 void UnwrappedLineParser::parseObjCProtocolList() {
3924   assert(FormatTok->is(tok::less) && "'<' expected.");
3925   do {
3926     nextToken();
3927     // Early exit in case someone forgot a close angle.
3928     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3929         FormatTok->isObjCAtKeyword(tok::objc_end)) {
3930       return;
3931     }
3932   } while (!eof() && FormatTok->isNot(tok::greater));
3933   nextToken(); // Skip '>'.
3934 }
3935
3936 void UnwrappedLineParser::parseObjCUntilAtEnd() {
3937   do {
3938     if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
3939       nextToken();
3940       addUnwrappedLine();
3941       break;
3942     }
3943     if (FormatTok->is(tok::l_brace)) {
3944       parseBlock();
3945       // In ObjC interfaces, nothing should be following the "}".
3946       addUnwrappedLine();
3947     } else if (FormatTok->is(tok::r_brace)) {
3948       // Ignore stray "}". parseStructuralElement doesn't consume them.
3949       nextToken();
3950       addUnwrappedLine();
3951     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
3952       nextToken();
3953       parseObjCMethod();
3954     } else {
3955       parseStructuralElement();
3956     }
3957   } while (!eof());
3958 }
3959
3960 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
3961   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
3962          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
3963   nextToken();
3964   nextToken(); // interface name
3965
3966   // @interface can be followed by a lightweight generic
3967   // specialization list, then either a base class or a category.
3968   if (FormatTok->is(tok::less))
3969     parseObjCLightweightGenerics();
3970   if (FormatTok->is(tok::colon)) {
3971     nextToken();
3972     nextToken(); // base class name
3973     // The base class can also have lightweight generics applied to it.
3974     if (FormatTok->is(tok::less))
3975       parseObjCLightweightGenerics();
3976   } else if (FormatTok->is(tok::l_paren)) {
3977     // Skip category, if present.
3978     parseParens();
3979   }
3980
3981   if (FormatTok->is(tok::less))
3982     parseObjCProtocolList();
3983
3984   if (FormatTok->is(tok::l_brace)) {
3985     if (Style.BraceWrapping.AfterObjCDeclaration)
3986       addUnwrappedLine();
3987     parseBlock(/*MustBeDeclaration=*/true);
3988   }
3989
3990   // With instance variables, this puts '}' on its own line.  Without instance
3991   // variables, this ends the @interface line.
3992   addUnwrappedLine();
3993
3994   parseObjCUntilAtEnd();
3995 }
3996
3997 void UnwrappedLineParser::parseObjCLightweightGenerics() {
3998   assert(FormatTok->is(tok::less));
3999   // Unlike protocol lists, generic parameterizations support
4000   // nested angles:
4001   //
4002   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4003   //     NSObject <NSCopying, NSSecureCoding>
4004   //
4005   // so we need to count how many open angles we have left.
4006   unsigned NumOpenAngles = 1;
4007   do {
4008     nextToken();
4009     // Early exit in case someone forgot a close angle.
4010     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4011         FormatTok->isObjCAtKeyword(tok::objc_end)) {
4012       break;
4013     }
4014     if (FormatTok->is(tok::less)) {
4015       ++NumOpenAngles;
4016     } else if (FormatTok->is(tok::greater)) {
4017       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
4018       --NumOpenAngles;
4019     }
4020   } while (!eof() && NumOpenAngles != 0);
4021   nextToken(); // Skip '>'.
4022 }
4023
4024 // Returns true for the declaration/definition form of @protocol,
4025 // false for the expression form.
4026 bool UnwrappedLineParser::parseObjCProtocol() {
4027   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
4028   nextToken();
4029
4030   if (FormatTok->is(tok::l_paren)) {
4031     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4032     return false;
4033   }
4034
4035   // The definition/declaration form,
4036   // @protocol Foo
4037   // - (int)someMethod;
4038   // @end
4039
4040   nextToken(); // protocol name
4041
4042   if (FormatTok->is(tok::less))
4043     parseObjCProtocolList();
4044
4045   // Check for protocol declaration.
4046   if (FormatTok->is(tok::semi)) {
4047     nextToken();
4048     addUnwrappedLine();
4049     return true;
4050   }
4051
4052   addUnwrappedLine();
4053   parseObjCUntilAtEnd();
4054   return true;
4055 }
4056
4057 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
4058   bool IsImport = FormatTok->is(Keywords.kw_import);
4059   assert(IsImport || FormatTok->is(tok::kw_export));
4060   nextToken();
4061
4062   // Consume the "default" in "export default class/function".
4063   if (FormatTok->is(tok::kw_default))
4064     nextToken();
4065
4066   // Consume "async function", "function" and "default function", so that these
4067   // get parsed as free-standing JS functions, i.e. do not require a trailing
4068   // semicolon.
4069   if (FormatTok->is(Keywords.kw_async))
4070     nextToken();
4071   if (FormatTok->is(Keywords.kw_function)) {
4072     nextToken();
4073     return;
4074   }
4075
4076   // For imports, `export *`, `export {...}`, consume the rest of the line up
4077   // to the terminating `;`. For everything else, just return and continue
4078   // parsing the structural element, i.e. the declaration or expression for
4079   // `export default`.
4080   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
4081       !FormatTok->isStringLiteral() &&
4082       !(FormatTok->is(Keywords.kw_type) &&
4083         Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) {
4084     return;
4085   }
4086
4087   while (!eof()) {
4088     if (FormatTok->is(tok::semi))
4089       return;
4090     if (Line->Tokens.empty()) {
4091       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
4092       // import statement should terminate.
4093       return;
4094     }
4095     if (FormatTok->is(tok::l_brace)) {
4096       FormatTok->setBlockKind(BK_Block);
4097       nextToken();
4098       parseBracedList();
4099     } else {
4100       nextToken();
4101     }
4102   }
4103 }
4104
4105 void UnwrappedLineParser::parseStatementMacro() {
4106   nextToken();
4107   if (FormatTok->is(tok::l_paren))
4108     parseParens();
4109   if (FormatTok->is(tok::semi))
4110     nextToken();
4111   addUnwrappedLine();
4112 }
4113
4114 void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4115   // consume things like a::`b.c[d:e] or a::*
4116   while (true) {
4117     if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar,
4118                            tok::coloncolon, tok::hash) ||
4119         Keywords.isVerilogIdentifier(*FormatTok)) {
4120       nextToken();
4121     } else if (FormatTok->is(tok::l_square)) {
4122       parseSquare();
4123     } else {
4124       break;
4125     }
4126   }
4127 }
4128
4129 void UnwrappedLineParser::parseVerilogSensitivityList() {
4130   if (!FormatTok->is(tok::at))
4131     return;
4132   nextToken();
4133   // A block event expression has 2 at signs.
4134   if (FormatTok->is(tok::at))
4135     nextToken();
4136   switch (FormatTok->Tok.getKind()) {
4137   case tok::star:
4138     nextToken();
4139     break;
4140   case tok::l_paren:
4141     parseParens();
4142     break;
4143   default:
4144     parseVerilogHierarchyIdentifier();
4145     break;
4146   }
4147 }
4148
/// Parses the header of a Verilog hierarchy construct. Three shapes are
/// handled: a clocking block, a case-like statement (case, casex, casez,
/// randcase, randsequence), and everything else ("module" etc.).
///
/// \returns AddLevels, which is 1 for a case-like header when
/// Style.IndentCaseLabels is set and 0 otherwise. How the caller applies this
/// value is not visible in this function.
unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
  unsigned AddLevels = 0;

  if (FormatTok->is(Keywords.kw_clocking)) {
    // "clocking", an optional name, an optional sensitivity list and an
    // optional ';'. No unwrapped line is added in this branch.
    nextToken();
    if (Keywords.isVerilogIdentifier(*FormatTok))
      nextToken();
    parseVerilogSensitivityList();
    if (FormatTok->is(tok::semi))
      nextToken();
  } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex,
                                Keywords.kw_casez, Keywords.kw_randcase,
                                Keywords.kw_randsequence)) {
    if (Style.IndentCaseLabels)
      AddLevels++;
    nextToken();
    if (FormatTok->is(tok::l_paren)) {
      FormatTok->setFinalizedType(TT_ConditionLParen);
      parseParens();
    }
    if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches))
      nextToken();
    // The case header has no semicolon.
  } else {
    // "module" etc.
    nextToken();
    // all the words like the name of the module and specifiers like
    // "automatic" and the width of function return type
    while (true) {
      if (FormatTok->is(tok::l_square)) {
        // An identifier directly followed by '[' is marked as a dimensioned
        // type name.
        auto Prev = FormatTok->getPreviousNonComment();
        if (Prev && Keywords.isVerilogIdentifier(*Prev))
          Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
        parseSquare();
      } else if (Keywords.isVerilogIdentifier(*FormatTok) ||
                 FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) {
        nextToken();
      } else {
        break;
      }
    }

    // Ends the current unwrapped line and flags the next one as a
    // continuation of it.
    auto NewLine = [this]() {
      addUnwrappedLine();
      Line->IsContinuation = true;
    };

    // package imports
    while (FormatTok->is(Keywords.kw_import)) {
      NewLine();
      nextToken();
      parseVerilogHierarchyIdentifier();
      if (FormatTok->is(tok::semi))
        nextToken();
    }

    // parameters and ports
    if (FormatTok->is(Keywords.kw_verilogHash)) {
      NewLine();
      nextToken();
      if (FormatTok->is(tok::l_paren)) {
        FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
        parseParens();
      }
    }
    if (FormatTok->is(tok::l_paren)) {
      NewLine();
      FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
      parseParens();
    }

    // extends and implements
    if (FormatTok->is(Keywords.kw_extends)) {
      NewLine();
      nextToken();
      parseVerilogHierarchyIdentifier();
      if (FormatTok->is(tok::l_paren))
        parseParens();
    }
    if (FormatTok->is(Keywords.kw_implements)) {
      NewLine();
      // A comma-separated list of identifiers.
      do {
        nextToken();
        parseVerilogHierarchyIdentifier();
      } while (FormatTok->is(tok::comma));
    }

    // Coverage event for cover groups.
    if (FormatTok->is(tok::at)) {
      NewLine();
      parseVerilogSensitivityList();
    }

    // Consume the optional ';' and end the header line.
    if (FormatTok->is(tok::semi))
      nextToken(/*LevelDifference=*/1);
    addUnwrappedLine();
  }

  return AddLevels;
}
4249
4250 void UnwrappedLineParser::parseVerilogTable() {
4251   assert(FormatTok->is(Keywords.kw_table));
4252   nextToken(/*LevelDifference=*/1);
4253   addUnwrappedLine();
4254
4255   auto InitialLevel = Line->Level++;
4256   while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) {
4257     FormatToken *Tok = FormatTok;
4258     nextToken();
4259     if (Tok->is(tok::semi))
4260       addUnwrappedLine();
4261     else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus))
4262       Tok->setFinalizedType(TT_VerilogTableItem);
4263   }
4264   Line->Level = InitialLevel;
4265   nextToken(/*LevelDifference=*/-1);
4266   addUnwrappedLine();
4267 }
4268
4269 void UnwrappedLineParser::parseVerilogCaseLabel() {
4270   // The label will get unindented in AnnotatingParser. If there are no leading
4271   // spaces, indent the rest here so that things inside the block will be
4272   // indented relative to things outside. We don't use parseLabel because we
4273   // don't know whether this colon is a label or a ternary expression at this
4274   // point.
4275   auto OrigLevel = Line->Level;
4276   auto FirstLine = CurrentLines->size();
4277   if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
4278     ++Line->Level;
4279   else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok))
4280     --Line->Level;
4281   parseStructuralElement();
4282   // Restore the indentation in both the new line and the line that has the
4283   // label.
4284   if (CurrentLines->size() > FirstLine)
4285     (*CurrentLines)[FirstLine].Level = OrigLevel;
4286   Line->Level = OrigLevel;
4287 }
4288
4289 bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
4290   for (const auto &N : Line.Tokens) {
4291     if (N.Tok->MacroCtx)
4292       return true;
4293     for (const UnwrappedLine &Child : N.Children)
4294       if (containsExpansion(Child))
4295         return true;
4296   }
4297   return false;
4298 }
4299
4300 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
4301   if (Line->Tokens.empty())
4302     return;
4303   LLVM_DEBUG({
4304     if (!parsingPPDirective()) {
4305       llvm::dbgs() << "Adding unwrapped line:\n";
4306       printDebugInfo(*Line);
4307     }
4308   });
4309
4310   // If this line closes a block when in Whitesmiths mode, remember that
4311   // information so that the level can be decreased after the line is added.
4312   // This has to happen after the addition of the line since the line itself
4313   // needs to be indented.
4314   bool ClosesWhitesmithsBlock =
4315       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
4316       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
4317
4318   // If the current line was expanded from a macro call, we use it to
4319   // reconstruct an unwrapped line from the structure of the expanded unwrapped
4320   // line and the unexpanded token stream.
4321   if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) {
4322     if (!Reconstruct)
4323       Reconstruct.emplace(Line->Level, Unexpanded);
4324     Reconstruct->addLine(*Line);
4325
4326     // While the reconstructed unexpanded lines are stored in the normal
4327     // flow of lines, the expanded lines are stored on the side to be analyzed
4328     // in an extra step.
4329     CurrentExpandedLines.push_back(std::move(*Line));
4330
4331     if (Reconstruct->finished()) {
4332       UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
4333       assert(!Reconstructed.Tokens.empty() &&
4334              "Reconstructed must at least contain the macro identifier.");
4335       assert(!parsingPPDirective());
4336       LLVM_DEBUG({
4337         llvm::dbgs() << "Adding unexpanded line:\n";
4338         printDebugInfo(Reconstructed);
4339       });
4340       ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines;
4341       Lines.push_back(std::move(Reconstructed));
4342       CurrentExpandedLines.clear();
4343       Reconstruct.reset();
4344     }
4345   } else {
4346     // At the top level we only get here when no unexpansion is going on, or
4347     // when conditional formatting led to unfinished macro reconstructions.
4348     assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0);
4349     CurrentLines->push_back(std::move(*Line));
4350   }
4351   Line->Tokens.clear();
4352   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
4353   Line->FirstStartColumn = 0;
4354   Line->IsContinuation = false;
4355
4356   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
4357     --Line->Level;
4358   if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
4359     CurrentLines->append(
4360         std::make_move_iterator(PreprocessorDirectives.begin()),
4361         std::make_move_iterator(PreprocessorDirectives.end()));
4362     PreprocessorDirectives.clear();
4363   }
4364   // Disconnect the current token from the last token on the previous line.
4365   FormatTok->Previous = nullptr;
4366 }
4367
4368 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
4369
4370 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4371   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4372          FormatTok.NewlinesBefore > 0;
4373 }
4374
4375 // Checks if \p FormatTok is a line comment that continues the line comment
4376 // section on \p Line.
4377 static bool
4378 continuesLineCommentSection(const FormatToken &FormatTok,
4379                             const UnwrappedLine &Line,
4380                             const llvm::Regex &CommentPragmasRegex) {
4381   if (Line.Tokens.empty())
4382     return false;
4383
4384   StringRef IndentContent = FormatTok.TokenText;
4385   if (FormatTok.TokenText.startswith("//") ||
4386       FormatTok.TokenText.startswith("/*")) {
4387     IndentContent = FormatTok.TokenText.substr(2);
4388   }
4389   if (CommentPragmasRegex.match(IndentContent))
4390     return false;
4391
4392   // If Line starts with a line comment, then FormatTok continues the comment
4393   // section if its original column is greater or equal to the original start
4394   // column of the line.
4395   //
4396   // Define the min column token of a line as follows: if a line ends in '{' or
4397   // contains a '{' followed by a line comment, then the min column token is
4398   // that '{'. Otherwise, the min column token of the line is the first token of
4399   // the line.
4400   //
4401   // If Line starts with a token other than a line comment, then FormatTok
4402   // continues the comment section if its original column is greater than the
4403   // original start column of the min column token of the line.
4404   //
4405   // For example, the second line comment continues the first in these cases:
4406   //
4407   // // first line
4408   // // second line
4409   //
4410   // and:
4411   //
4412   // // first line
4413   //  // second line
4414   //
4415   // and:
4416   //
4417   // int i; // first line
4418   //  // second line
4419   //
4420   // and:
4421   //
4422   // do { // first line
4423   //      // second line
4424   //   int i;
4425   // } while (true);
4426   //
4427   // and:
4428   //
4429   // enum {
4430   //   a, // first line
4431   //    // second line
4432   //   b
4433   // };
4434   //
4435   // The second line comment doesn't continue the first in these cases:
4436   //
4437   //   // first line
4438   //  // second line
4439   //
4440   // and:
4441   //
4442   // int i; // first line
4443   // // second line
4444   //
4445   // and:
4446   //
4447   // do { // first line
4448   //   // second line
4449   //   int i;
4450   // } while (true);
4451   //
4452   // and:
4453   //
4454   // enum {
4455   //   a, // first line
4456   //   // second line
4457   // };
4458   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4459
4460   // Scan for '{//'. If found, use the column of '{' as a min column for line
4461   // comment section continuation.
4462   const FormatToken *PreviousToken = nullptr;
4463   for (const UnwrappedLineNode &Node : Line.Tokens) {
4464     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
4465         isLineComment(*Node.Tok)) {
4466       MinColumnToken = PreviousToken;
4467       break;
4468     }
4469     PreviousToken = Node.Tok;
4470
4471     // Grab the last newline preceding a token in this unwrapped line.
4472     if (Node.Tok->NewlinesBefore > 0)
4473       MinColumnToken = Node.Tok;
4474   }
4475   if (PreviousToken && PreviousToken->is(tok::l_brace))
4476     MinColumnToken = PreviousToken;
4477
4478   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4479                               MinColumnToken);
4480 }
4481
4482 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4483   bool JustComments = Line->Tokens.empty();
4484   for (FormatToken *Tok : CommentsBeforeNextToken) {
4485     // Line comments that belong to the same line comment section are put on the
4486     // same line since later we might want to reflow content between them.
4487     // Additional fine-grained breaking of line comment sections is controlled
4488     // by the class BreakableLineCommentSection in case it is desirable to keep
4489     // several line comment sections in the same unwrapped line.
4490     //
4491     // FIXME: Consider putting separate line comment sections as children to the
4492     // unwrapped line instead.
4493     Tok->ContinuesLineCommentSection =
4494         continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
4495     if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4496       addUnwrappedLine();
4497     pushToken(Tok);
4498   }
4499   if (NewlineBeforeNext && JustComments)
4500     addUnwrappedLine();
4501   CommentsBeforeNextToken.clear();
4502 }
4503
4504 void UnwrappedLineParser::nextToken(int LevelDifference) {
4505   if (eof())
4506     return;
4507   flushComments(isOnNewLine(*FormatTok));
4508   pushToken(FormatTok);
4509   FormatToken *Previous = FormatTok;
4510   if (!Style.isJavaScript())
4511     readToken(LevelDifference);
4512   else
4513     readTokenWithJavaScriptASI();
4514   FormatTok->Previous = Previous;
4515   if (Style.isVerilog()) {
4516     // Blocks in Verilog can have `begin` and `end` instead of braces.  For
4517     // keywords like `begin`, we can't treat them the same as left braces
4518     // because some contexts require one of them.  For example structs use
4519     // braces and if blocks use keywords, and a left brace can occur in an if
4520     // statement, but it is not a block.  For keywords like `end`, we simply
4521     // treat them the same as right braces.
4522     if (Keywords.isVerilogEnd(*FormatTok))
4523       FormatTok->Tok.setKind(tok::r_brace);
4524   }
4525 }
4526
4527 void UnwrappedLineParser::distributeComments(
4528     const SmallVectorImpl<FormatToken *> &Comments,
4529     const FormatToken *NextTok) {
4530   // Whether or not a line comment token continues a line is controlled by
4531   // the method continuesLineCommentSection, with the following caveat:
4532   //
4533   // Define a trail of Comments to be a nonempty proper postfix of Comments such
4534   // that each comment line from the trail is aligned with the next token, if
4535   // the next token exists. If a trail exists, the beginning of the maximal
4536   // trail is marked as a start of a new comment section.
4537   //
4538   // For example in this code:
4539   //
4540   // int a; // line about a
4541   //   // line 1 about b
4542   //   // line 2 about b
4543   //   int b;
4544   //
4545   // the two lines about b form a maximal trail, so there are two sections, the
4546   // first one consisting of the single comment "// line about a" and the
4547   // second one consisting of the next two comments.
4548   if (Comments.empty())
4549     return;
4550   bool ShouldPushCommentsInCurrentLine = true;
4551   bool HasTrailAlignedWithNextToken = false;
4552   unsigned StartOfTrailAlignedWithNextToken = 0;
4553   if (NextTok) {
4554     // We are skipping the first element intentionally.
4555     for (unsigned i = Comments.size() - 1; i > 0; --i) {
4556       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4557         HasTrailAlignedWithNextToken = true;
4558         StartOfTrailAlignedWithNextToken = i;
4559       }
4560     }
4561   }
4562   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4563     FormatToken *FormatTok = Comments[i];
4564     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4565       FormatTok->ContinuesLineCommentSection = false;
4566     } else {
4567       FormatTok->ContinuesLineCommentSection =
4568           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4569     }
4570     if (!FormatTok->ContinuesLineCommentSection &&
4571         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
4572       ShouldPushCommentsInCurrentLine = false;
4573     }
4574     if (ShouldPushCommentsInCurrentLine)
4575       pushToken(FormatTok);
4576     else
4577       CommentsBeforeNextToken.push_back(FormatTok);
4578   }
4579 }
4580
4581 void UnwrappedLineParser::readToken(int LevelDifference) {
4582   SmallVector<FormatToken *, 1> Comments;
4583   bool PreviousWasComment = false;
4584   bool FirstNonCommentOnLine = false;
4585   do {
4586     FormatTok = Tokens->getNextToken();
4587     assert(FormatTok);
4588     while (FormatTok->getType() == TT_ConflictStart ||
4589            FormatTok->getType() == TT_ConflictEnd ||
4590            FormatTok->getType() == TT_ConflictAlternative) {
4591       if (FormatTok->getType() == TT_ConflictStart)
4592         conditionalCompilationStart(/*Unreachable=*/false);
4593       else if (FormatTok->getType() == TT_ConflictAlternative)
4594         conditionalCompilationAlternative();
4595       else if (FormatTok->getType() == TT_ConflictEnd)
4596         conditionalCompilationEnd();
4597       FormatTok = Tokens->getNextToken();
4598       FormatTok->MustBreakBefore = true;
4599     }
4600
4601     auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4602                                       const FormatToken &Tok,
4603                                       bool PreviousWasComment) {
4604       auto IsFirstOnLine = [](const FormatToken &Tok) {
4605         return Tok.HasUnescapedNewline || Tok.IsFirst;
4606       };
4607
4608       // Consider preprocessor directives preceded by block comments as first
4609       // on line.
4610       if (PreviousWasComment)
4611         return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4612       return IsFirstOnLine(Tok);
4613     };
4614
4615     FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4616         FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4617     PreviousWasComment = FormatTok->is(tok::comment);
4618
4619     while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
4620            (!Style.isVerilog() ||
4621             Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) &&
4622            FirstNonCommentOnLine) {
4623       distributeComments(Comments, FormatTok);
4624       Comments.clear();
4625       // If there is an unfinished unwrapped line, we flush the preprocessor
4626       // directives only after that unwrapped line was finished later.
4627       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4628       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4629       assert((LevelDifference >= 0 ||
4630               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4631              "LevelDifference makes Line->Level negative");
4632       Line->Level += LevelDifference;
4633       // Comments stored before the preprocessor directive need to be output
4634       // before the preprocessor directive, at the same level as the
4635       // preprocessor directive, as we consider them to apply to the directive.
4636       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
4637           PPBranchLevel > 0) {
4638         Line->Level += PPBranchLevel;
4639       }
4640       flushComments(isOnNewLine(*FormatTok));
4641       parsePPDirective();
4642       PreviousWasComment = FormatTok->is(tok::comment);
4643       FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4644           FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4645     }
4646
4647     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4648         !Line->InPPDirective) {
4649       continue;
4650     }
4651
4652     if (FormatTok->is(tok::identifier) &&
4653         Macros.defined(FormatTok->TokenText) &&
4654         // FIXME: Allow expanding macros in preprocessor directives.
4655         !Line->InPPDirective) {
4656       FormatToken *ID = FormatTok;
4657       unsigned Position = Tokens->getPosition();
4658
4659       // To correctly parse the code, we need to replace the tokens of the macro
4660       // call with its expansion.
4661       auto PreCall = std::move(Line);
4662       Line.reset(new UnwrappedLine);
4663       bool OldInExpansion = InExpansion;
4664       InExpansion = true;
4665       // We parse the macro call into a new line.
4666       auto Args = parseMacroCall();
4667       InExpansion = OldInExpansion;
4668       assert(Line->Tokens.front().Tok == ID);
4669       // And remember the unexpanded macro call tokens.
4670       auto UnexpandedLine = std::move(Line);
4671       // Reset to the old line.
4672       Line = std::move(PreCall);
4673
4674       LLVM_DEBUG({
4675         llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
4676         if (Args) {
4677           llvm::dbgs() << "(";
4678           for (const auto &Arg : Args.value())
4679             for (const auto &T : Arg)
4680               llvm::dbgs() << T->TokenText << " ";
4681           llvm::dbgs() << ")";
4682         }
4683         llvm::dbgs() << "\n";
4684       });
4685       if (Macros.objectLike(ID->TokenText) && Args &&
4686           !Macros.hasArity(ID->TokenText, Args->size())) {
4687         // The macro is either
4688         // - object-like, but we got argumnets, or
4689         // - overloaded to be both object-like and function-like, but none of
4690         //   the function-like arities match the number of arguments.
4691         // Thus, expand as object-like macro.
4692         LLVM_DEBUG(llvm::dbgs()
4693                    << "Macro \"" << ID->TokenText
4694                    << "\" not overloaded for arity " << Args->size()
4695                    << "or not function-like, using object-like overload.");
4696         Args.reset();
4697         UnexpandedLine->Tokens.resize(1);
4698         Tokens->setPosition(Position);
4699         nextToken();
4700         assert(!Args && Macros.objectLike(ID->TokenText));
4701       }
4702       if ((!Args && Macros.objectLike(ID->TokenText)) ||
4703           (Args && Macros.hasArity(ID->TokenText, Args->size()))) {
4704         // Next, we insert the expanded tokens in the token stream at the
4705         // current position, and continue parsing.
4706         Unexpanded[ID] = std::move(UnexpandedLine);
4707         SmallVector<FormatToken *, 8> Expansion =
4708             Macros.expand(ID, std::move(Args));
4709         if (!Expansion.empty())
4710           FormatTok = Tokens->insertTokens(Expansion);
4711
4712         LLVM_DEBUG({
4713           llvm::dbgs() << "Expanded: ";
4714           for (const auto &T : Expansion)
4715             llvm::dbgs() << T->TokenText << " ";
4716           llvm::dbgs() << "\n";
4717         });
4718       } else {
4719         LLVM_DEBUG({
4720           llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
4721                        << "\", because it was used ";
4722           if (Args)
4723             llvm::dbgs() << "with " << Args->size();
4724           else
4725             llvm::dbgs() << "without";
4726           llvm::dbgs() << " arguments, which doesn't match any definition.\n";
4727         });
4728         Tokens->setPosition(Position);
4729         FormatTok = ID;
4730       }
4731     }
4732
4733     if (!FormatTok->is(tok::comment)) {
4734       distributeComments(Comments, FormatTok);
4735       Comments.clear();
4736       return;
4737     }
4738
4739     Comments.push_back(FormatTok);
4740   } while (!eof());
4741
4742   distributeComments(Comments, nullptr);
4743   Comments.clear();
4744 }
4745
4746 namespace {
4747 template <typename Iterator>
4748 void pushTokens(Iterator Begin, Iterator End,
4749                 llvm::SmallVectorImpl<FormatToken *> &Into) {
4750   for (auto I = Begin; I != End; ++I) {
4751     Into.push_back(I->Tok);
4752     for (const auto &Child : I->Children)
4753       pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
4754   }
4755 }
4756 } // namespace
4757
4758 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
4759 UnwrappedLineParser::parseMacroCall() {
4760   std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
4761   assert(Line->Tokens.empty());
4762   nextToken();
4763   if (!FormatTok->is(tok::l_paren))
4764     return Args;
4765   unsigned Position = Tokens->getPosition();
4766   FormatToken *Tok = FormatTok;
4767   nextToken();
4768   Args.emplace();
4769   auto ArgStart = std::prev(Line->Tokens.end());
4770
4771   int Parens = 0;
4772   do {
4773     switch (FormatTok->Tok.getKind()) {
4774     case tok::l_paren:
4775       ++Parens;
4776       nextToken();
4777       break;
4778     case tok::r_paren: {
4779       if (Parens > 0) {
4780         --Parens;
4781         nextToken();
4782         break;
4783       }
4784       Args->push_back({});
4785       pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
4786       nextToken();
4787       return Args;
4788     }
4789     case tok::comma: {
4790       if (Parens > 0) {
4791         nextToken();
4792         break;
4793       }
4794       Args->push_back({});
4795       pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
4796       nextToken();
4797       ArgStart = std::prev(Line->Tokens.end());
4798       break;
4799     }
4800     default:
4801       nextToken();
4802       break;
4803     }
4804   } while (!eof());
4805   Line->Tokens.resize(1);
4806   Tokens->setPosition(Position);
4807   FormatTok = Tok;
4808   return {};
4809 }
4810
4811 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
4812   Line->Tokens.push_back(UnwrappedLineNode(Tok));
4813   if (MustBreakBeforeNextToken) {
4814     Line->Tokens.back().Tok->MustBreakBefore = true;
4815     MustBreakBeforeNextToken = false;
4816   }
4817 }
4818
4819 } // end namespace format
4820 } // end namespace clang