clang/lib/Format/UnwrappedLineParser.cpp

   1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 ///
   9 /// \file
  10 /// This file contains the implementation of the UnwrappedLineParser,
  11 /// which turns a stream of tokens into UnwrappedLines.
  12 ///
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "UnwrappedLineParser.h"
  16 #include "FormatToken.h"
  17 #include "FormatTokenLexer.h"
  18 #include "FormatTokenSource.h"
  19 #include "Macros.h"
  20 #include "TokenAnnotator.h"
  21 #include "clang/Basic/TokenKinds.h"
  22 #include "llvm/ADT/STLExtras.h"
  23 #include "llvm/ADT/StringRef.h"
  24 #include "llvm/Support/Debug.h"
  25 #include "llvm/Support/raw_os_ostream.h"
  26 #include "llvm/Support/raw_ostream.h"
  27
  28 #include <algorithm>
  29 #include <utility>
  30
  31 #define DEBUG_TYPE "format-parser"
  32
  33 namespace clang {
  34 namespace format {
  35
  36 namespace {
  37
  38 void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
  39                StringRef Prefix = "", bool PrintText = false) {
  40   OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
  41      << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
  42   bool NewLine = false;
  43   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
  44                                                     E = Line.Tokens.end();
  45        I != E; ++I) {
  46     if (NewLine) {
  47       OS << Prefix;
  48       NewLine = false;
  49     }
  50     OS << I->Tok->Tok.getName() << "["
  51        << "T=" << (unsigned)I->Tok->getType()
  52        << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
  53        << "\"] ";
  54     for (SmallVectorImpl<UnwrappedLine>::const_iterator
  55              CI = I->Children.begin(),
  56              CE = I->Children.end();
  57          CI != CE; ++CI) {
  58       OS << "\n";
  59       printLine(OS, *CI, (Prefix + "  ").str());
  60       NewLine = true;
  61     }
  62   }
  63   if (!NewLine)
  64     OS << "\n";
  65 }
  66
  67 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
  68   printLine(llvm::dbgs(), Line);
  69 }
  70
  71 class ScopedDeclarationState {
  72 public:
  73   ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
  74                          bool MustBeDeclaration)
  75       : Line(Line), Stack(Stack) {
  76     Line.MustBeDeclaration = MustBeDeclaration;
  77     Stack.push_back(MustBeDeclaration);
  78   }
  79   ~ScopedDeclarationState() {
  80     Stack.pop_back();
  81     if (!Stack.empty())
  82       Line.MustBeDeclaration = Stack.back();
  83     else
  84       Line.MustBeDeclaration = true;
  85   }
  86
  87 private:
  88   UnwrappedLine &Line;
  89   llvm::BitVector &Stack;
  90 };
  91
  92 } // end anonymous namespace
  93
  94 class ScopedLineState {
  95 public:
  96   ScopedLineState(UnwrappedLineParser &Parser,
  97                   bool SwitchToPreprocessorLines = false)
  98       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
  99     if (SwitchToPreprocessorLines)
 100       Parser.CurrentLines = &Parser.PreprocessorDirectives;
 101     else if (!Parser.Line->Tokens.empty())
 102       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
 103     PreBlockLine = std::move(Parser.Line);
 104     Parser.Line = std::make_unique<UnwrappedLine>();
 105     Parser.Line->Level = PreBlockLine->Level;
 106     Parser.Line->PPLevel = PreBlockLine->PPLevel;
 107     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
 108     Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
 109   }
 110
 111   ~ScopedLineState() {
 112     if (!Parser.Line->Tokens.empty())
 113       Parser.addUnwrappedLine();
 114     assert(Parser.Line->Tokens.empty());
 115     Parser.Line = std::move(PreBlockLine);
 116     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
 117       Parser.MustBreakBeforeNextToken = true;
 118     Parser.CurrentLines = OriginalLines;
 119   }
 120
 121 private:
 122   UnwrappedLineParser &Parser;
 123
 124   std::unique_ptr<UnwrappedLine> PreBlockLine;
 125   SmallVectorImpl<UnwrappedLine> *OriginalLines;
 126 };
 127
 128 class CompoundStatementIndenter {
 129 public:
 130   CompoundStatementIndenter(UnwrappedLineParser *Parser,
 131                             const FormatStyle &Style, unsigned &LineLevel)
 132       : CompoundStatementIndenter(Parser, LineLevel,
 133                                   Style.BraceWrapping.AfterControlStatement,
 134                                   Style.BraceWrapping.IndentBraces) {}
 135   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
 136                             bool WrapBrace, bool IndentBrace)
 137       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
 138     if (WrapBrace)
 139       Parser->addUnwrappedLine();
 140     if (IndentBrace)
 141       ++LineLevel;
 142   }
 143   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
 144
 145 private:
 146   unsigned &LineLevel;
 147   unsigned OldLineLevel;
 148 };
 149
 150 UnwrappedLineParser::UnwrappedLineParser(
 151     SourceManager &SourceMgr, const FormatStyle &Style,
 152     const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
 153     ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
 154     llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
 155     IdentifierTable &IdentTable)
 156     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
 157       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
 158       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
 159       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
 160       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
 161                        ? IG_Rejected
 162                        : IG_Inited),
 163       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
 164       Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {}
 165
 166 void UnwrappedLineParser::reset() {
 167   PPBranchLevel = -1;
 168   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
 169                      ? IG_Rejected
 170                      : IG_Inited;
 171   IncludeGuardToken = nullptr;
 172   Line.reset(new UnwrappedLine);
 173   CommentsBeforeNextToken.clear();
 174   FormatTok = nullptr;
 175   MustBreakBeforeNextToken = false;
 176   IsDecltypeAutoFunction = false;
 177   PreprocessorDirectives.clear();
 178   CurrentLines = &Lines;
 179   DeclarationScopeStack.clear();
 180   NestedTooDeep.clear();
 181   NestedLambdas.clear();
 182   PPStack.clear();
 183   Line->FirstStartColumn = FirstStartColumn;
 184
 185   if (!Unexpanded.empty())
 186     for (FormatToken *Token : AllTokens)
 187       Token->MacroCtx.reset();
 188   CurrentExpandedLines.clear();
 189   ExpandedLines.clear();
 190   Unexpanded.clear();
 191   InExpansion = false;
 192   Reconstruct.reset();
 193 }
 194
 195 void UnwrappedLineParser::parse() {
 196   IndexedTokenSource TokenSource(AllTokens);
 197   Line->FirstStartColumn = FirstStartColumn;
 198   do {
 199     LLVM_DEBUG(llvm::dbgs() << "----\n");
 200     reset();
 201     Tokens = &TokenSource;
 202     TokenSource.reset();
 203
 204     readToken();
 205     parseFile();
 206
 207     // If we found an include guard then all preprocessor directives (other than
 208     // the guard) are over-indented by one.
 209     if (IncludeGuard == IG_Found) {
 210       for (auto &Line : Lines)
 211         if (Line.InPPDirective && Line.Level > 0)
 212           --Line.Level;
 213     }
 214
 215     // Create line with eof token.
 216     assert(eof());
 217     pushToken(FormatTok);
 218     addUnwrappedLine();
 219
 220     // In a first run, format everything with the lines containing macro calls
 221     // replaced by the expansion.
 222     if (!ExpandedLines.empty()) {
 223       LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
 224       for (const auto &Line : Lines) {
 225         if (!Line.Tokens.empty()) {
 226           auto it = ExpandedLines.find(Line.Tokens.begin()->Tok);
 227           if (it != ExpandedLines.end()) {
 228             for (const auto &Expanded : it->second) {
 229               LLVM_DEBUG(printDebugInfo(Expanded));
 230               Callback.consumeUnwrappedLine(Expanded);
 231             }
 232             continue;
 233           }
 234         }
 235         LLVM_DEBUG(printDebugInfo(Line));
 236         Callback.consumeUnwrappedLine(Line);
 237       }
 238       Callback.finishRun();
 239     }
 240
 241     LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
 242     for (const UnwrappedLine &Line : Lines) {
 243       LLVM_DEBUG(printDebugInfo(Line));
 244       Callback.consumeUnwrappedLine(Line);
 245     }
 246     Callback.finishRun();
 247     Lines.clear();
 248     while (!PPLevelBranchIndex.empty() &&
 249            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
 250       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
 251       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
 252     }
 253     if (!PPLevelBranchIndex.empty()) {
 254       ++PPLevelBranchIndex.back();
 255       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
 256       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
 257     }
 258   } while (!PPLevelBranchIndex.empty());
 259 }
 260
 261 void UnwrappedLineParser::parseFile() {
 262   // The top-level context in a file always has declarations, except for pre-
 263   // processor directives and JavaScript files.
 264   bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
 265   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 266                                           MustBeDeclaration);
 267   if (Style.Language == FormatStyle::LK_TextProto)
 268     parseBracedList();
 269   else
 270     parseLevel();
 271   // Make sure to format the remaining tokens.
 272   //
 273   // LK_TextProto is special since its top-level is parsed as the body of a
 274   // braced list, which does not necessarily have natural line separators such
 275   // as a semicolon. Comments after the last entry that have been determined to
 276   // not belong to that line, as in:
 277   //   key: value
 278   //   // endfile comment
 279   // do not have a chance to be put on a line of their own until this point.
 280   // Here we add this newline before end-of-file comments.
 281   if (Style.Language == FormatStyle::LK_TextProto &&
 282       !CommentsBeforeNextToken.empty()) {
 283     addUnwrappedLine();
 284   }
 285   flushComments(true);
 286   addUnwrappedLine();
 287 }
 288
 289 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
 290   do {
 291     switch (FormatTok->Tok.getKind()) {
 292     case tok::l_brace:
 293       return;
 294     default:
 295       if (FormatTok->is(Keywords.kw_where)) {
 296         addUnwrappedLine();
 297         nextToken();
 298         parseCSharpGenericTypeConstraint();
 299         break;
 300       }
 301       nextToken();
 302       break;
 303     }
 304   } while (!eof());
 305 }
 306
 307 void UnwrappedLineParser::parseCSharpAttribute() {
 308   int UnpairedSquareBrackets = 1;
 309   do {
 310     switch (FormatTok->Tok.getKind()) {
 311     case tok::r_square:
 312       nextToken();
 313       --UnpairedSquareBrackets;
 314       if (UnpairedSquareBrackets == 0) {
 315         addUnwrappedLine();
 316         return;
 317       }
 318       break;
 319     case tok::l_square:
 320       ++UnpairedSquareBrackets;
 321       nextToken();
 322       break;
 323     default:
 324       nextToken();
 325       break;
 326     }
 327   } while (!eof());
 328 }
 329
 330 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
 331   if (!Lines.empty() && Lines.back().InPPDirective)
 332     return true;
 333
 334   const FormatToken *Previous = Tokens->getPreviousToken();
 335   return Previous && Previous->is(tok::comment) &&
 336          (Previous->IsMultiline || Previous->NewlinesBefore > 0);
 337 }
 338
 339 /// \brief Parses the statements of one nesting level.
     ///
     /// Loops over the token stream, dispatching on the kind of the current token,
     /// until end of file is reached. If \p OpeningBrace is set, the function
     /// instead returns as soon as it sees a closing brace (or a
     /// \c TT_MacroBlockEnd token, which is dispatched like one); that token is
     /// left unconsumed.
 340 /// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
 341 /// \param IfKind The \p if statement kind in the level.
 342 /// \param IfLeftBrace The left brace of the \p if block in the level.
 343 /// \returns true if a simple block of if/else/for/while, or false otherwise.
 344 /// (A simple block has a single statement.)
 345 bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
 346                                      IfStmtKind *IfKind,
 347                                      FormatToken **IfLeftBrace) {
 348   const bool InRequiresExpression =
 349       OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
       // Sampled before anything in this level is parsed. With RemoveBracesLLVM
       // off this is trivially true and precededByCommentOrPPDirective() is not
       // called; the value is only read on the brace-removal path below.
 350   const bool IsPrecededByCommentOrPPDirective =
 351       !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
 352   FormatToken *IfLBrace = nullptr;
 353   bool HasDoWhile = false;
 354   bool HasLabel = false;
 355   unsigned StatementCount = 0;
 356   bool SwitchLabelEncountered = false;
 357
 358   do {
         // Attribute tokens are skipped without affecting the statement count.
 359     if (FormatTok->isAttribute()) {
 360       nextToken();
 361       continue;
 362     }
         // Macro block begin/end tokens are dispatched as if they were braces.
 363     tok::TokenKind kind = FormatTok->Tok.getKind();
 364     if (FormatTok->getType() == TT_MacroBlockBegin)
 365       kind = tok::l_brace;
 366     else if (FormatTok->getType() == TT_MacroBlockEnd)
 367       kind = tok::r_brace;
 368
         // Parses one structural element and counts it as a statement. The
         // HasDoWhile/HasLabel out-parameters are only passed on while they are
         // still false, so once set they stay set.
 369     auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile,
 370                          &HasLabel, &StatementCount] {
 371       parseStructuralElement(OpeningBrace, IfKind, &IfLBrace,
 372                              HasDoWhile ? nullptr : &HasDoWhile,
 373                              HasLabel ? nullptr : &HasLabel);
 374       ++StatementCount;
 375       assert(StatementCount > 0 && "StatementCount overflow!");
 376     };
 377
 378     switch (kind) {
 379     case tok::comment:
 380       nextToken();
 381       addUnwrappedLine();
 382       break;
 383     case tok::l_brace:
 384       if (InRequiresExpression) {
 385         FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
 386       } else if (FormatTok->Previous &&
 387                  FormatTok->Previous->ClosesRequiresClause) {
 388         // We need the 'default' case here to correctly parse a function
 389         // l_brace.
 390         ParseDefault();
 391         continue;
 392       }
           // Outside requires expressions, a real '{' is first tried as a braced
           // list; only if that fails is it parsed as a block.
 393       if (!InRequiresExpression && FormatTok->isNot(TT_MacroBlockBegin) &&
 394           tryToParseBracedList()) {
 395         continue;
 396       }
 397       parseBlock();
 398       ++StatementCount;
 399       assert(StatementCount > 0 && "StatementCount overflow!");
 400       addUnwrappedLine();
 401       break;
 402     case tok::r_brace:
 403       if (OpeningBrace) {
             // This closes the level; every path below returns. The result is
             // only ever true when RemoveBracesLLVM is on, the line is not in a
             // preprocessor directive, and the opening brace belongs to a control
             // statement or an else.
 404         if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
 405             !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
 406           return false;
 407         }
             // "Simple" additionally requires a real '}' (kind may have come from
             // a TT_MacroBlockEnd token), exactly one statement, no label, no
             // do-while, and no comment or preprocessor directive right before the
             // level or right before this brace.
 408         if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
 409             HasDoWhile || IsPrecededByCommentOrPPDirective ||
 410             precededByCommentOrPPDirective()) {
 411           return false;
 412         }
             // A comment following the '}' with no newline in between also
             // disqualifies the block.
 413         const FormatToken *Next = Tokens->peekNextToken();
 414         if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
 415           return false;
 416         if (IfLeftBrace)
 417           *IfLeftBrace = IfLBrace;
 418         return true;
 419       }
           // Without an opening brace for this level the '}' is consumed and put
           // on a line of its own.
 420       nextToken();
 421       addUnwrappedLine();
 422       break;
 423     case tok::kw_default: {
           // Look ahead to the first non-comment token after 'default', then
           // rewind the token source to where it was.
 424       unsigned StoredPosition = Tokens->getPosition();
 425       FormatToken *Next;
 426       do {
 427         Next = Tokens->getNextToken();
 428         assert(Next);
 429       } while (Next->is(tok::comment));
 430       FormatTok = Tokens->setPosition(StoredPosition);
 431       if (Next->isNot(tok::colon)) {
 432         // default not followed by ':' is not a case label; treat it like
 433         // an identifier.
 434         parseStructuralElement();
 435         break;
 436       }
 437       // Else, if it is 'default:', fall through to the case handling.
 438       [[fallthrough]];
 439     }
 440     case tok::kw_case:
 441       if (Style.isProto() || Style.isVerilog() ||
 442           (Style.isJavaScript() && Line->MustBeDeclaration)) {
 443         // Proto: there are no switch/case statements
 444         // Verilog: Case labels don't have this word. We handle case
 445         // labels including default in TokenAnnotator.
 446         // JavaScript: A 'case: string' style field declaration.
 447         ParseDefault();
 448         break;
 449       }
           // The level is raised at most once per call, on the first label seen,
           // and only if case labels are indented or the line is a preprocessor
           // directive at level 1.
 450       if (!SwitchLabelEncountered &&
 451           (Style.IndentCaseLabels ||
 452            (Line->InPPDirective && Line->Level == 1))) {
 453         ++Line->Level;
 454       }
 455       SwitchLabelEncountered = true;
 456       parseStructuralElement();
 457       break;
 458     case tok::l_square:
 459       if (Style.isCSharp()) {
 460         nextToken();
 461         parseCSharpAttribute();
 462         break;
 463       }
 464       if (handleCppAttributes())
 465         break;
 466       [[fallthrough]];
 467     default:
 468       ParseDefault();
 469       break;
 470     }
 471   } while (!eof());
 472
       // End of file was reached without a closing brace ending the level.
 473   return false;
 474 }
 475
 476 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
       // Looks ahead from the current '{' and assigns a block kind (BK_Block or
       // BK_BracedInit) to it and to the braces nested inside it, then rewinds
       // the token source to the position it had on entry.
       //
       // ExpectClassBody is only consulted when the token after a closing brace
       // is a semicolon: if it is set, a trailing ';' does not make the outermost
       // brace pair a braced list.
 477   // We'll parse forward through the tokens until we hit
 478   // a closing brace or eof - note that getNextToken() will
 479   // parse macros, so this will magically work inside macro
 480   // definitions, too.
 481   unsigned StoredPosition = Tokens->getPosition();
 482   FormatToken *Tok = FormatTok;
 483   const FormatToken *PrevTok = Tok->Previous;
 484   // Keep a stack of positions of lbrace tokens. We will
 485   // update information about whether an lbrace starts a
 486   // braced init list or a different block during the loop.
 487   struct StackEntry {
 488     FormatToken *Tok;
 489     const FormatToken *PrevTok;
 490   };
 491   SmallVector<StackEntry, 8> LBraceStack;
 492   assert(Tok->is(tok::l_brace));
 493   do {
 494     // Get next non-comment token.
 495     FormatToken *NextTok;
 496     do {
 497       NextTok = Tokens->getNextToken();
 498     } while (NextTok->is(tok::comment));
 499
 500     switch (Tok->Tok.getKind()) {
 501     case tok::l_brace:
 502       if (Style.isJavaScript() && PrevTok) {
 503         if (PrevTok->isOneOf(tok::colon, tok::less)) {
 504           // A ':' indicates this code is in a type, or a braced list
 505           // following a label in an object literal ({a: {b: 1}}).
 506           // A '<' could be an object used in a comparison, but that is nonsense
 507           // code (can never return true), so more likely it is a generic type
 508           // argument (`X<{a: string; b: number}>`).
 509           // The code below could be confused by semicolons between the
 510           // individual members in a type member list, which would normally
 511           // trigger BK_Block. In both cases, this must be parsed as an inline
 512           // braced init.
 513           Tok->setBlockKind(BK_BracedInit);
 514         } else if (PrevTok->is(tok::r_paren)) {
 515           // `) { }` can only occur in function or method declarations in JS.
 516           Tok->setBlockKind(BK_Block);
 517         }
 518       } else {
 519         Tok->setBlockKind(BK_Unknown);
 520       }
           // Remember the brace together with the token that preceded it.
 521       LBraceStack.push_back({Tok, PrevTok});
 522       break;
 523     case tok::r_brace:
 524       if (LBraceStack.empty())
 525         break;
           // Only braces whose kind is still undecided are classified here, based
           // mostly on the token that follows the closing brace.
 526       if (LBraceStack.back().Tok->is(BK_Unknown)) {
 527         bool ProbablyBracedList = false;
 528         if (Style.Language == FormatStyle::LK_Proto) {
 529           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
 530         } else {
 531           // Skip NextTok over preprocessor lines, otherwise we may not
 532           // properly diagnose the block as a braced initializer
 533           // if the comma separator appears after the pp directive.
 534           while (NextTok->is(tok::hash)) {
 535             ScopedMacroState MacroState(*Line, Tokens, NextTok);
 536             do {
 537               NextTok = Tokens->getNextToken();
 538             } while (NextTok->isNot(tok::eof));
 539           }
 540
 541           // Using OriginalColumn to distinguish between ObjC methods and
 542           // binary operators is a bit hacky.
 543           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
 544                                   NextTok->OriginalColumn == 0;
 545
 546           // Try to detect a braced list. Note that regardless how we mark inner
 547           // braces here, we will overwrite the BlockKind later if we parse a
 548           // braced list (where all blocks inside are by default braced lists),
 549           // or when we explicitly detect blocks (for example while parsing
 550           // lambdas).
 551
 552           // If we already marked the opening brace as braced list, the closing
 553           // must also be part of it.
 554           ProbablyBracedList = LBraceStack.back().Tok->is(TT_BracedListLBrace);
 555
 556           ProbablyBracedList = ProbablyBracedList ||
 557                                (Style.isJavaScript() &&
 558                                 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
 559                                                  Keywords.kw_as));
 560           ProbablyBracedList = ProbablyBracedList ||
 561                                (Style.isCpp() && NextTok->is(tok::l_paren));
 562
 563           // If there is a comma, period, colon, right paren, right square
 564           // bracket or ellipsis after the closing brace, we assume this is a
               // braced initializer list. (A following semicolon is handled
               // separately below.)
 565           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
 566           // braced list in JS.
 567           ProbablyBracedList =
 568               ProbablyBracedList ||
 569               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
 570                                tok::r_paren, tok::r_square, tok::ellipsis);
 571
 572           // Distinguish between braced list in a constructor initializer list
 573           // followed by constructor body, or just adjacent blocks.
 574           ProbablyBracedList =
 575               ProbablyBracedList ||
 576               (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok &&
 577                LBraceStack.back().PrevTok->isOneOf(tok::identifier,
 578                                                    tok::greater));
 579
               // An identifier after the '}' counts as a braced list unless the
               // token before the '}' is ';', '}' or '{'.
 580           ProbablyBracedList =
 581               ProbablyBracedList ||
 582               (NextTok->is(tok::identifier) &&
 583                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));
 584
               // A ';' after the '}' counts as a braced list, except for the
               // outermost brace pair when a class body is expected.
 585           ProbablyBracedList = ProbablyBracedList ||
 586                                (NextTok->is(tok::semi) &&
 587                                 (!ExpectClassBody || LBraceStack.size() != 1));
 588
 589           ProbablyBracedList =
 590               ProbablyBracedList ||
 591               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
 592
 593           if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
 594             // We can have an array subscript after a braced init
 595             // list, but C++11 attributes are expected after blocks.
                 // Note that this overwrites (rather than ORs into) the verdict
                 // reached above, and that NextTok now points at the token after
                 // the '['.
 596             NextTok = Tokens->getNextToken();
 597             ProbablyBracedList = NextTok->isNot(tok::l_square);
 598           }
 599         }
             // Record the verdict on both the closing and the opening brace.
 600         if (ProbablyBracedList) {
 601           Tok->setBlockKind(BK_BracedInit);
 602           LBraceStack.back().Tok->setBlockKind(BK_BracedInit);
 603         } else {
 604           Tok->setBlockKind(BK_Block);
 605           LBraceStack.back().Tok->setBlockKind(BK_Block);
 606         }
 607       }
 608       LBraceStack.pop_back();
 609       break;
         // The cases below form one fall-through chain: any of these tokens marks
         // the innermost still-undecided brace as a block. Identifiers only
         // qualify when they are statement macros, and an 'if' directly preceded
         // by '#' is ignored.
 610     case tok::identifier:
 611       if (Tok->isNot(TT_StatementMacro))
 612         break;
 613       [[fallthrough]];
 614     case tok::kw_if:
 615       if (PrevTok->is(tok::hash))
 616         break;
 617       [[fallthrough]];
 618     case tok::at:
 619     case tok::semi:
 620     case tok::kw_while:
 621     case tok::kw_for:
 622     case tok::kw_switch:
 623     case tok::kw_try:
 624     case tok::kw___try:
 625       if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown))
 626         LBraceStack.back().Tok->setBlockKind(BK_Block);
 627       break;
 628     default:
 629       break;
 630     }
 631     PrevTok = Tok;
 632     Tok = NextTok;
 633   } while (Tok->isNot(tok::eof) && !LBraceStack.empty());
 634
 635   // Assume other blocks for all unclosed opening braces.
 636   for (const auto &Entry : LBraceStack)
 637     if (Entry.Tok->is(BK_Unknown))
 638       Entry.Tok->setBlockKind(BK_Block);
 639
       // Undo the look-ahead.
 640   FormatTok = Tokens->setPosition(StoredPosition);
 641 }
 642
 643 // Sets the token type of the directly previous right brace.
 644 void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) {
 645   if (auto Prev = FormatTok->getPreviousNonComment();
 646       Prev && Prev->is(tok::r_brace)) {
 647     Prev->setFinalizedType(Type);
 648   }
 649 }
 650
 651 template <class T>
 652 static inline void hash_combine(std::size_t &seed, const T &v) {
 653   std::hash<T> hasher;
 654   seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
 655 }
 656
 657 size_t UnwrappedLineParser::computePPHash() const {
 658   size_t h = 0;
 659   for (const auto &i : PPStack) {
 660     hash_combine(h, size_t(i.Kind));
 661     hash_combine(h, i.Line);
 662   }
 663   return h;
 664 }
 665
 666 // Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
 667 // is not null, subtracts its length (plus the preceding space) when computing
 668 // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
 669 // running the token annotator on it so that we can restore them afterward.
     //
     // Returns true unconditionally when the style's ColumnLimit is 0. Otherwise
     // returns whether the indentation (Level * IndentWidth) plus the adjusted
     // line length stays within ColumnLimit.
 670 bool UnwrappedLineParser::mightFitOnOneLine(
 671     UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
 672   const auto ColumnLimit = Style.ColumnLimit;
 673   if (ColumnLimit == 0)
 674     return true;
 675
 676   auto &Tokens = ParsedLine.Tokens;
 677   assert(!Tokens.empty());
 678
 679   const auto *LastToken = Tokens.back().Tok;
 680   assert(LastToken);
 681
       // One backup slot per token of the line.
 682   SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
 683
       // Back up every token into a heap-allocated copy; the copies are deleted
       // again in the restore loop below.
 684   int Index = 0;
 685   for (const auto &Token : Tokens) {
 686     assert(Token.Tok);
 687     auto &SavedToken = SavedTokens[Index++];
 688     SavedToken.Tok = new FormatToken;
 689     SavedToken.Tok->copyFrom(*Token.Tok);
         // NOTE(review): Token is a const reference, so std::move yields a const
         // rvalue here; this presumably resolves to a copy and leaves
         // Token.Children intact for the annotator - confirm.
 690     SavedToken.Children = std::move(Token.Children);
 691   }
 692
       // Annotate the line in place so that TotalLength is available on its
       // tokens.
 693   AnnotatedLine Line(ParsedLine);
 694   assert(Line.Last == LastToken);
 695
 696   TokenAnnotator Annotator(Style, Keywords);
 697   Annotator.annotate(Line);
 698   Annotator.calculateFormattingInformation(Line);
 699
 700   auto Length = LastToken->TotalLength;
 701   if (OpeningBrace) {
 702     assert(OpeningBrace != Tokens.front().Tok);
         // If the brace's TotalLength exceeds its predecessor's by exactly
         // ColumnLimit, take that amount out again (presumably a penalty the
         // annotator adds for a forced break before the brace - TODO confirm).
 703     if (auto Prev = OpeningBrace->Previous;
 704         Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
 705       Length -= ColumnLimit;
 706     }
         // Discount the brace itself and the space in front of it.
 707     Length -= OpeningBrace->TokenText.size() + 1;
 708   }
 709
       // A leading '}' on the line is discounted the same way.
 710   if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) {
 711     assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
 712     Length -= FirstToken->TokenText.size() + 1;
 713   }
 714
       // Restore the tokens from the backups and free the backups. Length was
       // read above, before the annotated state is overwritten.
 715   Index = 0;
 716   for (auto &Token : Tokens) {
 717     const auto &SavedToken = SavedTokens[Index++];
 718     Token.Tok->copyFrom(*SavedToken.Tok);
         // NOTE(review): SavedToken is a const reference, so as above this
         // std::move presumably copies rather than moves - confirm.
 719     Token.Children = std::move(SavedToken.Children);
 720     delete SavedToken.Tok;
 721   }
 722
 723   // If these change, PPLevel needs to be used to get the correct indentation.
 724   assert(!Line.InMacroBody);
 725   assert(!Line.InPPDirective);
 726   return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
 727 }
 728
 729 FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
 730                                              unsigned AddLevels, bool MunchSemi,
 731                                              bool KeepBraces,
 732                                              IfStmtKind *IfKind,
 733                                              bool UnindentWhitesmithsBraces) {
 734   auto HandleVerilogBlockLabel = [this]() {
 735     // ":" name
 736     if (Style.isVerilog() && FormatTok->is(tok::colon)) {
 737       nextToken();
 738       if (Keywords.isVerilogIdentifier(*FormatTok))
 739         nextToken();
 740     }
 741   };
 742
 743   // Whether this is a Verilog-specific block that has a special header like a
 744   // module.
 745   const bool VerilogHierarchy =
 746       Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok);
 747   assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
 748           (Style.isVerilog() &&
 749            (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
 750          "'{' or macro block token expected");
 751   FormatToken *Tok = FormatTok;
 752   const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
 753   auto Index = CurrentLines->size();
 754   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
 755   FormatTok->setBlockKind(BK_Block);
 756
 757   // For Whitesmiths mode, jump to the next level prior to skipping over the
 758   // braces.
 759   if (!VerilogHierarchy && AddLevels > 0 &&
 760       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
 761     ++Line->Level;
 762   }
 763
 764   size_t PPStartHash = computePPHash();
 765
 766   const unsigned InitialLevel = Line->Level;
 767   if (VerilogHierarchy) {
 768     AddLevels += parseVerilogHierarchyHeader();
 769   } else {
 770     nextToken(/*LevelDifference=*/AddLevels);
 771     HandleVerilogBlockLabel();
 772   }
 773
 774   // Bail out if there are too many levels. Otherwise, the stack might overflow.
 775   if (Line->Level > 300)
 776     return nullptr;
 777
 778   if (MacroBlock && FormatTok->is(tok::l_paren))
 779     parseParens();
 780
 781   size_t NbPreprocessorDirectives =
 782       !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
 783   addUnwrappedLine();
 784   size_t OpeningLineIndex =
 785       CurrentLines->empty()
 786           ? (UnwrappedLine::kInvalidIndex)
 787           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
 788
 789   // Whitesmiths is weird here. The brace needs to be indented for the namespace
 790   // block, but the block itself may not be indented depending on the style
 791   // settings. This allows the format to back up one level in those cases.
 792   if (UnindentWhitesmithsBraces)
 793     --Line->Level;
 794
 795   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 796                                           MustBeDeclaration);
 797   if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
 798     Line->Level += AddLevels;
 799
 800   FormatToken *IfLBrace = nullptr;
 801   const bool SimpleBlock = parseLevel(Tok, IfKind, &IfLBrace);
 802
 803   if (eof())
 804     return IfLBrace;
 805
 806   if (MacroBlock ? FormatTok->isNot(TT_MacroBlockEnd)
 807                  : FormatTok->isNot(tok::r_brace)) {
 808     Line->Level = InitialLevel;
 809     FormatTok->setBlockKind(BK_Block);
 810     return IfLBrace;
 811   }
 812
 813   if (FormatTok->is(tok::r_brace) && Tok->is(TT_NamespaceLBrace))
 814     FormatTok->setFinalizedType(TT_NamespaceRBrace);
 815
 816   const bool IsFunctionRBrace =
 817       FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace);
 818
 819   auto RemoveBraces = [=]() mutable {
 820     if (!SimpleBlock)
 821       return false;
 822     assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
 823     assert(FormatTok->is(tok::r_brace));
 824     const bool WrappedOpeningBrace = !Tok->Previous;
 825     if (WrappedOpeningBrace && FollowedByComment)
 826       return false;
 827     const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
 828     if (KeepBraces && !HasRequiredIfBraces)
 829       return false;
 830     if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
 831       const FormatToken *Previous = Tokens->getPreviousToken();
 832       assert(Previous);
 833       if (Previous->is(tok::r_brace) && !Previous->Optional)
 834         return false;
 835     }
 836     assert(!CurrentLines->empty());
 837     auto &LastLine = CurrentLines->back();
 838     if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
 839       return false;
 840     if (Tok->is(TT_ElseLBrace))
 841       return true;
 842     if (WrappedOpeningBrace) {
 843       assert(Index > 0);
 844       --Index; // The line above the wrapped l_brace.
 845       Tok = nullptr;
 846     }
 847     return mightFitOnOneLine((*CurrentLines)[Index], Tok);
 848   };
 849   if (RemoveBraces()) {
 850     Tok->MatchingParen = FormatTok;
 851     FormatTok->MatchingParen = Tok;
 852   }
 853
 854   size_t PPEndHash = computePPHash();
 855
 856   // Munch the closing brace.
 857   nextToken(/*LevelDifference=*/-AddLevels);
 858
 859   // When this is a function block and there is an unnecessary semicolon
 860   // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
 861   // it later).
 862   if (Style.RemoveSemicolon && IsFunctionRBrace) {
 863     while (FormatTok->is(tok::semi)) {
 864       FormatTok->Optional = true;
 865       nextToken();
 866     }
 867   }
 868
 869   HandleVerilogBlockLabel();
 870
 871   if (MacroBlock && FormatTok->is(tok::l_paren))
 872     parseParens();
 873
 874   Line->Level = InitialLevel;
 875
 876   if (FormatTok->is(tok::kw_noexcept)) {
 877     // A noexcept in a requires expression.
 878     nextToken();
 879   }
 880
 881   if (FormatTok->is(tok::arrow)) {
 882     // Following the } or noexcept we can find a trailing return type arrow
 883     // as part of an implicit conversion constraint.
 884     nextToken();
 885     parseStructuralElement();
 886   }
 887
 888   if (MunchSemi && FormatTok->is(tok::semi))
 889     nextToken();
 890
 891   if (PPStartHash == PPEndHash) {
 892     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
 893     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
 894       // Update the opening line to add the forward reference as well
 895       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
 896           CurrentLines->size() - 1;
 897     }
 898   }
 899
 900   return IfLBrace;
 901 }
 902
 903 static bool isGoogScope(const UnwrappedLine &Line) {
 904   // FIXME: Closure-library specific stuff should not be hard-coded but be
 905   // configurable.
 906   if (Line.Tokens.size() < 4)
 907     return false;
 908   auto I = Line.Tokens.begin();
 909   if (I->Tok->TokenText != "goog")
 910     return false;
 911   ++I;
 912   if (I->Tok->isNot(tok::period))
 913     return false;
 914   ++I;
 915   if (I->Tok->TokenText != "scope")
 916     return false;
 917   ++I;
 918   return I->Tok->is(tok::l_paren);
 919 }
 920
 921 static bool isIIFE(const UnwrappedLine &Line,
 922                    const AdditionalKeywords &Keywords) {
 923   // Look for the start of an immediately invoked anonymous function.
 924   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
 925   // This is commonly done in JavaScript to create a new, anonymous scope.
 926   // Example: (function() { ... })()
 927   if (Line.Tokens.size() < 3)
 928     return false;
 929   auto I = Line.Tokens.begin();
 930   if (I->Tok->isNot(tok::l_paren))
 931     return false;
 932   ++I;
 933   if (I->Tok->isNot(Keywords.kw_function))
 934     return false;
 935   ++I;
 936   return I->Tok->is(tok::l_paren);
 937 }
 938
 939 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
 940                                    const FormatToken &InitialToken) {
 941   tok::TokenKind Kind = InitialToken.Tok.getKind();
 942   if (InitialToken.is(TT_NamespaceMacro))
 943     Kind = tok::kw_namespace;
 944
 945   switch (Kind) {
 946   case tok::kw_namespace:
 947     return Style.BraceWrapping.AfterNamespace;
 948   case tok::kw_class:
 949     return Style.BraceWrapping.AfterClass;
 950   case tok::kw_union:
 951     return Style.BraceWrapping.AfterUnion;
 952   case tok::kw_struct:
 953     return Style.BraceWrapping.AfterStruct;
 954   case tok::kw_enum:
 955     return Style.BraceWrapping.AfterEnum;
 956   default:
 957     return false;
 958   }
 959 }
 960
 961 void UnwrappedLineParser::parseChildBlock() {
 962   assert(FormatTok->is(tok::l_brace));
 963   FormatTok->setBlockKind(BK_Block);
 964   const FormatToken *OpeningBrace = FormatTok;
 965   nextToken();
 966   {
 967     bool SkipIndent = (Style.isJavaScript() &&
 968                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
 969     ScopedLineState LineState(*this);
 970     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 971                                             /*MustBeDeclaration=*/false);
 972     Line->Level += SkipIndent ? 0 : 1;
 973     parseLevel(OpeningBrace);
 974     flushComments(isOnNewLine(*FormatTok));
 975     Line->Level -= SkipIndent ? 0 : 1;
 976   }
 977   nextToken();
 978 }
 979
 980 void UnwrappedLineParser::parsePPDirective() {
 981   assert(FormatTok->is(tok::hash) && "'#' expected");
 982   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
 983
 984   nextToken();
 985
 986   if (!FormatTok->Tok.getIdentifierInfo()) {
 987     parsePPUnknown();
 988     return;
 989   }
 990
 991   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
 992   case tok::pp_define:
 993     parsePPDefine();
 994     return;
 995   case tok::pp_if:
 996     parsePPIf(/*IfDef=*/false);
 997     break;
 998   case tok::pp_ifdef:
 999   case tok::pp_ifndef:
1000     parsePPIf(/*IfDef=*/true);
1001     break;
1002   case tok::pp_else:
1003   case tok::pp_elifdef:
1004   case tok::pp_elifndef:
1005   case tok::pp_elif:
1006     parsePPElse();
1007     break;
1008   case tok::pp_endif:
1009     parsePPEndIf();
1010     break;
1011   case tok::pp_pragma:
1012     parsePPPragma();
1013     break;
1014   default:
1015     parsePPUnknown();
1016     break;
1017   }
1018 }
1019
1020 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1021   size_t Line = CurrentLines->size();
1022   if (CurrentLines == &PreprocessorDirectives)
1023     Line += Lines.size();
1024
1025   if (Unreachable ||
1026       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1027     PPStack.push_back({PP_Unreachable, Line});
1028   } else {
1029     PPStack.push_back({PP_Conditional, Line});
1030   }
1031 }
1032
1033 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1034   ++PPBranchLevel;
1035   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1036   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1037     PPLevelBranchIndex.push_back(0);
1038     PPLevelBranchCount.push_back(0);
1039   }
1040   PPChainBranchIndex.push(Unreachable ? -1 : 0);
1041   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1042   conditionalCompilationCondition(Unreachable || Skip);
1043 }
1044
1045 void UnwrappedLineParser::conditionalCompilationAlternative() {
1046   if (!PPStack.empty())
1047     PPStack.pop_back();
1048   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1049   if (!PPChainBranchIndex.empty())
1050     ++PPChainBranchIndex.top();
1051   conditionalCompilationCondition(
1052       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1053       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1054 }
1055
1056 void UnwrappedLineParser::conditionalCompilationEnd() {
1057   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1058   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1059     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1060       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1061   }
1062   // Guard against #endif's without #if.
1063   if (PPBranchLevel > -1)
1064     --PPBranchLevel;
1065   if (!PPChainBranchIndex.empty())
1066     PPChainBranchIndex.pop();
1067   if (!PPStack.empty())
1068     PPStack.pop_back();
1069 }
1070
1071 void UnwrappedLineParser::parsePPIf(bool IfDef) {
1072   bool IfNDef = FormatTok->is(tok::pp_ifndef);
1073   nextToken();
1074   bool Unreachable = false;
1075   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1076     Unreachable = true;
1077   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1078     Unreachable = true;
1079   conditionalCompilationStart(Unreachable);
1080   FormatToken *IfCondition = FormatTok;
1081   // If there's a #ifndef on the first line, and the only lines before it are
1082   // comments, it could be an include guard.
1083   bool MaybeIncludeGuard = IfNDef;
1084   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1085     for (auto &Line : Lines) {
1086       if (Line.Tokens.front().Tok->isNot(tok::comment)) {
1087         MaybeIncludeGuard = false;
1088         IncludeGuard = IG_Rejected;
1089         break;
1090       }
1091     }
1092   }
1093   --PPBranchLevel;
1094   parsePPUnknown();
1095   ++PPBranchLevel;
1096   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1097     IncludeGuard = IG_IfNdefed;
1098     IncludeGuardToken = IfCondition;
1099   }
1100 }
1101
1102 void UnwrappedLineParser::parsePPElse() {
1103   // If a potential include guard has an #else, it's not an include guard.
1104   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1105     IncludeGuard = IG_Rejected;
1106   // Don't crash when there is an #else without an #if.
1107   assert(PPBranchLevel >= -1);
1108   if (PPBranchLevel == -1)
1109     conditionalCompilationStart(/*Unreachable=*/true);
1110   conditionalCompilationAlternative();
1111   --PPBranchLevel;
1112   parsePPUnknown();
1113   ++PPBranchLevel;
1114 }
1115
1116 void UnwrappedLineParser::parsePPEndIf() {
1117   conditionalCompilationEnd();
1118   parsePPUnknown();
1119   // If the #endif of a potential include guard is the last thing in the file,
1120   // then we found an include guard.
1121   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1122       Style.IndentPPDirectives != FormatStyle::PPDIS_None) {
1123     IncludeGuard = IG_Found;
1124   }
1125 }
1126
1127 void UnwrappedLineParser::parsePPDefine() {
1128   nextToken();
1129
1130   if (!FormatTok->Tok.getIdentifierInfo()) {
1131     IncludeGuard = IG_Rejected;
1132     IncludeGuardToken = nullptr;
1133     parsePPUnknown();
1134     return;
1135   }
1136
1137   if (IncludeGuard == IG_IfNdefed &&
1138       IncludeGuardToken->TokenText == FormatTok->TokenText) {
1139     IncludeGuard = IG_Defined;
1140     IncludeGuardToken = nullptr;
1141     for (auto &Line : Lines) {
1142       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1143         IncludeGuard = IG_Rejected;
1144         break;
1145       }
1146     }
1147   }
1148
1149   // In the context of a define, even keywords should be treated as normal
1150   // identifiers. Setting the kind to identifier is not enough, because we need
1151   // to treat additional keywords like __except as well, which are already
1152   // identifiers. Setting the identifier info to null interferes with include
1153   // guard processing above, and changes preprocessing nesting.
1154   FormatTok->Tok.setKind(tok::identifier);
1155   FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1156   nextToken();
1157   if (FormatTok->Tok.getKind() == tok::l_paren &&
1158       !FormatTok->hasWhitespaceBefore()) {
1159     parseParens();
1160   }
1161   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1162     Line->Level += PPBranchLevel + 1;
1163   addUnwrappedLine();
1164   ++Line->Level;
1165
1166   Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
1167   assert((int)Line->PPLevel >= 0);
1168   Line->InMacroBody = true;
1169
1170   // Errors during a preprocessor directive can only affect the layout of the
1171   // preprocessor directive, and thus we ignore them. An alternative approach
1172   // would be to use the same approach we use on the file level (no
1173   // re-indentation if there was a structural error) within the macro
1174   // definition.
1175   parseFile();
1176 }
1177
1178 void UnwrappedLineParser::parsePPPragma() {
1179   Line->InPragmaDirective = true;
1180   parsePPUnknown();
1181 }
1182
1183 void UnwrappedLineParser::parsePPUnknown() {
1184   do {
1185     nextToken();
1186   } while (!eof());
1187   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1188     Line->Level += PPBranchLevel + 1;
1189   addUnwrappedLine();
1190 }
1191
1192 // Here we exclude certain tokens that are not usually the first token in an
1193 // unwrapped line. This is used in attempt to distinguish macro calls without
1194 // trailing semicolons from other constructs split to several lines.
1195 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1196   // Semicolon can be a null-statement, l_square can be a start of a macro or
1197   // a C++11 attribute, but this doesn't seem to be common.
1198   assert(Tok.isNot(TT_AttributeSquare));
1199   return !Tok.isOneOf(tok::semi, tok::l_brace,
1200                       // Tokens that can only be used as binary operators and a
1201                       // part of overloaded operator names.
1202                       tok::period, tok::periodstar, tok::arrow, tok::arrowstar,
1203                       tok::less, tok::greater, tok::slash, tok::percent,
1204                       tok::lessless, tok::greatergreater, tok::equal,
1205                       tok::plusequal, tok::minusequal, tok::starequal,
1206                       tok::slashequal, tok::percentequal, tok::ampequal,
1207                       tok::pipeequal, tok::caretequal, tok::greatergreaterequal,
1208                       tok::lesslessequal,
1209                       // Colon is used in labels, base class lists, initializer
1210                       // lists, range-based for loops, ternary operator, but
1211                       // should never be the first token in an unwrapped line.
1212                       tok::colon,
1213                       // 'noexcept' is a trailing annotation.
1214                       tok::kw_noexcept);
1215 }
1216
1217 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1218                           const FormatToken *FormatTok) {
1219   // FIXME: This returns true for C/C++ keywords like 'struct'.
1220   return FormatTok->is(tok::identifier) &&
1221          (!FormatTok->Tok.getIdentifierInfo() ||
1222           !FormatTok->isOneOf(
1223               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1224               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1225               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1226               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1227               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1228               Keywords.kw_instanceof, Keywords.kw_interface,
1229               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1230 }
1231
1232 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1233                                  const FormatToken *FormatTok) {
1234   return FormatTok->Tok.isLiteral() ||
1235          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1236          mustBeJSIdent(Keywords, FormatTok);
1237 }
1238
1239 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1240 // when encountered after a value (see mustBeJSIdentOrValue).
1241 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1242                            const FormatToken *FormatTok) {
1243   return FormatTok->isOneOf(
1244       tok::kw_return, Keywords.kw_yield,
1245       // conditionals
1246       tok::kw_if, tok::kw_else,
1247       // loops
1248       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1249       // switch/case
1250       tok::kw_switch, tok::kw_case,
1251       // exceptions
1252       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1253       // declaration
1254       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1255       Keywords.kw_async, Keywords.kw_function,
1256       // import/export
1257       Keywords.kw_import, tok::kw_export);
1258 }
1259
1260 // Checks whether a token is a type in K&R C (aka C78).
1261 static bool isC78Type(const FormatToken &Tok) {
1262   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1263                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1264                      tok::identifier);
1265 }
1266
1267 // This function checks whether a token starts the first parameter declaration
1268 // in a K&R C (aka C78) function definition, e.g.:
1269 //   int f(a, b)
1270 //   short a, b;
1271 //   {
1272 //      return a + b;
1273 //   }
1274 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1275                                const FormatToken *FuncName) {
1276   assert(Tok);
1277   assert(Next);
1278   assert(FuncName);
1279
1280   if (FuncName->isNot(tok::identifier))
1281     return false;
1282
1283   const FormatToken *Prev = FuncName->Previous;
1284   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1285     return false;
1286
1287   if (!isC78Type(*Tok) &&
1288       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
1289     return false;
1290   }
1291
1292   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1293     return false;
1294
1295   Tok = Tok->Previous;
1296   if (!Tok || Tok->isNot(tok::r_paren))
1297     return false;
1298
1299   Tok = Tok->Previous;
1300   if (!Tok || Tok->isNot(tok::identifier))
1301     return false;
1302
1303   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1304 }
1305
1306 bool UnwrappedLineParser::parseModuleImport() {
1307   assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
1308
1309   if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
1310       !Token->Tok.getIdentifierInfo() &&
1311       !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) {
1312     return false;
1313   }
1314
1315   nextToken();
1316   while (!eof()) {
1317     if (FormatTok->is(tok::colon)) {
1318       FormatTok->setFinalizedType(TT_ModulePartitionColon);
1319     }
1320     // Handle import <foo/bar.h> as we would an include statement.
1321     else if (FormatTok->is(tok::less)) {
1322       nextToken();
1323       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1324         // Mark tokens up to the trailing line comments as implicit string
1325         // literals.
1326         if (FormatTok->isNot(tok::comment) &&
1327             !FormatTok->TokenText.startswith("//")) {
1328           FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1329         }
1330         nextToken();
1331       }
1332     }
1333     if (FormatTok->is(tok::semi)) {
1334       nextToken();
1335       break;
1336     }
1337     nextToken();
1338   }
1339
1340   addUnwrappedLine();
1341   return true;
1342 }
1343
1344 // readTokenWithJavaScriptASI reads the next token and terminates the current
1345 // line if JavaScript Automatic Semicolon Insertion must
1346 // happen between the current token and the next token.
1347 //
1348 // This method is conservative - it cannot cover all edge cases of JavaScript,
1349 // but only aims to correctly handle certain well known cases. It *must not*
1350 // return true in speculative cases.
1351 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1352   FormatToken *Previous = FormatTok;
1353   readToken();
1354   FormatToken *Next = FormatTok;
1355
1356   bool IsOnSameLine =
1357       CommentsBeforeNextToken.empty()
1358           ? Next->NewlinesBefore == 0
1359           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1360   if (IsOnSameLine)
1361     return;
1362
1363   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1364   bool PreviousStartsTemplateExpr =
1365       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1366   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1367     // If the line contains an '@' sign, the previous token might be an
1368     // annotation, which can precede another identifier/value.
1369     bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1370       return LineNode.Tok->is(tok::at);
1371     });
1372     if (HasAt)
1373       return;
1374   }
1375   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1376     return addUnwrappedLine();
1377   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1378   bool NextEndsTemplateExpr =
1379       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1380   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1381       (PreviousMustBeValue ||
1382        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1383                          tok::minusminus))) {
1384     return addUnwrappedLine();
1385   }
1386   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1387       isJSDeclOrStmt(Keywords, Next)) {
1388     return addUnwrappedLine();
1389   }
1390 }
1391
1392 void UnwrappedLineParser::parseStructuralElement(
1393     const FormatToken *OpeningBrace, IfStmtKind *IfKind,
1394     FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1395   if (Style.Language == FormatStyle::LK_TableGen &&
1396       FormatTok->is(tok::pp_include)) {
1397     nextToken();
1398     if (FormatTok->is(tok::string_literal))
1399       nextToken();
1400     addUnwrappedLine();
1401     return;
1402   }
1403
1404   if (Style.isCpp()) {
1405     while (FormatTok->is(tok::l_square) && handleCppAttributes()) {
1406     }
1407   } else if (Style.isVerilog()) {
1408     if (Keywords.isVerilogStructuredProcedure(*FormatTok)) {
1409       parseForOrWhileLoop(/*HasParens=*/false);
1410       return;
1411     }
1412     if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) {
1413       parseForOrWhileLoop();
1414       return;
1415     }
1416     if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
1417                            Keywords.kw_assume, Keywords.kw_cover)) {
1418       parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
1419       return;
1420     }
1421
1422     // Skip things that can exist before keywords like 'if' and 'case'.
1423     while (true) {
1424       if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique,
1425                              Keywords.kw_unique0)) {
1426         nextToken();
1427       } else if (FormatTok->is(tok::l_paren) &&
1428                  Tokens->peekNextToken()->is(tok::star)) {
1429         parseParens();
1430       } else {
1431         break;
1432       }
1433     }
1434   }
1435
1436   // Tokens that only make sense at the beginning of a line.
1437   switch (FormatTok->Tok.getKind()) {
1438   case tok::kw_asm:
1439     nextToken();
1440     if (FormatTok->is(tok::l_brace)) {
1441       FormatTok->setFinalizedType(TT_InlineASMBrace);
1442       nextToken();
1443       while (FormatTok && !eof()) {
1444         if (FormatTok->is(tok::r_brace)) {
1445           FormatTok->setFinalizedType(TT_InlineASMBrace);
1446           nextToken();
1447           addUnwrappedLine();
1448           break;
1449         }
1450         FormatTok->Finalized = true;
1451         nextToken();
1452       }
1453     }
1454     break;
1455   case tok::kw_namespace:
1456     parseNamespace();
1457     return;
1458   case tok::kw_public:
1459   case tok::kw_protected:
1460   case tok::kw_private:
1461     if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1462         Style.isCSharp()) {
1463       nextToken();
1464     } else {
1465       parseAccessSpecifier();
1466     }
1467     return;
1468   case tok::kw_if: {
1469     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1470       // field/method declaration.
1471       break;
1472     }
1473     FormatToken *Tok = parseIfThenElse(IfKind);
1474     if (IfLeftBrace)
1475       *IfLeftBrace = Tok;
1476     return;
1477   }
1478   case tok::kw_for:
1479   case tok::kw_while:
1480     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1481       // field/method declaration.
1482       break;
1483     }
1484     parseForOrWhileLoop();
1485     return;
1486   case tok::kw_do:
1487     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1488       // field/method declaration.
1489       break;
1490     }
1491     parseDoWhile();
1492     if (HasDoWhile)
1493       *HasDoWhile = true;
1494     return;
1495   case tok::kw_switch:
1496     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1497       // 'switch: string' field declaration.
1498       break;
1499     }
1500     parseSwitch();
1501     return;
1502   case tok::kw_default:
1503     // In Verilog default along with other labels are handled in the next loop.
1504     if (Style.isVerilog())
1505       break;
1506     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1507       // 'default: string' field declaration.
1508       break;
1509     }
1510     nextToken();
1511     if (FormatTok->is(tok::colon)) {
1512       FormatTok->setFinalizedType(TT_CaseLabelColon);
1513       parseLabel();
1514       return;
1515     }
1516     // e.g. "default void f() {}" in a Java interface.
1517     break;
1518   case tok::kw_case:
1519     // Proto: there are no switch/case statements.
1520     if (Style.isProto()) {
1521       nextToken();
1522       return;
1523     }
1524     if (Style.isVerilog()) {
1525       parseBlock();
1526       addUnwrappedLine();
1527       return;
1528     }
1529     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1530       // 'case: string' field declaration.
1531       nextToken();
1532       break;
1533     }
1534     parseCaseLabel();
1535     return;
1536   case tok::kw_try:
1537   case tok::kw___try:
1538     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1539       // field/method declaration.
1540       break;
1541     }
1542     parseTryCatch();
1543     return;
1544   case tok::kw_extern:
1545     nextToken();
1546     if (Style.isVerilog()) {
      // In Verilog an extern module declaration looks like a start of module.
1548       // But there is no body and endmodule. So we handle it separately.
1549       if (Keywords.isVerilogHierarchy(*FormatTok)) {
1550         parseVerilogHierarchyHeader();
1551         return;
1552       }
1553     } else if (FormatTok->is(tok::string_literal)) {
1554       nextToken();
1555       if (FormatTok->is(tok::l_brace)) {
1556         if (Style.BraceWrapping.AfterExternBlock)
1557           addUnwrappedLine();
1558         // Either we indent or for backwards compatibility we follow the
1559         // AfterExternBlock style.
1560         unsigned AddLevels =
1561             (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1562                     (Style.BraceWrapping.AfterExternBlock &&
1563                      Style.IndentExternBlock ==
1564                          FormatStyle::IEBS_AfterExternBlock)
1565                 ? 1u
1566                 : 0u;
1567         parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1568         addUnwrappedLine();
1569         return;
1570       }
1571     }
1572     break;
1573   case tok::kw_export:
1574     if (Style.isJavaScript()) {
1575       parseJavaScriptEs6ImportExport();
1576       return;
1577     }
1578     if (Style.isCpp()) {
1579       nextToken();
1580       if (FormatTok->is(tok::kw_namespace)) {
1581         parseNamespace();
1582         return;
1583       }
1584       if (FormatTok->is(Keywords.kw_import) && parseModuleImport())
1585         return;
1586     }
1587     break;
1588   case tok::kw_inline:
1589     nextToken();
1590     if (FormatTok->is(tok::kw_namespace)) {
1591       parseNamespace();
1592       return;
1593     }
1594     break;
1595   case tok::identifier:
1596     if (FormatTok->is(TT_ForEachMacro)) {
1597       parseForOrWhileLoop();
1598       return;
1599     }
1600     if (FormatTok->is(TT_MacroBlockBegin)) {
1601       parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1602                  /*MunchSemi=*/false);
1603       return;
1604     }
1605     if (FormatTok->is(Keywords.kw_import)) {
1606       if (Style.isJavaScript()) {
1607         parseJavaScriptEs6ImportExport();
1608         return;
1609       }
1610       if (Style.Language == FormatStyle::LK_Proto) {
1611         nextToken();
1612         if (FormatTok->is(tok::kw_public))
1613           nextToken();
1614         if (FormatTok->isNot(tok::string_literal))
1615           return;
1616         nextToken();
1617         if (FormatTok->is(tok::semi))
1618           nextToken();
1619         addUnwrappedLine();
1620         return;
1621       }
1622       if (Style.isCpp() && parseModuleImport())
1623         return;
1624     }
1625     if (Style.isCpp() &&
1626         FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1627                            Keywords.kw_slots, Keywords.kw_qslots)) {
1628       nextToken();
1629       if (FormatTok->is(tok::colon)) {
1630         nextToken();
1631         addUnwrappedLine();
1632         return;
1633       }
1634     }
1635     if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1636       parseStatementMacro();
1637       return;
1638     }
1639     if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1640       parseNamespace();
1641       return;
1642     }
1643     // In Verilog labels can be any expression, so we don't do them here.
1644     if (!Style.isVerilog() && Tokens->peekNextToken()->is(tok::colon) &&
1645         !Line->MustBeDeclaration) {
1646       nextToken();
1647       Line->Tokens.begin()->Tok->MustBreakBefore = true;
1648       FormatTok->setFinalizedType(TT_GotoLabelColon);
1649       parseLabel(!Style.IndentGotoLabels);
1650       if (HasLabel)
1651         *HasLabel = true;
1652       return;
1653     }
1654     // In all other cases, parse the declaration.
1655     break;
1656   default:
1657     break;
1658   }
1659
1660   const bool InRequiresExpression =
1661       OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
1662   do {
1663     const FormatToken *Previous = FormatTok->Previous;
1664     switch (FormatTok->Tok.getKind()) {
1665     case tok::at:
1666       nextToken();
1667       if (FormatTok->is(tok::l_brace)) {
1668         nextToken();
1669         parseBracedList();
1670         break;
1671       } else if (Style.Language == FormatStyle::LK_Java &&
1672                  FormatTok->is(Keywords.kw_interface)) {
1673         nextToken();
1674         break;
1675       }
1676       switch (FormatTok->Tok.getObjCKeywordID()) {
1677       case tok::objc_public:
1678       case tok::objc_protected:
1679       case tok::objc_package:
1680       case tok::objc_private:
1681         return parseAccessSpecifier();
1682       case tok::objc_interface:
1683       case tok::objc_implementation:
1684         return parseObjCInterfaceOrImplementation();
1685       case tok::objc_protocol:
1686         if (parseObjCProtocol())
1687           return;
1688         break;
1689       case tok::objc_end:
1690         return; // Handled by the caller.
1691       case tok::objc_optional:
1692       case tok::objc_required:
1693         nextToken();
1694         addUnwrappedLine();
1695         return;
1696       case tok::objc_autoreleasepool:
1697         nextToken();
1698         if (FormatTok->is(tok::l_brace)) {
1699           if (Style.BraceWrapping.AfterControlStatement ==
1700               FormatStyle::BWACS_Always) {
1701             addUnwrappedLine();
1702           }
1703           parseBlock();
1704         }
1705         addUnwrappedLine();
1706         return;
1707       case tok::objc_synchronized:
1708         nextToken();
1709         if (FormatTok->is(tok::l_paren)) {
1710           // Skip synchronization object
1711           parseParens();
1712         }
1713         if (FormatTok->is(tok::l_brace)) {
1714           if (Style.BraceWrapping.AfterControlStatement ==
1715               FormatStyle::BWACS_Always) {
1716             addUnwrappedLine();
1717           }
1718           parseBlock();
1719         }
1720         addUnwrappedLine();
1721         return;
1722       case tok::objc_try:
1723         // This branch isn't strictly necessary (the kw_try case below would
1724         // do this too after the tok::at is parsed above).  But be explicit.
1725         parseTryCatch();
1726         return;
1727       default:
1728         break;
1729       }
1730       break;
1731     case tok::kw_requires: {
1732       if (Style.isCpp()) {
1733         bool ParsedClause = parseRequires();
1734         if (ParsedClause)
1735           return;
1736       } else {
1737         nextToken();
1738       }
1739       break;
1740     }
1741     case tok::kw_enum:
1742       // Ignore if this is part of "template <enum ...".
1743       if (Previous && Previous->is(tok::less)) {
1744         nextToken();
1745         break;
1746       }
1747
1748       // parseEnum falls through and does not yet add an unwrapped line as an
1749       // enum definition can start a structural element.
1750       if (!parseEnum())
1751         break;
1752       // This only applies to C++ and Verilog.
1753       if (!Style.isCpp() && !Style.isVerilog()) {
1754         addUnwrappedLine();
1755         return;
1756       }
1757       break;
1758     case tok::kw_typedef:
1759       nextToken();
1760       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1761                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1762                              Keywords.kw_CF_CLOSED_ENUM,
1763                              Keywords.kw_NS_CLOSED_ENUM)) {
1764         parseEnum();
1765       }
1766       break;
1767     case tok::kw_class:
1768       if (Style.isVerilog()) {
1769         parseBlock();
1770         addUnwrappedLine();
1771         return;
1772       }
1773       [[fallthrough]];
1774     case tok::kw_struct:
1775     case tok::kw_union:
1776       if (parseStructLike())
1777         return;
1778       break;
1779     case tok::kw_decltype:
1780       nextToken();
1781       if (FormatTok->is(tok::l_paren)) {
1782         parseParens();
1783         assert(FormatTok->Previous);
1784         if (FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto,
1785                                               tok::l_paren)) {
1786           Line->SeenDecltypeAuto = true;
1787         }
1788       }
1789       break;
1790     case tok::period:
1791       nextToken();
1792       // In Java, classes have an implicit static member "class".
1793       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1794           FormatTok->is(tok::kw_class)) {
1795         nextToken();
1796       }
1797       if (Style.isJavaScript() && FormatTok &&
1798           FormatTok->Tok.getIdentifierInfo()) {
1799         // JavaScript only has pseudo keywords, all keywords are allowed to
1800         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1801         nextToken();
1802       }
1803       break;
1804     case tok::semi:
1805       nextToken();
1806       addUnwrappedLine();
1807       return;
1808     case tok::r_brace:
1809       addUnwrappedLine();
1810       return;
1811     case tok::l_paren: {
1812       parseParens();
1813       // Break the unwrapped line if a K&R C function definition has a parameter
1814       // declaration.
1815       if (OpeningBrace || !Style.isCpp() || !Previous || eof())
1816         break;
1817       if (isC78ParameterDecl(FormatTok,
1818                              Tokens->peekNextToken(/*SkipComment=*/true),
1819                              Previous)) {
1820         addUnwrappedLine();
1821         return;
1822       }
1823       break;
1824     }
1825     case tok::kw_operator:
1826       nextToken();
1827       if (FormatTok->isBinaryOperator())
1828         nextToken();
1829       break;
1830     case tok::caret:
1831       nextToken();
1832       // Block return type.
1833       if (FormatTok->Tok.isAnyIdentifier() ||
1834           FormatTok->isSimpleTypeSpecifier()) {
1835         nextToken();
1836         // Return types: pointers are ok too.
1837         while (FormatTok->is(tok::star))
1838           nextToken();
1839       }
1840       // Block argument list.
1841       if (FormatTok->is(tok::l_paren))
1842         parseParens();
1843       // Block body.
1844       if (FormatTok->is(tok::l_brace))
1845         parseChildBlock();
1846       break;
1847     case tok::l_brace:
1848       if (InRequiresExpression)
1849         FormatTok->setFinalizedType(TT_BracedListLBrace);
1850       if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1851         IsDecltypeAutoFunction = Line->SeenDecltypeAuto;
1852         // A block outside of parentheses must be the last part of a
1853         // structural element.
1854         // FIXME: Figure out cases where this is not true, and add projections
1855         // for them (the one we know is missing are lambdas).
1856         if (Style.Language == FormatStyle::LK_Java &&
1857             Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1858           // If necessary, we could set the type to something different than
1859           // TT_FunctionLBrace.
1860           if (Style.BraceWrapping.AfterControlStatement ==
1861               FormatStyle::BWACS_Always) {
1862             addUnwrappedLine();
1863           }
1864         } else if (Style.BraceWrapping.AfterFunction) {
1865           addUnwrappedLine();
1866         }
1867         FormatTok->setFinalizedType(TT_FunctionLBrace);
1868         parseBlock();
1869         IsDecltypeAutoFunction = false;
1870         addUnwrappedLine();
1871         return;
1872       }
1873       // Otherwise this was a braced init list, and the structural
1874       // element continues.
1875       break;
1876     case tok::kw_try:
1877       if (Style.isJavaScript() && Line->MustBeDeclaration) {
1878         // field/method declaration.
1879         nextToken();
1880         break;
1881       }
1882       // We arrive here when parsing function-try blocks.
1883       if (Style.BraceWrapping.AfterFunction)
1884         addUnwrappedLine();
1885       parseTryCatch();
1886       return;
1887     case tok::identifier: {
1888       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1889           Line->MustBeDeclaration) {
1890         addUnwrappedLine();
1891         parseCSharpGenericTypeConstraint();
1892         break;
1893       }
1894       if (FormatTok->is(TT_MacroBlockEnd)) {
1895         addUnwrappedLine();
1896         return;
1897       }
1898
1899       // Function declarations (as opposed to function expressions) are parsed
1900       // on their own unwrapped line by continuing this loop. Function
1901       // expressions (functions that are not on their own line) must not create
1902       // a new unwrapped line, so they are special cased below.
1903       size_t TokenCount = Line->Tokens.size();
1904       if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1905           (TokenCount > 1 ||
1906            (TokenCount == 1 &&
1907             Line->Tokens.front().Tok->isNot(Keywords.kw_async)))) {
1908         tryToParseJSFunction();
1909         break;
1910       }
1911       if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1912           FormatTok->is(Keywords.kw_interface)) {
1913         if (Style.isJavaScript()) {
1914           // In JavaScript/TypeScript, "interface" can be used as a standalone
1915           // identifier, e.g. in `var interface = 1;`. If "interface" is
1916           // followed by another identifier, it is very likely to be an actual
1917           // interface declaration.
1918           unsigned StoredPosition = Tokens->getPosition();
1919           FormatToken *Next = Tokens->getNextToken();
1920           FormatTok = Tokens->setPosition(StoredPosition);
1921           if (!mustBeJSIdent(Keywords, Next)) {
1922             nextToken();
1923             break;
1924           }
1925         }
1926         parseRecord();
1927         addUnwrappedLine();
1928         return;
1929       }
1930
1931       if (Style.isVerilog()) {
1932         if (FormatTok->is(Keywords.kw_table)) {
1933           parseVerilogTable();
1934           return;
1935         }
1936         if (Keywords.isVerilogBegin(*FormatTok) ||
1937             Keywords.isVerilogHierarchy(*FormatTok)) {
1938           parseBlock();
1939           addUnwrappedLine();
1940           return;
1941         }
1942       }
1943
1944       if (!Style.isCpp() && FormatTok->is(Keywords.kw_interface)) {
1945         if (parseStructLike())
1946           return;
1947         break;
1948       }
1949
1950       if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1951         parseStatementMacro();
1952         return;
1953       }
1954
1955       // See if the following token should start a new unwrapped line.
1956       StringRef Text = FormatTok->TokenText;
1957
1958       FormatToken *PreviousToken = FormatTok;
1959       nextToken();
1960
1961       // JS doesn't have macros, and within classes colons indicate fields, not
1962       // labels.
1963       if (Style.isJavaScript())
1964         break;
1965
1966       auto OneTokenSoFar = [&]() {
1967         auto I = Line->Tokens.begin(), E = Line->Tokens.end();
1968         while (I != E && I->Tok->is(tok::comment))
1969           ++I;
1970         if (Style.isVerilog())
1971           while (I != E && I->Tok->is(tok::hash))
1972             ++I;
1973         return I != E && (++I == E);
1974       };
1975       if (OneTokenSoFar()) {
1976         // Recognize function-like macro usages without trailing semicolon as
1977         // well as free-standing macros like Q_OBJECT.
1978         bool FunctionLike = FormatTok->is(tok::l_paren);
1979         if (FunctionLike)
1980           parseParens();
1981
1982         bool FollowedByNewline =
1983             CommentsBeforeNextToken.empty()
1984                 ? FormatTok->NewlinesBefore > 0
1985                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1986
1987         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1988             tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
1989           if (PreviousToken->isNot(TT_UntouchableMacroFunc))
1990             PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
1991           addUnwrappedLine();
1992           return;
1993         }
1994       }
1995       break;
1996     }
1997     case tok::equal:
1998       if ((Style.isJavaScript() || Style.isCSharp()) &&
1999           FormatTok->is(TT_FatArrow)) {
2000         tryToParseChildBlock();
2001         break;
2002       }
2003
2004       nextToken();
2005       if (FormatTok->is(tok::l_brace)) {
2006         // Block kind should probably be set to BK_BracedInit for any language.
2007         // C# needs this change to ensure that array initialisers and object
2008         // initialisers are indented the same way.
2009         if (Style.isCSharp())
2010           FormatTok->setBlockKind(BK_BracedInit);
2011         nextToken();
2012         parseBracedList();
2013       } else if (Style.Language == FormatStyle::LK_Proto &&
2014                  FormatTok->is(tok::less)) {
2015         nextToken();
2016         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2017                         /*ClosingBraceKind=*/tok::greater);
2018       }
2019       break;
2020     case tok::l_square:
2021       parseSquare();
2022       break;
2023     case tok::kw_new:
2024       parseNew();
2025       break;
2026     case tok::kw_case:
2027       // Proto: there are no switch/case statements.
2028       if (Style.isProto()) {
2029         nextToken();
2030         return;
2031       }
2032       // In Verilog switch is called case.
2033       if (Style.isVerilog()) {
2034         parseBlock();
2035         addUnwrappedLine();
2036         return;
2037       }
2038       if (Style.isJavaScript() && Line->MustBeDeclaration) {
2039         // 'case: string' field declaration.
2040         nextToken();
2041         break;
2042       }
2043       parseCaseLabel();
2044       break;
2045     case tok::kw_default:
2046       nextToken();
2047       if (Style.isVerilog()) {
2048         if (FormatTok->is(tok::colon)) {
2049           // The label will be handled in the next iteration.
2050           break;
2051         }
2052         if (FormatTok->is(Keywords.kw_clocking)) {
2053           // A default clocking block.
2054           parseBlock();
2055           addUnwrappedLine();
2056           return;
2057         }
2058         parseVerilogCaseLabel();
2059         return;
2060       }
2061       break;
2062     case tok::colon:
2063       nextToken();
2064       if (Style.isVerilog()) {
2065         parseVerilogCaseLabel();
2066         return;
2067       }
2068       break;
2069     default:
2070       nextToken();
2071       break;
2072     }
2073   } while (!eof());
2074 }
2075
/// Tries to parse a C# property accessor list that starts at the current '{'.
///
/// Returns false unless the style is C#, the token before the brace is an
/// identifier, and a lookahead past the brace sees at least one of `get`,
/// `init` or `set`. The lookahead reads directly from the token source and
/// rewinds it to the stored position afterwards.
///
/// On success the accessor list is consumed through its closing '}' (plus an
/// optional `= ...;` that follows it), addUnwrappedLine() is called, and true
/// is returned.
2076 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2077   assert(FormatTok->is(tok::l_brace));
2078   if (!Style.isCSharp())
2079     return false;
2080   // See if it's a property accessor.
2081   if (FormatTok->Previous->isNot(tok::identifier))
2082     return false;
2083
2084   // See if we are inside a property accessor.
2085   //
2086   // Record the current tokenPosition so that we can advance and
2087   // reset the current token. `Next` is not set yet so we need
2088   // another way to advance along the token stream.
2089   unsigned int StoredPosition = Tokens->getPosition();
2090   FormatToken *Tok = Tokens->getNextToken();
2091
2092   // A trivial property accessor is of the form:
2093   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2094   // Track these as they do not require line breaks to be introduced.
2095   bool HasSpecialAccessor = false;
2096   bool IsTrivialPropertyAccessor = true;
       // Lookahead only: this loop advances the local `Tok`, not FormatTok.
       // NOTE(review): eof() presumably inspects FormatTok, which does not
       // change here, so the loop effectively ends via the `break` below -
       // confirm.
2097   while (!eof()) {
2098     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
2099                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
2100                      Keywords.kw_init, Keywords.kw_set)) {
2101       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
2102         HasSpecialAccessor = true;
2103       Tok = Tokens->getNextToken();
2104       continue;
2105     }
         // First token that is not an accessor keyword, access specifier or
         // ';'. Anything other than the closing brace makes this non-trivial.
2106     if (Tok->isNot(tok::r_brace))
2107       IsTrivialPropertyAccessor = false;
2108     break;
2109   }
2110
2111   if (!HasSpecialAccessor) {
         // No get/init/set seen: rewind the token source and give up.
2112     Tokens->setPosition(StoredPosition);
2113     return false;
2114   }
2115
2116   // Try to parse the property accessor:
2117   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2118   Tokens->setPosition(StoredPosition);
2119   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2120     addUnwrappedLine();
       // Consume the opening '{'.
2121   nextToken();
2122   do {
2123     switch (FormatTok->Tok.getKind()) {
2124     case tok::r_brace:
           // End of the accessor list. If '=' follows the brace, consume
           // everything up to and including the next ';' as well.
2125       nextToken();
2126       if (FormatTok->is(tok::equal)) {
2127         while (!eof() && FormatTok->isNot(tok::semi))
2128           nextToken();
2129         nextToken();
2130       }
2131       addUnwrappedLine();
2132       return true;
2133     case tok::l_brace:
           // A braced accessor body: parse it as a block one level deeper.
2134       ++Line->Level;
2135       parseBlock(/*MustBeDeclaration=*/true);
2136       addUnwrappedLine();
2137       --Line->Level;
2138       break;
2139     case tok::equal:
2140       if (FormatTok->is(TT_FatArrow)) {
             // `=>` accessor body: consume through the terminating ';' and
             // emit it as its own line one level deeper.
2141         ++Line->Level;
2142         do {
2143           nextToken();
2144         } while (!eof() && FormatTok->isNot(tok::semi));
2145         nextToken();
2146         addUnwrappedLine();
2147         --Line->Level;
2148         break;
2149       }
2150       nextToken();
2151       break;
2152     default:
2153       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
2154                              Keywords.kw_set) &&
2155           !IsTrivialPropertyAccessor) {
2156         // Non-trivial get/set needs to be on its own line.
2157         addUnwrappedLine();
2158       }
2159       nextToken();
2160     }
2161   } while (!eof());
2162
2163   // Unreachable for well-formed code (paired '{' and '}').
2164   return true;
2165 }
2166
/// Tries to parse a C++ lambda expression starting at the current '['.
///
/// For non-C++ styles this only consumes the '[' and returns false. For C++ it
/// returns false when tryToParseLambdaIntroducer() rejects the bracket.
/// Otherwise it scans the tokens that follow the introducer until it reaches
/// '{'. If the scan meets a token it does not accept, it stops and returns
/// true without marking '[' or '{' as lambda tokens. If '{' is reached, the
/// brace is finalized as TT_LambdaLBrace and the '[' as TT_LambdaLSquare, the
/// body is parsed with parseChildBlock(), and true is returned.
2167 bool UnwrappedLineParser::tryToParseLambda() {
2168   assert(FormatTok->is(tok::l_square));
2169   if (!Style.isCpp()) {
         // Not C++: just consume '[' and report that no lambda was parsed.
2170     nextToken();
2171     return false;
2172   }
       // Keep a handle on '[' so its type can be set once '{' has been found.
2173   FormatToken &LSquare = *FormatTok;
2174   if (!tryToParseLambdaIntroducer())
2175     return false;
2176
2177   bool SeenArrow = false;
2178   bool InTemplateParameterList = false;
2179
2180   while (FormatTok->isNot(tok::l_brace)) {
2181     if (FormatTok->isSimpleTypeSpecifier()) {
2182       nextToken();
2183       continue;
2184     }
2185     switch (FormatTok->Tok.getKind()) {
2186     case tok::l_brace:
           // Appears unreachable: the loop condition above already excludes
           // '{' for the token examined here.
2187       break;
2188     case tok::l_paren:
2189       parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
2190       break;
2191     case tok::l_square:
2192       parseSquare();
2193       break;
2194     case tok::less:
           // A '<' directly after ']' starts a template parameter list.
2195       assert(FormatTok->Previous);
2196       if (FormatTok->Previous->is(tok::r_square))
2197         InTemplateParameterList = true;
2198       nextToken();
2199       break;
2200     case tok::kw_auto:
2201     case tok::kw_class:
2202     case tok::kw_template:
2203     case tok::kw_typename:
2204     case tok::amp:
2205     case tok::star:
2206     case tok::kw_const:
2207     case tok::kw_constexpr:
2208     case tok::kw_consteval:
2209     case tok::comma:
2210     case tok::greater:
2211     case tok::identifier:
2212     case tok::numeric_constant:
2213     case tok::coloncolon:
2214     case tok::kw_mutable:
2215     case tok::kw_noexcept:
2216     case tok::kw_static:
2217       nextToken();
2218       break;
2219     // Specialization of a template with an integer parameter can contain
2220     // arithmetic, logical, comparison and ternary operators.
2221     //
2222     // FIXME: This also accepts sequences of operators that are not in the scope
2223     // of a template argument list.
2224     //
2225     // In a C++ lambda a template type can only occur after an arrow. We use
2226     // this as a heuristic to distinguish between Objective-C expressions
2227     // followed by an `a->b` expression, such as:
2228     // ([obj func:arg] + a->b)
2229     // Otherwise the code below would parse as a lambda.
2230     case tok::plus:
2231     case tok::minus:
2232     case tok::exclaim:
2233     case tok::tilde:
2234     case tok::slash:
2235     case tok::percent:
2236     case tok::lessless:
2237     case tok::pipe:
2238     case tok::pipepipe:
2239     case tok::ampamp:
2240     case tok::caret:
2241     case tok::equalequal:
2242     case tok::exclaimequal:
2243     case tok::greaterequal:
2244     case tok::lessequal:
2245     case tok::question:
2246     case tok::colon:
2247     case tok::ellipsis:
2248     case tok::kw_true:
2249     case tok::kw_false:
2250       if (SeenArrow || InTemplateParameterList) {
2251         nextToken();
2252         break;
2253       }
2254       return true;
2255     case tok::arrow:
2256       // This might or might not actually be a lambda arrow (this could be an
2257       // ObjC method invocation followed by a dereferencing arrow). We might
2258       // reset this back to TT_Unknown in TokenAnnotator.
2259       FormatTok->setFinalizedType(TT_TrailingReturnArrow);
2260       SeenArrow = true;
2261       nextToken();
2262       break;
2263     case tok::kw_requires: {
2264       auto *RequiresToken = FormatTok;
2265       nextToken();
2266       parseRequiresClause(RequiresToken);
2267       break;
2268     }
2269     case tok::equal:
           // '=' is only tolerated inside a template parameter list; anywhere
           // else the scan stops here.
2270       if (!InTemplateParameterList)
2271         return true;
2272       nextToken();
2273       break;
2274     default:
2275       return true;
2276     }
2277   }
2278
       // '{' reached: commit to treating this as a lambda.
2279   FormatTok->setFinalizedType(TT_LambdaLBrace);
2280   LSquare.setFinalizedType(TT_LambdaLSquare);
2281
       // Keep Line->SeenDecltypeAuto on the NestedLambdas stack for as long as
       // the lambda body is being parsed.
2282   NestedLambdas.push_back(Line->SeenDecltypeAuto);
2283   parseChildBlock();
2284   assert(!NestedLambdas.empty());
2285   NestedLambdas.pop_back();
2286
2287   return true;
2288 }
2289
/// Consumes the current '[' and decides whether it may begin a lambda
/// introducer.
///
/// Returns false (with only the '[' consumed) when
///  - the token before '[' carries identifier info and is not one of `return`,
///    `co_await`, `co_yield` or `co_return`, or that token closes a scope;
///  - the '[' is recognized as a C++ structured binding;
///  - the token after '[' is another '[';
///  - the token after '[' is ']' and the next non-comment token is '>'.
/// Otherwise the rest of the bracket is handed to
/// parseSquare(/*LambdaIntroducer=*/true) and true is returned.
2290 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2291   const FormatToken *Previous = FormatTok->Previous;
2292   const FormatToken *LeftSquare = FormatTok;
       // The '[' is consumed before any of the checks below run.
2293   nextToken();
2294   if ((Previous && ((Previous->Tok.getIdentifierInfo() &&
2295                      !Previous->isOneOf(tok::kw_return, tok::kw_co_await,
2296                                         tok::kw_co_yield, tok::kw_co_return)) ||
2297                     Previous->closesScope())) ||
2298       LeftSquare->isCppStructuredBinding(Style)) {
2299     return false;
2300   }
       // "[[" - presumably the start of an attribute rather than a lambda.
2301   if (FormatTok->is(tok::l_square))
2302     return false;
2303   if (FormatTok->is(tok::r_square)) {
         // "[]" followed by '>' - presumably an array type used as a template
         // argument, e.g. `Foo<int[]>`.
2304     const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
2305     if (Next->is(tok::greater))
2306       return false;
2307   }
2308   parseSquare(/*LambdaIntroducer=*/true);
2309   return true;
2310 }
2311
/// Parses a JavaScript function starting at the `function` keyword.
///
/// Consumes `function`, an optional `*` (finalized as TT_OverloadedOperator),
/// an optional name and the parenthesized parameter list. A `: type`
/// annotation after the parameters is skipped. Returns early if no '(' follows
/// the optional name, or if the signature is followed by ';'. Otherwise
/// parseChildBlock() is called for what follows.
2312 void UnwrappedLineParser::tryToParseJSFunction() {
2313   assert(FormatTok->is(Keywords.kw_function));
       // NOTE(review): given the assert above, FormatTok is `function` here, so
       // this `async` check looks like it can never be true - confirm whether
       // it is a leftover.
2314   if (FormatTok->is(Keywords.kw_async))
2315     nextToken();
2316   // Consume "function".
2317   nextToken();
2318
2319   // Consume * (generator function). Treat it like C++'s overloaded operators.
2320   if (FormatTok->is(tok::star)) {
2321     FormatTok->setFinalizedType(TT_OverloadedOperator);
2322     nextToken();
2323   }
2324
2325   // Consume function name.
2326   if (FormatTok->is(tok::identifier))
2327     nextToken();
2328
       // Without a parameter list there is nothing more to parse here.
2329   if (FormatTok->isNot(tok::l_paren))
2330     return;
2331
2332   // Parse formal parameter list.
2333   parseParens();
2334
2335   if (FormatTok->is(tok::colon)) {
2336     // Parse a type definition.
2337     nextToken();
2338
2339     // Eat the type declaration. For braced inline object types, balance braces,
2340     // otherwise just parse until finding an l_brace for the function body.
2341     if (FormatTok->is(tok::l_brace))
2342       tryToParseBracedList();
2343     else
2344       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2345         nextToken();
2346   }
2347
       // A ';' here means there is no body to parse.
2348   if (FormatTok->is(tok::semi))
2349     return;
2350
2351   parseChildBlock();
2352 }
2353
/// Parses the current '{' as a braced list unless its block kind is BK_Block.
///
/// If the block kind is still BK_Unknown, calculateBraceTypes() is run first.
/// \returns false, leaving the '{' unconsumed, when the brace is a BK_Block;
/// true after consuming the '{' and calling parseBracedList() otherwise.
2354 bool UnwrappedLineParser::tryToParseBracedList() {
2355   if (FormatTok->is(BK_Unknown))
2356     calculateBraceTypes();
2357   assert(FormatTok->isNot(BK_Unknown));
2358   if (FormatTok->is(BK_Block))
2359     return false;
       // Consume the '{'; parseBracedList() starts on the token after it.
2360   nextToken();
2361   parseBracedList();
2362   return true;
2363 }
2364
/// Consumes a fat arrow (`=>`) and parses a child block if '{' follows it.
///
/// Only valid for JavaScript and C# (asserted). The arrow is consumed in both
/// outcomes.
/// \returns true if a '{' followed and was parsed with parseChildBlock(),
/// false otherwise.
2365 bool UnwrappedLineParser::tryToParseChildBlock() {
2366   assert(Style.isJavaScript() || Style.isCSharp());
2367   assert(FormatTok->is(TT_FatArrow));
2368   // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2369   // They always start an expression or a child block if followed by a curly
2370   // brace.
2371   nextToken();
2372   if (FormatTok->isNot(tok::l_brace))
2373     return false;
2374   parseChildBlock();
2375   return true;
2376 }
2377
/// Parses the remainder of a braced list, up to and including its closing
/// token. Callers in this file consume the opening token before calling.
///
/// \param ContinueOnSemicolons If false, a ';' ends the parse immediately with
/// a false result (the ';' is not consumed); if true, the ';' is skipped but
/// still recorded as an error. For JavaScript a ';' is always skipped and is
/// never recorded as an error.
/// \param IsEnum If true and Style.AllowShortEnumsOnASingleLine is off,
/// addUnwrappedLine() is called after each ',' and just before the closing
/// token is consumed.
/// \param ClosingBraceKind The token kind that terminates the list.
///
/// \returns true if the closing token was consumed and no ';' error was
/// recorded; false if an error was recorded or EOF was reached first.
2378 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
2379                                           bool IsEnum,
2380                                           tok::TokenKind ClosingBraceKind) {
2381   bool HasError = false;
2382
2383   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2384   // replace this by using parseAssignmentExpression() inside.
2385   do {
2386     if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2387         tryToParseChildBlock()) {
2388       continue;
2389     }
2390     if (Style.isJavaScript()) {
2391       if (FormatTok->is(Keywords.kw_function)) {
2392         tryToParseJSFunction();
2393         continue;
2394       }
2395       if (FormatTok->is(tok::l_brace)) {
2396         // Could be a method inside of a braced list `{a() { return 1; }}`.
2397         if (tryToParseBracedList())
2398           continue;
             // Not a braced list: parse it as a child block, then carry on
             // with the checks below.
2399         parseChildBlock();
2400       }
2401     }
2402     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
2403       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2404         addUnwrappedLine();
2405       nextToken();
2406       return !HasError;
2407     }
2408     switch (FormatTok->Tok.getKind()) {
2409     case tok::l_square:
2410       if (Style.isCSharp())
2411         parseSquare();
2412       else
2413         tryToParseLambda();
2414       break;
2415     case tok::l_paren:
2416       parseParens();
2417       // JavaScript can just have free standing methods and getters/setters in
2418       // object literals. Detect them by a "{" following ")".
2419       if (Style.isJavaScript()) {
2420         if (FormatTok->is(tok::l_brace))
2421           parseChildBlock();
2422         break;
2423       }
2424       break;
2425     case tok::l_brace:
2426       // Assume there are no blocks inside a braced init list apart
2427       // from the ones we explicitly parse out (like lambdas).
2428       FormatTok->setBlockKind(BK_BracedInit);
2429       nextToken();
2430       parseBracedList();
2431       break;
2432     case tok::less:
           // In Proto, or when this list itself is closed by '>', a nested
           // '<' opens another list that is closed by '>'.
2433       if (Style.Language == FormatStyle::LK_Proto ||
2434           ClosingBraceKind == tok::greater) {
2435         nextToken();
2436         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2437                         /*ClosingBraceKind=*/tok::greater);
2438       } else {
2439         nextToken();
2440       }
2441       break;
2442     case tok::semi:
2443       // JavaScript (or more precisely TypeScript) can have semicolons in braced
2444       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2445       // used for error recovery if we have otherwise determined that this is
2446       // a braced list.
2447       if (Style.isJavaScript()) {
2448         nextToken();
2449         break;
2450       }
2451       HasError = true;
2452       if (!ContinueOnSemicolons)
2453         return !HasError;
2454       nextToken();
2455       break;
2456     case tok::comma:
2457       nextToken();
2458       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2459         addUnwrappedLine();
2460       break;
2461     default:
2462       nextToken();
2463       break;
2464     }
2465   } while (!eof());
       // EOF was reached without seeing the closing token.
2466   return false;
2467 }
2468
2469 /// \brief Parses a pair of parentheses (and everything between them).
2470 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2471 /// double ampersands. This applies for all nested scopes as well.
2472 ///
2473 /// Returns whether there is a `=` token between the parentheses.
     /// A `=` seen inside a nested pair of parentheses counts as well.
     ///
     /// The current token must be the opening '('. Parsing ends after the
     /// matching ')' has been consumed, or early at a '}' (left unconsumed) or
     /// at EOF. When Style.RemoveParentheses is above RPS_Leave, a pair that the
     /// checks in the ')' case find redundant gets both tokens marked Optional.
2474 bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2475   assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2476   auto *LeftParen = FormatTok;
2477   bool SeenEqual = false;
       // "({" - presumably a statement expression; such parentheses are never
       // marked Optional below.
2478   const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace);
2479   nextToken();
2480   do {
2481     switch (FormatTok->Tok.getKind()) {
2482     case tok::l_paren:
2483       if (parseParens(AmpAmpTokenType))
2484         SeenEqual = true;
2485       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2486         parseChildBlock();
2487       break;
2488     case tok::r_paren:
2489       if (!MightBeStmtExpr &&
2490           Style.RemoveParentheses > FormatStyle::RPS_Leave) {
2491         const auto *Prev = LeftParen->Previous;
2492         const auto *Next = Tokens->peekNextToken();
             // This pair sits directly inside another pair: "((...))".
2493         const bool DoubleParens =
2494             Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren);
2495         const auto *PrevPrev = Prev ? Prev->getPreviousNonComment() : nullptr;
             // Keep the parentheses after __attribute__ / decltype, and after
             // if / while / "if constexpr" when a '=' was seen inside.
2496         const bool Blacklisted =
2497             PrevPrev &&
2498             (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) ||
2499              (SeenEqual &&
2500               (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) ||
2501                PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if))));
             // "return (...);" / "co_return (...);" under RPS_ReturnStatement,
             // unless IsDecltypeAutoFunction is set (outside lambdas) or the
             // innermost NestedLambdas entry is true (inside a lambda).
2502         const bool ReturnParens =
2503             Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement &&
2504             ((NestedLambdas.empty() && !IsDecltypeAutoFunction) ||
2505              (!NestedLambdas.empty() && !NestedLambdas.back())) &&
2506             Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next &&
2507             Next->is(tok::semi);
2508         if ((DoubleParens && !Blacklisted) || ReturnParens) {
2509           LeftParen->Optional = true;
2510           FormatTok->Optional = true;
2511         }
2512       }
2513       nextToken();
2514       return SeenEqual;
2515     case tok::r_brace:
2516       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2517       return SeenEqual;
2518     case tok::l_square:
2519       tryToParseLambda();
2520       break;
2521     case tok::l_brace:
2522       if (!tryToParseBracedList())
2523         parseChildBlock();
2524       break;
2525     case tok::at:
           // "@{" is consumed as a braced list.
2526       nextToken();
2527       if (FormatTok->is(tok::l_brace)) {
2528         nextToken();
2529         parseBracedList();
2530       }
2531       break;
2532     case tok::equal:
2533       SeenEqual = true;
2534       if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2535         tryToParseChildBlock();
2536       else
2537         nextToken();
2538       break;
2539     case tok::kw_class:
2540       if (Style.isJavaScript())
2541         parseRecord(/*ParseAsExpr=*/true);
2542       else
2543         nextToken();
2544       break;
2545     case tok::identifier:
2546       if (Style.isJavaScript() && (FormatTok->is(Keywords.kw_function)))
2547         tryToParseJSFunction();
2548       else
2549         nextToken();
2550       break;
2551     case tok::kw_requires: {
2552       auto RequiresToken = FormatTok;
2553       nextToken();
2554       parseRequiresExpression(RequiresToken);
2555       break;
2556     }
2557     case tok::ampamp:
2558       if (AmpAmpTokenType != TT_Unknown)
2559         FormatTok->setFinalizedType(AmpAmpTokenType);
2560       [[fallthrough]];
2561     default:
2562       nextToken();
2563       break;
2564     }
2565   } while (!eof());
2566   return SeenEqual;
2567 }
2568
/// Parses a square-bracketed sequence through its closing ']'.
///
/// \param LambdaIntroducer If false, the current token must be '[' and
/// tryToParseLambda() is tried first; when that returns true this function
/// returns immediately. If true, that attempt is skipped; the caller in this
/// file (tryToParseLambdaIntroducer) has already consumed the '['.
///
/// The loop ends after consuming the matching ']', or early at a '}' (left
/// unconsumed) or at EOF.
2569 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2570   if (!LambdaIntroducer) {
2571     assert(FormatTok->is(tok::l_square) && "'[' expected.");
2572     if (tryToParseLambda())
2573       return;
2574   }
2575   do {
2576     switch (FormatTok->Tok.getKind()) {
2577     case tok::l_paren:
2578       parseParens();
2579       break;
2580     case tok::r_square:
2581       nextToken();
2582       return;
2583     case tok::r_brace:
2584       // A "}" inside square brackets is an error if there wasn't a matching "{".
2585       return;
2586     case tok::l_square:
2587       parseSquare();
2588       break;
2589     case tok::l_brace: {
2590       if (!tryToParseBracedList())
2591         parseChildBlock();
2592       break;
2593     }
2594     case tok::at:
           // "@{" is consumed as a braced list.
2595       nextToken();
2596       if (FormatTok->is(tok::l_brace)) {
2597         nextToken();
2598         parseBracedList();
2599       }
2600       break;
2601     default:
2602       nextToken();
2603       break;
2604     }
2605   } while (!eof());
2606 }
2607
2608 void UnwrappedLineParser::keepAncestorBraces() {
2609   if (!Style.RemoveBracesLLVM)
2610     return;
2611
2612   const int MaxNestingLevels = 2;
2613   const int Size = NestedTooDeep.size();
2614   if (Size >= MaxNestingLevels)
2615     NestedTooDeep[Size - MaxNestingLevels] = true;
2616   NestedTooDeep.push_back(false);
2617 }
2618
2619 static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2620   for (const auto &Token : llvm::reverse(Line.Tokens))
2621     if (Token.Tok->isNot(tok::comment))
2622       return Token.Tok;
2623
2624   return nullptr;
2625 }
2626
/// Parses the single statement that forms the body of a control statement
/// written without braces.
///
/// The pending line is finished first and the body is then parsed one
/// indentation level deeper via parseStructuralElement(). When
/// Style.InsertBraces applies, the tokens surrounding the body are tagged
/// through FormatToken::BraceCount.
/// NOTE(review): presumably a BraceCount of -1 requests an opening brace after
/// the token and a positive value counts closing braces to add - confirm
/// against the consumer of BraceCount.
///
/// \param CheckEOF If true and eof() holds after the body was parsed, the
/// current line is flushed with addUnwrappedLine().
void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
  // The token in front of the body whose BraceCount this call set to -1. It
  // stays null when no brace bookkeeping is done.
  FormatToken *Tok = nullptr;

  // Only tag tokens outside of preprocessor directives, when the current line
  // already has tokens, and when the body is not just an empty statement.
  if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
      PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) {
    // With BWACS_Never trailing comments are skipped; otherwise the very last
    // token of the line is used even if it is a comment.
    Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never
              ? getLastNonComment(*Line)
              : Line->Tokens.back().Tok;
    assert(Tok);
    if (Tok->BraceCount < 0) {
      // The token is already tagged; leave it alone and skip the matching
      // update after the body as well.
      assert(Tok->BraceCount == -1);
      Tok = nullptr;
    } else {
      Tok->BraceCount = -1;
    }
  }

  addUnwrappedLine();
  ++Line->Level;
  parseStructuralElement();

  if (Tok) {
    assert(!Line->InPPDirective);
    Tok = nullptr;
    // Walk the emitted lines backwards to the most recent one that is not part
    // of a preprocessor directive and contains a non-comment token. Note that
    // the line's last token is taken, which may itself be a trailing comment.
    for (const auto &L : llvm::reverse(*CurrentLines)) {
      if (!L.InPPDirective && getLastNonComment(L)) {
        Tok = L.Tokens.back().Tok;
        break;
      }
    }
    assert(Tok);
    ++Tok->BraceCount;
  }

  if (CheckEOF && eof())
    addUnwrappedLine();

  --Line->Level;
}
2666
2667 static void markOptionalBraces(FormatToken *LeftBrace) {
2668   if (!LeftBrace)
2669     return;
2670
2671   assert(LeftBrace->is(tok::l_brace));
2672
2673   FormatToken *RightBrace = LeftBrace->MatchingParen;
2674   if (!RightBrace) {
2675     assert(!LeftBrace->Optional);
2676     return;
2677   }
2678
2679   assert(RightBrace->is(tok::r_brace));
2680   assert(RightBrace->MatchingParen == LeftBrace);
2681   assert(LeftBrace->Optional == RightBrace->Optional);
2682
2683   LeftBrace->Optional = true;
2684   RightBrace->Optional = true;
2685 }
2686
2687 void UnwrappedLineParser::handleAttributes() {
2688   // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2689   if (FormatTok->isAttribute())
2690     nextToken();
2691   else if (FormatTok->is(tok::l_square))
2692     handleCppAttributes();
2693 }
2694
2695 bool UnwrappedLineParser::handleCppAttributes() {
2696   // Handle [[likely]] / [[unlikely]] attributes.
2697   assert(FormatTok->is(tok::l_square));
2698   if (!tryToParseSimpleAttribute())
2699     return false;
2700   parseSquare();
2701   return true;
2702 }
2703
2704 /// Returns whether \c Tok begins a block.
2705 bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
2706   // FIXME: rename the function or make
2707   // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2708   return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
2709                            : Tok.is(tok::l_brace);
2710 }
2711
/// Parses an `if` statement, including any `else` / `else if` chain. In
/// Verilog the same routine also handles statements introduced by `assert`,
/// `assume`, `cover` and `restrict`.
///
/// When Style.RemoveBracesLLVM is enabled, the function additionally decides
/// whether the braces of the then- and else-blocks may be removed and marks
/// them via markOptionalBraces().
///
/// \param IfKind If non-null, receives IfOnly, IfElse or IfElseIf. It is only
/// written on the Style.RemoveBracesLLVM path at the end of the function.
/// \param KeepBraces If true, the braces of the then-block are treated as
/// mandatory (it forces KeepIfBraces).
/// \param IsVerilogAssert Enables the Verilog assertion forms: `#0`, `final`,
/// `property` and `sequence` after the keyword, and an optional action.
/// \returns The left brace of the then-block if Style.RemoveBracesLLVM is
/// enabled and the then-branch is a block; nullptr in all other cases.
FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
                                                  bool KeepBraces,
                                                  bool IsVerilogAssert) {
  assert((FormatTok->is(tok::kw_if) ||
          (Style.isVerilog() &&
           FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
                              Keywords.kw_assume, Keywords.kw_cover))) &&
         "'if' expected");
  nextToken();

  if (IsVerilogAssert) {
    // Handle `assert #0` and `assert final`.
    if (FormatTok->is(Keywords.kw_verilogHash)) {
      nextToken();
      if (FormatTok->is(tok::numeric_constant))
        nextToken();
    } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property,
                                  Keywords.kw_sequence)) {
      nextToken();
    }
  }

  // Handle `if !consteval`.
  if (FormatTok->is(tok::exclaim))
    nextToken();

  // `if consteval` always keeps its braces; for everything else the decision
  // starts from the style option and the caller's request.
  bool KeepIfBraces = true;
  if (FormatTok->is(tok::kw_consteval)) {
    nextToken();
  } else {
    KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
    // Skip `constexpr` or a single identifier between `if` and the condition.
    if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
      nextToken();
    if (FormatTok->is(tok::l_paren)) {
      FormatTok->setFinalizedType(TT_ConditionLParen);
      parseParens();
    }
  }
  handleAttributes();
  // The then action is optional in Verilog assert statements.
  if (IsVerilogAssert && FormatTok->is(tok::semi)) {
    nextToken();
    addUnwrappedLine();
    return nullptr;
  }

  // Set when the line holding the closing brace of the then-block is still
  // open because an `else` may continue on it.
  bool NeedsUnwrappedLine = false;
  keepAncestorBraces();

  FormatToken *IfLeftBrace = nullptr;
  IfStmtKind IfBlockKind = IfStmtKind::NotIf;

  if (isBlockBegin(*FormatTok)) {
    FormatTok->setFinalizedType(TT_ControlStatementLBrace);
    IfLeftBrace = FormatTok;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
               /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
    setPreviousRBraceType(TT_ControlStatementRBrace);
    if (Style.BraceWrapping.BeforeElse)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) {
    addUnwrappedLine();
  } else {
    parseUnbracedBody();
  }

  if (Style.RemoveBracesLLVM) {
    assert(!NestedTooDeep.empty());
    // Keep the braces if they are unmatched, if the nesting below this
    // statement was flagged as too deep, or if the block reported an `if`
    // without `else` or an `if`-`else if` as its kind.
    KeepIfBraces = KeepIfBraces ||
                   (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
                   NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
                   IfBlockKind == IfStmtKind::IfElseIf;
  }

  bool KeepElseBraces = KeepIfBraces;
  FormatToken *ElseLeftBrace = nullptr;
  IfStmtKind Kind = IfStmtKind::IfOnly;

  if (FormatTok->is(tok::kw_else)) {
    if (Style.RemoveBracesLLVM) {
      // Start the else-branch with a fresh nesting flag.
      NestedTooDeep.back() = false;
      Kind = IfStmtKind::IfElse;
    }
    nextToken();
    handleAttributes();
    if (isBlockBegin(*FormatTok)) {
      const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
      FormatTok->setFinalizedType(TT_ElseLBrace);
      ElseLeftBrace = FormatTok;
      CompoundStatementIndenter Indenter(this, Style, Line->Level);
      IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
      FormatToken *IfLBrace =
          parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                     /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
      setPreviousRBraceType(TT_ElseRBrace);
      if (FormatTok->is(tok::kw_else)) {
        KeepElseBraces = KeepElseBraces ||
                         ElseBlockKind == IfStmtKind::IfOnly ||
                         ElseBlockKind == IfStmtKind::IfElseIf;
      } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
        // `else { if (...) { ... } }` where the inner braces stay: the outer
        // else braces are marked optional right away.
        KeepElseBraces = true;
        assert(ElseLeftBrace->MatchingParen);
        markOptionalBraces(ElseLeftBrace);
      }
      addUnwrappedLine();
    } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) {
      const FormatToken *Previous = Tokens->getPreviousToken();
      assert(Previous);
      // A comment between `else` and `if` puts the nested `if` on its own,
      // deeper indented line.
      const bool IsPrecededByComment = Previous->is(tok::comment);
      if (IsPrecededByComment) {
        addUnwrappedLine();
        ++Line->Level;
      }
      bool TooDeep = true;
      if (Style.RemoveBracesLLVM) {
        Kind = IfStmtKind::IfElseIf;
        // The recursive call pushes its own entry; take ours off the stack
        // for its duration and restore it afterwards.
        TooDeep = NestedTooDeep.pop_back_val();
      }
      ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
      if (Style.RemoveBracesLLVM)
        NestedTooDeep.push_back(TooDeep);
      if (IsPrecededByComment)
        --Line->Level;
    } else {
      parseUnbracedBody(/*CheckEOF=*/true);
    }
  } else {
    KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
    if (NeedsUnwrappedLine)
      addUnwrappedLine();
  }

  // Everything below only concerns brace removal.
  if (!Style.RemoveBracesLLVM)
    return nullptr;

  assert(!NestedTooDeep.empty());
  KeepElseBraces = KeepElseBraces ||
                   (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
                   NestedTooDeep.back();

  // Drop the entry pushed by keepAncestorBraces() above.
  NestedTooDeep.pop_back();

  if (!KeepIfBraces && !KeepElseBraces) {
    markOptionalBraces(IfLeftBrace);
    markOptionalBraces(ElseLeftBrace);
  } else if (IfLeftBrace) {
    // The braces stay: unlink the pair, so that later checks of
    // IfLeftBrace->MatchingParen see it as unmatched.
    FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
    if (IfRightBrace) {
      assert(IfRightBrace->MatchingParen == IfLeftBrace);
      assert(!IfLeftBrace->Optional);
      assert(!IfRightBrace->Optional);
      IfLeftBrace->MatchingParen = nullptr;
      IfRightBrace->MatchingParen = nullptr;
    }
  }

  if (IfKind)
    *IfKind = Kind;

  return IfLeftBrace;
}
2876
2877 void UnwrappedLineParser::parseTryCatch() {
2878   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2879   nextToken();
2880   bool NeedsUnwrappedLine = false;
2881   if (FormatTok->is(tok::colon)) {
2882     // We are in a function try block, what comes is an initializer list.
2883     nextToken();
2884
2885     // In case identifiers were removed by clang-tidy, what might follow is
2886     // multiple commas in sequence - before the first identifier.
2887     while (FormatTok->is(tok::comma))
2888       nextToken();
2889
2890     while (FormatTok->is(tok::identifier)) {
2891       nextToken();
2892       if (FormatTok->is(tok::l_paren))
2893         parseParens();
2894       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2895           FormatTok->is(tok::l_brace)) {
2896         do {
2897           nextToken();
2898         } while (FormatTok->isNot(tok::r_brace));
2899         nextToken();
2900       }
2901
2902       // In case identifiers were removed by clang-tidy, what might follow is
2903       // multiple commas in sequence - after the first identifier.
2904       while (FormatTok->is(tok::comma))
2905         nextToken();
2906     }
2907   }
2908   // Parse try with resource.
2909   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2910     parseParens();
2911
2912   keepAncestorBraces();
2913
2914   if (FormatTok->is(tok::l_brace)) {
2915     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2916     parseBlock();
2917     if (Style.BraceWrapping.BeforeCatch)
2918       addUnwrappedLine();
2919     else
2920       NeedsUnwrappedLine = true;
2921   } else if (FormatTok->isNot(tok::kw_catch)) {
2922     // The C++ standard requires a compound-statement after a try.
2923     // If there's none, we try to assume there's a structuralElement
2924     // and try to continue.
2925     addUnwrappedLine();
2926     ++Line->Level;
2927     parseStructuralElement();
2928     --Line->Level;
2929   }
2930   while (true) {
2931     if (FormatTok->is(tok::at))
2932       nextToken();
2933     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2934                              tok::kw___finally) ||
2935           ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2936            FormatTok->is(Keywords.kw_finally)) ||
2937           (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
2938            FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
2939       break;
2940     }
2941     nextToken();
2942     while (FormatTok->isNot(tok::l_brace)) {
2943       if (FormatTok->is(tok::l_paren)) {
2944         parseParens();
2945         continue;
2946       }
2947       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2948         if (Style.RemoveBracesLLVM)
2949           NestedTooDeep.pop_back();
2950         return;
2951       }
2952       nextToken();
2953     }
2954     NeedsUnwrappedLine = false;
2955     Line->MustBeDeclaration = false;
2956     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2957     parseBlock();
2958     if (Style.BraceWrapping.BeforeCatch)
2959       addUnwrappedLine();
2960     else
2961       NeedsUnwrappedLine = true;
2962   }
2963
2964   if (Style.RemoveBracesLLVM)
2965     NestedTooDeep.pop_back();
2966
2967   if (NeedsUnwrappedLine)
2968     addUnwrappedLine();
2969 }
2970
2971 void UnwrappedLineParser::parseNamespace() {
2972   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2973          "'namespace' expected");
2974
2975   const FormatToken &InitialToken = *FormatTok;
2976   nextToken();
2977   if (InitialToken.is(TT_NamespaceMacro)) {
2978     parseParens();
2979   } else {
2980     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2981                               tok::l_square, tok::period, tok::l_paren) ||
2982            (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
2983       if (FormatTok->is(tok::l_square))
2984         parseSquare();
2985       else if (FormatTok->is(tok::l_paren))
2986         parseParens();
2987       else
2988         nextToken();
2989     }
2990   }
2991   if (FormatTok->is(tok::l_brace)) {
2992     FormatTok->setFinalizedType(TT_NamespaceLBrace);
2993
2994     if (ShouldBreakBeforeBrace(Style, InitialToken))
2995       addUnwrappedLine();
2996
2997     unsigned AddLevels =
2998         Style.NamespaceIndentation == FormatStyle::NI_All ||
2999                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
3000                  DeclarationScopeStack.size() > 1)
3001             ? 1u
3002             : 0u;
3003     bool ManageWhitesmithsBraces =
3004         AddLevels == 0u &&
3005         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3006
3007     // If we're in Whitesmiths mode, indent the brace if we're not indenting
3008     // the whole block.
3009     if (ManageWhitesmithsBraces)
3010       ++Line->Level;
3011
3012     // Munch the semicolon after a namespace. This is more common than one would
3013     // think. Putting the semicolon into its own line is very ugly.
3014     parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
3015                /*KeepBraces=*/true, /*IfKind=*/nullptr,
3016                ManageWhitesmithsBraces);
3017
3018     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
3019
3020     if (ManageWhitesmithsBraces)
3021       --Line->Level;
3022   }
3023   // FIXME: Add error handling.
3024 }
3025
3026 void UnwrappedLineParser::parseNew() {
3027   assert(FormatTok->is(tok::kw_new) && "'new' expected");
3028   nextToken();
3029
3030   if (Style.isCSharp()) {
3031     do {
3032       // Handle constructor invocation, e.g. `new(field: value)`.
3033       if (FormatTok->is(tok::l_paren))
3034         parseParens();
3035
3036       // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
3037       if (FormatTok->is(tok::l_brace))
3038         parseBracedList();
3039
3040       if (FormatTok->isOneOf(tok::semi, tok::comma))
3041         return;
3042
3043       nextToken();
3044     } while (!eof());
3045   }
3046
3047   if (Style.Language != FormatStyle::LK_Java)
3048     return;
3049
3050   // In Java, we can parse everything up to the parens, which aren't optional.
3051   do {
3052     // There should not be a ;, { or } before the new's open paren.
3053     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
3054       return;
3055
3056     // Consume the parens.
3057     if (FormatTok->is(tok::l_paren)) {
3058       parseParens();
3059
3060       // If there is a class body of an anonymous class, consume that as child.
3061       if (FormatTok->is(tok::l_brace))
3062         parseChildBlock();
3063       return;
3064     }
3065     nextToken();
3066   } while (!eof());
3067 }
3068
3069 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
3070   keepAncestorBraces();
3071
3072   if (isBlockBegin(*FormatTok)) {
3073     FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3074     FormatToken *LeftBrace = FormatTok;
3075     CompoundStatementIndenter Indenter(this, Style, Line->Level);
3076     parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3077                /*MunchSemi=*/true, KeepBraces);
3078     setPreviousRBraceType(TT_ControlStatementRBrace);
3079     if (!KeepBraces) {
3080       assert(!NestedTooDeep.empty());
3081       if (!NestedTooDeep.back())
3082         markOptionalBraces(LeftBrace);
3083     }
3084     if (WrapRightBrace)
3085       addUnwrappedLine();
3086   } else {
3087     parseUnbracedBody();
3088   }
3089
3090   if (!KeepBraces)
3091     NestedTooDeep.pop_back();
3092 }
3093
3094 void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
3095   assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
3096           (Style.isVerilog() &&
3097            FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
3098                               Keywords.kw_always_ff, Keywords.kw_always_latch,
3099                               Keywords.kw_final, Keywords.kw_initial,
3100                               Keywords.kw_foreach, Keywords.kw_forever,
3101                               Keywords.kw_repeat))) &&
3102          "'for', 'while' or foreach macro expected");
3103   const bool KeepBraces = !Style.RemoveBracesLLVM ||
3104                           !FormatTok->isOneOf(tok::kw_for, tok::kw_while);
3105
3106   nextToken();
3107   // JS' for await ( ...
3108   if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
3109     nextToken();
3110   if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
3111     nextToken();
3112   if (HasParens && FormatTok->is(tok::l_paren)) {
3113     // The type is only set for Verilog basically because we were afraid to
3114     // change the existing behavior for loops. See the discussion on D121756 for
3115     // details.
3116     if (Style.isVerilog())
3117       FormatTok->setFinalizedType(TT_ConditionLParen);
3118     parseParens();
3119   }
3120   // Event control.
3121   if (Style.isVerilog())
3122     parseVerilogSensitivityList();
3123
3124   handleAttributes();
3125   parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
3126 }
3127
3128 void UnwrappedLineParser::parseDoWhile() {
3129   assert(FormatTok->is(tok::kw_do) && "'do' expected");
3130   nextToken();
3131
3132   parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
3133
3134   // FIXME: Add error handling.
3135   if (FormatTok->isNot(tok::kw_while)) {
3136     addUnwrappedLine();
3137     return;
3138   }
3139
3140   FormatTok->setFinalizedType(TT_DoWhile);
3141
3142   // If in Whitesmiths mode, the line with the while() needs to be indented
3143   // to the same level as the block.
3144   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
3145     ++Line->Level;
3146
3147   nextToken();
3148   parseStructuralElement();
3149 }
3150
3151 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
3152   nextToken();
3153   unsigned OldLineLevel = Line->Level;
3154   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
3155     --Line->Level;
3156   if (LeftAlignLabel)
3157     Line->Level = 0;
3158
3159   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
3160       FormatTok->is(tok::l_brace)) {
3161
3162     CompoundStatementIndenter Indenter(this, Line->Level,
3163                                        Style.BraceWrapping.AfterCaseLabel,
3164                                        Style.BraceWrapping.IndentBraces);
3165     parseBlock();
3166     if (FormatTok->is(tok::kw_break)) {
3167       if (Style.BraceWrapping.AfterControlStatement ==
3168           FormatStyle::BWACS_Always) {
3169         addUnwrappedLine();
3170         if (!Style.IndentCaseBlocks &&
3171             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
3172           ++Line->Level;
3173         }
3174       }
3175       parseStructuralElement();
3176     }
3177     addUnwrappedLine();
3178   } else {
3179     if (FormatTok->is(tok::semi))
3180       nextToken();
3181     addUnwrappedLine();
3182   }
3183   Line->Level = OldLineLevel;
3184   if (FormatTok->isNot(tok::l_brace)) {
3185     parseStructuralElement();
3186     addUnwrappedLine();
3187   }
3188 }
3189
3190 void UnwrappedLineParser::parseCaseLabel() {
3191   assert(FormatTok->is(tok::kw_case) && "'case' expected");
3192
3193   // FIXME: fix handling of complex expressions here.
3194   do {
3195     nextToken();
3196     if (FormatTok->is(tok::colon)) {
3197       FormatTok->setFinalizedType(TT_CaseLabelColon);
3198       break;
3199     }
3200   } while (!eof());
3201   parseLabel();
3202 }
3203
3204 void UnwrappedLineParser::parseSwitch() {
3205   assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3206   nextToken();
3207   if (FormatTok->is(tok::l_paren))
3208     parseParens();
3209
3210   keepAncestorBraces();
3211
3212   if (FormatTok->is(tok::l_brace)) {
3213     CompoundStatementIndenter Indenter(this, Style, Line->Level);
3214     FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3215     parseBlock();
3216     setPreviousRBraceType(TT_ControlStatementRBrace);
3217     addUnwrappedLine();
3218   } else {
3219     addUnwrappedLine();
3220     ++Line->Level;
3221     parseStructuralElement();
3222     --Line->Level;
3223   }
3224
3225   if (Style.RemoveBracesLLVM)
3226     NestedTooDeep.pop_back();
3227 }
3228
3229 // Operators that can follow a C variable.
3230 static bool isCOperatorFollowingVar(tok::TokenKind kind) {
3231   switch (kind) {
3232   case tok::ampamp:
3233   case tok::ampequal:
3234   case tok::arrow:
3235   case tok::caret:
3236   case tok::caretequal:
3237   case tok::comma:
3238   case tok::ellipsis:
3239   case tok::equal:
3240   case tok::equalequal:
3241   case tok::exclaim:
3242   case tok::exclaimequal:
3243   case tok::greater:
3244   case tok::greaterequal:
3245   case tok::greatergreater:
3246   case tok::greatergreaterequal:
3247   case tok::l_paren:
3248   case tok::l_square:
3249   case tok::less:
3250   case tok::lessequal:
3251   case tok::lessless:
3252   case tok::lesslessequal:
3253   case tok::minus:
3254   case tok::minusequal:
3255   case tok::minusminus:
3256   case tok::percent:
3257   case tok::percentequal:
3258   case tok::period:
3259   case tok::pipe:
3260   case tok::pipeequal:
3261   case tok::pipepipe:
3262   case tok::plus:
3263   case tok::plusequal:
3264   case tok::plusplus:
3265   case tok::question:
3266   case tok::r_brace:
3267   case tok::r_paren:
3268   case tok::r_square:
3269   case tok::semi:
3270   case tok::slash:
3271   case tok::slashequal:
3272   case tok::star:
3273   case tok::starequal:
3274     return true;
3275   default:
3276     return false;
3277   }
3278 }
3279
3280 void UnwrappedLineParser::parseAccessSpecifier() {
3281   FormatToken *AccessSpecifierCandidate = FormatTok;
3282   nextToken();
3283   // Understand Qt's slots.
3284   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
3285     nextToken();
3286   // Otherwise, we don't know what it is, and we'd better keep the next token.
3287   if (FormatTok->is(tok::colon)) {
3288     nextToken();
3289     addUnwrappedLine();
3290   } else if (FormatTok->isNot(tok::coloncolon) &&
3291              !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
3292     // Not a variable name nor namespace name.
3293     addUnwrappedLine();
3294   } else if (AccessSpecifierCandidate) {
3295     // Consider the access specifier to be a C identifier.
3296     AccessSpecifierCandidate->Tok.setKind(tok::identifier);
3297   }
3298 }
3299
/// \brief Parses a requires, decides if it is a clause or an expression.
/// \pre The current token has to be the requires keyword.
/// \returns true if it parsed a clause.
///
/// The decision is made in three steps: first by the token right after the
/// keyword, then by the token in front of the keyword, and finally - if both
/// are inconclusive - by scanning ahead through the parenthesized tokens for
/// something that looks like a parameter declaration. The token stream is
/// rewound to the stored position before the actual parsing starts.
bool clang::format::UnwrappedLineParser::parseRequires() {
  assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
  auto RequiresToken = FormatTok;

  // We try to guess if it is a requires clause, or a requires expression. For
  // that we first consume the keyword and check the next token.
  nextToken();

  switch (FormatTok->Tok.getKind()) {
  case tok::l_brace:
    // This can only be an expression, never a clause.
    parseRequiresExpression(RequiresToken);
    return false;
  case tok::l_paren:
    // Clauses and expression can start with a paren, it's unclear what we have.
    break;
  default:
    // All other tokens can only be a clause.
    parseRequiresClause(RequiresToken);
    return true;
  }

  // Looking forward we would have to decide if there are function declaration
  // like arguments to the requires expression:
  // requires (T t) {
  // Or there is a constraint expression for the requires clause:
  // requires (C<T> && ...

  // But first let's look behind.
  auto *PreviousNonComment = RequiresToken->getPreviousNonComment();

  if (!PreviousNonComment ||
      PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
    // If there is no token, or an expression left brace, we are a requires
    // clause within a requires expression.
    parseRequiresClause(RequiresToken);
    return true;
  }

  switch (PreviousNonComment->Tok.getKind()) {
  case tok::greater:
  case tok::r_paren:
  case tok::kw_noexcept:
  case tok::kw_const:
    // This is a requires clause.
    parseRequiresClause(RequiresToken);
    return true;
  case tok::amp:
  case tok::ampamp: {
    // This can be either:
    // if (... && requires (T t) ...)
    // Or
    // void member(...) && requires (C<T> ...
    // We check the one token before that for a const:
    // void member(...) const && requires (C<T> ...
    auto PrevPrev = PreviousNonComment->getPreviousNonComment();
    if (PrevPrev && PrevPrev->is(tok::kw_const)) {
      parseRequiresClause(RequiresToken);
      return true;
    }
    // Still undecided, fall through to the lookahead below.
    break;
  }
  default:
    if (PreviousNonComment->isTypeOrIdentifier()) {
      // This is a requires clause.
      parseRequiresClause(RequiresToken);
      return true;
    }
    // It's an expression.
    parseRequiresExpression(RequiresToken);
    return false;
  }

  // Now we look forward and try to check if the paren content is a parameter
  // list. The parameters can be cv-qualified and contain references or
  // pointers.
  // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
  // of stuff: typename, const, *, &, &&, ::, identifiers.

  // Remember where we are, so that the stream can be rewound after peeking.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *NextToken = Tokens->getNextToken();
  int Lookahead = 0;
  auto PeekNext = [&Lookahead, &NextToken, this] {
    ++Lookahead;
    NextToken = Tokens->getNextToken();
  };

  // Whether an identifier (a candidate type name) has been seen already.
  bool FoundType = false;
  // Whether the previous relevant token was `::`, i.e. a following identifier
  // continues a qualified name rather than naming a parameter.
  bool LastWasColonColon = false;
  // Nesting depth of `<` ... `>`; may go negative on stray `>`.
  int OpenAngles = 0;

  // Inspect at most 50 tokens.
  for (; Lookahead < 50; PeekNext()) {
    switch (NextToken->Tok.getKind()) {
    case tok::kw_volatile:
    case tok::kw_const:
    case tok::comma:
      // Outside of angle brackets these indicate a parameter list.
      if (OpenAngles == 0) {
        FormatTok = Tokens->setPosition(StoredPosition);
        parseRequiresExpression(RequiresToken);
        return false;
      }
      break;
    case tok::r_paren:
    case tok::pipepipe:
      // Reaching these without a parameter-like pattern means a constraint.
      FormatTok = Tokens->setPosition(StoredPosition);
      parseRequiresClause(RequiresToken);
      return true;
    case tok::eof:
      // Break out of the loop.
      Lookahead = 50;
      break;
    case tok::coloncolon:
      LastWasColonColon = true;
      break;
    case tok::identifier:
      // A second identifier that is neither qualified nor inside angle
      // brackets is taken as TYPE NAME.
      if (FoundType && !LastWasColonColon && OpenAngles == 0) {
        FormatTok = Tokens->setPosition(StoredPosition);
        parseRequiresExpression(RequiresToken);
        return false;
      }
      FoundType = true;
      LastWasColonColon = false;
      break;
    case tok::less:
      ++OpenAngles;
      break;
    case tok::greater:
      --OpenAngles;
      break;
    default:
      if (NextToken->isSimpleTypeSpecifier()) {
        FormatTok = Tokens->setPosition(StoredPosition);
        parseRequiresExpression(RequiresToken);
        return false;
      }
      break;
    }
  }
  // This seems to be a complicated expression, just assume it's a clause.
  FormatTok = Tokens->setPosition(StoredPosition);
  parseRequiresClause(RequiresToken);
  return true;
}
3446
3447 /// \brief Parses a requires clause.
3448 /// \param RequiresToken The requires keyword token, which starts this clause.
3449 /// \pre We need to be on the next token after the requires keyword.
3450 /// \sa parseRequiresExpression
3451 ///
3452 /// Returns if it either has finished parsing the clause, or it detects, that
3453 /// the clause is incorrect.
3454 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3455   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3456   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3457
3458   // If there is no previous token, we are within a requires expression,
3459   // otherwise we will always have the template or function declaration in front
3460   // of it.
3461   bool InRequiresExpression =
3462       !RequiresToken->Previous ||
3463       RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3464
3465   RequiresToken->setFinalizedType(InRequiresExpression
3466                                       ? TT_RequiresClauseInARequiresExpression
3467                                       : TT_RequiresClause);
3468
3469   // NOTE: parseConstraintExpression is only ever called from this function.
3470   // It could be inlined into here.
3471   parseConstraintExpression();
3472
3473   if (!InRequiresExpression)
3474     FormatTok->Previous->ClosesRequiresClause = true;
3475 }
3476
3477 /// \brief Parses a requires expression.
3478 /// \param RequiresToken The requires keyword token, which starts this clause.
3479 /// \pre We need to be on the next token after the requires keyword.
3480 /// \sa parseRequiresClause
3481 ///
3482 /// Returns if it either has finished parsing the expression, or it detects,
3483 /// that the expression is incorrect.
3484 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3485   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3486   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3487
3488   RequiresToken->setFinalizedType(TT_RequiresExpression);
3489
3490   if (FormatTok->is(tok::l_paren)) {
3491     FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3492     parseParens();
3493   }
3494
3495   if (FormatTok->is(tok::l_brace)) {
3496     FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3497     parseChildBlock();
3498   }
3499 }
3500
3501 /// \brief Parses a constraint expression.
3502 ///
3503 /// This is the body of a requires clause. It returns, when the parsing is
3504 /// complete, or the expression is incorrect.
3505 void UnwrappedLineParser::parseConstraintExpression() {
3506   // The special handling for lambdas is needed since tryToParseLambda() eats a
3507   // token and if a requires expression is the last part of a requires clause
3508   // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3509   // not set on the correct token. Thus we need to be aware if we even expect a
3510   // lambda to be possible.
3511   // template <typename T> requires requires { ... } [[nodiscard]] ...;
3512   bool LambdaNextTimeAllowed = true;
3513
3514   // Within lambda declarations, it is permitted to put a requires clause after
3515   // its template parameter list, which would place the requires clause right
3516   // before the parentheses of the parameters of the lambda declaration. Thus,
3517   // we track if we expect to see grouping parentheses at all.
3518   // Without this check, `requires foo<T> (T t)` in the below example would be
3519   // seen as the whole requires clause, accidentally eating the parameters of
3520   // the lambda.
3521   // [&]<typename T> requires foo<T> (T t) { ... };
3522   bool TopLevelParensAllowed = true;
3523
3524   do {
3525     bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);
3526
3527     switch (FormatTok->Tok.getKind()) {
3528     case tok::kw_requires: {
3529       auto RequiresToken = FormatTok;
3530       nextToken();
3531       parseRequiresExpression(RequiresToken);
3532       break;
3533     }
3534
3535     case tok::l_paren:
3536       if (!TopLevelParensAllowed)
3537         return;
3538       parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
3539       TopLevelParensAllowed = false;
3540       break;
3541
3542     case tok::l_square:
3543       if (!LambdaThisTimeAllowed || !tryToParseLambda())
3544         return;
3545       break;
3546
3547     case tok::kw_const:
3548     case tok::semi:
3549     case tok::kw_class:
3550     case tok::kw_struct:
3551     case tok::kw_union:
3552       return;
3553
3554     case tok::l_brace:
3555       // Potential function body.
3556       return;
3557
3558     case tok::ampamp:
3559     case tok::pipepipe:
3560       FormatTok->setFinalizedType(TT_BinaryOperator);
3561       nextToken();
3562       LambdaNextTimeAllowed = true;
3563       TopLevelParensAllowed = true;
3564       break;
3565
3566     case tok::comma:
3567     case tok::comment:
3568       LambdaNextTimeAllowed = LambdaThisTimeAllowed;
3569       nextToken();
3570       break;
3571
3572     case tok::kw_sizeof:
3573     case tok::greater:
3574     case tok::greaterequal:
3575     case tok::greatergreater:
3576     case tok::less:
3577     case tok::lessequal:
3578     case tok::lessless:
3579     case tok::equalequal:
3580     case tok::exclaim:
3581     case tok::exclaimequal:
3582     case tok::plus:
3583     case tok::minus:
3584     case tok::star:
3585     case tok::slash:
3586       LambdaNextTimeAllowed = true;
3587       TopLevelParensAllowed = true;
3588       // Just eat them.
3589       nextToken();
3590       break;
3591
3592     case tok::numeric_constant:
3593     case tok::coloncolon:
3594     case tok::kw_true:
3595     case tok::kw_false:
3596       TopLevelParensAllowed = false;
3597       // Just eat them.
3598       nextToken();
3599       break;
3600
3601     case tok::kw_static_cast:
3602     case tok::kw_const_cast:
3603     case tok::kw_reinterpret_cast:
3604     case tok::kw_dynamic_cast:
3605       nextToken();
3606       if (FormatTok->isNot(tok::less))
3607         return;
3608
3609       nextToken();
3610       parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
3611                       /*ClosingBraceKind=*/tok::greater);
3612       break;
3613
3614     default:
3615       if (!FormatTok->Tok.getIdentifierInfo()) {
3616         // Identifiers are part of the default case, we check for more then
3617         // tok::identifier to handle builtin type traits.
3618         return;
3619       }
3620
3621       // We need to differentiate identifiers for a template deduction guide,
3622       // variables, or function return types (the constraint expression has
3623       // ended before that), and basically all other cases. But it's easier to
3624       // check the other way around.
3625       assert(FormatTok->Previous);
3626       switch (FormatTok->Previous->Tok.getKind()) {
3627       case tok::coloncolon:  // Nested identifier.
3628       case tok::ampamp:      // Start of a function or variable for the
3629       case tok::pipepipe:    // constraint expression. (binary)
3630       case tok::exclaim:     // The same as above, but unary.
3631       case tok::kw_requires: // Initial identifier of a requires clause.
3632       case tok::equal:       // Initial identifier of a concept declaration.
3633         break;
3634       default:
3635         return;
3636       }
3637
3638       // Read identifier with optional template declaration.
3639       nextToken();
3640       if (FormatTok->is(tok::less)) {
3641         nextToken();
3642         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
3643                         /*ClosingBraceKind=*/tok::greater);
3644       }
3645       TopLevelParensAllowed = false;
3646       break;
3647     }
3648   } while (!eof());
3649 }
3650
3651 bool UnwrappedLineParser::parseEnum() {
3652   const FormatToken &InitialToken = *FormatTok;
3653
3654   // Won't be 'enum' for NS_ENUMs.
3655   if (FormatTok->is(tok::kw_enum))
3656     nextToken();
3657
3658   // In TypeScript, "enum" can also be used as property name, e.g. in interface
3659   // declarations. An "enum" keyword followed by a colon would be a syntax
3660   // error and thus assume it is just an identifier.
3661   if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
3662     return false;
3663
3664   // In protobuf, "enum" can be used as a field name.
3665   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
3666     return false;
3667
3668   // Eat up enum class ...
3669   if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
3670     nextToken();
3671
3672   while (FormatTok->Tok.getIdentifierInfo() ||
3673          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
3674                             tok::greater, tok::comma, tok::question,
3675                             tok::l_square, tok::r_square)) {
3676     if (Style.isVerilog()) {
3677       FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
3678       nextToken();
3679       // In Verilog the base type can have dimensions.
3680       while (FormatTok->is(tok::l_square))
3681         parseSquare();
3682     } else {
3683       nextToken();
3684     }
3685     // We can have macros or attributes in between 'enum' and the enum name.
3686     if (FormatTok->is(tok::l_paren))
3687       parseParens();
3688     assert(FormatTok->isNot(TT_AttributeSquare));
3689     if (FormatTok->is(tok::identifier)) {
3690       nextToken();
3691       // If there are two identifiers in a row, this is likely an elaborate
3692       // return type. In Java, this can be "implements", etc.
3693       if (Style.isCpp() && FormatTok->is(tok::identifier))
3694         return false;
3695     }
3696   }
3697
3698   // Just a declaration or something is wrong.
3699   if (FormatTok->isNot(tok::l_brace))
3700     return true;
3701   FormatTok->setFinalizedType(TT_EnumLBrace);
3702   FormatTok->setBlockKind(BK_Block);
3703
3704   if (Style.Language == FormatStyle::LK_Java) {
3705     // Java enums are different.
3706     parseJavaEnumBody();
3707     return true;
3708   }
3709   if (Style.Language == FormatStyle::LK_Proto) {
3710     parseBlock(/*MustBeDeclaration=*/true);
3711     return true;
3712   }
3713
3714   if (!Style.AllowShortEnumsOnASingleLine &&
3715       ShouldBreakBeforeBrace(Style, InitialToken)) {
3716     addUnwrappedLine();
3717   }
3718   // Parse enum body.
3719   nextToken();
3720   if (!Style.AllowShortEnumsOnASingleLine) {
3721     addUnwrappedLine();
3722     Line->Level += 1;
3723   }
3724   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
3725                                    /*IsEnum=*/true);
3726   if (!Style.AllowShortEnumsOnASingleLine)
3727     Line->Level -= 1;
3728   if (HasError) {
3729     if (FormatTok->is(tok::semi))
3730       nextToken();
3731     addUnwrappedLine();
3732   }
3733   setPreviousRBraceType(TT_EnumRBrace);
3734   return true;
3735
3736   // There is no addUnwrappedLine() here so that we fall through to parsing a
3737   // structural element afterwards. Thus, in "enum A {} n, m;",
3738   // "} n, m;" will end up in one unwrapped line.
3739 }
3740
3741 bool UnwrappedLineParser::parseStructLike() {
3742   // parseRecord falls through and does not yet add an unwrapped line as a
3743   // record declaration or definition can start a structural element.
3744   parseRecord();
3745   // This does not apply to Java, JavaScript and C#.
3746   if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3747       Style.isCSharp()) {
3748     if (FormatTok->is(tok::semi))
3749       nextToken();
3750     addUnwrappedLine();
3751     return true;
3752   }
3753   return false;
3754 }
3755
3756 namespace {
3757 // A class used to set and restore the Token position when peeking
3758 // ahead in the token source.
3759 class ScopedTokenPosition {
3760   unsigned StoredPosition;
3761   FormatTokenSource *Tokens;
3762
3763 public:
3764   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3765     assert(Tokens && "Tokens expected to not be null");
3766     StoredPosition = Tokens->getPosition();
3767   }
3768
3769   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3770 };
3771 } // namespace
3772
3773 // Look to see if we have [[ by looking ahead, if
3774 // its not then rewind to the original position.
3775 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3776   ScopedTokenPosition AutoPosition(Tokens);
3777   FormatToken *Tok = Tokens->getNextToken();
3778   // We already read the first [ check for the second.
3779   if (Tok->isNot(tok::l_square))
3780     return false;
3781   // Double check that the attribute is just something
3782   // fairly simple.
3783   while (Tok->isNot(tok::eof)) {
3784     if (Tok->is(tok::r_square))
3785       break;
3786     Tok = Tokens->getNextToken();
3787   }
3788   if (Tok->is(tok::eof))
3789     return false;
3790   Tok = Tokens->getNextToken();
3791   if (Tok->isNot(tok::r_square))
3792     return false;
3793   Tok = Tokens->getNextToken();
3794   if (Tok->is(tok::semi))
3795     return false;
3796   return true;
3797 }
3798
3799 void UnwrappedLineParser::parseJavaEnumBody() {
3800   assert(FormatTok->is(tok::l_brace));
3801   const FormatToken *OpeningBrace = FormatTok;
3802
3803   // Determine whether the enum is simple, i.e. does not have a semicolon or
3804   // constants with class bodies. Simple enums can be formatted like braced
3805   // lists, contracted to a single line, etc.
3806   unsigned StoredPosition = Tokens->getPosition();
3807   bool IsSimple = true;
3808   FormatToken *Tok = Tokens->getNextToken();
3809   while (Tok->isNot(tok::eof)) {
3810     if (Tok->is(tok::r_brace))
3811       break;
3812     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3813       IsSimple = false;
3814       break;
3815     }
3816     // FIXME: This will also mark enums with braces in the arguments to enum
3817     // constants as "not simple". This is probably fine in practice, though.
3818     Tok = Tokens->getNextToken();
3819   }
3820   FormatTok = Tokens->setPosition(StoredPosition);
3821
3822   if (IsSimple) {
3823     nextToken();
3824     parseBracedList();
3825     addUnwrappedLine();
3826     return;
3827   }
3828
3829   // Parse the body of a more complex enum.
3830   // First add a line for everything up to the "{".
3831   nextToken();
3832   addUnwrappedLine();
3833   ++Line->Level;
3834
3835   // Parse the enum constants.
3836   while (!eof()) {
3837     if (FormatTok->is(tok::l_brace)) {
3838       // Parse the constant's class body.
3839       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3840                  /*MunchSemi=*/false);
3841     } else if (FormatTok->is(tok::l_paren)) {
3842       parseParens();
3843     } else if (FormatTok->is(tok::comma)) {
3844       nextToken();
3845       addUnwrappedLine();
3846     } else if (FormatTok->is(tok::semi)) {
3847       nextToken();
3848       addUnwrappedLine();
3849       break;
3850     } else if (FormatTok->is(tok::r_brace)) {
3851       addUnwrappedLine();
3852       break;
3853     } else {
3854       nextToken();
3855     }
3856   }
3857
3858   // Parse the class body after the enum's ";" if any.
3859   parseLevel(OpeningBrace);
3860   nextToken();
3861   --Line->Level;
3862   addUnwrappedLine();
3863 }
3864
3865 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
3866   const FormatToken &InitialToken = *FormatTok;
3867   nextToken();
3868
3869   // The actual identifier can be a nested name specifier, and in macros
3870   // it is often token-pasted.
3871   // An [[attribute]] can be before the identifier.
3872   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
3873                             tok::kw_alignas, tok::l_square) ||
3874          FormatTok->isAttribute() ||
3875          ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3876           FormatTok->isOneOf(tok::period, tok::comma))) {
3877     if (Style.isJavaScript() &&
3878         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
3879       // JavaScript/TypeScript supports inline object types in
3880       // extends/implements positions:
3881       //     class Foo implements {bar: number} { }
3882       nextToken();
3883       if (FormatTok->is(tok::l_brace)) {
3884         tryToParseBracedList();
3885         continue;
3886       }
3887     }
3888     if (FormatTok->is(tok::l_square) && handleCppAttributes())
3889       continue;
3890     bool IsNonMacroIdentifier =
3891         FormatTok->is(tok::identifier) &&
3892         FormatTok->TokenText != FormatTok->TokenText.upper();
3893     nextToken();
3894     // We can have macros in between 'class' and the class name.
3895     if (!IsNonMacroIdentifier && FormatTok->is(tok::l_paren))
3896       parseParens();
3897   }
3898
3899   // Note that parsing away template declarations here leads to incorrectly
3900   // accepting function declarations as record declarations.
3901   // In general, we cannot solve this problem. Consider:
3902   // class A<int> B() {}
3903   // which can be a function definition or a class definition when B() is a
3904   // macro. If we find enough real-world cases where this is a problem, we
3905   // can parse for the 'template' keyword in the beginning of the statement,
3906   // and thus rule out the record production in case there is no template
3907   // (this would still leave us with an ambiguity between template function
3908   // and class declarations).
3909   if (FormatTok->isOneOf(tok::colon, tok::less)) {
3910     do {
3911       if (FormatTok->is(tok::l_brace)) {
3912         calculateBraceTypes(/*ExpectClassBody=*/true);
3913         if (!tryToParseBracedList())
3914           break;
3915       }
3916       if (FormatTok->is(tok::l_square)) {
3917         FormatToken *Previous = FormatTok->Previous;
3918         if (!Previous ||
3919             !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
3920           // Don't try parsing a lambda if we had a closing parenthesis before,
3921           // it was probably a pointer to an array: int (*)[].
3922           if (!tryToParseLambda())
3923             continue;
3924         } else {
3925           parseSquare();
3926           continue;
3927         }
3928       }
3929       if (FormatTok->is(tok::semi))
3930         return;
3931       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
3932         addUnwrappedLine();
3933         nextToken();
3934         parseCSharpGenericTypeConstraint();
3935         break;
3936       }
3937       nextToken();
3938     } while (!eof());
3939   }
3940
3941   auto GetBraceTypes =
3942       [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> {
3943     switch (RecordTok.Tok.getKind()) {
3944     case tok::kw_class:
3945       return {TT_ClassLBrace, TT_ClassRBrace};
3946     case tok::kw_struct:
3947       return {TT_StructLBrace, TT_StructRBrace};
3948     case tok::kw_union:
3949       return {TT_UnionLBrace, TT_UnionRBrace};
3950     default:
3951       // Useful for e.g. interface.
3952       return {TT_RecordLBrace, TT_RecordRBrace};
3953     }
3954   };
3955   if (FormatTok->is(tok::l_brace)) {
3956     auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken);
3957     FormatTok->setFinalizedType(OpenBraceType);
3958     if (ParseAsExpr) {
3959       parseChildBlock();
3960     } else {
3961       if (ShouldBreakBeforeBrace(Style, InitialToken))
3962         addUnwrappedLine();
3963
3964       unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
3965       parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
3966     }
3967     setPreviousRBraceType(ClosingBraceType);
3968   }
3969   // There is no addUnwrappedLine() here so that we fall through to parsing a
3970   // structural element afterwards. Thus, in "class A {} n, m;",
3971   // "} n, m;" will end up in one unwrapped line.
3972 }
3973
3974 void UnwrappedLineParser::parseObjCMethod() {
3975   assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
3976          "'(' or identifier expected.");
3977   do {
3978     if (FormatTok->is(tok::semi)) {
3979       nextToken();
3980       addUnwrappedLine();
3981       return;
3982     } else if (FormatTok->is(tok::l_brace)) {
3983       if (Style.BraceWrapping.AfterFunction)
3984         addUnwrappedLine();
3985       parseBlock();
3986       addUnwrappedLine();
3987       return;
3988     } else {
3989       nextToken();
3990     }
3991   } while (!eof());
3992 }
3993
3994 void UnwrappedLineParser::parseObjCProtocolList() {
3995   assert(FormatTok->is(tok::less) && "'<' expected.");
3996   do {
3997     nextToken();
3998     // Early exit in case someone forgot a close angle.
3999     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4000         FormatTok->isObjCAtKeyword(tok::objc_end)) {
4001       return;
4002     }
4003   } while (!eof() && FormatTok->isNot(tok::greater));
4004   nextToken(); // Skip '>'.
4005 }
4006
4007 void UnwrappedLineParser::parseObjCUntilAtEnd() {
4008   do {
4009     if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
4010       nextToken();
4011       addUnwrappedLine();
4012       break;
4013     }
4014     if (FormatTok->is(tok::l_brace)) {
4015       parseBlock();
4016       // In ObjC interfaces, nothing should be following the "}".
4017       addUnwrappedLine();
4018     } else if (FormatTok->is(tok::r_brace)) {
4019       // Ignore stray "}". parseStructuralElement doesn't consume them.
4020       nextToken();
4021       addUnwrappedLine();
4022     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
4023       nextToken();
4024       parseObjCMethod();
4025     } else {
4026       parseStructuralElement();
4027     }
4028   } while (!eof());
4029 }
4030
4031 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
4032   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
4033          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
4034   nextToken();
4035   nextToken(); // interface name
4036
4037   // @interface can be followed by a lightweight generic
4038   // specialization list, then either a base class or a category.
4039   if (FormatTok->is(tok::less))
4040     parseObjCLightweightGenerics();
4041   if (FormatTok->is(tok::colon)) {
4042     nextToken();
4043     nextToken(); // base class name
4044     // The base class can also have lightweight generics applied to it.
4045     if (FormatTok->is(tok::less))
4046       parseObjCLightweightGenerics();
4047   } else if (FormatTok->is(tok::l_paren)) {
4048     // Skip category, if present.
4049     parseParens();
4050   }
4051
4052   if (FormatTok->is(tok::less))
4053     parseObjCProtocolList();
4054
4055   if (FormatTok->is(tok::l_brace)) {
4056     if (Style.BraceWrapping.AfterObjCDeclaration)
4057       addUnwrappedLine();
4058     parseBlock(/*MustBeDeclaration=*/true);
4059   }
4060
4061   // With instance variables, this puts '}' on its own line.  Without instance
4062   // variables, this ends the @interface line.
4063   addUnwrappedLine();
4064
4065   parseObjCUntilAtEnd();
4066 }
4067
4068 void UnwrappedLineParser::parseObjCLightweightGenerics() {
4069   assert(FormatTok->is(tok::less));
4070   // Unlike protocol lists, generic parameterizations support
4071   // nested angles:
4072   //
4073   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4074   //     NSObject <NSCopying, NSSecureCoding>
4075   //
4076   // so we need to count how many open angles we have left.
4077   unsigned NumOpenAngles = 1;
4078   do {
4079     nextToken();
4080     // Early exit in case someone forgot a close angle.
4081     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4082         FormatTok->isObjCAtKeyword(tok::objc_end)) {
4083       break;
4084     }
4085     if (FormatTok->is(tok::less)) {
4086       ++NumOpenAngles;
4087     } else if (FormatTok->is(tok::greater)) {
4088       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
4089       --NumOpenAngles;
4090     }
4091   } while (!eof() && NumOpenAngles != 0);
4092   nextToken(); // Skip '>'.
4093 }
4094
4095 // Returns true for the declaration/definition form of @protocol,
4096 // false for the expression form.
4097 bool UnwrappedLineParser::parseObjCProtocol() {
4098   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
4099   nextToken();
4100
4101   if (FormatTok->is(tok::l_paren)) {
4102     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4103     return false;
4104   }
4105
4106   // The definition/declaration form,
4107   // @protocol Foo
4108   // - (int)someMethod;
4109   // @end
4110
4111   nextToken(); // protocol name
4112
4113   if (FormatTok->is(tok::less))
4114     parseObjCProtocolList();
4115
4116   // Check for protocol declaration.
4117   if (FormatTok->is(tok::semi)) {
4118     nextToken();
4119     addUnwrappedLine();
4120     return true;
4121   }
4122
4123   addUnwrappedLine();
4124   parseObjCUntilAtEnd();
4125   return true;
4126 }
4127
4128 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
4129   bool IsImport = FormatTok->is(Keywords.kw_import);
4130   assert(IsImport || FormatTok->is(tok::kw_export));
4131   nextToken();
4132
4133   // Consume the "default" in "export default class/function".
4134   if (FormatTok->is(tok::kw_default))
4135     nextToken();
4136
4137   // Consume "async function", "function" and "default function", so that these
4138   // get parsed as free-standing JS functions, i.e. do not require a trailing
4139   // semicolon.
4140   if (FormatTok->is(Keywords.kw_async))
4141     nextToken();
4142   if (FormatTok->is(Keywords.kw_function)) {
4143     nextToken();
4144     return;
4145   }
4146
4147   // For imports, `export *`, `export {...}`, consume the rest of the line up
4148   // to the terminating `;`. For everything else, just return and continue
4149   // parsing the structural element, i.e. the declaration or expression for
4150   // `export default`.
4151   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
4152       !FormatTok->isStringLiteral() &&
4153       !(FormatTok->is(Keywords.kw_type) &&
4154         Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) {
4155     return;
4156   }
4157
4158   while (!eof()) {
4159     if (FormatTok->is(tok::semi))
4160       return;
4161     if (Line->Tokens.empty()) {
4162       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
4163       // import statement should terminate.
4164       return;
4165     }
4166     if (FormatTok->is(tok::l_brace)) {
4167       FormatTok->setBlockKind(BK_Block);
4168       nextToken();
4169       parseBracedList();
4170     } else {
4171       nextToken();
4172     }
4173   }
4174 }
4175
4176 void UnwrappedLineParser::parseStatementMacro() {
4177   nextToken();
4178   if (FormatTok->is(tok::l_paren))
4179     parseParens();
4180   if (FormatTok->is(tok::semi))
4181     nextToken();
4182   addUnwrappedLine();
4183 }
4184
4185 void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4186   // consume things like a::`b.c[d:e] or a::*
4187   while (true) {
4188     if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar,
4189                            tok::coloncolon, tok::hash) ||
4190         Keywords.isVerilogIdentifier(*FormatTok)) {
4191       nextToken();
4192     } else if (FormatTok->is(tok::l_square)) {
4193       parseSquare();
4194     } else {
4195       break;
4196     }
4197   }
4198 }
4199
4200 void UnwrappedLineParser::parseVerilogSensitivityList() {
4201   if (FormatTok->isNot(tok::at))
4202     return;
4203   nextToken();
4204   // A block event expression has 2 at signs.
4205   if (FormatTok->is(tok::at))
4206     nextToken();
4207   switch (FormatTok->Tok.getKind()) {
4208   case tok::star:
4209     nextToken();
4210     break;
4211   case tok::l_paren:
4212     parseParens();
4213     break;
4214   default:
4215     parseVerilogHierarchyIdentifier();
4216     break;
4217   }
4218 }
4219
4220 unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
4221   unsigned AddLevels = 0;
4222
4223   if (FormatTok->is(Keywords.kw_clocking)) {
4224     nextToken();
4225     if (Keywords.isVerilogIdentifier(*FormatTok))
4226       nextToken();
4227     parseVerilogSensitivityList();
4228     if (FormatTok->is(tok::semi))
4229       nextToken();
4230   } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex,
4231                                 Keywords.kw_casez, Keywords.kw_randcase,
4232                                 Keywords.kw_randsequence)) {
4233     if (Style.IndentCaseLabels)
4234       AddLevels++;
4235     nextToken();
4236     if (FormatTok->is(tok::l_paren)) {
4237       FormatTok->setFinalizedType(TT_ConditionLParen);
4238       parseParens();
4239     }
4240     if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches))
4241       nextToken();
4242     // The case header has no semicolon.
4243   } else {
4244     // "module" etc.
4245     nextToken();
4246     // all the words like the name of the module and specifiers like
4247     // "automatic" and the width of function return type
4248     while (true) {
4249       if (FormatTok->is(tok::l_square)) {
4250         auto Prev = FormatTok->getPreviousNonComment();
4251         if (Prev && Keywords.isVerilogIdentifier(*Prev))
4252           Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
4253         parseSquare();
4254       } else if (Keywords.isVerilogIdentifier(*FormatTok) ||
4255                  FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) {
4256         nextToken();
4257       } else {
4258         break;
4259       }
4260     }
4261
4262     auto NewLine = [this]() {
4263       addUnwrappedLine();
4264       Line->IsContinuation = true;
4265     };
4266
4267     // package imports
4268     while (FormatTok->is(Keywords.kw_import)) {
4269       NewLine();
4270       nextToken();
4271       parseVerilogHierarchyIdentifier();
4272       if (FormatTok->is(tok::semi))
4273         nextToken();
4274     }
4275
4276     // parameters and ports
4277     if (FormatTok->is(Keywords.kw_verilogHash)) {
4278       NewLine();
4279       nextToken();
4280       if (FormatTok->is(tok::l_paren)) {
4281         FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4282         parseParens();
4283       }
4284     }
4285     if (FormatTok->is(tok::l_paren)) {
4286       NewLine();
4287       FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4288       parseParens();
4289     }
4290
4291     // extends and implements
4292     if (FormatTok->is(Keywords.kw_extends)) {
4293       NewLine();
4294       nextToken();
4295       parseVerilogHierarchyIdentifier();
4296       if (FormatTok->is(tok::l_paren))
4297         parseParens();
4298     }
4299     if (FormatTok->is(Keywords.kw_implements)) {
4300       NewLine();
4301       do {
4302         nextToken();
4303         parseVerilogHierarchyIdentifier();
4304       } while (FormatTok->is(tok::comma));
4305     }
4306
4307     // Coverage event for cover groups.
4308     if (FormatTok->is(tok::at)) {
4309       NewLine();
4310       parseVerilogSensitivityList();
4311     }
4312
4313     if (FormatTok->is(tok::semi))
4314       nextToken(/*LevelDifference=*/1);
4315     addUnwrappedLine();
4316   }
4317
4318   return AddLevels;
4319 }
4320
4321 void UnwrappedLineParser::parseVerilogTable() {
4322   assert(FormatTok->is(Keywords.kw_table));
4323   nextToken(/*LevelDifference=*/1);
4324   addUnwrappedLine();
4325
4326   auto InitialLevel = Line->Level++;
4327   while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) {
4328     FormatToken *Tok = FormatTok;
4329     nextToken();
4330     if (Tok->is(tok::semi))
4331       addUnwrappedLine();
4332     else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus))
4333       Tok->setFinalizedType(TT_VerilogTableItem);
4334   }
4335   Line->Level = InitialLevel;
4336   nextToken(/*LevelDifference=*/-1);
4337   addUnwrappedLine();
4338 }
4339
4340 void UnwrappedLineParser::parseVerilogCaseLabel() {
4341   // The label will get unindented in AnnotatingParser. If there are no leading
4342   // spaces, indent the rest here so that things inside the block will be
4343   // indented relative to things outside. We don't use parseLabel because we
4344   // don't know whether this colon is a label or a ternary expression at this
4345   // point.
4346   auto OrigLevel = Line->Level;
4347   auto FirstLine = CurrentLines->size();
4348   if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
4349     ++Line->Level;
4350   else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok))
4351     --Line->Level;
4352   parseStructuralElement();
4353   // Restore the indentation in both the new line and the line that has the
4354   // label.
4355   if (CurrentLines->size() > FirstLine)
4356     (*CurrentLines)[FirstLine].Level = OrigLevel;
4357   Line->Level = OrigLevel;
4358 }
4359
4360 bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
4361   for (const auto &N : Line.Tokens) {
4362     if (N.Tok->MacroCtx)
4363       return true;
4364     for (const UnwrappedLine &Child : N.Children)
4365       if (containsExpansion(Child))
4366         return true;
4367   }
4368   return false;
4369 }
4370
4371 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
4372   if (Line->Tokens.empty())
4373     return;
4374   LLVM_DEBUG({
4375     if (!parsingPPDirective()) {
4376       llvm::dbgs() << "Adding unwrapped line:\n";
4377       printDebugInfo(*Line);
4378     }
4379   });
4380
4381   // If this line closes a block when in Whitesmiths mode, remember that
4382   // information so that the level can be decreased after the line is added.
4383   // This has to happen after the addition of the line since the line itself
4384   // needs to be indented.
4385   bool ClosesWhitesmithsBlock =
4386       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
4387       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
4388
4389   // If the current line was expanded from a macro call, we use it to
4390   // reconstruct an unwrapped line from the structure of the expanded unwrapped
4391   // line and the unexpanded token stream.
4392   if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) {
4393     if (!Reconstruct)
4394       Reconstruct.emplace(Line->Level, Unexpanded);
4395     Reconstruct->addLine(*Line);
4396
4397     // While the reconstructed unexpanded lines are stored in the normal
4398     // flow of lines, the expanded lines are stored on the side to be analyzed
4399     // in an extra step.
4400     CurrentExpandedLines.push_back(std::move(*Line));
4401
4402     if (Reconstruct->finished()) {
4403       UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
4404       assert(!Reconstructed.Tokens.empty() &&
4405              "Reconstructed must at least contain the macro identifier.");
4406       assert(!parsingPPDirective());
4407       LLVM_DEBUG({
4408         llvm::dbgs() << "Adding unexpanded line:\n";
4409         printDebugInfo(Reconstructed);
4410       });
4411       ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines;
4412       Lines.push_back(std::move(Reconstructed));
4413       CurrentExpandedLines.clear();
4414       Reconstruct.reset();
4415     }
4416   } else {
4417     // At the top level we only get here when no unexpansion is going on, or
4418     // when conditional formatting led to unfinished macro reconstructions.
4419     assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0);
4420     CurrentLines->push_back(std::move(*Line));
4421   }
4422   Line->Tokens.clear();
4423   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
4424   Line->FirstStartColumn = 0;
4425   Line->IsContinuation = false;
4426   Line->SeenDecltypeAuto = false;
4427
4428   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
4429     --Line->Level;
4430   if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
4431     CurrentLines->append(
4432         std::make_move_iterator(PreprocessorDirectives.begin()),
4433         std::make_move_iterator(PreprocessorDirectives.end()));
4434     PreprocessorDirectives.clear();
4435   }
4436   // Disconnect the current token from the last token on the previous line.
4437   FormatTok->Previous = nullptr;
4438 }
4439
4440 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
4441
4442 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4443   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4444          FormatTok.NewlinesBefore > 0;
4445 }
4446
4447 // Checks if \p FormatTok is a line comment that continues the line comment
4448 // section on \p Line.
4449 static bool
4450 continuesLineCommentSection(const FormatToken &FormatTok,
4451                             const UnwrappedLine &Line,
4452                             const llvm::Regex &CommentPragmasRegex) {
4453   if (Line.Tokens.empty())
4454     return false;
4455
4456   StringRef IndentContent = FormatTok.TokenText;
4457   if (FormatTok.TokenText.startswith("//") ||
4458       FormatTok.TokenText.startswith("/*")) {
4459     IndentContent = FormatTok.TokenText.substr(2);
4460   }
4461   if (CommentPragmasRegex.match(IndentContent))
4462     return false;
4463
4464   // If Line starts with a line comment, then FormatTok continues the comment
4465   // section if its original column is greater or equal to the original start
4466   // column of the line.
4467   //
4468   // Define the min column token of a line as follows: if a line ends in '{' or
4469   // contains a '{' followed by a line comment, then the min column token is
4470   // that '{'. Otherwise, the min column token of the line is the first token of
4471   // the line.
4472   //
4473   // If Line starts with a token other than a line comment, then FormatTok
4474   // continues the comment section if its original column is greater than the
4475   // original start column of the min column token of the line.
4476   //
4477   // For example, the second line comment continues the first in these cases:
4478   //
4479   // // first line
4480   // // second line
4481   //
4482   // and:
4483   //
4484   // // first line
4485   //  // second line
4486   //
4487   // and:
4488   //
4489   // int i; // first line
4490   //  // second line
4491   //
4492   // and:
4493   //
4494   // do { // first line
4495   //      // second line
4496   //   int i;
4497   // } while (true);
4498   //
4499   // and:
4500   //
4501   // enum {
4502   //   a, // first line
4503   //    // second line
4504   //   b
4505   // };
4506   //
4507   // The second line comment doesn't continue the first in these cases:
4508   //
4509   //   // first line
4510   //  // second line
4511   //
4512   // and:
4513   //
4514   // int i; // first line
4515   // // second line
4516   //
4517   // and:
4518   //
4519   // do { // first line
4520   //   // second line
4521   //   int i;
4522   // } while (true);
4523   //
4524   // and:
4525   //
4526   // enum {
4527   //   a, // first line
4528   //   // second line
4529   // };
4530   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4531
4532   // Scan for '{//'. If found, use the column of '{' as a min column for line
4533   // comment section continuation.
4534   const FormatToken *PreviousToken = nullptr;
4535   for (const UnwrappedLineNode &Node : Line.Tokens) {
4536     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
4537         isLineComment(*Node.Tok)) {
4538       MinColumnToken = PreviousToken;
4539       break;
4540     }
4541     PreviousToken = Node.Tok;
4542
4543     // Grab the last newline preceding a token in this unwrapped line.
4544     if (Node.Tok->NewlinesBefore > 0)
4545       MinColumnToken = Node.Tok;
4546   }
4547   if (PreviousToken && PreviousToken->is(tok::l_brace))
4548     MinColumnToken = PreviousToken;
4549
4550   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4551                               MinColumnToken);
4552 }
4553
4554 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4555   bool JustComments = Line->Tokens.empty();
4556   for (FormatToken *Tok : CommentsBeforeNextToken) {
4557     // Line comments that belong to the same line comment section are put on the
4558     // same line since later we might want to reflow content between them.
4559     // Additional fine-grained breaking of line comment sections is controlled
4560     // by the class BreakableLineCommentSection in case it is desirable to keep
4561     // several line comment sections in the same unwrapped line.
4562     //
4563     // FIXME: Consider putting separate line comment sections as children to the
4564     // unwrapped line instead.
4565     Tok->ContinuesLineCommentSection =
4566         continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
4567     if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4568       addUnwrappedLine();
4569     pushToken(Tok);
4570   }
4571   if (NewlineBeforeNext && JustComments)
4572     addUnwrappedLine();
4573   CommentsBeforeNextToken.clear();
4574 }
4575
4576 void UnwrappedLineParser::nextToken(int LevelDifference) {
4577   if (eof())
4578     return;
4579   flushComments(isOnNewLine(*FormatTok));
4580   pushToken(FormatTok);
4581   FormatToken *Previous = FormatTok;
4582   if (!Style.isJavaScript())
4583     readToken(LevelDifference);
4584   else
4585     readTokenWithJavaScriptASI();
4586   FormatTok->Previous = Previous;
4587   if (Style.isVerilog()) {
4588     // Blocks in Verilog can have `begin` and `end` instead of braces.  For
4589     // keywords like `begin`, we can't treat them the same as left braces
4590     // because some contexts require one of them.  For example structs use
4591     // braces and if blocks use keywords, and a left brace can occur in an if
4592     // statement, but it is not a block.  For keywords like `end`, we simply
4593     // treat them the same as right braces.
4594     if (Keywords.isVerilogEnd(*FormatTok))
4595       FormatTok->Tok.setKind(tok::r_brace);
4596   }
4597 }
4598
4599 void UnwrappedLineParser::distributeComments(
4600     const SmallVectorImpl<FormatToken *> &Comments,
4601     const FormatToken *NextTok) {
4602   // Whether or not a line comment token continues a line is controlled by
4603   // the method continuesLineCommentSection, with the following caveat:
4604   //
4605   // Define a trail of Comments to be a nonempty proper postfix of Comments such
4606   // that each comment line from the trail is aligned with the next token, if
4607   // the next token exists. If a trail exists, the beginning of the maximal
4608   // trail is marked as a start of a new comment section.
4609   //
4610   // For example in this code:
4611   //
4612   // int a; // line about a
4613   //   // line 1 about b
4614   //   // line 2 about b
4615   //   int b;
4616   //
4617   // the two lines about b form a maximal trail, so there are two sections, the
4618   // first one consisting of the single comment "// line about a" and the
4619   // second one consisting of the next two comments.
4620   if (Comments.empty())
4621     return;
4622   bool ShouldPushCommentsInCurrentLine = true;
4623   bool HasTrailAlignedWithNextToken = false;
4624   unsigned StartOfTrailAlignedWithNextToken = 0;
4625   if (NextTok) {
4626     // We are skipping the first element intentionally.
4627     for (unsigned i = Comments.size() - 1; i > 0; --i) {
4628       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4629         HasTrailAlignedWithNextToken = true;
4630         StartOfTrailAlignedWithNextToken = i;
4631       }
4632     }
4633   }
4634   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4635     FormatToken *FormatTok = Comments[i];
4636     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4637       FormatTok->ContinuesLineCommentSection = false;
4638     } else {
4639       FormatTok->ContinuesLineCommentSection =
4640           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4641     }
4642     if (!FormatTok->ContinuesLineCommentSection &&
4643         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
4644       ShouldPushCommentsInCurrentLine = false;
4645     }
4646     if (ShouldPushCommentsInCurrentLine)
4647       pushToken(FormatTok);
4648     else
4649       CommentsBeforeNextToken.push_back(FormatTok);
4650   }
4651 }
4652
4653 void UnwrappedLineParser::readToken(int LevelDifference) {
4654   SmallVector<FormatToken *, 1> Comments;
4655   bool PreviousWasComment = false;
4656   bool FirstNonCommentOnLine = false;
4657   do {
4658     FormatTok = Tokens->getNextToken();
4659     assert(FormatTok);
4660     while (FormatTok->getType() == TT_ConflictStart ||
4661            FormatTok->getType() == TT_ConflictEnd ||
4662            FormatTok->getType() == TT_ConflictAlternative) {
4663       if (FormatTok->getType() == TT_ConflictStart)
4664         conditionalCompilationStart(/*Unreachable=*/false);
4665       else if (FormatTok->getType() == TT_ConflictAlternative)
4666         conditionalCompilationAlternative();
4667       else if (FormatTok->getType() == TT_ConflictEnd)
4668         conditionalCompilationEnd();
4669       FormatTok = Tokens->getNextToken();
4670       FormatTok->MustBreakBefore = true;
4671     }
4672
4673     auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4674                                       const FormatToken &Tok,
4675                                       bool PreviousWasComment) {
4676       auto IsFirstOnLine = [](const FormatToken &Tok) {
4677         return Tok.HasUnescapedNewline || Tok.IsFirst;
4678       };
4679
4680       // Consider preprocessor directives preceded by block comments as first
4681       // on line.
4682       if (PreviousWasComment)
4683         return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4684       return IsFirstOnLine(Tok);
4685     };
4686
4687     FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4688         FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4689     PreviousWasComment = FormatTok->is(tok::comment);
4690
4691     while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
4692            (!Style.isVerilog() ||
4693             Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) &&
4694            FirstNonCommentOnLine) {
4695       distributeComments(Comments, FormatTok);
4696       Comments.clear();
4697       // If there is an unfinished unwrapped line, we flush the preprocessor
4698       // directives only after that unwrapped line was finished later.
4699       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4700       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4701       assert((LevelDifference >= 0 ||
4702               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4703              "LevelDifference makes Line->Level negative");
4704       Line->Level += LevelDifference;
4705       // Comments stored before the preprocessor directive need to be output
4706       // before the preprocessor directive, at the same level as the
4707       // preprocessor directive, as we consider them to apply to the directive.
4708       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
4709           PPBranchLevel > 0) {
4710         Line->Level += PPBranchLevel;
4711       }
4712       flushComments(isOnNewLine(*FormatTok));
4713       parsePPDirective();
4714       PreviousWasComment = FormatTok->is(tok::comment);
4715       FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4716           FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4717     }
4718
4719     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4720         !Line->InPPDirective) {
4721       continue;
4722     }
4723
4724     if (FormatTok->is(tok::identifier) &&
4725         Macros.defined(FormatTok->TokenText) &&
4726         // FIXME: Allow expanding macros in preprocessor directives.
4727         !Line->InPPDirective) {
4728       FormatToken *ID = FormatTok;
4729       unsigned Position = Tokens->getPosition();
4730
4731       // To correctly parse the code, we need to replace the tokens of the macro
4732       // call with its expansion.
4733       auto PreCall = std::move(Line);
4734       Line.reset(new UnwrappedLine);
4735       bool OldInExpansion = InExpansion;
4736       InExpansion = true;
4737       // We parse the macro call into a new line.
4738       auto Args = parseMacroCall();
4739       InExpansion = OldInExpansion;
4740       assert(Line->Tokens.front().Tok == ID);
4741       // And remember the unexpanded macro call tokens.
4742       auto UnexpandedLine = std::move(Line);
4743       // Reset to the old line.
4744       Line = std::move(PreCall);
4745
4746       LLVM_DEBUG({
4747         llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
4748         if (Args) {
4749           llvm::dbgs() << "(";
4750           for (const auto &Arg : Args.value())
4751             for (const auto &T : Arg)
4752               llvm::dbgs() << T->TokenText << " ";
4753           llvm::dbgs() << ")";
4754         }
4755         llvm::dbgs() << "\n";
4756       });
4757       if (Macros.objectLike(ID->TokenText) && Args &&
4758           !Macros.hasArity(ID->TokenText, Args->size())) {
4759         // The macro is either
4760         // - object-like, but we got argumnets, or
4761         // - overloaded to be both object-like and function-like, but none of
4762         //   the function-like arities match the number of arguments.
4763         // Thus, expand as object-like macro.
4764         LLVM_DEBUG(llvm::dbgs()
4765                    << "Macro \"" << ID->TokenText
4766                    << "\" not overloaded for arity " << Args->size()
4767                    << "or not function-like, using object-like overload.");
4768         Args.reset();
4769         UnexpandedLine->Tokens.resize(1);
4770         Tokens->setPosition(Position);
4771         nextToken();
4772         assert(!Args && Macros.objectLike(ID->TokenText));
4773       }
4774       if ((!Args && Macros.objectLike(ID->TokenText)) ||
4775           (Args && Macros.hasArity(ID->TokenText, Args->size()))) {
4776         // Next, we insert the expanded tokens in the token stream at the
4777         // current position, and continue parsing.
4778         Unexpanded[ID] = std::move(UnexpandedLine);
4779         SmallVector<FormatToken *, 8> Expansion =
4780             Macros.expand(ID, std::move(Args));
4781         if (!Expansion.empty())
4782           FormatTok = Tokens->insertTokens(Expansion);
4783
4784         LLVM_DEBUG({
4785           llvm::dbgs() << "Expanded: ";
4786           for (const auto &T : Expansion)
4787             llvm::dbgs() << T->TokenText << " ";
4788           llvm::dbgs() << "\n";
4789         });
4790       } else {
4791         LLVM_DEBUG({
4792           llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
4793                        << "\", because it was used ";
4794           if (Args)
4795             llvm::dbgs() << "with " << Args->size();
4796           else
4797             llvm::dbgs() << "without";
4798           llvm::dbgs() << " arguments, which doesn't match any definition.\n";
4799         });
4800         Tokens->setPosition(Position);
4801         FormatTok = ID;
4802       }
4803     }
4804
4805     if (FormatTok->isNot(tok::comment)) {
4806       distributeComments(Comments, FormatTok);
4807       Comments.clear();
4808       return;
4809     }
4810
4811     Comments.push_back(FormatTok);
4812   } while (!eof());
4813
4814   distributeComments(Comments, nullptr);
4815   Comments.clear();
4816 }
4817
4818 namespace {
4819 template <typename Iterator>
4820 void pushTokens(Iterator Begin, Iterator End,
4821                 llvm::SmallVectorImpl<FormatToken *> &Into) {
4822   for (auto I = Begin; I != End; ++I) {
4823     Into.push_back(I->Tok);
4824     for (const auto &Child : I->Children)
4825       pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
4826   }
4827 }
4828 } // namespace
4829
4830 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
4831 UnwrappedLineParser::parseMacroCall() {
4832   std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
4833   assert(Line->Tokens.empty());
4834   nextToken();
4835   if (FormatTok->isNot(tok::l_paren))
4836     return Args;
4837   unsigned Position = Tokens->getPosition();
4838   FormatToken *Tok = FormatTok;
4839   nextToken();
4840   Args.emplace();
4841   auto ArgStart = std::prev(Line->Tokens.end());
4842
4843   int Parens = 0;
4844   do {
4845     switch (FormatTok->Tok.getKind()) {
4846     case tok::l_paren:
4847       ++Parens;
4848       nextToken();
4849       break;
4850     case tok::r_paren: {
4851       if (Parens > 0) {
4852         --Parens;
4853         nextToken();
4854         break;
4855       }
4856       Args->push_back({});
4857       pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
4858       nextToken();
4859       return Args;
4860     }
4861     case tok::comma: {
4862       if (Parens > 0) {
4863         nextToken();
4864         break;
4865       }
4866       Args->push_back({});
4867       pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
4868       nextToken();
4869       ArgStart = std::prev(Line->Tokens.end());
4870       break;
4871     }
4872     default:
4873       nextToken();
4874       break;
4875     }
4876   } while (!eof());
4877   Line->Tokens.resize(1);
4878   Tokens->setPosition(Position);
4879   FormatTok = Tok;
4880   return {};
4881 }
4882
4883 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
4884   Line->Tokens.push_back(UnwrappedLineNode(Tok));
4885   if (MustBreakBeforeNextToken) {
4886     Line->Tokens.back().Tok->MustBreakBefore = true;
4887     MustBreakBeforeNextToken = false;
4888   }
4889 }
4890
4891 } // end namespace format
4892 } // end namespace clang