Reapply "[lldb][dwarf] Compute fully qualified names on simplified template names...
[llvm-project.git] / clang / lib / Format / UnwrappedLineParser.cpp
blobc182aaf0876d1bce8bf615558523a033663f9e41
1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "FormatTokenLexer.h"
18 #include "FormatTokenSource.h"
19 #include "Macros.h"
20 #include "TokenAnnotator.h"
21 #include "clang/Basic/TokenKinds.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/raw_os_ostream.h"
26 #include "llvm/Support/raw_ostream.h"
28 #include <algorithm>
29 #include <utility>
31 #define DEBUG_TYPE "format-parser"
33 namespace clang {
34 namespace format {
36 namespace {
38 void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
39 StringRef Prefix = "", bool PrintText = false) {
40 OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
41 << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
42 bool NewLine = false;
43 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
44 E = Line.Tokens.end();
45 I != E; ++I) {
46 if (NewLine) {
47 OS << Prefix;
48 NewLine = false;
50 OS << I->Tok->Tok.getName() << "["
51 << "T=" << (unsigned)I->Tok->getType()
52 << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
53 << "\"] ";
54 for (SmallVectorImpl<UnwrappedLine>::const_iterator
55 CI = I->Children.begin(),
56 CE = I->Children.end();
57 CI != CE; ++CI) {
58 OS << "\n";
59 printLine(OS, *CI, (Prefix + " ").str());
60 NewLine = true;
63 if (!NewLine)
64 OS << "\n";
67 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
68 printLine(llvm::dbgs(), Line);
71 class ScopedDeclarationState {
72 public:
73 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
74 bool MustBeDeclaration)
75 : Line(Line), Stack(Stack) {
76 Line.MustBeDeclaration = MustBeDeclaration;
77 Stack.push_back(MustBeDeclaration);
79 ~ScopedDeclarationState() {
80 Stack.pop_back();
81 if (!Stack.empty())
82 Line.MustBeDeclaration = Stack.back();
83 else
84 Line.MustBeDeclaration = true;
87 private:
88 UnwrappedLine &Line;
89 llvm::BitVector &Stack;
92 } // end anonymous namespace
94 std::ostream &operator<<(std::ostream &Stream, const UnwrappedLine &Line) {
95 llvm::raw_os_ostream OS(Stream);
96 printLine(OS, Line);
97 return Stream;
100 class ScopedLineState {
101 public:
102 ScopedLineState(UnwrappedLineParser &Parser,
103 bool SwitchToPreprocessorLines = false)
104 : Parser(Parser), OriginalLines(Parser.CurrentLines) {
105 if (SwitchToPreprocessorLines)
106 Parser.CurrentLines = &Parser.PreprocessorDirectives;
107 else if (!Parser.Line->Tokens.empty())
108 Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
109 PreBlockLine = std::move(Parser.Line);
110 Parser.Line = std::make_unique<UnwrappedLine>();
111 Parser.Line->Level = PreBlockLine->Level;
112 Parser.Line->PPLevel = PreBlockLine->PPLevel;
113 Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
114 Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
115 Parser.Line->UnbracedBodyLevel = PreBlockLine->UnbracedBodyLevel;
118 ~ScopedLineState() {
119 if (!Parser.Line->Tokens.empty())
120 Parser.addUnwrappedLine();
121 assert(Parser.Line->Tokens.empty());
122 Parser.Line = std::move(PreBlockLine);
123 if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
124 Parser.MustBreakBeforeNextToken = true;
125 Parser.CurrentLines = OriginalLines;
128 private:
129 UnwrappedLineParser &Parser;
131 std::unique_ptr<UnwrappedLine> PreBlockLine;
132 SmallVectorImpl<UnwrappedLine> *OriginalLines;
135 class CompoundStatementIndenter {
136 public:
137 CompoundStatementIndenter(UnwrappedLineParser *Parser,
138 const FormatStyle &Style, unsigned &LineLevel)
139 : CompoundStatementIndenter(Parser, LineLevel,
140 Style.BraceWrapping.AfterControlStatement,
141 Style.BraceWrapping.IndentBraces) {}
142 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
143 bool WrapBrace, bool IndentBrace)
144 : LineLevel(LineLevel), OldLineLevel(LineLevel) {
145 if (WrapBrace)
146 Parser->addUnwrappedLine();
147 if (IndentBrace)
148 ++LineLevel;
150 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
152 private:
153 unsigned &LineLevel;
154 unsigned OldLineLevel;
157 UnwrappedLineParser::UnwrappedLineParser(
158 SourceManager &SourceMgr, const FormatStyle &Style,
159 const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
160 ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
161 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
162 IdentifierTable &IdentTable)
163 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
164 CurrentLines(&Lines), Style(Style), IsCpp(Style.isCpp()),
165 LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords),
166 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
167 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
168 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
169 ? IG_Rejected
170 : IG_Inited),
171 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
172 Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {
173 assert(IsCpp == LangOpts.CXXOperatorNames);
176 void UnwrappedLineParser::reset() {
177 PPBranchLevel = -1;
178 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
179 ? IG_Rejected
180 : IG_Inited;
181 IncludeGuardToken = nullptr;
182 Line.reset(new UnwrappedLine);
183 CommentsBeforeNextToken.clear();
184 FormatTok = nullptr;
185 MustBreakBeforeNextToken = false;
186 IsDecltypeAutoFunction = false;
187 PreprocessorDirectives.clear();
188 CurrentLines = &Lines;
189 DeclarationScopeStack.clear();
190 NestedTooDeep.clear();
191 NestedLambdas.clear();
192 PPStack.clear();
193 Line->FirstStartColumn = FirstStartColumn;
195 if (!Unexpanded.empty())
196 for (FormatToken *Token : AllTokens)
197 Token->MacroCtx.reset();
198 CurrentExpandedLines.clear();
199 ExpandedLines.clear();
200 Unexpanded.clear();
201 InExpansion = false;
202 Reconstruct.reset();
205 void UnwrappedLineParser::parse() {
206 IndexedTokenSource TokenSource(AllTokens);
207 Line->FirstStartColumn = FirstStartColumn;
208 do {
209 LLVM_DEBUG(llvm::dbgs() << "----\n");
210 reset();
211 Tokens = &TokenSource;
212 TokenSource.reset();
214 readToken();
215 parseFile();
217 // If we found an include guard then all preprocessor directives (other than
218 // the guard) are over-indented by one.
219 if (IncludeGuard == IG_Found) {
220 for (auto &Line : Lines)
221 if (Line.InPPDirective && Line.Level > 0)
222 --Line.Level;
225 // Create line with eof token.
226 assert(eof());
227 pushToken(FormatTok);
228 addUnwrappedLine();
230 // In a first run, format everything with the lines containing macro calls
231 // replaced by the expansion.
232 if (!ExpandedLines.empty()) {
233 LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
234 for (const auto &Line : Lines) {
235 if (!Line.Tokens.empty()) {
236 auto it = ExpandedLines.find(Line.Tokens.begin()->Tok);
237 if (it != ExpandedLines.end()) {
238 for (const auto &Expanded : it->second) {
239 LLVM_DEBUG(printDebugInfo(Expanded));
240 Callback.consumeUnwrappedLine(Expanded);
242 continue;
245 LLVM_DEBUG(printDebugInfo(Line));
246 Callback.consumeUnwrappedLine(Line);
248 Callback.finishRun();
251 LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
252 for (const UnwrappedLine &Line : Lines) {
253 LLVM_DEBUG(printDebugInfo(Line));
254 Callback.consumeUnwrappedLine(Line);
256 Callback.finishRun();
257 Lines.clear();
258 while (!PPLevelBranchIndex.empty() &&
259 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
260 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
261 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
263 if (!PPLevelBranchIndex.empty()) {
264 ++PPLevelBranchIndex.back();
265 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
266 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
268 } while (!PPLevelBranchIndex.empty());
271 void UnwrappedLineParser::parseFile() {
272 // The top-level context in a file always has declarations, except for pre-
273 // processor directives and JavaScript files.
274 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
275 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
276 MustBeDeclaration);
277 if (Style.Language == FormatStyle::LK_TextProto)
278 parseBracedList();
279 else
280 parseLevel();
281 // Make sure to format the remaining tokens.
283 // LK_TextProto is special since its top-level is parsed as the body of a
284 // braced list, which does not necessarily have natural line separators such
285 // as a semicolon. Comments after the last entry that have been determined to
286 // not belong to that line, as in:
287 // key: value
288 // // endfile comment
289 // do not have a chance to be put on a line of their own until this point.
290 // Here we add this newline before end-of-file comments.
291 if (Style.Language == FormatStyle::LK_TextProto &&
292 !CommentsBeforeNextToken.empty()) {
293 addUnwrappedLine();
295 flushComments(true);
296 addUnwrappedLine();
299 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
300 do {
301 switch (FormatTok->Tok.getKind()) {
302 case tok::l_brace:
303 return;
304 default:
305 if (FormatTok->is(Keywords.kw_where)) {
306 addUnwrappedLine();
307 nextToken();
308 parseCSharpGenericTypeConstraint();
309 break;
311 nextToken();
312 break;
314 } while (!eof());
317 void UnwrappedLineParser::parseCSharpAttribute() {
318 int UnpairedSquareBrackets = 1;
319 do {
320 switch (FormatTok->Tok.getKind()) {
321 case tok::r_square:
322 nextToken();
323 --UnpairedSquareBrackets;
324 if (UnpairedSquareBrackets == 0) {
325 addUnwrappedLine();
326 return;
328 break;
329 case tok::l_square:
330 ++UnpairedSquareBrackets;
331 nextToken();
332 break;
333 default:
334 nextToken();
335 break;
337 } while (!eof());
340 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
341 if (!Lines.empty() && Lines.back().InPPDirective)
342 return true;
344 const FormatToken *Previous = Tokens->getPreviousToken();
345 return Previous && Previous->is(tok::comment) &&
346 (Previous->IsMultiline || Previous->NewlinesBefore > 0);
349 /// \brief Parses a level, that is, the run of structural elements up to the
/// closing brace of the enclosing block (when \p OpeningBrace is given) or up
/// to the end of the token stream.
350 /// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
351 /// \param IfKind The \p if statement kind in the level.
352 /// \param IfLeftBrace The left brace of the \p if block in the level.
353 /// \returns true if a simple block of if/else/for/while, or false otherwise.
354 /// (A simple block has a single statement.)
355 bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
356 IfStmtKind *IfKind,
357 FormatToken **IfLeftBrace) {
358 const bool InRequiresExpression =
359 OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
// Evaluated once on entry. It is also true whenever brace removal is
// disabled, which makes the "simple block" test below fail in that case.
360 const bool IsPrecededByCommentOrPPDirective =
361 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
362 FormatToken *IfLBrace = nullptr;
363 bool HasDoWhile = false;
364 bool HasLabel = false;
365 unsigned StatementCount = 0;
366 bool SwitchLabelEncountered = false;
368 do {
// Skip attribute tokens together with their parenthesized arguments, if any.
369 if (FormatTok->isAttribute()) {
370 nextToken();
371 if (FormatTok->is(tok::l_paren))
372 parseParens();
373 continue;
// Tokens typed as macro block begin/end are dispatched like '{' and '}'.
375 tok::TokenKind Kind = FormatTok->Tok.getKind();
376 if (FormatTok->is(TT_MacroBlockBegin))
377 Kind = tok::l_brace;
378 else if (FormatTok->is(TT_MacroBlockEnd))
379 Kind = tok::r_brace;
// Parses one structural element and counts it as a statement. Once a
// do-while or a label has been recorded, nullptr is passed for that output
// parameter instead of the flag's address.
381 auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile,
382 &HasLabel, &StatementCount] {
383 parseStructuralElement(OpeningBrace, IfKind, &IfLBrace,
384 HasDoWhile ? nullptr : &HasDoWhile,
385 HasLabel ? nullptr : &HasLabel);
386 ++StatementCount;
387 assert(StatementCount > 0 && "StatementCount overflow!");
390 switch (Kind) {
391 case tok::comment:
392 nextToken();
393 addUnwrappedLine();
394 break;
395 case tok::l_brace:
396 if (InRequiresExpression) {
397 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
398 } else if (FormatTok->Previous &&
399 FormatTok->Previous->ClosesRequiresClause) {
400 // We need the 'default' case here to correctly parse a function
401 // l_brace.
402 ParseDefault();
403 continue;
// Outside requires expressions, a real '{' is first tried as a braced list
// and only then treated as the start of a block.
405 if (!InRequiresExpression && FormatTok->isNot(TT_MacroBlockBegin)) {
406 if (tryToParseBracedList())
407 continue;
408 FormatTok->setFinalizedType(TT_BlockLBrace);
410 parseBlock();
411 ++StatementCount;
412 assert(StatementCount > 0 && "StatementCount overflow!");
413 addUnwrappedLine();
414 break;
415 case tok::r_brace:
// With an opening brace this '}' ends the level and the function returns
// without consuming it. The result is true only if every check below passes.
416 if (OpeningBrace) {
417 if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
418 !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
419 return false;
421 if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
422 HasDoWhile || IsPrecededByCommentOrPPDirective ||
423 precededByCommentOrPPDirective()) {
424 return false;
// A comment following the '}' on the same line also yields false.
426 const FormatToken *Next = Tokens->peekNextToken();
427 if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
428 return false;
429 if (IfLeftBrace)
430 *IfLeftBrace = IfLBrace;
431 return true;
// No opening brace: consume the unmatched '}' and end the current line.
433 nextToken();
434 addUnwrappedLine();
435 break;
436 case tok::kw_default: {
// Look ahead to the next non-comment token, then rewind to the stored
// position before deciding how to treat `default`.
437 unsigned StoredPosition = Tokens->getPosition();
438 auto *Next = Tokens->getNextNonComment();
439 FormatTok = Tokens->setPosition(StoredPosition);
440 if (!Next->isOneOf(tok::colon, tok::arrow)) {
441 // default not followed by `:` or `->` is not a case label; treat it
442 // like an identifier.
443 parseStructuralElement();
444 break;
446 // Else, if it is 'default:', fall through to the case handling.
447 [[fallthrough]];
449 case tok::kw_case:
450 if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() ||
451 (Style.isJavaScript() && Line->MustBeDeclaration)) {
452 // Proto: there are no switch/case statements
453 // Verilog: Case labels don't have this word. We handle case
454 // labels including default in TokenAnnotator.
455 // JavaScript: A 'case: string' style field declaration.
456 ParseDefault();
457 break;
// The level is raised at most once per parseLevel call, on the first label.
459 if (!SwitchLabelEncountered &&
460 (Style.IndentCaseLabels ||
461 (OpeningBrace && OpeningBrace->is(TT_SwitchExpressionLBrace)) ||
462 (Line->InPPDirective && Line->Level == 1))) {
463 ++Line->Level;
465 SwitchLabelEncountered = true;
466 parseStructuralElement();
467 break;
468 case tok::l_square:
469 if (Style.isCSharp()) {
470 nextToken();
471 parseCSharpAttribute();
472 break;
474 if (handleCppAttributes())
475 break;
476 [[fallthrough]];
477 default:
478 ParseDefault();
479 break;
481 } while (!eof());
// End of input reached without a closing brace ending the level.
483 return false;
/// Scans forward from the current token, which must be a '{', and assigns a
/// block kind (BK_Block or BK_BracedInit) to that brace and to the braces
/// nested inside it. The scan stops once the starting brace is closed or at
/// end of file, and the token position is restored before returning.
/// \param ExpectClassBody if true, a ';' right after the closing brace of the
/// outermost brace does not count as evidence for a braced list.
486 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
487 // We'll parse forward through the tokens until we hit
488 // a closing brace or eof - note that getNextToken() will
489 // parse macros, so this will magically work inside macro
490 // definitions, too.
491 unsigned StoredPosition = Tokens->getPosition();
492 FormatToken *Tok = FormatTok;
493 const FormatToken *PrevTok = Tok->Previous;
494 // Keep a stack of positions of lbrace tokens. We will
495 // update information about whether an lbrace starts a
496 // braced init list or a different block during the loop.
497 struct StackEntry {
498 FormatToken *Tok;
499 const FormatToken *PrevTok;
501 SmallVector<StackEntry, 8> LBraceStack;
502 assert(Tok->is(tok::l_brace));
504 do {
// NextTok is the lookahead for this iteration; Tok is the token being
// classified and PrevTok the one processed just before it.
505 auto *NextTok = Tokens->getNextNonComment();
507 if (!Line->InMacroBody && !Style.isTableGen()) {
508 // Skip PPDirective lines and comments.
509 while (NextTok->is(tok::hash)) {
510 NextTok = Tokens->getNextToken();
511 if (NextTok->is(tok::pp_not_keyword))
512 break;
513 do {
514 NextTok = Tokens->getNextToken();
515 } while (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof));
517 while (NextTok->is(tok::comment))
518 NextTok = Tokens->getNextToken();
522 switch (Tok->Tok.getKind()) {
523 case tok::l_brace:
524 if (Style.isJavaScript() && PrevTok) {
525 if (PrevTok->isOneOf(tok::colon, tok::less)) {
526 // A ':' indicates this code is in a type, or a braced list
527 // following a label in an object literal ({a: {b: 1}}).
528 // A '<' could be an object used in a comparison, but that is nonsense
529 // code (can never return true), so more likely it is a generic type
530 // argument (`X<{a: string; b: number}>`).
531 // The code below could be confused by semicolons between the
532 // individual members in a type member list, which would normally
533 // trigger BK_Block. In both cases, this must be parsed as an inline
534 // braced init.
535 Tok->setBlockKind(BK_BracedInit);
536 } else if (PrevTok->is(tok::r_paren)) {
537 // `) { }` can only occur in function or method declarations in JS.
538 Tok->setBlockKind(BK_Block);
540 } else {
541 Tok->setBlockKind(BK_Unknown);
// Remember the brace together with the token that preceded it.
543 LBraceStack.push_back({Tok, PrevTok});
544 break;
545 case tok::r_brace:
546 if (LBraceStack.empty())
547 break;
// Only decide here if the matching '{' has not been classified yet. The
// heuristics below are OR-ed together, so any single hit marks a braced list.
548 if (auto *LBrace = LBraceStack.back().Tok; LBrace->is(BK_Unknown)) {
549 bool ProbablyBracedList = false;
550 if (Style.Language == FormatStyle::LK_Proto) {
551 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
552 } else if (LBrace->isNot(TT_EnumLBrace)) {
553 // Using OriginalColumn to distinguish between ObjC methods and
554 // binary operators is a bit hacky.
555 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
556 NextTok->OriginalColumn == 0;
558 // Try to detect a braced list. Note that regardless how we mark inner
559 // braces here, we will overwrite the BlockKind later if we parse a
560 // braced list (where all blocks inside are by default braced lists),
561 // or when we explicitly detect blocks (for example while parsing
562 // lambdas).
564 // If we already marked the opening brace as braced list, the closing
565 // must also be part of it.
566 ProbablyBracedList = LBrace->is(TT_BracedListLBrace);
568 ProbablyBracedList = ProbablyBracedList ||
569 (Style.isJavaScript() &&
570 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
571 Keywords.kw_as));
572 ProbablyBracedList =
573 ProbablyBracedList || (IsCpp && (PrevTok->Tok.isLiteral() ||
574 NextTok->is(tok::l_paren)));
576 // If there is a comma, semicolon or right paren after the closing
577 // brace, we assume this is a braced initializer list.
578 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
579 // braced list in JS.
580 ProbablyBracedList =
581 ProbablyBracedList ||
582 NextTok->isOneOf(tok::comma, tok::period, tok::colon,
583 tok::r_paren, tok::r_square, tok::ellipsis);
585 // Distinguish between braced list in a constructor initializer list
586 // followed by constructor body, or just adjacent blocks.
587 ProbablyBracedList =
588 ProbablyBracedList ||
589 (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok &&
590 LBraceStack.back().PrevTok->isOneOf(tok::identifier,
591 tok::greater));
593 ProbablyBracedList =
594 ProbablyBracedList ||
595 (NextTok->is(tok::identifier) &&
596 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));
598 ProbablyBracedList = ProbablyBracedList ||
599 (NextTok->is(tok::semi) &&
600 (!ExpectClassBody || LBraceStack.size() != 1));
602 ProbablyBracedList =
603 ProbablyBracedList ||
604 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
// Unlike the checks above, this one overwrites the result so far and also
// advances NextTok past the '['.
606 if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
607 // We can have an array subscript after a braced init
608 // list, but C++11 attributes are expected after blocks.
609 NextTok = Tokens->getNextToken();
610 ProbablyBracedList = NextTok->isNot(tok::l_square);
613 // Cpp macro definition body that is a nonempty braced list or block:
614 if (IsCpp && Line->InMacroBody && PrevTok != FormatTok &&
615 !FormatTok->Previous && NextTok->is(tok::eof) &&
616 // A statement can end with only `;` (simple statement), a block
617 // closing brace (compound statement), or `:` (label statement).
618 // If PrevTok is a block opening brace, Tok ends an empty block.
619 !PrevTok->isOneOf(tok::semi, BK_Block, tok::colon)) {
620 ProbablyBracedList = true;
// Both braces of the pair receive the same kind.
623 const auto BlockKind = ProbablyBracedList ? BK_BracedInit : BK_Block;
624 Tok->setBlockKind(BlockKind);
625 LBrace->setBlockKind(BlockKind);
627 LBraceStack.pop_back();
628 break;
629 case tok::identifier:
630 if (Tok->isNot(TT_StatementMacro))
631 break;
632 [[fallthrough]];
// Any of these tokens (or a statement macro) inside a still-unclassified
// brace marks that brace as a block.
633 case tok::at:
634 case tok::semi:
635 case tok::kw_if:
636 case tok::kw_while:
637 case tok::kw_for:
638 case tok::kw_switch:
639 case tok::kw_try:
640 case tok::kw___try:
641 if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown))
642 LBraceStack.back().Tok->setBlockKind(BK_Block);
643 break;
644 default:
645 break;
648 PrevTok = Tok;
649 Tok = NextTok;
650 } while (Tok->isNot(tok::eof) && !LBraceStack.empty());
652 // Assume other blocks for all unclosed opening braces.
653 for (const auto &Entry : LBraceStack)
654 if (Entry.Tok->is(BK_Unknown))
655 Entry.Tok->setBlockKind(BK_Block);
// Rewind so that regular parsing resumes at the token we started from.
657 FormatTok = Tokens->setPosition(StoredPosition);
660 // Sets the token type of the directly previous right brace.
661 void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) {
662 if (auto Prev = FormatTok->getPreviousNonComment();
663 Prev && Prev->is(tok::r_brace)) {
664 Prev->setFinalizedType(Type);
/// Mixes the std::hash of \p v into the running hash value \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
674 size_t UnwrappedLineParser::computePPHash() const {
675 size_t h = 0;
676 for (const auto &i : PPStack) {
677 hash_combine(h, size_t(i.Kind));
678 hash_combine(h, i.Line);
680 return h;
683 // Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
684 // is not null, subtracts its length (plus the preceding space) when computing
685 // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
686 // running the token annotator on it so that we can restore them afterward.
// Always returns true when the style's ColumnLimit is 0.
687 bool UnwrappedLineParser::mightFitOnOneLine(
688 UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
689 const auto ColumnLimit = Style.ColumnLimit;
690 if (ColumnLimit == 0)
691 return true;
693 auto &Tokens = ParsedLine.Tokens;
694 assert(!Tokens.empty());
696 const auto *LastToken = Tokens.back().Tok;
697 assert(LastToken);
699 SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
// Snapshot every token. `Token` is a const reference, so the std::move below
// cannot move from it: Children is copied and ParsedLine keeps its children
// for the annotation step.
701 int Index = 0;
702 for (const auto &Token : Tokens) {
703 assert(Token.Tok);
704 auto &SavedToken = SavedTokens[Index++];
705 SavedToken.Tok = new FormatToken;
706 SavedToken.Tok->copyFrom(*Token.Tok);
707 SavedToken.Children = std::move(Token.Children);
710 AnnotatedLine Line(ParsedLine);
711 assert(Line.Last == LastToken);
// Annotate the line; TotalLength of the tokens is read right below.
713 TokenAnnotator Annotator(Style, Keywords);
714 Annotator.annotate(Line);
715 Annotator.calculateFormattingInformation(Line);
717 auto Length = LastToken->TotalLength;
718 if (OpeningBrace) {
719 assert(OpeningBrace != Tokens.front().Tok);
// If the brace's TotalLength is exactly ColumnLimit more than its
// predecessor's, subtract that ColumnLimit from the length again.
// NOTE(review): presumably the annotator's penalty for a forced break before
// the brace - confirm against TokenAnnotator.
720 if (auto Prev = OpeningBrace->Previous;
721 Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
722 Length -= ColumnLimit;
724 Length -= OpeningBrace->TokenText.size() + 1;
// A leading '}' on the line is likewise not counted (its text plus one).
727 if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) {
728 assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
729 Length -= FirstToken->TokenText.size() + 1;
// Restore the tokens from the snapshot and free the clones. SavedToken is a
// const reference here as well, so Children is copied back, not moved.
732 Index = 0;
733 for (auto &Token : Tokens) {
734 const auto &SavedToken = SavedTokens[Index++];
735 Token.Tok->copyFrom(*SavedToken.Tok);
736 Token.Children = std::move(SavedToken.Children);
737 delete SavedToken.Tok;
740 // If these change PPLevel needs to be used for get correct indentation.
741 assert(!Line.InMacroBody);
742 assert(!Line.InPPDirective);
743 return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
/// Parses a block that starts at the current token: a '{', a macro block
/// begin token, or (for Verilog) a begin keyword or hierarchy header.
/// \param MustBeDeclaration declaration state pushed for the block's contents.
/// \param AddLevels number of levels added to the line level inside the block.
/// \param MunchSemi if true, a ';' right after the block is consumed as well.
/// \param KeepBraces if true, the braces are only considered removable when
/// the contained \p if block has non-optional braces.
/// \param IfKind forwarded to parseLevel.
/// \param UnindentWhitesmithsBraces if true, the line level is decremented by
/// one after the opening line has been added.
/// \returns nullptr if the nesting level exceeds 300; otherwise the \p if left
/// brace reported by parseLevel (may be nullptr).
746 FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
747 unsigned AddLevels, bool MunchSemi,
748 bool KeepBraces,
749 IfStmtKind *IfKind,
750 bool UnindentWhitesmithsBraces) {
// Consumes an optional Verilog block label (':' followed by an identifier).
751 auto HandleVerilogBlockLabel = [this]() {
752 // ":" name
753 if (Style.isVerilog() && FormatTok->is(tok::colon)) {
754 nextToken();
755 if (Keywords.isVerilogIdentifier(*FormatTok))
756 nextToken();
760 // Whether this is a Verilog-specific block that has a special header like a
761 // module.
762 const bool VerilogHierarchy =
763 Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok);
764 assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
765 (Style.isVerilog() &&
766 (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
767 "'{' or macro block token expected");
// Tok keeps pointing at the opening token; FormatTok moves on as we parse.
768 FormatToken *Tok = FormatTok;
769 const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
// Number of lines collected before this block's opening line is added.
770 auto Index = CurrentLines->size();
771 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
772 FormatTok->setBlockKind(BK_Block);
774 // For Whitesmiths mode, jump to the next level prior to skipping over the
775 // braces.
776 if (!VerilogHierarchy && AddLevels > 0 &&
777 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
778 ++Line->Level;
// Hash of the preprocessor branch stack at block entry; compared with the
// hash at the closing brace further down.
781 size_t PPStartHash = computePPHash();
783 const unsigned InitialLevel = Line->Level;
784 if (VerilogHierarchy) {
785 AddLevels += parseVerilogHierarchyHeader();
786 } else {
787 nextToken(/*LevelDifference=*/AddLevels);
788 HandleVerilogBlockLabel();
791 // Bail out if there are too many levels. Otherwise, the stack might overflow.
792 if (Line->Level > 300)
793 return nullptr;
795 if (MacroBlock && FormatTok->is(tok::l_paren))
796 parseParens();
798 size_t NbPreprocessorDirectives =
799 !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
800 addUnwrappedLine();
// Index of the line that opens the block, or kInvalidIndex if no line has
// been collected.
801 size_t OpeningLineIndex =
802 CurrentLines->empty()
803 ? (UnwrappedLine::kInvalidIndex)
804 : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
806 // Whitesmiths is weird here. The brace needs to be indented for the namespace
807 // block, but the block itself may not be indented depending on the style
808 // settings. This allows the format to back up one level in those cases.
809 if (UnindentWhitesmithsBraces)
810 --Line->Level;
812 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
813 MustBeDeclaration);
814 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
815 Line->Level += AddLevels;
817 FormatToken *IfLBrace = nullptr;
818 const bool SimpleBlock = parseLevel(Tok, IfKind, &IfLBrace);
820 if (eof())
821 return IfLBrace;
// parseLevel stopped on something other than the expected closing token:
// restore the level, mark the token as a block and give up on this block.
823 if (MacroBlock ? FormatTok->isNot(TT_MacroBlockEnd)
824 : FormatTok->isNot(tok::r_brace)) {
825 Line->Level = InitialLevel;
826 FormatTok->setBlockKind(BK_Block);
827 return IfLBrace;
830 if (FormatTok->is(tok::r_brace)) {
831 FormatTok->setBlockKind(BK_Block);
832 if (Tok->is(TT_NamespaceLBrace))
833 FormatTok->setFinalizedType(TT_NamespaceRBrace);
836 const bool IsFunctionRBrace =
837 FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace);
// Decides whether the braces of this block can be removed. The lambda
// captures by copy and is mutable, so the changes it makes to Tok and Index
// stay inside the lambda; the outer Tok is still valid afterwards.
839 auto RemoveBraces = [=]() mutable {
840 if (!SimpleBlock)
841 return false;
842 assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
843 assert(FormatTok->is(tok::r_brace));
844 const bool WrappedOpeningBrace = !Tok->Previous;
845 if (WrappedOpeningBrace && FollowedByComment)
846 return false;
847 const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
848 if (KeepBraces && !HasRequiredIfBraces)
849 return false;
850 if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
851 const FormatToken *Previous = Tokens->getPreviousToken();
852 assert(Previous);
853 if (Previous->is(tok::r_brace) && !Previous->Optional)
854 return false;
856 assert(!CurrentLines->empty());
857 auto &LastLine = CurrentLines->back();
858 if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
859 return false;
860 if (Tok->is(TT_ElseLBrace))
861 return true;
862 if (WrappedOpeningBrace) {
863 assert(Index > 0);
864 --Index; // The line above the wrapped l_brace.
865 Tok = nullptr;
867 return mightFitOnOneLine((*CurrentLines)[Index], Tok);
// Removable braces are recorded by linking the pair through MatchingParen.
869 if (RemoveBraces()) {
870 Tok->MatchingParen = FormatTok;
871 FormatTok->MatchingParen = Tok;
874 size_t PPEndHash = computePPHash();
876 // Munch the closing brace.
877 nextToken(/*LevelDifference=*/-AddLevels);
879 // When this is a function block and there is an unnecessary semicolon
880 // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
881 // it later).
882 if (Style.RemoveSemicolon && IsFunctionRBrace) {
883 while (FormatTok->is(tok::semi)) {
884 FormatTok->Optional = true;
885 nextToken();
889 HandleVerilogBlockLabel();
891 if (MacroBlock && FormatTok->is(tok::l_paren))
892 parseParens();
894 Line->Level = InitialLevel;
896 if (FormatTok->is(tok::kw_noexcept)) {
897 // A noexcept in a requires expression.
898 nextToken();
901 if (FormatTok->is(tok::arrow)) {
902 // Following the } or noexcept we can find a trailing return type arrow
903 // as part of an implicit conversion constraint.
904 nextToken();
905 parseStructuralElement();
908 if (MunchSemi && FormatTok->is(tok::semi))
909 nextToken();
// Cross-link the opening and closing lines only if the preprocessor branch
// stack hashes the same at both ends of the block.
911 if (PPStartHash == PPEndHash) {
912 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
913 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
914 // Update the opening line to add the forward reference as well
915 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
916 CurrentLines->size() - 1;
920 return IfLBrace;
923 static bool isGoogScope(const UnwrappedLine &Line) {
924 // FIXME: Closure-library specific stuff should not be hard-coded but be
925 // configurable.
926 if (Line.Tokens.size() < 4)
927 return false;
928 auto I = Line.Tokens.begin();
929 if (I->Tok->TokenText != "goog")
930 return false;
931 ++I;
932 if (I->Tok->isNot(tok::period))
933 return false;
934 ++I;
935 if (I->Tok->TokenText != "scope")
936 return false;
937 ++I;
938 return I->Tok->is(tok::l_paren);
941 static bool isIIFE(const UnwrappedLine &Line,
942 const AdditionalKeywords &Keywords) {
943 // Look for the start of an immediately invoked anonymous function.
944 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
945 // This is commonly done in JavaScript to create a new, anonymous scope.
946 // Example: (function() { ... })()
947 if (Line.Tokens.size() < 3)
948 return false;
949 auto I = Line.Tokens.begin();
950 if (I->Tok->isNot(tok::l_paren))
951 return false;
952 ++I;
953 if (I->Tok->isNot(Keywords.kw_function))
954 return false;
955 ++I;
956 return I->Tok->is(tok::l_paren);
959 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
960 const FormatToken &InitialToken) {
961 tok::TokenKind Kind = InitialToken.Tok.getKind();
962 if (InitialToken.is(TT_NamespaceMacro))
963 Kind = tok::kw_namespace;
965 switch (Kind) {
966 case tok::kw_namespace:
967 return Style.BraceWrapping.AfterNamespace;
968 case tok::kw_class:
969 return Style.BraceWrapping.AfterClass;
970 case tok::kw_union:
971 return Style.BraceWrapping.AfterUnion;
972 case tok::kw_struct:
973 return Style.BraceWrapping.AfterStruct;
974 case tok::kw_enum:
975 return Style.BraceWrapping.AfterEnum;
976 default:
977 return false;
981 void UnwrappedLineParser::parseChildBlock() {
982 assert(FormatTok->is(tok::l_brace));
983 FormatTok->setBlockKind(BK_Block);
984 const FormatToken *OpeningBrace = FormatTok;
985 nextToken();
987 bool SkipIndent = (Style.isJavaScript() &&
988 (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
989 ScopedLineState LineState(*this);
990 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
991 /*MustBeDeclaration=*/false);
992 Line->Level += SkipIndent ? 0 : 1;
993 parseLevel(OpeningBrace);
994 flushComments(isOnNewLine(*FormatTok));
995 Line->Level -= SkipIndent ? 0 : 1;
997 nextToken();
1000 void UnwrappedLineParser::parsePPDirective() {
1001 assert(FormatTok->is(tok::hash) && "'#' expected");
1002 ScopedMacroState MacroState(*Line, Tokens, FormatTok);
1004 nextToken();
1006 if (!FormatTok->Tok.getIdentifierInfo()) {
1007 parsePPUnknown();
1008 return;
1011 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
1012 case tok::pp_define:
1013 parsePPDefine();
1014 return;
1015 case tok::pp_if:
1016 parsePPIf(/*IfDef=*/false);
1017 break;
1018 case tok::pp_ifdef:
1019 case tok::pp_ifndef:
1020 parsePPIf(/*IfDef=*/true);
1021 break;
1022 case tok::pp_else:
1023 case tok::pp_elifdef:
1024 case tok::pp_elifndef:
1025 case tok::pp_elif:
1026 parsePPElse();
1027 break;
1028 case tok::pp_endif:
1029 parsePPEndIf();
1030 break;
1031 case tok::pp_pragma:
1032 parsePPPragma();
1033 break;
1034 default:
1035 parsePPUnknown();
1036 break;
1040 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1041 size_t Line = CurrentLines->size();
1042 if (CurrentLines == &PreprocessorDirectives)
1043 Line += Lines.size();
1045 if (Unreachable ||
1046 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1047 PPStack.push_back({PP_Unreachable, Line});
1048 } else {
1049 PPStack.push_back({PP_Conditional, Line});
1053 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1054 ++PPBranchLevel;
1055 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1056 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1057 PPLevelBranchIndex.push_back(0);
1058 PPLevelBranchCount.push_back(0);
1060 PPChainBranchIndex.push(Unreachable ? -1 : 0);
1061 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1062 conditionalCompilationCondition(Unreachable || Skip);
1065 void UnwrappedLineParser::conditionalCompilationAlternative() {
1066 if (!PPStack.empty())
1067 PPStack.pop_back();
1068 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1069 if (!PPChainBranchIndex.empty())
1070 ++PPChainBranchIndex.top();
1071 conditionalCompilationCondition(
1072 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1073 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1076 void UnwrappedLineParser::conditionalCompilationEnd() {
1077 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1078 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1079 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1080 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1082 // Guard against #endif's without #if.
1083 if (PPBranchLevel > -1)
1084 --PPBranchLevel;
1085 if (!PPChainBranchIndex.empty())
1086 PPChainBranchIndex.pop();
1087 if (!PPStack.empty())
1088 PPStack.pop_back();
1091 void UnwrappedLineParser::parsePPIf(bool IfDef) {
1092 bool IfNDef = FormatTok->is(tok::pp_ifndef);
1093 nextToken();
1094 bool Unreachable = false;
1095 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1096 Unreachable = true;
1097 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1098 Unreachable = true;
1099 conditionalCompilationStart(Unreachable);
1100 FormatToken *IfCondition = FormatTok;
1101 // If there's a #ifndef on the first line, and the only lines before it are
1102 // comments, it could be an include guard.
1103 bool MaybeIncludeGuard = IfNDef;
1104 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1105 for (auto &Line : Lines) {
1106 if (Line.Tokens.front().Tok->isNot(tok::comment)) {
1107 MaybeIncludeGuard = false;
1108 IncludeGuard = IG_Rejected;
1109 break;
1113 --PPBranchLevel;
1114 parsePPUnknown();
1115 ++PPBranchLevel;
1116 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1117 IncludeGuard = IG_IfNdefed;
1118 IncludeGuardToken = IfCondition;
1122 void UnwrappedLineParser::parsePPElse() {
1123 // If a potential include guard has an #else, it's not an include guard.
1124 if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1125 IncludeGuard = IG_Rejected;
1126 // Don't crash when there is an #else without an #if.
1127 assert(PPBranchLevel >= -1);
1128 if (PPBranchLevel == -1)
1129 conditionalCompilationStart(/*Unreachable=*/true);
1130 conditionalCompilationAlternative();
1131 --PPBranchLevel;
1132 parsePPUnknown();
1133 ++PPBranchLevel;
1136 void UnwrappedLineParser::parsePPEndIf() {
1137 conditionalCompilationEnd();
1138 parsePPUnknown();
1139 // If the #endif of a potential include guard is the last thing in the file,
1140 // then we found an include guard.
1141 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1142 Style.IndentPPDirectives != FormatStyle::PPDIS_None) {
1143 IncludeGuard = IG_Found;
1147 void UnwrappedLineParser::parsePPDefine() {
1148 nextToken();
1150 if (!FormatTok->Tok.getIdentifierInfo()) {
1151 IncludeGuard = IG_Rejected;
1152 IncludeGuardToken = nullptr;
1153 parsePPUnknown();
1154 return;
1157 if (IncludeGuard == IG_IfNdefed &&
1158 IncludeGuardToken->TokenText == FormatTok->TokenText) {
1159 IncludeGuard = IG_Defined;
1160 IncludeGuardToken = nullptr;
1161 for (auto &Line : Lines) {
1162 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1163 IncludeGuard = IG_Rejected;
1164 break;
1169 // In the context of a define, even keywords should be treated as normal
1170 // identifiers. Setting the kind to identifier is not enough, because we need
1171 // to treat additional keywords like __except as well, which are already
1172 // identifiers. Setting the identifier info to null interferes with include
1173 // guard processing above, and changes preprocessing nesting.
1174 FormatTok->Tok.setKind(tok::identifier);
1175 FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1176 nextToken();
1177 if (FormatTok->Tok.getKind() == tok::l_paren &&
1178 !FormatTok->hasWhitespaceBefore()) {
1179 parseParens();
1181 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1182 Line->Level += PPBranchLevel + 1;
1183 addUnwrappedLine();
1184 ++Line->Level;
1186 Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
1187 assert((int)Line->PPLevel >= 0);
1188 Line->InMacroBody = true;
1190 if (Style.SkipMacroDefinitionBody) {
1191 while (!eof()) {
1192 FormatTok->Finalized = true;
1193 FormatTok = Tokens->getNextToken();
1195 addUnwrappedLine();
1196 return;
1199 // Errors during a preprocessor directive can only affect the layout of the
1200 // preprocessor directive, and thus we ignore them. An alternative approach
1201 // would be to use the same approach we use on the file level (no
1202 // re-indentation if there was a structural error) within the macro
1203 // definition.
1204 parseFile();
1207 void UnwrappedLineParser::parsePPPragma() {
1208 Line->InPragmaDirective = true;
1209 parsePPUnknown();
1212 void UnwrappedLineParser::parsePPUnknown() {
1213 do {
1214 nextToken();
1215 } while (!eof());
1216 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1217 Line->Level += PPBranchLevel + 1;
1218 addUnwrappedLine();
1221 // Here we exclude certain tokens that are not usually the first token in an
1222 // unwrapped line. This is used in attempt to distinguish macro calls without
1223 // trailing semicolons from other constructs split to several lines.
1224 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1225 // Semicolon can be a null-statement, l_square can be a start of a macro or
1226 // a C++11 attribute, but this doesn't seem to be common.
1227 return !Tok.isOneOf(tok::semi, tok::l_brace,
1228 // Tokens that can only be used as binary operators and a
1229 // part of overloaded operator names.
1230 tok::period, tok::periodstar, tok::arrow, tok::arrowstar,
1231 tok::less, tok::greater, tok::slash, tok::percent,
1232 tok::lessless, tok::greatergreater, tok::equal,
1233 tok::plusequal, tok::minusequal, tok::starequal,
1234 tok::slashequal, tok::percentequal, tok::ampequal,
1235 tok::pipeequal, tok::caretequal, tok::greatergreaterequal,
1236 tok::lesslessequal,
1237 // Colon is used in labels, base class lists, initializer
1238 // lists, range-based for loops, ternary operator, but
1239 // should never be the first token in an unwrapped line.
1240 tok::colon,
1241 // 'noexcept' is a trailing annotation.
1242 tok::kw_noexcept);
1245 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1246 const FormatToken *FormatTok) {
1247 // FIXME: This returns true for C/C++ keywords like 'struct'.
1248 return FormatTok->is(tok::identifier) &&
1249 (!FormatTok->Tok.getIdentifierInfo() ||
1250 !FormatTok->isOneOf(
1251 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1252 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1253 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1254 Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1255 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1256 Keywords.kw_instanceof, Keywords.kw_interface,
1257 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1260 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1261 const FormatToken *FormatTok) {
1262 return FormatTok->Tok.isLiteral() ||
1263 FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1264 mustBeJSIdent(Keywords, FormatTok);
1267 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1268 // when encountered after a value (see mustBeJSIdentOrValue).
1269 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1270 const FormatToken *FormatTok) {
1271 return FormatTok->isOneOf(
1272 tok::kw_return, Keywords.kw_yield,
1273 // conditionals
1274 tok::kw_if, tok::kw_else,
1275 // loops
1276 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1277 // switch/case
1278 tok::kw_switch, tok::kw_case,
1279 // exceptions
1280 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1281 // declaration
1282 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1283 Keywords.kw_async, Keywords.kw_function,
1284 // import/export
1285 Keywords.kw_import, tok::kw_export);
1288 // Checks whether a token is a type in K&R C (aka C78).
1289 static bool isC78Type(const FormatToken &Tok) {
1290 return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1291 tok::kw_unsigned, tok::kw_float, tok::kw_double,
1292 tok::identifier);
1295 // This function checks whether a token starts the first parameter declaration
1296 // in a K&R C (aka C78) function definition, e.g.:
1297 // int f(a, b)
1298 // short a, b;
1299 // {
1300 // return a + b;
1301 // }
1302 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1303 const FormatToken *FuncName) {
1304 assert(Tok);
1305 assert(Next);
1306 assert(FuncName);
1308 if (FuncName->isNot(tok::identifier))
1309 return false;
1311 const FormatToken *Prev = FuncName->Previous;
1312 if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1313 return false;
1315 if (!isC78Type(*Tok) &&
1316 !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
1317 return false;
1320 if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1321 return false;
1323 Tok = Tok->Previous;
1324 if (!Tok || Tok->isNot(tok::r_paren))
1325 return false;
1327 Tok = Tok->Previous;
1328 if (!Tok || Tok->isNot(tok::identifier))
1329 return false;
1331 return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1334 bool UnwrappedLineParser::parseModuleImport() {
1335 assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
1337 if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
1338 !Token->Tok.getIdentifierInfo() &&
1339 !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) {
1340 return false;
1343 nextToken();
1344 while (!eof()) {
1345 if (FormatTok->is(tok::colon)) {
1346 FormatTok->setFinalizedType(TT_ModulePartitionColon);
1348 // Handle import <foo/bar.h> as we would an include statement.
1349 else if (FormatTok->is(tok::less)) {
1350 nextToken();
1351 while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1352 // Mark tokens up to the trailing line comments as implicit string
1353 // literals.
1354 if (FormatTok->isNot(tok::comment) &&
1355 !FormatTok->TokenText.starts_with("//")) {
1356 FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1358 nextToken();
1361 if (FormatTok->is(tok::semi)) {
1362 nextToken();
1363 break;
1365 nextToken();
1368 addUnwrappedLine();
1369 return true;
1372 // readTokenWithJavaScriptASI reads the next token and terminates the current
1373 // line if JavaScript Automatic Semicolon Insertion must
1374 // happen between the current token and the next token.
1376 // This method is conservative - it cannot cover all edge cases of JavaScript,
1377 // but only aims to correctly handle certain well known cases. It *must not*
1378 // return true in speculative cases.
1379 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1380 FormatToken *Previous = FormatTok;
1381 readToken();
1382 FormatToken *Next = FormatTok;
1384 bool IsOnSameLine =
1385 CommentsBeforeNextToken.empty()
1386 ? Next->NewlinesBefore == 0
1387 : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1388 if (IsOnSameLine)
1389 return;
1391 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1392 bool PreviousStartsTemplateExpr =
1393 Previous->is(TT_TemplateString) && Previous->TokenText.ends_with("${");
1394 if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1395 // If the line contains an '@' sign, the previous token might be an
1396 // annotation, which can precede another identifier/value.
1397 bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1398 return LineNode.Tok->is(tok::at);
1400 if (HasAt)
1401 return;
1403 if (Next->is(tok::exclaim) && PreviousMustBeValue)
1404 return addUnwrappedLine();
1405 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1406 bool NextEndsTemplateExpr =
1407 Next->is(TT_TemplateString) && Next->TokenText.starts_with("}");
1408 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1409 (PreviousMustBeValue ||
1410 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1411 tok::minusminus))) {
1412 return addUnwrappedLine();
1414 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1415 isJSDeclOrStmt(Keywords, Next)) {
1416 return addUnwrappedLine();
1420 void UnwrappedLineParser::parseStructuralElement(
1421 const FormatToken *OpeningBrace, IfStmtKind *IfKind,
1422 FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1423 if (Style.Language == FormatStyle::LK_TableGen &&
1424 FormatTok->is(tok::pp_include)) {
1425 nextToken();
1426 if (FormatTok->is(tok::string_literal))
1427 nextToken();
1428 addUnwrappedLine();
1429 return;
1432 if (IsCpp) {
1433 while (FormatTok->is(tok::l_square) && handleCppAttributes()) {
1435 } else if (Style.isVerilog()) {
1436 if (Keywords.isVerilogStructuredProcedure(*FormatTok)) {
1437 parseForOrWhileLoop(/*HasParens=*/false);
1438 return;
1440 if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) {
1441 parseForOrWhileLoop();
1442 return;
1444 if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
1445 Keywords.kw_assume, Keywords.kw_cover)) {
1446 parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
1447 return;
1450 // Skip things that can exist before keywords like 'if' and 'case'.
1451 while (true) {
1452 if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique,
1453 Keywords.kw_unique0)) {
1454 nextToken();
1455 } else if (FormatTok->is(tok::l_paren) &&
1456 Tokens->peekNextToken()->is(tok::star)) {
1457 parseParens();
1458 } else {
1459 break;
1464 // Tokens that only make sense at the beginning of a line.
1465 if (FormatTok->isAccessSpecifierKeyword()) {
1466 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1467 Style.isCSharp()) {
1468 nextToken();
1469 } else {
1470 parseAccessSpecifier();
1472 return;
1474 switch (FormatTok->Tok.getKind()) {
1475 case tok::kw_asm:
1476 nextToken();
1477 if (FormatTok->is(tok::l_brace)) {
1478 FormatTok->setFinalizedType(TT_InlineASMBrace);
1479 nextToken();
1480 while (FormatTok && !eof()) {
1481 if (FormatTok->is(tok::r_brace)) {
1482 FormatTok->setFinalizedType(TT_InlineASMBrace);
1483 nextToken();
1484 addUnwrappedLine();
1485 break;
1487 FormatTok->Finalized = true;
1488 nextToken();
1491 break;
1492 case tok::kw_namespace:
1493 parseNamespace();
1494 return;
1495 case tok::kw_if: {
1496 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1497 // field/method declaration.
1498 break;
1500 FormatToken *Tok = parseIfThenElse(IfKind);
1501 if (IfLeftBrace)
1502 *IfLeftBrace = Tok;
1503 return;
1505 case tok::kw_for:
1506 case tok::kw_while:
1507 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1508 // field/method declaration.
1509 break;
1511 parseForOrWhileLoop();
1512 return;
1513 case tok::kw_do:
1514 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1515 // field/method declaration.
1516 break;
1518 parseDoWhile();
1519 if (HasDoWhile)
1520 *HasDoWhile = true;
1521 return;
1522 case tok::kw_switch:
1523 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1524 // 'switch: string' field declaration.
1525 break;
1527 parseSwitch(/*IsExpr=*/false);
1528 return;
1529 case tok::kw_default: {
1530 // In Verilog default along with other labels are handled in the next loop.
1531 if (Style.isVerilog())
1532 break;
1533 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1534 // 'default: string' field declaration.
1535 break;
1537 auto *Default = FormatTok;
1538 nextToken();
1539 if (FormatTok->is(tok::colon)) {
1540 FormatTok->setFinalizedType(TT_CaseLabelColon);
1541 parseLabel();
1542 return;
1544 if (FormatTok->is(tok::arrow)) {
1545 FormatTok->setFinalizedType(TT_CaseLabelArrow);
1546 Default->setFinalizedType(TT_SwitchExpressionLabel);
1547 parseLabel();
1548 return;
1550 // e.g. "default void f() {}" in a Java interface.
1551 break;
1553 case tok::kw_case:
1554 // Proto: there are no switch/case statements.
1555 if (Style.Language == FormatStyle::LK_Proto) {
1556 nextToken();
1557 return;
1559 if (Style.isVerilog()) {
1560 parseBlock();
1561 addUnwrappedLine();
1562 return;
1564 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1565 // 'case: string' field declaration.
1566 nextToken();
1567 break;
1569 parseCaseLabel();
1570 return;
1571 case tok::kw_goto:
1572 nextToken();
1573 if (FormatTok->is(tok::kw_case))
1574 nextToken();
1575 break;
1576 case tok::kw_try:
1577 case tok::kw___try:
1578 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1579 // field/method declaration.
1580 break;
1582 parseTryCatch();
1583 return;
1584 case tok::kw_extern:
1585 nextToken();
1586 if (Style.isVerilog()) {
1587 // In Verilog and extern module declaration looks like a start of module.
1588 // But there is no body and endmodule. So we handle it separately.
1589 if (Keywords.isVerilogHierarchy(*FormatTok)) {
1590 parseVerilogHierarchyHeader();
1591 return;
1593 } else if (FormatTok->is(tok::string_literal)) {
1594 nextToken();
1595 if (FormatTok->is(tok::l_brace)) {
1596 if (Style.BraceWrapping.AfterExternBlock)
1597 addUnwrappedLine();
1598 // Either we indent or for backwards compatibility we follow the
1599 // AfterExternBlock style.
1600 unsigned AddLevels =
1601 (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1602 (Style.BraceWrapping.AfterExternBlock &&
1603 Style.IndentExternBlock ==
1604 FormatStyle::IEBS_AfterExternBlock)
1605 ? 1u
1606 : 0u;
1607 parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1608 addUnwrappedLine();
1609 return;
1612 break;
1613 case tok::kw_export:
1614 if (Style.isJavaScript()) {
1615 parseJavaScriptEs6ImportExport();
1616 return;
1618 if (IsCpp) {
1619 nextToken();
1620 if (FormatTok->is(tok::kw_namespace)) {
1621 parseNamespace();
1622 return;
1624 if (FormatTok->is(Keywords.kw_import) && parseModuleImport())
1625 return;
1627 break;
1628 case tok::kw_inline:
1629 nextToken();
1630 if (FormatTok->is(tok::kw_namespace)) {
1631 parseNamespace();
1632 return;
1634 break;
1635 case tok::identifier:
1636 if (FormatTok->is(TT_ForEachMacro)) {
1637 parseForOrWhileLoop();
1638 return;
1640 if (FormatTok->is(TT_MacroBlockBegin)) {
1641 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1642 /*MunchSemi=*/false);
1643 return;
1645 if (FormatTok->is(Keywords.kw_import)) {
1646 if (Style.isJavaScript()) {
1647 parseJavaScriptEs6ImportExport();
1648 return;
1650 if (Style.Language == FormatStyle::LK_Proto) {
1651 nextToken();
1652 if (FormatTok->is(tok::kw_public))
1653 nextToken();
1654 if (FormatTok->isNot(tok::string_literal))
1655 return;
1656 nextToken();
1657 if (FormatTok->is(tok::semi))
1658 nextToken();
1659 addUnwrappedLine();
1660 return;
1662 if (IsCpp && parseModuleImport())
1663 return;
1665 if (IsCpp && FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1666 Keywords.kw_slots, Keywords.kw_qslots)) {
1667 nextToken();
1668 if (FormatTok->is(tok::colon)) {
1669 nextToken();
1670 addUnwrappedLine();
1671 return;
1674 if (IsCpp && FormatTok->is(TT_StatementMacro)) {
1675 parseStatementMacro();
1676 return;
1678 if (IsCpp && FormatTok->is(TT_NamespaceMacro)) {
1679 parseNamespace();
1680 return;
1682 // In Verilog labels can be any expression, so we don't do them here.
1683 // JS doesn't have macros, and within classes colons indicate fields, not
1684 // labels.
1685 // TableGen doesn't have labels.
1686 if (!Style.isJavaScript() && !Style.isVerilog() && !Style.isTableGen() &&
1687 Tokens->peekNextToken()->is(tok::colon) && !Line->MustBeDeclaration) {
1688 nextToken();
1689 if (!Line->InMacroBody || CurrentLines->size() > 1)
1690 Line->Tokens.begin()->Tok->MustBreakBefore = true;
1691 FormatTok->setFinalizedType(TT_GotoLabelColon);
1692 parseLabel(!Style.IndentGotoLabels);
1693 if (HasLabel)
1694 *HasLabel = true;
1695 return;
1697 // In all other cases, parse the declaration.
1698 break;
1699 default:
1700 break;
1703 for (const bool InRequiresExpression =
1704 OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
1705 !eof();) {
1706 if (IsCpp && FormatTok->isCppAlternativeOperatorKeyword()) {
1707 if (auto *Next = Tokens->peekNextToken(/*SkipComment=*/true);
1708 Next && Next->isBinaryOperator()) {
1709 FormatTok->Tok.setKind(tok::identifier);
1712 const FormatToken *Previous = FormatTok->Previous;
1713 switch (FormatTok->Tok.getKind()) {
1714 case tok::at:
1715 nextToken();
1716 if (FormatTok->is(tok::l_brace)) {
1717 nextToken();
1718 parseBracedList();
1719 break;
1720 } else if (Style.Language == FormatStyle::LK_Java &&
1721 FormatTok->is(Keywords.kw_interface)) {
1722 nextToken();
1723 break;
1725 switch (FormatTok->Tok.getObjCKeywordID()) {
1726 case tok::objc_public:
1727 case tok::objc_protected:
1728 case tok::objc_package:
1729 case tok::objc_private:
1730 return parseAccessSpecifier();
1731 case tok::objc_interface:
1732 case tok::objc_implementation:
1733 return parseObjCInterfaceOrImplementation();
1734 case tok::objc_protocol:
1735 if (parseObjCProtocol())
1736 return;
1737 break;
1738 case tok::objc_end:
1739 return; // Handled by the caller.
1740 case tok::objc_optional:
1741 case tok::objc_required:
1742 nextToken();
1743 addUnwrappedLine();
1744 return;
1745 case tok::objc_autoreleasepool:
1746 nextToken();
1747 if (FormatTok->is(tok::l_brace)) {
1748 if (Style.BraceWrapping.AfterControlStatement ==
1749 FormatStyle::BWACS_Always) {
1750 addUnwrappedLine();
1752 parseBlock();
1754 addUnwrappedLine();
1755 return;
1756 case tok::objc_synchronized:
1757 nextToken();
1758 if (FormatTok->is(tok::l_paren)) {
1759 // Skip synchronization object
1760 parseParens();
1762 if (FormatTok->is(tok::l_brace)) {
1763 if (Style.BraceWrapping.AfterControlStatement ==
1764 FormatStyle::BWACS_Always) {
1765 addUnwrappedLine();
1767 parseBlock();
1769 addUnwrappedLine();
1770 return;
1771 case tok::objc_try:
1772 // This branch isn't strictly necessary (the kw_try case below would
1773 // do this too after the tok::at is parsed above). But be explicit.
1774 parseTryCatch();
1775 return;
1776 default:
1777 break;
1779 break;
1780 case tok::kw_requires: {
1781 if (IsCpp) {
1782 bool ParsedClause = parseRequires();
1783 if (ParsedClause)
1784 return;
1785 } else {
1786 nextToken();
1788 break;
1790 case tok::kw_enum:
1791 // Ignore if this is part of "template <enum ..." or "... -> enum" or
1792 // "template <..., enum ...>".
1793 if (Previous && Previous->isOneOf(tok::less, tok::arrow, tok::comma)) {
1794 nextToken();
1795 break;
1798 // parseEnum falls through and does not yet add an unwrapped line as an
1799 // enum definition can start a structural element.
1800 if (!parseEnum())
1801 break;
1802 // This only applies to C++ and Verilog.
1803 if (!IsCpp && !Style.isVerilog()) {
1804 addUnwrappedLine();
1805 return;
1807 break;
1808 case tok::kw_typedef:
1809 nextToken();
1810 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1811 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1812 Keywords.kw_CF_CLOSED_ENUM,
1813 Keywords.kw_NS_CLOSED_ENUM)) {
1814 parseEnum();
1816 break;
1817 case tok::kw_class:
1818 if (Style.isVerilog()) {
1819 parseBlock();
1820 addUnwrappedLine();
1821 return;
1823 if (Style.isTableGen()) {
1824 // Do nothing special. In this case the l_brace becomes FunctionLBrace.
1825 // This is same as def and so on.
1826 nextToken();
1827 break;
1829 [[fallthrough]];
1830 case tok::kw_struct:
1831 case tok::kw_union:
1832 if (parseStructLike())
1833 return;
1834 break;
1835 case tok::kw_decltype:
1836 nextToken();
1837 if (FormatTok->is(tok::l_paren)) {
1838 parseParens();
1839 assert(FormatTok->Previous);
1840 if (FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto,
1841 tok::l_paren)) {
1842 Line->SeenDecltypeAuto = true;
1845 break;
1846 case tok::period:
1847 nextToken();
1848 // In Java, classes have an implicit static member "class".
1849 if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1850 FormatTok->is(tok::kw_class)) {
1851 nextToken();
1853 if (Style.isJavaScript() && FormatTok &&
1854 FormatTok->Tok.getIdentifierInfo()) {
1855 // JavaScript only has pseudo keywords, all keywords are allowed to
1856 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1857 nextToken();
1859 break;
1860 case tok::semi:
1861 nextToken();
1862 addUnwrappedLine();
1863 return;
1864 case tok::r_brace:
1865 addUnwrappedLine();
1866 return;
1867 case tok::l_paren: {
1868 parseParens();
1869 // Break the unwrapped line if a K&R C function definition has a parameter
1870 // declaration.
1871 if (OpeningBrace || !IsCpp || !Previous || eof())
1872 break;
1873 if (isC78ParameterDecl(FormatTok,
1874 Tokens->peekNextToken(/*SkipComment=*/true),
1875 Previous)) {
1876 addUnwrappedLine();
1877 return;
1879 break;
1881 case tok::kw_operator:
1882 nextToken();
1883 if (FormatTok->isBinaryOperator())
1884 nextToken();
1885 break;
1886 case tok::caret:
1887 nextToken();
1888 // Block return type.
1889 if (FormatTok->Tok.isAnyIdentifier() || FormatTok->isTypeName(LangOpts)) {
1890 nextToken();
1891 // Return types: pointers are ok too.
1892 while (FormatTok->is(tok::star))
1893 nextToken();
1895 // Block argument list.
1896 if (FormatTok->is(tok::l_paren))
1897 parseParens();
1898 // Block body.
1899 if (FormatTok->is(tok::l_brace))
1900 parseChildBlock();
1901 break;
1902 case tok::l_brace:
1903 if (InRequiresExpression)
1904 FormatTok->setFinalizedType(TT_BracedListLBrace);
1905 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1906 IsDecltypeAutoFunction = Line->SeenDecltypeAuto;
1907 // A block outside of parentheses must be the last part of a
1908 // structural element.
1909 // FIXME: Figure out cases where this is not true, and add projections
1910 // for them (the one we know is missing are lambdas).
1911 if (Style.Language == FormatStyle::LK_Java &&
1912 Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1913 // If necessary, we could set the type to something different than
1914 // TT_FunctionLBrace.
1915 if (Style.BraceWrapping.AfterControlStatement ==
1916 FormatStyle::BWACS_Always) {
1917 addUnwrappedLine();
1919 } else if (Style.BraceWrapping.AfterFunction) {
1920 addUnwrappedLine();
1922 if (!Previous || Previous->isNot(TT_TypeDeclarationParen))
1923 FormatTok->setFinalizedType(TT_FunctionLBrace);
1924 parseBlock();
1925 IsDecltypeAutoFunction = false;
1926 addUnwrappedLine();
1927 return;
1929 // Otherwise this was a braced init list, and the structural
1930 // element continues.
1931 break;
1932 case tok::kw_try:
1933 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1934 // field/method declaration.
1935 nextToken();
1936 break;
1938 // We arrive here when parsing function-try blocks.
1939 if (Style.BraceWrapping.AfterFunction)
1940 addUnwrappedLine();
1941 parseTryCatch();
1942 return;
1943 case tok::identifier: {
1944 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1945 Line->MustBeDeclaration) {
1946 addUnwrappedLine();
1947 parseCSharpGenericTypeConstraint();
1948 break;
1950 if (FormatTok->is(TT_MacroBlockEnd)) {
1951 addUnwrappedLine();
1952 return;
1955 // Function declarations (as opposed to function expressions) are parsed
1956 // on their own unwrapped line by continuing this loop. Function
1957 // expressions (functions that are not on their own line) must not create
1958 // a new unwrapped line, so they are special cased below.
1959 size_t TokenCount = Line->Tokens.size();
1960 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1961 (TokenCount > 1 ||
1962 (TokenCount == 1 &&
1963 Line->Tokens.front().Tok->isNot(Keywords.kw_async)))) {
1964 tryToParseJSFunction();
1965 break;
1967 if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1968 FormatTok->is(Keywords.kw_interface)) {
1969 if (Style.isJavaScript()) {
1970 // In JavaScript/TypeScript, "interface" can be used as a standalone
1971 // identifier, e.g. in `var interface = 1;`. If "interface" is
1972 // followed by another identifier, it is very like to be an actual
1973 // interface declaration.
1974 unsigned StoredPosition = Tokens->getPosition();
1975 FormatToken *Next = Tokens->getNextToken();
1976 FormatTok = Tokens->setPosition(StoredPosition);
1977 if (!mustBeJSIdent(Keywords, Next)) {
1978 nextToken();
1979 break;
1982 parseRecord();
1983 addUnwrappedLine();
1984 return;
1987 if (Style.isVerilog()) {
1988 if (FormatTok->is(Keywords.kw_table)) {
1989 parseVerilogTable();
1990 return;
1992 if (Keywords.isVerilogBegin(*FormatTok) ||
1993 Keywords.isVerilogHierarchy(*FormatTok)) {
1994 parseBlock();
1995 addUnwrappedLine();
1996 return;
2000 if (!IsCpp && FormatTok->is(Keywords.kw_interface)) {
2001 if (parseStructLike())
2002 return;
2003 break;
2006 if (IsCpp && FormatTok->is(TT_StatementMacro)) {
2007 parseStatementMacro();
2008 return;
2011 // See if the following token should start a new unwrapped line.
2012 StringRef Text = FormatTok->TokenText;
2014 FormatToken *PreviousToken = FormatTok;
2015 nextToken();
2017 // JS doesn't have macros, and within classes colons indicate fields, not
2018 // labels.
2019 if (Style.isJavaScript())
2020 break;
2022 auto OneTokenSoFar = [&]() {
2023 auto I = Line->Tokens.begin(), E = Line->Tokens.end();
2024 while (I != E && I->Tok->is(tok::comment))
2025 ++I;
2026 if (Style.isVerilog())
2027 while (I != E && I->Tok->is(tok::hash))
2028 ++I;
2029 return I != E && (++I == E);
2031 if (OneTokenSoFar()) {
2032 // Recognize function-like macro usages without trailing semicolon as
2033 // well as free-standing macros like Q_OBJECT.
2034 bool FunctionLike = FormatTok->is(tok::l_paren);
2035 if (FunctionLike)
2036 parseParens();
2038 bool FollowedByNewline =
2039 CommentsBeforeNextToken.empty()
2040 ? FormatTok->NewlinesBefore > 0
2041 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
2043 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
2044 tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
2045 if (PreviousToken->isNot(TT_UntouchableMacroFunc))
2046 PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
2047 addUnwrappedLine();
2048 return;
2051 break;
2053 case tok::equal:
2054 if ((Style.isJavaScript() || Style.isCSharp()) &&
2055 FormatTok->is(TT_FatArrow)) {
2056 tryToParseChildBlock();
2057 break;
2060 nextToken();
2061 if (FormatTok->is(tok::l_brace)) {
2062 // Block kind should probably be set to BK_BracedInit for any language.
2063 // C# needs this change to ensure that array initialisers and object
2064 // initialisers are indented the same way.
2065 if (Style.isCSharp())
2066 FormatTok->setBlockKind(BK_BracedInit);
2067 // TableGen's defset statement has syntax of the form,
2068 // `defset <type> <name> = { <statement>... }`
2069 if (Style.isTableGen() &&
2070 Line->Tokens.begin()->Tok->is(Keywords.kw_defset)) {
2071 FormatTok->setFinalizedType(TT_FunctionLBrace);
2072 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2073 /*MunchSemi=*/false);
2074 addUnwrappedLine();
2075 break;
2077 nextToken();
2078 parseBracedList();
2079 } else if (Style.Language == FormatStyle::LK_Proto &&
2080 FormatTok->is(tok::less)) {
2081 nextToken();
2082 parseBracedList(/*IsAngleBracket=*/true);
2084 break;
2085 case tok::l_square:
2086 parseSquare();
2087 break;
2088 case tok::kw_new:
2089 parseNew();
2090 break;
2091 case tok::kw_switch:
2092 if (Style.Language == FormatStyle::LK_Java)
2093 parseSwitch(/*IsExpr=*/true);
2094 else
2095 nextToken();
2096 break;
2097 case tok::kw_case:
2098 // Proto: there are no switch/case statements.
2099 if (Style.Language == FormatStyle::LK_Proto) {
2100 nextToken();
2101 return;
2103 // In Verilog switch is called case.
2104 if (Style.isVerilog()) {
2105 parseBlock();
2106 addUnwrappedLine();
2107 return;
2109 if (Style.isJavaScript() && Line->MustBeDeclaration) {
2110 // 'case: string' field declaration.
2111 nextToken();
2112 break;
2114 parseCaseLabel();
2115 break;
2116 case tok::kw_default:
2117 nextToken();
2118 if (Style.isVerilog()) {
2119 if (FormatTok->is(tok::colon)) {
2120 // The label will be handled in the next iteration.
2121 break;
2123 if (FormatTok->is(Keywords.kw_clocking)) {
2124 // A default clocking block.
2125 parseBlock();
2126 addUnwrappedLine();
2127 return;
2129 parseVerilogCaseLabel();
2130 return;
2132 break;
2133 case tok::colon:
2134 nextToken();
2135 if (Style.isVerilog()) {
2136 parseVerilogCaseLabel();
2137 return;
2139 break;
2140 case tok::greater:
2141 nextToken();
2142 if (FormatTok->is(tok::l_brace))
2143 FormatTok->Previous->setFinalizedType(TT_TemplateCloser);
2144 break;
2145 default:
2146 nextToken();
2147 break;
2152 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2153 assert(FormatTok->is(tok::l_brace));
2154 if (!Style.isCSharp())
2155 return false;
2156 // See if it's a property accessor.
2157 if (!FormatTok->Previous || FormatTok->Previous->isNot(tok::identifier))
2158 return false;
2160 // See if we are inside a property accessor.
2162 // Record the current tokenPosition so that we can advance and
2163 // reset the current token. `Next` is not set yet so we need
2164 // another way to advance along the token stream.
2165 unsigned int StoredPosition = Tokens->getPosition();
2166 FormatToken *Tok = Tokens->getNextToken();
2168 // A trivial property accessor is of the form:
2169 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2170 // Track these as they do not require line breaks to be introduced.
2171 bool HasSpecialAccessor = false;
2172 bool IsTrivialPropertyAccessor = true;
2173 bool HasAttribute = false;
2174 while (!eof()) {
2175 if (const bool IsAccessorKeyword =
2176 Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set);
2177 IsAccessorKeyword || Tok->isAccessSpecifierKeyword() ||
2178 Tok->isOneOf(tok::l_square, tok::semi, Keywords.kw_internal)) {
2179 if (IsAccessorKeyword)
2180 HasSpecialAccessor = true;
2181 else if (Tok->is(tok::l_square))
2182 HasAttribute = true;
2183 Tok = Tokens->getNextToken();
2184 continue;
2186 if (Tok->isNot(tok::r_brace))
2187 IsTrivialPropertyAccessor = false;
2188 break;
2191 if (!HasSpecialAccessor || HasAttribute) {
2192 Tokens->setPosition(StoredPosition);
2193 return false;
2196 // Try to parse the property accessor:
2197 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2198 Tokens->setPosition(StoredPosition);
2199 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2200 addUnwrappedLine();
2201 nextToken();
2202 do {
2203 switch (FormatTok->Tok.getKind()) {
2204 case tok::r_brace:
2205 nextToken();
2206 if (FormatTok->is(tok::equal)) {
2207 while (!eof() && FormatTok->isNot(tok::semi))
2208 nextToken();
2209 nextToken();
2211 addUnwrappedLine();
2212 return true;
2213 case tok::l_brace:
2214 ++Line->Level;
2215 parseBlock(/*MustBeDeclaration=*/true);
2216 addUnwrappedLine();
2217 --Line->Level;
2218 break;
2219 case tok::equal:
2220 if (FormatTok->is(TT_FatArrow)) {
2221 ++Line->Level;
2222 do {
2223 nextToken();
2224 } while (!eof() && FormatTok->isNot(tok::semi));
2225 nextToken();
2226 addUnwrappedLine();
2227 --Line->Level;
2228 break;
2230 nextToken();
2231 break;
2232 default:
2233 if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
2234 Keywords.kw_set) &&
2235 !IsTrivialPropertyAccessor) {
2236 // Non-trivial get/set needs to be on its own line.
2237 addUnwrappedLine();
2239 nextToken();
2241 } while (!eof());
2243 // Unreachable for well-formed code (paired '{' and '}').
2244 return true;
2247 bool UnwrappedLineParser::tryToParseLambda() {
2248 assert(FormatTok->is(tok::l_square));
2249 if (!IsCpp) {
2250 nextToken();
2251 return false;
2253 FormatToken &LSquare = *FormatTok;
2254 if (!tryToParseLambdaIntroducer())
2255 return false;
2257 bool SeenArrow = false;
2258 bool InTemplateParameterList = false;
2260 while (FormatTok->isNot(tok::l_brace)) {
2261 if (FormatTok->isTypeName(LangOpts) || FormatTok->isAttribute()) {
2262 nextToken();
2263 continue;
2265 switch (FormatTok->Tok.getKind()) {
2266 case tok::l_brace:
2267 break;
2268 case tok::l_paren:
2269 parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
2270 break;
2271 case tok::l_square:
2272 parseSquare();
2273 break;
2274 case tok::less:
2275 assert(FormatTok->Previous);
2276 if (FormatTok->Previous->is(tok::r_square))
2277 InTemplateParameterList = true;
2278 nextToken();
2279 break;
2280 case tok::kw_auto:
2281 case tok::kw_class:
2282 case tok::kw_struct:
2283 case tok::kw_union:
2284 case tok::kw_template:
2285 case tok::kw_typename:
2286 case tok::amp:
2287 case tok::star:
2288 case tok::kw_const:
2289 case tok::kw_constexpr:
2290 case tok::kw_consteval:
2291 case tok::comma:
2292 case tok::greater:
2293 case tok::identifier:
2294 case tok::numeric_constant:
2295 case tok::coloncolon:
2296 case tok::kw_mutable:
2297 case tok::kw_noexcept:
2298 case tok::kw_static:
2299 nextToken();
2300 break;
2301 // Specialization of a template with an integer parameter can contain
2302 // arithmetic, logical, comparison and ternary operators.
2304 // FIXME: This also accepts sequences of operators that are not in the scope
2305 // of a template argument list.
2307 // In a C++ lambda a template type can only occur after an arrow. We use
2308 // this as an heuristic to distinguish between Objective-C expressions
2309 // followed by an `a->b` expression, such as:
2310 // ([obj func:arg] + a->b)
2311 // Otherwise the code below would parse as a lambda.
2312 case tok::plus:
2313 case tok::minus:
2314 case tok::exclaim:
2315 case tok::tilde:
2316 case tok::slash:
2317 case tok::percent:
2318 case tok::lessless:
2319 case tok::pipe:
2320 case tok::pipepipe:
2321 case tok::ampamp:
2322 case tok::caret:
2323 case tok::equalequal:
2324 case tok::exclaimequal:
2325 case tok::greaterequal:
2326 case tok::lessequal:
2327 case tok::question:
2328 case tok::colon:
2329 case tok::ellipsis:
2330 case tok::kw_true:
2331 case tok::kw_false:
2332 if (SeenArrow || InTemplateParameterList) {
2333 nextToken();
2334 break;
2336 return true;
2337 case tok::arrow:
2338 // This might or might not actually be a lambda arrow (this could be an
2339 // ObjC method invocation followed by a dereferencing arrow). We might
2340 // reset this back to TT_Unknown in TokenAnnotator.
2341 FormatTok->setFinalizedType(TT_LambdaArrow);
2342 SeenArrow = true;
2343 nextToken();
2344 break;
2345 case tok::kw_requires: {
2346 auto *RequiresToken = FormatTok;
2347 nextToken();
2348 parseRequiresClause(RequiresToken);
2349 break;
2351 case tok::equal:
2352 if (!InTemplateParameterList)
2353 return true;
2354 nextToken();
2355 break;
2356 default:
2357 return true;
2361 FormatTok->setFinalizedType(TT_LambdaLBrace);
2362 LSquare.setFinalizedType(TT_LambdaLSquare);
2364 NestedLambdas.push_back(Line->SeenDecltypeAuto);
2365 parseChildBlock();
2366 assert(!NestedLambdas.empty());
2367 NestedLambdas.pop_back();
2369 return true;
2372 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2373 const FormatToken *Previous = FormatTok->Previous;
2374 const FormatToken *LeftSquare = FormatTok;
2375 nextToken();
2376 if ((Previous && ((Previous->Tok.getIdentifierInfo() &&
2377 !Previous->isOneOf(tok::kw_return, tok::kw_co_await,
2378 tok::kw_co_yield, tok::kw_co_return)) ||
2379 Previous->closesScope())) ||
2380 LeftSquare->isCppStructuredBinding(IsCpp)) {
2381 return false;
2383 if (FormatTok->is(tok::l_square) || tok::isLiteral(FormatTok->Tok.getKind()))
2384 return false;
2385 if (FormatTok->is(tok::r_square)) {
2386 const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
2387 if (Next->is(tok::greater))
2388 return false;
2390 parseSquare(/*LambdaIntroducer=*/true);
2391 return true;
2394 void UnwrappedLineParser::tryToParseJSFunction() {
2395 assert(FormatTok->is(Keywords.kw_function));
2396 if (FormatTok->is(Keywords.kw_async))
2397 nextToken();
2398 // Consume "function".
2399 nextToken();
2401 // Consume * (generator function). Treat it like C++'s overloaded operators.
2402 if (FormatTok->is(tok::star)) {
2403 FormatTok->setFinalizedType(TT_OverloadedOperator);
2404 nextToken();
2407 // Consume function name.
2408 if (FormatTok->is(tok::identifier))
2409 nextToken();
2411 if (FormatTok->isNot(tok::l_paren))
2412 return;
2414 // Parse formal parameter list.
2415 parseParens();
2417 if (FormatTok->is(tok::colon)) {
2418 // Parse a type definition.
2419 nextToken();
2421 // Eat the type declaration. For braced inline object types, balance braces,
2422 // otherwise just parse until finding an l_brace for the function body.
2423 if (FormatTok->is(tok::l_brace))
2424 tryToParseBracedList();
2425 else
2426 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2427 nextToken();
2430 if (FormatTok->is(tok::semi))
2431 return;
2433 parseChildBlock();
2436 bool UnwrappedLineParser::tryToParseBracedList() {
2437 if (FormatTok->is(BK_Unknown))
2438 calculateBraceTypes();
2439 assert(FormatTok->isNot(BK_Unknown));
2440 if (FormatTok->is(BK_Block))
2441 return false;
2442 nextToken();
2443 parseBracedList();
2444 return true;
2447 bool UnwrappedLineParser::tryToParseChildBlock() {
2448 assert(Style.isJavaScript() || Style.isCSharp());
2449 assert(FormatTok->is(TT_FatArrow));
2450 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2451 // They always start an expression or a child block if followed by a curly
2452 // brace.
2453 nextToken();
2454 if (FormatTok->isNot(tok::l_brace))
2455 return false;
2456 parseChildBlock();
2457 return true;
2460 bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) {
2461 assert(!IsAngleBracket || !IsEnum);
2462 bool HasError = false;
2464 // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2465 // replace this by using parseAssignmentExpression() inside.
2466 do {
2467 if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2468 tryToParseChildBlock()) {
2469 continue;
2471 if (Style.isJavaScript()) {
2472 if (FormatTok->is(Keywords.kw_function)) {
2473 tryToParseJSFunction();
2474 continue;
2476 if (FormatTok->is(tok::l_brace)) {
2477 // Could be a method inside of a braced list `{a() { return 1; }}`.
2478 if (tryToParseBracedList())
2479 continue;
2480 parseChildBlock();
2483 if (FormatTok->is(IsAngleBracket ? tok::greater : tok::r_brace)) {
2484 if (IsEnum) {
2485 FormatTok->setBlockKind(BK_Block);
2486 if (!Style.AllowShortEnumsOnASingleLine)
2487 addUnwrappedLine();
2489 nextToken();
2490 return !HasError;
2492 switch (FormatTok->Tok.getKind()) {
2493 case tok::l_square:
2494 if (Style.isCSharp())
2495 parseSquare();
2496 else
2497 tryToParseLambda();
2498 break;
2499 case tok::l_paren:
2500 parseParens();
2501 // JavaScript can just have free standing methods and getters/setters in
2502 // object literals. Detect them by a "{" following ")".
2503 if (Style.isJavaScript()) {
2504 if (FormatTok->is(tok::l_brace))
2505 parseChildBlock();
2506 break;
2508 break;
2509 case tok::l_brace:
2510 // Assume there are no blocks inside a braced init list apart
2511 // from the ones we explicitly parse out (like lambdas).
2512 FormatTok->setBlockKind(BK_BracedInit);
2513 if (!IsAngleBracket) {
2514 auto *Prev = FormatTok->Previous;
2515 if (Prev && Prev->is(tok::greater))
2516 Prev->setFinalizedType(TT_TemplateCloser);
2518 nextToken();
2519 parseBracedList();
2520 break;
2521 case tok::less:
2522 nextToken();
2523 if (IsAngleBracket)
2524 parseBracedList(/*IsAngleBracket=*/true);
2525 break;
2526 case tok::semi:
2527 // JavaScript (or more precisely TypeScript) can have semicolons in braced
2528 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2529 // used for error recovery if we have otherwise determined that this is
2530 // a braced list.
2531 if (Style.isJavaScript()) {
2532 nextToken();
2533 break;
2535 HasError = true;
2536 if (!IsEnum)
2537 return false;
2538 nextToken();
2539 break;
2540 case tok::comma:
2541 nextToken();
2542 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2543 addUnwrappedLine();
2544 break;
2545 default:
2546 nextToken();
2547 break;
2549 } while (!eof());
2550 return false;
2553 /// \brief Parses a pair of parentheses (and everything between them).
2554 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2555 /// double ampersands. This applies for all nested scopes as well.
2557 /// Returns whether there is a `=` token between the parentheses.
///
/// Review notes:
/// - The current token must be the opening `(`.
/// - A `=` found inside nested parentheses is reported for the enclosing pair
///   too, and C# fat arrows count as well because they share tok::equal.
/// - Parsing also stops, without consuming anything further, at an unmatched
///   `}` and at the end of the input; the value collected so far is returned.
/// - On the matching `)`, both parens may be flagged `Optional` when
///   Style.RemoveParentheses is above RPS_Leave.
2558 bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2559 assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2560 auto *LeftParen = FormatTok;
2561 bool SeenComma = false;
2562 bool SeenEqual = false;
2563 bool MightBeFoldExpr = false;
// `({` directly after the opening paren; such a pair is never marked optional
// below.
2564 const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace);
2565 nextToken();
2566 do {
2567 switch (FormatTok->Tok.getKind()) {
2568 case tok::l_paren:
// Propagate a `=` seen in the nested pair to this pair.
2569 if (parseParens(AmpAmpTokenType))
2570 SeenEqual = true;
2571 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2572 parseChildBlock();
2573 break;
2574 case tok::r_paren: {
2575 auto *Prev = LeftParen->Previous;
// Redundant-parentheses detection. It is skipped for possible statement
// expressions, possible fold expressions (an `...` was seen) and macro bodies.
2576 if (!MightBeStmtExpr && !MightBeFoldExpr && !Line->InMacroBody &&
2577 Style.RemoveParentheses > FormatStyle::RPS_Leave) {
2578 const auto *Next = Tokens->peekNextToken();
// `((...))`: this pair is directly wrapped by another pair.
2579 const bool DoubleParens =
2580 Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren);
// `(...)` used as a whole element between `(`/`,` and `,`/`)`.
2581 const bool CommaSeparated =
2582 !DoubleParens && Prev && Prev->isOneOf(tok::l_paren, tok::comma) &&
2583 Next && Next->isOneOf(tok::comma, tok::r_paren);
2584 const auto *PrevPrev = Prev ? Prev->getPreviousNonComment() : nullptr;
// Double parens are kept after __attribute__/decltype, when the pair contains
// a comma, and for `if`/`while`/`if constexpr` conditions containing a `=`.
2585 const bool Excluded =
2586 PrevPrev &&
2587 (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) ||
2588 SeenComma ||
2589 (SeenEqual &&
2590 (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) ||
2591 PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if))));
// `return (...);` / `co_return (...);` under RPS_ReturnStatement, unless the
// enclosing function (IsDecltypeAutoFunction) or the innermost lambda
// (NestedLambdas.back()) is flagged as decltype(auto).
2592 const bool ReturnParens =
2593 Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement &&
2594 ((NestedLambdas.empty() && !IsDecltypeAutoFunction) ||
2595 (!NestedLambdas.empty() && !NestedLambdas.back())) &&
2596 Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next &&
2597 Next->is(tok::semi);
2598 if ((DoubleParens && !Excluded) || (CommaSeparated && !SeenComma) ||
2599 ReturnParens) {
2600 LeftParen->Optional = true;
2601 FormatTok->Optional = true;
2604 if (Prev) {
2605 if (Prev->is(TT_TypenameMacro)) {
2606 LeftParen->setFinalizedType(TT_TypeDeclarationParen);
2607 FormatTok->setFinalizedType(TT_TypeDeclarationParen);
// `>()`: an empty pair right after `>` finalizes that `>` as a template closer.
2608 } else if (Prev->is(tok::greater) && FormatTok->Previous == LeftParen) {
2609 Prev->setFinalizedType(TT_TemplateCloser);
2612 nextToken();
2613 return SeenEqual;
2615 case tok::r_brace:
2616 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2617 return SeenEqual;
2618 case tok::l_square:
2619 tryToParseLambda();
2620 break;
2621 case tok::l_brace:
2622 if (!tryToParseBracedList())
2623 parseChildBlock();
2624 break;
2625 case tok::at:
// `@{ ... }` is parsed as a braced list.
2626 nextToken();
2627 if (FormatTok->is(tok::l_brace)) {
2628 nextToken();
2629 parseBracedList();
2631 break;
2632 case tok::comma:
2633 SeenComma = true;
2634 nextToken();
2635 break;
2636 case tok::ellipsis:
2637 MightBeFoldExpr = true;
2638 nextToken();
2639 break;
2640 case tok::equal:
// SeenEqual is set before the fat-arrow check, so `=>` counts as a `=` too.
2641 SeenEqual = true;
2642 if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2643 tryToParseChildBlock();
2644 else
2645 nextToken();
2646 break;
2647 case tok::kw_class:
2648 if (Style.isJavaScript())
2649 parseRecord(/*ParseAsExpr=*/true);
2650 else
2651 nextToken();
2652 break;
2653 case tok::identifier:
2654 if (Style.isJavaScript() && (FormatTok->is(Keywords.kw_function)))
2655 tryToParseJSFunction();
2656 else
2657 nextToken();
2658 break;
2659 case tok::kw_switch:
2660 if (Style.Language == FormatStyle::LK_Java)
2661 parseSwitch(/*IsExpr=*/true);
2662 else
2663 nextToken();
2664 break;
2665 case tok::kw_requires: {
2666 auto RequiresToken = FormatTok;
2667 nextToken();
2668 parseRequiresExpression(RequiresToken);
2669 break;
2671 case tok::ampamp:
2672 if (AmpAmpTokenType != TT_Unknown)
2673 FormatTok->setFinalizedType(AmpAmpTokenType);
2674 [[fallthrough]];
2675 default:
2676 nextToken();
2677 break;
2679 } while (!eof());
// Only reached when the input ends before the matching `)`.
2680 return SeenEqual;
2683 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2684 if (!LambdaIntroducer) {
2685 assert(FormatTok->is(tok::l_square) && "'[' expected.");
2686 if (tryToParseLambda())
2687 return;
2689 do {
2690 switch (FormatTok->Tok.getKind()) {
2691 case tok::l_paren:
2692 parseParens();
2693 break;
2694 case tok::r_square:
2695 nextToken();
2696 return;
2697 case tok::r_brace:
2698 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2699 return;
2700 case tok::l_square:
2701 parseSquare();
2702 break;
2703 case tok::l_brace: {
2704 if (!tryToParseBracedList())
2705 parseChildBlock();
2706 break;
2708 case tok::at:
2709 case tok::colon:
2710 nextToken();
2711 if (FormatTok->is(tok::l_brace)) {
2712 nextToken();
2713 parseBracedList();
2715 break;
2716 default:
2717 nextToken();
2718 break;
2720 } while (!eof());
2723 void UnwrappedLineParser::keepAncestorBraces() {
2724 if (!Style.RemoveBracesLLVM)
2725 return;
2727 const int MaxNestingLevels = 2;
2728 const int Size = NestedTooDeep.size();
2729 if (Size >= MaxNestingLevels)
2730 NestedTooDeep[Size - MaxNestingLevels] = true;
2731 NestedTooDeep.push_back(false);
2734 static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2735 for (const auto &Token : llvm::reverse(Line.Tokens))
2736 if (Token.Tok->isNot(tok::comment))
2737 return Token.Tok;
2739 return nullptr;
2742 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2743 FormatToken *Tok = nullptr;
2745 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2746 PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) {
2747 Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never
2748 ? getLastNonComment(*Line)
2749 : Line->Tokens.back().Tok;
2750 assert(Tok);
2751 if (Tok->BraceCount < 0) {
2752 assert(Tok->BraceCount == -1);
2753 Tok = nullptr;
2754 } else {
2755 Tok->BraceCount = -1;
2759 addUnwrappedLine();
2760 ++Line->Level;
2761 ++Line->UnbracedBodyLevel;
2762 parseStructuralElement();
2763 --Line->UnbracedBodyLevel;
2765 if (Tok) {
2766 assert(!Line->InPPDirective);
2767 Tok = nullptr;
2768 for (const auto &L : llvm::reverse(*CurrentLines)) {
2769 if (!L.InPPDirective && getLastNonComment(L)) {
2770 Tok = L.Tokens.back().Tok;
2771 break;
2774 assert(Tok);
2775 ++Tok->BraceCount;
2778 if (CheckEOF && eof())
2779 addUnwrappedLine();
2781 --Line->Level;
2784 static void markOptionalBraces(FormatToken *LeftBrace) {
2785 if (!LeftBrace)
2786 return;
2788 assert(LeftBrace->is(tok::l_brace));
2790 FormatToken *RightBrace = LeftBrace->MatchingParen;
2791 if (!RightBrace) {
2792 assert(!LeftBrace->Optional);
2793 return;
2796 assert(RightBrace->is(tok::r_brace));
2797 assert(RightBrace->MatchingParen == LeftBrace);
2798 assert(LeftBrace->Optional == RightBrace->Optional);
2800 LeftBrace->Optional = true;
2801 RightBrace->Optional = true;
2804 void UnwrappedLineParser::handleAttributes() {
2805 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2806 if (FormatTok->isAttribute())
2807 nextToken();
2808 else if (FormatTok->is(tok::l_square))
2809 handleCppAttributes();
2812 bool UnwrappedLineParser::handleCppAttributes() {
2813 // Handle [[likely]] / [[unlikely]] attributes.
2814 assert(FormatTok->is(tok::l_square));
2815 if (!tryToParseSimpleAttribute())
2816 return false;
2817 parseSquare();
2818 return true;
2821 /// Returns whether \c Tok begins a block.
2822 bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
2823 // FIXME: rename the function or make
2824 // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2825 return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
2826 : Tok.is(tok::l_brace);
/// Parses an `if` statement together with its `else` / `else if` parts. In
/// Verilog this is also used for `restrict`, `assert`, `assume` and `cover`.
///
/// \param IfKind optional out-parameter. It is written only when
/// Style.RemoveBracesLLVM is set and the end of the function is reached; it
/// then receives IfOnly, IfElse or IfElseIf.
/// \param KeepBraces when true, the braces of the `if` body are kept even
/// with Style.RemoveBracesLLVM.
/// \param IsVerilogAssert enables the Verilog assert forms: `#0`/`final`/
/// `property`/`sequence` after the keyword, an optional then-action, and no
/// `else if` chaining.
///
/// \returns the left brace of the `if` body (null for an unbraced body) when
/// Style.RemoveBracesLLVM is set; nullptr when it is not set or when a Verilog
/// assert has no action.
2829 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2830 bool KeepBraces,
2831 bool IsVerilogAssert) {
2832 assert((FormatTok->is(tok::kw_if) ||
2833 (Style.isVerilog() &&
2834 FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
2835 Keywords.kw_assume, Keywords.kw_cover))) &&
2836 "'if' expected");
2837 nextToken();
2839 if (IsVerilogAssert) {
2840 // Handle `assert #0` and `assert final`.
2841 if (FormatTok->is(Keywords.kw_verilogHash)) {
2842 nextToken();
2843 if (FormatTok->is(tok::numeric_constant))
2844 nextToken();
2845 } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property,
2846 Keywords.kw_sequence)) {
2847 nextToken();
2851 // TableGen's if statement has the form of `if <cond> then { ... }`.
2852 if (Style.isTableGen()) {
2853 while (!eof() && FormatTok->isNot(Keywords.kw_then)) {
2854 // Simply skip until then. This range only contains a value.
2855 nextToken();
2859 // Handle `if !consteval`.
2860 if (FormatTok->is(tok::exclaim))
2861 nextToken();
// For `if consteval` KeepIfBraces stays true and no condition is parsed.
2863 bool KeepIfBraces = true;
2864 if (FormatTok->is(tok::kw_consteval)) {
2865 nextToken();
2866 } else {
2867 KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
// Skip `constexpr` or a single identifier in front of the condition.
2868 if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
2869 nextToken();
2870 if (FormatTok->is(tok::l_paren)) {
2871 FormatTok->setFinalizedType(TT_ConditionLParen);
2872 parseParens();
2875 handleAttributes();
2876 // The then action is optional in Verilog assert statements.
2877 if (IsVerilogAssert && FormatTok->is(tok::semi)) {
2878 nextToken();
2879 addUnwrappedLine();
2880 return nullptr;
2883 bool NeedsUnwrappedLine = false;
// Pushes this statement's NestedTooDeep entry (only with RemoveBracesLLVM);
// it is popped again near the end of this function.
2884 keepAncestorBraces();
2886 FormatToken *IfLeftBrace = nullptr;
2887 IfStmtKind IfBlockKind = IfStmtKind::NotIf;
// Then-branch: a braced block, nothing (Verilog assert directly followed by
// `else`), or an unbraced body.
2889 if (isBlockBegin(*FormatTok)) {
2890 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
2891 IfLeftBrace = FormatTok;
2892 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2893 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2894 /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
2895 setPreviousRBraceType(TT_ControlStatementRBrace);
// Without BeforeElse the line break is deferred until it is known that no
// `else` follows (see NeedsUnwrappedLine below).
2896 if (Style.BraceWrapping.BeforeElse)
2897 addUnwrappedLine();
2898 else
2899 NeedsUnwrappedLine = true;
2900 } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) {
2901 addUnwrappedLine();
2902 } else {
2903 parseUnbracedBody();
// The if-braces must also be kept when the pair was unlinked (null
// MatchingParen), when this statement was flagged in NestedTooDeep, or when
// the body reported IfOnly / IfElseIf.
2906 if (Style.RemoveBracesLLVM) {
2907 assert(!NestedTooDeep.empty());
2908 KeepIfBraces = KeepIfBraces ||
2909 (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
2910 NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
2911 IfBlockKind == IfStmtKind::IfElseIf;
2914 bool KeepElseBraces = KeepIfBraces;
2915 FormatToken *ElseLeftBrace = nullptr;
2916 IfStmtKind Kind = IfStmtKind::IfOnly;
2918 if (FormatTok->is(tok::kw_else)) {
2919 if (Style.RemoveBracesLLVM) {
2920 NestedTooDeep.back() = false;
2921 Kind = IfStmtKind::IfElse;
2923 nextToken();
2924 handleAttributes();
2925 if (isBlockBegin(*FormatTok)) {
// Remember whether the else-block starts with `if`, i.e. `else { if ...`.
2926 const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
2927 FormatTok->setFinalizedType(TT_ElseLBrace);
2928 ElseLeftBrace = FormatTok;
2929 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2930 IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
2931 FormatToken *IfLBrace =
2932 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2933 /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
2934 setPreviousRBraceType(TT_ElseRBrace);
2935 if (FormatTok->is(tok::kw_else)) {
2936 KeepElseBraces = KeepElseBraces ||
2937 ElseBlockKind == IfStmtKind::IfOnly ||
2938 ElseBlockKind == IfStmtKind::IfElseIf;
// `else { if ... }` whose inner if-braces are not optional: the else-braces
// themselves are marked optional instead.
2939 } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
2940 KeepElseBraces = true;
2941 assert(ElseLeftBrace->MatchingParen);
2942 markOptionalBraces(ElseLeftBrace);
2944 addUnwrappedLine();
2945 } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) {
2946 const FormatToken *Previous = Tokens->getPreviousToken();
2947 assert(Previous);
// A comment between `else` and `if` puts the nested `if` on its own line,
// one level deeper.
2948 const bool IsPrecededByComment = Previous->is(tok::comment);
2949 if (IsPrecededByComment) {
2950 addUnwrappedLine();
2951 ++Line->Level;
2953 bool TooDeep = true;
// This statement's NestedTooDeep entry is taken off the stack while the
// chained `else if` is parsed and pushed back right afterwards.
2954 if (Style.RemoveBracesLLVM) {
2955 Kind = IfStmtKind::IfElseIf;
2956 TooDeep = NestedTooDeep.pop_back_val();
2958 ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
2959 if (Style.RemoveBracesLLVM)
2960 NestedTooDeep.push_back(TooDeep);
2961 if (IsPrecededByComment)
2962 --Line->Level;
2963 } else {
2964 parseUnbracedBody(/*CheckEOF=*/true);
2966 } else {
2967 KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
2968 if (NeedsUnwrappedLine)
2969 addUnwrappedLine();
// Without RemoveBracesLLVM there is no brace bookkeeping to finish; *IfKind
// is left untouched on this path.
2972 if (!Style.RemoveBracesLLVM)
2973 return nullptr;
2975 assert(!NestedTooDeep.empty());
2976 KeepElseBraces = KeepElseBraces ||
2977 (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
2978 NestedTooDeep.back();
2980 NestedTooDeep.pop_back();
2982 if (!KeepIfBraces && !KeepElseBraces) {
2983 markOptionalBraces(IfLeftBrace);
2984 markOptionalBraces(ElseLeftBrace);
2985 } else if (IfLeftBrace) {
// The braces are kept: unlink the pair. The keep-braces checks above read a
// null MatchingParen as "must keep", and markOptionalBraces() skips such a
// brace.
2986 FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
2987 if (IfRightBrace) {
2988 assert(IfRightBrace->MatchingParen == IfLeftBrace);
2989 assert(!IfLeftBrace->Optional);
2990 assert(!IfRightBrace->Optional);
2991 IfLeftBrace->MatchingParen = nullptr;
2992 IfRightBrace->MatchingParen = nullptr;
2996 if (IfKind)
2997 *IfKind = Kind;
2999 return IfLeftBrace;
3002 void UnwrappedLineParser::parseTryCatch() {
3003 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
3004 nextToken();
3005 bool NeedsUnwrappedLine = false;
3006 bool HasCtorInitializer = false;
3007 if (FormatTok->is(tok::colon)) {
3008 auto *Colon = FormatTok;
3009 // We are in a function try block, what comes is an initializer list.
3010 nextToken();
3011 if (FormatTok->is(tok::identifier)) {
3012 HasCtorInitializer = true;
3013 Colon->setFinalizedType(TT_CtorInitializerColon);
3016 // In case identifiers were removed by clang-tidy, what might follow is
3017 // multiple commas in sequence - before the first identifier.
3018 while (FormatTok->is(tok::comma))
3019 nextToken();
3021 while (FormatTok->is(tok::identifier)) {
3022 nextToken();
3023 if (FormatTok->is(tok::l_paren)) {
3024 parseParens();
3025 } else if (FormatTok->is(tok::l_brace)) {
3026 nextToken();
3027 parseBracedList();
3030 // In case identifiers were removed by clang-tidy, what might follow is
3031 // multiple commas in sequence - after the first identifier.
3032 while (FormatTok->is(tok::comma))
3033 nextToken();
3036 // Parse try with resource.
3037 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
3038 parseParens();
3040 keepAncestorBraces();
3042 if (FormatTok->is(tok::l_brace)) {
3043 if (HasCtorInitializer)
3044 FormatTok->setFinalizedType(TT_FunctionLBrace);
3045 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3046 parseBlock();
3047 if (Style.BraceWrapping.BeforeCatch)
3048 addUnwrappedLine();
3049 else
3050 NeedsUnwrappedLine = true;
3051 } else if (FormatTok->isNot(tok::kw_catch)) {
3052 // The C++ standard requires a compound-statement after a try.
3053 // If there's none, we try to assume there's a structuralElement
3054 // and try to continue.
3055 addUnwrappedLine();
3056 ++Line->Level;
3057 parseStructuralElement();
3058 --Line->Level;
3060 while (true) {
3061 if (FormatTok->is(tok::at))
3062 nextToken();
3063 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
3064 tok::kw___finally) ||
3065 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3066 FormatTok->is(Keywords.kw_finally)) ||
3067 (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
3068 FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
3069 break;
3071 nextToken();
3072 while (FormatTok->isNot(tok::l_brace)) {
3073 if (FormatTok->is(tok::l_paren)) {
3074 parseParens();
3075 continue;
3077 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
3078 if (Style.RemoveBracesLLVM)
3079 NestedTooDeep.pop_back();
3080 return;
3082 nextToken();
3084 NeedsUnwrappedLine = false;
3085 Line->MustBeDeclaration = false;
3086 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3087 parseBlock();
3088 if (Style.BraceWrapping.BeforeCatch)
3089 addUnwrappedLine();
3090 else
3091 NeedsUnwrappedLine = true;
3094 if (Style.RemoveBracesLLVM)
3095 NestedTooDeep.pop_back();
3097 if (NeedsUnwrappedLine)
3098 addUnwrappedLine();
3101 void UnwrappedLineParser::parseNamespace() {
3102 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
3103 "'namespace' expected");
3105 const FormatToken &InitialToken = *FormatTok;
3106 nextToken();
3107 if (InitialToken.is(TT_NamespaceMacro)) {
3108 parseParens();
3109 } else {
3110 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
3111 tok::l_square, tok::period, tok::l_paren) ||
3112 (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
3113 if (FormatTok->is(tok::l_square))
3114 parseSquare();
3115 else if (FormatTok->is(tok::l_paren))
3116 parseParens();
3117 else
3118 nextToken();
3121 if (FormatTok->is(tok::l_brace)) {
3122 FormatTok->setFinalizedType(TT_NamespaceLBrace);
3124 if (ShouldBreakBeforeBrace(Style, InitialToken))
3125 addUnwrappedLine();
3127 unsigned AddLevels =
3128 Style.NamespaceIndentation == FormatStyle::NI_All ||
3129 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
3130 DeclarationScopeStack.size() > 1)
3131 ? 1u
3132 : 0u;
3133 bool ManageWhitesmithsBraces =
3134 AddLevels == 0u &&
3135 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3137 // If we're in Whitesmiths mode, indent the brace if we're not indenting
3138 // the whole block.
3139 if (ManageWhitesmithsBraces)
3140 ++Line->Level;
3142 // Munch the semicolon after a namespace. This is more common than one would
3143 // think. Putting the semicolon into its own line is very ugly.
3144 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
3145 /*KeepBraces=*/true, /*IfKind=*/nullptr,
3146 ManageWhitesmithsBraces);
3148 addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
3150 if (ManageWhitesmithsBraces)
3151 --Line->Level;
3153 // FIXME: Add error handling.
3156 void UnwrappedLineParser::parseNew() {
3157 assert(FormatTok->is(tok::kw_new) && "'new' expected");
3158 nextToken();
3160 if (Style.isCSharp()) {
3161 do {
3162 // Handle constructor invocation, e.g. `new(field: value)`.
3163 if (FormatTok->is(tok::l_paren))
3164 parseParens();
3166 // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
3167 if (FormatTok->is(tok::l_brace))
3168 parseBracedList();
3170 if (FormatTok->isOneOf(tok::semi, tok::comma))
3171 return;
3173 nextToken();
3174 } while (!eof());
3177 if (Style.Language != FormatStyle::LK_Java)
3178 return;
3180 // In Java, we can parse everything up to the parens, which aren't optional.
3181 do {
3182 // There should not be a ;, { or } before the new's open paren.
3183 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
3184 return;
3186 // Consume the parens.
3187 if (FormatTok->is(tok::l_paren)) {
3188 parseParens();
3190 // If there is a class body of an anonymous class, consume that as child.
3191 if (FormatTok->is(tok::l_brace))
3192 parseChildBlock();
3193 return;
3195 nextToken();
3196 } while (!eof());
3199 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
3200 keepAncestorBraces();
3202 if (isBlockBegin(*FormatTok)) {
3203 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3204 FormatToken *LeftBrace = FormatTok;
3205 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3206 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3207 /*MunchSemi=*/true, KeepBraces);
3208 setPreviousRBraceType(TT_ControlStatementRBrace);
3209 if (!KeepBraces) {
3210 assert(!NestedTooDeep.empty());
3211 if (!NestedTooDeep.back())
3212 markOptionalBraces(LeftBrace);
3214 if (WrapRightBrace)
3215 addUnwrappedLine();
3216 } else {
3217 parseUnbracedBody();
3220 if (!KeepBraces)
3221 NestedTooDeep.pop_back();
3224 void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
3225 assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
3226 (Style.isVerilog() &&
3227 FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
3228 Keywords.kw_always_ff, Keywords.kw_always_latch,
3229 Keywords.kw_final, Keywords.kw_initial,
3230 Keywords.kw_foreach, Keywords.kw_forever,
3231 Keywords.kw_repeat))) &&
3232 "'for', 'while' or foreach macro expected");
3233 const bool KeepBraces = !Style.RemoveBracesLLVM ||
3234 !FormatTok->isOneOf(tok::kw_for, tok::kw_while);
3236 nextToken();
3237 // JS' for await ( ...
3238 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
3239 nextToken();
3240 if (IsCpp && FormatTok->is(tok::kw_co_await))
3241 nextToken();
3242 if (HasParens && FormatTok->is(tok::l_paren)) {
3243 // The type is only set for Verilog basically because we were afraid to
3244 // change the existing behavior for loops. See the discussion on D121756 for
3245 // details.
3246 if (Style.isVerilog())
3247 FormatTok->setFinalizedType(TT_ConditionLParen);
3248 parseParens();
3251 if (Style.isVerilog()) {
3252 // Event control.
3253 parseVerilogSensitivityList();
3254 } else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(tok::semi) &&
3255 Tokens->getPreviousToken()->is(tok::r_paren)) {
3256 nextToken();
3257 addUnwrappedLine();
3258 return;
3261 handleAttributes();
3262 parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
3265 void UnwrappedLineParser::parseDoWhile() {
3266 assert(FormatTok->is(tok::kw_do) && "'do' expected");
3267 nextToken();
3269 parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
3271 // FIXME: Add error handling.
3272 if (FormatTok->isNot(tok::kw_while)) {
3273 addUnwrappedLine();
3274 return;
3277 FormatTok->setFinalizedType(TT_DoWhile);
3279 // If in Whitesmiths mode, the line with the while() needs to be indented
3280 // to the same level as the block.
3281 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
3282 ++Line->Level;
3284 nextToken();
3285 parseStructuralElement();
3288 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
3289 nextToken();
3290 unsigned OldLineLevel = Line->Level;
3292 if (LeftAlignLabel)
3293 Line->Level = 0;
3294 else if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
3295 --Line->Level;
3297 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
3298 FormatTok->is(tok::l_brace)) {
3300 CompoundStatementIndenter Indenter(this, Line->Level,
3301 Style.BraceWrapping.AfterCaseLabel,
3302 Style.BraceWrapping.IndentBraces);
3303 parseBlock();
3304 if (FormatTok->is(tok::kw_break)) {
3305 if (Style.BraceWrapping.AfterControlStatement ==
3306 FormatStyle::BWACS_Always) {
3307 addUnwrappedLine();
3308 if (!Style.IndentCaseBlocks &&
3309 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
3310 ++Line->Level;
3313 parseStructuralElement();
3315 addUnwrappedLine();
3316 } else {
3317 if (FormatTok->is(tok::semi))
3318 nextToken();
3319 addUnwrappedLine();
3321 Line->Level = OldLineLevel;
3322 if (FormatTok->isNot(tok::l_brace)) {
3323 parseStructuralElement();
3324 addUnwrappedLine();
3328 void UnwrappedLineParser::parseCaseLabel() {
3329 assert(FormatTok->is(tok::kw_case) && "'case' expected");
3330 auto *Case = FormatTok;
3332 // FIXME: fix handling of complex expressions here.
3333 do {
3334 nextToken();
3335 if (FormatTok->is(tok::colon)) {
3336 FormatTok->setFinalizedType(TT_CaseLabelColon);
3337 break;
3339 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::arrow)) {
3340 FormatTok->setFinalizedType(TT_CaseLabelArrow);
3341 Case->setFinalizedType(TT_SwitchExpressionLabel);
3342 break;
3344 } while (!eof());
3345 parseLabel();
3348 void UnwrappedLineParser::parseSwitch(bool IsExpr) {
3349 assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3350 nextToken();
3351 if (FormatTok->is(tok::l_paren))
3352 parseParens();
3354 keepAncestorBraces();
3356 if (FormatTok->is(tok::l_brace)) {
3357 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3358 FormatTok->setFinalizedType(IsExpr ? TT_SwitchExpressionLBrace
3359 : TT_ControlStatementLBrace);
3360 if (IsExpr)
3361 parseChildBlock();
3362 else
3363 parseBlock();
3364 setPreviousRBraceType(TT_ControlStatementRBrace);
3365 if (!IsExpr)
3366 addUnwrappedLine();
3367 } else {
3368 addUnwrappedLine();
3369 ++Line->Level;
3370 parseStructuralElement();
3371 --Line->Level;
3374 if (Style.RemoveBracesLLVM)
3375 NestedTooDeep.pop_back();
3378 // Operators that can follow a C variable.
3379 static bool isCOperatorFollowingVar(tok::TokenKind Kind) {
3380 switch (Kind) {
3381 case tok::ampamp:
3382 case tok::ampequal:
3383 case tok::arrow:
3384 case tok::caret:
3385 case tok::caretequal:
3386 case tok::comma:
3387 case tok::ellipsis:
3388 case tok::equal:
3389 case tok::equalequal:
3390 case tok::exclaim:
3391 case tok::exclaimequal:
3392 case tok::greater:
3393 case tok::greaterequal:
3394 case tok::greatergreater:
3395 case tok::greatergreaterequal:
3396 case tok::l_paren:
3397 case tok::l_square:
3398 case tok::less:
3399 case tok::lessequal:
3400 case tok::lessless:
3401 case tok::lesslessequal:
3402 case tok::minus:
3403 case tok::minusequal:
3404 case tok::minusminus:
3405 case tok::percent:
3406 case tok::percentequal:
3407 case tok::period:
3408 case tok::pipe:
3409 case tok::pipeequal:
3410 case tok::pipepipe:
3411 case tok::plus:
3412 case tok::plusequal:
3413 case tok::plusplus:
3414 case tok::question:
3415 case tok::r_brace:
3416 case tok::r_paren:
3417 case tok::r_square:
3418 case tok::semi:
3419 case tok::slash:
3420 case tok::slashequal:
3421 case tok::star:
3422 case tok::starequal:
3423 return true;
3424 default:
3425 return false;
3429 void UnwrappedLineParser::parseAccessSpecifier() {
3430 FormatToken *AccessSpecifierCandidate = FormatTok;
3431 nextToken();
3432 // Understand Qt's slots.
3433 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
3434 nextToken();
3435 // Otherwise, we don't know what it is, and we'd better keep the next token.
3436 if (FormatTok->is(tok::colon)) {
3437 nextToken();
3438 addUnwrappedLine();
3439 } else if (FormatTok->isNot(tok::coloncolon) &&
3440 !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
3441 // Not a variable name nor namespace name.
3442 addUnwrappedLine();
3443 } else if (AccessSpecifierCandidate) {
3444 // Consider the access specifier to be a C identifier.
3445 AccessSpecifierCandidate->Tok.setKind(tok::identifier);
3449 /// \brief Parses a requires, decides if it is a clause or an expression.
3450 /// \pre The current token has to be the requires keyword.
3451 /// \returns true if it parsed a clause.
3452 bool UnwrappedLineParser::parseRequires() {
3453 assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3454 auto RequiresToken = FormatTok;
3456 // We try to guess if it is a requires clause, or a requires expression. For
3457 // that we first consume the keyword and check the next token.
3458 nextToken();
3460 switch (FormatTok->Tok.getKind()) {
3461 case tok::l_brace:
3462 // This can only be an expression, never a clause.
3463 parseRequiresExpression(RequiresToken);
3464 return false;
3465 case tok::l_paren:
3466 // Clauses and expression can start with a paren, it's unclear what we have.
3467 break;
3468 default:
3469 // All other tokens can only be a clause.
3470 parseRequiresClause(RequiresToken);
3471 return true;
3474 // Looking forward we would have to decide if there are function declaration
3475 // like arguments to the requires expression:
3476 // requires (T t) {
3477 // Or there is a constraint expression for the requires clause:
3478 // requires (C<T> && ...
3480 // But first let's look behind.
3481 auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
3483 if (!PreviousNonComment ||
3484 PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
3485 // If there is no token, or an expression left brace, we are a requires
3486 // clause within a requires expression.
3487 parseRequiresClause(RequiresToken);
3488 return true;
3491 switch (PreviousNonComment->Tok.getKind()) {
3492 case tok::greater:
3493 case tok::r_paren:
3494 case tok::kw_noexcept:
3495 case tok::kw_const:
3496 case tok::amp:
3497 // This is a requires clause.
3498 parseRequiresClause(RequiresToken);
3499 return true;
3500 case tok::ampamp: {
3501 // This can be either:
3502 // if (... && requires (T t) ...)
3503 // Or
3504 // void member(...) && requires (C<T> ...
3505 // We check the one token before that for a const:
3506 // void member(...) const && requires (C<T> ...
3507 auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3508 if (PrevPrev && PrevPrev->is(tok::kw_const)) {
3509 parseRequiresClause(RequiresToken);
3510 return true;
3512 break;
3514 default:
3515 if (PreviousNonComment->isTypeOrIdentifier(LangOpts)) {
3516 // This is a requires clause.
3517 parseRequiresClause(RequiresToken);
3518 return true;
3520 // It's an expression.
3521 parseRequiresExpression(RequiresToken);
3522 return false;
3525 // Now we look forward and try to check if the paren content is a parameter
3526 // list. The parameters can be cv-qualified and contain references or
3527 // pointers.
3528 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3529 // of stuff: typename, const, *, &, &&, ::, identifiers.
3531 unsigned StoredPosition = Tokens->getPosition();
3532 FormatToken *NextToken = Tokens->getNextToken();
3533 int Lookahead = 0;
3534 auto PeekNext = [&Lookahead, &NextToken, this] {
3535 ++Lookahead;
3536 NextToken = Tokens->getNextToken();
3539 bool FoundType = false;
3540 bool LastWasColonColon = false;
3541 int OpenAngles = 0;
3543 for (; Lookahead < 50; PeekNext()) {
3544 switch (NextToken->Tok.getKind()) {
3545 case tok::kw_volatile:
3546 case tok::kw_const:
3547 case tok::comma:
3548 if (OpenAngles == 0) {
3549 FormatTok = Tokens->setPosition(StoredPosition);
3550 parseRequiresExpression(RequiresToken);
3551 return false;
3553 break;
3554 case tok::eof:
3555 // Break out of the loop.
3556 Lookahead = 50;
3557 break;
3558 case tok::coloncolon:
3559 LastWasColonColon = true;
3560 break;
3561 case tok::kw_decltype:
3562 case tok::identifier:
3563 if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3564 FormatTok = Tokens->setPosition(StoredPosition);
3565 parseRequiresExpression(RequiresToken);
3566 return false;
3568 FoundType = true;
3569 LastWasColonColon = false;
3570 break;
3571 case tok::less:
3572 ++OpenAngles;
3573 break;
3574 case tok::greater:
3575 --OpenAngles;
3576 break;
3577 default:
3578 if (NextToken->isTypeName(LangOpts)) {
3579 FormatTok = Tokens->setPosition(StoredPosition);
3580 parseRequiresExpression(RequiresToken);
3581 return false;
3583 break;
3586 // This seems to be a complicated expression, just assume it's a clause.
3587 FormatTok = Tokens->setPosition(StoredPosition);
3588 parseRequiresClause(RequiresToken);
3589 return true;
3592 /// \brief Parses a requires clause.
3593 /// \param RequiresToken The requires keyword token, which starts this clause.
3594 /// \pre We need to be on the next token after the requires keyword.
3595 /// \sa parseRequiresExpression
3597 /// Returns if it either has finished parsing the clause, or it detects, that
3598 /// the clause is incorrect.
3599 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3600 assert(FormatTok->getPreviousNonComment() == RequiresToken);
3601 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3603 // If there is no previous token, we are within a requires expression,
3604 // otherwise we will always have the template or function declaration in front
3605 // of it.
3606 bool InRequiresExpression =
3607 !RequiresToken->Previous ||
3608 RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3610 RequiresToken->setFinalizedType(InRequiresExpression
3611 ? TT_RequiresClauseInARequiresExpression
3612 : TT_RequiresClause);
3614 // NOTE: parseConstraintExpression is only ever called from this function.
3615 // It could be inlined into here.
3616 parseConstraintExpression();
3618 if (!InRequiresExpression)
3619 FormatTok->Previous->ClosesRequiresClause = true;
3622 /// \brief Parses a requires expression.
3623 /// \param RequiresToken The requires keyword token, which starts this clause.
3624 /// \pre We need to be on the next token after the requires keyword.
3625 /// \sa parseRequiresClause
3627 /// Returns if it either has finished parsing the expression, or it detects,
3628 /// that the expression is incorrect.
3629 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3630 assert(FormatTok->getPreviousNonComment() == RequiresToken);
3631 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3633 RequiresToken->setFinalizedType(TT_RequiresExpression);
3635 if (FormatTok->is(tok::l_paren)) {
3636 FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3637 parseParens();
3640 if (FormatTok->is(tok::l_brace)) {
3641 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3642 parseChildBlock();
3646 /// \brief Parses a constraint expression.
3648 /// This is the body of a requires clause. It returns, when the parsing is
3649 /// complete, or the expression is incorrect.
3650 void UnwrappedLineParser::parseConstraintExpression() {
3651 // The special handling for lambdas is needed since tryToParseLambda() eats a
3652 // token and if a requires expression is the last part of a requires clause
3653 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3654 // not set on the correct token. Thus we need to be aware if we even expect a
3655 // lambda to be possible.
3656 // template <typename T> requires requires { ... } [[nodiscard]] ...;
3657 bool LambdaNextTimeAllowed = true;
3659 // Within lambda declarations, it is permitted to put a requires clause after
3660 // its template parameter list, which would place the requires clause right
3661 // before the parentheses of the parameters of the lambda declaration. Thus,
3662 // we track if we expect to see grouping parentheses at all.
3663 // Without this check, `requires foo<T> (T t)` in the below example would be
3664 // seen as the whole requires clause, accidentally eating the parameters of
3665 // the lambda.
3666 // [&]<typename T> requires foo<T> (T t) { ... };
3667 bool TopLevelParensAllowed = true;
3669 do {
3670 bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);
3672 switch (FormatTok->Tok.getKind()) {
3673 case tok::kw_requires: {
3674 auto RequiresToken = FormatTok;
3675 nextToken();
3676 parseRequiresExpression(RequiresToken);
3677 break;
3680 case tok::l_paren:
3681 if (!TopLevelParensAllowed)
3682 return;
3683 parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
3684 TopLevelParensAllowed = false;
3685 break;
3687 case tok::l_square:
3688 if (!LambdaThisTimeAllowed || !tryToParseLambda())
3689 return;
3690 break;
3692 case tok::kw_const:
3693 case tok::semi:
3694 case tok::kw_class:
3695 case tok::kw_struct:
3696 case tok::kw_union:
3697 return;
3699 case tok::l_brace:
3700 // Potential function body.
3701 return;
3703 case tok::ampamp:
3704 case tok::pipepipe:
3705 FormatTok->setFinalizedType(TT_BinaryOperator);
3706 nextToken();
3707 LambdaNextTimeAllowed = true;
3708 TopLevelParensAllowed = true;
3709 break;
3711 case tok::comma:
3712 case tok::comment:
3713 LambdaNextTimeAllowed = LambdaThisTimeAllowed;
3714 nextToken();
3715 break;
3717 case tok::kw_sizeof:
3718 case tok::greater:
3719 case tok::greaterequal:
3720 case tok::greatergreater:
3721 case tok::less:
3722 case tok::lessequal:
3723 case tok::lessless:
3724 case tok::equalequal:
3725 case tok::exclaim:
3726 case tok::exclaimequal:
3727 case tok::plus:
3728 case tok::minus:
3729 case tok::star:
3730 case tok::slash:
3731 LambdaNextTimeAllowed = true;
3732 TopLevelParensAllowed = true;
3733 // Just eat them.
3734 nextToken();
3735 break;
3737 case tok::numeric_constant:
3738 case tok::coloncolon:
3739 case tok::kw_true:
3740 case tok::kw_false:
3741 TopLevelParensAllowed = false;
3742 // Just eat them.
3743 nextToken();
3744 break;
3746 case tok::kw_static_cast:
3747 case tok::kw_const_cast:
3748 case tok::kw_reinterpret_cast:
3749 case tok::kw_dynamic_cast:
3750 nextToken();
3751 if (FormatTok->isNot(tok::less))
3752 return;
3754 nextToken();
3755 parseBracedList(/*IsAngleBracket=*/true);
3756 break;
3758 default:
3759 if (!FormatTok->Tok.getIdentifierInfo()) {
3760 // Identifiers are part of the default case, we check for more then
3761 // tok::identifier to handle builtin type traits.
3762 return;
3765 // We need to differentiate identifiers for a template deduction guide,
3766 // variables, or function return types (the constraint expression has
3767 // ended before that), and basically all other cases. But it's easier to
3768 // check the other way around.
3769 assert(FormatTok->Previous);
3770 switch (FormatTok->Previous->Tok.getKind()) {
3771 case tok::coloncolon: // Nested identifier.
3772 case tok::ampamp: // Start of a function or variable for the
3773 case tok::pipepipe: // constraint expression. (binary)
3774 case tok::exclaim: // The same as above, but unary.
3775 case tok::kw_requires: // Initial identifier of a requires clause.
3776 case tok::equal: // Initial identifier of a concept declaration.
3777 break;
3778 default:
3779 return;
3782 // Read identifier with optional template declaration.
3783 nextToken();
3784 if (FormatTok->is(tok::less)) {
3785 nextToken();
3786 parseBracedList(/*IsAngleBracket=*/true);
3788 TopLevelParensAllowed = false;
3789 break;
3791 } while (!eof());
3794 bool UnwrappedLineParser::parseEnum() {
3795 const FormatToken &InitialToken = *FormatTok;
3797 // Won't be 'enum' for NS_ENUMs.
3798 if (FormatTok->is(tok::kw_enum))
3799 nextToken();
3801 // In TypeScript, "enum" can also be used as property name, e.g. in interface
3802 // declarations. An "enum" keyword followed by a colon would be a syntax
3803 // error and thus assume it is just an identifier.
3804 if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
3805 return false;
3807 // In protobuf, "enum" can be used as a field name.
3808 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
3809 return false;
3811 if (IsCpp) {
3812 // Eat up enum class ...
3813 if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
3814 nextToken();
3815 while (FormatTok->is(tok::l_square))
3816 if (!handleCppAttributes())
3817 return false;
3820 while (FormatTok->Tok.getIdentifierInfo() ||
3821 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
3822 tok::greater, tok::comma, tok::question,
3823 tok::l_square)) {
3824 if (Style.isVerilog()) {
3825 FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
3826 nextToken();
3827 // In Verilog the base type can have dimensions.
3828 while (FormatTok->is(tok::l_square))
3829 parseSquare();
3830 } else {
3831 nextToken();
3833 // We can have macros or attributes in between 'enum' and the enum name.
3834 if (FormatTok->is(tok::l_paren))
3835 parseParens();
3836 if (FormatTok->is(tok::identifier)) {
3837 nextToken();
3838 // If there are two identifiers in a row, this is likely an elaborate
3839 // return type. In Java, this can be "implements", etc.
3840 if (IsCpp && FormatTok->is(tok::identifier))
3841 return false;
3845 // Just a declaration or something is wrong.
3846 if (FormatTok->isNot(tok::l_brace))
3847 return true;
3848 FormatTok->setFinalizedType(TT_EnumLBrace);
3849 FormatTok->setBlockKind(BK_Block);
3851 if (Style.Language == FormatStyle::LK_Java) {
3852 // Java enums are different.
3853 parseJavaEnumBody();
3854 return true;
3856 if (Style.Language == FormatStyle::LK_Proto) {
3857 parseBlock(/*MustBeDeclaration=*/true);
3858 return true;
3861 if (!Style.AllowShortEnumsOnASingleLine &&
3862 ShouldBreakBeforeBrace(Style, InitialToken)) {
3863 addUnwrappedLine();
3865 // Parse enum body.
3866 nextToken();
3867 if (!Style.AllowShortEnumsOnASingleLine) {
3868 addUnwrappedLine();
3869 Line->Level += 1;
3871 bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true);
3872 if (!Style.AllowShortEnumsOnASingleLine)
3873 Line->Level -= 1;
3874 if (HasError) {
3875 if (FormatTok->is(tok::semi))
3876 nextToken();
3877 addUnwrappedLine();
3879 setPreviousRBraceType(TT_EnumRBrace);
3880 return true;
3882 // There is no addUnwrappedLine() here so that we fall through to parsing a
3883 // structural element afterwards. Thus, in "enum A {} n, m;",
3884 // "} n, m;" will end up in one unwrapped line.
3887 bool UnwrappedLineParser::parseStructLike() {
3888 // parseRecord falls through and does not yet add an unwrapped line as a
3889 // record declaration or definition can start a structural element.
3890 parseRecord();
3891 // This does not apply to Java, JavaScript and C#.
3892 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3893 Style.isCSharp()) {
3894 if (FormatTok->is(tok::semi))
3895 nextToken();
3896 addUnwrappedLine();
3897 return true;
3899 return false;
3902 namespace {
3903 // A class used to set and restore the Token position when peeking
3904 // ahead in the token source.
3905 class ScopedTokenPosition {
3906 unsigned StoredPosition;
3907 FormatTokenSource *Tokens;
3909 public:
3910 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3911 assert(Tokens && "Tokens expected to not be null");
3912 StoredPosition = Tokens->getPosition();
3915 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3917 } // namespace
3919 // Look to see if we have [[ by looking ahead, if
3920 // its not then rewind to the original position.
3921 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3922 ScopedTokenPosition AutoPosition(Tokens);
3923 FormatToken *Tok = Tokens->getNextToken();
3924 // We already read the first [ check for the second.
3925 if (Tok->isNot(tok::l_square))
3926 return false;
3927 // Double check that the attribute is just something
3928 // fairly simple.
3929 while (Tok->isNot(tok::eof)) {
3930 if (Tok->is(tok::r_square))
3931 break;
3932 Tok = Tokens->getNextToken();
3934 if (Tok->is(tok::eof))
3935 return false;
3936 Tok = Tokens->getNextToken();
3937 if (Tok->isNot(tok::r_square))
3938 return false;
3939 Tok = Tokens->getNextToken();
3940 if (Tok->is(tok::semi))
3941 return false;
3942 return true;
3945 void UnwrappedLineParser::parseJavaEnumBody() {
3946 assert(FormatTok->is(tok::l_brace));
3947 const FormatToken *OpeningBrace = FormatTok;
3949 // Determine whether the enum is simple, i.e. does not have a semicolon or
3950 // constants with class bodies. Simple enums can be formatted like braced
3951 // lists, contracted to a single line, etc.
3952 unsigned StoredPosition = Tokens->getPosition();
3953 bool IsSimple = true;
3954 FormatToken *Tok = Tokens->getNextToken();
3955 while (Tok->isNot(tok::eof)) {
3956 if (Tok->is(tok::r_brace))
3957 break;
3958 if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3959 IsSimple = false;
3960 break;
3962 // FIXME: This will also mark enums with braces in the arguments to enum
3963 // constants as "not simple". This is probably fine in practice, though.
3964 Tok = Tokens->getNextToken();
3966 FormatTok = Tokens->setPosition(StoredPosition);
3968 if (IsSimple) {
3969 nextToken();
3970 parseBracedList();
3971 addUnwrappedLine();
3972 return;
3975 // Parse the body of a more complex enum.
3976 // First add a line for everything up to the "{".
3977 nextToken();
3978 addUnwrappedLine();
3979 ++Line->Level;
3981 // Parse the enum constants.
3982 while (!eof()) {
3983 if (FormatTok->is(tok::l_brace)) {
3984 // Parse the constant's class body.
3985 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3986 /*MunchSemi=*/false);
3987 } else if (FormatTok->is(tok::l_paren)) {
3988 parseParens();
3989 } else if (FormatTok->is(tok::comma)) {
3990 nextToken();
3991 addUnwrappedLine();
3992 } else if (FormatTok->is(tok::semi)) {
3993 nextToken();
3994 addUnwrappedLine();
3995 break;
3996 } else if (FormatTok->is(tok::r_brace)) {
3997 addUnwrappedLine();
3998 break;
3999 } else {
4000 nextToken();
4004 // Parse the class body after the enum's ";" if any.
4005 parseLevel(OpeningBrace);
4006 nextToken();
4007 --Line->Level;
4008 addUnwrappedLine();
4011 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
4012 const FormatToken &InitialToken = *FormatTok;
4013 nextToken();
4015 const FormatToken *ClassName = nullptr;
4016 bool IsDerived = false;
4017 auto IsNonMacroIdentifier = [](const FormatToken *Tok) {
4018 return Tok->is(tok::identifier) && Tok->TokenText != Tok->TokenText.upper();
4020 // JavaScript/TypeScript supports anonymous classes like:
4021 // a = class extends foo { }
4022 bool JSPastExtendsOrImplements = false;
4023 // The actual identifier can be a nested name specifier, and in macros
4024 // it is often token-pasted.
4025 // An [[attribute]] can be before the identifier.
4026 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
4027 tok::kw_alignas, tok::l_square) ||
4028 FormatTok->isAttribute() ||
4029 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
4030 FormatTok->isOneOf(tok::period, tok::comma))) {
4031 if (Style.isJavaScript() &&
4032 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
4033 JSPastExtendsOrImplements = true;
4034 // JavaScript/TypeScript supports inline object types in
4035 // extends/implements positions:
4036 // class Foo implements {bar: number} { }
4037 nextToken();
4038 if (FormatTok->is(tok::l_brace)) {
4039 tryToParseBracedList();
4040 continue;
4043 if (FormatTok->is(tok::l_square) && handleCppAttributes())
4044 continue;
4045 const auto *Previous = FormatTok;
4046 nextToken();
4047 switch (FormatTok->Tok.getKind()) {
4048 case tok::l_paren:
4049 // We can have macros in between 'class' and the class name.
4050 if (!IsNonMacroIdentifier(Previous) ||
4051 // e.g. `struct macro(a) S { int i; };`
4052 Previous->Previous == &InitialToken) {
4053 parseParens();
4055 break;
4056 case tok::coloncolon:
4057 case tok::hashhash:
4058 break;
4059 default:
4060 if (!JSPastExtendsOrImplements && !ClassName &&
4061 Previous->is(tok::identifier) && Previous->isNot(TT_AttributeMacro)) {
4062 ClassName = Previous;
4067 auto IsListInitialization = [&] {
4068 if (!ClassName || IsDerived || JSPastExtendsOrImplements)
4069 return false;
4070 assert(FormatTok->is(tok::l_brace));
4071 const auto *Prev = FormatTok->getPreviousNonComment();
4072 assert(Prev);
4073 return Prev != ClassName && Prev->is(tok::identifier) &&
4074 Prev->isNot(Keywords.kw_final) && tryToParseBracedList();
4077 if (FormatTok->isOneOf(tok::colon, tok::less)) {
4078 int AngleNestingLevel = 0;
4079 do {
4080 if (FormatTok->is(tok::less))
4081 ++AngleNestingLevel;
4082 else if (FormatTok->is(tok::greater))
4083 --AngleNestingLevel;
4085 if (AngleNestingLevel == 0) {
4086 if (FormatTok->is(tok::colon)) {
4087 IsDerived = true;
4088 } else if (FormatTok->is(tok::identifier) &&
4089 FormatTok->Previous->is(tok::coloncolon)) {
4090 ClassName = FormatTok;
4091 } else if (FormatTok->is(tok::l_paren) &&
4092 IsNonMacroIdentifier(FormatTok->Previous)) {
4093 break;
4096 if (FormatTok->is(tok::l_brace)) {
4097 if (AngleNestingLevel == 0 && IsListInitialization())
4098 return;
4099 calculateBraceTypes(/*ExpectClassBody=*/true);
4100 if (!tryToParseBracedList())
4101 break;
4103 if (FormatTok->is(tok::l_square)) {
4104 FormatToken *Previous = FormatTok->Previous;
4105 if (!Previous || (Previous->isNot(tok::r_paren) &&
4106 !Previous->isTypeOrIdentifier(LangOpts))) {
4107 // Don't try parsing a lambda if we had a closing parenthesis before,
4108 // it was probably a pointer to an array: int (*)[].
4109 if (!tryToParseLambda())
4110 continue;
4111 } else {
4112 parseSquare();
4113 continue;
4116 if (FormatTok->is(tok::semi))
4117 return;
4118 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
4119 addUnwrappedLine();
4120 nextToken();
4121 parseCSharpGenericTypeConstraint();
4122 break;
4124 nextToken();
4125 } while (!eof());
4128 auto GetBraceTypes =
4129 [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> {
4130 switch (RecordTok.Tok.getKind()) {
4131 case tok::kw_class:
4132 return {TT_ClassLBrace, TT_ClassRBrace};
4133 case tok::kw_struct:
4134 return {TT_StructLBrace, TT_StructRBrace};
4135 case tok::kw_union:
4136 return {TT_UnionLBrace, TT_UnionRBrace};
4137 default:
4138 // Useful for e.g. interface.
4139 return {TT_RecordLBrace, TT_RecordRBrace};
4142 if (FormatTok->is(tok::l_brace)) {
4143 if (IsListInitialization())
4144 return;
4145 auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken);
4146 FormatTok->setFinalizedType(OpenBraceType);
4147 if (ParseAsExpr) {
4148 parseChildBlock();
4149 } else {
4150 if (ShouldBreakBeforeBrace(Style, InitialToken))
4151 addUnwrappedLine();
4153 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
4154 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
4156 setPreviousRBraceType(ClosingBraceType);
4158 // There is no addUnwrappedLine() here so that we fall through to parsing a
4159 // structural element afterwards. Thus, in "class A {} n, m;",
4160 // "} n, m;" will end up in one unwrapped line.
4163 void UnwrappedLineParser::parseObjCMethod() {
4164 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
4165 "'(' or identifier expected.");
4166 do {
4167 if (FormatTok->is(tok::semi)) {
4168 nextToken();
4169 addUnwrappedLine();
4170 return;
4171 } else if (FormatTok->is(tok::l_brace)) {
4172 if (Style.BraceWrapping.AfterFunction)
4173 addUnwrappedLine();
4174 parseBlock();
4175 addUnwrappedLine();
4176 return;
4177 } else {
4178 nextToken();
4180 } while (!eof());
4183 void UnwrappedLineParser::parseObjCProtocolList() {
4184 assert(FormatTok->is(tok::less) && "'<' expected.");
4185 do {
4186 nextToken();
4187 // Early exit in case someone forgot a close angle.
4188 if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4189 FormatTok->isObjCAtKeyword(tok::objc_end)) {
4190 return;
4192 } while (!eof() && FormatTok->isNot(tok::greater));
4193 nextToken(); // Skip '>'.
4196 void UnwrappedLineParser::parseObjCUntilAtEnd() {
4197 do {
4198 if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
4199 nextToken();
4200 addUnwrappedLine();
4201 break;
4203 if (FormatTok->is(tok::l_brace)) {
4204 parseBlock();
4205 // In ObjC interfaces, nothing should be following the "}".
4206 addUnwrappedLine();
4207 } else if (FormatTok->is(tok::r_brace)) {
4208 // Ignore stray "}". parseStructuralElement doesn't consume them.
4209 nextToken();
4210 addUnwrappedLine();
4211 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
4212 nextToken();
4213 parseObjCMethod();
4214 } else {
4215 parseStructuralElement();
4217 } while (!eof());
4220 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
4221 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
4222 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
4223 nextToken();
4224 nextToken(); // interface name
4226 // @interface can be followed by a lightweight generic
4227 // specialization list, then either a base class or a category.
4228 if (FormatTok->is(tok::less))
4229 parseObjCLightweightGenerics();
4230 if (FormatTok->is(tok::colon)) {
4231 nextToken();
4232 nextToken(); // base class name
4233 // The base class can also have lightweight generics applied to it.
4234 if (FormatTok->is(tok::less))
4235 parseObjCLightweightGenerics();
4236 } else if (FormatTok->is(tok::l_paren)) {
4237 // Skip category, if present.
4238 parseParens();
4241 if (FormatTok->is(tok::less))
4242 parseObjCProtocolList();
4244 if (FormatTok->is(tok::l_brace)) {
4245 if (Style.BraceWrapping.AfterObjCDeclaration)
4246 addUnwrappedLine();
4247 parseBlock(/*MustBeDeclaration=*/true);
4250 // With instance variables, this puts '}' on its own line. Without instance
4251 // variables, this ends the @interface line.
4252 addUnwrappedLine();
4254 parseObjCUntilAtEnd();
4257 void UnwrappedLineParser::parseObjCLightweightGenerics() {
4258 assert(FormatTok->is(tok::less));
4259 // Unlike protocol lists, generic parameterizations support
4260 // nested angles:
4262 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4263 // NSObject <NSCopying, NSSecureCoding>
4265 // so we need to count how many open angles we have left.
4266 unsigned NumOpenAngles = 1;
4267 do {
4268 nextToken();
4269 // Early exit in case someone forgot a close angle.
4270 if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4271 FormatTok->isObjCAtKeyword(tok::objc_end)) {
4272 break;
4274 if (FormatTok->is(tok::less)) {
4275 ++NumOpenAngles;
4276 } else if (FormatTok->is(tok::greater)) {
4277 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
4278 --NumOpenAngles;
4280 } while (!eof() && NumOpenAngles != 0);
4281 nextToken(); // Skip '>'.
4284 // Returns true for the declaration/definition form of @protocol,
4285 // false for the expression form.
4286 bool UnwrappedLineParser::parseObjCProtocol() {
4287 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
4288 nextToken();
4290 if (FormatTok->is(tok::l_paren)) {
4291 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4292 return false;
4295 // The definition/declaration form,
4296 // @protocol Foo
4297 // - (int)someMethod;
4298 // @end
4300 nextToken(); // protocol name
4302 if (FormatTok->is(tok::less))
4303 parseObjCProtocolList();
4305 // Check for protocol declaration.
4306 if (FormatTok->is(tok::semi)) {
4307 nextToken();
4308 addUnwrappedLine();
4309 return true;
4312 addUnwrappedLine();
4313 parseObjCUntilAtEnd();
4314 return true;
4317 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
4318 bool IsImport = FormatTok->is(Keywords.kw_import);
4319 assert(IsImport || FormatTok->is(tok::kw_export));
4320 nextToken();
4322 // Consume the "default" in "export default class/function".
4323 if (FormatTok->is(tok::kw_default))
4324 nextToken();
4326 // Consume "async function", "function" and "default function", so that these
4327 // get parsed as free-standing JS functions, i.e. do not require a trailing
4328 // semicolon.
4329 if (FormatTok->is(Keywords.kw_async))
4330 nextToken();
4331 if (FormatTok->is(Keywords.kw_function)) {
4332 nextToken();
4333 return;
4336 // For imports, `export *`, `export {...}`, consume the rest of the line up
4337 // to the terminating `;`. For everything else, just return and continue
4338 // parsing the structural element, i.e. the declaration or expression for
4339 // `export default`.
4340 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
4341 !FormatTok->isStringLiteral() &&
4342 !(FormatTok->is(Keywords.kw_type) &&
4343 Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) {
4344 return;
4347 while (!eof()) {
4348 if (FormatTok->is(tok::semi))
4349 return;
4350 if (Line->Tokens.empty()) {
4351 // Common issue: Automatic Semicolon Insertion wrapped the line, so the
4352 // import statement should terminate.
4353 return;
4355 if (FormatTok->is(tok::l_brace)) {
4356 FormatTok->setBlockKind(BK_Block);
4357 nextToken();
4358 parseBracedList();
4359 } else {
4360 nextToken();
4365 void UnwrappedLineParser::parseStatementMacro() {
4366 nextToken();
4367 if (FormatTok->is(tok::l_paren))
4368 parseParens();
4369 if (FormatTok->is(tok::semi))
4370 nextToken();
4371 addUnwrappedLine();
4374 void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4375 // consume things like a::`b.c[d:e] or a::*
4376 while (true) {
4377 if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar,
4378 tok::coloncolon, tok::hash) ||
4379 Keywords.isVerilogIdentifier(*FormatTok)) {
4380 nextToken();
4381 } else if (FormatTok->is(tok::l_square)) {
4382 parseSquare();
4383 } else {
4384 break;
4389 void UnwrappedLineParser::parseVerilogSensitivityList() {
4390 if (FormatTok->isNot(tok::at))
4391 return;
4392 nextToken();
4393 // A block event expression has 2 at signs.
4394 if (FormatTok->is(tok::at))
4395 nextToken();
4396 switch (FormatTok->Tok.getKind()) {
4397 case tok::star:
4398 nextToken();
4399 break;
4400 case tok::l_paren:
4401 parseParens();
4402 break;
4403 default:
4404 parseVerilogHierarchyIdentifier();
4405 break;
4409 unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
4410 unsigned AddLevels = 0;
4412 if (FormatTok->is(Keywords.kw_clocking)) {
4413 nextToken();
4414 if (Keywords.isVerilogIdentifier(*FormatTok))
4415 nextToken();
4416 parseVerilogSensitivityList();
4417 if (FormatTok->is(tok::semi))
4418 nextToken();
4419 } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex,
4420 Keywords.kw_casez, Keywords.kw_randcase,
4421 Keywords.kw_randsequence)) {
4422 if (Style.IndentCaseLabels)
4423 AddLevels++;
4424 nextToken();
4425 if (FormatTok->is(tok::l_paren)) {
4426 FormatTok->setFinalizedType(TT_ConditionLParen);
4427 parseParens();
4429 if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches))
4430 nextToken();
4431 // The case header has no semicolon.
4432 } else {
4433 // "module" etc.
4434 nextToken();
4435 // all the words like the name of the module and specifiers like
4436 // "automatic" and the width of function return type
4437 while (true) {
4438 if (FormatTok->is(tok::l_square)) {
4439 auto Prev = FormatTok->getPreviousNonComment();
4440 if (Prev && Keywords.isVerilogIdentifier(*Prev))
4441 Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
4442 parseSquare();
4443 } else if (Keywords.isVerilogIdentifier(*FormatTok) ||
4444 FormatTok->isOneOf(tok::hash, tok::hashhash, tok::coloncolon,
4445 Keywords.kw_automatic, tok::kw_static)) {
4446 nextToken();
4447 } else {
4448 break;
4452 auto NewLine = [this]() {
4453 addUnwrappedLine();
4454 Line->IsContinuation = true;
4457 // package imports
4458 while (FormatTok->is(Keywords.kw_import)) {
4459 NewLine();
4460 nextToken();
4461 parseVerilogHierarchyIdentifier();
4462 if (FormatTok->is(tok::semi))
4463 nextToken();
4466 // parameters and ports
4467 if (FormatTok->is(Keywords.kw_verilogHash)) {
4468 NewLine();
4469 nextToken();
4470 if (FormatTok->is(tok::l_paren)) {
4471 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4472 parseParens();
4475 if (FormatTok->is(tok::l_paren)) {
4476 NewLine();
4477 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4478 parseParens();
4481 // extends and implements
4482 if (FormatTok->is(Keywords.kw_extends)) {
4483 NewLine();
4484 nextToken();
4485 parseVerilogHierarchyIdentifier();
4486 if (FormatTok->is(tok::l_paren))
4487 parseParens();
4489 if (FormatTok->is(Keywords.kw_implements)) {
4490 NewLine();
4491 do {
4492 nextToken();
4493 parseVerilogHierarchyIdentifier();
4494 } while (FormatTok->is(tok::comma));
4497 // Coverage event for cover groups.
4498 if (FormatTok->is(tok::at)) {
4499 NewLine();
4500 parseVerilogSensitivityList();
4503 if (FormatTok->is(tok::semi))
4504 nextToken(/*LevelDifference=*/1);
4505 addUnwrappedLine();
4508 return AddLevels;
4511 void UnwrappedLineParser::parseVerilogTable() {
4512 assert(FormatTok->is(Keywords.kw_table));
4513 nextToken(/*LevelDifference=*/1);
4514 addUnwrappedLine();
4516 auto InitialLevel = Line->Level++;
4517 while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) {
4518 FormatToken *Tok = FormatTok;
4519 nextToken();
4520 if (Tok->is(tok::semi))
4521 addUnwrappedLine();
4522 else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus))
4523 Tok->setFinalizedType(TT_VerilogTableItem);
4525 Line->Level = InitialLevel;
4526 nextToken(/*LevelDifference=*/-1);
4527 addUnwrappedLine();
4530 void UnwrappedLineParser::parseVerilogCaseLabel() {
4531 // The label will get unindented in AnnotatingParser. If there are no leading
4532 // spaces, indent the rest here so that things inside the block will be
4533 // indented relative to things outside. We don't use parseLabel because we
4534 // don't know whether this colon is a label or a ternary expression at this
4535 // point.
4536 auto OrigLevel = Line->Level;
4537 auto FirstLine = CurrentLines->size();
4538 if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
4539 ++Line->Level;
4540 else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok))
4541 --Line->Level;
4542 parseStructuralElement();
4543 // Restore the indentation in both the new line and the line that has the
4544 // label.
4545 if (CurrentLines->size() > FirstLine)
4546 (*CurrentLines)[FirstLine].Level = OrigLevel;
4547 Line->Level = OrigLevel;
4550 bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
4551 for (const auto &N : Line.Tokens) {
4552 if (N.Tok->MacroCtx)
4553 return true;
4554 for (const UnwrappedLine &Child : N.Children)
4555 if (containsExpansion(Child))
4556 return true;
4558 return false;
4561 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
4562 if (Line->Tokens.empty())
4563 return;
4564 LLVM_DEBUG({
4565 if (!parsingPPDirective()) {
4566 llvm::dbgs() << "Adding unwrapped line:\n";
4567 printDebugInfo(*Line);
4571 // If this line closes a block when in Whitesmiths mode, remember that
4572 // information so that the level can be decreased after the line is added.
4573 // This has to happen after the addition of the line since the line itself
4574 // needs to be indented.
4575 bool ClosesWhitesmithsBlock =
4576 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
4577 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
4579 // If the current line was expanded from a macro call, we use it to
4580 // reconstruct an unwrapped line from the structure of the expanded unwrapped
4581 // line and the unexpanded token stream.
4582 if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) {
4583 if (!Reconstruct)
4584 Reconstruct.emplace(Line->Level, Unexpanded);
4585 Reconstruct->addLine(*Line);
4587 // While the reconstructed unexpanded lines are stored in the normal
4588 // flow of lines, the expanded lines are stored on the side to be analyzed
4589 // in an extra step.
4590 CurrentExpandedLines.push_back(std::move(*Line));
4592 if (Reconstruct->finished()) {
4593 UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
4594 assert(!Reconstructed.Tokens.empty() &&
4595 "Reconstructed must at least contain the macro identifier.");
4596 assert(!parsingPPDirective());
4597 LLVM_DEBUG({
4598 llvm::dbgs() << "Adding unexpanded line:\n";
4599 printDebugInfo(Reconstructed);
4601 ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines;
4602 Lines.push_back(std::move(Reconstructed));
4603 CurrentExpandedLines.clear();
4604 Reconstruct.reset();
4606 } else {
4607 // At the top level we only get here when no unexpansion is going on, or
4608 // when conditional formatting led to unfinished macro reconstructions.
4609 assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0);
4610 CurrentLines->push_back(std::move(*Line));
4612 Line->Tokens.clear();
4613 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
4614 Line->FirstStartColumn = 0;
4615 Line->IsContinuation = false;
4616 Line->SeenDecltypeAuto = false;
4618 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
4619 --Line->Level;
4620 if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
4621 CurrentLines->append(
4622 std::make_move_iterator(PreprocessorDirectives.begin()),
4623 std::make_move_iterator(PreprocessorDirectives.end()));
4624 PreprocessorDirectives.clear();
4626 // Disconnect the current token from the last token on the previous line.
4627 FormatTok->Previous = nullptr;
4630 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
4632 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4633 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4634 FormatTok.NewlinesBefore > 0;
4637 // Checks if \p FormatTok is a line comment that continues the line comment
4638 // section on \p Line.
4639 static bool
4640 continuesLineCommentSection(const FormatToken &FormatTok,
4641 const UnwrappedLine &Line, const FormatStyle &Style,
4642 const llvm::Regex &CommentPragmasRegex) {
4643 if (Line.Tokens.empty() || Style.ReflowComments != FormatStyle::RCS_Always)
4644 return false;
4646 StringRef IndentContent = FormatTok.TokenText;
4647 if (FormatTok.TokenText.starts_with("//") ||
4648 FormatTok.TokenText.starts_with("/*")) {
4649 IndentContent = FormatTok.TokenText.substr(2);
4651 if (CommentPragmasRegex.match(IndentContent))
4652 return false;
4654 // If Line starts with a line comment, then FormatTok continues the comment
4655 // section if its original column is greater or equal to the original start
4656 // column of the line.
4658 // Define the min column token of a line as follows: if a line ends in '{' or
4659 // contains a '{' followed by a line comment, then the min column token is
4660 // that '{'. Otherwise, the min column token of the line is the first token of
4661 // the line.
4663 // If Line starts with a token other than a line comment, then FormatTok
4664 // continues the comment section if its original column is greater than the
4665 // original start column of the min column token of the line.
4667 // For example, the second line comment continues the first in these cases:
4669 // // first line
4670 // // second line
4672 // and:
4674 // // first line
4675 // // second line
4677 // and:
4679 // int i; // first line
4680 // // second line
4682 // and:
4684 // do { // first line
4685 // // second line
4686 // int i;
4687 // } while (true);
4689 // and:
4691 // enum {
4692 // a, // first line
4693 // // second line
4694 // b
4695 // };
4697 // The second line comment doesn't continue the first in these cases:
4699 // // first line
4700 // // second line
4702 // and:
4704 // int i; // first line
4705 // // second line
4707 // and:
4709 // do { // first line
4710 // // second line
4711 // int i;
4712 // } while (true);
4714 // and:
4716 // enum {
4717 // a, // first line
4718 // // second line
4719 // };
4720 const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4722 // Scan for '{//'. If found, use the column of '{' as a min column for line
4723 // comment section continuation.
4724 const FormatToken *PreviousToken = nullptr;
4725 for (const UnwrappedLineNode &Node : Line.Tokens) {
4726 if (PreviousToken && PreviousToken->is(tok::l_brace) &&
4727 isLineComment(*Node.Tok)) {
4728 MinColumnToken = PreviousToken;
4729 break;
4731 PreviousToken = Node.Tok;
4733 // Grab the last newline preceding a token in this unwrapped line.
4734 if (Node.Tok->NewlinesBefore > 0)
4735 MinColumnToken = Node.Tok;
4737 if (PreviousToken && PreviousToken->is(tok::l_brace))
4738 MinColumnToken = PreviousToken;
4740 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4741 MinColumnToken);
4744 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4745 bool JustComments = Line->Tokens.empty();
4746 for (FormatToken *Tok : CommentsBeforeNextToken) {
4747 // Line comments that belong to the same line comment section are put on the
4748 // same line since later we might want to reflow content between them.
4749 // Additional fine-grained breaking of line comment sections is controlled
4750 // by the class BreakableLineCommentSection in case it is desirable to keep
4751 // several line comment sections in the same unwrapped line.
4753 // FIXME: Consider putting separate line comment sections as children to the
4754 // unwrapped line instead.
4755 Tok->ContinuesLineCommentSection =
4756 continuesLineCommentSection(*Tok, *Line, Style, CommentPragmasRegex);
4757 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4758 addUnwrappedLine();
4759 pushToken(Tok);
4761 if (NewlineBeforeNext && JustComments)
4762 addUnwrappedLine();
4763 CommentsBeforeNextToken.clear();
4766 void UnwrappedLineParser::nextToken(int LevelDifference) {
4767 if (eof())
4768 return;
4769 flushComments(isOnNewLine(*FormatTok));
4770 pushToken(FormatTok);
4771 FormatToken *Previous = FormatTok;
4772 if (!Style.isJavaScript())
4773 readToken(LevelDifference);
4774 else
4775 readTokenWithJavaScriptASI();
4776 FormatTok->Previous = Previous;
4777 if (Style.isVerilog()) {
4778 // Blocks in Verilog can have `begin` and `end` instead of braces. For
4779 // keywords like `begin`, we can't treat them the same as left braces
4780 // because some contexts require one of them. For example structs use
4781 // braces and if blocks use keywords, and a left brace can occur in an if
4782 // statement, but it is not a block. For keywords like `end`, we simply
4783 // treat them the same as right braces.
4784 if (Keywords.isVerilogEnd(*FormatTok))
4785 FormatTok->Tok.setKind(tok::r_brace);
4789 void UnwrappedLineParser::distributeComments(
4790 const SmallVectorImpl<FormatToken *> &Comments,
4791 const FormatToken *NextTok) {
4792 // Whether or not a line comment token continues a line is controlled by
4793 // the method continuesLineCommentSection, with the following caveat:
4795 // Define a trail of Comments to be a nonempty proper postfix of Comments such
4796 // that each comment line from the trail is aligned with the next token, if
4797 // the next token exists. If a trail exists, the beginning of the maximal
4798 // trail is marked as a start of a new comment section.
4800 // For example in this code:
4802 // int a; // line about a
4803 // // line 1 about b
4804 // // line 2 about b
4805 // int b;
4807 // the two lines about b form a maximal trail, so there are two sections, the
4808 // first one consisting of the single comment "// line about a" and the
4809 // second one consisting of the next two comments.
4810 if (Comments.empty())
4811 return;
4812 bool ShouldPushCommentsInCurrentLine = true;
4813 bool HasTrailAlignedWithNextToken = false;
4814 unsigned StartOfTrailAlignedWithNextToken = 0;
4815 if (NextTok) {
4816 // We are skipping the first element intentionally.
4817 for (unsigned i = Comments.size() - 1; i > 0; --i) {
4818 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4819 HasTrailAlignedWithNextToken = true;
4820 StartOfTrailAlignedWithNextToken = i;
4824 for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4825 FormatToken *FormatTok = Comments[i];
4826 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4827 FormatTok->ContinuesLineCommentSection = false;
4828 } else {
4829 FormatTok->ContinuesLineCommentSection = continuesLineCommentSection(
4830 *FormatTok, *Line, Style, CommentPragmasRegex);
4832 if (!FormatTok->ContinuesLineCommentSection &&
4833 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
4834 ShouldPushCommentsInCurrentLine = false;
4836 if (ShouldPushCommentsInCurrentLine)
4837 pushToken(FormatTok);
4838 else
4839 CommentsBeforeNextToken.push_back(FormatTok);
4843 void UnwrappedLineParser::readToken(int LevelDifference) {
4844 SmallVector<FormatToken *, 1> Comments;
4845 bool PreviousWasComment = false;
4846 bool FirstNonCommentOnLine = false;
4847 do {
4848 FormatTok = Tokens->getNextToken();
4849 assert(FormatTok);
4850 while (FormatTok->isOneOf(TT_ConflictStart, TT_ConflictEnd,
4851 TT_ConflictAlternative)) {
4852 if (FormatTok->is(TT_ConflictStart))
4853 conditionalCompilationStart(/*Unreachable=*/false);
4854 else if (FormatTok->is(TT_ConflictAlternative))
4855 conditionalCompilationAlternative();
4856 else if (FormatTok->is(TT_ConflictEnd))
4857 conditionalCompilationEnd();
4858 FormatTok = Tokens->getNextToken();
4859 FormatTok->MustBreakBefore = true;
4860 FormatTok->MustBreakBeforeFinalized = true;
4863 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4864 const FormatToken &Tok,
4865 bool PreviousWasComment) {
4866 auto IsFirstOnLine = [](const FormatToken &Tok) {
4867 return Tok.HasUnescapedNewline || Tok.IsFirst;
4870 // Consider preprocessor directives preceded by block comments as first
4871 // on line.
4872 if (PreviousWasComment)
4873 return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4874 return IsFirstOnLine(Tok);
4877 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4878 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4879 PreviousWasComment = FormatTok->is(tok::comment);
4881 while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
4882 (!Style.isVerilog() ||
4883 Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) &&
4884 FirstNonCommentOnLine) {
4885 distributeComments(Comments, FormatTok);
4886 Comments.clear();
4887 // If there is an unfinished unwrapped line, we flush the preprocessor
4888 // directives only after that unwrapped line was finished later.
4889 bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4890 ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4891 assert((LevelDifference >= 0 ||
4892 static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4893 "LevelDifference makes Line->Level negative");
4894 Line->Level += LevelDifference;
4895 // Comments stored before the preprocessor directive need to be output
4896 // before the preprocessor directive, at the same level as the
4897 // preprocessor directive, as we consider them to apply to the directive.
4898 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
4899 PPBranchLevel > 0) {
4900 Line->Level += PPBranchLevel;
4902 assert(Line->Level >= Line->UnbracedBodyLevel);
4903 Line->Level -= Line->UnbracedBodyLevel;
4904 flushComments(isOnNewLine(*FormatTok));
4905 parsePPDirective();
4906 PreviousWasComment = FormatTok->is(tok::comment);
4907 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4908 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4911 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4912 !Line->InPPDirective) {
4913 continue;
4916 if (FormatTok->is(tok::identifier) &&
4917 Macros.defined(FormatTok->TokenText) &&
4918 // FIXME: Allow expanding macros in preprocessor directives.
4919 !Line->InPPDirective) {
4920 FormatToken *ID = FormatTok;
4921 unsigned Position = Tokens->getPosition();
4923 // To correctly parse the code, we need to replace the tokens of the macro
4924 // call with its expansion.
4925 auto PreCall = std::move(Line);
4926 Line.reset(new UnwrappedLine);
4927 bool OldInExpansion = InExpansion;
4928 InExpansion = true;
4929 // We parse the macro call into a new line.
4930 auto Args = parseMacroCall();
4931 InExpansion = OldInExpansion;
4932 assert(Line->Tokens.front().Tok == ID);
4933 // And remember the unexpanded macro call tokens.
4934 auto UnexpandedLine = std::move(Line);
4935 // Reset to the old line.
4936 Line = std::move(PreCall);
4938 LLVM_DEBUG({
4939 llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
4940 if (Args) {
4941 llvm::dbgs() << "(";
4942 for (const auto &Arg : Args.value())
4943 for (const auto &T : Arg)
4944 llvm::dbgs() << T->TokenText << " ";
4945 llvm::dbgs() << ")";
4947 llvm::dbgs() << "\n";
4949 if (Macros.objectLike(ID->TokenText) && Args &&
4950 !Macros.hasArity(ID->TokenText, Args->size())) {
4951 // The macro is either
4952 // - object-like, but we got argumnets, or
4953 // - overloaded to be both object-like and function-like, but none of
4954 // the function-like arities match the number of arguments.
4955 // Thus, expand as object-like macro.
4956 LLVM_DEBUG(llvm::dbgs()
4957 << "Macro \"" << ID->TokenText
4958 << "\" not overloaded for arity " << Args->size()
4959 << "or not function-like, using object-like overload.");
4960 Args.reset();
4961 UnexpandedLine->Tokens.resize(1);
4962 Tokens->setPosition(Position);
4963 nextToken();
4964 assert(!Args && Macros.objectLike(ID->TokenText));
4966 if ((!Args && Macros.objectLike(ID->TokenText)) ||
4967 (Args && Macros.hasArity(ID->TokenText, Args->size()))) {
4968 // Next, we insert the expanded tokens in the token stream at the
4969 // current position, and continue parsing.
4970 Unexpanded[ID] = std::move(UnexpandedLine);
4971 SmallVector<FormatToken *, 8> Expansion =
4972 Macros.expand(ID, std::move(Args));
4973 if (!Expansion.empty())
4974 FormatTok = Tokens->insertTokens(Expansion);
4976 LLVM_DEBUG({
4977 llvm::dbgs() << "Expanded: ";
4978 for (const auto &T : Expansion)
4979 llvm::dbgs() << T->TokenText << " ";
4980 llvm::dbgs() << "\n";
4982 } else {
4983 LLVM_DEBUG({
4984 llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
4985 << "\", because it was used ";
4986 if (Args)
4987 llvm::dbgs() << "with " << Args->size();
4988 else
4989 llvm::dbgs() << "without";
4990 llvm::dbgs() << " arguments, which doesn't match any definition.\n";
4992 Tokens->setPosition(Position);
4993 FormatTok = ID;
4997 if (FormatTok->isNot(tok::comment)) {
4998 distributeComments(Comments, FormatTok);
4999 Comments.clear();
5000 return;
5003 Comments.push_back(FormatTok);
5004 } while (!eof());
5006 distributeComments(Comments, nullptr);
5007 Comments.clear();
5010 namespace {
5011 template <typename Iterator>
5012 void pushTokens(Iterator Begin, Iterator End,
5013 llvm::SmallVectorImpl<FormatToken *> &Into) {
5014 for (auto I = Begin; I != End; ++I) {
5015 Into.push_back(I->Tok);
5016 for (const auto &Child : I->Children)
5017 pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
5020 } // namespace
5022 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
5023 UnwrappedLineParser::parseMacroCall() {
5024 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
5025 assert(Line->Tokens.empty());
5026 nextToken();
5027 if (FormatTok->isNot(tok::l_paren))
5028 return Args;
5029 unsigned Position = Tokens->getPosition();
5030 FormatToken *Tok = FormatTok;
5031 nextToken();
5032 Args.emplace();
5033 auto ArgStart = std::prev(Line->Tokens.end());
5035 int Parens = 0;
5036 do {
5037 switch (FormatTok->Tok.getKind()) {
5038 case tok::l_paren:
5039 ++Parens;
5040 nextToken();
5041 break;
5042 case tok::r_paren: {
5043 if (Parens > 0) {
5044 --Parens;
5045 nextToken();
5046 break;
5048 Args->push_back({});
5049 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
5050 nextToken();
5051 return Args;
5053 case tok::comma: {
5054 if (Parens > 0) {
5055 nextToken();
5056 break;
5058 Args->push_back({});
5059 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
5060 nextToken();
5061 ArgStart = std::prev(Line->Tokens.end());
5062 break;
5064 default:
5065 nextToken();
5066 break;
5068 } while (!eof());
5069 Line->Tokens.resize(1);
5070 Tokens->setPosition(Position);
5071 FormatTok = Tok;
5072 return {};
5075 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
5076 Line->Tokens.push_back(UnwrappedLineNode(Tok));
5077 if (MustBreakBeforeNextToken) {
5078 Line->Tokens.back().Tok->MustBreakBefore = true;
5079 Line->Tokens.back().Tok->MustBreakBeforeFinalized = true;
5080 MustBreakBeforeNextToken = false;
5084 } // end namespace format
5085 } // end namespace clang