1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
13 //===----------------------------------------------------------------------===//
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "FormatTokenLexer.h"
18 #include "FormatTokenSource.h"
20 #include "TokenAnnotator.h"
21 #include "clang/Basic/TokenKinds.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/raw_os_ostream.h"
26 #include "llvm/Support/raw_ostream.h"
31 #define DEBUG_TYPE "format-parser"
// Debug helper: streams a textual dump of \p Line to \p OS.
// The header part shows \p Prefix, the line's Level, its FirstStartColumn
// (printed as FSC) and a " MACRO" tag when the line is in a PP directive.
// \p Prefix defaults to the empty string and \p PrintText to false.
38 void printLine(llvm::raw_ostream
&OS
, const UnwrappedLine
&Line
,
39 StringRef Prefix
= "", bool PrintText
= false) {
40 OS
<< Prefix
<< "Line(" << Line
.Level
<< ", FSC=" << Line
.FirstStartColumn
41 << ")" << (Line
.InPPDirective
? " MACRO" : "") << ": ";
// Walk the line's token list.
43 for (std::list
<UnwrappedLineNode
>::const_iterator I
= Line
.Tokens
.begin(),
44 E
= Line
.Tokens
.end();
// Per token: kind name, token type (T=), original column (OC=) and text.
50 OS
<< I
->Tok
->Tok
.getName() << "[" << "T=" << (unsigned)I
->Tok
->getType()
51 << ", OC=" << I
->Tok
->OriginalColumn
<< ", \"" << I
->Tok
->TokenText
// Child lines hanging off this token are dumped recursively with a longer
// prefix so that nesting is visible in the output.
53 for (SmallVectorImpl
<UnwrappedLine
>::const_iterator
54 CI
= I
->Children
.begin(),
55 CE
= I
->Children
.end();
58 printLine(OS
, *CI
, (Prefix
+ " ").str());
// Dumps \p Line to the LLVM debug stream (llvm::dbgs()) via printLine, using
// printLine's default prefix. The callers visible in this file all sit inside
// LLVM_DEBUG(...), which is presumably why it is marked LLVM_ATTRIBUTE_UNUSED.
66 LLVM_ATTRIBUTE_UNUSED
static void printDebugInfo(const UnwrappedLine
&Line
) {
67 printLine(llvm::dbgs(), Line
);
// Scope guard for the "must be a declaration" flag of an UnwrappedLine.
// Construction records the new flag on the line and pushes it onto the
// caller-owned bit stack; destruction writes Line.MustBeDeclaration back.
70 class ScopedDeclarationState
{
// \param Line              line whose MustBeDeclaration flag is managed.
// \param Stack             stack of flags for the enclosing scopes.
// \param MustBeDeclaration value to apply for the lifetime of this object.
72 ScopedDeclarationState(UnwrappedLine
&Line
, llvm::BitVector
&Stack
,
73 bool MustBeDeclaration
)
74 : Line(Line
), Stack(Stack
) {
75 Line
.MustBeDeclaration
= MustBeDeclaration
;
76 Stack
.push_back(MustBeDeclaration
);
// On scope exit the line gets either the flag now at the back of the stack or
// `true`. NOTE(review): presumably `true` is the fallback for an empty stack
// - confirm against the full destructor.
78 ~ScopedDeclarationState() {
81 Line
.MustBeDeclaration
= Stack
.back();
83 Line
.MustBeDeclaration
= true;
// Held by reference: the stack is owned by the caller.
88 llvm::BitVector
&Stack
;
91 } // end anonymous namespace
// Scope guard that parks the parser's current line and redirects where newly
// finished lines are collected. The original line and the original line
// container are put back by the teardown code further down.
93 class ScopedLineState
{
// \param Parser                    parser whose Line/CurrentLines are swapped.
// \param SwitchToPreprocessorLines when true, collect new lines in
//        Parser.PreprocessorDirectives; otherwise, if the current line has
//        tokens, collect them as children of its last token.
95 ScopedLineState(UnwrappedLineParser
&Parser
,
96 bool SwitchToPreprocessorLines
= false)
97 : Parser(Parser
), OriginalLines(Parser
.CurrentLines
) {
98 if (SwitchToPreprocessorLines
)
99 Parser
.CurrentLines
= &Parser
.PreprocessorDirectives
;
100 else if (!Parser
.Line
->Tokens
.empty())
101 Parser
.CurrentLines
= &Parser
.Line
->Tokens
.back().Children
;
// Park the line being built and start a fresh one that inherits its level and
// preprocessor/macro state.
102 PreBlockLine
= std::move(Parser
.Line
);
103 Parser
.Line
= std::make_unique
<UnwrappedLine
>();
104 Parser
.Line
->Level
= PreBlockLine
->Level
;
105 Parser
.Line
->PPLevel
= PreBlockLine
->PPLevel
;
106 Parser
.Line
->InPPDirective
= PreBlockLine
->InPPDirective
;
107 Parser
.Line
->InMacroBody
= PreBlockLine
->InMacroBody
;
// Teardown (NOTE(review): presumably the destructor body): flush any tokens
// still pending on the temporary line, then restore the parked line.
111 if (!Parser
.Line
->Tokens
.empty())
112 Parser
.addUnwrappedLine();
113 assert(Parser
.Line
->Tokens
.empty());
114 Parser
.Line
= std::move(PreBlockLine
);
// If lines were being collected as preprocessor directives, request a break
// before the next token.
115 if (Parser
.CurrentLines
== &Parser
.PreprocessorDirectives
)
116 Parser
.MustBreakBeforeNextToken
= true;
117 Parser
.CurrentLines
= OriginalLines
;
121 UnwrappedLineParser
&Parser
;
// The line that was current when this object was created.
123 std::unique_ptr
<UnwrappedLine
> PreBlockLine
;
// The container Parser.CurrentLines pointed at on construction.
124 SmallVectorImpl
<UnwrappedLine
> *OriginalLines
;
// Scope guard around the level of the line that introduces a compound
// statement: remembers the level on construction and restores it on
// destruction.
127 class CompoundStatementIndenter
{
// Convenience overload: takes the wrap/indent decisions from
// Style.BraceWrapping (AfterControlStatement and IndentBraces) and delegates
// to the constructor below.
129 CompoundStatementIndenter(UnwrappedLineParser
*Parser
,
130 const FormatStyle
&Style
, unsigned &LineLevel
)
131 : CompoundStatementIndenter(Parser
, LineLevel
,
132 Style
.BraceWrapping
.AfterControlStatement
,
133 Style
.BraceWrapping
.IndentBraces
) {}
// \param LineLevel   level to manage; its current value is saved in
//                    OldLineLevel.
// \param WrapBrace   NOTE(review): presumably guards the addUnwrappedLine()
//                    call below - confirm.
// \param IndentBrace NOTE(review): presumably raises LineLevel - confirm.
134 CompoundStatementIndenter(UnwrappedLineParser
*Parser
, unsigned &LineLevel
,
135 bool WrapBrace
, bool IndentBrace
)
136 : LineLevel(LineLevel
), OldLineLevel(LineLevel
) {
138 Parser
->addUnwrappedLine();
// Put the caller's level back exactly as it was found.
142 ~CompoundStatementIndenter() { LineLevel
= OldLineLevel
; }
// Level captured at construction time.
146 unsigned OldLineLevel
;
// Constructs a parser over \p Tokens.
// \param SourceMgr        forwarded to the Macros member.
// \param Style            format style; also seeds CommentPragmasRegex,
//                         IncludeGuard and Macros.
// \param Keywords         additional keywords table.
// \param FirstStartColumn stored as FirstStartColumn.
// \param Tokens           token sequence, stored as AllTokens.
// \param Callback         consumer that receives the parsed lines.
// \param Allocator        forwarded to the Macros member.
// \param IdentTable       forwarded to the Macros member.
149 UnwrappedLineParser::UnwrappedLineParser(
150 SourceManager
&SourceMgr
, const FormatStyle
&Style
,
151 const AdditionalKeywords
&Keywords
, unsigned FirstStartColumn
,
152 ArrayRef
<FormatToken
*> Tokens
, UnwrappedLineConsumer
&Callback
,
153 llvm::SpecificBumpPtrAllocator
<FormatToken
> &Allocator
,
154 IdentifierTable
&IdentTable
)
// Start with an empty line collected into Lines; the token source pointer
// (this->Tokens) stays null until parse() installs one.
155 : Line(new UnwrappedLine
), MustBreakBeforeNextToken(false),
156 CurrentLines(&Lines
), Style(Style
), Keywords(Keywords
),
// PPBranchLevel == -1 means "not inside any conditional" (see the #endif
// guard in conditionalCompilationEnd).
157 CommentPragmasRegex(Style
.CommentPragmas
), Tokens(nullptr),
158 Callback(Callback
), AllTokens(Tokens
), PPBranchLevel(-1),
// The initial include-guard state depends on whether PP directives are
// indented at all.
159 IncludeGuard(Style
.IndentPPDirectives
== FormatStyle::PPDIS_None
162 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn
),
163 Macros(Style
.Macros
, SourceMgr
, Style
, Allocator
, IdentTable
) {}
// Returns the parser to its initial state so another run can start:
// include-guard tracking, the current line, pending comments, scope stacks
// and macro-expansion bookkeeping are all cleared.
165 void UnwrappedLineParser::reset() {
// Same initial include-guard computation as in the constructor.
167 IncludeGuard
= Style
.IndentPPDirectives
== FormatStyle::PPDIS_None
170 IncludeGuardToken
= nullptr;
171 Line
.reset(new UnwrappedLine
);
172 CommentsBeforeNextToken
.clear();
174 MustBreakBeforeNextToken
= false;
175 IsDecltypeAutoFunction
= false;
176 PreprocessorDirectives
.clear();
177 CurrentLines
= &Lines
;
178 DeclarationScopeStack
.clear();
179 NestedTooDeep
.clear();
180 NestedLambdas
.clear();
182 Line
->FirstStartColumn
= FirstStartColumn
;
// If macros were expanded, drop the macro context recorded on every token.
184 if (!Unexpanded
.empty())
185 for (FormatToken
*Token
: AllTokens
)
186 Token
->MacroCtx
.reset();
187 CurrentExpandedLines
.clear();
188 ExpandedLines
.clear();
// Entry point: parses AllTokens into unwrapped lines and feeds them to
// Callback, ending each pass with Callback.finishRun(). The trailing
// `while (!PPLevelBranchIndex.empty())` shows the work is repeated while
// preprocessor branch combinations remain to be visited.
194 void UnwrappedLineParser::parse() {
195 IndexedTokenSource
TokenSource(AllTokens
);
196 Line
->FirstStartColumn
= FirstStartColumn
;
198 LLVM_DEBUG(llvm::dbgs() << "----\n");
// TokenSource is a local; this->Tokens points at it while parsing.
200 Tokens
= &TokenSource
;
206 // If we found an include guard then all preprocessor directives (other than
207 // the guard) are over-indented by one.
208 if (IncludeGuard
== IG_Found
) {
209 for (auto &Line
: Lines
)
210 if (Line
.InPPDirective
&& Line
.Level
> 0)
214 // Create line with eof token.
216 pushToken(FormatTok
);
219 // In a first run, format everything with the lines containing macro calls
220 // replaced by the expansion.
221 if (!ExpandedLines
.empty()) {
222 LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
223 for (const auto &Line
: Lines
) {
// ExpandedLines is keyed by the first token of a line.
224 if (!Line
.Tokens
.empty()) {
225 auto it
= ExpandedLines
.find(Line
.Tokens
.begin()->Tok
);
226 if (it
!= ExpandedLines
.end()) {
227 for (const auto &Expanded
: it
->second
) {
228 LLVM_DEBUG(printDebugInfo(Expanded
));
229 Callback
.consumeUnwrappedLine(Expanded
);
234 LLVM_DEBUG(printDebugInfo(Line
));
235 Callback
.consumeUnwrappedLine(Line
);
237 Callback
.finishRun();
// Regular pass: hand every parsed line to the consumer.
240 LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
241 for (const UnwrappedLine
&Line
: Lines
) {
242 LLVM_DEBUG(printDebugInfo(Line
));
243 Callback
.consumeUnwrappedLine(Line
);
245 Callback
.finishRun();
// Step to the next branch combination: drop trailing levels whose last branch
// has already been visited ...
247 while (!PPLevelBranchIndex
.empty() &&
248 PPLevelBranchIndex
.back() + 1 >= PPLevelBranchCount
.back()) {
249 PPLevelBranchIndex
.resize(PPLevelBranchIndex
.size() - 1);
250 PPLevelBranchCount
.resize(PPLevelBranchCount
.size() - 1);
// ... then move the innermost remaining level on to its next branch.
252 if (!PPLevelBranchIndex
.empty()) {
253 ++PPLevelBranchIndex
.back();
254 assert(PPLevelBranchIndex
.size() == PPLevelBranchCount
.size());
255 assert(PPLevelBranchIndex
.back() <= PPLevelBranchCount
.back());
257 } while (!PPLevelBranchIndex
.empty());
// Parses the top level of the input. A ScopedDeclarationState is installed
// for the duration, and TextProto input gets special treatment both for the
// body and for trailing end-of-file comments.
260 void UnwrappedLineParser::parseFile() {
261 // The top-level context in a file always has declarations, except for pre-
262 // processor directives and JavaScript files.
263 bool MustBeDeclaration
= !Line
->InPPDirective
&& !Style
.isJavaScript();
264 ScopedDeclarationState
DeclarationState(*Line
, DeclarationScopeStack
,
// TextProto takes a different path from other languages here (see the
// comment further down: its top level is parsed as a braced-list body).
266 if (Style
.Language
== FormatStyle::LK_TextProto
)
270 // Make sure to format the remaining tokens.
272 // LK_TextProto is special since its top-level is parsed as the body of a
273 // braced list, which does not necessarily have natural line separators such
274 // as a semicolon. Comments after the last entry that have been determined to
275 // not belong to that line, as in:
277 // // endfile comment
278 // do not have a chance to be put on a line of their own until this point.
279 // Here we add this newline before end-of-file comments.
280 if (Style
.Language
== FormatStyle::LK_TextProto
&&
281 !CommentsBeforeNextToken
.empty()) {
// Parses a C# generic type constraint by dispatching on the kind of the
// current token. When a `where` keyword (Keywords.kw_where) is encountered,
// the function calls itself again.
// NOTE(review): presumably that recursion handles a following constraint
// clause - confirm.
288 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
290 switch (FormatTok
->Tok
.getKind()) {
294 if (FormatTok
->is(Keywords
.kw_where
)) {
297 parseCSharpGenericTypeConstraint();
// Parses a C# attribute by tracking square-bracket nesting.
306 void UnwrappedLineParser::parseCSharpAttribute() {
// Starts at 1.
// NOTE(review): presumably the opening '[' has already been seen - confirm.
307 int UnpairedSquareBrackets
= 1;
309 switch (FormatTok
->Tok
.getKind()) {
// One bracket fewer is open; reaching zero is handled specially.
// NOTE(review): presumably zero marks the end of the attribute - confirm.
312 --UnpairedSquareBrackets
;
313 if (UnpairedSquareBrackets
== 0) {
// One more bracket is open.
319 ++UnpairedSquareBrackets
;
// Reports whether the current position directly follows a comment or a
// preprocessor directive. Two checks are visible: the last finished line
// being a PP directive, and the previous token being a comment that is
// either multi-line or starts on its own line.
329 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
330 if (!Lines
.empty() && Lines
.back().InPPDirective
)
// Previous may be null, hence the null check in the return expression.
333 const FormatToken
*Previous
= Tokens
->getPreviousToken();
334 return Previous
&& Previous
->is(tok::comment
) &&
335 (Previous
->IsMultiline
|| Previous
->NewlinesBefore
> 0);
338 /// \brief Parses a level, i.e. a run of consecutive structural elements.
339 /// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
340 /// \param IfKind The \p if statement kind in the level.
341 /// \param IfLeftBrace The left brace of the \p if block in the level.
342 /// \returns true if a simple block of if/else/for/while, or false otherwise.
343 /// (A simple block has a single statement.)
344 bool UnwrappedLineParser::parseLevel(const FormatToken
*OpeningBrace
,
346 FormatToken
**IfLeftBrace
) {
347 const bool InRequiresExpression
=
348 OpeningBrace
&& OpeningBrace
->is(TT_RequiresExpressionLBrace
);
// Sampled once on entry. When RemoveBracesLLVM is off this is simply true,
// which makes the "simple block" test further down bail out.
349 const bool IsPrecededByCommentOrPPDirective
=
350 !Style
.RemoveBracesLLVM
|| precededByCommentOrPPDirective();
// Facts gathered while walking the level; they feed the decision below.
351 FormatToken
*IfLBrace
= nullptr;
352 bool HasDoWhile
= false;
353 bool HasLabel
= false;
354 unsigned StatementCount
= 0;
355 bool SwitchLabelEncountered
= false;
358 if (FormatTok
->isAttribute()) {
// Macro block begin/end tokens are special-cased before the token kind is
// acted upon.
362 tok::TokenKind kind
= FormatTok
->Tok
.getKind();
363 if (FormatTok
->getType() == TT_MacroBlockBegin
)
365 else if (FormatTok
->getType() == TT_MacroBlockEnd
)
// Default handling: parse one structural element. Once a do-while or a label
// has been seen, nullptr is passed so the flag is not written again.
368 auto ParseDefault
= [this, OpeningBrace
, IfKind
, &IfLBrace
, &HasDoWhile
,
369 &HasLabel
, &StatementCount
] {
370 parseStructuralElement(OpeningBrace
, IfKind
, &IfLBrace
,
371 HasDoWhile
? nullptr : &HasDoWhile
,
372 HasLabel
? nullptr : &HasLabel
);
374 assert(StatementCount
> 0 && "StatementCount overflow!");
383 if (InRequiresExpression
) {
384 FormatTok
->setFinalizedType(TT_RequiresExpressionLBrace
);
385 } else if (FormatTok
->Previous
&&
386 FormatTok
->Previous
->ClosesRequiresClause
) {
387 // We need the 'default' case here to correctly parse a function
392 if (!InRequiresExpression
&& FormatTok
->isNot(TT_MacroBlockBegin
) &&
393 tryToParseBracedList()) {
398 assert(StatementCount
> 0 && "StatementCount overflow!");
// Braces are only candidates for removal with RemoveBracesLLVM, outside PP
// directives, and for control-statement / else braces.
403 if (!Style
.RemoveBracesLLVM
|| Line
->InPPDirective
||
404 !OpeningBrace
->isOneOf(TT_ControlStatementLBrace
, TT_ElseLBrace
)) {
// Any of these conditions disqualifies the block from being "simple".
407 if (FormatTok
->isNot(tok::r_brace
) || StatementCount
!= 1 || HasLabel
||
408 HasDoWhile
|| IsPrecededByCommentOrPPDirective
||
409 precededByCommentOrPPDirective()) {
// A comment on the same line as the closing brace is checked as well.
412 const FormatToken
*Next
= Tokens
->peekNextToken();
413 if (Next
->is(tok::comment
) && Next
->NewlinesBefore
== 0)
// Report the if-block's left brace back to the caller.
416 *IfLeftBrace
= IfLBrace
;
422 case tok::kw_default
: {
// Look ahead past comments to the token after `default`, then rewind.
423 unsigned StoredPosition
= Tokens
->getPosition();
426 Next
= Tokens
->getNextToken();
428 } while (Next
->is(tok::comment
));
429 FormatTok
= Tokens
->setPosition(StoredPosition
);
430 if (Next
->isNot(tok::colon
)) {
431 // default not followed by ':' is not a case label; treat it like
433 parseStructuralElement();
436 // Else, if it is 'default:', fall through to the case handling.
440 if (Style
.Language
== FormatStyle::LK_Proto
|| Style
.isVerilog() ||
441 (Style
.isJavaScript() && Line
->MustBeDeclaration
)) {
442 // Proto: there are no switch/case statements
443 // Verilog: Case labels don't have this word. We handle case
444 // labels including default in TokenAnnotator.
445 // JavaScript: A 'case: string' style field declaration.
// Only the first switch label in the level is considered here.
449 if (!SwitchLabelEncountered
&&
450 (Style
.IndentCaseLabels
||
451 (Line
->InPPDirective
&& Line
->Level
== 1))) {
454 SwitchLabelEncountered
= true;
455 parseStructuralElement();
458 if (Style
.isCSharp()) {
460 parseCSharpAttribute();
463 if (handleCppAttributes())
// Looks ahead from the current '{' and decides, for the brace pairs seen on
// the way, whether each is a braced-init list (BK_BracedInit) or a block
// (BK_Block). The token position is saved on entry and restored on exit.
// \param ExpectClassBody affects how a ';' after the outermost closing brace
//        is interpreted (see the tok::semi check below).
475 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody
) {
476 // We'll parse forward through the tokens until we hit
477 // a closing brace or eof - note that getNextToken() will
478 // parse macros, so this will magically work inside macro
480 unsigned StoredPosition
= Tokens
->getPosition();
481 FormatToken
*Tok
= FormatTok
;
482 const FormatToken
*PrevTok
= Tok
->Previous
;
483 // Keep a stack of positions of lbrace tokens. We will
484 // update information about whether an lbrace starts a
485 // braced init list or a different block during the loop.
// Stack entry field: the token that preceded the recorded '{'.
488 const FormatToken
*PrevTok
;
490 SmallVector
<StackEntry
, 8> LBraceStack
;
491 assert(Tok
->is(tok::l_brace
));
493 // Get next non-comment, non-preprocessor token.
494 FormatToken
*NextTok
;
496 NextTok
= Tokens
->getNextToken();
497 } while (NextTok
->is(tok::comment
));
// Outside a macro body, skip a preprocessor line: advance until a
// non-comment token that starts a new line (or eof) is reached.
498 while (NextTok
->is(tok::hash
) && !Line
->InMacroBody
) {
499 NextTok
= Tokens
->getNextToken();
501 NextTok
= Tokens
->getNextToken();
502 } while (NextTok
->is(tok::comment
) ||
503 (NextTok
->NewlinesBefore
== 0 && NextTok
->isNot(tok::eof
)));
506 switch (Tok
->Tok
.getKind()) {
508 if (Style
.isJavaScript() && PrevTok
) {
509 if (PrevTok
->isOneOf(tok::colon
, tok::less
)) {
510 // A ':' indicates this code is in a type, or a braced list
511 // following a label in an object literal ({a: {b: 1}}).
512 // A '<' could be an object used in a comparison, but that is nonsense
513 // code (can never return true), so more likely it is a generic type
514 // argument (`X<{a: string; b: number}>`).
515 // The code below could be confused by semicolons between the
516 // individual members in a type member list, which would normally
517 // trigger BK_Block. In both cases, this must be parsed as an inline
519 Tok
->setBlockKind(BK_BracedInit
);
520 } else if (PrevTok
->is(tok::r_paren
)) {
521 // `) { }` can only occur in function or method declarations in JS.
522 Tok
->setBlockKind(BK_Block
);
// Undecided for now; the brace is remembered and resolved later.
525 Tok
->setBlockKind(BK_Unknown
);
527 LBraceStack
.push_back({Tok
, PrevTok
});
530 if (LBraceStack
.empty())
// Only classify if the matching opening brace is still undecided.
532 if (LBraceStack
.back().Tok
->is(BK_Unknown
)) {
533 bool ProbablyBracedList
= false;
534 if (Style
.Language
== FormatStyle::LK_Proto
) {
535 ProbablyBracedList
= NextTok
->isOneOf(tok::comma
, tok::r_square
);
537 // Skip NextTok over preprocessor lines, otherwise we may not
538 // properly diagnose the block as a braced initializer
539 // if the comma separator appears after the pp directive.
540 while (NextTok
->is(tok::hash
)) {
541 ScopedMacroState
MacroState(*Line
, Tokens
, NextTok
);
543 NextTok
= Tokens
->getNextToken();
544 } while (NextTok
->isNot(tok::eof
));
547 // Using OriginalColumn to distinguish between ObjC methods and
548 // binary operators is a bit hacky.
549 bool NextIsObjCMethod
= NextTok
->isOneOf(tok::plus
, tok::minus
) &&
550 NextTok
->OriginalColumn
== 0;
552 // Try to detect a braced list. Note that regardless how we mark inner
553 // braces here, we will overwrite the BlockKind later if we parse a
554 // braced list (where all blocks inside are by default braced lists),
555 // or when we explicitly detect blocks (for example while parsing
558 // If we already marked the opening brace as braced list, the closing
559 // must also be part of it.
560 ProbablyBracedList
= LBraceStack
.back().Tok
->is(TT_BracedListLBrace
);
// From here on each heuristic can only turn ProbablyBracedList on.
562 ProbablyBracedList
= ProbablyBracedList
||
563 (Style
.isJavaScript() &&
564 NextTok
->isOneOf(Keywords
.kw_of
, Keywords
.kw_in
,
566 ProbablyBracedList
= ProbablyBracedList
||
567 (Style
.isCpp() && NextTok
->is(tok::l_paren
));
569 // If there is a comma, semicolon or right paren after the closing
570 // brace, we assume this is a braced initializer list.
571 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
572 // braced list in JS.
574 ProbablyBracedList
||
575 NextTok
->isOneOf(tok::comma
, tok::period
, tok::colon
,
576 tok::r_paren
, tok::r_square
, tok::ellipsis
);
578 // Distinguish between braced list in a constructor initializer list
579 // followed by constructor body, or just adjacent blocks.
581 ProbablyBracedList
||
582 (NextTok
->is(tok::l_brace
) && LBraceStack
.back().PrevTok
&&
583 LBraceStack
.back().PrevTok
->isOneOf(tok::identifier
,
587 ProbablyBracedList
||
588 (NextTok
->is(tok::identifier
) &&
589 !PrevTok
->isOneOf(tok::semi
, tok::r_brace
, tok::l_brace
));
// A ';' after the closing brace counts as a braced list unless this is the
// outermost brace of an expected class body.
591 ProbablyBracedList
= ProbablyBracedList
||
592 (NextTok
->is(tok::semi
) &&
593 (!ExpectClassBody
|| LBraceStack
.size() != 1));
596 ProbablyBracedList
||
597 (NextTok
->isBinaryOperator() && !NextIsObjCMethod
);
599 if (!Style
.isCSharp() && NextTok
->is(tok::l_square
)) {
600 // We can have an array subscript after a braced init
601 // list, but C++11 attributes are expected after blocks.
602 NextTok
= Tokens
->getNextToken();
603 ProbablyBracedList
= NextTok
->isNot(tok::l_square
);
// Closing and opening brace always receive the same kind.
606 if (ProbablyBracedList
) {
607 Tok
->setBlockKind(BK_BracedInit
);
608 LBraceStack
.back().Tok
->setBlockKind(BK_BracedInit
);
610 Tok
->setBlockKind(BK_Block
);
611 LBraceStack
.back().Tok
->setBlockKind(BK_Block
);
614 LBraceStack
.pop_back();
616 case tok::identifier
:
617 if (Tok
->isNot(TT_StatementMacro
))
// Reaching this point with an undecided innermost brace marks it as a block.
628 if (!LBraceStack
.empty() && LBraceStack
.back().Tok
->is(BK_Unknown
))
629 LBraceStack
.back().Tok
->setBlockKind(BK_Block
);
636 } while (Tok
->isNot(tok::eof
) && !LBraceStack
.empty());
638 // Assume other blocks for all unclosed opening braces.
639 for (const auto &Entry
: LBraceStack
)
640 if (Entry
.Tok
->is(BK_Unknown
))
641 Entry
.Tok
->setBlockKind(BK_Block
);
// Rewind: this function only looks ahead and must not consume tokens.
643 FormatTok
= Tokens
->setPosition(StoredPosition
);
646 // Sets the token type of the directly previous right brace.
// "Previous" means the closest preceding non-comment token. Nothing happens
// when there is no such token or when it is not a '}'.
// \param Type token type to set (as a finalized type) on that brace.
647 void UnwrappedLineParser::setPreviousRBraceType(TokenType Type
) {
648 if (auto Prev
= FormatTok
->getPreviousNonComment();
649 Prev
&& Prev
->is(tok::r_brace
)) {
650 Prev
->setFinalizedType(Type
);
// Folds the hash of \p v into \p seed in place.
// \param seed running hash value, updated by this call.
// \param v    value whose hash (computed by `hasher`) is mixed in.
655 static inline void hash_combine(std::size_t &seed
, const T
&v
) {
// Mixing step: xor the seed with the value's hash plus a constant and two
// shifted copies of the seed, so the result depends on the order of calls.
657 seed
^= hasher(v
) + 0x9e3779b9 + (seed
<< 6) + (seed
>> 2);
// Computes a hash over the current preprocessor stack (PPStack) by combining
// the Kind and Line of every entry in order. parseBlock compares the value
// taken at the start and at the end of a block.
660 size_t UnwrappedLineParser::computePPHash() const {
662 for (const auto &i
: PPStack
) {
// Kind is converted to size_t before hashing.
663 hash_combine(h
, size_t(i
.Kind
));
664 hash_combine(h
, i
.Line
);
669 // Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
670 // is not null, subtracts its length (plus the preceding space) when computing
671 // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
672 // running the token annotator on it so that we can restore them afterward.
673 bool UnwrappedLineParser::mightFitOnOneLine(
674 UnwrappedLine
&ParsedLine
, const FormatToken
*OpeningBrace
) const {
// A column limit of 0 is handled up front, before any annotation work.
675 const auto ColumnLimit
= Style
.ColumnLimit
;
676 if (ColumnLimit
== 0)
679 auto &Tokens
= ParsedLine
.Tokens
;
680 assert(!Tokens
.empty());
682 const auto *LastToken
= Tokens
.back().Tok
;
// Snapshot every token (and take over its children) so that the annotator
// can scribble on the originals.
685 SmallVector
<UnwrappedLineNode
> SavedTokens(Tokens
.size());
688 for (const auto &Token
: Tokens
) {
690 auto &SavedToken
= SavedTokens
[Index
++];
// Heap-allocated copy; released in the restore loop below.
691 SavedToken
.Tok
= new FormatToken
;
692 SavedToken
.Tok
->copyFrom(*Token
.Tok
);
693 SavedToken
.Children
= std::move(Token
.Children
);
696 AnnotatedLine
Line(ParsedLine
);
697 assert(Line
.Last
== LastToken
);
// Annotate so that TotalLength is available on the tokens.
699 TokenAnnotator
Annotator(Style
, Keywords
);
700 Annotator
.annotate(Line
);
701 Annotator
.calculateFormattingInformation(Line
);
703 auto Length
= LastToken
->TotalLength
;
705 assert(OpeningBrace
!= Tokens
.front().Tok
);
// If the brace's TotalLength exceeds its predecessor's by exactly
// ColumnLimit, subtract ColumnLimit; the other visible adjustment subtracts
// the brace text plus one.
706 if (auto Prev
= OpeningBrace
->Previous
;
707 Prev
&& Prev
->TotalLength
+ ColumnLimit
== OpeningBrace
->TotalLength
) {
708 Length
-= ColumnLimit
;
710 Length
-= OpeningBrace
->TokenText
.size() + 1;
// A leading '}' is discounted too (its text plus one).
713 if (const auto *FirstToken
= Line
.First
; FirstToken
->is(tok::r_brace
)) {
714 assert(!OpeningBrace
|| OpeningBrace
->is(TT_ControlStatementLBrace
));
715 Length
-= FirstToken
->TokenText
.size() + 1;
// Restore the original tokens from the snapshot and free the copies.
719 for (auto &Token
: Tokens
) {
720 const auto &SavedToken
= SavedTokens
[Index
++];
721 Token
.Tok
->copyFrom(*SavedToken
.Tok
);
722 Token
.Children
= std::move(SavedToken
.Children
);
723 delete SavedToken
.Tok
;
726 // If these change, PPLevel needs to be used to get correct indentation.
727 assert(!Line
.InMacroBody
);
728 assert(!Line
.InPPDirective
);
// Indentation plus the adjusted length must not exceed the column limit.
729 return Line
.Level
* Style
.IndentWidth
+ Length
<= ColumnLimit
;
// Parses a block starting at the current token, which must be a '{', a
// macro-block-begin token or, for Verilog, a begin/hierarchy keyword (see
// the assert below).
// \param MustBeDeclaration NOTE(review): presumably forwarded to the
//        ScopedDeclarationState below - confirm.
// \param AddLevels number of levels the block content is indented by.
// \param MunchSemi when true, a ';' directly after the block is handled here
//        as well.
// \param UnindentWhitesmithsBraces see the Whitesmiths comment below.
732 FormatToken
*UnwrappedLineParser::parseBlock(bool MustBeDeclaration
,
733 unsigned AddLevels
, bool MunchSemi
,
736 bool UnindentWhitesmithsBraces
) {
// Helper used after both the opening and the closing token: looks for a
// Verilog block label introduced by ':'.
737 auto HandleVerilogBlockLabel
= [this]() {
739 if (Style
.isVerilog() && FormatTok
->is(tok::colon
)) {
741 if (Keywords
.isVerilogIdentifier(*FormatTok
))
746 // Whether this is a Verilog-specific block that has a special header like a
748 const bool VerilogHierarchy
=
749 Style
.isVerilog() && Keywords
.isVerilogHierarchy(*FormatTok
);
750 assert((FormatTok
->isOneOf(tok::l_brace
, TT_MacroBlockBegin
) ||
751 (Style
.isVerilog() &&
752 (Keywords
.isVerilogBegin(*FormatTok
) || VerilogHierarchy
))) &&
753 "'{' or macro block token expected");
// Remember the opening token and a few facts about the starting position.
754 FormatToken
*Tok
= FormatTok
;
755 const bool FollowedByComment
= Tokens
->peekNextToken()->is(tok::comment
);
756 auto Index
= CurrentLines
->size();
757 const bool MacroBlock
= FormatTok
->is(TT_MacroBlockBegin
);
758 FormatTok
->setBlockKind(BK_Block
);
760 // For Whitesmiths mode, jump to the next level prior to skipping over the
762 if (!VerilogHierarchy
&& AddLevels
> 0 &&
763 Style
.BreakBeforeBraces
== FormatStyle::BS_Whitesmiths
) {
// Preprocessor state at the start; compared with PPEndHash at the end.
767 size_t PPStartHash
= computePPHash();
769 const unsigned InitialLevel
= Line
->Level
;
770 if (VerilogHierarchy
) {
771 AddLevels
+= parseVerilogHierarchyHeader();
773 nextToken(/*LevelDifference=*/AddLevels
);
774 HandleVerilogBlockLabel();
777 // Bail out if there are too many levels. Otherwise, the stack might overflow.
778 if (Line
->Level
> 300)
781 if (MacroBlock
&& FormatTok
->is(tok::l_paren
))
// Index of the line that opens the block, not counting pending preprocessor
// directive lines (unless a PP directive is itself being parsed).
784 size_t NbPreprocessorDirectives
=
785 !parsingPPDirective() ? PreprocessorDirectives
.size() : 0;
787 size_t OpeningLineIndex
=
788 CurrentLines
->empty()
789 ? (UnwrappedLine::kInvalidIndex
)
790 : (CurrentLines
->size() - 1 - NbPreprocessorDirectives
);
792 // Whitesmiths is weird here. The brace needs to be indented for the namespace
793 // block, but the block itself may not be indented depending on the style
794 // settings. This allows the format to back up one level in those cases.
795 if (UnindentWhitesmithsBraces
)
798 ScopedDeclarationState
DeclarationState(*Line
, DeclarationScopeStack
,
// Non-Whitesmiths styles add the indentation here.
800 if (AddLevels
> 0u && Style
.BreakBeforeBraces
!= FormatStyle::BS_Whitesmiths
)
801 Line
->Level
+= AddLevels
;
803 FormatToken
*IfLBrace
= nullptr;
804 const bool SimpleBlock
= parseLevel(Tok
, IfKind
, &IfLBrace
);
// The block did not end with the expected closing token: reset the level and
// mark the current token as a block.
809 if (MacroBlock
? FormatTok
->isNot(TT_MacroBlockEnd
)
810 : FormatTok
->isNot(tok::r_brace
)) {
811 Line
->Level
= InitialLevel
;
812 FormatTok
->setBlockKind(BK_Block
);
816 if (FormatTok
->is(tok::r_brace
) && Tok
->is(TT_NamespaceLBrace
))
817 FormatTok
->setFinalizedType(TT_NamespaceRBrace
);
819 const bool IsFunctionRBrace
=
820 FormatTok
->is(tok::r_brace
) && Tok
->is(TT_FunctionLBrace
);
// Decides whether the braces of this block may be removed. Captures by copy
// and is `mutable`, so the `--Index` below changes only the lambda's copy.
822 auto RemoveBraces
= [=]() mutable {
825 assert(Tok
->isOneOf(TT_ControlStatementLBrace
, TT_ElseLBrace
));
826 assert(FormatTok
->is(tok::r_brace
));
// An opening brace with no previous token counts as wrapped.
827 const bool WrappedOpeningBrace
= !Tok
->Previous
;
828 if (WrappedOpeningBrace
&& FollowedByComment
)
830 const bool HasRequiredIfBraces
= IfLBrace
&& !IfLBrace
->Optional
;
831 if (KeepBraces
&& !HasRequiredIfBraces
)
833 if (Tok
->isNot(TT_ElseLBrace
) || !HasRequiredIfBraces
) {
834 const FormatToken
*Previous
= Tokens
->getPreviousToken();
836 if (Previous
->is(tok::r_brace
) && !Previous
->Optional
)
839 assert(!CurrentLines
->empty());
840 auto &LastLine
= CurrentLines
->back();
841 if (LastLine
.Level
== InitialLevel
+ 1 && !mightFitOnOneLine(LastLine
))
843 if (Tok
->is(TT_ElseLBrace
))
845 if (WrappedOpeningBrace
) {
847 --Index
; // The line above the wrapped l_brace.
850 return mightFitOnOneLine((*CurrentLines
)[Index
], Tok
);
// Removable braces are linked to each other through MatchingParen.
852 if (RemoveBraces()) {
853 Tok
->MatchingParen
= FormatTok
;
854 FormatTok
->MatchingParen
= Tok
;
857 size_t PPEndHash
= computePPHash();
859 // Munch the closing brace.
860 nextToken(/*LevelDifference=*/-AddLevels
);
862 // When this is a function block and there is an unnecessary semicolon
863 // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
865 if (Style
.RemoveSemicolon
&& IsFunctionRBrace
) {
866 while (FormatTok
->is(tok::semi
)) {
867 FormatTok
->Optional
= true;
872 HandleVerilogBlockLabel();
874 if (MacroBlock
&& FormatTok
->is(tok::l_paren
))
877 Line
->Level
= InitialLevel
;
879 if (FormatTok
->is(tok::kw_noexcept
)) {
880 // A noexcept in a requires expression.
884 if (FormatTok
->is(tok::arrow
)) {
885 // Following the } or noexcept we can find a trailing return type arrow
886 // as part of an implicit conversion constraint.
888 parseStructuralElement();
891 if (MunchSemi
&& FormatTok
->is(tok::semi
))
// Opening and closing lines are linked only when the preprocessor stack hash
// is the same at both ends of the block.
894 if (PPStartHash
== PPEndHash
) {
895 Line
->MatchingOpeningBlockLineIndex
= OpeningLineIndex
;
896 if (OpeningLineIndex
!= UnwrappedLine::kInvalidIndex
) {
897 // Update the opening line to add the forward reference as well
898 (*CurrentLines
)[OpeningLineIndex
].MatchingClosingBlockLineIndex
=
899 CurrentLines
->size() - 1;
// Tests whether \p Line looks like a Closure-library `goog.scope(` call.
// The visible checks are, in order: the token text "goog", a '.', the token
// text "scope", and finally a '('. Lines with fewer than four tokens are
// handled before any token is inspected.
906 static bool isGoogScope(const UnwrappedLine
&Line
) {
907 // FIXME: Closure-library specific stuff should not be hard-coded but be
909 if (Line
.Tokens
.size() < 4)
911 auto I
= Line
.Tokens
.begin();
912 if (I
->Tok
->TokenText
!= "goog")
915 if (I
->Tok
->isNot(tok::period
))
918 if (I
->Tok
->TokenText
!= "scope")
921 return I
->Tok
->is(tok::l_paren
);
// Tests whether \p Line starts an immediately invoked function expression.
// \param Line     line to inspect; lines with fewer than three tokens are
//                 handled before any token is inspected.
// \param Keywords supplies the `function` keyword (kw_function).
924 static bool isIIFE(const UnwrappedLine
&Line
,
925 const AdditionalKeywords
&Keywords
) {
926 // Look for the start of an immediately invoked anonymous function.
927 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
928 // This is commonly done in JavaScript to create a new, anonymous scope.
929 // Example: (function() { ... })()
930 if (Line
.Tokens
.size() < 3)
// Expected token sequence: '(' , `function` , '('.
932 auto I
= Line
.Tokens
.begin();
933 if (I
->Tok
->isNot(tok::l_paren
))
936 if (I
->Tok
->isNot(Keywords
.kw_function
))
939 return I
->Tok
->is(tok::l_paren
);
// Looks up whether the style wants a line break before the opening brace of
// the construct introduced by \p InitialToken. The answer comes from one of
// the Style.BraceWrapping.After* flags (namespace, class, union, struct or
// enum), chosen by the token's kind.
942 static bool ShouldBreakBeforeBrace(const FormatStyle
&Style
,
943 const FormatToken
&InitialToken
) {
944 tok::TokenKind Kind
= InitialToken
.Tok
.getKind();
// A namespace macro is treated exactly like the `namespace` keyword.
945 if (InitialToken
.is(TT_NamespaceMacro
))
946 Kind
= tok::kw_namespace
;
949 case tok::kw_namespace
:
950 return Style
.BraceWrapping
.AfterNamespace
;
952 return Style
.BraceWrapping
.AfterClass
;
954 return Style
.BraceWrapping
.AfterUnion
;
956 return Style
.BraceWrapping
.AfterStruct
;
958 return Style
.BraceWrapping
.AfterEnum
;
// Parses a block whose content becomes child lines of the current line.
// The current token must be a '{'; it is marked as BK_Block.
964 void UnwrappedLineParser::parseChildBlock() {
965 assert(FormatTok
->is(tok::l_brace
));
966 FormatTok
->setBlockKind(BK_Block
);
967 const FormatToken
*OpeningBrace
= FormatTok
;
// JavaScript goog.scope(...) and IIFE bodies are not indented further.
970 bool SkipIndent
= (Style
.isJavaScript() &&
971 (isGoogScope(*Line
) || isIIFE(*Line
, Keywords
)));
// While LineState is alive, newly finished lines go to the children of the
// current line's last token (see ScopedLineState).
972 ScopedLineState
LineState(*this);
973 ScopedDeclarationState
DeclarationState(*Line
, DeclarationScopeStack
,
974 /*MustBeDeclaration=*/false);
// The level is raised for the body and lowered again afterwards; both
// adjustments are skipped together when SkipIndent is set.
975 Line
->Level
+= SkipIndent
? 0 : 1;
976 parseLevel(OpeningBrace
);
977 flushComments(isOnNewLine(*FormatTok
));
978 Line
->Level
-= SkipIndent
? 0 : 1;
// Parses one preprocessor directive. The current token must be '#'. The
// directive is handled inside a ScopedMacroState, and the work is dispatched
// on the preprocessor keyword that follows the '#'.
983 void UnwrappedLineParser::parsePPDirective() {
984 assert(FormatTok
->is(tok::hash
) && "'#' expected");
985 ScopedMacroState
MacroState(*Line
, Tokens
, FormatTok
);
// A token without identifier info cannot name a directive and is handled
// before the switch.
989 if (!FormatTok
->Tok
.getIdentifierInfo()) {
994 switch (FormatTok
->Tok
.getIdentifierInfo()->getPPKeywordID()) {
// The IfDef argument distinguishes the ifdef-style directives (true) from
// the others (false).
999 parsePPIf(/*IfDef=*/false);
1002 case tok::pp_ifndef
:
1003 parsePPIf(/*IfDef=*/true);
1006 case tok::pp_elifdef
:
1007 case tok::pp_elifndef
:
1014 case tok::pp_pragma
:
// Pushes an entry for a conditional-compilation branch onto PPStack.
// \param Unreachable whether the branch being entered is considered
//        unreachable.
1023 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable
) {
// Line index recorded with the entry: the number of lines collected so far,
// offset by Lines.size() when collecting preprocessor directive lines.
1024 size_t Line
= CurrentLines
->size();
1025 if (CurrentLines
== &PreprocessorDirectives
)
1026 Line
+= Lines
.size();
// A branch nested inside an unreachable branch is unreachable as well.
1029 (!PPStack
.empty() && PPStack
.back().Kind
== PP_Unreachable
)) {
1030 PPStack
.push_back({PP_Unreachable
, Line
});
1032 PPStack
.push_back({PP_Conditional
, Line
});
// Bookkeeping for the start of a conditional-compilation chain (#if-like).
// \param Unreachable whether the first branch is considered unreachable.
1036 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable
) {
1038 assert(PPBranchLevel
>= 0 && PPBranchLevel
<= (int)PPLevelBranchIndex
.size());
// First visit of this nesting level: create its index/count slots.
1039 if (PPBranchLevel
== (int)PPLevelBranchIndex
.size()) {
1040 PPLevelBranchIndex
.push_back(0);
1041 PPLevelBranchCount
.push_back(0);
// The chain index starts at -1 for an unreachable first branch, else at 0.
1043 PPChainBranchIndex
.push(Unreachable
? -1 : 0);
// The first branch is skipped when a later branch is selected for this level.
1044 bool Skip
= PPLevelBranchIndex
[PPBranchLevel
] > 0;
1045 conditionalCompilationCondition(Unreachable
|| Skip
);
// Bookkeeping for moving on to the next branch of the current chain
// (#else / #elif-like directives).
1048 void UnwrappedLineParser::conditionalCompilationAlternative() {
// NOTE(review): presumably pops the entry of the previous branch - confirm.
1049 if (!PPStack
.empty())
1051 assert(PPBranchLevel
< (int)PPLevelBranchIndex
.size());
1052 if (!PPChainBranchIndex
.empty())
1053 ++PPChainBranchIndex
.top();
// The new branch is unreachable unless its position in the chain is the one
// selected for this nesting level.
1054 conditionalCompilationCondition(
1055 PPBranchLevel
>= 0 && !PPChainBranchIndex
.empty() &&
1056 PPLevelBranchIndex
[PPBranchLevel
] != PPChainBranchIndex
.top());
// Bookkeeping for the end of a conditional-compilation chain (#endif).
1059 void UnwrappedLineParser::conditionalCompilationEnd() {
1060 assert(PPBranchLevel
< (int)PPLevelBranchIndex
.size());
// Record the largest number of branches seen so far at this nesting level.
1061 if (PPBranchLevel
>= 0 && !PPChainBranchIndex
.empty()) {
1062 if (PPChainBranchIndex
.top() + 1 > PPLevelBranchCount
[PPBranchLevel
])
1063 PPLevelBranchCount
[PPBranchLevel
] = PPChainBranchIndex
.top() + 1;
1065 // Guard against #endif's without #if.
1066 if (PPBranchLevel
> -1)
// Both stacks are only touched when non-empty, which tolerates unbalanced
// directives.
1068 if (!PPChainBranchIndex
.empty())
1069 PPChainBranchIndex
.pop();
1070 if (!PPStack
.empty())
// Parses an #if-like directive and updates include-guard detection.
// \param IfDef true for the ifdef-style directives, false otherwise (see the
//        call sites in parsePPDirective).
1074 void UnwrappedLineParser::parsePPIf(bool IfDef
) {
1075 bool IfNDef
= FormatTok
->is(tok::pp_ifndef
);
// Two conditions are special-cased: a literal `false` or `0` for a plain
// #if, and the name SWIG for #ifdef.
// NOTE(review): presumably both mark the branch unreachable - confirm.
1077 bool Unreachable
= false;
1078 if (!IfDef
&& (FormatTok
->is(tok::kw_false
) || FormatTok
->TokenText
== "0"))
1080 if (IfDef
&& !IfNDef
&& FormatTok
->TokenText
== "SWIG")
1082 conditionalCompilationStart(Unreachable
);
1083 FormatToken
*IfCondition
= FormatTok
;
1084 // If there's a #ifndef on the first line, and the only lines before it are
1085 // comments, it could be an include guard.
1086 bool MaybeIncludeGuard
= IfNDef
;
1087 if (IncludeGuard
== IG_Inited
&& MaybeIncludeGuard
) {
1088 for (auto &Line
: Lines
) {
// Any earlier line that does not start with a comment rules the guard out.
1089 if (Line
.Tokens
.front().Tok
->isNot(tok::comment
)) {
1090 MaybeIncludeGuard
= false;
1091 IncludeGuard
= IG_Rejected
;
// Still a candidate: remember the condition token so that parsePPDefine can
// compare it with the name of the following #define.
1099 if (IncludeGuard
== IG_Inited
&& MaybeIncludeGuard
) {
1100 IncludeGuard
= IG_IfNdefed
;
1101 IncludeGuardToken
= IfCondition
;
// Parses an #else-like directive: rejects a pending include-guard candidate
// if needed and switches to the next branch of the current chain.
1105 void UnwrappedLineParser::parsePPElse() {
1106 // If a potential include guard has an #else, it's not an include guard.
1107 if (IncludeGuard
== IG_Defined
&& PPBranchLevel
== 0)
1108 IncludeGuard
= IG_Rejected
;
1109 // Don't crash when there is an #else without an #if.
1110 assert(PPBranchLevel
>= -1);
// With no open conditional, start one with an unreachable first branch, so
// the alternative below has something to switch away from.
1111 if (PPBranchLevel
== -1)
1112 conditionalCompilationStart(/*Unreachable=*/true);
1113 conditionalCompilationAlternative();
// Parses an #endif: closes the current conditional chain and, if this
// completes an include guard, records that one was found.
1119 void UnwrappedLineParser::parsePPEndIf() {
1120 conditionalCompilationEnd();
1122 // If the #endif of a potential include guard is the last thing in the file,
1123 // then we found an include guard.
// Only relevant when PP directives are indented (IndentPPDirectives is not
// PPDIS_None).
1124 if (IncludeGuard
== IG_Defined
&& PPBranchLevel
== -1 && Tokens
->isEOF() &&
1125 Style
.IndentPPDirectives
!= FormatStyle::PPDIS_None
) {
1126 IncludeGuard
= IG_Found
;
// Parses a #define directive: advances include-guard detection, normalises
// the macro name token, and sets up the line state for the macro body.
1130 void UnwrappedLineParser::parsePPDefine() {
// A #define whose name token has no identifier info cannot be an include
// guard.
1133 if (!FormatTok
->Tok
.getIdentifierInfo()) {
1134 IncludeGuard
= IG_Rejected
;
1135 IncludeGuardToken
= nullptr;
// `#define X` right after `#ifndef X`: the guard candidate advances to
// IG_Defined, provided every earlier line starts with a comment or a '#'.
1140 if (IncludeGuard
== IG_IfNdefed
&&
1141 IncludeGuardToken
->TokenText
== FormatTok
->TokenText
) {
1142 IncludeGuard
= IG_Defined
;
1143 IncludeGuardToken
= nullptr;
1144 for (auto &Line
: Lines
) {
1145 if (!Line
.Tokens
.front().Tok
->isOneOf(tok::comment
, tok::hash
)) {
1146 IncludeGuard
= IG_Rejected
;
1152 // In the context of a define, even keywords should be treated as normal
1153 // identifiers. Setting the kind to identifier is not enough, because we need
1154 // to treat additional keywords like __except as well, which are already
1155 // identifiers. Setting the identifier info to null interferes with include
1156 // guard processing above, and changes preprocessing nesting.
1157 FormatTok
->Tok
.setKind(tok::identifier
);
1158 FormatTok
->Tok
.setIdentifierInfo(Keywords
.kw_internal_ident_after_define
);
// A '(' with no whitespace before it is special-cased.
// NOTE(review): presumably the parameter list of a function-like macro -
// confirm.
1160 if (FormatTok
->Tok
.getKind() == tok::l_paren
&&
1161 !FormatTok
->hasWhitespaceBefore()) {
1164 if (Style
.IndentPPDirectives
!= FormatStyle::PPDIS_None
)
1165 Line
->Level
+= PPBranchLevel
+ 1;
// Inside a detected include guard the PP level is one lower, so the guard
// itself does not add indentation.
1169 Line
->PPLevel
= PPBranchLevel
+ (IncludeGuard
== IG_Defined
? 0 : 1);
1170 assert((int)Line
->PPLevel
>= 0);
1171 Line
->InMacroBody
= true;
// An identifier directly followed by ':' at the start of the body is
// special-cased.
1173 if (FormatTok
->is(tok::identifier
) &&
1174 Tokens
->peekNextToken()->is(tok::colon
)) {
1179 // Errors during a preprocessor directive can only affect the layout of the
1180 // preprocessor directive, and thus we ignore them. An alternative approach
1181 // would be to use the same approach we use on the file level (no
1182 // re-indentation if there was a structural error) within the macro
// Handles a #pragma: flags the current line as a pragma directive.
1187 void UnwrappedLineParser::parsePPPragma() {
1188 Line
->InPragmaDirective
= true;
// Handles a preprocessor directive that has no dedicated parser. Unless
// IndentPPDirectives is PPDIS_None, the line's level is raised by the current
// conditional nesting depth plus one.
1192 void UnwrappedLineParser::parsePPUnknown() {
1196 if (Style
.IndentPPDirectives
!= FormatStyle::PPDIS_None
)
1197 Line
->Level
+= PPBranchLevel
+ 1;
1201 // Here we exclude certain tokens that are not usually the first token in an
1202 // unwrapped line. This is used in an attempt to distinguish macro calls
1203 // without trailing semicolons from other constructs split to several lines.
// Returns true when \p Tok is none of the token kinds listed in the isOneOf()
// call below. \p Tok must not be a TT_AttributeSquare (asserted).
1204 static bool tokenCanStartNewLine(const FormatToken
&Tok
) {
1205 // Semicolon can be a null-statement, l_square can be a start of a macro or
1206 // a C++11 attribute, but this doesn't seem to be common.
1207 assert(Tok
.isNot(TT_AttributeSquare
));
1208 return !Tok
.isOneOf(tok::semi
, tok::l_brace
,
1209 // Tokens that can only be used as binary operators and a
1210 // part of overloaded operator names.
1211 tok::period
, tok::periodstar
, tok::arrow
, tok::arrowstar
,
1212 tok::less
, tok::greater
, tok::slash
, tok::percent
,
1213 tok::lessless
, tok::greatergreater
, tok::equal
,
1214 tok::plusequal
, tok::minusequal
, tok::starequal
,
1215 tok::slashequal
, tok::percentequal
, tok::ampequal
,
1216 tok::pipeequal
, tok::caretequal
, tok::greatergreaterequal
,
1218 // Colon is used in labels, base class lists, initializer
1219 // lists, range-based for loops, ternary operator, but
1220 // should never be the first token in an unwrapped line.
1222 // 'noexcept' is a trailing annotation.
// Returns true if \p FormatTok is a tok::identifier that either carries no
// identifier info or is none of the keywords enumerated below, i.e. it can
// only be a plain identifier.
1226 static bool mustBeJSIdent(const AdditionalKeywords
&Keywords
,
1227 const FormatToken
*FormatTok
) {
1228 // FIXME: This returns true for C/C++ keywords like 'struct'.
1229 return FormatTok
->is(tok::identifier
) &&
1230 (!FormatTok
->Tok
.getIdentifierInfo() ||
1231 !FormatTok
->isOneOf(
1232 Keywords
.kw_in
, Keywords
.kw_of
, Keywords
.kw_as
, Keywords
.kw_async
,
1233 Keywords
.kw_await
, Keywords
.kw_yield
, Keywords
.kw_finally
,
1234 Keywords
.kw_function
, Keywords
.kw_import
, Keywords
.kw_is
,
1235 Keywords
.kw_let
, Keywords
.kw_var
, tok::kw_const
,
1236 Keywords
.kw_abstract
, Keywords
.kw_extends
, Keywords
.kw_implements
,
1237 Keywords
.kw_instanceof
, Keywords
.kw_interface
,
1238 Keywords
.kw_override
, Keywords
.kw_throws
, Keywords
.kw_from
));
// Returns true if \p FormatTok is a literal, one of the keywords `true` /
// `false`, or a token accepted by mustBeJSIdent().
1241 static bool mustBeJSIdentOrValue(const AdditionalKeywords
&Keywords
,
1242 const FormatToken
*FormatTok
) {
1243 return FormatTok
->Tok
.isLiteral() ||
1244 FormatTok
->isOneOf(tok::kw_true
, tok::kw_false
) ||
1245 mustBeJSIdent(Keywords
, FormatTok
);
1248 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1249 // when encountered after a value (see mustBeJSIdentOrValue).
// The test is a single isOneOf() over the keywords listed below: return/yield,
// conditionals, loops, switch/case, exception handling, declarations and
// import/export.
1250 static bool isJSDeclOrStmt(const AdditionalKeywords
&Keywords
,
1251 const FormatToken
*FormatTok
) {
1252 return FormatTok
->isOneOf(
1253 tok::kw_return
, Keywords
.kw_yield
,
1255 tok::kw_if
, tok::kw_else
,
1257 tok::kw_for
, tok::kw_while
, tok::kw_do
, tok::kw_continue
, tok::kw_break
,
1259 tok::kw_switch
, tok::kw_case
,
1261 tok::kw_throw
, tok::kw_try
, tok::kw_catch
, Keywords
.kw_finally
,
1263 tok::kw_const
, tok::kw_class
, Keywords
.kw_var
, Keywords
.kw_let
,
1264 Keywords
.kw_async
, Keywords
.kw_function
,
1266 Keywords
.kw_import
, tok::kw_export
);
1269 // Checks whether a token is a type in K&R C (aka C78).
// Implemented as one isOneOf() test over built-in type keywords.
1270 static bool isC78Type(const FormatToken
&Tok
) {
1271 return Tok
.isOneOf(tok::kw_char
, tok::kw_short
, tok::kw_int
, tok::kw_long
,
1272 tok::kw_unsigned
, tok::kw_float
, tok::kw_double
,
1276 // This function checks whether a token starts the first parameter declaration
1277 // in a K&R C (aka C78) function definition, e.g.:
// \p Tok is the candidate token, \p Next the token after it, and \p FuncName
// the candidate function name.
// NOTE(review): the bodies of the guard `if`s below are not visible in this
// excerpt; presumably each one rejects the candidate -- confirm.
1283 static bool isC78ParameterDecl(const FormatToken
*Tok
, const FormatToken
*Next
,
1284 const FormatToken
*FuncName
) {
// Guard: the function name is not a plain identifier.
1289 if (FuncName
->isNot(tok::identifier
))
// Guard: nothing precedes the name, or what precedes it is neither '*' nor a
// K&R type keyword.
1292 const FormatToken
*Prev
= FuncName
->Previous
;
1293 if (!Prev
|| (Prev
->isNot(tok::star
) && !isC78Type(*Prev
)))
// Guard: Tok is neither a K&R type nor one of register/struct/union.
1296 if (!isC78Type(*Tok
) &&
1297 !Tok
->isOneOf(tok::kw_register
, tok::kw_struct
, tok::kw_union
)) {
// Guard: the following token is not '*' and has no identifier info.
1301 if (Next
->isNot(tok::star
) && !Next
->Tok
.getIdentifierInfo())
// Walk backwards from Tok: first a ')' is expected...
1304 Tok
= Tok
->Previous
;
1305 if (!Tok
|| Tok
->isNot(tok::r_paren
))
// ...then an identifier...
1308 Tok
= Tok
->Previous
;
1309 if (!Tok
|| Tok
->isNot(tok::identifier
))
// ...which itself must directly follow '(' or ','.
1312 return Tok
->Previous
&& Tok
->Previous
->isOneOf(tok::l_paren
, tok::comma
);
// Parses an import declaration starting at the `import` keyword (asserted).
// Callers use the bool result as a condition; NOTE(review): the return
// statements are not visible in this excerpt -- presumably true means the
// import was consumed; confirm.
1315 bool UnwrappedLineParser::parseModuleImport() {
1316 assert(FormatTok
->is(Keywords
.kw_import
) && "'import' expected");
// Look past comments at the token after `import`: this branch is taken when
// it has no identifier info and is not ':', '<' or a string literal.
1318 if (auto Token
= Tokens
->peekNextToken(/*SkipComment=*/true);
1319 !Token
->Tok
.getIdentifierInfo() &&
1320 !Token
->isOneOf(tok::colon
, tok::less
, tok::string_literal
)) {
1326 if (FormatTok
->is(tok::colon
)) {
1327 FormatTok
->setFinalizedType(TT_ModulePartitionColon
);
1329 // Handle import <foo/bar.h> as we would an include statement.
1330 else if (FormatTok
->is(tok::less
)) {
// Scan until ';', '>' or end of file.
1332 while (!FormatTok
->isOneOf(tok::semi
, tok::greater
, tok::eof
)) {
1333 // Mark tokens up to the trailing line comments as implicit string
1335 if (FormatTok
->isNot(tok::comment
) &&
1336 !FormatTok
->TokenText
.starts_with("//")) {
1337 FormatTok
->setFinalizedType(TT_ImplicitStringLiteral
);
1342 if (FormatTok
->is(tok::semi
)) {
1353 // readTokenWithJavaScriptASI reads the next token and terminates the current
1354 // line if JavaScript Automatic Semicolon Insertion must
1355 // happen between the current token and the next token.
1357 // This method is conservative - it cannot cover all edge cases of JavaScript,
1358 // but only aims to correctly handle certain well known cases. It *must not*
1359 // terminate the line in speculative cases.
1360 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1361 FormatToken
*Previous
= FormatTok
;
1363 FormatToken
*Next
= FormatTok
;
// "No newline" test: uses the first pending comment's NewlinesBefore when
// comments are queued, otherwise the next token's.
1366 CommentsBeforeNextToken
.empty()
1367 ? Next
->NewlinesBefore
== 0
1368 : CommentsBeforeNextToken
.front()->NewlinesBefore
== 0;
1372 bool PreviousMustBeValue
= mustBeJSIdentOrValue(Keywords
, Previous
);
1373 bool PreviousStartsTemplateExpr
=
1374 Previous
->is(TT_TemplateString
) && Previous
->TokenText
.ends_with("${");
1375 if (PreviousMustBeValue
|| Previous
->is(tok::r_paren
)) {
1376 // If the line contains an '@' sign, the previous token might be an
1377 // annotation, which can precede another identifier/value.
1378 bool HasAt
= llvm::any_of(Line
->Tokens
, [](UnwrappedLineNode
&LineNode
) {
1379 return LineNode
.Tok
->is(tok::at
);
// A value followed by '!': terminate the current line before the '!'.
1384 if (Next
->is(tok::exclaim
) && PreviousMustBeValue
)
1385 return addUnwrappedLine();
1386 bool NextMustBeValue
= mustBeJSIdentOrValue(Keywords
, Next
);
1387 bool NextEndsTemplateExpr
=
1388 Next
->is(TT_TemplateString
) && Next
->TokenText
.starts_with("}");
// A value (or ']', ')', '++', '--') followed by another value, with neither
// side being a template-string expression boundary: terminate the line.
1389 if (NextMustBeValue
&& !NextEndsTemplateExpr
&& !PreviousStartsTemplateExpr
&&
1390 (PreviousMustBeValue
||
1391 Previous
->isOneOf(tok::r_square
, tok::r_paren
, tok::plusplus
,
1392 tok::minusminus
))) {
1393 return addUnwrappedLine();
// A value or ')' followed by a declaration/statement keyword: terminate the
// line.
1395 if ((PreviousMustBeValue
|| Previous
->is(tok::r_paren
)) &&
1396 isJSDeclOrStmt(Keywords
, Next
)) {
1397 return addUnwrappedLine();
// Parses one structural element beginning at the current token. The visible
// code first handles language-specific prefixes (a TableGen include, leading
// C++ attributes, several Verilog constructs), then switches on the token
// kind for tokens that only make sense at the beginning of a line, and
// finally processes the remaining tokens in a second switch.
// NOTE(review): many lines of this function are absent from this excerpt;
// the comments added here describe only what is visible.
1401 void UnwrappedLineParser::parseStructuralElement(
1402 const FormatToken
*OpeningBrace
, IfStmtKind
*IfKind
,
1403 FormatToken
**IfLeftBrace
, bool *HasDoWhile
, bool *HasLabel
) {
1404 if (Style
.Language
== FormatStyle::LK_TableGen
&&
1405 FormatTok
->is(tok::pp_include
)) {
1407 if (FormatTok
->is(tok::string_literal
))
1413 if (Style
.isCpp()) {
1414 while (FormatTok
->is(tok::l_square
) && handleCppAttributes()) {
1416 } else if (Style
.isVerilog()) {
1417 if (Keywords
.isVerilogStructuredProcedure(*FormatTok
)) {
1418 parseForOrWhileLoop(/*HasParens=*/false);
1421 if (FormatTok
->isOneOf(Keywords
.kw_foreach
, Keywords
.kw_repeat
)) {
1422 parseForOrWhileLoop();
1425 if (FormatTok
->isOneOf(tok::kw_restrict
, Keywords
.kw_assert
,
1426 Keywords
.kw_assume
, Keywords
.kw_cover
)) {
1427 parseIfThenElse(IfKind
, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
1431 // Skip things that can exist before keywords like 'if' and 'case'.
1433 if (FormatTok
->isOneOf(Keywords
.kw_priority
, Keywords
.kw_unique
,
1434 Keywords
.kw_unique0
)) {
1436 } else if (FormatTok
->is(tok::l_paren
) &&
1437 Tokens
->peekNextToken()->is(tok::star
)) {
1445 // Tokens that only make sense at the beginning of a line.
1446 switch (FormatTok
->Tok
.getKind()) {
1449 if (FormatTok
->is(tok::l_brace
)) {
1450 FormatTok
->setFinalizedType(TT_InlineASMBrace
);
1452 while (FormatTok
&& !eof()) {
1453 if (FormatTok
->is(tok::r_brace
)) {
1454 FormatTok
->setFinalizedType(TT_InlineASMBrace
);
1459 FormatTok
->Finalized
= true;
1464 case tok::kw_namespace
:
1467 case tok::kw_public
:
1468 case tok::kw_protected
:
1469 case tok::kw_private
:
1470 if (Style
.Language
== FormatStyle::LK_Java
|| Style
.isJavaScript() ||
1474 parseAccessSpecifier();
1478 if (Style
.isJavaScript() && Line
->MustBeDeclaration
) {
1479 // field/method declaration.
1482 FormatToken
*Tok
= parseIfThenElse(IfKind
);
1489 if (Style
.isJavaScript() && Line
->MustBeDeclaration
) {
1490 // field/method declaration.
1493 parseForOrWhileLoop();
1496 if (Style
.isJavaScript() && Line
->MustBeDeclaration
) {
1497 // field/method declaration.
1504 case tok::kw_switch
:
1505 if (Style
.isJavaScript() && Line
->MustBeDeclaration
) {
1506 // 'switch: string' field declaration.
1511 case tok::kw_default
:
1512 // In Verilog default along with other labels are handled in the next loop.
1513 if (Style
.isVerilog())
1515 if (Style
.isJavaScript() && Line
->MustBeDeclaration
) {
1516 // 'default: string' field declaration.
1520 if (FormatTok
->is(tok::colon
)) {
1521 FormatTok
->setFinalizedType(TT_CaseLabelColon
);
1525 // e.g. "default void f() {}" in a Java interface.
1528 // Proto: there are no switch/case statements.
1529 if (Style
.Language
== FormatStyle::LK_Proto
) {
1533 if (Style
.isVerilog()) {
1538 if (Style
.isJavaScript() && Line
->MustBeDeclaration
) {
1539 // 'case: string' field declaration.
1547 if (Style
.isJavaScript() && Line
->MustBeDeclaration
) {
1548 // field/method declaration.
1553 case tok::kw_extern
:
1555 if (Style
.isVerilog()) {
1556 // In Verilog an extern module declaration looks like a start of module.
1557 // But there is no body and endmodule. So we handle it separately.
1558 if (Keywords
.isVerilogHierarchy(*FormatTok
)) {
1559 parseVerilogHierarchyHeader();
1562 } else if (FormatTok
->is(tok::string_literal
)) {
1564 if (FormatTok
->is(tok::l_brace
)) {
1565 if (Style
.BraceWrapping
.AfterExternBlock
)
1567 // Either we indent or for backwards compatibility we follow the
1568 // AfterExternBlock style.
1569 unsigned AddLevels
=
1570 (Style
.IndentExternBlock
== FormatStyle::IEBS_Indent
) ||
1571 (Style
.BraceWrapping
.AfterExternBlock
&&
1572 Style
.IndentExternBlock
==
1573 FormatStyle::IEBS_AfterExternBlock
)
1576 parseBlock(/*MustBeDeclaration=*/true, AddLevels
);
1582 case tok::kw_export
:
1583 if (Style
.isJavaScript()) {
1584 parseJavaScriptEs6ImportExport();
1587 if (Style
.isCpp()) {
1589 if (FormatTok
->is(tok::kw_namespace
)) {
1593 if (FormatTok
->is(Keywords
.kw_import
) && parseModuleImport())
1597 case tok::kw_inline
:
1599 if (FormatTok
->is(tok::kw_namespace
)) {
1604 case tok::identifier
:
1605 if (FormatTok
->is(TT_ForEachMacro
)) {
1606 parseForOrWhileLoop();
1609 if (FormatTok
->is(TT_MacroBlockBegin
)) {
1610 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1611 /*MunchSemi=*/false);
1614 if (FormatTok
->is(Keywords
.kw_import
)) {
1615 if (Style
.isJavaScript()) {
1616 parseJavaScriptEs6ImportExport();
1619 if (Style
.Language
== FormatStyle::LK_Proto
) {
1621 if (FormatTok
->is(tok::kw_public
))
1623 if (FormatTok
->isNot(tok::string_literal
))
1626 if (FormatTok
->is(tok::semi
))
1631 if (Style
.isCpp() && parseModuleImport())
1634 if (Style
.isCpp() &&
1635 FormatTok
->isOneOf(Keywords
.kw_signals
, Keywords
.kw_qsignals
,
1636 Keywords
.kw_slots
, Keywords
.kw_qslots
)) {
1638 if (FormatTok
->is(tok::colon
)) {
1644 if (Style
.isCpp() && FormatTok
->is(TT_StatementMacro
)) {
1645 parseStatementMacro();
1648 if (Style
.isCpp() && FormatTok
->is(TT_NamespaceMacro
)) {
1652 // In Verilog labels can be any expression, so we don't do them here.
1653 if (!Style
.isVerilog() && Tokens
->peekNextToken()->is(tok::colon
) &&
1654 !Line
->MustBeDeclaration
) {
1656 Line
->Tokens
.begin()->Tok
->MustBreakBefore
= true;
1657 FormatTok
->setFinalizedType(TT_GotoLabelColon
);
1658 parseLabel(!Style
.IndentGotoLabels
);
1663 // In all other cases, parse the declaration.
1669 const bool InRequiresExpression
=
1670 OpeningBrace
&& OpeningBrace
->is(TT_RequiresExpressionLBrace
);
1672 const FormatToken
*Previous
= FormatTok
->Previous
;
1673 switch (FormatTok
->Tok
.getKind()) {
1676 if (FormatTok
->is(tok::l_brace
)) {
1680 } else if (Style
.Language
== FormatStyle::LK_Java
&&
1681 FormatTok
->is(Keywords
.kw_interface
)) {
1685 switch (FormatTok
->Tok
.getObjCKeywordID()) {
1686 case tok::objc_public
:
1687 case tok::objc_protected
:
1688 case tok::objc_package
:
1689 case tok::objc_private
:
1690 return parseAccessSpecifier();
1691 case tok::objc_interface
:
1692 case tok::objc_implementation
:
1693 return parseObjCInterfaceOrImplementation();
1694 case tok::objc_protocol
:
1695 if (parseObjCProtocol())
1699 return; // Handled by the caller.
1700 case tok::objc_optional
:
1701 case tok::objc_required
:
1705 case tok::objc_autoreleasepool
:
1707 if (FormatTok
->is(tok::l_brace
)) {
1708 if (Style
.BraceWrapping
.AfterControlStatement
==
1709 FormatStyle::BWACS_Always
) {
1716 case tok::objc_synchronized
:
1718 if (FormatTok
->is(tok::l_paren
)) {
1719 // Skip synchronization object
1722 if (FormatTok
->is(tok::l_brace
)) {
1723 if (Style
.BraceWrapping
.AfterControlStatement
==
1724 FormatStyle::BWACS_Always
) {
1732 // This branch isn't strictly necessary (the kw_try case below would
1733 // do this too after the tok::at is parsed above). But be explicit.
1740 case tok::kw_requires
: {
1741 if (Style
.isCpp()) {
1742 bool ParsedClause
= parseRequires();
1751 // Ignore if this is part of "template <enum ...".
1752 if (Previous
&& Previous
->is(tok::less
)) {
1757 // parseEnum falls through and does not yet add an unwrapped line as an
1758 // enum definition can start a structural element.
1761 // This only applies to C++ and Verilog.
1762 if (!Style
.isCpp() && !Style
.isVerilog()) {
1767 case tok::kw_typedef
:
1769 if (FormatTok
->isOneOf(Keywords
.kw_NS_ENUM
, Keywords
.kw_NS_OPTIONS
,
1770 Keywords
.kw_CF_ENUM
, Keywords
.kw_CF_OPTIONS
,
1771 Keywords
.kw_CF_CLOSED_ENUM
,
1772 Keywords
.kw_NS_CLOSED_ENUM
)) {
1777 if (Style
.isVerilog()) {
1783 case tok::kw_struct
:
1785 if (parseStructLike())
1788 case tok::kw_decltype
:
1790 if (FormatTok
->is(tok::l_paren
)) {
1792 assert(FormatTok
->Previous
);
1793 if (FormatTok
->Previous
->endsSequence(tok::r_paren
, tok::kw_auto
,
1795 Line
->SeenDecltypeAuto
= true;
1801 // In Java, classes have an implicit static member "class".
1802 if (Style
.Language
== FormatStyle::LK_Java
&& FormatTok
&&
1803 FormatTok
->is(tok::kw_class
)) {
1806 if (Style
.isJavaScript() && FormatTok
&&
1807 FormatTok
->Tok
.getIdentifierInfo()) {
1808 // JavaScript only has pseudo keywords, all keywords are allowed to
1809 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1820 case tok::l_paren
: {
1822 // Break the unwrapped line if a K&R C function definition has a parameter
1824 if (OpeningBrace
|| !Style
.isCpp() || !Previous
|| eof())
1826 if (isC78ParameterDecl(FormatTok
,
1827 Tokens
->peekNextToken(/*SkipComment=*/true),
1834 case tok::kw_operator
:
1836 if (FormatTok
->isBinaryOperator())
1841 // Block return type.
1842 if (FormatTok
->Tok
.isAnyIdentifier() ||
1843 FormatTok
->isSimpleTypeSpecifier()) {
1845 // Return types: pointers are ok too.
1846 while (FormatTok
->is(tok::star
))
1849 // Block argument list.
1850 if (FormatTok
->is(tok::l_paren
))
1853 if (FormatTok
->is(tok::l_brace
))
1857 if (InRequiresExpression
)
1858 FormatTok
->setFinalizedType(TT_BracedListLBrace
);
1859 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1860 IsDecltypeAutoFunction
= Line
->SeenDecltypeAuto
;
1861 // A block outside of parentheses must be the last part of a
1862 // structural element.
1863 // FIXME: Figure out cases where this is not true, and add projections
1864 // for them (the one we know is missing are lambdas).
1865 if (Style
.Language
== FormatStyle::LK_Java
&&
1866 Line
->Tokens
.front().Tok
->is(Keywords
.kw_synchronized
)) {
1867 // If necessary, we could set the type to something different than
1868 // TT_FunctionLBrace.
1869 if (Style
.BraceWrapping
.AfterControlStatement
==
1870 FormatStyle::BWACS_Always
) {
1873 } else if (Style
.BraceWrapping
.AfterFunction
) {
1876 FormatTok
->setFinalizedType(TT_FunctionLBrace
);
1878 IsDecltypeAutoFunction
= false;
1882 // Otherwise this was a braced init list, and the structural
1883 // element continues.
1886 if (Style
.isJavaScript() && Line
->MustBeDeclaration
) {
1887 // field/method declaration.
1891 // We arrive here when parsing function-try blocks.
1892 if (Style
.BraceWrapping
.AfterFunction
)
1896 case tok::identifier
: {
1897 if (Style
.isCSharp() && FormatTok
->is(Keywords
.kw_where
) &&
1898 Line
->MustBeDeclaration
) {
1900 parseCSharpGenericTypeConstraint();
1903 if (FormatTok
->is(TT_MacroBlockEnd
)) {
1908 // Function declarations (as opposed to function expressions) are parsed
1909 // on their own unwrapped line by continuing this loop. Function
1910 // expressions (functions that are not on their own line) must not create
1911 // a new unwrapped line, so they are special cased below.
1912 size_t TokenCount
= Line
->Tokens
.size();
1913 if (Style
.isJavaScript() && FormatTok
->is(Keywords
.kw_function
) &&
1916 Line
->Tokens
.front().Tok
->isNot(Keywords
.kw_async
)))) {
1917 tryToParseJSFunction();
1920 if ((Style
.isJavaScript() || Style
.Language
== FormatStyle::LK_Java
) &&
1921 FormatTok
->is(Keywords
.kw_interface
)) {
1922 if (Style
.isJavaScript()) {
1923 // In JavaScript/TypeScript, "interface" can be used as a standalone
1924 // identifier, e.g. in `var interface = 1;`. If "interface" is
1925 // followed by another identifier, it is very likely to be an actual
1926 // interface declaration.
1927 unsigned StoredPosition
= Tokens
->getPosition();
1928 FormatToken
*Next
= Tokens
->getNextToken();
1929 FormatTok
= Tokens
->setPosition(StoredPosition
);
1930 if (!mustBeJSIdent(Keywords
, Next
)) {
1940 if (Style
.isVerilog()) {
1941 if (FormatTok
->is(Keywords
.kw_table
)) {
1942 parseVerilogTable();
1945 if (Keywords
.isVerilogBegin(*FormatTok
) ||
1946 Keywords
.isVerilogHierarchy(*FormatTok
)) {
1953 if (!Style
.isCpp() && FormatTok
->is(Keywords
.kw_interface
)) {
1954 if (parseStructLike())
1959 if (Style
.isCpp() && FormatTok
->is(TT_StatementMacro
)) {
1960 parseStatementMacro();
1964 // See if the following token should start a new unwrapped line.
1965 StringRef Text
= FormatTok
->TokenText
;
1967 FormatToken
*PreviousToken
= FormatTok
;
1970 // JS doesn't have macros, and within classes colons indicate fields, not
1972 if (Style
.isJavaScript())
// Scans from the start of the current line past leading comments (and, for
// Verilog, '#' tokens) and reports whether exactly one token remains.
1975 auto OneTokenSoFar
= [&]() {
1976 auto I
= Line
->Tokens
.begin(), E
= Line
->Tokens
.end();
1977 while (I
!= E
&& I
->Tok
->is(tok::comment
))
1979 if (Style
.isVerilog())
1980 while (I
!= E
&& I
->Tok
->is(tok::hash
))
1982 return I
!= E
&& (++I
== E
);
1984 if (OneTokenSoFar()) {
1985 // Recognize function-like macro usages without trailing semicolon as
1986 // well as free-standing macros like Q_OBJECT.
1987 bool FunctionLike
= FormatTok
->is(tok::l_paren
);
1991 bool FollowedByNewline
=
1992 CommentsBeforeNextToken
.empty()
1993 ? FormatTok
->NewlinesBefore
> 0
1994 : CommentsBeforeNextToken
.front()->NewlinesBefore
> 0;
// Macro heuristic: followed by a newline, spelled entirely in upper case, and
// either at least five characters long or followed by '('; the current token
// must also be one that can start a new line.
1996 if (FollowedByNewline
&& (Text
.size() >= 5 || FunctionLike
) &&
1997 tokenCanStartNewLine(*FormatTok
) && Text
== Text
.upper()) {
1998 if (PreviousToken
->isNot(TT_UntouchableMacroFunc
))
1999 PreviousToken
->setFinalizedType(TT_FunctionLikeOrFreestandingMacro
);
2007 if ((Style
.isJavaScript() || Style
.isCSharp()) &&
2008 FormatTok
->is(TT_FatArrow
)) {
2009 tryToParseChildBlock();
2014 if (FormatTok
->is(tok::l_brace
)) {
2015 // Block kind should probably be set to BK_BracedInit for any language.
2016 // C# needs this change to ensure that array initialisers and object
2017 // initialisers are indented the same way.
2018 if (Style
.isCSharp())
2019 FormatTok
->setBlockKind(BK_BracedInit
);
2022 } else if (Style
.Language
== FormatStyle::LK_Proto
&&
2023 FormatTok
->is(tok::less
)) {
2025 parseBracedList(/*IsAngleBracket=*/true);
2035 // Proto: there are no switch/case statements.
2036 if (Style
.Language
== FormatStyle::LK_Proto
) {
2040 // In Verilog switch is called case.
2041 if (Style
.isVerilog()) {
2046 if (Style
.isJavaScript() && Line
->MustBeDeclaration
) {
2047 // 'case: string' field declaration.
2053 case tok::kw_default
:
2055 if (Style
.isVerilog()) {
2056 if (FormatTok
->is(tok::colon
)) {
2057 // The label will be handled in the next iteration.
2060 if (FormatTok
->is(Keywords
.kw_clocking
)) {
2061 // A default clocking block.
2066 parseVerilogCaseLabel();
2072 if (Style
.isVerilog()) {
2073 parseVerilogCaseLabel();
// Tries to treat the '{' at the current token (asserted) as the start of a
// C# property accessor block. The token stream is scanned ahead from a stored
// position, which is restored afterwards, to decide whether any get/init/set
// accessor is present and whether the accessor list is trivial.
// NOTE(review): the return statements are not visible in this excerpt.
2084 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2085 assert(FormatTok
->is(tok::l_brace
));
2086 if (!Style
.isCSharp())
2088 // See if it's a property accessor.
2089 if (FormatTok
->Previous
->isNot(tok::identifier
))
2092 // See if we are inside a property accessor.
2094 // Record the current tokenPosition so that we can advance and
2095 // reset the current token. `Next` is not set yet so we need
2096 // another way to advance along the token stream.
2097 unsigned int StoredPosition
= Tokens
->getPosition();
2098 FormatToken
*Tok
= Tokens
->getNextToken();
2100 // A trivial property accessor is of the form:
2101 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2102 // Track these as they do not require line breaks to be introduced.
2103 bool HasSpecialAccessor
= false;
2104 bool IsTrivialPropertyAccessor
= true;
// Lookahead: tokens that may appear in a trivial accessor list are consumed;
// seeing get/init/set sets HasSpecialAccessor.
2106 if (Tok
->isOneOf(tok::semi
, tok::kw_public
, tok::kw_private
,
2107 tok::kw_protected
, Keywords
.kw_internal
, Keywords
.kw_get
,
2108 Keywords
.kw_init
, Keywords
.kw_set
)) {
2109 if (Tok
->isOneOf(Keywords
.kw_get
, Keywords
.kw_init
, Keywords
.kw_set
))
2110 HasSpecialAccessor
= true;
2111 Tok
= Tokens
->getNextToken();
// Any other token that is not the closing '}' makes the accessor non-trivial.
2114 if (Tok
->isNot(tok::r_brace
))
2115 IsTrivialPropertyAccessor
= false;
// No get/init/set was seen: rewind to the stored position.
2119 if (!HasSpecialAccessor
) {
2120 Tokens
->setPosition(StoredPosition
);
2124 // Try to parse the property accessor:
2125 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2126 Tokens
->setPosition(StoredPosition
);
2127 if (!IsTrivialPropertyAccessor
&& Style
.BraceWrapping
.AfterFunction
)
2131 switch (FormatTok
->Tok
.getKind()) {
2134 if (FormatTok
->is(tok::equal
)) {
2135 while (!eof() && FormatTok
->isNot(tok::semi
))
2143 parseBlock(/*MustBeDeclaration=*/true);
2148 if (FormatTok
->is(TT_FatArrow
)) {
2152 } while (!eof() && FormatTok
->isNot(tok::semi
));
2161 if (FormatTok
->isOneOf(Keywords
.kw_get
, Keywords
.kw_init
,
2163 !IsTrivialPropertyAccessor
) {
2164 // Non-trivial get/set needs to be on its own line.
2171 // Unreachable for well-formed code (paired '{' and '}').
// Attempts to parse a lambda expression starting at '[' (asserted). Once the
// introducer has been accepted, tokens are examined until the body's '{'; on
// the success path that brace is typed TT_LambdaLBrace and the opening square
// TT_LambdaLSquare, and Line->SeenDecltypeAuto is pushed on NestedLambdas for
// the duration of the body.
// NOTE(review): the return statements and several case bodies are not visible
// in this excerpt.
2175 bool UnwrappedLineParser::tryToParseLambda() {
2176 assert(FormatTok
->is(tok::l_square
));
2177 if (!Style
.isCpp()) {
2181 FormatToken
&LSquare
= *FormatTok
;
2182 if (!tryToParseLambdaIntroducer())
2185 bool SeenArrow
= false;
2186 bool InTemplateParameterList
= false;
2188 while (FormatTok
->isNot(tok::l_brace
)) {
2189 if (FormatTok
->isSimpleTypeSpecifier()) {
2193 switch (FormatTok
->Tok
.getKind()) {
2197 parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference
);
2203 assert(FormatTok
->Previous
);
// A token directly after the introducer's ']' marks the start of an explicit
// template parameter list.
2204 if (FormatTok
->Previous
->is(tok::r_square
))
2205 InTemplateParameterList
= true;
2210 case tok::kw_template
:
2211 case tok::kw_typename
:
2215 case tok::kw_constexpr
:
2216 case tok::kw_consteval
:
2219 case tok::identifier
:
2220 case tok::numeric_constant
:
2221 case tok::coloncolon
:
2222 case tok::kw_mutable
:
2223 case tok::kw_noexcept
:
2224 case tok::kw_static
:
2227 // Specialization of a template with an integer parameter can contain
2228 // arithmetic, logical, comparison and ternary operators.
2230 // FIXME: This also accepts sequences of operators that are not in the scope
2231 // of a template argument list.
2233 // In a C++ lambda a template type can only occur after an arrow. We use
2234 // this as a heuristic to distinguish between Objective-C expressions
2235 // followed by an `a->b` expression, such as:
2236 // ([obj func:arg] + a->b)
2237 // Otherwise the code below would parse as a lambda.
2249 case tok::equalequal
:
2250 case tok::exclaimequal
:
2251 case tok::greaterequal
:
2252 case tok::lessequal
:
2258 if (SeenArrow
|| InTemplateParameterList
) {
2264 // This might or might not actually be a lambda arrow (this could be an
2265 // ObjC method invocation followed by a dereferencing arrow). We might
2266 // reset this back to TT_Unknown in TokenAnnotator.
2267 FormatTok
->setFinalizedType(TT_TrailingReturnArrow
);
2271 case tok::kw_requires
: {
2272 auto *RequiresToken
= FormatTok
;
2274 parseRequiresClause(RequiresToken
);
2278 if (!InTemplateParameterList
)
2287 FormatTok
->setFinalizedType(TT_LambdaLBrace
);
2288 LSquare
.setFinalizedType(TT_LambdaLSquare
);
2290 NestedLambdas
.push_back(Line
->SeenDecltypeAuto
);
2292 assert(!NestedLambdas
.empty());
2293 NestedLambdas
.pop_back();
// Checks whether the '[' at the current token can begin a lambda introducer
// and parses the bracket group via parseSquare(/*LambdaIntroducer=*/true).
// NOTE(review): the return statements are not visible in this excerpt;
// presumably each guard below rejects the candidate -- confirm.
2298 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2299 const FormatToken
*Previous
= FormatTok
->Previous
;
2300 const FormatToken
*LeftSquare
= FormatTok
;
// Guard: the '[' follows an identifier-like token other than return /
// co_await / co_yield / co_return, follows a scope-closing token, or opens a
// C++ structured binding.
2302 if ((Previous
&& ((Previous
->Tok
.getIdentifierInfo() &&
2303 !Previous
->isOneOf(tok::kw_return
, tok::kw_co_await
,
2304 tok::kw_co_yield
, tok::kw_co_return
)) ||
2305 Previous
->closesScope())) ||
2306 LeftSquare
->isCppStructuredBinding(Style
)) {
// Guard: a second '[' directly after the first.
2309 if (FormatTok
->is(tok::l_square
))
// For an immediate ']', peek past comments and test whether '>' follows.
2311 if (FormatTok
->is(tok::r_square
)) {
2312 const FormatToken
*Next
= Tokens
->peekNextToken(/*SkipComment=*/true);
2313 if (Next
->is(tok::greater
))
2316 parseSquare(/*LambdaIntroducer=*/true);
// Parses a JavaScript function starting at the `function` keyword (asserted):
// an optional generator '*', an optional name, the formal parameter list, an
// optional ': type' annotation, and finally the body or a terminating ';'.
2320 void UnwrappedLineParser::tryToParseJSFunction() {
2321 assert(FormatTok
->is(Keywords
.kw_function
));
// NOTE(review): this test follows an assert that the token is kw_function, so
// it looks like it can only hold in builds without assertions -- confirm
// whether the assert should also admit kw_async.
2322 if (FormatTok
->is(Keywords
.kw_async
))
2324 // Consume "function".
2327 // Consume * (generator function). Treat it like C++'s overloaded operators.
2328 if (FormatTok
->is(tok::star
)) {
2329 FormatTok
->setFinalizedType(TT_OverloadedOperator
);
2333 // Consume function name.
2334 if (FormatTok
->is(tok::identifier
))
2337 if (FormatTok
->isNot(tok::l_paren
))
2340 // Parse formal parameter list.
2343 if (FormatTok
->is(tok::colon
)) {
2344 // Parse a type definition.
2347 // Eat the type declaration. For braced inline object types, balance braces,
2348 // otherwise just parse until finding an l_brace for the function body.
2349 if (FormatTok
->is(tok::l_brace
))
2350 tryToParseBracedList();
2352 while (!FormatTok
->isOneOf(tok::l_brace
, tok::semi
) && !eof())
2356 if (FormatTok
->is(tok::semi
))
// Makes sure the block kind of the current token is known, computing brace
// types on demand, and then branches on whether it is a BK_Block.
// NOTE(review): the return statements are not visible in this excerpt.
2362 bool UnwrappedLineParser::tryToParseBracedList() {
2363 if (FormatTok
->is(BK_Unknown
))
2364 calculateBraceTypes();
2365 assert(FormatTok
->isNot(BK_Unknown
));
2366 if (FormatTok
->is(BK_Block
))
// JavaScript / C# only (asserted): handles what follows a fat arrow `=>`
// (asserted) and tests whether the next construct opens with '{'.
// NOTE(review): the return statements are not visible in this excerpt.
2373 bool UnwrappedLineParser::tryToParseChildBlock() {
2374 assert(Style
.isJavaScript() || Style
.isCSharp());
2375 assert(FormatTok
->is(TT_FatArrow
));
2376 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2377 // They always start an expression or a child block if followed by a curly
2380 if (FormatTok
->isNot(tok::l_brace
))
// Parses the contents of a braced list, or of an angle-bracketed list when
// \p IsAngleBracket is set (the closing token tested is then '>' instead of
// '}'). \p IsEnum enables extra handling that depends on
// AllowShortEnumsOnASingleLine. The local HasError starts out false;
// NOTE(review): the return statements are not visible in this excerpt.
2386 bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket
, bool IsEnum
) {
2387 bool HasError
= false;
2389 // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2390 // replace this by using parseAssignmentExpression() inside.
2392 if (Style
.isCSharp() && FormatTok
->is(TT_FatArrow
) &&
2393 tryToParseChildBlock()) {
2396 if (Style
.isJavaScript()) {
2397 if (FormatTok
->is(Keywords
.kw_function
)) {
2398 tryToParseJSFunction();
2401 if (FormatTok
->is(tok::l_brace
)) {
2402 // Could be a method inside of a braced list `{a() { return 1; }}`.
2403 if (tryToParseBracedList())
// Closing token of this list: '>' for angle brackets, '}' otherwise.
2408 if (FormatTok
->is(IsAngleBracket
? tok::greater
: tok::r_brace
)) {
2409 if (IsEnum
&& !Style
.AllowShortEnumsOnASingleLine
)
2414 switch (FormatTok
->Tok
.getKind()) {
2416 if (Style
.isCSharp())
2423 // JavaScript can just have free standing methods and getters/setters in
2424 // object literals. Detect them by a "{" following ")".
2425 if (Style
.isJavaScript()) {
2426 if (FormatTok
->is(tok::l_brace
))
2432 // Assume there are no blocks inside a braced init list apart
2433 // from the ones we explicitly parse out (like lambdas).
2434 FormatTok
->setBlockKind(BK_BracedInit
);
2441 parseBracedList(/*IsAngleBracket=*/true);
2444 // JavaScript (or more precisely TypeScript) can have semicolons in braced
2445 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2446 // used for error recovery if we have otherwise determined that this is
2448 if (Style
.isJavaScript()) {
2459 if (IsEnum
&& !Style
.AllowShortEnumsOnASingleLine
)
2470 /// \brief Parses a pair of parentheses (and everything between them).
2471 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2472 /// double ampersands. This applies for all nested scopes as well.
2474 /// Returns whether there is a `=` token between the parentheses.
2475 bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType
) {
2476 assert(FormatTok
->is(tok::l_paren
) && "'(' expected.");
2477 auto *LeftParen
= FormatTok
;
2478 bool SeenEqual
= false;
// Peeked before anything is consumed: true when '{' directly follows the
// '(' (as the name suggests, a possible statement expression).
2479 const bool MightBeStmtExpr
= Tokens
->peekNextToken()->is(tok::l_brace
);
2482 switch (FormatTok
->Tok
.getKind()) {
// Nested parentheses are parsed recursively with the same AmpAmpTokenType.
2484 if (parseParens(AmpAmpTokenType
))
2486 if (Style
.Language
== FormatStyle::LK_Java
&& FormatTok
->is(tok::l_brace
))
// Redundant-parentheses detection; only active when RemoveParentheses is set
// above RPS_Leave and the pair does not open with '{'.
2490 if (!MightBeStmtExpr
&&
2491 Style
.RemoveParentheses
> FormatStyle::RPS_Leave
) {
2492 const auto *Prev
= LeftParen
->Previous
;
2493 const auto *Next
= Tokens
->peekNextToken();
2494 const bool DoubleParens
=
2495 Prev
&& Prev
->is(tok::l_paren
) && Next
&& Next
->is(tok::r_paren
);
2496 const auto *PrevPrev
= Prev
? Prev
->getPreviousNonComment() : nullptr;
2497 const bool Blacklisted
=
2499 (PrevPrev
->isOneOf(tok::kw___attribute
, tok::kw_decltype
) ||
2501 (PrevPrev
->isOneOf(tok::kw_if
, tok::kw_while
) ||
2502 PrevPrev
->endsSequence(tok::kw_constexpr
, tok::kw_if
))));
2503 const bool ReturnParens
=
2504 Style
.RemoveParentheses
== FormatStyle::RPS_ReturnStatement
&&
2505 ((NestedLambdas
.empty() && !IsDecltypeAutoFunction
) ||
2506 (!NestedLambdas
.empty() && !NestedLambdas
.back())) &&
2507 Prev
&& Prev
->isOneOf(tok::kw_return
, tok::kw_co_return
) && Next
&&
2508 Next
->is(tok::semi
);
// Both parentheses of the pair are flagged Optional.
2509 if ((DoubleParens
&& !Blacklisted
) || ReturnParens
) {
2510 LeftParen
->Optional
= true;
2511 FormatTok
->Optional
= true;
2517 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2523 if (!tryToParseBracedList())
2528 if (FormatTok
->is(tok::l_brace
)) {
2535 if (Style
.isCSharp() && FormatTok
->is(TT_FatArrow
))
2536 tryToParseChildBlock();
2541 if (Style
.isJavaScript())
2542 parseRecord(/*ParseAsExpr=*/true);
2546 case tok::identifier
:
2547 if (Style
.isJavaScript() && (FormatTok
->is(Keywords
.kw_function
)))
2548 tryToParseJSFunction();
2552 case tok::kw_requires
: {
2553 auto RequiresToken
= FormatTok
;
2555 parseRequiresExpression(RequiresToken
);
// Apply the caller-requested token type, if one was given.
2559 if (AmpAmpTokenType
!= TT_Unknown
)
2560 FormatTok
->setFinalizedType(AmpAmpTokenType
);
2570 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer
) {
2571 if (!LambdaIntroducer
) {
2572 assert(FormatTok
->is(tok::l_square
) && "'[' expected.");
2573 if (tryToParseLambda())
2577 switch (FormatTok
->Tok
.getKind()) {
2585 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2590 case tok::l_brace
: {
2591 if (!tryToParseBracedList())
2597 if (FormatTok
->is(tok::l_brace
)) {
2609 void UnwrappedLineParser::keepAncestorBraces() {
2610 if (!Style
.RemoveBracesLLVM
)
2613 const int MaxNestingLevels
= 2;
2614 const int Size
= NestedTooDeep
.size();
2615 if (Size
>= MaxNestingLevels
)
2616 NestedTooDeep
[Size
- MaxNestingLevels
] = true;
2617 NestedTooDeep
.push_back(false);
2620 static FormatToken
*getLastNonComment(const UnwrappedLine
&Line
) {
2621 for (const auto &Token
: llvm::reverse(Line
.Tokens
))
2622 if (Token
.Tok
->isNot(tok::comment
))
2628 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF
) {
2629 FormatToken
*Tok
= nullptr;
2631 if (Style
.InsertBraces
&& !Line
->InPPDirective
&& !Line
->Tokens
.empty() &&
2632 PreprocessorDirectives
.empty() && FormatTok
->isNot(tok::semi
)) {
2633 Tok
= Style
.BraceWrapping
.AfterControlStatement
== FormatStyle::BWACS_Never
2634 ? getLastNonComment(*Line
)
2635 : Line
->Tokens
.back().Tok
;
2637 if (Tok
->BraceCount
< 0) {
2638 assert(Tok
->BraceCount
== -1);
2641 Tok
->BraceCount
= -1;
2647 parseStructuralElement();
2650 assert(!Line
->InPPDirective
);
2652 for (const auto &L
: llvm::reverse(*CurrentLines
)) {
2653 if (!L
.InPPDirective
&& getLastNonComment(L
)) {
2654 Tok
= L
.Tokens
.back().Tok
;
2662 if (CheckEOF
&& eof())
2668 static void markOptionalBraces(FormatToken
*LeftBrace
) {
2672 assert(LeftBrace
->is(tok::l_brace
));
2674 FormatToken
*RightBrace
= LeftBrace
->MatchingParen
;
2676 assert(!LeftBrace
->Optional
);
2680 assert(RightBrace
->is(tok::r_brace
));
2681 assert(RightBrace
->MatchingParen
== LeftBrace
);
2682 assert(LeftBrace
->Optional
== RightBrace
->Optional
);
2684 LeftBrace
->Optional
= true;
2685 RightBrace
->Optional
= true;
2688 void UnwrappedLineParser::handleAttributes() {
2689 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2690 if (FormatTok
->isAttribute())
2692 else if (FormatTok
->is(tok::l_square
))
2693 handleCppAttributes();
2696 bool UnwrappedLineParser::handleCppAttributes() {
2697 // Handle [[likely]] / [[unlikely]] attributes.
2698 assert(FormatTok
->is(tok::l_square
));
2699 if (!tryToParseSimpleAttribute())
2705 /// Returns whether \c Tok begins a block.
2706 bool UnwrappedLineParser::isBlockBegin(const FormatToken
&Tok
) const {
2707 // FIXME: rename the function or make
2708 // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2709 return Style
.isVerilog() ? Keywords
.isVerilogBegin(Tok
)
2710 : Tok
.is(tok::l_brace
);
2713 FormatToken
*UnwrappedLineParser::parseIfThenElse(IfStmtKind
*IfKind
,
2715 bool IsVerilogAssert
) {
2716 assert((FormatTok
->is(tok::kw_if
) ||
2717 (Style
.isVerilog() &&
2718 FormatTok
->isOneOf(tok::kw_restrict
, Keywords
.kw_assert
,
2719 Keywords
.kw_assume
, Keywords
.kw_cover
))) &&
2723 if (IsVerilogAssert
) {
2724 // Handle `assert #0` and `assert final`.
2725 if (FormatTok
->is(Keywords
.kw_verilogHash
)) {
2727 if (FormatTok
->is(tok::numeric_constant
))
2729 } else if (FormatTok
->isOneOf(Keywords
.kw_final
, Keywords
.kw_property
,
2730 Keywords
.kw_sequence
)) {
2735 // Handle `if !consteval`.
2736 if (FormatTok
->is(tok::exclaim
))
2739 bool KeepIfBraces
= true;
2740 if (FormatTok
->is(tok::kw_consteval
)) {
2743 KeepIfBraces
= !Style
.RemoveBracesLLVM
|| KeepBraces
;
2744 if (FormatTok
->isOneOf(tok::kw_constexpr
, tok::identifier
))
2746 if (FormatTok
->is(tok::l_paren
)) {
2747 FormatTok
->setFinalizedType(TT_ConditionLParen
);
2752 // The then action is optional in Verilog assert statements.
2753 if (IsVerilogAssert
&& FormatTok
->is(tok::semi
)) {
2759 bool NeedsUnwrappedLine
= false;
2760 keepAncestorBraces();
2762 FormatToken
*IfLeftBrace
= nullptr;
2763 IfStmtKind IfBlockKind
= IfStmtKind::NotIf
;
2765 if (isBlockBegin(*FormatTok
)) {
2766 FormatTok
->setFinalizedType(TT_ControlStatementLBrace
);
2767 IfLeftBrace
= FormatTok
;
2768 CompoundStatementIndenter
Indenter(this, Style
, Line
->Level
);
2769 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2770 /*MunchSemi=*/true, KeepIfBraces
, &IfBlockKind
);
2771 setPreviousRBraceType(TT_ControlStatementRBrace
);
2772 if (Style
.BraceWrapping
.BeforeElse
)
2775 NeedsUnwrappedLine
= true;
2776 } else if (IsVerilogAssert
&& FormatTok
->is(tok::kw_else
)) {
2779 parseUnbracedBody();
2782 if (Style
.RemoveBracesLLVM
) {
2783 assert(!NestedTooDeep
.empty());
2784 KeepIfBraces
= KeepIfBraces
||
2785 (IfLeftBrace
&& !IfLeftBrace
->MatchingParen
) ||
2786 NestedTooDeep
.back() || IfBlockKind
== IfStmtKind::IfOnly
||
2787 IfBlockKind
== IfStmtKind::IfElseIf
;
2790 bool KeepElseBraces
= KeepIfBraces
;
2791 FormatToken
*ElseLeftBrace
= nullptr;
2792 IfStmtKind Kind
= IfStmtKind::IfOnly
;
2794 if (FormatTok
->is(tok::kw_else
)) {
2795 if (Style
.RemoveBracesLLVM
) {
2796 NestedTooDeep
.back() = false;
2797 Kind
= IfStmtKind::IfElse
;
2801 if (isBlockBegin(*FormatTok
)) {
2802 const bool FollowedByIf
= Tokens
->peekNextToken()->is(tok::kw_if
);
2803 FormatTok
->setFinalizedType(TT_ElseLBrace
);
2804 ElseLeftBrace
= FormatTok
;
2805 CompoundStatementIndenter
Indenter(this, Style
, Line
->Level
);
2806 IfStmtKind ElseBlockKind
= IfStmtKind::NotIf
;
2807 FormatToken
*IfLBrace
=
2808 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2809 /*MunchSemi=*/true, KeepElseBraces
, &ElseBlockKind
);
2810 setPreviousRBraceType(TT_ElseRBrace
);
2811 if (FormatTok
->is(tok::kw_else
)) {
2812 KeepElseBraces
= KeepElseBraces
||
2813 ElseBlockKind
== IfStmtKind::IfOnly
||
2814 ElseBlockKind
== IfStmtKind::IfElseIf
;
2815 } else if (FollowedByIf
&& IfLBrace
&& !IfLBrace
->Optional
) {
2816 KeepElseBraces
= true;
2817 assert(ElseLeftBrace
->MatchingParen
);
2818 markOptionalBraces(ElseLeftBrace
);
2821 } else if (!IsVerilogAssert
&& FormatTok
->is(tok::kw_if
)) {
2822 const FormatToken
*Previous
= Tokens
->getPreviousToken();
2824 const bool IsPrecededByComment
= Previous
->is(tok::comment
);
2825 if (IsPrecededByComment
) {
2829 bool TooDeep
= true;
2830 if (Style
.RemoveBracesLLVM
) {
2831 Kind
= IfStmtKind::IfElseIf
;
2832 TooDeep
= NestedTooDeep
.pop_back_val();
2834 ElseLeftBrace
= parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces
);
2835 if (Style
.RemoveBracesLLVM
)
2836 NestedTooDeep
.push_back(TooDeep
);
2837 if (IsPrecededByComment
)
2840 parseUnbracedBody(/*CheckEOF=*/true);
2843 KeepIfBraces
= KeepIfBraces
|| IfBlockKind
== IfStmtKind::IfElse
;
2844 if (NeedsUnwrappedLine
)
2848 if (!Style
.RemoveBracesLLVM
)
2851 assert(!NestedTooDeep
.empty());
2852 KeepElseBraces
= KeepElseBraces
||
2853 (ElseLeftBrace
&& !ElseLeftBrace
->MatchingParen
) ||
2854 NestedTooDeep
.back();
2856 NestedTooDeep
.pop_back();
2858 if (!KeepIfBraces
&& !KeepElseBraces
) {
2859 markOptionalBraces(IfLeftBrace
);
2860 markOptionalBraces(ElseLeftBrace
);
2861 } else if (IfLeftBrace
) {
2862 FormatToken
*IfRightBrace
= IfLeftBrace
->MatchingParen
;
2864 assert(IfRightBrace
->MatchingParen
== IfLeftBrace
);
2865 assert(!IfLeftBrace
->Optional
);
2866 assert(!IfRightBrace
->Optional
);
2867 IfLeftBrace
->MatchingParen
= nullptr;
2868 IfRightBrace
->MatchingParen
= nullptr;
2878 void UnwrappedLineParser::parseTryCatch() {
2879 assert(FormatTok
->isOneOf(tok::kw_try
, tok::kw___try
) && "'try' expected");
2881 bool NeedsUnwrappedLine
= false;
2882 if (FormatTok
->is(tok::colon
)) {
2883 // We are in a function try block, what comes is an initializer list.
2886 // In case identifiers were removed by clang-tidy, what might follow is
2887 // multiple commas in sequence - before the first identifier.
2888 while (FormatTok
->is(tok::comma
))
2891 while (FormatTok
->is(tok::identifier
)) {
2893 if (FormatTok
->is(tok::l_paren
))
2895 if (FormatTok
->Previous
&& FormatTok
->Previous
->is(tok::identifier
) &&
2896 FormatTok
->is(tok::l_brace
)) {
2899 } while (FormatTok
->isNot(tok::r_brace
));
2903 // In case identifiers were removed by clang-tidy, what might follow is
2904 // multiple commas in sequence - after the first identifier.
2905 while (FormatTok
->is(tok::comma
))
2909 // Parse try with resource.
2910 if (Style
.Language
== FormatStyle::LK_Java
&& FormatTok
->is(tok::l_paren
))
2913 keepAncestorBraces();
2915 if (FormatTok
->is(tok::l_brace
)) {
2916 CompoundStatementIndenter
Indenter(this, Style
, Line
->Level
);
2918 if (Style
.BraceWrapping
.BeforeCatch
)
2921 NeedsUnwrappedLine
= true;
2922 } else if (FormatTok
->isNot(tok::kw_catch
)) {
2923 // The C++ standard requires a compound-statement after a try.
2924 // If there's none, we try to assume there's a structuralElement
2925 // and try to continue.
2928 parseStructuralElement();
2932 if (FormatTok
->is(tok::at
))
2934 if (!(FormatTok
->isOneOf(tok::kw_catch
, Keywords
.kw___except
,
2935 tok::kw___finally
) ||
2936 ((Style
.Language
== FormatStyle::LK_Java
|| Style
.isJavaScript()) &&
2937 FormatTok
->is(Keywords
.kw_finally
)) ||
2938 (FormatTok
->isObjCAtKeyword(tok::objc_catch
) ||
2939 FormatTok
->isObjCAtKeyword(tok::objc_finally
)))) {
2943 while (FormatTok
->isNot(tok::l_brace
)) {
2944 if (FormatTok
->is(tok::l_paren
)) {
2948 if (FormatTok
->isOneOf(tok::semi
, tok::r_brace
, tok::eof
)) {
2949 if (Style
.RemoveBracesLLVM
)
2950 NestedTooDeep
.pop_back();
2955 NeedsUnwrappedLine
= false;
2956 Line
->MustBeDeclaration
= false;
2957 CompoundStatementIndenter
Indenter(this, Style
, Line
->Level
);
2959 if (Style
.BraceWrapping
.BeforeCatch
)
2962 NeedsUnwrappedLine
= true;
2965 if (Style
.RemoveBracesLLVM
)
2966 NestedTooDeep
.pop_back();
2968 if (NeedsUnwrappedLine
)
2972 void UnwrappedLineParser::parseNamespace() {
2973 assert(FormatTok
->isOneOf(tok::kw_namespace
, TT_NamespaceMacro
) &&
2974 "'namespace' expected");
2976 const FormatToken
&InitialToken
= *FormatTok
;
2978 if (InitialToken
.is(TT_NamespaceMacro
)) {
2981 while (FormatTok
->isOneOf(tok::identifier
, tok::coloncolon
, tok::kw_inline
,
2982 tok::l_square
, tok::period
, tok::l_paren
) ||
2983 (Style
.isCSharp() && FormatTok
->is(tok::kw_union
))) {
2984 if (FormatTok
->is(tok::l_square
))
2986 else if (FormatTok
->is(tok::l_paren
))
2992 if (FormatTok
->is(tok::l_brace
)) {
2993 FormatTok
->setFinalizedType(TT_NamespaceLBrace
);
2995 if (ShouldBreakBeforeBrace(Style
, InitialToken
))
2998 unsigned AddLevels
=
2999 Style
.NamespaceIndentation
== FormatStyle::NI_All
||
3000 (Style
.NamespaceIndentation
== FormatStyle::NI_Inner
&&
3001 DeclarationScopeStack
.size() > 1)
3004 bool ManageWhitesmithsBraces
=
3006 Style
.BreakBeforeBraces
== FormatStyle::BS_Whitesmiths
;
3008 // If we're in Whitesmiths mode, indent the brace if we're not indenting
3010 if (ManageWhitesmithsBraces
)
3013 // Munch the semicolon after a namespace. This is more common than one would
3014 // think. Putting the semicolon into its own line is very ugly.
3015 parseBlock(/*MustBeDeclaration=*/true, AddLevels
, /*MunchSemi=*/true,
3016 /*KeepBraces=*/true, /*IfKind=*/nullptr,
3017 ManageWhitesmithsBraces
);
3019 addUnwrappedLine(AddLevels
> 0 ? LineLevel::Remove
: LineLevel::Keep
);
3021 if (ManageWhitesmithsBraces
)
3024 // FIXME: Add error handling.
3027 void UnwrappedLineParser::parseNew() {
3028 assert(FormatTok
->is(tok::kw_new
) && "'new' expected");
3031 if (Style
.isCSharp()) {
3033 // Handle constructor invocation, e.g. `new(field: value)`.
3034 if (FormatTok
->is(tok::l_paren
))
3037 // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
3038 if (FormatTok
->is(tok::l_brace
))
3041 if (FormatTok
->isOneOf(tok::semi
, tok::comma
))
3048 if (Style
.Language
!= FormatStyle::LK_Java
)
3051 // In Java, we can parse everything up to the parens, which aren't optional.
3053 // There should not be a ;, { or } before the new's open paren.
3054 if (FormatTok
->isOneOf(tok::semi
, tok::l_brace
, tok::r_brace
))
3057 // Consume the parens.
3058 if (FormatTok
->is(tok::l_paren
)) {
3061 // If there is a class body of an anonymous class, consume that as child.
3062 if (FormatTok
->is(tok::l_brace
))
3070 void UnwrappedLineParser::parseLoopBody(bool KeepBraces
, bool WrapRightBrace
) {
3071 keepAncestorBraces();
3073 if (isBlockBegin(*FormatTok
)) {
3074 FormatTok
->setFinalizedType(TT_ControlStatementLBrace
);
3075 FormatToken
*LeftBrace
= FormatTok
;
3076 CompoundStatementIndenter
Indenter(this, Style
, Line
->Level
);
3077 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3078 /*MunchSemi=*/true, KeepBraces
);
3079 setPreviousRBraceType(TT_ControlStatementRBrace
);
3081 assert(!NestedTooDeep
.empty());
3082 if (!NestedTooDeep
.back())
3083 markOptionalBraces(LeftBrace
);
3088 parseUnbracedBody();
3092 NestedTooDeep
.pop_back();
3095 void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens
) {
3096 assert((FormatTok
->isOneOf(tok::kw_for
, tok::kw_while
, TT_ForEachMacro
) ||
3097 (Style
.isVerilog() &&
3098 FormatTok
->isOneOf(Keywords
.kw_always
, Keywords
.kw_always_comb
,
3099 Keywords
.kw_always_ff
, Keywords
.kw_always_latch
,
3100 Keywords
.kw_final
, Keywords
.kw_initial
,
3101 Keywords
.kw_foreach
, Keywords
.kw_forever
,
3102 Keywords
.kw_repeat
))) &&
3103 "'for', 'while' or foreach macro expected");
3104 const bool KeepBraces
= !Style
.RemoveBracesLLVM
||
3105 !FormatTok
->isOneOf(tok::kw_for
, tok::kw_while
);
3108 // JS' for await ( ...
3109 if (Style
.isJavaScript() && FormatTok
->is(Keywords
.kw_await
))
3111 if (Style
.isCpp() && FormatTok
->is(tok::kw_co_await
))
3113 if (HasParens
&& FormatTok
->is(tok::l_paren
)) {
3114 // The type is only set for Verilog basically because we were afraid to
3115 // change the existing behavior for loops. See the discussion on D121756 for
3117 if (Style
.isVerilog())
3118 FormatTok
->setFinalizedType(TT_ConditionLParen
);
3122 if (Style
.isVerilog()) {
3124 parseVerilogSensitivityList();
3125 } else if (Style
.AllowShortLoopsOnASingleLine
&& FormatTok
->is(tok::semi
) &&
3126 Tokens
->getPreviousToken()->is(tok::r_paren
)) {
3133 parseLoopBody(KeepBraces
, /*WrapRightBrace=*/true);
3136 void UnwrappedLineParser::parseDoWhile() {
3137 assert(FormatTok
->is(tok::kw_do
) && "'do' expected");
3140 parseLoopBody(/*KeepBraces=*/true, Style
.BraceWrapping
.BeforeWhile
);
3142 // FIXME: Add error handling.
3143 if (FormatTok
->isNot(tok::kw_while
)) {
3148 FormatTok
->setFinalizedType(TT_DoWhile
);
3150 // If in Whitesmiths mode, the line with the while() needs to be indented
3151 // to the same level as the block.
3152 if (Style
.BreakBeforeBraces
== FormatStyle::BS_Whitesmiths
)
3156 parseStructuralElement();
3159 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel
) {
3161 unsigned OldLineLevel
= Line
->Level
;
3162 if (Line
->Level
> 1 || (!Line
->InPPDirective
&& Line
->Level
> 0))
3167 if (!Style
.IndentCaseBlocks
&& CommentsBeforeNextToken
.empty() &&
3168 FormatTok
->is(tok::l_brace
)) {
3170 CompoundStatementIndenter
Indenter(this, Line
->Level
,
3171 Style
.BraceWrapping
.AfterCaseLabel
,
3172 Style
.BraceWrapping
.IndentBraces
);
3174 if (FormatTok
->is(tok::kw_break
)) {
3175 if (Style
.BraceWrapping
.AfterControlStatement
==
3176 FormatStyle::BWACS_Always
) {
3178 if (!Style
.IndentCaseBlocks
&&
3179 Style
.BreakBeforeBraces
== FormatStyle::BS_Whitesmiths
) {
3183 parseStructuralElement();
3187 if (FormatTok
->is(tok::semi
))
3191 Line
->Level
= OldLineLevel
;
3192 if (FormatTok
->isNot(tok::l_brace
)) {
3193 parseStructuralElement();
3198 void UnwrappedLineParser::parseCaseLabel() {
3199 assert(FormatTok
->is(tok::kw_case
) && "'case' expected");
3201 // FIXME: fix handling of complex expressions here.
3204 if (FormatTok
->is(tok::colon
)) {
3205 FormatTok
->setFinalizedType(TT_CaseLabelColon
);
3212 void UnwrappedLineParser::parseSwitch() {
3213 assert(FormatTok
->is(tok::kw_switch
) && "'switch' expected");
3215 if (FormatTok
->is(tok::l_paren
))
3218 keepAncestorBraces();
3220 if (FormatTok
->is(tok::l_brace
)) {
3221 CompoundStatementIndenter
Indenter(this, Style
, Line
->Level
);
3222 FormatTok
->setFinalizedType(TT_ControlStatementLBrace
);
3224 setPreviousRBraceType(TT_ControlStatementRBrace
);
3229 parseStructuralElement();
3233 if (Style
.RemoveBracesLLVM
)
3234 NestedTooDeep
.pop_back();
3237 // Operators that can follow a C variable.
3238 static bool isCOperatorFollowingVar(tok::TokenKind kind
) {
3244 case tok::caretequal
:
3248 case tok::equalequal
:
3250 case tok::exclaimequal
:
3252 case tok::greaterequal
:
3253 case tok::greatergreater
:
3254 case tok::greatergreaterequal
:
3258 case tok::lessequal
:
3260 case tok::lesslessequal
:
3262 case tok::minusequal
:
3263 case tok::minusminus
:
3265 case tok::percentequal
:
3268 case tok::pipeequal
:
3271 case tok::plusequal
:
3279 case tok::slashequal
:
3281 case tok::starequal
:
3288 void UnwrappedLineParser::parseAccessSpecifier() {
3289 FormatToken
*AccessSpecifierCandidate
= FormatTok
;
3291 // Understand Qt's slots.
3292 if (FormatTok
->isOneOf(Keywords
.kw_slots
, Keywords
.kw_qslots
))
3294 // Otherwise, we don't know what it is, and we'd better keep the next token.
3295 if (FormatTok
->is(tok::colon
)) {
3298 } else if (FormatTok
->isNot(tok::coloncolon
) &&
3299 !isCOperatorFollowingVar(FormatTok
->Tok
.getKind())) {
3300 // Not a variable name nor namespace name.
3302 } else if (AccessSpecifierCandidate
) {
3303 // Consider the access specifier to be a C identifier.
3304 AccessSpecifierCandidate
->Tok
.setKind(tok::identifier
);
3308 /// \brief Parses a requires, decides if it is a clause or an expression.
3309 /// \pre The current token has to be the requires keyword.
3310 /// \returns true if it parsed a clause.
3311 bool clang::format::UnwrappedLineParser::parseRequires() {
3312 assert(FormatTok
->is(tok::kw_requires
) && "'requires' expected");
3313 auto RequiresToken
= FormatTok
;
3315 // We try to guess if it is a requires clause, or a requires expression. For
3316 // that we first consume the keyword and check the next token.
3319 switch (FormatTok
->Tok
.getKind()) {
3321 // This can only be an expression, never a clause.
3322 parseRequiresExpression(RequiresToken
);
3325 // Clauses and expression can start with a paren, it's unclear what we have.
3328 // All other tokens can only be a clause.
3329 parseRequiresClause(RequiresToken
);
3333 // Looking forward we would have to decide if there are function declaration
3334 // like arguments to the requires expression:
3336 // Or there is a constraint expression for the requires clause:
3337 // requires (C<T> && ...
3339 // But first let's look behind.
3340 auto *PreviousNonComment
= RequiresToken
->getPreviousNonComment();
3342 if (!PreviousNonComment
||
3343 PreviousNonComment
->is(TT_RequiresExpressionLBrace
)) {
3344 // If there is no token, or an expression left brace, we are a requires
3345 // clause within a requires expression.
3346 parseRequiresClause(RequiresToken
);
3350 switch (PreviousNonComment
->Tok
.getKind()) {
3353 case tok::kw_noexcept
:
3355 // This is a requires clause.
3356 parseRequiresClause(RequiresToken
);
3360 // This can be either:
3361 // if (... && requires (T t) ...)
3363 // void member(...) && requires (C<T> ...
3364 // We check the one token before that for a const:
3365 // void member(...) const && requires (C<T> ...
3366 auto PrevPrev
= PreviousNonComment
->getPreviousNonComment();
3367 if (PrevPrev
&& PrevPrev
->is(tok::kw_const
)) {
3368 parseRequiresClause(RequiresToken
);
3374 if (PreviousNonComment
->isTypeOrIdentifier()) {
3375 // This is a requires clause.
3376 parseRequiresClause(RequiresToken
);
3379 // It's an expression.
3380 parseRequiresExpression(RequiresToken
);
3384 // Now we look forward and try to check if the paren content is a parameter
3385 // list. The parameters can be cv-qualified and contain references or
3387 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3388 // of stuff: typename, const, *, &, &&, ::, identifiers.
3390 unsigned StoredPosition
= Tokens
->getPosition();
3391 FormatToken
*NextToken
= Tokens
->getNextToken();
3393 auto PeekNext
= [&Lookahead
, &NextToken
, this] {
3395 NextToken
= Tokens
->getNextToken();
3398 bool FoundType
= false;
3399 bool LastWasColonColon
= false;
3402 for (; Lookahead
< 50; PeekNext()) {
3403 switch (NextToken
->Tok
.getKind()) {
3404 case tok::kw_volatile
:
3407 if (OpenAngles
== 0) {
3408 FormatTok
= Tokens
->setPosition(StoredPosition
);
3409 parseRequiresExpression(RequiresToken
);
3415 FormatTok
= Tokens
->setPosition(StoredPosition
);
3416 parseRequiresClause(RequiresToken
);
3419 // Break out of the loop.
3422 case tok::coloncolon
:
3423 LastWasColonColon
= true;
3425 case tok::identifier
:
3426 if (FoundType
&& !LastWasColonColon
&& OpenAngles
== 0) {
3427 FormatTok
= Tokens
->setPosition(StoredPosition
);
3428 parseRequiresExpression(RequiresToken
);
3432 LastWasColonColon
= false;
3441 if (NextToken
->isSimpleTypeSpecifier()) {
3442 FormatTok
= Tokens
->setPosition(StoredPosition
);
3443 parseRequiresExpression(RequiresToken
);
3449 // This seems to be a complicated expression, just assume it's a clause.
3450 FormatTok
= Tokens
->setPosition(StoredPosition
);
3451 parseRequiresClause(RequiresToken
);
3455 /// \brief Parses a requires clause.
3456 /// \param RequiresToken The requires keyword token, which starts this clause.
3457 /// \pre We need to be on the next token after the requires keyword.
3458 /// \sa parseRequiresExpression
3460 /// Returns if it either has finished parsing the clause, or it detects, that
3461 /// the clause is incorrect.
3462 void UnwrappedLineParser::parseRequiresClause(FormatToken
*RequiresToken
) {
3463 assert(FormatTok
->getPreviousNonComment() == RequiresToken
);
3464 assert(RequiresToken
->is(tok::kw_requires
) && "'requires' expected");
3466 // If there is no previous token, we are within a requires expression,
3467 // otherwise we will always have the template or function declaration in front
3469 bool InRequiresExpression
=
3470 !RequiresToken
->Previous
||
3471 RequiresToken
->Previous
->is(TT_RequiresExpressionLBrace
);
3473 RequiresToken
->setFinalizedType(InRequiresExpression
3474 ? TT_RequiresClauseInARequiresExpression
3475 : TT_RequiresClause
);
3477 // NOTE: parseConstraintExpression is only ever called from this function.
3478 // It could be inlined into here.
3479 parseConstraintExpression();
3481 if (!InRequiresExpression
)
3482 FormatTok
->Previous
->ClosesRequiresClause
= true;
3485 /// \brief Parses a requires expression.
3486 /// \param RequiresToken The requires keyword token, which starts this clause.
3487 /// \pre We need to be on the next token after the requires keyword.
3488 /// \sa parseRequiresClause
3490 /// Returns if it either has finished parsing the expression, or it detects,
3491 /// that the expression is incorrect.
3492 void UnwrappedLineParser::parseRequiresExpression(FormatToken
*RequiresToken
) {
3493 assert(FormatTok
->getPreviousNonComment() == RequiresToken
);
3494 assert(RequiresToken
->is(tok::kw_requires
) && "'requires' expected");
3496 RequiresToken
->setFinalizedType(TT_RequiresExpression
);
3498 if (FormatTok
->is(tok::l_paren
)) {
3499 FormatTok
->setFinalizedType(TT_RequiresExpressionLParen
);
3503 if (FormatTok
->is(tok::l_brace
)) {
3504 FormatTok
->setFinalizedType(TT_RequiresExpressionLBrace
);
3509 /// \brief Parses a constraint expression.
3511 /// This is the body of a requires clause. It returns, when the parsing is
3512 /// complete, or the expression is incorrect.
3513 void UnwrappedLineParser::parseConstraintExpression() {
3514 // The special handling for lambdas is needed since tryToParseLambda() eats a
3515 // token and if a requires expression is the last part of a requires clause
3516 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3517 // not set on the correct token. Thus we need to be aware if we even expect a
3518 // lambda to be possible.
3519 // template <typename T> requires requires { ... } [[nodiscard]] ...;
3520 bool LambdaNextTimeAllowed
= true;
3522 // Within lambda declarations, it is permitted to put a requires clause after
3523 // its template parameter list, which would place the requires clause right
3524 // before the parentheses of the parameters of the lambda declaration. Thus,
3525 // we track if we expect to see grouping parentheses at all.
3526 // Without this check, `requires foo<T> (T t)` in the below example would be
3527 // seen as the whole requires clause, accidentally eating the parameters of
3529 // [&]<typename T> requires foo<T> (T t) { ... };
3530 bool TopLevelParensAllowed
= true;
3533 bool LambdaThisTimeAllowed
= std::exchange(LambdaNextTimeAllowed
, false);
3535 switch (FormatTok
->Tok
.getKind()) {
3536 case tok::kw_requires
: {
3537 auto RequiresToken
= FormatTok
;
3539 parseRequiresExpression(RequiresToken
);
3544 if (!TopLevelParensAllowed
)
3546 parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator
);
3547 TopLevelParensAllowed
= false;
3551 if (!LambdaThisTimeAllowed
|| !tryToParseLambda())
3558 case tok::kw_struct
:
3563 // Potential function body.
3568 FormatTok
->setFinalizedType(TT_BinaryOperator
);
3570 LambdaNextTimeAllowed
= true;
3571 TopLevelParensAllowed
= true;
3576 LambdaNextTimeAllowed
= LambdaThisTimeAllowed
;
3580 case tok::kw_sizeof
:
3582 case tok::greaterequal
:
3583 case tok::greatergreater
:
3585 case tok::lessequal
:
3587 case tok::equalequal
:
3589 case tok::exclaimequal
:
3594 LambdaNextTimeAllowed
= true;
3595 TopLevelParensAllowed
= true;
3600 case tok::numeric_constant
:
3601 case tok::coloncolon
:
3604 TopLevelParensAllowed
= false;
3609 case tok::kw_static_cast
:
3610 case tok::kw_const_cast
:
3611 case tok::kw_reinterpret_cast
:
3612 case tok::kw_dynamic_cast
:
3614 if (FormatTok
->isNot(tok::less
))
3618 parseBracedList(/*IsAngleBracket=*/true);
3622 if (!FormatTok
->Tok
.getIdentifierInfo()) {
3623 // Identifiers are part of the default case, we check for more then
3624 // tok::identifier to handle builtin type traits.
3628 // We need to differentiate identifiers for a template deduction guide,
3629 // variables, or function return types (the constraint expression has
3630 // ended before that), and basically all other cases. But it's easier to
3631 // check the other way around.
3632 assert(FormatTok
->Previous
);
3633 switch (FormatTok
->Previous
->Tok
.getKind()) {
3634 case tok::coloncolon
: // Nested identifier.
3635 case tok::ampamp
: // Start of a function or variable for the
3636 case tok::pipepipe
: // constraint expression. (binary)
3637 case tok::exclaim
: // The same as above, but unary.
3638 case tok::kw_requires
: // Initial identifier of a requires clause.
3639 case tok::equal
: // Initial identifier of a concept declaration.
3645 // Read identifier with optional template declaration.
3647 if (FormatTok
->is(tok::less
)) {
3649 parseBracedList(/*IsAngleBracket=*/true);
3651 TopLevelParensAllowed
= false;
3657 bool UnwrappedLineParser::parseEnum() {
3658 const FormatToken
&InitialToken
= *FormatTok
;
3660 // Won't be 'enum' for NS_ENUMs.
3661 if (FormatTok
->is(tok::kw_enum
))
3664 // In TypeScript, "enum" can also be used as property name, e.g. in interface
3665 // declarations. An "enum" keyword followed by a colon would be a syntax
3666 // error and thus assume it is just an identifier.
3667 if (Style
.isJavaScript() && FormatTok
->isOneOf(tok::colon
, tok::question
))
3670 // In protobuf, "enum" can be used as a field name.
3671 if (Style
.Language
== FormatStyle::LK_Proto
&& FormatTok
->is(tok::equal
))
3674 // Eat up enum class ...
3675 if (FormatTok
->isOneOf(tok::kw_class
, tok::kw_struct
))
3678 while (FormatTok
->Tok
.getIdentifierInfo() ||
3679 FormatTok
->isOneOf(tok::colon
, tok::coloncolon
, tok::less
,
3680 tok::greater
, tok::comma
, tok::question
,
3681 tok::l_square
, tok::r_square
)) {
3682 if (Style
.isVerilog()) {
3683 FormatTok
->setFinalizedType(TT_VerilogDimensionedTypeName
);
3685 // In Verilog the base type can have dimensions.
3686 while (FormatTok
->is(tok::l_square
))
3691 // We can have macros or attributes in between 'enum' and the enum name.
3692 if (FormatTok
->is(tok::l_paren
))
3694 assert(FormatTok
->isNot(TT_AttributeSquare
));
3695 if (FormatTok
->is(tok::identifier
)) {
3697 // If there are two identifiers in a row, this is likely an elaborate
3698 // return type. In Java, this can be "implements", etc.
3699 if (Style
.isCpp() && FormatTok
->is(tok::identifier
))
3704 // Just a declaration or something is wrong.
3705 if (FormatTok
->isNot(tok::l_brace
))
3707 FormatTok
->setFinalizedType(TT_EnumLBrace
);
3708 FormatTok
->setBlockKind(BK_Block
);
3710 if (Style
.Language
== FormatStyle::LK_Java
) {
3711 // Java enums are different.
3712 parseJavaEnumBody();
3715 if (Style
.Language
== FormatStyle::LK_Proto
) {
3716 parseBlock(/*MustBeDeclaration=*/true);
3720 if (!Style
.AllowShortEnumsOnASingleLine
&&
3721 ShouldBreakBeforeBrace(Style
, InitialToken
)) {
3726 if (!Style
.AllowShortEnumsOnASingleLine
) {
3730 bool HasError
= !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true);
3731 if (!Style
.AllowShortEnumsOnASingleLine
)
3734 if (FormatTok
->is(tok::semi
))
3738 setPreviousRBraceType(TT_EnumRBrace
);
3741 // There is no addUnwrappedLine() here so that we fall through to parsing a
3742 // structural element afterwards. Thus, in "enum A {} n, m;",
3743 // "} n, m;" will end up in one unwrapped line.
3746 bool UnwrappedLineParser::parseStructLike() {
3747 // parseRecord falls through and does not yet add an unwrapped line as a
3748 // record declaration or definition can start a structural element.
3750 // This does not apply to Java, JavaScript and C#.
3751 if (Style
.Language
== FormatStyle::LK_Java
|| Style
.isJavaScript() ||
3753 if (FormatTok
->is(tok::semi
))
3762 // A class used to set and restore the Token position when peeking
3763 // ahead in the token source.
3764 class ScopedTokenPosition
{
3765 unsigned StoredPosition
;
3766 FormatTokenSource
*Tokens
;
3769 ScopedTokenPosition(FormatTokenSource
*Tokens
) : Tokens(Tokens
) {
3770 assert(Tokens
&& "Tokens expected to not be null");
3771 StoredPosition
= Tokens
->getPosition();
3774 ~ScopedTokenPosition() { Tokens
->setPosition(StoredPosition
); }
3778 // Look to see if we have [[ by looking ahead, if
3779 // its not then rewind to the original position.
3780 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3781 ScopedTokenPosition
AutoPosition(Tokens
);
3782 FormatToken
*Tok
= Tokens
->getNextToken();
3783 // We already read the first [ check for the second.
3784 if (Tok
->isNot(tok::l_square
))
3786 // Double check that the attribute is just something
3788 while (Tok
->isNot(tok::eof
)) {
3789 if (Tok
->is(tok::r_square
))
3791 Tok
= Tokens
->getNextToken();
3793 if (Tok
->is(tok::eof
))
3795 Tok
= Tokens
->getNextToken();
3796 if (Tok
->isNot(tok::r_square
))
3798 Tok
= Tokens
->getNextToken();
3799 if (Tok
->is(tok::semi
))
3804 void UnwrappedLineParser::parseJavaEnumBody() {
3805 assert(FormatTok
->is(tok::l_brace
));
3806 const FormatToken
*OpeningBrace
= FormatTok
;
3808 // Determine whether the enum is simple, i.e. does not have a semicolon or
3809 // constants with class bodies. Simple enums can be formatted like braced
3810 // lists, contracted to a single line, etc.
3811 unsigned StoredPosition
= Tokens
->getPosition();
3812 bool IsSimple
= true;
3813 FormatToken
*Tok
= Tokens
->getNextToken();
3814 while (Tok
->isNot(tok::eof
)) {
3815 if (Tok
->is(tok::r_brace
))
3817 if (Tok
->isOneOf(tok::l_brace
, tok::semi
)) {
3821 // FIXME: This will also mark enums with braces in the arguments to enum
3822 // constants as "not simple". This is probably fine in practice, though.
3823 Tok
= Tokens
->getNextToken();
3825 FormatTok
= Tokens
->setPosition(StoredPosition
);
3834 // Parse the body of a more complex enum.
3835 // First add a line for everything up to the "{".
3840 // Parse the enum constants.
3842 if (FormatTok
->is(tok::l_brace
)) {
3843 // Parse the constant's class body.
3844 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3845 /*MunchSemi=*/false);
3846 } else if (FormatTok
->is(tok::l_paren
)) {
3848 } else if (FormatTok
->is(tok::comma
)) {
3851 } else if (FormatTok
->is(tok::semi
)) {
3855 } else if (FormatTok
->is(tok::r_brace
)) {
3863 // Parse the class body after the enum's ";" if any.
3864 parseLevel(OpeningBrace
);
3870 void UnwrappedLineParser::parseRecord(bool ParseAsExpr
) {
3871 const FormatToken
&InitialToken
= *FormatTok
;
3874 // The actual identifier can be a nested name specifier, and in macros
3875 // it is often token-pasted.
3876 // An [[attribute]] can be before the identifier.
3877 while (FormatTok
->isOneOf(tok::identifier
, tok::coloncolon
, tok::hashhash
,
3878 tok::kw_alignas
, tok::l_square
) ||
3879 FormatTok
->isAttribute() ||
3880 ((Style
.Language
== FormatStyle::LK_Java
|| Style
.isJavaScript()) &&
3881 FormatTok
->isOneOf(tok::period
, tok::comma
))) {
3882 if (Style
.isJavaScript() &&
3883 FormatTok
->isOneOf(Keywords
.kw_extends
, Keywords
.kw_implements
)) {
3884 // JavaScript/TypeScript supports inline object types in
3885 // extends/implements positions:
3886 // class Foo implements {bar: number} { }
3888 if (FormatTok
->is(tok::l_brace
)) {
3889 tryToParseBracedList();
3893 if (FormatTok
->is(tok::l_square
) && handleCppAttributes())
3895 bool IsNonMacroIdentifier
=
3896 FormatTok
->is(tok::identifier
) &&
3897 FormatTok
->TokenText
!= FormatTok
->TokenText
.upper();
3899 // We can have macros in between 'class' and the class name.
3900 if (!IsNonMacroIdentifier
&& FormatTok
->is(tok::l_paren
))
3904 // Note that parsing away template declarations here leads to incorrectly
3905 // accepting function declarations as record declarations.
3906 // In general, we cannot solve this problem. Consider:
3907 // class A<int> B() {}
3908 // which can be a function definition or a class definition when B() is a
3909 // macro. If we find enough real-world cases where this is a problem, we
3910 // can parse for the 'template' keyword in the beginning of the statement,
3911 // and thus rule out the record production in case there is no template
3912 // (this would still leave us with an ambiguity between template function
3913 // and class declarations).
3914 if (FormatTok
->isOneOf(tok::colon
, tok::less
)) {
3916 if (FormatTok
->is(tok::l_brace
)) {
3917 calculateBraceTypes(/*ExpectClassBody=*/true);
3918 if (!tryToParseBracedList())
3921 if (FormatTok
->is(tok::l_square
)) {
3922 FormatToken
*Previous
= FormatTok
->Previous
;
3924 !(Previous
->is(tok::r_paren
) || Previous
->isTypeOrIdentifier())) {
3925 // Don't try parsing a lambda if we had a closing parenthesis before,
3926 // it was probably a pointer to an array: int (*)[].
3927 if (!tryToParseLambda())
3934 if (FormatTok
->is(tok::semi
))
3936 if (Style
.isCSharp() && FormatTok
->is(Keywords
.kw_where
)) {
3939 parseCSharpGenericTypeConstraint();
3946 auto GetBraceTypes
=
3947 [](const FormatToken
&RecordTok
) -> std::pair
<TokenType
, TokenType
> {
3948 switch (RecordTok
.Tok
.getKind()) {
3950 return {TT_ClassLBrace
, TT_ClassRBrace
};
3951 case tok::kw_struct
:
3952 return {TT_StructLBrace
, TT_StructRBrace
};
3954 return {TT_UnionLBrace
, TT_UnionRBrace
};
3956 // Useful for e.g. interface.
3957 return {TT_RecordLBrace
, TT_RecordRBrace
};
3960 if (FormatTok
->is(tok::l_brace
)) {
3961 auto [OpenBraceType
, ClosingBraceType
] = GetBraceTypes(InitialToken
);
3962 FormatTok
->setFinalizedType(OpenBraceType
);
3966 if (ShouldBreakBeforeBrace(Style
, InitialToken
))
3969 unsigned AddLevels
= Style
.IndentAccessModifiers
? 2u : 1u;
3970 parseBlock(/*MustBeDeclaration=*/true, AddLevels
, /*MunchSemi=*/false);
3972 setPreviousRBraceType(ClosingBraceType
);
3974 // There is no addUnwrappedLine() here so that we fall through to parsing a
3975 // structural element afterwards. Thus, in "class A {} n, m;",
3976 // "} n, m;" will end up in one unwrapped line.
3979 void UnwrappedLineParser::parseObjCMethod() {
3980 assert(FormatTok
->isOneOf(tok::l_paren
, tok::identifier
) &&
3981 "'(' or identifier expected.");
3983 if (FormatTok
->is(tok::semi
)) {
3987 } else if (FormatTok
->is(tok::l_brace
)) {
3988 if (Style
.BraceWrapping
.AfterFunction
)
3999 void UnwrappedLineParser::parseObjCProtocolList() {
4000 assert(FormatTok
->is(tok::less
) && "'<' expected.");
4003 // Early exit in case someone forgot a close angle.
4004 if (FormatTok
->isOneOf(tok::semi
, tok::l_brace
) ||
4005 FormatTok
->isObjCAtKeyword(tok::objc_end
)) {
4008 } while (!eof() && FormatTok
->isNot(tok::greater
));
4009 nextToken(); // Skip '>'.
4012 void UnwrappedLineParser::parseObjCUntilAtEnd() {
4014 if (FormatTok
->isObjCAtKeyword(tok::objc_end
)) {
4019 if (FormatTok
->is(tok::l_brace
)) {
4021 // In ObjC interfaces, nothing should be following the "}".
4023 } else if (FormatTok
->is(tok::r_brace
)) {
4024 // Ignore stray "}". parseStructuralElement doesn't consume them.
4027 } else if (FormatTok
->isOneOf(tok::minus
, tok::plus
)) {
4031 parseStructuralElement();
4036 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
4037 assert(FormatTok
->Tok
.getObjCKeywordID() == tok::objc_interface
||
4038 FormatTok
->Tok
.getObjCKeywordID() == tok::objc_implementation
);
4040 nextToken(); // interface name
4042 // @interface can be followed by a lightweight generic
4043 // specialization list, then either a base class or a category.
4044 if (FormatTok
->is(tok::less
))
4045 parseObjCLightweightGenerics();
4046 if (FormatTok
->is(tok::colon
)) {
4048 nextToken(); // base class name
4049 // The base class can also have lightweight generics applied to it.
4050 if (FormatTok
->is(tok::less
))
4051 parseObjCLightweightGenerics();
4052 } else if (FormatTok
->is(tok::l_paren
)) {
4053 // Skip category, if present.
4057 if (FormatTok
->is(tok::less
))
4058 parseObjCProtocolList();
4060 if (FormatTok
->is(tok::l_brace
)) {
4061 if (Style
.BraceWrapping
.AfterObjCDeclaration
)
4063 parseBlock(/*MustBeDeclaration=*/true);
4066 // With instance variables, this puts '}' on its own line. Without instance
4067 // variables, this ends the @interface line.
4070 parseObjCUntilAtEnd();
4073 void UnwrappedLineParser::parseObjCLightweightGenerics() {
4074 assert(FormatTok
->is(tok::less
));
4075 // Unlike protocol lists, generic parameterizations support
4078 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4079 // NSObject <NSCopying, NSSecureCoding>
4081 // so we need to count how many open angles we have left.
4082 unsigned NumOpenAngles
= 1;
4085 // Early exit in case someone forgot a close angle.
4086 if (FormatTok
->isOneOf(tok::semi
, tok::l_brace
) ||
4087 FormatTok
->isObjCAtKeyword(tok::objc_end
)) {
4090 if (FormatTok
->is(tok::less
)) {
4092 } else if (FormatTok
->is(tok::greater
)) {
4093 assert(NumOpenAngles
> 0 && "'>' makes NumOpenAngles negative");
4096 } while (!eof() && NumOpenAngles
!= 0);
4097 nextToken(); // Skip '>'.
4100 // Returns true for the declaration/definition form of @protocol,
4101 // false for the expression form.
4102 bool UnwrappedLineParser::parseObjCProtocol() {
4103 assert(FormatTok
->Tok
.getObjCKeywordID() == tok::objc_protocol
);
4106 if (FormatTok
->is(tok::l_paren
)) {
4107 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4111 // The definition/declaration form,
4113 // - (int)someMethod;
4116 nextToken(); // protocol name
4118 if (FormatTok
->is(tok::less
))
4119 parseObjCProtocolList();
4121 // Check for protocol declaration.
4122 if (FormatTok
->is(tok::semi
)) {
4129 parseObjCUntilAtEnd();
4133 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
4134 bool IsImport
= FormatTok
->is(Keywords
.kw_import
);
4135 assert(IsImport
|| FormatTok
->is(tok::kw_export
));
4138 // Consume the "default" in "export default class/function".
4139 if (FormatTok
->is(tok::kw_default
))
4142 // Consume "async function", "function" and "default function", so that these
4143 // get parsed as free-standing JS functions, i.e. do not require a trailing
4145 if (FormatTok
->is(Keywords
.kw_async
))
4147 if (FormatTok
->is(Keywords
.kw_function
)) {
4152 // For imports, `export *`, `export {...}`, consume the rest of the line up
4153 // to the terminating `;`. For everything else, just return and continue
4154 // parsing the structural element, i.e. the declaration or expression for
4155 // `export default`.
4156 if (!IsImport
&& !FormatTok
->isOneOf(tok::l_brace
, tok::star
) &&
4157 !FormatTok
->isStringLiteral() &&
4158 !(FormatTok
->is(Keywords
.kw_type
) &&
4159 Tokens
->peekNextToken()->isOneOf(tok::l_brace
, tok::star
))) {
4164 if (FormatTok
->is(tok::semi
))
4166 if (Line
->Tokens
.empty()) {
4167 // Common issue: Automatic Semicolon Insertion wrapped the line, so the
4168 // import statement should terminate.
4171 if (FormatTok
->is(tok::l_brace
)) {
4172 FormatTok
->setBlockKind(BK_Block
);
4181 void UnwrappedLineParser::parseStatementMacro() {
4183 if (FormatTok
->is(tok::l_paren
))
4185 if (FormatTok
->is(tok::semi
))
4190 void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4191 // consume things like a::`b.c[d:e] or a::*
4193 if (FormatTok
->isOneOf(tok::star
, tok::period
, tok::periodstar
,
4194 tok::coloncolon
, tok::hash
) ||
4195 Keywords
.isVerilogIdentifier(*FormatTok
)) {
4197 } else if (FormatTok
->is(tok::l_square
)) {
4205 void UnwrappedLineParser::parseVerilogSensitivityList() {
4206 if (FormatTok
->isNot(tok::at
))
4209 // A block event expression has 2 at signs.
4210 if (FormatTok
->is(tok::at
))
4212 switch (FormatTok
->Tok
.getKind()) {
4220 parseVerilogHierarchyIdentifier();
4225 unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
4226 unsigned AddLevels
= 0;
4228 if (FormatTok
->is(Keywords
.kw_clocking
)) {
4230 if (Keywords
.isVerilogIdentifier(*FormatTok
))
4232 parseVerilogSensitivityList();
4233 if (FormatTok
->is(tok::semi
))
4235 } else if (FormatTok
->isOneOf(tok::kw_case
, Keywords
.kw_casex
,
4236 Keywords
.kw_casez
, Keywords
.kw_randcase
,
4237 Keywords
.kw_randsequence
)) {
4238 if (Style
.IndentCaseLabels
)
4241 if (FormatTok
->is(tok::l_paren
)) {
4242 FormatTok
->setFinalizedType(TT_ConditionLParen
);
4245 if (FormatTok
->isOneOf(Keywords
.kw_inside
, Keywords
.kw_matches
))
4247 // The case header has no semicolon.
4251 // all the words like the name of the module and specifiers like
4252 // "automatic" and the width of function return type
4254 if (FormatTok
->is(tok::l_square
)) {
4255 auto Prev
= FormatTok
->getPreviousNonComment();
4256 if (Prev
&& Keywords
.isVerilogIdentifier(*Prev
))
4257 Prev
->setFinalizedType(TT_VerilogDimensionedTypeName
);
4259 } else if (Keywords
.isVerilogIdentifier(*FormatTok
) ||
4260 FormatTok
->isOneOf(Keywords
.kw_automatic
, tok::kw_static
)) {
4267 auto NewLine
= [this]() {
4269 Line
->IsContinuation
= true;
4273 while (FormatTok
->is(Keywords
.kw_import
)) {
4276 parseVerilogHierarchyIdentifier();
4277 if (FormatTok
->is(tok::semi
))
4281 // parameters and ports
4282 if (FormatTok
->is(Keywords
.kw_verilogHash
)) {
4285 if (FormatTok
->is(tok::l_paren
)) {
4286 FormatTok
->setFinalizedType(TT_VerilogMultiLineListLParen
);
4290 if (FormatTok
->is(tok::l_paren
)) {
4292 FormatTok
->setFinalizedType(TT_VerilogMultiLineListLParen
);
4296 // extends and implements
4297 if (FormatTok
->is(Keywords
.kw_extends
)) {
4300 parseVerilogHierarchyIdentifier();
4301 if (FormatTok
->is(tok::l_paren
))
4304 if (FormatTok
->is(Keywords
.kw_implements
)) {
4308 parseVerilogHierarchyIdentifier();
4309 } while (FormatTok
->is(tok::comma
));
4312 // Coverage event for cover groups.
4313 if (FormatTok
->is(tok::at
)) {
4315 parseVerilogSensitivityList();
4318 if (FormatTok
->is(tok::semi
))
4319 nextToken(/*LevelDifference=*/1);
4326 void UnwrappedLineParser::parseVerilogTable() {
4327 assert(FormatTok
->is(Keywords
.kw_table
));
4328 nextToken(/*LevelDifference=*/1);
4331 auto InitialLevel
= Line
->Level
++;
4332 while (!eof() && !Keywords
.isVerilogEnd(*FormatTok
)) {
4333 FormatToken
*Tok
= FormatTok
;
4335 if (Tok
->is(tok::semi
))
4337 else if (Tok
->isOneOf(tok::star
, tok::colon
, tok::question
, tok::minus
))
4338 Tok
->setFinalizedType(TT_VerilogTableItem
);
4340 Line
->Level
= InitialLevel
;
4341 nextToken(/*LevelDifference=*/-1);
4345 void UnwrappedLineParser::parseVerilogCaseLabel() {
4346 // The label will get unindented in AnnotatingParser. If there are no leading
4347 // spaces, indent the rest here so that things inside the block will be
4348 // indented relative to things outside. We don't use parseLabel because we
4349 // don't know whether this colon is a label or a ternary expression at this
4351 auto OrigLevel
= Line
->Level
;
4352 auto FirstLine
= CurrentLines
->size();
4353 if (Line
->Level
== 0 || (Line
->InPPDirective
&& Line
->Level
<= 1))
4355 else if (!Style
.IndentCaseBlocks
&& Keywords
.isVerilogBegin(*FormatTok
))
4357 parseStructuralElement();
4358 // Restore the indentation in both the new line and the line that has the
4360 if (CurrentLines
->size() > FirstLine
)
4361 (*CurrentLines
)[FirstLine
].Level
= OrigLevel
;
4362 Line
->Level
= OrigLevel
;
4365 bool UnwrappedLineParser::containsExpansion(const UnwrappedLine
&Line
) const {
4366 for (const auto &N
: Line
.Tokens
) {
4367 if (N
.Tok
->MacroCtx
)
4369 for (const UnwrappedLine
&Child
: N
.Children
)
4370 if (containsExpansion(Child
))
4376 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel
) {
4377 if (Line
->Tokens
.empty())
4380 if (!parsingPPDirective()) {
4381 llvm::dbgs() << "Adding unwrapped line:\n";
4382 printDebugInfo(*Line
);
4386 // If this line closes a block when in Whitesmiths mode, remember that
4387 // information so that the level can be decreased after the line is added.
4388 // This has to happen after the addition of the line since the line itself
4389 // needs to be indented.
4390 bool ClosesWhitesmithsBlock
=
4391 Line
->MatchingOpeningBlockLineIndex
!= UnwrappedLine::kInvalidIndex
&&
4392 Style
.BreakBeforeBraces
== FormatStyle::BS_Whitesmiths
;
4394 // If the current line was expanded from a macro call, we use it to
4395 // reconstruct an unwrapped line from the structure of the expanded unwrapped
4396 // line and the unexpanded token stream.
4397 if (!parsingPPDirective() && !InExpansion
&& containsExpansion(*Line
)) {
4399 Reconstruct
.emplace(Line
->Level
, Unexpanded
);
4400 Reconstruct
->addLine(*Line
);
4402 // While the reconstructed unexpanded lines are stored in the normal
4403 // flow of lines, the expanded lines are stored on the side to be analyzed
4404 // in an extra step.
4405 CurrentExpandedLines
.push_back(std::move(*Line
));
4407 if (Reconstruct
->finished()) {
4408 UnwrappedLine Reconstructed
= std::move(*Reconstruct
).takeResult();
4409 assert(!Reconstructed
.Tokens
.empty() &&
4410 "Reconstructed must at least contain the macro identifier.");
4411 assert(!parsingPPDirective());
4413 llvm::dbgs() << "Adding unexpanded line:\n";
4414 printDebugInfo(Reconstructed
);
4416 ExpandedLines
[Reconstructed
.Tokens
.begin()->Tok
] = CurrentExpandedLines
;
4417 Lines
.push_back(std::move(Reconstructed
));
4418 CurrentExpandedLines
.clear();
4419 Reconstruct
.reset();
4422 // At the top level we only get here when no unexpansion is going on, or
4423 // when conditional formatting led to unfinished macro reconstructions.
4424 assert(!Reconstruct
|| (CurrentLines
!= &Lines
) || PPStack
.size() > 0);
4425 CurrentLines
->push_back(std::move(*Line
));
4427 Line
->Tokens
.clear();
4428 Line
->MatchingOpeningBlockLineIndex
= UnwrappedLine::kInvalidIndex
;
4429 Line
->FirstStartColumn
= 0;
4430 Line
->IsContinuation
= false;
4431 Line
->SeenDecltypeAuto
= false;
4433 if (ClosesWhitesmithsBlock
&& AdjustLevel
== LineLevel::Remove
)
4435 if (!parsingPPDirective() && !PreprocessorDirectives
.empty()) {
4436 CurrentLines
->append(
4437 std::make_move_iterator(PreprocessorDirectives
.begin()),
4438 std::make_move_iterator(PreprocessorDirectives
.end()));
4439 PreprocessorDirectives
.clear();
4441 // Disconnect the current token from the last token on the previous line.
4442 FormatTok
->Previous
= nullptr;
4445 bool UnwrappedLineParser::eof() const { return FormatTok
->is(tok::eof
); }
4447 bool UnwrappedLineParser::isOnNewLine(const FormatToken
&FormatTok
) {
4448 return (Line
->InPPDirective
|| FormatTok
.HasUnescapedNewline
) &&
4449 FormatTok
.NewlinesBefore
> 0;
4452 // Checks if \p FormatTok is a line comment that continues the line comment
4453 // section on \p Line.
4455 continuesLineCommentSection(const FormatToken
&FormatTok
,
4456 const UnwrappedLine
&Line
,
4457 const llvm::Regex
&CommentPragmasRegex
) {
4458 if (Line
.Tokens
.empty())
4461 StringRef IndentContent
= FormatTok
.TokenText
;
4462 if (FormatTok
.TokenText
.starts_with("//") ||
4463 FormatTok
.TokenText
.starts_with("/*")) {
4464 IndentContent
= FormatTok
.TokenText
.substr(2);
4466 if (CommentPragmasRegex
.match(IndentContent
))
4469 // If Line starts with a line comment, then FormatTok continues the comment
4470 // section if its original column is greater than or equal to the original
4471 // start column of the line.
4473 // Define the min column token of a line as follows: if a line ends in '{' or
4474 // contains a '{' followed by a line comment, then the min column token is
4475 // that '{'. Otherwise, the min column token of the line is the first token of
4478 // If Line starts with a token other than a line comment, then FormatTok
4479 // continues the comment section if its original column is greater than the
4480 // original start column of the min column token of the line.
4482 // For example, the second line comment continues the first in these cases:
4494 // int i; // first line
4499 // do { // first line
4512 // The second line comment doesn't continue the first in these cases:
4519 // int i; // first line
4524 // do { // first line
4535 const FormatToken
*MinColumnToken
= Line
.Tokens
.front().Tok
;
4537 // Scan for '{//'. If found, use the column of '{' as a min column for line
4538 // comment section continuation.
4539 const FormatToken
*PreviousToken
= nullptr;
4540 for (const UnwrappedLineNode
&Node
: Line
.Tokens
) {
4541 if (PreviousToken
&& PreviousToken
->is(tok::l_brace
) &&
4542 isLineComment(*Node
.Tok
)) {
4543 MinColumnToken
= PreviousToken
;
4546 PreviousToken
= Node
.Tok
;
4548 // Grab the last newline preceding a token in this unwrapped line.
4549 if (Node
.Tok
->NewlinesBefore
> 0)
4550 MinColumnToken
= Node
.Tok
;
4552 if (PreviousToken
&& PreviousToken
->is(tok::l_brace
))
4553 MinColumnToken
= PreviousToken
;
4555 return continuesLineComment(FormatTok
, /*Previous=*/Line
.Tokens
.back().Tok
,
4559 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext
) {
4560 bool JustComments
= Line
->Tokens
.empty();
4561 for (FormatToken
*Tok
: CommentsBeforeNextToken
) {
4562 // Line comments that belong to the same line comment section are put on the
4563 // same line since later we might want to reflow content between them.
4564 // Additional fine-grained breaking of line comment sections is controlled
4565 // by the class BreakableLineCommentSection in case it is desirable to keep
4566 // several line comment sections in the same unwrapped line.
4568 // FIXME: Consider putting separate line comment sections as children to the
4569 // unwrapped line instead.
4570 Tok
->ContinuesLineCommentSection
=
4571 continuesLineCommentSection(*Tok
, *Line
, CommentPragmasRegex
);
4572 if (isOnNewLine(*Tok
) && JustComments
&& !Tok
->ContinuesLineCommentSection
)
4576 if (NewlineBeforeNext
&& JustComments
)
4578 CommentsBeforeNextToken
.clear();
4581 void UnwrappedLineParser::nextToken(int LevelDifference
) {
4584 flushComments(isOnNewLine(*FormatTok
));
4585 pushToken(FormatTok
);
4586 FormatToken
*Previous
= FormatTok
;
4587 if (!Style
.isJavaScript())
4588 readToken(LevelDifference
);
4590 readTokenWithJavaScriptASI();
4591 FormatTok
->Previous
= Previous
;
4592 if (Style
.isVerilog()) {
4593 // Blocks in Verilog can have `begin` and `end` instead of braces. For
4594 // keywords like `begin`, we can't treat them the same as left braces
4595 // because some contexts require one of them. For example structs use
4596 // braces and if blocks use keywords, and a left brace can occur in an if
4597 // statement, but it is not a block. For keywords like `end`, we simply
4598 // treat them the same as right braces.
4599 if (Keywords
.isVerilogEnd(*FormatTok
))
4600 FormatTok
->Tok
.setKind(tok::r_brace
);
4604 void UnwrappedLineParser::distributeComments(
4605 const SmallVectorImpl
<FormatToken
*> &Comments
,
4606 const FormatToken
*NextTok
) {
4607 // Whether or not a line comment token continues a line is controlled by
4608 // the method continuesLineCommentSection, with the following caveat:
4610 // Define a trail of Comments to be a nonempty proper postfix of Comments such
4611 // that each comment line from the trail is aligned with the next token, if
4612 // the next token exists. If a trail exists, the beginning of the maximal
4613 // trail is marked as a start of a new comment section.
4615 // For example in this code:
4617 // int a; // line about a
4618 // // line 1 about b
4619 // // line 2 about b
4622 // the two lines about b form a maximal trail, so there are two sections, the
4623 // first one consisting of the single comment "// line about a" and the
4624 // second one consisting of the next two comments.
4625 if (Comments
.empty())
4627 bool ShouldPushCommentsInCurrentLine
= true;
4628 bool HasTrailAlignedWithNextToken
= false;
4629 unsigned StartOfTrailAlignedWithNextToken
= 0;
4631 // We are skipping the first element intentionally.
4632 for (unsigned i
= Comments
.size() - 1; i
> 0; --i
) {
4633 if (Comments
[i
]->OriginalColumn
== NextTok
->OriginalColumn
) {
4634 HasTrailAlignedWithNextToken
= true;
4635 StartOfTrailAlignedWithNextToken
= i
;
4639 for (unsigned i
= 0, e
= Comments
.size(); i
< e
; ++i
) {
4640 FormatToken
*FormatTok
= Comments
[i
];
4641 if (HasTrailAlignedWithNextToken
&& i
== StartOfTrailAlignedWithNextToken
) {
4642 FormatTok
->ContinuesLineCommentSection
= false;
4644 FormatTok
->ContinuesLineCommentSection
=
4645 continuesLineCommentSection(*FormatTok
, *Line
, CommentPragmasRegex
);
4647 if (!FormatTok
->ContinuesLineCommentSection
&&
4648 (isOnNewLine(*FormatTok
) || FormatTok
->IsFirst
)) {
4649 ShouldPushCommentsInCurrentLine
= false;
4651 if (ShouldPushCommentsInCurrentLine
)
4652 pushToken(FormatTok
);
4654 CommentsBeforeNextToken
.push_back(FormatTok
);
4658 void UnwrappedLineParser::readToken(int LevelDifference
) {
4659 SmallVector
<FormatToken
*, 1> Comments
;
4660 bool PreviousWasComment
= false;
4661 bool FirstNonCommentOnLine
= false;
4663 FormatTok
= Tokens
->getNextToken();
4665 while (FormatTok
->getType() == TT_ConflictStart
||
4666 FormatTok
->getType() == TT_ConflictEnd
||
4667 FormatTok
->getType() == TT_ConflictAlternative
) {
4668 if (FormatTok
->getType() == TT_ConflictStart
)
4669 conditionalCompilationStart(/*Unreachable=*/false);
4670 else if (FormatTok
->getType() == TT_ConflictAlternative
)
4671 conditionalCompilationAlternative();
4672 else if (FormatTok
->getType() == TT_ConflictEnd
)
4673 conditionalCompilationEnd();
4674 FormatTok
= Tokens
->getNextToken();
4675 FormatTok
->MustBreakBefore
= true;
4678 auto IsFirstNonCommentOnLine
= [](bool FirstNonCommentOnLine
,
4679 const FormatToken
&Tok
,
4680 bool PreviousWasComment
) {
4681 auto IsFirstOnLine
= [](const FormatToken
&Tok
) {
4682 return Tok
.HasUnescapedNewline
|| Tok
.IsFirst
;
4685 // Consider preprocessor directives preceded by block comments as first
4687 if (PreviousWasComment
)
4688 return FirstNonCommentOnLine
|| IsFirstOnLine(Tok
);
4689 return IsFirstOnLine(Tok
);
4692 FirstNonCommentOnLine
= IsFirstNonCommentOnLine(
4693 FirstNonCommentOnLine
, *FormatTok
, PreviousWasComment
);
4694 PreviousWasComment
= FormatTok
->is(tok::comment
);
4696 while (!Line
->InPPDirective
&& FormatTok
->is(tok::hash
) &&
4697 (!Style
.isVerilog() ||
4698 Keywords
.isVerilogPPDirective(*Tokens
->peekNextToken())) &&
4699 FirstNonCommentOnLine
) {
4700 distributeComments(Comments
, FormatTok
);
4702 // If there is an unfinished unwrapped line, we flush the preprocessor
4703 // directives only after that unwrapped line was finished later.
4704 bool SwitchToPreprocessorLines
= !Line
->Tokens
.empty();
4705 ScopedLineState
BlockState(*this, SwitchToPreprocessorLines
);
4706 assert((LevelDifference
>= 0 ||
4707 static_cast<unsigned>(-LevelDifference
) <= Line
->Level
) &&
4708 "LevelDifference makes Line->Level negative");
4709 Line
->Level
+= LevelDifference
;
4710 // Comments stored before the preprocessor directive need to be output
4711 // before the preprocessor directive, at the same level as the
4712 // preprocessor directive, as we consider them to apply to the directive.
4713 if (Style
.IndentPPDirectives
== FormatStyle::PPDIS_BeforeHash
&&
4714 PPBranchLevel
> 0) {
4715 Line
->Level
+= PPBranchLevel
;
4717 flushComments(isOnNewLine(*FormatTok
));
4719 PreviousWasComment
= FormatTok
->is(tok::comment
);
4720 FirstNonCommentOnLine
= IsFirstNonCommentOnLine(
4721 FirstNonCommentOnLine
, *FormatTok
, PreviousWasComment
);
4724 if (!PPStack
.empty() && (PPStack
.back().Kind
== PP_Unreachable
) &&
4725 !Line
->InPPDirective
) {
4729 if (FormatTok
->is(tok::identifier
) &&
4730 Macros
.defined(FormatTok
->TokenText
) &&
4731 // FIXME: Allow expanding macros in preprocessor directives.
4732 !Line
->InPPDirective
) {
4733 FormatToken
*ID
= FormatTok
;
4734 unsigned Position
= Tokens
->getPosition();
4736 // To correctly parse the code, we need to replace the tokens of the macro
4737 // call with its expansion.
4738 auto PreCall
= std::move(Line
);
4739 Line
.reset(new UnwrappedLine
);
4740 bool OldInExpansion
= InExpansion
;
4742 // We parse the macro call into a new line.
4743 auto Args
= parseMacroCall();
4744 InExpansion
= OldInExpansion
;
4745 assert(Line
->Tokens
.front().Tok
== ID
);
4746 // And remember the unexpanded macro call tokens.
4747 auto UnexpandedLine
= std::move(Line
);
4748 // Reset to the old line.
4749 Line
= std::move(PreCall
);
4752 llvm::dbgs() << "Macro call: " << ID
->TokenText
<< "(";
4754 llvm::dbgs() << "(";
4755 for (const auto &Arg
: Args
.value())
4756 for (const auto &T
: Arg
)
4757 llvm::dbgs() << T
->TokenText
<< " ";
4758 llvm::dbgs() << ")";
4760 llvm::dbgs() << "\n";
4762 if (Macros
.objectLike(ID
->TokenText
) && Args
&&
4763 !Macros
.hasArity(ID
->TokenText
, Args
->size())) {
4764 // The macro is either
4765 // - object-like, but we got arguments, or
4766 // - overloaded to be both object-like and function-like, but none of
4767 // the function-like arities match the number of arguments.
4768 // Thus, expand as object-like macro.
4769 LLVM_DEBUG(llvm::dbgs()
4770 << "Macro \"" << ID
->TokenText
4771 << "\" not overloaded for arity " << Args
->size()
4772 << "or not function-like, using object-like overload.");
4774 UnexpandedLine
->Tokens
.resize(1);
4775 Tokens
->setPosition(Position
);
4777 assert(!Args
&& Macros
.objectLike(ID
->TokenText
));
4779 if ((!Args
&& Macros
.objectLike(ID
->TokenText
)) ||
4780 (Args
&& Macros
.hasArity(ID
->TokenText
, Args
->size()))) {
4781 // Next, we insert the expanded tokens in the token stream at the
4782 // current position, and continue parsing.
4783 Unexpanded
[ID
] = std::move(UnexpandedLine
);
4784 SmallVector
<FormatToken
*, 8> Expansion
=
4785 Macros
.expand(ID
, std::move(Args
));
4786 if (!Expansion
.empty())
4787 FormatTok
= Tokens
->insertTokens(Expansion
);
4790 llvm::dbgs() << "Expanded: ";
4791 for (const auto &T
: Expansion
)
4792 llvm::dbgs() << T
->TokenText
<< " ";
4793 llvm::dbgs() << "\n";
4797 llvm::dbgs() << "Did not expand macro \"" << ID
->TokenText
4798 << "\", because it was used ";
4800 llvm::dbgs() << "with " << Args
->size();
4802 llvm::dbgs() << "without";
4803 llvm::dbgs() << " arguments, which doesn't match any definition.\n";
4805 Tokens
->setPosition(Position
);
4810 if (FormatTok
->isNot(tok::comment
)) {
4811 distributeComments(Comments
, FormatTok
);
4816 Comments
.push_back(FormatTok
);
4819 distributeComments(Comments
, nullptr);
4824 template <typename Iterator
>
4825 void pushTokens(Iterator Begin
, Iterator End
,
4826 llvm::SmallVectorImpl
<FormatToken
*> &Into
) {
4827 for (auto I
= Begin
; I
!= End
; ++I
) {
4828 Into
.push_back(I
->Tok
);
4829 for (const auto &Child
: I
->Children
)
4830 pushTokens(Child
.Tokens
.begin(), Child
.Tokens
.end(), Into
);
4835 std::optional
<llvm::SmallVector
<llvm::SmallVector
<FormatToken
*, 8>, 1>>
4836 UnwrappedLineParser::parseMacroCall() {
4837 std::optional
<llvm::SmallVector
<llvm::SmallVector
<FormatToken
*, 8>, 1>> Args
;
4838 assert(Line
->Tokens
.empty());
4840 if (FormatTok
->isNot(tok::l_paren
))
4842 unsigned Position
= Tokens
->getPosition();
4843 FormatToken
*Tok
= FormatTok
;
4846 auto ArgStart
= std::prev(Line
->Tokens
.end());
4850 switch (FormatTok
->Tok
.getKind()) {
4855 case tok::r_paren
: {
4861 Args
->push_back({});
4862 pushTokens(std::next(ArgStart
), Line
->Tokens
.end(), Args
->back());
4871 Args
->push_back({});
4872 pushTokens(std::next(ArgStart
), Line
->Tokens
.end(), Args
->back());
4874 ArgStart
= std::prev(Line
->Tokens
.end());
4882 Line
->Tokens
.resize(1);
4883 Tokens
->setPosition(Position
);
4888 void UnwrappedLineParser::pushToken(FormatToken
*Tok
) {
4889 Line
->Tokens
.push_back(UnwrappedLineNode(Tok
));
4890 if (MustBreakBeforeNextToken
) {
4891 Line
->Tokens
.back().Tok
->MustBreakBefore
= true;
4892 MustBreakBeforeNextToken
= false;
4896 } // end namespace format
4897 } // end namespace clang