1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
13 //===----------------------------------------------------------------------===//
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "FormatTokenLexer.h"
18 #include "FormatTokenSource.h"
20 #include "TokenAnnotator.h"
21 #include "clang/Basic/TokenKinds.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/raw_os_ostream.h"
26 #include "llvm/Support/raw_ostream.h"
31 #define DEBUG_TYPE "format-parser"
38 void printLine(llvm::raw_ostream
&OS
, const UnwrappedLine
&Line
,
39 StringRef Prefix
= "", bool PrintText
= false) {
40 OS
<< Prefix
<< "Line(" << Line
.Level
<< ", FSC=" << Line
.FirstStartColumn
41 << ")" << (Line
.InPPDirective
? " MACRO" : "") << ": ";
43 for (std::list
<UnwrappedLineNode
>::const_iterator I
= Line
.Tokens
.begin(),
44 E
= Line
.Tokens
.end();
50 OS
<< I
->Tok
->Tok
.getName() << "["
51 << "T=" << (unsigned)I
->Tok
->getType()
52 << ", OC=" << I
->Tok
->OriginalColumn
<< ", \"" << I
->Tok
->TokenText
54 for (SmallVectorImpl
<UnwrappedLine
>::const_iterator
55 CI
= I
->Children
.begin(),
56 CE
= I
->Children
.end();
59 printLine(OS
, *CI
, (Prefix
+ " ").str());
67 LLVM_ATTRIBUTE_UNUSED
static void printDebugInfo(const UnwrappedLine
&Line
) {
68 printLine(llvm::dbgs(), Line
);
71 class ScopedDeclarationState
{
73 ScopedDeclarationState(UnwrappedLine
&Line
, llvm::BitVector
&Stack
,
74 bool MustBeDeclaration
)
75 : Line(Line
), Stack(Stack
) {
76 Line
.MustBeDeclaration
= MustBeDeclaration
;
77 Stack
.push_back(MustBeDeclaration
);
79 ~ScopedDeclarationState() {
82 Line
.MustBeDeclaration
= Stack
.back();
84 Line
.MustBeDeclaration
= true;
89 llvm::BitVector
&Stack
;
92 } // end anonymous namespace
94 std::ostream
&operator<<(std::ostream
&Stream
, const UnwrappedLine
&Line
) {
95 llvm::raw_os_ostream
OS(Stream
);
100 class ScopedLineState
{
102 ScopedLineState(UnwrappedLineParser
&Parser
,
103 bool SwitchToPreprocessorLines
= false)
104 : Parser(Parser
), OriginalLines(Parser
.CurrentLines
) {
105 if (SwitchToPreprocessorLines
)
106 Parser
.CurrentLines
= &Parser
.PreprocessorDirectives
;
107 else if (!Parser
.Line
->Tokens
.empty())
108 Parser
.CurrentLines
= &Parser
.Line
->Tokens
.back().Children
;
109 PreBlockLine
= std::move(Parser
.Line
);
110 Parser
.Line
= std::make_unique
<UnwrappedLine
>();
111 Parser
.Line
->Level
= PreBlockLine
->Level
;
112 Parser
.Line
->PPLevel
= PreBlockLine
->PPLevel
;
113 Parser
.Line
->InPPDirective
= PreBlockLine
->InPPDirective
;
114 Parser
.Line
->InMacroBody
= PreBlockLine
->InMacroBody
;
115 Parser
.Line
->UnbracedBodyLevel
= PreBlockLine
->UnbracedBodyLevel
;
119 if (!Parser
.Line
->Tokens
.empty())
120 Parser
.addUnwrappedLine();
121 assert(Parser
.Line
->Tokens
.empty());
122 Parser
.Line
= std::move(PreBlockLine
);
123 if (Parser
.CurrentLines
== &Parser
.PreprocessorDirectives
)
124 Parser
.MustBreakBeforeNextToken
= true;
125 Parser
.CurrentLines
= OriginalLines
;
129 UnwrappedLineParser
&Parser
;
131 std::unique_ptr
<UnwrappedLine
> PreBlockLine
;
132 SmallVectorImpl
<UnwrappedLine
> *OriginalLines
;
135 class CompoundStatementIndenter
{
137 CompoundStatementIndenter(UnwrappedLineParser
*Parser
,
138 const FormatStyle
&Style
, unsigned &LineLevel
)
139 : CompoundStatementIndenter(Parser
, LineLevel
,
140 Style
.BraceWrapping
.AfterControlStatement
,
141 Style
.BraceWrapping
.IndentBraces
) {}
142 CompoundStatementIndenter(UnwrappedLineParser
*Parser
, unsigned &LineLevel
,
143 bool WrapBrace
, bool IndentBrace
)
144 : LineLevel(LineLevel
), OldLineLevel(LineLevel
) {
146 Parser
->addUnwrappedLine();
150 ~CompoundStatementIndenter() { LineLevel
= OldLineLevel
; }
154 unsigned OldLineLevel
;
157 UnwrappedLineParser::UnwrappedLineParser(
158 SourceManager
&SourceMgr
, const FormatStyle
&Style
,
159 const AdditionalKeywords
&Keywords
, unsigned FirstStartColumn
,
160 ArrayRef
<FormatToken
*> Tokens
, UnwrappedLineConsumer
&Callback
,
161 llvm::SpecificBumpPtrAllocator
<FormatToken
> &Allocator
,
162 IdentifierTable
&IdentTable
)
163 : Line(new UnwrappedLine
), MustBreakBeforeNextToken(false),
164 CurrentLines(&Lines
), Style(Style
), IsCpp(Style
.isCpp()),
165 LangOpts(getFormattingLangOpts(Style
)), Keywords(Keywords
),
166 CommentPragmasRegex(Style
.CommentPragmas
), Tokens(nullptr),
167 Callback(Callback
), AllTokens(Tokens
), PPBranchLevel(-1),
168 IncludeGuard(Style
.IndentPPDirectives
== FormatStyle::PPDIS_None
171 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn
),
172 Macros(Style
.Macros
, SourceMgr
, Style
, Allocator
, IdentTable
) {
173 assert(IsCpp
== LangOpts
.CXXOperatorNames
);
176 void UnwrappedLineParser::reset() {
178 IncludeGuard
= Style
.IndentPPDirectives
== FormatStyle::PPDIS_None
181 IncludeGuardToken
= nullptr;
182 Line
.reset(new UnwrappedLine
);
183 CommentsBeforeNextToken
.clear();
185 MustBreakBeforeNextToken
= false;
186 IsDecltypeAutoFunction
= false;
187 PreprocessorDirectives
.clear();
188 CurrentLines
= &Lines
;
189 DeclarationScopeStack
.clear();
190 NestedTooDeep
.clear();
191 NestedLambdas
.clear();
193 Line
->FirstStartColumn
= FirstStartColumn
;
195 if (!Unexpanded
.empty())
196 for (FormatToken
*Token
: AllTokens
)
197 Token
->MacroCtx
.reset();
198 CurrentExpandedLines
.clear();
199 ExpandedLines
.clear();
205 void UnwrappedLineParser::parse() {
206 IndexedTokenSource
TokenSource(AllTokens
);
207 Line
->FirstStartColumn
= FirstStartColumn
;
209 LLVM_DEBUG(llvm::dbgs() << "----\n");
211 Tokens
= &TokenSource
;
217 // If we found an include guard then all preprocessor directives (other than
218 // the guard) are over-indented by one.
219 if (IncludeGuard
== IG_Found
) {
220 for (auto &Line
: Lines
)
221 if (Line
.InPPDirective
&& Line
.Level
> 0)
225 // Create line with eof token.
227 pushToken(FormatTok
);
230 // In a first run, format everything with the lines containing macro calls
231 // replaced by the expansion.
232 if (!ExpandedLines
.empty()) {
233 LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
234 for (const auto &Line
: Lines
) {
235 if (!Line
.Tokens
.empty()) {
236 auto it
= ExpandedLines
.find(Line
.Tokens
.begin()->Tok
);
237 if (it
!= ExpandedLines
.end()) {
238 for (const auto &Expanded
: it
->second
) {
239 LLVM_DEBUG(printDebugInfo(Expanded
));
240 Callback
.consumeUnwrappedLine(Expanded
);
245 LLVM_DEBUG(printDebugInfo(Line
));
246 Callback
.consumeUnwrappedLine(Line
);
248 Callback
.finishRun();
251 LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
252 for (const UnwrappedLine
&Line
: Lines
) {
253 LLVM_DEBUG(printDebugInfo(Line
));
254 Callback
.consumeUnwrappedLine(Line
);
256 Callback
.finishRun();
258 while (!PPLevelBranchIndex
.empty() &&
259 PPLevelBranchIndex
.back() + 1 >= PPLevelBranchCount
.back()) {
260 PPLevelBranchIndex
.resize(PPLevelBranchIndex
.size() - 1);
261 PPLevelBranchCount
.resize(PPLevelBranchCount
.size() - 1);
263 if (!PPLevelBranchIndex
.empty()) {
264 ++PPLevelBranchIndex
.back();
265 assert(PPLevelBranchIndex
.size() == PPLevelBranchCount
.size());
266 assert(PPLevelBranchIndex
.back() <= PPLevelBranchCount
.back());
268 } while (!PPLevelBranchIndex
.empty());
271 void UnwrappedLineParser::parseFile() {
272 // The top-level context in a file always has declarations, except for pre-
273 // processor directives and JavaScript files.
274 bool MustBeDeclaration
= !Line
->InPPDirective
&& !Style
.isJavaScript();
275 ScopedDeclarationState
DeclarationState(*Line
, DeclarationScopeStack
,
277 if (Style
.Language
== FormatStyle::LK_TextProto
)
281 // Make sure to format the remaining tokens.
283 // LK_TextProto is special since its top-level is parsed as the body of a
284 // braced list, which does not necessarily have natural line separators such
285 // as a semicolon. Comments after the last entry that have been determined to
286 // not belong to that line, as in:
288 // // endfile comment
289 // do not have a chance to be put on a line of their own until this point.
290 // Here we add this newline before end-of-file comments.
291 if (Style
.Language
== FormatStyle::LK_TextProto
&&
292 !CommentsBeforeNextToken
.empty()) {
299 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
301 switch (FormatTok
->Tok
.getKind()) {
305 if (FormatTok
->is(Keywords
.kw_where
)) {
308 parseCSharpGenericTypeConstraint();
317 void UnwrappedLineParser::parseCSharpAttribute() {
318 int UnpairedSquareBrackets
= 1;
320 switch (FormatTok
->Tok
.getKind()) {
323 --UnpairedSquareBrackets
;
324 if (UnpairedSquareBrackets
== 0) {
330 ++UnpairedSquareBrackets
;
340 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
341 if (!Lines
.empty() && Lines
.back().InPPDirective
)
344 const FormatToken
*Previous
= Tokens
->getPreviousToken();
345 return Previous
&& Previous
->is(tok::comment
) &&
346 (Previous
->IsMultiline
|| Previous
->NewlinesBefore
> 0);
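349 /// \brief Parses a level, that is, a sequence of structural elements ending at the closing brace of the enclosing block or at the end of the file.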
350 /// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
351 /// \param IfKind The \p if statement kind in the level.
352 /// \param IfLeftBrace The left brace of the \p if block in the level.
353 /// \returns true if a simple block of if/else/for/while, or false otherwise.
354 /// (A simple block has a single statement.)
355 bool UnwrappedLineParser::parseLevel(const FormatToken
*OpeningBrace
,
357 FormatToken
**IfLeftBrace
) {
358 const bool InRequiresExpression
=
359 OpeningBrace
&& OpeningBrace
->is(TT_RequiresExpressionLBrace
);
360 const bool IsPrecededByCommentOrPPDirective
=
361 !Style
.RemoveBracesLLVM
|| precededByCommentOrPPDirective();
362 FormatToken
*IfLBrace
= nullptr;
363 bool HasDoWhile
= false;
364 bool HasLabel
= false;
365 unsigned StatementCount
= 0;
366 bool SwitchLabelEncountered
= false;
369 if (FormatTok
->isAttribute()) {
371 if (FormatTok
->is(tok::l_paren
))
375 tok::TokenKind Kind
= FormatTok
->Tok
.getKind();
376 if (FormatTok
->is(TT_MacroBlockBegin
))
378 else if (FormatTok
->is(TT_MacroBlockEnd
))
381 auto ParseDefault
= [this, OpeningBrace
, IfKind
, &IfLBrace
, &HasDoWhile
,
382 &HasLabel
, &StatementCount
] {
383 parseStructuralElement(OpeningBrace
, IfKind
, &IfLBrace
,
384 HasDoWhile
? nullptr : &HasDoWhile
,
385 HasLabel
? nullptr : &HasLabel
);
387 assert(StatementCount
> 0 && "StatementCount overflow!");
396 if (InRequiresExpression
) {
397 FormatTok
->setFinalizedType(TT_RequiresExpressionLBrace
);
398 } else if (FormatTok
->Previous
&&
399 FormatTok
->Previous
->ClosesRequiresClause
) {
400 // We need the 'default' case here to correctly parse a function
405 if (!InRequiresExpression
&& FormatTok
->isNot(TT_MacroBlockBegin
)) {
406 if (tryToParseBracedList())
408 FormatTok
->setFinalizedType(TT_BlockLBrace
);
412 assert(StatementCount
> 0 && "StatementCount overflow!");
417 if (!Style
.RemoveBracesLLVM
|| Line
->InPPDirective
||
418 !OpeningBrace
->isOneOf(TT_ControlStatementLBrace
, TT_ElseLBrace
)) {
421 if (FormatTok
->isNot(tok::r_brace
) || StatementCount
!= 1 || HasLabel
||
422 HasDoWhile
|| IsPrecededByCommentOrPPDirective
||
423 precededByCommentOrPPDirective()) {
426 const FormatToken
*Next
= Tokens
->peekNextToken();
427 if (Next
->is(tok::comment
) && Next
->NewlinesBefore
== 0)
430 *IfLeftBrace
= IfLBrace
;
436 case tok::kw_default
: {
437 unsigned StoredPosition
= Tokens
->getPosition();
438 auto *Next
= Tokens
->getNextNonComment();
439 FormatTok
= Tokens
->setPosition(StoredPosition
);
440 if (!Next
->isOneOf(tok::colon
, tok::arrow
)) {
441 // default not followed by `:` or `->` is not a case label; treat it
442 // like an identifier.
443 parseStructuralElement();
446 // Else, if it is 'default:', fall through to the case handling.
450 if (Style
.Language
== FormatStyle::LK_Proto
|| Style
.isVerilog() ||
451 (Style
.isJavaScript() && Line
->MustBeDeclaration
)) {
452 // Proto: there are no switch/case statements
453 // Verilog: Case labels don't have this word. We handle case
454 // labels including default in TokenAnnotator.
455 // JavaScript: A 'case: string' style field declaration.
459 if (!SwitchLabelEncountered
&&
460 (Style
.IndentCaseLabels
||
461 (OpeningBrace
&& OpeningBrace
->is(TT_SwitchExpressionLBrace
)) ||
462 (Line
->InPPDirective
&& Line
->Level
== 1))) {
465 SwitchLabelEncountered
= true;
466 parseStructuralElement();
469 if (Style
.isCSharp()) {
471 parseCSharpAttribute();
474 if (handleCppAttributes())
486 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody
) {
487 // We'll parse forward through the tokens until we hit
488 // a closing brace or eof - note that getNextToken() will
489 // parse macros, so this will magically work inside macro
491 unsigned StoredPosition
= Tokens
->getPosition();
492 FormatToken
*Tok
= FormatTok
;
493 const FormatToken
*PrevTok
= Tok
->Previous
;
494 // Keep a stack of positions of lbrace tokens. We will
495 // update information about whether an lbrace starts a
496 // braced init list or a different block during the loop.
499 const FormatToken
*PrevTok
;
501 SmallVector
<StackEntry
, 8> LBraceStack
;
502 assert(Tok
->is(tok::l_brace
));
505 auto *NextTok
= Tokens
->getNextNonComment();
507 if (!Line
->InMacroBody
&& !Style
.isTableGen()) {
508 // Skip PPDirective lines and comments.
509 while (NextTok
->is(tok::hash
)) {
510 NextTok
= Tokens
->getNextToken();
511 if (NextTok
->is(tok::pp_not_keyword
))
514 NextTok
= Tokens
->getNextToken();
515 } while (NextTok
->NewlinesBefore
== 0 && NextTok
->isNot(tok::eof
));
517 while (NextTok
->is(tok::comment
))
518 NextTok
= Tokens
->getNextToken();
522 switch (Tok
->Tok
.getKind()) {
524 if (Style
.isJavaScript() && PrevTok
) {
525 if (PrevTok
->isOneOf(tok::colon
, tok::less
)) {
526 // A ':' indicates this code is in a type, or a braced list
527 // following a label in an object literal ({a: {b: 1}}).
528 // A '<' could be an object used in a comparison, but that is nonsense
529 // code (can never return true), so more likely it is a generic type
530 // argument (`X<{a: string; b: number}>`).
531 // The code below could be confused by semicolons between the
532 // individual members in a type member list, which would normally
533 // trigger BK_Block. In both cases, this must be parsed as an inline
535 Tok
->setBlockKind(BK_BracedInit
);
536 } else if (PrevTok
->is(tok::r_paren
)) {
537 // `) { }` can only occur in function or method declarations in JS.
538 Tok
->setBlockKind(BK_Block
);
541 Tok
->setBlockKind(BK_Unknown
);
543 LBraceStack
.push_back({Tok
, PrevTok
});
546 if (LBraceStack
.empty())
548 if (auto *LBrace
= LBraceStack
.back().Tok
; LBrace
->is(BK_Unknown
)) {
549 bool ProbablyBracedList
= false;
550 if (Style
.Language
== FormatStyle::LK_Proto
) {
551 ProbablyBracedList
= NextTok
->isOneOf(tok::comma
, tok::r_square
);
552 } else if (LBrace
->isNot(TT_EnumLBrace
)) {
553 // Using OriginalColumn to distinguish between ObjC methods and
554 // binary operators is a bit hacky.
555 bool NextIsObjCMethod
= NextTok
->isOneOf(tok::plus
, tok::minus
) &&
556 NextTok
->OriginalColumn
== 0;
558 // Try to detect a braced list. Note that regardless how we mark inner
559 // braces here, we will overwrite the BlockKind later if we parse a
560 // braced list (where all blocks inside are by default braced lists),
561 // or when we explicitly detect blocks (for example while parsing
564 // If we already marked the opening brace as braced list, the closing
565 // must also be part of it.
566 ProbablyBracedList
= LBrace
->is(TT_BracedListLBrace
);
568 ProbablyBracedList
= ProbablyBracedList
||
569 (Style
.isJavaScript() &&
570 NextTok
->isOneOf(Keywords
.kw_of
, Keywords
.kw_in
,
573 ProbablyBracedList
|| (IsCpp
&& (PrevTok
->Tok
.isLiteral() ||
574 NextTok
->is(tok::l_paren
)));
576 // If there is a comma, semicolon or right paren after the closing
577 // brace, we assume this is a braced initializer list.
578 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
579 // braced list in JS.
581 ProbablyBracedList
||
582 NextTok
->isOneOf(tok::comma
, tok::period
, tok::colon
,
583 tok::r_paren
, tok::r_square
, tok::ellipsis
);
585 // Distinguish between braced list in a constructor initializer list
586 // followed by constructor body, or just adjacent blocks.
588 ProbablyBracedList
||
589 (NextTok
->is(tok::l_brace
) && LBraceStack
.back().PrevTok
&&
590 LBraceStack
.back().PrevTok
->isOneOf(tok::identifier
,
594 ProbablyBracedList
||
595 (NextTok
->is(tok::identifier
) &&
596 !PrevTok
->isOneOf(tok::semi
, tok::r_brace
, tok::l_brace
));
598 ProbablyBracedList
= ProbablyBracedList
||
599 (NextTok
->is(tok::semi
) &&
600 (!ExpectClassBody
|| LBraceStack
.size() != 1));
603 ProbablyBracedList
||
604 (NextTok
->isBinaryOperator() && !NextIsObjCMethod
);
606 if (!Style
.isCSharp() && NextTok
->is(tok::l_square
)) {
607 // We can have an array subscript after a braced init
608 // list, but C++11 attributes are expected after blocks.
609 NextTok
= Tokens
->getNextToken();
610 ProbablyBracedList
= NextTok
->isNot(tok::l_square
);
613 // Cpp macro definition body that is a nonempty braced list or block:
614 if (IsCpp
&& Line
->InMacroBody
&& PrevTok
!= FormatTok
&&
615 !FormatTok
->Previous
&& NextTok
->is(tok::eof
) &&
616 // A statement can end with only `;` (simple statement), a block
617 // closing brace (compound statement), or `:` (label statement).
618 // If PrevTok is a block opening brace, Tok ends an empty block.
619 !PrevTok
->isOneOf(tok::semi
, BK_Block
, tok::colon
)) {
620 ProbablyBracedList
= true;
623 const auto BlockKind
= ProbablyBracedList
? BK_BracedInit
: BK_Block
;
624 Tok
->setBlockKind(BlockKind
);
625 LBrace
->setBlockKind(BlockKind
);
627 LBraceStack
.pop_back();
629 case tok::identifier
:
630 if (Tok
->isNot(TT_StatementMacro
))
641 if (!LBraceStack
.empty() && LBraceStack
.back().Tok
->is(BK_Unknown
))
642 LBraceStack
.back().Tok
->setBlockKind(BK_Block
);
650 } while (Tok
->isNot(tok::eof
) && !LBraceStack
.empty());
652 // Assume other blocks for all unclosed opening braces.
653 for (const auto &Entry
: LBraceStack
)
654 if (Entry
.Tok
->is(BK_Unknown
))
655 Entry
.Tok
->setBlockKind(BK_Block
);
657 FormatTok
= Tokens
->setPosition(StoredPosition
);
660 // Sets the token type of the directly previous right brace.
661 void UnwrappedLineParser::setPreviousRBraceType(TokenType Type
) {
662 if (auto Prev
= FormatTok
->getPreviousNonComment();
663 Prev
&& Prev
->is(tok::r_brace
)) {
664 Prev
->setFinalizedType(Type
);
669 static inline void hash_combine(std::size_t &seed
, const T
&v
) {
671 seed
^= hasher(v
) + 0x9e3779b9 + (seed
<< 6) + (seed
>> 2);
674 size_t UnwrappedLineParser::computePPHash() const {
676 for (const auto &i
: PPStack
) {
677 hash_combine(h
, size_t(i
.Kind
));
678 hash_combine(h
, i
.Line
);
683 // Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
684 // is not null, subtracts its length (plus the preceding space) when computing
685 // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
686 // running the token annotator on it so that we can restore them afterward.
687 bool UnwrappedLineParser::mightFitOnOneLine(
688 UnwrappedLine
&ParsedLine
, const FormatToken
*OpeningBrace
) const {
689 const auto ColumnLimit
= Style
.ColumnLimit
;
690 if (ColumnLimit
== 0)
693 auto &Tokens
= ParsedLine
.Tokens
;
694 assert(!Tokens
.empty());
696 const auto *LastToken
= Tokens
.back().Tok
;
699 SmallVector
<UnwrappedLineNode
> SavedTokens(Tokens
.size());
702 for (const auto &Token
: Tokens
) {
704 auto &SavedToken
= SavedTokens
[Index
++];
705 SavedToken
.Tok
= new FormatToken
;
706 SavedToken
.Tok
->copyFrom(*Token
.Tok
);
707 SavedToken
.Children
= std::move(Token
.Children
);
710 AnnotatedLine
Line(ParsedLine
);
711 assert(Line
.Last
== LastToken
);
713 TokenAnnotator
Annotator(Style
, Keywords
);
714 Annotator
.annotate(Line
);
715 Annotator
.calculateFormattingInformation(Line
);
717 auto Length
= LastToken
->TotalLength
;
719 assert(OpeningBrace
!= Tokens
.front().Tok
);
720 if (auto Prev
= OpeningBrace
->Previous
;
721 Prev
&& Prev
->TotalLength
+ ColumnLimit
== OpeningBrace
->TotalLength
) {
722 Length
-= ColumnLimit
;
724 Length
-= OpeningBrace
->TokenText
.size() + 1;
727 if (const auto *FirstToken
= Line
.First
; FirstToken
->is(tok::r_brace
)) {
728 assert(!OpeningBrace
|| OpeningBrace
->is(TT_ControlStatementLBrace
));
729 Length
-= FirstToken
->TokenText
.size() + 1;
733 for (auto &Token
: Tokens
) {
734 const auto &SavedToken
= SavedTokens
[Index
++];
735 Token
.Tok
->copyFrom(*SavedToken
.Tok
);
736 Token
.Children
= std::move(SavedToken
.Children
);
737 delete SavedToken
.Tok
;
740 // If these change, PPLevel needs to be used to get the correct indentation.
741 assert(!Line
.InMacroBody
);
742 assert(!Line
.InPPDirective
);
743 return Line
.Level
* Style
.IndentWidth
+ Length
<= ColumnLimit
;
746 FormatToken
*UnwrappedLineParser::parseBlock(bool MustBeDeclaration
,
747 unsigned AddLevels
, bool MunchSemi
,
750 bool UnindentWhitesmithsBraces
) {
751 auto HandleVerilogBlockLabel
= [this]() {
753 if (Style
.isVerilog() && FormatTok
->is(tok::colon
)) {
755 if (Keywords
.isVerilogIdentifier(*FormatTok
))
760 // Whether this is a Verilog-specific block that has a special header like a
762 const bool VerilogHierarchy
=
763 Style
.isVerilog() && Keywords
.isVerilogHierarchy(*FormatTok
);
764 assert((FormatTok
->isOneOf(tok::l_brace
, TT_MacroBlockBegin
) ||
765 (Style
.isVerilog() &&
766 (Keywords
.isVerilogBegin(*FormatTok
) || VerilogHierarchy
))) &&
767 "'{' or macro block token expected");
768 FormatToken
*Tok
= FormatTok
;
769 const bool FollowedByComment
= Tokens
->peekNextToken()->is(tok::comment
);
770 auto Index
= CurrentLines
->size();
771 const bool MacroBlock
= FormatTok
->is(TT_MacroBlockBegin
);
772 FormatTok
->setBlockKind(BK_Block
);
774 // For Whitesmiths mode, jump to the next level prior to skipping over the
776 if (!VerilogHierarchy
&& AddLevels
> 0 &&
777 Style
.BreakBeforeBraces
== FormatStyle::BS_Whitesmiths
) {
781 size_t PPStartHash
= computePPHash();
783 const unsigned InitialLevel
= Line
->Level
;
784 if (VerilogHierarchy
) {
785 AddLevels
+= parseVerilogHierarchyHeader();
787 nextToken(/*LevelDifference=*/AddLevels
);
788 HandleVerilogBlockLabel();
791 // Bail out if there are too many levels. Otherwise, the stack might overflow.
792 if (Line
->Level
> 300)
795 if (MacroBlock
&& FormatTok
->is(tok::l_paren
))
798 size_t NbPreprocessorDirectives
=
799 !parsingPPDirective() ? PreprocessorDirectives
.size() : 0;
801 size_t OpeningLineIndex
=
802 CurrentLines
->empty()
803 ? (UnwrappedLine::kInvalidIndex
)
804 : (CurrentLines
->size() - 1 - NbPreprocessorDirectives
);
806 // Whitesmiths is weird here. The brace needs to be indented for the namespace
807 // block, but the block itself may not be indented depending on the style
808 // settings. This allows the format to back up one level in those cases.
809 if (UnindentWhitesmithsBraces
)
812 ScopedDeclarationState
DeclarationState(*Line
, DeclarationScopeStack
,
814 if (AddLevels
> 0u && Style
.BreakBeforeBraces
!= FormatStyle::BS_Whitesmiths
)
815 Line
->Level
+= AddLevels
;
817 FormatToken
*IfLBrace
= nullptr;
818 const bool SimpleBlock
= parseLevel(Tok
, IfKind
, &IfLBrace
);
823 if (MacroBlock
? FormatTok
->isNot(TT_MacroBlockEnd
)
824 : FormatTok
->isNot(tok::r_brace
)) {
825 Line
->Level
= InitialLevel
;
826 FormatTok
->setBlockKind(BK_Block
);
830 if (FormatTok
->is(tok::r_brace
)) {
831 FormatTok
->setBlockKind(BK_Block
);
832 if (Tok
->is(TT_NamespaceLBrace
))
833 FormatTok
->setFinalizedType(TT_NamespaceRBrace
);
836 const bool IsFunctionRBrace
=
837 FormatTok
->is(tok::r_brace
) && Tok
->is(TT_FunctionLBrace
);
839 auto RemoveBraces
= [=]() mutable {
842 assert(Tok
->isOneOf(TT_ControlStatementLBrace
, TT_ElseLBrace
));
843 assert(FormatTok
->is(tok::r_brace
));
844 const bool WrappedOpeningBrace
= !Tok
->Previous
;
845 if (WrappedOpeningBrace
&& FollowedByComment
)
847 const bool HasRequiredIfBraces
= IfLBrace
&& !IfLBrace
->Optional
;
848 if (KeepBraces
&& !HasRequiredIfBraces
)
850 if (Tok
->isNot(TT_ElseLBrace
) || !HasRequiredIfBraces
) {
851 const FormatToken
*Previous
= Tokens
->getPreviousToken();
853 if (Previous
->is(tok::r_brace
) && !Previous
->Optional
)
856 assert(!CurrentLines
->empty());
857 auto &LastLine
= CurrentLines
->back();
858 if (LastLine
.Level
== InitialLevel
+ 1 && !mightFitOnOneLine(LastLine
))
860 if (Tok
->is(TT_ElseLBrace
))
862 if (WrappedOpeningBrace
) {
864 --Index
; // The line above the wrapped l_brace.
867 return mightFitOnOneLine((*CurrentLines
)[Index
], Tok
);
869 if (RemoveBraces()) {
870 Tok
->MatchingParen
= FormatTok
;
871 FormatTok
->MatchingParen
= Tok
;
874 size_t PPEndHash
= computePPHash();
876 // Munch the closing brace.
877 nextToken(/*LevelDifference=*/-AddLevels
);
879 // When this is a function block and there is an unnecessary semicolon
880 // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
882 if (Style
.RemoveSemicolon
&& IsFunctionRBrace
) {
883 while (FormatTok
->is(tok::semi
)) {
884 FormatTok
->Optional
= true;
889 HandleVerilogBlockLabel();
891 if (MacroBlock
&& FormatTok
->is(tok::l_paren
))
894 Line
->Level
= InitialLevel
;
896 if (FormatTok
->is(tok::kw_noexcept
)) {
897 // A noexcept in a requires expression.
901 if (FormatTok
->is(tok::arrow
)) {
902 // Following the } or noexcept we can find a trailing return type arrow
903 // as part of an implicit conversion constraint.
905 parseStructuralElement();
908 if (MunchSemi
&& FormatTok
->is(tok::semi
))
911 if (PPStartHash
== PPEndHash
) {
912 Line
->MatchingOpeningBlockLineIndex
= OpeningLineIndex
;
913 if (OpeningLineIndex
!= UnwrappedLine::kInvalidIndex
) {
914 // Update the opening line to add the forward reference as well
915 (*CurrentLines
)[OpeningLineIndex
].MatchingClosingBlockLineIndex
=
916 CurrentLines
->size() - 1;
923 static bool isGoogScope(const UnwrappedLine
&Line
) {
924 // FIXME: Closure-library specific stuff should not be hard-coded but be
926 if (Line
.Tokens
.size() < 4)
928 auto I
= Line
.Tokens
.begin();
929 if (I
->Tok
->TokenText
!= "goog")
932 if (I
->Tok
->isNot(tok::period
))
935 if (I
->Tok
->TokenText
!= "scope")
938 return I
->Tok
->is(tok::l_paren
);
941 static bool isIIFE(const UnwrappedLine
&Line
,
942 const AdditionalKeywords
&Keywords
) {
943 // Look for the start of an immediately invoked anonymous function.
944 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
945 // This is commonly done in JavaScript to create a new, anonymous scope.
946 // Example: (function() { ... })()
947 if (Line
.Tokens
.size() < 3)
949 auto I
= Line
.Tokens
.begin();
950 if (I
->Tok
->isNot(tok::l_paren
))
953 if (I
->Tok
->isNot(Keywords
.kw_function
))
956 return I
->Tok
->is(tok::l_paren
);
959 static bool ShouldBreakBeforeBrace(const FormatStyle
&Style
,
960 const FormatToken
&InitialToken
) {
961 tok::TokenKind Kind
= InitialToken
.Tok
.getKind();
962 if (InitialToken
.is(TT_NamespaceMacro
))
963 Kind
= tok::kw_namespace
;
966 case tok::kw_namespace
:
967 return Style
.BraceWrapping
.AfterNamespace
;
969 return Style
.BraceWrapping
.AfterClass
;
971 return Style
.BraceWrapping
.AfterUnion
;
973 return Style
.BraceWrapping
.AfterStruct
;
975 return Style
.BraceWrapping
.AfterEnum
;
981 void UnwrappedLineParser::parseChildBlock() {
982 assert(FormatTok
->is(tok::l_brace
));
983 FormatTok
->setBlockKind(BK_Block
);
984 const FormatToken
*OpeningBrace
= FormatTok
;
987 bool SkipIndent
= (Style
.isJavaScript() &&
988 (isGoogScope(*Line
) || isIIFE(*Line
, Keywords
)));
989 ScopedLineState
LineState(*this);
990 ScopedDeclarationState
DeclarationState(*Line
, DeclarationScopeStack
,
991 /*MustBeDeclaration=*/false);
992 Line
->Level
+= SkipIndent
? 0 : 1;
993 parseLevel(OpeningBrace
);
994 flushComments(isOnNewLine(*FormatTok
));
995 Line
->Level
-= SkipIndent
? 0 : 1;
1000 void UnwrappedLineParser::parsePPDirective() {
1001 assert(FormatTok
->is(tok::hash
) && "'#' expected");
1002 ScopedMacroState
MacroState(*Line
, Tokens
, FormatTok
);
1006 if (!FormatTok
->Tok
.getIdentifierInfo()) {
1011 switch (FormatTok
->Tok
.getIdentifierInfo()->getPPKeywordID()) {
1012 case tok::pp_define
:
1016 parsePPIf(/*IfDef=*/false);
1019 case tok::pp_ifndef
:
1020 parsePPIf(/*IfDef=*/true);
1023 case tok::pp_elifdef
:
1024 case tok::pp_elifndef
:
1031 case tok::pp_pragma
:
1040 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable
) {
1041 size_t Line
= CurrentLines
->size();
1042 if (CurrentLines
== &PreprocessorDirectives
)
1043 Line
+= Lines
.size();
1046 (!PPStack
.empty() && PPStack
.back().Kind
== PP_Unreachable
)) {
1047 PPStack
.push_back({PP_Unreachable
, Line
});
1049 PPStack
.push_back({PP_Conditional
, Line
});
1053 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable
) {
1055 assert(PPBranchLevel
>= 0 && PPBranchLevel
<= (int)PPLevelBranchIndex
.size());
1056 if (PPBranchLevel
== (int)PPLevelBranchIndex
.size()) {
1057 PPLevelBranchIndex
.push_back(0);
1058 PPLevelBranchCount
.push_back(0);
1060 PPChainBranchIndex
.push(Unreachable
? -1 : 0);
1061 bool Skip
= PPLevelBranchIndex
[PPBranchLevel
] > 0;
1062 conditionalCompilationCondition(Unreachable
|| Skip
);
1065 void UnwrappedLineParser::conditionalCompilationAlternative() {
1066 if (!PPStack
.empty())
1068 assert(PPBranchLevel
< (int)PPLevelBranchIndex
.size());
1069 if (!PPChainBranchIndex
.empty())
1070 ++PPChainBranchIndex
.top();
1071 conditionalCompilationCondition(
1072 PPBranchLevel
>= 0 && !PPChainBranchIndex
.empty() &&
1073 PPLevelBranchIndex
[PPBranchLevel
] != PPChainBranchIndex
.top());
1076 void UnwrappedLineParser::conditionalCompilationEnd() {
1077 assert(PPBranchLevel
< (int)PPLevelBranchIndex
.size());
1078 if (PPBranchLevel
>= 0 && !PPChainBranchIndex
.empty()) {
1079 if (PPChainBranchIndex
.top() + 1 > PPLevelBranchCount
[PPBranchLevel
])
1080 PPLevelBranchCount
[PPBranchLevel
] = PPChainBranchIndex
.top() + 1;
1082 // Guard against #endif's without #if.
1083 if (PPBranchLevel
> -1)
1085 if (!PPChainBranchIndex
.empty())
1086 PPChainBranchIndex
.pop();
1087 if (!PPStack
.empty())
1091 void UnwrappedLineParser::parsePPIf(bool IfDef
) {
1092 bool IfNDef
= FormatTok
->is(tok::pp_ifndef
);
1094 bool Unreachable
= false;
1095 if (!IfDef
&& (FormatTok
->is(tok::kw_false
) || FormatTok
->TokenText
== "0"))
1097 if (IfDef
&& !IfNDef
&& FormatTok
->TokenText
== "SWIG")
1099 conditionalCompilationStart(Unreachable
);
1100 FormatToken
*IfCondition
= FormatTok
;
1101 // If there's an #ifndef on the first line, and the only lines before it are
1102 // comments, it could be an include guard.
1103 bool MaybeIncludeGuard
= IfNDef
;
1104 if (IncludeGuard
== IG_Inited
&& MaybeIncludeGuard
) {
1105 for (auto &Line
: Lines
) {
1106 if (Line
.Tokens
.front().Tok
->isNot(tok::comment
)) {
1107 MaybeIncludeGuard
= false;
1108 IncludeGuard
= IG_Rejected
;
1116 if (IncludeGuard
== IG_Inited
&& MaybeIncludeGuard
) {
1117 IncludeGuard
= IG_IfNdefed
;
1118 IncludeGuardToken
= IfCondition
;
1122 void UnwrappedLineParser::parsePPElse() {
1123 // If a potential include guard has an #else, it's not an include guard.
1124 if (IncludeGuard
== IG_Defined
&& PPBranchLevel
== 0)
1125 IncludeGuard
= IG_Rejected
;
1126 // Don't crash when there is an #else without an #if.
1127 assert(PPBranchLevel
>= -1);
1128 if (PPBranchLevel
== -1)
1129 conditionalCompilationStart(/*Unreachable=*/true);
1130 conditionalCompilationAlternative();
// Handles an #endif directive: closes the current conditional-compilation
// region and, if all include-guard conditions below hold, marks the guard as
// found. Those conditions are: a guard candidate has been defined, the branch
// level is back at -1, the token source is at EOF, and preprocessor-directive
// indentation is enabled in the style.
1136 void UnwrappedLineParser::parsePPEndIf() {
1137 conditionalCompilationEnd();
1139 // If the #endif of a potential include guard is the last thing in the file,
1140 // then we found an include guard.
1141 if (IncludeGuard
== IG_Defined
&& PPBranchLevel
== -1 && Tokens
->isEOF() &&
1142 Style
.IndentPPDirectives
!= FormatStyle::PPDIS_None
) {
1143 IncludeGuard
= IG_Found
;
// Handles a #define directive. Updates include-guard detection from the
// macro name, retags the name token as an ordinary identifier, and sets the
// current line's Level, PPLevel and InMacroBody before the macro body is
// processed.
1147 void UnwrappedLineParser::parsePPDefine() {
// A #define whose name token carries no identifier info cannot be a guard.
1150 if (!FormatTok
->Tok
.getIdentifierInfo()) {
1151 IncludeGuard
= IG_Rejected
;
1152 IncludeGuardToken
= nullptr;
// The macro name matches the recorded guard candidate token.
1157 if (IncludeGuard
== IG_IfNdefed
&&
1158 IncludeGuardToken
->TokenText
== FormatTok
->TokenText
) {
1159 IncludeGuard
= IG_Defined
;
1160 IncludeGuardToken
= nullptr;
// Reject the guard if any earlier line starts with something other than a
// comment or a '#'.
1161 for (auto &Line
: Lines
) {
1162 if (!Line
.Tokens
.front().Tok
->isOneOf(tok::comment
, tok::hash
)) {
1163 IncludeGuard
= IG_Rejected
;
1169 // In the context of a define, even keywords should be treated as normal
1170 // identifiers. Setting the kind to identifier is not enough, because we need
1171 // to treat additional keywords like __except as well, which are already
1172 // identifiers. Setting the identifier info to null interferes with include
1173 // guard processing above, and changes preprocessing nesting.
1174 FormatTok
->Tok
.setKind(tok::identifier
);
1175 FormatTok
->Tok
.setIdentifierInfo(Keywords
.kw_internal_ident_after_define
);
// A '(' with no whitespace before it is treated specially here.
1177 if (FormatTok
->Tok
.getKind() == tok::l_paren
&&
1178 !FormatTok
->hasWhitespaceBefore()) {
1181 if (Style
.IndentPPDirectives
!= FormatStyle::PPDIS_None
)
1182 Line
->Level
+= PPBranchLevel
+ 1;
// A confirmed include guard does not contribute an extra PP nesting level.
1186 Line
->PPLevel
= PPBranchLevel
+ (IncludeGuard
== IG_Defined
? 0 : 1);
1187 assert((int)Line
->PPLevel
>= 0);
1188 Line
->InMacroBody
= true;
// With SkipMacroDefinitionBody set, tokens are marked Finalized while the
// parser advances through them.
1190 if (Style
.SkipMacroDefinitionBody
) {
1192 FormatTok
->Finalized
= true;
1193 FormatTok
= Tokens
->getNextToken();
1199 // Errors during a preprocessor directive can only affect the layout of the
1200 // preprocessor directive, and thus we ignore them. An alternative approach
1201 // would be to use the same approach we use on the file level (no
1202 // re-indentation if there was a structural error) within the macro
// Handles a #pragma directive by flagging the current line as being inside a
// pragma directive.
1207 void UnwrappedLineParser::parsePPPragma() {
1208 Line
->InPragmaDirective
= true;
// Handling for a preprocessor directive without a dedicated parser (judging
// by the name -- confirm against the dispatcher). When preprocessor-directive
// indentation is enabled, the line level is raised by PPBranchLevel + 1.
1212 void UnwrappedLineParser::parsePPUnknown() {
1216 if (Style
.IndentPPDirectives
!= FormatStyle::PPDIS_None
)
1217 Line
->Level
+= PPBranchLevel
+ 1;
1221 // Here we exclude certain tokens that are not usually the first token in an
1222 // unwrapped line. This is used in attempt to distinguish macro calls without
1223 // trailing semicolons from other constructs split to several lines.
// Returns false when Tok is one of the kinds passed to isOneOf() below, and
// true for every other token.
1224 static bool tokenCanStartNewLine(const FormatToken
&Tok
) {
1225 // Semicolon can be a null-statement, l_square can be a start of a macro or
1226 // a C++11 attribute, but this doesn't seem to be common.
1227 return !Tok
.isOneOf(tok::semi
, tok::l_brace
,
1228 // Tokens that can only be used as binary operators and a
1229 // part of overloaded operator names.
1230 tok::period
, tok::periodstar
, tok::arrow
, tok::arrowstar
,
1231 tok::less
, tok::greater
, tok::slash
, tok::percent
,
1232 tok::lessless
, tok::greatergreater
, tok::equal
,
1233 tok::plusequal
, tok::minusequal
, tok::starequal
,
1234 tok::slashequal
, tok::percentequal
, tok::ampequal
,
1235 tok::pipeequal
, tok::caretequal
, tok::greatergreaterequal
,
1237 // Colon is used in labels, base class lists, initializer
1238 // lists, range-based for loops, ternary operator, but
1239 // should never be the first token in an unwrapped line.
1241 // 'noexcept' is a trailing annotation.
1245 static bool mustBeJSIdent(const AdditionalKeywords
&Keywords
,
1246 const FormatToken
*FormatTok
) {
1247 // FIXME: This returns true for C/C++ keywords like 'struct'.
1248 return FormatTok
->is(tok::identifier
) &&
1249 (!FormatTok
->Tok
.getIdentifierInfo() ||
1250 !FormatTok
->isOneOf(
1251 Keywords
.kw_in
, Keywords
.kw_of
, Keywords
.kw_as
, Keywords
.kw_async
,
1252 Keywords
.kw_await
, Keywords
.kw_yield
, Keywords
.kw_finally
,
1253 Keywords
.kw_function
, Keywords
.kw_import
, Keywords
.kw_is
,
1254 Keywords
.kw_let
, Keywords
.kw_var
, tok::kw_const
,
1255 Keywords
.kw_abstract
, Keywords
.kw_extends
, Keywords
.kw_implements
,
1256 Keywords
.kw_instanceof
, Keywords
.kw_interface
,
1257 Keywords
.kw_override
, Keywords
.kw_throws
, Keywords
.kw_from
));
1260 static bool mustBeJSIdentOrValue(const AdditionalKeywords
&Keywords
,
1261 const FormatToken
*FormatTok
) {
1262 return FormatTok
->Tok
.isLiteral() ||
1263 FormatTok
->isOneOf(tok::kw_true
, tok::kw_false
) ||
1264 mustBeJSIdent(Keywords
, FormatTok
);
1267 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1268 // when encountered after a value (see mustBeJSIdentOrValue).
// The decision is a plain membership test against the token kinds and
// Keywords entries passed to isOneOf() below.
1269 static bool isJSDeclOrStmt(const AdditionalKeywords
&Keywords
,
1270 const FormatToken
*FormatTok
) {
1271 return FormatTok
->isOneOf(
1272 tok::kw_return
, Keywords
.kw_yield
,
1274 tok::kw_if
, tok::kw_else
,
1276 tok::kw_for
, tok::kw_while
, tok::kw_do
, tok::kw_continue
, tok::kw_break
,
1278 tok::kw_switch
, tok::kw_case
,
1280 tok::kw_throw
, tok::kw_try
, tok::kw_catch
, Keywords
.kw_finally
,
1282 tok::kw_const
, tok::kw_class
, Keywords
.kw_var
, Keywords
.kw_let
,
1283 Keywords
.kw_async
, Keywords
.kw_function
,
1285 Keywords
.kw_import
, tok::kw_export
);
1288 // Checks whether a token is a type in K&R C (aka C78).
// Implemented as a membership test on the token kinds passed to isOneOf().
1289 static bool isC78Type(const FormatToken
&Tok
) {
1290 return Tok
.isOneOf(tok::kw_char
, tok::kw_short
, tok::kw_int
, tok::kw_long
,
1291 tok::kw_unsigned
, tok::kw_float
, tok::kw_double
,
1295 // This function checks whether a token starts the first parameter declaration
1296 // in a K&R C (aka C78) function definition, e.g.:
// \param Tok      candidate first token of the parameter declaration.
// \param Next     the token following Tok.
// \param FuncName the token expected to be the function's name.
1302 static bool isC78ParameterDecl(const FormatToken
*Tok
, const FormatToken
*Next
,
1303 const FormatToken
*FuncName
) {
// The function name has to be a plain identifier ...
1308 if (FuncName
->isNot(tok::identifier
))
// ... preceded by a '*' or a C78 type.
1311 const FormatToken
*Prev
= FuncName
->Previous
;
1312 if (!Prev
|| (Prev
->isNot(tok::star
) && !isC78Type(*Prev
)))
// Tok itself must be a C78 type or one of register/struct/union.
1315 if (!isC78Type(*Tok
) &&
1316 !Tok
->isOneOf(tok::kw_register
, tok::kw_struct
, tok::kw_union
)) {
// The token after it must be a '*' or carry identifier info.
1320 if (Next
->isNot(tok::star
) && !Next
->Tok
.getIdentifierInfo())
// Walk backwards from Tok: expect ')' and then an identifier ...
1323 Tok
= Tok
->Previous
;
1324 if (!Tok
|| Tok
->isNot(tok::r_paren
))
1327 Tok
= Tok
->Previous
;
1328 if (!Tok
|| Tok
->isNot(tok::identifier
))
// ... which in turn must directly follow '(' or ','.
1331 return Tok
->Previous
&& Tok
->Previous
->isOneOf(tok::l_paren
, tok::comma
);
// Attempts to parse a module import that starts at the 'import' token
// (asserted below). The token after 'import', skipping comments, must carry
// identifier info or be ':', '<' or a string literal for the visible
// handling to apply.
1334 bool UnwrappedLineParser::parseModuleImport() {
1335 assert(FormatTok
->is(Keywords
.kw_import
) && "'import' expected");
1337 if (auto Token
= Tokens
->peekNextToken(/*SkipComment=*/true);
1338 !Token
->Tok
.getIdentifierInfo() &&
1339 !Token
->isOneOf(tok::colon
, tok::less
, tok::string_literal
)) {
// A ':' is finalized as a module partition colon.
1345 if (FormatTok
->is(tok::colon
)) {
1346 FormatTok
->setFinalizedType(TT_ModulePartitionColon
);
1348 // Handle import <foo/bar.h> as we would an include statement.
1349 else if (FormatTok
->is(tok::less
)) {
1351 while (!FormatTok
->isOneOf(tok::semi
, tok::greater
, tok::eof
)) {
1352 // Mark tokens up to the trailing line comments as implicit string
1354 if (FormatTok
->isNot(tok::comment
) &&
1355 !FormatTok
->TokenText
.starts_with("//")) {
1356 FormatTok
->setFinalizedType(TT_ImplicitStringLiteral
);
1361 if (FormatTok
->is(tok::semi
)) {
1372 // readTokenWithJavaScriptASI reads the next token and terminates the current
1373 // line if JavaScript Automatic Semicolon Insertion must
1374 // happen between the current token and the next token.
1376 // This method is conservative - it cannot cover all edge cases of JavaScript,
1377 // but only aims to correctly handle certain well known cases. It *must not*
1378 // return true in speculative cases.
// NOTE(review): the function is declared void, so the "return true" wording
// above does not match the signature -- confirm and reword. In the visible
// code a line is terminated through `return addUnwrappedLine();`.
1379 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1380 FormatToken
*Previous
= FormatTok
;
1382 FormatToken
*Next
= FormatTok
;
// Evaluates to true when no newline precedes the next token, or the first
// pending comment when there is one.
1385 CommentsBeforeNextToken
.empty()
1386 ? Next
->NewlinesBefore
== 0
1387 : CommentsBeforeNextToken
.front()->NewlinesBefore
== 0;
1391 bool PreviousMustBeValue
= mustBeJSIdentOrValue(Keywords
, Previous
);
1392 bool PreviousStartsTemplateExpr
=
1393 Previous
->is(TT_TemplateString
) && Previous
->TokenText
.ends_with("${");
1394 if (PreviousMustBeValue
|| Previous
->is(tok::r_paren
)) {
1395 // If the line contains an '@' sign, the previous token might be an
1396 // annotation, which can precede another identifier/value.
1397 bool HasAt
= llvm::any_of(Line
->Tokens
, [](UnwrappedLineNode
&LineNode
) {
1398 return LineNode
.Tok
->is(tok::at
);
// A '!' that follows a value ends the current line.
1403 if (Next
->is(tok::exclaim
) && PreviousMustBeValue
)
1404 return addUnwrappedLine();
1405 bool NextMustBeValue
= mustBeJSIdentOrValue(Keywords
, Next
);
1406 bool NextEndsTemplateExpr
=
1407 Next
->is(TT_TemplateString
) && Next
->TokenText
.starts_with("}");
// Two adjacent values (outside a template-string expression) end the line.
1408 if (NextMustBeValue
&& !NextEndsTemplateExpr
&& !PreviousStartsTemplateExpr
&&
1409 (PreviousMustBeValue
||
1410 Previous
->isOneOf(tok::r_square
, tok::r_paren
, tok::plusplus
,
1411 tok::minusminus
))) {
1412 return addUnwrappedLine();
// A declaration/statement keyword after a value or ')' ends the line.
1414 if ((PreviousMustBeValue
|| Previous
->is(tok::r_paren
)) &&
1415 isJSDeclOrStmt(Keywords
, Next
)) {
1416 return addUnwrappedLine();
// Parses one structural element starting at FormatTok, dispatching first on
// the configured language and then on the kind of the current token.
// \param OpeningBrace may be null; it is null-checked before being tested
//        for TT_RequiresExpressionLBrace.
// \param IfKind forwarded to parseIfThenElse().
// \param IfLeftBrace, HasDoWhile, HasLabel pointer parameters whose uses are
//        not visible in this listing -- TODO confirm whether they may be null.
1420 void UnwrappedLineParser::parseStructuralElement(
1421 const FormatToken
*OpeningBrace
, IfStmtKind
*IfKind
,
1422 FormatToken
**IfLeftBrace
, bool *HasDoWhile
, bool *HasLabel
) {
// Language-specific handling that precedes the generic dispatch.
1423 if (Style
.Language
== FormatStyle::LK_TableGen
&&
1424 FormatTok
->is(tok::pp_include
)) {
1426 if (FormatTok
->is(tok::string_literal
))
1433 while (FormatTok
->is(tok::l_square
) && handleCppAttributes()) {
1435 } else if (Style
.isVerilog()) {
1436 if (Keywords
.isVerilogStructuredProcedure(*FormatTok
)) {
1437 parseForOrWhileLoop(/*HasParens=*/false);
1440 if (FormatTok
->isOneOf(Keywords
.kw_foreach
, Keywords
.kw_repeat
)) {
1441 parseForOrWhileLoop();
1444 if (FormatTok
->isOneOf(tok::kw_restrict
, Keywords
.kw_assert
,
1445 Keywords
.kw_assume
, Keywords
.kw_cover
)) {
1446 parseIfThenElse(IfKind
, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
1450 // Skip things that can exist before keywords like 'if' and 'case'.
1452 if (FormatTok
->isOneOf(Keywords
.kw_priority
, Keywords
.kw_unique
,
1453 Keywords
.kw_unique0
)) {
1455 } else if (FormatTok
->is(tok::l_paren
) &&
1456 Tokens
->peekNextToken()->is(tok::star
)) {
1464 // Tokens that only make sense at the beginning of a line.
1465 if (FormatTok
->isAccessSpecifierKeyword()) {
1466 if (Style
.Language
== FormatStyle::LK_Java
|| Style
.isJavaScript() ||
1470 parseAccessSpecifier();
// First dispatch on the kind of the leading token.
1474 switch (FormatTok
->Tok
.getKind()) {
1477 if (FormatTok
->is(tok::l_brace
)) {
1478 FormatTok
->setFinalizedType(TT_InlineASMBrace
);
1480 while (FormatTok
&& !eof()) {
1481 if (FormatTok
->is(tok::r_brace
)) {
1482 FormatTok
->setFinalizedType(TT_InlineASMBrace
);
1487 FormatTok
->Finalized
= true;
1492 case tok::kw_namespace
:
1496 if (Style
.isJavaScript() && Line
->MustBeDeclaration
) {
1497 // field/method declaration.
1500 FormatToken
*Tok
= parseIfThenElse(IfKind
);
1507 if (Style
.isJavaScript() && Line
->MustBeDeclaration
) {
1508 // field/method declaration.
1511 parseForOrWhileLoop();
1514 if (Style
.isJavaScript() && Line
->MustBeDeclaration
) {
1515 // field/method declaration.
1522 case tok::kw_switch
:
1523 if (Style
.isJavaScript() && Line
->MustBeDeclaration
) {
1524 // 'switch: string' field declaration.
1527 parseSwitch(/*IsExpr=*/false);
1529 case tok::kw_default
: {
1530 // In Verilog default along with other labels are handled in the next loop.
1531 if (Style
.isVerilog())
1533 if (Style
.isJavaScript() && Line
->MustBeDeclaration
) {
1534 // 'default: string' field declaration.
1537 auto *Default
= FormatTok
;
1539 if (FormatTok
->is(tok::colon
)) {
1540 FormatTok
->setFinalizedType(TT_CaseLabelColon
);
1544 if (FormatTok
->is(tok::arrow
)) {
1545 FormatTok
->setFinalizedType(TT_CaseLabelArrow
);
1546 Default
->setFinalizedType(TT_SwitchExpressionLabel
);
1550 // e.g. "default void f() {}" in a Java interface.
1554 // Proto: there are no switch/case statements.
1555 if (Style
.Language
== FormatStyle::LK_Proto
) {
1559 if (Style
.isVerilog()) {
1564 if (Style
.isJavaScript() && Line
->MustBeDeclaration
) {
1565 // 'case: string' field declaration.
1573 if (FormatTok
->is(tok::kw_case
))
1578 if (Style
.isJavaScript() && Line
->MustBeDeclaration
) {
1579 // field/method declaration.
1584 case tok::kw_extern
:
1586 if (Style
.isVerilog()) {
1587 // In Verilog and extern module declaration looks like a start of module.
1588 // But there is no body and endmodule. So we handle it separately.
1589 if (Keywords
.isVerilogHierarchy(*FormatTok
)) {
1590 parseVerilogHierarchyHeader();
1593 } else if (FormatTok
->is(tok::string_literal
)) {
1595 if (FormatTok
->is(tok::l_brace
)) {
1596 if (Style
.BraceWrapping
.AfterExternBlock
)
1598 // Either we indent or for backwards compatibility we follow the
1599 // AfterExternBlock style.
1600 unsigned AddLevels
=
1601 (Style
.IndentExternBlock
== FormatStyle::IEBS_Indent
) ||
1602 (Style
.BraceWrapping
.AfterExternBlock
&&
1603 Style
.IndentExternBlock
==
1604 FormatStyle::IEBS_AfterExternBlock
)
1607 parseBlock(/*MustBeDeclaration=*/true, AddLevels
);
1613 case tok::kw_export
:
1614 if (Style
.isJavaScript()) {
1615 parseJavaScriptEs6ImportExport();
1620 if (FormatTok
->is(tok::kw_namespace
)) {
1624 if (FormatTok
->is(Keywords
.kw_import
) && parseModuleImport())
1628 case tok::kw_inline
:
1630 if (FormatTok
->is(tok::kw_namespace
)) {
1635 case tok::identifier
:
1636 if (FormatTok
->is(TT_ForEachMacro
)) {
1637 parseForOrWhileLoop();
1640 if (FormatTok
->is(TT_MacroBlockBegin
)) {
1641 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1642 /*MunchSemi=*/false);
1645 if (FormatTok
->is(Keywords
.kw_import
)) {
1646 if (Style
.isJavaScript()) {
1647 parseJavaScriptEs6ImportExport();
1650 if (Style
.Language
== FormatStyle::LK_Proto
) {
1652 if (FormatTok
->is(tok::kw_public
))
1654 if (FormatTok
->isNot(tok::string_literal
))
1657 if (FormatTok
->is(tok::semi
))
1662 if (IsCpp
&& parseModuleImport())
1665 if (IsCpp
&& FormatTok
->isOneOf(Keywords
.kw_signals
, Keywords
.kw_qsignals
,
1666 Keywords
.kw_slots
, Keywords
.kw_qslots
)) {
1668 if (FormatTok
->is(tok::colon
)) {
1674 if (IsCpp
&& FormatTok
->is(TT_StatementMacro
)) {
1675 parseStatementMacro();
1678 if (IsCpp
&& FormatTok
->is(TT_NamespaceMacro
)) {
1682 // In Verilog labels can be any expression, so we don't do them here.
1683 // JS doesn't have macros, and within classes colons indicate fields, not
1685 // TableGen doesn't have labels.
1686 if (!Style
.isJavaScript() && !Style
.isVerilog() && !Style
.isTableGen() &&
1687 Tokens
->peekNextToken()->is(tok::colon
) && !Line
->MustBeDeclaration
) {
1689 if (!Line
->InMacroBody
|| CurrentLines
->size() > 1)
1690 Line
->Tokens
.begin()->Tok
->MustBreakBefore
= true;
1691 FormatTok
->setFinalizedType(TT_GotoLabelColon
);
1692 parseLabel(!Style
.IndentGotoLabels
);
1697 // In all other cases, parse the declaration.
// InRequiresExpression is true only when an opening brace was supplied and
// it is a TT_RequiresExpressionLBrace.
1703 for (const bool InRequiresExpression
=
1704 OpeningBrace
&& OpeningBrace
->is(TT_RequiresExpressionLBrace
);
// A C++ alternative operator keyword followed by a binary operator is
// retagged as a plain identifier.
1706 if (IsCpp
&& FormatTok
->isCppAlternativeOperatorKeyword()) {
1707 if (auto *Next
= Tokens
->peekNextToken(/*SkipComment=*/true);
1708 Next
&& Next
->isBinaryOperator()) {
1709 FormatTok
->Tok
.setKind(tok::identifier
);
1712 const FormatToken
*Previous
= FormatTok
->Previous
;
1713 switch (FormatTok
->Tok
.getKind()) {
1716 if (FormatTok
->is(tok::l_brace
)) {
1720 } else if (Style
.Language
== FormatStyle::LK_Java
&&
1721 FormatTok
->is(Keywords
.kw_interface
)) {
1725 switch (FormatTok
->Tok
.getObjCKeywordID()) {
1726 case tok::objc_public
:
1727 case tok::objc_protected
:
1728 case tok::objc_package
:
1729 case tok::objc_private
:
1730 return parseAccessSpecifier();
1731 case tok::objc_interface
:
1732 case tok::objc_implementation
:
1733 return parseObjCInterfaceOrImplementation();
1734 case tok::objc_protocol
:
1735 if (parseObjCProtocol())
1739 return; // Handled by the caller.
1740 case tok::objc_optional
:
1741 case tok::objc_required
:
1745 case tok::objc_autoreleasepool
:
1747 if (FormatTok
->is(tok::l_brace
)) {
1748 if (Style
.BraceWrapping
.AfterControlStatement
==
1749 FormatStyle::BWACS_Always
) {
1756 case tok::objc_synchronized
:
1758 if (FormatTok
->is(tok::l_paren
)) {
1759 // Skip synchronization object
1762 if (FormatTok
->is(tok::l_brace
)) {
1763 if (Style
.BraceWrapping
.AfterControlStatement
==
1764 FormatStyle::BWACS_Always
) {
1772 // This branch isn't strictly necessary (the kw_try case below would
1773 // do this too after the tok::at is parsed above). But be explicit.
1780 case tok::kw_requires
: {
1782 bool ParsedClause
= parseRequires();
1791 // Ignore if this is part of "template <enum ..." or "... -> enum" or
1792 // "template <..., enum ...>".
1793 if (Previous
&& Previous
->isOneOf(tok::less
, tok::arrow
, tok::comma
)) {
1798 // parseEnum falls through and does not yet add an unwrapped line as an
1799 // enum definition can start a structural element.
1802 // This only applies to C++ and Verilog.
1803 if (!IsCpp
&& !Style
.isVerilog()) {
1808 case tok::kw_typedef
:
1810 if (FormatTok
->isOneOf(Keywords
.kw_NS_ENUM
, Keywords
.kw_NS_OPTIONS
,
1811 Keywords
.kw_CF_ENUM
, Keywords
.kw_CF_OPTIONS
,
1812 Keywords
.kw_CF_CLOSED_ENUM
,
1813 Keywords
.kw_NS_CLOSED_ENUM
)) {
1818 if (Style
.isVerilog()) {
1823 if (Style
.isTableGen()) {
1824 // Do nothing special. In this case the l_brace becomes FunctionLBrace.
1825 // This is same as def and so on.
1830 case tok::kw_struct
:
1832 if (parseStructLike())
1835 case tok::kw_decltype
:
1837 if (FormatTok
->is(tok::l_paren
)) {
1839 assert(FormatTok
->Previous
);
1840 if (FormatTok
->Previous
->endsSequence(tok::r_paren
, tok::kw_auto
,
1842 Line
->SeenDecltypeAuto
= true;
1848 // In Java, classes have an implicit static member "class".
1849 if (Style
.Language
== FormatStyle::LK_Java
&& FormatTok
&&
1850 FormatTok
->is(tok::kw_class
)) {
1853 if (Style
.isJavaScript() && FormatTok
&&
1854 FormatTok
->Tok
.getIdentifierInfo()) {
1855 // JavaScript only has pseudo keywords, all keywords are allowed to
1856 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1867 case tok::l_paren
: {
1869 // Break the unwrapped line if a K&R C function definition has a parameter
1871 if (OpeningBrace
|| !IsCpp
|| !Previous
|| eof())
1873 if (isC78ParameterDecl(FormatTok
,
1874 Tokens
->peekNextToken(/*SkipComment=*/true),
1881 case tok::kw_operator
:
1883 if (FormatTok
->isBinaryOperator())
1888 // Block return type.
1889 if (FormatTok
->Tok
.isAnyIdentifier() || FormatTok
->isTypeName(LangOpts
)) {
1891 // Return types: pointers are ok too.
1892 while (FormatTok
->is(tok::star
))
1895 // Block argument list.
1896 if (FormatTok
->is(tok::l_paren
))
1899 if (FormatTok
->is(tok::l_brace
))
1903 if (InRequiresExpression
)
1904 FormatTok
->setFinalizedType(TT_BracedListLBrace
);
1905 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1906 IsDecltypeAutoFunction
= Line
->SeenDecltypeAuto
;
1907 // A block outside of parentheses must be the last part of a
1908 // structural element.
1909 // FIXME: Figure out cases where this is not true, and add projections
1910 // for them (the one we know is missing are lambdas).
1911 if (Style
.Language
== FormatStyle::LK_Java
&&
1912 Line
->Tokens
.front().Tok
->is(Keywords
.kw_synchronized
)) {
1913 // If necessary, we could set the type to something different than
1914 // TT_FunctionLBrace.
1915 if (Style
.BraceWrapping
.AfterControlStatement
==
1916 FormatStyle::BWACS_Always
) {
1919 } else if (Style
.BraceWrapping
.AfterFunction
) {
1922 if (!Previous
|| Previous
->isNot(TT_TypeDeclarationParen
))
1923 FormatTok
->setFinalizedType(TT_FunctionLBrace
);
1925 IsDecltypeAutoFunction
= false;
1929 // Otherwise this was a braced init list, and the structural
1930 // element continues.
1933 if (Style
.isJavaScript() && Line
->MustBeDeclaration
) {
1934 // field/method declaration.
1938 // We arrive here when parsing function-try blocks.
1939 if (Style
.BraceWrapping
.AfterFunction
)
1943 case tok::identifier
: {
1944 if (Style
.isCSharp() && FormatTok
->is(Keywords
.kw_where
) &&
1945 Line
->MustBeDeclaration
) {
1947 parseCSharpGenericTypeConstraint();
1950 if (FormatTok
->is(TT_MacroBlockEnd
)) {
1955 // Function declarations (as opposed to function expressions) are parsed
1956 // on their own unwrapped line by continuing this loop. Function
1957 // expressions (functions that are not on their own line) must not create
1958 // a new unwrapped line, so they are special cased below.
1959 size_t TokenCount
= Line
->Tokens
.size();
1960 if (Style
.isJavaScript() && FormatTok
->is(Keywords
.kw_function
) &&
1963 Line
->Tokens
.front().Tok
->isNot(Keywords
.kw_async
)))) {
1964 tryToParseJSFunction();
1967 if ((Style
.isJavaScript() || Style
.Language
== FormatStyle::LK_Java
) &&
1968 FormatTok
->is(Keywords
.kw_interface
)) {
1969 if (Style
.isJavaScript()) {
1970 // In JavaScript/TypeScript, "interface" can be used as a standalone
1971 // identifier, e.g. in `var interface = 1;`. If "interface" is
1972 // followed by another identifier, it is very like to be an actual
1973 // interface declaration.
1974 unsigned StoredPosition
= Tokens
->getPosition();
1975 FormatToken
*Next
= Tokens
->getNextToken();
1976 FormatTok
= Tokens
->setPosition(StoredPosition
);
1977 if (!mustBeJSIdent(Keywords
, Next
)) {
1987 if (Style
.isVerilog()) {
1988 if (FormatTok
->is(Keywords
.kw_table
)) {
1989 parseVerilogTable();
1992 if (Keywords
.isVerilogBegin(*FormatTok
) ||
1993 Keywords
.isVerilogHierarchy(*FormatTok
)) {
2000 if (!IsCpp
&& FormatTok
->is(Keywords
.kw_interface
)) {
2001 if (parseStructLike())
2006 if (IsCpp
&& FormatTok
->is(TT_StatementMacro
)) {
2007 parseStatementMacro();
2011 // See if the following token should start a new unwrapped line.
2012 StringRef Text
= FormatTok
->TokenText
;
2014 FormatToken
*PreviousToken
= FormatTok
;
2017 // JS doesn't have macros, and within classes colons indicate fields, not
2019 if (Style
.isJavaScript())
// True when the line holds exactly one token after skipping leading comments
// (and, for Verilog, leading '#' tokens).
2022 auto OneTokenSoFar
= [&]() {
2023 auto I
= Line
->Tokens
.begin(), E
= Line
->Tokens
.end();
2024 while (I
!= E
&& I
->Tok
->is(tok::comment
))
2026 if (Style
.isVerilog())
2027 while (I
!= E
&& I
->Tok
->is(tok::hash
))
2029 return I
!= E
&& (++I
== E
);
2031 if (OneTokenSoFar()) {
2032 // Recognize function-like macro usages without trailing semicolon as
2033 // well as free-standing macros like Q_OBJECT.
2034 bool FunctionLike
= FormatTok
->is(tok::l_paren
);
2038 bool FollowedByNewline
=
2039 CommentsBeforeNextToken
.empty()
2040 ? FormatTok
->NewlinesBefore
> 0
2041 : CommentsBeforeNextToken
.front()->NewlinesBefore
> 0;
2043 if (FollowedByNewline
&& (Text
.size() >= 5 || FunctionLike
) &&
2044 tokenCanStartNewLine(*FormatTok
) && Text
== Text
.upper()) {
2045 if (PreviousToken
->isNot(TT_UntouchableMacroFunc
))
2046 PreviousToken
->setFinalizedType(TT_FunctionLikeOrFreestandingMacro
);
2054 if ((Style
.isJavaScript() || Style
.isCSharp()) &&
2055 FormatTok
->is(TT_FatArrow
)) {
2056 tryToParseChildBlock();
2061 if (FormatTok
->is(tok::l_brace
)) {
2062 // Block kind should probably be set to BK_BracedInit for any language.
2063 // C# needs this change to ensure that array initialisers and object
2064 // initialisers are indented the same way.
2065 if (Style
.isCSharp())
2066 FormatTok
->setBlockKind(BK_BracedInit
);
2067 // TableGen's defset statement has syntax of the form,
2068 // `defset <type> <name> = { <statement>... }`
2069 if (Style
.isTableGen() &&
2070 Line
->Tokens
.begin()->Tok
->is(Keywords
.kw_defset
)) {
2071 FormatTok
->setFinalizedType(TT_FunctionLBrace
);
2072 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2073 /*MunchSemi=*/false);
2079 } else if (Style
.Language
== FormatStyle::LK_Proto
&&
2080 FormatTok
->is(tok::less
)) {
2082 parseBracedList(/*IsAngleBracket=*/true);
2091 case tok::kw_switch
:
2092 if (Style
.Language
== FormatStyle::LK_Java
)
2093 parseSwitch(/*IsExpr=*/true);
2098 // Proto: there are no switch/case statements.
2099 if (Style
.Language
== FormatStyle::LK_Proto
) {
2103 // In Verilog switch is called case.
2104 if (Style
.isVerilog()) {
2109 if (Style
.isJavaScript() && Line
->MustBeDeclaration
) {
2110 // 'case: string' field declaration.
2116 case tok::kw_default
:
2118 if (Style
.isVerilog()) {
2119 if (FormatTok
->is(tok::colon
)) {
2120 // The label will be handled in the next iteration.
2123 if (FormatTok
->is(Keywords
.kw_clocking
)) {
2124 // A default clocking block.
2129 parseVerilogCaseLabel();
2135 if (Style
.isVerilog()) {
2136 parseVerilogCaseLabel();
2142 if (FormatTok
->is(tok::l_brace
))
2143 FormatTok
->Previous
->setFinalizedType(TT_TemplateCloser
);
// Attempts to parse a property accessor block starting at the '{' held in
// FormatTok (asserted below). The first visible guard tests for a non-C#
// style, and the second requires the token before the brace to be an
// identifier. The function then scans ahead from a saved token position to
// classify the accessor before rewinding.
2152 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2153 assert(FormatTok
->is(tok::l_brace
));
2154 if (!Style
.isCSharp())
2156 // See if it's a property accessor.
2157 if (!FormatTok
->Previous
|| FormatTok
->Previous
->isNot(tok::identifier
))
2160 // See if we are inside a property accessor.
2162 // Record the current tokenPosition so that we can advance and
2163 // reset the current token. `Next` is not set yet so we need
2164 // another way to advance along the token stream.
2165 unsigned int StoredPosition
= Tokens
->getPosition();
2166 FormatToken
*Tok
= Tokens
->getNextToken();
2168 // A trivial property accessor is of the form:
2169 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2170 // Track these as they do not require line breaks to be introduced.
2171 bool HasSpecialAccessor
= false;
2172 bool IsTrivialPropertyAccessor
= true;
2173 bool HasAttribute
= false;
// Look-ahead: accept get/init/set, access specifiers, '[', ';' and
// `internal`, recording whether an accessor keyword or an attribute was seen.
2175 if (const bool IsAccessorKeyword
=
2176 Tok
->isOneOf(Keywords
.kw_get
, Keywords
.kw_init
, Keywords
.kw_set
);
2177 IsAccessorKeyword
|| Tok
->isAccessSpecifierKeyword() ||
2178 Tok
->isOneOf(tok::l_square
, tok::semi
, Keywords
.kw_internal
)) {
2179 if (IsAccessorKeyword
)
2180 HasSpecialAccessor
= true;
2181 else if (Tok
->is(tok::l_square
))
2182 HasAttribute
= true;
2183 Tok
= Tokens
->getNextToken();
// Anything other than '}' at this point makes the accessor non-trivial.
2186 if (Tok
->isNot(tok::r_brace
))
2187 IsTrivialPropertyAccessor
= false;
// No accessor keyword, or an attribute was present: rewind the token source.
2191 if (!HasSpecialAccessor
|| HasAttribute
) {
2192 Tokens
->setPosition(StoredPosition
);
2196 // Try to parse the property accessor:
2197 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2198 Tokens
->setPosition(StoredPosition
);
2199 if (!IsTrivialPropertyAccessor
&& Style
.BraceWrapping
.AfterFunction
)
2203 switch (FormatTok
->Tok
.getKind()) {
2206 if (FormatTok
->is(tok::equal
)) {
2207 while (!eof() && FormatTok
->isNot(tok::semi
))
2215 parseBlock(/*MustBeDeclaration=*/true);
2220 if (FormatTok
->is(TT_FatArrow
)) {
2224 } while (!eof() && FormatTok
->isNot(tok::semi
));
2233 if (FormatTok
->isOneOf(Keywords
.kw_get
, Keywords
.kw_init
,
2235 !IsTrivialPropertyAccessor
) {
2236 // Non-trivial get/set needs to be on its own line.
2243 // Unreachable for well-formed code (paired '{' and '}').
// Attempts to parse a lambda expression starting at the '[' held in
// FormatTok (asserted below). After the introducer is accepted, tokens are
// scanned up to the '{' of the body. On success that brace is finalized as
// TT_LambdaLBrace and the opening '[' as TT_LambdaLSquare.
2247 bool UnwrappedLineParser::tryToParseLambda() {
2248 assert(FormatTok
->is(tok::l_square
));
// Keep a handle on the '[' so it can be retagged once the body is found.
2253 FormatToken
&LSquare
= *FormatTok
;
2254 if (!tryToParseLambdaIntroducer())
2257 bool SeenArrow
= false;
2258 bool InTemplateParameterList
= false;
// Scan the tokens between the introducer and the body's '{'.
2260 while (FormatTok
->isNot(tok::l_brace
)) {
2261 if (FormatTok
->isTypeName(LangOpts
) || FormatTok
->isAttribute()) {
2265 switch (FormatTok
->Tok
.getKind()) {
2269 parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference
);
2275 assert(FormatTok
->Previous
);
2276 if (FormatTok
->Previous
->is(tok::r_square
))
2277 InTemplateParameterList
= true;
2282 case tok::kw_struct
:
2284 case tok::kw_template
:
2285 case tok::kw_typename
:
2289 case tok::kw_constexpr
:
2290 case tok::kw_consteval
:
2293 case tok::identifier
:
2294 case tok::numeric_constant
:
2295 case tok::coloncolon
:
2296 case tok::kw_mutable
:
2297 case tok::kw_noexcept
:
2298 case tok::kw_static
:
2301 // Specialization of a template with an integer parameter can contain
2302 // arithmetic, logical, comparison and ternary operators.
2304 // FIXME: This also accepts sequences of operators that are not in the scope
2305 // of a template argument list.
2307 // In a C++ lambda a template type can only occur after an arrow. We use
2308 // this as an heuristic to distinguish between Objective-C expressions
2309 // followed by an `a->b` expression, such as:
2310 // ([obj func:arg] + a->b)
2311 // Otherwise the code below would parse as a lambda.
2323 case tok::equalequal
:
2324 case tok::exclaimequal
:
2325 case tok::greaterequal
:
2326 case tok::lessequal
:
2332 if (SeenArrow
|| InTemplateParameterList
) {
2338 // This might or might not actually be a lambda arrow (this could be an
2339 // ObjC method invocation followed by a dereferencing arrow). We might
2340 // reset this back to TT_Unknown in TokenAnnotator.
2341 FormatTok
->setFinalizedType(TT_LambdaArrow
);
2345 case tok::kw_requires
: {
2346 auto *RequiresToken
= FormatTok
;
2348 parseRequiresClause(RequiresToken
);
2352 if (!InTemplateParameterList
)
2361 FormatTok
->setFinalizedType(TT_LambdaLBrace
);
2362 LSquare
.setFinalizedType(TT_LambdaLSquare
);
// Record the enclosing line's SeenDecltypeAuto on the NestedLambdas stack;
// the entry is popped again below.
2364 NestedLambdas
.push_back(Line
->SeenDecltypeAuto
);
2366 assert(!NestedLambdas
.empty());
2367 NestedLambdas
.pop_back();
// Attempts to parse the `[...]` introducer of a lambda, with FormatTok on
// the opening '['. The visible checks inspect the token before the '[' and
// the token right after it before handing over to parseSquare().
2372 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2373 const FormatToken
*Previous
= FormatTok
->Previous
;
2374 const FormatToken
*LeftSquare
= FormatTok
;
// Special-cased: the previous token carries identifier info (other than
// return/co_await/co_yield/co_return) or closes a scope, or the '[' begins a
// C++ structured binding.
2376 if ((Previous
&& ((Previous
->Tok
.getIdentifierInfo() &&
2377 !Previous
->isOneOf(tok::kw_return
, tok::kw_co_await
,
2378 tok::kw_co_yield
, tok::kw_co_return
)) ||
2379 Previous
->closesScope())) ||
2380 LeftSquare
->isCppStructuredBinding(IsCpp
)) {
// Special-cased: a nested '[' or a literal as the first token inside.
2383 if (FormatTok
->is(tok::l_square
) || tok::isLiteral(FormatTok
->Tok
.getKind()))
// Special-cased: an immediately closing ']' that is followed by '>'.
2385 if (FormatTok
->is(tok::r_square
)) {
2386 const FormatToken
*Next
= Tokens
->peekNextToken(/*SkipComment=*/true);
2387 if (Next
->is(tok::greater
))
2390 parseSquare(/*LambdaIntroducer=*/true);
// Parses a JavaScript function starting at the `function` keyword (asserted
// below). Per the step comments, it consumes the keyword, an optional '*'
// generator marker, an optional name, the parameter list and an optional
// ': type' annotation.
2394 void UnwrappedLineParser::tryToParseJSFunction() {
2395 assert(FormatTok
->is(Keywords
.kw_function
));
// NOTE(review): the assert above requires FormatTok to be kw_function, so it
// is unclear how this kw_async test can succeed -- confirm whether the assert
// is too strict or the check is stale.
2396 if (FormatTok
->is(Keywords
.kw_async
))
2398 // Consume "function".
2401 // Consume * (generator function). Treat it like C++'s overloaded operators.
2402 if (FormatTok
->is(tok::star
)) {
2403 FormatTok
->setFinalizedType(TT_OverloadedOperator
);
2407 // Consume function name.
2408 if (FormatTok
->is(tok::identifier
))
2411 if (FormatTok
->isNot(tok::l_paren
))
2414 // Parse formal parameter list.
2417 if (FormatTok
->is(tok::colon
)) {
2418 // Parse a type definition.
2421 // Eat the type declaration. For braced inline object types, balance braces,
2422 // otherwise just parse until finding an l_brace for the function body.
2423 if (FormatTok
->is(tok::l_brace
))
2424 tryToParseBracedList();
2426 while (!FormatTok
->isOneOf(tok::l_brace
, tok::semi
) && !eof())
2430 if (FormatTok
->is(tok::semi
))
// Attempts to parse the construct at FormatTok as a braced list. If the
// token's block kind is still BK_Unknown, calculateBraceTypes() is run first,
// and the block kind is asserted to be resolved afterwards. A BK_Block token
// is special-cased.
2436 bool UnwrappedLineParser::tryToParseBracedList() {
2437 if (FormatTok
->is(BK_Unknown
))
2438 calculateBraceTypes();
2439 assert(FormatTok
->isNot(BK_Unknown
));
2440 if (FormatTok
->is(BK_Block
))
// Attempts to parse a child block after a fat arrow (`=>`). Only valid for
// JavaScript and C# styles, and only with FormatTok on a TT_FatArrow token
// (both asserted below).
2447 bool UnwrappedLineParser::tryToParseChildBlock() {
2448 assert(Style
.isJavaScript() || Style
.isCSharp());
2449 assert(FormatTok
->is(TT_FatArrow
));
2450 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2451 // They always start an expression or a child block if followed by a curly
2454 if (FormatTok
->isNot(tok::l_brace
))
// Parses the contents of a braced list up to its closing token.
// \param IsAngleBracket when true the list is closed by '>' rather than '}'.
// \param IsEnum when true the list is treated as an enum body; it must not
//        be combined with IsAngleBracket (asserted below).
2460 bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket
, bool IsEnum
) {
2461 assert(!IsAngleBracket
|| !IsEnum
);
2462 bool HasError
= false;
2464 // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2465 // replace this by using parseAssignmentExpression() inside.
2467 if (Style
.isCSharp() && FormatTok
->is(TT_FatArrow
) &&
2468 tryToParseChildBlock()) {
2471 if (Style
.isJavaScript()) {
2472 if (FormatTok
->is(Keywords
.kw_function
)) {
2473 tryToParseJSFunction();
2476 if (FormatTok
->is(tok::l_brace
)) {
2477 // Could be a method inside of a braced list `{a() { return 1; }}`.
2478 if (tryToParseBracedList())
// Closing token reached: '>' for angle-bracket lists, '}' otherwise.
2483 if (FormatTok
->is(IsAngleBracket
? tok::greater
: tok::r_brace
)) {
2485 FormatTok
->setBlockKind(BK_Block
);
2486 if (!Style
.AllowShortEnumsOnASingleLine
)
2492 switch (FormatTok
->Tok
.getKind()) {
2494 if (Style
.isCSharp())
2501 // JavaScript can just have free standing methods and getters/setters in
2502 // object literals. Detect them by a "{" following ")".
2503 if (Style
.isJavaScript()) {
2504 if (FormatTok
->is(tok::l_brace
))
2510 // Assume there are no blocks inside a braced init list apart
2511 // from the ones we explicitly parse out (like lambdas).
2512 FormatTok
->setBlockKind(BK_BracedInit
);
2513 if (!IsAngleBracket
) {
2514 auto *Prev
= FormatTok
->Previous
;
2515 if (Prev
&& Prev
->is(tok::greater
))
2516 Prev
->setFinalizedType(TT_TemplateCloser
);
2524 parseBracedList(/*IsAngleBracket=*/true);
2527 // JavaScript (or more precisely TypeScript) can have semicolons in braced
2528 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2529 // used for error recovery if we have otherwise determined that this is
2531 if (Style
.isJavaScript()) {
2542 if (IsEnum
&& !Style
.AllowShortEnumsOnASingleLine
)
2553 /// \brief Parses a pair of parentheses (and everything between them).
2554 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2555 /// double ampersands. This applies for all nested scopes as well.
2557 /// Returns whether there is a `=` token between the parentheses.
///
/// The current token must be the opening `(` (asserted below).
/// NOTE(review): the embedded line numbers skip, so the loop header, most
/// `case` labels and the return statements are not visible in this listing.
2558 bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType
) {
2559 assert(FormatTok
->is(tok::l_paren
) && "'(' expected.");
2560 auto *LeftParen
= FormatTok
;
2561 bool SeenComma
= false;
2562 bool SeenEqual
= false;
2563 bool MightBeFoldExpr
= false;
// A `(` immediately followed by `{` might be a statement expression.
2564 const bool MightBeStmtExpr
= Tokens
->peekNextToken()->is(tok::l_brace
);
2567 switch (FormatTok
->Tok
.getKind()) {
// Nested parentheses are parsed recursively with the same AmpAmpTokenType.
2569 if (parseParens(AmpAmpTokenType
))
2571 if (Style
.Language
== FormatStyle::LK_Java
&& FormatTok
->is(tok::l_brace
))
2574 case tok::r_paren
: {
2575 auto *Prev
= LeftParen
->Previous
;
// With RemoveParentheses enabled, decide whether this pair is redundant; if
// so both parentheses are flagged Optional below.
2576 if (!MightBeStmtExpr
&& !MightBeFoldExpr
&& !Line
->InMacroBody
&&
2577 Style
.RemoveParentheses
> FormatStyle::RPS_Leave
) {
2578 const auto *Next
= Tokens
->peekNextToken();
// `((...))`: this pair sits directly inside another pair.
2579 const bool DoubleParens
=
2580 Prev
&& Prev
->is(tok::l_paren
) && Next
&& Next
->is(tok::r_paren
);
// `(...)` used as a whole element between `(`/`,` and `,`/`)`.
2581 const bool CommaSeparated
=
2582 !DoubleParens
&& Prev
&& Prev
->isOneOf(tok::l_paren
, tok::comma
) &&
2583 Next
&& Next
->isOneOf(tok::comma
, tok::r_paren
);
2584 const auto *PrevPrev
= Prev
? Prev
->getPreviousNonComment() : nullptr;
// Contexts (attribute/decltype, if/while conditions) in which double
// parentheses are not treated as redundant.
2585 const bool Excluded
=
2587 (PrevPrev
->isOneOf(tok::kw___attribute
, tok::kw_decltype
) ||
2590 (PrevPrev
->isOneOf(tok::kw_if
, tok::kw_while
) ||
2591 PrevPrev
->endsSequence(tok::kw_constexpr
, tok::kw_if
))));
// `return (...);` / `co_return (...);` under RPS_ReturnStatement.
2592 const bool ReturnParens
=
2593 Style
.RemoveParentheses
== FormatStyle::RPS_ReturnStatement
&&
2594 ((NestedLambdas
.empty() && !IsDecltypeAutoFunction
) ||
2595 (!NestedLambdas
.empty() && !NestedLambdas
.back())) &&
2596 Prev
&& Prev
->isOneOf(tok::kw_return
, tok::kw_co_return
) && Next
&&
2597 Next
->is(tok::semi
);
2598 if ((DoubleParens
&& !Excluded
) || (CommaSeparated
&& !SeenComma
) ||
2600 LeftParen
->Optional
= true;
2601 FormatTok
->Optional
= true;
// Parentheses of a typename macro are typed as type-declaration parens; an
// empty `()` right after `>` makes that `>` a template closer.
2605 if (Prev
->is(TT_TypenameMacro
)) {
2606 LeftParen
->setFinalizedType(TT_TypeDeclarationParen
);
2607 FormatTok
->setFinalizedType(TT_TypeDeclarationParen
);
2608 } else if (Prev
->is(tok::greater
) && FormatTok
->Previous
== LeftParen
) {
2609 Prev
->setFinalizedType(TT_TemplateCloser
);
2616 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2622 if (!tryToParseBracedList())
2627 if (FormatTok
->is(tok::l_brace
)) {
2637 MightBeFoldExpr
= true;
2642 if (Style
.isCSharp() && FormatTok
->is(TT_FatArrow
))
2643 tryToParseChildBlock();
2648 if (Style
.isJavaScript())
2649 parseRecord(/*ParseAsExpr=*/true);
2653 case tok::identifier
:
2654 if (Style
.isJavaScript() && (FormatTok
->is(Keywords
.kw_function
)))
2655 tryToParseJSFunction();
2659 case tok::kw_switch
:
2660 if (Style
.Language
== FormatStyle::LK_Java
)
2661 parseSwitch(/*IsExpr=*/true);
2665 case tok::kw_requires
: {
2666 auto RequiresToken
= FormatTok
;
2668 parseRequiresExpression(RequiresToken
);
// Apply the caller-requested token type (see \param AmpAmpTokenType).
2672 if (AmpAmpTokenType
!= TT_Unknown
)
2673 FormatTok
->setFinalizedType(AmpAmpTokenType
);
/// Parses a pair of square brackets and what lies between them.
///
/// \param LambdaIntroducer When false, the current token must be `[`
///        (asserted) and the brackets are first tried as a lambda via
///        tryToParseLambda().
/// NOTE(review): the loop header and most `case` labels are not visible in
/// this listing.
2683 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer
) {
2684 if (!LambdaIntroducer
) {
2685 assert(FormatTok
->is(tok::l_square
) && "'[' expected.");
2686 if (tryToParseLambda())
2690 switch (FormatTok
->Tok
.getKind()) {
2698 // A "}" inside square brackets is an error if there wasn't a matching "{".
2703 case tok::l_brace
: {
2704 if (!tryToParseBracedList())
2711 if (FormatTok
->is(tok::l_brace
)) {
/// Records a new nesting level in NestedTooDeep for the RemoveBracesLLVM
/// option.
///
/// Pushes `false` for the new level. If at least MaxNestingLevels entries
/// already exist, the entry MaxNestingLevels positions before the new one is
/// set to true first.
/// NOTE(review): the statement guarded by `!Style.RemoveBracesLLVM` is not
/// visible in this listing; presumably an early return.
2723 void UnwrappedLineParser::keepAncestorBraces() {
2724 if (!Style
.RemoveBracesLLVM
)
2727 const int MaxNestingLevels
= 2;
2728 const int Size
= NestedTooDeep
.size();
2729 if (Size
>= MaxNestingLevels
)
2730 NestedTooDeep
[Size
- MaxNestingLevels
] = true;
2731 NestedTooDeep
.push_back(false);
/// Searches the tokens of \p Line from back to front for the first one that
/// is not a comment.
/// NOTE(review): the return statements are not visible in this listing;
/// presumably that token is returned, or nullptr when there is none.
2734 static FormatToken
*getLastNonComment(const UnwrappedLine
&Line
) {
2735 for (const auto &Token
: llvm::reverse(Line
.Tokens
))
2736 if (Token
.Tok
->isNot(tok::comment
))
/// Parses the single statement that forms the unbraced body of a control
/// statement.
///
/// When Style.InsertBraces applies (not inside a preprocessor directive, line
/// not empty, body not a lone `;`), a token of the current line is selected
/// and its BraceCount is updated before the body is parsed.
/// NOTE(review): the sign convention of BraceCount is not established here —
/// confirm against FormatToken.h.
///
/// \param CheckEOF Tested together with eof() once the body has been parsed.
2742 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF
) {
2743 FormatToken
*Tok
= nullptr;
2745 if (Style
.InsertBraces
&& !Line
->InPPDirective
&& !Line
->Tokens
.empty() &&
2746 PreprocessorDirectives
.empty() && FormatTok
->isNot(tok::semi
)) {
// With BWACS_Never the last non-comment token is used; otherwise the last
// token of the line, whatever it is.
2747 Tok
= Style
.BraceWrapping
.AfterControlStatement
== FormatStyle::BWACS_Never
2748 ? getLastNonComment(*Line
)
2749 : Line
->Tokens
.back().Tok
;
2751 if (Tok
->BraceCount
< 0) {
2752 assert(Tok
->BraceCount
== -1);
2755 Tok
->BraceCount
= -1;
// Track that the statement parsed next belongs to an unbraced body.
2761 ++Line
->UnbracedBodyLevel
;
2762 parseStructuralElement();
2763 --Line
->UnbracedBodyLevel
;
2766 assert(!Line
->InPPDirective
);
// Walk the lines produced so far from the back and pick the last token of the
// first one that is outside a preprocessor directive and has a non-comment
// token.
2768 for (const auto &L
: llvm::reverse(*CurrentLines
)) {
2769 if (!L
.InPPDirective
&& getLastNonComment(L
)) {
2770 Tok
= L
.Tokens
.back().Tok
;
2778 if (CheckEOF
&& eof())
/// Flags \p LeftBrace and its matching right brace (LeftBrace->MatchingParen)
/// as Optional.
/// NOTE(review): the conditions between the asserts (e.g. for a null argument
/// or a missing matching brace) are not visible in this listing.
2784 static void markOptionalBraces(FormatToken
*LeftBrace
) {
2788 assert(LeftBrace
->is(tok::l_brace
));
2790 FormatToken
*RightBrace
= LeftBrace
->MatchingParen
;
2792 assert(!LeftBrace
->Optional
);
// Sanity checks: the two braces must reference each other and agree on their
// current Optional state.
2796 assert(RightBrace
->is(tok::r_brace
));
2797 assert(RightBrace
->MatchingParen
== LeftBrace
);
2798 assert(LeftBrace
->Optional
== RightBrace
->Optional
);
2800 LeftBrace
->Optional
= true;
2801 RightBrace
->Optional
= true;
/// Handles an attribute at the current token: either a token for which
/// isAttribute() is true, or a `[`-introduced attribute, which is delegated
/// to handleCppAttributes().
/// NOTE(review): the statement taken in the isAttribute() branch is not
/// visible in this listing.
2804 void UnwrappedLineParser::handleAttributes() {
2805 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2806 if (FormatTok
->isAttribute())
2808 else if (FormatTok
->is(tok::l_square
))
2809 handleCppAttributes();
/// Handles a C++ attribute that starts at the current `[` token (asserted),
/// using tryToParseSimpleAttribute().
/// NOTE(review): the return statements are not visible in this listing;
/// presumably the result reports whether an attribute was consumed.
2812 bool UnwrappedLineParser::handleCppAttributes() {
2813 // Handle [[likely]] / [[unlikely]] attributes.
2814 assert(FormatTok
->is(tok::l_square
));
2815 if (!tryToParseSimpleAttribute())
2821 /// Returns whether \c Tok begins a block.
/// For Verilog the decision is made by Keywords.isVerilogBegin(); for every
/// other language a block begins with `{`.
2822 bool UnwrappedLineParser::isBlockBegin(const FormatToken
&Tok
) const {
2823 // FIXME: rename the function or make
2824 // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2825 return Style
.isVerilog() ? Keywords
.isVerilogBegin(Tok
)
2826 : Tok
.is(tok::l_brace
);
/// Parses an `if` statement together with any `else` / `else if` chain. For
/// Verilog it also accepts the assert-like keywords listed in the assert
/// below.
///
/// \param IfKind Pointer to an IfStmtKind; presumably an out-parameter for
///        the kind of statement parsed (the store is not visible here).
/// \param IsVerilogAssert Set for Verilog assert-style statements, which
///        allow `#0` / `final` after the keyword and an optional then-action.
///
/// Returns a FormatToken pointer; the recursive call for `else if` stores it
/// as the else branch's left brace.
/// NOTE(review): a parameter line (embedded line 2830, presumably declaring
/// the KeepBraces used below) and the return statements are not visible in
/// this listing.
2829 FormatToken
*UnwrappedLineParser::parseIfThenElse(IfStmtKind
*IfKind
,
2831 bool IsVerilogAssert
) {
2832 assert((FormatTok
->is(tok::kw_if
) ||
2833 (Style
.isVerilog() &&
2834 FormatTok
->isOneOf(tok::kw_restrict
, Keywords
.kw_assert
,
2835 Keywords
.kw_assume
, Keywords
.kw_cover
))) &&
2839 if (IsVerilogAssert
) {
2840 // Handle `assert #0` and `assert final`.
2841 if (FormatTok
->is(Keywords
.kw_verilogHash
)) {
2843 if (FormatTok
->is(tok::numeric_constant
))
2845 } else if (FormatTok
->isOneOf(Keywords
.kw_final
, Keywords
.kw_property
,
2846 Keywords
.kw_sequence
)) {
2851 // TableGen's if statement has the form of `if <cond> then { ... }`.
2852 if (Style
.isTableGen()) {
2853 while (!eof() && FormatTok
->isNot(Keywords
.kw_then
)) {
2854 // Simply skip until then. This range only contains a value.
2859 // Handle `if !consteval`.
2860 if (FormatTok
->is(tok::exclaim
))
2863 bool KeepIfBraces
= true;
2864 if (FormatTok
->is(tok::kw_consteval
)) {
2867 KeepIfBraces
= !Style
.RemoveBracesLLVM
|| KeepBraces
;
2868 if (FormatTok
->isOneOf(tok::kw_constexpr
, tok::identifier
))
2870 if (FormatTok
->is(tok::l_paren
)) {
2871 FormatTok
->setFinalizedType(TT_ConditionLParen
);
2876 // The then action is optional in Verilog assert statements.
2877 if (IsVerilogAssert
&& FormatTok
->is(tok::semi
)) {
2883 bool NeedsUnwrappedLine
= false;
2884 keepAncestorBraces();
2886 FormatToken
*IfLeftBrace
= nullptr;
2887 IfStmtKind IfBlockKind
= IfStmtKind::NotIf
;
// Then-branch: a braced block, or (in the last branch) an unbraced body.
2889 if (isBlockBegin(*FormatTok
)) {
2890 FormatTok
->setFinalizedType(TT_ControlStatementLBrace
);
2891 IfLeftBrace
= FormatTok
;
2892 CompoundStatementIndenter
Indenter(this, Style
, Line
->Level
);
2893 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2894 /*MunchSemi=*/true, KeepIfBraces
, &IfBlockKind
);
2895 setPreviousRBraceType(TT_ControlStatementRBrace
);
2896 if (Style
.BraceWrapping
.BeforeElse
)
2899 NeedsUnwrappedLine
= true;
2900 } else if (IsVerilogAssert
&& FormatTok
->is(tok::kw_else
)) {
2903 parseUnbracedBody();
// The if-braces stay when the left brace has no MatchingParen, when nesting
// is too deep, or when the block was classified as IfOnly / IfElseIf.
2906 if (Style
.RemoveBracesLLVM
) {
2907 assert(!NestedTooDeep
.empty());
2908 KeepIfBraces
= KeepIfBraces
||
2909 (IfLeftBrace
&& !IfLeftBrace
->MatchingParen
) ||
2910 NestedTooDeep
.back() || IfBlockKind
== IfStmtKind::IfOnly
||
2911 IfBlockKind
== IfStmtKind::IfElseIf
;
2914 bool KeepElseBraces
= KeepIfBraces
;
2915 FormatToken
*ElseLeftBrace
= nullptr;
2916 IfStmtKind Kind
= IfStmtKind::IfOnly
;
2918 if (FormatTok
->is(tok::kw_else
)) {
2919 if (Style
.RemoveBracesLLVM
) {
2920 NestedTooDeep
.back() = false;
2921 Kind
= IfStmtKind::IfElse
;
// Else-branch: braced block, `else if` (handled recursively), or an unbraced
// body.
2925 if (isBlockBegin(*FormatTok
)) {
2926 const bool FollowedByIf
= Tokens
->peekNextToken()->is(tok::kw_if
);
2927 FormatTok
->setFinalizedType(TT_ElseLBrace
);
2928 ElseLeftBrace
= FormatTok
;
2929 CompoundStatementIndenter
Indenter(this, Style
, Line
->Level
);
2930 IfStmtKind ElseBlockKind
= IfStmtKind::NotIf
;
2931 FormatToken
*IfLBrace
=
2932 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2933 /*MunchSemi=*/true, KeepElseBraces
, &ElseBlockKind
);
2934 setPreviousRBraceType(TT_ElseRBrace
);
2935 if (FormatTok
->is(tok::kw_else
)) {
2936 KeepElseBraces
= KeepElseBraces
||
2937 ElseBlockKind
== IfStmtKind::IfOnly
||
2938 ElseBlockKind
== IfStmtKind::IfElseIf
;
2939 } else if (FollowedByIf
&& IfLBrace
&& !IfLBrace
->Optional
) {
2940 KeepElseBraces
= true;
2941 assert(ElseLeftBrace
->MatchingParen
);
2942 markOptionalBraces(ElseLeftBrace
);
2945 } else if (!IsVerilogAssert
&& FormatTok
->is(tok::kw_if
)) {
2946 const FormatToken
*Previous
= Tokens
->getPreviousToken();
2948 const bool IsPrecededByComment
= Previous
->is(tok::comment
);
2949 if (IsPrecededByComment
) {
// The nested `if` is parsed without this level's NestedTooDeep entry: it is
// popped here and pushed back after the recursive call.
2953 bool TooDeep
= true;
2954 if (Style
.RemoveBracesLLVM
) {
2955 Kind
= IfStmtKind::IfElseIf
;
2956 TooDeep
= NestedTooDeep
.pop_back_val();
2958 ElseLeftBrace
= parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces
);
2959 if (Style
.RemoveBracesLLVM
)
2960 NestedTooDeep
.push_back(TooDeep
);
2961 if (IsPrecededByComment
)
2964 parseUnbracedBody(/*CheckEOF=*/true);
2967 KeepIfBraces
= KeepIfBraces
|| IfBlockKind
== IfStmtKind::IfElse
;
2968 if (NeedsUnwrappedLine
)
2972 if (!Style
.RemoveBracesLLVM
)
2975 assert(!NestedTooDeep
.empty());
2976 KeepElseBraces
= KeepElseBraces
||
2977 (ElseLeftBrace
&& !ElseLeftBrace
->MatchingParen
) ||
2978 NestedTooDeep
.back();
2980 NestedTooDeep
.pop_back();
// Neither branch has to keep its braces: flag both pairs as optional.
2982 if (!KeepIfBraces
&& !KeepElseBraces
) {
2983 markOptionalBraces(IfLeftBrace
);
2984 markOptionalBraces(ElseLeftBrace
);
2985 } else if (IfLeftBrace
) {
// Braces are kept: clear the MatchingParen links of the if-brace pair.
2986 FormatToken
*IfRightBrace
= IfLeftBrace
->MatchingParen
;
2988 assert(IfRightBrace
->MatchingParen
== IfLeftBrace
);
2989 assert(!IfLeftBrace
->Optional
);
2990 assert(!IfRightBrace
->Optional
);
2991 IfLeftBrace
->MatchingParen
= nullptr;
2992 IfRightBrace
->MatchingParen
= nullptr;
/// Parses a `try` / `__try` statement together with its handlers.
///
/// The visible checks cover: function-try-blocks with a constructor
/// initializer list, Java try-with-resources, and handlers introduced by
/// `catch`, `__except`, `__finally`, `finally` (Java/JavaScript) and the
/// Objective-C `@catch` / `@finally` keywords.
/// NOTE(review): the loop headers and token-advancing statements between the
/// visible lines are not shown in this listing.
3002 void UnwrappedLineParser::parseTryCatch() {
3003 assert(FormatTok
->isOneOf(tok::kw_try
, tok::kw___try
) && "'try' expected");
3005 bool NeedsUnwrappedLine
= false;
3006 bool HasCtorInitializer
= false;
3007 if (FormatTok
->is(tok::colon
)) {
3008 auto *Colon
= FormatTok
;
3009 // We are in a function try block, what comes is an initializer list.
3011 if (FormatTok
->is(tok::identifier
)) {
3012 HasCtorInitializer
= true;
3013 Colon
->setFinalizedType(TT_CtorInitializerColon
);
3016 // In case identifiers were removed by clang-tidy, what might follow is
3017 // multiple commas in sequence - before the first identifier.
3018 while (FormatTok
->is(tok::comma
))
3021 while (FormatTok
->is(tok::identifier
)) {
3023 if (FormatTok
->is(tok::l_paren
)) {
3025 } else if (FormatTok
->is(tok::l_brace
)) {
3030 // In case identifiers were removed by clang-tidy, what might follow is
3031 // multiple commas in sequence - after the first identifier.
3032 while (FormatTok
->is(tok::comma
))
3036 // Parse try with resource.
3037 if (Style
.Language
== FormatStyle::LK_Java
&& FormatTok
->is(tok::l_paren
))
3040 keepAncestorBraces();
3042 if (FormatTok
->is(tok::l_brace
)) {
// After a constructor initializer list the try-block's brace is the function
// body's brace.
3043 if (HasCtorInitializer
)
3044 FormatTok
->setFinalizedType(TT_FunctionLBrace
);
3045 CompoundStatementIndenter
Indenter(this, Style
, Line
->Level
);
3047 if (Style
.BraceWrapping
.BeforeCatch
)
3050 NeedsUnwrappedLine
= true;
3051 } else if (FormatTok
->isNot(tok::kw_catch
)) {
3052 // The C++ standard requires a compound-statement after a try.
3053 // If there's none, we try to assume there's a structuralElement
3054 // and try to continue.
3057 parseStructuralElement();
3061 if (FormatTok
->is(tok::at
))
// Continue only while the current token introduces another handler.
3063 if (!(FormatTok
->isOneOf(tok::kw_catch
, Keywords
.kw___except
,
3064 tok::kw___finally
) ||
3065 ((Style
.Language
== FormatStyle::LK_Java
|| Style
.isJavaScript()) &&
3066 FormatTok
->is(Keywords
.kw_finally
)) ||
3067 (FormatTok
->isObjCAtKeyword(tok::objc_catch
) ||
3068 FormatTok
->isObjCAtKeyword(tok::objc_finally
)))) {
3072 while (FormatTok
->isNot(tok::l_brace
)) {
3073 if (FormatTok
->is(tok::l_paren
)) {
// The handler ended without a body: undo the nesting level pushed by
// keepAncestorBraces().
3077 if (FormatTok
->isOneOf(tok::semi
, tok::r_brace
, tok::eof
)) {
3078 if (Style
.RemoveBracesLLVM
)
3079 NestedTooDeep
.pop_back();
3084 NeedsUnwrappedLine
= false;
3085 Line
->MustBeDeclaration
= false;
3086 CompoundStatementIndenter
Indenter(this, Style
, Line
->Level
);
3088 if (Style
.BraceWrapping
.BeforeCatch
)
3091 NeedsUnwrappedLine
= true;
3094 if (Style
.RemoveBracesLLVM
)
3095 NestedTooDeep
.pop_back();
3097 if (NeedsUnwrappedLine
)
/// Parses a namespace, or an invocation of a TT_NamespaceMacro, and its body.
///
/// The opening brace is typed TT_NamespaceLBrace. The number of indentation
/// levels added for the body depends on Style.NamespaceIndentation, and a
/// semicolon after the closing brace is munched together with the block.
/// NOTE(review): the token-advancing statements and the values selected for
/// AddLevels are not visible in this listing.
3101 void UnwrappedLineParser::parseNamespace() {
3102 assert(FormatTok
->isOneOf(tok::kw_namespace
, TT_NamespaceMacro
) &&
3103 "'namespace' expected");
3105 const FormatToken
&InitialToken
= *FormatTok
;
3107 if (InitialToken
.is(TT_NamespaceMacro
)) {
// Skip over the tokens that may appear between the keyword and the `{`.
3110 while (FormatTok
->isOneOf(tok::identifier
, tok::coloncolon
, tok::kw_inline
,
3111 tok::l_square
, tok::period
, tok::l_paren
) ||
3112 (Style
.isCSharp() && FormatTok
->is(tok::kw_union
))) {
3113 if (FormatTok
->is(tok::l_square
))
3115 else if (FormatTok
->is(tok::l_paren
))
3121 if (FormatTok
->is(tok::l_brace
)) {
3122 FormatTok
->setFinalizedType(TT_NamespaceLBrace
);
3124 if (ShouldBreakBeforeBrace(Style
, InitialToken
))
3127 unsigned AddLevels
=
3128 Style
.NamespaceIndentation
== FormatStyle::NI_All
||
3129 (Style
.NamespaceIndentation
== FormatStyle::NI_Inner
&&
3130 DeclarationScopeStack
.size() > 1)
3133 bool ManageWhitesmithsBraces
=
3135 Style
.BreakBeforeBraces
== FormatStyle::BS_Whitesmiths
;
3137 // If we're in Whitesmiths mode, indent the brace if we're not indenting
3139 if (ManageWhitesmithsBraces
)
3142 // Munch the semicolon after a namespace. This is more common than one would
3143 // think. Putting the semicolon into its own line is very ugly.
3144 parseBlock(/*MustBeDeclaration=*/true, AddLevels
, /*MunchSemi=*/true,
3145 /*KeepBraces=*/true, /*IfKind=*/nullptr,
3146 ManageWhitesmithsBraces
);
3148 addUnwrappedLine(AddLevels
> 0 ? LineLevel::Remove
: LineLevel::Keep
);
3150 if (ManageWhitesmithsBraces
)
3153 // FIXME: Add error handling.
/// Parses a `new` expression; the current token must be `new` (asserted).
///
/// C# gets dedicated handling for constructor invocations and array
/// initializers. Beyond that, only Java is parsed further: everything up to
/// the parentheses, the parentheses themselves and an optional anonymous
/// class body.
/// NOTE(review): the loop and the statements taken in each branch are not
/// visible in this listing.
3156 void UnwrappedLineParser::parseNew() {
3157 assert(FormatTok
->is(tok::kw_new
) && "'new' expected");
3160 if (Style
.isCSharp()) {
3162 // Handle constructor invocation, e.g. `new(field: value)`.
3163 if (FormatTok
->is(tok::l_paren
))
3166 // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
3167 if (FormatTok
->is(tok::l_brace
))
3170 if (FormatTok
->isOneOf(tok::semi
, tok::comma
))
3177 if (Style
.Language
!= FormatStyle::LK_Java
)
3180 // In Java, we can parse everything up to the parens, which aren't optional.
3182 // There should not be a ;, { or } before the new's open paren.
3183 if (FormatTok
->isOneOf(tok::semi
, tok::l_brace
, tok::r_brace
))
3186 // Consume the parens.
3187 if (FormatTok
->is(tok::l_paren
)) {
3190 // If there is a class body of an anonymous class, consume that as child.
3191 if (FormatTok
->is(tok::l_brace
))
/// Parses the body of a loop, which is either a braced block or a single
/// unbraced statement.
///
/// \param KeepBraces Forwarded to parseBlock() when the body is braced.
/// \param WrapRightBrace NOTE(review): its use is not visible in this
///        listing.
///
/// Registers a nesting level through keepAncestorBraces() on entry and pops
/// NestedTooDeep again at the end.
3199 void UnwrappedLineParser::parseLoopBody(bool KeepBraces
, bool WrapRightBrace
) {
3200 keepAncestorBraces();
3202 if (isBlockBegin(*FormatTok
)) {
3203 FormatTok
->setFinalizedType(TT_ControlStatementLBrace
);
3204 FormatToken
*LeftBrace
= FormatTok
;
3205 CompoundStatementIndenter
Indenter(this, Style
, Line
->Level
);
3206 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3207 /*MunchSemi=*/true, KeepBraces
);
3208 setPreviousRBraceType(TT_ControlStatementRBrace
);
// The braces become optional unless this level was flagged as nested too
// deep.
3210 assert(!NestedTooDeep
.empty());
3211 if (!NestedTooDeep
.back())
3212 markOptionalBraces(LeftBrace
);
3217 parseUnbracedBody();
3221 NestedTooDeep
.pop_back();
/// Parses a `for`, `while` or foreach-macro loop, and for Verilog the
/// procedural/loop keywords listed in the assert below.
///
/// \param HasParens Whether a parenthesized header is looked for after the
///        keyword.
/// NOTE(review): the token-advancing statements between the visible lines are
/// not shown in this listing.
3224 void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens
) {
3225 assert((FormatTok
->isOneOf(tok::kw_for
, tok::kw_while
, TT_ForEachMacro
) ||
3226 (Style
.isVerilog() &&
3227 FormatTok
->isOneOf(Keywords
.kw_always
, Keywords
.kw_always_comb
,
3228 Keywords
.kw_always_ff
, Keywords
.kw_always_latch
,
3229 Keywords
.kw_final
, Keywords
.kw_initial
,
3230 Keywords
.kw_foreach
, Keywords
.kw_forever
,
3231 Keywords
.kw_repeat
))) &&
3232 "'for', 'while' or foreach macro expected");
// Braces are only candidates for removal on plain `for` / `while` loops.
3233 const bool KeepBraces
= !Style
.RemoveBracesLLVM
||
3234 !FormatTok
->isOneOf(tok::kw_for
, tok::kw_while
);
3237 // JS' for await ( ...
3238 if (Style
.isJavaScript() && FormatTok
->is(Keywords
.kw_await
))
3240 if (IsCpp
&& FormatTok
->is(tok::kw_co_await
))
3242 if (HasParens
&& FormatTok
->is(tok::l_paren
)) {
3243 // The type is only set for Verilog basically because we were afraid to
3244 // change the existing behavior for loops. See the discussion on D121756 for
3246 if (Style
.isVerilog())
3247 FormatTok
->setFinalizedType(TT_ConditionLParen
);
3251 if (Style
.isVerilog()) {
3253 parseVerilogSensitivityList();
// A lone `;` right after the closing parenthesis, when short loops are
// allowed on a single line.
3254 } else if (Style
.AllowShortLoopsOnASingleLine
&& FormatTok
->is(tok::semi
) &&
3255 Tokens
->getPreviousToken()->is(tok::r_paren
)) {
3262 parseLoopBody(KeepBraces
, /*WrapRightBrace=*/true);
/// Parses a `do ... while (...)` statement; the current token must be `do`
/// (asserted).
///
/// The body is parsed with KeepBraces set, and the trailing `while` token is
/// typed TT_DoWhile before the rest of the statement is parsed.
3265 void UnwrappedLineParser::parseDoWhile() {
3266 assert(FormatTok
->is(tok::kw_do
) && "'do' expected");
3269 parseLoopBody(/*KeepBraces=*/true, Style
.BraceWrapping
.BeforeWhile
);
3271 // FIXME: Add error handling.
3272 if (FormatTok
->isNot(tok::kw_while
)) {
3277 FormatTok
->setFinalizedType(TT_DoWhile
);
3279 // If in Whitesmiths mode, the line with the while() needs to be indented
3280 // to the same level as the block.
3281 if (Style
.BreakBeforeBraces
== FormatStyle::BS_Whitesmiths
)
3285 parseStructuralElement();
/// Parses a label and the statement or block that follows it.
///
/// Line->Level is saved on entry and restored after the label has been
/// handled.
///
/// \param LeftAlignLabel NOTE(review): its use is on a line that is not
///        visible in this listing (presumably the `if` that pairs with the
///        `else if` below).
3288 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel
) {
3290 unsigned OldLineLevel
= Line
->Level
;
3294 else if (Line
->Level
> 1 || (!Line
->InPPDirective
&& Line
->Level
> 0))
// A block directly after the label, with no comments in between.
3297 if (!Style
.IndentCaseBlocks
&& CommentsBeforeNextToken
.empty() &&
3298 FormatTok
->is(tok::l_brace
)) {
3300 CompoundStatementIndenter
Indenter(this, Line
->Level
,
3301 Style
.BraceWrapping
.AfterCaseLabel
,
3302 Style
.BraceWrapping
.IndentBraces
);
3304 if (FormatTok
->is(tok::kw_break
)) {
3305 if (Style
.BraceWrapping
.AfterControlStatement
==
3306 FormatStyle::BWACS_Always
) {
3308 if (!Style
.IndentCaseBlocks
&&
3309 Style
.BreakBeforeBraces
== FormatStyle::BS_Whitesmiths
) {
3313 parseStructuralElement();
3317 if (FormatTok
->is(tok::semi
))
3321 Line
->Level
= OldLineLevel
;
3322 if (FormatTok
->isNot(tok::l_brace
)) {
3323 parseStructuralElement();
/// Parses a `case` label; the current token must be `case` (asserted).
///
/// A terminating `:` is typed TT_CaseLabelColon. In Java, a `->` is typed
/// TT_CaseLabelArrow and the `case` token itself becomes
/// TT_SwitchExpressionLabel.
/// NOTE(review): the loop that skips the label expression is not visible in
/// this listing.
3328 void UnwrappedLineParser::parseCaseLabel() {
3329 assert(FormatTok
->is(tok::kw_case
) && "'case' expected");
3330 auto *Case
= FormatTok
;
3332 // FIXME: fix handling of complex expressions here.
3335 if (FormatTok
->is(tok::colon
)) {
3336 FormatTok
->setFinalizedType(TT_CaseLabelColon
);
3339 if (Style
.Language
== FormatStyle::LK_Java
&& FormatTok
->is(tok::arrow
)) {
3340 FormatTok
->setFinalizedType(TT_CaseLabelArrow
);
3341 Case
->setFinalizedType(TT_SwitchExpressionLabel
);
/// Parses a `switch`; the current token must be `switch` (asserted).
///
/// \param IsExpr Selects TT_SwitchExpressionLBrace instead of
///        TT_ControlStatementLBrace as the type of the opening brace.
///
/// Registers a nesting level through keepAncestorBraces() and pops
/// NestedTooDeep again at the end when RemoveBracesLLVM is enabled.
3348 void UnwrappedLineParser::parseSwitch(bool IsExpr
) {
3349 assert(FormatTok
->is(tok::kw_switch
) && "'switch' expected");
3351 if (FormatTok
->is(tok::l_paren
))
3354 keepAncestorBraces();
3356 if (FormatTok
->is(tok::l_brace
)) {
3357 CompoundStatementIndenter
Indenter(this, Style
, Line
->Level
);
3358 FormatTok
->setFinalizedType(IsExpr
? TT_SwitchExpressionLBrace
3359 : TT_ControlStatementLBrace
);
3364 setPreviousRBraceType(TT_ControlStatementRBrace
);
3370 parseStructuralElement();
3374 if (Style
.RemoveBracesLLVM
)
3375 NestedTooDeep
.pop_back();
3378 // Operators that can follow a C variable.
// Classifies \p Kind by token kind; the visible labels are assignment,
// comparison, shift and increment/decrement operators.
// NOTE(review): several case labels and the return statements are not visible
// in this listing; presumably the listed kinds yield true and everything else
// false.
3379 static bool isCOperatorFollowingVar(tok::TokenKind Kind
) {
3385 case tok::caretequal
:
3389 case tok::equalequal
:
3391 case tok::exclaimequal
:
3393 case tok::greaterequal
:
3394 case tok::greatergreater
:
3395 case tok::greatergreaterequal
:
3399 case tok::lessequal
:
3401 case tok::lesslessequal
:
3403 case tok::minusequal
:
3404 case tok::minusminus
:
3406 case tok::percentequal
:
3409 case tok::pipeequal
:
3412 case tok::plusequal
:
3420 case tok::slashequal
:
3422 case tok::starequal
:
/// Parses an access specifier, including Qt's `slots` / `Q_SLOTS`.
///
/// The token is remembered as a candidate. If what follows is `::` or an
/// operator accepted by isCOperatorFollowingVar(), the candidate is
/// reclassified as tok::identifier.
/// NOTE(review): the token-advancing statements and the bodies of the first
/// two branches are not visible in this listing.
3429 void UnwrappedLineParser::parseAccessSpecifier() {
3430 FormatToken
*AccessSpecifierCandidate
= FormatTok
;
3432 // Understand Qt's slots.
3433 if (FormatTok
->isOneOf(Keywords
.kw_slots
, Keywords
.kw_qslots
))
3435 // Otherwise, we don't know what it is, and we'd better keep the next token.
3436 if (FormatTok
->is(tok::colon
)) {
3439 } else if (FormatTok
->isNot(tok::coloncolon
) &&
3440 !isCOperatorFollowingVar(FormatTok
->Tok
.getKind())) {
3441 // Not a variable name nor namespace name.
3443 } else if (AccessSpecifierCandidate
) {
3444 // Consider the access specifier to be a C identifier.
3445 AccessSpecifierCandidate
->Tok
.setKind(tok::identifier
);
3449 /// \brief Parses a requires, decides if it is a clause or an expression.
3450 /// \pre The current token has to be the requires keyword.
3451 /// \returns true if it parsed a clause.
///
/// The decision is made in three stages: by the token after `requires`, by
/// the token before it, and finally by a bounded lookahead into the
/// parentheses.
/// NOTE(review): most `case` labels and all return statements are not visible
/// in this listing.
3452 bool UnwrappedLineParser::parseRequires() {
3453 assert(FormatTok
->is(tok::kw_requires
) && "'requires' expected");
3454 auto RequiresToken
= FormatTok
;
3456 // We try to guess if it is a requires clause, or a requires expression. For
3457 // that we first consume the keyword and check the next token.
3460 switch (FormatTok
->Tok
.getKind()) {
3462 // This can only be an expression, never a clause.
3463 parseRequiresExpression(RequiresToken
);
3466 // Clauses and expression can start with a paren, it's unclear what we have.
3469 // All other tokens can only be a clause.
3470 parseRequiresClause(RequiresToken
);
3474 // Looking forward we would have to decide if there are function declaration
3475 // like arguments to the requires expression:
3477 // Or there is a constraint expression for the requires clause:
3478 // requires (C<T> && ...
3480 // But first let's look behind.
3481 auto *PreviousNonComment
= RequiresToken
->getPreviousNonComment();
3483 if (!PreviousNonComment
||
3484 PreviousNonComment
->is(TT_RequiresExpressionLBrace
)) {
3485 // If there is no token, or an expression left brace, we are a requires
3486 // clause within a requires expression.
3487 parseRequiresClause(RequiresToken
);
3491 switch (PreviousNonComment
->Tok
.getKind()) {
3494 case tok::kw_noexcept
:
3497 // This is a requires clause.
3498 parseRequiresClause(RequiresToken
);
3501 // This can be either:
3502 // if (... && requires (T t) ...)
3504 // void member(...) && requires (C<T> ...
3505 // We check the one token before that for a const:
3506 // void member(...) const && requires (C<T> ...
3507 auto PrevPrev
= PreviousNonComment
->getPreviousNonComment();
3508 if (PrevPrev
&& PrevPrev
->is(tok::kw_const
)) {
3509 parseRequiresClause(RequiresToken
);
3515 if (PreviousNonComment
->isTypeOrIdentifier(LangOpts
)) {
3516 // This is a requires clause.
3517 parseRequiresClause(RequiresToken
);
3520 // It's an expression.
3521 parseRequiresExpression(RequiresToken
);
3525 // Now we look forward and try to check if the paren content is a parameter
3526 // list. The parameters can be cv-qualified and contain references or
3528 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3529 // of stuff: typename, const, *, &, &&, ::, identifiers.
// Remember the stream position so that the lookahead can be undone with
// setPosition() before the real parse starts.
3531 unsigned StoredPosition
= Tokens
->getPosition();
3532 FormatToken
*NextToken
= Tokens
->getNextToken();
3534 auto PeekNext
= [&Lookahead
, &NextToken
, this] {
3536 NextToken
= Tokens
->getNextToken();
3539 bool FoundType
= false;
3540 bool LastWasColonColon
= false;
// The lookahead is bounded (Lookahead < 50).
3543 for (; Lookahead
< 50; PeekNext()) {
3544 switch (NextToken
->Tok
.getKind()) {
3545 case tok::kw_volatile
:
3548 if (OpenAngles
== 0) {
3549 FormatTok
= Tokens
->setPosition(StoredPosition
);
3550 parseRequiresExpression(RequiresToken
);
3555 // Break out of the loop.
3558 case tok::coloncolon
:
3559 LastWasColonColon
= true;
3561 case tok::kw_decltype
:
3562 case tok::identifier
:
// A second name after a type, outside `<...>` and not following `::`, is
// taken as TYPE NAME, i.e. a parameter list.
3563 if (FoundType
&& !LastWasColonColon
&& OpenAngles
== 0) {
3564 FormatTok
= Tokens
->setPosition(StoredPosition
);
3565 parseRequiresExpression(RequiresToken
);
3569 LastWasColonColon
= false;
3578 if (NextToken
->isTypeName(LangOpts
)) {
3579 FormatTok
= Tokens
->setPosition(StoredPosition
);
3580 parseRequiresExpression(RequiresToken
);
3586 // This seems to be a complicated expression, just assume it's a clause.
3587 FormatTok
= Tokens
->setPosition(StoredPosition
);
3588 parseRequiresClause(RequiresToken
);
3592 /// \brief Parses a requires clause.
3593 /// \param RequiresToken The requires keyword token, which starts this clause.
3594 /// \pre We need to be on the next token after the requires keyword.
3595 /// \sa parseRequiresExpression
3597 /// Returns once it has either finished parsing the clause or detected that
3598 /// the clause is incorrect.
3599 void UnwrappedLineParser::parseRequiresClause(FormatToken
*RequiresToken
) {
3600 assert(FormatTok
->getPreviousNonComment() == RequiresToken
);
3601 assert(RequiresToken
->is(tok::kw_requires
) && "'requires' expected");
3603 // If there is no previous token, we are within a requires expression,
3604 // otherwise we will always have the template or function declaration in front
3606 bool InRequiresExpression
=
3607 !RequiresToken
->Previous
||
3608 RequiresToken
->Previous
->is(TT_RequiresExpressionLBrace
);
3610 RequiresToken
->setFinalizedType(InRequiresExpression
3611 ? TT_RequiresClauseInARequiresExpression
3612 : TT_RequiresClause
);
3614 // NOTE: parseConstraintExpression is only ever called from this function.
3615 // It could be inlined into here.
3616 parseConstraintExpression();
// Outside a requires expression, the token just before the current one is
// flagged as the one that closes the clause.
3618 if (!InRequiresExpression
)
3619 FormatTok
->Previous
->ClosesRequiresClause
= true;
3622 /// \brief Parses a requires expression.
3623 /// \param RequiresToken The requires keyword token, which starts this expression.
3624 /// \pre We need to be on the next token after the requires keyword.
3625 /// \sa parseRequiresClause
3627 /// Returns once it has either finished parsing the expression or detected
3628 /// that the expression is incorrect.
3629 void UnwrappedLineParser::parseRequiresExpression(FormatToken
*RequiresToken
) {
3630 assert(FormatTok
->getPreviousNonComment() == RequiresToken
);
3631 assert(RequiresToken
->is(tok::kw_requires
) && "'requires' expected");
3633 RequiresToken
->setFinalizedType(TT_RequiresExpression
);
// Optional parameter list: `requires (...)`.
3635 if (FormatTok
->is(tok::l_paren
)) {
3636 FormatTok
->setFinalizedType(TT_RequiresExpressionLParen
);
// Requirement body: `{ ... }`.
3640 if (FormatTok
->is(tok::l_brace
)) {
3641 FormatTok
->setFinalizedType(TT_RequiresExpressionLBrace
);
3646 /// \brief Parses a constraint expression.
3648 /// This is the body of a requires clause. It returns when the parsing is
3649 /// complete or the expression is incorrect.
///
/// NOTE(review): the loop header, many `case` labels and the exits from the
/// loop are not visible in this listing.
3650 void UnwrappedLineParser::parseConstraintExpression() {
3651 // The special handling for lambdas is needed since tryToParseLambda() eats a
3652 // token and if a requires expression is the last part of a requires clause
3653 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3654 // not set on the correct token. Thus we need to be aware if we even expect a
3655 // lambda to be possible.
3656 // template <typename T> requires requires { ... } [[nodiscard]] ...;
3657 bool LambdaNextTimeAllowed
= true;
3659 // Within lambda declarations, it is permitted to put a requires clause after
3660 // its template parameter list, which would place the requires clause right
3661 // before the parentheses of the parameters of the lambda declaration. Thus,
3662 // we track if we expect to see grouping parentheses at all.
3663 // Without this check, `requires foo<T> (T t)` in the below example would be
3664 // seen as the whole requires clause, accidentally eating the parameters of
3666 // [&]<typename T> requires foo<T> (T t) { ... };
3667 bool TopLevelParensAllowed
= true;
// Read the flag for the current token and reset it to false for the next
// one; the cases below set it again where a lambda may follow.
3670 bool LambdaThisTimeAllowed
= std::exchange(LambdaNextTimeAllowed
, false);
3672 switch (FormatTok
->Tok
.getKind()) {
3673 case tok::kw_requires
: {
3674 auto RequiresToken
= FormatTok
;
3676 parseRequiresExpression(RequiresToken
);
3681 if (!TopLevelParensAllowed
)
3683 parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator
);
3684 TopLevelParensAllowed
= false;
3688 if (!LambdaThisTimeAllowed
|| !tryToParseLambda())
3695 case tok::kw_struct
:
3700 // Potential function body.
3705 FormatTok
->setFinalizedType(TT_BinaryOperator
);
3707 LambdaNextTimeAllowed
= true;
3708 TopLevelParensAllowed
= true;
3713 LambdaNextTimeAllowed
= LambdaThisTimeAllowed
;
3717 case tok::kw_sizeof
:
3719 case tok::greaterequal
:
3720 case tok::greatergreater
:
3722 case tok::lessequal
:
3724 case tok::equalequal
:
3726 case tok::exclaimequal
:
3731 LambdaNextTimeAllowed
= true;
3732 TopLevelParensAllowed
= true;
3737 case tok::numeric_constant
:
3738 case tok::coloncolon
:
3741 TopLevelParensAllowed
= false;
3746 case tok::kw_static_cast
:
3747 case tok::kw_const_cast
:
3748 case tok::kw_reinterpret_cast
:
3749 case tok::kw_dynamic_cast
:
3751 if (FormatTok
->isNot(tok::less
))
3755 parseBracedList(/*IsAngleBracket=*/true);
3759 if (!FormatTok
->Tok
.getIdentifierInfo()) {
3760 // Identifiers are part of the default case, we check for more than
3761 // tok::identifier to handle builtin type traits.
3765 // We need to differentiate identifiers for a template deduction guide,
3766 // variables, or function return types (the constraint expression has
3767 // ended before that), and basically all other cases. But it's easier to
3768 // check the other way around.
3769 assert(FormatTok
->Previous
);
3770 switch (FormatTok
->Previous
->Tok
.getKind()) {
3771 case tok::coloncolon
: // Nested identifier.
3772 case tok::ampamp
: // Start of a function or variable for the
3773 case tok::pipepipe
: // constraint expression. (binary)
3774 case tok::exclaim
: // The same as above, but unary.
3775 case tok::kw_requires
: // Initial identifier of a requires clause.
3776 case tok::equal
: // Initial identifier of a concept declaration.
3782 // Read identifier with optional template declaration.
3784 if (FormatTok
->is(tok::less
)) {
3786 parseBracedList(/*IsAngleBracket=*/true);
3788 TopLevelParensAllowed
= false;
// Parses an enum construct starting at the current token. The visible code
// special-cases JavaScript/TypeScript, protobuf, Verilog and Java, tags the
// opening brace as TT_EnumLBrace / BK_Block, parses the enumerators through
// parseBracedList(IsEnum=true) and tags the closing brace as TT_EnumRBrace.
// NOTE(review): the return statements are not visible in this excerpt;
// presumably the bool tells the caller whether an enum was really parsed --
// confirm against the call sites.
3794 bool UnwrappedLineParser::parseEnum() {
// Keep the introducing token; it is passed to ShouldBreakBeforeBrace() below.
3795 const FormatToken
&InitialToken
= *FormatTok
;
3797 // Won't be 'enum' for NS_ENUMs.
3798 if (FormatTok
->is(tok::kw_enum
))
3801 // In TypeScript, "enum" can also be used as property name, e.g. in interface
3802 // declarations. An "enum" keyword followed by a colon would be a syntax
3803 // error and thus assume it is just an identifier.
3804 if (Style
.isJavaScript() && FormatTok
->isOneOf(tok::colon
, tok::question
))
3807 // In protobuf, "enum" can be used as a field name.
3808 if (Style
.Language
== FormatStyle::LK_Proto
&& FormatTok
->is(tok::equal
))
3812 // Eat up enum class ...
3813 if (FormatTok
->isOneOf(tok::kw_class
, tok::kw_struct
))
// Handle attribute specifiers that begin with '['.
3815 while (FormatTok
->is(tok::l_square
))
3816 if (!handleCppAttributes())
// Walk over the tokens that may make up the enum name and base-type clause:
// anything with identifier info plus the punctuation listed below.
3820 while (FormatTok
->Tok
.getIdentifierInfo() ||
3821 FormatTok
->isOneOf(tok::colon
, tok::coloncolon
, tok::less
,
3822 tok::greater
, tok::comma
, tok::question
,
3824 if (Style
.isVerilog()) {
3825 FormatTok
->setFinalizedType(TT_VerilogDimensionedTypeName
);
3827 // In Verilog the base type can have dimensions.
3828 while (FormatTok
->is(tok::l_square
))
3833 // We can have macros or attributes in between 'enum' and the enum name.
3834 if (FormatTok
->is(tok::l_paren
))
3836 if (FormatTok
->is(tok::identifier
)) {
3838 // If there are two identifiers in a row, this is likely an elaborate
3839 // return type. In Java, this can be "implements", etc.
3840 if (IsCpp
&& FormatTok
->is(tok::identifier
))
3845 // Just a declaration or something is wrong.
3846 if (FormatTok
->isNot(tok::l_brace
))
// From here on the current token is the '{' that opens the enum body.
3848 FormatTok
->setFinalizedType(TT_EnumLBrace
);
3849 FormatTok
->setBlockKind(BK_Block
);
3851 if (Style
.Language
== FormatStyle::LK_Java
) {
3852 // Java enums are different.
3853 parseJavaEnumBody();
// Protobuf enum bodies are parsed as ordinary declaration blocks.
3856 if (Style
.Language
== FormatStyle::LK_Proto
) {
3857 parseBlock(/*MustBeDeclaration=*/true);
// The AllowShortEnumsOnASingleLine option gates the extra handling around the
// braced list in the three checks below.
3861 if (!Style
.AllowShortEnumsOnASingleLine
&&
3862 ShouldBreakBeforeBrace(Style
, InitialToken
)) {
3867 if (!Style
.AllowShortEnumsOnASingleLine
) {
// Parse the enumerators as a braced list in enum mode; a false result from
// parseBracedList is recorded as an error.
3871 bool HasError
= !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true);
3872 if (!Style
.AllowShortEnumsOnASingleLine
)
3875 if (FormatTok
->is(tok::semi
))
3879 setPreviousRBraceType(TT_EnumRBrace
);
3882 // There is no addUnwrappedLine() here so that we fall through to parsing a
3883 // structural element afterwards. Thus, in "enum A {} n, m;",
3884 // "} n, m;" will end up in one unwrapped line.
// Parses a struct-like record and decides how the surrounding unwrapped line
// is finished. The language check (Java / JavaScript / ...) and the ';' check
// below choose between the handling paths.
// NOTE(review): the branch bodies and return values are not visible in this
// excerpt -- confirm the meaning of the bool result against the callers.
3887 bool UnwrappedLineParser::parseStructLike() {
3888 // parseRecord falls through and does not yet add an unwrapped line as a
3889 // record declaration or definition can start a structural element.
3891 // This does not apply to Java, JavaScript and C#.
3892 if (Style
.Language
== FormatStyle::LK_Java
|| Style
.isJavaScript() ||
3894 if (FormatTok
->is(tok::semi
))
3903 // A class used to set and restore the Token position when peeking
3904 // ahead in the token source.
3905 class ScopedTokenPosition
{
// Position of the token source captured at construction time.
3906 unsigned StoredPosition
;
// Token source being guarded; asserted to be non-null in the constructor.
3907 FormatTokenSource
*Tokens
;
// Records the current position of \p Tokens so it can be restored later.
3910 ScopedTokenPosition(FormatTokenSource
*Tokens
) : Tokens(Tokens
) {
3911 assert(Tokens
&& "Tokens expected to not be null");
3912 StoredPosition
= Tokens
->getPosition();
// Rewinds the token source to the position recorded by the constructor.
3915 ~ScopedTokenPosition() { Tokens
->setPosition(StoredPosition
); }
3919 // Look to see if we have [[ by looking ahead; if
3920 // it's not, then rewind to the original position.
// The ScopedTokenPosition guard created first restores the token-source
// position when this function returns, so the look-ahead below only peeks.
// NOTE(review): the return statements are not visible in this excerpt.
3921 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3922 ScopedTokenPosition
AutoPosition(Tokens
);
3923 FormatToken
*Tok
= Tokens
->getNextToken();
3924 // We already read the first [ check for the second.
3925 if (Tok
->isNot(tok::l_square
))
3927 // Double check that the attribute is just something
// Scan forward until a ']' or the end of the token stream is reached.
3929 while (Tok
->isNot(tok::eof
)) {
3930 if (Tok
->is(tok::r_square
))
3932 Tok
= Tokens
->getNextToken();
3934 if (Tok
->is(tok::eof
))
// The token after the first ']' is checked for being a second ']'.
3936 Tok
= Tokens
->getNextToken();
3937 if (Tok
->isNot(tok::r_square
))
// Finally inspect whether the token following "]]" is a ';'.
3939 Tok
= Tokens
->getNextToken();
3940 if (Tok
->is(tok::semi
))
// Parses the body of a Java enum. The current token must be the opening '{'
// (asserted). A look-ahead pass first classifies the enum as "simple" or not;
// afterwards the token position is reset and the body is parsed for real.
3945 void UnwrappedLineParser::parseJavaEnumBody() {
3946 assert(FormatTok
->is(tok::l_brace
));
// Remembered so it can be handed to parseLevel() at the end.
3947 const FormatToken
*OpeningBrace
= FormatTok
;
3949 // Determine whether the enum is simple, i.e. does not have a semicolon or
3950 // constants with class bodies. Simple enums can be formatted like braced
3951 // lists, contracted to a single line, etc.
3952 unsigned StoredPosition
= Tokens
->getPosition();
3953 bool IsSimple
= true;
3954 FormatToken
*Tok
= Tokens
->getNextToken();
// Look-ahead scan: runs until end of input unless one of the checks on '}',
// '{' or ';' below ends it.
3955 while (Tok
->isNot(tok::eof
)) {
3956 if (Tok
->is(tok::r_brace
))
3958 if (Tok
->isOneOf(tok::l_brace
, tok::semi
)) {
3962 // FIXME: This will also mark enums with braces in the arguments to enum
3963 // constants as "not simple". This is probably fine in practice, though.
3964 Tok
= Tokens
->getNextToken();
// Undo the look-ahead: rewind the token source and reload the current token.
3966 FormatTok
= Tokens
->setPosition(StoredPosition
);
3975 // Parse the body of a more complex enum.
3976 // First add a line for everything up to the "{".
3981 // Parse the enum constants.
3983 if (FormatTok
->is(tok::l_brace
)) {
3984 // Parse the constant's class body.
3985 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3986 /*MunchSemi=*/false);
3987 } else if (FormatTok
->is(tok::l_paren
)) {
3989 } else if (FormatTok
->is(tok::comma
)) {
3992 } else if (FormatTok
->is(tok::semi
)) {
3996 } else if (FormatTok
->is(tok::r_brace
)) {
4004 // Parse the class body after the enum's ";" if any.
4005 parseLevel(OpeningBrace
);
// Parses a record (class / struct / union / similar keyword) starting at the
// current token. The function walks the name and any attributes or macros in
// front of it, then the part introduced by ':' or '<', and finally, when a
// '{' follows, tags the braces with the record-specific token types and parses
// the body with parseBlock().
// NOTE(review): how \p ParseAsExpr is used is not visible in this excerpt.
4011 void UnwrappedLineParser::parseRecord(bool ParseAsExpr
) {
// The introducing token; used for macro detection, for choosing the brace
// token types and for ShouldBreakBeforeBrace().
4012 const FormatToken
&InitialToken
= *FormatTok
;
// Name token of the record once one has been identified, nullptr otherwise.
4015 const FormatToken
*ClassName
= nullptr;
4016 bool IsDerived
= false;
// An identifier whose text differs from its upper-cased form, i.e. one that is
// not written entirely in capitals.
4017 auto IsNonMacroIdentifier
= [](const FormatToken
*Tok
) {
4018 return Tok
->is(tok::identifier
) && Tok
->TokenText
!= Tok
->TokenText
.upper();
4020 // JavaScript/TypeScript supports anonymous classes like:
4021 // a = class extends foo { }
4022 bool JSPastExtendsOrImplements
= false;
4023 // The actual identifier can be a nested name specifier, and in macros
4024 // it is often token-pasted.
4025 // An [[attribute]] can be before the identifier.
4026 while (FormatTok
->isOneOf(tok::identifier
, tok::coloncolon
, tok::hashhash
,
4027 tok::kw_alignas
, tok::l_square
) ||
4028 FormatTok
->isAttribute() ||
4029 ((Style
.Language
== FormatStyle::LK_Java
|| Style
.isJavaScript()) &&
4030 FormatTok
->isOneOf(tok::period
, tok::comma
))) {
4031 if (Style
.isJavaScript() &&
4032 FormatTok
->isOneOf(Keywords
.kw_extends
, Keywords
.kw_implements
)) {
4033 JSPastExtendsOrImplements
= true;
4034 // JavaScript/TypeScript supports inline object types in
4035 // extends/implements positions:
4036 // class Foo implements {bar: number} { }
4038 if (FormatTok
->is(tok::l_brace
)) {
4039 tryToParseBracedList();
4043 if (FormatTok
->is(tok::l_square
) && handleCppAttributes())
// Snapshot of the current token, inspected in the switch below.
4045 const auto *Previous
= FormatTok
;
4047 switch (FormatTok
->Tok
.getKind()) {
4049 // We can have macros in between 'class' and the class name.
4050 if (!IsNonMacroIdentifier(Previous
) ||
4051 // e.g. `struct macro(a) S { int i; };`
4052 Previous
->Previous
== &InitialToken
) {
4056 case tok::coloncolon
:
// Record the first plain identifier (not an attribute macro, and not after a
// JS extends/implements) as the class name.
4060 if (!JSPastExtendsOrImplements
&& !ClassName
&&
4061 Previous
->is(tok::identifier
) && Previous
->isNot(TT_AttributeMacro
)) {
4062 ClassName
= Previous
;
// Decides whether the '{' at the current token starts a braced list rather
// than a record body. Note that the final tryToParseBracedList() call is only
// reached when all preceding checks hold.
4067 auto IsListInitialization
= [&] {
4068 if (!ClassName
|| IsDerived
|| JSPastExtendsOrImplements
)
4070 assert(FormatTok
->is(tok::l_brace
));
4071 const auto *Prev
= FormatTok
->getPreviousNonComment();
4073 return Prev
!= ClassName
&& Prev
->is(tok::identifier
) &&
4074 Prev
->isNot(Keywords
.kw_final
) && tryToParseBracedList();
// Section introduced by ':' or '<'; '<' / '>' nesting is tracked so that the
// checks guarded by AngleNestingLevel == 0 only apply outside angle brackets.
4077 if (FormatTok
->isOneOf(tok::colon
, tok::less
)) {
4078 int AngleNestingLevel
= 0;
4080 if (FormatTok
->is(tok::less
))
4081 ++AngleNestingLevel
;
4082 else if (FormatTok
->is(tok::greater
))
4083 --AngleNestingLevel
;
4085 if (AngleNestingLevel
== 0) {
4086 if (FormatTok
->is(tok::colon
)) {
4088 } else if (FormatTok
->is(tok::identifier
) &&
4089 FormatTok
->Previous
->is(tok::coloncolon
)) {
4090 ClassName
= FormatTok
;
4091 } else if (FormatTok
->is(tok::l_paren
) &&
4092 IsNonMacroIdentifier(FormatTok
->Previous
)) {
4096 if (FormatTok
->is(tok::l_brace
)) {
4097 if (AngleNestingLevel
== 0 && IsListInitialization())
4099 calculateBraceTypes(/*ExpectClassBody=*/true);
4100 if (!tryToParseBracedList())
4103 if (FormatTok
->is(tok::l_square
)) {
4104 FormatToken
*Previous
= FormatTok
->Previous
;
4105 if (!Previous
|| (Previous
->isNot(tok::r_paren
) &&
4106 !Previous
->isTypeOrIdentifier(LangOpts
))) {
4107 // Don't try parsing a lambda if we had a closing parenthesis before,
4108 // it was probably a pointer to an array: int (*)[].
4109 if (!tryToParseLambda())
4116 if (FormatTok
->is(tok::semi
))
4118 if (Style
.isCSharp() && FormatTok
->is(Keywords
.kw_where
)) {
4121 parseCSharpGenericTypeConstraint();
// Maps the record keyword to the pair of token types used for its opening and
// closing brace; the last return is the generic fallback.
4128 auto GetBraceTypes
=
4129 [](const FormatToken
&RecordTok
) -> std::pair
<TokenType
, TokenType
> {
4130 switch (RecordTok
.Tok
.getKind()) {
4132 return {TT_ClassLBrace
, TT_ClassRBrace
};
4133 case tok::kw_struct
:
4134 return {TT_StructLBrace
, TT_StructRBrace
};
4136 return {TT_UnionLBrace
, TT_UnionRBrace
};
4138 // Useful for e.g. interface.
4139 return {TT_RecordLBrace
, TT_RecordRBrace
};
// Record body: tag the opening brace, parse the block, tag the closing brace.
4142 if (FormatTok
->is(tok::l_brace
)) {
4143 if (IsListInitialization())
4145 auto [OpenBraceType
, ClosingBraceType
] = GetBraceTypes(InitialToken
);
4146 FormatTok
->setFinalizedType(OpenBraceType
);
4150 if (ShouldBreakBeforeBrace(Style
, InitialToken
))
// Two indentation levels are added for the body when IndentAccessModifiers is
// enabled, one otherwise.
4153 unsigned AddLevels
= Style
.IndentAccessModifiers
? 2u : 1u;
4154 parseBlock(/*MustBeDeclaration=*/true, AddLevels
, /*MunchSemi=*/false);
4156 setPreviousRBraceType(ClosingBraceType
);
4158 // There is no addUnwrappedLine() here so that we fall through to parsing a
4159 // structural element afterwards. Thus, in "class A {} n, m;",
4160 // "} n, m;" will end up in one unwrapped line.
// Parses an Objective-C method. On entry the current token must be '(' or an
// identifier (asserted). The visible checks distinguish a terminating ';'
// from a '{'; for the '{' case the BraceWrapping.AfterFunction option is
// consulted.
// NOTE(review): the statements executed in each branch are not visible in
// this excerpt.
4163 void UnwrappedLineParser::parseObjCMethod() {
4164 assert(FormatTok
->isOneOf(tok::l_paren
, tok::identifier
) &&
4165 "'(' or identifier expected.");
4167 if (FormatTok
->is(tok::semi
)) {
4171 } else if (FormatTok
->is(tok::l_brace
)) {
4172 if (Style
.BraceWrapping
.AfterFunction
)
// Parses an Objective-C protocol list. The current token must be '<'
// (asserted). Tokens are processed until a '>' or end of input is reached,
// and the '>' is then skipped. A ';', '{' or @end seen before that is treated
// as a sign of a missing '>'.
4183 void UnwrappedLineParser::parseObjCProtocolList() {
4184 assert(FormatTok
->is(tok::less
) && "'<' expected.");
4187 // Early exit in case someone forgot a close angle.
4188 if (FormatTok
->isOneOf(tok::semi
, tok::l_brace
) ||
4189 FormatTok
->isObjCAtKeyword(tok::objc_end
)) {
4192 } while (!eof() && FormatTok
->isNot(tok::greater
));
4193 nextToken(); // Skip '>'.
// Parses the contents of an Objective-C container, dispatching on the current
// token: @end, '{', a stray '}', a leading '-' / '+', and otherwise a generic
// structural element.
// NOTE(review): the enclosing loop and the bodies of most branches are not
// visible in this excerpt.
4196 void UnwrappedLineParser::parseObjCUntilAtEnd() {
4198 if (FormatTok
->isObjCAtKeyword(tok::objc_end
)) {
4203 if (FormatTok
->is(tok::l_brace
)) {
4205 // In ObjC interfaces, nothing should be following the "}".
4207 } else if (FormatTok
->is(tok::r_brace
)) {
4208 // Ignore stray "}". parseStructuralElement doesn't consume them.
4211 } else if (FormatTok
->isOneOf(tok::minus
, tok::plus
)) {
4215 parseStructuralElement();
// Parses an Objective-C @interface or @implementation. The current token must
// carry one of those two ObjC keyword IDs (asserted). After the name, the
// optional parts are handled in this order: lightweight generics, a base
// class introduced by ':' or a parenthesised category, a protocol list, and an
// instance-variable block in braces. The remainder is handed to
// parseObjCUntilAtEnd().
4220 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
4221 assert(FormatTok
->Tok
.getObjCKeywordID() == tok::objc_interface
||
4222 FormatTok
->Tok
.getObjCKeywordID() == tok::objc_implementation
);
4224 nextToken(); // interface name
4226 // @interface can be followed by a lightweight generic
4227 // specialization list, then either a base class or a category.
4228 if (FormatTok
->is(tok::less
))
4229 parseObjCLightweightGenerics();
4230 if (FormatTok
->is(tok::colon
)) {
4232 nextToken(); // base class name
4233 // The base class can also have lightweight generics applied to it.
4234 if (FormatTok
->is(tok::less
))
4235 parseObjCLightweightGenerics();
4236 } else if (FormatTok
->is(tok::l_paren
)) {
4237 // Skip category, if present.
4241 if (FormatTok
->is(tok::less
))
4242 parseObjCProtocolList();
// Optional brace-delimited block, parsed as declarations.
4244 if (FormatTok
->is(tok::l_brace
)) {
4245 if (Style
.BraceWrapping
.AfterObjCDeclaration
)
4247 parseBlock(/*MustBeDeclaration=*/true);
4250 // With instance variables, this puts '}' on its own line. Without instance
4251 // variables, this ends the @interface line.
4254 parseObjCUntilAtEnd();
// Parses an Objective-C lightweight-generics list. The current token must be
// '<' (asserted). Because such lists can nest, the number of still-open angle
// brackets is counted; the loop ends when the count reaches zero or at end of
// input, after which the final '>' is skipped.
4257 void UnwrappedLineParser::parseObjCLightweightGenerics() {
4258 assert(FormatTok
->is(tok::less
));
4259 // Unlike protocol lists, generic parameterizations support
4262 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4263 // NSObject <NSCopying, NSSecureCoding>
4265 // so we need to count how many open angles we have left.
// Starts at 1 for the '<' the parser is currently positioned on.
4266 unsigned NumOpenAngles
= 1;
4269 // Early exit in case someone forgot a close angle.
4270 if (FormatTok
->isOneOf(tok::semi
, tok::l_brace
) ||
4271 FormatTok
->isObjCAtKeyword(tok::objc_end
)) {
4274 if (FormatTok
->is(tok::less
)) {
4276 } else if (FormatTok
->is(tok::greater
)) {
// The counter is unsigned, so guard against wrapping below zero.
4277 assert(NumOpenAngles
> 0 && "'>' makes NumOpenAngles negative");
4280 } while (!eof() && NumOpenAngles
!= 0);
4281 nextToken(); // Skip '>'.
4284 // Returns true for the declaration/definition form of @protocol,
4285 // false for the expression form.
// The current token must be the @protocol keyword (asserted). A following '('
// selects the expression form; otherwise the name, an optional protocol list
// and either a ';' (declaration) or a body parsed by parseObjCUntilAtEnd()
// are handled.
4286 bool UnwrappedLineParser::parseObjCProtocol() {
4287 assert(FormatTok
->Tok
.getObjCKeywordID() == tok::objc_protocol
);
4290 if (FormatTok
->is(tok::l_paren
)) {
4291 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4295 // The definition/declaration form,
4297 // - (int)someMethod;
4300 nextToken(); // protocol name
4302 if (FormatTok
->is(tok::less
))
4303 parseObjCProtocolList();
4305 // Check for protocol declaration.
4306 if (FormatTok
->is(tok::semi
)) {
4313 parseObjCUntilAtEnd();
// Parses a JavaScript ES6 import or export statement. The current token must
// be the import keyword or 'export' (asserted). Certain export forms are left
// for the regular structural-element parsing; imports and the `export *` /
// `export {...}` forms are consumed up to the terminating ';'.
4317 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
// Evaluated before any token is consumed; distinguishes import from export
// for the rest of the function.
4318 bool IsImport
= FormatTok
->is(Keywords
.kw_import
);
4319 assert(IsImport
|| FormatTok
->is(tok::kw_export
));
4322 // Consume the "default" in "export default class/function".
4323 if (FormatTok
->is(tok::kw_default
))
4326 // Consume "async function", "function" and "default function", so that these
4327 // get parsed as free-standing JS functions, i.e. do not require a trailing
4329 if (FormatTok
->is(Keywords
.kw_async
))
4331 if (FormatTok
->is(Keywords
.kw_function
)) {
4336 // For imports, `export *`, `export {...}`, consume the rest of the line up
4337 // to the terminating `;`. For everything else, just return and continue
4338 // parsing the structural element, i.e. the declaration or expression for
4339 // `export default`.
4340 if (!IsImport
&& !FormatTok
->isOneOf(tok::l_brace
, tok::star
) &&
4341 !FormatTok
->isStringLiteral() &&
4342 !(FormatTok
->is(Keywords
.kw_type
) &&
4343 Tokens
->peekNextToken()->isOneOf(tok::l_brace
, tok::star
))) {
4348 if (FormatTok
->is(tok::semi
))
4350 if (Line
->Tokens
.empty()) {
4351 // Common issue: Automatic Semicolon Insertion wrapped the line, so the
4352 // import statement should terminate.
// A '{' met while consuming the statement is marked as a block brace.
4355 if (FormatTok
->is(tok::l_brace
)) {
4356 FormatTok
->setBlockKind(BK_Block
);
// Parses a statement macro. The visible code checks for an optional '(' and
// an optional ';' after the macro name.
// NOTE(review): the statements executed for each check are not visible in
// this excerpt.
4365 void UnwrappedLineParser::parseStatementMacro() {
4367 if (FormatTok
->is(tok::l_paren
))
4369 if (FormatTok
->is(tok::semi
))
// Consumes a Verilog hierarchical identifier. Accepted pieces are Verilog
// identifiers, the punctuation tokens '*', '.', '.*', '::' and '#', and
// bracketed parts introduced by '['.
4374 void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4375 // consume things like a::`b.c[d:e] or a::*
4377 if (FormatTok
->isOneOf(tok::star
, tok::period
, tok::periodstar
,
4378 tok::coloncolon
, tok::hash
) ||
4379 Keywords
.isVerilogIdentifier(*FormatTok
)) {
4381 } else if (FormatTok
->is(tok::l_square
)) {
// Parses a Verilog sensitivity list, which is introduced by '@'. The first
// check tests for the absence of that '@'; a second '@' is accepted for block
// event expressions. The token after the '@' sign(s) selects the form of the
// list; parseVerilogHierarchyIdentifier() handles one of those forms.
4389 void UnwrappedLineParser::parseVerilogSensitivityList() {
4390 if (FormatTok
->isNot(tok::at
))
4393 // A block event expression has 2 at signs.
4394 if (FormatTok
->is(tok::at
))
4396 switch (FormatTok
->Tok
.getKind()) {
4404 parseVerilogHierarchyIdentifier();
// Parses the header of a Verilog hierarchy construct. Three shapes are
// distinguished by the visible code: `clocking` headers, the case-like
// keywords (case / casex / casez / randcase / randsequence), and everything
// else, for which package imports, parameter and port lists, extends /
// implements clauses and a coverage event are handled in turn.
// NOTE(review): the return statement is not visible in this excerpt;
// presumably the AddLevels counter declared below is returned -- confirm.
4409 unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
4410 unsigned AddLevels
= 0;
4412 if (FormatTok
->is(Keywords
.kw_clocking
)) {
4414 if (Keywords
.isVerilogIdentifier(*FormatTok
))
4416 parseVerilogSensitivityList();
4417 if (FormatTok
->is(tok::semi
))
4419 } else if (FormatTok
->isOneOf(tok::kw_case
, Keywords
.kw_casex
,
4420 Keywords
.kw_casez
, Keywords
.kw_randcase
,
4421 Keywords
.kw_randsequence
)) {
4422 if (Style
.IndentCaseLabels
)
// The parenthesis opening the case condition gets a dedicated token type.
4425 if (FormatTok
->is(tok::l_paren
)) {
4426 FormatTok
->setFinalizedType(TT_ConditionLParen
);
4429 if (FormatTok
->isOneOf(Keywords
.kw_inside
, Keywords
.kw_matches
))
4431 // The case header has no semicolon.
4435 // all the words like the name of the module and specifiers like
4436 // "automatic" and the width of function return type
4438 if (FormatTok
->is(tok::l_square
)) {
// An identifier directly before '[' is tagged as a dimensioned type name.
4439 auto Prev
= FormatTok
->getPreviousNonComment();
4440 if (Prev
&& Keywords
.isVerilogIdentifier(*Prev
))
4441 Prev
->setFinalizedType(TT_VerilogDimensionedTypeName
);
4443 } else if (Keywords
.isVerilogIdentifier(*FormatTok
) ||
4444 FormatTok
->isOneOf(tok::hash
, tok::hashhash
, tok::coloncolon
,
4445 Keywords
.kw_automatic
, tok::kw_static
)) {
// Helper used by the sections below; among other things it flags the current
// line as a continuation.
4452 auto NewLine
= [this]() {
4454 Line
->IsContinuation
= true;
// Package import clauses in the header.
4458 while (FormatTok
->is(Keywords
.kw_import
)) {
4461 parseVerilogHierarchyIdentifier();
4462 if (FormatTok
->is(tok::semi
))
4466 // parameters and ports
4467 if (FormatTok
->is(Keywords
.kw_verilogHash
)) {
4470 if (FormatTok
->is(tok::l_paren
)) {
4471 FormatTok
->setFinalizedType(TT_VerilogMultiLineListLParen
);
4475 if (FormatTok
->is(tok::l_paren
)) {
4477 FormatTok
->setFinalizedType(TT_VerilogMultiLineListLParen
);
4481 // extends and implements
4482 if (FormatTok
->is(Keywords
.kw_extends
)) {
4485 parseVerilogHierarchyIdentifier();
4486 if (FormatTok
->is(tok::l_paren
))
4489 if (FormatTok
->is(Keywords
.kw_implements
)) {
4493 parseVerilogHierarchyIdentifier();
4494 } while (FormatTok
->is(tok::comma
));
4497 // Coverage event for cover groups.
4498 if (FormatTok
->is(tok::at
)) {
4500 parseVerilogSensitivityList();
// Consume the ';' ending the header, passing a level difference of one.
4503 if (FormatTok
->is(tok::semi
))
4504 nextToken(/*LevelDifference=*/1);
// Parses a Verilog `table` section. The current token must be the table
// keyword (asserted). The line level is raised by one while the rows are
// processed and restored afterwards; the symbols '*', ':', '?' and '-' inside
// the table are tagged as TT_VerilogTableItem.
4511 void UnwrappedLineParser::parseVerilogTable() {
4512 assert(FormatTok
->is(Keywords
.kw_table
));
4513 nextToken(/*LevelDifference=*/1);
// Post-increment: InitialLevel keeps the level from before the increment.
4516 auto InitialLevel
= Line
->Level
++;
// Process tokens until end of input or a Verilog end keyword.
4517 while (!eof() && !Keywords
.isVerilogEnd(*FormatTok
)) {
4518 FormatToken
*Tok
= FormatTok
;
4520 if (Tok
->is(tok::semi
))
4522 else if (Tok
->isOneOf(tok::star
, tok::colon
, tok::question
, tok::minus
))
4523 Tok
->setFinalizedType(TT_VerilogTableItem
);
4525 Line
->Level
= InitialLevel
;
4526 nextToken(/*LevelDifference=*/-1);
// Parses the statement that follows a Verilog case label. The indentation
// level may be adjusted before the structural element is parsed; afterwards
// the original level is restored both on the current line and on the first
// line that was added while parsing.
4530 void UnwrappedLineParser::parseVerilogCaseLabel() {
4531 // The label will get unindented in AnnotatingParser. If there are no leading
4532 // spaces, indent the rest here so that things inside the block will be
4533 // indented relative to things outside. We don't use parseLabel because we
4534 // don't know whether this colon is a label or a ternary expression at this
4536 auto OrigLevel
= Line
->Level
;
// Index at which the first line produced by the parse below will be stored.
4537 auto FirstLine
= CurrentLines
->size();
4538 if (Line
->Level
== 0 || (Line
->InPPDirective
&& Line
->Level
<= 1))
4540 else if (!Style
.IndentCaseBlocks
&& Keywords
.isVerilogBegin(*FormatTok
))
4542 parseStructuralElement();
4543 // Restore the indentation in both the new line and the line that has the
// Only touch CurrentLines when the parse above actually appended a line.
4545 if (CurrentLines
->size() > FirstLine
)
4546 (*CurrentLines
)[FirstLine
].Level
= OrigLevel
;
4547 Line
->Level
= OrigLevel
;
// Examines \p Line for tokens that carry a macro context (MacroCtx), looking
// at the line's own tokens and, recursively, at the child lines attached to
// each token.
// NOTE(review): the return statements are not visible in this excerpt;
// presumably true is returned as soon as such a token is found -- confirm.
4550 bool UnwrappedLineParser::containsExpansion(const UnwrappedLine
&Line
) const {
4551 for (const auto &N
: Line
.Tokens
) {
4552 if (N
.Tok
->MacroCtx
)
4554 for (const UnwrappedLine
&Child
: N
.Children
)
4555 if (containsExpansion(Child
))
// Finishes the unwrapped line currently being built and stores it. Lines that
// contain macro-expanded tokens are fed to the macro-call reconstructor and
// kept in CurrentExpandedLines; other lines are appended to CurrentLines.
// Afterwards the per-line state is reset, queued preprocessor directive lines
// are appended, and the current token is unlinked from the previous line.
// \p AdjustLevel is checked together with Whitesmiths brace style after the
// line has been added.
4561 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel
) {
4562 if (Line
->Tokens
.empty())
// Debug output of the line about to be added.
4565 if (!parsingPPDirective()) {
4566 llvm::dbgs() << "Adding unwrapped line:\n";
4567 printDebugInfo(*Line
);
4571 // If this line closes a block when in Whitesmiths mode, remember that
4572 // information so that the level can be decreased after the line is added.
4573 // This has to happen after the addition of the line since the line itself
4574 // needs to be indented.
4575 bool ClosesWhitesmithsBlock
=
4576 Line
->MatchingOpeningBlockLineIndex
!= UnwrappedLine::kInvalidIndex
&&
4577 Style
.BreakBeforeBraces
== FormatStyle::BS_Whitesmiths
;
4579 // If the current line was expanded from a macro call, we use it to
4580 // reconstruct an unwrapped line from the structure of the expanded unwrapped
4581 // line and the unexpanded token stream.
4582 if (!parsingPPDirective() && !InExpansion
&& containsExpansion(*Line
)) {
4584 Reconstruct
.emplace(Line
->Level
, Unexpanded
);
4585 Reconstruct
->addLine(*Line
);
4587 // While the reconstructed unexpanded lines are stored in the normal
4588 // flow of lines, the expanded lines are stored on the side to be analyzed
4589 // in an extra step.
4590 CurrentExpandedLines
.push_back(std::move(*Line
));
// Once the reconstructor reports completion, emit the unexpanded line and
// file the collected expanded lines under its first token.
4592 if (Reconstruct
->finished()) {
4593 UnwrappedLine Reconstructed
= std::move(*Reconstruct
).takeResult();
4594 assert(!Reconstructed
.Tokens
.empty() &&
4595 "Reconstructed must at least contain the macro identifier.");
4596 assert(!parsingPPDirective());
4598 llvm::dbgs() << "Adding unexpanded line:\n";
4599 printDebugInfo(Reconstructed
);
4601 ExpandedLines
[Reconstructed
.Tokens
.begin()->Tok
] = CurrentExpandedLines
;
4602 Lines
.push_back(std::move(Reconstructed
));
4603 CurrentExpandedLines
.clear();
4604 Reconstruct
.reset();
4607 // At the top level we only get here when no unexpansion is going on, or
4608 // when conditional formatting led to unfinished macro reconstructions.
4609 assert(!Reconstruct
|| (CurrentLines
!= &Lines
) || PPStack
.size() > 0);
4610 CurrentLines
->push_back(std::move(*Line
));
// Reset the state of the line object so that it can be reused for the next
// unwrapped line.
4612 Line
->Tokens
.clear();
4613 Line
->MatchingOpeningBlockLineIndex
= UnwrappedLine::kInvalidIndex
;
4614 Line
->FirstStartColumn
= 0;
4615 Line
->IsContinuation
= false;
4616 Line
->SeenDecltypeAuto
= false;
4618 if (ClosesWhitesmithsBlock
&& AdjustLevel
== LineLevel::Remove
)
// Append preprocessor directive lines that were held back, then clear the
// queue.
4620 if (!parsingPPDirective() && !PreprocessorDirectives
.empty()) {
4621 CurrentLines
->append(
4622 std::make_move_iterator(PreprocessorDirectives
.begin()),
4623 std::make_move_iterator(PreprocessorDirectives
.end()));
4624 PreprocessorDirectives
.clear();
4626 // Disconnect the current token from the last token on the previous line.
4627 FormatTok
->Previous
= nullptr;
4630 bool UnwrappedLineParser::eof() const { return FormatTok
->is(tok::eof
); }
4632 bool UnwrappedLineParser::isOnNewLine(const FormatToken
&FormatTok
) {
4633 return (Line
->InPPDirective
|| FormatTok
.HasUnescapedNewline
) &&
4634 FormatTok
.NewlinesBefore
> 0;
4637 // Checks if \p FormatTok is a line comment that continues the line comment
4638 // section on \p Line.
// \p Style supplies the ReflowComments setting, and \p CommentPragmasRegex is
// matched against the comment text with its two-character introducer removed.
// The final decision is delegated to continuesLineComment(), using the "min
// column token" computed below.
4640 continuesLineCommentSection(const FormatToken
&FormatTok
,
4641 const UnwrappedLine
&Line
, const FormatStyle
&Style
,
4642 const llvm::Regex
&CommentPragmasRegex
) {
// Early check: an empty line, or ReflowComments not set to RCS_Always.
4643 if (Line
.Tokens
.empty() || Style
.ReflowComments
!= FormatStyle::RCS_Always
)
// Drop a leading "//" or "/*" before testing against the pragma regex.
4646 StringRef IndentContent
= FormatTok
.TokenText
;
4647 if (FormatTok
.TokenText
.starts_with("//") ||
4648 FormatTok
.TokenText
.starts_with("/*")) {
4649 IndentContent
= FormatTok
.TokenText
.substr(2);
4651 if (CommentPragmasRegex
.match(IndentContent
))
4654 // If Line starts with a line comment, then FormatTok continues the comment
4655 // section if its original column is greater or equal to the original start
4656 // column of the line.
4658 // Define the min column token of a line as follows: if a line ends in '{' or
4659 // contains a '{' followed by a line comment, then the min column token is
4660 // that '{'. Otherwise, the min column token of the line is the first token of
4663 // If Line starts with a token other than a line comment, then FormatTok
4664 // continues the comment section if its original column is greater than the
4665 // original start column of the min column token of the line.
4667 // For example, the second line comment continues the first in these cases:
4679 // int i; // first line
4684 // do { // first line
4697 // The second line comment doesn't continue the first in these cases:
4704 // int i; // first line
4709 // do { // first line
// Default: the first token of the line.
4720 const FormatToken
*MinColumnToken
= Line
.Tokens
.front().Tok
;
4722 // Scan for '{//'. If found, use the column of '{' as a min column for line
4723 // comment section continuation.
4724 const FormatToken
*PreviousToken
= nullptr;
4725 for (const UnwrappedLineNode
&Node
: Line
.Tokens
) {
4726 if (PreviousToken
&& PreviousToken
->is(tok::l_brace
) &&
4727 isLineComment(*Node
.Tok
)) {
4728 MinColumnToken
= PreviousToken
;
4731 PreviousToken
= Node
.Tok
;
4733 // Grab the last newline preceding a token in this unwrapped line.
4734 if (Node
.Tok
->NewlinesBefore
> 0)
4735 MinColumnToken
= Node
.Tok
;
// A line whose last token is '{' uses that brace as the min column token.
4737 if (PreviousToken
&& PreviousToken
->is(tok::l_brace
))
4738 MinColumnToken
= PreviousToken
;
4740 return continuesLineComment(FormatTok
, /*Previous=*/Line
.Tokens
.back().Tok
,
// Processes the comments queued in CommentsBeforeNextToken and then empties
// the queue. For each comment it records whether the comment continues the
// line comment section of the current line. \p NewlineBeforeNext is combined
// with JustComments in a final check after the loop.
4744 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext
) {
// True when the current line has no tokens at the time of the call.
4745 bool JustComments
= Line
->Tokens
.empty();
4746 for (FormatToken
*Tok
: CommentsBeforeNextToken
) {
4747 // Line comments that belong to the same line comment section are put on the
4748 // same line since later we might want to reflow content between them.
4749 // Additional fine-grained breaking of line comment sections is controlled
4750 // by the class BreakableLineCommentSection in case it is desirable to keep
4751 // several line comment sections in the same unwrapped line.
4753 // FIXME: Consider putting separate line comment sections as children to the
4754 // unwrapped line instead.
4755 Tok
->ContinuesLineCommentSection
=
4756 continuesLineCommentSection(*Tok
, *Line
, Style
, CommentPragmasRegex
);
4757 if (isOnNewLine(*Tok
) && JustComments
&& !Tok
->ContinuesLineCommentSection
)
4761 if (NewlineBeforeNext
&& JustComments
)
4763 CommentsBeforeNextToken
.clear();
// Advances the parser by one token: pending comments are flushed, the current
// token is pushed onto the line, and the next token is read (through the
// JavaScript ASI-aware reader for JavaScript, otherwise through readToken
// with \p LevelDifference). The new token is linked back to the one just
// consumed.
4766 void UnwrappedLineParser::nextToken(int LevelDifference
) {
4769 flushComments(isOnNewLine(*FormatTok
));
4770 pushToken(FormatTok
);
// Keep the consumed token so the new current token can point back to it.
4771 FormatToken
*Previous
= FormatTok
;
4772 if (!Style
.isJavaScript())
4773 readToken(LevelDifference
);
4775 readTokenWithJavaScriptASI();
4776 FormatTok
->Previous
= Previous
;
4777 if (Style
.isVerilog()) {
4778 // Blocks in Verilog can have `begin` and `end` instead of braces. For
4779 // keywords like `begin`, we can't treat them the same as left braces
4780 // because some contexts require one of them. For example structs use
4781 // braces and if blocks use keywords, and a left brace can occur in an if
4782 // statement, but it is not a block. For keywords like `end`, we simply
4783 // treat them the same as right braces.
4784 if (Keywords
.isVerilogEnd(*FormatTok
))
4785 FormatTok
->Tok
.setKind(tok::r_brace
);
// Distributes \p Comments between the current unwrapped line and the queue of
// comments kept for the next token (CommentsBeforeNextToken). \p NextTok is
// the token following the comments; its original column is used to detect a
// trailing run of comments aligned with it.
4789 void UnwrappedLineParser::distributeComments(
4790 const SmallVectorImpl
<FormatToken
*> &Comments
,
4791 const FormatToken
*NextTok
) {
4792 // Whether or not a line comment token continues a line is controlled by
4793 // the method continuesLineCommentSection, with the following caveat:
4795 // Define a trail of Comments to be a nonempty proper postfix of Comments such
4796 // that each comment line from the trail is aligned with the next token, if
4797 // the next token exists. If a trail exists, the beginning of the maximal
4798 // trail is marked as a start of a new comment section.
4800 // For example in this code:
4802 // int a; // line about a
4803 // // line 1 about b
4804 // // line 2 about b
4807 // the two lines about b form a maximal trail, so there are two sections, the
4808 // first one consisting of the single comment "// line about a" and the
4809 // second one consisting of the next two comments.
4810 if (Comments
.empty())
4812 bool ShouldPushCommentsInCurrentLine
= true;
4813 bool HasTrailAlignedWithNextToken
= false;
4814 unsigned StartOfTrailAlignedWithNextToken
= 0;
4816 // We are skipping the first element intentionally.
// Walk backwards from the last comment, remembering the earliest index whose
// column matches that of the next token.
4817 for (unsigned i
= Comments
.size() - 1; i
> 0; --i
) {
4818 if (Comments
[i
]->OriginalColumn
== NextTok
->OriginalColumn
) {
4819 HasTrailAlignedWithNextToken
= true;
4820 StartOfTrailAlignedWithNextToken
= i
;
// Second pass, front to back. Note that the local FormatTok declared in the
// loop shadows the member of the same name.
4824 for (unsigned i
= 0, e
= Comments
.size(); i
< e
; ++i
) {
4825 FormatToken
*FormatTok
= Comments
[i
];
4826 if (HasTrailAlignedWithNextToken
&& i
== StartOfTrailAlignedWithNextToken
) {
4827 FormatTok
->ContinuesLineCommentSection
= false;
4829 FormatTok
->ContinuesLineCommentSection
= continuesLineCommentSection(
4830 *FormatTok
, *Line
, Style
, CommentPragmasRegex
);
// The flag is only ever cleared in this loop, never set back to true.
4832 if (!FormatTok
->ContinuesLineCommentSection
&&
4833 (isOnNewLine(*FormatTok
) || FormatTok
->IsFirst
)) {
4834 ShouldPushCommentsInCurrentLine
= false;
4836 if (ShouldPushCommentsInCurrentLine
)
4837 pushToken(FormatTok
);
4839 CommentsBeforeNextToken
.push_back(FormatTok
);
4843 void UnwrappedLineParser::readToken(int LevelDifference
) {
4844 SmallVector
<FormatToken
*, 1> Comments
;
4845 bool PreviousWasComment
= false;
4846 bool FirstNonCommentOnLine
= false;
4848 FormatTok
= Tokens
->getNextToken();
4850 while (FormatTok
->isOneOf(TT_ConflictStart
, TT_ConflictEnd
,
4851 TT_ConflictAlternative
)) {
4852 if (FormatTok
->is(TT_ConflictStart
))
4853 conditionalCompilationStart(/*Unreachable=*/false);
4854 else if (FormatTok
->is(TT_ConflictAlternative
))
4855 conditionalCompilationAlternative();
4856 else if (FormatTok
->is(TT_ConflictEnd
))
4857 conditionalCompilationEnd();
4858 FormatTok
= Tokens
->getNextToken();
4859 FormatTok
->MustBreakBefore
= true;
4860 FormatTok
->MustBreakBeforeFinalized
= true;
4863 auto IsFirstNonCommentOnLine
= [](bool FirstNonCommentOnLine
,
4864 const FormatToken
&Tok
,
4865 bool PreviousWasComment
) {
4866 auto IsFirstOnLine
= [](const FormatToken
&Tok
) {
4867 return Tok
.HasUnescapedNewline
|| Tok
.IsFirst
;
4870 // Consider preprocessor directives preceded by block comments as first
4872 if (PreviousWasComment
)
4873 return FirstNonCommentOnLine
|| IsFirstOnLine(Tok
);
4874 return IsFirstOnLine(Tok
);
4877 FirstNonCommentOnLine
= IsFirstNonCommentOnLine(
4878 FirstNonCommentOnLine
, *FormatTok
, PreviousWasComment
);
4879 PreviousWasComment
= FormatTok
->is(tok::comment
);
4881 while (!Line
->InPPDirective
&& FormatTok
->is(tok::hash
) &&
4882 (!Style
.isVerilog() ||
4883 Keywords
.isVerilogPPDirective(*Tokens
->peekNextToken())) &&
4884 FirstNonCommentOnLine
) {
4885 distributeComments(Comments
, FormatTok
);
4887 // If there is an unfinished unwrapped line, we flush the preprocessor
4888 // directives only after that unwrapped line was finished later.
4889 bool SwitchToPreprocessorLines
= !Line
->Tokens
.empty();
4890 ScopedLineState
BlockState(*this, SwitchToPreprocessorLines
);
4891 assert((LevelDifference
>= 0 ||
4892 static_cast<unsigned>(-LevelDifference
) <= Line
->Level
) &&
4893 "LevelDifference makes Line->Level negative");
4894 Line
->Level
+= LevelDifference
;
4895 // Comments stored before the preprocessor directive need to be output
4896 // before the preprocessor directive, at the same level as the
4897 // preprocessor directive, as we consider them to apply to the directive.
4898 if (Style
.IndentPPDirectives
== FormatStyle::PPDIS_BeforeHash
&&
4899 PPBranchLevel
> 0) {
4900 Line
->Level
+= PPBranchLevel
;
4902 assert(Line
->Level
>= Line
->UnbracedBodyLevel
);
4903 Line
->Level
-= Line
->UnbracedBodyLevel
;
4904 flushComments(isOnNewLine(*FormatTok
));
4906 PreviousWasComment
= FormatTok
->is(tok::comment
);
4907 FirstNonCommentOnLine
= IsFirstNonCommentOnLine(
4908 FirstNonCommentOnLine
, *FormatTok
, PreviousWasComment
);
4911 if (!PPStack
.empty() && (PPStack
.back().Kind
== PP_Unreachable
) &&
4912 !Line
->InPPDirective
) {
4916 if (FormatTok
->is(tok::identifier
) &&
4917 Macros
.defined(FormatTok
->TokenText
) &&
4918 // FIXME: Allow expanding macros in preprocessor directives.
4919 !Line
->InPPDirective
) {
4920 FormatToken
*ID
= FormatTok
;
4921 unsigned Position
= Tokens
->getPosition();
4923 // To correctly parse the code, we need to replace the tokens of the macro
4924 // call with its expansion.
4925 auto PreCall
= std::move(Line
);
4926 Line
.reset(new UnwrappedLine
);
4927 bool OldInExpansion
= InExpansion
;
4929 // We parse the macro call into a new line.
4930 auto Args
= parseMacroCall();
4931 InExpansion
= OldInExpansion
;
4932 assert(Line
->Tokens
.front().Tok
== ID
);
4933 // And remember the unexpanded macro call tokens.
4934 auto UnexpandedLine
= std::move(Line
);
4935 // Reset to the old line.
4936 Line
= std::move(PreCall
);
4939 llvm::dbgs() << "Macro call: " << ID
->TokenText
<< "(";
4941 llvm::dbgs() << "(";
4942 for (const auto &Arg
: Args
.value())
4943 for (const auto &T
: Arg
)
4944 llvm::dbgs() << T
->TokenText
<< " ";
4945 llvm::dbgs() << ")";
4947 llvm::dbgs() << "\n";
4949 if (Macros
.objectLike(ID
->TokenText
) && Args
&&
4950 !Macros
.hasArity(ID
->TokenText
, Args
->size())) {
4951 // The macro is either
4952 // - object-like, but we got arguments, or
4953 // - overloaded to be both object-like and function-like, but none of
4954 // the function-like arities match the number of arguments.
4955 // Thus, expand as object-like macro.
4956 LLVM_DEBUG(llvm::dbgs()
4957 << "Macro \"" << ID
->TokenText
4958 << "\" not overloaded for arity " << Args
->size()
4959 << "or not function-like, using object-like overload.");
4961 UnexpandedLine
->Tokens
.resize(1);
4962 Tokens
->setPosition(Position
);
4964 assert(!Args
&& Macros
.objectLike(ID
->TokenText
));
4966 if ((!Args
&& Macros
.objectLike(ID
->TokenText
)) ||
4967 (Args
&& Macros
.hasArity(ID
->TokenText
, Args
->size()))) {
4968 // Next, we insert the expanded tokens in the token stream at the
4969 // current position, and continue parsing.
4970 Unexpanded
[ID
] = std::move(UnexpandedLine
);
4971 SmallVector
<FormatToken
*, 8> Expansion
=
4972 Macros
.expand(ID
, std::move(Args
));
4973 if (!Expansion
.empty())
4974 FormatTok
= Tokens
->insertTokens(Expansion
);
4977 llvm::dbgs() << "Expanded: ";
4978 for (const auto &T
: Expansion
)
4979 llvm::dbgs() << T
->TokenText
<< " ";
4980 llvm::dbgs() << "\n";
4984 llvm::dbgs() << "Did not expand macro \"" << ID
->TokenText
4985 << "\", because it was used ";
4987 llvm::dbgs() << "with " << Args
->size();
4989 llvm::dbgs() << "without";
4990 llvm::dbgs() << " arguments, which doesn't match any definition.\n";
4992 Tokens
->setPosition(Position
);
4997 if (FormatTok
->isNot(tok::comment
)) {
4998 distributeComments(Comments
, FormatTok
);
5003 Comments
.push_back(FormatTok
);
5006 distributeComments(Comments
, nullptr);
5011 template <typename Iterator
>
5012 void pushTokens(Iterator Begin
, Iterator End
,
5013 llvm::SmallVectorImpl
<FormatToken
*> &Into
) {
5014 for (auto I
= Begin
; I
!= End
; ++I
) {
5015 Into
.push_back(I
->Tok
);
5016 for (const auto &Child
: I
->Children
)
5017 pushTokens(Child
.Tokens
.begin(), Child
.Tokens
.end(), Into
);
/// Collects the arguments of a macro call into per-argument token lists.
///
/// The result is an optional list of arguments, each argument being the list
/// of tokens gathered for it by pushTokens (i.e. including tokens of nested
/// child lines). The current line must be empty on entry.
///
/// NOTE(review): this listing omits a number of source lines (the embedded
/// numbering skips), so the surrounding loop, most case labels and the return
/// statements are not visible here; the comments below describe only the
/// statements that are shown.
5022 std::optional
<llvm::SmallVector
<llvm::SmallVector
<FormatToken
*, 8>, 1>>
5023 UnwrappedLineParser::parseMacroCall() {
// Starts out as an empty optional.
5024 std::optional
<llvm::SmallVector
<llvm::SmallVector
<FormatToken
*, 8>, 1>> Args
;
// Precondition: the caller has switched to a fresh, empty line.
5025 assert(Line
->Tokens
.empty());
// Special-cases a current token that is not an opening parenthesis; the
// guarded statement is not visible here - presumably an early return of the
// still-empty Args, TODO confirm.
5027 if (FormatTok
->isNot(tok::l_paren
))
// Remember the token-source position and the current token. Position is used
// at the end of the function to rewind the token source; the use of Tok is
// not visible in this listing.
5029 unsigned Position
= Tokens
->getPosition();
5030 FormatToken
*Tok
= FormatTok
;
// ArgStart points at the last token currently on the line; the tokens of the
// argument being collected are the ones that follow it.
5033 auto ArgStart
= std::prev(Line
->Tokens
.end());
// Dispatch on the kind of the current token.
5037 switch (FormatTok
->Tok
.getKind()) {
5042 case tok::r_paren
: {
// Append a new, empty argument and fill it with every token after ArgStart
// up to the end of the line.
5048 Args
->push_back({});
5049 pushTokens(std::next(ArgStart
), Line
->Tokens
.end(), Args
->back());
// Same collection step for another case whose label is not visible here
// (presumably the argument separator - TODO confirm); afterwards ArgStart is
// moved to the last token on the line so the next argument starts after it.
5058 Args
->push_back({});
5059 pushTokens(std::next(ArgStart
), Line
->Tokens
.end(), Args
->back());
5061 ArgStart
= std::prev(Line
->Tokens
.end());
// Truncate the line to its first token and rewind the token source to the
// position saved above. NOTE(review): this looks like the path taken when no
// complete call was found - confirm against the full source.
5069 Line
->Tokens
.resize(1);
5070 Tokens
->setPosition(Position
);
5075 void UnwrappedLineParser::pushToken(FormatToken
*Tok
) {
5076 Line
->Tokens
.push_back(UnwrappedLineNode(Tok
));
5077 if (MustBreakBeforeNextToken
) {
5078 Line
->Tokens
.back().Tok
->MustBreakBefore
= true;
5079 Line
->Tokens
.back().Tok
->MustBreakBeforeFinalized
= true;
5080 MustBreakBeforeNextToken
= false;
5084 } // end namespace format
5085 } // end namespace clang