1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
13 //===----------------------------------------------------------------------===//
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "FormatTokenLexer.h"
18 #include "FormatTokenSource.h"
20 #include "TokenAnnotator.h"
21 #include "clang/Basic/TokenKinds.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/raw_os_ostream.h"
26 #include "llvm/Support/raw_ostream.h"
31 #define DEBUG_TYPE "format-parser"
// Debug helper: writes a textual summary of \p Line to \p OS.
//
// The header consists of \p Prefix, the line's Level, its FirstStartColumn
// (printed as "FSC=") and a " MACRO" marker when Line.InPPDirective is set.
// For every token of the line it then prints the token kind name, the token
// type as an unsigned number ("T="), the OriginalColumn ("OC=") and the token
// text. Child lines attached to a token are printed by a recursive call whose
// prefix is \p Prefix with extra padding appended.
// NOTE(review): where \p PrintText is consumed is not visible in this excerpt.
38 void printLine(llvm::raw_ostream
&OS
, const UnwrappedLine
&Line
,
39 StringRef Prefix
= "", bool PrintText
= false) {
40 OS
<< Prefix
<< "Line(" << Line
.Level
<< ", FSC=" << Line
.FirstStartColumn
41 << ")" << (Line
.InPPDirective
? " MACRO" : "") << ": ";
// Walk the tokens of the line in order.
43 for (std::list
<UnwrappedLineNode
>::const_iterator I
= Line
.Tokens
.begin(),
44 E
= Line
.Tokens
.end();
50 OS
<< I
->Tok
->Tok
.getName() << "["
51 << "T=" << (unsigned)I
->Tok
->getType()
52 << ", OC=" << I
->Tok
->OriginalColumn
<< ", \"" << I
->Tok
->TokenText
// Walk the child lines hanging off the current token and print each one
// recursively.
54 for (SmallVectorImpl
<UnwrappedLine
>::const_iterator
55 CI
= I
->Children
.begin(),
56 CE
= I
->Children
.end();
59 printLine(OS
, *CI
, (Prefix
+ " ").str());
// Convenience wrapper that dumps \p Line to llvm::dbgs() through printLine,
// using printLine's default prefix. The callers visible in this file all sit
// inside LLVM_DEBUG(...), which is presumably why the function is marked
// LLVM_ATTRIBUTE_UNUSED.
67 LLVM_ATTRIBUTE_UNUSED
static void printDebugInfo(const UnwrappedLine
&Line
) {
68 printLine(llvm::dbgs(), Line
);
// Scope guard for the "must be a declaration" state of the current line.
//
// Construction stores \p MustBeDeclaration into Line.MustBeDeclaration and
// pushes the same value onto \p Stack. Destruction writes
// Line.MustBeDeclaration back, either from Stack.back() or as `true`.
// NOTE(review): the condition choosing between those two assignments, and the
// matching pop of the stack, are on lines not visible in this excerpt.
71 class ScopedDeclarationState
{
73 ScopedDeclarationState(UnwrappedLine
&Line
, llvm::BitVector
&Stack
,
74 bool MustBeDeclaration
)
75 : Line(Line
), Stack(Stack
) {
76 Line
.MustBeDeclaration
= MustBeDeclaration
;
77 Stack
.push_back(MustBeDeclaration
);
79 ~ScopedDeclarationState() {
82 Line
.MustBeDeclaration
= Stack
.back();
84 Line
.MustBeDeclaration
= true;
// Reference to the caller-owned stack of declaration states.
89 llvm::BitVector
&Stack
;
92 } // end anonymous namespace
// Scope guard that temporarily redirects where the parser collects lines and
// gives it a fresh current line.
//
// On construction it remembers Parser.CurrentLines in OriginalLines and then
// points CurrentLines either at Parser.PreprocessorDirectives (when
// \p SwitchToPreprocessorLines is set) or, if the current line already has
// tokens, at the Children of that line's last token. The current line is
// moved into PreBlockLine and replaced by a new UnwrappedLine that inherits
// Level, PPLevel, InPPDirective and InMacroBody.
94 class ScopedLineState
{
96 ScopedLineState(UnwrappedLineParser
&Parser
,
97 bool SwitchToPreprocessorLines
= false)
98 : Parser(Parser
), OriginalLines(Parser
.CurrentLines
) {
99 if (SwitchToPreprocessorLines
)
100 Parser
.CurrentLines
= &Parser
.PreprocessorDirectives
;
101 else if (!Parser
.Line
->Tokens
.empty())
102 Parser
.CurrentLines
= &Parser
.Line
->Tokens
.back().Children
;
// Park the line being built and start a fresh one that carries over the
// nesting and preprocessor state.
103 PreBlockLine
= std::move(Parser
.Line
);
104 Parser
.Line
= std::make_unique
<UnwrappedLine
>();
105 Parser
.Line
->Level
= PreBlockLine
->Level
;
106 Parser
.Line
->PPLevel
= PreBlockLine
->PPLevel
;
107 Parser
.Line
->InPPDirective
= PreBlockLine
->InPPDirective
;
108 Parser
.Line
->InMacroBody
= PreBlockLine
->InMacroBody
;
// Teardown (presumably the destructor body; its header is not visible in this
// excerpt): flush any pending tokens as a line, restore the parked line, and
// restore CurrentLines. If lines were being collected into
// PreprocessorDirectives, a break before the next token is requested.
112 if (!Parser
.Line
->Tokens
.empty())
113 Parser
.addUnwrappedLine();
114 assert(Parser
.Line
->Tokens
.empty());
115 Parser
.Line
= std::move(PreBlockLine
);
116 if (Parser
.CurrentLines
== &Parser
.PreprocessorDirectives
)
117 Parser
.MustBreakBeforeNextToken
= true;
118 Parser
.CurrentLines
= OriginalLines
;
122 UnwrappedLineParser
&Parser
;
// The line that was current when the guard was created.
124 std::unique_ptr
<UnwrappedLine
> PreBlockLine
;
// The value of Parser.CurrentLines when the guard was created.
125 SmallVectorImpl
<UnwrappedLine
> *OriginalLines
;
// Scope guard around a line-level counter: the constructor records the
// current value of \p LineLevel in OldLineLevel and the destructor writes
// that value back.
//
// The three-argument constructor delegates to the four-argument one, taking
// WrapBrace from Style.BraceWrapping.AfterControlStatement and IndentBrace
// from Style.BraceWrapping.IndentBraces.
// NOTE(review): how WrapBrace / IndentBrace gate the visible
// Parser->addUnwrappedLine() call (and any level increment) is on lines not
// visible in this excerpt.
128 class CompoundStatementIndenter
{
130 CompoundStatementIndenter(UnwrappedLineParser
*Parser
,
131 const FormatStyle
&Style
, unsigned &LineLevel
)
132 : CompoundStatementIndenter(Parser
, LineLevel
,
133 Style
.BraceWrapping
.AfterControlStatement
,
134 Style
.BraceWrapping
.IndentBraces
) {}
135 CompoundStatementIndenter(UnwrappedLineParser
*Parser
, unsigned &LineLevel
,
136 bool WrapBrace
, bool IndentBrace
)
137 : LineLevel(LineLevel
), OldLineLevel(LineLevel
) {
139 Parser
->addUnwrappedLine();
// Restore the level that was in effect when the guard was created.
143 ~CompoundStatementIndenter() { LineLevel
= OldLineLevel
; }
147 unsigned OldLineLevel
;
// Constructs the parser over the token array \p Tokens (stored as AllTokens).
//
// Initial state set here: a fresh empty Line, CurrentLines pointing at Lines,
// no forced break before the next token, the token source pointer Tokens set
// to nullptr, PPBranchLevel at -1 and no include-guard token. IncludeGuard is
// initialised from whether Style.IndentPPDirectives equals PPDIS_None.
// NOTE(review): the two values of that conditional are on lines not visible
// in this excerpt.
// Macros is built from Style.Macros together with \p SourceMgr, \p Style,
// \p Allocator and \p IdentTable.
150 UnwrappedLineParser::UnwrappedLineParser(
151 SourceManager
&SourceMgr
, const FormatStyle
&Style
,
152 const AdditionalKeywords
&Keywords
, unsigned FirstStartColumn
,
153 ArrayRef
<FormatToken
*> Tokens
, UnwrappedLineConsumer
&Callback
,
154 llvm::SpecificBumpPtrAllocator
<FormatToken
> &Allocator
,
155 IdentifierTable
&IdentTable
)
156 : Line(new UnwrappedLine
), MustBreakBeforeNextToken(false),
157 CurrentLines(&Lines
), Style(Style
), Keywords(Keywords
),
158 CommentPragmasRegex(Style
.CommentPragmas
), Tokens(nullptr),
159 Callback(Callback
), AllTokens(Tokens
), PPBranchLevel(-1),
160 IncludeGuard(Style
.IndentPPDirectives
== FormatStyle::PPDIS_None
163 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn
),
164 Macros(Style
.Macros
, SourceMgr
, Style
, Allocator
, IdentTable
) {}
// Returns the parser to its initial per-run state.
//
// Re-derives IncludeGuard from Style.IndentPPDirectives the same way the
// constructor does, drops the include-guard token, replaces Line with a new
// UnwrappedLine, clears pending comments, preprocessor directives, the
// declaration scope stack and the nesting trackers, and points CurrentLines
// back at Lines. If Unexpanded is non-empty, the MacroCtx of every token in
// AllTokens is reset; the expanded-line containers are cleared.
// NOTE(review): the excerpt omits a few lines of this function, so this list
// may not be exhaustive.
166 void UnwrappedLineParser::reset() {
168 IncludeGuard
= Style
.IndentPPDirectives
== FormatStyle::PPDIS_None
171 IncludeGuardToken
= nullptr;
172 Line
.reset(new UnwrappedLine
);
173 CommentsBeforeNextToken
.clear();
175 MustBreakBeforeNextToken
= false;
176 IsDecltypeAutoFunction
= false;
177 PreprocessorDirectives
.clear();
178 CurrentLines
= &Lines
;
179 DeclarationScopeStack
.clear();
180 NestedTooDeep
.clear();
181 NestedLambdas
.clear();
183 Line
->FirstStartColumn
= FirstStartColumn
;
// Undo macro-expansion bookkeeping left on the tokens by a previous run.
185 if (!Unexpanded
.empty())
186 for (FormatToken
*Token
: AllTokens
)
187 Token
->MacroCtx
.reset();
188 CurrentExpandedLines
.clear();
189 ExpandedLines
.clear();
// Top-level driver: tokenises AllTokens through an IndexedTokenSource, builds
// the unwrapped lines and hands them to Callback.
//
// The visible body runs inside a do/while loop that repeats while
// PPLevelBranchIndex is non-empty. Each pass:
//  - parses the input (on lines not visible in this excerpt),
//  - when ExpandedLines is non-empty, first reports a run in which lines
//    whose first token has an entry in ExpandedLines are replaced by that
//    entry's expanded lines,
//  - then reports all of Lines as a second run,
//  - and finally advances the preprocessor-branch counters to select the
//    next combination of conditional branches.
195 void UnwrappedLineParser::parse() {
196 IndexedTokenSource
TokenSource(AllTokens
);
197 Line
->FirstStartColumn
= FirstStartColumn
;
199 LLVM_DEBUG(llvm::dbgs() << "----\n");
201 Tokens
= &TokenSource
;
207 // If we found an include guard then all preprocessor directives (other than
208 // the guard) are over-indented by one.
209 if (IncludeGuard
== IG_Found
) {
210 for (auto &Line
: Lines
)
211 if (Line
.InPPDirective
&& Line
.Level
> 0)
215 // Create line with eof token.
217 pushToken(FormatTok
);
220 // In a first run, format everything with the lines containing macro calls
221 // replaced by the expansion.
222 if (!ExpandedLines
.empty()) {
223 LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
224 for (const auto &Line
: Lines
) {
225 if (!Line
.Tokens
.empty()) {
// ExpandedLines is looked up by the first token of the line.
226 auto it
= ExpandedLines
.find(Line
.Tokens
.begin()->Tok
);
227 if (it
!= ExpandedLines
.end()) {
228 for (const auto &Expanded
: it
->second
) {
229 LLVM_DEBUG(printDebugInfo(Expanded
));
230 Callback
.consumeUnwrappedLine(Expanded
);
235 LLVM_DEBUG(printDebugInfo(Line
));
236 Callback
.consumeUnwrappedLine(Line
);
238 Callback
.finishRun();
241 LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
242 for (const UnwrappedLine
&Line
: Lines
) {
243 LLVM_DEBUG(printDebugInfo(Line
));
244 Callback
.consumeUnwrappedLine(Line
);
246 Callback
.finishRun();
// Drop trailing levels whose last branch has already been visited, then step
// the innermost remaining level to its next branch.
248 while (!PPLevelBranchIndex
.empty() &&
249 PPLevelBranchIndex
.back() + 1 >= PPLevelBranchCount
.back()) {
250 PPLevelBranchIndex
.resize(PPLevelBranchIndex
.size() - 1);
251 PPLevelBranchCount
.resize(PPLevelBranchCount
.size() - 1);
253 if (!PPLevelBranchIndex
.empty()) {
254 ++PPLevelBranchIndex
.back();
255 assert(PPLevelBranchIndex
.size() == PPLevelBranchCount
.size());
256 assert(PPLevelBranchIndex
.back() <= PPLevelBranchCount
.back());
258 } while (!PPLevelBranchIndex
.empty());
// Parses the top level of the input.
//
// Sets up the top-level declaration state (see the comment below for when the
// top level must consist of declarations), takes a separate path when the
// language is LK_TextProto, and afterwards gives end-of-file comments in
// TextProto input a line of their own.
// NOTE(review): the bodies of both TextProto branches and the generic parsing
// call are on lines not visible in this excerpt.
261 void UnwrappedLineParser::parseFile() {
262 // The top-level context in a file always has declarations, except for pre-
263 // processor directives and JavaScript files.
264 bool MustBeDeclaration
= !Line
->InPPDirective
&& !Style
.isJavaScript();
265 ScopedDeclarationState
DeclarationState(*Line
, DeclarationScopeStack
,
267 if (Style
.Language
== FormatStyle::LK_TextProto
)
271 // Make sure to format the remaining tokens.
273 // LK_TextProto is special since its top-level is parsed as the body of a
274 // braced list, which does not necessarily have natural line separators such
275 // as a semicolon. Comments after the last entry that have been determined to
276 // not belong to that line, as in:
278 // // endfile comment
279 // do not have a chance to be put on a line of their own until this point.
280 // Here we add this newline before end-of-file comments.
281 if (Style
.Language
== FormatStyle::LK_TextProto
&&
282 !CommentsBeforeNextToken
.empty()) {
// Parses a C# generic type constraint by dispatching on the kind of the
// current token. When a `where` keyword is seen, the function calls itself
// again (presumably to handle a further constraint clause — confirm against
// the full source; the surrounding loop and case labels are not visible in
// this excerpt).
289 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
291 switch (FormatTok
->Tok
.getKind()) {
295 if (FormatTok
->is(Keywords
.kw_where
)) {
298 parseCSharpGenericTypeConstraint();
// Parses a C# attribute while tracking the number of unmatched '[' brackets.
// The counter starts at 1 (presumably the opening '[' has already been seen),
// is decremented and incremented in switch cases on the current token kind,
// and reaching 0 is treated specially.
// NOTE(review): the case labels and the action taken at 0 are on lines not
// visible in this excerpt.
307 void UnwrappedLineParser::parseCSharpAttribute() {
308 int UnpairedSquareBrackets
= 1;
310 switch (FormatTok
->Tok
.getKind()) {
313 --UnpairedSquareBrackets
;
314 if (UnpairedSquareBrackets
== 0) {
320 ++UnpairedSquareBrackets
;
// Reports whether the current position directly follows a preprocessor
// directive line or a comment.
//
// The first check looks at whether the most recently added line is a
// preprocessor directive (its result is on a line not visible in this
// excerpt; presumably `return true`). Otherwise the previous token must be a
// comment that is either multi-line or has at least one newline before it.
330 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
331 if (!Lines
.empty() && Lines
.back().InPPDirective
)
334 const FormatToken
*Previous
= Tokens
->getPreviousToken();
335 return Previous
&& Previous
->is(tok::comment
) &&
336 (Previous
->IsMultiline
|| Previous
->NewlinesBefore
> 0);
339 /// \brief Parses a level, i.e. the statements nested directly inside one brace scope.
340 /// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
341 /// \param IfKind The \p if statement kind in the level.
342 /// \param IfLeftBrace The left brace of the \p if block in the level.
343 /// \returns true if a simple block of if/else/for/while, or false otherwise.
344 /// (A simple block has a single statement.)
345 bool UnwrappedLineParser::parseLevel(const FormatToken
*OpeningBrace
,
347 FormatToken
**IfLeftBrace
) {
348 const bool InRequiresExpression
=
349 OpeningBrace
&& OpeningBrace
->is(TT_RequiresExpressionLBrace
);
350 const bool IsPrecededByCommentOrPPDirective
=
351 !Style
.RemoveBracesLLVM
|| precededByCommentOrPPDirective();
352 FormatToken
*IfLBrace
= nullptr;
353 bool HasDoWhile
= false;
354 bool HasLabel
= false;
355 unsigned StatementCount
= 0;
356 bool SwitchLabelEncountered
= false;
359 if (FormatTok
->isAttribute()) {
363 tok::TokenKind kind
= FormatTok
->Tok
.getKind();
364 if (FormatTok
->getType() == TT_MacroBlockBegin
)
366 else if (FormatTok
->getType() == TT_MacroBlockEnd
)
369 auto ParseDefault
= [this, OpeningBrace
, IfKind
, &IfLBrace
, &HasDoWhile
,
370 &HasLabel
, &StatementCount
] {
371 parseStructuralElement(OpeningBrace
, IfKind
, &IfLBrace
,
372 HasDoWhile
? nullptr : &HasDoWhile
,
373 HasLabel
? nullptr : &HasLabel
);
375 assert(StatementCount
> 0 && "StatementCount overflow!");
384 if (InRequiresExpression
) {
385 FormatTok
->setFinalizedType(TT_RequiresExpressionLBrace
);
386 } else if (FormatTok
->Previous
&&
387 FormatTok
->Previous
->ClosesRequiresClause
) {
388 // We need the 'default' case here to correctly parse a function
393 if (!InRequiresExpression
&& FormatTok
->isNot(TT_MacroBlockBegin
) &&
394 tryToParseBracedList()) {
399 assert(StatementCount
> 0 && "StatementCount overflow!");
404 if (!Style
.RemoveBracesLLVM
|| Line
->InPPDirective
||
405 !OpeningBrace
->isOneOf(TT_ControlStatementLBrace
, TT_ElseLBrace
)) {
408 if (FormatTok
->isNot(tok::r_brace
) || StatementCount
!= 1 || HasLabel
||
409 HasDoWhile
|| IsPrecededByCommentOrPPDirective
||
410 precededByCommentOrPPDirective()) {
413 const FormatToken
*Next
= Tokens
->peekNextToken();
414 if (Next
->is(tok::comment
) && Next
->NewlinesBefore
== 0)
417 *IfLeftBrace
= IfLBrace
;
423 case tok::kw_default
: {
424 unsigned StoredPosition
= Tokens
->getPosition();
427 Next
= Tokens
->getNextToken();
429 } while (Next
->is(tok::comment
));
430 FormatTok
= Tokens
->setPosition(StoredPosition
);
431 if (Next
->isNot(tok::colon
)) {
432 // default not followed by ':' is not a case label; treat it like
434 parseStructuralElement();
437 // Else, if it is 'default:', fall through to the case handling.
441 if (Style
.isProto() || Style
.isVerilog() ||
442 (Style
.isJavaScript() && Line
->MustBeDeclaration
)) {
443 // Proto: there are no switch/case statements
444 // Verilog: Case labels don't have this word. We handle case
445 // labels including default in TokenAnnotator.
446 // JavaScript: A 'case: string' style field declaration.
450 if (!SwitchLabelEncountered
&&
451 (Style
.IndentCaseLabels
||
452 (Line
->InPPDirective
&& Line
->Level
== 1))) {
455 SwitchLabelEncountered
= true;
456 parseStructuralElement();
459 if (Style
.isCSharp()) {
461 parseCSharpAttribute();
464 if (handleCppAttributes())
476 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody
) {
477 // We'll parse forward through the tokens until we hit
478 // a closing brace or eof - note that getNextToken() will
479 // parse macros, so this will magically work inside macro
481 unsigned StoredPosition
= Tokens
->getPosition();
482 FormatToken
*Tok
= FormatTok
;
483 const FormatToken
*PrevTok
= Tok
->Previous
;
484 // Keep a stack of positions of lbrace tokens. We will
485 // update information about whether an lbrace starts a
486 // braced init list or a different block during the loop.
489 const FormatToken
*PrevTok
;
491 SmallVector
<StackEntry
, 8> LBraceStack
;
492 assert(Tok
->is(tok::l_brace
));
494 // Get next non-comment, non-preprocessor token.
495 FormatToken
*NextTok
;
497 NextTok
= Tokens
->getNextToken();
498 } while (NextTok
->is(tok::comment
));
499 while (NextTok
->is(tok::hash
)) {
500 NextTok
= Tokens
->getNextToken();
502 NextTok
= Tokens
->getNextToken();
503 } while (NextTok
->is(tok::comment
) ||
504 (NextTok
->NewlinesBefore
== 0 && NextTok
->isNot(tok::eof
)));
507 switch (Tok
->Tok
.getKind()) {
509 if (Style
.isJavaScript() && PrevTok
) {
510 if (PrevTok
->isOneOf(tok::colon
, tok::less
)) {
511 // A ':' indicates this code is in a type, or a braced list
512 // following a label in an object literal ({a: {b: 1}}).
513 // A '<' could be an object used in a comparison, but that is nonsense
514 // code (can never return true), so more likely it is a generic type
515 // argument (`X<{a: string; b: number}>`).
516 // The code below could be confused by semicolons between the
517 // individual members in a type member list, which would normally
518 // trigger BK_Block. In both cases, this must be parsed as an inline
520 Tok
->setBlockKind(BK_BracedInit
);
521 } else if (PrevTok
->is(tok::r_paren
)) {
522 // `) { }` can only occur in function or method declarations in JS.
523 Tok
->setBlockKind(BK_Block
);
526 Tok
->setBlockKind(BK_Unknown
);
528 LBraceStack
.push_back({Tok
, PrevTok
});
531 if (LBraceStack
.empty())
533 if (LBraceStack
.back().Tok
->is(BK_Unknown
)) {
534 bool ProbablyBracedList
= false;
535 if (Style
.Language
== FormatStyle::LK_Proto
) {
536 ProbablyBracedList
= NextTok
->isOneOf(tok::comma
, tok::r_square
);
538 // Skip NextTok over preprocessor lines, otherwise we may not
539 // properly diagnose the block as a braced initializer
540 // if the comma separator appears after the pp directive.
541 while (NextTok
->is(tok::hash
)) {
542 ScopedMacroState
MacroState(*Line
, Tokens
, NextTok
);
544 NextTok
= Tokens
->getNextToken();
545 } while (NextTok
->isNot(tok::eof
));
548 // Using OriginalColumn to distinguish between ObjC methods and
549 // binary operators is a bit hacky.
550 bool NextIsObjCMethod
= NextTok
->isOneOf(tok::plus
, tok::minus
) &&
551 NextTok
->OriginalColumn
== 0;
553 // Try to detect a braced list. Note that regardless how we mark inner
554 // braces here, we will overwrite the BlockKind later if we parse a
555 // braced list (where all blocks inside are by default braced lists),
556 // or when we explicitly detect blocks (for example while parsing
559 // If we already marked the opening brace as braced list, the closing
560 // must also be part of it.
561 ProbablyBracedList
= LBraceStack
.back().Tok
->is(TT_BracedListLBrace
);
563 ProbablyBracedList
= ProbablyBracedList
||
564 (Style
.isJavaScript() &&
565 NextTok
->isOneOf(Keywords
.kw_of
, Keywords
.kw_in
,
567 ProbablyBracedList
= ProbablyBracedList
||
568 (Style
.isCpp() && NextTok
->is(tok::l_paren
));
570 // If there is a comma, semicolon or right paren after the closing
571 // brace, we assume this is a braced initializer list.
572 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
573 // braced list in JS.
575 ProbablyBracedList
||
576 NextTok
->isOneOf(tok::comma
, tok::period
, tok::colon
,
577 tok::r_paren
, tok::r_square
, tok::ellipsis
);
579 // Distinguish between braced list in a constructor initializer list
580 // followed by constructor body, or just adjacent blocks.
582 ProbablyBracedList
||
583 (NextTok
->is(tok::l_brace
) && LBraceStack
.back().PrevTok
&&
584 LBraceStack
.back().PrevTok
->isOneOf(tok::identifier
,
588 ProbablyBracedList
||
589 (NextTok
->is(tok::identifier
) &&
590 !PrevTok
->isOneOf(tok::semi
, tok::r_brace
, tok::l_brace
));
592 ProbablyBracedList
= ProbablyBracedList
||
593 (NextTok
->is(tok::semi
) &&
594 (!ExpectClassBody
|| LBraceStack
.size() != 1));
597 ProbablyBracedList
||
598 (NextTok
->isBinaryOperator() && !NextIsObjCMethod
);
600 if (!Style
.isCSharp() && NextTok
->is(tok::l_square
)) {
601 // We can have an array subscript after a braced init
602 // list, but C++11 attributes are expected after blocks.
603 NextTok
= Tokens
->getNextToken();
604 ProbablyBracedList
= NextTok
->isNot(tok::l_square
);
607 if (ProbablyBracedList
) {
608 Tok
->setBlockKind(BK_BracedInit
);
609 LBraceStack
.back().Tok
->setBlockKind(BK_BracedInit
);
611 Tok
->setBlockKind(BK_Block
);
612 LBraceStack
.back().Tok
->setBlockKind(BK_Block
);
615 LBraceStack
.pop_back();
617 case tok::identifier
:
618 if (Tok
->isNot(TT_StatementMacro
))
629 if (!LBraceStack
.empty() && LBraceStack
.back().Tok
->is(BK_Unknown
))
630 LBraceStack
.back().Tok
->setBlockKind(BK_Block
);
637 } while (Tok
->isNot(tok::eof
) && !LBraceStack
.empty());
639 // Assume other blocks for all unclosed opening braces.
640 for (const auto &Entry
: LBraceStack
)
641 if (Entry
.Tok
->is(BK_Unknown
))
642 Entry
.Tok
->setBlockKind(BK_Block
);
644 FormatTok
= Tokens
->setPosition(StoredPosition
);
647 // Sets the token type of the directly previous right brace.
// The previous token is found with getPreviousNonComment(), so comments
// between that brace and the current token are skipped. Nothing happens when
// there is no such token or it is not a '}'. The type is applied with
// setFinalizedType.
648 void UnwrappedLineParser::setPreviousRBraceType(TokenType Type
) {
649 if (auto Prev
= FormatTok
->getPreviousNonComment();
650 Prev
&& Prev
->is(tok::r_brace
)) {
651 Prev
->setFinalizedType(Type
);
// Mixes the hash of \p v into \p seed in place: the seed is XOR-ed with
// hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2).
// NOTE(review): the declarations of the template parameter T and of `hasher`
// are on lines not visible in this excerpt.
656 static inline void hash_combine(std::size_t &seed
, const T
&v
) {
658 seed
^= hasher(v
) + 0x9e3779b9 + (seed
<< 6) + (seed
>> 2);
// Computes a hash over the current preprocessor stack by folding the Kind
// (converted to size_t) and the Line of every PPStack entry, in order, into
// the accumulator `h` with hash_combine.
// NOTE(review): the initialisation and return of `h` are on lines not visible
// in this excerpt.
661 size_t UnwrappedLineParser::computePPHash() const {
663 for (const auto &i
: PPStack
) {
664 hash_combine(h
, size_t(i
.Kind
));
665 hash_combine(h
, i
.Line
);
670 // Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
671 // is not null, subtracts its length (plus the preceding space) when computing
672 // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
673 // running the token annotator on it so that we can restore them afterward.
674 bool UnwrappedLineParser::mightFitOnOneLine(
675 UnwrappedLine
&ParsedLine
, const FormatToken
*OpeningBrace
) const {
676 const auto ColumnLimit
= Style
.ColumnLimit
;
677 if (ColumnLimit
== 0)
680 auto &Tokens
= ParsedLine
.Tokens
;
681 assert(!Tokens
.empty());
683 const auto *LastToken
= Tokens
.back().Tok
;
686 SmallVector
<UnwrappedLineNode
> SavedTokens(Tokens
.size());
689 for (const auto &Token
: Tokens
) {
691 auto &SavedToken
= SavedTokens
[Index
++];
692 SavedToken
.Tok
= new FormatToken
;
693 SavedToken
.Tok
->copyFrom(*Token
.Tok
);
694 SavedToken
.Children
= std::move(Token
.Children
);
697 AnnotatedLine
Line(ParsedLine
);
698 assert(Line
.Last
== LastToken
);
700 TokenAnnotator
Annotator(Style
, Keywords
);
701 Annotator
.annotate(Line
);
702 Annotator
.calculateFormattingInformation(Line
);
704 auto Length
= LastToken
->TotalLength
;
706 assert(OpeningBrace
!= Tokens
.front().Tok
);
707 if (auto Prev
= OpeningBrace
->Previous
;
708 Prev
&& Prev
->TotalLength
+ ColumnLimit
== OpeningBrace
->TotalLength
) {
709 Length
-= ColumnLimit
;
711 Length
-= OpeningBrace
->TokenText
.size() + 1;
714 if (const auto *FirstToken
= Line
.First
; FirstToken
->is(tok::r_brace
)) {
715 assert(!OpeningBrace
|| OpeningBrace
->is(TT_ControlStatementLBrace
));
716 Length
-= FirstToken
->TokenText
.size() + 1;
720 for (auto &Token
: Tokens
) {
721 const auto &SavedToken
= SavedTokens
[Index
++];
722 Token
.Tok
->copyFrom(*SavedToken
.Tok
);
723 Token
.Children
= std::move(SavedToken
.Children
);
724 delete SavedToken
.Tok
;
727 // If these change, PPLevel needs to be used to get the correct indentation.
728 assert(!Line
.InMacroBody
);
729 assert(!Line
.InPPDirective
);
730 return Line
.Level
* Style
.IndentWidth
+ Length
<= ColumnLimit
;
733 FormatToken
*UnwrappedLineParser::parseBlock(bool MustBeDeclaration
,
734 unsigned AddLevels
, bool MunchSemi
,
737 bool UnindentWhitesmithsBraces
) {
738 auto HandleVerilogBlockLabel
= [this]() {
740 if (Style
.isVerilog() && FormatTok
->is(tok::colon
)) {
742 if (Keywords
.isVerilogIdentifier(*FormatTok
))
747 // Whether this is a Verilog-specific block that has a special header like a
749 const bool VerilogHierarchy
=
750 Style
.isVerilog() && Keywords
.isVerilogHierarchy(*FormatTok
);
751 assert((FormatTok
->isOneOf(tok::l_brace
, TT_MacroBlockBegin
) ||
752 (Style
.isVerilog() &&
753 (Keywords
.isVerilogBegin(*FormatTok
) || VerilogHierarchy
))) &&
754 "'{' or macro block token expected");
755 FormatToken
*Tok
= FormatTok
;
756 const bool FollowedByComment
= Tokens
->peekNextToken()->is(tok::comment
);
757 auto Index
= CurrentLines
->size();
758 const bool MacroBlock
= FormatTok
->is(TT_MacroBlockBegin
);
759 FormatTok
->setBlockKind(BK_Block
);
761 // For Whitesmiths mode, jump to the next level prior to skipping over the
763 if (!VerilogHierarchy
&& AddLevels
> 0 &&
764 Style
.BreakBeforeBraces
== FormatStyle::BS_Whitesmiths
) {
768 size_t PPStartHash
= computePPHash();
770 const unsigned InitialLevel
= Line
->Level
;
771 if (VerilogHierarchy
) {
772 AddLevels
+= parseVerilogHierarchyHeader();
774 nextToken(/*LevelDifference=*/AddLevels
);
775 HandleVerilogBlockLabel();
778 // Bail out if there are too many levels. Otherwise, the stack might overflow.
779 if (Line
->Level
> 300)
782 if (MacroBlock
&& FormatTok
->is(tok::l_paren
))
785 size_t NbPreprocessorDirectives
=
786 !parsingPPDirective() ? PreprocessorDirectives
.size() : 0;
788 size_t OpeningLineIndex
=
789 CurrentLines
->empty()
790 ? (UnwrappedLine::kInvalidIndex
)
791 : (CurrentLines
->size() - 1 - NbPreprocessorDirectives
);
793 // Whitesmiths is weird here. The brace needs to be indented for the namespace
794 // block, but the block itself may not be indented depending on the style
795 // settings. This allows the format to back up one level in those cases.
796 if (UnindentWhitesmithsBraces
)
799 ScopedDeclarationState
DeclarationState(*Line
, DeclarationScopeStack
,
801 if (AddLevels
> 0u && Style
.BreakBeforeBraces
!= FormatStyle::BS_Whitesmiths
)
802 Line
->Level
+= AddLevels
;
804 FormatToken
*IfLBrace
= nullptr;
805 const bool SimpleBlock
= parseLevel(Tok
, IfKind
, &IfLBrace
);
810 if (MacroBlock
? FormatTok
->isNot(TT_MacroBlockEnd
)
811 : FormatTok
->isNot(tok::r_brace
)) {
812 Line
->Level
= InitialLevel
;
813 FormatTok
->setBlockKind(BK_Block
);
817 if (FormatTok
->is(tok::r_brace
) && Tok
->is(TT_NamespaceLBrace
))
818 FormatTok
->setFinalizedType(TT_NamespaceRBrace
);
820 const bool IsFunctionRBrace
=
821 FormatTok
->is(tok::r_brace
) && Tok
->is(TT_FunctionLBrace
);
823 auto RemoveBraces
= [=]() mutable {
826 assert(Tok
->isOneOf(TT_ControlStatementLBrace
, TT_ElseLBrace
));
827 assert(FormatTok
->is(tok::r_brace
));
828 const bool WrappedOpeningBrace
= !Tok
->Previous
;
829 if (WrappedOpeningBrace
&& FollowedByComment
)
831 const bool HasRequiredIfBraces
= IfLBrace
&& !IfLBrace
->Optional
;
832 if (KeepBraces
&& !HasRequiredIfBraces
)
834 if (Tok
->isNot(TT_ElseLBrace
) || !HasRequiredIfBraces
) {
835 const FormatToken
*Previous
= Tokens
->getPreviousToken();
837 if (Previous
->is(tok::r_brace
) && !Previous
->Optional
)
840 assert(!CurrentLines
->empty());
841 auto &LastLine
= CurrentLines
->back();
842 if (LastLine
.Level
== InitialLevel
+ 1 && !mightFitOnOneLine(LastLine
))
844 if (Tok
->is(TT_ElseLBrace
))
846 if (WrappedOpeningBrace
) {
848 --Index
; // The line above the wrapped l_brace.
851 return mightFitOnOneLine((*CurrentLines
)[Index
], Tok
);
853 if (RemoveBraces()) {
854 Tok
->MatchingParen
= FormatTok
;
855 FormatTok
->MatchingParen
= Tok
;
858 size_t PPEndHash
= computePPHash();
860 // Munch the closing brace.
861 nextToken(/*LevelDifference=*/-AddLevels
);
863 // When this is a function block and there is an unnecessary semicolon
864 // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
866 if (Style
.RemoveSemicolon
&& IsFunctionRBrace
) {
867 while (FormatTok
->is(tok::semi
)) {
868 FormatTok
->Optional
= true;
873 HandleVerilogBlockLabel();
875 if (MacroBlock
&& FormatTok
->is(tok::l_paren
))
878 Line
->Level
= InitialLevel
;
880 if (FormatTok
->is(tok::kw_noexcept
)) {
881 // A noexcept in a requires expression.
885 if (FormatTok
->is(tok::arrow
)) {
886 // Following the } or noexcept we can find a trailing return type arrow
887 // as part of an implicit conversion constraint.
889 parseStructuralElement();
892 if (MunchSemi
&& FormatTok
->is(tok::semi
))
895 if (PPStartHash
== PPEndHash
) {
896 Line
->MatchingOpeningBlockLineIndex
= OpeningLineIndex
;
897 if (OpeningLineIndex
!= UnwrappedLine::kInvalidIndex
) {
898 // Update the opening line to add the forward reference as well
899 (*CurrentLines
)[OpeningLineIndex
].MatchingClosingBlockLineIndex
=
900 CurrentLines
->size() - 1;
// Checks whether \p Line starts with the token sequence `goog . scope (`.
// Lines with fewer than four tokens are rejected up front; the tokens are
// then compared one by one, and the final result is whether the last
// inspected token is '('.
// NOTE(review): the iterator increments and early `return false` statements
// between the checks are on lines not visible in this excerpt.
907 static bool isGoogScope(const UnwrappedLine
&Line
) {
908 // FIXME: Closure-library specific stuff should not be hard-coded but be
910 if (Line
.Tokens
.size() < 4)
912 auto I
= Line
.Tokens
.begin();
913 if (I
->Tok
->TokenText
!= "goog")
916 if (I
->Tok
->isNot(tok::period
))
919 if (I
->Tok
->TokenText
!= "scope")
922 return I
->Tok
->is(tok::l_paren
);
// Checks whether \p Line starts with `( function (`, using
// Keywords.kw_function to recognise the `function` keyword. Lines with fewer
// than three tokens are rejected up front.
// NOTE(review): the iterator increments and early `return false` statements
// between the checks are on lines not visible in this excerpt.
925 static bool isIIFE(const UnwrappedLine
&Line
,
926 const AdditionalKeywords
&Keywords
) {
927 // Look for the start of an immediately invoked anonymous function.
928 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
929 // This is commonly done in JavaScript to create a new, anonymous scope.
930 // Example: (function() { ... })()
931 if (Line
.Tokens
.size() < 3)
933 auto I
= Line
.Tokens
.begin();
934 if (I
->Tok
->isNot(tok::l_paren
))
937 if (I
->Tok
->isNot(Keywords
.kw_function
))
940 return I
->Tok
->is(tok::l_paren
);
// Maps the kind of \p InitialToken to the matching Style.BraceWrapping flag
// (AfterNamespace, AfterClass, AfterUnion, AfterStruct, AfterEnum) and
// returns that flag. A token typed TT_NamespaceMacro is treated as
// `namespace`.
// NOTE(review): all case labels except `kw_namespace`, and the default
// result, are on lines not visible in this excerpt.
943 static bool ShouldBreakBeforeBrace(const FormatStyle
&Style
,
944 const FormatToken
&InitialToken
) {
945 tok::TokenKind Kind
= InitialToken
.Tok
.getKind();
946 if (InitialToken
.is(TT_NamespaceMacro
))
947 Kind
= tok::kw_namespace
;
950 case tok::kw_namespace
:
951 return Style
.BraceWrapping
.AfterNamespace
;
953 return Style
.BraceWrapping
.AfterClass
;
955 return Style
.BraceWrapping
.AfterUnion
;
957 return Style
.BraceWrapping
.AfterStruct
;
959 return Style
.BraceWrapping
.AfterEnum
;
// Parses a '{'-delimited block as a child block of the current line.
//
// The opening brace is marked BK_Block. Its contents are parsed with
// parseLevel under a ScopedLineState and a non-declaration
// ScopedDeclarationState. The contents are indented by one level unless the
// input is JavaScript and the current line is a goog.scope call or the start
// of an IIFE. Pending comments are flushed before the level is restored.
965 void UnwrappedLineParser::parseChildBlock() {
966 assert(FormatTok
->is(tok::l_brace
));
967 FormatTok
->setBlockKind(BK_Block
);
968 const FormatToken
*OpeningBrace
= FormatTok
;
971 bool SkipIndent
= (Style
.isJavaScript() &&
972 (isGoogScope(*Line
) || isIIFE(*Line
, Keywords
)));
973 ScopedLineState
LineState(*this);
974 ScopedDeclarationState
DeclarationState(*Line
, DeclarationScopeStack
,
975 /*MustBeDeclaration=*/false);
976 Line
->Level
+= SkipIndent
? 0 : 1;
977 parseLevel(OpeningBrace
);
978 flushComments(isOnNewLine(*FormatTok
));
979 Line
->Level
-= SkipIndent
? 0 : 1;
// Entry point for a preprocessor directive; the current token must be '#'.
//
// A ScopedMacroState is set up for the directive, and the token following
// the '#' is inspected: if it carries no identifier info, a separate path is
// taken (body not visible in this excerpt). Otherwise the directive is
// dispatched on its preprocessor keyword ID. parsePPIf is called with
// IfDef=false in one case and IfDef=true under the `pp_ifndef` label.
// NOTE(review): most case labels and handler calls are on lines not visible
// in this excerpt.
984 void UnwrappedLineParser::parsePPDirective() {
985 assert(FormatTok
->is(tok::hash
) && "'#' expected");
986 ScopedMacroState
MacroState(*Line
, Tokens
, FormatTok
);
990 if (!FormatTok
->Tok
.getIdentifierInfo()) {
995 switch (FormatTok
->Tok
.getIdentifierInfo()->getPPKeywordID()) {
1000 parsePPIf(/*IfDef=*/false);
1003 case tok::pp_ifndef
:
1004 parsePPIf(/*IfDef=*/true);
1007 case tok::pp_elifdef
:
1008 case tok::pp_elifndef
:
1015 case tok::pp_pragma
:
// Pushes a new entry onto PPStack for a conditional-compilation branch.
//
// The entry records a line index: the size of CurrentLines, plus the size of
// Lines when lines are currently being collected into PreprocessorDirectives.
// The entry's kind is PP_Unreachable when the enclosing entry on the stack is
// already unreachable (the first half of that condition is on a line not
// visible in this excerpt; presumably it tests \p Unreachable), and
// PP_Conditional otherwise.
1024 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable
) {
1025 size_t Line
= CurrentLines
->size();
1026 if (CurrentLines
== &PreprocessorDirectives
)
1027 Line
+= Lines
.size();
1030 (!PPStack
.empty() && PPStack
.back().Kind
== PP_Unreachable
)) {
1031 PPStack
.push_back({PP_Unreachable
, Line
});
1033 PPStack
.push_back({PP_Conditional
, Line
});
// Bookkeeping for the start of a conditional-compilation chain.
//
// When PPBranchLevel reaches a nesting depth not seen before, a zeroed slot
// is appended to both PPLevelBranchIndex and PPLevelBranchCount. A new chain
// index is pushed onto PPChainBranchIndex: -1 if \p Unreachable, else 0. The
// branch is treated as skipped when the branch index selected for this level
// is greater than 0, and the result is forwarded to
// conditionalCompilationCondition.
// NOTE(review): the change to PPBranchLevel that precedes the assert is on a
// line not visible in this excerpt.
1037 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable
) {
1039 assert(PPBranchLevel
>= 0 && PPBranchLevel
<= (int)PPLevelBranchIndex
.size());
1040 if (PPBranchLevel
== (int)PPLevelBranchIndex
.size()) {
1041 PPLevelBranchIndex
.push_back(0);
1042 PPLevelBranchCount
.push_back(0);
1044 PPChainBranchIndex
.push(Unreachable
? -1 : 0);
1045 bool Skip
= PPLevelBranchIndex
[PPBranchLevel
] > 0;
1046 conditionalCompilationCondition(Unreachable
|| Skip
);
// Bookkeeping for moving to the next branch of a conditional-compilation
// chain.
//
// Advances the chain's branch index (when a chain is open) and pushes a new
// PPStack entry through conditionalCompilationCondition. The new branch is
// treated as unreachable when it is not the branch currently selected for
// this nesting level in PPLevelBranchIndex.
// NOTE(review): the statement guarded by `!PPStack.empty()` is on a line not
// visible in this excerpt (presumably it pops the previous branch's entry).
1049 void UnwrappedLineParser::conditionalCompilationAlternative() {
1050 if (!PPStack
.empty())
1052 assert(PPBranchLevel
< (int)PPLevelBranchIndex
.size());
1053 if (!PPChainBranchIndex
.empty())
1054 ++PPChainBranchIndex
.top();
1055 conditionalCompilationCondition(
1056 PPBranchLevel
>= 0 && !PPChainBranchIndex
.empty() &&
1057 PPLevelBranchIndex
[PPBranchLevel
] != PPChainBranchIndex
.top());
// Bookkeeping for the end of a conditional-compilation chain.
//
// Records the number of branches seen in this chain (chain index + 1) in
// PPLevelBranchCount for the current level if it exceeds the stored count,
// then pops the chain index. All accesses are guarded so that an unmatched
// end directive does not touch empty containers.
// NOTE(review): the statements guarded by `PPBranchLevel > -1` and by the
// final `!PPStack.empty()` are on lines not visible in this excerpt.
1060 void UnwrappedLineParser::conditionalCompilationEnd() {
1061 assert(PPBranchLevel
< (int)PPLevelBranchIndex
.size());
1062 if (PPBranchLevel
>= 0 && !PPChainBranchIndex
.empty()) {
1063 if (PPChainBranchIndex
.top() + 1 > PPLevelBranchCount
[PPBranchLevel
])
1064 PPLevelBranchCount
[PPBranchLevel
] = PPChainBranchIndex
.top() + 1;
1066 // Guard against #endif's without #if.
1067 if (PPBranchLevel
> -1)
1069 if (!PPChainBranchIndex
.empty())
1070 PPChainBranchIndex
.pop();
1071 if (!PPStack
.empty())
// Handles the opening directive of a conditional-compilation chain.
//
// \p IfDef is the flag passed by parsePPDirective (false in one call, true
// under the `pp_ifndef` label). Two conditions are singled out before the
// chain is started: a non-IfDef directive whose condition token is `false`
// or "0", and an IfDef directive that is not #ifndef and names "SWIG".
// NOTE(review): the statements those two conditions guard are on lines not
// visible in this excerpt (presumably they set Unreachable).
//
// After starting the chain, the function performs the first step of include
// guard detection: an #ifndef is a candidate only while IncludeGuard is still
// IG_Inited and every line parsed so far starts with a comment. A surviving
// candidate moves IncludeGuard to IG_IfNdefed and remembers the condition
// token in IncludeGuardToken.
1075 void UnwrappedLineParser::parsePPIf(bool IfDef
) {
1076 bool IfNDef
= FormatTok
->is(tok::pp_ifndef
);
1078 bool Unreachable
= false;
1079 if (!IfDef
&& (FormatTok
->is(tok::kw_false
) || FormatTok
->TokenText
== "0"))
1081 if (IfDef
&& !IfNDef
&& FormatTok
->TokenText
== "SWIG")
1083 conditionalCompilationStart(Unreachable
);
1084 FormatToken
*IfCondition
= FormatTok
;
1085 // If there's a #ifndef on the first line, and the only lines before it are
1086 // comments, it could be an include guard.
1087 bool MaybeIncludeGuard
= IfNDef
;
1088 if (IncludeGuard
== IG_Inited
&& MaybeIncludeGuard
) {
1089 for (auto &Line
: Lines
) {
1090 if (Line
.Tokens
.front().Tok
->isNot(tok::comment
)) {
1091 MaybeIncludeGuard
= false;
1092 IncludeGuard
= IG_Rejected
;
1100 if (IncludeGuard
== IG_Inited
&& MaybeIncludeGuard
) {
1101 IncludeGuard
= IG_IfNdefed
;
1102 IncludeGuardToken
= IfCondition
;
// Handles an alternative branch of a conditional-compilation chain.
//
// An alternative at the outermost branch level rejects a pending include
// guard candidate (IG_Defined). If no chain is open (PPBranchLevel == -1), an
// unreachable chain is started first so that the subsequent bookkeeping in
// conditionalCompilationAlternative has something to work on.
1106 void UnwrappedLineParser::parsePPElse() {
1107 // If a potential include guard has an #else, it's not an include guard.
1108 if (IncludeGuard
== IG_Defined
&& PPBranchLevel
== 0)
1109 IncludeGuard
= IG_Rejected
;
1110 // Don't crash when there is an #else without an #if.
1111 assert(PPBranchLevel
>= -1);
1112 if (PPBranchLevel
== -1)
1113 conditionalCompilationStart(/*Unreachable=*/true);
1114 conditionalCompilationAlternative();
// Handles the closing directive of a conditional-compilation chain.
//
// After the end-of-chain bookkeeping, a pending include guard candidate
// (IG_Defined) is confirmed as IG_Found when no chain remains open, the token
// source is at end of file, and Style.IndentPPDirectives is not PPDIS_None.
1120 void UnwrappedLineParser::parsePPEndIf() {
1121 conditionalCompilationEnd();
1123 // If the #endif of a potential include guard is the last thing in the file,
1124 // then we found an include guard.
1125 if (IncludeGuard
== IG_Defined
&& PPBranchLevel
== -1 && Tokens
->isEOF() &&
1126 Style
.IndentPPDirectives
!= FormatStyle::PPDIS_None
) {
1127 IncludeGuard
= IG_Found
;
// Handles a #define directive.
//
// Steps visible in this excerpt:
//  - A macro name without identifier info rejects any include guard
//    candidate.
//  - If an #ifndef candidate is pending (IG_IfNdefed) and this macro has the
//    same name as its condition, the candidate advances to IG_Defined. It is
//    rejected again if any line parsed so far starts with something other
//    than a comment or '#'.
//  - The macro name token is turned into a plain identifier (see the comment
//    below for why both kind and identifier info are overwritten).
//  - A '(' directly after the name, with no whitespace before it, is handled
//    separately (body not visible; presumably the parameter list).
//  - The line's Level is raised by PPBranchLevel + 1 when preprocessor
//    directive indentation is enabled; PPLevel is set to PPBranchLevel, plus
//    one unless an include guard candidate is in the IG_Defined state; and
//    the line is marked as being inside a macro body.
1131 void UnwrappedLineParser::parsePPDefine() {
1134 if (!FormatTok
->Tok
.getIdentifierInfo()) {
1135 IncludeGuard
= IG_Rejected
;
1136 IncludeGuardToken
= nullptr;
1141 if (IncludeGuard
== IG_IfNdefed
&&
1142 IncludeGuardToken
->TokenText
== FormatTok
->TokenText
) {
1143 IncludeGuard
= IG_Defined
;
1144 IncludeGuardToken
= nullptr;
1145 for (auto &Line
: Lines
) {
1146 if (!Line
.Tokens
.front().Tok
->isOneOf(tok::comment
, tok::hash
)) {
1147 IncludeGuard
= IG_Rejected
;
1153 // In the context of a define, even keywords should be treated as normal
1154 // identifiers. Setting the kind to identifier is not enough, because we need
1155 // to treat additional keywords like __except as well, which are already
1156 // identifiers. Setting the identifier info to null interferes with include
1157 // guard processing above, and changes preprocessing nesting.
1158 FormatTok
->Tok
.setKind(tok::identifier
);
1159 FormatTok
->Tok
.setIdentifierInfo(Keywords
.kw_internal_ident_after_define
);
1161 if (FormatTok
->Tok
.getKind() == tok::l_paren
&&
1162 !FormatTok
->hasWhitespaceBefore()) {
1165 if (Style
.IndentPPDirectives
!= FormatStyle::PPDIS_None
)
1166 Line
->Level
+= PPBranchLevel
+ 1;
1170 Line
->PPLevel
= PPBranchLevel
+ (IncludeGuard
== IG_Defined
? 0 : 1);
1171 assert((int)Line
->PPLevel
>= 0);
1172 Line
->InMacroBody
= true;
1174 // Errors during a preprocessor directive can only affect the layout of the
1175 // preprocessor directive, and thus we ignore them. An alternative approach
1176 // would be to use the same approach we use on the file level (no
1177 // re-indentation if there was a structural error) within the macro
// Handles a `#pragma` directive by flagging the current line as a pragma
// directive. NOTE(review): the remaining statements are not visible here.
1182 void UnwrappedLineParser::parsePPPragma() {
1183 Line
->InPragmaDirective
= true;
// Handles a preprocessor directive that has no dedicated parser.
// When preprocessor indentation is enabled (IndentPPDirectives is not
// PPDIS_None) the line level is raised by the current branch depth plus one.
// NOTE(review): the statements around this adjustment are not visible here.
1187 void UnwrappedLineParser::parsePPUnknown() {
1191 if (Style
.IndentPPDirectives
!= FormatStyle::PPDIS_None
)
1192 Line
->Level
+= PPBranchLevel
+ 1;
1196 // Here we exclude certain tokens that are not usually the first token in an
1197 // unwrapped line. This is used in an attempt to distinguish macro calls
1198 // without trailing semicolons from other constructs split to several lines.
// Returns false when Tok is one of the kinds listed in the isOneOf() call
// below and true otherwise. Tok must not be typed TT_AttributeSquare
// (asserted).
1199 static bool tokenCanStartNewLine(const FormatToken
&Tok
) {
1200 // Semicolon can be a null-statement, l_square can be a start of a macro or
1201 // a C++11 attribute, but this doesn't seem to be common.
1202 assert(Tok
.isNot(TT_AttributeSquare
));
1203 return !Tok
.isOneOf(tok::semi
, tok::l_brace
,
1204 // Tokens that can only be used as binary operators and a
1205 // part of overloaded operator names.
1206 tok::period
, tok::periodstar
, tok::arrow
, tok::arrowstar
,
1207 tok::less
, tok::greater
, tok::slash
, tok::percent
,
1208 tok::lessless
, tok::greatergreater
, tok::equal
,
1209 tok::plusequal
, tok::minusequal
, tok::starequal
,
1210 tok::slashequal
, tok::percentequal
, tok::ampequal
,
1211 tok::pipeequal
, tok::caretequal
, tok::greatergreaterequal
,
1213 // Colon is used in labels, base class lists, initializer
1214 // lists, range-based for loops, ternary operator, but
1215 // should never be the first token in an unwrapped line.
1217 // 'noexcept' is a trailing annotation.
// Returns true if FormatTok has kind tok::identifier and either carries no
// IdentifierInfo or is none of the JavaScript/TypeScript pseudo-keywords
// listed below, i.e. it can only be a plain identifier.
1221 static bool mustBeJSIdent(const AdditionalKeywords
&Keywords
,
1222 const FormatToken
*FormatTok
) {
1223 // FIXME: This returns true for C/C++ keywords like 'struct'.
1224 return FormatTok
->is(tok::identifier
) &&
1225 (!FormatTok
->Tok
.getIdentifierInfo() ||
1226 !FormatTok
->isOneOf(
1227 Keywords
.kw_in
, Keywords
.kw_of
, Keywords
.kw_as
, Keywords
.kw_async
,
1228 Keywords
.kw_await
, Keywords
.kw_yield
, Keywords
.kw_finally
,
1229 Keywords
.kw_function
, Keywords
.kw_import
, Keywords
.kw_is
,
1230 Keywords
.kw_let
, Keywords
.kw_var
, tok::kw_const
,
1231 Keywords
.kw_abstract
, Keywords
.kw_extends
, Keywords
.kw_implements
,
1232 Keywords
.kw_instanceof
, Keywords
.kw_interface
,
1233 Keywords
.kw_override
, Keywords
.kw_throws
, Keywords
.kw_from
));
// Returns true if FormatTok is a literal, one of the keywords `true` /
// `false`, or a token accepted by mustBeJSIdent().
1236 static bool mustBeJSIdentOrValue(const AdditionalKeywords
&Keywords
,
1237 const FormatToken
*FormatTok
) {
1238 return FormatTok
->Tok
.isLiteral() ||
1239 FormatTok
->isOneOf(tok::kw_true
, tok::kw_false
) ||
1240 mustBeJSIdent(Keywords
, FormatTok
);
1243 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1244 // when encountered after a value (see mustBeJSIdentOrValue).
// The decision is a plain membership test against the keyword list below.
1245 static bool isJSDeclOrStmt(const AdditionalKeywords
&Keywords
,
1246 const FormatToken
*FormatTok
) {
1247 return FormatTok
->isOneOf(
1248 tok::kw_return
, Keywords
.kw_yield
,
1250 tok::kw_if
, tok::kw_else
,
1252 tok::kw_for
, tok::kw_while
, tok::kw_do
, tok::kw_continue
, tok::kw_break
,
1254 tok::kw_switch
, tok::kw_case
,
1256 tok::kw_throw
, tok::kw_try
, tok::kw_catch
, Keywords
.kw_finally
,
1258 tok::kw_const
, tok::kw_class
, Keywords
.kw_var
, Keywords
.kw_let
,
1259 Keywords
.kw_async
, Keywords
.kw_function
,
1261 Keywords
.kw_import
, tok::kw_export
);
1264 // Checks whether a token is a type in K&R C (aka C78).
// Implemented as a membership test against a list of builtin type keywords.
// NOTE(review): the end of the keyword list is not visible here.
1265 static bool isC78Type(const FormatToken
&Tok
) {
1266 return Tok
.isOneOf(tok::kw_char
, tok::kw_short
, tok::kw_int
, tok::kw_long
,
1267 tok::kw_unsigned
, tok::kw_float
, tok::kw_double
,
1271 // This function checks whether a token starts the first parameter declaration
1272 // in a K&R C (aka C78) function definition, e.g.:
// Visible conditions, in order:
//  * FuncName is an identifier whose previous token exists and is either '*'
//    or a C78 type;
//  * Tok is a C78 type or one of `register`, `struct`, `union`;
//  * Next is '*' or carries IdentifierInfo;
//  * walking backwards from Tok: the previous token is ')', the one before
//    that is an identifier, and that identifier is preceded by '(' or ','.
// NOTE(review): the statements executed when a check fails are not visible
// here; presumably each one returns false — confirm against the full file.
1278 static bool isC78ParameterDecl(const FormatToken
*Tok
, const FormatToken
*Next
,
1279 const FormatToken
*FuncName
) {
1284 if (FuncName
->isNot(tok::identifier
))
1287 const FormatToken
*Prev
= FuncName
->Previous
;
1288 if (!Prev
|| (Prev
->isNot(tok::star
) && !isC78Type(*Prev
)))
1291 if (!isC78Type(*Tok
) &&
1292 !Tok
->isOneOf(tok::kw_register
, tok::kw_struct
, tok::kw_union
)) {
1296 if (Next
->isNot(tok::star
) && !Next
->Tok
.getIdentifierInfo())
// From here on Tok is reused as a cursor walking backwards.
1299 Tok
= Tok
->Previous
;
1300 if (!Tok
|| Tok
->isNot(tok::r_paren
))
1303 Tok
= Tok
->Previous
;
1304 if (!Tok
|| Tok
->isNot(tok::identifier
))
1307 return Tok
->Previous
&& Tok
->Previous
->isOneOf(tok::l_paren
, tok::comma
);
// Parses a C++ `import` declaration; the current token must be `import`
// (asserted).
// The next non-comment token is peeked first: the guard below fires when it
// has no IdentifierInfo and is none of ':', '<' or a string literal.
// While consuming the declaration, a ':' is finalized as
// TT_ModulePartitionColon, and for the `import <header>` form every token up
// to ';', '>' or EOF that is neither a comment nor starts with "//" is
// finalized as TT_ImplicitStringLiteral.
// NOTE(review): return statements and the enclosing loop are not visible
// here, so the exact return values are not documented.
1310 bool UnwrappedLineParser::parseModuleImport() {
1311 assert(FormatTok
->is(Keywords
.kw_import
) && "'import' expected");
1313 if (auto Token
= Tokens
->peekNextToken(/*SkipComment=*/true);
1314 !Token
->Tok
.getIdentifierInfo() &&
1315 !Token
->isOneOf(tok::colon
, tok::less
, tok::string_literal
)) {
1321 if (FormatTok
->is(tok::colon
)) {
1322 FormatTok
->setFinalizedType(TT_ModulePartitionColon
);
1324 // Handle import <foo/bar.h> as we would an include statement.
1325 else if (FormatTok
->is(tok::less
)) {
1327 while (!FormatTok
->isOneOf(tok::semi
, tok::greater
, tok::eof
)) {
1328 // Mark tokens up to the trailing line comments as implicit string
1330 if (FormatTok
->isNot(tok::comment
) &&
1331 !FormatTok
->TokenText
.startswith("//")) {
1332 FormatTok
->setFinalizedType(TT_ImplicitStringLiteral
);
1337 if (FormatTok
->is(tok::semi
)) {
1348 // readTokenWithJavaScriptASI reads the next token and terminates the current
1349 // line if JavaScript Automatic Semicolon Insertion must
1350 // happen between the current token and the next token.
1352 // This method is conservative - it cannot cover all edge cases of JavaScript,
1353 // but only aims to correctly handle certain well known cases. It *must not*
1354 // return true in speculative cases.
// Visible cases that end the current line via addUnwrappedLine():
//  * the previous token must be an identifier/value and the next token is '!';
//  * the next token must be an identifier/value, is not the closing part of a
//    template expression, the previous token does not open one, and the
//    previous token is a value or one of ']', ')', '++', '--';
//  * the previous token is a value or ')' and the next token starts a
//    declaration or statement (isJSDeclOrStmt).
1355 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1356 FormatToken
*Previous
= FormatTok
;
1358 FormatToken
*Next
= FormatTok
;
// When comments precede the next token, the newline count is taken from the
// first such comment rather than from the token itself.
1361 CommentsBeforeNextToken
.empty()
1362 ? Next
->NewlinesBefore
== 0
1363 : CommentsBeforeNextToken
.front()->NewlinesBefore
== 0;
1367 bool PreviousMustBeValue
= mustBeJSIdentOrValue(Keywords
, Previous
);
// A template string ending in "${" opens an embedded expression.
1368 bool PreviousStartsTemplateExpr
=
1369 Previous
->is(TT_TemplateString
) && Previous
->TokenText
.endswith("${");
1370 if (PreviousMustBeValue
|| Previous
->is(tok::r_paren
)) {
1371 // If the line contains an '@' sign, the previous token might be an
1372 // annotation, which can precede another identifier/value.
1373 bool HasAt
= llvm::any_of(Line
->Tokens
, [](UnwrappedLineNode
&LineNode
) {
1374 return LineNode
.Tok
->is(tok::at
);
1379 if (Next
->is(tok::exclaim
) && PreviousMustBeValue
)
1380 return addUnwrappedLine();
1381 bool NextMustBeValue
= mustBeJSIdentOrValue(Keywords
, Next
);
// A template string starting with "}" closes an embedded expression.
1382 bool NextEndsTemplateExpr
=
1383 Next
->is(TT_TemplateString
) && Next
->TokenText
.startswith("}");
1384 if (NextMustBeValue
&& !NextEndsTemplateExpr
&& !PreviousStartsTemplateExpr
&&
1385 (PreviousMustBeValue
||
1386 Previous
->isOneOf(tok::r_square
, tok::r_paren
, tok::plusplus
,
1387 tok::minusminus
))) {
1388 return addUnwrappedLine();
1390 if ((PreviousMustBeValue
|| Previous
->is(tok::r_paren
)) &&
1391 isJSDeclOrStmt(Keywords
, Next
)) {
1392 return addUnwrappedLine();
// Parses one structural element beginning at the current token.
// Visible structure:
//  1. Language-specific prefixes: a TableGen include, leading C++ attributes,
//     and several Verilog constructs.
//  2. A switch over the token kind for tokens that only make sense at the
//     beginning of a line (access specifiers, control statements, extern,
//     export, identifiers that are macros/labels, ...).
//  3. Otherwise the declaration is parsed token by token in a second switch
//     over the token kind.
// OpeningBrace is consulted to detect a requires-expression body
// (TT_RequiresExpressionLBrace); IfKind is forwarded to parseIfThenElse().
// NOTE(review): most case labels, breaks, returns and nextToken() calls are
// not visible here; how IfLeftBrace, HasDoWhile and HasLabel are used cannot
// be determined from the visible lines.
1396 void UnwrappedLineParser::parseStructuralElement(
1397 const FormatToken
*OpeningBrace
, IfStmtKind
*IfKind
,
1398 FormatToken
**IfLeftBrace
, bool *HasDoWhile
, bool *HasLabel
) {
1399 if (Style
.Language
== FormatStyle::LK_TableGen
&&
1400 FormatTok
->is(tok::pp_include
)) {
1402 if (FormatTok
->is(tok::string_literal
))
1408 if (Style
.isCpp()) {
1409 while (FormatTok
->is(tok::l_square
) && handleCppAttributes()) {
1411 } else if (Style
.isVerilog()) {
1412 if (Keywords
.isVerilogStructuredProcedure(*FormatTok
)) {
1413 parseForOrWhileLoop(/*HasParens=*/false);
1416 if (FormatTok
->isOneOf(Keywords
.kw_foreach
, Keywords
.kw_repeat
)) {
1417 parseForOrWhileLoop();
1420 if (FormatTok
->isOneOf(tok::kw_restrict
, Keywords
.kw_assert
,
1421 Keywords
.kw_assume
, Keywords
.kw_cover
)) {
1422 parseIfThenElse(IfKind
, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
1426 // Skip things that can exist before keywords like 'if' and 'case'.
1428 if (FormatTok
->isOneOf(Keywords
.kw_priority
, Keywords
.kw_unique
,
1429 Keywords
.kw_unique0
)) {
1431 } else if (FormatTok
->is(tok::l_paren
) &&
1432 Tokens
->peekNextToken()->is(tok::star
)) {
1440 // Tokens that only make sense at the beginning of a line.
1441 switch (FormatTok
->Tok
.getKind()) {
1444 if (FormatTok
->is(tok::l_brace
)) {
1445 FormatTok
->setFinalizedType(TT_InlineASMBrace
);
1447 while (FormatTok
&& !eof()) {
1448 if (FormatTok
->is(tok::r_brace
)) {
1449 FormatTok
->setFinalizedType(TT_InlineASMBrace
);
1454 FormatTok
->Finalized
= true;
1459 case tok::kw_namespace
:
1462 case tok::kw_public
:
1463 case tok::kw_protected
:
1464 case tok::kw_private
:
1465 if (Style
.Language
== FormatStyle::LK_Java
|| Style
.isJavaScript() ||
1469 parseAccessSpecifier();
1473 if (Style
.isJavaScript() && Line
->MustBeDeclaration
) {
1474 // field/method declaration.
1477 FormatToken
*Tok
= parseIfThenElse(IfKind
);
1484 if (Style
.isJavaScript() && Line
->MustBeDeclaration
) {
1485 // field/method declaration.
1488 parseForOrWhileLoop();
1491 if (Style
.isJavaScript() && Line
->MustBeDeclaration
) {
1492 // field/method declaration.
1499 case tok::kw_switch
:
1500 if (Style
.isJavaScript() && Line
->MustBeDeclaration
) {
1501 // 'switch: string' field declaration.
1506 case tok::kw_default
:
1507 // In Verilog default along with other labels are handled in the next loop.
1508 if (Style
.isVerilog())
1510 if (Style
.isJavaScript() && Line
->MustBeDeclaration
) {
1511 // 'default: string' field declaration.
1515 if (FormatTok
->is(tok::colon
)) {
1516 FormatTok
->setFinalizedType(TT_CaseLabelColon
);
1520 // e.g. "default void f() {}" in a Java interface.
1523 // Proto: there are no switch/case statements.
1524 if (Style
.isProto()) {
1528 if (Style
.isVerilog()) {
1533 if (Style
.isJavaScript() && Line
->MustBeDeclaration
) {
1534 // 'case: string' field declaration.
1542 if (Style
.isJavaScript() && Line
->MustBeDeclaration
) {
1543 // field/method declaration.
1548 case tok::kw_extern
:
1550 if (Style
.isVerilog()) {
1551 // In Verilog an extern module declaration looks like the start of a module.
1552 // But there is no body and endmodule. So we handle it separately.
1553 if (Keywords
.isVerilogHierarchy(*FormatTok
)) {
1554 parseVerilogHierarchyHeader();
1557 } else if (FormatTok
->is(tok::string_literal
)) {
1559 if (FormatTok
->is(tok::l_brace
)) {
1560 if (Style
.BraceWrapping
.AfterExternBlock
)
1562 // Either we indent or for backwards compatibility we follow the
1563 // AfterExternBlock style.
1564 unsigned AddLevels
=
1565 (Style
.IndentExternBlock
== FormatStyle::IEBS_Indent
) ||
1566 (Style
.BraceWrapping
.AfterExternBlock
&&
1567 Style
.IndentExternBlock
==
1568 FormatStyle::IEBS_AfterExternBlock
)
1571 parseBlock(/*MustBeDeclaration=*/true, AddLevels
);
1577 case tok::kw_export
:
1578 if (Style
.isJavaScript()) {
1579 parseJavaScriptEs6ImportExport();
1582 if (Style
.isCpp()) {
1584 if (FormatTok
->is(tok::kw_namespace
)) {
1588 if (FormatTok
->is(Keywords
.kw_import
) && parseModuleImport())
1592 case tok::kw_inline
:
1594 if (FormatTok
->is(tok::kw_namespace
)) {
1599 case tok::identifier
:
1600 if (FormatTok
->is(TT_ForEachMacro
)) {
1601 parseForOrWhileLoop();
1604 if (FormatTok
->is(TT_MacroBlockBegin
)) {
1605 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1606 /*MunchSemi=*/false);
1609 if (FormatTok
->is(Keywords
.kw_import
)) {
1610 if (Style
.isJavaScript()) {
1611 parseJavaScriptEs6ImportExport();
1614 if (Style
.Language
== FormatStyle::LK_Proto
) {
1616 if (FormatTok
->is(tok::kw_public
))
1618 if (FormatTok
->isNot(tok::string_literal
))
1621 if (FormatTok
->is(tok::semi
))
1626 if (Style
.isCpp() && parseModuleImport())
1629 if (Style
.isCpp() &&
1630 FormatTok
->isOneOf(Keywords
.kw_signals
, Keywords
.kw_qsignals
,
1631 Keywords
.kw_slots
, Keywords
.kw_qslots
)) {
1633 if (FormatTok
->is(tok::colon
)) {
1639 if (Style
.isCpp() && FormatTok
->is(TT_StatementMacro
)) {
1640 parseStatementMacro();
1643 if (Style
.isCpp() && FormatTok
->is(TT_NamespaceMacro
)) {
1647 // In Verilog labels can be any expression, so we don't do them here.
1648 if (!Style
.isVerilog() && Tokens
->peekNextToken()->is(tok::colon
) &&
1649 !Line
->MustBeDeclaration
) {
1651 Line
->Tokens
.begin()->Tok
->MustBreakBefore
= true;
1652 FormatTok
->setFinalizedType(TT_GotoLabelColon
);
1653 parseLabel(!Style
.IndentGotoLabels
);
1658 // In all other cases, parse the declaration.
1664 const bool InRequiresExpression
=
1665 OpeningBrace
&& OpeningBrace
->is(TT_RequiresExpressionLBrace
);
1667 const FormatToken
*Previous
= FormatTok
->Previous
;
1668 switch (FormatTok
->Tok
.getKind()) {
1671 if (FormatTok
->is(tok::l_brace
)) {
1675 } else if (Style
.Language
== FormatStyle::LK_Java
&&
1676 FormatTok
->is(Keywords
.kw_interface
)) {
1680 switch (FormatTok
->Tok
.getObjCKeywordID()) {
1681 case tok::objc_public
:
1682 case tok::objc_protected
:
1683 case tok::objc_package
:
1684 case tok::objc_private
:
1685 return parseAccessSpecifier();
1686 case tok::objc_interface
:
1687 case tok::objc_implementation
:
1688 return parseObjCInterfaceOrImplementation();
1689 case tok::objc_protocol
:
1690 if (parseObjCProtocol())
1694 return; // Handled by the caller.
1695 case tok::objc_optional
:
1696 case tok::objc_required
:
1700 case tok::objc_autoreleasepool
:
1702 if (FormatTok
->is(tok::l_brace
)) {
1703 if (Style
.BraceWrapping
.AfterControlStatement
==
1704 FormatStyle::BWACS_Always
) {
1711 case tok::objc_synchronized
:
1713 if (FormatTok
->is(tok::l_paren
)) {
1714 // Skip synchronization object
1717 if (FormatTok
->is(tok::l_brace
)) {
1718 if (Style
.BraceWrapping
.AfterControlStatement
==
1719 FormatStyle::BWACS_Always
) {
1727 // This branch isn't strictly necessary (the kw_try case below would
1728 // do this too after the tok::at is parsed above). But be explicit.
1735 case tok::kw_requires
: {
1736 if (Style
.isCpp()) {
1737 bool ParsedClause
= parseRequires();
1746 // Ignore if this is part of "template <enum ...".
1747 if (Previous
&& Previous
->is(tok::less
)) {
1752 // parseEnum falls through and does not yet add an unwrapped line as an
1753 // enum definition can start a structural element.
1756 // This only applies to C++ and Verilog.
1757 if (!Style
.isCpp() && !Style
.isVerilog()) {
1762 case tok::kw_typedef
:
1764 if (FormatTok
->isOneOf(Keywords
.kw_NS_ENUM
, Keywords
.kw_NS_OPTIONS
,
1765 Keywords
.kw_CF_ENUM
, Keywords
.kw_CF_OPTIONS
,
1766 Keywords
.kw_CF_CLOSED_ENUM
,
1767 Keywords
.kw_NS_CLOSED_ENUM
)) {
1772 if (Style
.isVerilog()) {
1778 case tok::kw_struct
:
1780 if (parseStructLike())
1783 case tok::kw_decltype
:
1785 if (FormatTok
->is(tok::l_paren
)) {
1787 assert(FormatTok
->Previous
);
1788 if (FormatTok
->Previous
->endsSequence(tok::r_paren
, tok::kw_auto
,
1790 Line
->SeenDecltypeAuto
= true;
1796 // In Java, classes have an implicit static member "class".
1797 if (Style
.Language
== FormatStyle::LK_Java
&& FormatTok
&&
1798 FormatTok
->is(tok::kw_class
)) {
1801 if (Style
.isJavaScript() && FormatTok
&&
1802 FormatTok
->Tok
.getIdentifierInfo()) {
1803 // JavaScript only has pseudo keywords, all keywords are allowed to
1804 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1815 case tok::l_paren
: {
1817 // Break the unwrapped line if a K&R C function definition has a parameter
1819 if (OpeningBrace
|| !Style
.isCpp() || !Previous
|| eof())
1821 if (isC78ParameterDecl(FormatTok
,
1822 Tokens
->peekNextToken(/*SkipComment=*/true),
1829 case tok::kw_operator
:
1831 if (FormatTok
->isBinaryOperator())
1836 // Block return type.
1837 if (FormatTok
->Tok
.isAnyIdentifier() ||
1838 FormatTok
->isSimpleTypeSpecifier()) {
1840 // Return types: pointers are ok too.
1841 while (FormatTok
->is(tok::star
))
1844 // Block argument list.
1845 if (FormatTok
->is(tok::l_paren
))
1848 if (FormatTok
->is(tok::l_brace
))
1852 if (InRequiresExpression
)
1853 FormatTok
->setFinalizedType(TT_BracedListLBrace
);
1854 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1855 IsDecltypeAutoFunction
= Line
->SeenDecltypeAuto
;
1856 // A block outside of parentheses must be the last part of a
1857 // structural element.
1858 // FIXME: Figure out cases where this is not true, and add projections
1859 // for them (the one we know is missing are lambdas).
1860 if (Style
.Language
== FormatStyle::LK_Java
&&
1861 Line
->Tokens
.front().Tok
->is(Keywords
.kw_synchronized
)) {
1862 // If necessary, we could set the type to something different than
1863 // TT_FunctionLBrace.
1864 if (Style
.BraceWrapping
.AfterControlStatement
==
1865 FormatStyle::BWACS_Always
) {
1868 } else if (Style
.BraceWrapping
.AfterFunction
) {
1871 FormatTok
->setFinalizedType(TT_FunctionLBrace
);
1873 IsDecltypeAutoFunction
= false;
1877 // Otherwise this was a braced init list, and the structural
1878 // element continues.
1881 if (Style
.isJavaScript() && Line
->MustBeDeclaration
) {
1882 // field/method declaration.
1886 // We arrive here when parsing function-try blocks.
1887 if (Style
.BraceWrapping
.AfterFunction
)
1891 case tok::identifier
: {
1892 if (Style
.isCSharp() && FormatTok
->is(Keywords
.kw_where
) &&
1893 Line
->MustBeDeclaration
) {
1895 parseCSharpGenericTypeConstraint();
1898 if (FormatTok
->is(TT_MacroBlockEnd
)) {
1903 // Function declarations (as opposed to function expressions) are parsed
1904 // on their own unwrapped line by continuing this loop. Function
1905 // expressions (functions that are not on their own line) must not create
1906 // a new unwrapped line, so they are special cased below.
1907 size_t TokenCount
= Line
->Tokens
.size();
1908 if (Style
.isJavaScript() && FormatTok
->is(Keywords
.kw_function
) &&
1911 Line
->Tokens
.front().Tok
->isNot(Keywords
.kw_async
)))) {
1912 tryToParseJSFunction();
1915 if ((Style
.isJavaScript() || Style
.Language
== FormatStyle::LK_Java
) &&
1916 FormatTok
->is(Keywords
.kw_interface
)) {
1917 if (Style
.isJavaScript()) {
1918 // In JavaScript/TypeScript, "interface" can be used as a standalone
1919 // identifier, e.g. in `var interface = 1;`. If "interface" is
1920 // followed by another identifier, it is very likely to be an actual
1921 // interface declaration.
1922 unsigned StoredPosition
= Tokens
->getPosition();
1923 FormatToken
*Next
= Tokens
->getNextToken();
1924 FormatTok
= Tokens
->setPosition(StoredPosition
);
1925 if (!mustBeJSIdent(Keywords
, Next
)) {
1935 if (Style
.isVerilog()) {
1936 if (FormatTok
->is(Keywords
.kw_table
)) {
1937 parseVerilogTable();
1940 if (Keywords
.isVerilogBegin(*FormatTok
) ||
1941 Keywords
.isVerilogHierarchy(*FormatTok
)) {
1948 if (!Style
.isCpp() && FormatTok
->is(Keywords
.kw_interface
)) {
1949 if (parseStructLike())
1954 if (Style
.isCpp() && FormatTok
->is(TT_StatementMacro
)) {
1955 parseStatementMacro();
1959 // See if the following token should start a new unwrapped line.
1960 StringRef Text
= FormatTok
->TokenText
;
1962 FormatToken
*PreviousToken
= FormatTok
;
1965 // JS doesn't have macros, and within classes colons indicate fields, not
1967 if (Style
.isJavaScript())
// True when, after skipping leading comments (and, for Verilog, leading '#'
// tokens), exactly one token remains on the current line.
1970 auto OneTokenSoFar
= [&]() {
1971 auto I
= Line
->Tokens
.begin(), E
= Line
->Tokens
.end();
1972 while (I
!= E
&& I
->Tok
->is(tok::comment
))
1974 if (Style
.isVerilog())
1975 while (I
!= E
&& I
->Tok
->is(tok::hash
))
1977 return I
!= E
&& (++I
== E
);
1979 if (OneTokenSoFar()) {
1980 // Recognize function-like macro usages without trailing semicolon as
1981 // well as free-standing macros like Q_OBJECT.
1982 bool FunctionLike
= FormatTok
->is(tok::l_paren
);
1986 bool FollowedByNewline
=
1987 CommentsBeforeNextToken
.empty()
1988 ? FormatTok
->NewlinesBefore
> 0
1989 : CommentsBeforeNextToken
.front()->NewlinesBefore
> 0;
// Heuristic: an all-uppercase name that is either at least five characters
// long or function-like, followed by a newline and by a token that may start
// a line, is treated as a macro.
1991 if (FollowedByNewline
&& (Text
.size() >= 5 || FunctionLike
) &&
1992 tokenCanStartNewLine(*FormatTok
) && Text
== Text
.upper()) {
1993 if (PreviousToken
->isNot(TT_UntouchableMacroFunc
))
1994 PreviousToken
->setFinalizedType(TT_FunctionLikeOrFreestandingMacro
);
2002 if ((Style
.isJavaScript() || Style
.isCSharp()) &&
2003 FormatTok
->is(TT_FatArrow
)) {
2004 tryToParseChildBlock();
2009 if (FormatTok
->is(tok::l_brace
)) {
2010 // Block kind should probably be set to BK_BracedInit for any language.
2011 // C# needs this change to ensure that array initialisers and object
2012 // initialisers are indented the same way.
2013 if (Style
.isCSharp())
2014 FormatTok
->setBlockKind(BK_BracedInit
);
2017 } else if (Style
.Language
== FormatStyle::LK_Proto
&&
2018 FormatTok
->is(tok::less
)) {
2020 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2021 /*ClosingBraceKind=*/tok::greater
);
2031 // Proto: there are no switch/case statements.
2032 if (Style
.isProto()) {
2036 // In Verilog switch is called case.
2037 if (Style
.isVerilog()) {
2042 if (Style
.isJavaScript() && Line
->MustBeDeclaration
) {
2043 // 'case: string' field declaration.
2049 case tok::kw_default
:
2051 if (Style
.isVerilog()) {
2052 if (FormatTok
->is(tok::colon
)) {
2053 // The label will be handled in the next iteration.
2056 if (FormatTok
->is(Keywords
.kw_clocking
)) {
2057 // A default clocking block.
2062 parseVerilogCaseLabel();
2068 if (Style
.isVerilog()) {
2069 parseVerilogCaseLabel();
// Tries to parse a C# property accessor block; the current token must be '{'
// (asserted).
// The function looks ahead in the token stream from a stored position to
// classify the block, tracking whether a get/init/set accessor was seen and
// whether the accessor is "trivial", and restores the stored position before
// continuing.
// NOTE(review): the return statements are not visible here, so the exact
// true/false outcomes of the individual checks are not documented.
2080 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2081 assert(FormatTok
->is(tok::l_brace
));
2082 if (!Style
.isCSharp())
2084 // See if it's a property accessor.
2085 if (FormatTok
->Previous
->isNot(tok::identifier
))
2088 // See if we are inside a property accessor.
2090 // Record the current tokenPosition so that we can advance and
2091 // reset the current token. `Next` is not set yet so we need
2092 // another way to advance along the token stream.
2093 unsigned int StoredPosition
= Tokens
->getPosition();
2094 FormatToken
*Tok
= Tokens
->getNextToken();
2096 // A trivial property accessor is of the form:
2097 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2098 // Track these as they do not require line breaks to be introduced.
2099 bool HasSpecialAccessor
= false;
2100 bool IsTrivialPropertyAccessor
= true;
2102 if (Tok
->isOneOf(tok::semi
, tok::kw_public
, tok::kw_private
,
2103 tok::kw_protected
, Keywords
.kw_internal
, Keywords
.kw_get
,
2104 Keywords
.kw_init
, Keywords
.kw_set
)) {
2105 if (Tok
->isOneOf(Keywords
.kw_get
, Keywords
.kw_init
, Keywords
.kw_set
))
2106 HasSpecialAccessor
= true;
2107 Tok
= Tokens
->getNextToken();
// Anything other than a closing '}' at this point makes the accessor
// non-trivial.
2110 if (Tok
->isNot(tok::r_brace
))
2111 IsTrivialPropertyAccessor
= false;
// Without a get/init/set keyword the look-ahead is undone.
2115 if (!HasSpecialAccessor
) {
2116 Tokens
->setPosition(StoredPosition
);
2120 // Try to parse the property accessor:
2121 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2122 Tokens
->setPosition(StoredPosition
);
2123 if (!IsTrivialPropertyAccessor
&& Style
.BraceWrapping
.AfterFunction
)
2127 switch (FormatTok
->Tok
.getKind()) {
2130 if (FormatTok
->is(tok::equal
)) {
2131 while (!eof() && FormatTok
->isNot(tok::semi
))
2139 parseBlock(/*MustBeDeclaration=*/true);
2144 if (FormatTok
->is(TT_FatArrow
)) {
2148 } while (!eof() && FormatTok
->isNot(tok::semi
));
2157 if (FormatTok
->isOneOf(Keywords
.kw_get
, Keywords
.kw_init
,
2159 !IsTrivialPropertyAccessor
) {
2160 // Non-trivial get/set needs to be on its own line.
2167 // Unreachable for well-formed code (paired '{' and '}').
// Tries to parse a C++ lambda starting at the current '[' (asserted).
// After the introducer is accepted, tokens are consumed until a '{' is
// reached, tracking whether a trailing-return arrow was seen and whether the
// parser is inside a template parameter list. On the visible success path the
// '{' is finalized as TT_LambdaLBrace and the opening '[' as
// TT_LambdaLSquare; Line->SeenDecltypeAuto is pushed onto NestedLambdas and
// popped again afterwards.
// NOTE(review): return statements and most case bodies are not visible here.
2171 bool UnwrappedLineParser::tryToParseLambda() {
2172 assert(FormatTok
->is(tok::l_square
));
2173 if (!Style
.isCpp()) {
// Keep a reference to the '[' so it can be retyped once the lambda is
// confirmed.
2177 FormatToken
&LSquare
= *FormatTok
;
2178 if (!tryToParseLambdaIntroducer())
2181 bool SeenArrow
= false;
2182 bool InTemplateParameterList
= false;
2184 while (FormatTok
->isNot(tok::l_brace
)) {
2185 if (FormatTok
->isSimpleTypeSpecifier()) {
2189 switch (FormatTok
->Tok
.getKind()) {
2193 parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference
);
2199 assert(FormatTok
->Previous
);
2200 if (FormatTok
->Previous
->is(tok::r_square
))
2201 InTemplateParameterList
= true;
2206 case tok::kw_template
:
2207 case tok::kw_typename
:
2211 case tok::kw_constexpr
:
2212 case tok::kw_consteval
:
2215 case tok::identifier
:
2216 case tok::numeric_constant
:
2217 case tok::coloncolon
:
2218 case tok::kw_mutable
:
2219 case tok::kw_noexcept
:
2220 case tok::kw_static
:
2223 // Specialization of a template with an integer parameter can contain
2224 // arithmetic, logical, comparison and ternary operators.
2226 // FIXME: This also accepts sequences of operators that are not in the scope
2227 // of a template argument list.
2229 // In a C++ lambda a template type can only occur after an arrow. We use
2230 // this as a heuristic to distinguish between Objective-C expressions
2231 // followed by an `a->b` expression, such as:
2232 // ([obj func:arg] + a->b)
2233 // Otherwise the code below would parse as a lambda.
2245 case tok::equalequal
:
2246 case tok::exclaimequal
:
2247 case tok::greaterequal
:
2248 case tok::lessequal
:
2254 if (SeenArrow
|| InTemplateParameterList
) {
2260 // This might or might not actually be a lambda arrow (this could be an
2261 // ObjC method invocation followed by a dereferencing arrow). We might
2262 // reset this back to TT_Unknown in TokenAnnotator.
2263 FormatTok
->setFinalizedType(TT_TrailingReturnArrow
);
2267 case tok::kw_requires
: {
2268 auto *RequiresToken
= FormatTok
;
2270 parseRequiresClause(RequiresToken
);
2274 if (!InTemplateParameterList
)
2283 FormatTok
->setFinalizedType(TT_LambdaLBrace
);
2284 LSquare
.setFinalizedType(TT_LambdaLSquare
);
2286 NestedLambdas
.push_back(Line
->SeenDecltypeAuto
);
2288 assert(!NestedLambdas
.empty());
2289 NestedLambdas
.pop_back();
// Tries to parse the `[...]` introducer of a lambda.
// The first guard fires when the '[' follows a token that carries
// IdentifierInfo (other than return / co_await / co_yield / co_return) or a
// scope-closing token, or when the '[' begins a C++ structured binding.
// An immediately following '[' and a ']' followed by '>' are also checked
// before the bracket contents are parsed with
// parseSquare(/*LambdaIntroducer=*/true).
// NOTE(review): the return statements are not visible here; presumably the
// guards reject the lambda interpretation — confirm against the full file.
2294 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2295 const FormatToken
*Previous
= FormatTok
->Previous
;
2296 const FormatToken
*LeftSquare
= FormatTok
;
2298 if ((Previous
&& ((Previous
->Tok
.getIdentifierInfo() &&
2299 !Previous
->isOneOf(tok::kw_return
, tok::kw_co_await
,
2300 tok::kw_co_yield
, tok::kw_co_return
)) ||
2301 Previous
->closesScope())) ||
2302 LeftSquare
->isCppStructuredBinding(Style
)) {
2305 if (FormatTok
->is(tok::l_square
))
2307 if (FormatTok
->is(tok::r_square
)) {
2308 const FormatToken
*Next
= Tokens
->peekNextToken(/*SkipComment=*/true);
2309 if (Next
->is(tok::greater
))
2312 parseSquare(/*LambdaIntroducer=*/true);
// Tries to parse a JavaScript/TypeScript function starting at the current
// token, which must be the `function` keyword (asserted).
// Visible steps: an optional generator '*' is finalized as
// TT_OverloadedOperator, an optional name is consumed, a '(' is required for
// the formal parameter list, an optional ':' introduces a return type
// (a braced object type is parsed via tryToParseBracedList(), anything else
// is skipped up to '{' or ';'), and a trailing ';' is checked for.
// NOTE(review): nextToken() calls, returns and the body parsing are not
// visible here.
2316 void UnwrappedLineParser::tryToParseJSFunction() {
2317 assert(FormatTok
->is(Keywords
.kw_function
));
2318 if (FormatTok
->is(Keywords
.kw_async
))
2320 // Consume "function".
2323 // Consume * (generator function). Treat it like C++'s overloaded operators.
2324 if (FormatTok
->is(tok::star
)) {
2325 FormatTok
->setFinalizedType(TT_OverloadedOperator
);
2329 // Consume function name.
2330 if (FormatTok
->is(tok::identifier
))
2333 if (FormatTok
->isNot(tok::l_paren
))
2336 // Parse formal parameter list.
2339 if (FormatTok
->is(tok::colon
)) {
2340 // Parse a type definition.
2343 // Eat the type declaration. For braced inline object types, balance braces,
2344 // otherwise just parse until finding an l_brace for the function body.
2345 if (FormatTok
->is(tok::l_brace
))
2346 tryToParseBracedList();
2348 while (!FormatTok
->isOneOf(tok::l_brace
, tok::semi
) && !eof())
2352 if (FormatTok
->is(tok::semi
))
// Tries to parse the current brace as a braced list.
// If the brace's block kind is still BK_Unknown, calculateBraceTypes() is run
// first; afterwards the kind must be known (asserted). A brace classified as
// BK_Block is handled by the guard below.
// NOTE(review): the return statements and the list-parsing call are not
// visible here; presumably BK_Block yields false — confirm.
2358 bool UnwrappedLineParser::tryToParseBracedList() {
2359 if (FormatTok
->is(BK_Unknown
))
2360 calculateBraceTypes();
2361 assert(FormatTok
->isNot(BK_Unknown
));
2362 if (FormatTok
->is(BK_Block
))
// Tries to parse a child block following a fat arrow (`=>`).
// Only valid for JavaScript and C#, and only when the current token is typed
// TT_FatArrow (both asserted). A token other than '{' is handled by the
// guard below.
// NOTE(review): the token advance, return statements and block parsing are
// not visible here.
2369 bool UnwrappedLineParser::tryToParseChildBlock() {
2370 assert(Style
.isJavaScript() || Style
.isCSharp());
2371 assert(FormatTok
->is(TT_FatArrow
));
2372 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2373 // They always start an expression or a child block if followed by a curly
2376 if (FormatTok
->isNot(tok::l_brace
))
// Parses the contents of a braced list up to a token of kind
// ClosingBraceKind.
// Visible handling inside the list: C# fat-arrow child blocks, JavaScript
// `function` expressions and nested braces (possibly methods in object
// literals), nested '<' lists for Proto or when the closing kind is '>', and
// semicolons (which JavaScript tolerates inside the list; otherwise
// ContinueOnSemicolons decides). For enums, Style.AllowShortEnumsOnASingleLine
// is consulted when the closing brace is reached and once more further down.
// NOTE(review): the IsEnum parameter declaration, the loop header and the
// return statements are not visible here; HasError starts out false and is
// presumably what the function reports — confirm against the full file.
2382 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons
,
2384 tok::TokenKind ClosingBraceKind
) {
2385 bool HasError
= false;
2387 // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2388 // replace this by using parseAssignmentExpression() inside.
2390 if (Style
.isCSharp() && FormatTok
->is(TT_FatArrow
) &&
2391 tryToParseChildBlock()) {
2394 if (Style
.isJavaScript()) {
2395 if (FormatTok
->is(Keywords
.kw_function
)) {
2396 tryToParseJSFunction();
2399 if (FormatTok
->is(tok::l_brace
)) {
2400 // Could be a method inside of a braced list `{a() { return 1; }}`.
2401 if (tryToParseBracedList())
2406 if (FormatTok
->Tok
.getKind() == ClosingBraceKind
) {
2407 if (IsEnum
&& !Style
.AllowShortEnumsOnASingleLine
)
2412 switch (FormatTok
->Tok
.getKind()) {
2414 if (Style
.isCSharp())
2421 // JavaScript can just have free standing methods and getters/setters in
2422 // object literals. Detect them by a "{" following ")".
2423 if (Style
.isJavaScript()) {
2424 if (FormatTok
->is(tok::l_brace
))
2430 // Assume there are no blocks inside a braced init list apart
2431 // from the ones we explicitly parse out (like lambdas).
2432 FormatTok
->setBlockKind(BK_BracedInit
);
2437 if (Style
.Language
== FormatStyle::LK_Proto
||
2438 ClosingBraceKind
== tok::greater
) {
2440 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2441 /*ClosingBraceKind=*/tok::greater
);
2447 // JavaScript (or more precisely TypeScript) can have semicolons in braced
2448 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2449 // used for error recovery if we have otherwise determined that this is
2451 if (Style
.isJavaScript()) {
2456 if (!ContinueOnSemicolons
)
2462 if (IsEnum
&& !Style
.AllowShortEnumsOnASingleLine
)
2473 /// \brief Parses a pair of parentheses (and everything between them).
2474 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2475 /// double ampersands. This applies for all nested scopes as well.
2477 /// Returns whether there is a `=` token between the parentheses.
2478 bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType
) {
2479 assert(FormatTok
->is(tok::l_paren
) && "'(' expected.");
2480 auto *LeftParen
= FormatTok
;
2481 bool SeenEqual
= false;
2482 const bool MightBeStmtExpr
= Tokens
->peekNextToken()->is(tok::l_brace
);
2485 switch (FormatTok
->Tok
.getKind()) {
2487 if (parseParens(AmpAmpTokenType
))
2489 if (Style
.Language
== FormatStyle::LK_Java
&& FormatTok
->is(tok::l_brace
))
2493 if (!MightBeStmtExpr
&&
2494 Style
.RemoveParentheses
> FormatStyle::RPS_Leave
) {
2495 const auto *Prev
= LeftParen
->Previous
;
2496 const auto *Next
= Tokens
->peekNextToken();
2497 const bool DoubleParens
=
2498 Prev
&& Prev
->is(tok::l_paren
) && Next
&& Next
->is(tok::r_paren
);
2499 const auto *PrevPrev
= Prev
? Prev
->getPreviousNonComment() : nullptr;
2500 const bool Blacklisted
=
2502 (PrevPrev
->isOneOf(tok::kw___attribute
, tok::kw_decltype
) ||
2504 (PrevPrev
->isOneOf(tok::kw_if
, tok::kw_while
) ||
2505 PrevPrev
->endsSequence(tok::kw_constexpr
, tok::kw_if
))));
2506 const bool ReturnParens
=
2507 Style
.RemoveParentheses
== FormatStyle::RPS_ReturnStatement
&&
2508 ((NestedLambdas
.empty() && !IsDecltypeAutoFunction
) ||
2509 (!NestedLambdas
.empty() && !NestedLambdas
.back())) &&
2510 Prev
&& Prev
->isOneOf(tok::kw_return
, tok::kw_co_return
) && Next
&&
2511 Next
->is(tok::semi
);
2512 if ((DoubleParens
&& !Blacklisted
) || ReturnParens
) {
2513 LeftParen
->Optional
= true;
2514 FormatTok
->Optional
= true;
2520 // A "}" inside parentheses is an error if there wasn't a matching "{".
2526 if (!tryToParseBracedList())
2531 if (FormatTok
->is(tok::l_brace
)) {
2538 if (Style
.isCSharp() && FormatTok
->is(TT_FatArrow
))
2539 tryToParseChildBlock();
2544 if (Style
.isJavaScript())
2545 parseRecord(/*ParseAsExpr=*/true);
2549 case tok::identifier
:
2550 if (Style
.isJavaScript() && (FormatTok
->is(Keywords
.kw_function
)))
2551 tryToParseJSFunction();
2555 case tok::kw_requires
: {
2556 auto RequiresToken
= FormatTok
;
2558 parseRequiresExpression(RequiresToken
);
2562 if (AmpAmpTokenType
!= TT_Unknown
)
2563 FormatTok
->setFinalizedType(AmpAmpTokenType
);
2573 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer
) {
2574 if (!LambdaIntroducer
) {
2575 assert(FormatTok
->is(tok::l_square
) && "'[' expected.");
2576 if (tryToParseLambda())
2580 switch (FormatTok
->Tok
.getKind()) {
2588 // A "}" inside square brackets is an error if there wasn't a matching "{".
2593 case tok::l_brace
: {
2594 if (!tryToParseBracedList())
2600 if (FormatTok
->is(tok::l_brace
)) {
2612 void UnwrappedLineParser::keepAncestorBraces() {
2613 if (!Style
.RemoveBracesLLVM
)
2616 const int MaxNestingLevels
= 2;
2617 const int Size
= NestedTooDeep
.size();
2618 if (Size
>= MaxNestingLevels
)
2619 NestedTooDeep
[Size
- MaxNestingLevels
] = true;
2620 NestedTooDeep
.push_back(false);
2623 static FormatToken
*getLastNonComment(const UnwrappedLine
&Line
) {
2624 for (const auto &Token
: llvm::reverse(Line
.Tokens
))
2625 if (Token
.Tok
->isNot(tok::comment
))
2631 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF
) {
2632 FormatToken
*Tok
= nullptr;
2634 if (Style
.InsertBraces
&& !Line
->InPPDirective
&& !Line
->Tokens
.empty() &&
2635 PreprocessorDirectives
.empty() && FormatTok
->isNot(tok::semi
)) {
2636 Tok
= Style
.BraceWrapping
.AfterControlStatement
== FormatStyle::BWACS_Never
2637 ? getLastNonComment(*Line
)
2638 : Line
->Tokens
.back().Tok
;
2640 if (Tok
->BraceCount
< 0) {
2641 assert(Tok
->BraceCount
== -1);
2644 Tok
->BraceCount
= -1;
2650 parseStructuralElement();
2653 assert(!Line
->InPPDirective
);
2655 for (const auto &L
: llvm::reverse(*CurrentLines
)) {
2656 if (!L
.InPPDirective
&& getLastNonComment(L
)) {
2657 Tok
= L
.Tokens
.back().Tok
;
2665 if (CheckEOF
&& eof())
2671 static void markOptionalBraces(FormatToken
*LeftBrace
) {
2675 assert(LeftBrace
->is(tok::l_brace
));
2677 FormatToken
*RightBrace
= LeftBrace
->MatchingParen
;
2679 assert(!LeftBrace
->Optional
);
2683 assert(RightBrace
->is(tok::r_brace
));
2684 assert(RightBrace
->MatchingParen
== LeftBrace
);
2685 assert(LeftBrace
->Optional
== RightBrace
->Optional
);
2687 LeftBrace
->Optional
= true;
2688 RightBrace
->Optional
= true;
2691 void UnwrappedLineParser::handleAttributes() {
2692 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2693 if (FormatTok
->isAttribute())
2695 else if (FormatTok
->is(tok::l_square
))
2696 handleCppAttributes();
2699 bool UnwrappedLineParser::handleCppAttributes() {
2700 // Handle [[likely]] / [[unlikely]] attributes.
2701 assert(FormatTok
->is(tok::l_square
));
2702 if (!tryToParseSimpleAttribute())
2708 /// Returns whether \c Tok begins a block.
2709 bool UnwrappedLineParser::isBlockBegin(const FormatToken
&Tok
) const {
2710 // FIXME: rename the function or make
2711 // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2712 return Style
.isVerilog() ? Keywords
.isVerilogBegin(Tok
)
2713 : Tok
.is(tok::l_brace
);
2716 FormatToken
*UnwrappedLineParser::parseIfThenElse(IfStmtKind
*IfKind
,
2718 bool IsVerilogAssert
) {
2719 assert((FormatTok
->is(tok::kw_if
) ||
2720 (Style
.isVerilog() &&
2721 FormatTok
->isOneOf(tok::kw_restrict
, Keywords
.kw_assert
,
2722 Keywords
.kw_assume
, Keywords
.kw_cover
))) &&
2726 if (IsVerilogAssert
) {
2727 // Handle `assert #0` and `assert final`.
2728 if (FormatTok
->is(Keywords
.kw_verilogHash
)) {
2730 if (FormatTok
->is(tok::numeric_constant
))
2732 } else if (FormatTok
->isOneOf(Keywords
.kw_final
, Keywords
.kw_property
,
2733 Keywords
.kw_sequence
)) {
2738 // Handle `if !consteval`.
2739 if (FormatTok
->is(tok::exclaim
))
2742 bool KeepIfBraces
= true;
2743 if (FormatTok
->is(tok::kw_consteval
)) {
2746 KeepIfBraces
= !Style
.RemoveBracesLLVM
|| KeepBraces
;
2747 if (FormatTok
->isOneOf(tok::kw_constexpr
, tok::identifier
))
2749 if (FormatTok
->is(tok::l_paren
)) {
2750 FormatTok
->setFinalizedType(TT_ConditionLParen
);
2755 // The then action is optional in Verilog assert statements.
2756 if (IsVerilogAssert
&& FormatTok
->is(tok::semi
)) {
2762 bool NeedsUnwrappedLine
= false;
2763 keepAncestorBraces();
2765 FormatToken
*IfLeftBrace
= nullptr;
2766 IfStmtKind IfBlockKind
= IfStmtKind::NotIf
;
2768 if (isBlockBegin(*FormatTok
)) {
2769 FormatTok
->setFinalizedType(TT_ControlStatementLBrace
);
2770 IfLeftBrace
= FormatTok
;
2771 CompoundStatementIndenter
Indenter(this, Style
, Line
->Level
);
2772 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2773 /*MunchSemi=*/true, KeepIfBraces
, &IfBlockKind
);
2774 setPreviousRBraceType(TT_ControlStatementRBrace
);
2775 if (Style
.BraceWrapping
.BeforeElse
)
2778 NeedsUnwrappedLine
= true;
2779 } else if (IsVerilogAssert
&& FormatTok
->is(tok::kw_else
)) {
2782 parseUnbracedBody();
2785 if (Style
.RemoveBracesLLVM
) {
2786 assert(!NestedTooDeep
.empty());
2787 KeepIfBraces
= KeepIfBraces
||
2788 (IfLeftBrace
&& !IfLeftBrace
->MatchingParen
) ||
2789 NestedTooDeep
.back() || IfBlockKind
== IfStmtKind::IfOnly
||
2790 IfBlockKind
== IfStmtKind::IfElseIf
;
2793 bool KeepElseBraces
= KeepIfBraces
;
2794 FormatToken
*ElseLeftBrace
= nullptr;
2795 IfStmtKind Kind
= IfStmtKind::IfOnly
;
2797 if (FormatTok
->is(tok::kw_else
)) {
2798 if (Style
.RemoveBracesLLVM
) {
2799 NestedTooDeep
.back() = false;
2800 Kind
= IfStmtKind::IfElse
;
2804 if (isBlockBegin(*FormatTok
)) {
2805 const bool FollowedByIf
= Tokens
->peekNextToken()->is(tok::kw_if
);
2806 FormatTok
->setFinalizedType(TT_ElseLBrace
);
2807 ElseLeftBrace
= FormatTok
;
2808 CompoundStatementIndenter
Indenter(this, Style
, Line
->Level
);
2809 IfStmtKind ElseBlockKind
= IfStmtKind::NotIf
;
2810 FormatToken
*IfLBrace
=
2811 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2812 /*MunchSemi=*/true, KeepElseBraces
, &ElseBlockKind
);
2813 setPreviousRBraceType(TT_ElseRBrace
);
2814 if (FormatTok
->is(tok::kw_else
)) {
2815 KeepElseBraces
= KeepElseBraces
||
2816 ElseBlockKind
== IfStmtKind::IfOnly
||
2817 ElseBlockKind
== IfStmtKind::IfElseIf
;
2818 } else if (FollowedByIf
&& IfLBrace
&& !IfLBrace
->Optional
) {
2819 KeepElseBraces
= true;
2820 assert(ElseLeftBrace
->MatchingParen
);
2821 markOptionalBraces(ElseLeftBrace
);
2824 } else if (!IsVerilogAssert
&& FormatTok
->is(tok::kw_if
)) {
2825 const FormatToken
*Previous
= Tokens
->getPreviousToken();
2827 const bool IsPrecededByComment
= Previous
->is(tok::comment
);
2828 if (IsPrecededByComment
) {
2832 bool TooDeep
= true;
2833 if (Style
.RemoveBracesLLVM
) {
2834 Kind
= IfStmtKind::IfElseIf
;
2835 TooDeep
= NestedTooDeep
.pop_back_val();
2837 ElseLeftBrace
= parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces
);
2838 if (Style
.RemoveBracesLLVM
)
2839 NestedTooDeep
.push_back(TooDeep
);
2840 if (IsPrecededByComment
)
2843 parseUnbracedBody(/*CheckEOF=*/true);
2846 KeepIfBraces
= KeepIfBraces
|| IfBlockKind
== IfStmtKind::IfElse
;
2847 if (NeedsUnwrappedLine
)
2851 if (!Style
.RemoveBracesLLVM
)
2854 assert(!NestedTooDeep
.empty());
2855 KeepElseBraces
= KeepElseBraces
||
2856 (ElseLeftBrace
&& !ElseLeftBrace
->MatchingParen
) ||
2857 NestedTooDeep
.back();
2859 NestedTooDeep
.pop_back();
2861 if (!KeepIfBraces
&& !KeepElseBraces
) {
2862 markOptionalBraces(IfLeftBrace
);
2863 markOptionalBraces(ElseLeftBrace
);
2864 } else if (IfLeftBrace
) {
2865 FormatToken
*IfRightBrace
= IfLeftBrace
->MatchingParen
;
2867 assert(IfRightBrace
->MatchingParen
== IfLeftBrace
);
2868 assert(!IfLeftBrace
->Optional
);
2869 assert(!IfRightBrace
->Optional
);
2870 IfLeftBrace
->MatchingParen
= nullptr;
2871 IfRightBrace
->MatchingParen
= nullptr;
2881 void UnwrappedLineParser::parseTryCatch() {
2882 assert(FormatTok
->isOneOf(tok::kw_try
, tok::kw___try
) && "'try' expected");
2884 bool NeedsUnwrappedLine
= false;
2885 if (FormatTok
->is(tok::colon
)) {
2886 // We are in a function try block, what comes is an initializer list.
2889 // In case identifiers were removed by clang-tidy, what might follow is
2890 // multiple commas in sequence - before the first identifier.
2891 while (FormatTok
->is(tok::comma
))
2894 while (FormatTok
->is(tok::identifier
)) {
2896 if (FormatTok
->is(tok::l_paren
))
2898 if (FormatTok
->Previous
&& FormatTok
->Previous
->is(tok::identifier
) &&
2899 FormatTok
->is(tok::l_brace
)) {
2902 } while (FormatTok
->isNot(tok::r_brace
));
2906 // In case identifiers were removed by clang-tidy, what might follow is
2907 // multiple commas in sequence - after the first identifier.
2908 while (FormatTok
->is(tok::comma
))
2912 // Parse try with resource.
2913 if (Style
.Language
== FormatStyle::LK_Java
&& FormatTok
->is(tok::l_paren
))
2916 keepAncestorBraces();
2918 if (FormatTok
->is(tok::l_brace
)) {
2919 CompoundStatementIndenter
Indenter(this, Style
, Line
->Level
);
2921 if (Style
.BraceWrapping
.BeforeCatch
)
2924 NeedsUnwrappedLine
= true;
2925 } else if (FormatTok
->isNot(tok::kw_catch
)) {
2926 // The C++ standard requires a compound-statement after a try.
2927 // If there's none, we try to assume there's a structuralElement
2928 // and try to continue.
2931 parseStructuralElement();
2935 if (FormatTok
->is(tok::at
))
2937 if (!(FormatTok
->isOneOf(tok::kw_catch
, Keywords
.kw___except
,
2938 tok::kw___finally
) ||
2939 ((Style
.Language
== FormatStyle::LK_Java
|| Style
.isJavaScript()) &&
2940 FormatTok
->is(Keywords
.kw_finally
)) ||
2941 (FormatTok
->isObjCAtKeyword(tok::objc_catch
) ||
2942 FormatTok
->isObjCAtKeyword(tok::objc_finally
)))) {
2946 while (FormatTok
->isNot(tok::l_brace
)) {
2947 if (FormatTok
->is(tok::l_paren
)) {
2951 if (FormatTok
->isOneOf(tok::semi
, tok::r_brace
, tok::eof
)) {
2952 if (Style
.RemoveBracesLLVM
)
2953 NestedTooDeep
.pop_back();
2958 NeedsUnwrappedLine
= false;
2959 Line
->MustBeDeclaration
= false;
2960 CompoundStatementIndenter
Indenter(this, Style
, Line
->Level
);
2962 if (Style
.BraceWrapping
.BeforeCatch
)
2965 NeedsUnwrappedLine
= true;
2968 if (Style
.RemoveBracesLLVM
)
2969 NestedTooDeep
.pop_back();
2971 if (NeedsUnwrappedLine
)
2975 void UnwrappedLineParser::parseNamespace() {
2976 assert(FormatTok
->isOneOf(tok::kw_namespace
, TT_NamespaceMacro
) &&
2977 "'namespace' expected");
2979 const FormatToken
&InitialToken
= *FormatTok
;
2981 if (InitialToken
.is(TT_NamespaceMacro
)) {
2984 while (FormatTok
->isOneOf(tok::identifier
, tok::coloncolon
, tok::kw_inline
,
2985 tok::l_square
, tok::period
, tok::l_paren
) ||
2986 (Style
.isCSharp() && FormatTok
->is(tok::kw_union
))) {
2987 if (FormatTok
->is(tok::l_square
))
2989 else if (FormatTok
->is(tok::l_paren
))
2995 if (FormatTok
->is(tok::l_brace
)) {
2996 FormatTok
->setFinalizedType(TT_NamespaceLBrace
);
2998 if (ShouldBreakBeforeBrace(Style
, InitialToken
))
3001 unsigned AddLevels
=
3002 Style
.NamespaceIndentation
== FormatStyle::NI_All
||
3003 (Style
.NamespaceIndentation
== FormatStyle::NI_Inner
&&
3004 DeclarationScopeStack
.size() > 1)
3007 bool ManageWhitesmithsBraces
=
3009 Style
.BreakBeforeBraces
== FormatStyle::BS_Whitesmiths
;
3011 // If we're in Whitesmiths mode, indent the brace if we're not indenting
3013 if (ManageWhitesmithsBraces
)
3016 // Munch the semicolon after a namespace. This is more common than one would
3017 // think. Putting the semicolon into its own line is very ugly.
3018 parseBlock(/*MustBeDeclaration=*/true, AddLevels
, /*MunchSemi=*/true,
3019 /*KeepBraces=*/true, /*IfKind=*/nullptr,
3020 ManageWhitesmithsBraces
);
3022 addUnwrappedLine(AddLevels
> 0 ? LineLevel::Remove
: LineLevel::Keep
);
3024 if (ManageWhitesmithsBraces
)
3027 // FIXME: Add error handling.
3030 void UnwrappedLineParser::parseNew() {
3031 assert(FormatTok
->is(tok::kw_new
) && "'new' expected");
3034 if (Style
.isCSharp()) {
3036 // Handle constructor invocation, e.g. `new(field: value)`.
3037 if (FormatTok
->is(tok::l_paren
))
3040 // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
3041 if (FormatTok
->is(tok::l_brace
))
3044 if (FormatTok
->isOneOf(tok::semi
, tok::comma
))
3051 if (Style
.Language
!= FormatStyle::LK_Java
)
3054 // In Java, we can parse everything up to the parens, which aren't optional.
3056 // There should not be a ;, { or } before the new's open paren.
3057 if (FormatTok
->isOneOf(tok::semi
, tok::l_brace
, tok::r_brace
))
3060 // Consume the parens.
3061 if (FormatTok
->is(tok::l_paren
)) {
3064 // If there is a class body of an anonymous class, consume that as child.
3065 if (FormatTok
->is(tok::l_brace
))
3073 void UnwrappedLineParser::parseLoopBody(bool KeepBraces
, bool WrapRightBrace
) {
3074 keepAncestorBraces();
3076 if (isBlockBegin(*FormatTok
)) {
3077 FormatTok
->setFinalizedType(TT_ControlStatementLBrace
);
3078 FormatToken
*LeftBrace
= FormatTok
;
3079 CompoundStatementIndenter
Indenter(this, Style
, Line
->Level
);
3080 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3081 /*MunchSemi=*/true, KeepBraces
);
3082 setPreviousRBraceType(TT_ControlStatementRBrace
);
3084 assert(!NestedTooDeep
.empty());
3085 if (!NestedTooDeep
.back())
3086 markOptionalBraces(LeftBrace
);
3091 parseUnbracedBody();
3095 NestedTooDeep
.pop_back();
3098 void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens
) {
3099 assert((FormatTok
->isOneOf(tok::kw_for
, tok::kw_while
, TT_ForEachMacro
) ||
3100 (Style
.isVerilog() &&
3101 FormatTok
->isOneOf(Keywords
.kw_always
, Keywords
.kw_always_comb
,
3102 Keywords
.kw_always_ff
, Keywords
.kw_always_latch
,
3103 Keywords
.kw_final
, Keywords
.kw_initial
,
3104 Keywords
.kw_foreach
, Keywords
.kw_forever
,
3105 Keywords
.kw_repeat
))) &&
3106 "'for', 'while' or foreach macro expected");
3107 const bool KeepBraces
= !Style
.RemoveBracesLLVM
||
3108 !FormatTok
->isOneOf(tok::kw_for
, tok::kw_while
);
3111 // JS' for await ( ...
3112 if (Style
.isJavaScript() && FormatTok
->is(Keywords
.kw_await
))
3114 if (Style
.isCpp() && FormatTok
->is(tok::kw_co_await
))
3116 if (HasParens
&& FormatTok
->is(tok::l_paren
)) {
3117 // The type is only set for Verilog basically because we were afraid to
3118 // change the existing behavior for loops. See the discussion on D121756 for
3120 if (Style
.isVerilog())
3121 FormatTok
->setFinalizedType(TT_ConditionLParen
);
3125 if (Style
.isVerilog()) {
3127 parseVerilogSensitivityList();
3128 } else if (Style
.AllowShortLoopsOnASingleLine
&& FormatTok
->is(tok::semi
) &&
3129 Tokens
->getPreviousToken()->is(tok::r_paren
)) {
3136 parseLoopBody(KeepBraces
, /*WrapRightBrace=*/true);
3139 void UnwrappedLineParser::parseDoWhile() {
3140 assert(FormatTok
->is(tok::kw_do
) && "'do' expected");
3143 parseLoopBody(/*KeepBraces=*/true, Style
.BraceWrapping
.BeforeWhile
);
3145 // FIXME: Add error handling.
3146 if (FormatTok
->isNot(tok::kw_while
)) {
3151 FormatTok
->setFinalizedType(TT_DoWhile
);
3153 // If in Whitesmiths mode, the line with the while() needs to be indented
3154 // to the same level as the block.
3155 if (Style
.BreakBeforeBraces
== FormatStyle::BS_Whitesmiths
)
3159 parseStructuralElement();
3162 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel
) {
3164 unsigned OldLineLevel
= Line
->Level
;
3165 if (Line
->Level
> 1 || (!Line
->InPPDirective
&& Line
->Level
> 0))
3170 if (!Style
.IndentCaseBlocks
&& CommentsBeforeNextToken
.empty() &&
3171 FormatTok
->is(tok::l_brace
)) {
3173 CompoundStatementIndenter
Indenter(this, Line
->Level
,
3174 Style
.BraceWrapping
.AfterCaseLabel
,
3175 Style
.BraceWrapping
.IndentBraces
);
3177 if (FormatTok
->is(tok::kw_break
)) {
3178 if (Style
.BraceWrapping
.AfterControlStatement
==
3179 FormatStyle::BWACS_Always
) {
3181 if (!Style
.IndentCaseBlocks
&&
3182 Style
.BreakBeforeBraces
== FormatStyle::BS_Whitesmiths
) {
3186 parseStructuralElement();
3190 if (FormatTok
->is(tok::semi
))
3194 Line
->Level
= OldLineLevel
;
3195 if (FormatTok
->isNot(tok::l_brace
)) {
3196 parseStructuralElement();
3201 void UnwrappedLineParser::parseCaseLabel() {
3202 assert(FormatTok
->is(tok::kw_case
) && "'case' expected");
3204 // FIXME: fix handling of complex expressions here.
3207 if (FormatTok
->is(tok::colon
)) {
3208 FormatTok
->setFinalizedType(TT_CaseLabelColon
);
3215 void UnwrappedLineParser::parseSwitch() {
3216 assert(FormatTok
->is(tok::kw_switch
) && "'switch' expected");
3218 if (FormatTok
->is(tok::l_paren
))
3221 keepAncestorBraces();
3223 if (FormatTok
->is(tok::l_brace
)) {
3224 CompoundStatementIndenter
Indenter(this, Style
, Line
->Level
);
3225 FormatTok
->setFinalizedType(TT_ControlStatementLBrace
);
3227 setPreviousRBraceType(TT_ControlStatementRBrace
);
3232 parseStructuralElement();
3236 if (Style
.RemoveBracesLLVM
)
3237 NestedTooDeep
.pop_back();
3240 // Operators that can follow a C variable.
3241 static bool isCOperatorFollowingVar(tok::TokenKind kind
) {
3247 case tok::caretequal
:
3251 case tok::equalequal
:
3253 case tok::exclaimequal
:
3255 case tok::greaterequal
:
3256 case tok::greatergreater
:
3257 case tok::greatergreaterequal
:
3261 case tok::lessequal
:
3263 case tok::lesslessequal
:
3265 case tok::minusequal
:
3266 case tok::minusminus
:
3268 case tok::percentequal
:
3271 case tok::pipeequal
:
3274 case tok::plusequal
:
3282 case tok::slashequal
:
3284 case tok::starequal
:
3291 void UnwrappedLineParser::parseAccessSpecifier() {
3292 FormatToken
*AccessSpecifierCandidate
= FormatTok
;
3294 // Understand Qt's slots.
3295 if (FormatTok
->isOneOf(Keywords
.kw_slots
, Keywords
.kw_qslots
))
3297 // Otherwise, we don't know what it is, and we'd better keep the next token.
3298 if (FormatTok
->is(tok::colon
)) {
3301 } else if (FormatTok
->isNot(tok::coloncolon
) &&
3302 !isCOperatorFollowingVar(FormatTok
->Tok
.getKind())) {
3303 // Not a variable name nor namespace name.
3305 } else if (AccessSpecifierCandidate
) {
3306 // Consider the access specifier to be a C identifier.
3307 AccessSpecifierCandidate
->Tok
.setKind(tok::identifier
);
3311 /// \brief Parses a requires, decides if it is a clause or an expression.
3312 /// \pre The current token has to be the requires keyword.
3313 /// \returns true if it parsed a clause.
3314 bool clang::format::UnwrappedLineParser::parseRequires() {
3315 assert(FormatTok
->is(tok::kw_requires
) && "'requires' expected");
3316 auto RequiresToken
= FormatTok
;
3318 // We try to guess if it is a requires clause, or a requires expression. For
3319 // that we first consume the keyword and check the next token.
3322 switch (FormatTok
->Tok
.getKind()) {
3324 // This can only be an expression, never a clause.
3325 parseRequiresExpression(RequiresToken
);
3328 // Clauses and expression can start with a paren, it's unclear what we have.
3331 // All other tokens can only be a clause.
3332 parseRequiresClause(RequiresToken
);
3336 // Looking forward we would have to decide if there are function declaration
3337 // like arguments to the requires expression:
3339 // Or there is a constraint expression for the requires clause:
3340 // requires (C<T> && ...
3342 // But first let's look behind.
3343 auto *PreviousNonComment
= RequiresToken
->getPreviousNonComment();
3345 if (!PreviousNonComment
||
3346 PreviousNonComment
->is(TT_RequiresExpressionLBrace
)) {
3347 // If there is no token, or an expression left brace, we are a requires
3348 // clause within a requires expression.
3349 parseRequiresClause(RequiresToken
);
3353 switch (PreviousNonComment
->Tok
.getKind()) {
3356 case tok::kw_noexcept
:
3358 // This is a requires clause.
3359 parseRequiresClause(RequiresToken
);
3363 // This can be either:
3364 // if (... && requires (T t) ...)
3366 // void member(...) && requires (C<T> ...
3367 // We check the one token before that for a const:
3368 // void member(...) const && requires (C<T> ...
3369 auto PrevPrev
= PreviousNonComment
->getPreviousNonComment();
3370 if (PrevPrev
&& PrevPrev
->is(tok::kw_const
)) {
3371 parseRequiresClause(RequiresToken
);
3377 if (PreviousNonComment
->isTypeOrIdentifier()) {
3378 // This is a requires clause.
3379 parseRequiresClause(RequiresToken
);
3382 // It's an expression.
3383 parseRequiresExpression(RequiresToken
);
3387 // Now we look forward and try to check if the paren content is a parameter
3388 // list. The parameters can be cv-qualified and contain references or
3390 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3391 // of stuff: typename, const, *, &, &&, ::, identifiers.
3393 unsigned StoredPosition
= Tokens
->getPosition();
3394 FormatToken
*NextToken
= Tokens
->getNextToken();
3396 auto PeekNext
= [&Lookahead
, &NextToken
, this] {
3398 NextToken
= Tokens
->getNextToken();
3401 bool FoundType
= false;
3402 bool LastWasColonColon
= false;
3405 for (; Lookahead
< 50; PeekNext()) {
3406 switch (NextToken
->Tok
.getKind()) {
3407 case tok::kw_volatile
:
3410 if (OpenAngles
== 0) {
3411 FormatTok
= Tokens
->setPosition(StoredPosition
);
3412 parseRequiresExpression(RequiresToken
);
3418 FormatTok
= Tokens
->setPosition(StoredPosition
);
3419 parseRequiresClause(RequiresToken
);
3422 // Break out of the loop.
3425 case tok::coloncolon
:
3426 LastWasColonColon
= true;
3428 case tok::identifier
:
3429 if (FoundType
&& !LastWasColonColon
&& OpenAngles
== 0) {
3430 FormatTok
= Tokens
->setPosition(StoredPosition
);
3431 parseRequiresExpression(RequiresToken
);
3435 LastWasColonColon
= false;
3444 if (NextToken
->isSimpleTypeSpecifier()) {
3445 FormatTok
= Tokens
->setPosition(StoredPosition
);
3446 parseRequiresExpression(RequiresToken
);
3452 // This seems to be a complicated expression, just assume it's a clause.
3453 FormatTok
= Tokens
->setPosition(StoredPosition
);
3454 parseRequiresClause(RequiresToken
);
3458 /// \brief Parses a requires clause.
3459 /// \param RequiresToken The requires keyword token, which starts this clause.
3460 /// \pre We need to be on the next token after the requires keyword.
3461 /// \sa parseRequiresExpression
3463 /// Returns if it has either finished parsing the clause or detected that
3464 /// the clause is incorrect.
3465 void UnwrappedLineParser::parseRequiresClause(FormatToken
*RequiresToken
) {
3466 assert(FormatTok
->getPreviousNonComment() == RequiresToken
);
3467 assert(RequiresToken
->is(tok::kw_requires
) && "'requires' expected");
3469 // If there is no previous token, we are within a requires expression,
3470 // otherwise we will always have the template or function declaration in front
3472 bool InRequiresExpression
=
3473 !RequiresToken
->Previous
||
3474 RequiresToken
->Previous
->is(TT_RequiresExpressionLBrace
);
3476 RequiresToken
->setFinalizedType(InRequiresExpression
3477 ? TT_RequiresClauseInARequiresExpression
3478 : TT_RequiresClause
);
3480 // NOTE: parseConstraintExpression is only ever called from this function.
3481 // It could be inlined into here.
3482 parseConstraintExpression();
3484 if (!InRequiresExpression
)
3485 FormatTok
->Previous
->ClosesRequiresClause
= true;
3488 /// \brief Parses a requires expression.
3489 /// \param RequiresToken The requires keyword token, which starts this clause.
3490 /// \pre We need to be on the next token after the requires keyword.
3491 /// \sa parseRequiresClause
3493 /// Returns if it has either finished parsing the expression or detected
3494 /// that the expression is incorrect.
3495 void UnwrappedLineParser::parseRequiresExpression(FormatToken
*RequiresToken
) {
3496 assert(FormatTok
->getPreviousNonComment() == RequiresToken
);
3497 assert(RequiresToken
->is(tok::kw_requires
) && "'requires' expected");
3499 RequiresToken
->setFinalizedType(TT_RequiresExpression
);
3501 if (FormatTok
->is(tok::l_paren
)) {
3502 FormatTok
->setFinalizedType(TT_RequiresExpressionLParen
);
3506 if (FormatTok
->is(tok::l_brace
)) {
3507 FormatTok
->setFinalizedType(TT_RequiresExpressionLBrace
);
3512 /// \brief Parses a constraint expression.
3514 /// This is the body of a requires clause. It returns when the parsing is
3515 /// complete or the expression is incorrect.
3516 void UnwrappedLineParser::parseConstraintExpression() {
3517 // The special handling for lambdas is needed since tryToParseLambda() eats a
3518 // token and if a requires expression is the last part of a requires clause
3519 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3520 // not set on the correct token. Thus we need to be aware if we even expect a
3521 // lambda to be possible.
3522 // template <typename T> requires requires { ... } [[nodiscard]] ...;
3523 bool LambdaNextTimeAllowed
= true;
3525 // Within lambda declarations, it is permitted to put a requires clause after
3526 // its template parameter list, which would place the requires clause right
3527 // before the parentheses of the parameters of the lambda declaration. Thus,
3528 // we track if we expect to see grouping parentheses at all.
3529 // Without this check, `requires foo<T> (T t)` in the below example would be
3530 // seen as the whole requires clause, accidentally eating the parameters of
3532 // [&]<typename T> requires foo<T> (T t) { ... };
3533 bool TopLevelParensAllowed
= true;
3536 bool LambdaThisTimeAllowed
= std::exchange(LambdaNextTimeAllowed
, false);
3538 switch (FormatTok
->Tok
.getKind()) {
3539 case tok::kw_requires
: {
3540 auto RequiresToken
= FormatTok
;
3542 parseRequiresExpression(RequiresToken
);
3547 if (!TopLevelParensAllowed
)
3549 parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator
);
3550 TopLevelParensAllowed
= false;
3554 if (!LambdaThisTimeAllowed
|| !tryToParseLambda())
3561 case tok::kw_struct
:
3566 // Potential function body.
3571 FormatTok
->setFinalizedType(TT_BinaryOperator
);
3573 LambdaNextTimeAllowed
= true;
3574 TopLevelParensAllowed
= true;
3579 LambdaNextTimeAllowed
= LambdaThisTimeAllowed
;
3583 case tok::kw_sizeof
:
3585 case tok::greaterequal
:
3586 case tok::greatergreater
:
3588 case tok::lessequal
:
3590 case tok::equalequal
:
3592 case tok::exclaimequal
:
3597 LambdaNextTimeAllowed
= true;
3598 TopLevelParensAllowed
= true;
3603 case tok::numeric_constant
:
3604 case tok::coloncolon
:
3607 TopLevelParensAllowed
= false;
3612 case tok::kw_static_cast
:
3613 case tok::kw_const_cast
:
3614 case tok::kw_reinterpret_cast
:
3615 case tok::kw_dynamic_cast
:
3617 if (FormatTok
->isNot(tok::less
))
3621 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
3622 /*ClosingBraceKind=*/tok::greater
);
3626 if (!FormatTok
->Tok
.getIdentifierInfo()) {
3627 // Identifiers are part of the default case; we check for more than
3628 // tok::identifier to handle builtin type traits.
3632 // We need to differentiate identifiers for a template deduction guide,
3633 // variables, or function return types (the constraint expression has
3634 // ended before that), and basically all other cases. But it's easier to
3635 // check the other way around.
3636 assert(FormatTok
->Previous
);
3637 switch (FormatTok
->Previous
->Tok
.getKind()) {
3638 case tok::coloncolon
: // Nested identifier.
3639 case tok::ampamp
: // Start of a function or variable for the
3640 case tok::pipepipe
: // constraint expression. (binary)
3641 case tok::exclaim
: // The same as above, but unary.
3642 case tok::kw_requires
: // Initial identifier of a requires clause.
3643 case tok::equal
: // Initial identifier of a concept declaration.
3649 // Read identifier with optional template declaration.
3651 if (FormatTok
->is(tok::less
)) {
3653 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
3654 /*ClosingBraceKind=*/tok::greater
);
3656 TopLevelParensAllowed
= false;
3662 bool UnwrappedLineParser::parseEnum() {
3663 const FormatToken
&InitialToken
= *FormatTok
;
3665 // Won't be 'enum' for NS_ENUMs.
3666 if (FormatTok
->is(tok::kw_enum
))
3669 // In TypeScript, "enum" can also be used as property name, e.g. in interface
3670 // declarations. An "enum" keyword followed by a colon would be a syntax
3671 // error and thus assume it is just an identifier.
3672 if (Style
.isJavaScript() && FormatTok
->isOneOf(tok::colon
, tok::question
))
3675 // In protobuf, "enum" can be used as a field name.
3676 if (Style
.Language
== FormatStyle::LK_Proto
&& FormatTok
->is(tok::equal
))
3679 // Eat up enum class ...
3680 if (FormatTok
->isOneOf(tok::kw_class
, tok::kw_struct
))
3683 while (FormatTok
->Tok
.getIdentifierInfo() ||
3684 FormatTok
->isOneOf(tok::colon
, tok::coloncolon
, tok::less
,
3685 tok::greater
, tok::comma
, tok::question
,
3686 tok::l_square
, tok::r_square
)) {
3687 if (Style
.isVerilog()) {
3688 FormatTok
->setFinalizedType(TT_VerilogDimensionedTypeName
);
3690 // In Verilog the base type can have dimensions.
3691 while (FormatTok
->is(tok::l_square
))
3696 // We can have macros or attributes in between 'enum' and the enum name.
3697 if (FormatTok
->is(tok::l_paren
))
3699 assert(FormatTok
->isNot(TT_AttributeSquare
));
3700 if (FormatTok
->is(tok::identifier
)) {
3702 // If there are two identifiers in a row, this is likely an elaborated
3703 // return type. In Java, this can be "implements", etc.
3704 if (Style
.isCpp() && FormatTok
->is(tok::identifier
))
3709 // Just a declaration or something is wrong.
3710 if (FormatTok
->isNot(tok::l_brace
))
3712 FormatTok
->setFinalizedType(TT_EnumLBrace
);
3713 FormatTok
->setBlockKind(BK_Block
);
3715 if (Style
.Language
== FormatStyle::LK_Java
) {
3716 // Java enums are different.
3717 parseJavaEnumBody();
3720 if (Style
.Language
== FormatStyle::LK_Proto
) {
3721 parseBlock(/*MustBeDeclaration=*/true);
3725 if (!Style
.AllowShortEnumsOnASingleLine
&&
3726 ShouldBreakBeforeBrace(Style
, InitialToken
)) {
3731 if (!Style
.AllowShortEnumsOnASingleLine
) {
3735 bool HasError
= !parseBracedList(/*ContinueOnSemicolons=*/true,
3737 if (!Style
.AllowShortEnumsOnASingleLine
)
3740 if (FormatTok
->is(tok::semi
))
3744 setPreviousRBraceType(TT_EnumRBrace
);
3747 // There is no addUnwrappedLine() here so that we fall through to parsing a
3748 // structural element afterwards. Thus, in "enum A {} n, m;",
3749 // "} n, m;" will end up in one unwrapped line.
3752 bool UnwrappedLineParser::parseStructLike() {
3753 // parseRecord falls through and does not yet add an unwrapped line as a
3754 // record declaration or definition can start a structural element.
3756 // This does not apply to Java, JavaScript and C#.
3757 if (Style
.Language
== FormatStyle::LK_Java
|| Style
.isJavaScript() ||
3759 if (FormatTok
->is(tok::semi
))
3768 // A class used to set and restore the Token position when peeking
3769 // ahead in the token source.
3770 class ScopedTokenPosition
{
3771 unsigned StoredPosition
;
3772 FormatTokenSource
*Tokens
;
3775 ScopedTokenPosition(FormatTokenSource
*Tokens
) : Tokens(Tokens
) {
3776 assert(Tokens
&& "Tokens expected to not be null");
3777 StoredPosition
= Tokens
->getPosition();
3780 ~ScopedTokenPosition() { Tokens
->setPosition(StoredPosition
); }
3784 // Look to see if we have [[ by looking ahead; if
3785 // it's not, then rewind to the original position.
3786 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3787 ScopedTokenPosition
AutoPosition(Tokens
);
3788 FormatToken
*Tok
= Tokens
->getNextToken();
3789 // We already read the first [ check for the second.
3790 if (Tok
->isNot(tok::l_square
))
3792 // Double check that the attribute is just something
3794 while (Tok
->isNot(tok::eof
)) {
3795 if (Tok
->is(tok::r_square
))
3797 Tok
= Tokens
->getNextToken();
3799 if (Tok
->is(tok::eof
))
3801 Tok
= Tokens
->getNextToken();
3802 if (Tok
->isNot(tok::r_square
))
3804 Tok
= Tokens
->getNextToken();
3805 if (Tok
->is(tok::semi
))
3810 void UnwrappedLineParser::parseJavaEnumBody() {
3811 assert(FormatTok
->is(tok::l_brace
));
3812 const FormatToken
*OpeningBrace
= FormatTok
;
3814 // Determine whether the enum is simple, i.e. does not have a semicolon or
3815 // constants with class bodies. Simple enums can be formatted like braced
3816 // lists, contracted to a single line, etc.
3817 unsigned StoredPosition
= Tokens
->getPosition();
3818 bool IsSimple
= true;
3819 FormatToken
*Tok
= Tokens
->getNextToken();
3820 while (Tok
->isNot(tok::eof
)) {
3821 if (Tok
->is(tok::r_brace
))
3823 if (Tok
->isOneOf(tok::l_brace
, tok::semi
)) {
3827 // FIXME: This will also mark enums with braces in the arguments to enum
3828 // constants as "not simple". This is probably fine in practice, though.
3829 Tok
= Tokens
->getNextToken();
3831 FormatTok
= Tokens
->setPosition(StoredPosition
);
3840 // Parse the body of a more complex enum.
3841 // First add a line for everything up to the "{".
3846 // Parse the enum constants.
3848 if (FormatTok
->is(tok::l_brace
)) {
3849 // Parse the constant's class body.
3850 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3851 /*MunchSemi=*/false);
3852 } else if (FormatTok
->is(tok::l_paren
)) {
3854 } else if (FormatTok
->is(tok::comma
)) {
3857 } else if (FormatTok
->is(tok::semi
)) {
3861 } else if (FormatTok
->is(tok::r_brace
)) {
3869 // Parse the class body after the enum's ";" if any.
3870 parseLevel(OpeningBrace
);
3876 void UnwrappedLineParser::parseRecord(bool ParseAsExpr
) {
3877 const FormatToken
&InitialToken
= *FormatTok
;
3880 // The actual identifier can be a nested name specifier, and in macros
3881 // it is often token-pasted.
3882 // An [[attribute]] can be before the identifier.
3883 while (FormatTok
->isOneOf(tok::identifier
, tok::coloncolon
, tok::hashhash
,
3884 tok::kw_alignas
, tok::l_square
) ||
3885 FormatTok
->isAttribute() ||
3886 ((Style
.Language
== FormatStyle::LK_Java
|| Style
.isJavaScript()) &&
3887 FormatTok
->isOneOf(tok::period
, tok::comma
))) {
3888 if (Style
.isJavaScript() &&
3889 FormatTok
->isOneOf(Keywords
.kw_extends
, Keywords
.kw_implements
)) {
3890 // JavaScript/TypeScript supports inline object types in
3891 // extends/implements positions:
3892 // class Foo implements {bar: number} { }
3894 if (FormatTok
->is(tok::l_brace
)) {
3895 tryToParseBracedList();
3899 if (FormatTok
->is(tok::l_square
) && handleCppAttributes())
3901 bool IsNonMacroIdentifier
=
3902 FormatTok
->is(tok::identifier
) &&
3903 FormatTok
->TokenText
!= FormatTok
->TokenText
.upper();
3905 // We can have macros in between 'class' and the class name.
3906 if (!IsNonMacroIdentifier
&& FormatTok
->is(tok::l_paren
))
3910 // Note that parsing away template declarations here leads to incorrectly
3911 // accepting function declarations as record declarations.
3912 // In general, we cannot solve this problem. Consider:
3913 // class A<int> B() {}
3914 // which can be a function definition or a class definition when B() is a
3915 // macro. If we find enough real-world cases where this is a problem, we
3916 // can parse for the 'template' keyword in the beginning of the statement,
3917 // and thus rule out the record production in case there is no template
3918 // (this would still leave us with an ambiguity between template function
3919 // and class declarations).
3920 if (FormatTok
->isOneOf(tok::colon
, tok::less
)) {
3922 if (FormatTok
->is(tok::l_brace
)) {
3923 calculateBraceTypes(/*ExpectClassBody=*/true);
3924 if (!tryToParseBracedList())
3927 if (FormatTok
->is(tok::l_square
)) {
3928 FormatToken
*Previous
= FormatTok
->Previous
;
3930 !(Previous
->is(tok::r_paren
) || Previous
->isTypeOrIdentifier())) {
3931 // Don't try parsing a lambda if we had a closing parenthesis before,
3932 // it was probably a pointer to an array: int (*)[].
3933 if (!tryToParseLambda())
3940 if (FormatTok
->is(tok::semi
))
3942 if (Style
.isCSharp() && FormatTok
->is(Keywords
.kw_where
)) {
3945 parseCSharpGenericTypeConstraint();
3952 auto GetBraceTypes
=
3953 [](const FormatToken
&RecordTok
) -> std::pair
<TokenType
, TokenType
> {
3954 switch (RecordTok
.Tok
.getKind()) {
3956 return {TT_ClassLBrace
, TT_ClassRBrace
};
3957 case tok::kw_struct
:
3958 return {TT_StructLBrace
, TT_StructRBrace
};
3960 return {TT_UnionLBrace
, TT_UnionRBrace
};
3962 // Useful for e.g. interface.
3963 return {TT_RecordLBrace
, TT_RecordRBrace
};
3966 if (FormatTok
->is(tok::l_brace
)) {
3967 auto [OpenBraceType
, ClosingBraceType
] = GetBraceTypes(InitialToken
);
3968 FormatTok
->setFinalizedType(OpenBraceType
);
3972 if (ShouldBreakBeforeBrace(Style
, InitialToken
))
3975 unsigned AddLevels
= Style
.IndentAccessModifiers
? 2u : 1u;
3976 parseBlock(/*MustBeDeclaration=*/true, AddLevels
, /*MunchSemi=*/false);
3978 setPreviousRBraceType(ClosingBraceType
);
3980 // There is no addUnwrappedLine() here so that we fall through to parsing a
3981 // structural element afterwards. Thus, in "class A {} n, m;",
3982 // "} n, m;" will end up in one unwrapped line.
3985 void UnwrappedLineParser::parseObjCMethod() {
3986 assert(FormatTok
->isOneOf(tok::l_paren
, tok::identifier
) &&
3987 "'(' or identifier expected.");
3989 if (FormatTok
->is(tok::semi
)) {
3993 } else if (FormatTok
->is(tok::l_brace
)) {
3994 if (Style
.BraceWrapping
.AfterFunction
)
// Handles an Objective-C protocol list. The current token must be the opening
// '<' (asserted). The loop closed by `} while (...)` below keeps going until
// the current token is '>' or eof() is reached, and contains a separate check
// for ';', '{' or @end. Afterwards one more token is consumed with nextToken().
4005 void UnwrappedLineParser::parseObjCProtocolList() {
4006 assert(FormatTok
->is(tok::less
) && "'<' expected.");
4009 // Early exit in case someone forgot a close angle.
4010 if (FormatTok
->isOneOf(tok::semi
, tok::l_brace
) ||
4011 FormatTok
->isObjCAtKeyword(tok::objc_end
)) {
4014 } while (!eof() && FormatTok
->isNot(tok::greater
));
4015 nextToken(); // Skip '>'.
4018 void UnwrappedLineParser::parseObjCUntilAtEnd() {
4020 if (FormatTok
->isObjCAtKeyword(tok::objc_end
)) {
4025 if (FormatTok
->is(tok::l_brace
)) {
4027 // In ObjC interfaces, nothing should be following the "}".
4029 } else if (FormatTok
->is(tok::r_brace
)) {
4030 // Ignore stray "}". parseStructuralElement doesn't consume them.
4033 } else if (FormatTok
->isOneOf(tok::minus
, tok::plus
)) {
4037 parseStructuralElement();
4042 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
4043 assert(FormatTok
->Tok
.getObjCKeywordID() == tok::objc_interface
||
4044 FormatTok
->Tok
.getObjCKeywordID() == tok::objc_implementation
);
4046 nextToken(); // interface name
4048 // @interface can be followed by a lightweight generic
4049 // specialization list, then either a base class or a category.
4050 if (FormatTok
->is(tok::less
))
4051 parseObjCLightweightGenerics();
4052 if (FormatTok
->is(tok::colon
)) {
4054 nextToken(); // base class name
4055 // The base class can also have lightweight generics applied to it.
4056 if (FormatTok
->is(tok::less
))
4057 parseObjCLightweightGenerics();
4058 } else if (FormatTok
->is(tok::l_paren
)) {
4059 // Skip category, if present.
4063 if (FormatTok
->is(tok::less
))
4064 parseObjCProtocolList();
4066 if (FormatTok
->is(tok::l_brace
)) {
4067 if (Style
.BraceWrapping
.AfterObjCDeclaration
)
4069 parseBlock(/*MustBeDeclaration=*/true);
4072 // With instance variables, this puts '}' on its own line. Without instance
4073 // variables, this ends the @interface line.
4076 parseObjCUntilAtEnd();
// Handles an Objective-C lightweight generics list. The current token must be
// the opening '<' (asserted).
4079 void UnwrappedLineParser::parseObjCLightweightGenerics() {
4080 assert(FormatTok
->is(tok::less
));
4081 // Unlike protocol lists, generic parameterizations support
4084 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4085 // NSObject <NSCopying, NSSecureCoding>
4087 // so we need to count how many open angles we have left.
// Starts at 1 to account for the '<' asserted above.
4088 unsigned NumOpenAngles
= 1;
4091 // Early exit in case someone forgot a close angle.
4092 if (FormatTok
->isOneOf(tok::semi
, tok::l_brace
) ||
4093 FormatTok
->isObjCAtKeyword(tok::objc_end
)) {
// '<' and '>' each get their own branch; the '>' branch asserts that
// NumOpenAngles is still positive.
4096 if (FormatTok
->is(tok::less
)) {
4098 } else if (FormatTok
->is(tok::greater
)) {
4099 assert(NumOpenAngles
> 0 && "'>' makes NumOpenAngles negative");
// The loop ends at eof() or once NumOpenAngles has reached zero.
4102 } while (!eof() && NumOpenAngles
!= 0);
4103 nextToken(); // Skip '>'.
4106 // Returns true for the declaration/definition form of @protocol,
4107 // false for the expression form.
4108 bool UnwrappedLineParser::parseObjCProtocol() {
4109 assert(FormatTok
->Tok
.getObjCKeywordID() == tok::objc_protocol
);
4112 if (FormatTok
->is(tok::l_paren
)) {
4113 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4117 // The definition/declaration form,
4119 // - (int)someMethod;
4122 nextToken(); // protocol name
4124 if (FormatTok
->is(tok::less
))
4125 parseObjCProtocolList();
4127 // Check for protocol declaration.
4128 if (FormatTok
->is(tok::semi
)) {
4135 parseObjCUntilAtEnd();
4139 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
4140 bool IsImport
= FormatTok
->is(Keywords
.kw_import
);
4141 assert(IsImport
|| FormatTok
->is(tok::kw_export
));
4144 // Consume the "default" in "export default class/function".
4145 if (FormatTok
->is(tok::kw_default
))
4148 // Consume "async function", "function" and "default function", so that these
4149 // get parsed as free-standing JS functions, i.e. do not require a trailing
4151 if (FormatTok
->is(Keywords
.kw_async
))
4153 if (FormatTok
->is(Keywords
.kw_function
)) {
4158 // For imports, `export *`, `export {...}`, consume the rest of the line up
4159 // to the terminating `;`. For everything else, just return and continue
4160 // parsing the structural element, i.e. the declaration or expression for
4161 // `export default`.
4162 if (!IsImport
&& !FormatTok
->isOneOf(tok::l_brace
, tok::star
) &&
4163 !FormatTok
->isStringLiteral() &&
4164 !(FormatTok
->is(Keywords
.kw_type
) &&
4165 Tokens
->peekNextToken()->isOneOf(tok::l_brace
, tok::star
))) {
4170 if (FormatTok
->is(tok::semi
))
4172 if (Line
->Tokens
.empty()) {
4173 // Common issue: Automatic Semicolon Insertion wrapped the line, so the
4174 // import statement should terminate.
4177 if (FormatTok
->is(tok::l_brace
)) {
4178 FormatTok
->setBlockKind(BK_Block
);
4187 void UnwrappedLineParser::parseStatementMacro() {
4189 if (FormatTok
->is(tok::l_paren
))
4191 if (FormatTok
->is(tok::semi
))
4196 void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4197 // consume things like a::`b.c[d:e] or a::*
4199 if (FormatTok
->isOneOf(tok::star
, tok::period
, tok::periodstar
,
4200 tok::coloncolon
, tok::hash
) ||
4201 Keywords
.isVerilogIdentifier(*FormatTok
)) {
4203 } else if (FormatTok
->is(tok::l_square
)) {
4211 void UnwrappedLineParser::parseVerilogSensitivityList() {
4212 if (FormatTok
->isNot(tok::at
))
4215 // A block event expression has 2 at signs.
4216 if (FormatTok
->is(tok::at
))
4218 switch (FormatTok
->Tok
.getKind()) {
4226 parseVerilogHierarchyIdentifier();
4231 unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
4232 unsigned AddLevels
= 0;
4234 if (FormatTok
->is(Keywords
.kw_clocking
)) {
4236 if (Keywords
.isVerilogIdentifier(*FormatTok
))
4238 parseVerilogSensitivityList();
4239 if (FormatTok
->is(tok::semi
))
4241 } else if (FormatTok
->isOneOf(tok::kw_case
, Keywords
.kw_casex
,
4242 Keywords
.kw_casez
, Keywords
.kw_randcase
,
4243 Keywords
.kw_randsequence
)) {
4244 if (Style
.IndentCaseLabels
)
4247 if (FormatTok
->is(tok::l_paren
)) {
4248 FormatTok
->setFinalizedType(TT_ConditionLParen
);
4251 if (FormatTok
->isOneOf(Keywords
.kw_inside
, Keywords
.kw_matches
))
4253 // The case header has no semicolon.
4257 // all the words like the name of the module and specifiers like
4258 // "automatic" and the width of function return type
4260 if (FormatTok
->is(tok::l_square
)) {
4261 auto Prev
= FormatTok
->getPreviousNonComment();
4262 if (Prev
&& Keywords
.isVerilogIdentifier(*Prev
))
4263 Prev
->setFinalizedType(TT_VerilogDimensionedTypeName
);
4265 } else if (Keywords
.isVerilogIdentifier(*FormatTok
) ||
4266 FormatTok
->isOneOf(Keywords
.kw_automatic
, tok::kw_static
)) {
4273 auto NewLine
= [this]() {
4275 Line
->IsContinuation
= true;
4279 while (FormatTok
->is(Keywords
.kw_import
)) {
4282 parseVerilogHierarchyIdentifier();
4283 if (FormatTok
->is(tok::semi
))
4287 // parameters and ports
4288 if (FormatTok
->is(Keywords
.kw_verilogHash
)) {
4291 if (FormatTok
->is(tok::l_paren
)) {
4292 FormatTok
->setFinalizedType(TT_VerilogMultiLineListLParen
);
4296 if (FormatTok
->is(tok::l_paren
)) {
4298 FormatTok
->setFinalizedType(TT_VerilogMultiLineListLParen
);
4302 // extends and implements
4303 if (FormatTok
->is(Keywords
.kw_extends
)) {
4306 parseVerilogHierarchyIdentifier();
4307 if (FormatTok
->is(tok::l_paren
))
4310 if (FormatTok
->is(Keywords
.kw_implements
)) {
4314 parseVerilogHierarchyIdentifier();
4315 } while (FormatTok
->is(tok::comma
));
4318 // Coverage event for cover groups.
4319 if (FormatTok
->is(tok::at
)) {
4321 parseVerilogSensitivityList();
4324 if (FormatTok
->is(tok::semi
))
4325 nextToken(/*LevelDifference=*/1);
4332 void UnwrappedLineParser::parseVerilogTable() {
4333 assert(FormatTok
->is(Keywords
.kw_table
));
4334 nextToken(/*LevelDifference=*/1);
4337 auto InitialLevel
= Line
->Level
++;
4338 while (!eof() && !Keywords
.isVerilogEnd(*FormatTok
)) {
4339 FormatToken
*Tok
= FormatTok
;
4341 if (Tok
->is(tok::semi
))
4343 else if (Tok
->isOneOf(tok::star
, tok::colon
, tok::question
, tok::minus
))
4344 Tok
->setFinalizedType(TT_VerilogTableItem
);
4346 Line
->Level
= InitialLevel
;
4347 nextToken(/*LevelDifference=*/-1);
4351 void UnwrappedLineParser::parseVerilogCaseLabel() {
4352 // The label will get unindented in AnnotatingParser. If there are no leading
4353 // spaces, indent the rest here so that things inside the block will be
4354 // indented relative to things outside. We don't use parseLabel because we
4355 // don't know whether this colon is a label or a ternary expression at this
4357 auto OrigLevel
= Line
->Level
;
4358 auto FirstLine
= CurrentLines
->size();
4359 if (Line
->Level
== 0 || (Line
->InPPDirective
&& Line
->Level
<= 1))
4361 else if (!Style
.IndentCaseBlocks
&& Keywords
.isVerilogBegin(*FormatTok
))
4363 parseStructuralElement();
4364 // Restore the indentation in both the new line and the line that has the
4366 if (CurrentLines
->size() > FirstLine
)
4367 (*CurrentLines
)[FirstLine
].Level
= OrigLevel
;
4368 Line
->Level
= OrigLevel
;
// Walks every node of \p Line, testing the node token's MacroCtx and recursing
// into each of the node's Children lines.
// NOTE(review): presumably returns true as soon as a token with a macro
// context is found at any nesting depth, and false otherwise - confirm against
// the return statements.
4371 bool UnwrappedLineParser::containsExpansion(const UnwrappedLine
&Line
) const {
4372 for (const auto &N
: Line
.Tokens
) {
// Check the node's own token first.
4373 if (N
.Tok
->MacroCtx
)
// Then check the lines nested under this node.
4375 for (const UnwrappedLine
&Child
: N
.Children
)
4376 if (containsExpansion(Child
))
4382 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel
) {
4383 if (Line
->Tokens
.empty())
4386 if (!parsingPPDirective()) {
4387 llvm::dbgs() << "Adding unwrapped line:\n";
4388 printDebugInfo(*Line
);
4392 // If this line closes a block when in Whitesmiths mode, remember that
4393 // information so that the level can be decreased after the line is added.
4394 // This has to happen after the addition of the line since the line itself
4395 // needs to be indented.
4396 bool ClosesWhitesmithsBlock
=
4397 Line
->MatchingOpeningBlockLineIndex
!= UnwrappedLine::kInvalidIndex
&&
4398 Style
.BreakBeforeBraces
== FormatStyle::BS_Whitesmiths
;
4400 // If the current line was expanded from a macro call, we use it to
4401 // reconstruct an unwrapped line from the structure of the expanded unwrapped
4402 // line and the unexpanded token stream.
4403 if (!parsingPPDirective() && !InExpansion
&& containsExpansion(*Line
)) {
4405 Reconstruct
.emplace(Line
->Level
, Unexpanded
);
4406 Reconstruct
->addLine(*Line
);
4408 // While the reconstructed unexpanded lines are stored in the normal
4409 // flow of lines, the expanded lines are stored on the side to be analyzed
4410 // in an extra step.
4411 CurrentExpandedLines
.push_back(std::move(*Line
));
4413 if (Reconstruct
->finished()) {
4414 UnwrappedLine Reconstructed
= std::move(*Reconstruct
).takeResult();
4415 assert(!Reconstructed
.Tokens
.empty() &&
4416 "Reconstructed must at least contain the macro identifier.");
4417 assert(!parsingPPDirective());
4419 llvm::dbgs() << "Adding unexpanded line:\n";
4420 printDebugInfo(Reconstructed
);
4422 ExpandedLines
[Reconstructed
.Tokens
.begin()->Tok
] = CurrentExpandedLines
;
4423 Lines
.push_back(std::move(Reconstructed
));
4424 CurrentExpandedLines
.clear();
4425 Reconstruct
.reset();
4428 // At the top level we only get here when no unexpansion is going on, or
4429 // when conditional formatting led to unfinished macro reconstructions.
4430 assert(!Reconstruct
|| (CurrentLines
!= &Lines
) || PPStack
.size() > 0);
4431 CurrentLines
->push_back(std::move(*Line
));
4433 Line
->Tokens
.clear();
4434 Line
->MatchingOpeningBlockLineIndex
= UnwrappedLine::kInvalidIndex
;
4435 Line
->FirstStartColumn
= 0;
4436 Line
->IsContinuation
= false;
4437 Line
->SeenDecltypeAuto
= false;
4439 if (ClosesWhitesmithsBlock
&& AdjustLevel
== LineLevel::Remove
)
4441 if (!parsingPPDirective() && !PreprocessorDirectives
.empty()) {
4442 CurrentLines
->append(
4443 std::make_move_iterator(PreprocessorDirectives
.begin()),
4444 std::make_move_iterator(PreprocessorDirectives
.end()));
4445 PreprocessorDirectives
.clear();
4447 // Disconnect the current token from the last token on the previous line.
4448 FormatTok
->Previous
= nullptr;
// Returns true when the current token (FormatTok) is tok::eof.
4451 bool UnwrappedLineParser::eof() const { return FormatTok
->is(tok::eof
); }
// Returns true when \p FormatTok has at least one newline before it
// (NewlinesBefore > 0) and, in addition, either Line->InPPDirective is set or
// the token's HasUnescapedNewline is set.
// The parameter shadows the FormatTok that other methods (e.g. eof()) read;
// inside this function the name always refers to the parameter.
4453 bool UnwrappedLineParser::isOnNewLine(const FormatToken
&FormatTok
) {
4454 return (Line
->InPPDirective
|| FormatTok
.HasUnescapedNewline
) &&
4455 FormatTok
.NewlinesBefore
> 0;
4458 // Checks if \p FormatTok is a line comment that continues the line comment
4459 // section on \p Line.
4461 continuesLineCommentSection(const FormatToken
&FormatTok
,
4462 const UnwrappedLine
&Line
,
4463 const llvm::Regex
&CommentPragmasRegex
) {
4464 if (Line
.Tokens
.empty())
4467 StringRef IndentContent
= FormatTok
.TokenText
;
4468 if (FormatTok
.TokenText
.startswith("//") ||
4469 FormatTok
.TokenText
.startswith("/*")) {
4470 IndentContent
= FormatTok
.TokenText
.substr(2);
4472 if (CommentPragmasRegex
.match(IndentContent
))
4475 // If Line starts with a line comment, then FormatTok continues the comment
4476 // section if its original column is greater or equal to the original start
4477 // column of the line.
4479 // Define the min column token of a line as follows: if a line ends in '{' or
4480 // contains a '{' followed by a line comment, then the min column token is
4481 // that '{'. Otherwise, the min column token of the line is the first token of
4484 // If Line starts with a token other than a line comment, then FormatTok
4485 // continues the comment section if its original column is greater than the
4486 // original start column of the min column token of the line.
4488 // For example, the second line comment continues the first in these cases:
4500 // int i; // first line
4505 // do { // first line
4518 // The second line comment doesn't continue the first in these cases:
4525 // int i; // first line
4530 // do { // first line
4541 const FormatToken
*MinColumnToken
= Line
.Tokens
.front().Tok
;
4543 // Scan for '{//'. If found, use the column of '{' as a min column for line
4544 // comment section continuation.
4545 const FormatToken
*PreviousToken
= nullptr;
4546 for (const UnwrappedLineNode
&Node
: Line
.Tokens
) {
4547 if (PreviousToken
&& PreviousToken
->is(tok::l_brace
) &&
4548 isLineComment(*Node
.Tok
)) {
4549 MinColumnToken
= PreviousToken
;
4552 PreviousToken
= Node
.Tok
;
4554 // Grab the last newline preceding a token in this unwrapped line.
4555 if (Node
.Tok
->NewlinesBefore
> 0)
4556 MinColumnToken
= Node
.Tok
;
4558 if (PreviousToken
&& PreviousToken
->is(tok::l_brace
))
4559 MinColumnToken
= PreviousToken
;
4561 return continuesLineComment(FormatTok
, /*Previous=*/Line
.Tokens
.back().Tok
,
// Processes every token queued in CommentsBeforeNextToken and then empties
// that queue. \p NewlineBeforeNext is only consulted after the loop, together
// with JustComments.
4565 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext
) {
// True when the current line held no tokens on entry.
4566 bool JustComments
= Line
->Tokens
.empty();
4567 for (FormatToken
*Tok
: CommentsBeforeNextToken
) {
4568 // Line comments that belong to the same line comment section are put on the
4569 // same line since later we might want to reflow content between them.
4570 // Additional fine-grained breaking of line comment sections is controlled
4571 // by the class BreakableLineCommentSection in case it is desirable to keep
4572 // several line comment sections in the same unwrapped line.
4574 // FIXME: Consider putting separate line comment sections as children to the
4575 // unwrapped line instead.
4576 Tok
->ContinuesLineCommentSection
=
4577 continuesLineCommentSection(*Tok
, *Line
, CommentPragmasRegex
);
// Special-case a comment that starts on a new line, does not continue the
// current comment section, and arrives while the line had no tokens on entry.
4578 if (isOnNewLine(*Tok
) && JustComments
&& !Tok
->ContinuesLineCommentSection
)
4582 if (NewlineBeforeNext
&& JustComments
)
// All queued comments have been handled; drop them from the queue.
4584 CommentsBeforeNextToken
.clear();
// Moves the parser on by one token: pending comments are flushed, the current
// token is appended to the current line with pushToken(), and a new current
// token is read. \p LevelDifference is forwarded unchanged to readToken().
4587 void UnwrappedLineParser::nextToken(int LevelDifference
) {
4590 flushComments(isOnNewLine(*FormatTok
));
4591 pushToken(FormatTok
);
// Remember the token just pushed so the next one can be linked back to it.
4592 FormatToken
*Previous
= FormatTok
;
// Non-JavaScript styles read the next token with readToken().
4593 if (!Style
.isJavaScript())
4594 readToken(LevelDifference
);
4596 readTokenWithJavaScriptASI();
// Link the freshly read token back to the one that was just pushed.
4597 FormatTok
->Previous
= Previous
;
4598 if (Style
.isVerilog()) {
4599 // Blocks in Verilog can have `begin` and `end` instead of braces. For
4600 // keywords like `begin`, we can't treat them the same as left braces
4601 // because some contexts require one of them. For example structs use
4602 // braces and if blocks use keywords, and a left brace can occur in an if
4603 // statement, but it is not a block. For keywords like `end`, we simply
4604 // treat them the same as right braces.
4605 if (Keywords
.isVerilogEnd(*FormatTok
))
4606 FormatTok
->Tok
.setKind(tok::r_brace
);
4610 void UnwrappedLineParser::distributeComments(
4611 const SmallVectorImpl
<FormatToken
*> &Comments
,
4612 const FormatToken
*NextTok
) {
4613 // Whether or not a line comment token continues a line is controlled by
4614 // the method continuesLineCommentSection, with the following caveat:
4616 // Define a trail of Comments to be a nonempty proper postfix of Comments such
4617 // that each comment line from the trail is aligned with the next token, if
4618 // the next token exists. If a trail exists, the beginning of the maximal
4619 // trail is marked as a start of a new comment section.
4621 // For example in this code:
4623 // int a; // line about a
4624 // // line 1 about b
4625 // // line 2 about b
4628 // the two lines about b form a maximal trail, so there are two sections, the
4629 // first one consisting of the single comment "// line about a" and the
4630 // second one consisting of the next two comments.
4631 if (Comments
.empty())
4633 bool ShouldPushCommentsInCurrentLine
= true;
4634 bool HasTrailAlignedWithNextToken
= false;
4635 unsigned StartOfTrailAlignedWithNextToken
= 0;
4637 // We are skipping the first element intentionally.
4638 for (unsigned i
= Comments
.size() - 1; i
> 0; --i
) {
4639 if (Comments
[i
]->OriginalColumn
== NextTok
->OriginalColumn
) {
4640 HasTrailAlignedWithNextToken
= true;
4641 StartOfTrailAlignedWithNextToken
= i
;
4645 for (unsigned i
= 0, e
= Comments
.size(); i
< e
; ++i
) {
4646 FormatToken
*FormatTok
= Comments
[i
];
4647 if (HasTrailAlignedWithNextToken
&& i
== StartOfTrailAlignedWithNextToken
) {
4648 FormatTok
->ContinuesLineCommentSection
= false;
4650 FormatTok
->ContinuesLineCommentSection
=
4651 continuesLineCommentSection(*FormatTok
, *Line
, CommentPragmasRegex
);
4653 if (!FormatTok
->ContinuesLineCommentSection
&&
4654 (isOnNewLine(*FormatTok
) || FormatTok
->IsFirst
)) {
4655 ShouldPushCommentsInCurrentLine
= false;
4657 if (ShouldPushCommentsInCurrentLine
)
4658 pushToken(FormatTok
);
4660 CommentsBeforeNextToken
.push_back(FormatTok
);
4664 void UnwrappedLineParser::readToken(int LevelDifference
) {
4665 SmallVector
<FormatToken
*, 1> Comments
;
4666 bool PreviousWasComment
= false;
4667 bool FirstNonCommentOnLine
= false;
4669 FormatTok
= Tokens
->getNextToken();
4671 while (FormatTok
->getType() == TT_ConflictStart
||
4672 FormatTok
->getType() == TT_ConflictEnd
||
4673 FormatTok
->getType() == TT_ConflictAlternative
) {
4674 if (FormatTok
->getType() == TT_ConflictStart
)
4675 conditionalCompilationStart(/*Unreachable=*/false);
4676 else if (FormatTok
->getType() == TT_ConflictAlternative
)
4677 conditionalCompilationAlternative();
4678 else if (FormatTok
->getType() == TT_ConflictEnd
)
4679 conditionalCompilationEnd();
4680 FormatTok
= Tokens
->getNextToken();
4681 FormatTok
->MustBreakBefore
= true;
4684 auto IsFirstNonCommentOnLine
= [](bool FirstNonCommentOnLine
,
4685 const FormatToken
&Tok
,
4686 bool PreviousWasComment
) {
4687 auto IsFirstOnLine
= [](const FormatToken
&Tok
) {
4688 return Tok
.HasUnescapedNewline
|| Tok
.IsFirst
;
4691 // Consider preprocessor directives preceded by block comments as first
4693 if (PreviousWasComment
)
4694 return FirstNonCommentOnLine
|| IsFirstOnLine(Tok
);
4695 return IsFirstOnLine(Tok
);
4698 FirstNonCommentOnLine
= IsFirstNonCommentOnLine(
4699 FirstNonCommentOnLine
, *FormatTok
, PreviousWasComment
);
4700 PreviousWasComment
= FormatTok
->is(tok::comment
);
4702 while (!Line
->InPPDirective
&& FormatTok
->is(tok::hash
) &&
4703 (!Style
.isVerilog() ||
4704 Keywords
.isVerilogPPDirective(*Tokens
->peekNextToken())) &&
4705 FirstNonCommentOnLine
) {
4706 distributeComments(Comments
, FormatTok
);
4708 // If there is an unfinished unwrapped line, we flush the preprocessor
4709 // directives only after that unwrapped line was finished later.
4710 bool SwitchToPreprocessorLines
= !Line
->Tokens
.empty();
4711 ScopedLineState
BlockState(*this, SwitchToPreprocessorLines
);
4712 assert((LevelDifference
>= 0 ||
4713 static_cast<unsigned>(-LevelDifference
) <= Line
->Level
) &&
4714 "LevelDifference makes Line->Level negative");
4715 Line
->Level
+= LevelDifference
;
4716 // Comments stored before the preprocessor directive need to be output
4717 // before the preprocessor directive, at the same level as the
4718 // preprocessor directive, as we consider them to apply to the directive.
4719 if (Style
.IndentPPDirectives
== FormatStyle::PPDIS_BeforeHash
&&
4720 PPBranchLevel
> 0) {
4721 Line
->Level
+= PPBranchLevel
;
4723 flushComments(isOnNewLine(*FormatTok
));
4725 PreviousWasComment
= FormatTok
->is(tok::comment
);
4726 FirstNonCommentOnLine
= IsFirstNonCommentOnLine(
4727 FirstNonCommentOnLine
, *FormatTok
, PreviousWasComment
);
4730 if (!PPStack
.empty() && (PPStack
.back().Kind
== PP_Unreachable
) &&
4731 !Line
->InPPDirective
) {
4735 if (FormatTok
->is(tok::identifier
) &&
4736 Macros
.defined(FormatTok
->TokenText
) &&
4737 // FIXME: Allow expanding macros in preprocessor directives.
4738 !Line
->InPPDirective
) {
4739 FormatToken
*ID
= FormatTok
;
4740 unsigned Position
= Tokens
->getPosition();
4742 // To correctly parse the code, we need to replace the tokens of the macro
4743 // call with its expansion.
4744 auto PreCall
= std::move(Line
);
4745 Line
.reset(new UnwrappedLine
);
4746 bool OldInExpansion
= InExpansion
;
4748 // We parse the macro call into a new line.
4749 auto Args
= parseMacroCall();
4750 InExpansion
= OldInExpansion
;
4751 assert(Line
->Tokens
.front().Tok
== ID
);
4752 // And remember the unexpanded macro call tokens.
4753 auto UnexpandedLine
= std::move(Line
);
4754 // Reset to the old line.
4755 Line
= std::move(PreCall
);
4758 llvm::dbgs() << "Macro call: " << ID
->TokenText
<< "(";
4760 llvm::dbgs() << "(";
4761 for (const auto &Arg
: Args
.value())
4762 for (const auto &T
: Arg
)
4763 llvm::dbgs() << T
->TokenText
<< " ";
4764 llvm::dbgs() << ")";
4766 llvm::dbgs() << "\n";
4768 if (Macros
.objectLike(ID
->TokenText
) && Args
&&
4769 !Macros
.hasArity(ID
->TokenText
, Args
->size())) {
4770 // The macro is either
4771 // - object-like, but we got arguments, or
4772 // - overloaded to be both object-like and function-like, but none of
4773 // the function-like arities match the number of arguments.
4774 // Thus, expand as object-like macro.
4775 LLVM_DEBUG(llvm::dbgs()
4776 << "Macro \"" << ID
->TokenText
4777 << "\" not overloaded for arity " << Args
->size()
4778 << "or not function-like, using object-like overload.");
4780 UnexpandedLine
->Tokens
.resize(1);
4781 Tokens
->setPosition(Position
);
4783 assert(!Args
&& Macros
.objectLike(ID
->TokenText
));
4785 if ((!Args
&& Macros
.objectLike(ID
->TokenText
)) ||
4786 (Args
&& Macros
.hasArity(ID
->TokenText
, Args
->size()))) {
4787 // Next, we insert the expanded tokens in the token stream at the
4788 // current position, and continue parsing.
4789 Unexpanded
[ID
] = std::move(UnexpandedLine
);
4790 SmallVector
<FormatToken
*, 8> Expansion
=
4791 Macros
.expand(ID
, std::move(Args
));
4792 if (!Expansion
.empty())
4793 FormatTok
= Tokens
->insertTokens(Expansion
);
4796 llvm::dbgs() << "Expanded: ";
4797 for (const auto &T
: Expansion
)
4798 llvm::dbgs() << T
->TokenText
<< " ";
4799 llvm::dbgs() << "\n";
4803 llvm::dbgs() << "Did not expand macro \"" << ID
->TokenText
4804 << "\", because it was used ";
4806 llvm::dbgs() << "with " << Args
->size();
4808 llvm::dbgs() << "without";
4809 llvm::dbgs() << " arguments, which doesn't match any definition.\n";
4811 Tokens
->setPosition(Position
);
4816 if (FormatTok
->isNot(tok::comment
)) {
4817 distributeComments(Comments
, FormatTok
);
4822 Comments
.push_back(FormatTok
);
4825 distributeComments(Comments
, nullptr);
// Appends to \p Into the Tok of every node in [Begin, End). Right after each
// node's own token, the tokens of that node's Children lines are appended by a
// recursive call, so \p Into receives the tokens in depth-first order.
4830 template <typename Iterator
>
4831 void pushTokens(Iterator Begin
, Iterator End
,
4832 llvm::SmallVectorImpl
<FormatToken
*> &Into
) {
4833 for (auto I
= Begin
; I
!= End
; ++I
) {
// The node's own token comes first ...
4834 Into
.push_back(I
->Tok
);
// ... followed by everything nested under it.
4835 for (const auto &Child
: I
->Children
)
4836 pushTokens(Child
.Tokens
.begin(), Child
.Tokens
.end(), Into
);
4841 std::optional
<llvm::SmallVector
<llvm::SmallVector
<FormatToken
*, 8>, 1>>
4842 UnwrappedLineParser::parseMacroCall() {
4843 std::optional
<llvm::SmallVector
<llvm::SmallVector
<FormatToken
*, 8>, 1>> Args
;
4844 assert(Line
->Tokens
.empty());
4846 if (FormatTok
->isNot(tok::l_paren
))
4848 unsigned Position
= Tokens
->getPosition();
4849 FormatToken
*Tok
= FormatTok
;
4852 auto ArgStart
= std::prev(Line
->Tokens
.end());
4856 switch (FormatTok
->Tok
.getKind()) {
4861 case tok::r_paren
: {
4867 Args
->push_back({});
4868 pushTokens(std::next(ArgStart
), Line
->Tokens
.end(), Args
->back());
4877 Args
->push_back({});
4878 pushTokens(std::next(ArgStart
), Line
->Tokens
.end(), Args
->back());
4880 ArgStart
= std::prev(Line
->Tokens
.end());
4888 Line
->Tokens
.resize(1);
4889 Tokens
->setPosition(Position
);
// Appends \p Tok to the current Line as a new UnwrappedLineNode. If
// MustBreakBeforeNextToken is set, the token just added gets
// MustBreakBefore = true and the flag is cleared again.
4894 void UnwrappedLineParser::pushToken(FormatToken
*Tok
) {
4895 Line
->Tokens
.push_back(UnwrappedLineNode(Tok
));
// A break requested earlier applies to this token only.
4896 if (MustBreakBeforeNextToken
) {
4897 Line
->Tokens
.back().Tok
->MustBreakBefore
= true;
4898 MustBreakBeforeNextToken
= false;
4902 } // end namespace format
4903 } // end namespace clang