[Flang] remove whole-archive option for AIX linker (#76039)
[llvm-project.git] / clang / lib / Format / UnwrappedLineParser.cpp
blobc38b4c884070bb74ccad00dcf8a2539a3653c30e
//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the implementation of the UnwrappedLineParser,
/// which turns a stream of tokens into UnwrappedLines.
///
//===----------------------------------------------------------------------===//
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "FormatTokenLexer.h"
18 #include "FormatTokenSource.h"
19 #include "Macros.h"
20 #include "TokenAnnotator.h"
21 #include "clang/Basic/TokenKinds.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/raw_os_ostream.h"
26 #include "llvm/Support/raw_ostream.h"
28 #include <algorithm>
29 #include <utility>
31 #define DEBUG_TYPE "format-parser"
33 namespace clang {
34 namespace format {
36 namespace {
38 void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
39 StringRef Prefix = "", bool PrintText = false) {
40 OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
41 << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
42 bool NewLine = false;
43 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
44 E = Line.Tokens.end();
45 I != E; ++I) {
46 if (NewLine) {
47 OS << Prefix;
48 NewLine = false;
50 OS << I->Tok->Tok.getName() << "[" << "T=" << (unsigned)I->Tok->getType()
51 << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
52 << "\"] ";
53 for (SmallVectorImpl<UnwrappedLine>::const_iterator
54 CI = I->Children.begin(),
55 CE = I->Children.end();
56 CI != CE; ++CI) {
57 OS << "\n";
58 printLine(OS, *CI, (Prefix + " ").str());
59 NewLine = true;
62 if (!NewLine)
63 OS << "\n";
66 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
67 printLine(llvm::dbgs(), Line);
70 class ScopedDeclarationState {
71 public:
72 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
73 bool MustBeDeclaration)
74 : Line(Line), Stack(Stack) {
75 Line.MustBeDeclaration = MustBeDeclaration;
76 Stack.push_back(MustBeDeclaration);
78 ~ScopedDeclarationState() {
79 Stack.pop_back();
80 if (!Stack.empty())
81 Line.MustBeDeclaration = Stack.back();
82 else
83 Line.MustBeDeclaration = true;
86 private:
87 UnwrappedLine &Line;
88 llvm::BitVector &Stack;
91 } // end anonymous namespace
93 class ScopedLineState {
94 public:
95 ScopedLineState(UnwrappedLineParser &Parser,
96 bool SwitchToPreprocessorLines = false)
97 : Parser(Parser), OriginalLines(Parser.CurrentLines) {
98 if (SwitchToPreprocessorLines)
99 Parser.CurrentLines = &Parser.PreprocessorDirectives;
100 else if (!Parser.Line->Tokens.empty())
101 Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
102 PreBlockLine = std::move(Parser.Line);
103 Parser.Line = std::make_unique<UnwrappedLine>();
104 Parser.Line->Level = PreBlockLine->Level;
105 Parser.Line->PPLevel = PreBlockLine->PPLevel;
106 Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
107 Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
110 ~ScopedLineState() {
111 if (!Parser.Line->Tokens.empty())
112 Parser.addUnwrappedLine();
113 assert(Parser.Line->Tokens.empty());
114 Parser.Line = std::move(PreBlockLine);
115 if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
116 Parser.MustBreakBeforeNextToken = true;
117 Parser.CurrentLines = OriginalLines;
120 private:
121 UnwrappedLineParser &Parser;
123 std::unique_ptr<UnwrappedLine> PreBlockLine;
124 SmallVectorImpl<UnwrappedLine> *OriginalLines;
127 class CompoundStatementIndenter {
128 public:
129 CompoundStatementIndenter(UnwrappedLineParser *Parser,
130 const FormatStyle &Style, unsigned &LineLevel)
131 : CompoundStatementIndenter(Parser, LineLevel,
132 Style.BraceWrapping.AfterControlStatement,
133 Style.BraceWrapping.IndentBraces) {}
134 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
135 bool WrapBrace, bool IndentBrace)
136 : LineLevel(LineLevel), OldLineLevel(LineLevel) {
137 if (WrapBrace)
138 Parser->addUnwrappedLine();
139 if (IndentBrace)
140 ++LineLevel;
142 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
144 private:
145 unsigned &LineLevel;
146 unsigned OldLineLevel;
149 UnwrappedLineParser::UnwrappedLineParser(
150 SourceManager &SourceMgr, const FormatStyle &Style,
151 const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
152 ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
153 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
154 IdentifierTable &IdentTable)
155 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
156 CurrentLines(&Lines), Style(Style), Keywords(Keywords),
157 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
158 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
159 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
160 ? IG_Rejected
161 : IG_Inited),
162 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
163 Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {}
165 void UnwrappedLineParser::reset() {
166 PPBranchLevel = -1;
167 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
168 ? IG_Rejected
169 : IG_Inited;
170 IncludeGuardToken = nullptr;
171 Line.reset(new UnwrappedLine);
172 CommentsBeforeNextToken.clear();
173 FormatTok = nullptr;
174 MustBreakBeforeNextToken = false;
175 IsDecltypeAutoFunction = false;
176 PreprocessorDirectives.clear();
177 CurrentLines = &Lines;
178 DeclarationScopeStack.clear();
179 NestedTooDeep.clear();
180 NestedLambdas.clear();
181 PPStack.clear();
182 Line->FirstStartColumn = FirstStartColumn;
184 if (!Unexpanded.empty())
185 for (FormatToken *Token : AllTokens)
186 Token->MacroCtx.reset();
187 CurrentExpandedLines.clear();
188 ExpandedLines.clear();
189 Unexpanded.clear();
190 InExpansion = false;
191 Reconstruct.reset();
194 void UnwrappedLineParser::parse() {
195 IndexedTokenSource TokenSource(AllTokens);
196 Line->FirstStartColumn = FirstStartColumn;
197 do {
198 LLVM_DEBUG(llvm::dbgs() << "----\n");
199 reset();
200 Tokens = &TokenSource;
201 TokenSource.reset();
203 readToken();
204 parseFile();
206 // If we found an include guard then all preprocessor directives (other than
207 // the guard) are over-indented by one.
208 if (IncludeGuard == IG_Found) {
209 for (auto &Line : Lines)
210 if (Line.InPPDirective && Line.Level > 0)
211 --Line.Level;
214 // Create line with eof token.
215 assert(eof());
216 pushToken(FormatTok);
217 addUnwrappedLine();
219 // In a first run, format everything with the lines containing macro calls
220 // replaced by the expansion.
221 if (!ExpandedLines.empty()) {
222 LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
223 for (const auto &Line : Lines) {
224 if (!Line.Tokens.empty()) {
225 auto it = ExpandedLines.find(Line.Tokens.begin()->Tok);
226 if (it != ExpandedLines.end()) {
227 for (const auto &Expanded : it->second) {
228 LLVM_DEBUG(printDebugInfo(Expanded));
229 Callback.consumeUnwrappedLine(Expanded);
231 continue;
234 LLVM_DEBUG(printDebugInfo(Line));
235 Callback.consumeUnwrappedLine(Line);
237 Callback.finishRun();
240 LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
241 for (const UnwrappedLine &Line : Lines) {
242 LLVM_DEBUG(printDebugInfo(Line));
243 Callback.consumeUnwrappedLine(Line);
245 Callback.finishRun();
246 Lines.clear();
247 while (!PPLevelBranchIndex.empty() &&
248 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
249 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
250 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
252 if (!PPLevelBranchIndex.empty()) {
253 ++PPLevelBranchIndex.back();
254 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
255 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
257 } while (!PPLevelBranchIndex.empty());
260 void UnwrappedLineParser::parseFile() {
261 // The top-level context in a file always has declarations, except for pre-
262 // processor directives and JavaScript files.
263 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
264 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
265 MustBeDeclaration);
266 if (Style.Language == FormatStyle::LK_TextProto)
267 parseBracedList();
268 else
269 parseLevel();
270 // Make sure to format the remaining tokens.
272 // LK_TextProto is special since its top-level is parsed as the body of a
273 // braced list, which does not necessarily have natural line separators such
274 // as a semicolon. Comments after the last entry that have been determined to
275 // not belong to that line, as in:
276 // key: value
277 // // endfile comment
278 // do not have a chance to be put on a line of their own until this point.
279 // Here we add this newline before end-of-file comments.
280 if (Style.Language == FormatStyle::LK_TextProto &&
281 !CommentsBeforeNextToken.empty()) {
282 addUnwrappedLine();
284 flushComments(true);
285 addUnwrappedLine();
288 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
289 do {
290 switch (FormatTok->Tok.getKind()) {
291 case tok::l_brace:
292 return;
293 default:
294 if (FormatTok->is(Keywords.kw_where)) {
295 addUnwrappedLine();
296 nextToken();
297 parseCSharpGenericTypeConstraint();
298 break;
300 nextToken();
301 break;
303 } while (!eof());
306 void UnwrappedLineParser::parseCSharpAttribute() {
307 int UnpairedSquareBrackets = 1;
308 do {
309 switch (FormatTok->Tok.getKind()) {
310 case tok::r_square:
311 nextToken();
312 --UnpairedSquareBrackets;
313 if (UnpairedSquareBrackets == 0) {
314 addUnwrappedLine();
315 return;
317 break;
318 case tok::l_square:
319 ++UnpairedSquareBrackets;
320 nextToken();
321 break;
322 default:
323 nextToken();
324 break;
326 } while (!eof());
329 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
330 if (!Lines.empty() && Lines.back().InPPDirective)
331 return true;
333 const FormatToken *Previous = Tokens->getPreviousToken();
334 return Previous && Previous->is(tok::comment) &&
335 (Previous->IsMultiline || Previous->NewlinesBefore > 0);
338 /// \brief Parses a level, that is ???.
339 /// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
340 /// \param IfKind The \p if statement kind in the level.
341 /// \param IfLeftBrace The left brace of the \p if block in the level.
342 /// \returns true if a simple block of if/else/for/while, or false otherwise.
343 /// (A simple block has a single statement.)
344 bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
345 IfStmtKind *IfKind,
346 FormatToken **IfLeftBrace) {
347 const bool InRequiresExpression =
348 OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
349 const bool IsPrecededByCommentOrPPDirective =
350 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
351 FormatToken *IfLBrace = nullptr;
352 bool HasDoWhile = false;
353 bool HasLabel = false;
354 unsigned StatementCount = 0;
355 bool SwitchLabelEncountered = false;
357 do {
358 if (FormatTok->isAttribute()) {
359 nextToken();
360 continue;
362 tok::TokenKind kind = FormatTok->Tok.getKind();
363 if (FormatTok->getType() == TT_MacroBlockBegin)
364 kind = tok::l_brace;
365 else if (FormatTok->getType() == TT_MacroBlockEnd)
366 kind = tok::r_brace;
368 auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile,
369 &HasLabel, &StatementCount] {
370 parseStructuralElement(OpeningBrace, IfKind, &IfLBrace,
371 HasDoWhile ? nullptr : &HasDoWhile,
372 HasLabel ? nullptr : &HasLabel);
373 ++StatementCount;
374 assert(StatementCount > 0 && "StatementCount overflow!");
377 switch (kind) {
378 case tok::comment:
379 nextToken();
380 addUnwrappedLine();
381 break;
382 case tok::l_brace:
383 if (InRequiresExpression) {
384 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
385 } else if (FormatTok->Previous &&
386 FormatTok->Previous->ClosesRequiresClause) {
387 // We need the 'default' case here to correctly parse a function
388 // l_brace.
389 ParseDefault();
390 continue;
392 if (!InRequiresExpression && FormatTok->isNot(TT_MacroBlockBegin) &&
393 tryToParseBracedList()) {
394 continue;
396 parseBlock();
397 ++StatementCount;
398 assert(StatementCount > 0 && "StatementCount overflow!");
399 addUnwrappedLine();
400 break;
401 case tok::r_brace:
402 if (OpeningBrace) {
403 if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
404 !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
405 return false;
407 if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
408 HasDoWhile || IsPrecededByCommentOrPPDirective ||
409 precededByCommentOrPPDirective()) {
410 return false;
412 const FormatToken *Next = Tokens->peekNextToken();
413 if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
414 return false;
415 if (IfLeftBrace)
416 *IfLeftBrace = IfLBrace;
417 return true;
419 nextToken();
420 addUnwrappedLine();
421 break;
422 case tok::kw_default: {
423 unsigned StoredPosition = Tokens->getPosition();
424 FormatToken *Next;
425 do {
426 Next = Tokens->getNextToken();
427 assert(Next);
428 } while (Next->is(tok::comment));
429 FormatTok = Tokens->setPosition(StoredPosition);
430 if (Next->isNot(tok::colon)) {
431 // default not followed by ':' is not a case label; treat it like
432 // an identifier.
433 parseStructuralElement();
434 break;
436 // Else, if it is 'default:', fall through to the case handling.
437 [[fallthrough]];
439 case tok::kw_case:
440 if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() ||
441 (Style.isJavaScript() && Line->MustBeDeclaration)) {
442 // Proto: there are no switch/case statements
443 // Verilog: Case labels don't have this word. We handle case
444 // labels including default in TokenAnnotator.
445 // JavaScript: A 'case: string' style field declaration.
446 ParseDefault();
447 break;
449 if (!SwitchLabelEncountered &&
450 (Style.IndentCaseLabels ||
451 (Line->InPPDirective && Line->Level == 1))) {
452 ++Line->Level;
454 SwitchLabelEncountered = true;
455 parseStructuralElement();
456 break;
457 case tok::l_square:
458 if (Style.isCSharp()) {
459 nextToken();
460 parseCSharpAttribute();
461 break;
463 if (handleCppAttributes())
464 break;
465 [[fallthrough]];
466 default:
467 ParseDefault();
468 break;
470 } while (!eof());
472 return false;
475 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
476 // We'll parse forward through the tokens until we hit
477 // a closing brace or eof - note that getNextToken() will
478 // parse macros, so this will magically work inside macro
479 // definitions, too.
480 unsigned StoredPosition = Tokens->getPosition();
481 FormatToken *Tok = FormatTok;
482 const FormatToken *PrevTok = Tok->Previous;
483 // Keep a stack of positions of lbrace tokens. We will
484 // update information about whether an lbrace starts a
485 // braced init list or a different block during the loop.
486 struct StackEntry {
487 FormatToken *Tok;
488 const FormatToken *PrevTok;
490 SmallVector<StackEntry, 8> LBraceStack;
491 assert(Tok->is(tok::l_brace));
492 do {
493 // Get next non-comment, non-preprocessor token.
494 FormatToken *NextTok;
495 do {
496 NextTok = Tokens->getNextToken();
497 } while (NextTok->is(tok::comment));
498 while (NextTok->is(tok::hash) && !Line->InMacroBody) {
499 NextTok = Tokens->getNextToken();
500 do {
501 NextTok = Tokens->getNextToken();
502 } while (NextTok->is(tok::comment) ||
503 (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof)));
506 switch (Tok->Tok.getKind()) {
507 case tok::l_brace:
508 if (Style.isJavaScript() && PrevTok) {
509 if (PrevTok->isOneOf(tok::colon, tok::less)) {
510 // A ':' indicates this code is in a type, or a braced list
511 // following a label in an object literal ({a: {b: 1}}).
512 // A '<' could be an object used in a comparison, but that is nonsense
513 // code (can never return true), so more likely it is a generic type
514 // argument (`X<{a: string; b: number}>`).
515 // The code below could be confused by semicolons between the
516 // individual members in a type member list, which would normally
517 // trigger BK_Block. In both cases, this must be parsed as an inline
518 // braced init.
519 Tok->setBlockKind(BK_BracedInit);
520 } else if (PrevTok->is(tok::r_paren)) {
521 // `) { }` can only occur in function or method declarations in JS.
522 Tok->setBlockKind(BK_Block);
524 } else {
525 Tok->setBlockKind(BK_Unknown);
527 LBraceStack.push_back({Tok, PrevTok});
528 break;
529 case tok::r_brace:
530 if (LBraceStack.empty())
531 break;
532 if (LBraceStack.back().Tok->is(BK_Unknown)) {
533 bool ProbablyBracedList = false;
534 if (Style.Language == FormatStyle::LK_Proto) {
535 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
536 } else {
537 // Skip NextTok over preprocessor lines, otherwise we may not
538 // properly diagnose the block as a braced intializer
539 // if the comma separator appears after the pp directive.
540 while (NextTok->is(tok::hash)) {
541 ScopedMacroState MacroState(*Line, Tokens, NextTok);
542 do {
543 NextTok = Tokens->getNextToken();
544 } while (NextTok->isNot(tok::eof));
547 // Using OriginalColumn to distinguish between ObjC methods and
548 // binary operators is a bit hacky.
549 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
550 NextTok->OriginalColumn == 0;
552 // Try to detect a braced list. Note that regardless how we mark inner
553 // braces here, we will overwrite the BlockKind later if we parse a
554 // braced list (where all blocks inside are by default braced lists),
555 // or when we explicitly detect blocks (for example while parsing
556 // lambdas).
558 // If we already marked the opening brace as braced list, the closing
559 // must also be part of it.
560 ProbablyBracedList = LBraceStack.back().Tok->is(TT_BracedListLBrace);
562 ProbablyBracedList = ProbablyBracedList ||
563 (Style.isJavaScript() &&
564 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
565 Keywords.kw_as));
566 ProbablyBracedList = ProbablyBracedList ||
567 (Style.isCpp() && NextTok->is(tok::l_paren));
569 // If there is a comma, semicolon or right paren after the closing
570 // brace, we assume this is a braced initializer list.
571 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
572 // braced list in JS.
573 ProbablyBracedList =
574 ProbablyBracedList ||
575 NextTok->isOneOf(tok::comma, tok::period, tok::colon,
576 tok::r_paren, tok::r_square, tok::ellipsis);
578 // Distinguish between braced list in a constructor initializer list
579 // followed by constructor body, or just adjacent blocks.
580 ProbablyBracedList =
581 ProbablyBracedList ||
582 (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok &&
583 LBraceStack.back().PrevTok->isOneOf(tok::identifier,
584 tok::greater));
586 ProbablyBracedList =
587 ProbablyBracedList ||
588 (NextTok->is(tok::identifier) &&
589 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));
591 ProbablyBracedList = ProbablyBracedList ||
592 (NextTok->is(tok::semi) &&
593 (!ExpectClassBody || LBraceStack.size() != 1));
595 ProbablyBracedList =
596 ProbablyBracedList ||
597 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
599 if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
600 // We can have an array subscript after a braced init
601 // list, but C++11 attributes are expected after blocks.
602 NextTok = Tokens->getNextToken();
603 ProbablyBracedList = NextTok->isNot(tok::l_square);
606 if (ProbablyBracedList) {
607 Tok->setBlockKind(BK_BracedInit);
608 LBraceStack.back().Tok->setBlockKind(BK_BracedInit);
609 } else {
610 Tok->setBlockKind(BK_Block);
611 LBraceStack.back().Tok->setBlockKind(BK_Block);
614 LBraceStack.pop_back();
615 break;
616 case tok::identifier:
617 if (Tok->isNot(TT_StatementMacro))
618 break;
619 [[fallthrough]];
620 case tok::at:
621 case tok::semi:
622 case tok::kw_if:
623 case tok::kw_while:
624 case tok::kw_for:
625 case tok::kw_switch:
626 case tok::kw_try:
627 case tok::kw___try:
628 if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown))
629 LBraceStack.back().Tok->setBlockKind(BK_Block);
630 break;
631 default:
632 break;
634 PrevTok = Tok;
635 Tok = NextTok;
636 } while (Tok->isNot(tok::eof) && !LBraceStack.empty());
638 // Assume other blocks for all unclosed opening braces.
639 for (const auto &Entry : LBraceStack)
640 if (Entry.Tok->is(BK_Unknown))
641 Entry.Tok->setBlockKind(BK_Block);
643 FormatTok = Tokens->setPosition(StoredPosition);
646 // Sets the token type of the directly previous right brace.
647 void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) {
648 if (auto Prev = FormatTok->getPreviousNonComment();
649 Prev && Prev->is(tok::r_brace)) {
650 Prev->setFinalizedType(Type);
/// Mixes the \c std::hash of \p v into \p seed in place.
///
/// The new seed is `seed ^ (hash(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2))`,
/// so the result depends on the order in which values are combined.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  std::hash<T> hasher;
  seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}
660 size_t UnwrappedLineParser::computePPHash() const {
661 size_t h = 0;
662 for (const auto &i : PPStack) {
663 hash_combine(h, size_t(i.Kind));
664 hash_combine(h, i.Line);
666 return h;
669 // Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
670 // is not null, subtracts its length (plus the preceding space) when computing
671 // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
672 // running the token annotator on it so that we can restore them afterward.
673 bool UnwrappedLineParser::mightFitOnOneLine(
674 UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
675 const auto ColumnLimit = Style.ColumnLimit;
676 if (ColumnLimit == 0)
677 return true;
679 auto &Tokens = ParsedLine.Tokens;
680 assert(!Tokens.empty());
682 const auto *LastToken = Tokens.back().Tok;
683 assert(LastToken);
685 SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
687 int Index = 0;
688 for (const auto &Token : Tokens) {
689 assert(Token.Tok);
690 auto &SavedToken = SavedTokens[Index++];
691 SavedToken.Tok = new FormatToken;
692 SavedToken.Tok->copyFrom(*Token.Tok);
693 SavedToken.Children = std::move(Token.Children);
696 AnnotatedLine Line(ParsedLine);
697 assert(Line.Last == LastToken);
699 TokenAnnotator Annotator(Style, Keywords);
700 Annotator.annotate(Line);
701 Annotator.calculateFormattingInformation(Line);
703 auto Length = LastToken->TotalLength;
704 if (OpeningBrace) {
705 assert(OpeningBrace != Tokens.front().Tok);
706 if (auto Prev = OpeningBrace->Previous;
707 Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
708 Length -= ColumnLimit;
710 Length -= OpeningBrace->TokenText.size() + 1;
713 if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) {
714 assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
715 Length -= FirstToken->TokenText.size() + 1;
718 Index = 0;
719 for (auto &Token : Tokens) {
720 const auto &SavedToken = SavedTokens[Index++];
721 Token.Tok->copyFrom(*SavedToken.Tok);
722 Token.Children = std::move(SavedToken.Children);
723 delete SavedToken.Tok;
726 // If these change PPLevel needs to be used for get correct indentation.
727 assert(!Line.InMacroBody);
728 assert(!Line.InPPDirective);
729 return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
732 FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
733 unsigned AddLevels, bool MunchSemi,
734 bool KeepBraces,
735 IfStmtKind *IfKind,
736 bool UnindentWhitesmithsBraces) {
737 auto HandleVerilogBlockLabel = [this]() {
738 // ":" name
739 if (Style.isVerilog() && FormatTok->is(tok::colon)) {
740 nextToken();
741 if (Keywords.isVerilogIdentifier(*FormatTok))
742 nextToken();
746 // Whether this is a Verilog-specific block that has a special header like a
747 // module.
748 const bool VerilogHierarchy =
749 Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok);
750 assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
751 (Style.isVerilog() &&
752 (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
753 "'{' or macro block token expected");
754 FormatToken *Tok = FormatTok;
755 const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
756 auto Index = CurrentLines->size();
757 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
758 FormatTok->setBlockKind(BK_Block);
760 // For Whitesmiths mode, jump to the next level prior to skipping over the
761 // braces.
762 if (!VerilogHierarchy && AddLevels > 0 &&
763 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
764 ++Line->Level;
767 size_t PPStartHash = computePPHash();
769 const unsigned InitialLevel = Line->Level;
770 if (VerilogHierarchy) {
771 AddLevels += parseVerilogHierarchyHeader();
772 } else {
773 nextToken(/*LevelDifference=*/AddLevels);
774 HandleVerilogBlockLabel();
777 // Bail out if there are too many levels. Otherwise, the stack might overflow.
778 if (Line->Level > 300)
779 return nullptr;
781 if (MacroBlock && FormatTok->is(tok::l_paren))
782 parseParens();
784 size_t NbPreprocessorDirectives =
785 !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
786 addUnwrappedLine();
787 size_t OpeningLineIndex =
788 CurrentLines->empty()
789 ? (UnwrappedLine::kInvalidIndex)
790 : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
792 // Whitesmiths is weird here. The brace needs to be indented for the namespace
793 // block, but the block itself may not be indented depending on the style
794 // settings. This allows the format to back up one level in those cases.
795 if (UnindentWhitesmithsBraces)
796 --Line->Level;
798 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
799 MustBeDeclaration);
800 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
801 Line->Level += AddLevels;
803 FormatToken *IfLBrace = nullptr;
804 const bool SimpleBlock = parseLevel(Tok, IfKind, &IfLBrace);
806 if (eof())
807 return IfLBrace;
809 if (MacroBlock ? FormatTok->isNot(TT_MacroBlockEnd)
810 : FormatTok->isNot(tok::r_brace)) {
811 Line->Level = InitialLevel;
812 FormatTok->setBlockKind(BK_Block);
813 return IfLBrace;
816 if (FormatTok->is(tok::r_brace) && Tok->is(TT_NamespaceLBrace))
817 FormatTok->setFinalizedType(TT_NamespaceRBrace);
819 const bool IsFunctionRBrace =
820 FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace);
822 auto RemoveBraces = [=]() mutable {
823 if (!SimpleBlock)
824 return false;
825 assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
826 assert(FormatTok->is(tok::r_brace));
827 const bool WrappedOpeningBrace = !Tok->Previous;
828 if (WrappedOpeningBrace && FollowedByComment)
829 return false;
830 const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
831 if (KeepBraces && !HasRequiredIfBraces)
832 return false;
833 if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
834 const FormatToken *Previous = Tokens->getPreviousToken();
835 assert(Previous);
836 if (Previous->is(tok::r_brace) && !Previous->Optional)
837 return false;
839 assert(!CurrentLines->empty());
840 auto &LastLine = CurrentLines->back();
841 if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
842 return false;
843 if (Tok->is(TT_ElseLBrace))
844 return true;
845 if (WrappedOpeningBrace) {
846 assert(Index > 0);
847 --Index; // The line above the wrapped l_brace.
848 Tok = nullptr;
850 return mightFitOnOneLine((*CurrentLines)[Index], Tok);
852 if (RemoveBraces()) {
853 Tok->MatchingParen = FormatTok;
854 FormatTok->MatchingParen = Tok;
857 size_t PPEndHash = computePPHash();
859 // Munch the closing brace.
860 nextToken(/*LevelDifference=*/-AddLevels);
862 // When this is a function block and there is an unnecessary semicolon
863 // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
864 // it later).
865 if (Style.RemoveSemicolon && IsFunctionRBrace) {
866 while (FormatTok->is(tok::semi)) {
867 FormatTok->Optional = true;
868 nextToken();
872 HandleVerilogBlockLabel();
874 if (MacroBlock && FormatTok->is(tok::l_paren))
875 parseParens();
877 Line->Level = InitialLevel;
879 if (FormatTok->is(tok::kw_noexcept)) {
880 // A noexcept in a requires expression.
881 nextToken();
884 if (FormatTok->is(tok::arrow)) {
885 // Following the } or noexcept we can find a trailing return type arrow
886 // as part of an implicit conversion constraint.
887 nextToken();
888 parseStructuralElement();
891 if (MunchSemi && FormatTok->is(tok::semi))
892 nextToken();
894 if (PPStartHash == PPEndHash) {
895 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
896 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
897 // Update the opening line to add the forward reference as well
898 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
899 CurrentLines->size() - 1;
903 return IfLBrace;
906 static bool isGoogScope(const UnwrappedLine &Line) {
907 // FIXME: Closure-library specific stuff should not be hard-coded but be
908 // configurable.
909 if (Line.Tokens.size() < 4)
910 return false;
911 auto I = Line.Tokens.begin();
912 if (I->Tok->TokenText != "goog")
913 return false;
914 ++I;
915 if (I->Tok->isNot(tok::period))
916 return false;
917 ++I;
918 if (I->Tok->TokenText != "scope")
919 return false;
920 ++I;
921 return I->Tok->is(tok::l_paren);
924 static bool isIIFE(const UnwrappedLine &Line,
925 const AdditionalKeywords &Keywords) {
926 // Look for the start of an immediately invoked anonymous function.
927 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
928 // This is commonly done in JavaScript to create a new, anonymous scope.
929 // Example: (function() { ... })()
930 if (Line.Tokens.size() < 3)
931 return false;
932 auto I = Line.Tokens.begin();
933 if (I->Tok->isNot(tok::l_paren))
934 return false;
935 ++I;
936 if (I->Tok->isNot(Keywords.kw_function))
937 return false;
938 ++I;
939 return I->Tok->is(tok::l_paren);
942 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
943 const FormatToken &InitialToken) {
944 tok::TokenKind Kind = InitialToken.Tok.getKind();
945 if (InitialToken.is(TT_NamespaceMacro))
946 Kind = tok::kw_namespace;
948 switch (Kind) {
949 case tok::kw_namespace:
950 return Style.BraceWrapping.AfterNamespace;
951 case tok::kw_class:
952 return Style.BraceWrapping.AfterClass;
953 case tok::kw_union:
954 return Style.BraceWrapping.AfterUnion;
955 case tok::kw_struct:
956 return Style.BraceWrapping.AfterStruct;
957 case tok::kw_enum:
958 return Style.BraceWrapping.AfterEnum;
959 default:
960 return false;
964 void UnwrappedLineParser::parseChildBlock() {
965 assert(FormatTok->is(tok::l_brace));
966 FormatTok->setBlockKind(BK_Block);
967 const FormatToken *OpeningBrace = FormatTok;
968 nextToken();
970 bool SkipIndent = (Style.isJavaScript() &&
971 (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
972 ScopedLineState LineState(*this);
973 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
974 /*MustBeDeclaration=*/false);
975 Line->Level += SkipIndent ? 0 : 1;
976 parseLevel(OpeningBrace);
977 flushComments(isOnNewLine(*FormatTok));
978 Line->Level -= SkipIndent ? 0 : 1;
980 nextToken();
983 void UnwrappedLineParser::parsePPDirective() {
984 assert(FormatTok->is(tok::hash) && "'#' expected");
985 ScopedMacroState MacroState(*Line, Tokens, FormatTok);
987 nextToken();
989 if (!FormatTok->Tok.getIdentifierInfo()) {
990 parsePPUnknown();
991 return;
994 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
995 case tok::pp_define:
996 parsePPDefine();
997 return;
998 case tok::pp_if:
999 parsePPIf(/*IfDef=*/false);
1000 break;
1001 case tok::pp_ifdef:
1002 case tok::pp_ifndef:
1003 parsePPIf(/*IfDef=*/true);
1004 break;
1005 case tok::pp_else:
1006 case tok::pp_elifdef:
1007 case tok::pp_elifndef:
1008 case tok::pp_elif:
1009 parsePPElse();
1010 break;
1011 case tok::pp_endif:
1012 parsePPEndIf();
1013 break;
1014 case tok::pp_pragma:
1015 parsePPPragma();
1016 break;
1017 default:
1018 parsePPUnknown();
1019 break;
1023 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1024 size_t Line = CurrentLines->size();
1025 if (CurrentLines == &PreprocessorDirectives)
1026 Line += Lines.size();
1028 if (Unreachable ||
1029 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1030 PPStack.push_back({PP_Unreachable, Line});
1031 } else {
1032 PPStack.push_back({PP_Conditional, Line});
1036 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1037 ++PPBranchLevel;
1038 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1039 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1040 PPLevelBranchIndex.push_back(0);
1041 PPLevelBranchCount.push_back(0);
1043 PPChainBranchIndex.push(Unreachable ? -1 : 0);
1044 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1045 conditionalCompilationCondition(Unreachable || Skip);
1048 void UnwrappedLineParser::conditionalCompilationAlternative() {
1049 if (!PPStack.empty())
1050 PPStack.pop_back();
1051 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1052 if (!PPChainBranchIndex.empty())
1053 ++PPChainBranchIndex.top();
1054 conditionalCompilationCondition(
1055 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1056 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1059 void UnwrappedLineParser::conditionalCompilationEnd() {
1060 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1061 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1062 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1063 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1065 // Guard against #endif's without #if.
1066 if (PPBranchLevel > -1)
1067 --PPBranchLevel;
1068 if (!PPChainBranchIndex.empty())
1069 PPChainBranchIndex.pop();
1070 if (!PPStack.empty())
1071 PPStack.pop_back();
1074 void UnwrappedLineParser::parsePPIf(bool IfDef) {
1075 bool IfNDef = FormatTok->is(tok::pp_ifndef);
1076 nextToken();
1077 bool Unreachable = false;
1078 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1079 Unreachable = true;
1080 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1081 Unreachable = true;
1082 conditionalCompilationStart(Unreachable);
1083 FormatToken *IfCondition = FormatTok;
1084 // If there's a #ifndef on the first line, and the only lines before it are
1085 // comments, it could be an include guard.
1086 bool MaybeIncludeGuard = IfNDef;
1087 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1088 for (auto &Line : Lines) {
1089 if (Line.Tokens.front().Tok->isNot(tok::comment)) {
1090 MaybeIncludeGuard = false;
1091 IncludeGuard = IG_Rejected;
1092 break;
1096 --PPBranchLevel;
1097 parsePPUnknown();
1098 ++PPBranchLevel;
1099 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1100 IncludeGuard = IG_IfNdefed;
1101 IncludeGuardToken = IfCondition;
1105 void UnwrappedLineParser::parsePPElse() {
1106 // If a potential include guard has an #else, it's not an include guard.
1107 if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1108 IncludeGuard = IG_Rejected;
1109 // Don't crash when there is an #else without an #if.
1110 assert(PPBranchLevel >= -1);
1111 if (PPBranchLevel == -1)
1112 conditionalCompilationStart(/*Unreachable=*/true);
1113 conditionalCompilationAlternative();
1114 --PPBranchLevel;
1115 parsePPUnknown();
1116 ++PPBranchLevel;
1119 void UnwrappedLineParser::parsePPEndIf() {
1120 conditionalCompilationEnd();
1121 parsePPUnknown();
1122 // If the #endif of a potential include guard is the last thing in the file,
1123 // then we found an include guard.
1124 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1125 Style.IndentPPDirectives != FormatStyle::PPDIS_None) {
1126 IncludeGuard = IG_Found;
1130 void UnwrappedLineParser::parsePPDefine() {
1131 nextToken();
1133 if (!FormatTok->Tok.getIdentifierInfo()) {
1134 IncludeGuard = IG_Rejected;
1135 IncludeGuardToken = nullptr;
1136 parsePPUnknown();
1137 return;
1140 if (IncludeGuard == IG_IfNdefed &&
1141 IncludeGuardToken->TokenText == FormatTok->TokenText) {
1142 IncludeGuard = IG_Defined;
1143 IncludeGuardToken = nullptr;
1144 for (auto &Line : Lines) {
1145 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1146 IncludeGuard = IG_Rejected;
1147 break;
1152 // In the context of a define, even keywords should be treated as normal
1153 // identifiers. Setting the kind to identifier is not enough, because we need
1154 // to treat additional keywords like __except as well, which are already
1155 // identifiers. Setting the identifier info to null interferes with include
1156 // guard processing above, and changes preprocessing nesting.
1157 FormatTok->Tok.setKind(tok::identifier);
1158 FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1159 nextToken();
1160 if (FormatTok->Tok.getKind() == tok::l_paren &&
1161 !FormatTok->hasWhitespaceBefore()) {
1162 parseParens();
1164 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1165 Line->Level += PPBranchLevel + 1;
1166 addUnwrappedLine();
1167 ++Line->Level;
1169 Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
1170 assert((int)Line->PPLevel >= 0);
1171 Line->InMacroBody = true;
1173 if (FormatTok->is(tok::identifier) &&
1174 Tokens->peekNextToken()->is(tok::colon)) {
1175 nextToken();
1176 nextToken();
1179 // Errors during a preprocessor directive can only affect the layout of the
1180 // preprocessor directive, and thus we ignore them. An alternative approach
1181 // would be to use the same approach we use on the file level (no
1182 // re-indentation if there was a structural error) within the macro
1183 // definition.
1184 parseFile();
1187 void UnwrappedLineParser::parsePPPragma() {
1188 Line->InPragmaDirective = true;
1189 parsePPUnknown();
1192 void UnwrappedLineParser::parsePPUnknown() {
1193 do {
1194 nextToken();
1195 } while (!eof());
1196 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1197 Line->Level += PPBranchLevel + 1;
1198 addUnwrappedLine();
1201 // Here we exclude certain tokens that are not usually the first token in an
1202 // unwrapped line. This is used in attempt to distinguish macro calls without
1203 // trailing semicolons from other constructs split to several lines.
1204 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1205 // Semicolon can be a null-statement, l_square can be a start of a macro or
1206 // a C++11 attribute, but this doesn't seem to be common.
1207 assert(Tok.isNot(TT_AttributeSquare));
1208 return !Tok.isOneOf(tok::semi, tok::l_brace,
1209 // Tokens that can only be used as binary operators and a
1210 // part of overloaded operator names.
1211 tok::period, tok::periodstar, tok::arrow, tok::arrowstar,
1212 tok::less, tok::greater, tok::slash, tok::percent,
1213 tok::lessless, tok::greatergreater, tok::equal,
1214 tok::plusequal, tok::minusequal, tok::starequal,
1215 tok::slashequal, tok::percentequal, tok::ampequal,
1216 tok::pipeequal, tok::caretequal, tok::greatergreaterequal,
1217 tok::lesslessequal,
1218 // Colon is used in labels, base class lists, initializer
1219 // lists, range-based for loops, ternary operator, but
1220 // should never be the first token in an unwrapped line.
1221 tok::colon,
1222 // 'noexcept' is a trailing annotation.
1223 tok::kw_noexcept);
1226 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1227 const FormatToken *FormatTok) {
1228 // FIXME: This returns true for C/C++ keywords like 'struct'.
1229 return FormatTok->is(tok::identifier) &&
1230 (!FormatTok->Tok.getIdentifierInfo() ||
1231 !FormatTok->isOneOf(
1232 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1233 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1234 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1235 Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1236 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1237 Keywords.kw_instanceof, Keywords.kw_interface,
1238 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1241 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1242 const FormatToken *FormatTok) {
1243 return FormatTok->Tok.isLiteral() ||
1244 FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1245 mustBeJSIdent(Keywords, FormatTok);
1248 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1249 // when encountered after a value (see mustBeJSIdentOrValue).
1250 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1251 const FormatToken *FormatTok) {
1252 return FormatTok->isOneOf(
1253 tok::kw_return, Keywords.kw_yield,
1254 // conditionals
1255 tok::kw_if, tok::kw_else,
1256 // loops
1257 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1258 // switch/case
1259 tok::kw_switch, tok::kw_case,
1260 // exceptions
1261 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1262 // declaration
1263 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1264 Keywords.kw_async, Keywords.kw_function,
1265 // import/export
1266 Keywords.kw_import, tok::kw_export);
1269 // Checks whether a token is a type in K&R C (aka C78).
1270 static bool isC78Type(const FormatToken &Tok) {
1271 return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1272 tok::kw_unsigned, tok::kw_float, tok::kw_double,
1273 tok::identifier);
1276 // This function checks whether a token starts the first parameter declaration
1277 // in a K&R C (aka C78) function definition, e.g.:
1278 // int f(a, b)
1279 // short a, b;
1280 // {
1281 // return a + b;
1282 // }
1283 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1284 const FormatToken *FuncName) {
1285 assert(Tok);
1286 assert(Next);
1287 assert(FuncName);
1289 if (FuncName->isNot(tok::identifier))
1290 return false;
1292 const FormatToken *Prev = FuncName->Previous;
1293 if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1294 return false;
1296 if (!isC78Type(*Tok) &&
1297 !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
1298 return false;
1301 if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1302 return false;
1304 Tok = Tok->Previous;
1305 if (!Tok || Tok->isNot(tok::r_paren))
1306 return false;
1308 Tok = Tok->Previous;
1309 if (!Tok || Tok->isNot(tok::identifier))
1310 return false;
1312 return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1315 bool UnwrappedLineParser::parseModuleImport() {
1316 assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
1318 if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
1319 !Token->Tok.getIdentifierInfo() &&
1320 !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) {
1321 return false;
1324 nextToken();
1325 while (!eof()) {
1326 if (FormatTok->is(tok::colon)) {
1327 FormatTok->setFinalizedType(TT_ModulePartitionColon);
1329 // Handle import <foo/bar.h> as we would an include statement.
1330 else if (FormatTok->is(tok::less)) {
1331 nextToken();
1332 while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1333 // Mark tokens up to the trailing line comments as implicit string
1334 // literals.
1335 if (FormatTok->isNot(tok::comment) &&
1336 !FormatTok->TokenText.starts_with("//")) {
1337 FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1339 nextToken();
1342 if (FormatTok->is(tok::semi)) {
1343 nextToken();
1344 break;
1346 nextToken();
1349 addUnwrappedLine();
1350 return true;
1353 // readTokenWithJavaScriptASI reads the next token and terminates the current
1354 // line if JavaScript Automatic Semicolon Insertion must
1355 // happen between the current token and the next token.
1357 // This method is conservative - it cannot cover all edge cases of JavaScript,
1358 // but only aims to correctly handle certain well known cases. It *must not*
1359 // return true in speculative cases.
1360 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1361 FormatToken *Previous = FormatTok;
1362 readToken();
1363 FormatToken *Next = FormatTok;
1365 bool IsOnSameLine =
1366 CommentsBeforeNextToken.empty()
1367 ? Next->NewlinesBefore == 0
1368 : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1369 if (IsOnSameLine)
1370 return;
1372 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1373 bool PreviousStartsTemplateExpr =
1374 Previous->is(TT_TemplateString) && Previous->TokenText.ends_with("${");
1375 if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1376 // If the line contains an '@' sign, the previous token might be an
1377 // annotation, which can precede another identifier/value.
1378 bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1379 return LineNode.Tok->is(tok::at);
1381 if (HasAt)
1382 return;
1384 if (Next->is(tok::exclaim) && PreviousMustBeValue)
1385 return addUnwrappedLine();
1386 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1387 bool NextEndsTemplateExpr =
1388 Next->is(TT_TemplateString) && Next->TokenText.starts_with("}");
1389 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1390 (PreviousMustBeValue ||
1391 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1392 tok::minusminus))) {
1393 return addUnwrappedLine();
1395 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1396 isJSDeclOrStmt(Keywords, Next)) {
1397 return addUnwrappedLine();
1401 void UnwrappedLineParser::parseStructuralElement(
1402 const FormatToken *OpeningBrace, IfStmtKind *IfKind,
1403 FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1404 if (Style.Language == FormatStyle::LK_TableGen &&
1405 FormatTok->is(tok::pp_include)) {
1406 nextToken();
1407 if (FormatTok->is(tok::string_literal))
1408 nextToken();
1409 addUnwrappedLine();
1410 return;
1413 if (Style.isCpp()) {
1414 while (FormatTok->is(tok::l_square) && handleCppAttributes()) {
1416 } else if (Style.isVerilog()) {
1417 if (Keywords.isVerilogStructuredProcedure(*FormatTok)) {
1418 parseForOrWhileLoop(/*HasParens=*/false);
1419 return;
1421 if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) {
1422 parseForOrWhileLoop();
1423 return;
1425 if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
1426 Keywords.kw_assume, Keywords.kw_cover)) {
1427 parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
1428 return;
1431 // Skip things that can exist before keywords like 'if' and 'case'.
1432 while (true) {
1433 if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique,
1434 Keywords.kw_unique0)) {
1435 nextToken();
1436 } else if (FormatTok->is(tok::l_paren) &&
1437 Tokens->peekNextToken()->is(tok::star)) {
1438 parseParens();
1439 } else {
1440 break;
1445 // Tokens that only make sense at the beginning of a line.
1446 switch (FormatTok->Tok.getKind()) {
1447 case tok::kw_asm:
1448 nextToken();
1449 if (FormatTok->is(tok::l_brace)) {
1450 FormatTok->setFinalizedType(TT_InlineASMBrace);
1451 nextToken();
1452 while (FormatTok && !eof()) {
1453 if (FormatTok->is(tok::r_brace)) {
1454 FormatTok->setFinalizedType(TT_InlineASMBrace);
1455 nextToken();
1456 addUnwrappedLine();
1457 break;
1459 FormatTok->Finalized = true;
1460 nextToken();
1463 break;
1464 case tok::kw_namespace:
1465 parseNamespace();
1466 return;
1467 case tok::kw_public:
1468 case tok::kw_protected:
1469 case tok::kw_private:
1470 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1471 Style.isCSharp()) {
1472 nextToken();
1473 } else {
1474 parseAccessSpecifier();
1476 return;
1477 case tok::kw_if: {
1478 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1479 // field/method declaration.
1480 break;
1482 FormatToken *Tok = parseIfThenElse(IfKind);
1483 if (IfLeftBrace)
1484 *IfLeftBrace = Tok;
1485 return;
1487 case tok::kw_for:
1488 case tok::kw_while:
1489 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1490 // field/method declaration.
1491 break;
1493 parseForOrWhileLoop();
1494 return;
1495 case tok::kw_do:
1496 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1497 // field/method declaration.
1498 break;
1500 parseDoWhile();
1501 if (HasDoWhile)
1502 *HasDoWhile = true;
1503 return;
1504 case tok::kw_switch:
1505 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1506 // 'switch: string' field declaration.
1507 break;
1509 parseSwitch();
1510 return;
1511 case tok::kw_default:
1512 // In Verilog default along with other labels are handled in the next loop.
1513 if (Style.isVerilog())
1514 break;
1515 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1516 // 'default: string' field declaration.
1517 break;
1519 nextToken();
1520 if (FormatTok->is(tok::colon)) {
1521 FormatTok->setFinalizedType(TT_CaseLabelColon);
1522 parseLabel();
1523 return;
1525 // e.g. "default void f() {}" in a Java interface.
1526 break;
1527 case tok::kw_case:
1528 // Proto: there are no switch/case statements.
1529 if (Style.Language == FormatStyle::LK_Proto) {
1530 nextToken();
1531 return;
1533 if (Style.isVerilog()) {
1534 parseBlock();
1535 addUnwrappedLine();
1536 return;
1538 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1539 // 'case: string' field declaration.
1540 nextToken();
1541 break;
1543 parseCaseLabel();
1544 return;
1545 case tok::kw_try:
1546 case tok::kw___try:
1547 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1548 // field/method declaration.
1549 break;
1551 parseTryCatch();
1552 return;
1553 case tok::kw_extern:
1554 nextToken();
1555 if (Style.isVerilog()) {
1556 // In Verilog and extern module declaration looks like a start of module.
1557 // But there is no body and endmodule. So we handle it separately.
1558 if (Keywords.isVerilogHierarchy(*FormatTok)) {
1559 parseVerilogHierarchyHeader();
1560 return;
1562 } else if (FormatTok->is(tok::string_literal)) {
1563 nextToken();
1564 if (FormatTok->is(tok::l_brace)) {
1565 if (Style.BraceWrapping.AfterExternBlock)
1566 addUnwrappedLine();
1567 // Either we indent or for backwards compatibility we follow the
1568 // AfterExternBlock style.
1569 unsigned AddLevels =
1570 (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1571 (Style.BraceWrapping.AfterExternBlock &&
1572 Style.IndentExternBlock ==
1573 FormatStyle::IEBS_AfterExternBlock)
1574 ? 1u
1575 : 0u;
1576 parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1577 addUnwrappedLine();
1578 return;
1581 break;
1582 case tok::kw_export:
1583 if (Style.isJavaScript()) {
1584 parseJavaScriptEs6ImportExport();
1585 return;
1587 if (Style.isCpp()) {
1588 nextToken();
1589 if (FormatTok->is(tok::kw_namespace)) {
1590 parseNamespace();
1591 return;
1593 if (FormatTok->is(Keywords.kw_import) && parseModuleImport())
1594 return;
1596 break;
1597 case tok::kw_inline:
1598 nextToken();
1599 if (FormatTok->is(tok::kw_namespace)) {
1600 parseNamespace();
1601 return;
1603 break;
1604 case tok::identifier:
1605 if (FormatTok->is(TT_ForEachMacro)) {
1606 parseForOrWhileLoop();
1607 return;
1609 if (FormatTok->is(TT_MacroBlockBegin)) {
1610 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1611 /*MunchSemi=*/false);
1612 return;
1614 if (FormatTok->is(Keywords.kw_import)) {
1615 if (Style.isJavaScript()) {
1616 parseJavaScriptEs6ImportExport();
1617 return;
1619 if (Style.Language == FormatStyle::LK_Proto) {
1620 nextToken();
1621 if (FormatTok->is(tok::kw_public))
1622 nextToken();
1623 if (FormatTok->isNot(tok::string_literal))
1624 return;
1625 nextToken();
1626 if (FormatTok->is(tok::semi))
1627 nextToken();
1628 addUnwrappedLine();
1629 return;
1631 if (Style.isCpp() && parseModuleImport())
1632 return;
1634 if (Style.isCpp() &&
1635 FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1636 Keywords.kw_slots, Keywords.kw_qslots)) {
1637 nextToken();
1638 if (FormatTok->is(tok::colon)) {
1639 nextToken();
1640 addUnwrappedLine();
1641 return;
1644 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1645 parseStatementMacro();
1646 return;
1648 if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1649 parseNamespace();
1650 return;
1652 // In Verilog labels can be any expression, so we don't do them here.
1653 if (!Style.isVerilog() && Tokens->peekNextToken()->is(tok::colon) &&
1654 !Line->MustBeDeclaration) {
1655 nextToken();
1656 Line->Tokens.begin()->Tok->MustBreakBefore = true;
1657 FormatTok->setFinalizedType(TT_GotoLabelColon);
1658 parseLabel(!Style.IndentGotoLabels);
1659 if (HasLabel)
1660 *HasLabel = true;
1661 return;
1663 // In all other cases, parse the declaration.
1664 break;
1665 default:
1666 break;
1669 const bool InRequiresExpression =
1670 OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
1671 do {
1672 const FormatToken *Previous = FormatTok->Previous;
1673 switch (FormatTok->Tok.getKind()) {
1674 case tok::at:
1675 nextToken();
1676 if (FormatTok->is(tok::l_brace)) {
1677 nextToken();
1678 parseBracedList();
1679 break;
1680 } else if (Style.Language == FormatStyle::LK_Java &&
1681 FormatTok->is(Keywords.kw_interface)) {
1682 nextToken();
1683 break;
1685 switch (FormatTok->Tok.getObjCKeywordID()) {
1686 case tok::objc_public:
1687 case tok::objc_protected:
1688 case tok::objc_package:
1689 case tok::objc_private:
1690 return parseAccessSpecifier();
1691 case tok::objc_interface:
1692 case tok::objc_implementation:
1693 return parseObjCInterfaceOrImplementation();
1694 case tok::objc_protocol:
1695 if (parseObjCProtocol())
1696 return;
1697 break;
1698 case tok::objc_end:
1699 return; // Handled by the caller.
1700 case tok::objc_optional:
1701 case tok::objc_required:
1702 nextToken();
1703 addUnwrappedLine();
1704 return;
1705 case tok::objc_autoreleasepool:
1706 nextToken();
1707 if (FormatTok->is(tok::l_brace)) {
1708 if (Style.BraceWrapping.AfterControlStatement ==
1709 FormatStyle::BWACS_Always) {
1710 addUnwrappedLine();
1712 parseBlock();
1714 addUnwrappedLine();
1715 return;
1716 case tok::objc_synchronized:
1717 nextToken();
1718 if (FormatTok->is(tok::l_paren)) {
1719 // Skip synchronization object
1720 parseParens();
1722 if (FormatTok->is(tok::l_brace)) {
1723 if (Style.BraceWrapping.AfterControlStatement ==
1724 FormatStyle::BWACS_Always) {
1725 addUnwrappedLine();
1727 parseBlock();
1729 addUnwrappedLine();
1730 return;
1731 case tok::objc_try:
1732 // This branch isn't strictly necessary (the kw_try case below would
1733 // do this too after the tok::at is parsed above). But be explicit.
1734 parseTryCatch();
1735 return;
1736 default:
1737 break;
1739 break;
1740 case tok::kw_requires: {
1741 if (Style.isCpp()) {
1742 bool ParsedClause = parseRequires();
1743 if (ParsedClause)
1744 return;
1745 } else {
1746 nextToken();
1748 break;
1750 case tok::kw_enum:
1751 // Ignore if this is part of "template <enum ...".
1752 if (Previous && Previous->is(tok::less)) {
1753 nextToken();
1754 break;
1757 // parseEnum falls through and does not yet add an unwrapped line as an
1758 // enum definition can start a structural element.
1759 if (!parseEnum())
1760 break;
1761 // This only applies to C++ and Verilog.
1762 if (!Style.isCpp() && !Style.isVerilog()) {
1763 addUnwrappedLine();
1764 return;
1766 break;
1767 case tok::kw_typedef:
1768 nextToken();
1769 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1770 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1771 Keywords.kw_CF_CLOSED_ENUM,
1772 Keywords.kw_NS_CLOSED_ENUM)) {
1773 parseEnum();
1775 break;
1776 case tok::kw_class:
1777 if (Style.isVerilog()) {
1778 parseBlock();
1779 addUnwrappedLine();
1780 return;
1782 [[fallthrough]];
1783 case tok::kw_struct:
1784 case tok::kw_union:
1785 if (parseStructLike())
1786 return;
1787 break;
1788 case tok::kw_decltype:
1789 nextToken();
1790 if (FormatTok->is(tok::l_paren)) {
1791 parseParens();
1792 assert(FormatTok->Previous);
1793 if (FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto,
1794 tok::l_paren)) {
1795 Line->SeenDecltypeAuto = true;
1798 break;
1799 case tok::period:
1800 nextToken();
1801 // In Java, classes have an implicit static member "class".
1802 if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1803 FormatTok->is(tok::kw_class)) {
1804 nextToken();
1806 if (Style.isJavaScript() && FormatTok &&
1807 FormatTok->Tok.getIdentifierInfo()) {
1808 // JavaScript only has pseudo keywords, all keywords are allowed to
1809 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1810 nextToken();
1812 break;
1813 case tok::semi:
1814 nextToken();
1815 addUnwrappedLine();
1816 return;
1817 case tok::r_brace:
1818 addUnwrappedLine();
1819 return;
1820 case tok::l_paren: {
1821 parseParens();
1822 // Break the unwrapped line if a K&R C function definition has a parameter
1823 // declaration.
1824 if (OpeningBrace || !Style.isCpp() || !Previous || eof())
1825 break;
1826 if (isC78ParameterDecl(FormatTok,
1827 Tokens->peekNextToken(/*SkipComment=*/true),
1828 Previous)) {
1829 addUnwrappedLine();
1830 return;
1832 break;
1834 case tok::kw_operator:
1835 nextToken();
1836 if (FormatTok->isBinaryOperator())
1837 nextToken();
1838 break;
1839 case tok::caret:
1840 nextToken();
1841 // Block return type.
1842 if (FormatTok->Tok.isAnyIdentifier() ||
1843 FormatTok->isSimpleTypeSpecifier()) {
1844 nextToken();
1845 // Return types: pointers are ok too.
1846 while (FormatTok->is(tok::star))
1847 nextToken();
1849 // Block argument list.
1850 if (FormatTok->is(tok::l_paren))
1851 parseParens();
1852 // Block body.
1853 if (FormatTok->is(tok::l_brace))
1854 parseChildBlock();
1855 break;
1856 case tok::l_brace:
1857 if (InRequiresExpression)
1858 FormatTok->setFinalizedType(TT_BracedListLBrace);
1859 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1860 IsDecltypeAutoFunction = Line->SeenDecltypeAuto;
1861 // A block outside of parentheses must be the last part of a
1862 // structural element.
1863 // FIXME: Figure out cases where this is not true, and add projections
1864 // for them (the one we know is missing are lambdas).
1865 if (Style.Language == FormatStyle::LK_Java &&
1866 Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1867 // If necessary, we could set the type to something different than
1868 // TT_FunctionLBrace.
1869 if (Style.BraceWrapping.AfterControlStatement ==
1870 FormatStyle::BWACS_Always) {
1871 addUnwrappedLine();
1873 } else if (Style.BraceWrapping.AfterFunction) {
1874 addUnwrappedLine();
1876 FormatTok->setFinalizedType(TT_FunctionLBrace);
1877 parseBlock();
1878 IsDecltypeAutoFunction = false;
1879 addUnwrappedLine();
1880 return;
1882 // Otherwise this was a braced init list, and the structural
1883 // element continues.
1884 break;
1885 case tok::kw_try:
1886 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1887 // field/method declaration.
1888 nextToken();
1889 break;
1891 // We arrive here when parsing function-try blocks.
1892 if (Style.BraceWrapping.AfterFunction)
1893 addUnwrappedLine();
1894 parseTryCatch();
1895 return;
1896 case tok::identifier: {
1897 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1898 Line->MustBeDeclaration) {
1899 addUnwrappedLine();
1900 parseCSharpGenericTypeConstraint();
1901 break;
1903 if (FormatTok->is(TT_MacroBlockEnd)) {
1904 addUnwrappedLine();
1905 return;
1908 // Function declarations (as opposed to function expressions) are parsed
1909 // on their own unwrapped line by continuing this loop. Function
1910 // expressions (functions that are not on their own line) must not create
1911 // a new unwrapped line, so they are special cased below.
1912 size_t TokenCount = Line->Tokens.size();
1913 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1914 (TokenCount > 1 ||
1915 (TokenCount == 1 &&
1916 Line->Tokens.front().Tok->isNot(Keywords.kw_async)))) {
1917 tryToParseJSFunction();
1918 break;
1920 if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1921 FormatTok->is(Keywords.kw_interface)) {
1922 if (Style.isJavaScript()) {
1923 // In JavaScript/TypeScript, "interface" can be used as a standalone
1924 // identifier, e.g. in `var interface = 1;`. If "interface" is
1925 // followed by another identifier, it is very like to be an actual
1926 // interface declaration.
1927 unsigned StoredPosition = Tokens->getPosition();
1928 FormatToken *Next = Tokens->getNextToken();
1929 FormatTok = Tokens->setPosition(StoredPosition);
1930 if (!mustBeJSIdent(Keywords, Next)) {
1931 nextToken();
1932 break;
1935 parseRecord();
1936 addUnwrappedLine();
1937 return;
1940 if (Style.isVerilog()) {
1941 if (FormatTok->is(Keywords.kw_table)) {
1942 parseVerilogTable();
1943 return;
1945 if (Keywords.isVerilogBegin(*FormatTok) ||
1946 Keywords.isVerilogHierarchy(*FormatTok)) {
1947 parseBlock();
1948 addUnwrappedLine();
1949 return;
1953 if (!Style.isCpp() && FormatTok->is(Keywords.kw_interface)) {
1954 if (parseStructLike())
1955 return;
1956 break;
1959 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1960 parseStatementMacro();
1961 return;
1964 // See if the following token should start a new unwrapped line.
1965 StringRef Text = FormatTok->TokenText;
1967 FormatToken *PreviousToken = FormatTok;
1968 nextToken();
1970 // JS doesn't have macros, and within classes colons indicate fields, not
1971 // labels.
1972 if (Style.isJavaScript())
1973 break;
1975 auto OneTokenSoFar = [&]() {
1976 auto I = Line->Tokens.begin(), E = Line->Tokens.end();
1977 while (I != E && I->Tok->is(tok::comment))
1978 ++I;
1979 if (Style.isVerilog())
1980 while (I != E && I->Tok->is(tok::hash))
1981 ++I;
1982 return I != E && (++I == E);
1984 if (OneTokenSoFar()) {
1985 // Recognize function-like macro usages without trailing semicolon as
1986 // well as free-standing macros like Q_OBJECT.
1987 bool FunctionLike = FormatTok->is(tok::l_paren);
1988 if (FunctionLike)
1989 parseParens();
1991 bool FollowedByNewline =
1992 CommentsBeforeNextToken.empty()
1993 ? FormatTok->NewlinesBefore > 0
1994 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1996 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1997 tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
1998 if (PreviousToken->isNot(TT_UntouchableMacroFunc))
1999 PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
2000 addUnwrappedLine();
2001 return;
2004 break;
2006 case tok::equal:
2007 if ((Style.isJavaScript() || Style.isCSharp()) &&
2008 FormatTok->is(TT_FatArrow)) {
2009 tryToParseChildBlock();
2010 break;
2013 nextToken();
2014 if (FormatTok->is(tok::l_brace)) {
2015 // Block kind should probably be set to BK_BracedInit for any language.
2016 // C# needs this change to ensure that array initialisers and object
2017 // initialisers are indented the same way.
2018 if (Style.isCSharp())
2019 FormatTok->setBlockKind(BK_BracedInit);
2020 nextToken();
2021 parseBracedList();
2022 } else if (Style.Language == FormatStyle::LK_Proto &&
2023 FormatTok->is(tok::less)) {
2024 nextToken();
2025 parseBracedList(/*IsAngleBracket=*/true);
2027 break;
2028 case tok::l_square:
2029 parseSquare();
2030 break;
2031 case tok::kw_new:
2032 parseNew();
2033 break;
2034 case tok::kw_case:
2035 // Proto: there are no switch/case statements.
2036 if (Style.Language == FormatStyle::LK_Proto) {
2037 nextToken();
2038 return;
2040 // In Verilog switch is called case.
2041 if (Style.isVerilog()) {
2042 parseBlock();
2043 addUnwrappedLine();
2044 return;
2046 if (Style.isJavaScript() && Line->MustBeDeclaration) {
2047 // 'case: string' field declaration.
2048 nextToken();
2049 break;
2051 parseCaseLabel();
2052 break;
2053 case tok::kw_default:
2054 nextToken();
2055 if (Style.isVerilog()) {
2056 if (FormatTok->is(tok::colon)) {
2057 // The label will be handled in the next iteration.
2058 break;
2060 if (FormatTok->is(Keywords.kw_clocking)) {
2061 // A default clocking block.
2062 parseBlock();
2063 addUnwrappedLine();
2064 return;
2066 parseVerilogCaseLabel();
2067 return;
2069 break;
2070 case tok::colon:
2071 nextToken();
2072 if (Style.isVerilog()) {
2073 parseVerilogCaseLabel();
2074 return;
2076 break;
2077 default:
2078 nextToken();
2079 break;
2081 } while (!eof());
2084 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2085 assert(FormatTok->is(tok::l_brace));
2086 if (!Style.isCSharp())
2087 return false;
2088 // See if it's a property accessor.
2089 if (FormatTok->Previous->isNot(tok::identifier))
2090 return false;
2092 // See if we are inside a property accessor.
2094 // Record the current tokenPosition so that we can advance and
2095 // reset the current token. `Next` is not set yet so we need
2096 // another way to advance along the token stream.
2097 unsigned int StoredPosition = Tokens->getPosition();
2098 FormatToken *Tok = Tokens->getNextToken();
2100 // A trivial property accessor is of the form:
2101 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2102 // Track these as they do not require line breaks to be introduced.
2103 bool HasSpecialAccessor = false;
2104 bool IsTrivialPropertyAccessor = true;
2105 while (!eof()) {
2106 if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
2107 tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
2108 Keywords.kw_init, Keywords.kw_set)) {
2109 if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
2110 HasSpecialAccessor = true;
2111 Tok = Tokens->getNextToken();
2112 continue;
2114 if (Tok->isNot(tok::r_brace))
2115 IsTrivialPropertyAccessor = false;
2116 break;
2119 if (!HasSpecialAccessor) {
2120 Tokens->setPosition(StoredPosition);
2121 return false;
2124 // Try to parse the property accessor:
2125 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2126 Tokens->setPosition(StoredPosition);
2127 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2128 addUnwrappedLine();
2129 nextToken();
2130 do {
2131 switch (FormatTok->Tok.getKind()) {
2132 case tok::r_brace:
2133 nextToken();
2134 if (FormatTok->is(tok::equal)) {
2135 while (!eof() && FormatTok->isNot(tok::semi))
2136 nextToken();
2137 nextToken();
2139 addUnwrappedLine();
2140 return true;
2141 case tok::l_brace:
2142 ++Line->Level;
2143 parseBlock(/*MustBeDeclaration=*/true);
2144 addUnwrappedLine();
2145 --Line->Level;
2146 break;
2147 case tok::equal:
2148 if (FormatTok->is(TT_FatArrow)) {
2149 ++Line->Level;
2150 do {
2151 nextToken();
2152 } while (!eof() && FormatTok->isNot(tok::semi));
2153 nextToken();
2154 addUnwrappedLine();
2155 --Line->Level;
2156 break;
2158 nextToken();
2159 break;
2160 default:
2161 if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
2162 Keywords.kw_set) &&
2163 !IsTrivialPropertyAccessor) {
2164 // Non-trivial get/set needs to be on its own line.
2165 addUnwrappedLine();
2167 nextToken();
2169 } while (!eof());
2171 // Unreachable for well-formed code (paired '{' and '}').
2172 return true;
/// Tries to parse a C++ lambda whose introducer starts at the current `[`.
///
/// Returns false if the style is not C++ (after consuming the `[`) or if
/// tryToParseLambdaIntroducer() rejects the introducer. Returns true in every
/// other case, including when the scan below stops at a token that is not
/// accepted between the introducer and the body; in that case the `[` and
/// `{` are not typed as lambda tokens.
2175 bool UnwrappedLineParser::tryToParseLambda() {
2176 assert(FormatTok->is(tok::l_square));
2177 if (!Style.isCpp()) {
2178 nextToken();
2179 return false;
// Keep a handle on the `[` so it can be typed once the body `{` is found.
2181 FormatToken &LSquare = *FormatTok;
2182 if (!tryToParseLambdaIntroducer())
2183 return false;
// SeenArrow: a `->` was seen, so what follows is scanned as a return type.
// InTemplateParameterList: a `<` directly followed the introducer's `]`.
2185 bool SeenArrow = false;
2186 bool InTemplateParameterList = false;
// Scan everything between the introducer and the body's opening brace.
2188 while (FormatTok->isNot(tok::l_brace)) {
2189 if (FormatTok->isSimpleTypeSpecifier()) {
2190 nextToken();
2191 continue;
2193 switch (FormatTok->Tok.getKind()) {
2194 case tok::l_brace:
2195 break;
2196 case tok::l_paren:
2197 parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
2198 break;
2199 case tok::l_square:
2200 parseSquare();
2201 break;
2202 case tok::less:
2203 assert(FormatTok->Previous);
2204 if (FormatTok->Previous->is(tok::r_square))
2205 InTemplateParameterList = true;
2206 nextToken();
2207 break;
// Tokens that are always accepted between the introducer and the body.
2208 case tok::kw_auto:
2209 case tok::kw_class:
2210 case tok::kw_template:
2211 case tok::kw_typename:
2212 case tok::amp:
2213 case tok::star:
2214 case tok::kw_const:
2215 case tok::kw_constexpr:
2216 case tok::kw_consteval:
2217 case tok::comma:
2218 case tok::greater:
2219 case tok::identifier:
2220 case tok::numeric_constant:
2221 case tok::coloncolon:
2222 case tok::kw_mutable:
2223 case tok::kw_noexcept:
2224 case tok::kw_static:
2225 nextToken();
2226 break;
2227 // Specialization of a template with an integer parameter can contain
2228 // arithmetic, logical, comparison and ternary operators.
2230 // FIXME: This also accepts sequences of operators that are not in the scope
2231 // of a template argument list.
2233 // In a C++ lambda a template type can only occur after an arrow. We use
2234 // this as an heuristic to distinguish between Objective-C expressions
2235 // followed by an `a->b` expression, such as:
2236 // ([obj func:arg] + a->b)
2237 // Otherwise the code below would parse as a lambda.
2238 case tok::plus:
2239 case tok::minus:
2240 case tok::exclaim:
2241 case tok::tilde:
2242 case tok::slash:
2243 case tok::percent:
2244 case tok::lessless:
2245 case tok::pipe:
2246 case tok::pipepipe:
2247 case tok::ampamp:
2248 case tok::caret:
2249 case tok::equalequal:
2250 case tok::exclaimequal:
2251 case tok::greaterequal:
2252 case tok::lessequal:
2253 case tok::question:
2254 case tok::colon:
2255 case tok::ellipsis:
2256 case tok::kw_true:
2257 case tok::kw_false:
2258 if (SeenArrow || InTemplateParameterList) {
2259 nextToken();
2260 break;
// Operator outside a return type or template parameter list: stop scanning.
2262 return true;
2263 case tok::arrow:
2264 // This might or might not actually be a lambda arrow (this could be an
2265 // ObjC method invocation followed by a dereferencing arrow). We might
2266 // reset this back to TT_Unknown in TokenAnnotator.
2267 FormatTok->setFinalizedType(TT_TrailingReturnArrow);
2268 SeenArrow = true;
2269 nextToken();
2270 break;
2271 case tok::kw_requires: {
2272 auto *RequiresToken = FormatTok;
2273 nextToken();
2274 parseRequiresClause(RequiresToken);
2275 break;
// `=` is only accepted inside a template parameter list.
2277 case tok::equal:
2278 if (!InTemplateParameterList)
2279 return true;
2280 nextToken();
2281 break;
2282 default:
2283 return true;
// The loop ended on `{`: type the body brace and the introducer bracket.
2287 FormatTok->setFinalizedType(TT_LambdaLBrace);
2288 LSquare.setFinalizedType(TT_LambdaLSquare);
// Record Line->SeenDecltypeAuto on the NestedLambdas stack for the duration
// of the body; parseParens() reads the top entry for its return-paren check.
2290 NestedLambdas.push_back(Line->SeenDecltypeAuto);
2291 parseChildBlock();
2292 assert(!NestedLambdas.empty());
2293 NestedLambdas.pop_back();
2295 return true;
2298 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2299 const FormatToken *Previous = FormatTok->Previous;
2300 const FormatToken *LeftSquare = FormatTok;
2301 nextToken();
2302 if ((Previous && ((Previous->Tok.getIdentifierInfo() &&
2303 !Previous->isOneOf(tok::kw_return, tok::kw_co_await,
2304 tok::kw_co_yield, tok::kw_co_return)) ||
2305 Previous->closesScope())) ||
2306 LeftSquare->isCppStructuredBinding(Style)) {
2307 return false;
2309 if (FormatTok->is(tok::l_square))
2310 return false;
2311 if (FormatTok->is(tok::r_square)) {
2312 const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
2313 if (Next->is(tok::greater))
2314 return false;
2316 parseSquare(/*LambdaIntroducer=*/true);
2317 return true;
2320 void UnwrappedLineParser::tryToParseJSFunction() {
2321 assert(FormatTok->is(Keywords.kw_function));
2322 if (FormatTok->is(Keywords.kw_async))
2323 nextToken();
2324 // Consume "function".
2325 nextToken();
2327 // Consume * (generator function). Treat it like C++'s overloaded operators.
2328 if (FormatTok->is(tok::star)) {
2329 FormatTok->setFinalizedType(TT_OverloadedOperator);
2330 nextToken();
2333 // Consume function name.
2334 if (FormatTok->is(tok::identifier))
2335 nextToken();
2337 if (FormatTok->isNot(tok::l_paren))
2338 return;
2340 // Parse formal parameter list.
2341 parseParens();
2343 if (FormatTok->is(tok::colon)) {
2344 // Parse a type definition.
2345 nextToken();
2347 // Eat the type declaration. For braced inline object types, balance braces,
2348 // otherwise just parse until finding an l_brace for the function body.
2349 if (FormatTok->is(tok::l_brace))
2350 tryToParseBracedList();
2351 else
2352 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2353 nextToken();
2356 if (FormatTok->is(tok::semi))
2357 return;
2359 parseChildBlock();
2362 bool UnwrappedLineParser::tryToParseBracedList() {
2363 if (FormatTok->is(BK_Unknown))
2364 calculateBraceTypes();
2365 assert(FormatTok->isNot(BK_Unknown));
2366 if (FormatTok->is(BK_Block))
2367 return false;
2368 nextToken();
2369 parseBracedList();
2370 return true;
2373 bool UnwrappedLineParser::tryToParseChildBlock() {
2374 assert(Style.isJavaScript() || Style.isCSharp());
2375 assert(FormatTok->is(TT_FatArrow));
2376 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2377 // They always start an expression or a child block if followed by a curly
2378 // brace.
2379 nextToken();
2380 if (FormatTok->isNot(tok::l_brace))
2381 return false;
2382 parseChildBlock();
2383 return true;
/// Parses the contents of a braced list up to and including its closing
/// token.
///
/// \param IsAngleBracket The list is closed by `>` instead of `}`.
/// \param IsEnum The list is an enum body; when
/// Style.AllowShortEnumsOnASingleLine is off, an unwrapped line is added
/// after each comma and before the closing brace.
///
/// Returns true if the closing token was reached without an error having
/// been recorded. Returns false when a `;` is met in a non-enum,
/// non-JavaScript list, or when the end of input is reached first.
2386 bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) {
2387 bool HasError = false;
2389 // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2390 // replace this by using parseAssignmentExpression() inside.
2391 do {
// C#: `=>` followed by `{` opens a child block inside the list.
2392 if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2393 tryToParseChildBlock()) {
2394 continue;
2396 if (Style.isJavaScript()) {
2397 if (FormatTok->is(Keywords.kw_function)) {
2398 tryToParseJSFunction();
2399 continue;
2401 if (FormatTok->is(tok::l_brace)) {
2402 // Could be a method inside of a braced list `{a() { return 1; }}`.
2403 if (tryToParseBracedList())
2404 continue;
2405 parseChildBlock();
// Closing token: consume it and report whether an error was recorded.
2408 if (FormatTok->is(IsAngleBracket ? tok::greater : tok::r_brace)) {
2409 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2410 addUnwrappedLine();
2411 nextToken();
2412 return !HasError;
2414 switch (FormatTok->Tok.getKind()) {
2415 case tok::l_square:
2416 if (Style.isCSharp())
2417 parseSquare();
2418 else
2419 tryToParseLambda();
2420 break;
2421 case tok::l_paren:
2422 parseParens();
2423 // JavaScript can just have free standing methods and getters/setters in
2424 // object literals. Detect them by a "{" following ")".
2425 if (Style.isJavaScript()) {
2426 if (FormatTok->is(tok::l_brace))
2427 parseChildBlock();
2428 break;
2430 break;
2431 case tok::l_brace:
2432 // Assume there are no blocks inside a braced init list apart
2433 // from the ones we explicitly parse out (like lambdas).
2434 FormatTok->setBlockKind(BK_BracedInit);
2435 nextToken();
2436 parseBracedList();
2437 break;
2438 case tok::less:
2439 nextToken();
// A nested `<` list is only parsed when this list itself is angle-bracketed.
2440 if (IsAngleBracket)
2441 parseBracedList(/*IsAngleBracket=*/true);
2442 break;
2443 case tok::semi:
2444 // JavaScript (or more precisely TypeScript) can have semicolons in braced
2445 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2446 // used for error recovery if we have otherwise determined that this is
2447 // a braced list.
2448 if (Style.isJavaScript()) {
2449 nextToken();
2450 break;
// Otherwise a `;` is an error: enums keep parsing, other lists give up here.
2452 HasError = true;
2453 if (!IsEnum)
2454 return false;
2455 nextToken();
2456 break;
2457 case tok::comma:
2458 nextToken();
2459 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2460 addUnwrappedLine();
2461 break;
2462 default:
2463 nextToken();
2464 break;
2466 } while (!eof());
// End of input was reached before the closing token.
2467 return false;
2470 /// \brief Parses a pair of parentheses (and everything between them).
2471 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2472 /// double ampersands. This applies for all nested scopes as well.
2474 /// Returns whether there is a `=` token between the parentheses.
2475 bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2476 assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2477 auto *LeftParen = FormatTok;
2478 bool SeenEqual = false;
// `({` directly after the `(`: such parentheses are never marked optional
// by the r_paren case below.
2479 const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace);
2480 nextToken();
2481 do {
2482 switch (FormatTok->Tok.getKind()) {
2483 case tok::l_paren:
// A `=` seen in a nested pair counts for the enclosing pair as well.
2484 if (parseParens(AmpAmpTokenType))
2485 SeenEqual = true;
2486 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2487 parseChildBlock();
2488 break;
2489 case tok::r_paren:
// With Style.RemoveParentheses enabled, decide whether this pair is
// redundant and, if so, flag both parentheses as Optional.
2490 if (!MightBeStmtExpr &&
2491 Style.RemoveParentheses > FormatStyle::RPS_Leave) {
2492 const auto *Prev = LeftParen->Previous;
2493 const auto *Next = Tokens->peekNextToken();
// This pair is directly wrapped by another pair: `((...))`.
2494 const bool DoubleParens =
2495 Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren);
2496 const auto *PrevPrev = Prev ? Prev->getPreviousNonComment() : nullptr;
// Double parentheses are kept after `__attribute` and `decltype`, and after
// `if`, `while` and `if constexpr` when the condition contains a `=`.
2497 const bool Blacklisted =
2498 PrevPrev &&
2499 (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) ||
2500 (SeenEqual &&
2501 (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) ||
2502 PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if))));
// `return (...);` / `co_return (...);` under RPS_ReturnStatement. Skipped
// when IsDecltypeAutoFunction is set outside any lambda, or when the
// innermost NestedLambdas entry is true.
2503 const bool ReturnParens =
2504 Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement &&
2505 ((NestedLambdas.empty() && !IsDecltypeAutoFunction) ||
2506 (!NestedLambdas.empty() && !NestedLambdas.back())) &&
2507 Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next &&
2508 Next->is(tok::semi);
2509 if ((DoubleParens && !Blacklisted) || ReturnParens) {
2510 LeftParen->Optional = true;
2511 FormatTok->Optional = true;
2514 nextToken();
2515 return SeenEqual;
2516 case tok::r_brace:
2517 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2518 return SeenEqual;
2519 case tok::l_square:
2520 tryToParseLambda();
2521 break;
2522 case tok::l_brace:
2523 if (!tryToParseBracedList())
2524 parseChildBlock();
2525 break;
2526 case tok::at:
2527 nextToken();
// `@{` is parsed as a braced list.
2528 if (FormatTok->is(tok::l_brace)) {
2529 nextToken();
2530 parseBracedList();
2532 break;
2533 case tok::equal:
2534 SeenEqual = true;
2535 if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2536 tryToParseChildBlock();
2537 else
2538 nextToken();
2539 break;
2540 case tok::kw_class:
2541 if (Style.isJavaScript())
2542 parseRecord(/*ParseAsExpr=*/true);
2543 else
2544 nextToken();
2545 break;
2546 case tok::identifier:
2547 if (Style.isJavaScript() && (FormatTok->is(Keywords.kw_function)))
2548 tryToParseJSFunction();
2549 else
2550 nextToken();
2551 break;
2552 case tok::kw_requires: {
2553 auto RequiresToken = FormatTok;
2554 nextToken();
2555 parseRequiresExpression(RequiresToken);
2556 break;
2558 case tok::ampamp:
2559 if (AmpAmpTokenType != TT_Unknown)
2560 FormatTok->setFinalizedType(AmpAmpTokenType);
2561 [[fallthrough]];
2562 default:
2563 nextToken();
2564 break;
2566 } while (!eof());
// End of input was reached before the closing `)`.
2567 return SeenEqual;
2570 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2571 if (!LambdaIntroducer) {
2572 assert(FormatTok->is(tok::l_square) && "'[' expected.");
2573 if (tryToParseLambda())
2574 return;
2576 do {
2577 switch (FormatTok->Tok.getKind()) {
2578 case tok::l_paren:
2579 parseParens();
2580 break;
2581 case tok::r_square:
2582 nextToken();
2583 return;
2584 case tok::r_brace:
2585 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2586 return;
2587 case tok::l_square:
2588 parseSquare();
2589 break;
2590 case tok::l_brace: {
2591 if (!tryToParseBracedList())
2592 parseChildBlock();
2593 break;
2595 case tok::at:
2596 nextToken();
2597 if (FormatTok->is(tok::l_brace)) {
2598 nextToken();
2599 parseBracedList();
2601 break;
2602 default:
2603 nextToken();
2604 break;
2606 } while (!eof());
2609 void UnwrappedLineParser::keepAncestorBraces() {
2610 if (!Style.RemoveBracesLLVM)
2611 return;
2613 const int MaxNestingLevels = 2;
2614 const int Size = NestedTooDeep.size();
2615 if (Size >= MaxNestingLevels)
2616 NestedTooDeep[Size - MaxNestingLevels] = true;
2617 NestedTooDeep.push_back(false);
2620 static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2621 for (const auto &Token : llvm::reverse(Line.Tokens))
2622 if (Token.Tok->isNot(tok::comment))
2623 return Token.Tok;
2625 return nullptr;
2628 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2629 FormatToken *Tok = nullptr;
2631 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2632 PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) {
2633 Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never
2634 ? getLastNonComment(*Line)
2635 : Line->Tokens.back().Tok;
2636 assert(Tok);
2637 if (Tok->BraceCount < 0) {
2638 assert(Tok->BraceCount == -1);
2639 Tok = nullptr;
2640 } else {
2641 Tok->BraceCount = -1;
2645 addUnwrappedLine();
2646 ++Line->Level;
2647 parseStructuralElement();
2649 if (Tok) {
2650 assert(!Line->InPPDirective);
2651 Tok = nullptr;
2652 for (const auto &L : llvm::reverse(*CurrentLines)) {
2653 if (!L.InPPDirective && getLastNonComment(L)) {
2654 Tok = L.Tokens.back().Tok;
2655 break;
2658 assert(Tok);
2659 ++Tok->BraceCount;
2662 if (CheckEOF && eof())
2663 addUnwrappedLine();
2665 --Line->Level;
2668 static void markOptionalBraces(FormatToken *LeftBrace) {
2669 if (!LeftBrace)
2670 return;
2672 assert(LeftBrace->is(tok::l_brace));
2674 FormatToken *RightBrace = LeftBrace->MatchingParen;
2675 if (!RightBrace) {
2676 assert(!LeftBrace->Optional);
2677 return;
2680 assert(RightBrace->is(tok::r_brace));
2681 assert(RightBrace->MatchingParen == LeftBrace);
2682 assert(LeftBrace->Optional == RightBrace->Optional);
2684 LeftBrace->Optional = true;
2685 RightBrace->Optional = true;
2688 void UnwrappedLineParser::handleAttributes() {
2689 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2690 if (FormatTok->isAttribute())
2691 nextToken();
2692 else if (FormatTok->is(tok::l_square))
2693 handleCppAttributes();
2696 bool UnwrappedLineParser::handleCppAttributes() {
2697 // Handle [[likely]] / [[unlikely]] attributes.
2698 assert(FormatTok->is(tok::l_square));
2699 if (!tryToParseSimpleAttribute())
2700 return false;
2701 parseSquare();
2702 return true;
2705 /// Returns whether \c Tok begins a block.
2706 bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
2707 // FIXME: rename the function or make
2708 // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2709 return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
2710 : Tok.is(tok::l_brace);
/// Parses an `if` statement with its optional `else` / `else if` chain, or a
/// Verilog assert-like statement.
///
/// \param IfKind If non-null, receives the kind of statement parsed (IfOnly,
/// IfElse or IfElseIf). Only written when Style.RemoveBracesLLVM is on.
/// \param KeepBraces Never mark the braces of this statement optional.
/// \param IsVerilogAssert The statement is a Verilog assert-like statement,
/// which has extra leading tokens and an optional then-action.
///
/// Returns nullptr when Style.RemoveBracesLLVM is off or when a Verilog
/// assert has no then-action; otherwise the left brace of the then-block,
/// which is null if the then-branch was not a block.
2713 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2714 bool KeepBraces,
2715 bool IsVerilogAssert) {
2716 assert((FormatTok->is(tok::kw_if) ||
2717 (Style.isVerilog() &&
2718 FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
2719 Keywords.kw_assume, Keywords.kw_cover))) &&
2720 "'if' expected");
2721 nextToken();
2723 if (IsVerilogAssert) {
2724 // Handle `assert #0` and `assert final`.
2725 if (FormatTok->is(Keywords.kw_verilogHash)) {
2726 nextToken();
2727 if (FormatTok->is(tok::numeric_constant))
2728 nextToken();
2729 } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property,
2730 Keywords.kw_sequence)) {
2731 nextToken();
2735 // Handle `if !consteval`.
2736 if (FormatTok->is(tok::exclaim))
2737 nextToken();
// The braces of `if consteval` are always kept.
2739 bool KeepIfBraces = true;
2740 if (FormatTok->is(tok::kw_consteval)) {
2741 nextToken();
2742 } else {
2743 KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
// Skip `constexpr` or an identifier between `if` and the condition.
2744 if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
2745 nextToken();
2746 if (FormatTok->is(tok::l_paren)) {
2747 FormatTok->setFinalizedType(TT_ConditionLParen);
2748 parseParens();
2751 handleAttributes();
2752 // The then action is optional in Verilog assert statements.
2753 if (IsVerilogAssert && FormatTok->is(tok::semi)) {
2754 nextToken();
2755 addUnwrappedLine();
2756 return nullptr;
// Set when the then-block's closing brace may share its line with `else`.
2759 bool NeedsUnwrappedLine = false;
2760 keepAncestorBraces();
2762 FormatToken *IfLeftBrace = nullptr;
2763 IfStmtKind IfBlockKind = IfStmtKind::NotIf;
// Then-branch: a block, nothing at all (Verilog assert directly followed by
// `else`), or a single unbraced statement.
2765 if (isBlockBegin(*FormatTok)) {
2766 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
2767 IfLeftBrace = FormatTok;
2768 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2769 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2770 /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
2771 setPreviousRBraceType(TT_ControlStatementRBrace);
2772 if (Style.BraceWrapping.BeforeElse)
2773 addUnwrappedLine();
2774 else
2775 NeedsUnwrappedLine = true;
2776 } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) {
2777 addUnwrappedLine();
2778 } else {
2779 parseUnbracedBody();
// The then-braces are also kept when the block has no matching brace
// recorded, when the statement is nested too deeply, or when the block's
// kind was reported as an `if` without a final `else`.
2782 if (Style.RemoveBracesLLVM) {
2783 assert(!NestedTooDeep.empty());
2784 KeepIfBraces = KeepIfBraces ||
2785 (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
2786 NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
2787 IfBlockKind == IfStmtKind::IfElseIf;
2790 bool KeepElseBraces = KeepIfBraces;
2791 FormatToken *ElseLeftBrace = nullptr;
2792 IfStmtKind Kind = IfStmtKind::IfOnly;
2794 if (FormatTok->is(tok::kw_else)) {
2795 if (Style.RemoveBracesLLVM) {
2796 NestedTooDeep.back() = false;
2797 Kind = IfStmtKind::IfElse;
2799 nextToken();
2800 handleAttributes();
// Else-branch: a block, a chained `if`, or a single unbraced statement.
2801 if (isBlockBegin(*FormatTok)) {
2802 const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
2803 FormatTok->setFinalizedType(TT_ElseLBrace);
2804 ElseLeftBrace = FormatTok;
2805 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2806 IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
2807 FormatToken *IfLBrace =
2808 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2809 /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
2810 setPreviousRBraceType(TT_ElseRBrace);
2811 if (FormatTok->is(tok::kw_else)) {
2812 KeepElseBraces = KeepElseBraces ||
2813 ElseBlockKind == IfStmtKind::IfOnly ||
2814 ElseBlockKind == IfStmtKind::IfElseIf;
// `else { if ... }` where the inner `if` keeps its braces: the else braces
// themselves are marked optional right away.
2815 } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
2816 KeepElseBraces = true;
2817 assert(ElseLeftBrace->MatchingParen);
2818 markOptionalBraces(ElseLeftBrace);
2820 addUnwrappedLine();
2821 } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) {
2822 const FormatToken *Previous = Tokens->getPreviousToken();
2823 assert(Previous);
// A comment between `else` and `if` puts the `if` on its own line, one
// level deeper.
2824 const bool IsPrecededByComment = Previous->is(tok::comment);
2825 if (IsPrecededByComment) {
2826 addUnwrappedLine();
2827 ++Line->Level;
2829 bool TooDeep = true;
// The chained `if` reuses this statement's nesting slot: pop it before the
// recursive call and push it back afterwards.
2830 if (Style.RemoveBracesLLVM) {
2831 Kind = IfStmtKind::IfElseIf;
2832 TooDeep = NestedTooDeep.pop_back_val();
2834 ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
2835 if (Style.RemoveBracesLLVM)
2836 NestedTooDeep.push_back(TooDeep);
2837 if (IsPrecededByComment)
2838 --Line->Level;
2839 } else {
2840 parseUnbracedBody(/*CheckEOF=*/true);
2842 } else {
// No `else`: keep the then-braces if the then-block is an if-else statement.
2843 KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
2844 if (NeedsUnwrappedLine)
2845 addUnwrappedLine();
// Everything below is brace-removal bookkeeping.
2848 if (!Style.RemoveBracesLLVM)
2849 return nullptr;
2851 assert(!NestedTooDeep.empty());
2852 KeepElseBraces = KeepElseBraces ||
2853 (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
2854 NestedTooDeep.back();
2856 NestedTooDeep.pop_back();
// Either both pairs of braces become optional, or the then-braces are
// unlinked from each other (MatchingParen reset to null).
2858 if (!KeepIfBraces && !KeepElseBraces) {
2859 markOptionalBraces(IfLeftBrace);
2860 markOptionalBraces(ElseLeftBrace);
2861 } else if (IfLeftBrace) {
2862 FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
2863 if (IfRightBrace) {
2864 assert(IfRightBrace->MatchingParen == IfLeftBrace);
2865 assert(!IfLeftBrace->Optional);
2866 assert(!IfRightBrace->Optional);
2867 IfLeftBrace->MatchingParen = nullptr;
2868 IfRightBrace->MatchingParen = nullptr;
2872 if (IfKind)
2873 *IfKind = Kind;
2875 return IfLeftBrace;
2878 void UnwrappedLineParser::parseTryCatch() {
2879 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2880 nextToken();
2881 bool NeedsUnwrappedLine = false;
2882 if (FormatTok->is(tok::colon)) {
2883 // We are in a function try block, what comes is an initializer list.
2884 nextToken();
2886 // In case identifiers were removed by clang-tidy, what might follow is
2887 // multiple commas in sequence - before the first identifier.
2888 while (FormatTok->is(tok::comma))
2889 nextToken();
2891 while (FormatTok->is(tok::identifier)) {
2892 nextToken();
2893 if (FormatTok->is(tok::l_paren))
2894 parseParens();
2895 if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2896 FormatTok->is(tok::l_brace)) {
2897 do {
2898 nextToken();
2899 } while (FormatTok->isNot(tok::r_brace));
2900 nextToken();
2903 // In case identifiers were removed by clang-tidy, what might follow is
2904 // multiple commas in sequence - after the first identifier.
2905 while (FormatTok->is(tok::comma))
2906 nextToken();
2909 // Parse try with resource.
2910 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2911 parseParens();
2913 keepAncestorBraces();
2915 if (FormatTok->is(tok::l_brace)) {
2916 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2917 parseBlock();
2918 if (Style.BraceWrapping.BeforeCatch)
2919 addUnwrappedLine();
2920 else
2921 NeedsUnwrappedLine = true;
2922 } else if (FormatTok->isNot(tok::kw_catch)) {
2923 // The C++ standard requires a compound-statement after a try.
2924 // If there's none, we try to assume there's a structuralElement
2925 // and try to continue.
2926 addUnwrappedLine();
2927 ++Line->Level;
2928 parseStructuralElement();
2929 --Line->Level;
2931 while (true) {
2932 if (FormatTok->is(tok::at))
2933 nextToken();
2934 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2935 tok::kw___finally) ||
2936 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2937 FormatTok->is(Keywords.kw_finally)) ||
2938 (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
2939 FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
2940 break;
2942 nextToken();
2943 while (FormatTok->isNot(tok::l_brace)) {
2944 if (FormatTok->is(tok::l_paren)) {
2945 parseParens();
2946 continue;
2948 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2949 if (Style.RemoveBracesLLVM)
2950 NestedTooDeep.pop_back();
2951 return;
2953 nextToken();
2955 NeedsUnwrappedLine = false;
2956 Line->MustBeDeclaration = false;
2957 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2958 parseBlock();
2959 if (Style.BraceWrapping.BeforeCatch)
2960 addUnwrappedLine();
2961 else
2962 NeedsUnwrappedLine = true;
2965 if (Style.RemoveBracesLLVM)
2966 NestedTooDeep.pop_back();
2968 if (NeedsUnwrappedLine)
2969 addUnwrappedLine();
2972 void UnwrappedLineParser::parseNamespace() {
2973 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2974 "'namespace' expected");
2976 const FormatToken &InitialToken = *FormatTok;
2977 nextToken();
2978 if (InitialToken.is(TT_NamespaceMacro)) {
2979 parseParens();
2980 } else {
2981 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2982 tok::l_square, tok::period, tok::l_paren) ||
2983 (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
2984 if (FormatTok->is(tok::l_square))
2985 parseSquare();
2986 else if (FormatTok->is(tok::l_paren))
2987 parseParens();
2988 else
2989 nextToken();
2992 if (FormatTok->is(tok::l_brace)) {
2993 FormatTok->setFinalizedType(TT_NamespaceLBrace);
2995 if (ShouldBreakBeforeBrace(Style, InitialToken))
2996 addUnwrappedLine();
2998 unsigned AddLevels =
2999 Style.NamespaceIndentation == FormatStyle::NI_All ||
3000 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
3001 DeclarationScopeStack.size() > 1)
3002 ? 1u
3003 : 0u;
3004 bool ManageWhitesmithsBraces =
3005 AddLevels == 0u &&
3006 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3008 // If we're in Whitesmiths mode, indent the brace if we're not indenting
3009 // the whole block.
3010 if (ManageWhitesmithsBraces)
3011 ++Line->Level;
3013 // Munch the semicolon after a namespace. This is more common than one would
3014 // think. Putting the semicolon into its own line is very ugly.
3015 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
3016 /*KeepBraces=*/true, /*IfKind=*/nullptr,
3017 ManageWhitesmithsBraces);
3019 addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
3021 if (ManageWhitesmithsBraces)
3022 --Line->Level;
3024 // FIXME: Add error handling.
3027 void UnwrappedLineParser::parseNew() {
3028 assert(FormatTok->is(tok::kw_new) && "'new' expected");
3029 nextToken();
3031 if (Style.isCSharp()) {
3032 do {
3033 // Handle constructor invocation, e.g. `new(field: value)`.
3034 if (FormatTok->is(tok::l_paren))
3035 parseParens();
3037 // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
3038 if (FormatTok->is(tok::l_brace))
3039 parseBracedList();
3041 if (FormatTok->isOneOf(tok::semi, tok::comma))
3042 return;
3044 nextToken();
3045 } while (!eof());
3048 if (Style.Language != FormatStyle::LK_Java)
3049 return;
3051 // In Java, we can parse everything up to the parens, which aren't optional.
3052 do {
3053 // There should not be a ;, { or } before the new's open paren.
3054 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
3055 return;
3057 // Consume the parens.
3058 if (FormatTok->is(tok::l_paren)) {
3059 parseParens();
3061 // If there is a class body of an anonymous class, consume that as child.
3062 if (FormatTok->is(tok::l_brace))
3063 parseChildBlock();
3064 return;
3066 nextToken();
3067 } while (!eof());
3070 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
3071 keepAncestorBraces();
3073 if (isBlockBegin(*FormatTok)) {
3074 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3075 FormatToken *LeftBrace = FormatTok;
3076 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3077 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3078 /*MunchSemi=*/true, KeepBraces);
3079 setPreviousRBraceType(TT_ControlStatementRBrace);
3080 if (!KeepBraces) {
3081 assert(!NestedTooDeep.empty());
3082 if (!NestedTooDeep.back())
3083 markOptionalBraces(LeftBrace);
3085 if (WrapRightBrace)
3086 addUnwrappedLine();
3087 } else {
3088 parseUnbracedBody();
3091 if (!KeepBraces)
3092 NestedTooDeep.pop_back();
3095 void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
3096 assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
3097 (Style.isVerilog() &&
3098 FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
3099 Keywords.kw_always_ff, Keywords.kw_always_latch,
3100 Keywords.kw_final, Keywords.kw_initial,
3101 Keywords.kw_foreach, Keywords.kw_forever,
3102 Keywords.kw_repeat))) &&
3103 "'for', 'while' or foreach macro expected");
3104 const bool KeepBraces = !Style.RemoveBracesLLVM ||
3105 !FormatTok->isOneOf(tok::kw_for, tok::kw_while);
3107 nextToken();
3108 // JS' for await ( ...
3109 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
3110 nextToken();
3111 if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
3112 nextToken();
3113 if (HasParens && FormatTok->is(tok::l_paren)) {
3114 // The type is only set for Verilog basically because we were afraid to
3115 // change the existing behavior for loops. See the discussion on D121756 for
3116 // details.
3117 if (Style.isVerilog())
3118 FormatTok->setFinalizedType(TT_ConditionLParen);
3119 parseParens();
3122 if (Style.isVerilog()) {
3123 // Event control.
3124 parseVerilogSensitivityList();
3125 } else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(tok::semi) &&
3126 Tokens->getPreviousToken()->is(tok::r_paren)) {
3127 nextToken();
3128 addUnwrappedLine();
3129 return;
3132 handleAttributes();
3133 parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
3136 void UnwrappedLineParser::parseDoWhile() {
3137 assert(FormatTok->is(tok::kw_do) && "'do' expected");
3138 nextToken();
3140 parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
3142 // FIXME: Add error handling.
3143 if (FormatTok->isNot(tok::kw_while)) {
3144 addUnwrappedLine();
3145 return;
3148 FormatTok->setFinalizedType(TT_DoWhile);
3150 // If in Whitesmiths mode, the line with the while() needs to be indented
3151 // to the same level as the block.
3152 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
3153 ++Line->Level;
3155 nextToken();
3156 parseStructuralElement();
3159 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
3160 nextToken();
3161 unsigned OldLineLevel = Line->Level;
3162 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
3163 --Line->Level;
3164 if (LeftAlignLabel)
3165 Line->Level = 0;
3167 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
3168 FormatTok->is(tok::l_brace)) {
3170 CompoundStatementIndenter Indenter(this, Line->Level,
3171 Style.BraceWrapping.AfterCaseLabel,
3172 Style.BraceWrapping.IndentBraces);
3173 parseBlock();
3174 if (FormatTok->is(tok::kw_break)) {
3175 if (Style.BraceWrapping.AfterControlStatement ==
3176 FormatStyle::BWACS_Always) {
3177 addUnwrappedLine();
3178 if (!Style.IndentCaseBlocks &&
3179 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
3180 ++Line->Level;
3183 parseStructuralElement();
3185 addUnwrappedLine();
3186 } else {
3187 if (FormatTok->is(tok::semi))
3188 nextToken();
3189 addUnwrappedLine();
3191 Line->Level = OldLineLevel;
3192 if (FormatTok->isNot(tok::l_brace)) {
3193 parseStructuralElement();
3194 addUnwrappedLine();
3198 void UnwrappedLineParser::parseCaseLabel() {
3199 assert(FormatTok->is(tok::kw_case) && "'case' expected");
3201 // FIXME: fix handling of complex expressions here.
3202 do {
3203 nextToken();
3204 if (FormatTok->is(tok::colon)) {
3205 FormatTok->setFinalizedType(TT_CaseLabelColon);
3206 break;
3208 } while (!eof());
3209 parseLabel();
3212 void UnwrappedLineParser::parseSwitch() {
3213 assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3214 nextToken();
3215 if (FormatTok->is(tok::l_paren))
3216 parseParens();
3218 keepAncestorBraces();
3220 if (FormatTok->is(tok::l_brace)) {
3221 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3222 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3223 parseBlock();
3224 setPreviousRBraceType(TT_ControlStatementRBrace);
3225 addUnwrappedLine();
3226 } else {
3227 addUnwrappedLine();
3228 ++Line->Level;
3229 parseStructuralElement();
3230 --Line->Level;
3233 if (Style.RemoveBracesLLVM)
3234 NestedTooDeep.pop_back();
3237 // Operators that can follow a C variable.
3238 static bool isCOperatorFollowingVar(tok::TokenKind kind) {
3239 switch (kind) {
3240 case tok::ampamp:
3241 case tok::ampequal:
3242 case tok::arrow:
3243 case tok::caret:
3244 case tok::caretequal:
3245 case tok::comma:
3246 case tok::ellipsis:
3247 case tok::equal:
3248 case tok::equalequal:
3249 case tok::exclaim:
3250 case tok::exclaimequal:
3251 case tok::greater:
3252 case tok::greaterequal:
3253 case tok::greatergreater:
3254 case tok::greatergreaterequal:
3255 case tok::l_paren:
3256 case tok::l_square:
3257 case tok::less:
3258 case tok::lessequal:
3259 case tok::lessless:
3260 case tok::lesslessequal:
3261 case tok::minus:
3262 case tok::minusequal:
3263 case tok::minusminus:
3264 case tok::percent:
3265 case tok::percentequal:
3266 case tok::period:
3267 case tok::pipe:
3268 case tok::pipeequal:
3269 case tok::pipepipe:
3270 case tok::plus:
3271 case tok::plusequal:
3272 case tok::plusplus:
3273 case tok::question:
3274 case tok::r_brace:
3275 case tok::r_paren:
3276 case tok::r_square:
3277 case tok::semi:
3278 case tok::slash:
3279 case tok::slashequal:
3280 case tok::star:
3281 case tok::starequal:
3282 return true;
3283 default:
3284 return false;
3288 void UnwrappedLineParser::parseAccessSpecifier() {
3289 FormatToken *AccessSpecifierCandidate = FormatTok;
3290 nextToken();
3291 // Understand Qt's slots.
3292 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
3293 nextToken();
3294 // Otherwise, we don't know what it is, and we'd better keep the next token.
3295 if (FormatTok->is(tok::colon)) {
3296 nextToken();
3297 addUnwrappedLine();
3298 } else if (FormatTok->isNot(tok::coloncolon) &&
3299 !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
3300 // Not a variable name nor namespace name.
3301 addUnwrappedLine();
3302 } else if (AccessSpecifierCandidate) {
3303 // Consider the access specifier to be a C identifier.
3304 AccessSpecifierCandidate->Tok.setKind(tok::identifier);
3308 /// \brief Parses a requires, decides if it is a clause or an expression.
3309 /// \pre The current token has to be the requires keyword.
3310 /// \returns true if it parsed a clause.
3311 bool clang::format::UnwrappedLineParser::parseRequires() {
3312 assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3313 auto RequiresToken = FormatTok;
3315 // We try to guess if it is a requires clause, or a requires expression. For
3316 // that we first consume the keyword and check the next token.
3317 nextToken();
3319 switch (FormatTok->Tok.getKind()) {
3320 case tok::l_brace:
3321 // This can only be an expression, never a clause.
3322 parseRequiresExpression(RequiresToken);
3323 return false;
3324 case tok::l_paren:
3325 // Clauses and expression can start with a paren, it's unclear what we have.
3326 break;
3327 default:
3328 // All other tokens can only be a clause.
3329 parseRequiresClause(RequiresToken);
3330 return true;
3333 // Looking forward we would have to decide if there are function declaration
3334 // like arguments to the requires expression:
3335 // requires (T t) {
3336 // Or there is a constraint expression for the requires clause:
3337 // requires (C<T> && ...
3339 // But first let's look behind.
3340 auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
3342 if (!PreviousNonComment ||
3343 PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
3344 // If there is no token, or an expression left brace, we are a requires
3345 // clause within a requires expression.
3346 parseRequiresClause(RequiresToken);
3347 return true;
3350 switch (PreviousNonComment->Tok.getKind()) {
3351 case tok::greater:
3352 case tok::r_paren:
3353 case tok::kw_noexcept:
3354 case tok::kw_const:
3355 // This is a requires clause.
3356 parseRequiresClause(RequiresToken);
3357 return true;
3358 case tok::amp:
3359 case tok::ampamp: {
3360 // This can be either:
3361 // if (... && requires (T t) ...)
3362 // Or
3363 // void member(...) && requires (C<T> ...
3364 // We check the one token before that for a const:
3365 // void member(...) const && requires (C<T> ...
3366 auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3367 if (PrevPrev && PrevPrev->is(tok::kw_const)) {
3368 parseRequiresClause(RequiresToken);
3369 return true;
3371 break;
3373 default:
3374 if (PreviousNonComment->isTypeOrIdentifier()) {
3375 // This is a requires clause.
3376 parseRequiresClause(RequiresToken);
3377 return true;
3379 // It's an expression.
3380 parseRequiresExpression(RequiresToken);
3381 return false;
3384 // Now we look forward and try to check if the paren content is a parameter
3385 // list. The parameters can be cv-qualified and contain references or
3386 // pointers.
3387 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3388 // of stuff: typename, const, *, &, &&, ::, identifiers.
3390 unsigned StoredPosition = Tokens->getPosition();
3391 FormatToken *NextToken = Tokens->getNextToken();
3392 int Lookahead = 0;
3393 auto PeekNext = [&Lookahead, &NextToken, this] {
3394 ++Lookahead;
3395 NextToken = Tokens->getNextToken();
3398 bool FoundType = false;
3399 bool LastWasColonColon = false;
3400 int OpenAngles = 0;
3402 for (; Lookahead < 50; PeekNext()) {
3403 switch (NextToken->Tok.getKind()) {
3404 case tok::kw_volatile:
3405 case tok::kw_const:
3406 case tok::comma:
3407 if (OpenAngles == 0) {
3408 FormatTok = Tokens->setPosition(StoredPosition);
3409 parseRequiresExpression(RequiresToken);
3410 return false;
3412 break;
3413 case tok::r_paren:
3414 case tok::pipepipe:
3415 FormatTok = Tokens->setPosition(StoredPosition);
3416 parseRequiresClause(RequiresToken);
3417 return true;
3418 case tok::eof:
3419 // Break out of the loop.
3420 Lookahead = 50;
3421 break;
3422 case tok::coloncolon:
3423 LastWasColonColon = true;
3424 break;
3425 case tok::identifier:
3426 if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3427 FormatTok = Tokens->setPosition(StoredPosition);
3428 parseRequiresExpression(RequiresToken);
3429 return false;
3431 FoundType = true;
3432 LastWasColonColon = false;
3433 break;
3434 case tok::less:
3435 ++OpenAngles;
3436 break;
3437 case tok::greater:
3438 --OpenAngles;
3439 break;
3440 default:
3441 if (NextToken->isSimpleTypeSpecifier()) {
3442 FormatTok = Tokens->setPosition(StoredPosition);
3443 parseRequiresExpression(RequiresToken);
3444 return false;
3446 break;
3449 // This seems to be a complicated expression, just assume it's a clause.
3450 FormatTok = Tokens->setPosition(StoredPosition);
3451 parseRequiresClause(RequiresToken);
3452 return true;
3455 /// \brief Parses a requires clause.
3456 /// \param RequiresToken The requires keyword token, which starts this clause.
3457 /// \pre We need to be on the next token after the requires keyword.
3458 /// \sa parseRequiresExpression
3460 /// Returns if it either has finished parsing the clause, or it detects, that
3461 /// the clause is incorrect.
3462 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3463 assert(FormatTok->getPreviousNonComment() == RequiresToken);
3464 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3466 // If there is no previous token, we are within a requires expression,
3467 // otherwise we will always have the template or function declaration in front
3468 // of it.
3469 bool InRequiresExpression =
3470 !RequiresToken->Previous ||
3471 RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3473 RequiresToken->setFinalizedType(InRequiresExpression
3474 ? TT_RequiresClauseInARequiresExpression
3475 : TT_RequiresClause);
3477 // NOTE: parseConstraintExpression is only ever called from this function.
3478 // It could be inlined into here.
3479 parseConstraintExpression();
3481 if (!InRequiresExpression)
3482 FormatTok->Previous->ClosesRequiresClause = true;
3485 /// \brief Parses a requires expression.
3486 /// \param RequiresToken The requires keyword token, which starts this clause.
3487 /// \pre We need to be on the next token after the requires keyword.
3488 /// \sa parseRequiresClause
3490 /// Returns if it either has finished parsing the expression, or it detects,
3491 /// that the expression is incorrect.
3492 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3493 assert(FormatTok->getPreviousNonComment() == RequiresToken);
3494 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3496 RequiresToken->setFinalizedType(TT_RequiresExpression);
3498 if (FormatTok->is(tok::l_paren)) {
3499 FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3500 parseParens();
3503 if (FormatTok->is(tok::l_brace)) {
3504 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3505 parseChildBlock();
3509 /// \brief Parses a constraint expression.
3511 /// This is the body of a requires clause. It returns, when the parsing is
3512 /// complete, or the expression is incorrect.
3513 void UnwrappedLineParser::parseConstraintExpression() {
3514 // The special handling for lambdas is needed since tryToParseLambda() eats a
3515 // token and if a requires expression is the last part of a requires clause
3516 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3517 // not set on the correct token. Thus we need to be aware if we even expect a
3518 // lambda to be possible.
3519 // template <typename T> requires requires { ... } [[nodiscard]] ...;
3520 bool LambdaNextTimeAllowed = true;
3522 // Within lambda declarations, it is permitted to put a requires clause after
3523 // its template parameter list, which would place the requires clause right
3524 // before the parentheses of the parameters of the lambda declaration. Thus,
3525 // we track if we expect to see grouping parentheses at all.
3526 // Without this check, `requires foo<T> (T t)` in the below example would be
3527 // seen as the whole requires clause, accidentally eating the parameters of
3528 // the lambda.
3529 // [&]<typename T> requires foo<T> (T t) { ... };
3530 bool TopLevelParensAllowed = true;
3532 do {
3533 bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);
3535 switch (FormatTok->Tok.getKind()) {
3536 case tok::kw_requires: {
3537 auto RequiresToken = FormatTok;
3538 nextToken();
3539 parseRequiresExpression(RequiresToken);
3540 break;
3543 case tok::l_paren:
3544 if (!TopLevelParensAllowed)
3545 return;
3546 parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
3547 TopLevelParensAllowed = false;
3548 break;
3550 case tok::l_square:
3551 if (!LambdaThisTimeAllowed || !tryToParseLambda())
3552 return;
3553 break;
3555 case tok::kw_const:
3556 case tok::semi:
3557 case tok::kw_class:
3558 case tok::kw_struct:
3559 case tok::kw_union:
3560 return;
3562 case tok::l_brace:
3563 // Potential function body.
3564 return;
3566 case tok::ampamp:
3567 case tok::pipepipe:
3568 FormatTok->setFinalizedType(TT_BinaryOperator);
3569 nextToken();
3570 LambdaNextTimeAllowed = true;
3571 TopLevelParensAllowed = true;
3572 break;
3574 case tok::comma:
3575 case tok::comment:
3576 LambdaNextTimeAllowed = LambdaThisTimeAllowed;
3577 nextToken();
3578 break;
3580 case tok::kw_sizeof:
3581 case tok::greater:
3582 case tok::greaterequal:
3583 case tok::greatergreater:
3584 case tok::less:
3585 case tok::lessequal:
3586 case tok::lessless:
3587 case tok::equalequal:
3588 case tok::exclaim:
3589 case tok::exclaimequal:
3590 case tok::plus:
3591 case tok::minus:
3592 case tok::star:
3593 case tok::slash:
3594 LambdaNextTimeAllowed = true;
3595 TopLevelParensAllowed = true;
3596 // Just eat them.
3597 nextToken();
3598 break;
3600 case tok::numeric_constant:
3601 case tok::coloncolon:
3602 case tok::kw_true:
3603 case tok::kw_false:
3604 TopLevelParensAllowed = false;
3605 // Just eat them.
3606 nextToken();
3607 break;
3609 case tok::kw_static_cast:
3610 case tok::kw_const_cast:
3611 case tok::kw_reinterpret_cast:
3612 case tok::kw_dynamic_cast:
3613 nextToken();
3614 if (FormatTok->isNot(tok::less))
3615 return;
3617 nextToken();
3618 parseBracedList(/*IsAngleBracket=*/true);
3619 break;
3621 default:
3622 if (!FormatTok->Tok.getIdentifierInfo()) {
3623 // Identifiers are part of the default case, we check for more then
3624 // tok::identifier to handle builtin type traits.
3625 return;
3628 // We need to differentiate identifiers for a template deduction guide,
3629 // variables, or function return types (the constraint expression has
3630 // ended before that), and basically all other cases. But it's easier to
3631 // check the other way around.
3632 assert(FormatTok->Previous);
3633 switch (FormatTok->Previous->Tok.getKind()) {
3634 case tok::coloncolon: // Nested identifier.
3635 case tok::ampamp: // Start of a function or variable for the
3636 case tok::pipepipe: // constraint expression. (binary)
3637 case tok::exclaim: // The same as above, but unary.
3638 case tok::kw_requires: // Initial identifier of a requires clause.
3639 case tok::equal: // Initial identifier of a concept declaration.
3640 break;
3641 default:
3642 return;
3645 // Read identifier with optional template declaration.
3646 nextToken();
3647 if (FormatTok->is(tok::less)) {
3648 nextToken();
3649 parseBracedList(/*IsAngleBracket=*/true);
3651 TopLevelParensAllowed = false;
3652 break;
3654 } while (!eof());
3657 bool UnwrappedLineParser::parseEnum() {
3658 const FormatToken &InitialToken = *FormatTok;
3660 // Won't be 'enum' for NS_ENUMs.
3661 if (FormatTok->is(tok::kw_enum))
3662 nextToken();
3664 // In TypeScript, "enum" can also be used as property name, e.g. in interface
3665 // declarations. An "enum" keyword followed by a colon would be a syntax
3666 // error and thus assume it is just an identifier.
3667 if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
3668 return false;
3670 // In protobuf, "enum" can be used as a field name.
3671 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
3672 return false;
3674 // Eat up enum class ...
3675 if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
3676 nextToken();
3678 while (FormatTok->Tok.getIdentifierInfo() ||
3679 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
3680 tok::greater, tok::comma, tok::question,
3681 tok::l_square, tok::r_square)) {
3682 if (Style.isVerilog()) {
3683 FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
3684 nextToken();
3685 // In Verilog the base type can have dimensions.
3686 while (FormatTok->is(tok::l_square))
3687 parseSquare();
3688 } else {
3689 nextToken();
3691 // We can have macros or attributes in between 'enum' and the enum name.
3692 if (FormatTok->is(tok::l_paren))
3693 parseParens();
3694 assert(FormatTok->isNot(TT_AttributeSquare));
3695 if (FormatTok->is(tok::identifier)) {
3696 nextToken();
3697 // If there are two identifiers in a row, this is likely an elaborate
3698 // return type. In Java, this can be "implements", etc.
3699 if (Style.isCpp() && FormatTok->is(tok::identifier))
3700 return false;
3704 // Just a declaration or something is wrong.
3705 if (FormatTok->isNot(tok::l_brace))
3706 return true;
3707 FormatTok->setFinalizedType(TT_EnumLBrace);
3708 FormatTok->setBlockKind(BK_Block);
3710 if (Style.Language == FormatStyle::LK_Java) {
3711 // Java enums are different.
3712 parseJavaEnumBody();
3713 return true;
3715 if (Style.Language == FormatStyle::LK_Proto) {
3716 parseBlock(/*MustBeDeclaration=*/true);
3717 return true;
3720 if (!Style.AllowShortEnumsOnASingleLine &&
3721 ShouldBreakBeforeBrace(Style, InitialToken)) {
3722 addUnwrappedLine();
3724 // Parse enum body.
3725 nextToken();
3726 if (!Style.AllowShortEnumsOnASingleLine) {
3727 addUnwrappedLine();
3728 Line->Level += 1;
3730 bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true);
3731 if (!Style.AllowShortEnumsOnASingleLine)
3732 Line->Level -= 1;
3733 if (HasError) {
3734 if (FormatTok->is(tok::semi))
3735 nextToken();
3736 addUnwrappedLine();
3738 setPreviousRBraceType(TT_EnumRBrace);
3739 return true;
3741 // There is no addUnwrappedLine() here so that we fall through to parsing a
3742 // structural element afterwards. Thus, in "enum A {} n, m;",
3743 // "} n, m;" will end up in one unwrapped line.
3746 bool UnwrappedLineParser::parseStructLike() {
3747 // parseRecord falls through and does not yet add an unwrapped line as a
3748 // record declaration or definition can start a structural element.
3749 parseRecord();
3750 // This does not apply to Java, JavaScript and C#.
3751 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3752 Style.isCSharp()) {
3753 if (FormatTok->is(tok::semi))
3754 nextToken();
3755 addUnwrappedLine();
3756 return true;
3758 return false;
3761 namespace {
3762 // A class used to set and restore the Token position when peeking
3763 // ahead in the token source.
3764 class ScopedTokenPosition {
3765 unsigned StoredPosition;
3766 FormatTokenSource *Tokens;
3768 public:
3769 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3770 assert(Tokens && "Tokens expected to not be null");
3771 StoredPosition = Tokens->getPosition();
3774 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3776 } // namespace
3778 // Look to see if we have [[ by looking ahead, if
3779 // its not then rewind to the original position.
3780 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3781 ScopedTokenPosition AutoPosition(Tokens);
3782 FormatToken *Tok = Tokens->getNextToken();
3783 // We already read the first [ check for the second.
3784 if (Tok->isNot(tok::l_square))
3785 return false;
3786 // Double check that the attribute is just something
3787 // fairly simple.
3788 while (Tok->isNot(tok::eof)) {
3789 if (Tok->is(tok::r_square))
3790 break;
3791 Tok = Tokens->getNextToken();
3793 if (Tok->is(tok::eof))
3794 return false;
3795 Tok = Tokens->getNextToken();
3796 if (Tok->isNot(tok::r_square))
3797 return false;
3798 Tok = Tokens->getNextToken();
3799 if (Tok->is(tok::semi))
3800 return false;
3801 return true;
3804 void UnwrappedLineParser::parseJavaEnumBody() {
3805 assert(FormatTok->is(tok::l_brace));
3806 const FormatToken *OpeningBrace = FormatTok;
3808 // Determine whether the enum is simple, i.e. does not have a semicolon or
3809 // constants with class bodies. Simple enums can be formatted like braced
3810 // lists, contracted to a single line, etc.
3811 unsigned StoredPosition = Tokens->getPosition();
3812 bool IsSimple = true;
3813 FormatToken *Tok = Tokens->getNextToken();
3814 while (Tok->isNot(tok::eof)) {
3815 if (Tok->is(tok::r_brace))
3816 break;
3817 if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3818 IsSimple = false;
3819 break;
3821 // FIXME: This will also mark enums with braces in the arguments to enum
3822 // constants as "not simple". This is probably fine in practice, though.
3823 Tok = Tokens->getNextToken();
3825 FormatTok = Tokens->setPosition(StoredPosition);
3827 if (IsSimple) {
3828 nextToken();
3829 parseBracedList();
3830 addUnwrappedLine();
3831 return;
3834 // Parse the body of a more complex enum.
3835 // First add a line for everything up to the "{".
3836 nextToken();
3837 addUnwrappedLine();
3838 ++Line->Level;
3840 // Parse the enum constants.
3841 while (!eof()) {
3842 if (FormatTok->is(tok::l_brace)) {
3843 // Parse the constant's class body.
3844 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3845 /*MunchSemi=*/false);
3846 } else if (FormatTok->is(tok::l_paren)) {
3847 parseParens();
3848 } else if (FormatTok->is(tok::comma)) {
3849 nextToken();
3850 addUnwrappedLine();
3851 } else if (FormatTok->is(tok::semi)) {
3852 nextToken();
3853 addUnwrappedLine();
3854 break;
3855 } else if (FormatTok->is(tok::r_brace)) {
3856 addUnwrappedLine();
3857 break;
3858 } else {
3859 nextToken();
3863 // Parse the class body after the enum's ";" if any.
3864 parseLevel(OpeningBrace);
3865 nextToken();
3866 --Line->Level;
3867 addUnwrappedLine();
3870 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
3871 const FormatToken &InitialToken = *FormatTok;
3872 nextToken();
3874 // The actual identifier can be a nested name specifier, and in macros
3875 // it is often token-pasted.
3876 // An [[attribute]] can be before the identifier.
3877 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
3878 tok::kw_alignas, tok::l_square) ||
3879 FormatTok->isAttribute() ||
3880 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3881 FormatTok->isOneOf(tok::period, tok::comma))) {
3882 if (Style.isJavaScript() &&
3883 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
3884 // JavaScript/TypeScript supports inline object types in
3885 // extends/implements positions:
3886 // class Foo implements {bar: number} { }
3887 nextToken();
3888 if (FormatTok->is(tok::l_brace)) {
3889 tryToParseBracedList();
3890 continue;
3893 if (FormatTok->is(tok::l_square) && handleCppAttributes())
3894 continue;
3895 bool IsNonMacroIdentifier =
3896 FormatTok->is(tok::identifier) &&
3897 FormatTok->TokenText != FormatTok->TokenText.upper();
3898 nextToken();
3899 // We can have macros in between 'class' and the class name.
3900 if (!IsNonMacroIdentifier && FormatTok->is(tok::l_paren))
3901 parseParens();
3904 // Note that parsing away template declarations here leads to incorrectly
3905 // accepting function declarations as record declarations.
3906 // In general, we cannot solve this problem. Consider:
3907 // class A<int> B() {}
3908 // which can be a function definition or a class definition when B() is a
3909 // macro. If we find enough real-world cases where this is a problem, we
3910 // can parse for the 'template' keyword in the beginning of the statement,
3911 // and thus rule out the record production in case there is no template
3912 // (this would still leave us with an ambiguity between template function
3913 // and class declarations).
3914 if (FormatTok->isOneOf(tok::colon, tok::less)) {
3915 do {
3916 if (FormatTok->is(tok::l_brace)) {
3917 calculateBraceTypes(/*ExpectClassBody=*/true);
3918 if (!tryToParseBracedList())
3919 break;
3921 if (FormatTok->is(tok::l_square)) {
3922 FormatToken *Previous = FormatTok->Previous;
3923 if (!Previous ||
3924 !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
3925 // Don't try parsing a lambda if we had a closing parenthesis before,
3926 // it was probably a pointer to an array: int (*)[].
3927 if (!tryToParseLambda())
3928 continue;
3929 } else {
3930 parseSquare();
3931 continue;
3934 if (FormatTok->is(tok::semi))
3935 return;
3936 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
3937 addUnwrappedLine();
3938 nextToken();
3939 parseCSharpGenericTypeConstraint();
3940 break;
3942 nextToken();
3943 } while (!eof());
3946 auto GetBraceTypes =
3947 [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> {
3948 switch (RecordTok.Tok.getKind()) {
3949 case tok::kw_class:
3950 return {TT_ClassLBrace, TT_ClassRBrace};
3951 case tok::kw_struct:
3952 return {TT_StructLBrace, TT_StructRBrace};
3953 case tok::kw_union:
3954 return {TT_UnionLBrace, TT_UnionRBrace};
3955 default:
3956 // Useful for e.g. interface.
3957 return {TT_RecordLBrace, TT_RecordRBrace};
3960 if (FormatTok->is(tok::l_brace)) {
3961 auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken);
3962 FormatTok->setFinalizedType(OpenBraceType);
3963 if (ParseAsExpr) {
3964 parseChildBlock();
3965 } else {
3966 if (ShouldBreakBeforeBrace(Style, InitialToken))
3967 addUnwrappedLine();
3969 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
3970 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
3972 setPreviousRBraceType(ClosingBraceType);
3974 // There is no addUnwrappedLine() here so that we fall through to parsing a
3975 // structural element afterwards. Thus, in "class A {} n, m;",
3976 // "} n, m;" will end up in one unwrapped line.
3979 void UnwrappedLineParser::parseObjCMethod() {
3980 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
3981 "'(' or identifier expected.");
3982 do {
3983 if (FormatTok->is(tok::semi)) {
3984 nextToken();
3985 addUnwrappedLine();
3986 return;
3987 } else if (FormatTok->is(tok::l_brace)) {
3988 if (Style.BraceWrapping.AfterFunction)
3989 addUnwrappedLine();
3990 parseBlock();
3991 addUnwrappedLine();
3992 return;
3993 } else {
3994 nextToken();
3996 } while (!eof());
3999 void UnwrappedLineParser::parseObjCProtocolList() {
4000 assert(FormatTok->is(tok::less) && "'<' expected.");
4001 do {
4002 nextToken();
4003 // Early exit in case someone forgot a close angle.
4004 if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4005 FormatTok->isObjCAtKeyword(tok::objc_end)) {
4006 return;
4008 } while (!eof() && FormatTok->isNot(tok::greater));
4009 nextToken(); // Skip '>'.
4012 void UnwrappedLineParser::parseObjCUntilAtEnd() {
4013 do {
4014 if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
4015 nextToken();
4016 addUnwrappedLine();
4017 break;
4019 if (FormatTok->is(tok::l_brace)) {
4020 parseBlock();
4021 // In ObjC interfaces, nothing should be following the "}".
4022 addUnwrappedLine();
4023 } else if (FormatTok->is(tok::r_brace)) {
4024 // Ignore stray "}". parseStructuralElement doesn't consume them.
4025 nextToken();
4026 addUnwrappedLine();
4027 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
4028 nextToken();
4029 parseObjCMethod();
4030 } else {
4031 parseStructuralElement();
4033 } while (!eof());
4036 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
4037 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
4038 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
4039 nextToken();
4040 nextToken(); // interface name
4042 // @interface can be followed by a lightweight generic
4043 // specialization list, then either a base class or a category.
4044 if (FormatTok->is(tok::less))
4045 parseObjCLightweightGenerics();
4046 if (FormatTok->is(tok::colon)) {
4047 nextToken();
4048 nextToken(); // base class name
4049 // The base class can also have lightweight generics applied to it.
4050 if (FormatTok->is(tok::less))
4051 parseObjCLightweightGenerics();
4052 } else if (FormatTok->is(tok::l_paren)) {
4053 // Skip category, if present.
4054 parseParens();
4057 if (FormatTok->is(tok::less))
4058 parseObjCProtocolList();
4060 if (FormatTok->is(tok::l_brace)) {
4061 if (Style.BraceWrapping.AfterObjCDeclaration)
4062 addUnwrappedLine();
4063 parseBlock(/*MustBeDeclaration=*/true);
4066 // With instance variables, this puts '}' on its own line. Without instance
4067 // variables, this ends the @interface line.
4068 addUnwrappedLine();
4070 parseObjCUntilAtEnd();
4073 void UnwrappedLineParser::parseObjCLightweightGenerics() {
4074 assert(FormatTok->is(tok::less));
4075 // Unlike protocol lists, generic parameterizations support
4076 // nested angles:
4078 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4079 // NSObject <NSCopying, NSSecureCoding>
4081 // so we need to count how many open angles we have left.
4082 unsigned NumOpenAngles = 1;
4083 do {
4084 nextToken();
4085 // Early exit in case someone forgot a close angle.
4086 if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4087 FormatTok->isObjCAtKeyword(tok::objc_end)) {
4088 break;
4090 if (FormatTok->is(tok::less)) {
4091 ++NumOpenAngles;
4092 } else if (FormatTok->is(tok::greater)) {
4093 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
4094 --NumOpenAngles;
4096 } while (!eof() && NumOpenAngles != 0);
4097 nextToken(); // Skip '>'.
4100 // Returns true for the declaration/definition form of @protocol,
4101 // false for the expression form.
4102 bool UnwrappedLineParser::parseObjCProtocol() {
4103 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
4104 nextToken();
4106 if (FormatTok->is(tok::l_paren)) {
4107 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4108 return false;
4111 // The definition/declaration form,
4112 // @protocol Foo
4113 // - (int)someMethod;
4114 // @end
4116 nextToken(); // protocol name
4118 if (FormatTok->is(tok::less))
4119 parseObjCProtocolList();
4121 // Check for protocol declaration.
4122 if (FormatTok->is(tok::semi)) {
4123 nextToken();
4124 addUnwrappedLine();
4125 return true;
4128 addUnwrappedLine();
4129 parseObjCUntilAtEnd();
4130 return true;
4133 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
4134 bool IsImport = FormatTok->is(Keywords.kw_import);
4135 assert(IsImport || FormatTok->is(tok::kw_export));
4136 nextToken();
4138 // Consume the "default" in "export default class/function".
4139 if (FormatTok->is(tok::kw_default))
4140 nextToken();
4142 // Consume "async function", "function" and "default function", so that these
4143 // get parsed as free-standing JS functions, i.e. do not require a trailing
4144 // semicolon.
4145 if (FormatTok->is(Keywords.kw_async))
4146 nextToken();
4147 if (FormatTok->is(Keywords.kw_function)) {
4148 nextToken();
4149 return;
4152 // For imports, `export *`, `export {...}`, consume the rest of the line up
4153 // to the terminating `;`. For everything else, just return and continue
4154 // parsing the structural element, i.e. the declaration or expression for
4155 // `export default`.
4156 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
4157 !FormatTok->isStringLiteral() &&
4158 !(FormatTok->is(Keywords.kw_type) &&
4159 Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) {
4160 return;
4163 while (!eof()) {
4164 if (FormatTok->is(tok::semi))
4165 return;
4166 if (Line->Tokens.empty()) {
4167 // Common issue: Automatic Semicolon Insertion wrapped the line, so the
4168 // import statement should terminate.
4169 return;
4171 if (FormatTok->is(tok::l_brace)) {
4172 FormatTok->setBlockKind(BK_Block);
4173 nextToken();
4174 parseBracedList();
4175 } else {
4176 nextToken();
4181 void UnwrappedLineParser::parseStatementMacro() {
4182 nextToken();
4183 if (FormatTok->is(tok::l_paren))
4184 parseParens();
4185 if (FormatTok->is(tok::semi))
4186 nextToken();
4187 addUnwrappedLine();
4190 void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4191 // consume things like a::`b.c[d:e] or a::*
4192 while (true) {
4193 if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar,
4194 tok::coloncolon, tok::hash) ||
4195 Keywords.isVerilogIdentifier(*FormatTok)) {
4196 nextToken();
4197 } else if (FormatTok->is(tok::l_square)) {
4198 parseSquare();
4199 } else {
4200 break;
4205 void UnwrappedLineParser::parseVerilogSensitivityList() {
4206 if (FormatTok->isNot(tok::at))
4207 return;
4208 nextToken();
4209 // A block event expression has 2 at signs.
4210 if (FormatTok->is(tok::at))
4211 nextToken();
4212 switch (FormatTok->Tok.getKind()) {
4213 case tok::star:
4214 nextToken();
4215 break;
4216 case tok::l_paren:
4217 parseParens();
4218 break;
4219 default:
4220 parseVerilogHierarchyIdentifier();
4221 break;
4225 unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
4226 unsigned AddLevels = 0;
4228 if (FormatTok->is(Keywords.kw_clocking)) {
4229 nextToken();
4230 if (Keywords.isVerilogIdentifier(*FormatTok))
4231 nextToken();
4232 parseVerilogSensitivityList();
4233 if (FormatTok->is(tok::semi))
4234 nextToken();
4235 } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex,
4236 Keywords.kw_casez, Keywords.kw_randcase,
4237 Keywords.kw_randsequence)) {
4238 if (Style.IndentCaseLabels)
4239 AddLevels++;
4240 nextToken();
4241 if (FormatTok->is(tok::l_paren)) {
4242 FormatTok->setFinalizedType(TT_ConditionLParen);
4243 parseParens();
4245 if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches))
4246 nextToken();
4247 // The case header has no semicolon.
4248 } else {
4249 // "module" etc.
4250 nextToken();
4251 // all the words like the name of the module and specifiers like
4252 // "automatic" and the width of function return type
4253 while (true) {
4254 if (FormatTok->is(tok::l_square)) {
4255 auto Prev = FormatTok->getPreviousNonComment();
4256 if (Prev && Keywords.isVerilogIdentifier(*Prev))
4257 Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
4258 parseSquare();
4259 } else if (Keywords.isVerilogIdentifier(*FormatTok) ||
4260 FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) {
4261 nextToken();
4262 } else {
4263 break;
4267 auto NewLine = [this]() {
4268 addUnwrappedLine();
4269 Line->IsContinuation = true;
4272 // package imports
4273 while (FormatTok->is(Keywords.kw_import)) {
4274 NewLine();
4275 nextToken();
4276 parseVerilogHierarchyIdentifier();
4277 if (FormatTok->is(tok::semi))
4278 nextToken();
4281 // parameters and ports
4282 if (FormatTok->is(Keywords.kw_verilogHash)) {
4283 NewLine();
4284 nextToken();
4285 if (FormatTok->is(tok::l_paren)) {
4286 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4287 parseParens();
4290 if (FormatTok->is(tok::l_paren)) {
4291 NewLine();
4292 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4293 parseParens();
4296 // extends and implements
4297 if (FormatTok->is(Keywords.kw_extends)) {
4298 NewLine();
4299 nextToken();
4300 parseVerilogHierarchyIdentifier();
4301 if (FormatTok->is(tok::l_paren))
4302 parseParens();
4304 if (FormatTok->is(Keywords.kw_implements)) {
4305 NewLine();
4306 do {
4307 nextToken();
4308 parseVerilogHierarchyIdentifier();
4309 } while (FormatTok->is(tok::comma));
4312 // Coverage event for cover groups.
4313 if (FormatTok->is(tok::at)) {
4314 NewLine();
4315 parseVerilogSensitivityList();
4318 if (FormatTok->is(tok::semi))
4319 nextToken(/*LevelDifference=*/1);
4320 addUnwrappedLine();
4323 return AddLevels;
4326 void UnwrappedLineParser::parseVerilogTable() {
4327 assert(FormatTok->is(Keywords.kw_table));
4328 nextToken(/*LevelDifference=*/1);
4329 addUnwrappedLine();
4331 auto InitialLevel = Line->Level++;
4332 while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) {
4333 FormatToken *Tok = FormatTok;
4334 nextToken();
4335 if (Tok->is(tok::semi))
4336 addUnwrappedLine();
4337 else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus))
4338 Tok->setFinalizedType(TT_VerilogTableItem);
4340 Line->Level = InitialLevel;
4341 nextToken(/*LevelDifference=*/-1);
4342 addUnwrappedLine();
4345 void UnwrappedLineParser::parseVerilogCaseLabel() {
4346 // The label will get unindented in AnnotatingParser. If there are no leading
4347 // spaces, indent the rest here so that things inside the block will be
4348 // indented relative to things outside. We don't use parseLabel because we
4349 // don't know whether this colon is a label or a ternary expression at this
4350 // point.
4351 auto OrigLevel = Line->Level;
4352 auto FirstLine = CurrentLines->size();
4353 if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
4354 ++Line->Level;
4355 else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok))
4356 --Line->Level;
4357 parseStructuralElement();
4358 // Restore the indentation in both the new line and the line that has the
4359 // label.
4360 if (CurrentLines->size() > FirstLine)
4361 (*CurrentLines)[FirstLine].Level = OrigLevel;
4362 Line->Level = OrigLevel;
4365 bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
4366 for (const auto &N : Line.Tokens) {
4367 if (N.Tok->MacroCtx)
4368 return true;
4369 for (const UnwrappedLine &Child : N.Children)
4370 if (containsExpansion(Child))
4371 return true;
4373 return false;
4376 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
4377 if (Line->Tokens.empty())
4378 return;
4379 LLVM_DEBUG({
4380 if (!parsingPPDirective()) {
4381 llvm::dbgs() << "Adding unwrapped line:\n";
4382 printDebugInfo(*Line);
4386 // If this line closes a block when in Whitesmiths mode, remember that
4387 // information so that the level can be decreased after the line is added.
4388 // This has to happen after the addition of the line since the line itself
4389 // needs to be indented.
4390 bool ClosesWhitesmithsBlock =
4391 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
4392 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
4394 // If the current line was expanded from a macro call, we use it to
4395 // reconstruct an unwrapped line from the structure of the expanded unwrapped
4396 // line and the unexpanded token stream.
4397 if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) {
4398 if (!Reconstruct)
4399 Reconstruct.emplace(Line->Level, Unexpanded);
4400 Reconstruct->addLine(*Line);
4402 // While the reconstructed unexpanded lines are stored in the normal
4403 // flow of lines, the expanded lines are stored on the side to be analyzed
4404 // in an extra step.
4405 CurrentExpandedLines.push_back(std::move(*Line));
4407 if (Reconstruct->finished()) {
4408 UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
4409 assert(!Reconstructed.Tokens.empty() &&
4410 "Reconstructed must at least contain the macro identifier.");
4411 assert(!parsingPPDirective());
4412 LLVM_DEBUG({
4413 llvm::dbgs() << "Adding unexpanded line:\n";
4414 printDebugInfo(Reconstructed);
4416 ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines;
4417 Lines.push_back(std::move(Reconstructed));
4418 CurrentExpandedLines.clear();
4419 Reconstruct.reset();
4421 } else {
4422 // At the top level we only get here when no unexpansion is going on, or
4423 // when conditional formatting led to unfinished macro reconstructions.
4424 assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0);
4425 CurrentLines->push_back(std::move(*Line));
4427 Line->Tokens.clear();
4428 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
4429 Line->FirstStartColumn = 0;
4430 Line->IsContinuation = false;
4431 Line->SeenDecltypeAuto = false;
4433 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
4434 --Line->Level;
4435 if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
4436 CurrentLines->append(
4437 std::make_move_iterator(PreprocessorDirectives.begin()),
4438 std::make_move_iterator(PreprocessorDirectives.end()));
4439 PreprocessorDirectives.clear();
4441 // Disconnect the current token from the last token on the previous line.
4442 FormatTok->Previous = nullptr;
4445 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
4447 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4448 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4449 FormatTok.NewlinesBefore > 0;
4452 // Checks if \p FormatTok is a line comment that continues the line comment
4453 // section on \p Line.
4454 static bool
4455 continuesLineCommentSection(const FormatToken &FormatTok,
4456 const UnwrappedLine &Line,
4457 const llvm::Regex &CommentPragmasRegex) {
4458 if (Line.Tokens.empty())
4459 return false;
4461 StringRef IndentContent = FormatTok.TokenText;
4462 if (FormatTok.TokenText.starts_with("//") ||
4463 FormatTok.TokenText.starts_with("/*")) {
4464 IndentContent = FormatTok.TokenText.substr(2);
4466 if (CommentPragmasRegex.match(IndentContent))
4467 return false;
4469 // If Line starts with a line comment, then FormatTok continues the comment
4470 // section if its original column is greater or equal to the original start
4471 // column of the line.
4473 // Define the min column token of a line as follows: if a line ends in '{' or
4474 // contains a '{' followed by a line comment, then the min column token is
4475 // that '{'. Otherwise, the min column token of the line is the first token of
4476 // the line.
4478 // If Line starts with a token other than a line comment, then FormatTok
4479 // continues the comment section if its original column is greater than the
4480 // original start column of the min column token of the line.
4482 // For example, the second line comment continues the first in these cases:
4484 // // first line
4485 // // second line
4487 // and:
4489 // // first line
4490 // // second line
4492 // and:
4494 // int i; // first line
4495 // // second line
4497 // and:
4499 // do { // first line
4500 // // second line
4501 // int i;
4502 // } while (true);
4504 // and:
4506 // enum {
4507 // a, // first line
4508 // // second line
4509 // b
4510 // };
4512 // The second line comment doesn't continue the first in these cases:
4514 // // first line
4515 // // second line
4517 // and:
4519 // int i; // first line
4520 // // second line
4522 // and:
4524 // do { // first line
4525 // // second line
4526 // int i;
4527 // } while (true);
4529 // and:
4531 // enum {
4532 // a, // first line
4533 // // second line
4534 // };
4535 const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4537 // Scan for '{//'. If found, use the column of '{' as a min column for line
4538 // comment section continuation.
4539 const FormatToken *PreviousToken = nullptr;
4540 for (const UnwrappedLineNode &Node : Line.Tokens) {
4541 if (PreviousToken && PreviousToken->is(tok::l_brace) &&
4542 isLineComment(*Node.Tok)) {
4543 MinColumnToken = PreviousToken;
4544 break;
4546 PreviousToken = Node.Tok;
4548 // Grab the last newline preceding a token in this unwrapped line.
4549 if (Node.Tok->NewlinesBefore > 0)
4550 MinColumnToken = Node.Tok;
4552 if (PreviousToken && PreviousToken->is(tok::l_brace))
4553 MinColumnToken = PreviousToken;
4555 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4556 MinColumnToken);
4559 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4560 bool JustComments = Line->Tokens.empty();
4561 for (FormatToken *Tok : CommentsBeforeNextToken) {
4562 // Line comments that belong to the same line comment section are put on the
4563 // same line since later we might want to reflow content between them.
4564 // Additional fine-grained breaking of line comment sections is controlled
4565 // by the class BreakableLineCommentSection in case it is desirable to keep
4566 // several line comment sections in the same unwrapped line.
4568 // FIXME: Consider putting separate line comment sections as children to the
4569 // unwrapped line instead.
4570 Tok->ContinuesLineCommentSection =
4571 continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
4572 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4573 addUnwrappedLine();
4574 pushToken(Tok);
4576 if (NewlineBeforeNext && JustComments)
4577 addUnwrappedLine();
4578 CommentsBeforeNextToken.clear();
4581 void UnwrappedLineParser::nextToken(int LevelDifference) {
4582 if (eof())
4583 return;
4584 flushComments(isOnNewLine(*FormatTok));
4585 pushToken(FormatTok);
4586 FormatToken *Previous = FormatTok;
4587 if (!Style.isJavaScript())
4588 readToken(LevelDifference);
4589 else
4590 readTokenWithJavaScriptASI();
4591 FormatTok->Previous = Previous;
4592 if (Style.isVerilog()) {
4593 // Blocks in Verilog can have `begin` and `end` instead of braces. For
4594 // keywords like `begin`, we can't treat them the same as left braces
4595 // because some contexts require one of them. For example structs use
4596 // braces and if blocks use keywords, and a left brace can occur in an if
4597 // statement, but it is not a block. For keywords like `end`, we simply
4598 // treat them the same as right braces.
4599 if (Keywords.isVerilogEnd(*FormatTok))
4600 FormatTok->Tok.setKind(tok::r_brace);
4604 void UnwrappedLineParser::distributeComments(
4605 const SmallVectorImpl<FormatToken *> &Comments,
4606 const FormatToken *NextTok) {
4607 // Whether or not a line comment token continues a line is controlled by
4608 // the method continuesLineCommentSection, with the following caveat:
4610 // Define a trail of Comments to be a nonempty proper postfix of Comments such
4611 // that each comment line from the trail is aligned with the next token, if
4612 // the next token exists. If a trail exists, the beginning of the maximal
4613 // trail is marked as a start of a new comment section.
4615 // For example in this code:
4617 // int a; // line about a
4618 // // line 1 about b
4619 // // line 2 about b
4620 // int b;
4622 // the two lines about b form a maximal trail, so there are two sections, the
4623 // first one consisting of the single comment "// line about a" and the
4624 // second one consisting of the next two comments.
4625 if (Comments.empty())
4626 return;
4627 bool ShouldPushCommentsInCurrentLine = true;
4628 bool HasTrailAlignedWithNextToken = false;
4629 unsigned StartOfTrailAlignedWithNextToken = 0;
4630 if (NextTok) {
4631 // We are skipping the first element intentionally.
4632 for (unsigned i = Comments.size() - 1; i > 0; --i) {
4633 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4634 HasTrailAlignedWithNextToken = true;
4635 StartOfTrailAlignedWithNextToken = i;
4639 for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4640 FormatToken *FormatTok = Comments[i];
4641 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4642 FormatTok->ContinuesLineCommentSection = false;
4643 } else {
4644 FormatTok->ContinuesLineCommentSection =
4645 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4647 if (!FormatTok->ContinuesLineCommentSection &&
4648 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
4649 ShouldPushCommentsInCurrentLine = false;
4651 if (ShouldPushCommentsInCurrentLine)
4652 pushToken(FormatTok);
4653 else
4654 CommentsBeforeNextToken.push_back(FormatTok);
4658 void UnwrappedLineParser::readToken(int LevelDifference) {
4659 SmallVector<FormatToken *, 1> Comments;
4660 bool PreviousWasComment = false;
4661 bool FirstNonCommentOnLine = false;
4662 do {
4663 FormatTok = Tokens->getNextToken();
4664 assert(FormatTok);
4665 while (FormatTok->getType() == TT_ConflictStart ||
4666 FormatTok->getType() == TT_ConflictEnd ||
4667 FormatTok->getType() == TT_ConflictAlternative) {
4668 if (FormatTok->getType() == TT_ConflictStart)
4669 conditionalCompilationStart(/*Unreachable=*/false);
4670 else if (FormatTok->getType() == TT_ConflictAlternative)
4671 conditionalCompilationAlternative();
4672 else if (FormatTok->getType() == TT_ConflictEnd)
4673 conditionalCompilationEnd();
4674 FormatTok = Tokens->getNextToken();
4675 FormatTok->MustBreakBefore = true;
4678 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4679 const FormatToken &Tok,
4680 bool PreviousWasComment) {
4681 auto IsFirstOnLine = [](const FormatToken &Tok) {
4682 return Tok.HasUnescapedNewline || Tok.IsFirst;
4685 // Consider preprocessor directives preceded by block comments as first
4686 // on line.
4687 if (PreviousWasComment)
4688 return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4689 return IsFirstOnLine(Tok);
4692 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4693 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4694 PreviousWasComment = FormatTok->is(tok::comment);
4696 while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
4697 (!Style.isVerilog() ||
4698 Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) &&
4699 FirstNonCommentOnLine) {
4700 distributeComments(Comments, FormatTok);
4701 Comments.clear();
4702 // If there is an unfinished unwrapped line, we flush the preprocessor
4703 // directives only after that unwrapped line was finished later.
4704 bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4705 ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4706 assert((LevelDifference >= 0 ||
4707 static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4708 "LevelDifference makes Line->Level negative");
4709 Line->Level += LevelDifference;
4710 // Comments stored before the preprocessor directive need to be output
4711 // before the preprocessor directive, at the same level as the
4712 // preprocessor directive, as we consider them to apply to the directive.
4713 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
4714 PPBranchLevel > 0) {
4715 Line->Level += PPBranchLevel;
4717 flushComments(isOnNewLine(*FormatTok));
4718 parsePPDirective();
4719 PreviousWasComment = FormatTok->is(tok::comment);
4720 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4721 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4724 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4725 !Line->InPPDirective) {
4726 continue;
4729 if (FormatTok->is(tok::identifier) &&
4730 Macros.defined(FormatTok->TokenText) &&
4731 // FIXME: Allow expanding macros in preprocessor directives.
4732 !Line->InPPDirective) {
4733 FormatToken *ID = FormatTok;
4734 unsigned Position = Tokens->getPosition();
4736 // To correctly parse the code, we need to replace the tokens of the macro
4737 // call with its expansion.
4738 auto PreCall = std::move(Line);
4739 Line.reset(new UnwrappedLine);
4740 bool OldInExpansion = InExpansion;
4741 InExpansion = true;
4742 // We parse the macro call into a new line.
4743 auto Args = parseMacroCall();
4744 InExpansion = OldInExpansion;
4745 assert(Line->Tokens.front().Tok == ID);
4746 // And remember the unexpanded macro call tokens.
4747 auto UnexpandedLine = std::move(Line);
4748 // Reset to the old line.
4749 Line = std::move(PreCall);
4751 LLVM_DEBUG({
4752 llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
4753 if (Args) {
4754 llvm::dbgs() << "(";
4755 for (const auto &Arg : Args.value())
4756 for (const auto &T : Arg)
4757 llvm::dbgs() << T->TokenText << " ";
4758 llvm::dbgs() << ")";
4760 llvm::dbgs() << "\n";
4762 if (Macros.objectLike(ID->TokenText) && Args &&
4763 !Macros.hasArity(ID->TokenText, Args->size())) {
4764 // The macro is either
4765 // - object-like, but we got argumnets, or
4766 // - overloaded to be both object-like and function-like, but none of
4767 // the function-like arities match the number of arguments.
4768 // Thus, expand as object-like macro.
4769 LLVM_DEBUG(llvm::dbgs()
4770 << "Macro \"" << ID->TokenText
4771 << "\" not overloaded for arity " << Args->size()
4772 << "or not function-like, using object-like overload.");
4773 Args.reset();
4774 UnexpandedLine->Tokens.resize(1);
4775 Tokens->setPosition(Position);
4776 nextToken();
4777 assert(!Args && Macros.objectLike(ID->TokenText));
4779 if ((!Args && Macros.objectLike(ID->TokenText)) ||
4780 (Args && Macros.hasArity(ID->TokenText, Args->size()))) {
4781 // Next, we insert the expanded tokens in the token stream at the
4782 // current position, and continue parsing.
4783 Unexpanded[ID] = std::move(UnexpandedLine);
4784 SmallVector<FormatToken *, 8> Expansion =
4785 Macros.expand(ID, std::move(Args));
4786 if (!Expansion.empty())
4787 FormatTok = Tokens->insertTokens(Expansion);
4789 LLVM_DEBUG({
4790 llvm::dbgs() << "Expanded: ";
4791 for (const auto &T : Expansion)
4792 llvm::dbgs() << T->TokenText << " ";
4793 llvm::dbgs() << "\n";
4795 } else {
4796 LLVM_DEBUG({
4797 llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
4798 << "\", because it was used ";
4799 if (Args)
4800 llvm::dbgs() << "with " << Args->size();
4801 else
4802 llvm::dbgs() << "without";
4803 llvm::dbgs() << " arguments, which doesn't match any definition.\n";
4805 Tokens->setPosition(Position);
4806 FormatTok = ID;
4810 if (FormatTok->isNot(tok::comment)) {
4811 distributeComments(Comments, FormatTok);
4812 Comments.clear();
4813 return;
4816 Comments.push_back(FormatTok);
4817 } while (!eof());
4819 distributeComments(Comments, nullptr);
4820 Comments.clear();
4823 namespace {
4824 template <typename Iterator>
4825 void pushTokens(Iterator Begin, Iterator End,
4826 llvm::SmallVectorImpl<FormatToken *> &Into) {
4827 for (auto I = Begin; I != End; ++I) {
4828 Into.push_back(I->Tok);
4829 for (const auto &Child : I->Children)
4830 pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
4833 } // namespace
4835 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
4836 UnwrappedLineParser::parseMacroCall() {
4837 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
4838 assert(Line->Tokens.empty());
4839 nextToken();
4840 if (FormatTok->isNot(tok::l_paren))
4841 return Args;
4842 unsigned Position = Tokens->getPosition();
4843 FormatToken *Tok = FormatTok;
4844 nextToken();
4845 Args.emplace();
4846 auto ArgStart = std::prev(Line->Tokens.end());
4848 int Parens = 0;
4849 do {
4850 switch (FormatTok->Tok.getKind()) {
4851 case tok::l_paren:
4852 ++Parens;
4853 nextToken();
4854 break;
4855 case tok::r_paren: {
4856 if (Parens > 0) {
4857 --Parens;
4858 nextToken();
4859 break;
4861 Args->push_back({});
4862 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
4863 nextToken();
4864 return Args;
4866 case tok::comma: {
4867 if (Parens > 0) {
4868 nextToken();
4869 break;
4871 Args->push_back({});
4872 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
4873 nextToken();
4874 ArgStart = std::prev(Line->Tokens.end());
4875 break;
4877 default:
4878 nextToken();
4879 break;
4881 } while (!eof());
4882 Line->Tokens.resize(1);
4883 Tokens->setPosition(Position);
4884 FormatTok = Tok;
4885 return {};
4888 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
4889 Line->Tokens.push_back(UnwrappedLineNode(Tok));
4890 if (MustBreakBeforeNextToken) {
4891 Line->Tokens.back().Tok->MustBreakBefore = true;
4892 MustBreakBeforeNextToken = false;
4896 } // end namespace format
4897 } // end namespace clang