[clang][modules] Don't prevent translation of FW_Private includes when explicitly...
[llvm-project.git] / clang / lib / Format / UnwrappedLineParser.cpp
blob30f9bcbfa29308e6d6da8ce182680fe821e380f3
1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "FormatTokenLexer.h"
18 #include "FormatTokenSource.h"
19 #include "Macros.h"
20 #include "TokenAnnotator.h"
21 #include "clang/Basic/TokenKinds.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/raw_os_ostream.h"
26 #include "llvm/Support/raw_ostream.h"
28 #include <algorithm>
29 #include <utility>
31 #define DEBUG_TYPE "format-parser"
33 namespace clang {
34 namespace format {
36 namespace {
38 void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
39 StringRef Prefix = "", bool PrintText = false) {
40 OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
41 << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
42 bool NewLine = false;
43 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
44 E = Line.Tokens.end();
45 I != E; ++I) {
46 if (NewLine) {
47 OS << Prefix;
48 NewLine = false;
50 OS << I->Tok->Tok.getName() << "["
51 << "T=" << (unsigned)I->Tok->getType()
52 << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
53 << "\"] ";
54 for (SmallVectorImpl<UnwrappedLine>::const_iterator
55 CI = I->Children.begin(),
56 CE = I->Children.end();
57 CI != CE; ++CI) {
58 OS << "\n";
59 printLine(OS, *CI, (Prefix + " ").str());
60 NewLine = true;
63 if (!NewLine)
64 OS << "\n";
67 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
68 printLine(llvm::dbgs(), Line);
71 class ScopedDeclarationState {
72 public:
73 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
74 bool MustBeDeclaration)
75 : Line(Line), Stack(Stack) {
76 Line.MustBeDeclaration = MustBeDeclaration;
77 Stack.push_back(MustBeDeclaration);
79 ~ScopedDeclarationState() {
80 Stack.pop_back();
81 if (!Stack.empty())
82 Line.MustBeDeclaration = Stack.back();
83 else
84 Line.MustBeDeclaration = true;
87 private:
88 UnwrappedLine &Line;
89 llvm::BitVector &Stack;
92 } // end anonymous namespace
94 class ScopedLineState {
95 public:
96 ScopedLineState(UnwrappedLineParser &Parser,
97 bool SwitchToPreprocessorLines = false)
98 : Parser(Parser), OriginalLines(Parser.CurrentLines) {
99 if (SwitchToPreprocessorLines)
100 Parser.CurrentLines = &Parser.PreprocessorDirectives;
101 else if (!Parser.Line->Tokens.empty())
102 Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
103 PreBlockLine = std::move(Parser.Line);
104 Parser.Line = std::make_unique<UnwrappedLine>();
105 Parser.Line->Level = PreBlockLine->Level;
106 Parser.Line->PPLevel = PreBlockLine->PPLevel;
107 Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
108 Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
111 ~ScopedLineState() {
112 if (!Parser.Line->Tokens.empty())
113 Parser.addUnwrappedLine();
114 assert(Parser.Line->Tokens.empty());
115 Parser.Line = std::move(PreBlockLine);
116 if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
117 Parser.MustBreakBeforeNextToken = true;
118 Parser.CurrentLines = OriginalLines;
121 private:
122 UnwrappedLineParser &Parser;
124 std::unique_ptr<UnwrappedLine> PreBlockLine;
125 SmallVectorImpl<UnwrappedLine> *OriginalLines;
128 class CompoundStatementIndenter {
129 public:
130 CompoundStatementIndenter(UnwrappedLineParser *Parser,
131 const FormatStyle &Style, unsigned &LineLevel)
132 : CompoundStatementIndenter(Parser, LineLevel,
133 Style.BraceWrapping.AfterControlStatement,
134 Style.BraceWrapping.IndentBraces) {}
135 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
136 bool WrapBrace, bool IndentBrace)
137 : LineLevel(LineLevel), OldLineLevel(LineLevel) {
138 if (WrapBrace)
139 Parser->addUnwrappedLine();
140 if (IndentBrace)
141 ++LineLevel;
143 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
145 private:
146 unsigned &LineLevel;
147 unsigned OldLineLevel;
// Constructs a parser over \p Tokens that hands finished lines to \p Callback.
// The parser starts with a fresh, empty current line, CurrentLines pointing at
// Lines, no token source (the Tokens member is null here and is assigned in
// parse()), and PPBranchLevel at -1. Include-guard tracking starts as
// IG_Rejected when Style.IndentPPDirectives is PPDIS_None and as IG_Inited
// otherwise.
150 UnwrappedLineParser::UnwrappedLineParser(
151 SourceManager &SourceMgr, const FormatStyle &Style,
152 const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
153 ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
154 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
155 IdentifierTable &IdentTable)
156 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
157 CurrentLines(&Lines), Style(Style), Keywords(Keywords),
158 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
159 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
160 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
161 ? IG_Rejected
162 : IG_Inited),
163 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
164 Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {}
166 void UnwrappedLineParser::reset() {
167 PPBranchLevel = -1;
168 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
169 ? IG_Rejected
170 : IG_Inited;
171 IncludeGuardToken = nullptr;
172 Line.reset(new UnwrappedLine);
173 CommentsBeforeNextToken.clear();
174 FormatTok = nullptr;
175 MustBreakBeforeNextToken = false;
176 IsDecltypeAutoFunction = false;
177 PreprocessorDirectives.clear();
178 CurrentLines = &Lines;
179 DeclarationScopeStack.clear();
180 NestedTooDeep.clear();
181 NestedLambdas.clear();
182 PPStack.clear();
183 Line->FirstStartColumn = FirstStartColumn;
185 if (!Unexpanded.empty())
186 for (FormatToken *Token : AllTokens)
187 Token->MacroCtx.reset();
188 CurrentExpandedLines.clear();
189 ExpandedLines.clear();
190 Unexpanded.clear();
191 InExpansion = false;
192 Reconstruct.reset();
// Parses the whole token stream and forwards every resulting UnwrappedLine to
// Callback. The loop body is one complete pass: it starts from reset() and a
// rewound token source, and another pass follows for as long as
// PPLevelBranchIndex is non-empty after the bookkeeping at the end of the
// body. Each pass ends with Callback.finishRun() and clears Lines.
195 void UnwrappedLineParser::parse() {
196 IndexedTokenSource TokenSource(AllTokens);
197 Line->FirstStartColumn = FirstStartColumn;
198 do {
199 LLVM_DEBUG(llvm::dbgs() << "----\n");
200 reset();
201 Tokens = &TokenSource;
202 TokenSource.reset();
204 readToken();
205 parseFile();
207 // If we found an include guard then all preprocessor directives (other than
208 // the guard) are over-indented by one.
209 if (IncludeGuard == IG_Found) {
210 for (auto &Line : Lines)
211 if (Line.InPPDirective && Line.Level > 0)
212 --Line.Level;
215 // Create line with eof token.
216 assert(eof());
217 pushToken(FormatTok);
218 addUnwrappedLine();
220 // In a first run, format everything with the lines containing macro calls
221 // replaced by the expansion.
222 if (!ExpandedLines.empty()) {
223 LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
224 for (const auto &Line : Lines) {
225 if (!Line.Tokens.empty()) {
// ExpandedLines is looked up by the first token of the line.
226 auto it = ExpandedLines.find(Line.Tokens.begin()->Tok);
227 if (it != ExpandedLines.end()) {
228 for (const auto &Expanded : it->second) {
229 LLVM_DEBUG(printDebugInfo(Expanded));
230 Callback.consumeUnwrappedLine(Expanded);
// The expanded lines stand in for this line in this run, so the unexpanded
// form is not passed to the callback here.
232 continue;
235 LLVM_DEBUG(printDebugInfo(Line));
236 Callback.consumeUnwrappedLine(Line);
238 Callback.finishRun();
// Regular run: every line is passed on as parsed.
241 LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
242 for (const UnwrappedLine &Line : Lines) {
243 LLVM_DEBUG(printDebugInfo(Line));
244 Callback.consumeUnwrappedLine(Line);
246 Callback.finishRun();
247 Lines.clear();
// Drop trailing levels whose branch index has reached the last counted
// branch, then advance the index of the innermost remaining level (if any).
248 while (!PPLevelBranchIndex.empty() &&
249 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
250 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
251 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
253 if (!PPLevelBranchIndex.empty()) {
254 ++PPLevelBranchIndex.back();
255 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
256 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
258 } while (!PPLevelBranchIndex.empty());
261 void UnwrappedLineParser::parseFile() {
262 // The top-level context in a file always has declarations, except for pre-
263 // processor directives and JavaScript files.
264 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
265 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
266 MustBeDeclaration);
267 if (Style.Language == FormatStyle::LK_TextProto)
268 parseBracedList();
269 else
270 parseLevel();
271 // Make sure to format the remaining tokens.
273 // LK_TextProto is special since its top-level is parsed as the body of a
274 // braced list, which does not necessarily have natural line separators such
275 // as a semicolon. Comments after the last entry that have been determined to
276 // not belong to that line, as in:
277 // key: value
278 // // endfile comment
279 // do not have a chance to be put on a line of their own until this point.
280 // Here we add this newline before end-of-file comments.
281 if (Style.Language == FormatStyle::LK_TextProto &&
282 !CommentsBeforeNextToken.empty()) {
283 addUnwrappedLine();
285 flushComments(true);
286 addUnwrappedLine();
289 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
290 do {
291 switch (FormatTok->Tok.getKind()) {
292 case tok::l_brace:
293 return;
294 default:
295 if (FormatTok->is(Keywords.kw_where)) {
296 addUnwrappedLine();
297 nextToken();
298 parseCSharpGenericTypeConstraint();
299 break;
301 nextToken();
302 break;
304 } while (!eof());
307 void UnwrappedLineParser::parseCSharpAttribute() {
308 int UnpairedSquareBrackets = 1;
309 do {
310 switch (FormatTok->Tok.getKind()) {
311 case tok::r_square:
312 nextToken();
313 --UnpairedSquareBrackets;
314 if (UnpairedSquareBrackets == 0) {
315 addUnwrappedLine();
316 return;
318 break;
319 case tok::l_square:
320 ++UnpairedSquareBrackets;
321 nextToken();
322 break;
323 default:
324 nextToken();
325 break;
327 } while (!eof());
330 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
331 if (!Lines.empty() && Lines.back().InPPDirective)
332 return true;
334 const FormatToken *Previous = Tokens->getPreviousToken();
335 return Previous && Previous->is(tok::comment) &&
336 (Previous->IsMultiline || Previous->NewlinesBefore > 0);
339 /// \brief Parses structural elements until end of input or, when
/// \p OpeningBrace is given, until a closing brace (an r_brace or a
/// TT_MacroBlockEnd token) is reached. That closing brace is not consumed.
340 /// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
341 /// \param IfKind The \p if statement kind in the level.
342 /// \param IfLeftBrace The left brace of the \p if block in the level.
343 /// \returns true if a simple block of if/else/for/while, or false otherwise.
344 /// (A simple block has a single statement.)
345 bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
346 IfStmtKind *IfKind,
347 FormatToken **IfLeftBrace) {
348 const bool InRequiresExpression =
349 OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
// Evaluated once on entry; it is always true when RemoveBracesLLVM is off,
// which makes the "simple block" result below false in that case.
350 const bool IsPrecededByCommentOrPPDirective =
351 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
352 FormatToken *IfLBrace = nullptr;
353 bool HasDoWhile = false;
354 bool HasLabel = false;
355 unsigned StatementCount = 0;
356 bool SwitchLabelEncountered = false;
358 do {
359 if (FormatTok->isAttribute()) {
360 nextToken();
361 continue;
// Macro block begin/end tokens are dispatched like braces.
363 tok::TokenKind kind = FormatTok->Tok.getKind();
364 if (FormatTok->getType() == TT_MacroBlockBegin)
365 kind = tok::l_brace;
366 else if (FormatTok->getType() == TT_MacroBlockEnd)
367 kind = tok::r_brace;
// Parses one structural element and counts it as a statement. Once HasDoWhile
// or HasLabel has been set, nullptr is passed for it from then on.
369 auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile,
370 &HasLabel, &StatementCount] {
371 parseStructuralElement(OpeningBrace, IfKind, &IfLBrace,
372 HasDoWhile ? nullptr : &HasDoWhile,
373 HasLabel ? nullptr : &HasLabel);
374 ++StatementCount;
375 assert(StatementCount > 0 && "StatementCount overflow!");
378 switch (kind) {
379 case tok::comment:
380 nextToken();
381 addUnwrappedLine();
382 break;
383 case tok::l_brace:
384 if (InRequiresExpression) {
385 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
386 } else if (FormatTok->Previous &&
387 FormatTok->Previous->ClosesRequiresClause) {
388 // We need the 'default' case here to correctly parse a function
389 // l_brace.
390 ParseDefault();
391 continue;
393 if (!InRequiresExpression && FormatTok->isNot(TT_MacroBlockBegin) &&
394 tryToParseBracedList()) {
395 continue;
397 parseBlock();
398 ++StatementCount;
399 assert(StatementCount > 0 && "StatementCount overflow!");
400 addUnwrappedLine();
401 break;
// With an opening brace, the level ends here and the closing brace is left for
// the caller. Without one, the brace is consumed and put on a line of its own.
402 case tok::r_brace:
403 if (OpeningBrace) {
404 if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
405 !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
406 return false;
// Only a real r_brace closing exactly one statement, with no label, no
// do-while and no comment/PP directive around it, counts as a simple block.
408 if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
409 HasDoWhile || IsPrecededByCommentOrPPDirective ||
410 precededByCommentOrPPDirective()) {
411 return false;
// A comment on the same line as the closing brace also rules it out.
413 const FormatToken *Next = Tokens->peekNextToken();
414 if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
415 return false;
416 if (IfLeftBrace)
417 *IfLeftBrace = IfLBrace;
418 return true;
420 nextToken();
421 addUnwrappedLine();
422 break;
423 case tok::kw_default: {
// Look ahead past any comments to see whether a ':' follows, then rewind to
// the stored position.
424 unsigned StoredPosition = Tokens->getPosition();
425 FormatToken *Next;
426 do {
427 Next = Tokens->getNextToken();
428 assert(Next);
429 } while (Next->is(tok::comment));
430 FormatTok = Tokens->setPosition(StoredPosition);
431 if (Next->isNot(tok::colon)) {
432 // default not followed by ':' is not a case label; treat it like
433 // an identifier.
434 parseStructuralElement();
435 break;
437 // Else, if it is 'default:', fall through to the case handling.
438 [[fallthrough]];
440 case tok::kw_case:
441 if (Style.isProto() || Style.isVerilog() ||
442 (Style.isJavaScript() && Line->MustBeDeclaration)) {
443 // Proto: there are no switch/case statements
444 // Verilog: Case labels don't have this word. We handle case
445 // labels including default in TokenAnnotator.
446 // JavaScript: A 'case: string' style field declaration.
447 ParseDefault();
448 break;
// The level is raised only for the first label seen in this level.
450 if (!SwitchLabelEncountered &&
451 (Style.IndentCaseLabels ||
452 (Line->InPPDirective && Line->Level == 1))) {
453 ++Line->Level;
455 SwitchLabelEncountered = true;
456 parseStructuralElement();
457 break;
458 case tok::l_square:
459 if (Style.isCSharp()) {
460 nextToken();
461 parseCSharpAttribute();
462 break;
464 if (handleCppAttributes())
465 break;
466 [[fallthrough]];
467 default:
468 ParseDefault();
469 break;
471 } while (!eof());
473 return false;
// Looks ahead from the current token (which must be an l_brace) and assigns a
// block kind, BK_Block or BK_BracedInit, to the braces it meets. The scan stops
// at eof or once every opened brace has been closed; the token position is
// restored before returning. \p ExpectClassBody affects how a ';' after the
// outermost closing brace is interpreted.
476 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
477 // We'll parse forward through the tokens until we hit
478 // a closing brace or eof - note that getNextToken() will
479 // parse macros, so this will magically work inside macro
480 // definitions, too.
481 unsigned StoredPosition = Tokens->getPosition();
482 FormatToken *Tok = FormatTok;
483 const FormatToken *PrevTok = Tok->Previous;
484 // Keep a stack of positions of lbrace tokens. We will
485 // update information about whether an lbrace starts a
486 // braced init list or a different block during the loop.
487 struct StackEntry {
488 FormatToken *Tok;
489 const FormatToken *PrevTok;
491 SmallVector<StackEntry, 8> LBraceStack;
492 assert(Tok->is(tok::l_brace));
493 do {
494 // Get next non-comment token.
495 FormatToken *NextTok;
496 do {
497 NextTok = Tokens->getNextToken();
498 } while (NextTok->is(tok::comment));
500 switch (Tok->Tok.getKind()) {
501 case tok::l_brace:
502 if (Style.isJavaScript() && PrevTok) {
503 if (PrevTok->isOneOf(tok::colon, tok::less)) {
504 // A ':' indicates this code is in a type, or a braced list
505 // following a label in an object literal ({a: {b: 1}}).
506 // A '<' could be an object used in a comparison, but that is nonsense
507 // code (can never return true), so more likely it is a generic type
508 // argument (`X<{a: string; b: number}>`).
509 // The code below could be confused by semicolons between the
510 // individual members in a type member list, which would normally
511 // trigger BK_Block. In both cases, this must be parsed as an inline
512 // braced init.
513 Tok->setBlockKind(BK_BracedInit);
514 } else if (PrevTok->is(tok::r_paren)) {
515 // `) { }` can only occur in function or method declarations in JS.
516 Tok->setBlockKind(BK_Block);
518 } else {
519 Tok->setBlockKind(BK_Unknown);
521 LBraceStack.push_back({Tok, PrevTok});
522 break;
523 case tok::r_brace:
524 if (LBraceStack.empty())
525 break;
// Only braces still marked BK_Unknown are classified here, based mostly on the
// token that follows the closing brace.
526 if (LBraceStack.back().Tok->is(BK_Unknown)) {
527 bool ProbablyBracedList = false;
528 if (Style.Language == FormatStyle::LK_Proto) {
529 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
530 } else {
531 // Skip NextTok over preprocessor lines, otherwise we may not
532 // properly diagnose the block as a braced initializer
533 // if the comma separator appears after the pp directive.
534 while (NextTok->is(tok::hash)) {
535 ScopedMacroState MacroState(*Line, Tokens, NextTok);
536 do {
537 NextTok = Tokens->getNextToken();
538 } while (NextTok->isNot(tok::eof));
541 // Using OriginalColumn to distinguish between ObjC methods and
542 // binary operators is a bit hacky.
543 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
544 NextTok->OriginalColumn == 0;
546 // Try to detect a braced list. Note that regardless how we mark inner
547 // braces here, we will overwrite the BlockKind later if we parse a
548 // braced list (where all blocks inside are by default braced lists),
549 // or when we explicitly detect blocks (for example while parsing
550 // lambdas).
552 // If we already marked the opening brace as braced list, the closing
553 // must also be part of it.
554 ProbablyBracedList = LBraceStack.back().Tok->is(TT_BracedListLBrace);
556 ProbablyBracedList = ProbablyBracedList ||
557 (Style.isJavaScript() &&
558 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
559 Keywords.kw_as));
560 ProbablyBracedList = ProbablyBracedList ||
561 (Style.isCpp() && NextTok->is(tok::l_paren));
563 // If there is a comma, semicolon or right paren after the closing
564 // brace, we assume this is a braced initializer list.
565 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
566 // braced list in JS.
567 ProbablyBracedList =
568 ProbablyBracedList ||
569 NextTok->isOneOf(tok::comma, tok::period, tok::colon,
570 tok::r_paren, tok::r_square, tok::ellipsis);
572 // Distinguish between braced list in a constructor initializer list
573 // followed by constructor body, or just adjacent blocks.
574 ProbablyBracedList =
575 ProbablyBracedList ||
576 (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok &&
577 LBraceStack.back().PrevTok->isOneOf(tok::identifier,
578 tok::greater));
580 ProbablyBracedList =
581 ProbablyBracedList ||
582 (NextTok->is(tok::identifier) &&
583 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));
// A ';' after the brace counts towards a braced list unless this is the
// outermost brace and a class body is expected.
585 ProbablyBracedList = ProbablyBracedList ||
586 (NextTok->is(tok::semi) &&
587 (!ExpectClassBody || LBraceStack.size() != 1));
589 ProbablyBracedList =
590 ProbablyBracedList ||
591 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
593 if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
594 // We can have an array subscript after a braced init
595 // list, but C++11 attributes are expected after blocks.
596 NextTok = Tokens->getNextToken();
597 ProbablyBracedList = NextTok->isNot(tok::l_square);
// Both braces of the pair receive the same kind.
600 if (ProbablyBracedList) {
601 Tok->setBlockKind(BK_BracedInit);
602 LBraceStack.back().Tok->setBlockKind(BK_BracedInit);
603 } else {
604 Tok->setBlockKind(BK_Block);
605 LBraceStack.back().Tok->setBlockKind(BK_Block);
608 LBraceStack.pop_back();
609 break;
610 case tok::identifier:
611 if (Tok->isNot(TT_StatementMacro))
612 break;
613 [[fallthrough]];
614 case tok::kw_if:
615 if (PrevTok->is(tok::hash))
616 break;
617 [[fallthrough]];
// Any of these tokens inside a still-unclassified brace marks it as a block.
618 case tok::at:
619 case tok::semi:
620 case tok::kw_while:
621 case tok::kw_for:
622 case tok::kw_switch:
623 case tok::kw_try:
624 case tok::kw___try:
625 if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown))
626 LBraceStack.back().Tok->setBlockKind(BK_Block);
627 break;
628 default:
629 break;
631 PrevTok = Tok;
632 Tok = NextTok;
633 } while (Tok->isNot(tok::eof) && !LBraceStack.empty());
635 // Assume other blocks for all unclosed opening braces.
636 for (const auto &Entry : LBraceStack)
637 if (Entry.Tok->is(BK_Unknown))
638 Entry.Tok->setBlockKind(BK_Block);
// Rewind so that regular parsing continues from where the scan started.
640 FormatTok = Tokens->setPosition(StoredPosition);
643 // Sets the token type of the directly previous right brace.
644 void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) {
645 if (auto Prev = FormatTok->getPreviousNonComment();
646 Prev && Prev->is(tok::r_brace)) {
647 Prev->setFinalizedType(Type);
// Folds the std::hash of \p v into the running hash value \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= mixed;
}
657 size_t UnwrappedLineParser::computePPHash() const {
658 size_t h = 0;
659 for (const auto &i : PPStack) {
660 hash_combine(h, size_t(i.Kind));
661 hash_combine(h, i.Line);
663 return h;
666 // Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
667 // is not null, subtracts its length (plus the preceding space) when computing
668 // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
669 // running the token annotator on it so that we can restore them afterward.
// Always returns true when Style.ColumnLimit is 0.
670 bool UnwrappedLineParser::mightFitOnOneLine(
671 UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
672 const auto ColumnLimit = Style.ColumnLimit;
673 if (ColumnLimit == 0)
674 return true;
676 auto &Tokens = ParsedLine.Tokens;
677 assert(!Tokens.empty());
679 const auto *LastToken = Tokens.back().Tok;
680 assert(LastToken);
// Snapshot every token (and its children) so that the annotator's changes can
// be undone below. Each copy is heap-allocated here and deleted in the restore
// loop.
682 SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
684 int Index = 0;
685 for (const auto &Token : Tokens) {
686 assert(Token.Tok);
687 auto &SavedToken = SavedTokens[Index++];
688 SavedToken.Tok = new FormatToken;
689 SavedToken.Tok->copyFrom(*Token.Tok);
// NOTE(review): Token is a const reference, so this std::move presumably
// resolves to a copy and leaves Token.Children intact - confirm this is the
// intent before changing it.
690 SavedToken.Children = std::move(Token.Children);
// Annotate the line to obtain token lengths.
693 AnnotatedLine Line(ParsedLine);
694 assert(Line.Last == LastToken);
696 TokenAnnotator Annotator(Style, Keywords);
697 Annotator.annotate(Line);
698 Annotator.calculateFormattingInformation(Line);
700 auto Length = LastToken->TotalLength;
701 if (OpeningBrace) {
702 assert(OpeningBrace != Tokens.front().Tok);
// If the brace's TotalLength is exactly ColumnLimit larger than that of its
// predecessor, take that amount back out as well.
703 if (auto Prev = OpeningBrace->Previous;
704 Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
705 Length -= ColumnLimit;
707 Length -= OpeningBrace->TokenText.size() + 1;
// A leading r_brace is not counted either.
710 if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) {
711 assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
712 Length -= FirstToken->TokenText.size() + 1;
// Restore the tokens from the snapshot and free the temporary copies.
// NOTE(review): SavedToken is a const reference, so this std::move presumably
// copies as well - confirm.
715 Index = 0;
716 for (auto &Token : Tokens) {
717 const auto &SavedToken = SavedTokens[Index++];
718 Token.Tok->copyFrom(*SavedToken.Tok);
719 Token.Children = std::move(SavedToken.Children);
720 delete SavedToken.Tok;
723 // If these change PPLevel needs to be used for get correct indentation.
724 assert(!Line.InMacroBody);
725 assert(!Line.InPPDirective);
726 return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
/// Parses a block that starts at the current token, which must be an l_brace,
/// a TT_MacroBlockBegin token or, for Verilog, a begin or hierarchy keyword.
/// \param MustBeDeclaration Passed to the declaration-scope state that covers
/// the block body.
/// \param AddLevels Number of levels added to the line level for the body.
/// \param MunchSemi If true, a ';' directly following the block is consumed.
/// \param KeepBraces If true, the braces are only considered removable when
/// parseLevel() reported an \p if left brace that is not optional.
/// \param IfKind Forwarded to parseLevel().
/// \param UnindentWhitesmithsBraces If true, the line level is decremented
/// again after the opening line has been added.
/// \returns \p nullptr if the nesting is deeper than 300 levels; otherwise the
/// \p if left brace reported by parseLevel() for the body (may be \p nullptr).
729 FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
730 unsigned AddLevels, bool MunchSemi,
731 bool KeepBraces,
732 IfStmtKind *IfKind,
733 bool UnindentWhitesmithsBraces) {
734 auto HandleVerilogBlockLabel = [this]() {
735 // ":" name
736 if (Style.isVerilog() && FormatTok->is(tok::colon)) {
737 nextToken();
738 if (Keywords.isVerilogIdentifier(*FormatTok))
739 nextToken();
743 // Whether this is a Verilog-specific block that has a special header like a
744 // module.
745 const bool VerilogHierarchy =
746 Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok);
747 assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
748 (Style.isVerilog() &&
749 (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
750 "'{' or macro block token expected");
// Remember the opening token and the index the opening line will get in
// CurrentLines; both are used by RemoveBraces() below.
751 FormatToken *Tok = FormatTok;
752 const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
753 auto Index = CurrentLines->size();
754 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
755 FormatTok->setBlockKind(BK_Block);
757 // For Whitesmiths mode, jump to the next level prior to skipping over the
758 // braces.
759 if (!VerilogHierarchy && AddLevels > 0 &&
760 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
761 ++Line->Level;
// Hash of the preprocessor stack at the opening brace; compared with the hash
// at the closing brace before the opening and closing lines are linked.
764 size_t PPStartHash = computePPHash();
766 const unsigned InitialLevel = Line->Level;
767 if (VerilogHierarchy) {
768 AddLevels += parseVerilogHierarchyHeader();
769 } else {
770 nextToken(/*LevelDifference=*/AddLevels);
771 HandleVerilogBlockLabel();
774 // Bail out if there are too many levels. Otherwise, the stack might overflow.
775 if (Line->Level > 300)
776 return nullptr;
778 if (MacroBlock && FormatTok->is(tok::l_paren))
779 parseParens();
781 size_t NbPreprocessorDirectives =
782 !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
783 addUnwrappedLine();
784 size_t OpeningLineIndex =
785 CurrentLines->empty()
786 ? (UnwrappedLine::kInvalidIndex)
787 : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
789 // Whitesmiths is weird here. The brace needs to be indented for the namespace
790 // block, but the block itself may not be indented depending on the style
791 // settings. This allows the format to back up one level in those cases.
792 if (UnindentWhitesmithsBraces)
793 --Line->Level;
795 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
796 MustBeDeclaration);
797 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
798 Line->Level += AddLevels;
800 FormatToken *IfLBrace = nullptr;
801 const bool SimpleBlock = parseLevel(Tok, IfKind, &IfLBrace);
803 if (eof())
804 return IfLBrace;
// The body did not end at the expected closing token: restore the level and
// give up on this block.
806 if (MacroBlock ? FormatTok->isNot(TT_MacroBlockEnd)
807 : FormatTok->isNot(tok::r_brace)) {
808 Line->Level = InitialLevel;
809 FormatTok->setBlockKind(BK_Block);
810 return IfLBrace;
813 if (FormatTok->is(tok::r_brace) && Tok->is(TT_NamespaceLBrace))
814 FormatTok->setFinalizedType(TT_NamespaceRBrace);
816 const bool IsFunctionRBrace =
817 FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace);
// Decides whether the braces of this block can be removed. The lambda captures
// by copy and is mutable, so its changes to Index and Tok stay local to it.
819 auto RemoveBraces = [=]() mutable {
820 if (!SimpleBlock)
821 return false;
822 assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
823 assert(FormatTok->is(tok::r_brace));
824 const bool WrappedOpeningBrace = !Tok->Previous;
825 if (WrappedOpeningBrace && FollowedByComment)
826 return false;
827 const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
828 if (KeepBraces && !HasRequiredIfBraces)
829 return false;
830 if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
831 const FormatToken *Previous = Tokens->getPreviousToken();
832 assert(Previous);
833 if (Previous->is(tok::r_brace) && !Previous->Optional)
834 return false;
836 assert(!CurrentLines->empty());
837 auto &LastLine = CurrentLines->back();
838 if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
839 return false;
840 if (Tok->is(TT_ElseLBrace))
841 return true;
842 if (WrappedOpeningBrace) {
843 assert(Index > 0);
844 --Index; // The line above the wrapped l_brace.
845 Tok = nullptr;
847 return mightFitOnOneLine((*CurrentLines)[Index], Tok);
// Removable braces are linked to each other through MatchingParen.
849 if (RemoveBraces()) {
850 Tok->MatchingParen = FormatTok;
851 FormatTok->MatchingParen = Tok;
854 size_t PPEndHash = computePPHash();
856 // Munch the closing brace.
857 nextToken(/*LevelDifference=*/-AddLevels);
859 // When this is a function block and there is an unnecessary semicolon
860 // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
861 // it later).
862 if (Style.RemoveSemicolon && IsFunctionRBrace) {
863 while (FormatTok->is(tok::semi)) {
864 FormatTok->Optional = true;
865 nextToken();
869 HandleVerilogBlockLabel();
871 if (MacroBlock && FormatTok->is(tok::l_paren))
872 parseParens();
874 Line->Level = InitialLevel;
876 if (FormatTok->is(tok::kw_noexcept)) {
877 // A noexcept in a requires expression.
878 nextToken();
881 if (FormatTok->is(tok::arrow)) {
882 // Following the } or noexcept we can find a trailing return type arrow
883 // as part of an implicit conversion constraint.
884 nextToken();
885 parseStructuralElement();
888 if (MunchSemi && FormatTok->is(tok::semi))
889 nextToken();
// Opening and closing lines are only linked when the preprocessor stack hashes
// to the same value at both braces.
891 if (PPStartHash == PPEndHash) {
892 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
893 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
894 // Update the opening line to add the forward reference as well
895 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
896 CurrentLines->size() - 1;
900 return IfLBrace;
903 static bool isGoogScope(const UnwrappedLine &Line) {
904 // FIXME: Closure-library specific stuff should not be hard-coded but be
905 // configurable.
906 if (Line.Tokens.size() < 4)
907 return false;
908 auto I = Line.Tokens.begin();
909 if (I->Tok->TokenText != "goog")
910 return false;
911 ++I;
912 if (I->Tok->isNot(tok::period))
913 return false;
914 ++I;
915 if (I->Tok->TokenText != "scope")
916 return false;
917 ++I;
918 return I->Tok->is(tok::l_paren);
921 static bool isIIFE(const UnwrappedLine &Line,
922 const AdditionalKeywords &Keywords) {
923 // Look for the start of an immediately invoked anonymous function.
924 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
925 // This is commonly done in JavaScript to create a new, anonymous scope.
926 // Example: (function() { ... })()
927 if (Line.Tokens.size() < 3)
928 return false;
929 auto I = Line.Tokens.begin();
930 if (I->Tok->isNot(tok::l_paren))
931 return false;
932 ++I;
933 if (I->Tok->isNot(Keywords.kw_function))
934 return false;
935 ++I;
936 return I->Tok->is(tok::l_paren);
939 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
940 const FormatToken &InitialToken) {
941 tok::TokenKind Kind = InitialToken.Tok.getKind();
942 if (InitialToken.is(TT_NamespaceMacro))
943 Kind = tok::kw_namespace;
945 switch (Kind) {
946 case tok::kw_namespace:
947 return Style.BraceWrapping.AfterNamespace;
948 case tok::kw_class:
949 return Style.BraceWrapping.AfterClass;
950 case tok::kw_union:
951 return Style.BraceWrapping.AfterUnion;
952 case tok::kw_struct:
953 return Style.BraceWrapping.AfterStruct;
954 case tok::kw_enum:
955 return Style.BraceWrapping.AfterEnum;
956 default:
957 return false;
// Parses a '{'-delimited block nested inside the current line. The body is
// parsed under a ScopedLineState and with MustBeDeclaration set to false, one
// level deeper than the surrounding line - except for JavaScript lines that
// start like goog.scope(...) or an immediately invoked function, where the
// level is left unchanged.
// NOTE(review): how the scoped objects' destruction is ordered relative to the
// final nextToken() depends on the enclosing braces, which are not visible in
// this view of the file - confirm before reordering anything here.
961 void UnwrappedLineParser::parseChildBlock() {
962 assert(FormatTok->is(tok::l_brace));
963 FormatTok->setBlockKind(BK_Block);
964 const FormatToken *OpeningBrace = FormatTok;
965 nextToken();
// Must be computed before LineState replaces the current line.
967 bool SkipIndent = (Style.isJavaScript() &&
968 (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
969 ScopedLineState LineState(*this);
970 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
971 /*MustBeDeclaration=*/false);
972 Line->Level += SkipIndent ? 0 : 1;
973 parseLevel(OpeningBrace);
974 flushComments(isOnNewLine(*FormatTok));
975 Line->Level -= SkipIndent ? 0 : 1;
977 nextToken();
980 void UnwrappedLineParser::parsePPDirective() {
981 assert(FormatTok->is(tok::hash) && "'#' expected");
982 ScopedMacroState MacroState(*Line, Tokens, FormatTok);
984 nextToken();
986 if (!FormatTok->Tok.getIdentifierInfo()) {
987 parsePPUnknown();
988 return;
991 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
992 case tok::pp_define:
993 parsePPDefine();
994 return;
995 case tok::pp_if:
996 parsePPIf(/*IfDef=*/false);
997 break;
998 case tok::pp_ifdef:
999 case tok::pp_ifndef:
1000 parsePPIf(/*IfDef=*/true);
1001 break;
1002 case tok::pp_else:
1003 case tok::pp_elifdef:
1004 case tok::pp_elifndef:
1005 case tok::pp_elif:
1006 parsePPElse();
1007 break;
1008 case tok::pp_endif:
1009 parsePPEndIf();
1010 break;
1011 case tok::pp_pragma:
1012 parsePPPragma();
1013 break;
1014 default:
1015 parsePPUnknown();
1016 break;
1020 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1021 size_t Line = CurrentLines->size();
1022 if (CurrentLines == &PreprocessorDirectives)
1023 Line += Lines.size();
1025 if (Unreachable ||
1026 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1027 PPStack.push_back({PP_Unreachable, Line});
1028 } else {
1029 PPStack.push_back({PP_Conditional, Line});
1033 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1034 ++PPBranchLevel;
1035 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1036 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1037 PPLevelBranchIndex.push_back(0);
1038 PPLevelBranchCount.push_back(0);
1040 PPChainBranchIndex.push(Unreachable ? -1 : 0);
1041 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1042 conditionalCompilationCondition(Unreachable || Skip);
1045 void UnwrappedLineParser::conditionalCompilationAlternative() {
1046 if (!PPStack.empty())
1047 PPStack.pop_back();
1048 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1049 if (!PPChainBranchIndex.empty())
1050 ++PPChainBranchIndex.top();
1051 conditionalCompilationCondition(
1052 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1053 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1056 void UnwrappedLineParser::conditionalCompilationEnd() {
1057 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1058 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1059 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1060 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1062 // Guard against #endif's without #if.
1063 if (PPBranchLevel > -1)
1064 --PPBranchLevel;
1065 if (!PPChainBranchIndex.empty())
1066 PPChainBranchIndex.pop();
1067 if (!PPStack.empty())
1068 PPStack.pop_back();
1071 void UnwrappedLineParser::parsePPIf(bool IfDef) {
1072 bool IfNDef = FormatTok->is(tok::pp_ifndef);
1073 nextToken();
1074 bool Unreachable = false;
1075 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1076 Unreachable = true;
1077 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1078 Unreachable = true;
1079 conditionalCompilationStart(Unreachable);
1080 FormatToken *IfCondition = FormatTok;
1081 // If there's a #ifndef on the first line, and the only lines before it are
1082 // comments, it could be an include guard.
1083 bool MaybeIncludeGuard = IfNDef;
1084 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1085 for (auto &Line : Lines) {
1086 if (Line.Tokens.front().Tok->isNot(tok::comment)) {
1087 MaybeIncludeGuard = false;
1088 IncludeGuard = IG_Rejected;
1089 break;
1093 --PPBranchLevel;
1094 parsePPUnknown();
1095 ++PPBranchLevel;
1096 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1097 IncludeGuard = IG_IfNdefed;
1098 IncludeGuardToken = IfCondition;
1102 void UnwrappedLineParser::parsePPElse() {
1103 // If a potential include guard has an #else, it's not an include guard.
1104 if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1105 IncludeGuard = IG_Rejected;
1106 // Don't crash when there is an #else without an #if.
1107 assert(PPBranchLevel >= -1);
1108 if (PPBranchLevel == -1)
1109 conditionalCompilationStart(/*Unreachable=*/true);
1110 conditionalCompilationAlternative();
1111 --PPBranchLevel;
1112 parsePPUnknown();
1113 ++PPBranchLevel;
1116 void UnwrappedLineParser::parsePPEndIf() {
1117 conditionalCompilationEnd();
1118 parsePPUnknown();
1119 // If the #endif of a potential include guard is the last thing in the file,
1120 // then we found an include guard.
1121 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1122 Style.IndentPPDirectives != FormatStyle::PPDIS_None) {
1123 IncludeGuard = IG_Found;
1127 void UnwrappedLineParser::parsePPDefine() {
1128 nextToken();
1130 if (!FormatTok->Tok.getIdentifierInfo()) {
1131 IncludeGuard = IG_Rejected;
1132 IncludeGuardToken = nullptr;
1133 parsePPUnknown();
1134 return;
1137 if (IncludeGuard == IG_IfNdefed &&
1138 IncludeGuardToken->TokenText == FormatTok->TokenText) {
1139 IncludeGuard = IG_Defined;
1140 IncludeGuardToken = nullptr;
1141 for (auto &Line : Lines) {
1142 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1143 IncludeGuard = IG_Rejected;
1144 break;
1149 // In the context of a define, even keywords should be treated as normal
1150 // identifiers. Setting the kind to identifier is not enough, because we need
1151 // to treat additional keywords like __except as well, which are already
1152 // identifiers. Setting the identifier info to null interferes with include
1153 // guard processing above, and changes preprocessing nesting.
1154 FormatTok->Tok.setKind(tok::identifier);
1155 FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1156 nextToken();
1157 if (FormatTok->Tok.getKind() == tok::l_paren &&
1158 !FormatTok->hasWhitespaceBefore()) {
1159 parseParens();
1161 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1162 Line->Level += PPBranchLevel + 1;
1163 addUnwrappedLine();
1164 ++Line->Level;
1166 Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
1167 assert((int)Line->PPLevel >= 0);
1168 Line->InMacroBody = true;
1170 // Errors during a preprocessor directive can only affect the layout of the
1171 // preprocessor directive, and thus we ignore them. An alternative approach
1172 // would be to use the same approach we use on the file level (no
1173 // re-indentation if there was a structural error) within the macro
1174 // definition.
1175 parseFile();
1178 void UnwrappedLineParser::parsePPPragma() {
1179 Line->InPragmaDirective = true;
1180 parsePPUnknown();
1183 void UnwrappedLineParser::parsePPUnknown() {
1184 do {
1185 nextToken();
1186 } while (!eof());
1187 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1188 Line->Level += PPBranchLevel + 1;
1189 addUnwrappedLine();
1192 // Here we exclude certain tokens that are not usually the first token in an
1193 // unwrapped line. This is used in attempt to distinguish macro calls without
1194 // trailing semicolons from other constructs split to several lines.
1195 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1196 // Semicolon can be a null-statement, l_square can be a start of a macro or
1197 // a C++11 attribute, but this doesn't seem to be common.
1198 assert(Tok.isNot(TT_AttributeSquare));
1199 return !Tok.isOneOf(tok::semi, tok::l_brace,
1200 // Tokens that can only be used as binary operators and a
1201 // part of overloaded operator names.
1202 tok::period, tok::periodstar, tok::arrow, tok::arrowstar,
1203 tok::less, tok::greater, tok::slash, tok::percent,
1204 tok::lessless, tok::greatergreater, tok::equal,
1205 tok::plusequal, tok::minusequal, tok::starequal,
1206 tok::slashequal, tok::percentequal, tok::ampequal,
1207 tok::pipeequal, tok::caretequal, tok::greatergreaterequal,
1208 tok::lesslessequal,
1209 // Colon is used in labels, base class lists, initializer
1210 // lists, range-based for loops, ternary operator, but
1211 // should never be the first token in an unwrapped line.
1212 tok::colon,
1213 // 'noexcept' is a trailing annotation.
1214 tok::kw_noexcept);
1217 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1218 const FormatToken *FormatTok) {
1219 // FIXME: This returns true for C/C++ keywords like 'struct'.
1220 return FormatTok->is(tok::identifier) &&
1221 (!FormatTok->Tok.getIdentifierInfo() ||
1222 !FormatTok->isOneOf(
1223 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1224 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1225 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1226 Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1227 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1228 Keywords.kw_instanceof, Keywords.kw_interface,
1229 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1232 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1233 const FormatToken *FormatTok) {
1234 return FormatTok->Tok.isLiteral() ||
1235 FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1236 mustBeJSIdent(Keywords, FormatTok);
1239 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1240 // when encountered after a value (see mustBeJSIdentOrValue).
1241 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1242 const FormatToken *FormatTok) {
1243 return FormatTok->isOneOf(
1244 tok::kw_return, Keywords.kw_yield,
1245 // conditionals
1246 tok::kw_if, tok::kw_else,
1247 // loops
1248 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1249 // switch/case
1250 tok::kw_switch, tok::kw_case,
1251 // exceptions
1252 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1253 // declaration
1254 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1255 Keywords.kw_async, Keywords.kw_function,
1256 // import/export
1257 Keywords.kw_import, tok::kw_export);
1260 // Checks whether a token is a type in K&R C (aka C78).
1261 static bool isC78Type(const FormatToken &Tok) {
1262 return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1263 tok::kw_unsigned, tok::kw_float, tok::kw_double,
1264 tok::identifier);
1267 // This function checks whether a token starts the first parameter declaration
1268 // in a K&R C (aka C78) function definition, e.g.:
1269 // int f(a, b)
1270 // short a, b;
1271 // {
1272 // return a + b;
1273 // }
1274 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1275 const FormatToken *FuncName) {
1276 assert(Tok);
1277 assert(Next);
1278 assert(FuncName);
1280 if (FuncName->isNot(tok::identifier))
1281 return false;
1283 const FormatToken *Prev = FuncName->Previous;
1284 if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1285 return false;
1287 if (!isC78Type(*Tok) &&
1288 !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
1289 return false;
1292 if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1293 return false;
1295 Tok = Tok->Previous;
1296 if (!Tok || Tok->isNot(tok::r_paren))
1297 return false;
1299 Tok = Tok->Previous;
1300 if (!Tok || Tok->isNot(tok::identifier))
1301 return false;
1303 return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1306 bool UnwrappedLineParser::parseModuleImport() {
1307 assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
1309 if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
1310 !Token->Tok.getIdentifierInfo() &&
1311 !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) {
1312 return false;
1315 nextToken();
1316 while (!eof()) {
1317 if (FormatTok->is(tok::colon)) {
1318 FormatTok->setFinalizedType(TT_ModulePartitionColon);
1320 // Handle import <foo/bar.h> as we would an include statement.
1321 else if (FormatTok->is(tok::less)) {
1322 nextToken();
1323 while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1324 // Mark tokens up to the trailing line comments as implicit string
1325 // literals.
1326 if (FormatTok->isNot(tok::comment) &&
1327 !FormatTok->TokenText.startswith("//")) {
1328 FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1330 nextToken();
1333 if (FormatTok->is(tok::semi)) {
1334 nextToken();
1335 break;
1337 nextToken();
1340 addUnwrappedLine();
1341 return true;
1344 // readTokenWithJavaScriptASI reads the next token and terminates the current
1345 // line if JavaScript Automatic Semicolon Insertion must
1346 // happen between the current token and the next token.
1348 // This method is conservative - it cannot cover all edge cases of JavaScript,
1349 // but only aims to correctly handle certain well known cases. It *must not*
1350 // return true in speculative cases.
1351 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1352 FormatToken *Previous = FormatTok;
1353 readToken();
1354 FormatToken *Next = FormatTok;
1356 bool IsOnSameLine =
1357 CommentsBeforeNextToken.empty()
1358 ? Next->NewlinesBefore == 0
1359 : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1360 if (IsOnSameLine)
1361 return;
1363 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1364 bool PreviousStartsTemplateExpr =
1365 Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1366 if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1367 // If the line contains an '@' sign, the previous token might be an
1368 // annotation, which can precede another identifier/value.
1369 bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1370 return LineNode.Tok->is(tok::at);
1372 if (HasAt)
1373 return;
1375 if (Next->is(tok::exclaim) && PreviousMustBeValue)
1376 return addUnwrappedLine();
1377 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1378 bool NextEndsTemplateExpr =
1379 Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1380 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1381 (PreviousMustBeValue ||
1382 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1383 tok::minusminus))) {
1384 return addUnwrappedLine();
1386 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1387 isJSDeclOrStmt(Keywords, Next)) {
1388 return addUnwrappedLine();
// Parses one structural element starting at the current token.
//
// The function works in two phases. The first switch dispatches on tokens
// that only make sense at the beginning of a line (asm, namespace, access
// specifiers, control-flow keywords, extern, export, inline, identifiers with
// special token types, goto labels); most of those cases hand over to a
// dedicated parser and return. Everything else falls into the do/while loop,
// which consumes tokens one construct at a time until a case ends the element
// or eof() is reached.
//
// OpeningBrace: brace of the enclosing block, if any. It is used to detect a
//   requires-expression body, and K&R parameter detection is skipped when it
//   is non-null.
// IfKind: forwarded to parseIfThenElse().
// IfLeftBrace: if non-null, receives the token returned by parseIfThenElse().
// HasDoWhile: if non-null, set to true after a do-while has been parsed.
// HasLabel: if non-null, set to true after a goto label has been parsed.
//
// NOTE(review): lines that contained only braces or other punctuation are
// absent from this listing (see the gaps in the embedded numbering), so block
// nesting has to be inferred -- confirm against the upstream file before
// restructuring anything here.
1392 void UnwrappedLineParser::parseStructuralElement(
1393 const FormatToken *OpeningBrace, IfStmtKind *IfKind,
1394 FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1395 if (Style.Language == FormatStyle::LK_TableGen &&
1396 FormatTok->is(tok::pp_include)) {
1397 nextToken();
1398 if (FormatTok->is(tok::string_literal))
1399 nextToken();
1400 addUnwrappedLine();
1401 return;
1404 if (Style.isCpp()) {
// Consume leading attributes for as long as handleCppAttributes() succeeds.
1405 while (FormatTok->is(tok::l_square) && handleCppAttributes()) {
1407 } else if (Style.isVerilog()) {
1408 if (Keywords.isVerilogStructuredProcedure(*FormatTok)) {
1409 parseForOrWhileLoop(/*HasParens=*/false);
1410 return;
1412 if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) {
1413 parseForOrWhileLoop();
1414 return;
1416 if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
1417 Keywords.kw_assume, Keywords.kw_cover)) {
1418 parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
1419 return;
1422 // Skip things that can exist before keywords like 'if' and 'case'.
1423 while (true) {
1424 if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique,
1425 Keywords.kw_unique0)) {
1426 nextToken();
1427 } else if (FormatTok->is(tok::l_paren) &&
1428 Tokens->peekNextToken()->is(tok::star)) {
1429 parseParens();
1430 } else {
1431 break;
1436 // Tokens that only make sense at the beginning of a line.
1437 switch (FormatTok->Tok.getKind()) {
1438 case tok::kw_asm:
1439 nextToken();
1440 if (FormatTok->is(tok::l_brace)) {
1441 FormatTok->setFinalizedType(TT_InlineASMBrace);
1442 nextToken();
// Every token up to the closing brace is marked as finalized.
1443 while (FormatTok && !eof()) {
1444 if (FormatTok->is(tok::r_brace)) {
1445 FormatTok->setFinalizedType(TT_InlineASMBrace);
1446 nextToken();
1447 addUnwrappedLine();
1448 break;
1450 FormatTok->Finalized = true;
1451 nextToken();
1454 break;
1455 case tok::kw_namespace:
1456 parseNamespace();
1457 return;
1458 case tok::kw_public:
1459 case tok::kw_protected:
1460 case tok::kw_private:
1461 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1462 Style.isCSharp()) {
1463 nextToken();
1464 } else {
1465 parseAccessSpecifier();
1467 return;
1468 case tok::kw_if: {
1469 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1470 // field/method declaration.
1471 break;
1473 FormatToken *Tok = parseIfThenElse(IfKind);
1474 if (IfLeftBrace)
1475 *IfLeftBrace = Tok;
1476 return;
1478 case tok::kw_for:
1479 case tok::kw_while:
1480 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1481 // field/method declaration.
1482 break;
1484 parseForOrWhileLoop();
1485 return;
1486 case tok::kw_do:
1487 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1488 // field/method declaration.
1489 break;
1491 parseDoWhile();
1492 if (HasDoWhile)
1493 *HasDoWhile = true;
1494 return;
1495 case tok::kw_switch:
1496 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1497 // 'switch: string' field declaration.
1498 break;
1500 parseSwitch();
1501 return;
1502 case tok::kw_default:
1503 // In Verilog default along with other labels are handled in the next loop.
1504 if (Style.isVerilog())
1505 break;
1506 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1507 // 'default: string' field declaration.
1508 break;
1510 nextToken();
1511 if (FormatTok->is(tok::colon)) {
1512 FormatTok->setFinalizedType(TT_CaseLabelColon);
1513 parseLabel();
1514 return;
1516 // e.g. "default void f() {}" in a Java interface.
1517 break;
1518 case tok::kw_case:
1519 // Proto: there are no switch/case statements.
1520 if (Style.isProto()) {
1521 nextToken();
1522 return;
1524 if (Style.isVerilog()) {
1525 parseBlock();
1526 addUnwrappedLine();
1527 return;
1529 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1530 // 'case: string' field declaration.
1531 nextToken();
1532 break;
1534 parseCaseLabel();
1535 return;
1536 case tok::kw_try:
1537 case tok::kw___try:
1538 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1539 // field/method declaration.
1540 break;
1542 parseTryCatch();
1543 return;
1544 case tok::kw_extern:
1545 nextToken();
1546 if (Style.isVerilog()) {
1547 // In Verilog an extern module declaration looks like a start of module.
1548 // But there is no body and endmodule. So we handle it separately.
1549 if (Keywords.isVerilogHierarchy(*FormatTok)) {
1550 parseVerilogHierarchyHeader();
1551 return;
1553 } else if (FormatTok->is(tok::string_literal)) {
1554 nextToken();
1555 if (FormatTok->is(tok::l_brace)) {
1556 if (Style.BraceWrapping.AfterExternBlock)
1557 addUnwrappedLine();
1558 // Either we indent or for backwards compatibility we follow the
1559 // AfterExternBlock style.
1560 unsigned AddLevels =
1561 (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1562 (Style.BraceWrapping.AfterExternBlock &&
1563 Style.IndentExternBlock ==
1564 FormatStyle::IEBS_AfterExternBlock)
1565 ? 1u
1566 : 0u;
1567 parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1568 addUnwrappedLine();
1569 return;
1572 break;
1573 case tok::kw_export:
1574 if (Style.isJavaScript()) {
1575 parseJavaScriptEs6ImportExport();
1576 return;
1578 if (Style.isCpp()) {
1579 nextToken();
1580 if (FormatTok->is(tok::kw_namespace)) {
1581 parseNamespace();
1582 return;
1584 if (FormatTok->is(Keywords.kw_import) && parseModuleImport())
1585 return;
1587 break;
1588 case tok::kw_inline:
1589 nextToken();
1590 if (FormatTok->is(tok::kw_namespace)) {
1591 parseNamespace();
1592 return;
1594 break;
1595 case tok::identifier:
1596 if (FormatTok->is(TT_ForEachMacro)) {
1597 parseForOrWhileLoop();
1598 return;
1600 if (FormatTok->is(TT_MacroBlockBegin)) {
1601 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1602 /*MunchSemi=*/false);
1603 return;
1605 if (FormatTok->is(Keywords.kw_import)) {
1606 if (Style.isJavaScript()) {
1607 parseJavaScriptEs6ImportExport();
1608 return;
1610 if (Style.Language == FormatStyle::LK_Proto) {
1611 nextToken();
1612 if (FormatTok->is(tok::kw_public))
1613 nextToken();
1614 if (FormatTok->isNot(tok::string_literal))
1615 return;
1616 nextToken();
1617 if (FormatTok->is(tok::semi))
1618 nextToken();
1619 addUnwrappedLine();
1620 return;
1622 if (Style.isCpp() && parseModuleImport())
1623 return;
1625 if (Style.isCpp() &&
1626 FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1627 Keywords.kw_slots, Keywords.kw_qslots)) {
1628 nextToken();
1629 if (FormatTok->is(tok::colon)) {
1630 nextToken();
1631 addUnwrappedLine();
1632 return;
1635 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1636 parseStatementMacro();
1637 return;
1639 if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1640 parseNamespace();
1641 return;
1643 // In Verilog labels can be any expression, so we don't do them here.
1644 if (!Style.isVerilog() && Tokens->peekNextToken()->is(tok::colon) &&
1645 !Line->MustBeDeclaration) {
1646 nextToken();
1647 Line->Tokens.begin()->Tok->MustBreakBefore = true;
1648 FormatTok->setFinalizedType(TT_GotoLabelColon);
1649 parseLabel(!Style.IndentGotoLabels);
1650 if (HasLabel)
1651 *HasLabel = true;
1652 return;
1654 // In all other cases, parse the declaration.
1655 break;
1656 default:
1657 break;
// Second phase: consume the element token by token until one of the cases
// below ends it with a return, or until eof().
1660 const bool InRequiresExpression =
1661 OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
1662 do {
1663 const FormatToken *Previous = FormatTok->Previous;
1664 switch (FormatTok->Tok.getKind()) {
1665 case tok::at:
1666 nextToken();
1667 if (FormatTok->is(tok::l_brace)) {
1668 nextToken();
1669 parseBracedList();
1670 break;
1671 } else if (Style.Language == FormatStyle::LK_Java &&
1672 FormatTok->is(Keywords.kw_interface)) {
1673 nextToken();
1674 break;
1676 switch (FormatTok->Tok.getObjCKeywordID()) {
1677 case tok::objc_public:
1678 case tok::objc_protected:
1679 case tok::objc_package:
1680 case tok::objc_private:
1681 return parseAccessSpecifier();
1682 case tok::objc_interface:
1683 case tok::objc_implementation:
1684 return parseObjCInterfaceOrImplementation();
1685 case tok::objc_protocol:
1686 if (parseObjCProtocol())
1687 return;
1688 break;
1689 case tok::objc_end:
1690 return; // Handled by the caller.
1691 case tok::objc_optional:
1692 case tok::objc_required:
1693 nextToken();
1694 addUnwrappedLine();
1695 return;
1696 case tok::objc_autoreleasepool:
1697 nextToken();
1698 if (FormatTok->is(tok::l_brace)) {
1699 if (Style.BraceWrapping.AfterControlStatement ==
1700 FormatStyle::BWACS_Always) {
1701 addUnwrappedLine();
1703 parseBlock();
1705 addUnwrappedLine();
1706 return;
1707 case tok::objc_synchronized:
1708 nextToken();
1709 if (FormatTok->is(tok::l_paren)) {
1710 // Skip synchronization object
1711 parseParens();
1713 if (FormatTok->is(tok::l_brace)) {
1714 if (Style.BraceWrapping.AfterControlStatement ==
1715 FormatStyle::BWACS_Always) {
1716 addUnwrappedLine();
1718 parseBlock();
1720 addUnwrappedLine();
1721 return;
1722 case tok::objc_try:
1723 // This branch isn't strictly necessary (the kw_try case below would
1724 // do this too after the tok::at is parsed above). But be explicit.
1725 parseTryCatch();
1726 return;
1727 default:
1728 break;
1730 break;
1731 case tok::kw_requires: {
1732 if (Style.isCpp()) {
1733 bool ParsedClause = parseRequires();
1734 if (ParsedClause)
1735 return;
1736 } else {
1737 nextToken();
1739 break;
1741 case tok::kw_enum:
1742 // Ignore if this is part of "template <enum ...".
1743 if (Previous && Previous->is(tok::less)) {
1744 nextToken();
1745 break;
1748 // parseEnum falls through and does not yet add an unwrapped line as an
1749 // enum definition can start a structural element.
1750 if (!parseEnum())
1751 break;
1752 // This only applies to C++ and Verilog.
1753 if (!Style.isCpp() && !Style.isVerilog()) {
1754 addUnwrappedLine();
1755 return;
1757 break;
1758 case tok::kw_typedef:
1759 nextToken();
1760 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1761 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1762 Keywords.kw_CF_CLOSED_ENUM,
1763 Keywords.kw_NS_CLOSED_ENUM)) {
1764 parseEnum();
1766 break;
1767 case tok::kw_class:
1768 if (Style.isVerilog()) {
1769 parseBlock();
1770 addUnwrappedLine();
1771 return;
1773 [[fallthrough]];
1774 case tok::kw_struct:
1775 case tok::kw_union:
1776 if (parseStructLike())
1777 return;
1778 break;
1779 case tok::kw_decltype:
1780 nextToken();
1781 if (FormatTok->is(tok::l_paren)) {
1782 parseParens();
1783 assert(FormatTok->Previous);
// Remember that the line contains 'decltype(auto)'.
1784 if (FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto,
1785 tok::l_paren)) {
1786 Line->SeenDecltypeAuto = true;
1789 break;
1790 case tok::period:
1791 nextToken();
1792 // In Java, classes have an implicit static member "class".
1793 if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1794 FormatTok->is(tok::kw_class)) {
1795 nextToken();
1797 if (Style.isJavaScript() && FormatTok &&
1798 FormatTok->Tok.getIdentifierInfo()) {
1799 // JavaScript only has pseudo keywords, all keywords are allowed to
1800 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1801 nextToken();
1803 break;
1804 case tok::semi:
1805 nextToken();
1806 addUnwrappedLine();
1807 return;
1808 case tok::r_brace:
1809 addUnwrappedLine();
1810 return;
1811 case tok::l_paren: {
1812 parseParens();
1813 // Break the unwrapped line if a K&R C function definition has a parameter
1814 // declaration.
1815 if (OpeningBrace || !Style.isCpp() || !Previous || eof())
1816 break;
1817 if (isC78ParameterDecl(FormatTok,
1818 Tokens->peekNextToken(/*SkipComment=*/true),
1819 Previous)) {
1820 addUnwrappedLine();
1821 return;
1823 break;
1825 case tok::kw_operator:
1826 nextToken();
1827 if (FormatTok->isBinaryOperator())
1828 nextToken();
1829 break;
1830 case tok::caret:
1831 nextToken();
1832 // Block return type.
1833 if (FormatTok->Tok.isAnyIdentifier() ||
1834 FormatTok->isSimpleTypeSpecifier()) {
1835 nextToken();
1836 // Return types: pointers are ok too.
1837 while (FormatTok->is(tok::star))
1838 nextToken();
1840 // Block argument list.
1841 if (FormatTok->is(tok::l_paren))
1842 parseParens();
1843 // Block body.
1844 if (FormatTok->is(tok::l_brace))
1845 parseChildBlock();
1846 break;
1847 case tok::l_brace:
1848 if (InRequiresExpression)
1849 FormatTok->setFinalizedType(TT_BracedListLBrace);
1850 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1851 IsDecltypeAutoFunction = Line->SeenDecltypeAuto;
1852 // A block outside of parentheses must be the last part of a
1853 // structural element.
1854 // FIXME: Figure out cases where this is not true, and add projections
1855 // for them (the one we know is missing are lambdas).
1856 if (Style.Language == FormatStyle::LK_Java &&
1857 Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1858 // If necessary, we could set the type to something different than
1859 // TT_FunctionLBrace.
1860 if (Style.BraceWrapping.AfterControlStatement ==
1861 FormatStyle::BWACS_Always) {
1862 addUnwrappedLine();
1864 } else if (Style.BraceWrapping.AfterFunction) {
1865 addUnwrappedLine();
1867 FormatTok->setFinalizedType(TT_FunctionLBrace);
1868 parseBlock();
1869 IsDecltypeAutoFunction = false;
1870 addUnwrappedLine();
1871 return;
1873 // Otherwise this was a braced init list, and the structural
1874 // element continues.
1875 break;
1876 case tok::kw_try:
1877 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1878 // field/method declaration.
1879 nextToken();
1880 break;
1882 // We arrive here when parsing function-try blocks.
1883 if (Style.BraceWrapping.AfterFunction)
1884 addUnwrappedLine();
1885 parseTryCatch();
1886 return;
1887 case tok::identifier: {
1888 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1889 Line->MustBeDeclaration) {
1890 addUnwrappedLine();
1891 parseCSharpGenericTypeConstraint();
1892 break;
1894 if (FormatTok->is(TT_MacroBlockEnd)) {
1895 addUnwrappedLine();
1896 return;
1899 // Function declarations (as opposed to function expressions) are parsed
1900 // on their own unwrapped line by continuing this loop. Function
1901 // expressions (functions that are not on their own line) must not create
1902 // a new unwrapped line, so they are special cased below.
1903 size_t TokenCount = Line->Tokens.size();
1904 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1905 (TokenCount > 1 ||
1906 (TokenCount == 1 &&
1907 Line->Tokens.front().Tok->isNot(Keywords.kw_async)))) {
1908 tryToParseJSFunction();
1909 break;
1911 if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1912 FormatTok->is(Keywords.kw_interface)) {
1913 if (Style.isJavaScript()) {
1914 // In JavaScript/TypeScript, "interface" can be used as a standalone
1915 // identifier, e.g. in `var interface = 1;`. If "interface" is
1916 // followed by another identifier, it is very likely to be an actual
1917 // interface declaration.
// Look one token ahead, then rewind the token source.
1918 unsigned StoredPosition = Tokens->getPosition();
1919 FormatToken *Next = Tokens->getNextToken();
1920 FormatTok = Tokens->setPosition(StoredPosition);
1921 if (!mustBeJSIdent(Keywords, Next)) {
1922 nextToken();
1923 break;
1926 parseRecord();
1927 addUnwrappedLine();
1928 return;
1931 if (Style.isVerilog()) {
1932 if (FormatTok->is(Keywords.kw_table)) {
1933 parseVerilogTable();
1934 return;
1936 if (Keywords.isVerilogBegin(*FormatTok) ||
1937 Keywords.isVerilogHierarchy(*FormatTok)) {
1938 parseBlock();
1939 addUnwrappedLine();
1940 return;
1944 if (!Style.isCpp() && FormatTok->is(Keywords.kw_interface)) {
1945 if (parseStructLike())
1946 return;
1947 break;
1950 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1951 parseStatementMacro();
1952 return;
1955 // See if the following token should start a new unwrapped line.
1956 StringRef Text = FormatTok->TokenText;
1958 FormatToken *PreviousToken = FormatTok;
1959 nextToken();
1961 // JS doesn't have macros, and within classes colons indicate fields, not
1962 // labels.
1963 if (Style.isJavaScript())
1964 break;
// True if, after skipping leading comments (and, for Verilog, leading '#'
// tokens), the line holds exactly one token.
1966 auto OneTokenSoFar = [&]() {
1967 auto I = Line->Tokens.begin(), E = Line->Tokens.end();
1968 while (I != E && I->Tok->is(tok::comment))
1969 ++I;
1970 if (Style.isVerilog())
1971 while (I != E && I->Tok->is(tok::hash))
1972 ++I;
1973 return I != E && (++I == E);
1975 if (OneTokenSoFar()) {
1976 // Recognize function-like macro usages without trailing semicolon as
1977 // well as free-standing macros like Q_OBJECT.
1978 bool FunctionLike = FormatTok->is(tok::l_paren);
1979 if (FunctionLike)
1980 parseParens();
1982 bool FollowedByNewline =
1983 CommentsBeforeNextToken.empty()
1984 ? FormatTok->NewlinesBefore > 0
1985 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
// Only all-uppercase names that are function-like or at least five
// characters long are treated as macros here.
1987 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1988 tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
1989 if (PreviousToken->isNot(TT_UntouchableMacroFunc))
1990 PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
1991 addUnwrappedLine();
1992 return;
1995 break;
1997 case tok::equal:
1998 if ((Style.isJavaScript() || Style.isCSharp()) &&
1999 FormatTok->is(TT_FatArrow)) {
2000 tryToParseChildBlock();
2001 break;
2004 nextToken();
2005 if (FormatTok->is(tok::l_brace)) {
2006 // Block kind should probably be set to BK_BracedInit for any language.
2007 // C# needs this change to ensure that array initialisers and object
2008 // initialisers are indented the same way.
2009 if (Style.isCSharp())
2010 FormatTok->setBlockKind(BK_BracedInit);
2011 nextToken();
2012 parseBracedList();
2013 } else if (Style.Language == FormatStyle::LK_Proto &&
2014 FormatTok->is(tok::less)) {
2015 nextToken();
2016 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2017 /*ClosingBraceKind=*/tok::greater);
2019 break;
2020 case tok::l_square:
2021 parseSquare();
2022 break;
2023 case tok::kw_new:
2024 parseNew();
2025 break;
2026 case tok::kw_case:
2027 // Proto: there are no switch/case statements.
2028 if (Style.isProto()) {
2029 nextToken();
2030 return;
2032 // In Verilog switch is called case.
2033 if (Style.isVerilog()) {
2034 parseBlock();
2035 addUnwrappedLine();
2036 return;
2038 if (Style.isJavaScript() && Line->MustBeDeclaration) {
2039 // 'case: string' field declaration.
2040 nextToken();
2041 break;
2043 parseCaseLabel();
2044 break;
2045 case tok::kw_default:
2046 nextToken();
2047 if (Style.isVerilog()) {
2048 if (FormatTok->is(tok::colon)) {
2049 // The label will be handled in the next iteration.
2050 break;
2052 if (FormatTok->is(Keywords.kw_clocking)) {
2053 // A default clocking block.
2054 parseBlock();
2055 addUnwrappedLine();
2056 return;
2058 parseVerilogCaseLabel();
2059 return;
2061 break;
2062 case tok::colon:
2063 nextToken();
2064 if (Style.isVerilog()) {
2065 parseVerilogCaseLabel();
2066 return;
2068 break;
2069 default:
2070 nextToken();
2071 break;
2073 } while (!eof());
2076 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2077 assert(FormatTok->is(tok::l_brace));
2078 if (!Style.isCSharp())
2079 return false;
2080 // See if it's a property accessor.
2081 if (FormatTok->Previous->isNot(tok::identifier))
2082 return false;
2084 // See if we are inside a property accessor.
2086 // Record the current tokenPosition so that we can advance and
2087 // reset the current token. `Next` is not set yet so we need
2088 // another way to advance along the token stream.
2089 unsigned int StoredPosition = Tokens->getPosition();
2090 FormatToken *Tok = Tokens->getNextToken();
2092 // A trivial property accessor is of the form:
2093 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2094 // Track these as they do not require line breaks to be introduced.
2095 bool HasSpecialAccessor = false;
2096 bool IsTrivialPropertyAccessor = true;
2097 while (!eof()) {
2098 if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
2099 tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
2100 Keywords.kw_init, Keywords.kw_set)) {
2101 if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
2102 HasSpecialAccessor = true;
2103 Tok = Tokens->getNextToken();
2104 continue;
2106 if (Tok->isNot(tok::r_brace))
2107 IsTrivialPropertyAccessor = false;
2108 break;
2111 if (!HasSpecialAccessor) {
2112 Tokens->setPosition(StoredPosition);
2113 return false;
2116 // Try to parse the property accessor:
2117 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2118 Tokens->setPosition(StoredPosition);
2119 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2120 addUnwrappedLine();
2121 nextToken();
2122 do {
2123 switch (FormatTok->Tok.getKind()) {
2124 case tok::r_brace:
2125 nextToken();
2126 if (FormatTok->is(tok::equal)) {
2127 while (!eof() && FormatTok->isNot(tok::semi))
2128 nextToken();
2129 nextToken();
2131 addUnwrappedLine();
2132 return true;
2133 case tok::l_brace:
2134 ++Line->Level;
2135 parseBlock(/*MustBeDeclaration=*/true);
2136 addUnwrappedLine();
2137 --Line->Level;
2138 break;
2139 case tok::equal:
2140 if (FormatTok->is(TT_FatArrow)) {
2141 ++Line->Level;
2142 do {
2143 nextToken();
2144 } while (!eof() && FormatTok->isNot(tok::semi));
2145 nextToken();
2146 addUnwrappedLine();
2147 --Line->Level;
2148 break;
2150 nextToken();
2151 break;
2152 default:
2153 if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
2154 Keywords.kw_set) &&
2155 !IsTrivialPropertyAccessor) {
2156 // Non-trivial get/set needs to be on its own line.
2157 addUnwrappedLine();
2159 nextToken();
2161 } while (!eof());
2163 // Unreachable for well-formed code (paired '{' and '}').
2164 return true;
2167 bool UnwrappedLineParser::tryToParseLambda() {
2168 assert(FormatTok->is(tok::l_square));
2169 if (!Style.isCpp()) {
2170 nextToken();
2171 return false;
2173 FormatToken &LSquare = *FormatTok;
2174 if (!tryToParseLambdaIntroducer())
2175 return false;
2177 bool SeenArrow = false;
2178 bool InTemplateParameterList = false;
2180 while (FormatTok->isNot(tok::l_brace)) {
2181 if (FormatTok->isSimpleTypeSpecifier()) {
2182 nextToken();
2183 continue;
2185 switch (FormatTok->Tok.getKind()) {
2186 case tok::l_brace:
2187 break;
2188 case tok::l_paren:
2189 parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
2190 break;
2191 case tok::l_square:
2192 parseSquare();
2193 break;
2194 case tok::less:
2195 assert(FormatTok->Previous);
2196 if (FormatTok->Previous->is(tok::r_square))
2197 InTemplateParameterList = true;
2198 nextToken();
2199 break;
2200 case tok::kw_auto:
2201 case tok::kw_class:
2202 case tok::kw_template:
2203 case tok::kw_typename:
2204 case tok::amp:
2205 case tok::star:
2206 case tok::kw_const:
2207 case tok::kw_constexpr:
2208 case tok::kw_consteval:
2209 case tok::comma:
2210 case tok::greater:
2211 case tok::identifier:
2212 case tok::numeric_constant:
2213 case tok::coloncolon:
2214 case tok::kw_mutable:
2215 case tok::kw_noexcept:
2216 case tok::kw_static:
2217 nextToken();
2218 break;
2219 // Specialization of a template with an integer parameter can contain
2220 // arithmetic, logical, comparison and ternary operators.
2222 // FIXME: This also accepts sequences of operators that are not in the scope
2223 // of a template argument list.
2225 // In a C++ lambda a template type can only occur after an arrow. We use
2226 // this as an heuristic to distinguish between Objective-C expressions
2227 // followed by an `a->b` expression, such as:
2228 // ([obj func:arg] + a->b)
2229 // Otherwise the code below would parse as a lambda.
2230 case tok::plus:
2231 case tok::minus:
2232 case tok::exclaim:
2233 case tok::tilde:
2234 case tok::slash:
2235 case tok::percent:
2236 case tok::lessless:
2237 case tok::pipe:
2238 case tok::pipepipe:
2239 case tok::ampamp:
2240 case tok::caret:
2241 case tok::equalequal:
2242 case tok::exclaimequal:
2243 case tok::greaterequal:
2244 case tok::lessequal:
2245 case tok::question:
2246 case tok::colon:
2247 case tok::ellipsis:
2248 case tok::kw_true:
2249 case tok::kw_false:
2250 if (SeenArrow || InTemplateParameterList) {
2251 nextToken();
2252 break;
2254 return true;
2255 case tok::arrow:
2256 // This might or might not actually be a lambda arrow (this could be an
2257 // ObjC method invocation followed by a dereferencing arrow). We might
2258 // reset this back to TT_Unknown in TokenAnnotator.
2259 FormatTok->setFinalizedType(TT_TrailingReturnArrow);
2260 SeenArrow = true;
2261 nextToken();
2262 break;
2263 case tok::kw_requires: {
2264 auto *RequiresToken = FormatTok;
2265 nextToken();
2266 parseRequiresClause(RequiresToken);
2267 break;
2269 case tok::equal:
2270 if (!InTemplateParameterList)
2271 return true;
2272 nextToken();
2273 break;
2274 default:
2275 return true;
2279 FormatTok->setFinalizedType(TT_LambdaLBrace);
2280 LSquare.setFinalizedType(TT_LambdaLSquare);
2282 NestedLambdas.push_back(Line->SeenDecltypeAuto);
2283 parseChildBlock();
2284 assert(!NestedLambdas.empty());
2285 NestedLambdas.pop_back();
2287 return true;
2290 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2291 const FormatToken *Previous = FormatTok->Previous;
2292 const FormatToken *LeftSquare = FormatTok;
2293 nextToken();
2294 if ((Previous && ((Previous->Tok.getIdentifierInfo() &&
2295 !Previous->isOneOf(tok::kw_return, tok::kw_co_await,
2296 tok::kw_co_yield, tok::kw_co_return)) ||
2297 Previous->closesScope())) ||
2298 LeftSquare->isCppStructuredBinding(Style)) {
2299 return false;
2301 if (FormatTok->is(tok::l_square))
2302 return false;
2303 if (FormatTok->is(tok::r_square)) {
2304 const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
2305 if (Next->is(tok::greater))
2306 return false;
2308 parseSquare(/*LambdaIntroducer=*/true);
2309 return true;
2312 void UnwrappedLineParser::tryToParseJSFunction() {
2313 assert(FormatTok->is(Keywords.kw_function));
2314 if (FormatTok->is(Keywords.kw_async))
2315 nextToken();
2316 // Consume "function".
2317 nextToken();
2319 // Consume * (generator function). Treat it like C++'s overloaded operators.
2320 if (FormatTok->is(tok::star)) {
2321 FormatTok->setFinalizedType(TT_OverloadedOperator);
2322 nextToken();
2325 // Consume function name.
2326 if (FormatTok->is(tok::identifier))
2327 nextToken();
2329 if (FormatTok->isNot(tok::l_paren))
2330 return;
2332 // Parse formal parameter list.
2333 parseParens();
2335 if (FormatTok->is(tok::colon)) {
2336 // Parse a type definition.
2337 nextToken();
2339 // Eat the type declaration. For braced inline object types, balance braces,
2340 // otherwise just parse until finding an l_brace for the function body.
2341 if (FormatTok->is(tok::l_brace))
2342 tryToParseBracedList();
2343 else
2344 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2345 nextToken();
2348 if (FormatTok->is(tok::semi))
2349 return;
2351 parseChildBlock();
2354 bool UnwrappedLineParser::tryToParseBracedList() {
2355 if (FormatTok->is(BK_Unknown))
2356 calculateBraceTypes();
2357 assert(FormatTok->isNot(BK_Unknown));
2358 if (FormatTok->is(BK_Block))
2359 return false;
2360 nextToken();
2361 parseBracedList();
2362 return true;
2365 bool UnwrappedLineParser::tryToParseChildBlock() {
2366 assert(Style.isJavaScript() || Style.isCSharp());
2367 assert(FormatTok->is(TT_FatArrow));
2368 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2369 // They always start an expression or a child block if followed by a curly
2370 // brace.
2371 nextToken();
2372 if (FormatTok->isNot(tok::l_brace))
2373 return false;
2374 parseChildBlock();
2375 return true;
2378 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
2379 bool IsEnum,
2380 tok::TokenKind ClosingBraceKind) {
2381 bool HasError = false;
2383 // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2384 // replace this by using parseAssignmentExpression() inside.
2385 do {
2386 if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2387 tryToParseChildBlock()) {
2388 continue;
2390 if (Style.isJavaScript()) {
2391 if (FormatTok->is(Keywords.kw_function)) {
2392 tryToParseJSFunction();
2393 continue;
2395 if (FormatTok->is(tok::l_brace)) {
2396 // Could be a method inside of a braced list `{a() { return 1; }}`.
2397 if (tryToParseBracedList())
2398 continue;
2399 parseChildBlock();
2402 if (FormatTok->Tok.getKind() == ClosingBraceKind) {
2403 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2404 addUnwrappedLine();
2405 nextToken();
2406 return !HasError;
2408 switch (FormatTok->Tok.getKind()) {
2409 case tok::l_square:
2410 if (Style.isCSharp())
2411 parseSquare();
2412 else
2413 tryToParseLambda();
2414 break;
2415 case tok::l_paren:
2416 parseParens();
2417 // JavaScript can just have free standing methods and getters/setters in
2418 // object literals. Detect them by a "{" following ")".
2419 if (Style.isJavaScript()) {
2420 if (FormatTok->is(tok::l_brace))
2421 parseChildBlock();
2422 break;
2424 break;
2425 case tok::l_brace:
2426 // Assume there are no blocks inside a braced init list apart
2427 // from the ones we explicitly parse out (like lambdas).
2428 FormatTok->setBlockKind(BK_BracedInit);
2429 nextToken();
2430 parseBracedList();
2431 break;
2432 case tok::less:
2433 if (Style.Language == FormatStyle::LK_Proto ||
2434 ClosingBraceKind == tok::greater) {
2435 nextToken();
2436 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2437 /*ClosingBraceKind=*/tok::greater);
2438 } else {
2439 nextToken();
2441 break;
2442 case tok::semi:
2443 // JavaScript (or more precisely TypeScript) can have semicolons in braced
2444 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2445 // used for error recovery if we have otherwise determined that this is
2446 // a braced list.
2447 if (Style.isJavaScript()) {
2448 nextToken();
2449 break;
2451 HasError = true;
2452 if (!ContinueOnSemicolons)
2453 return !HasError;
2454 nextToken();
2455 break;
2456 case tok::comma:
2457 nextToken();
2458 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2459 addUnwrappedLine();
2460 break;
2461 default:
2462 nextToken();
2463 break;
2465 } while (!eof());
2466 return false;
2469 /// \brief Parses a pair of parentheses (and everything between them).
2470 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2471 /// double ampersands. This applies for all nested scopes as well.
2473 /// Returns whether there is a `=` token between the parentheses.
2474 bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2475 assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2476 auto *LeftParen = FormatTok;
2477 bool SeenEqual = false;
2478 const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace);
2479 nextToken();
2480 do {
2481 switch (FormatTok->Tok.getKind()) {
2482 case tok::l_paren:
2483 if (parseParens(AmpAmpTokenType))
2484 SeenEqual = true;
2485 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2486 parseChildBlock();
2487 break;
2488 case tok::r_paren:
2489 if (!MightBeStmtExpr &&
2490 Style.RemoveParentheses > FormatStyle::RPS_Leave) {
2491 const auto *Prev = LeftParen->Previous;
2492 const auto *Next = Tokens->peekNextToken();
2493 const bool DoubleParens =
2494 Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren);
2495 const auto *PrevPrev = Prev ? Prev->getPreviousNonComment() : nullptr;
2496 const bool Blacklisted =
2497 PrevPrev &&
2498 (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) ||
2499 (SeenEqual &&
2500 (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) ||
2501 PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if))));
2502 const bool ReturnParens =
2503 Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement &&
2504 ((NestedLambdas.empty() && !IsDecltypeAutoFunction) ||
2505 (!NestedLambdas.empty() && !NestedLambdas.back())) &&
2506 Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next &&
2507 Next->is(tok::semi);
2508 if ((DoubleParens && !Blacklisted) || ReturnParens) {
2509 LeftParen->Optional = true;
2510 FormatTok->Optional = true;
2513 nextToken();
2514 return SeenEqual;
2515 case tok::r_brace:
2516 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2517 return SeenEqual;
2518 case tok::l_square:
2519 tryToParseLambda();
2520 break;
2521 case tok::l_brace:
2522 if (!tryToParseBracedList())
2523 parseChildBlock();
2524 break;
2525 case tok::at:
2526 nextToken();
2527 if (FormatTok->is(tok::l_brace)) {
2528 nextToken();
2529 parseBracedList();
2531 break;
2532 case tok::equal:
2533 SeenEqual = true;
2534 if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2535 tryToParseChildBlock();
2536 else
2537 nextToken();
2538 break;
2539 case tok::kw_class:
2540 if (Style.isJavaScript())
2541 parseRecord(/*ParseAsExpr=*/true);
2542 else
2543 nextToken();
2544 break;
2545 case tok::identifier:
2546 if (Style.isJavaScript() && (FormatTok->is(Keywords.kw_function)))
2547 tryToParseJSFunction();
2548 else
2549 nextToken();
2550 break;
2551 case tok::kw_requires: {
2552 auto RequiresToken = FormatTok;
2553 nextToken();
2554 parseRequiresExpression(RequiresToken);
2555 break;
2557 case tok::ampamp:
2558 if (AmpAmpTokenType != TT_Unknown)
2559 FormatTok->setFinalizedType(AmpAmpTokenType);
2560 [[fallthrough]];
2561 default:
2562 nextToken();
2563 break;
2565 } while (!eof());
2566 return SeenEqual;
2569 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2570 if (!LambdaIntroducer) {
2571 assert(FormatTok->is(tok::l_square) && "'[' expected.");
2572 if (tryToParseLambda())
2573 return;
2575 do {
2576 switch (FormatTok->Tok.getKind()) {
2577 case tok::l_paren:
2578 parseParens();
2579 break;
2580 case tok::r_square:
2581 nextToken();
2582 return;
2583 case tok::r_brace:
2584 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2585 return;
2586 case tok::l_square:
2587 parseSquare();
2588 break;
2589 case tok::l_brace: {
2590 if (!tryToParseBracedList())
2591 parseChildBlock();
2592 break;
2594 case tok::at:
2595 nextToken();
2596 if (FormatTok->is(tok::l_brace)) {
2597 nextToken();
2598 parseBracedList();
2600 break;
2601 default:
2602 nextToken();
2603 break;
2605 } while (!eof());
2608 void UnwrappedLineParser::keepAncestorBraces() {
2609 if (!Style.RemoveBracesLLVM)
2610 return;
2612 const int MaxNestingLevels = 2;
2613 const int Size = NestedTooDeep.size();
2614 if (Size >= MaxNestingLevels)
2615 NestedTooDeep[Size - MaxNestingLevels] = true;
2616 NestedTooDeep.push_back(false);
2619 static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2620 for (const auto &Token : llvm::reverse(Line.Tokens))
2621 if (Token.Tok->isNot(tok::comment))
2622 return Token.Tok;
2624 return nullptr;
2627 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2628 FormatToken *Tok = nullptr;
2630 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2631 PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) {
2632 Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never
2633 ? getLastNonComment(*Line)
2634 : Line->Tokens.back().Tok;
2635 assert(Tok);
2636 if (Tok->BraceCount < 0) {
2637 assert(Tok->BraceCount == -1);
2638 Tok = nullptr;
2639 } else {
2640 Tok->BraceCount = -1;
2644 addUnwrappedLine();
2645 ++Line->Level;
2646 parseStructuralElement();
2648 if (Tok) {
2649 assert(!Line->InPPDirective);
2650 Tok = nullptr;
2651 for (const auto &L : llvm::reverse(*CurrentLines)) {
2652 if (!L.InPPDirective && getLastNonComment(L)) {
2653 Tok = L.Tokens.back().Tok;
2654 break;
2657 assert(Tok);
2658 ++Tok->BraceCount;
2661 if (CheckEOF && eof())
2662 addUnwrappedLine();
2664 --Line->Level;
2667 static void markOptionalBraces(FormatToken *LeftBrace) {
2668 if (!LeftBrace)
2669 return;
2671 assert(LeftBrace->is(tok::l_brace));
2673 FormatToken *RightBrace = LeftBrace->MatchingParen;
2674 if (!RightBrace) {
2675 assert(!LeftBrace->Optional);
2676 return;
2679 assert(RightBrace->is(tok::r_brace));
2680 assert(RightBrace->MatchingParen == LeftBrace);
2681 assert(LeftBrace->Optional == RightBrace->Optional);
2683 LeftBrace->Optional = true;
2684 RightBrace->Optional = true;
2687 void UnwrappedLineParser::handleAttributes() {
2688 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2689 if (FormatTok->isAttribute())
2690 nextToken();
2691 else if (FormatTok->is(tok::l_square))
2692 handleCppAttributes();
2695 bool UnwrappedLineParser::handleCppAttributes() {
2696 // Handle [[likely]] / [[unlikely]] attributes.
2697 assert(FormatTok->is(tok::l_square));
2698 if (!tryToParseSimpleAttribute())
2699 return false;
2700 parseSquare();
2701 return true;
2704 /// Returns whether \c Tok begins a block.
2705 bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
2706 // FIXME: rename the function or make
2707 // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2708 return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
2709 : Tok.is(tok::l_brace);
2712 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2713 bool KeepBraces,
2714 bool IsVerilogAssert) {
2715 assert((FormatTok->is(tok::kw_if) ||
2716 (Style.isVerilog() &&
2717 FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
2718 Keywords.kw_assume, Keywords.kw_cover))) &&
2719 "'if' expected");
2720 nextToken();
2722 if (IsVerilogAssert) {
2723 // Handle `assert #0` and `assert final`.
2724 if (FormatTok->is(Keywords.kw_verilogHash)) {
2725 nextToken();
2726 if (FormatTok->is(tok::numeric_constant))
2727 nextToken();
2728 } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property,
2729 Keywords.kw_sequence)) {
2730 nextToken();
2734 // Handle `if !consteval`.
2735 if (FormatTok->is(tok::exclaim))
2736 nextToken();
2738 bool KeepIfBraces = true;
2739 if (FormatTok->is(tok::kw_consteval)) {
2740 nextToken();
2741 } else {
2742 KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
2743 if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
2744 nextToken();
2745 if (FormatTok->is(tok::l_paren)) {
2746 FormatTok->setFinalizedType(TT_ConditionLParen);
2747 parseParens();
2750 handleAttributes();
2751 // The then action is optional in Verilog assert statements.
2752 if (IsVerilogAssert && FormatTok->is(tok::semi)) {
2753 nextToken();
2754 addUnwrappedLine();
2755 return nullptr;
2758 bool NeedsUnwrappedLine = false;
2759 keepAncestorBraces();
2761 FormatToken *IfLeftBrace = nullptr;
2762 IfStmtKind IfBlockKind = IfStmtKind::NotIf;
2764 if (isBlockBegin(*FormatTok)) {
2765 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
2766 IfLeftBrace = FormatTok;
2767 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2768 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2769 /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
2770 setPreviousRBraceType(TT_ControlStatementRBrace);
2771 if (Style.BraceWrapping.BeforeElse)
2772 addUnwrappedLine();
2773 else
2774 NeedsUnwrappedLine = true;
2775 } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) {
2776 addUnwrappedLine();
2777 } else {
2778 parseUnbracedBody();
2781 if (Style.RemoveBracesLLVM) {
2782 assert(!NestedTooDeep.empty());
2783 KeepIfBraces = KeepIfBraces ||
2784 (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
2785 NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
2786 IfBlockKind == IfStmtKind::IfElseIf;
2789 bool KeepElseBraces = KeepIfBraces;
2790 FormatToken *ElseLeftBrace = nullptr;
2791 IfStmtKind Kind = IfStmtKind::IfOnly;
2793 if (FormatTok->is(tok::kw_else)) {
2794 if (Style.RemoveBracesLLVM) {
2795 NestedTooDeep.back() = false;
2796 Kind = IfStmtKind::IfElse;
2798 nextToken();
2799 handleAttributes();
2800 if (isBlockBegin(*FormatTok)) {
2801 const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
2802 FormatTok->setFinalizedType(TT_ElseLBrace);
2803 ElseLeftBrace = FormatTok;
2804 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2805 IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
2806 FormatToken *IfLBrace =
2807 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2808 /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
2809 setPreviousRBraceType(TT_ElseRBrace);
2810 if (FormatTok->is(tok::kw_else)) {
2811 KeepElseBraces = KeepElseBraces ||
2812 ElseBlockKind == IfStmtKind::IfOnly ||
2813 ElseBlockKind == IfStmtKind::IfElseIf;
2814 } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
2815 KeepElseBraces = true;
2816 assert(ElseLeftBrace->MatchingParen);
2817 markOptionalBraces(ElseLeftBrace);
2819 addUnwrappedLine();
2820 } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) {
2821 const FormatToken *Previous = Tokens->getPreviousToken();
2822 assert(Previous);
2823 const bool IsPrecededByComment = Previous->is(tok::comment);
2824 if (IsPrecededByComment) {
2825 addUnwrappedLine();
2826 ++Line->Level;
2828 bool TooDeep = true;
2829 if (Style.RemoveBracesLLVM) {
2830 Kind = IfStmtKind::IfElseIf;
2831 TooDeep = NestedTooDeep.pop_back_val();
2833 ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
2834 if (Style.RemoveBracesLLVM)
2835 NestedTooDeep.push_back(TooDeep);
2836 if (IsPrecededByComment)
2837 --Line->Level;
2838 } else {
2839 parseUnbracedBody(/*CheckEOF=*/true);
2841 } else {
2842 KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
2843 if (NeedsUnwrappedLine)
2844 addUnwrappedLine();
2847 if (!Style.RemoveBracesLLVM)
2848 return nullptr;
2850 assert(!NestedTooDeep.empty());
2851 KeepElseBraces = KeepElseBraces ||
2852 (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
2853 NestedTooDeep.back();
2855 NestedTooDeep.pop_back();
2857 if (!KeepIfBraces && !KeepElseBraces) {
2858 markOptionalBraces(IfLeftBrace);
2859 markOptionalBraces(ElseLeftBrace);
2860 } else if (IfLeftBrace) {
2861 FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
2862 if (IfRightBrace) {
2863 assert(IfRightBrace->MatchingParen == IfLeftBrace);
2864 assert(!IfLeftBrace->Optional);
2865 assert(!IfRightBrace->Optional);
2866 IfLeftBrace->MatchingParen = nullptr;
2867 IfRightBrace->MatchingParen = nullptr;
2871 if (IfKind)
2872 *IfKind = Kind;
2874 return IfLeftBrace;
2877 void UnwrappedLineParser::parseTryCatch() {
2878 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2879 nextToken();
2880 bool NeedsUnwrappedLine = false;
2881 if (FormatTok->is(tok::colon)) {
2882 // We are in a function try block, what comes is an initializer list.
2883 nextToken();
2885 // In case identifiers were removed by clang-tidy, what might follow is
2886 // multiple commas in sequence - before the first identifier.
2887 while (FormatTok->is(tok::comma))
2888 nextToken();
2890 while (FormatTok->is(tok::identifier)) {
2891 nextToken();
2892 if (FormatTok->is(tok::l_paren))
2893 parseParens();
2894 if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2895 FormatTok->is(tok::l_brace)) {
2896 do {
2897 nextToken();
2898 } while (FormatTok->isNot(tok::r_brace));
2899 nextToken();
2902 // In case identifiers were removed by clang-tidy, what might follow is
2903 // multiple commas in sequence - after the first identifier.
2904 while (FormatTok->is(tok::comma))
2905 nextToken();
2908 // Parse try with resource.
2909 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2910 parseParens();
2912 keepAncestorBraces();
2914 if (FormatTok->is(tok::l_brace)) {
2915 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2916 parseBlock();
2917 if (Style.BraceWrapping.BeforeCatch)
2918 addUnwrappedLine();
2919 else
2920 NeedsUnwrappedLine = true;
2921 } else if (FormatTok->isNot(tok::kw_catch)) {
2922 // The C++ standard requires a compound-statement after a try.
2923 // If there's none, we try to assume there's a structuralElement
2924 // and try to continue.
2925 addUnwrappedLine();
2926 ++Line->Level;
2927 parseStructuralElement();
2928 --Line->Level;
2930 while (true) {
2931 if (FormatTok->is(tok::at))
2932 nextToken();
2933 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2934 tok::kw___finally) ||
2935 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2936 FormatTok->is(Keywords.kw_finally)) ||
2937 (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
2938 FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
2939 break;
2941 nextToken();
2942 while (FormatTok->isNot(tok::l_brace)) {
2943 if (FormatTok->is(tok::l_paren)) {
2944 parseParens();
2945 continue;
2947 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2948 if (Style.RemoveBracesLLVM)
2949 NestedTooDeep.pop_back();
2950 return;
2952 nextToken();
2954 NeedsUnwrappedLine = false;
2955 Line->MustBeDeclaration = false;
2956 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2957 parseBlock();
2958 if (Style.BraceWrapping.BeforeCatch)
2959 addUnwrappedLine();
2960 else
2961 NeedsUnwrappedLine = true;
2964 if (Style.RemoveBracesLLVM)
2965 NestedTooDeep.pop_back();
2967 if (NeedsUnwrappedLine)
2968 addUnwrappedLine();
2971 void UnwrappedLineParser::parseNamespace() {
2972 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2973 "'namespace' expected");
2975 const FormatToken &InitialToken = *FormatTok;
2976 nextToken();
2977 if (InitialToken.is(TT_NamespaceMacro)) {
2978 parseParens();
2979 } else {
2980 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2981 tok::l_square, tok::period, tok::l_paren) ||
2982 (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
2983 if (FormatTok->is(tok::l_square))
2984 parseSquare();
2985 else if (FormatTok->is(tok::l_paren))
2986 parseParens();
2987 else
2988 nextToken();
2991 if (FormatTok->is(tok::l_brace)) {
2992 FormatTok->setFinalizedType(TT_NamespaceLBrace);
2994 if (ShouldBreakBeforeBrace(Style, InitialToken))
2995 addUnwrappedLine();
2997 unsigned AddLevels =
2998 Style.NamespaceIndentation == FormatStyle::NI_All ||
2999 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
3000 DeclarationScopeStack.size() > 1)
3001 ? 1u
3002 : 0u;
3003 bool ManageWhitesmithsBraces =
3004 AddLevels == 0u &&
3005 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3007 // If we're in Whitesmiths mode, indent the brace if we're not indenting
3008 // the whole block.
3009 if (ManageWhitesmithsBraces)
3010 ++Line->Level;
3012 // Munch the semicolon after a namespace. This is more common than one would
3013 // think. Putting the semicolon into its own line is very ugly.
3014 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
3015 /*KeepBraces=*/true, /*IfKind=*/nullptr,
3016 ManageWhitesmithsBraces);
3018 addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
3020 if (ManageWhitesmithsBraces)
3021 --Line->Level;
3023 // FIXME: Add error handling.
3026 void UnwrappedLineParser::parseNew() {
3027 assert(FormatTok->is(tok::kw_new) && "'new' expected");
3028 nextToken();
3030 if (Style.isCSharp()) {
3031 do {
3032 // Handle constructor invocation, e.g. `new(field: value)`.
3033 if (FormatTok->is(tok::l_paren))
3034 parseParens();
3036 // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
3037 if (FormatTok->is(tok::l_brace))
3038 parseBracedList();
3040 if (FormatTok->isOneOf(tok::semi, tok::comma))
3041 return;
3043 nextToken();
3044 } while (!eof());
3047 if (Style.Language != FormatStyle::LK_Java)
3048 return;
3050 // In Java, we can parse everything up to the parens, which aren't optional.
3051 do {
3052 // There should not be a ;, { or } before the new's open paren.
3053 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
3054 return;
3056 // Consume the parens.
3057 if (FormatTok->is(tok::l_paren)) {
3058 parseParens();
3060 // If there is a class body of an anonymous class, consume that as child.
3061 if (FormatTok->is(tok::l_brace))
3062 parseChildBlock();
3063 return;
3065 nextToken();
3066 } while (!eof());
3069 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
3070 keepAncestorBraces();
3072 if (isBlockBegin(*FormatTok)) {
3073 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3074 FormatToken *LeftBrace = FormatTok;
3075 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3076 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3077 /*MunchSemi=*/true, KeepBraces);
3078 setPreviousRBraceType(TT_ControlStatementRBrace);
3079 if (!KeepBraces) {
3080 assert(!NestedTooDeep.empty());
3081 if (!NestedTooDeep.back())
3082 markOptionalBraces(LeftBrace);
3084 if (WrapRightBrace)
3085 addUnwrappedLine();
3086 } else {
3087 parseUnbracedBody();
3090 if (!KeepBraces)
3091 NestedTooDeep.pop_back();
3094 void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
3095 assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
3096 (Style.isVerilog() &&
3097 FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
3098 Keywords.kw_always_ff, Keywords.kw_always_latch,
3099 Keywords.kw_final, Keywords.kw_initial,
3100 Keywords.kw_foreach, Keywords.kw_forever,
3101 Keywords.kw_repeat))) &&
3102 "'for', 'while' or foreach macro expected");
3103 const bool KeepBraces = !Style.RemoveBracesLLVM ||
3104 !FormatTok->isOneOf(tok::kw_for, tok::kw_while);
3106 nextToken();
3107 // JS' for await ( ...
3108 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
3109 nextToken();
3110 if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
3111 nextToken();
3112 if (HasParens && FormatTok->is(tok::l_paren)) {
3113 // The type is only set for Verilog basically because we were afraid to
3114 // change the existing behavior for loops. See the discussion on D121756 for
3115 // details.
3116 if (Style.isVerilog())
3117 FormatTok->setFinalizedType(TT_ConditionLParen);
3118 parseParens();
3120 // Event control.
3121 if (Style.isVerilog())
3122 parseVerilogSensitivityList();
3124 handleAttributes();
3125 parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
3128 void UnwrappedLineParser::parseDoWhile() {
3129 assert(FormatTok->is(tok::kw_do) && "'do' expected");
3130 nextToken();
3132 parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
3134 // FIXME: Add error handling.
3135 if (FormatTok->isNot(tok::kw_while)) {
3136 addUnwrappedLine();
3137 return;
3140 FormatTok->setFinalizedType(TT_DoWhile);
3142 // If in Whitesmiths mode, the line with the while() needs to be indented
3143 // to the same level as the block.
3144 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
3145 ++Line->Level;
3147 nextToken();
3148 parseStructuralElement();
3151 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
3152 nextToken();
3153 unsigned OldLineLevel = Line->Level;
3154 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
3155 --Line->Level;
3156 if (LeftAlignLabel)
3157 Line->Level = 0;
3159 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
3160 FormatTok->is(tok::l_brace)) {
3162 CompoundStatementIndenter Indenter(this, Line->Level,
3163 Style.BraceWrapping.AfterCaseLabel,
3164 Style.BraceWrapping.IndentBraces);
3165 parseBlock();
3166 if (FormatTok->is(tok::kw_break)) {
3167 if (Style.BraceWrapping.AfterControlStatement ==
3168 FormatStyle::BWACS_Always) {
3169 addUnwrappedLine();
3170 if (!Style.IndentCaseBlocks &&
3171 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
3172 ++Line->Level;
3175 parseStructuralElement();
3177 addUnwrappedLine();
3178 } else {
3179 if (FormatTok->is(tok::semi))
3180 nextToken();
3181 addUnwrappedLine();
3183 Line->Level = OldLineLevel;
3184 if (FormatTok->isNot(tok::l_brace)) {
3185 parseStructuralElement();
3186 addUnwrappedLine();
3190 void UnwrappedLineParser::parseCaseLabel() {
3191 assert(FormatTok->is(tok::kw_case) && "'case' expected");
3193 // FIXME: fix handling of complex expressions here.
3194 do {
3195 nextToken();
3196 if (FormatTok->is(tok::colon)) {
3197 FormatTok->setFinalizedType(TT_CaseLabelColon);
3198 break;
3200 } while (!eof());
3201 parseLabel();
3204 void UnwrappedLineParser::parseSwitch() {
3205 assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3206 nextToken();
3207 if (FormatTok->is(tok::l_paren))
3208 parseParens();
3210 keepAncestorBraces();
3212 if (FormatTok->is(tok::l_brace)) {
3213 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3214 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3215 parseBlock();
3216 setPreviousRBraceType(TT_ControlStatementRBrace);
3217 addUnwrappedLine();
3218 } else {
3219 addUnwrappedLine();
3220 ++Line->Level;
3221 parseStructuralElement();
3222 --Line->Level;
3225 if (Style.RemoveBracesLLVM)
3226 NestedTooDeep.pop_back();
3229 // Operators that can follow a C variable.
3230 static bool isCOperatorFollowingVar(tok::TokenKind kind) {
3231 switch (kind) {
3232 case tok::ampamp:
3233 case tok::ampequal:
3234 case tok::arrow:
3235 case tok::caret:
3236 case tok::caretequal:
3237 case tok::comma:
3238 case tok::ellipsis:
3239 case tok::equal:
3240 case tok::equalequal:
3241 case tok::exclaim:
3242 case tok::exclaimequal:
3243 case tok::greater:
3244 case tok::greaterequal:
3245 case tok::greatergreater:
3246 case tok::greatergreaterequal:
3247 case tok::l_paren:
3248 case tok::l_square:
3249 case tok::less:
3250 case tok::lessequal:
3251 case tok::lessless:
3252 case tok::lesslessequal:
3253 case tok::minus:
3254 case tok::minusequal:
3255 case tok::minusminus:
3256 case tok::percent:
3257 case tok::percentequal:
3258 case tok::period:
3259 case tok::pipe:
3260 case tok::pipeequal:
3261 case tok::pipepipe:
3262 case tok::plus:
3263 case tok::plusequal:
3264 case tok::plusplus:
3265 case tok::question:
3266 case tok::r_brace:
3267 case tok::r_paren:
3268 case tok::r_square:
3269 case tok::semi:
3270 case tok::slash:
3271 case tok::slashequal:
3272 case tok::star:
3273 case tok::starequal:
3274 return true;
3275 default:
3276 return false;
3280 void UnwrappedLineParser::parseAccessSpecifier() {
3281 FormatToken *AccessSpecifierCandidate = FormatTok;
3282 nextToken();
3283 // Understand Qt's slots.
3284 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
3285 nextToken();
3286 // Otherwise, we don't know what it is, and we'd better keep the next token.
3287 if (FormatTok->is(tok::colon)) {
3288 nextToken();
3289 addUnwrappedLine();
3290 } else if (FormatTok->isNot(tok::coloncolon) &&
3291 !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
3292 // Not a variable name nor namespace name.
3293 addUnwrappedLine();
3294 } else if (AccessSpecifierCandidate) {
3295 // Consider the access specifier to be a C identifier.
3296 AccessSpecifierCandidate->Tok.setKind(tok::identifier);
// Entry point for a `requires` keyword. Classifies it as a requires clause or
// a requires expression and dispatches to parseRequiresClause() or
// parseRequiresExpression(). The decision is made in up to three stages:
//   1. the token directly after `requires`,
//   2. the non-comment token directly before `requires`,
//   3. a bounded look-ahead into the parenthesized content.
3300 /// \brief Parses a requires, decides if it is a clause or an expression.
3301 /// \pre The current token has to be the requires keyword.
3302 /// \returns true if it parsed a clause.
3303 bool clang::format::UnwrappedLineParser::parseRequires() {
3304 assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3305 auto RequiresToken = FormatTok;
3307 // We try to guess if it is a requires clause, or a requires expression. For
3308 // that we first consume the keyword and check the next token.
3309 nextToken();
// Stage 1: look at the token following `requires`.
3311 switch (FormatTok->Tok.getKind()) {
3312 case tok::l_brace:
3313 // This can only be an expression, never a clause.
3314 parseRequiresExpression(RequiresToken);
3315 return false;
3316 case tok::l_paren:
3317 // Clauses and expression can start with a paren, it's unclear what we have.
3318 break;
3319 default:
3320 // All other tokens can only be a clause.
3321 parseRequiresClause(RequiresToken);
3322 return true;
3325 // Looking forward we would have to decide if there are function declaration
3326 // like arguments to the requires expression:
3327 // requires (T t) {
3328 // Or there is a constraint expression for the requires clause:
3329 // requires (C<T> && ...
// Stage 2: only reached when `requires` is followed by '('.
3331 // But first let's look behind.
3332 auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
3334 if (!PreviousNonComment ||
3335 PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
3336 // If there is no token, or an expression left brace, we are a requires
3337 // clause within a requires expression.
3338 parseRequiresClause(RequiresToken);
3339 return true;
3342 switch (PreviousNonComment->Tok.getKind()) {
3343 case tok::greater:
3344 case tok::r_paren:
3345 case tok::kw_noexcept:
3346 case tok::kw_const:
3347 // This is a requires clause.
3348 parseRequiresClause(RequiresToken);
3349 return true;
3350 case tok::amp:
3351 case tok::ampamp: {
3352 // This can be either:
3353 // if (... && requires (T t) ...)
3354 // Or
3355 // void member(...) && requires (C<T> ...
3356 // We check the one token before that for a const:
3357 // void member(...) const && requires (C<T> ...
3358 auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3359 if (PrevPrev && PrevPrev->is(tok::kw_const)) {
3360 parseRequiresClause(RequiresToken);
3361 return true;
3363 break;
3365 default:
3366 if (PreviousNonComment->isTypeOrIdentifier()) {
3367 // This is a requires clause.
3368 parseRequiresClause(RequiresToken);
3369 return true;
3371 // It's an expression.
3372 parseRequiresExpression(RequiresToken);
3373 return false;
// Stage 3: still undecided, so peek into the parentheses.
3376 // Now we look forward and try to check if the paren content is a parameter
3377 // list. The parameters can be cv-qualified and contain references or
3378 // pointers.
3379 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3380 // of stuff: typename, const, *, &, &&, ::, identifiers.
// Remember where we are so every exit below can rewind the token source.
3382 unsigned StoredPosition = Tokens->getPosition();
3383 FormatToken *NextToken = Tokens->getNextToken();
3384 int Lookahead = 0;
// PeekNext fetches one more token and counts how far the look-ahead has gone.
3385 auto PeekNext = [&Lookahead, &NextToken, this] {
3386 ++Lookahead;
3387 NextToken = Tokens->getNextToken();
// FoundType: an identifier has already been seen. LastWasColonColon: the
// previous token was '::', so a following identifier continues a qualified
// name. OpenAngles: count of '<' seen minus '>' seen.
3390 bool FoundType = false;
3391 bool LastWasColonColon = false;
3392 int OpenAngles = 0;
// The scan is bounded to 50 tokens. Each return inside the loop first resets
// FormatTok via setPosition(StoredPosition) before dispatching.
3394 for (; Lookahead < 50; PeekNext()) {
3395 switch (NextToken->Tok.getKind()) {
3396 case tok::kw_volatile:
3397 case tok::kw_const:
3398 case tok::comma:
// Outside of angle brackets these are taken as signs of a parameter list.
3399 if (OpenAngles == 0) {
3400 FormatTok = Tokens->setPosition(StoredPosition);
3401 parseRequiresExpression(RequiresToken);
3402 return false;
3404 break;
3405 case tok::r_paren:
3406 case tok::pipepipe:
3407 FormatTok = Tokens->setPosition(StoredPosition);
3408 parseRequiresClause(RequiresToken);
3409 return true;
3410 case tok::eof:
3411 // Break out of the loop.
3412 Lookahead = 50;
3413 break;
3414 case tok::coloncolon:
3415 LastWasColonColon = true;
3416 break;
3417 case tok::identifier:
// A second identifier that does not continue a qualified name and is not
// inside angle brackets is treated as a parameter name (TYPE NAME).
3418 if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3419 FormatTok = Tokens->setPosition(StoredPosition);
3420 parseRequiresExpression(RequiresToken);
3421 return false;
3423 FoundType = true;
3424 LastWasColonColon = false;
3425 break;
3426 case tok::less:
3427 ++OpenAngles;
3428 break;
3429 case tok::greater:
3430 --OpenAngles;
3431 break;
3432 default:
3433 if (NextToken->isSimpleTypeSpecifier()) {
3434 FormatTok = Tokens->setPosition(StoredPosition);
3435 parseRequiresExpression(RequiresToken);
3436 return false;
3438 break;
3441 // This seems to be a complicated expression, just assume it's a clause.
3442 FormatTok = Tokens->setPosition(StoredPosition);
3443 parseRequiresClause(RequiresToken);
3444 return true;
3447 /// \brief Parses a requires clause.
3448 /// \param RequiresToken The requires keyword token, which starts this clause.
3449 /// \pre We need to be on the next token after the requires keyword.
3450 /// \sa parseRequiresExpression
3452 /// Returns if it either has finished parsing the clause, or it detects, that
3453 /// the clause is incorrect.
3454 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3455 assert(FormatTok->getPreviousNonComment() == RequiresToken);
3456 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3458 // If there is no previous token, we are within a requires expression,
3459 // otherwise we will always have the template or function declaration in front
3460 // of it.
3461 bool InRequiresExpression =
3462 !RequiresToken->Previous ||
3463 RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3465 RequiresToken->setFinalizedType(InRequiresExpression
3466 ? TT_RequiresClauseInARequiresExpression
3467 : TT_RequiresClause);
3469 // NOTE: parseConstraintExpression is only ever called from this function.
3470 // It could be inlined into here.
3471 parseConstraintExpression();
3473 if (!InRequiresExpression)
3474 FormatTok->Previous->ClosesRequiresClause = true;
3477 /// \brief Parses a requires expression.
3478 /// \param RequiresToken The requires keyword token, which starts this clause.
3479 /// \pre We need to be on the next token after the requires keyword.
3480 /// \sa parseRequiresClause
3482 /// Returns if it either has finished parsing the expression, or it detects,
3483 /// that the expression is incorrect.
3484 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3485 assert(FormatTok->getPreviousNonComment() == RequiresToken);
3486 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3488 RequiresToken->setFinalizedType(TT_RequiresExpression);
3490 if (FormatTok->is(tok::l_paren)) {
3491 FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3492 parseParens();
3495 if (FormatTok->is(tok::l_brace)) {
3496 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3497 parseChildBlock();
// Consumes tokens one switch iteration at a time until a token is found that
// cannot belong to the constraint expression; the function then returns with
// FormatTok left on that token. Two flags carry state between iterations:
// whether a lambda may start at the current token, and whether a top-level
// parenthesized group is still acceptable.
3501 /// \brief Parses a constraint expression.
3503 /// This is the body of a requires clause. It returns, when the parsing is
3504 /// complete, or the expression is incorrect.
3505 void UnwrappedLineParser::parseConstraintExpression() {
3506 // The special handling for lambdas is needed since tryToParseLambda() eats a
3507 // token and if a requires expression is the last part of a requires clause
3508 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3509 // not set on the correct token. Thus we need to be aware if we even expect a
3510 // lambda to be possible.
3511 // template <typename T> requires requires { ... } [[nodiscard]] ...;
3512 bool LambdaNextTimeAllowed = true;
3514 // Within lambda declarations, it is permitted to put a requires clause after
3515 // its template parameter list, which would place the requires clause right
3516 // before the parentheses of the parameters of the lambda declaration. Thus,
3517 // we track if we expect to see grouping parentheses at all.
3518 // Without this check, `requires foo<T> (T t)` in the below example would be
3519 // seen as the whole requires clause, accidentally eating the parameters of
3520 // the lambda.
3521 // [&]<typename T> requires foo<T> (T t) { ... };
3522 bool TopLevelParensAllowed = true;
3524 do {
// Read the flag for this iteration and reset it; branches below re-enable it
// where a lambda may follow.
3525 bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);
3527 switch (FormatTok->Tok.getKind()) {
3528 case tok::kw_requires: {
// A nested `requires` inside a constraint is parsed as a requires expression.
3529 auto RequiresToken = FormatTok;
3530 nextToken();
3531 parseRequiresExpression(RequiresToken);
3532 break;
3535 case tok::l_paren:
3536 if (!TopLevelParensAllowed)
3537 return;
3538 parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
3539 TopLevelParensAllowed = false;
3540 break;
3542 case tok::l_square:
// Stop unless a lambda is allowed here and actually parses as one.
3543 if (!LambdaThisTimeAllowed || !tryToParseLambda())
3544 return;
3545 break;
// Tokens that end the constraint expression outright.
3547 case tok::kw_const:
3548 case tok::semi:
3549 case tok::kw_class:
3550 case tok::kw_struct:
3551 case tok::kw_union:
3552 return;
3554 case tok::l_brace:
3555 // Potential function body.
3556 return;
3558 case tok::ampamp:
3559 case tok::pipepipe:
// A logical operator starts a new operand, so both flags are re-armed.
3560 FormatTok->setFinalizedType(TT_BinaryOperator);
3561 nextToken();
3562 LambdaNextTimeAllowed = true;
3563 TopLevelParensAllowed = true;
3564 break;
3566 case tok::comma:
3567 case tok::comment:
// Commas and comments are skipped and carry the lambda flag over unchanged.
3568 LambdaNextTimeAllowed = LambdaThisTimeAllowed;
3569 nextToken();
3570 break;
3572 case tok::kw_sizeof:
3573 case tok::greater:
3574 case tok::greaterequal:
3575 case tok::greatergreater:
3576 case tok::less:
3577 case tok::lessequal:
3578 case tok::lessless:
3579 case tok::equalequal:
3580 case tok::exclaim:
3581 case tok::exclaimequal:
3582 case tok::plus:
3583 case tok::minus:
3584 case tok::star:
3585 case tok::slash:
3586 LambdaNextTimeAllowed = true;
3587 TopLevelParensAllowed = true;
3588 // Just eat them.
3589 nextToken();
3590 break;
3592 case tok::numeric_constant:
3593 case tok::coloncolon:
3594 case tok::kw_true:
3595 case tok::kw_false:
3596 TopLevelParensAllowed = false;
3597 // Just eat them.
3598 nextToken();
3599 break;
3601 case tok::kw_static_cast:
3602 case tok::kw_const_cast:
3603 case tok::kw_reinterpret_cast:
3604 case tok::kw_dynamic_cast:
// Named casts: give up unless '<' follows, then skip the angle-bracketed part
// by running parseBracedList() with '>' as the closing token.
3605 nextToken();
3606 if (FormatTok->isNot(tok::less))
3607 return;
3609 nextToken();
3610 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
3611 /*ClosingBraceKind=*/tok::greater);
3612 break;
3614 default:
3615 if (!FormatTok->Tok.getIdentifierInfo()) {
3616 // Identifiers are part of the default case, we check for more then
3617 // tok::identifier to handle builtin type traits.
3618 return;
3621 // We need to differentiate identifiers for a template deduction guide,
3622 // variables, or function return types (the constraint expression has
3623 // ended before that), and basically all other cases. But it's easier to
3624 // check the other way around.
3625 assert(FormatTok->Previous);
// Only continue when the previous token shows the identifier is still part of
// the constraint; anything else ends the expression.
3626 switch (FormatTok->Previous->Tok.getKind()) {
3627 case tok::coloncolon: // Nested identifier.
3628 case tok::ampamp: // Start of a function or variable for the
3629 case tok::pipepipe: // constraint expression. (binary)
3630 case tok::exclaim: // The same as above, but unary.
3631 case tok::kw_requires: // Initial identifier of a requires clause.
3632 case tok::equal: // Initial identifier of a concept declaration.
3633 break;
3634 default:
3635 return;
3638 // Read identifier with optional template declaration.
3639 nextToken();
3640 if (FormatTok->is(tok::less)) {
3641 nextToken();
3642 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
3643 /*ClosingBraceKind=*/tok::greater);
3645 TopLevelParensAllowed = false;
3646 break;
3648 } while (!eof());
3651 bool UnwrappedLineParser::parseEnum() {
3652 const FormatToken &InitialToken = *FormatTok;
3654 // Won't be 'enum' for NS_ENUMs.
3655 if (FormatTok->is(tok::kw_enum))
3656 nextToken();
3658 // In TypeScript, "enum" can also be used as property name, e.g. in interface
3659 // declarations. An "enum" keyword followed by a colon would be a syntax
3660 // error and thus assume it is just an identifier.
3661 if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
3662 return false;
3664 // In protobuf, "enum" can be used as a field name.
3665 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
3666 return false;
3668 // Eat up enum class ...
3669 if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
3670 nextToken();
3672 while (FormatTok->Tok.getIdentifierInfo() ||
3673 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
3674 tok::greater, tok::comma, tok::question,
3675 tok::l_square, tok::r_square)) {
3676 if (Style.isVerilog()) {
3677 FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
3678 nextToken();
3679 // In Verilog the base type can have dimensions.
3680 while (FormatTok->is(tok::l_square))
3681 parseSquare();
3682 } else {
3683 nextToken();
3685 // We can have macros or attributes in between 'enum' and the enum name.
3686 if (FormatTok->is(tok::l_paren))
3687 parseParens();
3688 assert(FormatTok->isNot(TT_AttributeSquare));
3689 if (FormatTok->is(tok::identifier)) {
3690 nextToken();
3691 // If there are two identifiers in a row, this is likely an elaborate
3692 // return type. In Java, this can be "implements", etc.
3693 if (Style.isCpp() && FormatTok->is(tok::identifier))
3694 return false;
3698 // Just a declaration or something is wrong.
3699 if (FormatTok->isNot(tok::l_brace))
3700 return true;
3701 FormatTok->setFinalizedType(TT_EnumLBrace);
3702 FormatTok->setBlockKind(BK_Block);
3704 if (Style.Language == FormatStyle::LK_Java) {
3705 // Java enums are different.
3706 parseJavaEnumBody();
3707 return true;
3709 if (Style.Language == FormatStyle::LK_Proto) {
3710 parseBlock(/*MustBeDeclaration=*/true);
3711 return true;
3714 if (!Style.AllowShortEnumsOnASingleLine &&
3715 ShouldBreakBeforeBrace(Style, InitialToken)) {
3716 addUnwrappedLine();
3718 // Parse enum body.
3719 nextToken();
3720 if (!Style.AllowShortEnumsOnASingleLine) {
3721 addUnwrappedLine();
3722 Line->Level += 1;
3724 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
3725 /*IsEnum=*/true);
3726 if (!Style.AllowShortEnumsOnASingleLine)
3727 Line->Level -= 1;
3728 if (HasError) {
3729 if (FormatTok->is(tok::semi))
3730 nextToken();
3731 addUnwrappedLine();
3733 setPreviousRBraceType(TT_EnumRBrace);
3734 return true;
3736 // There is no addUnwrappedLine() here so that we fall through to parsing a
3737 // structural element afterwards. Thus, in "enum A {} n, m;",
3738 // "} n, m;" will end up in one unwrapped line.
3741 bool UnwrappedLineParser::parseStructLike() {
3742 // parseRecord falls through and does not yet add an unwrapped line as a
3743 // record declaration or definition can start a structural element.
3744 parseRecord();
3745 // This does not apply to Java, JavaScript and C#.
3746 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3747 Style.isCSharp()) {
3748 if (FormatTok->is(tok::semi))
3749 nextToken();
3750 addUnwrappedLine();
3751 return true;
3753 return false;
3756 namespace {
3757 // A class used to set and restore the Token position when peeking
3758 // ahead in the token source.
3759 class ScopedTokenPosition {
3760 unsigned StoredPosition;
3761 FormatTokenSource *Tokens;
3763 public:
3764 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3765 assert(Tokens && "Tokens expected to not be null");
3766 StoredPosition = Tokens->getPosition();
3769 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3771 } // namespace
3773 // Look to see if we have [[ by looking ahead, if
3774 // its not then rewind to the original position.
3775 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3776 ScopedTokenPosition AutoPosition(Tokens);
3777 FormatToken *Tok = Tokens->getNextToken();
3778 // We already read the first [ check for the second.
3779 if (Tok->isNot(tok::l_square))
3780 return false;
3781 // Double check that the attribute is just something
3782 // fairly simple.
3783 while (Tok->isNot(tok::eof)) {
3784 if (Tok->is(tok::r_square))
3785 break;
3786 Tok = Tokens->getNextToken();
3788 if (Tok->is(tok::eof))
3789 return false;
3790 Tok = Tokens->getNextToken();
3791 if (Tok->isNot(tok::r_square))
3792 return false;
3793 Tok = Tokens->getNextToken();
3794 if (Tok->is(tok::semi))
3795 return false;
3796 return true;
3799 void UnwrappedLineParser::parseJavaEnumBody() {
3800 assert(FormatTok->is(tok::l_brace));
3801 const FormatToken *OpeningBrace = FormatTok;
3803 // Determine whether the enum is simple, i.e. does not have a semicolon or
3804 // constants with class bodies. Simple enums can be formatted like braced
3805 // lists, contracted to a single line, etc.
3806 unsigned StoredPosition = Tokens->getPosition();
3807 bool IsSimple = true;
3808 FormatToken *Tok = Tokens->getNextToken();
3809 while (Tok->isNot(tok::eof)) {
3810 if (Tok->is(tok::r_brace))
3811 break;
3812 if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3813 IsSimple = false;
3814 break;
3816 // FIXME: This will also mark enums with braces in the arguments to enum
3817 // constants as "not simple". This is probably fine in practice, though.
3818 Tok = Tokens->getNextToken();
3820 FormatTok = Tokens->setPosition(StoredPosition);
3822 if (IsSimple) {
3823 nextToken();
3824 parseBracedList();
3825 addUnwrappedLine();
3826 return;
3829 // Parse the body of a more complex enum.
3830 // First add a line for everything up to the "{".
3831 nextToken();
3832 addUnwrappedLine();
3833 ++Line->Level;
3835 // Parse the enum constants.
3836 while (!eof()) {
3837 if (FormatTok->is(tok::l_brace)) {
3838 // Parse the constant's class body.
3839 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3840 /*MunchSemi=*/false);
3841 } else if (FormatTok->is(tok::l_paren)) {
3842 parseParens();
3843 } else if (FormatTok->is(tok::comma)) {
3844 nextToken();
3845 addUnwrappedLine();
3846 } else if (FormatTok->is(tok::semi)) {
3847 nextToken();
3848 addUnwrappedLine();
3849 break;
3850 } else if (FormatTok->is(tok::r_brace)) {
3851 addUnwrappedLine();
3852 break;
3853 } else {
3854 nextToken();
3858 // Parse the class body after the enum's ";" if any.
3859 parseLevel(OpeningBrace);
3860 nextToken();
3861 --Line->Level;
3862 addUnwrappedLine();
// Parses a record declaration or definition, starting on its introducing
// keyword (the token is remembered as InitialToken and later selects the
// brace token types). The function works in three steps: skip the record
// name with any attributes and macros, skip a base-clause or template
// argument part, and finally parse the body if a '{' follows.
//
// ParseAsExpr: when true, the body is parsed with parseChildBlock() instead
// of parseBlock().
3865 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
3866 const FormatToken &InitialToken = *FormatTok;
3867 nextToken();
3869 // The actual identifier can be a nested name specifier, and in macros
3870 // it is often token-pasted.
3871 // An [[attribute]] can be before the identifier.
3872 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
3873 tok::kw_alignas, tok::l_square) ||
3874 FormatTok->isAttribute() ||
3875 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3876 FormatTok->isOneOf(tok::period, tok::comma))) {
3877 if (Style.isJavaScript() &&
3878 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
3879 // JavaScript/TypeScript supports inline object types in
3880 // extends/implements positions:
3881 // class Foo implements {bar: number} { }
3882 nextToken();
3883 if (FormatTok->is(tok::l_brace)) {
3884 tryToParseBracedList();
3885 continue;
3888 if (FormatTok->is(tok::l_square) && handleCppAttributes())
3889 continue;
// An identifier whose text differs from its upper-cased form (i.e. it is not
// all upper case) is not treated as a macro.
3890 bool IsNonMacroIdentifier =
3891 FormatTok->is(tok::identifier) &&
3892 FormatTok->TokenText != FormatTok->TokenText.upper();
3893 nextToken();
3894 // We can have macros in between 'class' and the class name.
3895 if (!IsNonMacroIdentifier && FormatTok->is(tok::l_paren))
3896 parseParens();
3899 // Note that parsing away template declarations here leads to incorrectly
3900 // accepting function declarations as record declarations.
3901 // In general, we cannot solve this problem. Consider:
3902 // class A<int> B() {}
3903 // which can be a function definition or a class definition when B() is a
3904 // macro. If we find enough real-world cases where this is a problem, we
3905 // can parse for the 'template' keyword in the beginning of the statement,
3906 // and thus rule out the record production in case there is no template
3907 // (this would still leave us with an ambiguity between template function
3908 // and class declarations).
3909 if (FormatTok->isOneOf(tok::colon, tok::less)) {
3910 do {
3911 if (FormatTok->is(tok::l_brace)) {
// A '{' is either a braced list inside the header or the start of the body;
// if it does not parse as a braced list, leave the loop.
3912 calculateBraceTypes(/*ExpectClassBody=*/true);
3913 if (!tryToParseBracedList())
3914 break;
3916 if (FormatTok->is(tok::l_square)) {
3917 FormatToken *Previous = FormatTok->Previous;
3918 if (!Previous ||
3919 !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
3920 // Don't try parsing a lambda if we had a closing parenthesis before,
3921 // it was probably a pointer to an array: int (*)[].
3922 if (!tryToParseLambda())
3923 continue;
3924 } else {
3925 parseSquare();
3926 continue;
// A ';' here ends the function: there is no body to parse.
3929 if (FormatTok->is(tok::semi))
3930 return;
3931 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
3932 addUnwrappedLine();
3933 nextToken();
3934 parseCSharpGenericTypeConstraint();
3935 break;
3937 nextToken();
3938 } while (!eof());
// Maps the record keyword to the token types used for its opening and closing
// brace.
3941 auto GetBraceTypes =
3942 [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> {
3943 switch (RecordTok.Tok.getKind()) {
3944 case tok::kw_class:
3945 return {TT_ClassLBrace, TT_ClassRBrace};
3946 case tok::kw_struct:
3947 return {TT_StructLBrace, TT_StructRBrace};
3948 case tok::kw_union:
3949 return {TT_UnionLBrace, TT_UnionRBrace};
3950 default:
3951 // Useful for e.g. interface.
3952 return {TT_RecordLBrace, TT_RecordRBrace};
3955 if (FormatTok->is(tok::l_brace)) {
3956 auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken);
3957 FormatTok->setFinalizedType(OpenBraceType);
3958 if (ParseAsExpr) {
3959 parseChildBlock();
3960 } else {
3961 if (ShouldBreakBeforeBrace(Style, InitialToken))
3962 addUnwrappedLine();
// Members are indented two levels instead of one when IndentAccessModifiers
// is set.
3964 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
3965 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
3967 setPreviousRBraceType(ClosingBraceType);
3969 // There is no addUnwrappedLine() here so that we fall through to parsing a
3970 // structural element afterwards. Thus, in "class A {} n, m;",
3971 // "} n, m;" will end up in one unwrapped line.
3974 void UnwrappedLineParser::parseObjCMethod() {
3975 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
3976 "'(' or identifier expected.");
3977 do {
3978 if (FormatTok->is(tok::semi)) {
3979 nextToken();
3980 addUnwrappedLine();
3981 return;
3982 } else if (FormatTok->is(tok::l_brace)) {
3983 if (Style.BraceWrapping.AfterFunction)
3984 addUnwrappedLine();
3985 parseBlock();
3986 addUnwrappedLine();
3987 return;
3988 } else {
3989 nextToken();
3991 } while (!eof());
3994 void UnwrappedLineParser::parseObjCProtocolList() {
3995 assert(FormatTok->is(tok::less) && "'<' expected.");
3996 do {
3997 nextToken();
3998 // Early exit in case someone forgot a close angle.
3999 if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4000 FormatTok->isObjCAtKeyword(tok::objc_end)) {
4001 return;
4003 } while (!eof() && FormatTok->isNot(tok::greater));
4004 nextToken(); // Skip '>'.
4007 void UnwrappedLineParser::parseObjCUntilAtEnd() {
4008 do {
4009 if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
4010 nextToken();
4011 addUnwrappedLine();
4012 break;
4014 if (FormatTok->is(tok::l_brace)) {
4015 parseBlock();
4016 // In ObjC interfaces, nothing should be following the "}".
4017 addUnwrappedLine();
4018 } else if (FormatTok->is(tok::r_brace)) {
4019 // Ignore stray "}". parseStructuralElement doesn't consume them.
4020 nextToken();
4021 addUnwrappedLine();
4022 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
4023 nextToken();
4024 parseObjCMethod();
4025 } else {
4026 parseStructuralElement();
4028 } while (!eof());
4031 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
4032 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
4033 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
4034 nextToken();
4035 nextToken(); // interface name
4037 // @interface can be followed by a lightweight generic
4038 // specialization list, then either a base class or a category.
4039 if (FormatTok->is(tok::less))
4040 parseObjCLightweightGenerics();
4041 if (FormatTok->is(tok::colon)) {
4042 nextToken();
4043 nextToken(); // base class name
4044 // The base class can also have lightweight generics applied to it.
4045 if (FormatTok->is(tok::less))
4046 parseObjCLightweightGenerics();
4047 } else if (FormatTok->is(tok::l_paren)) {
4048 // Skip category, if present.
4049 parseParens();
4052 if (FormatTok->is(tok::less))
4053 parseObjCProtocolList();
4055 if (FormatTok->is(tok::l_brace)) {
4056 if (Style.BraceWrapping.AfterObjCDeclaration)
4057 addUnwrappedLine();
4058 parseBlock(/*MustBeDeclaration=*/true);
4061 // With instance variables, this puts '}' on its own line. Without instance
4062 // variables, this ends the @interface line.
4063 addUnwrappedLine();
4065 parseObjCUntilAtEnd();
4068 void UnwrappedLineParser::parseObjCLightweightGenerics() {
4069 assert(FormatTok->is(tok::less));
4070 // Unlike protocol lists, generic parameterizations support
4071 // nested angles:
4073 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4074 // NSObject <NSCopying, NSSecureCoding>
4076 // so we need to count how many open angles we have left.
4077 unsigned NumOpenAngles = 1;
4078 do {
4079 nextToken();
4080 // Early exit in case someone forgot a close angle.
4081 if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4082 FormatTok->isObjCAtKeyword(tok::objc_end)) {
4083 break;
4085 if (FormatTok->is(tok::less)) {
4086 ++NumOpenAngles;
4087 } else if (FormatTok->is(tok::greater)) {
4088 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
4089 --NumOpenAngles;
4091 } while (!eof() && NumOpenAngles != 0);
4092 nextToken(); // Skip '>'.
4095 // Returns true for the declaration/definition form of @protocol,
4096 // false for the expression form.
4097 bool UnwrappedLineParser::parseObjCProtocol() {
4098 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
4099 nextToken();
4101 if (FormatTok->is(tok::l_paren)) {
4102 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4103 return false;
4106 // The definition/declaration form,
4107 // @protocol Foo
4108 // - (int)someMethod;
4109 // @end
4111 nextToken(); // protocol name
4113 if (FormatTok->is(tok::less))
4114 parseObjCProtocolList();
4116 // Check for protocol declaration.
4117 if (FormatTok->is(tok::semi)) {
4118 nextToken();
4119 addUnwrappedLine();
4120 return true;
4123 addUnwrappedLine();
4124 parseObjCUntilAtEnd();
4125 return true;
4128 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
4129 bool IsImport = FormatTok->is(Keywords.kw_import);
4130 assert(IsImport || FormatTok->is(tok::kw_export));
4131 nextToken();
4133 // Consume the "default" in "export default class/function".
4134 if (FormatTok->is(tok::kw_default))
4135 nextToken();
4137 // Consume "async function", "function" and "default function", so that these
4138 // get parsed as free-standing JS functions, i.e. do not require a trailing
4139 // semicolon.
4140 if (FormatTok->is(Keywords.kw_async))
4141 nextToken();
4142 if (FormatTok->is(Keywords.kw_function)) {
4143 nextToken();
4144 return;
4147 // For imports, `export *`, `export {...}`, consume the rest of the line up
4148 // to the terminating `;`. For everything else, just return and continue
4149 // parsing the structural element, i.e. the declaration or expression for
4150 // `export default`.
4151 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
4152 !FormatTok->isStringLiteral() &&
4153 !(FormatTok->is(Keywords.kw_type) &&
4154 Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) {
4155 return;
4158 while (!eof()) {
4159 if (FormatTok->is(tok::semi))
4160 return;
4161 if (Line->Tokens.empty()) {
4162 // Common issue: Automatic Semicolon Insertion wrapped the line, so the
4163 // import statement should terminate.
4164 return;
4166 if (FormatTok->is(tok::l_brace)) {
4167 FormatTok->setBlockKind(BK_Block);
4168 nextToken();
4169 parseBracedList();
4170 } else {
4171 nextToken();
4176 void UnwrappedLineParser::parseStatementMacro() {
4177 nextToken();
4178 if (FormatTok->is(tok::l_paren))
4179 parseParens();
4180 if (FormatTok->is(tok::semi))
4181 nextToken();
4182 addUnwrappedLine();
4185 void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4186 // consume things like a::`b.c[d:e] or a::*
4187 while (true) {
4188 if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar,
4189 tok::coloncolon, tok::hash) ||
4190 Keywords.isVerilogIdentifier(*FormatTok)) {
4191 nextToken();
4192 } else if (FormatTok->is(tok::l_square)) {
4193 parseSquare();
4194 } else {
4195 break;
4200 void UnwrappedLineParser::parseVerilogSensitivityList() {
4201 if (FormatTok->isNot(tok::at))
4202 return;
4203 nextToken();
4204 // A block event expression has 2 at signs.
4205 if (FormatTok->is(tok::at))
4206 nextToken();
4207 switch (FormatTok->Tok.getKind()) {
4208 case tok::star:
4209 nextToken();
4210 break;
4211 case tok::l_paren:
4212 parseParens();
4213 break;
4214 default:
4215 parseVerilogHierarchyIdentifier();
4216 break;
4220 unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
4221 unsigned AddLevels = 0;
4223 if (FormatTok->is(Keywords.kw_clocking)) {
4224 nextToken();
4225 if (Keywords.isVerilogIdentifier(*FormatTok))
4226 nextToken();
4227 parseVerilogSensitivityList();
4228 if (FormatTok->is(tok::semi))
4229 nextToken();
4230 } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex,
4231 Keywords.kw_casez, Keywords.kw_randcase,
4232 Keywords.kw_randsequence)) {
4233 if (Style.IndentCaseLabels)
4234 AddLevels++;
4235 nextToken();
4236 if (FormatTok->is(tok::l_paren)) {
4237 FormatTok->setFinalizedType(TT_ConditionLParen);
4238 parseParens();
4240 if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches))
4241 nextToken();
4242 // The case header has no semicolon.
4243 } else {
4244 // "module" etc.
4245 nextToken();
4246 // all the words like the name of the module and specifiers like
4247 // "automatic" and the width of function return type
4248 while (true) {
4249 if (FormatTok->is(tok::l_square)) {
4250 auto Prev = FormatTok->getPreviousNonComment();
4251 if (Prev && Keywords.isVerilogIdentifier(*Prev))
4252 Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
4253 parseSquare();
4254 } else if (Keywords.isVerilogIdentifier(*FormatTok) ||
4255 FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) {
4256 nextToken();
4257 } else {
4258 break;
4262 auto NewLine = [this]() {
4263 addUnwrappedLine();
4264 Line->IsContinuation = true;
4267 // package imports
4268 while (FormatTok->is(Keywords.kw_import)) {
4269 NewLine();
4270 nextToken();
4271 parseVerilogHierarchyIdentifier();
4272 if (FormatTok->is(tok::semi))
4273 nextToken();
4276 // parameters and ports
4277 if (FormatTok->is(Keywords.kw_verilogHash)) {
4278 NewLine();
4279 nextToken();
4280 if (FormatTok->is(tok::l_paren)) {
4281 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4282 parseParens();
4285 if (FormatTok->is(tok::l_paren)) {
4286 NewLine();
4287 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4288 parseParens();
4291 // extends and implements
4292 if (FormatTok->is(Keywords.kw_extends)) {
4293 NewLine();
4294 nextToken();
4295 parseVerilogHierarchyIdentifier();
4296 if (FormatTok->is(tok::l_paren))
4297 parseParens();
4299 if (FormatTok->is(Keywords.kw_implements)) {
4300 NewLine();
4301 do {
4302 nextToken();
4303 parseVerilogHierarchyIdentifier();
4304 } while (FormatTok->is(tok::comma));
4307 // Coverage event for cover groups.
4308 if (FormatTok->is(tok::at)) {
4309 NewLine();
4310 parseVerilogSensitivityList();
4313 if (FormatTok->is(tok::semi))
4314 nextToken(/*LevelDifference=*/1);
4315 addUnwrappedLine();
4318 return AddLevels;
4321 void UnwrappedLineParser::parseVerilogTable() {
4322 assert(FormatTok->is(Keywords.kw_table));
4323 nextToken(/*LevelDifference=*/1);
4324 addUnwrappedLine();
4326 auto InitialLevel = Line->Level++;
4327 while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) {
4328 FormatToken *Tok = FormatTok;
4329 nextToken();
4330 if (Tok->is(tok::semi))
4331 addUnwrappedLine();
4332 else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus))
4333 Tok->setFinalizedType(TT_VerilogTableItem);
4335 Line->Level = InitialLevel;
4336 nextToken(/*LevelDifference=*/-1);
4337 addUnwrappedLine();
4340 void UnwrappedLineParser::parseVerilogCaseLabel() {
4341 // The label will get unindented in AnnotatingParser. If there are no leading
4342 // spaces, indent the rest here so that things inside the block will be
4343 // indented relative to things outside. We don't use parseLabel because we
4344 // don't know whether this colon is a label or a ternary expression at this
4345 // point.
4346 auto OrigLevel = Line->Level;
4347 auto FirstLine = CurrentLines->size();
4348 if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
4349 ++Line->Level;
4350 else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok))
4351 --Line->Level;
4352 parseStructuralElement();
4353 // Restore the indentation in both the new line and the line that has the
4354 // label.
4355 if (CurrentLines->size() > FirstLine)
4356 (*CurrentLines)[FirstLine].Level = OrigLevel;
4357 Line->Level = OrigLevel;
4360 bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
4361 for (const auto &N : Line.Tokens) {
4362 if (N.Tok->MacroCtx)
4363 return true;
4364 for (const UnwrappedLine &Child : N.Children)
4365 if (containsExpansion(Child))
4366 return true;
4368 return false;
4371 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
4372 if (Line->Tokens.empty())
4373 return;
4374 LLVM_DEBUG({
4375 if (!parsingPPDirective()) {
4376 llvm::dbgs() << "Adding unwrapped line:\n";
4377 printDebugInfo(*Line);
4381 // If this line closes a block when in Whitesmiths mode, remember that
4382 // information so that the level can be decreased after the line is added.
4383 // This has to happen after the addition of the line since the line itself
4384 // needs to be indented.
4385 bool ClosesWhitesmithsBlock =
4386 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
4387 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
4389 // If the current line was expanded from a macro call, we use it to
4390 // reconstruct an unwrapped line from the structure of the expanded unwrapped
4391 // line and the unexpanded token stream.
4392 if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) {
4393 if (!Reconstruct)
4394 Reconstruct.emplace(Line->Level, Unexpanded);
4395 Reconstruct->addLine(*Line);
4397 // While the reconstructed unexpanded lines are stored in the normal
4398 // flow of lines, the expanded lines are stored on the side to be analyzed
4399 // in an extra step.
4400 CurrentExpandedLines.push_back(std::move(*Line));
4402 if (Reconstruct->finished()) {
4403 UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
4404 assert(!Reconstructed.Tokens.empty() &&
4405 "Reconstructed must at least contain the macro identifier.");
4406 assert(!parsingPPDirective());
4407 LLVM_DEBUG({
4408 llvm::dbgs() << "Adding unexpanded line:\n";
4409 printDebugInfo(Reconstructed);
4411 ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines;
4412 Lines.push_back(std::move(Reconstructed));
4413 CurrentExpandedLines.clear();
4414 Reconstruct.reset();
4416 } else {
4417 // At the top level we only get here when no unexpansion is going on, or
4418 // when conditional formatting led to unfinished macro reconstructions.
4419 assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0);
4420 CurrentLines->push_back(std::move(*Line));
4422 Line->Tokens.clear();
4423 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
4424 Line->FirstStartColumn = 0;
4425 Line->IsContinuation = false;
4426 Line->SeenDecltypeAuto = false;
4428 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
4429 --Line->Level;
4430 if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
4431 CurrentLines->append(
4432 std::make_move_iterator(PreprocessorDirectives.begin()),
4433 std::make_move_iterator(PreprocessorDirectives.end()));
4434 PreprocessorDirectives.clear();
4436 // Disconnect the current token from the last token on the previous line.
4437 FormatTok->Previous = nullptr;
4440 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
4442 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4443 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4444 FormatTok.NewlinesBefore > 0;
4447 // Checks if \p FormatTok is a line comment that continues the line comment
4448 // section on \p Line.
4449 static bool
4450 continuesLineCommentSection(const FormatToken &FormatTok,
4451 const UnwrappedLine &Line,
4452 const llvm::Regex &CommentPragmasRegex) {
4453 if (Line.Tokens.empty())
4454 return false;
4456 StringRef IndentContent = FormatTok.TokenText;
4457 if (FormatTok.TokenText.startswith("//") ||
4458 FormatTok.TokenText.startswith("/*")) {
4459 IndentContent = FormatTok.TokenText.substr(2);
4461 if (CommentPragmasRegex.match(IndentContent))
4462 return false;
4464 // If Line starts with a line comment, then FormatTok continues the comment
4465 // section if its original column is greater or equal to the original start
4466 // column of the line.
4468 // Define the min column token of a line as follows: if a line ends in '{' or
4469 // contains a '{' followed by a line comment, then the min column token is
4470 // that '{'. Otherwise, the min column token of the line is the first token of
4471 // the line.
4473 // If Line starts with a token other than a line comment, then FormatTok
4474 // continues the comment section if its original column is greater than the
4475 // original start column of the min column token of the line.
4477 // For example, the second line comment continues the first in these cases:
4479 // // first line
4480 // // second line
4482 // and:
4484 // // first line
4485 // // second line
4487 // and:
4489 // int i; // first line
4490 // // second line
4492 // and:
4494 // do { // first line
4495 // // second line
4496 // int i;
4497 // } while (true);
4499 // and:
4501 // enum {
4502 // a, // first line
4503 // // second line
4504 // b
4505 // };
4507 // The second line comment doesn't continue the first in these cases:
4509 // // first line
4510 // // second line
4512 // and:
4514 // int i; // first line
4515 // // second line
4517 // and:
4519 // do { // first line
4520 // // second line
4521 // int i;
4522 // } while (true);
4524 // and:
4526 // enum {
4527 // a, // first line
4528 // // second line
4529 // };
4530 const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4532 // Scan for '{//'. If found, use the column of '{' as a min column for line
4533 // comment section continuation.
4534 const FormatToken *PreviousToken = nullptr;
4535 for (const UnwrappedLineNode &Node : Line.Tokens) {
4536 if (PreviousToken && PreviousToken->is(tok::l_brace) &&
4537 isLineComment(*Node.Tok)) {
4538 MinColumnToken = PreviousToken;
4539 break;
4541 PreviousToken = Node.Tok;
4543 // Grab the last newline preceding a token in this unwrapped line.
4544 if (Node.Tok->NewlinesBefore > 0)
4545 MinColumnToken = Node.Tok;
4547 if (PreviousToken && PreviousToken->is(tok::l_brace))
4548 MinColumnToken = PreviousToken;
4550 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4551 MinColumnToken);
4554 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4555 bool JustComments = Line->Tokens.empty();
4556 for (FormatToken *Tok : CommentsBeforeNextToken) {
4557 // Line comments that belong to the same line comment section are put on the
4558 // same line since later we might want to reflow content between them.
4559 // Additional fine-grained breaking of line comment sections is controlled
4560 // by the class BreakableLineCommentSection in case it is desirable to keep
4561 // several line comment sections in the same unwrapped line.
4563 // FIXME: Consider putting separate line comment sections as children to the
4564 // unwrapped line instead.
4565 Tok->ContinuesLineCommentSection =
4566 continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
4567 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4568 addUnwrappedLine();
4569 pushToken(Tok);
4571 if (NewlineBeforeNext && JustComments)
4572 addUnwrappedLine();
4573 CommentsBeforeNextToken.clear();
4576 void UnwrappedLineParser::nextToken(int LevelDifference) {
4577 if (eof())
4578 return;
4579 flushComments(isOnNewLine(*FormatTok));
4580 pushToken(FormatTok);
4581 FormatToken *Previous = FormatTok;
4582 if (!Style.isJavaScript())
4583 readToken(LevelDifference);
4584 else
4585 readTokenWithJavaScriptASI();
4586 FormatTok->Previous = Previous;
4587 if (Style.isVerilog()) {
4588 // Blocks in Verilog can have `begin` and `end` instead of braces. For
4589 // keywords like `begin`, we can't treat them the same as left braces
4590 // because some contexts require one of them. For example structs use
4591 // braces and if blocks use keywords, and a left brace can occur in an if
4592 // statement, but it is not a block. For keywords like `end`, we simply
4593 // treat them the same as right braces.
4594 if (Keywords.isVerilogEnd(*FormatTok))
4595 FormatTok->Tok.setKind(tok::r_brace);
4599 void UnwrappedLineParser::distributeComments(
4600 const SmallVectorImpl<FormatToken *> &Comments,
4601 const FormatToken *NextTok) {
4602 // Whether or not a line comment token continues a line is controlled by
4603 // the method continuesLineCommentSection, with the following caveat:
4605 // Define a trail of Comments to be a nonempty proper postfix of Comments such
4606 // that each comment line from the trail is aligned with the next token, if
4607 // the next token exists. If a trail exists, the beginning of the maximal
4608 // trail is marked as a start of a new comment section.
4610 // For example in this code:
4612 // int a; // line about a
4613 // // line 1 about b
4614 // // line 2 about b
4615 // int b;
4617 // the two lines about b form a maximal trail, so there are two sections, the
4618 // first one consisting of the single comment "// line about a" and the
4619 // second one consisting of the next two comments.
4620 if (Comments.empty())
4621 return;
4622 bool ShouldPushCommentsInCurrentLine = true;
4623 bool HasTrailAlignedWithNextToken = false;
4624 unsigned StartOfTrailAlignedWithNextToken = 0;
4625 if (NextTok) {
4626 // We are skipping the first element intentionally.
4627 for (unsigned i = Comments.size() - 1; i > 0; --i) {
4628 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4629 HasTrailAlignedWithNextToken = true;
4630 StartOfTrailAlignedWithNextToken = i;
4634 for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4635 FormatToken *FormatTok = Comments[i];
4636 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4637 FormatTok->ContinuesLineCommentSection = false;
4638 } else {
4639 FormatTok->ContinuesLineCommentSection =
4640 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4642 if (!FormatTok->ContinuesLineCommentSection &&
4643 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
4644 ShouldPushCommentsInCurrentLine = false;
4646 if (ShouldPushCommentsInCurrentLine)
4647 pushToken(FormatTok);
4648 else
4649 CommentsBeforeNextToken.push_back(FormatTok);
4653 void UnwrappedLineParser::readToken(int LevelDifference) {
4654 SmallVector<FormatToken *, 1> Comments;
4655 bool PreviousWasComment = false;
4656 bool FirstNonCommentOnLine = false;
4657 do {
4658 FormatTok = Tokens->getNextToken();
4659 assert(FormatTok);
4660 while (FormatTok->getType() == TT_ConflictStart ||
4661 FormatTok->getType() == TT_ConflictEnd ||
4662 FormatTok->getType() == TT_ConflictAlternative) {
4663 if (FormatTok->getType() == TT_ConflictStart)
4664 conditionalCompilationStart(/*Unreachable=*/false);
4665 else if (FormatTok->getType() == TT_ConflictAlternative)
4666 conditionalCompilationAlternative();
4667 else if (FormatTok->getType() == TT_ConflictEnd)
4668 conditionalCompilationEnd();
4669 FormatTok = Tokens->getNextToken();
4670 FormatTok->MustBreakBefore = true;
4673 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4674 const FormatToken &Tok,
4675 bool PreviousWasComment) {
4676 auto IsFirstOnLine = [](const FormatToken &Tok) {
4677 return Tok.HasUnescapedNewline || Tok.IsFirst;
4680 // Consider preprocessor directives preceded by block comments as first
4681 // on line.
4682 if (PreviousWasComment)
4683 return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4684 return IsFirstOnLine(Tok);
4687 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4688 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4689 PreviousWasComment = FormatTok->is(tok::comment);
4691 while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
4692 (!Style.isVerilog() ||
4693 Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) &&
4694 FirstNonCommentOnLine) {
4695 distributeComments(Comments, FormatTok);
4696 Comments.clear();
4697 // If there is an unfinished unwrapped line, we flush the preprocessor
4698 // directives only after that unwrapped line was finished later.
4699 bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4700 ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4701 assert((LevelDifference >= 0 ||
4702 static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4703 "LevelDifference makes Line->Level negative");
4704 Line->Level += LevelDifference;
4705 // Comments stored before the preprocessor directive need to be output
4706 // before the preprocessor directive, at the same level as the
4707 // preprocessor directive, as we consider them to apply to the directive.
4708 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
4709 PPBranchLevel > 0) {
4710 Line->Level += PPBranchLevel;
4712 flushComments(isOnNewLine(*FormatTok));
4713 parsePPDirective();
4714 PreviousWasComment = FormatTok->is(tok::comment);
4715 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4716 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4719 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4720 !Line->InPPDirective) {
4721 continue;
4724 if (FormatTok->is(tok::identifier) &&
4725 Macros.defined(FormatTok->TokenText) &&
4726 // FIXME: Allow expanding macros in preprocessor directives.
4727 !Line->InPPDirective) {
4728 FormatToken *ID = FormatTok;
4729 unsigned Position = Tokens->getPosition();
4731 // To correctly parse the code, we need to replace the tokens of the macro
4732 // call with its expansion.
4733 auto PreCall = std::move(Line);
4734 Line.reset(new UnwrappedLine);
4735 bool OldInExpansion = InExpansion;
4736 InExpansion = true;
4737 // We parse the macro call into a new line.
4738 auto Args = parseMacroCall();
4739 InExpansion = OldInExpansion;
4740 assert(Line->Tokens.front().Tok == ID);
4741 // And remember the unexpanded macro call tokens.
4742 auto UnexpandedLine = std::move(Line);
4743 // Reset to the old line.
4744 Line = std::move(PreCall);
4746 LLVM_DEBUG({
4747 llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
4748 if (Args) {
4749 llvm::dbgs() << "(";
4750 for (const auto &Arg : Args.value())
4751 for (const auto &T : Arg)
4752 llvm::dbgs() << T->TokenText << " ";
4753 llvm::dbgs() << ")";
4755 llvm::dbgs() << "\n";
4757 if (Macros.objectLike(ID->TokenText) && Args &&
4758 !Macros.hasArity(ID->TokenText, Args->size())) {
4759 // The macro is either
4760 // - object-like, but we got argumnets, or
4761 // - overloaded to be both object-like and function-like, but none of
4762 // the function-like arities match the number of arguments.
4763 // Thus, expand as object-like macro.
4764 LLVM_DEBUG(llvm::dbgs()
4765 << "Macro \"" << ID->TokenText
4766 << "\" not overloaded for arity " << Args->size()
4767 << "or not function-like, using object-like overload.");
4768 Args.reset();
4769 UnexpandedLine->Tokens.resize(1);
4770 Tokens->setPosition(Position);
4771 nextToken();
4772 assert(!Args && Macros.objectLike(ID->TokenText));
4774 if ((!Args && Macros.objectLike(ID->TokenText)) ||
4775 (Args && Macros.hasArity(ID->TokenText, Args->size()))) {
4776 // Next, we insert the expanded tokens in the token stream at the
4777 // current position, and continue parsing.
4778 Unexpanded[ID] = std::move(UnexpandedLine);
4779 SmallVector<FormatToken *, 8> Expansion =
4780 Macros.expand(ID, std::move(Args));
4781 if (!Expansion.empty())
4782 FormatTok = Tokens->insertTokens(Expansion);
4784 LLVM_DEBUG({
4785 llvm::dbgs() << "Expanded: ";
4786 for (const auto &T : Expansion)
4787 llvm::dbgs() << T->TokenText << " ";
4788 llvm::dbgs() << "\n";
4790 } else {
4791 LLVM_DEBUG({
4792 llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
4793 << "\", because it was used ";
4794 if (Args)
4795 llvm::dbgs() << "with " << Args->size();
4796 else
4797 llvm::dbgs() << "without";
4798 llvm::dbgs() << " arguments, which doesn't match any definition.\n";
4800 Tokens->setPosition(Position);
4801 FormatTok = ID;
4805 if (FormatTok->isNot(tok::comment)) {
4806 distributeComments(Comments, FormatTok);
4807 Comments.clear();
4808 return;
4811 Comments.push_back(FormatTok);
4812 } while (!eof());
4814 distributeComments(Comments, nullptr);
4815 Comments.clear();
4818 namespace {
4819 template <typename Iterator>
4820 void pushTokens(Iterator Begin, Iterator End,
4821 llvm::SmallVectorImpl<FormatToken *> &Into) {
4822 for (auto I = Begin; I != End; ++I) {
4823 Into.push_back(I->Tok);
4824 for (const auto &Child : I->Children)
4825 pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
4828 } // namespace
4830 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
4831 UnwrappedLineParser::parseMacroCall() {
4832 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
4833 assert(Line->Tokens.empty());
4834 nextToken();
4835 if (FormatTok->isNot(tok::l_paren))
4836 return Args;
4837 unsigned Position = Tokens->getPosition();
4838 FormatToken *Tok = FormatTok;
4839 nextToken();
4840 Args.emplace();
4841 auto ArgStart = std::prev(Line->Tokens.end());
4843 int Parens = 0;
4844 do {
4845 switch (FormatTok->Tok.getKind()) {
4846 case tok::l_paren:
4847 ++Parens;
4848 nextToken();
4849 break;
4850 case tok::r_paren: {
4851 if (Parens > 0) {
4852 --Parens;
4853 nextToken();
4854 break;
4856 Args->push_back({});
4857 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
4858 nextToken();
4859 return Args;
4861 case tok::comma: {
4862 if (Parens > 0) {
4863 nextToken();
4864 break;
4866 Args->push_back({});
4867 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
4868 nextToken();
4869 ArgStart = std::prev(Line->Tokens.end());
4870 break;
4872 default:
4873 nextToken();
4874 break;
4876 } while (!eof());
4877 Line->Tokens.resize(1);
4878 Tokens->setPosition(Position);
4879 FormatTok = Tok;
4880 return {};
4883 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
4884 Line->Tokens.push_back(UnwrappedLineNode(Tok));
4885 if (MustBreakBeforeNextToken) {
4886 Line->Tokens.back().Tok->MustBreakBefore = true;
4887 MustBreakBeforeNextToken = false;
4891 } // end namespace format
4892 } // end namespace clang