[AMDGPU][AsmParser][NFC] Get rid of custom default operand handlers.
[llvm-project.git] / clang / lib / Format / UnwrappedLineParser.cpp
blob66c12057572574f5d7b8e3f3f3d23c238044eb81
1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "FormatTokenLexer.h"
18 #include "FormatTokenSource.h"
19 #include "Macros.h"
20 #include "TokenAnnotator.h"
21 #include "clang/Basic/TokenKinds.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/raw_os_ostream.h"
26 #include "llvm/Support/raw_ostream.h"
28 #include <algorithm>
29 #include <utility>
31 #define DEBUG_TYPE "format-parser"
33 namespace clang {
34 namespace format {
36 namespace {
38 void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
39 StringRef Prefix = "", bool PrintText = false) {
40 OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
41 << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
42 bool NewLine = false;
43 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
44 E = Line.Tokens.end();
45 I != E; ++I) {
46 if (NewLine) {
47 OS << Prefix;
48 NewLine = false;
50 OS << I->Tok->Tok.getName() << "["
51 << "T=" << (unsigned)I->Tok->getType()
52 << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
53 << "\"] ";
54 for (SmallVectorImpl<UnwrappedLine>::const_iterator
55 CI = I->Children.begin(),
56 CE = I->Children.end();
57 CI != CE; ++CI) {
58 OS << "\n";
59 printLine(OS, *CI, (Prefix + " ").str());
60 NewLine = true;
63 if (!NewLine)
64 OS << "\n";
67 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
68 printLine(llvm::dbgs(), Line);
71 class ScopedDeclarationState {
72 public:
73 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
74 bool MustBeDeclaration)
75 : Line(Line), Stack(Stack) {
76 Line.MustBeDeclaration = MustBeDeclaration;
77 Stack.push_back(MustBeDeclaration);
79 ~ScopedDeclarationState() {
80 Stack.pop_back();
81 if (!Stack.empty())
82 Line.MustBeDeclaration = Stack.back();
83 else
84 Line.MustBeDeclaration = true;
87 private:
88 UnwrappedLine &Line;
89 llvm::BitVector &Stack;
92 } // end anonymous namespace
94 class ScopedLineState {
95 public:
96 ScopedLineState(UnwrappedLineParser &Parser,
97 bool SwitchToPreprocessorLines = false)
98 : Parser(Parser), OriginalLines(Parser.CurrentLines) {
99 if (SwitchToPreprocessorLines)
100 Parser.CurrentLines = &Parser.PreprocessorDirectives;
101 else if (!Parser.Line->Tokens.empty())
102 Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
103 PreBlockLine = std::move(Parser.Line);
104 Parser.Line = std::make_unique<UnwrappedLine>();
105 Parser.Line->Level = PreBlockLine->Level;
106 Parser.Line->PPLevel = PreBlockLine->PPLevel;
107 Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
108 Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
111 ~ScopedLineState() {
112 if (!Parser.Line->Tokens.empty())
113 Parser.addUnwrappedLine();
114 assert(Parser.Line->Tokens.empty());
115 Parser.Line = std::move(PreBlockLine);
116 if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
117 Parser.MustBreakBeforeNextToken = true;
118 Parser.CurrentLines = OriginalLines;
121 private:
122 UnwrappedLineParser &Parser;
124 std::unique_ptr<UnwrappedLine> PreBlockLine;
125 SmallVectorImpl<UnwrappedLine> *OriginalLines;
128 class CompoundStatementIndenter {
129 public:
130 CompoundStatementIndenter(UnwrappedLineParser *Parser,
131 const FormatStyle &Style, unsigned &LineLevel)
132 : CompoundStatementIndenter(Parser, LineLevel,
133 Style.BraceWrapping.AfterControlStatement,
134 Style.BraceWrapping.IndentBraces) {}
135 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
136 bool WrapBrace, bool IndentBrace)
137 : LineLevel(LineLevel), OldLineLevel(LineLevel) {
138 if (WrapBrace)
139 Parser->addUnwrappedLine();
140 if (IndentBrace)
141 ++LineLevel;
143 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
145 private:
146 unsigned &LineLevel;
147 unsigned OldLineLevel;
150 UnwrappedLineParser::UnwrappedLineParser(
151 SourceManager &SourceMgr, const FormatStyle &Style,
152 const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
153 ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
154 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
155 IdentifierTable &IdentTable)
156 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
157 CurrentLines(&Lines), Style(Style), Keywords(Keywords),
158 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
159 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
160 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
161 ? IG_Rejected
162 : IG_Inited),
163 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
164 Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {}
166 void UnwrappedLineParser::reset() {
167 PPBranchLevel = -1;
168 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
169 ? IG_Rejected
170 : IG_Inited;
171 IncludeGuardToken = nullptr;
172 Line.reset(new UnwrappedLine);
173 CommentsBeforeNextToken.clear();
174 FormatTok = nullptr;
175 MustBreakBeforeNextToken = false;
176 PreprocessorDirectives.clear();
177 CurrentLines = &Lines;
178 DeclarationScopeStack.clear();
179 NestedTooDeep.clear();
180 PPStack.clear();
181 Line->FirstStartColumn = FirstStartColumn;
183 if (!Unexpanded.empty())
184 for (FormatToken *Token : AllTokens)
185 Token->MacroCtx.reset();
186 CurrentExpandedLines.clear();
187 ExpandedLines.clear();
188 Unexpanded.clear();
189 InExpansion = false;
190 Reconstruct.reset();
193 void UnwrappedLineParser::parse() {
194 IndexedTokenSource TokenSource(AllTokens);
195 Line->FirstStartColumn = FirstStartColumn;
196 do {
197 LLVM_DEBUG(llvm::dbgs() << "----\n");
198 reset();
199 Tokens = &TokenSource;
200 TokenSource.reset();
202 readToken();
203 parseFile();
205 // If we found an include guard then all preprocessor directives (other than
206 // the guard) are over-indented by one.
207 if (IncludeGuard == IG_Found) {
208 for (auto &Line : Lines)
209 if (Line.InPPDirective && Line.Level > 0)
210 --Line.Level;
213 // Create line with eof token.
214 assert(FormatTok->is(tok::eof));
215 pushToken(FormatTok);
216 addUnwrappedLine();
218 // In a first run, format everything with the lines containing macro calls
219 // replaced by the expansion.
220 if (!ExpandedLines.empty()) {
221 LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
222 for (const auto &Line : Lines) {
223 if (!Line.Tokens.empty()) {
224 auto it = ExpandedLines.find(Line.Tokens.begin()->Tok);
225 if (it != ExpandedLines.end()) {
226 for (const auto &Expanded : it->second) {
227 LLVM_DEBUG(printDebugInfo(Expanded));
228 Callback.consumeUnwrappedLine(Expanded);
230 continue;
233 LLVM_DEBUG(printDebugInfo(Line));
234 Callback.consumeUnwrappedLine(Line);
236 Callback.finishRun();
239 LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
240 for (const UnwrappedLine &Line : Lines) {
241 LLVM_DEBUG(printDebugInfo(Line));
242 Callback.consumeUnwrappedLine(Line);
244 Callback.finishRun();
245 Lines.clear();
246 while (!PPLevelBranchIndex.empty() &&
247 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
248 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
249 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
251 if (!PPLevelBranchIndex.empty()) {
252 ++PPLevelBranchIndex.back();
253 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
254 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
256 } while (!PPLevelBranchIndex.empty());
259 void UnwrappedLineParser::parseFile() {
260 // The top-level context in a file always has declarations, except for pre-
261 // processor directives and JavaScript files.
262 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
263 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
264 MustBeDeclaration);
265 if (Style.Language == FormatStyle::LK_TextProto)
266 parseBracedList();
267 else
268 parseLevel();
269 // Make sure to format the remaining tokens.
271 // LK_TextProto is special since its top-level is parsed as the body of a
272 // braced list, which does not necessarily have natural line separators such
273 // as a semicolon. Comments after the last entry that have been determined to
274 // not belong to that line, as in:
275 // key: value
276 // // endfile comment
277 // do not have a chance to be put on a line of their own until this point.
278 // Here we add this newline before end-of-file comments.
279 if (Style.Language == FormatStyle::LK_TextProto &&
280 !CommentsBeforeNextToken.empty()) {
281 addUnwrappedLine();
283 flushComments(true);
284 addUnwrappedLine();
287 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
288 do {
289 switch (FormatTok->Tok.getKind()) {
290 case tok::l_brace:
291 return;
292 default:
293 if (FormatTok->is(Keywords.kw_where)) {
294 addUnwrappedLine();
295 nextToken();
296 parseCSharpGenericTypeConstraint();
297 break;
299 nextToken();
300 break;
302 } while (!eof());
305 void UnwrappedLineParser::parseCSharpAttribute() {
306 int UnpairedSquareBrackets = 1;
307 do {
308 switch (FormatTok->Tok.getKind()) {
309 case tok::r_square:
310 nextToken();
311 --UnpairedSquareBrackets;
312 if (UnpairedSquareBrackets == 0) {
313 addUnwrappedLine();
314 return;
316 break;
317 case tok::l_square:
318 ++UnpairedSquareBrackets;
319 nextToken();
320 break;
321 default:
322 nextToken();
323 break;
325 } while (!eof());
328 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
329 if (!Lines.empty() && Lines.back().InPPDirective)
330 return true;
332 const FormatToken *Previous = Tokens->getPreviousToken();
333 return Previous && Previous->is(tok::comment) &&
334 (Previous->IsMultiline || Previous->NewlinesBefore > 0);
337 /// \brief Parses a level, that is ???.
338 /// \param OpeningBrace Opening brace (\p nullptr if absent) of that level
339 /// \param CanContainBracedList If the content can contain (at any level) a
340 /// braced list.
341 /// \param NextLBracesType The type for left brace found in this level.
342 /// \param IfKind The \p if statement kind in the level.
343 /// \param IfLeftBrace The left brace of the \p if block in the level.
344 /// \returns true if a simple block of if/else/for/while, or false otherwise.
345 /// (A simple block has a single statement.)
346 bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
347 bool CanContainBracedList,
348 TokenType NextLBracesType,
349 IfStmtKind *IfKind,
350 FormatToken **IfLeftBrace) {
351 auto NextLevelLBracesType = NextLBracesType == TT_CompoundRequirementLBrace
352 ? TT_BracedListLBrace
353 : TT_Unknown;
354 const bool IsPrecededByCommentOrPPDirective =
355 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
356 FormatToken *IfLBrace = nullptr;
357 bool HasDoWhile = false;
358 bool HasLabel = false;
359 unsigned StatementCount = 0;
360 bool SwitchLabelEncountered = false;
362 do {
363 if (FormatTok->getType() == TT_AttributeMacro) {
364 nextToken();
365 continue;
367 tok::TokenKind kind = FormatTok->Tok.getKind();
368 if (FormatTok->getType() == TT_MacroBlockBegin)
369 kind = tok::l_brace;
370 else if (FormatTok->getType() == TT_MacroBlockEnd)
371 kind = tok::r_brace;
373 auto ParseDefault = [this, OpeningBrace, NextLevelLBracesType, IfKind,
374 &IfLBrace, &HasDoWhile, &HasLabel, &StatementCount] {
375 parseStructuralElement(!OpeningBrace, NextLevelLBracesType, IfKind,
376 &IfLBrace, HasDoWhile ? nullptr : &HasDoWhile,
377 HasLabel ? nullptr : &HasLabel);
378 ++StatementCount;
379 assert(StatementCount > 0 && "StatementCount overflow!");
382 switch (kind) {
383 case tok::comment:
384 nextToken();
385 addUnwrappedLine();
386 break;
387 case tok::l_brace:
388 if (NextLBracesType != TT_Unknown) {
389 FormatTok->setFinalizedType(NextLBracesType);
390 } else if (FormatTok->Previous &&
391 FormatTok->Previous->ClosesRequiresClause) {
392 // We need the 'default' case here to correctly parse a function
393 // l_brace.
394 ParseDefault();
395 continue;
397 if (CanContainBracedList && !FormatTok->is(TT_MacroBlockBegin) &&
398 tryToParseBracedList()) {
399 continue;
401 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
402 /*MunchSemi=*/true, /*KeepBraces=*/true, /*IfKind=*/nullptr,
403 /*UnindentWhitesmithsBraces=*/false, CanContainBracedList,
404 NextLBracesType);
405 ++StatementCount;
406 assert(StatementCount > 0 && "StatementCount overflow!");
407 addUnwrappedLine();
408 break;
409 case tok::r_brace:
410 if (OpeningBrace) {
411 if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
412 !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
413 return false;
415 if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
416 HasDoWhile || IsPrecededByCommentOrPPDirective ||
417 precededByCommentOrPPDirective()) {
418 return false;
420 const FormatToken *Next = Tokens->peekNextToken();
421 if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
422 return false;
423 if (IfLeftBrace)
424 *IfLeftBrace = IfLBrace;
425 return true;
427 nextToken();
428 addUnwrappedLine();
429 break;
430 case tok::kw_default: {
431 unsigned StoredPosition = Tokens->getPosition();
432 FormatToken *Next;
433 do {
434 Next = Tokens->getNextToken();
435 assert(Next);
436 } while (Next->is(tok::comment));
437 FormatTok = Tokens->setPosition(StoredPosition);
438 if (Next->isNot(tok::colon)) {
439 // default not followed by ':' is not a case label; treat it like
440 // an identifier.
441 parseStructuralElement();
442 break;
444 // Else, if it is 'default:', fall through to the case handling.
445 [[fallthrough]];
447 case tok::kw_case:
448 if (Style.isProto() || Style.isVerilog() ||
449 (Style.isJavaScript() && Line->MustBeDeclaration)) {
450 // Proto: there are no switch/case statements
451 // Verilog: Case labels don't have this word. We handle case
452 // labels including default in TokenAnnotator.
453 // JavaScript: A 'case: string' style field declaration.
454 ParseDefault();
455 break;
457 if (!SwitchLabelEncountered &&
458 (Style.IndentCaseLabels ||
459 (Line->InPPDirective && Line->Level == 1))) {
460 ++Line->Level;
462 SwitchLabelEncountered = true;
463 parseStructuralElement();
464 break;
465 case tok::l_square:
466 if (Style.isCSharp()) {
467 nextToken();
468 parseCSharpAttribute();
469 break;
471 if (handleCppAttributes())
472 break;
473 [[fallthrough]];
474 default:
475 ParseDefault();
476 break;
478 } while (!eof());
480 return false;
483 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
484 // We'll parse forward through the tokens until we hit
485 // a closing brace or eof - note that getNextToken() will
486 // parse macros, so this will magically work inside macro
487 // definitions, too.
488 unsigned StoredPosition = Tokens->getPosition();
489 FormatToken *Tok = FormatTok;
490 const FormatToken *PrevTok = Tok->Previous;
491 // Keep a stack of positions of lbrace tokens. We will
492 // update information about whether an lbrace starts a
493 // braced init list or a different block during the loop.
494 struct StackEntry {
495 FormatToken *Tok;
496 const FormatToken *PrevTok;
498 SmallVector<StackEntry, 8> LBraceStack;
499 assert(Tok->is(tok::l_brace));
500 do {
501 // Get next non-comment token.
502 FormatToken *NextTok;
503 do {
504 NextTok = Tokens->getNextToken();
505 } while (NextTok->is(tok::comment));
507 switch (Tok->Tok.getKind()) {
508 case tok::l_brace:
509 if (Style.isJavaScript() && PrevTok) {
510 if (PrevTok->isOneOf(tok::colon, tok::less)) {
511 // A ':' indicates this code is in a type, or a braced list
512 // following a label in an object literal ({a: {b: 1}}).
513 // A '<' could be an object used in a comparison, but that is nonsense
514 // code (can never return true), so more likely it is a generic type
515 // argument (`X<{a: string; b: number}>`).
516 // The code below could be confused by semicolons between the
517 // individual members in a type member list, which would normally
518 // trigger BK_Block. In both cases, this must be parsed as an inline
519 // braced init.
520 Tok->setBlockKind(BK_BracedInit);
521 } else if (PrevTok->is(tok::r_paren)) {
522 // `) { }` can only occur in function or method declarations in JS.
523 Tok->setBlockKind(BK_Block);
525 } else {
526 Tok->setBlockKind(BK_Unknown);
528 LBraceStack.push_back({Tok, PrevTok});
529 break;
530 case tok::r_brace:
531 if (LBraceStack.empty())
532 break;
533 if (LBraceStack.back().Tok->is(BK_Unknown)) {
534 bool ProbablyBracedList = false;
535 if (Style.Language == FormatStyle::LK_Proto) {
536 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
537 } else {
538 // Skip NextTok over preprocessor lines, otherwise we may not
539 // properly diagnose the block as a braced intializer
540 // if the comma separator appears after the pp directive.
541 while (NextTok->is(tok::hash)) {
542 ScopedMacroState MacroState(*Line, Tokens, NextTok);
543 do {
544 NextTok = Tokens->getNextToken();
545 } while (NextTok->isNot(tok::eof));
548 // Using OriginalColumn to distinguish between ObjC methods and
549 // binary operators is a bit hacky.
550 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
551 NextTok->OriginalColumn == 0;
553 // Try to detect a braced list. Note that regardless how we mark inner
554 // braces here, we will overwrite the BlockKind later if we parse a
555 // braced list (where all blocks inside are by default braced lists),
556 // or when we explicitly detect blocks (for example while parsing
557 // lambdas).
559 // If we already marked the opening brace as braced list, the closing
560 // must also be part of it.
561 ProbablyBracedList = LBraceStack.back().Tok->is(TT_BracedListLBrace);
563 ProbablyBracedList = ProbablyBracedList ||
564 (Style.isJavaScript() &&
565 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
566 Keywords.kw_as));
567 ProbablyBracedList = ProbablyBracedList ||
568 (Style.isCpp() && NextTok->is(tok::l_paren));
570 // If there is a comma, semicolon or right paren after the closing
571 // brace, we assume this is a braced initializer list.
572 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
573 // braced list in JS.
574 ProbablyBracedList =
575 ProbablyBracedList ||
576 NextTok->isOneOf(tok::comma, tok::period, tok::colon,
577 tok::r_paren, tok::r_square, tok::ellipsis);
579 // Distinguish between braced list in a constructor initializer list
580 // followed by constructor body, or just adjacent blocks.
581 ProbablyBracedList =
582 ProbablyBracedList ||
583 (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok &&
584 LBraceStack.back().PrevTok->is(tok::identifier));
586 ProbablyBracedList =
587 ProbablyBracedList ||
588 (NextTok->is(tok::identifier) &&
589 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));
591 ProbablyBracedList = ProbablyBracedList ||
592 (NextTok->is(tok::semi) &&
593 (!ExpectClassBody || LBraceStack.size() != 1));
595 ProbablyBracedList =
596 ProbablyBracedList ||
597 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
599 if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
600 // We can have an array subscript after a braced init
601 // list, but C++11 attributes are expected after blocks.
602 NextTok = Tokens->getNextToken();
603 ProbablyBracedList = NextTok->isNot(tok::l_square);
606 if (ProbablyBracedList) {
607 Tok->setBlockKind(BK_BracedInit);
608 LBraceStack.back().Tok->setBlockKind(BK_BracedInit);
609 } else {
610 Tok->setBlockKind(BK_Block);
611 LBraceStack.back().Tok->setBlockKind(BK_Block);
614 LBraceStack.pop_back();
615 break;
616 case tok::identifier:
617 if (!Tok->is(TT_StatementMacro))
618 break;
619 [[fallthrough]];
620 case tok::at:
621 case tok::semi:
622 case tok::kw_if:
623 case tok::kw_while:
624 case tok::kw_for:
625 case tok::kw_switch:
626 case tok::kw_try:
627 case tok::kw___try:
628 if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown))
629 LBraceStack.back().Tok->setBlockKind(BK_Block);
630 break;
631 default:
632 break;
634 PrevTok = Tok;
635 Tok = NextTok;
636 } while (Tok->isNot(tok::eof) && !LBraceStack.empty());
638 // Assume other blocks for all unclosed opening braces.
639 for (const auto &Entry : LBraceStack)
640 if (Entry.Tok->is(BK_Unknown))
641 Entry.Tok->setBlockKind(BK_Block);
643 FormatTok = Tokens->setPosition(StoredPosition);
/// Mixes the \c std::hash of \p v into \p seed in place. The result depends
/// on the previous value of \p seed, so the order of calls matters.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  std::hash<T> hasher;
  seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}
652 size_t UnwrappedLineParser::computePPHash() const {
653 size_t h = 0;
654 for (const auto &i : PPStack) {
655 hash_combine(h, size_t(i.Kind));
656 hash_combine(h, i.Line);
658 return h;
661 // Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
662 // is not null, subtracts its length (plus the preceding space) when computing
663 // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
664 // running the token annotator on it so that we can restore them afterward.
665 bool UnwrappedLineParser::mightFitOnOneLine(
666 UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
667 const auto ColumnLimit = Style.ColumnLimit;
668 if (ColumnLimit == 0)
669 return true;
671 auto &Tokens = ParsedLine.Tokens;
672 assert(!Tokens.empty());
674 const auto *LastToken = Tokens.back().Tok;
675 assert(LastToken);
677 SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
679 int Index = 0;
680 for (const auto &Token : Tokens) {
681 assert(Token.Tok);
682 auto &SavedToken = SavedTokens[Index++];
683 SavedToken.Tok = new FormatToken;
684 SavedToken.Tok->copyFrom(*Token.Tok);
685 SavedToken.Children = std::move(Token.Children);
688 AnnotatedLine Line(ParsedLine);
689 assert(Line.Last == LastToken);
691 TokenAnnotator Annotator(Style, Keywords);
692 Annotator.annotate(Line);
693 Annotator.calculateFormattingInformation(Line);
695 auto Length = LastToken->TotalLength;
696 if (OpeningBrace) {
697 assert(OpeningBrace != Tokens.front().Tok);
698 if (auto Prev = OpeningBrace->Previous;
699 Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
700 Length -= ColumnLimit;
702 Length -= OpeningBrace->TokenText.size() + 1;
705 if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) {
706 assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
707 Length -= FirstToken->TokenText.size() + 1;
710 Index = 0;
711 for (auto &Token : Tokens) {
712 const auto &SavedToken = SavedTokens[Index++];
713 Token.Tok->copyFrom(*SavedToken.Tok);
714 Token.Children = std::move(SavedToken.Children);
715 delete SavedToken.Tok;
718 // If these change PPLevel needs to be used for get correct indentation.
719 assert(!Line.InMacroBody);
720 assert(!Line.InPPDirective);
721 return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
724 FormatToken *UnwrappedLineParser::parseBlock(
725 bool MustBeDeclaration, unsigned AddLevels, bool MunchSemi, bool KeepBraces,
726 IfStmtKind *IfKind, bool UnindentWhitesmithsBraces,
727 bool CanContainBracedList, TokenType NextLBracesType) {
728 auto HandleVerilogBlockLabel = [this]() {
729 // ":" name
730 if (Style.isVerilog() && FormatTok->is(tok::colon)) {
731 nextToken();
732 if (Keywords.isVerilogIdentifier(*FormatTok))
733 nextToken();
737 // Whether this is a Verilog-specific block that has a special header like a
738 // module.
739 const bool VerilogHierarchy =
740 Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok);
741 assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
742 (Style.isVerilog() &&
743 (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
744 "'{' or macro block token expected");
745 FormatToken *Tok = FormatTok;
746 const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
747 auto Index = CurrentLines->size();
748 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
749 FormatTok->setBlockKind(BK_Block);
751 // For Whitesmiths mode, jump to the next level prior to skipping over the
752 // braces.
753 if (!VerilogHierarchy && AddLevels > 0 &&
754 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
755 ++Line->Level;
758 size_t PPStartHash = computePPHash();
760 const unsigned InitialLevel = Line->Level;
761 if (VerilogHierarchy) {
762 AddLevels += parseVerilogHierarchyHeader();
763 } else {
764 nextToken(/*LevelDifference=*/AddLevels);
765 HandleVerilogBlockLabel();
768 // Bail out if there are too many levels. Otherwise, the stack might overflow.
769 if (Line->Level > 300)
770 return nullptr;
772 if (MacroBlock && FormatTok->is(tok::l_paren))
773 parseParens();
775 size_t NbPreprocessorDirectives =
776 !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
777 addUnwrappedLine();
778 size_t OpeningLineIndex =
779 CurrentLines->empty()
780 ? (UnwrappedLine::kInvalidIndex)
781 : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
783 // Whitesmiths is weird here. The brace needs to be indented for the namespace
784 // block, but the block itself may not be indented depending on the style
785 // settings. This allows the format to back up one level in those cases.
786 if (UnindentWhitesmithsBraces)
787 --Line->Level;
789 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
790 MustBeDeclaration);
791 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
792 Line->Level += AddLevels;
794 FormatToken *IfLBrace = nullptr;
795 const bool SimpleBlock =
796 parseLevel(Tok, CanContainBracedList, NextLBracesType, IfKind, &IfLBrace);
798 if (eof())
799 return IfLBrace;
801 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
802 : !FormatTok->is(tok::r_brace)) {
803 Line->Level = InitialLevel;
804 FormatTok->setBlockKind(BK_Block);
805 return IfLBrace;
808 const bool IsFunctionRBrace =
809 FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace);
811 auto RemoveBraces = [=]() mutable {
812 if (!SimpleBlock)
813 return false;
814 assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
815 assert(FormatTok->is(tok::r_brace));
816 const bool WrappedOpeningBrace = !Tok->Previous;
817 if (WrappedOpeningBrace && FollowedByComment)
818 return false;
819 const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
820 if (KeepBraces && !HasRequiredIfBraces)
821 return false;
822 if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
823 const FormatToken *Previous = Tokens->getPreviousToken();
824 assert(Previous);
825 if (Previous->is(tok::r_brace) && !Previous->Optional)
826 return false;
828 assert(!CurrentLines->empty());
829 auto &LastLine = CurrentLines->back();
830 if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
831 return false;
832 if (Tok->is(TT_ElseLBrace))
833 return true;
834 if (WrappedOpeningBrace) {
835 assert(Index > 0);
836 --Index; // The line above the wrapped l_brace.
837 Tok = nullptr;
839 return mightFitOnOneLine((*CurrentLines)[Index], Tok);
841 if (RemoveBraces()) {
842 Tok->MatchingParen = FormatTok;
843 FormatTok->MatchingParen = Tok;
846 size_t PPEndHash = computePPHash();
848 // Munch the closing brace.
849 nextToken(/*LevelDifference=*/-AddLevels);
851 // When this is a function block and there is an unnecessary semicolon
852 // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
853 // it later).
854 if (Style.RemoveSemicolon && IsFunctionRBrace) {
855 while (FormatTok->is(tok::semi)) {
856 FormatTok->Optional = true;
857 nextToken();
861 HandleVerilogBlockLabel();
863 if (MacroBlock && FormatTok->is(tok::l_paren))
864 parseParens();
866 Line->Level = InitialLevel;
868 if (FormatTok->is(tok::kw_noexcept)) {
869 // A noexcept in a requires expression.
870 nextToken();
873 if (FormatTok->is(tok::arrow)) {
874 // Following the } or noexcept we can find a trailing return type arrow
875 // as part of an implicit conversion constraint.
876 nextToken();
877 parseStructuralElement();
880 if (MunchSemi && FormatTok->is(tok::semi))
881 nextToken();
883 if (PPStartHash == PPEndHash) {
884 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
885 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
886 // Update the opening line to add the forward reference as well
887 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
888 CurrentLines->size() - 1;
892 return IfLBrace;
895 static bool isGoogScope(const UnwrappedLine &Line) {
896 // FIXME: Closure-library specific stuff should not be hard-coded but be
897 // configurable.
898 if (Line.Tokens.size() < 4)
899 return false;
900 auto I = Line.Tokens.begin();
901 if (I->Tok->TokenText != "goog")
902 return false;
903 ++I;
904 if (I->Tok->isNot(tok::period))
905 return false;
906 ++I;
907 if (I->Tok->TokenText != "scope")
908 return false;
909 ++I;
910 return I->Tok->is(tok::l_paren);
913 static bool isIIFE(const UnwrappedLine &Line,
914 const AdditionalKeywords &Keywords) {
915 // Look for the start of an immediately invoked anonymous function.
916 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
917 // This is commonly done in JavaScript to create a new, anonymous scope.
918 // Example: (function() { ... })()
919 if (Line.Tokens.size() < 3)
920 return false;
921 auto I = Line.Tokens.begin();
922 if (I->Tok->isNot(tok::l_paren))
923 return false;
924 ++I;
925 if (I->Tok->isNot(Keywords.kw_function))
926 return false;
927 ++I;
928 return I->Tok->is(tok::l_paren);
931 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
932 const FormatToken &InitialToken) {
933 tok::TokenKind Kind = InitialToken.Tok.getKind();
934 if (InitialToken.is(TT_NamespaceMacro))
935 Kind = tok::kw_namespace;
937 switch (Kind) {
938 case tok::kw_namespace:
939 return Style.BraceWrapping.AfterNamespace;
940 case tok::kw_class:
941 return Style.BraceWrapping.AfterClass;
942 case tok::kw_union:
943 return Style.BraceWrapping.AfterUnion;
944 case tok::kw_struct:
945 return Style.BraceWrapping.AfterStruct;
946 case tok::kw_enum:
947 return Style.BraceWrapping.AfterEnum;
948 default:
949 return false;
953 void UnwrappedLineParser::parseChildBlock(
954 bool CanContainBracedList, clang::format::TokenType NextLBracesType) {
955 assert(FormatTok->is(tok::l_brace));
956 FormatTok->setBlockKind(BK_Block);
957 const FormatToken *OpeningBrace = FormatTok;
958 nextToken();
960 bool SkipIndent = (Style.isJavaScript() &&
961 (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
962 ScopedLineState LineState(*this);
963 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
964 /*MustBeDeclaration=*/false);
965 Line->Level += SkipIndent ? 0 : 1;
966 parseLevel(OpeningBrace, CanContainBracedList, NextLBracesType);
967 flushComments(isOnNewLine(*FormatTok));
968 Line->Level -= SkipIndent ? 0 : 1;
970 nextToken();
973 void UnwrappedLineParser::parsePPDirective() {
974 assert(FormatTok->is(tok::hash) && "'#' expected");
975 ScopedMacroState MacroState(*Line, Tokens, FormatTok);
977 nextToken();
979 if (!FormatTok->Tok.getIdentifierInfo()) {
980 parsePPUnknown();
981 return;
984 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
985 case tok::pp_define:
986 parsePPDefine();
987 return;
988 case tok::pp_if:
989 parsePPIf(/*IfDef=*/false);
990 break;
991 case tok::pp_ifdef:
992 case tok::pp_ifndef:
993 parsePPIf(/*IfDef=*/true);
994 break;
995 case tok::pp_else:
996 case tok::pp_elifdef:
997 case tok::pp_elifndef:
998 case tok::pp_elif:
999 parsePPElse();
1000 break;
1001 case tok::pp_endif:
1002 parsePPEndIf();
1003 break;
1004 case tok::pp_pragma:
1005 parsePPPragma();
1006 break;
1007 default:
1008 parsePPUnknown();
1009 break;
1013 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1014 size_t Line = CurrentLines->size();
1015 if (CurrentLines == &PreprocessorDirectives)
1016 Line += Lines.size();
1018 if (Unreachable ||
1019 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1020 PPStack.push_back({PP_Unreachable, Line});
1021 } else {
1022 PPStack.push_back({PP_Conditional, Line});
1026 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1027 ++PPBranchLevel;
1028 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1029 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1030 PPLevelBranchIndex.push_back(0);
1031 PPLevelBranchCount.push_back(0);
1033 PPChainBranchIndex.push(Unreachable ? -1 : 0);
1034 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1035 conditionalCompilationCondition(Unreachable || Skip);
1038 void UnwrappedLineParser::conditionalCompilationAlternative() {
1039 if (!PPStack.empty())
1040 PPStack.pop_back();
1041 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1042 if (!PPChainBranchIndex.empty())
1043 ++PPChainBranchIndex.top();
1044 conditionalCompilationCondition(
1045 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1046 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1049 void UnwrappedLineParser::conditionalCompilationEnd() {
1050 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1051 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1052 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1053 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1055 // Guard against #endif's without #if.
1056 if (PPBranchLevel > -1)
1057 --PPBranchLevel;
1058 if (!PPChainBranchIndex.empty())
1059 PPChainBranchIndex.pop();
1060 if (!PPStack.empty())
1061 PPStack.pop_back();
1064 void UnwrappedLineParser::parsePPIf(bool IfDef) {
1065 bool IfNDef = FormatTok->is(tok::pp_ifndef);
1066 nextToken();
1067 bool Unreachable = false;
1068 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1069 Unreachable = true;
1070 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1071 Unreachable = true;
1072 conditionalCompilationStart(Unreachable);
1073 FormatToken *IfCondition = FormatTok;
1074 // If there's a #ifndef on the first line, and the only lines before it are
1075 // comments, it could be an include guard.
1076 bool MaybeIncludeGuard = IfNDef;
1077 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1078 for (auto &Line : Lines) {
1079 if (!Line.Tokens.front().Tok->is(tok::comment)) {
1080 MaybeIncludeGuard = false;
1081 IncludeGuard = IG_Rejected;
1082 break;
1086 --PPBranchLevel;
1087 parsePPUnknown();
1088 ++PPBranchLevel;
1089 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1090 IncludeGuard = IG_IfNdefed;
1091 IncludeGuardToken = IfCondition;
1095 void UnwrappedLineParser::parsePPElse() {
1096 // If a potential include guard has an #else, it's not an include guard.
1097 if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1098 IncludeGuard = IG_Rejected;
1099 // Don't crash when there is an #else without an #if.
1100 assert(PPBranchLevel >= -1);
1101 if (PPBranchLevel == -1)
1102 conditionalCompilationStart(/*Unreachable=*/true);
1103 conditionalCompilationAlternative();
1104 --PPBranchLevel;
1105 parsePPUnknown();
1106 ++PPBranchLevel;
1109 void UnwrappedLineParser::parsePPEndIf() {
1110 conditionalCompilationEnd();
1111 parsePPUnknown();
1112 // If the #endif of a potential include guard is the last thing in the file,
1113 // then we found an include guard.
1114 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1115 Style.IndentPPDirectives != FormatStyle::PPDIS_None) {
1116 IncludeGuard = IG_Found;
1120 void UnwrappedLineParser::parsePPDefine() {
1121 nextToken();
1123 if (!FormatTok->Tok.getIdentifierInfo()) {
1124 IncludeGuard = IG_Rejected;
1125 IncludeGuardToken = nullptr;
1126 parsePPUnknown();
1127 return;
1130 if (IncludeGuard == IG_IfNdefed &&
1131 IncludeGuardToken->TokenText == FormatTok->TokenText) {
1132 IncludeGuard = IG_Defined;
1133 IncludeGuardToken = nullptr;
1134 for (auto &Line : Lines) {
1135 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1136 IncludeGuard = IG_Rejected;
1137 break;
1142 // In the context of a define, even keywords should be treated as normal
1143 // identifiers. Setting the kind to identifier is not enough, because we need
1144 // to treat additional keywords like __except as well, which are already
1145 // identifiers. Setting the identifier info to null interferes with include
1146 // guard processing above, and changes preprocessing nesting.
1147 FormatTok->Tok.setKind(tok::identifier);
1148 FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1149 nextToken();
1150 if (FormatTok->Tok.getKind() == tok::l_paren &&
1151 !FormatTok->hasWhitespaceBefore()) {
1152 parseParens();
1154 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1155 Line->Level += PPBranchLevel + 1;
1156 addUnwrappedLine();
1157 ++Line->Level;
1159 Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
1160 assert((int)Line->PPLevel >= 0);
1161 Line->InMacroBody = true;
1163 // Errors during a preprocessor directive can only affect the layout of the
1164 // preprocessor directive, and thus we ignore them. An alternative approach
1165 // would be to use the same approach we use on the file level (no
1166 // re-indentation if there was a structural error) within the macro
1167 // definition.
1168 parseFile();
1171 void UnwrappedLineParser::parsePPPragma() {
1172 Line->InPragmaDirective = true;
1173 parsePPUnknown();
1176 void UnwrappedLineParser::parsePPUnknown() {
1177 do {
1178 nextToken();
1179 } while (!eof());
1180 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1181 Line->Level += PPBranchLevel + 1;
1182 addUnwrappedLine();
1185 // Here we exclude certain tokens that are not usually the first token in an
1186 // unwrapped line. This is used in attempt to distinguish macro calls without
1187 // trailing semicolons from other constructs split to several lines.
1188 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1189 // Semicolon can be a null-statement, l_square can be a start of a macro or
1190 // a C++11 attribute, but this doesn't seem to be common.
1191 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
1192 Tok.isNot(TT_AttributeSquare) &&
1193 // Tokens that can only be used as binary operators and a part of
1194 // overloaded operator names.
1195 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
1196 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
1197 Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
1198 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
1199 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
1200 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
1201 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
1202 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
1203 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
1204 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
1205 Tok.isNot(tok::lesslessequal) &&
1206 // Colon is used in labels, base class lists, initializer lists,
1207 // range-based for loops, ternary operator, but should never be the
1208 // first token in an unwrapped line.
1209 Tok.isNot(tok::colon) &&
1210 // 'noexcept' is a trailing annotation.
1211 Tok.isNot(tok::kw_noexcept);
1214 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1215 const FormatToken *FormatTok) {
1216 // FIXME: This returns true for C/C++ keywords like 'struct'.
1217 return FormatTok->is(tok::identifier) &&
1218 (!FormatTok->Tok.getIdentifierInfo() ||
1219 !FormatTok->isOneOf(
1220 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1221 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1222 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1223 Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1224 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1225 Keywords.kw_instanceof, Keywords.kw_interface,
1226 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1229 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1230 const FormatToken *FormatTok) {
1231 return FormatTok->Tok.isLiteral() ||
1232 FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1233 mustBeJSIdent(Keywords, FormatTok);
1236 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1237 // when encountered after a value (see mustBeJSIdentOrValue).
1238 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1239 const FormatToken *FormatTok) {
1240 return FormatTok->isOneOf(
1241 tok::kw_return, Keywords.kw_yield,
1242 // conditionals
1243 tok::kw_if, tok::kw_else,
1244 // loops
1245 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1246 // switch/case
1247 tok::kw_switch, tok::kw_case,
1248 // exceptions
1249 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1250 // declaration
1251 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1252 Keywords.kw_async, Keywords.kw_function,
1253 // import/export
1254 Keywords.kw_import, tok::kw_export);
1257 // Checks whether a token is a type in K&R C (aka C78).
1258 static bool isC78Type(const FormatToken &Tok) {
1259 return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1260 tok::kw_unsigned, tok::kw_float, tok::kw_double,
1261 tok::identifier);
1264 // This function checks whether a token starts the first parameter declaration
1265 // in a K&R C (aka C78) function definition, e.g.:
1266 // int f(a, b)
1267 // short a, b;
1268 // {
1269 // return a + b;
1270 // }
1271 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1272 const FormatToken *FuncName) {
1273 assert(Tok);
1274 assert(Next);
1275 assert(FuncName);
1277 if (FuncName->isNot(tok::identifier))
1278 return false;
1280 const FormatToken *Prev = FuncName->Previous;
1281 if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1282 return false;
1284 if (!isC78Type(*Tok) &&
1285 !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
1286 return false;
1289 if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1290 return false;
1292 Tok = Tok->Previous;
1293 if (!Tok || Tok->isNot(tok::r_paren))
1294 return false;
1296 Tok = Tok->Previous;
1297 if (!Tok || Tok->isNot(tok::identifier))
1298 return false;
1300 return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1303 bool UnwrappedLineParser::parseModuleImport() {
1304 assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
1306 if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
1307 !Token->Tok.getIdentifierInfo() &&
1308 !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) {
1309 return false;
1312 nextToken();
1313 while (!eof()) {
1314 if (FormatTok->is(tok::colon)) {
1315 FormatTok->setFinalizedType(TT_ModulePartitionColon);
1317 // Handle import <foo/bar.h> as we would an include statement.
1318 else if (FormatTok->is(tok::less)) {
1319 nextToken();
1320 while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1321 // Mark tokens up to the trailing line comments as implicit string
1322 // literals.
1323 if (FormatTok->isNot(tok::comment) &&
1324 !FormatTok->TokenText.startswith("//")) {
1325 FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1327 nextToken();
1330 if (FormatTok->is(tok::semi)) {
1331 nextToken();
1332 break;
1334 nextToken();
1337 addUnwrappedLine();
1338 return true;
1341 // readTokenWithJavaScriptASI reads the next token and terminates the current
1342 // line if JavaScript Automatic Semicolon Insertion must
1343 // happen between the current token and the next token.
1345 // This method is conservative - it cannot cover all edge cases of JavaScript,
1346 // but only aims to correctly handle certain well known cases. It *must not*
1347 // return true in speculative cases.
1348 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1349 FormatToken *Previous = FormatTok;
1350 readToken();
1351 FormatToken *Next = FormatTok;
1353 bool IsOnSameLine =
1354 CommentsBeforeNextToken.empty()
1355 ? Next->NewlinesBefore == 0
1356 : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1357 if (IsOnSameLine)
1358 return;
1360 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1361 bool PreviousStartsTemplateExpr =
1362 Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1363 if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1364 // If the line contains an '@' sign, the previous token might be an
1365 // annotation, which can precede another identifier/value.
1366 bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1367 return LineNode.Tok->is(tok::at);
1369 if (HasAt)
1370 return;
1372 if (Next->is(tok::exclaim) && PreviousMustBeValue)
1373 return addUnwrappedLine();
1374 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1375 bool NextEndsTemplateExpr =
1376 Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1377 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1378 (PreviousMustBeValue ||
1379 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1380 tok::minusminus))) {
1381 return addUnwrappedLine();
1383 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1384 isJSDeclOrStmt(Keywords, Next)) {
1385 return addUnwrappedLine();
// Parses one structural element starting at the current token.
//
// The function works in two stages. First, a switch on the kind of the
// current token handles tokens that only make sense at the beginning of a
// line and dispatches to the dedicated parsers (parseNamespace,
// parseIfThenElse, parseForOrWhileLoop, ...). If none of them returns, a loop
// consumes tokens one at a time until eof() or until a branch finishes the
// element, which typically happens right after addUnwrappedLine().
//
// IsTopLevel gates the K&R C parameter-declaration check after a '('.
// NextLBracesType, if not TT_Unknown, is applied as the finalized type of an
// '{' encountered in the loop. IfKind is forwarded to parseIfThenElse().
// IfLeftBrace, HasDoWhile and HasLabel are optional out-parameters and are
// written only when non-null.
1389 void UnwrappedLineParser::parseStructuralElement(
1390 bool IsTopLevel, TokenType NextLBracesType, IfStmtKind *IfKind,
1391 FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
// TableGen: an include, optionally followed by a string literal, forms a line
// of its own.
1392 if (Style.Language == FormatStyle::LK_TableGen &&
1393 FormatTok->is(tok::pp_include)) {
1394 nextToken();
1395 if (FormatTok->is(tok::string_literal))
1396 nextToken();
1397 addUnwrappedLine();
1398 return;
// Verilog-specific constructs are checked before the generic keyword switch.
1401 if (Style.isVerilog()) {
1402 if (Keywords.isVerilogStructuredProcedure(*FormatTok)) {
1403 parseForOrWhileLoop(/*HasParens=*/false);
1404 return;
1406 if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) {
1407 parseForOrWhileLoop();
1408 return;
1410 if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
1411 Keywords.kw_assume, Keywords.kw_cover)) {
1412 parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
1413 return;
1416 // Skip things that can exist before keywords like 'if' and 'case'.
1417 while (true) {
1418 if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique,
1419 Keywords.kw_unique0)) {
1420 nextToken();
1421 } else if (FormatTok->is(tok::l_paren) &&
1422 Tokens->peekNextToken()->is(tok::star)) {
1423 parseParens();
1424 } else {
1425 break;
1430 // Tokens that only make sense at the beginning of a line.
1431 switch (FormatTok->Tok.getKind()) {
1432 case tok::kw_asm:
1433 nextToken();
1434 if (FormatTok->is(tok::l_brace)) {
1435 FormatTok->setFinalizedType(TT_InlineASMBrace);
1436 nextToken();
// Every token inside the asm braces is marked Finalized.
1437 while (FormatTok && !eof()) {
1438 if (FormatTok->is(tok::r_brace)) {
1439 FormatTok->setFinalizedType(TT_InlineASMBrace);
1440 nextToken();
1441 addUnwrappedLine();
1442 break;
1444 FormatTok->Finalized = true;
1445 nextToken();
1448 break;
1449 case tok::kw_namespace:
1450 parseNamespace();
1451 return;
1452 case tok::kw_public:
1453 case tok::kw_protected:
1454 case tok::kw_private:
1455 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1456 Style.isCSharp()) {
1457 nextToken();
1458 } else {
1459 parseAccessSpecifier();
1461 return;
1462 case tok::kw_if: {
1463 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1464 // field/method declaration.
1465 break;
1467 FormatToken *Tok = parseIfThenElse(IfKind);
1468 if (IfLeftBrace)
1469 *IfLeftBrace = Tok;
1470 return;
1472 case tok::kw_for:
1473 case tok::kw_while:
1474 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1475 // field/method declaration.
1476 break;
1478 parseForOrWhileLoop();
1479 return;
1480 case tok::kw_do:
1481 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1482 // field/method declaration.
1483 break;
1485 parseDoWhile();
1486 if (HasDoWhile)
1487 *HasDoWhile = true;
1488 return;
1489 case tok::kw_switch:
1490 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1491 // 'switch: string' field declaration.
1492 break;
1494 parseSwitch();
1495 return;
1496 case tok::kw_default:
1497 // In Verilog default along with other labels are handled in the next loop.
1498 if (Style.isVerilog())
1499 break;
1500 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1501 // 'default: string' field declaration.
1502 break;
1504 nextToken();
1505 if (FormatTok->is(tok::colon)) {
1506 FormatTok->setFinalizedType(TT_CaseLabelColon);
1507 parseLabel();
1508 return;
1510 // e.g. "default void f() {}" in a Java interface.
1511 break;
1512 case tok::kw_case:
1513 // Proto: there are no switch/case statements.
1514 if (Style.isProto()) {
1515 nextToken();
1516 return;
1518 if (Style.isVerilog()) {
1519 parseBlock();
1520 addUnwrappedLine();
1521 return;
1523 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1524 // 'case: string' field declaration.
1525 nextToken();
1526 break;
1528 parseCaseLabel();
1529 return;
1530 case tok::kw_try:
1531 case tok::kw___try:
1532 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1533 // field/method declaration.
1534 break;
1536 parseTryCatch();
1537 return;
1538 case tok::kw_extern:
1539 nextToken();
1540 if (Style.isVerilog()) {
1541 // In Verilog and extern module declaration looks like a start of module.
1542 // But there is no body and endmodule. So we handle it separately.
1543 if (Keywords.isVerilogHierarchy(*FormatTok)) {
1544 parseVerilogHierarchyHeader();
1545 return;
1547 } else if (FormatTok->is(tok::string_literal)) {
1548 nextToken();
1549 if (FormatTok->is(tok::l_brace)) {
1550 if (Style.BraceWrapping.AfterExternBlock)
1551 addUnwrappedLine();
1552 // Either we indent or for backwards compatibility we follow the
1553 // AfterExternBlock style.
1554 unsigned AddLevels =
1555 (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1556 (Style.BraceWrapping.AfterExternBlock &&
1557 Style.IndentExternBlock ==
1558 FormatStyle::IEBS_AfterExternBlock)
1559 ? 1u
1560 : 0u;
1561 parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1562 addUnwrappedLine();
1563 return;
1566 break;
1567 case tok::kw_export:
1568 if (Style.isJavaScript()) {
1569 parseJavaScriptEs6ImportExport();
1570 return;
1572 if (Style.isCpp()) {
1573 nextToken();
1574 if (FormatTok->is(tok::kw_namespace)) {
1575 parseNamespace();
1576 return;
1578 if (FormatTok->is(Keywords.kw_import) && parseModuleImport())
1579 return;
1581 break;
1582 case tok::kw_inline:
1583 nextToken();
1584 if (FormatTok->is(tok::kw_namespace)) {
1585 parseNamespace();
1586 return;
1588 break;
1589 case tok::identifier:
1590 if (FormatTok->is(TT_ForEachMacro)) {
1591 parseForOrWhileLoop();
1592 return;
1594 if (FormatTok->is(TT_MacroBlockBegin)) {
1595 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1596 /*MunchSemi=*/false);
1597 return;
1599 if (FormatTok->is(Keywords.kw_import)) {
1600 if (Style.isJavaScript()) {
1601 parseJavaScriptEs6ImportExport();
1602 return;
1604 if (Style.Language == FormatStyle::LK_Proto) {
1605 nextToken();
1606 if (FormatTok->is(tok::kw_public))
1607 nextToken();
1608 if (!FormatTok->is(tok::string_literal))
1609 return;
1610 nextToken();
1611 if (FormatTok->is(tok::semi))
1612 nextToken();
1613 addUnwrappedLine();
1614 return;
1616 if (Style.isCpp() && parseModuleImport())
1617 return;
1619 if (Style.isCpp() &&
1620 FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1621 Keywords.kw_slots, Keywords.kw_qslots)) {
1622 nextToken();
1623 if (FormatTok->is(tok::colon)) {
1624 nextToken();
1625 addUnwrappedLine();
1626 return;
1629 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1630 parseStatementMacro();
1631 return;
1633 if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1634 parseNamespace();
1635 return;
1637 // In all other cases, parse the declaration.
1638 break;
1639 default:
1640 break;
// Main loop: consume the element token by token until a branch returns or the
// token stream reaches eof().
1642 do {
1643 const FormatToken *Previous = FormatTok->Previous;
1644 switch (FormatTok->Tok.getKind()) {
1645 case tok::at:
1646 nextToken();
1647 if (FormatTok->is(tok::l_brace)) {
1648 nextToken();
1649 parseBracedList();
1650 break;
1651 } else if (Style.Language == FormatStyle::LK_Java &&
1652 FormatTok->is(Keywords.kw_interface)) {
1653 nextToken();
1654 break;
// Otherwise dispatch on the Objective-C keyword that follows the '@'.
1656 switch (FormatTok->Tok.getObjCKeywordID()) {
1657 case tok::objc_public:
1658 case tok::objc_protected:
1659 case tok::objc_package:
1660 case tok::objc_private:
1661 return parseAccessSpecifier();
1662 case tok::objc_interface:
1663 case tok::objc_implementation:
1664 return parseObjCInterfaceOrImplementation();
1665 case tok::objc_protocol:
1666 if (parseObjCProtocol())
1667 return;
1668 break;
1669 case tok::objc_end:
1670 return; // Handled by the caller.
1671 case tok::objc_optional:
1672 case tok::objc_required:
1673 nextToken();
1674 addUnwrappedLine();
1675 return;
1676 case tok::objc_autoreleasepool:
1677 nextToken();
1678 if (FormatTok->is(tok::l_brace)) {
1679 if (Style.BraceWrapping.AfterControlStatement ==
1680 FormatStyle::BWACS_Always) {
1681 addUnwrappedLine();
1683 parseBlock();
1685 addUnwrappedLine();
1686 return;
1687 case tok::objc_synchronized:
1688 nextToken();
1689 if (FormatTok->is(tok::l_paren)) {
1690 // Skip synchronization object
1691 parseParens();
1693 if (FormatTok->is(tok::l_brace)) {
1694 if (Style.BraceWrapping.AfterControlStatement ==
1695 FormatStyle::BWACS_Always) {
1696 addUnwrappedLine();
1698 parseBlock();
1700 addUnwrappedLine();
1701 return;
1702 case tok::objc_try:
1703 // This branch isn't strictly necessary (the kw_try case below would
1704 // do this too after the tok::at is parsed above). But be explicit.
1705 parseTryCatch();
1706 return;
1707 default:
1708 break;
1710 break;
1711 case tok::kw_requires: {
1712 if (Style.isCpp()) {
1713 bool ParsedClause = parseRequires();
1714 if (ParsedClause)
1715 return;
1716 } else {
1717 nextToken();
1719 break;
1721 case tok::kw_enum:
1722 // Ignore if this is part of "template <enum ...".
1723 if (Previous && Previous->is(tok::less)) {
1724 nextToken();
1725 break;
1728 // parseEnum falls through and does not yet add an unwrapped line as an
1729 // enum definition can start a structural element.
1730 if (!parseEnum())
1731 break;
1732 // This only applies to C++ and Verilog.
1733 if (!Style.isCpp() && !Style.isVerilog()) {
1734 addUnwrappedLine();
1735 return;
1737 break;
1738 case tok::kw_typedef:
1739 nextToken();
1740 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1741 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1742 Keywords.kw_CF_CLOSED_ENUM,
1743 Keywords.kw_NS_CLOSED_ENUM)) {
1744 parseEnum();
1746 break;
1747 case tok::kw_class:
1748 if (Style.isVerilog()) {
1749 parseBlock();
1750 addUnwrappedLine();
1751 return;
1753 [[fallthrough]];
1754 case tok::kw_struct:
1755 case tok::kw_union:
1756 if (parseStructLike())
1757 return;
1758 break;
1759 case tok::period:
1760 nextToken();
1761 // In Java, classes have an implicit static member "class".
1762 if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1763 FormatTok->is(tok::kw_class)) {
1764 nextToken();
1766 if (Style.isJavaScript() && FormatTok &&
1767 FormatTok->Tok.getIdentifierInfo()) {
1768 // JavaScript only has pseudo keywords, all keywords are allowed to
1769 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1770 nextToken();
1772 break;
1773 case tok::semi:
1774 nextToken();
1775 addUnwrappedLine();
1776 return;
1777 case tok::r_brace:
1778 addUnwrappedLine();
1779 return;
1780 case tok::l_paren: {
1781 parseParens();
1782 // Break the unwrapped line if a K&R C function definition has a parameter
1783 // declaration.
1784 if (!IsTopLevel || !Style.isCpp() || !Previous || eof())
1785 break;
1786 if (isC78ParameterDecl(FormatTok,
1787 Tokens->peekNextToken(/*SkipComment=*/true),
1788 Previous)) {
1789 addUnwrappedLine();
1790 return;
1792 break;
1794 case tok::kw_operator:
1795 nextToken();
1796 if (FormatTok->isBinaryOperator())
1797 nextToken();
1798 break;
1799 case tok::caret:
1800 nextToken();
1801 if (FormatTok->Tok.isAnyIdentifier() ||
1802 FormatTok->isSimpleTypeSpecifier()) {
1803 nextToken();
1805 if (FormatTok->is(tok::l_paren))
1806 parseParens();
1807 if (FormatTok->is(tok::l_brace))
1808 parseChildBlock();
1809 break;
1810 case tok::l_brace:
1811 if (NextLBracesType != TT_Unknown)
1812 FormatTok->setFinalizedType(NextLBracesType);
1813 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1814 // A block outside of parentheses must be the last part of a
1815 // structural element.
1816 // FIXME: Figure out cases where this is not true, and add projections
1817 // for them (the one we know is missing are lambdas).
1818 if (Style.Language == FormatStyle::LK_Java &&
1819 Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1820 // If necessary, we could set the type to something different than
1821 // TT_FunctionLBrace.
1822 if (Style.BraceWrapping.AfterControlStatement ==
1823 FormatStyle::BWACS_Always) {
1824 addUnwrappedLine();
1826 } else if (Style.BraceWrapping.AfterFunction) {
1827 addUnwrappedLine();
1829 FormatTok->setFinalizedType(TT_FunctionLBrace);
1830 parseBlock();
1831 addUnwrappedLine();
1832 return;
1834 // Otherwise this was a braced init list, and the structural
1835 // element continues.
1836 break;
1837 case tok::kw_try:
1838 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1839 // field/method declaration.
1840 nextToken();
1841 break;
1843 // We arrive here when parsing function-try blocks.
1844 if (Style.BraceWrapping.AfterFunction)
1845 addUnwrappedLine();
1846 parseTryCatch();
1847 return;
1848 case tok::identifier: {
1849 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1850 Line->MustBeDeclaration) {
1851 addUnwrappedLine();
1852 parseCSharpGenericTypeConstraint();
1853 break;
1855 if (FormatTok->is(TT_MacroBlockEnd)) {
1856 addUnwrappedLine();
1857 return;
1860 // Function declarations (as opposed to function expressions) are parsed
1861 // on their own unwrapped line by continuing this loop. Function
1862 // expressions (functions that are not on their own line) must not create
1863 // a new unwrapped line, so they are special cased below.
1864 size_t TokenCount = Line->Tokens.size();
1865 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1866 (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1867 Keywords.kw_async)))) {
1868 tryToParseJSFunction();
1869 break;
1871 if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1872 FormatTok->is(Keywords.kw_interface)) {
1873 if (Style.isJavaScript()) {
1874 // In JavaScript/TypeScript, "interface" can be used as a standalone
1875 // identifier, e.g. in `var interface = 1;`. If "interface" is
1876 // followed by another identifier, it is very like to be an actual
1877 // interface declaration.
1878 unsigned StoredPosition = Tokens->getPosition();
1879 FormatToken *Next = Tokens->getNextToken();
1880 FormatTok = Tokens->setPosition(StoredPosition);
1881 if (!mustBeJSIdent(Keywords, Next)) {
1882 nextToken();
1883 break;
1886 parseRecord();
1887 addUnwrappedLine();
1888 return;
1891 if (Style.isVerilog()) {
1892 if (FormatTok->is(Keywords.kw_table)) {
1893 parseVerilogTable();
1894 return;
1896 if (Keywords.isVerilogBegin(*FormatTok) ||
1897 Keywords.isVerilogHierarchy(*FormatTok)) {
1898 parseBlock();
1899 addUnwrappedLine();
1900 return;
1904 if (!Style.isCpp() && FormatTok->is(Keywords.kw_interface)) {
1905 if (parseStructLike())
1906 return;
1907 break;
1910 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1911 parseStatementMacro();
1912 return;
1915 // See if the following token should start a new unwrapped line.
1916 StringRef Text = FormatTok->TokenText;
1918 FormatToken *PreviousToken = FormatTok;
1919 nextToken();
1921 // JS doesn't have macros, and within classes colons indicate fields, not
1922 // labels.
1923 if (Style.isJavaScript())
1924 break;
// True if, after skipping leading comments (and leading '#' tokens in
// Verilog), exactly one token is on the current line.
1926 auto OneTokenSoFar = [&]() {
1927 auto I = Line->Tokens.begin(), E = Line->Tokens.end();
1928 while (I != E && I->Tok->is(tok::comment))
1929 ++I;
1930 while (I != E && Style.isVerilog() && I->Tok->is(tok::hash))
1931 ++I;
1932 return I != E && (++I == E);
1934 if (OneTokenSoFar()) {
1935 // In Verilog labels can be any expression, so we don't do them here.
1936 if (!Style.isVerilog() && FormatTok->is(tok::colon) &&
1937 !Line->MustBeDeclaration) {
1938 Line->Tokens.begin()->Tok->MustBreakBefore = true;
1939 FormatTok->setFinalizedType(TT_GotoLabelColon);
1940 parseLabel(!Style.IndentGotoLabels);
1941 if (HasLabel)
1942 *HasLabel = true;
1943 return;
1945 // Recognize function-like macro usages without trailing semicolon as
1946 // well as free-standing macros like Q_OBJECT.
1947 bool FunctionLike = FormatTok->is(tok::l_paren);
1948 if (FunctionLike)
1949 parseParens();
// The newline check looks at the first pending comment if there is one,
// otherwise at the current token.
1951 bool FollowedByNewline =
1952 CommentsBeforeNextToken.empty()
1953 ? FormatTok->NewlinesBefore > 0
1954 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
// Only all-uppercase names count as macros; names shorter than five
// characters additionally need a parenthesized argument list.
1956 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1957 tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
1958 if (PreviousToken->isNot(TT_UntouchableMacroFunc))
1959 PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
1960 addUnwrappedLine();
1961 return;
1964 break;
1966 case tok::equal:
1967 if ((Style.isJavaScript() || Style.isCSharp()) &&
1968 FormatTok->is(TT_FatArrow)) {
1969 tryToParseChildBlock();
1970 break;
1973 nextToken();
1974 if (FormatTok->is(tok::l_brace)) {
1975 // Block kind should probably be set to BK_BracedInit for any language.
1976 // C# needs this change to ensure that array initialisers and object
1977 // initialisers are indented the same way.
1978 if (Style.isCSharp())
1979 FormatTok->setBlockKind(BK_BracedInit);
1980 nextToken();
1981 parseBracedList();
1982 } else if (Style.Language == FormatStyle::LK_Proto &&
1983 FormatTok->is(tok::less)) {
1984 nextToken();
1985 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1986 /*ClosingBraceKind=*/tok::greater);
1988 break;
1989 case tok::l_square:
1990 parseSquare();
1991 break;
1992 case tok::kw_new:
1993 parseNew();
1994 break;
1995 case tok::kw_case:
1996 // Proto: there are no switch/case statements.
1997 if (Style.isProto()) {
1998 nextToken();
1999 return;
2001 // In Verilog switch is called case.
2002 if (Style.isVerilog()) {
2003 parseBlock();
2004 addUnwrappedLine();
2005 return;
2007 if (Style.isJavaScript() && Line->MustBeDeclaration) {
2008 // 'case: string' field declaration.
2009 nextToken();
2010 break;
2012 parseCaseLabel();
2013 break;
2014 case tok::kw_default:
2015 nextToken();
2016 if (Style.isVerilog()) {
2017 if (FormatTok->is(tok::colon)) {
2018 // The label will be handled in the next iteration.
2019 break;
2021 if (FormatTok->is(Keywords.kw_clocking)) {
2022 // A default clocking block.
2023 parseBlock();
2024 addUnwrappedLine();
2025 return;
2027 parseVerilogCaseLabel();
2028 return;
2030 break;
2031 case tok::colon:
2032 nextToken();
2033 if (Style.isVerilog()) {
2034 parseVerilogCaseLabel();
2035 return;
2037 break;
2038 default:
2039 nextToken();
2040 break;
2042 } while (!eof());
2045 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2046 assert(FormatTok->is(tok::l_brace));
2047 if (!Style.isCSharp())
2048 return false;
2049 // See if it's a property accessor.
2050 if (FormatTok->Previous->isNot(tok::identifier))
2051 return false;
2053 // See if we are inside a property accessor.
2055 // Record the current tokenPosition so that we can advance and
2056 // reset the current token. `Next` is not set yet so we need
2057 // another way to advance along the token stream.
2058 unsigned int StoredPosition = Tokens->getPosition();
2059 FormatToken *Tok = Tokens->getNextToken();
2061 // A trivial property accessor is of the form:
2062 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2063 // Track these as they do not require line breaks to be introduced.
2064 bool HasSpecialAccessor = false;
2065 bool IsTrivialPropertyAccessor = true;
2066 while (!eof()) {
2067 if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
2068 tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
2069 Keywords.kw_init, Keywords.kw_set)) {
2070 if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
2071 HasSpecialAccessor = true;
2072 Tok = Tokens->getNextToken();
2073 continue;
2075 if (Tok->isNot(tok::r_brace))
2076 IsTrivialPropertyAccessor = false;
2077 break;
2080 if (!HasSpecialAccessor) {
2081 Tokens->setPosition(StoredPosition);
2082 return false;
2085 // Try to parse the property accessor:
2086 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2087 Tokens->setPosition(StoredPosition);
2088 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2089 addUnwrappedLine();
2090 nextToken();
2091 do {
2092 switch (FormatTok->Tok.getKind()) {
2093 case tok::r_brace:
2094 nextToken();
2095 if (FormatTok->is(tok::equal)) {
2096 while (!eof() && FormatTok->isNot(tok::semi))
2097 nextToken();
2098 nextToken();
2100 addUnwrappedLine();
2101 return true;
2102 case tok::l_brace:
2103 ++Line->Level;
2104 parseBlock(/*MustBeDeclaration=*/true);
2105 addUnwrappedLine();
2106 --Line->Level;
2107 break;
2108 case tok::equal:
2109 if (FormatTok->is(TT_FatArrow)) {
2110 ++Line->Level;
2111 do {
2112 nextToken();
2113 } while (!eof() && FormatTok->isNot(tok::semi));
2114 nextToken();
2115 addUnwrappedLine();
2116 --Line->Level;
2117 break;
2119 nextToken();
2120 break;
2121 default:
2122 if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
2123 Keywords.kw_set) &&
2124 !IsTrivialPropertyAccessor) {
2125 // Non-trivial get/set needs to be on its own line.
2126 addUnwrappedLine();
2128 nextToken();
2130 } while (!eof());
2132 // Unreachable for well-formed code (paired '{' and '}').
2133 return true;
2136 bool UnwrappedLineParser::tryToParseLambda() {
2137 assert(FormatTok->is(tok::l_square));
2138 if (!Style.isCpp()) {
2139 nextToken();
2140 return false;
2142 FormatToken &LSquare = *FormatTok;
2143 if (!tryToParseLambdaIntroducer())
2144 return false;
2146 bool SeenArrow = false;
2147 bool InTemplateParameterList = false;
2149 while (FormatTok->isNot(tok::l_brace)) {
2150 if (FormatTok->isSimpleTypeSpecifier()) {
2151 nextToken();
2152 continue;
2154 switch (FormatTok->Tok.getKind()) {
2155 case tok::l_brace:
2156 break;
2157 case tok::l_paren:
2158 parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
2159 break;
2160 case tok::l_square:
2161 parseSquare();
2162 break;
2163 case tok::less:
2164 assert(FormatTok->Previous);
2165 if (FormatTok->Previous->is(tok::r_square))
2166 InTemplateParameterList = true;
2167 nextToken();
2168 break;
2169 case tok::kw_auto:
2170 case tok::kw_class:
2171 case tok::kw_template:
2172 case tok::kw_typename:
2173 case tok::amp:
2174 case tok::star:
2175 case tok::kw_const:
2176 case tok::kw_constexpr:
2177 case tok::kw_consteval:
2178 case tok::comma:
2179 case tok::greater:
2180 case tok::identifier:
2181 case tok::numeric_constant:
2182 case tok::coloncolon:
2183 case tok::kw_mutable:
2184 case tok::kw_noexcept:
2185 case tok::kw_static:
2186 nextToken();
2187 break;
2188 // Specialization of a template with an integer parameter can contain
2189 // arithmetic, logical, comparison and ternary operators.
2191 // FIXME: This also accepts sequences of operators that are not in the scope
2192 // of a template argument list.
2194 // In a C++ lambda a template type can only occur after an arrow. We use
2195 // this as an heuristic to distinguish between Objective-C expressions
2196 // followed by an `a->b` expression, such as:
2197 // ([obj func:arg] + a->b)
2198 // Otherwise the code below would parse as a lambda.
2200 // FIXME: This heuristic is incorrect for C++20 generic lambdas with
2201 // explicit template lists: []<bool b = true && false>(U &&u){}
2202 case tok::plus:
2203 case tok::minus:
2204 case tok::exclaim:
2205 case tok::tilde:
2206 case tok::slash:
2207 case tok::percent:
2208 case tok::lessless:
2209 case tok::pipe:
2210 case tok::pipepipe:
2211 case tok::ampamp:
2212 case tok::caret:
2213 case tok::equalequal:
2214 case tok::exclaimequal:
2215 case tok::greaterequal:
2216 case tok::lessequal:
2217 case tok::question:
2218 case tok::colon:
2219 case tok::ellipsis:
2220 case tok::kw_true:
2221 case tok::kw_false:
2222 if (SeenArrow || InTemplateParameterList) {
2223 nextToken();
2224 break;
2226 return true;
2227 case tok::arrow:
2228 // This might or might not actually be a lambda arrow (this could be an
2229 // ObjC method invocation followed by a dereferencing arrow). We might
2230 // reset this back to TT_Unknown in TokenAnnotator.
2231 FormatTok->setFinalizedType(TT_LambdaArrow);
2232 SeenArrow = true;
2233 nextToken();
2234 break;
2235 case tok::kw_requires: {
2236 auto *RequiresToken = FormatTok;
2237 nextToken();
2238 parseRequiresClause(RequiresToken);
2239 break;
2241 default:
2242 return true;
2245 FormatTok->setFinalizedType(TT_LambdaLBrace);
2246 LSquare.setFinalizedType(TT_LambdaLSquare);
2247 parseChildBlock();
2248 return true;
2251 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2252 const FormatToken *Previous = FormatTok->Previous;
2253 const FormatToken *LeftSquare = FormatTok;
2254 nextToken();
2255 if ((Previous && ((Previous->Tok.getIdentifierInfo() &&
2256 !Previous->isOneOf(tok::kw_return, tok::kw_co_await,
2257 tok::kw_co_yield, tok::kw_co_return)) ||
2258 Previous->closesScope())) ||
2259 LeftSquare->isCppStructuredBinding(Style)) {
2260 return false;
2262 if (FormatTok->is(tok::l_square))
2263 return false;
2264 if (FormatTok->is(tok::r_square)) {
2265 const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
2266 if (Next->is(tok::greater))
2267 return false;
2269 parseSquare(/*LambdaIntroducer=*/true);
2270 return true;
2273 void UnwrappedLineParser::tryToParseJSFunction() {
2274 assert(FormatTok->is(Keywords.kw_function) ||
2275 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
2276 if (FormatTok->is(Keywords.kw_async))
2277 nextToken();
2278 // Consume "function".
2279 nextToken();
2281 // Consume * (generator function). Treat it like C++'s overloaded operators.
2282 if (FormatTok->is(tok::star)) {
2283 FormatTok->setFinalizedType(TT_OverloadedOperator);
2284 nextToken();
2287 // Consume function name.
2288 if (FormatTok->is(tok::identifier))
2289 nextToken();
2291 if (FormatTok->isNot(tok::l_paren))
2292 return;
2294 // Parse formal parameter list.
2295 parseParens();
2297 if (FormatTok->is(tok::colon)) {
2298 // Parse a type definition.
2299 nextToken();
2301 // Eat the type declaration. For braced inline object types, balance braces,
2302 // otherwise just parse until finding an l_brace for the function body.
2303 if (FormatTok->is(tok::l_brace))
2304 tryToParseBracedList();
2305 else
2306 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2307 nextToken();
2310 if (FormatTok->is(tok::semi))
2311 return;
2313 parseChildBlock();
2316 bool UnwrappedLineParser::tryToParseBracedList() {
2317 if (FormatTok->is(BK_Unknown))
2318 calculateBraceTypes();
2319 assert(FormatTok->isNot(BK_Unknown));
2320 if (FormatTok->is(BK_Block))
2321 return false;
2322 nextToken();
2323 parseBracedList();
2324 return true;
2327 bool UnwrappedLineParser::tryToParseChildBlock() {
2328 assert(Style.isJavaScript() || Style.isCSharp());
2329 assert(FormatTok->is(TT_FatArrow));
2330 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2331 // They always start an expression or a child block if followed by a curly
2332 // brace.
2333 nextToken();
2334 if (FormatTok->isNot(tok::l_brace))
2335 return false;
2336 parseChildBlock();
2337 return true;
2340 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
2341 bool IsEnum,
2342 tok::TokenKind ClosingBraceKind) {
2343 bool HasError = false;
2345 // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2346 // replace this by using parseAssignmentExpression() inside.
2347 do {
2348 if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2349 tryToParseChildBlock()) {
2350 continue;
2352 if (Style.isJavaScript()) {
2353 if (FormatTok->is(Keywords.kw_function) ||
2354 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
2355 tryToParseJSFunction();
2356 continue;
2358 if (FormatTok->is(tok::l_brace)) {
2359 // Could be a method inside of a braced list `{a() { return 1; }}`.
2360 if (tryToParseBracedList())
2361 continue;
2362 parseChildBlock();
2365 if (FormatTok->Tok.getKind() == ClosingBraceKind) {
2366 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2367 addUnwrappedLine();
2368 nextToken();
2369 return !HasError;
2371 switch (FormatTok->Tok.getKind()) {
2372 case tok::l_square:
2373 if (Style.isCSharp())
2374 parseSquare();
2375 else
2376 tryToParseLambda();
2377 break;
2378 case tok::l_paren:
2379 parseParens();
2380 // JavaScript can just have free standing methods and getters/setters in
2381 // object literals. Detect them by a "{" following ")".
2382 if (Style.isJavaScript()) {
2383 if (FormatTok->is(tok::l_brace))
2384 parseChildBlock();
2385 break;
2387 break;
2388 case tok::l_brace:
2389 // Assume there are no blocks inside a braced init list apart
2390 // from the ones we explicitly parse out (like lambdas).
2391 FormatTok->setBlockKind(BK_BracedInit);
2392 nextToken();
2393 parseBracedList();
2394 break;
2395 case tok::less:
2396 if (Style.Language == FormatStyle::LK_Proto ||
2397 ClosingBraceKind == tok::greater) {
2398 nextToken();
2399 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2400 /*ClosingBraceKind=*/tok::greater);
2401 } else {
2402 nextToken();
2404 break;
2405 case tok::semi:
2406 // JavaScript (or more precisely TypeScript) can have semicolons in braced
2407 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2408 // used for error recovery if we have otherwise determined that this is
2409 // a braced list.
2410 if (Style.isJavaScript()) {
2411 nextToken();
2412 break;
2414 HasError = true;
2415 if (!ContinueOnSemicolons)
2416 return !HasError;
2417 nextToken();
2418 break;
2419 case tok::comma:
2420 nextToken();
2421 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2422 addUnwrappedLine();
2423 break;
2424 default:
2425 nextToken();
2426 break;
2428 } while (!eof());
2429 return false;
2432 /// \brief Parses a pair of parentheses (and everything between them).
2433 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2434 /// double ampersands. This only counts for the current parens scope.
2435 void UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2436 assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2437 nextToken();
2438 do {
2439 switch (FormatTok->Tok.getKind()) {
2440 case tok::l_paren:
2441 parseParens();
2442 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2443 parseChildBlock();
2444 break;
2445 case tok::r_paren:
2446 nextToken();
2447 return;
2448 case tok::r_brace:
2449 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2450 return;
2451 case tok::l_square:
2452 tryToParseLambda();
2453 break;
2454 case tok::l_brace:
2455 if (!tryToParseBracedList())
2456 parseChildBlock();
2457 break;
2458 case tok::at:
2459 nextToken();
2460 if (FormatTok->is(tok::l_brace)) {
2461 nextToken();
2462 parseBracedList();
2464 break;
2465 case tok::equal:
2466 if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2467 tryToParseChildBlock();
2468 else
2469 nextToken();
2470 break;
2471 case tok::kw_class:
2472 if (Style.isJavaScript())
2473 parseRecord(/*ParseAsExpr=*/true);
2474 else
2475 nextToken();
2476 break;
2477 case tok::identifier:
2478 if (Style.isJavaScript() &&
2479 (FormatTok->is(Keywords.kw_function) ||
2480 FormatTok->startsSequence(Keywords.kw_async,
2481 Keywords.kw_function))) {
2482 tryToParseJSFunction();
2483 } else {
2484 nextToken();
2486 break;
2487 case tok::kw_requires: {
2488 auto RequiresToken = FormatTok;
2489 nextToken();
2490 parseRequiresExpression(RequiresToken);
2491 break;
2493 case tok::ampamp:
2494 if (AmpAmpTokenType != TT_Unknown)
2495 FormatTok->setFinalizedType(AmpAmpTokenType);
2496 [[fallthrough]];
2497 default:
2498 nextToken();
2499 break;
2501 } while (!eof());
2504 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2505 if (!LambdaIntroducer) {
2506 assert(FormatTok->is(tok::l_square) && "'[' expected.");
2507 if (tryToParseLambda())
2508 return;
2510 do {
2511 switch (FormatTok->Tok.getKind()) {
2512 case tok::l_paren:
2513 parseParens();
2514 break;
2515 case tok::r_square:
2516 nextToken();
2517 return;
2518 case tok::r_brace:
2519 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2520 return;
2521 case tok::l_square:
2522 parseSquare();
2523 break;
2524 case tok::l_brace: {
2525 if (!tryToParseBracedList())
2526 parseChildBlock();
2527 break;
2529 case tok::at:
2530 nextToken();
2531 if (FormatTok->is(tok::l_brace)) {
2532 nextToken();
2533 parseBracedList();
2535 break;
2536 default:
2537 nextToken();
2538 break;
2540 } while (!eof());
2543 void UnwrappedLineParser::keepAncestorBraces() {
2544 if (!Style.RemoveBracesLLVM)
2545 return;
2547 const int MaxNestingLevels = 2;
2548 const int Size = NestedTooDeep.size();
2549 if (Size >= MaxNestingLevels)
2550 NestedTooDeep[Size - MaxNestingLevels] = true;
2551 NestedTooDeep.push_back(false);
2554 static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2555 for (const auto &Token : llvm::reverse(Line.Tokens))
2556 if (Token.Tok->isNot(tok::comment))
2557 return Token.Tok;
2559 return nullptr;
2562 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2563 FormatToken *Tok = nullptr;
2565 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2566 PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) {
2567 Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never
2568 ? getLastNonComment(*Line)
2569 : Line->Tokens.back().Tok;
2570 assert(Tok);
2571 if (Tok->BraceCount < 0) {
2572 assert(Tok->BraceCount == -1);
2573 Tok = nullptr;
2574 } else {
2575 Tok->BraceCount = -1;
2579 addUnwrappedLine();
2580 ++Line->Level;
2581 parseStructuralElement();
2583 if (Tok) {
2584 assert(!Line->InPPDirective);
2585 Tok = nullptr;
2586 for (const auto &L : llvm::reverse(*CurrentLines)) {
2587 if (!L.InPPDirective && getLastNonComment(L)) {
2588 Tok = L.Tokens.back().Tok;
2589 break;
2592 assert(Tok);
2593 ++Tok->BraceCount;
2596 if (CheckEOF && eof())
2597 addUnwrappedLine();
2599 --Line->Level;
2602 static void markOptionalBraces(FormatToken *LeftBrace) {
2603 if (!LeftBrace)
2604 return;
2606 assert(LeftBrace->is(tok::l_brace));
2608 FormatToken *RightBrace = LeftBrace->MatchingParen;
2609 if (!RightBrace) {
2610 assert(!LeftBrace->Optional);
2611 return;
2614 assert(RightBrace->is(tok::r_brace));
2615 assert(RightBrace->MatchingParen == LeftBrace);
2616 assert(LeftBrace->Optional == RightBrace->Optional);
2618 LeftBrace->Optional = true;
2619 RightBrace->Optional = true;
2622 void UnwrappedLineParser::handleAttributes() {
2623 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2624 if (FormatTok->is(TT_AttributeMacro))
2625 nextToken();
2626 if (FormatTok->is(tok::l_square))
2627 handleCppAttributes();
2630 bool UnwrappedLineParser::handleCppAttributes() {
2631 // Handle [[likely]] / [[unlikely]] attributes.
2632 assert(FormatTok->is(tok::l_square));
2633 if (!tryToParseSimpleAttribute())
2634 return false;
2635 parseSquare();
2636 return true;
2639 /// Returns whether \c Tok begins a block.
2640 bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
2641 // FIXME: rename the function or make
2642 // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2643 return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
2644 : Tok.is(tok::l_brace);
2647 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2648 bool KeepBraces,
2649 bool IsVerilogAssert) {
2650 assert((FormatTok->is(tok::kw_if) ||
2651 (Style.isVerilog() &&
2652 FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
2653 Keywords.kw_assume, Keywords.kw_cover))) &&
2654 "'if' expected");
2655 nextToken();
2657 if (IsVerilogAssert) {
2658 // Handle `assert #0` and `assert final`.
2659 if (FormatTok->is(Keywords.kw_verilogHash)) {
2660 nextToken();
2661 if (FormatTok->is(tok::numeric_constant))
2662 nextToken();
2663 } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property,
2664 Keywords.kw_sequence)) {
2665 nextToken();
2669 // Handle `if !consteval`.
2670 if (FormatTok->is(tok::exclaim))
2671 nextToken();
2673 bool KeepIfBraces = true;
2674 if (FormatTok->is(tok::kw_consteval)) {
2675 nextToken();
2676 } else {
2677 KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
2678 if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
2679 nextToken();
2680 if (FormatTok->is(tok::l_paren)) {
2681 FormatTok->setFinalizedType(TT_ConditionLParen);
2682 parseParens();
2685 handleAttributes();
2686 // The then action is optional in Verilog assert statements.
2687 if (IsVerilogAssert && FormatTok->is(tok::semi)) {
2688 nextToken();
2689 addUnwrappedLine();
2690 return nullptr;
2693 bool NeedsUnwrappedLine = false;
2694 keepAncestorBraces();
2696 FormatToken *IfLeftBrace = nullptr;
2697 IfStmtKind IfBlockKind = IfStmtKind::NotIf;
2699 if (isBlockBegin(*FormatTok)) {
2700 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
2701 IfLeftBrace = FormatTok;
2702 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2703 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2704 /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
2705 if (Style.BraceWrapping.BeforeElse)
2706 addUnwrappedLine();
2707 else
2708 NeedsUnwrappedLine = true;
2709 } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) {
2710 addUnwrappedLine();
2711 } else {
2712 parseUnbracedBody();
2715 if (Style.RemoveBracesLLVM) {
2716 assert(!NestedTooDeep.empty());
2717 KeepIfBraces = KeepIfBraces ||
2718 (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
2719 NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
2720 IfBlockKind == IfStmtKind::IfElseIf;
2723 bool KeepElseBraces = KeepIfBraces;
2724 FormatToken *ElseLeftBrace = nullptr;
2725 IfStmtKind Kind = IfStmtKind::IfOnly;
2727 if (FormatTok->is(tok::kw_else)) {
2728 if (Style.RemoveBracesLLVM) {
2729 NestedTooDeep.back() = false;
2730 Kind = IfStmtKind::IfElse;
2732 nextToken();
2733 handleAttributes();
2734 if (isBlockBegin(*FormatTok)) {
2735 const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
2736 FormatTok->setFinalizedType(TT_ElseLBrace);
2737 ElseLeftBrace = FormatTok;
2738 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2739 IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
2740 FormatToken *IfLBrace =
2741 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2742 /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
2743 if (FormatTok->is(tok::kw_else)) {
2744 KeepElseBraces = KeepElseBraces ||
2745 ElseBlockKind == IfStmtKind::IfOnly ||
2746 ElseBlockKind == IfStmtKind::IfElseIf;
2747 } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
2748 KeepElseBraces = true;
2749 assert(ElseLeftBrace->MatchingParen);
2750 markOptionalBraces(ElseLeftBrace);
2752 addUnwrappedLine();
2753 } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) {
2754 const FormatToken *Previous = Tokens->getPreviousToken();
2755 assert(Previous);
2756 const bool IsPrecededByComment = Previous->is(tok::comment);
2757 if (IsPrecededByComment) {
2758 addUnwrappedLine();
2759 ++Line->Level;
2761 bool TooDeep = true;
2762 if (Style.RemoveBracesLLVM) {
2763 Kind = IfStmtKind::IfElseIf;
2764 TooDeep = NestedTooDeep.pop_back_val();
2766 ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
2767 if (Style.RemoveBracesLLVM)
2768 NestedTooDeep.push_back(TooDeep);
2769 if (IsPrecededByComment)
2770 --Line->Level;
2771 } else {
2772 parseUnbracedBody(/*CheckEOF=*/true);
2774 } else {
2775 KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
2776 if (NeedsUnwrappedLine)
2777 addUnwrappedLine();
2780 if (!Style.RemoveBracesLLVM)
2781 return nullptr;
2783 assert(!NestedTooDeep.empty());
2784 KeepElseBraces = KeepElseBraces ||
2785 (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
2786 NestedTooDeep.back();
2788 NestedTooDeep.pop_back();
2790 if (!KeepIfBraces && !KeepElseBraces) {
2791 markOptionalBraces(IfLeftBrace);
2792 markOptionalBraces(ElseLeftBrace);
2793 } else if (IfLeftBrace) {
2794 FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
2795 if (IfRightBrace) {
2796 assert(IfRightBrace->MatchingParen == IfLeftBrace);
2797 assert(!IfLeftBrace->Optional);
2798 assert(!IfRightBrace->Optional);
2799 IfLeftBrace->MatchingParen = nullptr;
2800 IfRightBrace->MatchingParen = nullptr;
2804 if (IfKind)
2805 *IfKind = Kind;
2807 return IfLeftBrace;
2810 void UnwrappedLineParser::parseTryCatch() {
2811 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2812 nextToken();
2813 bool NeedsUnwrappedLine = false;
2814 if (FormatTok->is(tok::colon)) {
2815 // We are in a function try block, what comes is an initializer list.
2816 nextToken();
2818 // In case identifiers were removed by clang-tidy, what might follow is
2819 // multiple commas in sequence - before the first identifier.
2820 while (FormatTok->is(tok::comma))
2821 nextToken();
2823 while (FormatTok->is(tok::identifier)) {
2824 nextToken();
2825 if (FormatTok->is(tok::l_paren))
2826 parseParens();
2827 if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2828 FormatTok->is(tok::l_brace)) {
2829 do {
2830 nextToken();
2831 } while (!FormatTok->is(tok::r_brace));
2832 nextToken();
2835 // In case identifiers were removed by clang-tidy, what might follow is
2836 // multiple commas in sequence - after the first identifier.
2837 while (FormatTok->is(tok::comma))
2838 nextToken();
2841 // Parse try with resource.
2842 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2843 parseParens();
2845 keepAncestorBraces();
2847 if (FormatTok->is(tok::l_brace)) {
2848 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2849 parseBlock();
2850 if (Style.BraceWrapping.BeforeCatch)
2851 addUnwrappedLine();
2852 else
2853 NeedsUnwrappedLine = true;
2854 } else if (!FormatTok->is(tok::kw_catch)) {
2855 // The C++ standard requires a compound-statement after a try.
2856 // If there's none, we try to assume there's a structuralElement
2857 // and try to continue.
2858 addUnwrappedLine();
2859 ++Line->Level;
2860 parseStructuralElement();
2861 --Line->Level;
2863 while (true) {
2864 if (FormatTok->is(tok::at))
2865 nextToken();
2866 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2867 tok::kw___finally) ||
2868 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2869 FormatTok->is(Keywords.kw_finally)) ||
2870 (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
2871 FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
2872 break;
2874 nextToken();
2875 while (FormatTok->isNot(tok::l_brace)) {
2876 if (FormatTok->is(tok::l_paren)) {
2877 parseParens();
2878 continue;
2880 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2881 if (Style.RemoveBracesLLVM)
2882 NestedTooDeep.pop_back();
2883 return;
2885 nextToken();
2887 NeedsUnwrappedLine = false;
2888 Line->MustBeDeclaration = false;
2889 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2890 parseBlock();
2891 if (Style.BraceWrapping.BeforeCatch)
2892 addUnwrappedLine();
2893 else
2894 NeedsUnwrappedLine = true;
2897 if (Style.RemoveBracesLLVM)
2898 NestedTooDeep.pop_back();
2900 if (NeedsUnwrappedLine)
2901 addUnwrappedLine();
2904 void UnwrappedLineParser::parseNamespace() {
2905 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2906 "'namespace' expected");
2908 const FormatToken &InitialToken = *FormatTok;
2909 nextToken();
2910 if (InitialToken.is(TT_NamespaceMacro)) {
2911 parseParens();
2912 } else {
2913 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2914 tok::l_square, tok::period, tok::l_paren) ||
2915 (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
2916 if (FormatTok->is(tok::l_square))
2917 parseSquare();
2918 else if (FormatTok->is(tok::l_paren))
2919 parseParens();
2920 else
2921 nextToken();
2924 if (FormatTok->is(tok::l_brace)) {
2925 if (ShouldBreakBeforeBrace(Style, InitialToken))
2926 addUnwrappedLine();
2928 unsigned AddLevels =
2929 Style.NamespaceIndentation == FormatStyle::NI_All ||
2930 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2931 DeclarationScopeStack.size() > 1)
2932 ? 1u
2933 : 0u;
2934 bool ManageWhitesmithsBraces =
2935 AddLevels == 0u &&
2936 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2938 // If we're in Whitesmiths mode, indent the brace if we're not indenting
2939 // the whole block.
2940 if (ManageWhitesmithsBraces)
2941 ++Line->Level;
2943 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
2944 /*KeepBraces=*/true, /*IfKind=*/nullptr,
2945 ManageWhitesmithsBraces);
2947 // Munch the semicolon after a namespace. This is more common than one would
2948 // think. Putting the semicolon into its own line is very ugly.
2949 if (FormatTok->is(tok::semi))
2950 nextToken();
2952 addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2954 if (ManageWhitesmithsBraces)
2955 --Line->Level;
2957 // FIXME: Add error handling.
2960 void UnwrappedLineParser::parseNew() {
2961 assert(FormatTok->is(tok::kw_new) && "'new' expected");
2962 nextToken();
2964 if (Style.isCSharp()) {
2965 do {
2966 // Handle constructor invocation, e.g. `new(field: value)`.
2967 if (FormatTok->is(tok::l_paren))
2968 parseParens();
2970 // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
2971 if (FormatTok->is(tok::l_brace))
2972 parseBracedList();
2974 if (FormatTok->isOneOf(tok::semi, tok::comma))
2975 return;
2977 nextToken();
2978 } while (!eof());
2981 if (Style.Language != FormatStyle::LK_Java)
2982 return;
2984 // In Java, we can parse everything up to the parens, which aren't optional.
2985 do {
2986 // There should not be a ;, { or } before the new's open paren.
2987 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2988 return;
2990 // Consume the parens.
2991 if (FormatTok->is(tok::l_paren)) {
2992 parseParens();
2994 // If there is a class body of an anonymous class, consume that as child.
2995 if (FormatTok->is(tok::l_brace))
2996 parseChildBlock();
2997 return;
2999 nextToken();
3000 } while (!eof());
3003 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
3004 keepAncestorBraces();
3006 if (isBlockBegin(*FormatTok)) {
3007 if (!KeepBraces)
3008 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3009 FormatToken *LeftBrace = FormatTok;
3010 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3011 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3012 /*MunchSemi=*/true, KeepBraces);
3013 if (!KeepBraces) {
3014 assert(!NestedTooDeep.empty());
3015 if (!NestedTooDeep.back())
3016 markOptionalBraces(LeftBrace);
3018 if (WrapRightBrace)
3019 addUnwrappedLine();
3020 } else {
3021 parseUnbracedBody();
3024 if (!KeepBraces)
3025 NestedTooDeep.pop_back();
3028 void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
3029 assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
3030 (Style.isVerilog() &&
3031 FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
3032 Keywords.kw_always_ff, Keywords.kw_always_latch,
3033 Keywords.kw_final, Keywords.kw_initial,
3034 Keywords.kw_foreach, Keywords.kw_forever,
3035 Keywords.kw_repeat))) &&
3036 "'for', 'while' or foreach macro expected");
3037 const bool KeepBraces = !Style.RemoveBracesLLVM ||
3038 !FormatTok->isOneOf(tok::kw_for, tok::kw_while);
3040 nextToken();
3041 // JS' for await ( ...
3042 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
3043 nextToken();
3044 if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
3045 nextToken();
3046 if (HasParens && FormatTok->is(tok::l_paren)) {
3047 // The type is only set for Verilog basically because we were afraid to
3048 // change the existing behavior for loops. See the discussion on D121756 for
3049 // details.
3050 if (Style.isVerilog())
3051 FormatTok->setFinalizedType(TT_ConditionLParen);
3052 parseParens();
3054 // Event control.
3055 if (Style.isVerilog())
3056 parseVerilogSensitivityList();
3058 handleAttributes();
3059 parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
3062 void UnwrappedLineParser::parseDoWhile() {
3063 assert(FormatTok->is(tok::kw_do) && "'do' expected");
3064 nextToken();
3066 parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
3068 // FIXME: Add error handling.
3069 if (!FormatTok->is(tok::kw_while)) {
3070 addUnwrappedLine();
3071 return;
3074 // If in Whitesmiths mode, the line with the while() needs to be indented
3075 // to the same level as the block.
3076 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
3077 ++Line->Level;
3079 nextToken();
3080 parseStructuralElement();
3083 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
3084 nextToken();
3085 unsigned OldLineLevel = Line->Level;
3086 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
3087 --Line->Level;
3088 if (LeftAlignLabel)
3089 Line->Level = 0;
3091 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
3092 FormatTok->is(tok::l_brace)) {
3094 CompoundStatementIndenter Indenter(this, Line->Level,
3095 Style.BraceWrapping.AfterCaseLabel,
3096 Style.BraceWrapping.IndentBraces);
3097 parseBlock();
3098 if (FormatTok->is(tok::kw_break)) {
3099 if (Style.BraceWrapping.AfterControlStatement ==
3100 FormatStyle::BWACS_Always) {
3101 addUnwrappedLine();
3102 if (!Style.IndentCaseBlocks &&
3103 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
3104 ++Line->Level;
3107 parseStructuralElement();
3109 addUnwrappedLine();
3110 } else {
3111 if (FormatTok->is(tok::semi))
3112 nextToken();
3113 addUnwrappedLine();
3115 Line->Level = OldLineLevel;
3116 if (FormatTok->isNot(tok::l_brace)) {
3117 parseStructuralElement();
3118 addUnwrappedLine();
3122 void UnwrappedLineParser::parseCaseLabel() {
3123 assert(FormatTok->is(tok::kw_case) && "'case' expected");
3125 // FIXME: fix handling of complex expressions here.
3126 do {
3127 nextToken();
3128 if (FormatTok->is(tok::colon)) {
3129 FormatTok->setFinalizedType(TT_CaseLabelColon);
3130 break;
3132 } while (!eof());
3133 parseLabel();
3136 void UnwrappedLineParser::parseSwitch() {
3137 assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3138 nextToken();
3139 if (FormatTok->is(tok::l_paren))
3140 parseParens();
3142 keepAncestorBraces();
3144 if (FormatTok->is(tok::l_brace)) {
3145 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3146 parseBlock();
3147 addUnwrappedLine();
3148 } else {
3149 addUnwrappedLine();
3150 ++Line->Level;
3151 parseStructuralElement();
3152 --Line->Level;
3155 if (Style.RemoveBracesLLVM)
3156 NestedTooDeep.pop_back();
3159 // Operators that can follow a C variable.
3160 static bool isCOperatorFollowingVar(tok::TokenKind kind) {
3161 switch (kind) {
3162 case tok::ampamp:
3163 case tok::ampequal:
3164 case tok::arrow:
3165 case tok::caret:
3166 case tok::caretequal:
3167 case tok::comma:
3168 case tok::ellipsis:
3169 case tok::equal:
3170 case tok::equalequal:
3171 case tok::exclaim:
3172 case tok::exclaimequal:
3173 case tok::greater:
3174 case tok::greaterequal:
3175 case tok::greatergreater:
3176 case tok::greatergreaterequal:
3177 case tok::l_paren:
3178 case tok::l_square:
3179 case tok::less:
3180 case tok::lessequal:
3181 case tok::lessless:
3182 case tok::lesslessequal:
3183 case tok::minus:
3184 case tok::minusequal:
3185 case tok::minusminus:
3186 case tok::percent:
3187 case tok::percentequal:
3188 case tok::period:
3189 case tok::pipe:
3190 case tok::pipeequal:
3191 case tok::pipepipe:
3192 case tok::plus:
3193 case tok::plusequal:
3194 case tok::plusplus:
3195 case tok::question:
3196 case tok::r_brace:
3197 case tok::r_paren:
3198 case tok::r_square:
3199 case tok::semi:
3200 case tok::slash:
3201 case tok::slashequal:
3202 case tok::star:
3203 case tok::starequal:
3204 return true;
3205 default:
3206 return false;
3210 void UnwrappedLineParser::parseAccessSpecifier() {
3211 FormatToken *AccessSpecifierCandidate = FormatTok;
3212 nextToken();
3213 // Understand Qt's slots.
3214 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
3215 nextToken();
3216 // Otherwise, we don't know what it is, and we'd better keep the next token.
3217 if (FormatTok->is(tok::colon)) {
3218 nextToken();
3219 addUnwrappedLine();
3220 } else if (!FormatTok->is(tok::coloncolon) &&
3221 !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
3222 // Not a variable name nor namespace name.
3223 addUnwrappedLine();
3224 } else if (AccessSpecifierCandidate) {
3225 // Consider the access specifier to be a C identifier.
3226 AccessSpecifierCandidate->Tok.setKind(tok::identifier);
3230 /// \brief Parses a requires, decides if it is a clause or an expression.
3231 /// \pre The current token has to be the requires keyword.
3232 /// \returns true if it parsed a clause.
3233 bool clang::format::UnwrappedLineParser::parseRequires() {
3234 assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3235 auto RequiresToken = FormatTok;
3237 // We try to guess if it is a requires clause, or a requires expression. For
3238 // that we first consume the keyword and check the next token.
3239 nextToken();
3241 switch (FormatTok->Tok.getKind()) {
3242 case tok::l_brace:
3243 // This can only be an expression, never a clause.
3244 parseRequiresExpression(RequiresToken);
3245 return false;
3246 case tok::l_paren:
3247 // Clauses and expression can start with a paren, it's unclear what we have.
3248 break;
3249 default:
3250 // All other tokens can only be a clause.
3251 parseRequiresClause(RequiresToken);
3252 return true;
3255 // Looking forward we would have to decide if there are function declaration
3256 // like arguments to the requires expression:
3257 // requires (T t) {
3258 // Or there is a constraint expression for the requires clause:
3259 // requires (C<T> && ...
3261 // But first let's look behind.
3262 auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
3264 if (!PreviousNonComment ||
3265 PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
3266 // If there is no token, or an expression left brace, we are a requires
3267 // clause within a requires expression.
3268 parseRequiresClause(RequiresToken);
3269 return true;
3272 switch (PreviousNonComment->Tok.getKind()) {
3273 case tok::greater:
3274 case tok::r_paren:
3275 case tok::kw_noexcept:
3276 case tok::kw_const:
3277 // This is a requires clause.
3278 parseRequiresClause(RequiresToken);
3279 return true;
3280 case tok::amp:
3281 case tok::ampamp: {
3282 // This can be either:
3283 // if (... && requires (T t) ...)
3284 // Or
3285 // void member(...) && requires (C<T> ...
3286 // We check the one token before that for a const:
3287 // void member(...) const && requires (C<T> ...
3288 auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3289 if (PrevPrev && PrevPrev->is(tok::kw_const)) {
3290 parseRequiresClause(RequiresToken);
3291 return true;
3293 break;
3295 default:
3296 if (PreviousNonComment->isTypeOrIdentifier()) {
3297 // This is a requires clause.
3298 parseRequiresClause(RequiresToken);
3299 return true;
3301 // It's an expression.
3302 parseRequiresExpression(RequiresToken);
3303 return false;
3306 // Now we look forward and try to check if the paren content is a parameter
3307 // list. The parameters can be cv-qualified and contain references or
3308 // pointers.
3309 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3310 // of stuff: typename, const, *, &, &&, ::, identifiers.
3312 unsigned StoredPosition = Tokens->getPosition();
3313 FormatToken *NextToken = Tokens->getNextToken();
3314 int Lookahead = 0;
3315 auto PeekNext = [&Lookahead, &NextToken, this] {
3316 ++Lookahead;
3317 NextToken = Tokens->getNextToken();
3320 bool FoundType = false;
3321 bool LastWasColonColon = false;
3322 int OpenAngles = 0;
3324 for (; Lookahead < 50; PeekNext()) {
3325 switch (NextToken->Tok.getKind()) {
3326 case tok::kw_volatile:
3327 case tok::kw_const:
3328 case tok::comma:
3329 FormatTok = Tokens->setPosition(StoredPosition);
3330 parseRequiresExpression(RequiresToken);
3331 return false;
3332 case tok::r_paren:
3333 case tok::pipepipe:
3334 FormatTok = Tokens->setPosition(StoredPosition);
3335 parseRequiresClause(RequiresToken);
3336 return true;
3337 case tok::eof:
3338 // Break out of the loop.
3339 Lookahead = 50;
3340 break;
3341 case tok::coloncolon:
3342 LastWasColonColon = true;
3343 break;
3344 case tok::identifier:
3345 if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3346 FormatTok = Tokens->setPosition(StoredPosition);
3347 parseRequiresExpression(RequiresToken);
3348 return false;
3350 FoundType = true;
3351 LastWasColonColon = false;
3352 break;
3353 case tok::less:
3354 ++OpenAngles;
3355 break;
3356 case tok::greater:
3357 --OpenAngles;
3358 break;
3359 default:
3360 if (NextToken->isSimpleTypeSpecifier()) {
3361 FormatTok = Tokens->setPosition(StoredPosition);
3362 parseRequiresExpression(RequiresToken);
3363 return false;
3365 break;
3368 // This seems to be a complicated expression, just assume it's a clause.
3369 FormatTok = Tokens->setPosition(StoredPosition);
3370 parseRequiresClause(RequiresToken);
3371 return true;
3374 /// \brief Parses a requires clause.
3375 /// \param RequiresToken The requires keyword token, which starts this clause.
3376 /// \pre We need to be on the next token after the requires keyword.
3377 /// \sa parseRequiresExpression
3379 /// Returns if it either has finished parsing the clause, or it detects, that
3380 /// the clause is incorrect.
3381 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3382 assert(FormatTok->getPreviousNonComment() == RequiresToken);
3383 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3385 // If there is no previous token, we are within a requires expression,
3386 // otherwise we will always have the template or function declaration in front
3387 // of it.
3388 bool InRequiresExpression =
3389 !RequiresToken->Previous ||
3390 RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3392 RequiresToken->setFinalizedType(InRequiresExpression
3393 ? TT_RequiresClauseInARequiresExpression
3394 : TT_RequiresClause);
3396 // NOTE: parseConstraintExpression is only ever called from this function.
3397 // It could be inlined into here.
3398 parseConstraintExpression();
3400 if (!InRequiresExpression)
3401 FormatTok->Previous->ClosesRequiresClause = true;
3404 /// \brief Parses a requires expression.
3405 /// \param RequiresToken The requires keyword token, which starts this clause.
3406 /// \pre We need to be on the next token after the requires keyword.
3407 /// \sa parseRequiresClause
3409 /// Returns if it either has finished parsing the expression, or it detects,
3410 /// that the expression is incorrect.
3411 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3412 assert(FormatTok->getPreviousNonComment() == RequiresToken);
3413 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3415 RequiresToken->setFinalizedType(TT_RequiresExpression);
3417 if (FormatTok->is(tok::l_paren)) {
3418 FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3419 parseParens();
3422 if (FormatTok->is(tok::l_brace)) {
3423 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3424 parseChildBlock(/*CanContainBracedList=*/false,
3425 /*NextLBracesType=*/TT_CompoundRequirementLBrace);
3429 /// \brief Parses a constraint expression.
3431 /// This is the body of a requires clause. It returns, when the parsing is
3432 /// complete, or the expression is incorrect.
3433 void UnwrappedLineParser::parseConstraintExpression() {
3434 // The special handling for lambdas is needed since tryToParseLambda() eats a
3435 // token and if a requires expression is the last part of a requires clause
3436 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3437 // not set on the correct token. Thus we need to be aware if we even expect a
3438 // lambda to be possible.
3439 // template <typename T> requires requires { ... } [[nodiscard]] ...;
3440 bool LambdaNextTimeAllowed = true;
3442 // Within lambda declarations, it is permitted to put a requires clause after
3443 // its template parameter list, which would place the requires clause right
3444 // before the parentheses of the parameters of the lambda declaration. Thus,
3445 // we track if we expect to see grouping parentheses at all.
3446 // Without this check, `requires foo<T> (T t)` in the below example would be
3447 // seen as the whole requires clause, accidentally eating the parameters of
3448 // the lambda.
3449 // [&]<typename T> requires foo<T> (T t) { ... };
3450 bool TopLevelParensAllowed = true;
3452 do {
3453 bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);
3455 switch (FormatTok->Tok.getKind()) {
3456 case tok::kw_requires: {
3457 auto RequiresToken = FormatTok;
3458 nextToken();
3459 parseRequiresExpression(RequiresToken);
3460 break;
3463 case tok::l_paren:
3464 if (!TopLevelParensAllowed)
3465 return;
3466 parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
3467 TopLevelParensAllowed = false;
3468 break;
3470 case tok::l_square:
3471 if (!LambdaThisTimeAllowed || !tryToParseLambda())
3472 return;
3473 break;
3475 case tok::kw_const:
3476 case tok::semi:
3477 case tok::kw_class:
3478 case tok::kw_struct:
3479 case tok::kw_union:
3480 return;
3482 case tok::l_brace:
3483 // Potential function body.
3484 return;
3486 case tok::ampamp:
3487 case tok::pipepipe:
3488 FormatTok->setFinalizedType(TT_BinaryOperator);
3489 nextToken();
3490 LambdaNextTimeAllowed = true;
3491 TopLevelParensAllowed = true;
3492 break;
3494 case tok::comma:
3495 case tok::comment:
3496 LambdaNextTimeAllowed = LambdaThisTimeAllowed;
3497 nextToken();
3498 break;
3500 case tok::kw_sizeof:
3501 case tok::greater:
3502 case tok::greaterequal:
3503 case tok::greatergreater:
3504 case tok::less:
3505 case tok::lessequal:
3506 case tok::lessless:
3507 case tok::equalequal:
3508 case tok::exclaim:
3509 case tok::exclaimequal:
3510 case tok::plus:
3511 case tok::minus:
3512 case tok::star:
3513 case tok::slash:
3514 LambdaNextTimeAllowed = true;
3515 TopLevelParensAllowed = true;
3516 // Just eat them.
3517 nextToken();
3518 break;
3520 case tok::numeric_constant:
3521 case tok::coloncolon:
3522 case tok::kw_true:
3523 case tok::kw_false:
3524 TopLevelParensAllowed = false;
3525 // Just eat them.
3526 nextToken();
3527 break;
3529 case tok::kw_static_cast:
3530 case tok::kw_const_cast:
3531 case tok::kw_reinterpret_cast:
3532 case tok::kw_dynamic_cast:
3533 nextToken();
3534 if (!FormatTok->is(tok::less))
3535 return;
3537 nextToken();
3538 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
3539 /*ClosingBraceKind=*/tok::greater);
3540 break;
3542 default:
3543 if (!FormatTok->Tok.getIdentifierInfo()) {
3544 // Identifiers are part of the default case, we check for more then
3545 // tok::identifier to handle builtin type traits.
3546 return;
3549 // We need to differentiate identifiers for a template deduction guide,
3550 // variables, or function return types (the constraint expression has
3551 // ended before that), and basically all other cases. But it's easier to
3552 // check the other way around.
3553 assert(FormatTok->Previous);
3554 switch (FormatTok->Previous->Tok.getKind()) {
3555 case tok::coloncolon: // Nested identifier.
3556 case tok::ampamp: // Start of a function or variable for the
3557 case tok::pipepipe: // constraint expression. (binary)
3558 case tok::exclaim: // The same as above, but unary.
3559 case tok::kw_requires: // Initial identifier of a requires clause.
3560 case tok::equal: // Initial identifier of a concept declaration.
3561 break;
3562 default:
3563 return;
3566 // Read identifier with optional template declaration.
3567 nextToken();
3568 if (FormatTok->is(tok::less)) {
3569 nextToken();
3570 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
3571 /*ClosingBraceKind=*/tok::greater);
3573 TopLevelParensAllowed = false;
3574 break;
3576 } while (!eof());
3579 bool UnwrappedLineParser::parseEnum() {
3580 const FormatToken &InitialToken = *FormatTok;
3582 // Won't be 'enum' for NS_ENUMs.
3583 if (FormatTok->is(tok::kw_enum))
3584 nextToken();
3586 // In TypeScript, "enum" can also be used as property name, e.g. in interface
3587 // declarations. An "enum" keyword followed by a colon would be a syntax
3588 // error and thus assume it is just an identifier.
3589 if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
3590 return false;
3592 // In protobuf, "enum" can be used as a field name.
3593 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
3594 return false;
3596 // Eat up enum class ...
3597 if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
3598 nextToken();
3600 while (FormatTok->Tok.getIdentifierInfo() ||
3601 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
3602 tok::greater, tok::comma, tok::question,
3603 tok::l_square, tok::r_square)) {
3604 if (Style.isVerilog()) {
3605 FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
3606 nextToken();
3607 // In Verilog the base type can have dimensions.
3608 while (FormatTok->is(tok::l_square))
3609 parseSquare();
3610 } else {
3611 nextToken();
3613 // We can have macros or attributes in between 'enum' and the enum name.
3614 if (FormatTok->is(tok::l_paren))
3615 parseParens();
3616 if (FormatTok->is(TT_AttributeSquare)) {
3617 parseSquare();
3618 // Consume the closing TT_AttributeSquare.
3619 if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
3620 nextToken();
3622 if (FormatTok->is(tok::identifier)) {
3623 nextToken();
3624 // If there are two identifiers in a row, this is likely an elaborate
3625 // return type. In Java, this can be "implements", etc.
3626 if (Style.isCpp() && FormatTok->is(tok::identifier))
3627 return false;
3631 // Just a declaration or something is wrong.
3632 if (FormatTok->isNot(tok::l_brace))
3633 return true;
3634 FormatTok->setFinalizedType(TT_EnumLBrace);
3635 FormatTok->setBlockKind(BK_Block);
3637 if (Style.Language == FormatStyle::LK_Java) {
3638 // Java enums are different.
3639 parseJavaEnumBody();
3640 return true;
3642 if (Style.Language == FormatStyle::LK_Proto) {
3643 parseBlock(/*MustBeDeclaration=*/true);
3644 return true;
3647 if (!Style.AllowShortEnumsOnASingleLine &&
3648 ShouldBreakBeforeBrace(Style, InitialToken)) {
3649 addUnwrappedLine();
3651 // Parse enum body.
3652 nextToken();
3653 if (!Style.AllowShortEnumsOnASingleLine) {
3654 addUnwrappedLine();
3655 Line->Level += 1;
3657 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
3658 /*IsEnum=*/true);
3659 if (!Style.AllowShortEnumsOnASingleLine)
3660 Line->Level -= 1;
3661 if (HasError) {
3662 if (FormatTok->is(tok::semi))
3663 nextToken();
3664 addUnwrappedLine();
3666 return true;
3668 // There is no addUnwrappedLine() here so that we fall through to parsing a
3669 // structural element afterwards. Thus, in "enum A {} n, m;",
3670 // "} n, m;" will end up in one unwrapped line.
3673 bool UnwrappedLineParser::parseStructLike() {
3674 // parseRecord falls through and does not yet add an unwrapped line as a
3675 // record declaration or definition can start a structural element.
3676 parseRecord();
3677 // This does not apply to Java, JavaScript and C#.
3678 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3679 Style.isCSharp()) {
3680 if (FormatTok->is(tok::semi))
3681 nextToken();
3682 addUnwrappedLine();
3683 return true;
3685 return false;
3688 namespace {
3689 // A class used to set and restore the Token position when peeking
3690 // ahead in the token source.
3691 class ScopedTokenPosition {
3692 unsigned StoredPosition;
3693 FormatTokenSource *Tokens;
3695 public:
3696 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3697 assert(Tokens && "Tokens expected to not be null");
3698 StoredPosition = Tokens->getPosition();
3701 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3703 } // namespace
3705 // Look to see if we have [[ by looking ahead, if
3706 // its not then rewind to the original position.
3707 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3708 ScopedTokenPosition AutoPosition(Tokens);
3709 FormatToken *Tok = Tokens->getNextToken();
3710 // We already read the first [ check for the second.
3711 if (!Tok->is(tok::l_square))
3712 return false;
3713 // Double check that the attribute is just something
3714 // fairly simple.
3715 while (Tok->isNot(tok::eof)) {
3716 if (Tok->is(tok::r_square))
3717 break;
3718 Tok = Tokens->getNextToken();
3720 if (Tok->is(tok::eof))
3721 return false;
3722 Tok = Tokens->getNextToken();
3723 if (!Tok->is(tok::r_square))
3724 return false;
3725 Tok = Tokens->getNextToken();
3726 if (Tok->is(tok::semi))
3727 return false;
3728 return true;
3731 void UnwrappedLineParser::parseJavaEnumBody() {
3732 assert(FormatTok->is(tok::l_brace));
3733 const FormatToken *OpeningBrace = FormatTok;
3735 // Determine whether the enum is simple, i.e. does not have a semicolon or
3736 // constants with class bodies. Simple enums can be formatted like braced
3737 // lists, contracted to a single line, etc.
3738 unsigned StoredPosition = Tokens->getPosition();
3739 bool IsSimple = true;
3740 FormatToken *Tok = Tokens->getNextToken();
3741 while (!Tok->is(tok::eof)) {
3742 if (Tok->is(tok::r_brace))
3743 break;
3744 if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3745 IsSimple = false;
3746 break;
3748 // FIXME: This will also mark enums with braces in the arguments to enum
3749 // constants as "not simple". This is probably fine in practice, though.
3750 Tok = Tokens->getNextToken();
3752 FormatTok = Tokens->setPosition(StoredPosition);
3754 if (IsSimple) {
3755 nextToken();
3756 parseBracedList();
3757 addUnwrappedLine();
3758 return;
3761 // Parse the body of a more complex enum.
3762 // First add a line for everything up to the "{".
3763 nextToken();
3764 addUnwrappedLine();
3765 ++Line->Level;
3767 // Parse the enum constants.
3768 while (!eof()) {
3769 if (FormatTok->is(tok::l_brace)) {
3770 // Parse the constant's class body.
3771 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3772 /*MunchSemi=*/false);
3773 } else if (FormatTok->is(tok::l_paren)) {
3774 parseParens();
3775 } else if (FormatTok->is(tok::comma)) {
3776 nextToken();
3777 addUnwrappedLine();
3778 } else if (FormatTok->is(tok::semi)) {
3779 nextToken();
3780 addUnwrappedLine();
3781 break;
3782 } else if (FormatTok->is(tok::r_brace)) {
3783 addUnwrappedLine();
3784 break;
3785 } else {
3786 nextToken();
3790 // Parse the class body after the enum's ";" if any.
3791 parseLevel(OpeningBrace);
3792 nextToken();
3793 --Line->Level;
3794 addUnwrappedLine();
3797 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
3798 const FormatToken &InitialToken = *FormatTok;
3799 nextToken();
3801 // The actual identifier can be a nested name specifier, and in macros
3802 // it is often token-pasted.
3803 // An [[attribute]] can be before the identifier.
3804 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
3805 tok::kw___attribute, tok::kw___declspec,
3806 tok::kw_alignas, tok::l_square) ||
3807 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3808 FormatTok->isOneOf(tok::period, tok::comma))) {
3809 if (Style.isJavaScript() &&
3810 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
3811 // JavaScript/TypeScript supports inline object types in
3812 // extends/implements positions:
3813 // class Foo implements {bar: number} { }
3814 nextToken();
3815 if (FormatTok->is(tok::l_brace)) {
3816 tryToParseBracedList();
3817 continue;
3820 if (FormatTok->is(tok::l_square) && handleCppAttributes())
3821 continue;
3822 bool IsNonMacroIdentifier =
3823 FormatTok->is(tok::identifier) &&
3824 FormatTok->TokenText != FormatTok->TokenText.upper();
3825 nextToken();
3826 // We can have macros in between 'class' and the class name.
3827 if (!IsNonMacroIdentifier && FormatTok->is(tok::l_paren))
3828 parseParens();
3831 // Note that parsing away template declarations here leads to incorrectly
3832 // accepting function declarations as record declarations.
3833 // In general, we cannot solve this problem. Consider:
3834 // class A<int> B() {}
3835 // which can be a function definition or a class definition when B() is a
3836 // macro. If we find enough real-world cases where this is a problem, we
3837 // can parse for the 'template' keyword in the beginning of the statement,
3838 // and thus rule out the record production in case there is no template
3839 // (this would still leave us with an ambiguity between template function
3840 // and class declarations).
3841 if (FormatTok->isOneOf(tok::colon, tok::less)) {
3842 do {
3843 if (FormatTok->is(tok::l_brace)) {
3844 calculateBraceTypes(/*ExpectClassBody=*/true);
3845 if (!tryToParseBracedList())
3846 break;
3848 if (FormatTok->is(tok::l_square)) {
3849 FormatToken *Previous = FormatTok->Previous;
3850 if (!Previous ||
3851 !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
3852 // Don't try parsing a lambda if we had a closing parenthesis before,
3853 // it was probably a pointer to an array: int (*)[].
3854 if (!tryToParseLambda())
3855 continue;
3856 } else {
3857 parseSquare();
3858 continue;
3861 if (FormatTok->is(tok::semi))
3862 return;
3863 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
3864 addUnwrappedLine();
3865 nextToken();
3866 parseCSharpGenericTypeConstraint();
3867 break;
3869 nextToken();
3870 } while (!eof());
3873 auto GetBraceType = [](const FormatToken &RecordTok) {
3874 switch (RecordTok.Tok.getKind()) {
3875 case tok::kw_class:
3876 return TT_ClassLBrace;
3877 case tok::kw_struct:
3878 return TT_StructLBrace;
3879 case tok::kw_union:
3880 return TT_UnionLBrace;
3881 default:
3882 // Useful for e.g. interface.
3883 return TT_RecordLBrace;
3886 if (FormatTok->is(tok::l_brace)) {
3887 FormatTok->setFinalizedType(GetBraceType(InitialToken));
3888 if (ParseAsExpr) {
3889 parseChildBlock();
3890 } else {
3891 if (ShouldBreakBeforeBrace(Style, InitialToken))
3892 addUnwrappedLine();
3894 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
3895 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
3898 // There is no addUnwrappedLine() here so that we fall through to parsing a
3899 // structural element afterwards. Thus, in "class A {} n, m;",
3900 // "} n, m;" will end up in one unwrapped line.
3903 void UnwrappedLineParser::parseObjCMethod() {
3904 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
3905 "'(' or identifier expected.");
3906 do {
3907 if (FormatTok->is(tok::semi)) {
3908 nextToken();
3909 addUnwrappedLine();
3910 return;
3911 } else if (FormatTok->is(tok::l_brace)) {
3912 if (Style.BraceWrapping.AfterFunction)
3913 addUnwrappedLine();
3914 parseBlock();
3915 addUnwrappedLine();
3916 return;
3917 } else {
3918 nextToken();
3920 } while (!eof());
3923 void UnwrappedLineParser::parseObjCProtocolList() {
3924 assert(FormatTok->is(tok::less) && "'<' expected.");
3925 do {
3926 nextToken();
3927 // Early exit in case someone forgot a close angle.
3928 if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3929 FormatTok->isObjCAtKeyword(tok::objc_end)) {
3930 return;
3932 } while (!eof() && FormatTok->isNot(tok::greater));
3933 nextToken(); // Skip '>'.
3936 void UnwrappedLineParser::parseObjCUntilAtEnd() {
3937 do {
3938 if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
3939 nextToken();
3940 addUnwrappedLine();
3941 break;
3943 if (FormatTok->is(tok::l_brace)) {
3944 parseBlock();
3945 // In ObjC interfaces, nothing should be following the "}".
3946 addUnwrappedLine();
3947 } else if (FormatTok->is(tok::r_brace)) {
3948 // Ignore stray "}". parseStructuralElement doesn't consume them.
3949 nextToken();
3950 addUnwrappedLine();
3951 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
3952 nextToken();
3953 parseObjCMethod();
3954 } else {
3955 parseStructuralElement();
3957 } while (!eof());
3960 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
3961 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
3962 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
3963 nextToken();
3964 nextToken(); // interface name
3966 // @interface can be followed by a lightweight generic
3967 // specialization list, then either a base class or a category.
3968 if (FormatTok->is(tok::less))
3969 parseObjCLightweightGenerics();
3970 if (FormatTok->is(tok::colon)) {
3971 nextToken();
3972 nextToken(); // base class name
3973 // The base class can also have lightweight generics applied to it.
3974 if (FormatTok->is(tok::less))
3975 parseObjCLightweightGenerics();
3976 } else if (FormatTok->is(tok::l_paren)) {
3977 // Skip category, if present.
3978 parseParens();
3981 if (FormatTok->is(tok::less))
3982 parseObjCProtocolList();
3984 if (FormatTok->is(tok::l_brace)) {
3985 if (Style.BraceWrapping.AfterObjCDeclaration)
3986 addUnwrappedLine();
3987 parseBlock(/*MustBeDeclaration=*/true);
3990 // With instance variables, this puts '}' on its own line. Without instance
3991 // variables, this ends the @interface line.
3992 addUnwrappedLine();
3994 parseObjCUntilAtEnd();
3997 void UnwrappedLineParser::parseObjCLightweightGenerics() {
3998 assert(FormatTok->is(tok::less));
3999 // Unlike protocol lists, generic parameterizations support
4000 // nested angles:
4002 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4003 // NSObject <NSCopying, NSSecureCoding>
4005 // so we need to count how many open angles we have left.
4006 unsigned NumOpenAngles = 1;
4007 do {
4008 nextToken();
4009 // Early exit in case someone forgot a close angle.
4010 if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4011 FormatTok->isObjCAtKeyword(tok::objc_end)) {
4012 break;
4014 if (FormatTok->is(tok::less)) {
4015 ++NumOpenAngles;
4016 } else if (FormatTok->is(tok::greater)) {
4017 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
4018 --NumOpenAngles;
4020 } while (!eof() && NumOpenAngles != 0);
4021 nextToken(); // Skip '>'.
4024 // Returns true for the declaration/definition form of @protocol,
4025 // false for the expression form.
4026 bool UnwrappedLineParser::parseObjCProtocol() {
4027 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
4028 nextToken();
4030 if (FormatTok->is(tok::l_paren)) {
4031 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4032 return false;
4035 // The definition/declaration form,
4036 // @protocol Foo
4037 // - (int)someMethod;
4038 // @end
4040 nextToken(); // protocol name
4042 if (FormatTok->is(tok::less))
4043 parseObjCProtocolList();
4045 // Check for protocol declaration.
4046 if (FormatTok->is(tok::semi)) {
4047 nextToken();
4048 addUnwrappedLine();
4049 return true;
4052 addUnwrappedLine();
4053 parseObjCUntilAtEnd();
4054 return true;
4057 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
4058 bool IsImport = FormatTok->is(Keywords.kw_import);
4059 assert(IsImport || FormatTok->is(tok::kw_export));
4060 nextToken();
4062 // Consume the "default" in "export default class/function".
4063 if (FormatTok->is(tok::kw_default))
4064 nextToken();
4066 // Consume "async function", "function" and "default function", so that these
4067 // get parsed as free-standing JS functions, i.e. do not require a trailing
4068 // semicolon.
4069 if (FormatTok->is(Keywords.kw_async))
4070 nextToken();
4071 if (FormatTok->is(Keywords.kw_function)) {
4072 nextToken();
4073 return;
4076 // For imports, `export *`, `export {...}`, consume the rest of the line up
4077 // to the terminating `;`. For everything else, just return and continue
4078 // parsing the structural element, i.e. the declaration or expression for
4079 // `export default`.
4080 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
4081 !FormatTok->isStringLiteral() &&
4082 !(FormatTok->is(Keywords.kw_type) &&
4083 Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) {
4084 return;
4087 while (!eof()) {
4088 if (FormatTok->is(tok::semi))
4089 return;
4090 if (Line->Tokens.empty()) {
4091 // Common issue: Automatic Semicolon Insertion wrapped the line, so the
4092 // import statement should terminate.
4093 return;
4095 if (FormatTok->is(tok::l_brace)) {
4096 FormatTok->setBlockKind(BK_Block);
4097 nextToken();
4098 parseBracedList();
4099 } else {
4100 nextToken();
4105 void UnwrappedLineParser::parseStatementMacro() {
4106 nextToken();
4107 if (FormatTok->is(tok::l_paren))
4108 parseParens();
4109 if (FormatTok->is(tok::semi))
4110 nextToken();
4111 addUnwrappedLine();
4114 void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4115 // consume things like a::`b.c[d:e] or a::*
4116 while (true) {
4117 if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar,
4118 tok::coloncolon, tok::hash) ||
4119 Keywords.isVerilogIdentifier(*FormatTok)) {
4120 nextToken();
4121 } else if (FormatTok->is(tok::l_square)) {
4122 parseSquare();
4123 } else {
4124 break;
4129 void UnwrappedLineParser::parseVerilogSensitivityList() {
4130 if (!FormatTok->is(tok::at))
4131 return;
4132 nextToken();
4133 // A block event expression has 2 at signs.
4134 if (FormatTok->is(tok::at))
4135 nextToken();
4136 switch (FormatTok->Tok.getKind()) {
4137 case tok::star:
4138 nextToken();
4139 break;
4140 case tok::l_paren:
4141 parseParens();
4142 break;
4143 default:
4144 parseVerilogHierarchyIdentifier();
4145 break;
4149 unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
4150 unsigned AddLevels = 0;
4152 if (FormatTok->is(Keywords.kw_clocking)) {
4153 nextToken();
4154 if (Keywords.isVerilogIdentifier(*FormatTok))
4155 nextToken();
4156 parseVerilogSensitivityList();
4157 if (FormatTok->is(tok::semi))
4158 nextToken();
4159 } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex,
4160 Keywords.kw_casez, Keywords.kw_randcase,
4161 Keywords.kw_randsequence)) {
4162 if (Style.IndentCaseLabels)
4163 AddLevels++;
4164 nextToken();
4165 if (FormatTok->is(tok::l_paren)) {
4166 FormatTok->setFinalizedType(TT_ConditionLParen);
4167 parseParens();
4169 if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches))
4170 nextToken();
4171 // The case header has no semicolon.
4172 } else {
4173 // "module" etc.
4174 nextToken();
4175 // all the words like the name of the module and specifiers like
4176 // "automatic" and the width of function return type
4177 while (true) {
4178 if (FormatTok->is(tok::l_square)) {
4179 auto Prev = FormatTok->getPreviousNonComment();
4180 if (Prev && Keywords.isVerilogIdentifier(*Prev))
4181 Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
4182 parseSquare();
4183 } else if (Keywords.isVerilogIdentifier(*FormatTok) ||
4184 FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) {
4185 nextToken();
4186 } else {
4187 break;
4191 auto NewLine = [this]() {
4192 addUnwrappedLine();
4193 Line->IsContinuation = true;
4196 // package imports
4197 while (FormatTok->is(Keywords.kw_import)) {
4198 NewLine();
4199 nextToken();
4200 parseVerilogHierarchyIdentifier();
4201 if (FormatTok->is(tok::semi))
4202 nextToken();
4205 // parameters and ports
4206 if (FormatTok->is(Keywords.kw_verilogHash)) {
4207 NewLine();
4208 nextToken();
4209 if (FormatTok->is(tok::l_paren)) {
4210 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4211 parseParens();
4214 if (FormatTok->is(tok::l_paren)) {
4215 NewLine();
4216 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4217 parseParens();
4220 // extends and implements
4221 if (FormatTok->is(Keywords.kw_extends)) {
4222 NewLine();
4223 nextToken();
4224 parseVerilogHierarchyIdentifier();
4225 if (FormatTok->is(tok::l_paren))
4226 parseParens();
4228 if (FormatTok->is(Keywords.kw_implements)) {
4229 NewLine();
4230 do {
4231 nextToken();
4232 parseVerilogHierarchyIdentifier();
4233 } while (FormatTok->is(tok::comma));
4236 // Coverage event for cover groups.
4237 if (FormatTok->is(tok::at)) {
4238 NewLine();
4239 parseVerilogSensitivityList();
4242 if (FormatTok->is(tok::semi))
4243 nextToken(/*LevelDifference=*/1);
4244 addUnwrappedLine();
4247 return AddLevels;
4250 void UnwrappedLineParser::parseVerilogTable() {
4251 assert(FormatTok->is(Keywords.kw_table));
4252 nextToken(/*LevelDifference=*/1);
4253 addUnwrappedLine();
4255 auto InitialLevel = Line->Level++;
4256 while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) {
4257 FormatToken *Tok = FormatTok;
4258 nextToken();
4259 if (Tok->is(tok::semi))
4260 addUnwrappedLine();
4261 else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus))
4262 Tok->setFinalizedType(TT_VerilogTableItem);
4264 Line->Level = InitialLevel;
4265 nextToken(/*LevelDifference=*/-1);
4266 addUnwrappedLine();
4269 void UnwrappedLineParser::parseVerilogCaseLabel() {
4270 // The label will get unindented in AnnotatingParser. If there are no leading
4271 // spaces, indent the rest here so that things inside the block will be
4272 // indented relative to things outside. We don't use parseLabel because we
4273 // don't know whether this colon is a label or a ternary expression at this
4274 // point.
4275 auto OrigLevel = Line->Level;
4276 auto FirstLine = CurrentLines->size();
4277 if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
4278 ++Line->Level;
4279 else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok))
4280 --Line->Level;
4281 parseStructuralElement();
4282 // Restore the indentation in both the new line and the line that has the
4283 // label.
4284 if (CurrentLines->size() > FirstLine)
4285 (*CurrentLines)[FirstLine].Level = OrigLevel;
4286 Line->Level = OrigLevel;
4289 bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
4290 for (const auto &N : Line.Tokens) {
4291 if (N.Tok->MacroCtx)
4292 return true;
4293 for (const UnwrappedLine &Child : N.Children)
4294 if (containsExpansion(Child))
4295 return true;
4297 return false;
4300 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
4301 if (Line->Tokens.empty())
4302 return;
4303 LLVM_DEBUG({
4304 if (!parsingPPDirective()) {
4305 llvm::dbgs() << "Adding unwrapped line:\n";
4306 printDebugInfo(*Line);
4310 // If this line closes a block when in Whitesmiths mode, remember that
4311 // information so that the level can be decreased after the line is added.
4312 // This has to happen after the addition of the line since the line itself
4313 // needs to be indented.
4314 bool ClosesWhitesmithsBlock =
4315 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
4316 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
4318 // If the current line was expanded from a macro call, we use it to
4319 // reconstruct an unwrapped line from the structure of the expanded unwrapped
4320 // line and the unexpanded token stream.
4321 if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) {
4322 if (!Reconstruct)
4323 Reconstruct.emplace(Line->Level, Unexpanded);
4324 Reconstruct->addLine(*Line);
4326 // While the reconstructed unexpanded lines are stored in the normal
4327 // flow of lines, the expanded lines are stored on the side to be analyzed
4328 // in an extra step.
4329 CurrentExpandedLines.push_back(std::move(*Line));
4331 if (Reconstruct->finished()) {
4332 UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
4333 assert(!Reconstructed.Tokens.empty() &&
4334 "Reconstructed must at least contain the macro identifier.");
4335 assert(!parsingPPDirective());
4336 LLVM_DEBUG({
4337 llvm::dbgs() << "Adding unexpanded line:\n";
4338 printDebugInfo(Reconstructed);
4340 ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines;
4341 Lines.push_back(std::move(Reconstructed));
4342 CurrentExpandedLines.clear();
4343 Reconstruct.reset();
4345 } else {
4346 // At the top level we only get here when no unexpansion is going on, or
4347 // when conditional formatting led to unfinished macro reconstructions.
4348 assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0);
4349 CurrentLines->push_back(std::move(*Line));
4351 Line->Tokens.clear();
4352 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
4353 Line->FirstStartColumn = 0;
4354 Line->IsContinuation = false;
4356 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
4357 --Line->Level;
4358 if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
4359 CurrentLines->append(
4360 std::make_move_iterator(PreprocessorDirectives.begin()),
4361 std::make_move_iterator(PreprocessorDirectives.end()));
4362 PreprocessorDirectives.clear();
4364 // Disconnect the current token from the last token on the previous line.
4365 FormatTok->Previous = nullptr;
4368 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
4370 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4371 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4372 FormatTok.NewlinesBefore > 0;
4375 // Checks if \p FormatTok is a line comment that continues the line comment
4376 // section on \p Line.
4377 static bool
4378 continuesLineCommentSection(const FormatToken &FormatTok,
4379 const UnwrappedLine &Line,
4380 const llvm::Regex &CommentPragmasRegex) {
4381 if (Line.Tokens.empty())
4382 return false;
4384 StringRef IndentContent = FormatTok.TokenText;
4385 if (FormatTok.TokenText.startswith("//") ||
4386 FormatTok.TokenText.startswith("/*")) {
4387 IndentContent = FormatTok.TokenText.substr(2);
4389 if (CommentPragmasRegex.match(IndentContent))
4390 return false;
4392 // If Line starts with a line comment, then FormatTok continues the comment
4393 // section if its original column is greater or equal to the original start
4394 // column of the line.
4396 // Define the min column token of a line as follows: if a line ends in '{' or
4397 // contains a '{' followed by a line comment, then the min column token is
4398 // that '{'. Otherwise, the min column token of the line is the first token of
4399 // the line.
4401 // If Line starts with a token other than a line comment, then FormatTok
4402 // continues the comment section if its original column is greater than the
4403 // original start column of the min column token of the line.
4405 // For example, the second line comment continues the first in these cases:
4407 // // first line
4408 // // second line
4410 // and:
4412 // // first line
4413 // // second line
4415 // and:
4417 // int i; // first line
4418 // // second line
4420 // and:
4422 // do { // first line
4423 // // second line
4424 // int i;
4425 // } while (true);
4427 // and:
4429 // enum {
4430 // a, // first line
4431 // // second line
4432 // b
4433 // };
4435 // The second line comment doesn't continue the first in these cases:
4437 // // first line
4438 // // second line
4440 // and:
4442 // int i; // first line
4443 // // second line
4445 // and:
4447 // do { // first line
4448 // // second line
4449 // int i;
4450 // } while (true);
4452 // and:
4454 // enum {
4455 // a, // first line
4456 // // second line
4457 // };
4458 const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4460 // Scan for '{//'. If found, use the column of '{' as a min column for line
4461 // comment section continuation.
4462 const FormatToken *PreviousToken = nullptr;
4463 for (const UnwrappedLineNode &Node : Line.Tokens) {
4464 if (PreviousToken && PreviousToken->is(tok::l_brace) &&
4465 isLineComment(*Node.Tok)) {
4466 MinColumnToken = PreviousToken;
4467 break;
4469 PreviousToken = Node.Tok;
4471 // Grab the last newline preceding a token in this unwrapped line.
4472 if (Node.Tok->NewlinesBefore > 0)
4473 MinColumnToken = Node.Tok;
4475 if (PreviousToken && PreviousToken->is(tok::l_brace))
4476 MinColumnToken = PreviousToken;
4478 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4479 MinColumnToken);
4482 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4483 bool JustComments = Line->Tokens.empty();
4484 for (FormatToken *Tok : CommentsBeforeNextToken) {
4485 // Line comments that belong to the same line comment section are put on the
4486 // same line since later we might want to reflow content between them.
4487 // Additional fine-grained breaking of line comment sections is controlled
4488 // by the class BreakableLineCommentSection in case it is desirable to keep
4489 // several line comment sections in the same unwrapped line.
4491 // FIXME: Consider putting separate line comment sections as children to the
4492 // unwrapped line instead.
4493 Tok->ContinuesLineCommentSection =
4494 continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
4495 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4496 addUnwrappedLine();
4497 pushToken(Tok);
4499 if (NewlineBeforeNext && JustComments)
4500 addUnwrappedLine();
4501 CommentsBeforeNextToken.clear();
4504 void UnwrappedLineParser::nextToken(int LevelDifference) {
4505 if (eof())
4506 return;
4507 flushComments(isOnNewLine(*FormatTok));
4508 pushToken(FormatTok);
4509 FormatToken *Previous = FormatTok;
4510 if (!Style.isJavaScript())
4511 readToken(LevelDifference);
4512 else
4513 readTokenWithJavaScriptASI();
4514 FormatTok->Previous = Previous;
4515 if (Style.isVerilog()) {
4516 // Blocks in Verilog can have `begin` and `end` instead of braces. For
4517 // keywords like `begin`, we can't treat them the same as left braces
4518 // because some contexts require one of them. For example structs use
4519 // braces and if blocks use keywords, and a left brace can occur in an if
4520 // statement, but it is not a block. For keywords like `end`, we simply
4521 // treat them the same as right braces.
4522 if (Keywords.isVerilogEnd(*FormatTok))
4523 FormatTok->Tok.setKind(tok::r_brace);
4527 void UnwrappedLineParser::distributeComments(
4528 const SmallVectorImpl<FormatToken *> &Comments,
4529 const FormatToken *NextTok) {
4530 // Whether or not a line comment token continues a line is controlled by
4531 // the method continuesLineCommentSection, with the following caveat:
4533 // Define a trail of Comments to be a nonempty proper postfix of Comments such
4534 // that each comment line from the trail is aligned with the next token, if
4535 // the next token exists. If a trail exists, the beginning of the maximal
4536 // trail is marked as a start of a new comment section.
4538 // For example in this code:
4540 // int a; // line about a
4541 // // line 1 about b
4542 // // line 2 about b
4543 // int b;
4545 // the two lines about b form a maximal trail, so there are two sections, the
4546 // first one consisting of the single comment "// line about a" and the
4547 // second one consisting of the next two comments.
4548 if (Comments.empty())
4549 return;
4550 bool ShouldPushCommentsInCurrentLine = true;
4551 bool HasTrailAlignedWithNextToken = false;
4552 unsigned StartOfTrailAlignedWithNextToken = 0;
4553 if (NextTok) {
4554 // We are skipping the first element intentionally.
4555 for (unsigned i = Comments.size() - 1; i > 0; --i) {
4556 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4557 HasTrailAlignedWithNextToken = true;
4558 StartOfTrailAlignedWithNextToken = i;
4562 for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4563 FormatToken *FormatTok = Comments[i];
4564 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4565 FormatTok->ContinuesLineCommentSection = false;
4566 } else {
4567 FormatTok->ContinuesLineCommentSection =
4568 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4570 if (!FormatTok->ContinuesLineCommentSection &&
4571 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
4572 ShouldPushCommentsInCurrentLine = false;
4574 if (ShouldPushCommentsInCurrentLine)
4575 pushToken(FormatTok);
4576 else
4577 CommentsBeforeNextToken.push_back(FormatTok);
4581 void UnwrappedLineParser::readToken(int LevelDifference) {
4582 SmallVector<FormatToken *, 1> Comments;
4583 bool PreviousWasComment = false;
4584 bool FirstNonCommentOnLine = false;
4585 do {
4586 FormatTok = Tokens->getNextToken();
4587 assert(FormatTok);
4588 while (FormatTok->getType() == TT_ConflictStart ||
4589 FormatTok->getType() == TT_ConflictEnd ||
4590 FormatTok->getType() == TT_ConflictAlternative) {
4591 if (FormatTok->getType() == TT_ConflictStart)
4592 conditionalCompilationStart(/*Unreachable=*/false);
4593 else if (FormatTok->getType() == TT_ConflictAlternative)
4594 conditionalCompilationAlternative();
4595 else if (FormatTok->getType() == TT_ConflictEnd)
4596 conditionalCompilationEnd();
4597 FormatTok = Tokens->getNextToken();
4598 FormatTok->MustBreakBefore = true;
4601 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4602 const FormatToken &Tok,
4603 bool PreviousWasComment) {
4604 auto IsFirstOnLine = [](const FormatToken &Tok) {
4605 return Tok.HasUnescapedNewline || Tok.IsFirst;
4608 // Consider preprocessor directives preceded by block comments as first
4609 // on line.
4610 if (PreviousWasComment)
4611 return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4612 return IsFirstOnLine(Tok);
4615 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4616 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4617 PreviousWasComment = FormatTok->is(tok::comment);
4619 while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
4620 (!Style.isVerilog() ||
4621 Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) &&
4622 FirstNonCommentOnLine) {
4623 distributeComments(Comments, FormatTok);
4624 Comments.clear();
4625 // If there is an unfinished unwrapped line, we flush the preprocessor
4626 // directives only after that unwrapped line was finished later.
4627 bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4628 ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4629 assert((LevelDifference >= 0 ||
4630 static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4631 "LevelDifference makes Line->Level negative");
4632 Line->Level += LevelDifference;
4633 // Comments stored before the preprocessor directive need to be output
4634 // before the preprocessor directive, at the same level as the
4635 // preprocessor directive, as we consider them to apply to the directive.
4636 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
4637 PPBranchLevel > 0) {
4638 Line->Level += PPBranchLevel;
4640 flushComments(isOnNewLine(*FormatTok));
4641 parsePPDirective();
4642 PreviousWasComment = FormatTok->is(tok::comment);
4643 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4644 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4647 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4648 !Line->InPPDirective) {
4649 continue;
4652 if (FormatTok->is(tok::identifier) &&
4653 Macros.defined(FormatTok->TokenText) &&
4654 // FIXME: Allow expanding macros in preprocessor directives.
4655 !Line->InPPDirective) {
4656 FormatToken *ID = FormatTok;
4657 unsigned Position = Tokens->getPosition();
4659 // To correctly parse the code, we need to replace the tokens of the macro
4660 // call with its expansion.
4661 auto PreCall = std::move(Line);
4662 Line.reset(new UnwrappedLine);
4663 bool OldInExpansion = InExpansion;
4664 InExpansion = true;
4665 // We parse the macro call into a new line.
4666 auto Args = parseMacroCall();
4667 InExpansion = OldInExpansion;
4668 assert(Line->Tokens.front().Tok == ID);
4669 // And remember the unexpanded macro call tokens.
4670 auto UnexpandedLine = std::move(Line);
4671 // Reset to the old line.
4672 Line = std::move(PreCall);
4674 LLVM_DEBUG({
4675 llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
4676 if (Args) {
4677 llvm::dbgs() << "(";
4678 for (const auto &Arg : Args.value())
4679 for (const auto &T : Arg)
4680 llvm::dbgs() << T->TokenText << " ";
4681 llvm::dbgs() << ")";
4683 llvm::dbgs() << "\n";
4685 if (Macros.objectLike(ID->TokenText) && Args &&
4686 !Macros.hasArity(ID->TokenText, Args->size())) {
4687 // The macro is either
4688 // - object-like, but we got argumnets, or
4689 // - overloaded to be both object-like and function-like, but none of
4690 // the function-like arities match the number of arguments.
4691 // Thus, expand as object-like macro.
4692 LLVM_DEBUG(llvm::dbgs()
4693 << "Macro \"" << ID->TokenText
4694 << "\" not overloaded for arity " << Args->size()
4695 << "or not function-like, using object-like overload.");
4696 Args.reset();
4697 UnexpandedLine->Tokens.resize(1);
4698 Tokens->setPosition(Position);
4699 nextToken();
4700 assert(!Args && Macros.objectLike(ID->TokenText));
4702 if ((!Args && Macros.objectLike(ID->TokenText)) ||
4703 (Args && Macros.hasArity(ID->TokenText, Args->size()))) {
4704 // Next, we insert the expanded tokens in the token stream at the
4705 // current position, and continue parsing.
4706 Unexpanded[ID] = std::move(UnexpandedLine);
4707 SmallVector<FormatToken *, 8> Expansion =
4708 Macros.expand(ID, std::move(Args));
4709 if (!Expansion.empty())
4710 FormatTok = Tokens->insertTokens(Expansion);
4712 LLVM_DEBUG({
4713 llvm::dbgs() << "Expanded: ";
4714 for (const auto &T : Expansion)
4715 llvm::dbgs() << T->TokenText << " ";
4716 llvm::dbgs() << "\n";
4718 } else {
4719 LLVM_DEBUG({
4720 llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
4721 << "\", because it was used ";
4722 if (Args)
4723 llvm::dbgs() << "with " << Args->size();
4724 else
4725 llvm::dbgs() << "without";
4726 llvm::dbgs() << " arguments, which doesn't match any definition.\n";
4728 Tokens->setPosition(Position);
4729 FormatTok = ID;
4733 if (!FormatTok->is(tok::comment)) {
4734 distributeComments(Comments, FormatTok);
4735 Comments.clear();
4736 return;
4739 Comments.push_back(FormatTok);
4740 } while (!eof());
4742 distributeComments(Comments, nullptr);
4743 Comments.clear();
4746 namespace {
4747 template <typename Iterator>
4748 void pushTokens(Iterator Begin, Iterator End,
4749 llvm::SmallVectorImpl<FormatToken *> &Into) {
4750 for (auto I = Begin; I != End; ++I) {
4751 Into.push_back(I->Tok);
4752 for (const auto &Child : I->Children)
4753 pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
4756 } // namespace
4758 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
4759 UnwrappedLineParser::parseMacroCall() {
4760 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
4761 assert(Line->Tokens.empty());
4762 nextToken();
4763 if (!FormatTok->is(tok::l_paren))
4764 return Args;
4765 unsigned Position = Tokens->getPosition();
4766 FormatToken *Tok = FormatTok;
4767 nextToken();
4768 Args.emplace();
4769 auto ArgStart = std::prev(Line->Tokens.end());
4771 int Parens = 0;
4772 do {
4773 switch (FormatTok->Tok.getKind()) {
4774 case tok::l_paren:
4775 ++Parens;
4776 nextToken();
4777 break;
4778 case tok::r_paren: {
4779 if (Parens > 0) {
4780 --Parens;
4781 nextToken();
4782 break;
4784 Args->push_back({});
4785 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
4786 nextToken();
4787 return Args;
4789 case tok::comma: {
4790 if (Parens > 0) {
4791 nextToken();
4792 break;
4794 Args->push_back({});
4795 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
4796 nextToken();
4797 ArgStart = std::prev(Line->Tokens.end());
4798 break;
4800 default:
4801 nextToken();
4802 break;
4804 } while (!eof());
4805 Line->Tokens.resize(1);
4806 Tokens->setPosition(Position);
4807 FormatTok = Tok;
4808 return {};
4811 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
4812 Line->Tokens.push_back(UnwrappedLineNode(Tok));
4813 if (MustBreakBeforeNextToken) {
4814 Line->Tokens.back().Tok->MustBreakBefore = true;
4815 MustBreakBeforeNextToken = false;
4819 } // end namespace format
4820 } // end namespace clang