[AMDGPU][AsmParser][NFC] Get rid of custom default operand handlers.
[llvm-project.git] / clang / lib / Format / TokenAnnotator.cpp
blob997fe92cabac5f1398b82802afeb40241e7e6e4e
1 //===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements a token annotator, i.e. creates
11 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
12 ///
13 //===----------------------------------------------------------------------===//
15 #include "TokenAnnotator.h"
16 #include "FormatToken.h"
17 #include "clang/Basic/SourceManager.h"
18 #include "clang/Basic/TokenKinds.h"
19 #include "llvm/ADT/SmallPtrSet.h"
20 #include "llvm/Support/Debug.h"
22 #define DEBUG_TYPE "format-token-annotator"
24 namespace clang {
25 namespace format {
27 namespace {
29 /// Returns \c true if the line starts with a token that can start a statement
30 /// with an initializer.
31 static bool startsWithInitStatement(const AnnotatedLine &Line) {
32 return Line.startsWith(tok::kw_for) || Line.startsWith(tok::kw_if) ||
33 Line.startsWith(tok::kw_switch);
36 /// Returns \c true if the token can be used as an identifier in
37 /// an Objective-C \c \@selector, \c false otherwise.
38 ///
39 /// Because getFormattingLangOpts() always lexes source code as
40 /// Objective-C++, C++ keywords like \c new and \c delete are
41 /// lexed as tok::kw_*, not tok::identifier, even for Objective-C.
42 ///
43 /// For Objective-C and Objective-C++, both identifiers and keywords
44 /// are valid inside @selector(...) (or a macro which
45 /// invokes @selector(...)). So, we allow treat any identifier or
46 /// keyword as a potential Objective-C selector component.
47 static bool canBeObjCSelectorComponent(const FormatToken &Tok) {
48 return Tok.Tok.getIdentifierInfo();
51 /// With `Left` being '(', check if we're at either `[...](` or
52 /// `[...]<...>(`, where the [ opens a lambda capture list.
53 static bool isLambdaParameterList(const FormatToken *Left) {
54 // Skip <...> if present.
55 if (Left->Previous && Left->Previous->is(tok::greater) &&
56 Left->Previous->MatchingParen &&
57 Left->Previous->MatchingParen->is(TT_TemplateOpener)) {
58 Left = Left->Previous->MatchingParen;
61 // Check for `[...]`.
62 return Left->Previous && Left->Previous->is(tok::r_square) &&
63 Left->Previous->MatchingParen &&
64 Left->Previous->MatchingParen->is(TT_LambdaLSquare);
67 /// Returns \c true if the token is followed by a boolean condition, \c false
68 /// otherwise.
69 static bool isKeywordWithCondition(const FormatToken &Tok) {
70 return Tok.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch,
71 tok::kw_constexpr, tok::kw_catch);
74 /// Returns \c true if the token starts a C++ attribute, \c false otherwise.
75 static bool isCppAttribute(bool IsCpp, const FormatToken &Tok) {
76 if (!IsCpp || !Tok.startsSequence(tok::l_square, tok::l_square))
77 return false;
78 // The first square bracket is part of an ObjC array literal
79 if (Tok.Previous && Tok.Previous->is(tok::at))
80 return false;
81 const FormatToken *AttrTok = Tok.Next->Next;
82 if (!AttrTok)
83 return false;
84 // C++17 '[[using ns: foo, bar(baz, blech)]]'
85 // We assume nobody will name an ObjC variable 'using'.
86 if (AttrTok->startsSequence(tok::kw_using, tok::identifier, tok::colon))
87 return true;
88 if (AttrTok->isNot(tok::identifier))
89 return false;
90 while (AttrTok && !AttrTok->startsSequence(tok::r_square, tok::r_square)) {
91 // ObjC message send. We assume nobody will use : in a C++11 attribute
92 // specifier parameter, although this is technically valid:
93 // [[foo(:)]].
94 if (AttrTok->is(tok::colon) ||
95 AttrTok->startsSequence(tok::identifier, tok::identifier) ||
96 AttrTok->startsSequence(tok::r_paren, tok::identifier)) {
97 return false;
99 if (AttrTok->is(tok::ellipsis))
100 return true;
101 AttrTok = AttrTok->Next;
103 return AttrTok && AttrTok->startsSequence(tok::r_square, tok::r_square);
106 /// A parser that gathers additional information about tokens.
108 /// The \c TokenAnnotator tries to match parentheses and square brackets and
109 /// store the parenthesis levels. It also tries to resolve matching "<" and ">"
110 /// into template parameter lists.
111 class AnnotatingParser {
112 public:
113 AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
114 const AdditionalKeywords &Keywords,
115 SmallVector<ScopeType> &Scopes)
116 : Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false),
117 Keywords(Keywords), Scopes(Scopes) {
118 Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
119 resetTokenMetadata();
122 private:
123 ScopeType getScopeType(const FormatToken &Token) const {
124 switch (Token.getType()) {
125 case TT_FunctionLBrace:
126 case TT_LambdaLBrace:
127 return ST_Function;
128 case TT_ClassLBrace:
129 case TT_StructLBrace:
130 case TT_UnionLBrace:
131 return ST_Class;
132 default:
133 return ST_Other;
// Parses a potential template argument/parameter list. On entry CurrentToken
// is the token right after the '<' (the '<' itself is CurrentToken->Previous).
//
// Returns true when a closing '>' is found, linked to the '<' through
// MatchingParen and not directly followed by a literal. Returns false when
// the '<' is rejected as an angle bracket, when a token that cannot appear
// inside the list is met, when consumeToken() fails, or when the tokens run
// out before a '>' is seen.
137 bool parseAngle() {
138 if (!CurrentToken || !CurrentToken->Previous)
139 return false;
// A '<' recorded in NonTemplateLess is never treated as an angle bracket.
140 if (NonTemplateLess.count(CurrentToken->Previous))
141 return false;
143 const FormatToken &Previous = *CurrentToken->Previous; // The '<'.
// Reject a '<' that follows a literal, a '}', or (inside a nested context) a
// ')' whose opening paren is not an overloaded-operator parameter list.
144 if (Previous.Previous) {
145 if (Previous.Previous->Tok.isLiteral())
146 return false;
147 if (Previous.Previous->is(tok::r_brace))
148 return false;
149 if (Previous.Previous->is(tok::r_paren) && Contexts.size() > 1 &&
150 (!Previous.Previous->MatchingParen ||
151 !Previous.Previous->MatchingParen->is(
152 TT_OverloadedOperatorLParen))) {
153 return false;
157 FormatToken *Left = CurrentToken->Previous;
158 Left->ParentBracket = Contexts.back().ContextKind;
// Open a nested context of kind tok::less for the tokens between the angles.
159 ScopedContextCreator ContextCreator(*this, tok::less, 12);
161 // If this angle is in the context of an expression, we need to be more
162 // hesitant to detect it as opening template parameters.
163 bool InExprContext = Contexts.back().IsExpression;
165 Contexts.back().IsExpression = false;
166 // If there's a template keyword before the opening angle bracket, this is a
167 // template parameter, not an argument.
168 if (Left->Previous && Left->Previous->isNot(tok::kw_template))
169 Contexts.back().ContextType = Context::TemplateArgument;
// In Java, a '?' directly after the '<' is skipped.
171 if (Style.Language == FormatStyle::LK_Java &&
172 CurrentToken->is(tok::question)) {
173 next();
176 while (CurrentToken) {
177 if (CurrentToken->is(tok::greater)) {
178 // Try to do a better job at looking for ">>" within the condition of
179 // a statement. Conservatively insert spaces between consecutive ">"
180 // tokens to prevent splitting right bitshift operators and potentially
181 // altering program semantics. This check is overly conservative and
182 // will prevent spaces from being inserted in select nested template
183 // parameter cases, but should not alter program semantics.
184 if (CurrentToken->Next && CurrentToken->Next->is(tok::greater) &&
185 Left->ParentBracket != tok::less &&
186 CurrentToken->getStartOfNonWhitespace() ==
187 CurrentToken->Next->getStartOfNonWhitespace().getLocWithOffset(
188 -1)) {
189 return false;
// Link the '<' and the '>' to each other.
191 Left->MatchingParen = CurrentToken;
192 CurrentToken->MatchingParen = Left;
193 // In TT_Proto, we must distinguish between:
194 // map<key, value>
195 // msg < item: data >
196 // msg: < item: data >
197 // In TT_TextProto, map<key, value> does not occur.
198 if (Style.Language == FormatStyle::LK_TextProto ||
199 (Style.Language == FormatStyle::LK_Proto && Left->Previous &&
200 Left->Previous->isOneOf(TT_SelectorName, TT_DictLiteral))) {
201 CurrentToken->setType(TT_DictLiteral);
202 } else {
203 CurrentToken->setType(TT_TemplateCloser);
204 CurrentToken->Tok.setLength(1);
// A literal directly after the '>' makes this fail even though the pair has
// already been linked above.
206 if (CurrentToken->Next && CurrentToken->Next->Tok.isLiteral())
207 return false;
208 next();
209 return true;
// In Java, '?' tokens inside the angles are skipped.
211 if (CurrentToken->is(tok::question) &&
212 Style.Language == FormatStyle::LK_Java) {
213 next();
214 continue;
// A closing bracket of another kind ends the attempt; so does a ':' or '?'
// when the '<' was seen in an expression context (except for C# and the
// proto languages).
216 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace) ||
217 (CurrentToken->isOneOf(tok::colon, tok::question) && InExprContext &&
218 !Style.isCSharp() && Style.Language != FormatStyle::LK_Proto &&
219 Style.Language != FormatStyle::LK_TextProto)) {
220 return false;
222 // If a && or || is found and interpreted as a binary operator, this set
223 // of angles is likely part of something like "a < b && c > d". If the
224 // angles are inside an expression, the ||/&& might also be a binary
225 // operator that was misinterpreted because we are parsing template
226 // parameters.
227 // FIXME: This is getting out of hand, write a decent parser.
228 if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) &&
229 CurrentToken->Previous->is(TT_BinaryOperator) &&
230 Contexts[Contexts.size() - 2].IsExpression &&
231 !Line.startsWith(tok::kw_template)) {
232 return false;
234 updateParameterCount(Left, CurrentToken);
// For proto files, mark the token before a ':' (or before a '{' / '<' that is
// not preceded by ':') as a selector name.
235 if (Style.Language == FormatStyle::LK_Proto) {
236 if (FormatToken *Previous = CurrentToken->getPreviousNonComment()) {
237 if (CurrentToken->is(tok::colon) ||
238 (CurrentToken->isOneOf(tok::l_brace, tok::less) &&
239 Previous->isNot(tok::colon))) {
240 Previous->setType(TT_SelectorName);
// Any other token is handled by consumeToken(); give up if it fails.
244 if (!consumeToken())
245 return false;
// Ran out of tokens without finding the closing '>'.
247 return false;
250 bool parseUntouchableParens() {
251 while (CurrentToken) {
252 CurrentToken->Finalized = true;
253 switch (CurrentToken->Tok.getKind()) {
254 case tok::l_paren:
255 next();
256 if (!parseUntouchableParens())
257 return false;
258 continue;
259 case tok::r_paren:
260 next();
261 return true;
262 default:
263 // no-op
264 break;
266 next();
268 return false;
// Parses the contents of a parenthesized group. On entry CurrentToken is the
// token right after the '(' (asserted below). The function first configures
// the freshly created context (expression vs. declaration, ObjC @selector,
// _Generic, foreach macro, ...), then consumes tokens up to the matching ')'.
//
// LookForDecls: when true, the first `identifier */&/&& identifier` sequence
// that is not followed by '=' gets its operator retyped as a binary operator.
//
// Returns true once the matching ')' has been linked to the '(' and consumed.
// Returns false when CurrentToken is null on entry, when a ']' or '}' is met
// first, when consumeToken() fails, or when the tokens run out. For a call of
// an untouchable macro the result of parseUntouchableParens() is returned.
271 bool parseParens(bool LookForDecls = false) {
272 if (!CurrentToken)
273 return false;
274 assert(CurrentToken->Previous && "Unknown previous token");
275 FormatToken &OpeningParen = *CurrentToken->Previous;
276 assert(OpeningParen.is(tok::l_paren));
277 FormatToken *PrevNonComment = OpeningParen.getPreviousNonComment();
278 OpeningParen.ParentBracket = Contexts.back().ContextKind;
279 ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);
281 // FIXME: This is a bit of a hack. Do better.
282 Contexts.back().ColonIsForRangeExpr =
283 Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
// Arguments of an untouchable macro are only finalized, not annotated.
285 if (OpeningParen.Previous &&
286 OpeningParen.Previous->is(TT_UntouchableMacroFunc)) {
287 OpeningParen.Finalized = true;
288 return parseUntouchableParens();
291 bool StartsObjCMethodExpr = false;
292 if (!Style.isVerilog()) {
293 if (FormatToken *MaybeSel = OpeningParen.Previous) {
294 // @selector( starts a selector.
295 if (MaybeSel->isObjCAtKeyword(tok::objc_selector) &&
296 MaybeSel->Previous && MaybeSel->Previous->is(tok::at)) {
297 StartsObjCMethodExpr = true;
// Decide whether the contents of the parentheses form an expression. The
// branches below are tried in order and the first match wins.
302 if (OpeningParen.is(TT_OverloadedOperatorLParen)) {
303 // Find the previous kw_operator token.
304 FormatToken *Prev = &OpeningParen;
305 while (!Prev->is(tok::kw_operator)) {
306 Prev = Prev->Previous;
307 assert(Prev && "Expect a kw_operator prior to the OperatorLParen!");
310 // If faced with "a.operator*(argument)" or "a->operator*(argument)",
311 // i.e. the operator is called as a member function,
312 // then the argument must be an expression.
313 bool OperatorCalledAsMemberFunction =
314 Prev->Previous && Prev->Previous->isOneOf(tok::period, tok::arrow);
315 Contexts.back().IsExpression = OperatorCalledAsMemberFunction;
316 } else if (OpeningParen.is(TT_VerilogInstancePortLParen)) {
317 Contexts.back().IsExpression = true;
318 Contexts.back().ContextType = Context::VerilogInstancePortList;
319 } else if (Style.isJavaScript() &&
320 (Line.startsWith(Keywords.kw_type, tok::identifier) ||
321 Line.startsWith(tok::kw_export, Keywords.kw_type,
322 tok::identifier))) {
323 // type X = (...);
324 // export type X = (...);
325 Contexts.back().IsExpression = false;
326 } else if (OpeningParen.Previous &&
327 (OpeningParen.Previous->isOneOf(
328 tok::kw_static_assert, tok::kw_noexcept, tok::kw_explicit,
329 tok::kw_while, tok::l_paren, tok::comma,
330 TT_BinaryOperator) ||
331 OpeningParen.Previous->isIf())) {
332 // static_assert, if and while usually contain expressions.
333 Contexts.back().IsExpression = true;
334 } else if (Style.isJavaScript() && OpeningParen.Previous &&
335 (OpeningParen.Previous->is(Keywords.kw_function) ||
336 (OpeningParen.Previous->endsSequence(tok::identifier,
337 Keywords.kw_function)))) {
338 // function(...) or function f(...)
339 Contexts.back().IsExpression = false;
340 } else if (Style.isJavaScript() && OpeningParen.Previous &&
341 OpeningParen.Previous->is(TT_JsTypeColon)) {
342 // let x: (SomeType);
343 Contexts.back().IsExpression = false;
344 } else if (isLambdaParameterList(&OpeningParen)) {
345 // This is a parameter list of a lambda expression.
346 Contexts.back().IsExpression = false;
347 } else if (OpeningParen.is(TT_RequiresExpressionLParen)) {
348 Contexts.back().IsExpression = false;
349 } else if (OpeningParen.Previous &&
350 OpeningParen.Previous->is(tok::kw__Generic)) {
351 Contexts.back().ContextType = Context::C11GenericSelection;
352 Contexts.back().IsExpression = true;
353 } else if (Line.InPPDirective &&
354 (!OpeningParen.Previous ||
355 !OpeningParen.Previous->is(tok::identifier))) {
356 Contexts.back().IsExpression = true;
357 } else if (Contexts[Contexts.size() - 2].CaretFound) {
358 // This is the parameter list of an ObjC block.
359 Contexts.back().IsExpression = false;
360 } else if (OpeningParen.Previous &&
361 OpeningParen.Previous->is(TT_ForEachMacro)) {
362 // The first argument to a foreach macro is a declaration.
363 Contexts.back().ContextType = Context::ForEachMacro;
364 Contexts.back().IsExpression = false;
365 } else if (OpeningParen.Previous && OpeningParen.Previous->MatchingParen &&
366 OpeningParen.Previous->MatchingParen->isOneOf(
367 TT_ObjCBlockLParen, TT_FunctionTypeLParen)) {
368 Contexts.back().IsExpression = false;
369 } else if (!Line.MustBeDeclaration && !Line.InPPDirective) {
370 bool IsForOrCatch =
371 OpeningParen.Previous &&
372 OpeningParen.Previous->isOneOf(tok::kw_for, tok::kw_catch);
373 Contexts.back().IsExpression = !IsForOrCatch;
376 // Infer the role of the l_paren based on the previous token if we haven't
377 // detected one yet.
378 if (PrevNonComment && OpeningParen.is(TT_Unknown)) {
379 if (PrevNonComment->is(tok::kw___attribute)) {
380 OpeningParen.setType(TT_AttributeParen);
381 } else if (PrevNonComment->isOneOf(TT_TypenameMacro, tok::kw_decltype,
382 tok::kw_typeof,
383 #define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) tok::kw___##Trait,
384 #include "clang/Basic/TransformTypeTraits.def"
385 tok::kw__Atomic)) {
386 OpeningParen.setType(TT_TypeDeclarationParen);
387 // decltype() and typeof() usually contain expressions.
388 if (PrevNonComment->isOneOf(tok::kw_decltype, tok::kw_typeof))
389 Contexts.back().IsExpression = true;
393 if (StartsObjCMethodExpr) {
394 Contexts.back().ColonIsObjCMethodExpr = true;
395 OpeningParen.setType(TT_ObjCMethodExpr);
398 // MightBeFunctionType and ProbablyFunctionType are used for
399 // function pointer and reference types as well as Objective-C
400 // block types:
402 // void (*FunctionPointer)(void);
403 // void (&FunctionReference)(void);
404 // void (&&FunctionReference)(void);
405 // void (^ObjCBlock)(void);
406 bool MightBeFunctionType = !Contexts[Contexts.size() - 2].IsExpression;
407 bool ProbablyFunctionType =
408 CurrentToken->isOneOf(tok::star, tok::amp, tok::ampamp, tok::caret);
409 bool HasMultipleLines = false;
410 bool HasMultipleParametersOnALine = false;
411 bool MightBeObjCForRangeLoop =
412 OpeningParen.Previous && OpeningParen.Previous->is(tok::kw_for);
413 FormatToken *PossibleObjCForInToken = nullptr;
// Main loop: walk the tokens inside the parentheses until the matching ')'.
414 while (CurrentToken) {
415 // LookForDecls is set when "if (" has been seen. Check for
416 // 'identifier' '*' 'identifier' followed by not '=' -- this
417 // '*' has to be a binary operator but determineStarAmpUsage() will
418 // categorize it as an unary operator, so set the right type here.
419 if (LookForDecls && CurrentToken->Next) {
420 FormatToken *Prev = CurrentToken->getPreviousNonComment();
421 if (Prev) {
422 FormatToken *PrevPrev = Prev->getPreviousNonComment();
423 FormatToken *Next = CurrentToken->Next;
424 if (PrevPrev && PrevPrev->is(tok::identifier) &&
425 Prev->isOneOf(tok::star, tok::amp, tok::ampamp) &&
426 CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) {
427 Prev->setType(TT_BinaryOperator);
428 LookForDecls = false;
433 if (CurrentToken->Previous->is(TT_PointerOrReference) &&
434 CurrentToken->Previous->Previous->isOneOf(tok::l_paren,
435 tok::coloncolon)) {
436 ProbablyFunctionType = true;
438 if (CurrentToken->is(tok::comma))
439 MightBeFunctionType = false;
440 if (CurrentToken->Previous->is(TT_BinaryOperator))
441 Contexts.back().IsExpression = true;
// Matching ')' found: finish typing both parens and return.
442 if (CurrentToken->is(tok::r_paren)) {
443 if (OpeningParen.isNot(TT_CppCastLParen) && MightBeFunctionType &&
444 ProbablyFunctionType && CurrentToken->Next &&
445 (CurrentToken->Next->is(tok::l_paren) ||
446 (CurrentToken->Next->is(tok::l_square) &&
447 Line.MustBeDeclaration))) {
448 OpeningParen.setType(OpeningParen.Next->is(tok::caret)
449 ? TT_ObjCBlockLParen
450 : TT_FunctionTypeLParen);
452 OpeningParen.MatchingParen = CurrentToken;
453 CurrentToken->MatchingParen = &OpeningParen;
455 if (CurrentToken->Next && CurrentToken->Next->is(tok::l_brace) &&
456 OpeningParen.Previous && OpeningParen.Previous->is(tok::l_paren)) {
457 // Detect the case where macros are used to generate lambdas or
458 // function bodies, e.g.:
459 // auto my_lambda = MACRO((Type *type, int i) { .. body .. });
460 for (FormatToken *Tok = &OpeningParen; Tok != CurrentToken;
461 Tok = Tok->Next) {
462 if (Tok->is(TT_BinaryOperator) &&
463 Tok->isOneOf(tok::star, tok::amp, tok::ampamp)) {
464 Tok->setType(TT_PointerOrReference);
469 if (StartsObjCMethodExpr) {
470 CurrentToken->setType(TT_ObjCMethodExpr);
471 if (Contexts.back().FirstObjCSelectorName) {
472 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
473 Contexts.back().LongestObjCSelectorName;
// The closing paren takes over the attribute / type-declaration / annotation
// type of the opening paren or of the token in front of it.
477 if (OpeningParen.is(TT_AttributeParen))
478 CurrentToken->setType(TT_AttributeParen);
479 if (OpeningParen.is(TT_TypeDeclarationParen))
480 CurrentToken->setType(TT_TypeDeclarationParen);
481 if (OpeningParen.Previous &&
482 OpeningParen.Previous->is(TT_JavaAnnotation)) {
483 CurrentToken->setType(TT_JavaAnnotation);
485 if (OpeningParen.Previous &&
486 OpeningParen.Previous->is(TT_LeadingJavaAnnotation)) {
487 CurrentToken->setType(TT_LeadingJavaAnnotation);
489 if (OpeningParen.Previous &&
490 OpeningParen.Previous->is(TT_AttributeSquare)) {
491 CurrentToken->setType(TT_AttributeSquare);
// Record how the existing code laid out the parameters.
494 if (!HasMultipleLines)
495 OpeningParen.setPackingKind(PPK_Inconclusive);
496 else if (HasMultipleParametersOnALine)
497 OpeningParen.setPackingKind(PPK_BinPacked);
498 else
499 OpeningParen.setPackingKind(PPK_OnePerLine);
501 next();
502 return true;
// A closing bracket of another kind means the parentheses are unbalanced.
504 if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
505 return false;
507 if (CurrentToken->is(tok::l_brace) && OpeningParen.is(TT_ObjCBlockLParen))
508 OpeningParen.setType(TT_Unknown);
509 if (CurrentToken->is(tok::comma) && CurrentToken->Next &&
510 !CurrentToken->Next->HasUnescapedNewline &&
511 !CurrentToken->Next->isTrailingComment()) {
512 HasMultipleParametersOnALine = true;
514 bool ProbablyFunctionTypeLParen =
515 (CurrentToken->is(tok::l_paren) && CurrentToken->Next &&
516 CurrentToken->Next->isOneOf(tok::star, tok::amp, tok::caret));
517 if ((CurrentToken->Previous->isOneOf(tok::kw_const, tok::kw_auto) ||
518 CurrentToken->Previous->isSimpleTypeSpecifier()) &&
519 !(CurrentToken->is(tok::l_brace) ||
520 (CurrentToken->is(tok::l_paren) && !ProbablyFunctionTypeLParen))) {
521 Contexts.back().IsExpression = false;
// A ';' or ':' rules out an ObjC `for (x in y)` loop; undo a tentative
// TT_ObjCForIn marking.
523 if (CurrentToken->isOneOf(tok::semi, tok::colon)) {
524 MightBeObjCForRangeLoop = false;
525 if (PossibleObjCForInToken) {
526 PossibleObjCForInToken->setType(TT_Unknown);
527 PossibleObjCForInToken = nullptr;
530 if (MightBeObjCForRangeLoop && CurrentToken->is(Keywords.kw_in)) {
531 PossibleObjCForInToken = CurrentToken;
532 PossibleObjCForInToken->setType(TT_ObjCForIn);
534 // When we discover a 'new', we set CanBeExpression to 'false' in order to
535 // parse the type correctly. Reset that after a comma.
536 if (CurrentToken->is(tok::comma))
537 Contexts.back().CanBeExpression = true;
// consumeToken() advances CurrentToken, so keep the token just handled in Tok
// for the parameter count update.
539 FormatToken *Tok = CurrentToken;
540 if (!consumeToken())
541 return false;
542 updateParameterCount(&OpeningParen, Tok);
543 if (CurrentToken && CurrentToken->HasUnescapedNewline)
544 HasMultipleLines = true;
// Ran out of tokens without finding the matching ')'.
546 return false;
549 bool isCSharpAttributeSpecifier(const FormatToken &Tok) {
550 if (!Style.isCSharp())
551 return false;
553 // `identifier[i]` is not an attribute.
554 if (Tok.Previous && Tok.Previous->is(tok::identifier))
555 return false;
557 // Chains of [] in `identifier[i][j][k]` are not attributes.
558 if (Tok.Previous && Tok.Previous->is(tok::r_square)) {
559 auto *MatchingParen = Tok.Previous->MatchingParen;
560 if (!MatchingParen || MatchingParen->is(TT_ArraySubscriptLSquare))
561 return false;
564 const FormatToken *AttrTok = Tok.Next;
565 if (!AttrTok)
566 return false;
568 // Just an empty declaration e.g. string [].
569 if (AttrTok->is(tok::r_square))
570 return false;
572 // Move along the tokens inbetween the '[' and ']' e.g. [STAThread].
573 while (AttrTok && AttrTok->isNot(tok::r_square))
574 AttrTok = AttrTok->Next;
576 if (!AttrTok)
577 return false;
579 // Allow an attribute to be the only content of a file.
580 AttrTok = AttrTok->Next;
581 if (!AttrTok)
582 return true;
584 // Limit this to being an access modifier that follows.
585 if (AttrTok->isOneOf(tok::kw_public, tok::kw_private, tok::kw_protected,
586 tok::comment, tok::kw_class, tok::kw_static,
587 tok::l_square, Keywords.kw_internal)) {
588 return true;
591 // incase its a [XXX] retval func(....
592 if (AttrTok->Next &&
593 AttrTok->Next->startsSequence(tok::identifier, tok::l_paren)) {
594 return true;
597 return false;
// Parses the contents of a '[' ... ']' group. On entry CurrentToken is the
// token right after the '['. If the '[' is still TT_Unknown it is classified
// first (ObjC method expression, attribute, array subscript, array
// initializer, ...); a structured binding '[' is typed regardless. Then the
// tokens up to the matching ']' are consumed.
//
// Returns true once the matching ']' has been linked to the '[' and consumed.
// Returns false when CurrentToken is null on entry, when a ')' or '}' is met
// first, when consumeToken() fails, or when the tokens run out.
600 bool parseSquare() {
601 if (!CurrentToken)
602 return false;
604 // A '[' could be an index subscript (after an identifier or after
605 // ')' or ']'), it could be the start of an Objective-C method
606 // expression, it could be the start of an Objective-C array literal,
607 // or it could be a C++ attribute specifier [[foo::bar]].
608 FormatToken *Left = CurrentToken->Previous;
609 Left->ParentBracket = Contexts.back().ContextKind;
610 FormatToken *Parent = Left->getPreviousNonComment();
612 // Cases where '>' is followed by '['.
613 // In C++, this can happen either in array of templates (foo<int>[10])
614 // or when array is a nested template type (unique_ptr<type1<type2>[]>).
615 bool CppArrayTemplates =
616 Style.isCpp() && Parent && Parent->is(TT_TemplateCloser) &&
617 (Contexts.back().CanBeExpression || Contexts.back().IsExpression ||
618 Contexts.back().ContextType == Context::TemplateArgument);
// A '[' nested inside an attribute specifier is itself treated as part of the
// attribute.
620 const bool IsInnerSquare = Contexts.back().InCpp11AttributeSpecifier;
621 const bool IsCpp11AttributeSpecifier =
622 isCppAttribute(Style.isCpp(), *Left) || IsInnerSquare;
624 // Treat C# Attributes [STAThread] much like C++ attributes [[...]].
625 bool IsCSharpAttributeSpecifier =
626 isCSharpAttributeSpecifier(*Left) ||
627 Contexts.back().InCSharpAttributeSpecifier;
629 bool InsideInlineASM = Line.startsWith(tok::kw_asm);
630 bool IsCppStructuredBinding = Left->isCppStructuredBinding(Style);
631 bool StartsObjCMethodExpr =
632 !IsCppStructuredBinding && !InsideInlineASM && !CppArrayTemplates &&
633 Style.isCpp() && !IsCpp11AttributeSpecifier &&
634 !IsCSharpAttributeSpecifier && Contexts.back().CanBeExpression &&
635 Left->isNot(TT_LambdaLSquare) &&
636 !CurrentToken->isOneOf(tok::l_brace, tok::r_square) &&
637 (!Parent ||
638 Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
639 tok::kw_return, tok::kw_throw) ||
640 Parent->isUnaryOperator() ||
641 // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
642 Parent->isOneOf(TT_ObjCForIn, TT_CastRParen) ||
643 (getBinOpPrecedence(Parent->Tok.getKind(), true, true) >
644 prec::Unknown));
645 bool ColonFound = false;
// Classify the '['. BindingIncrease is handed to the new context below and
// is raised to 10 for proto extensions and array subscripts.
647 unsigned BindingIncrease = 1;
648 if (IsCppStructuredBinding) {
649 Left->setType(TT_StructuredBindingLSquare);
650 } else if (Left->is(TT_Unknown)) {
651 if (StartsObjCMethodExpr) {
652 Left->setType(TT_ObjCMethodExpr);
653 } else if (InsideInlineASM) {
654 Left->setType(TT_InlineASMSymbolicNameLSquare);
655 } else if (IsCpp11AttributeSpecifier) {
656 Left->setType(TT_AttributeSquare);
657 if (!IsInnerSquare && Left->Previous)
658 Left->Previous->EndsCppAttributeGroup = false;
659 } else if (Style.isJavaScript() && Parent &&
660 Contexts.back().ContextKind == tok::l_brace &&
661 Parent->isOneOf(tok::l_brace, tok::comma)) {
662 Left->setType(TT_JsComputedPropertyName);
663 } else if (Style.isCpp() && Contexts.back().ContextKind == tok::l_brace &&
664 Parent && Parent->isOneOf(tok::l_brace, tok::comma)) {
665 Left->setType(TT_DesignatedInitializerLSquare);
666 } else if (IsCSharpAttributeSpecifier) {
667 Left->setType(TT_AttributeSquare);
668 } else if (CurrentToken->is(tok::r_square) && Parent &&
669 Parent->is(TT_TemplateCloser)) {
670 Left->setType(TT_ArraySubscriptLSquare);
671 } else if (Style.Language == FormatStyle::LK_Proto ||
672 Style.Language == FormatStyle::LK_TextProto) {
673 // Square braces in LK_Proto can either be message field attributes:
675 // optional Aaa aaa = 1 [
676 // (aaa) = aaa
677 // ];
679 // extensions 123 [
680 // (aaa) = aaa
681 // ];
683 // or text proto extensions (in options):
685 // option (Aaa.options) = {
686 // [type.type/type] {
687 // key: value
688 // }
689 // }
691 // or repeated fields (in options):
693 // option (Aaa.options) = {
694 // keys: [ 1, 2, 3 ]
695 // }
697 // In the first and the third case we want to spread the contents inside
698 // the square braces; in the second we want to keep them inline.
699 Left->setType(TT_ArrayInitializerLSquare);
700 if (!Left->endsSequence(tok::l_square, tok::numeric_constant,
701 tok::equal) &&
702 !Left->endsSequence(tok::l_square, tok::numeric_constant,
703 tok::identifier) &&
704 !Left->endsSequence(tok::l_square, tok::colon, TT_SelectorName)) {
705 Left->setType(TT_ProtoExtensionLSquare);
706 BindingIncrease = 10;
708 } else if (!CppArrayTemplates && Parent &&
709 Parent->isOneOf(TT_BinaryOperator, TT_TemplateCloser, tok::at,
710 tok::comma, tok::l_paren, tok::l_square,
711 tok::question, tok::colon, tok::kw_return,
712 // Should only be relevant to JavaScript:
713 tok::kw_default)) {
714 Left->setType(TT_ArrayInitializerLSquare);
715 } else {
716 BindingIncrease = 10;
717 Left->setType(TT_ArraySubscriptLSquare);
// Open the nested context for the bracket contents and seed its flags.
721 ScopedContextCreator ContextCreator(*this, tok::l_square, BindingIncrease);
722 Contexts.back().IsExpression = true;
723 if (Style.isJavaScript() && Parent && Parent->is(TT_JsTypeColon))
724 Contexts.back().IsExpression = false;
726 Contexts.back().ColonIsObjCMethodExpr = StartsObjCMethodExpr;
727 Contexts.back().InCpp11AttributeSpecifier = IsCpp11AttributeSpecifier;
728 Contexts.back().InCSharpAttributeSpecifier = IsCSharpAttributeSpecifier;
// Main loop: walk the tokens inside the brackets until the matching ']'.
730 while (CurrentToken) {
731 if (CurrentToken->is(tok::r_square)) {
732 if (IsCpp11AttributeSpecifier) {
733 CurrentToken->setType(TT_AttributeSquare);
734 if (!IsInnerSquare)
735 CurrentToken->EndsCppAttributeGroup = true;
737 if (IsCSharpAttributeSpecifier) {
738 CurrentToken->setType(TT_AttributeSquare);
739 } else if (((CurrentToken->Next &&
740 CurrentToken->Next->is(tok::l_paren)) ||
741 (CurrentToken->Previous &&
742 CurrentToken->Previous->Previous == Left)) &&
743 Left->is(TT_ObjCMethodExpr)) {
744 // An ObjC method call is rarely followed by an open parenthesis. It
745 // also can't be composed of just one token, unless it's a macro that
746 // will be expanded to more tokens.
747 // FIXME: Do we incorrectly label ":" with this?
748 StartsObjCMethodExpr = false;
749 Left->setType(TT_Unknown);
751 if (StartsObjCMethodExpr && CurrentToken->Previous != Left) {
752 CurrentToken->setType(TT_ObjCMethodExpr);
753 // If we haven't seen a colon yet, make sure the last identifier
754 // before the r_square is tagged as a selector name component.
755 if (!ColonFound && CurrentToken->Previous &&
756 CurrentToken->Previous->is(TT_Unknown) &&
757 canBeObjCSelectorComponent(*CurrentToken->Previous)) {
758 CurrentToken->Previous->setType(TT_SelectorName);
760 // determineStarAmpUsage() thinks that '*' '[' is allocating an
761 // array of pointers, but if '[' starts a selector then '*' is a
762 // binary operator.
763 if (Parent && Parent->is(TT_PointerOrReference))
764 Parent->overwriteFixedType(TT_BinaryOperator);
766 // An arrow after an ObjC method expression is not a lambda arrow.
767 if (CurrentToken->getType() == TT_ObjCMethodExpr &&
768 CurrentToken->Next && CurrentToken->Next->is(TT_LambdaArrow)) {
769 CurrentToken->Next->overwriteFixedType(TT_Unknown);
// Link the '[' and the ']' to each other.
771 Left->MatchingParen = CurrentToken;
772 CurrentToken->MatchingParen = Left;
773 // FirstObjCSelectorName is set when a colon is found. This does
774 // not work, however, when the method has no parameters.
775 // Here, we set FirstObjCSelectorName when the end of the method call is
776 // reached, in case it was not set already.
777 if (!Contexts.back().FirstObjCSelectorName) {
778 FormatToken *Previous = CurrentToken->getPreviousNonComment();
779 if (Previous && Previous->is(TT_SelectorName)) {
780 Previous->ObjCSelectorNameParts = 1;
781 Contexts.back().FirstObjCSelectorName = Previous;
783 } else {
784 Left->ParameterCount =
785 Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
787 if (Contexts.back().FirstObjCSelectorName) {
788 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
789 Contexts.back().LongestObjCSelectorName;
790 if (Left->BlockParameterCount > 1)
791 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0;
793 next();
794 return true;
// A closing bracket of another kind means the square brackets are unbalanced.
796 if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
797 return false;
798 if (CurrentToken->is(tok::colon)) {
799 if (IsCpp11AttributeSpecifier &&
800 CurrentToken->endsSequence(tok::colon, tok::identifier,
801 tok::kw_using)) {
802 // Remember that this is a [[using ns: foo]] C++ attribute, so we
803 // don't add a space before the colon (unlike other colons).
804 CurrentToken->setType(TT_AttributeColon);
// A ':' inside what was typed as a subscript or designated initializer
// retypes the '[' as an ObjC method expression.
805 } else if (!Style.isVerilog() && !Line.InPragmaDirective &&
806 Left->isOneOf(TT_ArraySubscriptLSquare,
807 TT_DesignatedInitializerLSquare)) {
808 Left->setType(TT_ObjCMethodExpr);
809 StartsObjCMethodExpr = true;
810 Contexts.back().ColonIsObjCMethodExpr = true;
811 if (Parent && Parent->is(tok::r_paren)) {
812 // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
813 Parent->setType(TT_CastRParen);
816 ColonFound = true;
// A ',' before any ':' retypes a presumed ObjC method expression as an array
// initializer.
818 if (CurrentToken->is(tok::comma) && Left->is(TT_ObjCMethodExpr) &&
819 !ColonFound) {
820 Left->setType(TT_ArrayInitializerLSquare);
// consumeToken() advances CurrentToken, so keep the token just handled in Tok
// for the parameter count update.
822 FormatToken *Tok = CurrentToken;
823 if (!consumeToken())
824 return false;
825 updateParameterCount(Left, Tok);
// Ran out of tokens without finding the matching ']'.
827 return false;
830 bool couldBeInStructArrayInitializer() const {
831 if (Contexts.size() < 2)
832 return false;
833 // We want to back up no more then 2 context levels i.e.
834 // . { { <-
835 const auto End = std::next(Contexts.rbegin(), 2);
836 auto Last = Contexts.rbegin();
837 unsigned Depth = 0;
838 for (; Last != End; ++Last)
839 if (Last->ContextKind == tok::l_brace)
840 ++Depth;
841 return Depth == 2 && Last->ContextKind != tok::l_brace;
844 bool parseBrace() {
845 if (!CurrentToken)
846 return true;
848 assert(CurrentToken->Previous);
849 FormatToken &OpeningBrace = *CurrentToken->Previous;
850 assert(OpeningBrace.is(tok::l_brace));
851 OpeningBrace.ParentBracket = Contexts.back().ContextKind;
853 if (Contexts.back().CaretFound)
854 OpeningBrace.overwriteFixedType(TT_ObjCBlockLBrace);
855 Contexts.back().CaretFound = false;
857 ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
858 Contexts.back().ColonIsDictLiteral = true;
859 if (OpeningBrace.is(BK_BracedInit))
860 Contexts.back().IsExpression = true;
861 if (Style.isJavaScript() && OpeningBrace.Previous &&
862 OpeningBrace.Previous->is(TT_JsTypeColon)) {
863 Contexts.back().IsExpression = false;
866 unsigned CommaCount = 0;
867 while (CurrentToken) {
868 if (CurrentToken->is(tok::r_brace)) {
869 assert(!Scopes.empty());
870 assert(Scopes.back() == getScopeType(OpeningBrace));
871 Scopes.pop_back();
872 assert(OpeningBrace.Optional == CurrentToken->Optional);
873 OpeningBrace.MatchingParen = CurrentToken;
874 CurrentToken->MatchingParen = &OpeningBrace;
875 if (Style.AlignArrayOfStructures != FormatStyle::AIAS_None) {
876 if (OpeningBrace.ParentBracket == tok::l_brace &&
877 couldBeInStructArrayInitializer() && CommaCount > 0) {
878 Contexts.back().ContextType = Context::StructArrayInitializer;
881 next();
882 return true;
884 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
885 return false;
886 updateParameterCount(&OpeningBrace, CurrentToken);
887 if (CurrentToken->isOneOf(tok::colon, tok::l_brace, tok::less)) {
888 FormatToken *Previous = CurrentToken->getPreviousNonComment();
889 if (Previous->is(TT_JsTypeOptionalQuestion))
890 Previous = Previous->getPreviousNonComment();
891 if ((CurrentToken->is(tok::colon) &&
892 (!Contexts.back().ColonIsDictLiteral || !Style.isCpp())) ||
893 Style.Language == FormatStyle::LK_Proto ||
894 Style.Language == FormatStyle::LK_TextProto) {
895 OpeningBrace.setType(TT_DictLiteral);
896 if (Previous->Tok.getIdentifierInfo() ||
897 Previous->is(tok::string_literal)) {
898 Previous->setType(TT_SelectorName);
901 if (CurrentToken->is(tok::colon) && OpeningBrace.is(TT_Unknown))
902 OpeningBrace.setType(TT_DictLiteral);
903 else if (Style.isJavaScript())
904 OpeningBrace.overwriteFixedType(TT_DictLiteral);
906 if (CurrentToken->is(tok::comma)) {
907 if (Style.isJavaScript())
908 OpeningBrace.overwriteFixedType(TT_DictLiteral);
909 ++CommaCount;
911 if (!consumeToken())
912 return false;
914 return true;
917 void updateParameterCount(FormatToken *Left, FormatToken *Current) {
918 // For ObjC methods, the number of parameters is calculated differently as
919 // method declarations have a different structure (the parameters are not
920 // inside a bracket scope).
921 if (Current->is(tok::l_brace) && Current->is(BK_Block))
922 ++Left->BlockParameterCount;
923 if (Current->is(tok::comma)) {
924 ++Left->ParameterCount;
925 if (!Left->Role)
926 Left->Role.reset(new CommaSeparatedList(Style));
927 Left->Role->CommaFound(Current);
928 } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) {
929 Left->ParameterCount = 1;
933 bool parseConditional() {
934 while (CurrentToken) {
935 if (CurrentToken->is(tok::colon)) {
936 CurrentToken->setType(TT_ConditionalExpr);
937 next();
938 return true;
940 if (!consumeToken())
941 return false;
943 return false;
946 bool parseTemplateDeclaration() {
947 if (CurrentToken && CurrentToken->is(tok::less)) {
948 CurrentToken->setType(TT_TemplateOpener);
949 next();
950 if (!parseAngle())
951 return false;
952 if (CurrentToken)
953 CurrentToken->Previous->ClosesTemplateDeclaration = true;
954 return true;
956 return false;
959 bool consumeToken() {
960 FormatToken *Tok = CurrentToken;
961 next();
962 // In Verilog primitives' state tables, `:`, `?`, and `-` aren't normal
963 // operators.
964 if (Tok->is(TT_VerilogTableItem))
965 return true;
966 switch (Tok->Tok.getKind()) {
967 case tok::plus:
968 case tok::minus:
969 if (!Tok->Previous && Line.MustBeDeclaration)
970 Tok->setType(TT_ObjCMethodSpecifier);
971 break;
972 case tok::colon:
973 if (!Tok->Previous)
974 return false;
975 // Goto labels and case labels are already identified in
976 // UnwrappedLineParser.
977 if (Tok->isTypeFinalized())
978 break;
979 // Colons from ?: are handled in parseConditional().
980 if (Style.isJavaScript()) {
981 if (Contexts.back().ColonIsForRangeExpr || // colon in for loop
982 (Contexts.size() == 1 && // switch/case labels
983 !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) ||
984 Contexts.back().ContextKind == tok::l_paren || // function params
985 Contexts.back().ContextKind == tok::l_square || // array type
986 (!Contexts.back().IsExpression &&
987 Contexts.back().ContextKind == tok::l_brace) || // object type
988 (Contexts.size() == 1 &&
989 Line.MustBeDeclaration)) { // method/property declaration
990 Contexts.back().IsExpression = false;
991 Tok->setType(TT_JsTypeColon);
992 break;
994 } else if (Style.isCSharp()) {
995 if (Contexts.back().InCSharpAttributeSpecifier) {
996 Tok->setType(TT_AttributeColon);
997 break;
999 if (Contexts.back().ContextKind == tok::l_paren) {
1000 Tok->setType(TT_CSharpNamedArgumentColon);
1001 break;
1003 } else if (Style.isVerilog() && Tok->isNot(TT_BinaryOperator)) {
1004 // The distribution weight operators are labeled
1005 // TT_BinaryOperator by the lexer.
1006 if (Keywords.isVerilogEnd(*Tok->Previous) ||
1007 Keywords.isVerilogBegin(*Tok->Previous)) {
1008 Tok->setType(TT_VerilogBlockLabelColon);
1009 } else if (Contexts.back().ContextKind == tok::l_square) {
1010 Tok->setType(TT_BitFieldColon);
1011 } else if (Contexts.back().ColonIsDictLiteral) {
1012 Tok->setType(TT_DictLiteral);
1013 } else if (Contexts.size() == 1) {
1014 // In Verilog a case label doesn't have the case keyword. We
1015 // assume a colon following an expression is a case label.
1016 // Colons from ?: are annotated in parseConditional().
1017 Tok->setType(TT_CaseLabelColon);
1018 if (Line.Level > 1 || (!Line.InPPDirective && Line.Level > 0))
1019 --Line.Level;
1021 break;
1023 if (Line.First->isOneOf(Keywords.kw_module, Keywords.kw_import) ||
1024 Line.First->startsSequence(tok::kw_export, Keywords.kw_module) ||
1025 Line.First->startsSequence(tok::kw_export, Keywords.kw_import)) {
1026 Tok->setType(TT_ModulePartitionColon);
1027 } else if (Contexts.back().ColonIsDictLiteral ||
1028 Style.Language == FormatStyle::LK_Proto ||
1029 Style.Language == FormatStyle::LK_TextProto) {
1030 Tok->setType(TT_DictLiteral);
1031 if (Style.Language == FormatStyle::LK_TextProto) {
1032 if (FormatToken *Previous = Tok->getPreviousNonComment())
1033 Previous->setType(TT_SelectorName);
1035 } else if (Contexts.back().ColonIsObjCMethodExpr ||
1036 Line.startsWith(TT_ObjCMethodSpecifier)) {
1037 Tok->setType(TT_ObjCMethodExpr);
1038 const FormatToken *BeforePrevious = Tok->Previous->Previous;
1039 // Ensure we tag all identifiers in method declarations as
1040 // TT_SelectorName.
1041 bool UnknownIdentifierInMethodDeclaration =
1042 Line.startsWith(TT_ObjCMethodSpecifier) &&
1043 Tok->Previous->is(tok::identifier) && Tok->Previous->is(TT_Unknown);
1044 if (!BeforePrevious ||
1045 // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
1046 !(BeforePrevious->is(TT_CastRParen) ||
1047 (BeforePrevious->is(TT_ObjCMethodExpr) &&
1048 BeforePrevious->is(tok::colon))) ||
1049 BeforePrevious->is(tok::r_square) ||
1050 Contexts.back().LongestObjCSelectorName == 0 ||
1051 UnknownIdentifierInMethodDeclaration) {
1052 Tok->Previous->setType(TT_SelectorName);
1053 if (!Contexts.back().FirstObjCSelectorName) {
1054 Contexts.back().FirstObjCSelectorName = Tok->Previous;
1055 } else if (Tok->Previous->ColumnWidth >
1056 Contexts.back().LongestObjCSelectorName) {
1057 Contexts.back().LongestObjCSelectorName =
1058 Tok->Previous->ColumnWidth;
1060 Tok->Previous->ParameterIndex =
1061 Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
1062 ++Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
1064 } else if (Contexts.back().ColonIsForRangeExpr) {
1065 Tok->setType(TT_RangeBasedForLoopColon);
1066 } else if (Contexts.back().ContextType == Context::C11GenericSelection) {
1067 Tok->setType(TT_GenericSelectionColon);
1068 } else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) {
1069 Tok->setType(TT_BitFieldColon);
1070 } else if (Contexts.size() == 1 &&
1071 !Line.First->isOneOf(tok::kw_enum, tok::kw_case,
1072 tok::kw_default)) {
1073 FormatToken *Prev = Tok->getPreviousNonComment();
1074 if (!Prev)
1075 break;
1076 if (Prev->isOneOf(tok::r_paren, tok::kw_noexcept) ||
1077 Prev->ClosesRequiresClause) {
1078 Tok->setType(TT_CtorInitializerColon);
1079 } else if (Prev->is(tok::kw_try)) {
1080 // Member initializer list within function try block.
1081 FormatToken *PrevPrev = Prev->getPreviousNonComment();
1082 if (!PrevPrev)
1083 break;
1084 if (PrevPrev && PrevPrev->isOneOf(tok::r_paren, tok::kw_noexcept))
1085 Tok->setType(TT_CtorInitializerColon);
1086 } else {
1087 Tok->setType(TT_InheritanceColon);
1089 } else if (canBeObjCSelectorComponent(*Tok->Previous) && Tok->Next &&
1090 (Tok->Next->isOneOf(tok::r_paren, tok::comma) ||
1091 (canBeObjCSelectorComponent(*Tok->Next) && Tok->Next->Next &&
1092 Tok->Next->Next->is(tok::colon)))) {
1093 // This handles a special macro in ObjC code where selectors including
1094 // the colon are passed as macro arguments.
1095 Tok->setType(TT_ObjCMethodExpr);
1096 } else if (Contexts.back().ContextKind == tok::l_paren &&
1097 !Line.InPragmaDirective) {
1098 Tok->setType(TT_InlineASMColon);
1100 break;
1101 case tok::pipe:
1102 case tok::amp:
1103 // | and & in declarations/type expressions represent union and
1104 // intersection types, respectively.
1105 if (Style.isJavaScript() && !Contexts.back().IsExpression)
1106 Tok->setType(TT_JsTypeOperator);
1107 break;
1108 case tok::kw_if:
1109 if (CurrentToken &&
1110 CurrentToken->isOneOf(tok::kw_constexpr, tok::identifier)) {
1111 next();
1113 [[fallthrough]];
1114 case tok::kw_while:
1115 if (CurrentToken && CurrentToken->is(tok::l_paren)) {
1116 next();
1117 if (!parseParens(/*LookForDecls=*/true))
1118 return false;
1120 break;
1121 case tok::kw_for:
1122 if (Style.isJavaScript()) {
1123 // x.for and {for: ...}
1124 if ((Tok->Previous && Tok->Previous->is(tok::period)) ||
1125 (Tok->Next && Tok->Next->is(tok::colon))) {
1126 break;
1128 // JS' for await ( ...
1129 if (CurrentToken && CurrentToken->is(Keywords.kw_await))
1130 next();
1132 if (Style.isCpp() && CurrentToken && CurrentToken->is(tok::kw_co_await))
1133 next();
1134 Contexts.back().ColonIsForRangeExpr = true;
1135 if (!CurrentToken || CurrentToken->isNot(tok::l_paren))
1136 return false;
1137 next();
1138 if (!parseParens())
1139 return false;
1140 break;
1141 case tok::l_paren:
1142 // When faced with 'operator()()', the kw_operator handler incorrectly
1143 // marks the first l_paren as a OverloadedOperatorLParen. Here, we make
1144 // the first two parens OverloadedOperators and the second l_paren an
1145 // OverloadedOperatorLParen.
1146 if (Tok->Previous && Tok->Previous->is(tok::r_paren) &&
1147 Tok->Previous->MatchingParen &&
1148 Tok->Previous->MatchingParen->is(TT_OverloadedOperatorLParen)) {
1149 Tok->Previous->setType(TT_OverloadedOperator);
1150 Tok->Previous->MatchingParen->setType(TT_OverloadedOperator);
1151 Tok->setType(TT_OverloadedOperatorLParen);
1154 if (Style.isVerilog()) {
1155 // Identify the parameter list and port list in a module instantiation.
1156 // This is still needed when we already have
1157 // UnwrappedLineParser::parseVerilogHierarchyHeader because that
1158 // function is only responsible for the definition, not the
1159 // instantiation.
1160 auto IsInstancePort = [&]() {
1161 const FormatToken *Prev = Tok->getPreviousNonComment();
1162 const FormatToken *PrevPrev;
1163 // In the following example all 4 left parentheses will be treated as
1164 // 'TT_VerilogInstancePortLParen'.
1166 // module_x instance_1(port_1); // Case A.
1167 // module_x #(parameter_1) // Case B.
1168 // instance_2(port_1), // Case C.
1169 // instance_3(port_1); // Case D.
1170 if (!Prev || !(PrevPrev = Prev->getPreviousNonComment()))
1171 return false;
1172 // Case A.
1173 if (Keywords.isVerilogIdentifier(*Prev) &&
1174 Keywords.isVerilogIdentifier(*PrevPrev)) {
1175 return true;
1177 // Case B.
1178 if (Prev->is(Keywords.kw_verilogHash) &&
1179 Keywords.isVerilogIdentifier(*PrevPrev)) {
1180 return true;
1182 // Case C.
1183 if (Keywords.isVerilogIdentifier(*Prev) && PrevPrev->is(tok::r_paren))
1184 return true;
1185 // Case D.
1186 if (Keywords.isVerilogIdentifier(*Prev) && PrevPrev->is(tok::comma)) {
1187 const FormatToken *PrevParen = PrevPrev->getPreviousNonComment();
1188 if (PrevParen->is(tok::r_paren) && PrevParen->MatchingParen &&
1189 PrevParen->MatchingParen->is(TT_VerilogInstancePortLParen)) {
1190 return true;
1193 return false;
1196 if (IsInstancePort())
1197 Tok->setFinalizedType(TT_VerilogInstancePortLParen);
1200 if (!parseParens())
1201 return false;
1202 if (Line.MustBeDeclaration && Contexts.size() == 1 &&
1203 !Contexts.back().IsExpression && !Line.startsWith(TT_ObjCProperty) &&
1204 !Tok->isOneOf(TT_TypeDeclarationParen, TT_RequiresExpressionLParen) &&
1205 (!Tok->Previous ||
1206 !Tok->Previous->isOneOf(tok::kw___attribute, TT_RequiresClause,
1207 TT_LeadingJavaAnnotation))) {
1208 Line.MightBeFunctionDecl = true;
1210 break;
1211 case tok::l_square:
1212 if (!parseSquare())
1213 return false;
1214 break;
1215 case tok::l_brace:
1216 if (Style.Language == FormatStyle::LK_TextProto) {
1217 FormatToken *Previous = Tok->getPreviousNonComment();
1218 if (Previous && Previous->getType() != TT_DictLiteral)
1219 Previous->setType(TT_SelectorName);
1221 Scopes.push_back(getScopeType(*Tok));
1222 if (!parseBrace())
1223 return false;
1224 break;
1225 case tok::less:
1226 if (parseAngle()) {
1227 Tok->setType(TT_TemplateOpener);
1228 // In TT_Proto, we must distignuish between:
1229 // map<key, value>
1230 // msg < item: data >
1231 // msg: < item: data >
1232 // In TT_TextProto, map<key, value> does not occur.
1233 if (Style.Language == FormatStyle::LK_TextProto ||
1234 (Style.Language == FormatStyle::LK_Proto && Tok->Previous &&
1235 Tok->Previous->isOneOf(TT_SelectorName, TT_DictLiteral))) {
1236 Tok->setType(TT_DictLiteral);
1237 FormatToken *Previous = Tok->getPreviousNonComment();
1238 if (Previous && Previous->getType() != TT_DictLiteral)
1239 Previous->setType(TT_SelectorName);
1241 } else {
1242 Tok->setType(TT_BinaryOperator);
1243 NonTemplateLess.insert(Tok);
1244 CurrentToken = Tok;
1245 next();
1247 break;
1248 case tok::r_paren:
1249 case tok::r_square:
1250 return false;
1251 case tok::r_brace:
1252 // Don't pop scope when encountering unbalanced r_brace.
1253 if (!Scopes.empty())
1254 Scopes.pop_back();
1255 // Lines can start with '}'.
1256 if (Tok->Previous)
1257 return false;
1258 break;
1259 case tok::greater:
1260 if (Style.Language != FormatStyle::LK_TextProto)
1261 Tok->setType(TT_BinaryOperator);
1262 if (Tok->Previous && Tok->Previous->is(TT_TemplateCloser))
1263 Tok->SpacesRequiredBefore = 1;
1264 break;
1265 case tok::kw_operator:
1266 if (Style.Language == FormatStyle::LK_TextProto ||
1267 Style.Language == FormatStyle::LK_Proto) {
1268 break;
1270 while (CurrentToken &&
1271 !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) {
1272 if (CurrentToken->isOneOf(tok::star, tok::amp))
1273 CurrentToken->setType(TT_PointerOrReference);
1274 auto Next = CurrentToken->getNextNonComment();
1275 if (!Next)
1276 break;
1277 if (Next->is(tok::less))
1278 next();
1279 else
1280 consumeToken();
1281 if (!CurrentToken)
1282 break;
1283 auto Previous = CurrentToken->getPreviousNonComment();
1284 assert(Previous);
1285 if (CurrentToken->is(tok::comma) && Previous->isNot(tok::kw_operator))
1286 break;
1287 if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator, tok::comma,
1288 tok::star, tok::arrow, tok::amp, tok::ampamp) ||
1289 // User defined literal.
1290 Previous->TokenText.startswith("\"\"")) {
1291 Previous->setType(TT_OverloadedOperator);
1292 if (CurrentToken->isOneOf(tok::less, tok::greater))
1293 break;
1296 if (CurrentToken && CurrentToken->is(tok::l_paren))
1297 CurrentToken->setType(TT_OverloadedOperatorLParen);
1298 if (CurrentToken && CurrentToken->Previous->is(TT_BinaryOperator))
1299 CurrentToken->Previous->setType(TT_OverloadedOperator);
1300 break;
1301 case tok::question:
1302 if (Style.isJavaScript() && Tok->Next &&
1303 Tok->Next->isOneOf(tok::semi, tok::comma, tok::colon, tok::r_paren,
1304 tok::r_brace, tok::r_square)) {
1305 // Question marks before semicolons, colons, etc. indicate optional
1306 // types (fields, parameters), e.g.
1307 // function(x?: string, y?) {...}
1308 // class X { y?; }
1309 Tok->setType(TT_JsTypeOptionalQuestion);
1310 break;
1312 // Declarations cannot be conditional expressions, this can only be part
1313 // of a type declaration.
1314 if (Line.MustBeDeclaration && !Contexts.back().IsExpression &&
1315 Style.isJavaScript()) {
1316 break;
1318 if (Style.isCSharp()) {
1319 // `Type?)`, `Type?>`, `Type? name;` and `Type? name =` can only be
1320 // nullable types.
1322 // `Type?)`, `Type?>`, `Type? name;`
1323 if (Tok->Next &&
1324 (Tok->Next->startsSequence(tok::question, tok::r_paren) ||
1325 Tok->Next->startsSequence(tok::question, tok::greater) ||
1326 Tok->Next->startsSequence(tok::question, tok::identifier,
1327 tok::semi))) {
1328 Tok->setType(TT_CSharpNullable);
1329 break;
1332 // `Type? name =`
1333 if (Tok->Next && Tok->Next->is(tok::identifier) && Tok->Next->Next &&
1334 Tok->Next->Next->is(tok::equal)) {
1335 Tok->setType(TT_CSharpNullable);
1336 break;
1339 // Line.MustBeDeclaration will be true for `Type? name;`.
1340 // But not
1341 // cond ? "A" : "B";
1342 // cond ? id : "B";
1343 // cond ? cond2 ? "A" : "B" : "C";
1344 if (!Contexts.back().IsExpression && Line.MustBeDeclaration &&
1345 (!Tok->Next ||
1346 !Tok->Next->isOneOf(tok::identifier, tok::string_literal) ||
1347 !Tok->Next->Next ||
1348 !Tok->Next->Next->isOneOf(tok::colon, tok::question))) {
1349 Tok->setType(TT_CSharpNullable);
1350 break;
1353 parseConditional();
1354 break;
1355 case tok::kw_template:
1356 parseTemplateDeclaration();
1357 break;
1358 case tok::comma:
1359 switch (Contexts.back().ContextType) {
1360 case Context::CtorInitializer:
1361 Tok->setType(TT_CtorInitializerComma);
1362 break;
1363 case Context::InheritanceList:
1364 Tok->setType(TT_InheritanceComma);
1365 break;
1366 case Context::VerilogInstancePortList:
1367 Tok->setFinalizedType(TT_VerilogInstancePortComma);
1368 break;
1369 default:
1370 if (Style.isVerilog() && Contexts.size() == 1 &&
1371 Line.startsWith(Keywords.kw_assign)) {
1372 Tok->setFinalizedType(TT_VerilogAssignComma);
1373 } else if (Contexts.back().FirstStartOfName &&
1374 (Contexts.size() == 1 || startsWithInitStatement(Line))) {
1375 Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
1376 Line.IsMultiVariableDeclStmt = true;
1378 break;
1380 if (Contexts.back().ContextType == Context::ForEachMacro)
1381 Contexts.back().IsExpression = true;
1382 break;
1383 case tok::kw_default:
1384 // Unindent case labels.
1385 if (Style.isVerilog() && Keywords.isVerilogEndOfLabel(*Tok) &&
1386 (Line.Level > 1 || (!Line.InPPDirective && Line.Level > 0))) {
1387 --Line.Level;
1389 break;
1390 case tok::identifier:
1391 if (Tok->isOneOf(Keywords.kw___has_include,
1392 Keywords.kw___has_include_next)) {
1393 parseHasInclude();
1395 if (Style.isCSharp() && Tok->is(Keywords.kw_where) && Tok->Next &&
1396 Tok->Next->isNot(tok::l_paren)) {
1397 Tok->setType(TT_CSharpGenericTypeConstraint);
1398 parseCSharpGenericTypeConstraint();
1399 if (!Tok->getPreviousNonComment())
1400 Line.IsContinuation = true;
1402 break;
1403 case tok::arrow:
1404 if (Tok->isNot(TT_LambdaArrow) && Tok->Previous &&
1405 Tok->Previous->is(tok::kw_noexcept)) {
1406 Tok->setType(TT_TrailingReturnArrow);
1408 break;
1409 case tok::eof:
1410 if (Style.InsertNewlineAtEOF && Tok->NewlinesBefore == 0)
1411 Tok->NewlinesBefore = 1;
1412 break;
1413 default:
1414 break;
1416 return true;
1419 void parseCSharpGenericTypeConstraint() {
1420 int OpenAngleBracketsCount = 0;
1421 while (CurrentToken) {
1422 if (CurrentToken->is(tok::less)) {
1423 // parseAngle is too greedy and will consume the whole line.
1424 CurrentToken->setType(TT_TemplateOpener);
1425 ++OpenAngleBracketsCount;
1426 next();
1427 } else if (CurrentToken->is(tok::greater)) {
1428 CurrentToken->setType(TT_TemplateCloser);
1429 --OpenAngleBracketsCount;
1430 next();
1431 } else if (CurrentToken->is(tok::comma) && OpenAngleBracketsCount == 0) {
1432 // We allow line breaks after GenericTypeConstraintComma's
1433 // so do not flag commas in Generics as GenericTypeConstraintComma's.
1434 CurrentToken->setType(TT_CSharpGenericTypeConstraintComma);
1435 next();
1436 } else if (CurrentToken->is(Keywords.kw_where)) {
1437 CurrentToken->setType(TT_CSharpGenericTypeConstraint);
1438 next();
1439 } else if (CurrentToken->is(tok::colon)) {
1440 CurrentToken->setType(TT_CSharpGenericTypeConstraintColon);
1441 next();
1442 } else {
1443 next();
1448 void parseIncludeDirective() {
1449 if (CurrentToken && CurrentToken->is(tok::less)) {
1450 next();
1451 while (CurrentToken) {
1452 // Mark tokens up to the trailing line comments as implicit string
1453 // literals.
1454 if (CurrentToken->isNot(tok::comment) &&
1455 !CurrentToken->TokenText.startswith("//")) {
1456 CurrentToken->setType(TT_ImplicitStringLiteral);
1458 next();
1463 void parseWarningOrError() {
1464 next();
1465 // We still want to format the whitespace left of the first token of the
1466 // warning or error.
1467 next();
1468 while (CurrentToken) {
1469 CurrentToken->setType(TT_ImplicitStringLiteral);
1470 next();
1474 void parsePragma() {
1475 next(); // Consume "pragma".
1476 if (CurrentToken &&
1477 CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_option,
1478 Keywords.kw_region)) {
1479 bool IsMarkOrRegion =
1480 CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_region);
1481 next();
1482 next(); // Consume first token (so we fix leading whitespace).
1483 while (CurrentToken) {
1484 if (IsMarkOrRegion || CurrentToken->Previous->is(TT_BinaryOperator))
1485 CurrentToken->setType(TT_ImplicitStringLiteral);
1486 next();
1491 void parseHasInclude() {
1492 if (!CurrentToken || !CurrentToken->is(tok::l_paren))
1493 return;
1494 next(); // '('
1495 parseIncludeDirective();
1496 next(); // ')'
1499 LineType parsePreprocessorDirective() {
1500 bool IsFirstToken = CurrentToken->IsFirst;
1501 LineType Type = LT_PreprocessorDirective;
1502 next();
1503 if (!CurrentToken)
1504 return Type;
1506 if (Style.isJavaScript() && IsFirstToken) {
1507 // JavaScript files can contain shebang lines of the form:
1508 // #!/usr/bin/env node
1509 // Treat these like C++ #include directives.
1510 while (CurrentToken) {
1511 // Tokens cannot be comments here.
1512 CurrentToken->setType(TT_ImplicitStringLiteral);
1513 next();
1515 return LT_ImportStatement;
1518 if (CurrentToken->is(tok::numeric_constant)) {
1519 CurrentToken->SpacesRequiredBefore = 1;
1520 return Type;
1522 // Hashes in the middle of a line can lead to any strange token
1523 // sequence.
1524 if (!CurrentToken->Tok.getIdentifierInfo())
1525 return Type;
1526 // In Verilog macro expansions start with a backtick just like preprocessor
1527 // directives. Thus we stop if the word is not a preprocessor directive.
1528 if (Style.isVerilog() && !Keywords.isVerilogPPDirective(*CurrentToken))
1529 return LT_Invalid;
1530 switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) {
1531 case tok::pp_include:
1532 case tok::pp_include_next:
1533 case tok::pp_import:
1534 next();
1535 parseIncludeDirective();
1536 Type = LT_ImportStatement;
1537 break;
1538 case tok::pp_error:
1539 case tok::pp_warning:
1540 parseWarningOrError();
1541 break;
1542 case tok::pp_pragma:
1543 parsePragma();
1544 break;
1545 case tok::pp_if:
1546 case tok::pp_elif:
1547 Contexts.back().IsExpression = true;
1548 next();
1549 parseLine();
1550 break;
1551 default:
1552 break;
1554 while (CurrentToken) {
1555 FormatToken *Tok = CurrentToken;
1556 next();
1557 if (Tok->is(tok::l_paren)) {
1558 parseParens();
1559 } else if (Tok->isOneOf(Keywords.kw___has_include,
1560 Keywords.kw___has_include_next)) {
1561 parseHasInclude();
1564 return Type;
1567 public:
1568 LineType parseLine() {
1569 if (!CurrentToken)
1570 return LT_Invalid;
1571 NonTemplateLess.clear();
1572 if (!Line.InMacroBody && CurrentToken->is(tok::hash)) {
1573 // We were not yet allowed to use C++17 optional when this was being
1574 // written. So we used LT_Invalid to mark that the line is not a
1575 // preprocessor directive.
1576 auto Type = parsePreprocessorDirective();
1577 if (Type != LT_Invalid)
1578 return Type;
1581 // Directly allow to 'import <string-literal>' to support protocol buffer
1582 // definitions (github.com/google/protobuf) or missing "#" (either way we
1583 // should not break the line).
1584 IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo();
1585 if ((Style.Language == FormatStyle::LK_Java &&
1586 CurrentToken->is(Keywords.kw_package)) ||
1587 (!Style.isVerilog() && Info &&
1588 Info->getPPKeywordID() == tok::pp_import && CurrentToken->Next &&
1589 CurrentToken->Next->isOneOf(tok::string_literal, tok::identifier,
1590 tok::kw_static))) {
1591 next();
1592 parseIncludeDirective();
1593 return LT_ImportStatement;
1596 // If this line starts and ends in '<' and '>', respectively, it is likely
1597 // part of "#define <a/b.h>".
1598 if (CurrentToken->is(tok::less) && Line.Last->is(tok::greater)) {
1599 parseIncludeDirective();
1600 return LT_ImportStatement;
1603 // In .proto files, top-level options and package statements are very
1604 // similar to import statements and should not be line-wrapped.
1605 if (Style.Language == FormatStyle::LK_Proto && Line.Level == 0 &&
1606 CurrentToken->isOneOf(Keywords.kw_option, Keywords.kw_package)) {
1607 next();
1608 if (CurrentToken && CurrentToken->is(tok::identifier)) {
1609 while (CurrentToken)
1610 next();
1611 return LT_ImportStatement;
1615 bool KeywordVirtualFound = false;
1616 bool ImportStatement = false;
1618 // import {...} from '...';
1619 if (Style.isJavaScript() && CurrentToken->is(Keywords.kw_import))
1620 ImportStatement = true;
1622 while (CurrentToken) {
1623 if (CurrentToken->is(tok::kw_virtual))
1624 KeywordVirtualFound = true;
1625 if (Style.isJavaScript()) {
1626 // export {...} from '...';
1627 // An export followed by "from 'some string';" is a re-export from
1628 // another module identified by a URI and is treated as a
1629 // LT_ImportStatement (i.e. prevent wraps on it for long URIs).
1630 // Just "export {...};" or "export class ..." should not be treated as
1631 // an import in this sense.
1632 if (Line.First->is(tok::kw_export) &&
1633 CurrentToken->is(Keywords.kw_from) && CurrentToken->Next &&
1634 CurrentToken->Next->isStringLiteral()) {
1635 ImportStatement = true;
1637 if (isClosureImportStatement(*CurrentToken))
1638 ImportStatement = true;
1640 if (!consumeToken())
1641 return LT_Invalid;
1643 if (KeywordVirtualFound)
1644 return LT_VirtualFunctionDecl;
1645 if (ImportStatement)
1646 return LT_ImportStatement;
1648 if (Line.startsWith(TT_ObjCMethodSpecifier)) {
1649 if (Contexts.back().FirstObjCSelectorName) {
1650 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
1651 Contexts.back().LongestObjCSelectorName;
1653 return LT_ObjCMethodDecl;
1656 for (const auto &ctx : Contexts)
1657 if (ctx.ContextType == Context::StructArrayInitializer)
1658 return LT_ArrayOfStructInitializer;
1660 return LT_Other;
1663 private:
1664 bool isClosureImportStatement(const FormatToken &Tok) {
1665 // FIXME: Closure-library specific stuff should not be hard-coded but be
1666 // configurable.
1667 return Tok.TokenText == "goog" && Tok.Next && Tok.Next->is(tok::period) &&
1668 Tok.Next->Next &&
1669 (Tok.Next->Next->TokenText == "module" ||
1670 Tok.Next->Next->TokenText == "provide" ||
1671 Tok.Next->Next->TokenText == "require" ||
1672 Tok.Next->Next->TokenText == "requireType" ||
1673 Tok.Next->Next->TokenText == "forwardDeclare") &&
1674 Tok.Next->Next->Next && Tok.Next->Next->Next->is(tok::l_paren);
1677 void resetTokenMetadata() {
1678 if (!CurrentToken)
1679 return;
1681 // Reset token type in case we have already looked at it and then
1682 // recovered from an error (e.g. failure to find the matching >).
1683 if (!CurrentToken->isTypeFinalized() &&
1684 !CurrentToken->isOneOf(
1685 TT_LambdaLSquare, TT_LambdaLBrace, TT_AttributeMacro, TT_IfMacro,
1686 TT_ForEachMacro, TT_TypenameMacro, TT_FunctionLBrace,
1687 TT_ImplicitStringLiteral, TT_InlineASMBrace, TT_FatArrow,
1688 TT_LambdaArrow, TT_NamespaceMacro, TT_OverloadedOperator,
1689 TT_RegexLiteral, TT_TemplateString, TT_ObjCStringLiteral,
1690 TT_UntouchableMacroFunc, TT_StatementAttributeLikeMacro,
1691 TT_FunctionLikeOrFreestandingMacro, TT_ClassLBrace, TT_EnumLBrace,
1692 TT_RecordLBrace, TT_StructLBrace, TT_UnionLBrace, TT_RequiresClause,
1693 TT_RequiresClauseInARequiresExpression, TT_RequiresExpression,
1694 TT_RequiresExpressionLParen, TT_RequiresExpressionLBrace,
1695 TT_CompoundRequirementLBrace, TT_BracedListLBrace)) {
1696 CurrentToken->setType(TT_Unknown);
1698 CurrentToken->Role.reset();
1699 CurrentToken->MatchingParen = nullptr;
1700 CurrentToken->FakeLParens.clear();
1701 CurrentToken->FakeRParens = 0;
1704 void next() {
1705 if (!CurrentToken)
1706 return;
1708 CurrentToken->NestingLevel = Contexts.size() - 1;
1709 CurrentToken->BindingStrength = Contexts.back().BindingStrength;
1710 modifyContext(*CurrentToken);
1711 determineTokenType(*CurrentToken);
1712 CurrentToken = CurrentToken->Next;
1714 resetTokenMetadata();
1717 /// A struct to hold information valid in a specific context, e.g.
1718 /// a pair of parenthesis.
1719 struct Context {
1720 Context(tok::TokenKind ContextKind, unsigned BindingStrength,
1721 bool IsExpression)
1722 : ContextKind(ContextKind), BindingStrength(BindingStrength),
1723 IsExpression(IsExpression) {}
1725 tok::TokenKind ContextKind;
1726 unsigned BindingStrength;
1727 bool IsExpression;
1728 unsigned LongestObjCSelectorName = 0;
1729 bool ColonIsForRangeExpr = false;
1730 bool ColonIsDictLiteral = false;
1731 bool ColonIsObjCMethodExpr = false;
1732 FormatToken *FirstObjCSelectorName = nullptr;
1733 FormatToken *FirstStartOfName = nullptr;
1734 bool CanBeExpression = true;
1735 bool CaretFound = false;
1736 bool InCpp11AttributeSpecifier = false;
1737 bool InCSharpAttributeSpecifier = false;
1738 bool VerilogAssignmentFound = false;
1739 enum {
1740 Unknown,
1741 // Like the part after `:` in a constructor.
1742 // Context(...) : IsExpression(IsExpression)
1743 CtorInitializer,
1744 // Like in the parentheses in a foreach.
1745 ForEachMacro,
1746 // Like the inheritance list in a class declaration.
1747 // class Input : public IO
1748 InheritanceList,
1749 // Like in the braced list.
1750 // int x[] = {};
1751 StructArrayInitializer,
1752 // Like in `static_cast<int>`.
1753 TemplateArgument,
1754 // C11 _Generic selection.
1755 C11GenericSelection,
1756 // Like in the outer parentheses in `ffnand ff1(.q());`.
1757 VerilogInstancePortList,
1758 } ContextType = Unknown;
1761 /// Puts a new \c Context onto the stack \c Contexts for the lifetime
1762 /// of each instance.
1763 struct ScopedContextCreator {
1764 AnnotatingParser &P;
1766 ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind,
1767 unsigned Increase)
1768 : P(P) {
1769 P.Contexts.push_back(Context(ContextKind,
1770 P.Contexts.back().BindingStrength + Increase,
1771 P.Contexts.back().IsExpression));
1774 ~ScopedContextCreator() {
1775 if (P.Style.AlignArrayOfStructures != FormatStyle::AIAS_None) {
1776 if (P.Contexts.back().ContextType == Context::StructArrayInitializer) {
1777 P.Contexts.pop_back();
1778 P.Contexts.back().ContextType = Context::StructArrayInitializer;
1779 return;
1782 P.Contexts.pop_back();
/// Updates the innermost context (\c Contexts.back()) based on \p Current.
///
/// Depending on the token this sets or clears \c IsExpression, clears
/// \c CanBeExpression (after \c new), or records a \c CtorInitializer /
/// \c InheritanceList context type. Some branches additionally retag earlier
/// `*`, `&` and `&&` tokens as \c TT_PointerOrReference.
///
/// The branches form a single if / else-if chain, so only the first matching
/// rule is applied.
1786 void modifyContext(const FormatToken &Current) {
// Returns true if \p Current has assignment precedence and what follows it
// should be treated as an expression.
1787 auto AssignmentStartsExpression = [&]() {
1788 if (Current.getPrecedence() != prec::Assignment)
1789 return false;
1791 if (Line.First->isOneOf(tok::kw_using, tok::kw_return))
1792 return false;
1793 if (Line.First->is(tok::kw_template)) {
1794 assert(Current.Previous);
1795 if (Current.Previous->is(tok::kw_operator)) {
1796 // `template ... operator=` cannot be an expression.
1797 return false;
1800 // `template` keyword can start a variable template.
1801 const FormatToken *Tok = Line.First->getNextNonComment();
1802 assert(Tok); // Current token is on the same line.
1803 if (Tok->isNot(TT_TemplateOpener)) {
1804 // Explicit template instantiations do not have `<>`.
1805 return false;
1808 // This is the default value of a template parameter, determine if it's
1809 // type or non-type.
1810 if (Contexts.back().ContextKind == tok::less) {
1811 assert(Current.Previous->Previous);
1812 return !Current.Previous->Previous->isOneOf(tok::kw_typename,
1813 tok::kw_class);
// Otherwise jump over the template parameter list and look at the first
// token after it: class/enum/struct/using there means this is not an
// expression.
1816 Tok = Tok->MatchingParen;
1817 if (!Tok)
1818 return false;
1819 Tok = Tok->getNextNonComment();
1820 if (!Tok)
1821 return false;
1823 if (Tok->isOneOf(tok::kw_class, tok::kw_enum, tok::kw_struct,
1824 tok::kw_using)) {
1825 return false;
1828 return true;
1831 // Type aliases use `type X = ...;` in TypeScript and can be exported
1832 // using `export type ...`.
1833 if (Style.isJavaScript() &&
1834 (Line.startsWith(Keywords.kw_type, tok::identifier) ||
1835 Line.startsWith(tok::kw_export, Keywords.kw_type,
1836 tok::identifier))) {
1837 return false;
// An `=` directly after `operator` names the operator; it does not start an
// expression.
1840 return !Current.Previous || Current.Previous->isNot(tok::kw_operator);
1843 if (AssignmentStartsExpression()) {
1844 Contexts.back().IsExpression = true;
1845 if (!Line.startsWith(TT_UnaryOperator)) {
// Walk backwards over the tokens to the left of the assignment, stopping
// at the token that follows a `,` or `;`. Bracketed groups are skipped via
// MatchingParen, and the walk ends at any scope opener. `*`, `&` and `&&`
// tokens already typed as binary/unary operators are retagged as
// pointer/reference unless they directly follow an `=`.
1846 for (FormatToken *Previous = Current.Previous;
1847 Previous && Previous->Previous &&
1848 !Previous->Previous->isOneOf(tok::comma, tok::semi);
1849 Previous = Previous->Previous) {
1850 if (Previous->isOneOf(tok::r_square, tok::r_paren, tok::greater)) {
1851 Previous = Previous->MatchingParen;
1852 if (!Previous)
1853 break;
1855 if (Previous->opensScope())
1856 break;
1857 if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator) &&
1858 Previous->isOneOf(tok::star, tok::amp, tok::ampamp) &&
1859 Previous->Previous && Previous->Previous->isNot(tok::equal)) {
1860 Previous->setType(TT_PointerOrReference);
// `<<` starts an expression unless it is the name in `operator<<`.
1864 } else if (Current.is(tok::lessless) &&
1865 (!Current.Previous || !Current.Previous->is(tok::kw_operator))) {
1866 Contexts.back().IsExpression = true;
1867 } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) {
1868 Contexts.back().IsExpression = true;
1869 } else if (Current.is(TT_TrailingReturnArrow)) {
1870 Contexts.back().IsExpression = false;
// After a lambda arrow or `assert`, only Java is treated as an expression
// context.
1871 } else if (Current.is(TT_LambdaArrow) || Current.is(Keywords.kw_assert)) {
1872 Contexts.back().IsExpression = Style.Language == FormatStyle::LK_Java;
1873 } else if (Current.Previous &&
1874 Current.Previous->is(TT_CtorInitializerColon)) {
1875 Contexts.back().IsExpression = true;
1876 Contexts.back().ContextType = Context::CtorInitializer;
1877 } else if (Current.Previous && Current.Previous->is(TT_InheritanceColon)) {
1878 Contexts.back().ContextType = Context::InheritanceList;
1879 } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
// Any run of `*` / `&` immediately before `)`, `>` or `,` is retagged as
// pointer/reference.
1880 for (FormatToken *Previous = Current.Previous;
1881 Previous && Previous->isOneOf(tok::star, tok::amp);
1882 Previous = Previous->Previous) {
1883 Previous->setType(TT_PointerOrReference);
1885 if (Line.MustBeDeclaration &&
1886 Contexts.front().ContextType != Context::CtorInitializer) {
1887 Contexts.back().IsExpression = false;
1889 } else if (Current.is(tok::kw_new)) {
1890 Contexts.back().CanBeExpression = false;
1891 } else if (Current.is(tok::semi) ||
1892 (Current.is(tok::exclaim) && Current.Previous &&
1893 !Current.Previous->is(tok::kw_operator))) {
1894 // This should be the condition or increment in a for-loop.
1895 // But not operator !() (can't use TT_OverloadedOperator here as it has
1896 // not been annotated yet).
1897 Contexts.back().IsExpression = true;
1901 static FormatToken *untilMatchingParen(FormatToken *Current) {
1902 // Used when `MatchingParen` is not yet established.
1903 int ParenLevel = 0;
1904 while (Current) {
1905 if (Current->is(tok::l_paren))
1906 ++ParenLevel;
1907 if (Current->is(tok::r_paren))
1908 --ParenLevel;
1909 if (ParenLevel < 1)
1910 break;
1911 Current = Current->Next;
1913 return Current;
1916 static bool isDeductionGuide(FormatToken &Current) {
1917 // Look for a deduction guide template<T> A(...) -> A<...>;
1918 if (Current.Previous && Current.Previous->is(tok::r_paren) &&
1919 Current.startsSequence(tok::arrow, tok::identifier, tok::less)) {
1920 // Find the TemplateCloser.
1921 FormatToken *TemplateCloser = Current.Next->Next;
1922 int NestingLevel = 0;
1923 while (TemplateCloser) {
1924 // Skip over an expressions in parens A<(3 < 2)>;
1925 if (TemplateCloser->is(tok::l_paren)) {
1926 // No Matching Paren yet so skip to matching paren
1927 TemplateCloser = untilMatchingParen(TemplateCloser);
1928 if (!TemplateCloser)
1929 break;
1931 if (TemplateCloser->is(tok::less))
1932 ++NestingLevel;
1933 if (TemplateCloser->is(tok::greater))
1934 --NestingLevel;
1935 if (NestingLevel < 1)
1936 break;
1937 TemplateCloser = TemplateCloser->Next;
1939 // Assuming we have found the end of the template ensure its followed
1940 // with a semi-colon.
1941 if (TemplateCloser && TemplateCloser->Next &&
1942 TemplateCloser->Next->is(tok::semi) &&
1943 Current.Previous->MatchingParen) {
1944 // Determine if the identifier `A` prior to the A<..>; is the same as
1945 // prior to the A(..)
1946 FormatToken *LeadingIdentifier =
1947 Current.Previous->MatchingParen->Previous;
1949 return LeadingIdentifier &&
1950 LeadingIdentifier->TokenText == Current.Next->TokenText;
1953 return false;
/// Assigns a token type to \p Current if it is still \c TT_Unknown.
///
/// The rules below are tried in order and the first one that matches wins.
/// Besides typing the token, some branches update parser state:
/// \c FirstStartOfName and \c CaretFound / \c VerilogAssignmentFound on the
/// innermost context, and the \c AutoFound member.
1956 void determineTokenType(FormatToken &Current) {
1957 if (!Current.is(TT_Unknown)) {
1958 // The token type is already known.
1959 return;
// In JavaScript and C#, a `!` that follows an operand-like token (or that
// precedes a binary operator or `as`) is typed as a non-null assertion.
1962 if ((Style.isJavaScript() || Style.isCSharp()) &&
1963 Current.is(tok::exclaim)) {
1964 if (Current.Previous) {
1965 bool IsIdentifier =
1966 Style.isJavaScript()
1967 ? Keywords.IsJavaScriptIdentifier(
1968 *Current.Previous, /* AcceptIdentifierName= */ true)
1969 : Current.Previous->is(tok::identifier);
1970 if (IsIdentifier ||
1971 Current.Previous->isOneOf(
1972 tok::kw_default, tok::kw_namespace, tok::r_paren, tok::r_square,
1973 tok::r_brace, tok::kw_false, tok::kw_true, Keywords.kw_type,
1974 Keywords.kw_get, Keywords.kw_init, Keywords.kw_set) ||
1975 Current.Previous->Tok.isLiteral()) {
1976 Current.setType(TT_NonNullAssertion);
1977 return;
1980 if (Current.Next &&
1981 Current.Next->isOneOf(TT_BinaryOperator, Keywords.kw_as)) {
1982 Current.setType(TT_NonNullAssertion);
1983 return;
1987 // Line.MightBeFunctionDecl can only be true after the parentheses of a
1988 // function declaration have been found. In this case, 'Current' is a
1989 // trailing token of this declaration and thus cannot be a name.
1990 if (Current.is(Keywords.kw_instanceof)) {
1991 Current.setType(TT_BinaryOperator);
1992 } else if (isStartOfName(Current) &&
1993 (!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) {
1994 Contexts.back().FirstStartOfName = &Current;
1995 Current.setType(TT_StartOfName);
1996 } else if (Current.is(tok::semi)) {
1997 // Reset FirstStartOfName after finding a semicolon so that a for loop
1998 // with multiple increment statements is not confused with a for loop
1999 // having multiple variable declarations.
2000 Contexts.back().FirstStartOfName = nullptr;
2001 } else if (Current.isOneOf(tok::kw_auto, tok::kw___auto_type)) {
// Remembered so that a later `->` can be recognized as a trailing return
// arrow (see the AutoFound check below).
2002 AutoFound = true;
2003 } else if (Current.is(tok::arrow) &&
2004 Style.Language == FormatStyle::LK_Java) {
2005 Current.setType(TT_LambdaArrow);
2006 } else if (Current.is(tok::arrow) && AutoFound &&
2007 (Line.MightBeFunctionDecl || Line.InPPDirective) &&
2008 Current.NestingLevel == 0 &&
2009 !Current.Previous->isOneOf(tok::kw_operator, tok::identifier)) {
2010 // not auto operator->() -> xxx;
2011 Current.setType(TT_TrailingReturnArrow);
2012 } else if (Current.is(tok::arrow) && Current.Previous &&
2013 Current.Previous->is(tok::r_brace)) {
2014 // Concept implicit conversion constraint needs to be treated like
2015 // a trailing return type ... } -> <type>.
2016 Current.setType(TT_TrailingReturnArrow);
2017 } else if (isDeductionGuide(Current)) {
2018 // Deduction guides trailing arrow " A(...) -> A<T>;".
2019 Current.setType(TT_TrailingReturnArrow);
2020 } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
2021 Current.setType(determineStarAmpUsage(
2022 Current,
2023 Contexts.back().CanBeExpression && Contexts.back().IsExpression,
2024 Contexts.back().ContextType == Context::TemplateArgument));
2025 } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret) ||
2026 (Style.isVerilog() && Current.is(tok::pipe))) {
2027 Current.setType(determinePlusMinusCaretUsage(Current));
// A unary `^` is recorded on the context; determineStarAmpUsage consults
// CaretFound.
2028 if (Current.is(TT_UnaryOperator) && Current.is(tok::caret))
2029 Contexts.back().CaretFound = true;
2030 } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
2031 Current.setType(determineIncrementUsage(Current));
2032 } else if (Current.isOneOf(tok::exclaim, tok::tilde)) {
2033 Current.setType(TT_UnaryOperator);
2034 } else if (Current.is(tok::question)) {
2035 if (Style.isJavaScript() && Line.MustBeDeclaration &&
2036 !Contexts.back().IsExpression) {
2037 // In JavaScript, `interface X { foo?(): bar; }` is an optional method
2038 // on the interface, not a ternary expression.
2039 Current.setType(TT_JsTypeOptionalQuestion);
2040 } else {
2041 Current.setType(TT_ConditionalExpr);
// Generic binary operators. Excluded here: operators directly after `[`,
// any `>`, and everything in text protos.
2043 } else if (Current.isBinaryOperator() &&
2044 (!Current.Previous || Current.Previous->isNot(tok::l_square)) &&
2045 (!Current.is(tok::greater) &&
2046 Style.Language != FormatStyle::LK_TextProto)) {
2047 if (Style.isVerilog()) {
2048 if (Current.is(tok::lessequal) && Contexts.size() == 1 &&
2049 !Contexts.back().VerilogAssignmentFound) {
2050 // In Verilog `<=` is assignment if in its own statement. It is a
2051 // statement instead of an expression, that is it can not be chained.
2052 Current.ForcedPrecedence = prec::Assignment;
2053 Current.setFinalizedType(TT_BinaryOperator);
2055 if (Current.getPrecedence() == prec::Assignment)
2056 Contexts.back().VerilogAssignmentFound = true;
2058 Current.setType(TT_BinaryOperator);
2059 } else if (Current.is(tok::comment)) {
2060 if (Current.TokenText.startswith("/*")) {
2061 if (Current.TokenText.endswith("*/")) {
2062 Current.setType(TT_BlockComment);
2063 } else {
2064 // The lexer has for some reason determined a comment here. But we
2065 // cannot really handle it, if it isn't properly terminated.
2066 Current.Tok.setKind(tok::unknown);
2068 } else {
2069 Current.setType(TT_LineComment);
2071 } else if (Current.is(tok::l_paren)) {
2072 if (lParenStartsCppCast(Current))
2073 Current.setType(TT_CppCastLParen);
2074 } else if (Current.is(tok::r_paren)) {
2075 if (rParenEndsCast(Current))
2076 Current.setType(TT_CastRParen);
// Independently of the cast check: a `)` whose matching `(` is preceded by
// an all-uppercase identifier (that is not a typename macro and sits at the
// start of the line or right after a template declaration) is typed as the
// end of a function annotation, provided the following token does not look
// like the continuation of an expression or declaration.
2077 if (Current.MatchingParen && Current.Next &&
2078 !Current.Next->isBinaryOperator() &&
2079 !Current.Next->isOneOf(tok::semi, tok::colon, tok::l_brace,
2080 tok::comma, tok::period, tok::arrow,
2081 tok::coloncolon, tok::kw_noexcept)) {
2082 if (FormatToken *AfterParen = Current.MatchingParen->Next) {
2083 // Make sure this isn't the return type of an Obj-C block declaration
2084 if (AfterParen->isNot(tok::caret)) {
2085 if (FormatToken *BeforeParen = Current.MatchingParen->Previous) {
2086 if (BeforeParen->is(tok::identifier) &&
2087 !BeforeParen->is(TT_TypenameMacro) &&
2088 BeforeParen->TokenText == BeforeParen->TokenText.upper() &&
2089 (!BeforeParen->Previous ||
2090 BeforeParen->Previous->ClosesTemplateDeclaration)) {
2091 Current.setType(TT_FunctionAnnotationRParen);
2097 } else if (Current.is(tok::at) && Current.Next && !Style.isJavaScript() &&
2098 Style.Language != FormatStyle::LK_Java) {
2099 // In Java & JavaScript, "@..." is a decorator or annotation. In ObjC, it
2100 // marks declarations and properties that need special formatting.
2101 switch (Current.Next->Tok.getObjCKeywordID()) {
2102 case tok::objc_interface:
2103 case tok::objc_implementation:
2104 case tok::objc_protocol:
2105 Current.setType(TT_ObjCDecl);
2106 break;
2107 case tok::objc_property:
2108 Current.setType(TT_ObjCProperty);
2109 break;
2110 default:
2111 break;
2113 } else if (Current.is(tok::period)) {
2114 FormatToken *PreviousNoComment = Current.getPreviousNonComment();
// A `.` right after `,` or `{` is typed as a designated-initializer period.
2115 if (PreviousNoComment &&
2116 PreviousNoComment->isOneOf(tok::comma, tok::l_brace)) {
2117 Current.setType(TT_DesignatedInitializerPeriod);
2118 } else if (Style.Language == FormatStyle::LK_Java && Current.Previous &&
2119 Current.Previous->isOneOf(TT_JavaAnnotation,
2120 TT_LeadingJavaAnnotation)) {
// The `.` inside a dotted Java annotation name takes the annotation's type.
2121 Current.setType(Current.Previous->getType());
2123 } else if (canBeObjCSelectorComponent(Current) &&
2124 // FIXME(bug 36976): ObjC return types shouldn't use
2125 // TT_CastRParen.
2126 Current.Previous && Current.Previous->is(TT_CastRParen) &&
2127 Current.Previous->MatchingParen &&
2128 Current.Previous->MatchingParen->Previous &&
2129 Current.Previous->MatchingParen->Previous->is(
2130 TT_ObjCMethodSpecifier)) {
2131 // This is the first part of an Objective-C selector name. (If there's no
2132 // colon after this, this is the only place which annotates the identifier
2133 // as a selector.)
2134 Current.setType(TT_SelectorName);
2135 } else if (Current.isOneOf(tok::identifier, tok::kw_const, tok::kw_noexcept,
2136 tok::kw_requires) &&
2137 Current.Previous &&
2138 !Current.Previous->isOneOf(tok::equal, tok::at,
2139 TT_CtorInitializerComma,
2140 TT_CtorInitializerColon) &&
2141 Line.MightBeFunctionDecl && Contexts.size() == 1) {
2142 // Line.MightBeFunctionDecl can only be true after the parentheses of a
2143 // function declaration have been found.
2144 Current.setType(TT_TrailingAnnotation);
2145 } else if ((Style.Language == FormatStyle::LK_Java ||
2146 Style.isJavaScript()) &&
2147 Current.Previous) {
// Java / JavaScript annotations: the token after `@` is a leading
// annotation if nothing (or another leading annotation) precedes the `@`,
// otherwise a regular one. `@interface` is left alone.
2148 if (Current.Previous->is(tok::at) &&
2149 Current.isNot(Keywords.kw_interface)) {
2150 const FormatToken &AtToken = *Current.Previous;
2151 const FormatToken *Previous = AtToken.getPreviousNonComment();
2152 if (!Previous || Previous->is(TT_LeadingJavaAnnotation))
2153 Current.setType(TT_LeadingJavaAnnotation);
2154 else
2155 Current.setType(TT_JavaAnnotation);
2156 } else if (Current.Previous->is(tok::period) &&
2157 Current.Previous->isOneOf(TT_JavaAnnotation,
2158 TT_LeadingJavaAnnotation)) {
2159 Current.setType(Current.Previous->getType());
2164 /// Take a guess at whether \p Tok starts a name of a function or
2165 /// variable declaration.
2167 /// This is a heuristic based on whether \p Tok is an identifier following
2168 /// something that is likely a type.
2169 bool isStartOfName(const FormatToken &Tok) {
2170 // Handled in ExpressionParser for Verilog.
2171 if (Style.isVerilog())
2172 return false;
2174 if (Tok.isNot(tok::identifier) || !Tok.Previous)
2175 return false;
2177 if (Tok.Previous->isOneOf(TT_LeadingJavaAnnotation, Keywords.kw_instanceof,
2178 Keywords.kw_as)) {
2179 return false;
2181 if (Style.isJavaScript() && Tok.Previous->is(Keywords.kw_in))
2182 return false;
2184 // Skip "const" as it does not have an influence on whether this is a name.
2185 FormatToken *PreviousNotConst = Tok.getPreviousNonComment();
2187 // For javascript const can be like "let" or "var"
2188 if (!Style.isJavaScript())
2189 while (PreviousNotConst && PreviousNotConst->is(tok::kw_const))
2190 PreviousNotConst = PreviousNotConst->getPreviousNonComment();
2192 if (!PreviousNotConst)
2193 return false;
2195 if (PreviousNotConst->ClosesRequiresClause)
2196 return false;
2198 bool IsPPKeyword = PreviousNotConst->is(tok::identifier) &&
2199 PreviousNotConst->Previous &&
2200 PreviousNotConst->Previous->is(tok::hash);
2202 if (PreviousNotConst->is(TT_TemplateCloser)) {
2203 return PreviousNotConst && PreviousNotConst->MatchingParen &&
2204 PreviousNotConst->MatchingParen->Previous &&
2205 PreviousNotConst->MatchingParen->Previous->isNot(tok::period) &&
2206 PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template);
2209 if (PreviousNotConst->is(tok::r_paren) &&
2210 PreviousNotConst->is(TT_TypeDeclarationParen)) {
2211 return true;
2214 // If is a preprocess keyword like #define.
2215 if (IsPPKeyword)
2216 return false;
2218 // int a or auto a.
2219 if (PreviousNotConst->isOneOf(tok::identifier, tok::kw_auto))
2220 return true;
2222 // *a or &a or &&a.
2223 if (PreviousNotConst->is(TT_PointerOrReference))
2224 return true;
2226 // MyClass a;
2227 if (PreviousNotConst->isSimpleTypeSpecifier())
2228 return true;
2230 // type[] a in Java
2231 if (Style.Language == FormatStyle::LK_Java &&
2232 PreviousNotConst->is(tok::r_square)) {
2233 return true;
2236 // const a = in JavaScript.
2237 return Style.isJavaScript() && PreviousNotConst->is(tok::kw_const);
2240 /// Determine whether '(' is starting a C++ cast.
2241 bool lParenStartsCppCast(const FormatToken &Tok) {
2242 // C-style casts are only used in C++.
2243 if (!Style.isCpp())
2244 return false;
2246 FormatToken *LeftOfParens = Tok.getPreviousNonComment();
2247 if (LeftOfParens && LeftOfParens->is(TT_TemplateCloser) &&
2248 LeftOfParens->MatchingParen) {
2249 auto *Prev = LeftOfParens->MatchingParen->getPreviousNonComment();
2250 if (Prev &&
2251 Prev->isOneOf(tok::kw_const_cast, tok::kw_dynamic_cast,
2252 tok::kw_reinterpret_cast, tok::kw_static_cast)) {
2253 // FIXME: Maybe we should handle identifiers ending with "_cast",
2254 // e.g. any_cast?
2255 return true;
2258 return false;
2261 /// Determine whether ')' is ending a cast.
///
/// This is a chain of heuristics evaluated in order: first the token to the
/// left of the opening paren is inspected, then the token following \p Tok,
/// and finally the tokens between the parentheses. The first rule that
/// reaches a verdict returns.
2262 bool rParenEndsCast(const FormatToken &Tok) {
2263 // C-style casts are only used in C++, C# and Java.
2264 if (!Style.isCSharp() && !Style.isCpp() &&
2265 Style.Language != FormatStyle::LK_Java) {
2266 return false;
2269 // Empty parens aren't casts and there are no casts at the end of the line.
2270 if (Tok.Previous == Tok.MatchingParen || !Tok.Next || !Tok.MatchingParen)
2271 return false;
2273 if (Tok.MatchingParen->is(TT_OverloadedOperatorLParen))
2274 return false;
// --- Rules based on the token to the left of the opening parenthesis. ---
2276 FormatToken *LeftOfParens = Tok.MatchingParen->getPreviousNonComment();
2277 if (LeftOfParens) {
2278 // If there is a closing parenthesis left of the current
2279 // parentheses, look past it as these might be chained casts.
2280 if (LeftOfParens->is(tok::r_paren) &&
2281 LeftOfParens->isNot(TT_CastRParen)) {
2282 if (!LeftOfParens->MatchingParen ||
2283 !LeftOfParens->MatchingParen->Previous) {
2284 return false;
2286 LeftOfParens = LeftOfParens->MatchingParen->Previous;
2289 if (LeftOfParens->is(tok::r_square)) {
2290 // delete[] (void *)ptr;
// Returns the `delete` token if \p Tok is the `]` of `delete[]`, otherwise
// nullptr.
2291 auto MayBeArrayDelete = [](FormatToken *Tok) -> FormatToken * {
2292 if (Tok->isNot(tok::r_square))
2293 return nullptr;
2295 Tok = Tok->getPreviousNonComment();
2296 if (!Tok || Tok->isNot(tok::l_square))
2297 return nullptr;
2299 Tok = Tok->getPreviousNonComment();
2300 if (!Tok || Tok->isNot(tok::kw_delete))
2301 return nullptr;
2302 return Tok;
2304 if (FormatToken *MaybeDelete = MayBeArrayDelete(LeftOfParens))
2305 LeftOfParens = MaybeDelete;
2308 // The Condition directly below this one will see the operator arguments
2309 // as a (void *foo) cast.
2310 // void operator delete(void *foo) ATTRIB;
2311 if (LeftOfParens->Tok.getIdentifierInfo() && LeftOfParens->Previous &&
2312 LeftOfParens->Previous->is(tok::kw_operator)) {
2313 return false;
2316 // If there is an identifier (or with a few exceptions a keyword) right
2317 // before the parentheses, this is unlikely to be a cast.
2318 if (LeftOfParens->Tok.getIdentifierInfo() &&
2319 !LeftOfParens->isOneOf(Keywords.kw_in, tok::kw_return, tok::kw_case,
2320 tok::kw_delete, tok::kw_throw)) {
2321 return false;
2324 // Certain other tokens right before the parentheses are also signals that
2325 // this cannot be a cast.
2326 if (LeftOfParens->isOneOf(tok::at, tok::r_square, TT_OverloadedOperator,
2327 TT_TemplateCloser, tok::ellipsis)) {
2328 return false;
// --- Rules based on the token that follows the closing parenthesis. ---
// A `?` right after the parentheses rules out a cast.
2332 if (Tok.Next->is(tok::question))
2333 return false;
2335 // `foreach((A a, B b) in someList)` should not be seen as a cast.
2336 if (Tok.Next->is(Keywords.kw_in) && Style.isCSharp())
2337 return false;
2339 // Functions which end with decorations like volatile, noexcept are unlikely
2340 // to be casts.
2341 if (Tok.Next->isOneOf(tok::kw_noexcept, tok::kw_volatile, tok::kw_const,
2342 tok::kw_requires, tok::kw_throw, tok::arrow,
2343 Keywords.kw_override, Keywords.kw_final) ||
2344 isCppAttribute(Style.isCpp(), *Tok.Next)) {
2345 return false;
2348 // As Java has no function types, a "(" after the ")" likely means that this
2349 // is a cast.
2350 if (Style.Language == FormatStyle::LK_Java && Tok.Next->is(tok::l_paren))
2351 return true;
2353 // If a (non-string) literal follows, this is likely a cast.
2354 if (Tok.Next->isNot(tok::string_literal) &&
2355 (Tok.Next->Tok.isLiteral() ||
2356 Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof))) {
2357 return true;
2360 // Heuristically try to determine whether the parentheses contain a type.
2361 auto IsQualifiedPointerOrReference = [](FormatToken *T) {
2362 // This is used to handle cases such as x = (foo *const)&y;
2363 assert(!T->isSimpleTypeSpecifier() && "Should have already been checked");
2364 // Strip trailing qualifiers such as const or volatile when checking
2365 // whether the parens could be a cast to a pointer/reference type.
2366 while (T) {
2367 if (T->is(TT_AttributeParen)) {
2368 // Handle `x = (foo *__attribute__((foo)))&v;`:
2369 if (T->MatchingParen && T->MatchingParen->Previous &&
2370 T->MatchingParen->Previous->is(tok::kw___attribute)) {
2371 T = T->MatchingParen->Previous->Previous;
2372 continue;
2374 } else if (T->is(TT_AttributeSquare)) {
2375 // Handle `x = (foo *[[clang::foo]])&v;`:
2376 if (T->MatchingParen && T->MatchingParen->Previous) {
2377 T = T->MatchingParen->Previous;
2378 continue;
2380 } else if (T->canBePointerOrReferenceQualifier()) {
2381 T = T->Previous;
2382 continue;
2384 break;
2386 return T && T->is(TT_PointerOrReference);
// The parentheses are taken to contain a type if their last token closes a
// template or type-declaration paren, is a simple type specifier, or is a
// (possibly qualified) pointer/reference.
2388 bool ParensAreType =
2389 !Tok.Previous ||
2390 Tok.Previous->isOneOf(TT_TemplateCloser, TT_TypeDeclarationParen) ||
2391 Tok.Previous->isSimpleTypeSpecifier() ||
2392 IsQualifiedPointerOrReference(Tok.Previous);
// If `=`, `;`, `{` or `>` follows, the parentheses could instead be the end
// of a declaration, so a type inside them is not treated as a cast.
2393 bool ParensCouldEndDecl =
2394 Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace, tok::greater);
2395 if (ParensAreType && !ParensCouldEndDecl)
2396 return true;
2398 // At this point, we heuristically assume that there are no casts at the
2399 // start of the line. We assume that we have found most cases where there
2400 // are by the logic above, e.g. "(void)x;".
2401 if (!LeftOfParens)
2402 return false;
2404 // Certain token types inside the parentheses mean that this can't be a
2405 // cast.
2406 for (const FormatToken *Token = Tok.MatchingParen->Next; Token != &Tok;
2407 Token = Token->Next) {
2408 if (Token->is(TT_BinaryOperator))
2409 return false;
2412 // If the following token is an identifier or 'this', this is a cast. All
2413 // cases where this can be something else are handled above.
2414 if (Tok.Next->isOneOf(tok::identifier, tok::kw_this))
2415 return true;
2417 // Look for a cast `( x ) (`.
2418 if (Tok.Next->is(tok::l_paren) && Tok.Previous && Tok.Previous->Previous) {
2419 if (Tok.Previous->is(tok::identifier) &&
2420 Tok.Previous->Previous->is(tok::l_paren)) {
2421 return true;
// The remaining rule looks two tokens ahead, so give up if there is no
// second token.
2425 if (!Tok.Next->Next)
2426 return false;
2428 // If the next token after the parenthesis is a unary operator, assume
2429 // that this is cast, unless there are unexpected tokens inside the
2430 // parenthesis.
2431 bool NextIsUnary =
2432 Tok.Next->isUnaryOperator() || Tok.Next->isOneOf(tok::amp, tok::star);
// A following `+` is never accepted here, and the operand after the unary
// operator must be an identifier or a numeric constant.
2433 if (!NextIsUnary || Tok.Next->is(tok::plus) ||
2434 !Tok.Next->Next->isOneOf(tok::identifier, tok::numeric_constant)) {
2435 return false;
2437 // Search for unexpected tokens.
2438 for (FormatToken *Prev = Tok.Previous; Prev != Tok.MatchingParen;
2439 Prev = Prev->Previous) {
2440 if (!Prev->isOneOf(tok::kw_const, tok::identifier, tok::coloncolon))
2441 return false;
2443 return true;
2446 /// Returns true if the token is used as a unary operator.
2447 bool determineUnaryOperatorByUsage(const FormatToken &Tok) {
2448 const FormatToken *PrevToken = Tok.getPreviousNonComment();
2449 if (!PrevToken)
2450 return true;
2452 // These keywords are deliberately not included here because they may
2453 // precede only one of unary star/amp and plus/minus but not both. They are
2454 // either included in determineStarAmpUsage or determinePlusMinusCaretUsage.
2456 // @ - It may be followed by a unary `-` in Objective-C literals. We don't
2457 // know how they can be followed by a star or amp.
2458 if (PrevToken->isOneOf(
2459 TT_ConditionalExpr, tok::l_paren, tok::comma, tok::colon, tok::semi,
2460 tok::equal, tok::question, tok::l_square, tok::l_brace,
2461 tok::kw_case, tok::kw_co_await, tok::kw_co_return, tok::kw_co_yield,
2462 tok::kw_delete, tok::kw_return, tok::kw_throw)) {
2463 return true;
2466 // We put sizeof here instead of only in determineStarAmpUsage. In the cases
2467 // where the unary `+` operator is overloaded, it is reasonable to write
2468 // things like `sizeof +x`. Like commit 446d6ec996c6c3.
2469 if (PrevToken->is(tok::kw_sizeof))
2470 return true;
2472 // A sequence of leading unary operators.
2473 if (PrevToken->isOneOf(TT_CastRParen, TT_UnaryOperator))
2474 return true;
2476 // There can't be two consecutive binary operators.
2477 if (PrevToken->is(TT_BinaryOperator))
2478 return true;
2480 return false;
2483 /// Return the type of the given token assuming it is * or &.
///
/// \param Tok the `*`, `&` or `&&` token to classify.
/// \param IsExpression whether the surrounding context is treated as an
///        expression.
/// \param InTemplateArgument whether the token is inside a template
///        argument.
/// \returns \c TT_BinaryOperator, \c TT_UnaryOperator or
///          \c TT_PointerOrReference.
///
/// The checks are ordered heuristics; the first one that matches decides.
2484 TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression,
2485 bool InTemplateArgument) {
2486 if (Style.isJavaScript())
2487 return TT_BinaryOperator;
2489 // && in C# must be a binary operator.
2490 if (Style.isCSharp() && Tok.is(tok::ampamp))
2491 return TT_BinaryOperator;
2493 if (Style.isVerilog()) {
2494 // In Verilog, `*` can only be a binary operator. `&` can be either unary
2495 // or binary. `*` also includes `*>` in module path declarations in
2496 // specify blocks because merged tokens take the type of the first one by
2497 // default.
2498 if (Tok.is(tok::star))
2499 return TT_BinaryOperator;
2500 return determineUnaryOperatorByUsage(Tok) ? TT_UnaryOperator
2501 : TT_BinaryOperator;
// Nothing precedes the token, so it cannot be binary.
2504 const FormatToken *PrevToken = Tok.getPreviousNonComment();
2505 if (!PrevToken)
2506 return TT_UnaryOperator;
2508 const FormatToken *NextToken = Tok.getNextNonComment();
2510 if (InTemplateArgument && NextToken && NextToken->is(tok::kw_noexcept))
2511 return TT_BinaryOperator;
// With no following token, or a following token that cannot start an
// operand (`->`, `=`, `noexcept`, `,`, `)`, a requires clause, a
// pointer/reference qualifier, or a `{` that ends the line), the token is a
// pointer/reference.
2513 if (!NextToken ||
2514 NextToken->isOneOf(tok::arrow, tok::equal, tok::kw_noexcept, tok::comma,
2515 tok::r_paren, TT_RequiresClause) ||
2516 NextToken->canBePointerOrReferenceQualifier() ||
2517 (NextToken->is(tok::l_brace) && !NextToken->getNextNonComment())) {
2518 return TT_PointerOrReference;
2521 if (PrevToken->is(tok::coloncolon))
2522 return TT_PointerOrReference;
2524 if (PrevToken->is(tok::r_paren) && PrevToken->is(TT_TypeDeclarationParen))
2525 return TT_PointerOrReference;
2527 if (determineUnaryOperatorByUsage(Tok))
2528 return TT_UnaryOperator;
2530 if (NextToken->is(tok::l_square) && NextToken->isNot(TT_LambdaLSquare))
2531 return TT_PointerOrReference;
2532 if (NextToken->is(tok::kw_operator) && !IsExpression)
2533 return TT_PointerOrReference;
2534 if (NextToken->isOneOf(tok::comma, tok::semi))
2535 return TT_PointerOrReference;
2537 // After right braces, star tokens are likely to be pointers to struct,
2538 // union, or class.
2539 // struct {} *ptr;
2540 // This by itself is not sufficient to distinguish from multiplication
2541 // following a brace-initialized expression, as in:
2542 // int i = int{42} * 2;
2543 // In the struct case, the part of the struct declaration until the `{` and
2544 // the `}` are put on separate unwrapped lines; in the brace-initialized
2545 // case, the matching `{` is on the same unwrapped line, so check for the
2546 // presence of the matching brace to distinguish between those.
2547 if (PrevToken->is(tok::r_brace) && Tok.is(tok::star) &&
2548 !PrevToken->MatchingParen) {
2549 return TT_PointerOrReference;
// `delete[] *ptr` / `delete[] &ref`.
2552 if (PrevToken->endsSequence(tok::r_square, tok::l_square, tok::kw_delete))
2553 return TT_UnaryOperator;
// A literal or a closing bracket on the left ends an operand, so the token
// is binary.
2555 if (PrevToken->Tok.isLiteral() ||
2556 PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true,
2557 tok::kw_false, tok::r_brace)) {
2558 return TT_BinaryOperator;
// Look past any opening parentheses on the right: a literal, boolean
// keyword or unary operator there means the token is binary.
2561 const FormatToken *NextNonParen = NextToken;
2562 while (NextNonParen && NextNonParen->is(tok::l_paren))
2563 NextNonParen = NextNonParen->getNextNonComment();
2564 if (NextNonParen && (NextNonParen->Tok.isLiteral() ||
2565 NextNonParen->isOneOf(tok::kw_true, tok::kw_false) ||
2566 NextNonParen->isUnaryOperator())) {
2567 return TT_BinaryOperator;
2570 // If we know we're in a template argument, there are no named declarations.
2571 // Thus, having an identifier on the right-hand side indicates a binary
2572 // operator.
2573 if (InTemplateArgument && NextToken->Tok.isAnyIdentifier())
2574 return TT_BinaryOperator;
2576 // "&&(" is quite unlikely to be two successive unary "&".
2577 if (Tok.is(tok::ampamp) && NextToken->is(tok::l_paren))
2578 return TT_BinaryOperator;
2580 // This catches some cases where evaluation order is used as control flow:
2581 // aaa && aaa->f();
2582 if (NextToken->Tok.isAnyIdentifier()) {
2583 const FormatToken *NextNextToken = NextToken->getNextNonComment();
2584 if (NextNextToken && NextNextToken->is(tok::arrow))
2585 return TT_BinaryOperator;
2588 // It is very unlikely that we are going to find a pointer or reference type
2589 // definition on the RHS of an assignment.
2590 if (IsExpression && !Contexts.back().CaretFound)
2591 return TT_BinaryOperator;
2593 // Operators at class scope are likely pointer or reference members.
2594 if (!Scopes.empty() && Scopes.back() == ST_Class)
2595 return TT_PointerOrReference;
2597 // Tokens that indicate member access or chained operator& use.
// Note that a null token also satisfies this predicate.
2598 auto IsChainedOperatorAmpOrMember = [](const FormatToken *token) {
2599 return !token || token->isOneOf(tok::amp, tok::period, tok::arrow,
2600 tok::arrowstar, tok::periodstar);
2603 // It's more likely that & represents operator& than an uninitialized
2604 // reference.
2605 if (Tok.is(tok::amp) && PrevToken && PrevToken->Tok.isAnyIdentifier() &&
2606 IsChainedOperatorAmpOrMember(PrevToken->getPreviousNonComment()) &&
2607 NextToken && NextToken->Tok.isAnyIdentifier()) {
2608 if (auto NextNext = NextToken->getNextNonComment();
2609 NextNext &&
2610 (IsChainedOperatorAmpOrMember(NextNext) || NextNext->is(tok::semi))) {
2611 return TT_BinaryOperator;
// Default when no rule above matched.
2615 return TT_PointerOrReference;
2618 TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) {
2619 if (determineUnaryOperatorByUsage(Tok))
2620 return TT_UnaryOperator;
2622 const FormatToken *PrevToken = Tok.getPreviousNonComment();
2623 if (!PrevToken)
2624 return TT_UnaryOperator;
2626 if (PrevToken->is(tok::at))
2627 return TT_UnaryOperator;
2629 // Fall back to marking the token as binary operator.
2630 return TT_BinaryOperator;
2633 /// Determine whether ++/-- are pre- or post-increments/-decrements.
2634 TokenType determineIncrementUsage(const FormatToken &Tok) {
2635 const FormatToken *PrevToken = Tok.getPreviousNonComment();
2636 if (!PrevToken || PrevToken->is(TT_CastRParen))
2637 return TT_UnaryOperator;
2638 if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
2639 return TT_TrailingUnaryOperator;
2641 return TT_UnaryOperator;
2644 SmallVector<Context, 8> Contexts;
2646 const FormatStyle &Style;
2647 AnnotatedLine &Line;
2648 FormatToken *CurrentToken;
2649 bool AutoFound;
2650 const AdditionalKeywords &Keywords;
2652 SmallVector<ScopeType> &Scopes;
2654 // Set of "<" tokens that do not open a template parameter list. If parseAngle
2655 // determines that a specific token can't be a template opener, it will make
2656 // the same decision irrespective of the decisions for tokens leading up to it.
2657 // Store this information to prevent this from causing exponential runtime.
2658 llvm::SmallPtrSet<FormatToken *, 16> NonTemplateLess;
// Pseudo precedence levels placed directly above prec::PointerToMember.
// ExpressionParser::parse() dispatches to parseUnaryOperator() at the first
// one and stops recursing once the requested level exceeds the second one.
2661 static const int PrecedenceUnaryOperator = prec::PointerToMember + 1;
2662 static const int PrecedenceArrowAndPeriod = prec::PointerToMember + 2;
2664 /// Parses binary expressions by inserting fake parenthesis based on
2665 /// operator precedence.
2666 class ExpressionParser {
2667 public:
/// Creates a parser positioned on the first token of \p Line. \p Style,
/// \p Keywords and \p Line are stored as references.
2668 ExpressionParser(const FormatStyle &Style, const AdditionalKeywords &Keywords,
2669 AnnotatedLine &Line)
2670 : Style(Style), Keywords(Keywords), Line(Line), Current(Line.First) {}
2672 /// Parse expressions with the given operator precedence.
///
/// Operators whose precedence equals \p Precedence are linked to each other
/// through NextOperator and numbered with OperatorIndex; anything that binds
/// tighter is consumed by the recursive call with Precedence + 1. After the
/// loop, if an operator at this level was seen (and either tokens remain or
/// \p Precedence is greater than 0), fake parentheses are added beginning at
/// the first token of the expression.
2673 void parse(int Precedence = 0) {
2674 // Skip 'return' and colons annotated as ObjC method expression or dict
2675 // literal, as they are not part of a binary expression.
2676 while (Current && (Current->is(tok::kw_return) ||
2677 (Current->is(tok::colon) &&
2678 Current->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral)))) {
2679 next();
// PrecedenceArrowAndPeriod is the highest level handled here, so a larger
// value ends the recursion.
2682 if (!Current || Precedence > PrecedenceArrowAndPeriod)
2683 return;
2685 // Conditional expressions need to be parsed separately for proper nesting.
2686 if (Precedence == prec::Conditional) {
2687 parseConditionalExpr();
2688 return;
2691 // Parse unary operators, which all have a higher precedence than binary
2692 // operators.
2693 if (Precedence == PrecedenceUnaryOperator) {
2694 parseUnaryOperator();
2695 return;
// Start is where the fake left parenthesis will go; LatestOperator is the
// most recent operator found at exactly this precedence level.
2698 FormatToken *Start = Current;
2699 FormatToken *LatestOperator = nullptr;
2700 unsigned OperatorIndex = 0;
2701 // The first name of the current type in a port list.
2702 FormatToken *VerilogFirstOfType = nullptr;
2704 while (Current) {
2705 // In Verilog ports in a module header that don't have a type take the
2706 // type of the previous one. For example,
2707 // module a(output b,
2708 // c,
2709 // output d);
2710 // In this case there need to be fake parentheses around b and c.
2711 if (Style.isVerilog() && Precedence == prec::Comma) {
2712 VerilogFirstOfType =
2713 verilogGroupDecl(VerilogFirstOfType, LatestOperator);
2716 // Consume operators with higher precedence.
2717 parse(Precedence + 1);
2719 // Do not assign fake parenthesis to tokens that are part of an
2720 // unexpanded macro call. The line within the macro call contains
2721 // the parenthesis and commas, and we will not find operators within
2722 // that structure.
2723 if (Current && Current->MacroParent)
2724 break;
2726 int CurrentPrecedence = getCurrentPrecedence();
// A selector name at this level starts a new group: close the group
// collected so far (if it contained an operator) and restart from here.
2728 if (Precedence == CurrentPrecedence && Current &&
2729 Current->is(TT_SelectorName)) {
2730 if (LatestOperator)
2731 addFakeParenthesis(Start, prec::Level(Precedence));
2732 Start = Current;
2735 // At the end of the line or when an operator with lower precedence is
2736 // found, insert fake parenthesis and return.
2737 if (!Current ||
2738 (Current->closesScope() &&
2739 (Current->MatchingParen || Current->is(TT_TemplateString))) ||
2740 (CurrentPrecedence != -1 && CurrentPrecedence < Precedence) ||
2741 (CurrentPrecedence == prec::Conditional &&
2742 Precedence == prec::Assignment && Current->is(tok::colon))) {
2743 break;
2746 // Consume scopes: (), [], <> and {}
2747 // In addition to that we handle require clauses as scope, so that the
2748 // constraints in that are correctly indented.
2749 if (Current->opensScope() ||
2750 Current->isOneOf(TT_RequiresClause,
2751 TT_RequiresClauseInARequiresExpression)) {
2752 // A fragment of a JavaScript template string can look like '}..${' and
2753 // thus close a scope and open a new one at the same time.
2754 while (Current && (!Current->closesScope() || Current->opensScope())) {
2755 next();
2756 parse();
2758 next();
2759 } else {
2760 // Operator found.
2761 if (CurrentPrecedence == Precedence) {
2762 if (LatestOperator)
2763 LatestOperator->NextOperator = Current;
2764 LatestOperator = Current;
2765 Current->OperatorIndex = OperatorIndex;
2766 ++OperatorIndex;
2768 next(/*SkipPastLeadingComments=*/Precedence > 0);
2772 // Group variables of the same type.
2773 if (Style.isVerilog() && Precedence == prec::Comma && VerilogFirstOfType)
2774 addFakeParenthesis(VerilogFirstOfType, prec::Comma);
2776 if (LatestOperator && (Current || Precedence > 0)) {
2777 // The requires clauses do not necessarily end in a semicolon or a brace,
2778 // but just go over to struct/class or a function declaration, we need to
2779 // intervene so that the fake right paren is inserted correctly.
2780 auto End =
2781 (Start->Previous &&
2782 Start->Previous->isOneOf(TT_RequiresClause,
2783 TT_RequiresClauseInARequiresExpression))
2784 ? [this]() {
2785 auto Ret = Current ? Current : Line.Last;
2786 while (!Ret->ClosesRequiresClause && Ret->Previous)
2787 Ret = Ret->Previous;
2788 return Ret;
2790 : nullptr;
2792 if (Precedence == PrecedenceArrowAndPeriod) {
2793 // Call expressions don't have a binary operator precedence.
2794 addFakeParenthesis(Start, prec::Unknown, End);
2795 } else {
2796 addFakeParenthesis(Start, prec::Level(Precedence), End);
2801 private:
2802 /// Gets the precedence (+1) of the given token for binary operators
2803 /// and other tokens that we treat like binary operators.
2804 int getCurrentPrecedence() {
2805 if (Current) {
2806 const FormatToken *NextNonComment = Current->getNextNonComment();
2807 if (Current->is(TT_ConditionalExpr))
2808 return prec::Conditional;
2809 if (NextNonComment && Current->is(TT_SelectorName) &&
2810 (NextNonComment->isOneOf(TT_DictLiteral, TT_JsTypeColon) ||
2811 ((Style.Language == FormatStyle::LK_Proto ||
2812 Style.Language == FormatStyle::LK_TextProto) &&
2813 NextNonComment->is(tok::less)))) {
2814 return prec::Assignment;
2816 if (Current->is(TT_JsComputedPropertyName))
2817 return prec::Assignment;
2818 if (Current->is(TT_LambdaArrow))
2819 return prec::Comma;
2820 if (Current->is(TT_FatArrow))
2821 return prec::Assignment;
2822 if (Current->isOneOf(tok::semi, TT_InlineASMColon, TT_SelectorName) ||
2823 (Current->is(tok::comment) && NextNonComment &&
2824 NextNonComment->is(TT_SelectorName))) {
2825 return 0;
2827 if (Current->is(TT_RangeBasedForLoopColon))
2828 return prec::Comma;
2829 if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2830 Current->is(Keywords.kw_instanceof)) {
2831 return prec::Relational;
2833 if (Style.isJavaScript() &&
2834 Current->isOneOf(Keywords.kw_in, Keywords.kw_as)) {
2835 return prec::Relational;
2837 if (Current->is(TT_BinaryOperator) || Current->is(tok::comma))
2838 return Current->getPrecedence();
2839 if (Current->isOneOf(tok::period, tok::arrow) &&
2840 Current->isNot(TT_TrailingReturnArrow)) {
2841 return PrecedenceArrowAndPeriod;
2843 if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2844 Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements,
2845 Keywords.kw_throws)) {
2846 return 0;
2848 // In Verilog case labels are not on separate lines straight out of
2849 // UnwrappedLineParser. The colon is not part of an expression.
2850 if (Style.isVerilog() && Current->is(tok::colon))
2851 return 0;
2853 return -1;
2856 void addFakeParenthesis(FormatToken *Start, prec::Level Precedence,
2857 FormatToken *End = nullptr) {
2858 Start->FakeLParens.push_back(Precedence);
2859 if (Precedence > prec::Unknown)
2860 Start->StartsBinaryExpression = true;
2861 if (!End && Current)
2862 End = Current->getPreviousNonComment();
2863 if (End) {
2864 ++End->FakeRParens;
2865 if (Precedence > prec::Unknown)
2866 End->EndsBinaryExpression = true;
2870 /// Parse unary operator expressions and surround them with fake
2871 /// parentheses if appropriate.
2872 void parseUnaryOperator() {
2873 llvm::SmallVector<FormatToken *, 2> Tokens;
2874 while (Current && Current->is(TT_UnaryOperator)) {
2875 Tokens.push_back(Current);
2876 next();
2878 parse(PrecedenceArrowAndPeriod);
2879 for (FormatToken *Token : llvm::reverse(Tokens)) {
2880 // The actual precedence doesn't matter.
2881 addFakeParenthesis(Token, prec::Unknown);
2885 void parseConditionalExpr() {
2886 while (Current && Current->isTrailingComment())
2887 next();
2888 FormatToken *Start = Current;
2889 parse(prec::LogicalOr);
2890 if (!Current || !Current->is(tok::question))
2891 return;
2892 next();
2893 parse(prec::Assignment);
2894 if (!Current || Current->isNot(TT_ConditionalExpr))
2895 return;
2896 next();
2897 parse(prec::Assignment);
2898 addFakeParenthesis(Start, prec::Conditional);
2901 void next(bool SkipPastLeadingComments = true) {
2902 if (Current)
2903 Current = Current->Next;
2904 while (Current &&
2905 (Current->NewlinesBefore == 0 || SkipPastLeadingComments) &&
2906 Current->isTrailingComment()) {
2907 Current = Current->Next;
2911 // Add fake parenthesis around declarations of the same type for example in a
2912 // module prototype. Return the first port / variable of the current type.
//
// FirstOfType is the value returned by the previous call (or null), and
// PreviousComma is the most recent comma operator seen by the caller. Returns
// null when there is no current token or when skipping an attribute runs off
// the end; otherwise returns FirstOfType, which is replaced by the newly
// found typed name when the current item introduces one.
2913 FormatToken *verilogGroupDecl(FormatToken *FirstOfType,
2914 FormatToken *PreviousComma) {
2915 if (!Current)
2916 return nullptr;
2918 FormatToken *Start = Current;
2920 // Skip attributes.
2921 while (Start->startsSequence(tok::l_paren, tok::star)) {
2922 if (!(Start = Start->MatchingParen) ||
2923 !(Start = Start->getNextNonComment())) {
2924 return nullptr;
2928 FormatToken *Tok = Start;
2930 if (Tok->is(Keywords.kw_assign))
2931 Tok = Tok->getNextNonComment();
2933 // Skip any type qualifiers to find the first identifier. It may be either a
2934 // new type name or a variable name. There can be several type qualifiers
2935 // preceding a variable name, and we can not tell them apart by looking at
2936 // the word alone since a macro can be defined as either a type qualifier or
2937 // a variable name. Thus we use the last word before the dimensions instead
2938 // of the first word as the candidate for the variable or type name.
2939 FormatToken *First = nullptr;
2940 while (Tok) {
2941 FormatToken *Next = Tok->getNextNonComment();
2943 if (Tok->is(tok::hash)) {
2944 // Start of a macro expansion.
2945 First = Tok;
2946 Tok = Next;
2947 if (Tok)
2948 Tok = Tok->getNextNonComment();
2949 } else if (Tok->is(tok::hashhash)) {
2950 // Concatenation. Skip.
2951 Tok = Next;
2952 if (Tok)
2953 Tok = Tok->getNextNonComment();
2954 } else if ((Keywords.isVerilogQualifier(*Tok) ||
2955 Keywords.isVerilogIdentifier(*Tok))) {
2956 First = Tok;
2957 Tok = Next;
2958 // The name may have dots like `interface_foo.modport_foo`.
2959 while (Tok && Tok->isOneOf(tok::period, tok::coloncolon) &&
2960 (Tok = Tok->getNextNonComment())) {
2961 if (Keywords.isVerilogIdentifier(*Tok))
2962 Tok = Tok->getNextNonComment();
2964 } else if (!Next) {
2965 Tok = nullptr;
2966 } else if (Tok->is(tok::l_paren)) {
2967 // Make sure the parenthesized list is a drive strength. Otherwise the
2968 // statement may be a module instantiation in which case we have already
2969 // found the instance name.
2970 if (Next->isOneOf(
2971 Keywords.kw_highz0, Keywords.kw_highz1, Keywords.kw_large,
2972 Keywords.kw_medium, Keywords.kw_pull0, Keywords.kw_pull1,
2973 Keywords.kw_small, Keywords.kw_strong0, Keywords.kw_strong1,
2974 Keywords.kw_supply0, Keywords.kw_supply1, Keywords.kw_weak0,
2975 Keywords.kw_weak1)) {
2976 Tok->setType(TT_VerilogStrength);
2977 Tok = Tok->MatchingParen;
2978 if (Tok) {
2979 Tok->setType(TT_VerilogStrength);
2980 Tok = Tok->getNextNonComment();
2982 } else {
2983 break;
// NOTE(review): tok::hash is already matched by the first branch of this
// chain, so this branch looks unreachable as written - confirm which token
// was meant to be tested here.
2985 } else if (Tok->is(tok::hash)) {
2986 if (Next->is(tok::l_paren))
2987 Next = Next->MatchingParen;
2988 if (Next)
2989 Tok = Next->getNextNonComment();
2990 } else {
2991 break;
2995 // Find the second identifier. If it exists it will be the name.
2996 FormatToken *Second = nullptr;
2997 // Dimensions.
2998 while (Tok && Tok->is(tok::l_square) && (Tok = Tok->MatchingParen))
2999 Tok = Tok->getNextNonComment();
3000 if (Tok && (Tok->is(tok::hash) || Keywords.isVerilogIdentifier(*Tok)))
3001 Second = Tok;
3003 // If the second identifier doesn't exist and there are qualifiers, the type
3004 // is implied.
3005 FormatToken *TypedName = nullptr;
3006 if (Second) {
3007 TypedName = Second;
3008 if (First && First->is(TT_Unknown))
3009 First->setType(TT_VerilogDimensionedTypeName);
3010 } else if (First != Start) {
3011 // If 'First' is null, then this isn't a declaration, 'TypedName' gets set
3012 // to null as intended.
3013 TypedName = First;
3016 if (TypedName) {
3017 // This is a declaration with a new type.
3018 if (TypedName->is(TT_Unknown))
3019 TypedName->setType(TT_StartOfName);
3020 // Group variables of the previous type.
3021 if (FirstOfType && PreviousComma) {
3022 PreviousComma->setType(TT_VerilogTypeComma);
3023 addFakeParenthesis(FirstOfType, prec::Comma, PreviousComma->Previous);
3026 FirstOfType = TypedName;
3028 // Don't let higher precedence handle the qualifiers. For example if we
3029 // have:
3030 // parameter x = 0
3031 // We skip `parameter` here. This way the fake parentheses for the
3032 // assignment will be around `x = 0`.
3033 while (Current && Current != FirstOfType) {
3034 if (Current->opensScope()) {
3035 next();
3036 parse();
3038 next();
3042 return FirstOfType;
3045 const FormatStyle &Style;
3046 const AdditionalKeywords &Keywords;
3047 const AnnotatedLine &Line;
3048 FormatToken *Current;
3051 } // end anonymous namespace
// Adjusts the Level of comment lines so that a comment that was originally
// aligned with the line following it keeps that alignment. Lines are visited
// in reverse order so that the following line is known when a comment is
// reached; the children of every line are processed recursively.
3053 void TokenAnnotator::setCommentLineLevels(
3054 SmallVectorImpl<AnnotatedLine *> &Lines) const {
3055 const AnnotatedLine *NextNonCommentLine = nullptr;
3056 for (AnnotatedLine *Line : llvm::reverse(Lines)) {
3057 assert(Line->First);
3059 // If the comment is currently aligned with the line immediately following
3060 // it, that's probably intentional and we should keep it.
3061 if (NextNonCommentLine && !NextNonCommentLine->First->Finalized &&
3062 Line->isComment() && NextNonCommentLine->First->NewlinesBefore <= 1 &&
3063 NextNonCommentLine->First->OriginalColumn ==
3064 Line->First->OriginalColumn) {
3065 const bool PPDirectiveOrImportStmt =
3066 NextNonCommentLine->Type == LT_PreprocessorDirective ||
3067 NextNonCommentLine->Type == LT_ImportStatement;
3068 if (PPDirectiveOrImportStmt)
3069 Line->Type = LT_CommentAbovePPDirective;
3070 // Align comments for preprocessor lines with the # in column 0 if
3071 // preprocessor lines are not indented. Otherwise, align with the next
3072 // line.
3073 Line->Level = Style.IndentPPDirectives != FormatStyle::PPDIS_BeforeHash &&
3074 PPDirectiveOrImportStmt
3076 : NextNonCommentLine->Level;
3077 } else {
// Remember this line as the reference for the lines above it, unless it
// starts with a closing brace.
3078 NextNonCommentLine = Line->First->isNot(tok::r_brace) ? Line : nullptr;
3081 setCommentLineLevels(Line->Children);
3085 static unsigned maxNestingDepth(const AnnotatedLine &Line) {
3086 unsigned Result = 0;
3087 for (const auto *Tok = Line.First; Tok; Tok = Tok->Next)
3088 Result = std::max(Result, Tok->NestingLevel);
3089 return Result;
3092 void TokenAnnotator::annotate(AnnotatedLine &Line) {
3093 for (auto &Child : Line.Children)
3094 annotate(*Child);
3096 AnnotatingParser Parser(Style, Line, Keywords, Scopes);
3097 Line.Type = Parser.parseLine();
3099 // With very deep nesting, ExpressionParser uses lots of stack and the
3100 // formatting algorithm is very slow. We're not going to do a good job here
3101 // anyway - it's probably generated code being formatted by mistake.
3102 // Just skip the whole line.
3103 if (maxNestingDepth(Line) > 50)
3104 Line.Type = LT_Invalid;
3106 if (Line.Type == LT_Invalid)
3107 return;
3109 ExpressionParser ExprParser(Style, Keywords, Line);
3110 ExprParser.parse();
3112 if (Line.startsWith(TT_ObjCMethodSpecifier))
3113 Line.Type = LT_ObjCMethodDecl;
3114 else if (Line.startsWith(TT_ObjCDecl))
3115 Line.Type = LT_ObjCDecl;
3116 else if (Line.startsWith(TT_ObjCProperty))
3117 Line.Type = LT_ObjCProperty;
3119 Line.First->SpacesRequiredBefore = 1;
3120 Line.First->CanBreakBefore = Line.First->MustBreakBefore;
3123 // This function heuristically determines whether 'Current' starts the name of a
3124 // function declaration.
//
// The check first locates the '(' that would open the parameter list (going
// past template arguments, '::' qualifiers, attributes and operator names)
// and then inspects the line and the tokens between the parentheses for
// evidence of a declaration. 'IsCpp' is passed on to isCppAttribute() and
// enables the K&R / unnamed-parameter check.
3125 static bool isFunctionDeclarationName(bool IsCpp, const FormatToken &Current,
3126 const AnnotatedLine &Line) {
// Starting at the token after 'operator', returns the token annotated as
// TT_OverloadedOperatorLParen, or null if something that cannot be part of
// an operator name is met first.
3127 auto skipOperatorName = [](const FormatToken *Next) -> const FormatToken * {
3128 for (; Next; Next = Next->Next) {
3129 if (Next->is(TT_OverloadedOperatorLParen))
3130 return Next;
3131 if (Next->is(TT_OverloadedOperator))
3132 continue;
3133 if (Next->isOneOf(tok::kw_new, tok::kw_delete)) {
3134 // For 'new[]' and 'delete[]'.
3135 if (Next->Next &&
3136 Next->Next->startsSequence(tok::l_square, tok::r_square)) {
3137 Next = Next->Next->Next;
3139 continue;
3141 if (Next->startsSequence(tok::l_square, tok::r_square)) {
3142 // For operator[]().
3143 Next = Next->Next;
3144 continue;
3146 if ((Next->isSimpleTypeSpecifier() || Next->is(tok::identifier)) &&
3147 Next->Next && Next->Next->isOneOf(tok::star, tok::amp, tok::ampamp)) {
3148 // For operator void*(), operator char*(), operator Foo*().
3149 Next = Next->Next;
3150 continue;
3152 if (Next->is(TT_TemplateOpener) && Next->MatchingParen) {
3153 Next = Next->MatchingParen;
3154 continue;
3157 break;
3159 return nullptr;
3162 // Find parentheses of parameter list.
3163 const FormatToken *Next = Current.Next;
3164 if (Current.is(tok::kw_operator)) {
// An 'operator' directly preceded by '::' is rejected here.
3165 if (Current.Previous && Current.Previous->is(tok::coloncolon))
3166 return false;
3167 Next = skipOperatorName(Next);
3168 } else {
// Only a TT_StartOfName token at nesting level 0 is considered.
3169 if (!Current.is(TT_StartOfName) || Current.NestingLevel != 0)
3170 return false;
3171 for (; Next; Next = Next->Next) {
3172 if (Next->is(TT_TemplateOpener) && Next->MatchingParen) {
3173 Next = Next->MatchingParen;
3174 } else if (Next->is(tok::coloncolon)) {
3175 Next = Next->Next;
3176 if (!Next)
3177 return false;
3178 if (Next->is(tok::kw_operator)) {
3179 Next = skipOperatorName(Next->Next);
3180 break;
3182 if (!Next->is(tok::identifier))
3183 return false;
3184 } else if (isCppAttribute(IsCpp, *Next)) {
3185 Next = Next->MatchingParen;
3186 if (!Next)
3187 return false;
3188 } else if (Next->is(tok::l_paren)) {
3189 break;
3190 } else {
3191 return false;
3196 // Check whether parameter list can belong to a function declaration.
3197 if (!Next || !Next->is(tok::l_paren) || !Next->MatchingParen)
3198 return false;
3199 // If the line ends with "{", this is likely a function definition.
3200 if (Line.Last->is(tok::l_brace))
3201 return true;
3202 if (Next->Next == Next->MatchingParen)
3203 return true; // Empty parentheses.
3204 // If there is an &/&& after the r_paren, this is likely a function.
3205 if (Next->MatchingParen->Next &&
3206 Next->MatchingParen->Next->is(TT_PointerOrReference)) {
3207 return true;
3210 // Check for K&R C function definitions (and C++ function definitions with
3211 // unnamed parameters), e.g.:
3212 // int f(i)
3213 // {
3214 // return i + 1;
3215 // }
3216 // bool g(size_t = 0, bool b = false)
3217 // {
3218 // return !b;
3219 // }
3220 if (IsCpp && Next->Next && Next->Next->is(tok::identifier) &&
3221 !Line.endsWith(tok::semi)) {
3222 return true;
// Scan the tokens between the parentheses: anything that looks like part of
// a type means "declaration", while a brace, a literal or an ObjC method
// expression means "not a declaration". Nested (...) and <...> are skipped.
3225 for (const FormatToken *Tok = Next->Next; Tok && Tok != Next->MatchingParen;
3226 Tok = Tok->Next) {
3227 if (Tok->is(TT_TypeDeclarationParen))
3228 return true;
3229 if (Tok->isOneOf(tok::l_paren, TT_TemplateOpener) && Tok->MatchingParen) {
3230 Tok = Tok->MatchingParen;
3231 continue;
3233 if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() ||
3234 Tok->isOneOf(TT_PointerOrReference, TT_StartOfName, tok::ellipsis)) {
3235 return true;
3237 if (Tok->isOneOf(tok::l_brace, tok::string_literal, TT_ObjCMethodExpr) ||
3238 Tok->Tok.isLiteral()) {
3239 return false;
3242 return false;
3245 bool TokenAnnotator::mustBreakForReturnType(const AnnotatedLine &Line) const {
3246 assert(Line.MightBeFunctionDecl);
3248 if ((Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_TopLevel ||
3249 Style.AlwaysBreakAfterReturnType ==
3250 FormatStyle::RTBS_TopLevelDefinitions) &&
3251 Line.Level > 0) {
3252 return false;
3255 switch (Style.AlwaysBreakAfterReturnType) {
3256 case FormatStyle::RTBS_None:
3257 return false;
3258 case FormatStyle::RTBS_All:
3259 case FormatStyle::RTBS_TopLevel:
3260 return true;
3261 case FormatStyle::RTBS_AllDefinitions:
3262 case FormatStyle::RTBS_TopLevelDefinitions:
3263 return Line.mightBeFunctionDefinition();
3266 return false;
3269 static bool mustBreakAfterAttributes(const FormatToken &Tok,
3270 const FormatStyle &Style) {
3271 switch (Style.BreakAfterAttributes) {
3272 case FormatStyle::ABS_Always:
3273 return true;
3274 case FormatStyle::ABS_Leave:
3275 return Tok.NewlinesBefore > 0;
3276 default:
3277 return false;
// Computes the per-token formatting data for Line after handling all of its
// children: TotalLength, SpacesRequiredBefore, MustBreakBefore,
// CanBreakBefore and SplitPenalty, followed by the unbreakable tail lengths
// and the IndentLevel of every token. A token recognised as a function
// declaration name is also re-typed to TT_FunctionDeclarationName.
3281 void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) const {
3282 for (AnnotatedLine *ChildLine : Line.Children)
3283 calculateFormattingInformation(*ChildLine);
// A multiline first token is charged the full column limit.
3285 Line.First->TotalLength =
3286 Line.First->IsMultiline ? Style.ColumnLimit
3287 : Line.FirstStartColumn + Line.First->ColumnWidth;
3288 FormatToken *Current = Line.First->Next;
3289 bool InFunctionDecl = Line.MightBeFunctionDecl;
3290 bool AlignArrayOfStructures =
3291 (Style.AlignArrayOfStructures != FormatStyle::AIAS_None &&
3292 Line.Type == LT_ArrayOfStructInitializer);
3293 if (AlignArrayOfStructures)
3294 calculateArrayInitializerColumnList(Line);
// Find the function declaration name, if any. AfterLastAttribute tracks the
// latest token that follows the end of a C++ attribute group, so that a
// break can be forced there according to Style.BreakAfterAttributes.
3296 for (FormatToken *Tok = Current, *AfterLastAttribute = nullptr; Tok;
3297 Tok = Tok->Next) {
3298 if (isFunctionDeclarationName(Style.isCpp(), *Tok, Line)) {
3299 Tok->setType(TT_FunctionDeclarationName);
3300 if (AfterLastAttribute &&
3301 mustBreakAfterAttributes(*AfterLastAttribute, Style)) {
3302 AfterLastAttribute->MustBreakBefore = true;
3303 Line.ReturnTypeWrapped = true;
3305 break;
3307 if (Tok->Previous->EndsCppAttributeGroup)
3308 AfterLastAttribute = Tok;
// Main pass over every token after the first one.
3311 while (Current) {
3312 const FormatToken *Prev = Current->Previous;
3313 if (Current->is(TT_LineComment)) {
3314 if (Prev->is(BK_BracedInit) && Prev->opensScope()) {
3315 Current->SpacesRequiredBefore =
3316 (Style.Cpp11BracedListStyle && !Style.SpacesInParentheses) ? 0 : 1;
3317 } else if (Prev->is(TT_VerilogMultiLineListLParen)) {
3318 Current->SpacesRequiredBefore = 0;
3319 } else {
3320 Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;
3323 // If we find a trailing comment, iterate backwards to determine whether
3324 // it seems to relate to a specific parameter. If so, break before that
3325 // parameter to avoid changing the comment's meaning. E.g. don't move 'b'
3326 // to the previous line in:
3327 // SomeFunction(a,
3328 // b, // comment
3329 // c);
3330 if (!Current->HasUnescapedNewline) {
3331 for (FormatToken *Parameter = Current->Previous; Parameter;
3332 Parameter = Parameter->Previous) {
3333 if (Parameter->isOneOf(tok::comment, tok::r_brace))
3334 break;
3335 if (Parameter->Previous && Parameter->Previous->is(tok::comma)) {
3336 if (!Parameter->Previous->is(TT_CtorInitializerComma) &&
3337 Parameter->HasUnescapedNewline) {
3338 Parameter->MustBreakBefore = true;
3340 break;
3344 } else if (Current->SpacesRequiredBefore == 0 &&
3345 spaceRequiredBefore(Line, *Current)) {
3346 Current->SpacesRequiredBefore = 1;
// A child block of the previous token that ends in a line comment forces a
// break before the current token.
3349 const auto &Children = Prev->Children;
3350 if (!Children.empty() && Children.back()->Last->is(TT_LineComment)) {
3351 Current->MustBreakBefore = true;
3352 } else {
3353 Current->MustBreakBefore =
3354 Current->MustBreakBefore || mustBreakBefore(Line, *Current);
3355 if (!Current->MustBreakBefore && InFunctionDecl &&
3356 Current->is(TT_FunctionDeclarationName)) {
3357 Current->MustBreakBefore = mustBreakForReturnType(Line);
3361 Current->CanBreakBefore =
3362 Current->MustBreakBefore || canBreakBefore(Line, *Current);
// Width contributed by a single child line of the previous token; a child
// ending in a trailing comment is charged the full column limit.
3363 unsigned ChildSize = 0;
3364 if (Prev->Children.size() == 1) {
3365 FormatToken &LastOfChild = *Prev->Children[0]->Last;
3366 ChildSize = LastOfChild.isTrailingComment() ? Style.ColumnLimit
3367 : LastOfChild.TotalLength + 1;
// When the line cannot stay on one row up to this token, add a whole column
// limit to the running length instead of the token's own width.
3369 if (Current->MustBreakBefore || Prev->Children.size() > 1 ||
3370 (Prev->Children.size() == 1 &&
3371 Prev->Children[0]->First->MustBreakBefore) ||
3372 Current->IsMultiline) {
3373 Current->TotalLength = Prev->TotalLength + Style.ColumnLimit;
3374 } else {
3375 Current->TotalLength = Prev->TotalLength + Current->ColumnWidth +
3376 ChildSize + Current->SpacesRequiredBefore;
3379 if (Current->is(TT_CtorInitializerColon))
3380 InFunctionDecl = false;
3382 // FIXME: Only calculate this if CanBreakBefore is true once static
3383 // initializers etc. are sorted out.
3384 // FIXME: Move magic numbers to a better place.
3386 // Reduce penalty for aligning ObjC method arguments using the colon
3387 // alignment as this is the canonical way (still prefer fitting everything
3388 // into one line if possible). Trying to fit a whole expression into one
3389 // line should not force other line breaks (e.g. when ObjC method
3390 // expression is a part of other expression).
3391 Current->SplitPenalty = splitPenalty(Line, *Current, InFunctionDecl);
3392 if (Style.Language == FormatStyle::LK_ObjC &&
3393 Current->is(TT_SelectorName) && Current->ParameterIndex > 0) {
3394 if (Current->ParameterIndex == 1)
3395 Current->SplitPenalty += 5 * Current->BindingStrength;
3396 } else {
3397 Current->SplitPenalty += 20 * Current->BindingStrength;
3400 Current = Current->Next;
3403 calculateUnbreakableTailLengths(Line);
// Assign indent levels: a token closing a block (or block-type list) drops
// one level before it is assigned, a token opening one raises the level for
// the tokens after it.
3404 unsigned IndentLevel = Line.Level;
3405 for (Current = Line.First; Current; Current = Current->Next) {
3406 if (Current->Role)
3407 Current->Role->precomputeFormattingInfos(Current);
3408 if (Current->MatchingParen &&
3409 Current->MatchingParen->opensBlockOrBlockTypeList(Style) &&
3410 IndentLevel > 0) {
3411 --IndentLevel;
3413 Current->IndentLevel = IndentLevel;
3414 if (Current->opensBlockOrBlockTypeList(Style))
3415 ++IndentLevel;
3418 LLVM_DEBUG({ printDebugInfo(Line); });
3421 void TokenAnnotator::calculateUnbreakableTailLengths(
3422 AnnotatedLine &Line) const {
3423 unsigned UnbreakableTailLength = 0;
3424 FormatToken *Current = Line.Last;
3425 while (Current) {
3426 Current->UnbreakableTailLength = UnbreakableTailLength;
3427 if (Current->CanBreakBefore ||
3428 Current->isOneOf(tok::comment, tok::string_literal)) {
3429 UnbreakableTailLength = 0;
3430 } else {
3431 UnbreakableTailLength +=
3432 Current->ColumnWidth + Current->SpacesRequiredBefore;
3434 Current = Current->Previous;
3438 void TokenAnnotator::calculateArrayInitializerColumnList(
3439 AnnotatedLine &Line) const {
3440 if (Line.First == Line.Last)
3441 return;
3442 auto *CurrentToken = Line.First;
3443 CurrentToken->ArrayInitializerLineStart = true;
3444 unsigned Depth = 0;
3445 while (CurrentToken && CurrentToken != Line.Last) {
3446 if (CurrentToken->is(tok::l_brace)) {
3447 CurrentToken->IsArrayInitializer = true;
3448 if (CurrentToken->Next)
3449 CurrentToken->Next->MustBreakBefore = true;
3450 CurrentToken =
3451 calculateInitializerColumnList(Line, CurrentToken->Next, Depth + 1);
3452 } else {
3453 CurrentToken = CurrentToken->Next;
3458 FormatToken *TokenAnnotator::calculateInitializerColumnList(
3459 AnnotatedLine &Line, FormatToken *CurrentToken, unsigned Depth) const {
3460 while (CurrentToken && CurrentToken != Line.Last) {
3461 if (CurrentToken->is(tok::l_brace))
3462 ++Depth;
3463 else if (CurrentToken->is(tok::r_brace))
3464 --Depth;
3465 if (Depth == 2 && CurrentToken->isOneOf(tok::l_brace, tok::comma)) {
3466 CurrentToken = CurrentToken->Next;
3467 if (!CurrentToken)
3468 break;
3469 CurrentToken->StartsColumn = true;
3470 CurrentToken = CurrentToken->Previous;
3472 CurrentToken = CurrentToken->Next;
3474 return CurrentToken;
3477 unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
3478 const FormatToken &Tok,
3479 bool InFunctionDecl) const {
3480 const FormatToken &Left = *Tok.Previous;
3481 const FormatToken &Right = Tok;
3483 if (Left.is(tok::semi))
3484 return 0;
3486 // Language specific handling.
3487 if (Style.Language == FormatStyle::LK_Java) {
3488 if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_throws))
3489 return 1;
3490 if (Right.is(Keywords.kw_implements))
3491 return 2;
3492 if (Left.is(tok::comma) && Left.NestingLevel == 0)
3493 return 3;
3494 } else if (Style.isJavaScript()) {
3495 if (Right.is(Keywords.kw_function) && Left.isNot(tok::comma))
3496 return 100;
3497 if (Left.is(TT_JsTypeColon))
3498 return 35;
3499 if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
3500 (Right.is(TT_TemplateString) && Right.TokenText.startswith("}"))) {
3501 return 100;
3503 // Prefer breaking call chains (".foo") over empty "{}", "[]" or "()".
3504 if (Left.opensScope() && Right.closesScope())
3505 return 200;
3506 } else if (Style.isProto()) {
3507 if (Right.is(tok::l_square))
3508 return 1;
3509 if (Right.is(tok::period))
3510 return 500;
3513 if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
3514 return 1;
3515 if (Right.is(tok::l_square)) {
3516 if (Left.is(tok::r_square))
3517 return 200;
3518 // Slightly prefer formatting local lambda definitions like functions.
3519 if (Right.is(TT_LambdaLSquare) && Left.is(tok::equal))
3520 return 35;
3521 if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
3522 TT_ArrayInitializerLSquare,
3523 TT_DesignatedInitializerLSquare, TT_AttributeSquare)) {
3524 return 500;
3528 if (Left.is(tok::coloncolon))
3529 return 500;
3530 if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
3531 Right.is(tok::kw_operator)) {
3532 if (Line.startsWith(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
3533 return 3;
3534 if (Left.is(TT_StartOfName))
3535 return 110;
3536 if (InFunctionDecl && Right.NestingLevel == 0)
3537 return Style.PenaltyReturnTypeOnItsOwnLine;
3538 return 200;
3540 if (Right.is(TT_PointerOrReference))
3541 return 190;
3542 if (Right.is(TT_LambdaArrow))
3543 return 110;
3544 if (Left.is(tok::equal) && Right.is(tok::l_brace))
3545 return 160;
3546 if (Left.is(TT_CastRParen))
3547 return 100;
3548 if (Left.isOneOf(tok::kw_class, tok::kw_struct, tok::kw_union))
3549 return 5000;
3550 if (Left.is(tok::comment))
3551 return 1000;
3553 if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon,
3554 TT_CtorInitializerColon)) {
3555 return 2;
3558 if (Right.isMemberAccess()) {
3559 // Breaking before the "./->" of a chained call/member access is reasonably
3560 // cheap, as formatting those with one call per line is generally
3561 // desirable. In particular, it should be cheaper to break before the call
3562 // than it is to break inside a call's parameters, which could lead to weird
3563 // "hanging" indents. The exception is the very last "./->" to support this
3564 // frequent pattern:
3566 // aaaaaaaa.aaaaaaaa.bbbbbbb().ccccccccccccccccccccc(
3567 // dddddddd);
3569 // which might otherwise be blown up onto many lines. Here, clang-format
3570 // won't produce "hanging" indents anyway as there is no other trailing
3571 // call.
3573 // Also apply a higher penalty if it is not a call, as that might lead to a wrapping
3574 // like:
3576 // aaaaaaa
3577 // .aaaaaaaaa.bbbbbbbb(cccccccc);
3578 return !Right.NextOperator || !Right.NextOperator->Previous->closesScope()
3579 ? 150
3580 : 35;
3583 if (Right.is(TT_TrailingAnnotation) &&
3584 (!Right.Next || Right.Next->isNot(tok::l_paren))) {
3585 // Moving trailing annotations to the next line is fine for ObjC method
3586 // declarations.
3587 if (Line.startsWith(TT_ObjCMethodSpecifier))
3588 return 10;
3589 // Generally, breaking before a trailing annotation is bad unless it is
3590 // function-like. It seems to be especially preferable to keep standard
3591 // annotations (i.e. "const", "final" and "override") on the same line.
3592 // Use a slightly higher penalty after ")" so that annotations like
3593 // "const override" are kept together.
3594 bool is_short_annotation = Right.TokenText.size() < 10;
3595 return (Left.is(tok::r_paren) ? 100 : 120) + (is_short_annotation ? 50 : 0);
3598 // In for-loops, prefer breaking at ',' and ';'.
3599 if (Line.startsWith(tok::kw_for) && Left.is(tok::equal))
3600 return 4;
3602 // In Objective-C method expressions, prefer breaking before "param:" over
3603 // breaking after it.
3604 if (Right.is(TT_SelectorName))
3605 return 0;
3606 if (Left.is(tok::colon) && Left.is(TT_ObjCMethodExpr))
3607 return Line.MightBeFunctionDecl ? 50 : 500;
3609 // In Objective-C type declarations, avoid breaking after the category's
3610 // open paren (we'll prefer breaking after the protocol list's opening
3611 // angle bracket, if present).
3612 if (Line.Type == LT_ObjCDecl && Left.is(tok::l_paren) && Left.Previous &&
3613 Left.Previous->isOneOf(tok::identifier, tok::greater)) {
3614 return 500;
3617 if (Left.is(tok::l_paren) && Style.PenaltyBreakOpenParenthesis != 0)
3618 return Style.PenaltyBreakOpenParenthesis;
3619 if (Left.is(tok::l_paren) && InFunctionDecl &&
3620 Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign) {
3621 return 100;
3623 if (Left.is(tok::l_paren) && Left.Previous &&
3624 (Left.Previous->isOneOf(tok::kw_for, tok::kw__Generic) ||
3625 Left.Previous->isIf())) {
3626 return 1000;
3628 if (Left.is(tok::equal) && InFunctionDecl)
3629 return 110;
3630 if (Right.is(tok::r_brace))
3631 return 1;
3632 if (Left.is(TT_TemplateOpener))
3633 return 100;
3634 if (Left.opensScope()) {
3635 // If we aren't aligning after opening parens/braces we can always break
3636 // here unless the style does not want us to place all arguments on the
3637 // next line.
3638 if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign &&
3639 (Left.ParameterCount <= 1 || Style.AllowAllArgumentsOnNextLine)) {
3640 return 0;
3642 if (Left.is(tok::l_brace) && !Style.Cpp11BracedListStyle)
3643 return 19;
3644 return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter
3645 : 19;
3647 if (Left.is(TT_JavaAnnotation))
3648 return 50;
3650 if (Left.is(TT_UnaryOperator))
3651 return 60;
3652 if (Left.isOneOf(tok::plus, tok::comma) && Left.Previous &&
3653 Left.Previous->isLabelString() &&
3654 (Left.NextOperator || Left.OperatorIndex != 0)) {
3655 return 50;
3657 if (Right.is(tok::plus) && Left.isLabelString() &&
3658 (Right.NextOperator || Right.OperatorIndex != 0)) {
3659 return 25;
3661 if (Left.is(tok::comma))
3662 return 1;
3663 if (Right.is(tok::lessless) && Left.isLabelString() &&
3664 (Right.NextOperator || Right.OperatorIndex != 1)) {
3665 return 25;
3667 if (Right.is(tok::lessless)) {
3668 // Breaking at a << is really cheap.
3669 if (!Left.is(tok::r_paren) || Right.OperatorIndex > 0) {
3670 // Slightly prefer to break before the first one in log-like statements.
3671 return 2;
3673 return 1;
3675 if (Left.ClosesTemplateDeclaration)
3676 return Style.PenaltyBreakTemplateDeclaration;
3677 if (Left.ClosesRequiresClause)
3678 return 0;
3679 if (Left.is(TT_ConditionalExpr))
3680 return prec::Conditional;
3681 prec::Level Level = Left.getPrecedence();
3682 if (Level == prec::Unknown)
3683 Level = Right.getPrecedence();
3684 if (Level == prec::Assignment)
3685 return Style.PenaltyBreakAssignment;
3686 if (Level != prec::Unknown)
3687 return Level;
3689 return 3;
3692 bool TokenAnnotator::spaceRequiredBeforeParens(const FormatToken &Right) const {
3693 if (Style.SpaceBeforeParens == FormatStyle::SBPO_Always)
3694 return true;
3695 if (Right.is(TT_OverloadedOperatorLParen) &&
3696 Style.SpaceBeforeParensOptions.AfterOverloadedOperator) {
3697 return true;
3699 if (Style.SpaceBeforeParensOptions.BeforeNonEmptyParentheses &&
3700 Right.ParameterCount > 0) {
3701 return true;
3703 return false;
/// Returns \c true if a space is required between the adjacent tokens
/// \p Left and \p Right of \p Line.
///
/// The checks run in order and the first one that matches decides the result,
/// so earlier rules take precedence over later ones. When no rule matches, a
/// space is required (the final \c return \c true).
3706 bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
3707 const FormatToken &Left,
3708 const FormatToken &Right) const {
// "return" is followed by a space unless ';', ')' or '##' comes next.
3709 if (Left.is(tok::kw_return) &&
3710 !Right.isOneOf(tok::semi, tok::r_paren, tok::hashhash)) {
3711 return true;
// "throw" directly in front of a parenthesized cast keeps a space.
3713 if (Left.is(tok::kw_throw) && Right.is(tok::l_paren) && Right.MatchingParen &&
3714 Right.MatchingParen->is(TT_CastRParen)) {
3715 return true;
// Java "assert" is always followed by a space.
3717 if (Left.is(Keywords.kw_assert) && Style.Language == FormatStyle::LK_Java)
3718 return true;
// Objective-C "@property" followed by a space when the style asks for it.
3719 if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty &&
3720 Left.Tok.getObjCKeywordID() == tok::objc_property) {
3721 return true;
// '#' and '##': the answer depends only on whether the neighbor is a '#'.
3723 if (Right.is(tok::hashhash))
3724 return Left.is(tok::hash);
3725 if (Left.isOneOf(tok::hashhash, tok::hash))
3726 return Right.is(tok::hash);
// Empty "()" and empty non-block "{}" follow SpaceInEmptyParentheses.
3727 if ((Left.is(tok::l_paren) && Right.is(tok::r_paren)) ||
3728 (Left.is(tok::l_brace) && Left.isNot(BK_Block) &&
3729 Right.is(tok::r_brace) && Right.isNot(BK_Block))) {
3730 return Style.SpaceInEmptyParentheses;
// With SpacesInConditionalStatement, find the opening parenthesis this
// position sits next to (either Left itself or the partner of a closing
// Right) and add a space if it belongs to a condition.
3732 if (Style.SpacesInConditionalStatement) {
3733 const FormatToken *LeftParen = nullptr;
3734 if (Left.is(tok::l_paren))
3735 LeftParen = &Left;
3736 else if (Right.is(tok::r_paren) && Right.MatchingParen)
3737 LeftParen = Right.MatchingParen;
3738 if (LeftParen) {
3739 if (LeftParen->is(TT_ConditionLParen))
3740 return true;
3741 if (LeftParen->Previous && isKeywordWithCondition(*LeftParen->Previous))
3742 return true;
3746 // trailing return type 'auto': []() -> auto {}, auto foo() -> auto {}
3747 if (Left.is(tok::kw_auto) && Right.isOneOf(TT_LambdaLBrace, TT_FunctionLBrace,
3748 // function return type 'auto'
3749 TT_FunctionTypeLParen)) {
3750 return true;
3753 // auto{x} auto(x)
3754 if (Left.is(tok::kw_auto) && Right.isOneOf(tok::l_paren, tok::l_brace))
3755 return false;
3757 // operator co_await(x)
3758 if (Right.is(tok::l_paren) && Left.is(tok::kw_co_await) && Left.Previous &&
3759 Left.Previous->is(tok::kw_operator)) {
3760 return false;
3762 // co_await (x), co_yield (x), co_return (x)
3763 if (Left.isOneOf(tok::kw_co_await, tok::kw_co_yield, tok::kw_co_return) &&
3764 !Right.isOneOf(tok::semi, tok::r_paren)) {
3765 return true;
// Directly inside any remaining parentheses: cast parentheses use
// SpacesInCStyleCastParentheses, all others use SpacesInParentheses.
3768 if (Left.is(tok::l_paren) || Right.is(tok::r_paren)) {
3769 return (Right.is(TT_CastRParen) ||
3770 (Left.MatchingParen && Left.MatchingParen->is(TT_CastRParen)))
3771 ? Style.SpacesInCStyleCastParentheses
3772 : Style.SpacesInParentheses;
// Never a space in front of ';' or ','.
3774 if (Right.isOneOf(tok::semi, tok::comma))
3775 return false;
// '<' in an Objective-C declaration: no space when the matching closer is
// followed by ':'; otherwise ObjCSpaceBeforeProtocolList decides.
3776 if (Right.is(tok::less) && Line.Type == LT_ObjCDecl) {
3777 bool IsLightweightGeneric = Right.MatchingParen &&
3778 Right.MatchingParen->Next &&
3779 Right.MatchingParen->Next->is(tok::colon);
3780 return !IsLightweightGeneric && Style.ObjCSpaceBeforeProtocolList;
3782 if (Right.is(tok::less) && Left.is(tok::kw_template))
3783 return Style.SpaceAfterTemplateKeyword;
// No space after '!' or '~'.
3784 if (Left.isOneOf(tok::exclaim, tok::tilde))
3785 return false;
// No space between '@' and the literal, identifier, '(' or '{' it prefixes.
3786 if (Left.is(tok::at) &&
3787 Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
3788 tok::numeric_constant, tok::l_paren, tok::l_brace,
3789 tok::kw_true, tok::kw_false)) {
3790 return false;
// A space follows ':' unless it is part of an Objective-C method expression.
3792 if (Left.is(tok::colon))
3793 return !Left.is(TT_ObjCMethodExpr);
3794 if (Left.is(tok::coloncolon))
3795 return false;
3796 if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less)) {
3797 if (Style.Language == FormatStyle::LK_TextProto ||
3798 (Style.Language == FormatStyle::LK_Proto &&
3799 (Left.is(TT_DictLiteral) || Right.is(TT_DictLiteral)))) {
3800 // Format empty list as `<>`.
3801 if (Left.is(tok::less) && Right.is(tok::greater))
3802 return false;
3803 return !Style.Cpp11BracedListStyle;
3805 // Don't attempt to format operator<(), as it is handled later.
3806 if (Right.isNot(TT_OverloadedOperatorLParen))
3807 return false;
// A space before "..." only after a literal or after "case <identifier>".
3809 if (Right.is(tok::ellipsis)) {
3810 return Left.Tok.isLiteral() || (Left.is(tok::identifier) && Left.Previous &&
3811 Left.Previous->is(tok::kw_case));
3813 if (Left.is(tok::l_square) && Right.is(tok::amp))
3814 return Style.SpacesInSquareBrackets;
// Rules for the gap in front of a pointer/reference token.
3815 if (Right.is(TT_PointerOrReference)) {
3816 if (Left.is(tok::r_paren) && Line.MightBeFunctionDecl) {
3817 if (!Left.MatchingParen)
3818 return true;
3819 FormatToken *TokenBeforeMatchingParen =
3820 Left.MatchingParen->getPreviousNonComment();
3821 if (!TokenBeforeMatchingParen || !Left.is(TT_TypeDeclarationParen))
3822 return true;
3824 // Add a space if the previous token is a pointer qualifier or the closing
3825 // parenthesis of __attribute__(()) expression and the style requires spaces
3826 // after pointer qualifiers.
3827 if ((Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_After ||
3828 Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both) &&
3829 (Left.is(TT_AttributeParen) ||
3830 Left.canBePointerOrReferenceQualifier())) {
3831 return true;
3833 if (Left.Tok.isLiteral())
3834 return true;
3835 // for (auto a = 0, b = 0; const auto & c : {1, 2, 3})
3836 if (Left.isTypeOrIdentifier() && Right.Next && Right.Next->Next &&
3837 Right.Next->Next->is(TT_RangeBasedForLoopColon)) {
3838 return getTokenPointerOrReferenceAlignment(Right) !=
3839 FormatStyle::PAS_Left;
// General case: never after another pointer/reference token or '('; otherwise
// a space unless the alignment is PAS_Left, except that a multi-variable
// declaration statement forces the space at the nesting levels tested here.
3841 return !Left.isOneOf(TT_PointerOrReference, tok::l_paren) &&
3842 (getTokenPointerOrReferenceAlignment(Right) !=
3843 FormatStyle::PAS_Left ||
3844 (Line.IsMultiVariableDeclStmt &&
3845 (Left.NestingLevel == 0 ||
3846 (Left.NestingLevel == 1 && startsWithInitStatement(Line)))));
3848 if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) &&
3849 (!Left.is(TT_PointerOrReference) ||
3850 (getTokenPointerOrReferenceAlignment(Left) != FormatStyle::PAS_Right &&
3851 !Line.IsMultiVariableDeclStmt))) {
3852 return true;
// Rules for the gap after a pointer/reference token.
3854 if (Left.is(TT_PointerOrReference)) {
3855 // Add a space if the next token is a pointer qualifier and the style
3856 // requires spaces before pointer qualifiers.
3857 if ((Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Before ||
3858 Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both) &&
3859 Right.canBePointerOrReferenceQualifier()) {
3860 return true;
3862 // & 1
3863 if (Right.Tok.isLiteral())
3864 return true;
3865 // & /* comment
3866 if (Right.is(TT_BlockComment))
3867 return true;
3868 // foo() -> const Bar * override/final
3869 // S::foo() & noexcept/requires
3870 if (Right.isOneOf(Keywords.kw_override, Keywords.kw_final, tok::kw_noexcept,
3871 TT_RequiresClause) &&
3872 !Right.is(TT_StartOfName)) {
3873 return true;
3875 // & {
3876 if (Right.is(tok::l_brace) && Right.is(BK_Block))
3877 return true;
3878 // for (auto a = 0, b = 0; const auto& c : {1, 2, 3})
3879 if (Left.Previous && Left.Previous->isTypeOrIdentifier() && Right.Next &&
3880 Right.Next->is(TT_RangeBasedForLoopColon)) {
3881 return getTokenPointerOrReferenceAlignment(Left) !=
3882 FormatStyle::PAS_Right;
3884 if (Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare,
3885 tok::l_paren)) {
3886 return false;
3888 if (getTokenPointerOrReferenceAlignment(Left) == FormatStyle::PAS_Right)
3889 return false;
3890 // FIXME: Setting IsMultiVariableDeclStmt for the whole line is error-prone,
3891 // because it does not take into account nested scopes like lambdas.
3892 // In multi-variable declaration statements, attach */& to the variable
3893 // independently of the style. However, avoid doing it if we are in a nested
3894 // scope, e.g. lambda. We still need to special-case statements with
3895 // initializers.
3896 if (Line.IsMultiVariableDeclStmt &&
3897 (Left.NestingLevel == Line.First->NestingLevel ||
3898 ((Left.NestingLevel == Line.First->NestingLevel + 1) &&
3899 startsWithInitStatement(Line)))) {
3900 return false;
// Otherwise a space, unless the pointer/reference has no predecessor or
// directly follows '(', '::' or '['.
3902 return Left.Previous && !Left.Previous->isOneOf(
3903 tok::l_paren, tok::coloncolon, tok::l_square);
3905 // Ensure right pointer alignment with ellipsis e.g. int *...P
3906 if (Left.is(tok::ellipsis) && Left.Previous &&
3907 Left.Previous->isOneOf(tok::star, tok::amp, tok::ampamp)) {
3908 return Style.PointerAlignment != FormatStyle::PAS_Right;
// No space in "(*".
3911 if (Right.is(tok::star) && Left.is(tok::l_paren))
3912 return false;
// No space between '*' and a following '*', '&' or '&&'.
3913 if (Left.is(tok::star) && Right.isOneOf(tok::star, tok::amp, tok::ampamp))
3914 return false;
// For a remaining '*', '&' or '&&', walk backwards over what can form a type
// name (identifiers, simple type specifiers, template argument lists, "::")
// and stop at "operator" or at the first token that is none of those.
3915 if (Right.isOneOf(tok::star, tok::amp, tok::ampamp)) {
3916 const FormatToken *Previous = &Left;
3917 while (Previous && !Previous->is(tok::kw_operator)) {
3918 if (Previous->is(tok::identifier) || Previous->isSimpleTypeSpecifier()) {
3919 Previous = Previous->getPreviousNonComment();
3920 continue;
3922 if (Previous->is(TT_TemplateCloser) && Previous->MatchingParen) {
3923 Previous = Previous->MatchingParen->getPreviousNonComment();
3924 continue;
3926 if (Previous->is(tok::coloncolon)) {
3927 Previous = Previous->getPreviousNonComment();
3928 continue;
3930 break;
3932 // Space between the type and the * in:
3933 // operator void*()
3934 // operator char*()
3935 // operator void const*()
3936 // operator void volatile*()
3937 // operator /*comment*/ const char*()
3938 // operator volatile /*comment*/ char*()
3939 // operator Foo*()
3940 // operator C<T>*()
3941 // operator std::Foo*()
3942 // operator C<T>::D<U>*()
3943 // dependent on PointerAlignment style.
3944 if (Previous) {
3945 if (Previous->endsSequence(tok::kw_operator))
3946 return Style.PointerAlignment != FormatStyle::PAS_Left;
3947 if (Previous->is(tok::kw_const) || Previous->is(tok::kw_volatile)) {
3948 return (Style.PointerAlignment != FormatStyle::PAS_Left) ||
3949 (Style.SpaceAroundPointerQualifiers ==
3950 FormatStyle::SAPQ_After) ||
3951 (Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both);
3955 if (Style.isCSharp() && Left.is(Keywords.kw_is) && Right.is(tok::l_square))
3956 return true;
// Helper shared by the '[' and ']' rules below: whether the inside of an
// array-initializer bracket gets spaces.
3957 const auto SpaceRequiredForArrayInitializerLSquare =
3958 [](const FormatToken &LSquareTok, const FormatStyle &Style) {
3959 return Style.SpacesInContainerLiterals ||
3960 ((Style.Language == FormatStyle::LK_Proto ||
3961 Style.Language == FormatStyle::LK_TextProto) &&
3962 !Style.Cpp11BracedListStyle &&
3963 LSquareTok.endsSequence(tok::l_square, tok::colon,
3964 TT_SelectorName));
// Just inside '['.
3966 if (Left.is(tok::l_square)) {
3967 return (Left.is(TT_ArrayInitializerLSquare) && Right.isNot(tok::r_square) &&
3968 SpaceRequiredForArrayInitializerLSquare(Left, Style)) ||
3969 (Left.isOneOf(TT_ArraySubscriptLSquare, TT_StructuredBindingLSquare,
3970 TT_LambdaLSquare) &&
3971 Style.SpacesInSquareBrackets && Right.isNot(tok::r_square));
// Just before ']': decided by the kind of the matching '['.
3973 if (Right.is(tok::r_square)) {
3974 return Right.MatchingParen &&
3975 ((Right.MatchingParen->is(TT_ArrayInitializerLSquare) &&
3976 SpaceRequiredForArrayInitializerLSquare(*Right.MatchingParen,
3977 Style)) ||
3978 (Style.SpacesInSquareBrackets &&
3979 Right.MatchingParen->isOneOf(TT_ArraySubscriptLSquare,
3980 TT_StructuredBindingLSquare,
3981 TT_LambdaLSquare)) ||
3982 Right.MatchingParen->is(TT_AttributeParen));
// No space before a '[' except for the bracket kinds and left-hand tokens
// excluded by this condition.
3984 if (Right.is(tok::l_square) &&
3985 !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
3986 TT_DesignatedInitializerLSquare,
3987 TT_StructuredBindingLSquare, TT_AttributeSquare) &&
3988 !Left.isOneOf(tok::numeric_constant, TT_DictLiteral) &&
3989 !(!Left.is(tok::r_square) && Style.SpaceBeforeSquareBrackets &&
3990 Right.is(TT_ArraySubscriptLSquare))) {
3991 return false;
3993 if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
3994 return !Left.Children.empty(); // No spaces in "{}".
// Inside non-block braces: with Cpp11BracedListStyle the answer follows
// SpacesInParentheses, otherwise there is always a space.
3995 if ((Left.is(tok::l_brace) && Left.isNot(BK_Block)) ||
3996 (Right.is(tok::r_brace) && Right.MatchingParen &&
3997 Right.MatchingParen->isNot(BK_Block))) {
3998 return Style.Cpp11BracedListStyle ? Style.SpacesInParentheses : true;
4000 if (Left.is(TT_BlockComment)) {
4001 // No whitespace in x(/*foo=*/1), except for JavaScript.
4002 return Style.isJavaScript() || !Left.TokenText.endswith("=*/");
4005 // Space between template and attribute.
4006 // e.g. template <typename T> [[nodiscard]] ...
4007 if (Left.is(TT_TemplateCloser) && Right.is(TT_AttributeSquare))
4008 return true;
4009 // Space before parentheses common for all languages
4010 if (Right.is(tok::l_paren)) {
4011 if (Left.is(TT_TemplateCloser) && Right.isNot(TT_FunctionTypeLParen))
4012 return spaceRequiredBeforeParens(Right);
4013 if (Left.isOneOf(TT_RequiresClause,
4014 TT_RequiresClauseInARequiresExpression)) {
4015 return Style.SpaceBeforeParensOptions.AfterRequiresInClause ||
4016 spaceRequiredBeforeParens(Right);
4018 if (Left.is(TT_RequiresExpression)) {
4019 return Style.SpaceBeforeParensOptions.AfterRequiresInExpression ||
4020 spaceRequiredBeforeParens(Right);
// Always a space after the closing token of an attribute.
4022 if ((Left.is(tok::r_paren) && Left.is(TT_AttributeParen)) ||
4023 (Left.is(tok::r_square) && Left.is(TT_AttributeSquare))) {
4024 return true;
4026 if (Left.is(TT_ForEachMacro)) {
4027 return Style.SpaceBeforeParensOptions.AfterForeachMacros ||
4028 spaceRequiredBeforeParens(Right);
4030 if (Left.is(TT_IfMacro)) {
4031 return Style.SpaceBeforeParensOptions.AfterIfMacros ||
4032 spaceRequiredBeforeParens(Right);
4034 if (Line.Type == LT_ObjCDecl)
4035 return true;
4036 if (Left.is(tok::semi))
4037 return true;
// Control-statement keywords and condition parentheses.
4038 if (Left.isOneOf(tok::pp_elif, tok::kw_for, tok::kw_while, tok::kw_switch,
4039 tok::kw_case, TT_ForEachMacro, TT_ObjCForIn) ||
4040 Left.isIf(Line.Type != LT_PreprocessorDirective) ||
4041 Right.is(TT_ConditionLParen)) {
4042 return Style.SpaceBeforeParensOptions.AfterControlStatements ||
4043 spaceRequiredBeforeParens(Right);
4046 // TODO add Operator overloading specific Options to
4047 // SpaceBeforeParensOptions
4048 if (Right.is(TT_OverloadedOperatorLParen))
4049 return spaceRequiredBeforeParens(Right);
4050 // Function declaration or definition
4051 if (Line.MightBeFunctionDecl && (Left.is(TT_FunctionDeclarationName))) {
4052 if (Line.mightBeFunctionDefinition()) {
4053 return Style.SpaceBeforeParensOptions.AfterFunctionDefinitionName ||
4054 spaceRequiredBeforeParens(Right);
4055 } else {
4056 return Style.SpaceBeforeParensOptions.AfterFunctionDeclarationName ||
4057 spaceRequiredBeforeParens(Right);
4060 // Lambda
4061 if (Line.Type != LT_PreprocessorDirective && Left.is(tok::r_square) &&
4062 Left.MatchingParen && Left.MatchingParen->is(TT_LambdaLSquare)) {
4063 return Style.SpaceBeforeParensOptions.AfterFunctionDefinitionName ||
4064 spaceRequiredBeforeParens(Right);
// Keyword rules that are skipped when the keyword follows a '.'.
4066 if (!Left.Previous || Left.Previous->isNot(tok::period)) {
4067 if (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch)) {
4068 return Style.SpaceBeforeParensOptions.AfterControlStatements ||
4069 spaceRequiredBeforeParens(Right);
4071 if (Left.isOneOf(tok::kw_new, tok::kw_delete)) {
4072 return ((!Line.MightBeFunctionDecl || !Left.Previous) &&
4073 Style.SpaceBeforeParens != FormatStyle::SBPO_Never) ||
4074 spaceRequiredBeforeParens(Right);
// "delete[]" followed by '('.
4077 if (Left.is(tok::r_square) && Left.MatchingParen &&
4078 Left.MatchingParen->Previous &&
4079 Left.MatchingParen->Previous->is(tok::kw_delete)) {
4080 return (Style.SpaceBeforeParens != FormatStyle::SBPO_Never) ||
4081 spaceRequiredBeforeParens(Right);
4084 // Handle builtins like identifiers.
4085 if (Line.Type != LT_PreprocessorDirective &&
4086 (Left.Tok.getIdentifierInfo() || Left.is(tok::r_paren))) {
4087 return spaceRequiredBeforeParens(Right);
// No rule above asked for a space before this '('.
4089 return false;
// No space between '@' and an Objective-C keyword.
4091 if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword)
4092 return false;
// A unary operator is preceded by a space unless it follows '(', '[', '@' or
// the ':' of an Objective-C method expression.
4093 if (Right.is(TT_UnaryOperator)) {
4094 return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) &&
4095 (Left.isNot(tok::colon) || Left.isNot(TT_ObjCMethodExpr));
4097 // No space between the variable name and the initializer list.
4098 // A a1{1};
4099 // Verilog doesn't have such syntax, but it has word operators that are C++
4100 // identifiers like `a inside {b, c}`. So the rule is not applicable.
4101 if (!Style.isVerilog() &&
4102 (Left.isOneOf(tok::identifier, tok::greater, tok::r_square,
4103 tok::r_paren) ||
4104 Left.isSimpleTypeSpecifier()) &&
4105 Right.is(tok::l_brace) && Right.getNextNonComment() &&
4106 Right.isNot(BK_Block)) {
4107 return false;
// No spaces around '.'.
4109 if (Left.is(tok::period) || Right.is(tok::period))
4110 return false;
4111 // u#str, U#str, L#str, u8#str
4112 // uR#str, UR#str, LR#str, u8R#str
4113 if (Right.is(tok::hash) && Left.is(tok::identifier) &&
4114 (Left.TokenText == "L" || Left.TokenText == "u" ||
4115 Left.TokenText == "U" || Left.TokenText == "u8" ||
4116 Left.TokenText == "LR" || Left.TokenText == "uR" ||
4117 Left.TokenText == "UR" || Left.TokenText == "u8R")) {
4118 return false;
4120 if (Left.is(TT_TemplateCloser) && Left.MatchingParen &&
4121 Left.MatchingParen->Previous &&
4122 (Left.MatchingParen->Previous->is(tok::period) ||
4123 Left.MatchingParen->Previous->is(tok::coloncolon))) {
4124 // Java call to generic function with explicit type:
4125 // A.<B<C<...>>>DoSomething();
4126 // A::<B<C<...>>>DoSomething(); // With a Java 8 method reference.
4127 return false;
// No space between a template closer and '['.
4129 if (Left.is(TT_TemplateCloser) && Right.is(tok::l_square))
4130 return false;
4131 if (Left.is(tok::l_brace) && Left.endsSequence(TT_DictLiteral, tok::at)) {
4132 // Objective-C dictionary literal -> no space after opening brace.
4133 return false;
4135 if (Right.is(tok::r_brace) && Right.MatchingParen &&
4136 Right.MatchingParen->endsSequence(TT_DictLiteral, tok::at)) {
4137 // Objective-C dictionary literal -> no space before closing brace.
4138 return false;
4140 if (Right.getType() == TT_TrailingAnnotation &&
4141 Right.isOneOf(tok::amp, tok::ampamp) &&
4142 Left.isOneOf(tok::kw_const, tok::kw_volatile) &&
4143 (!Right.Next || Right.Next->is(tok::semi))) {
4144 // Match const and volatile ref-qualifiers without any additional
4145 // qualifiers such as
4146 // void Fn() const &;
4147 return getTokenReferenceAlignment(Right) != FormatStyle::PAS_Left;
// Default: separate the two tokens with a space.
4150 return true;
4153 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
4154 const FormatToken &Right) const {
4155 const FormatToken &Left = *Right.Previous;
4157 // If the token is finalized don't touch it (as it could be in a
4158 // clang-format-off section).
4159 if (Left.Finalized)
4160 return Right.hasWhitespaceBefore();
4162 // Never ever merge two words.
4163 if (Keywords.isWordLike(Right) && Keywords.isWordLike(Left))
4164 return true;
4166 // Leave a space between * and /* to avoid C4138 `comment end` found outside
4167 // of comment.
4168 if (Left.is(tok::star) && Right.is(tok::comment))
4169 return true;
4171 if (Style.isCpp()) {
4172 if (Left.is(TT_OverloadedOperator) &&
4173 Right.isOneOf(TT_TemplateOpener, TT_TemplateCloser)) {
4174 return true;
4176 // Space between UDL and dot: auto b = 4s .count();
4177 if (Right.is(tok::period) && Left.is(tok::numeric_constant))
4178 return true;
4179 // Space between import <iostream>.
4180 // or import .....;
4181 if (Left.is(Keywords.kw_import) && Right.isOneOf(tok::less, tok::ellipsis))
4182 return true;
4183 // Space between `module :` and `import :`.
4184 if (Left.isOneOf(Keywords.kw_module, Keywords.kw_import) &&
4185 Right.is(TT_ModulePartitionColon)) {
4186 return true;
4188 // No space between import foo:bar but keep a space between import :bar;
4189 if (Left.is(tok::identifier) && Right.is(TT_ModulePartitionColon))
4190 return false;
4191 // No space between :bar;
4192 if (Left.is(TT_ModulePartitionColon) &&
4193 Right.isOneOf(tok::identifier, tok::kw_private)) {
4194 return false;
4196 if (Left.is(tok::ellipsis) && Right.is(tok::identifier) &&
4197 Line.First->is(Keywords.kw_import)) {
4198 return false;
4200 // Space in __attribute__((attr)) ::type.
4201 if (Left.is(TT_AttributeParen) && Right.is(tok::coloncolon))
4202 return true;
4204 if (Left.is(tok::kw_operator))
4205 return Right.is(tok::coloncolon);
4206 if (Right.is(tok::l_brace) && Right.is(BK_BracedInit) &&
4207 !Left.opensScope() && Style.SpaceBeforeCpp11BracedList) {
4208 return true;
4210 if (Left.is(tok::less) && Left.is(TT_OverloadedOperator) &&
4211 Right.is(TT_TemplateOpener)) {
4212 return true;
4214 } else if (Style.Language == FormatStyle::LK_Proto ||
4215 Style.Language == FormatStyle::LK_TextProto) {
4216 if (Right.is(tok::period) &&
4217 Left.isOneOf(Keywords.kw_optional, Keywords.kw_required,
4218 Keywords.kw_repeated, Keywords.kw_extend)) {
4219 return true;
4221 if (Right.is(tok::l_paren) &&
4222 Left.isOneOf(Keywords.kw_returns, Keywords.kw_option)) {
4223 return true;
4225 if (Right.isOneOf(tok::l_brace, tok::less) && Left.is(TT_SelectorName))
4226 return true;
4227 // Slashes occur in text protocol extension syntax: [type/type] { ... }.
4228 if (Left.is(tok::slash) || Right.is(tok::slash))
4229 return false;
4230 if (Left.MatchingParen &&
4231 Left.MatchingParen->is(TT_ProtoExtensionLSquare) &&
4232 Right.isOneOf(tok::l_brace, tok::less)) {
4233 return !Style.Cpp11BracedListStyle;
4235 // A percent is probably part of a formatting specification, such as %lld.
4236 if (Left.is(tok::percent))
4237 return false;
4238 // Preserve the existence of a space before a percent for cases like 0x%04x
4239 // and "%d %d"
4240 if (Left.is(tok::numeric_constant) && Right.is(tok::percent))
4241 return Right.hasWhitespaceBefore();
4242 } else if (Style.isJson()) {
4243 if (Right.is(tok::colon) && Left.is(tok::string_literal))
4244 return Style.SpaceBeforeJsonColon;
4245 } else if (Style.isCSharp()) {
4246 // Require spaces around '{' and before '}' unless they appear in
4247 // interpolated strings. Interpolated strings are merged into a single token
4248 // so cannot have spaces inserted by this function.
4250 // No space between 'this' and '['
4251 if (Left.is(tok::kw_this) && Right.is(tok::l_square))
4252 return false;
4254 // No space between 'new' and '('
4255 if (Left.is(tok::kw_new) && Right.is(tok::l_paren))
4256 return false;
4258 // Space before { (including space within '{ {').
4259 if (Right.is(tok::l_brace))
4260 return true;
4262 // Spaces inside braces.
4263 if (Left.is(tok::l_brace) && Right.isNot(tok::r_brace))
4264 return true;
4266 if (Left.isNot(tok::l_brace) && Right.is(tok::r_brace))
4267 return true;
4269 // Spaces around '=>'.
4270 if (Left.is(TT_FatArrow) || Right.is(TT_FatArrow))
4271 return true;
4273 // No spaces around attribute target colons
4274 if (Left.is(TT_AttributeColon) || Right.is(TT_AttributeColon))
4275 return false;
4277 // space between type and variable e.g. Dictionary<string,string> foo;
4278 if (Left.is(TT_TemplateCloser) && Right.is(TT_StartOfName))
4279 return true;
4281 // spaces inside square brackets.
4282 if (Left.is(tok::l_square) || Right.is(tok::r_square))
4283 return Style.SpacesInSquareBrackets;
4285 // No space before ? in nullable types.
4286 if (Right.is(TT_CSharpNullable))
4287 return false;
4289 // No space before null forgiving '!'.
4290 if (Right.is(TT_NonNullAssertion))
4291 return false;
4293 // No space between consecutive commas '[,,]'.
4294 if (Left.is(tok::comma) && Right.is(tok::comma))
4295 return false;
4297 // space after var in `var (key, value)`
4298 if (Left.is(Keywords.kw_var) && Right.is(tok::l_paren))
4299 return true;
4301 // space between keywords and paren e.g. "using ("
4302 if (Right.is(tok::l_paren)) {
4303 if (Left.isOneOf(tok::kw_using, Keywords.kw_async, Keywords.kw_when,
4304 Keywords.kw_lock)) {
4305 return Style.SpaceBeforeParensOptions.AfterControlStatements ||
4306 spaceRequiredBeforeParens(Right);
4310 // space between method modifier and opening parenthesis of a tuple return
4311 // type
4312 if (Left.isOneOf(tok::kw_public, tok::kw_private, tok::kw_protected,
4313 tok::kw_virtual, tok::kw_extern, tok::kw_static,
4314 Keywords.kw_internal, Keywords.kw_abstract,
4315 Keywords.kw_sealed, Keywords.kw_override,
4316 Keywords.kw_async, Keywords.kw_unsafe) &&
4317 Right.is(tok::l_paren)) {
4318 return true;
4320 } else if (Style.isJavaScript()) {
4321 if (Left.is(TT_FatArrow))
4322 return true;
4323 // for await ( ...
4324 if (Right.is(tok::l_paren) && Left.is(Keywords.kw_await) && Left.Previous &&
4325 Left.Previous->is(tok::kw_for)) {
4326 return true;
4328 if (Left.is(Keywords.kw_async) && Right.is(tok::l_paren) &&
4329 Right.MatchingParen) {
4330 const FormatToken *Next = Right.MatchingParen->getNextNonComment();
4331 // An async arrow function, for example: `x = async () => foo();`,
4332 // as opposed to calling a function called async: `x = async();`
4333 if (Next && Next->is(TT_FatArrow))
4334 return true;
4336 if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
4337 (Right.is(TT_TemplateString) && Right.TokenText.startswith("}"))) {
4338 return false;
4340 // In tagged template literals ("html`bar baz`"), there is no space between
4341 // the tag identifier and the template string.
4342 if (Keywords.IsJavaScriptIdentifier(Left,
4343 /* AcceptIdentifierName= */ false) &&
4344 Right.is(TT_TemplateString)) {
4345 return false;
4347 if (Right.is(tok::star) &&
4348 Left.isOneOf(Keywords.kw_function, Keywords.kw_yield)) {
4349 return false;
4351 if (Right.isOneOf(tok::l_brace, tok::l_square) &&
4352 Left.isOneOf(Keywords.kw_function, Keywords.kw_yield,
4353 Keywords.kw_extends, Keywords.kw_implements)) {
4354 return true;
4356 if (Right.is(tok::l_paren)) {
4357 // JS methods can use some keywords as names (e.g. `delete()`).
4358 if (Line.MustBeDeclaration && Left.Tok.getIdentifierInfo())
4359 return false;
4360 // Valid JS method names can include keywords, e.g. `foo.delete()` or
4361 // `bar.instanceof()`. Recognize call positions by preceding period.
4362 if (Left.Previous && Left.Previous->is(tok::period) &&
4363 Left.Tok.getIdentifierInfo()) {
4364 return false;
4366 // Additional unary JavaScript operators that need a space after.
4367 if (Left.isOneOf(tok::kw_throw, Keywords.kw_await, Keywords.kw_typeof,
4368 tok::kw_void)) {
4369 return true;
4372 // `foo as const;` casts into a const type.
4373 if (Left.endsSequence(tok::kw_const, Keywords.kw_as))
4374 return false;
4375 if ((Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in,
4376 tok::kw_const) ||
4377 // "of" is only a keyword if it appears after another identifier
4378 // (e.g. as "const x of y" in a for loop), or after a destructuring
4379 // operation (const [x, y] of z, const {a, b} of c).
4380 (Left.is(Keywords.kw_of) && Left.Previous &&
4381 (Left.Previous->is(tok::identifier) ||
4382 Left.Previous->isOneOf(tok::r_square, tok::r_brace)))) &&
4383 (!Left.Previous || !Left.Previous->is(tok::period))) {
4384 return true;
4386 if (Left.isOneOf(tok::kw_for, Keywords.kw_as) && Left.Previous &&
4387 Left.Previous->is(tok::period) && Right.is(tok::l_paren)) {
4388 return false;
4390 if (Left.is(Keywords.kw_as) &&
4391 Right.isOneOf(tok::l_square, tok::l_brace, tok::l_paren)) {
4392 return true;
4394 if (Left.is(tok::kw_default) && Left.Previous &&
4395 Left.Previous->is(tok::kw_export)) {
4396 return true;
4398 if (Left.is(Keywords.kw_is) && Right.is(tok::l_brace))
4399 return true;
4400 if (Right.isOneOf(TT_JsTypeColon, TT_JsTypeOptionalQuestion))
4401 return false;
4402 if (Left.is(TT_JsTypeOperator) || Right.is(TT_JsTypeOperator))
4403 return false;
4404 if ((Left.is(tok::l_brace) || Right.is(tok::r_brace)) &&
4405 Line.First->isOneOf(Keywords.kw_import, tok::kw_export)) {
4406 return false;
4408 if (Left.is(tok::ellipsis))
4409 return false;
4410 if (Left.is(TT_TemplateCloser) &&
4411 !Right.isOneOf(tok::equal, tok::l_brace, tok::comma, tok::l_square,
4412 Keywords.kw_implements, Keywords.kw_extends)) {
4413 // Type assertions ('<type>expr') are not followed by whitespace. Other
4414 // locations that should have whitespace following are identified by the
4415 // above set of follower tokens.
4416 return false;
4418 if (Right.is(TT_NonNullAssertion))
4419 return false;
4420 if (Left.is(TT_NonNullAssertion) &&
4421 Right.isOneOf(Keywords.kw_as, Keywords.kw_in)) {
4422 return true; // "x! as string", "x! in y"
4424 } else if (Style.Language == FormatStyle::LK_Java) {
4425 if (Left.is(tok::r_square) && Right.is(tok::l_brace))
4426 return true;
4427 if (Left.is(Keywords.kw_synchronized) && Right.is(tok::l_paren)) {
4428 return Style.SpaceBeforeParensOptions.AfterControlStatements ||
4429 spaceRequiredBeforeParens(Right);
4431 if ((Left.isOneOf(tok::kw_static, tok::kw_public, tok::kw_private,
4432 tok::kw_protected) ||
4433 Left.isOneOf(Keywords.kw_final, Keywords.kw_abstract,
4434 Keywords.kw_native)) &&
4435 Right.is(TT_TemplateOpener)) {
4436 return true;
4438 } else if (Style.isVerilog()) {
4439 // An escaped identifier ends with whitespace.
4440 if (Style.isVerilog() && Left.is(tok::identifier) &&
4441 Left.TokenText[0] == '\\') {
4442 return true;
4444 // Add space between things in a primitive's state table unless in a
4445 // transition like `(0?)`.
4446 if ((Left.is(TT_VerilogTableItem) &&
4447 !Right.isOneOf(tok::r_paren, tok::semi)) ||
4448 (Right.is(TT_VerilogTableItem) && Left.isNot(tok::l_paren))) {
4449 const FormatToken *Next = Right.getNextNonComment();
4450 return !(Next && Next->is(tok::r_paren));
4452 // Don't add space within a delay like `#0`.
4453 if (Left.isNot(TT_BinaryOperator) &&
4454 Left.isOneOf(Keywords.kw_verilogHash, Keywords.kw_verilogHashHash)) {
4455 return false;
4457 // Add space after a delay.
4458 if (!Right.is(tok::semi) &&
4459 (Left.endsSequence(tok::numeric_constant, Keywords.kw_verilogHash) ||
4460 Left.endsSequence(tok::numeric_constant,
4461 Keywords.kw_verilogHashHash) ||
4462 (Left.is(tok::r_paren) && Left.MatchingParen &&
4463 Left.MatchingParen->endsSequence(tok::l_paren, tok::at)))) {
4464 return true;
4466 // Don't add embedded spaces in a number literal like `16'h1?ax` or an array
4467 // literal like `'{}`.
4468 if (Left.is(Keywords.kw_apostrophe) ||
4469 (Left.is(TT_VerilogNumberBase) && Right.is(tok::numeric_constant))) {
4470 return false;
4472 // Don't add spaces between two at signs. Like in a coverage event.
4473 // Don't add spaces between at and a sensitivity list like
4474 // `@(posedge clk)`.
4475 if (Left.is(tok::at) && Right.isOneOf(tok::l_paren, tok::star, tok::at))
4476 return false;
4477 // Add space between the type name and dimension like `logic [1:0]`.
4478 if (Right.is(tok::l_square) &&
4479 Left.isOneOf(TT_VerilogDimensionedTypeName, Keywords.kw_function)) {
4480 return true;
4482 // Don't add spaces between a casting type and the quote or repetition count
4483 // and the brace.
4484 if ((Right.is(Keywords.kw_apostrophe) ||
4485 (Right.is(BK_BracedInit) && Right.is(tok::l_brace))) &&
4486 !(Left.isOneOf(Keywords.kw_assign, Keywords.kw_unique) ||
4487 Keywords.isVerilogWordOperator(Left)) &&
4488 (Left.isOneOf(tok::r_square, tok::r_paren, tok::r_brace,
4489 tok::numeric_constant) ||
4490 Keywords.isWordLike(Left))) {
4491 return false;
4493 // Don't add spaces in imports like `import foo::*;`.
4494 if ((Right.is(tok::star) && Left.is(tok::coloncolon)) ||
4495 (Left.is(tok::star) && Right.is(tok::semi))) {
4496 return false;
4498 // Add space in attribute like `(* ASYNC_REG = "TRUE" *)`.
4499 if (Left.endsSequence(tok::star, tok::l_paren) && Right.is(tok::identifier))
4500 return true;
4501 // Add space before drive strength like in `wire (strong1, pull0)`.
4502 if (Right.is(tok::l_paren) && Right.is(TT_VerilogStrength))
4503 return true;
4504 // Don't add space in a streaming concatenation like `{>>{j}}`.
4505 if ((Left.is(tok::l_brace) &&
4506 Right.isOneOf(tok::lessless, tok::greatergreater)) ||
4507 (Left.endsSequence(tok::lessless, tok::l_brace) ||
4508 Left.endsSequence(tok::greatergreater, tok::l_brace))) {
4509 return false;
4512 if (Left.is(TT_ImplicitStringLiteral))
4513 return Right.hasWhitespaceBefore();
4514 if (Line.Type == LT_ObjCMethodDecl) {
4515 if (Left.is(TT_ObjCMethodSpecifier))
4516 return true;
4517 if (Left.is(tok::r_paren) && canBeObjCSelectorComponent(Right)) {
4518 // Don't space between ')' and <id> or ')' and 'new'. 'new' is not a
4519 // keyword in Objective-C, and '+ (instancetype)new;' is a standard class
4520 // method declaration.
4521 return false;
4524 if (Line.Type == LT_ObjCProperty &&
4525 (Right.is(tok::equal) || Left.is(tok::equal))) {
4526 return false;
4529 if (Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow) ||
4530 Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow)) {
4531 return true;
4533 if (Left.is(tok::comma) && !Right.is(TT_OverloadedOperatorLParen) &&
4534 // In an unexpanded macro call we only find the parentheses and commas
4535 // in a line; the commas and closing parenthesis do not require a space.
4536 (Left.Children.empty() || !Left.MacroParent)) {
4537 return true;
4539 if (Right.is(tok::comma))
4540 return false;
4541 if (Right.is(TT_ObjCBlockLParen))
4542 return true;
4543 if (Right.is(TT_CtorInitializerColon))
4544 return Style.SpaceBeforeCtorInitializerColon;
4545 if (Right.is(TT_InheritanceColon) && !Style.SpaceBeforeInheritanceColon)
4546 return false;
4547 if (Right.is(TT_RangeBasedForLoopColon) &&
4548 !Style.SpaceBeforeRangeBasedForLoopColon) {
4549 return false;
4551 if (Left.is(TT_BitFieldColon)) {
4552 return Style.BitFieldColonSpacing == FormatStyle::BFCS_Both ||
4553 Style.BitFieldColonSpacing == FormatStyle::BFCS_After;
4555 if (Right.is(tok::colon)) {
4556 if (Right.is(TT_CaseLabelColon))
4557 return Style.SpaceBeforeCaseColon;
4558 if (Right.is(TT_GotoLabelColon))
4559 return false;
4560 // `private:` and `public:`.
4561 if (!Right.getNextNonComment())
4562 return false;
4563 if (Right.is(TT_ObjCMethodExpr))
4564 return false;
4565 if (Left.is(tok::question))
4566 return false;
4567 if (Right.is(TT_InlineASMColon) && Left.is(tok::coloncolon))
4568 return false;
4569 if (Right.is(TT_DictLiteral))
4570 return Style.SpacesInContainerLiterals;
4571 if (Right.is(TT_AttributeColon))
4572 return false;
4573 if (Right.is(TT_CSharpNamedArgumentColon))
4574 return false;
4575 if (Right.is(TT_GenericSelectionColon))
4576 return false;
4577 if (Right.is(TT_BitFieldColon)) {
4578 return Style.BitFieldColonSpacing == FormatStyle::BFCS_Both ||
4579 Style.BitFieldColonSpacing == FormatStyle::BFCS_Before;
4581 return true;
4583 // Do not merge "- -" into "--".
4584 if ((Left.isOneOf(tok::minus, tok::minusminus) &&
4585 Right.isOneOf(tok::minus, tok::minusminus)) ||
4586 (Left.isOneOf(tok::plus, tok::plusplus) &&
4587 Right.isOneOf(tok::plus, tok::plusplus))) {
4588 return true;
4590 if (Left.is(TT_UnaryOperator)) {
4591 if (!Right.is(tok::l_paren)) {
4592 // The alternative operators for ~ and ! are "compl" and "not".
4593 // If they are used instead, we do not want to combine them with
4594 // the token to the right, unless that is a left paren.
4595 if (Left.is(tok::exclaim) && Left.TokenText == "not")
4596 return true;
4597 if (Left.is(tok::tilde) && Left.TokenText == "compl")
4598 return true;
4599 // Lambda captures allow for a lone &, so "&]" needs to be properly
4600 // handled.
4601 if (Left.is(tok::amp) && Right.is(tok::r_square))
4602 return Style.SpacesInSquareBrackets;
4604 return (Style.SpaceAfterLogicalNot && Left.is(tok::exclaim)) ||
4605 Right.is(TT_BinaryOperator);
4608 // If the next token is a binary operator or a selector name, we have
4609 // incorrectly classified the parenthesis as a cast. FIXME: Detect correctly.
4610 if (Left.is(TT_CastRParen)) {
4611 return Style.SpaceAfterCStyleCast ||
4612 Right.isOneOf(TT_BinaryOperator, TT_SelectorName);
4615 auto ShouldAddSpacesInAngles = [this, &Right]() {
4616 if (this->Style.SpacesInAngles == FormatStyle::SIAS_Always)
4617 return true;
4618 if (this->Style.SpacesInAngles == FormatStyle::SIAS_Leave)
4619 return Right.hasWhitespaceBefore();
4620 return false;
4623 if (Left.is(tok::greater) && Right.is(tok::greater)) {
4624 if (Style.Language == FormatStyle::LK_TextProto ||
4625 (Style.Language == FormatStyle::LK_Proto && Left.is(TT_DictLiteral))) {
4626 return !Style.Cpp11BracedListStyle;
4628 return Right.is(TT_TemplateCloser) && Left.is(TT_TemplateCloser) &&
4629 ((Style.Standard < FormatStyle::LS_Cpp11) ||
4630 ShouldAddSpacesInAngles());
4632 if (Right.isOneOf(tok::arrow, tok::arrowstar, tok::periodstar) ||
4633 Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) ||
4634 (Right.is(tok::period) && Right.isNot(TT_DesignatedInitializerPeriod))) {
4635 return false;
4637 if (!Style.SpaceBeforeAssignmentOperators && Left.isNot(TT_TemplateCloser) &&
4638 Right.getPrecedence() == prec::Assignment) {
4639 return false;
4641 if (Style.Language == FormatStyle::LK_Java && Right.is(tok::coloncolon) &&
4642 (Left.is(tok::identifier) || Left.is(tok::kw_this))) {
4643 return false;
4645 if (Right.is(tok::coloncolon) && Left.is(tok::identifier)) {
4646 // Generally don't remove existing spaces between an identifier and "::".
4647 // The identifier might actually be a macro name such as ALWAYS_INLINE. If
4648 // this turns out to be too lenient, add analysis of the identifier itself.
4649 return Right.hasWhitespaceBefore();
4651 if (Right.is(tok::coloncolon) &&
4652 !Left.isOneOf(tok::l_brace, tok::comment, tok::l_paren)) {
4653 // Put a space between < and :: in vector< ::std::string >
4654 return (Left.is(TT_TemplateOpener) &&
4655 ((Style.Standard < FormatStyle::LS_Cpp11) ||
4656 ShouldAddSpacesInAngles())) ||
4657 !(Left.isOneOf(tok::l_paren, tok::r_paren, tok::l_square,
4658 tok::kw___super, TT_TemplateOpener,
4659 TT_TemplateCloser)) ||
4660 (Left.is(tok::l_paren) && Style.SpacesInParentheses);
4662 if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser)))
4663 return ShouldAddSpacesInAngles();
4664 // Space before TT_StructuredBindingLSquare.
4665 if (Right.is(TT_StructuredBindingLSquare)) {
4666 return !Left.isOneOf(tok::amp, tok::ampamp) ||
4667 getTokenReferenceAlignment(Left) != FormatStyle::PAS_Right;
4669 // Space before & or && following a TT_StructuredBindingLSquare.
4670 if (Right.Next && Right.Next->is(TT_StructuredBindingLSquare) &&
4671 Right.isOneOf(tok::amp, tok::ampamp)) {
4672 return getTokenReferenceAlignment(Right) != FormatStyle::PAS_Left;
4674 if ((Right.is(TT_BinaryOperator) && !Left.is(tok::l_paren)) ||
4675 (Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) &&
4676 !Right.is(tok::r_paren))) {
4677 return true;
4679 if (Right.is(TT_TemplateOpener) && Left.is(tok::r_paren) &&
4680 Left.MatchingParen &&
4681 Left.MatchingParen->is(TT_OverloadedOperatorLParen)) {
4682 return false;
4684 if (Right.is(tok::less) && Left.isNot(tok::l_paren) &&
4685 Line.Type == LT_ImportStatement) {
4686 return true;
4688 if (Right.is(TT_TrailingUnaryOperator))
4689 return false;
4690 if (Left.is(TT_RegexLiteral))
4691 return false;
4692 return spaceRequiredBetween(Line, Left, Right);
4695 // Returns 'true' if 'Tok' is a brace we'd want to break before in Allman style.
4696 static bool isAllmanBrace(const FormatToken &Tok) {
4697 return Tok.is(tok::l_brace) && Tok.is(BK_Block) &&
4698 !Tok.isOneOf(TT_ObjCBlockLBrace, TT_LambdaLBrace, TT_DictLiteral);
4701 // Returns 'true' if 'Tok' is a function argument.
4702 static bool IsFunctionArgument(const FormatToken &Tok) {
4703 return Tok.MatchingParen && Tok.MatchingParen->Next &&
4704 Tok.MatchingParen->Next->isOneOf(tok::comma, tok::r_paren);
4707 static bool
4708 isItAnEmptyLambdaAllowed(const FormatToken &Tok,
4709 FormatStyle::ShortLambdaStyle ShortLambdaOption) {
4710 return Tok.Children.empty() && ShortLambdaOption != FormatStyle::SLS_None;
4713 static bool isAllmanLambdaBrace(const FormatToken &Tok) {
4714 return Tok.is(tok::l_brace) && Tok.is(BK_Block) &&
4715 !Tok.isOneOf(TT_ObjCBlockLBrace, TT_DictLiteral);
4718 // Returns the first token on the line that is not a comment.
4719 static const FormatToken *getFirstNonComment(const AnnotatedLine &Line) {
4720 const FormatToken *Next = Line.First;
4721 if (!Next)
4722 return Next;
4723 if (Next->is(tok::comment))
4724 Next = Next->getNextNonComment();
4725 return Next;
4728 bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
4729 const FormatToken &Right) const {
4730 const FormatToken &Left = *Right.Previous;
4731 if (Right.NewlinesBefore > 1 && Style.MaxEmptyLinesToKeep > 0)
4732 return true;
4734 if (Style.isCSharp()) {
4735 if (Left.is(TT_FatArrow) && Right.is(tok::l_brace) &&
4736 Style.BraceWrapping.AfterFunction) {
4737 return true;
4739 if (Right.is(TT_CSharpNamedArgumentColon) ||
4740 Left.is(TT_CSharpNamedArgumentColon)) {
4741 return false;
4743 if (Right.is(TT_CSharpGenericTypeConstraint))
4744 return true;
4745 if (Right.Next && Right.Next->is(TT_FatArrow) &&
4746 (Right.is(tok::numeric_constant) ||
4747 (Right.is(tok::identifier) && Right.TokenText == "_"))) {
4748 return true;
4751 // Break after C# [...] and before public/protected/private/internal.
4752 if (Left.is(TT_AttributeSquare) && Left.is(tok::r_square) &&
4753 (Right.isAccessSpecifier(/*ColonRequired=*/false) ||
4754 Right.is(Keywords.kw_internal))) {
4755 return true;
4757 // Break between ] and [ but only when there are really 2 attributes.
4758 if (Left.is(TT_AttributeSquare) && Right.is(TT_AttributeSquare) &&
4759 Left.is(tok::r_square) && Right.is(tok::l_square)) {
4760 return true;
4763 } else if (Style.isJavaScript()) {
4764 // FIXME: This might apply to other languages and token kinds.
4765 if (Right.is(tok::string_literal) && Left.is(tok::plus) && Left.Previous &&
4766 Left.Previous->is(tok::string_literal)) {
4767 return true;
4769 if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) && Line.Level == 0 &&
4770 Left.Previous && Left.Previous->is(tok::equal) &&
4771 Line.First->isOneOf(tok::identifier, Keywords.kw_import, tok::kw_export,
4772 tok::kw_const) &&
4773 // kw_var/kw_let are pseudo-tokens that are tok::identifier, so match
4774 // above.
4775 !Line.First->isOneOf(Keywords.kw_var, Keywords.kw_let)) {
4776 // Object literals on the top level of a file are treated as "enum-style".
4777 // Each key/value pair is put on a separate line, instead of bin-packing.
4778 return true;
4780 if (Left.is(tok::l_brace) && Line.Level == 0 &&
4781 (Line.startsWith(tok::kw_enum) ||
4782 Line.startsWith(tok::kw_const, tok::kw_enum) ||
4783 Line.startsWith(tok::kw_export, tok::kw_enum) ||
4784 Line.startsWith(tok::kw_export, tok::kw_const, tok::kw_enum))) {
4785 // JavaScript top-level enum key/value pairs are put on separate lines
4786 // instead of bin-packing.
4787 return true;
4789 if (Right.is(tok::r_brace) && Left.is(tok::l_brace) && Left.Previous &&
4790 Left.Previous->is(TT_FatArrow)) {
4791 // JS arrow function (=> {...}).
4792 switch (Style.AllowShortLambdasOnASingleLine) {
4793 case FormatStyle::SLS_All:
4794 return false;
4795 case FormatStyle::SLS_None:
4796 return true;
4797 case FormatStyle::SLS_Empty:
4798 return !Left.Children.empty();
4799 case FormatStyle::SLS_Inline:
4800 // allow one-lining inline (e.g. in function call args) and empty arrow
4801 // functions.
4802 return (Left.NestingLevel == 0 && Line.Level == 0) &&
4803 !Left.Children.empty();
4805 llvm_unreachable("Unknown FormatStyle::ShortLambdaStyle enum");
4808 if (Right.is(tok::r_brace) && Left.is(tok::l_brace) &&
4809 !Left.Children.empty()) {
4810 // Support AllowShortFunctionsOnASingleLine for JavaScript.
4811 return Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_None ||
4812 Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Empty ||
4813 (Left.NestingLevel == 0 && Line.Level == 0 &&
4814 Style.AllowShortFunctionsOnASingleLine &
4815 FormatStyle::SFS_InlineOnly);
4817 } else if (Style.Language == FormatStyle::LK_Java) {
4818 if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next &&
4819 Right.Next->is(tok::string_literal)) {
4820 return true;
4822 } else if (Style.isVerilog()) {
4823 // Break between assignments.
4824 if (Left.is(TT_VerilogAssignComma))
4825 return true;
4826 // Break between ports of different types.
4827 if (Left.is(TT_VerilogTypeComma))
4828 return true;
4829 // Break between ports in a module instantiation and after the parameter
4830 // list.
4831 if (Style.VerilogBreakBetweenInstancePorts &&
4832 (Left.is(TT_VerilogInstancePortComma) ||
4833 (Left.is(tok::r_paren) && Keywords.isVerilogIdentifier(Right) &&
4834 Left.MatchingParen &&
4835 Left.MatchingParen->is(TT_VerilogInstancePortLParen)))) {
4836 return true;
4838 // Break after labels. In Verilog labels don't have the 'case' keyword, so
4839 // it is hard to identify them in UnwrappedLineParser.
4840 if (!Keywords.isVerilogBegin(Right) && Keywords.isVerilogEndOfLabel(Left))
4841 return true;
4842 } else if (Style.Language == FormatStyle::LK_Cpp ||
4843 Style.Language == FormatStyle::LK_ObjC ||
4844 Style.Language == FormatStyle::LK_Proto ||
4845 Style.Language == FormatStyle::LK_TableGen ||
4846 Style.Language == FormatStyle::LK_TextProto) {
4847 if (Left.isStringLiteral() && Right.isStringLiteral())
4848 return true;
4851 // Basic JSON newline processing.
4852 if (Style.isJson()) {
4853 // Always break after a JSON record opener.
4854 // {
4855 // }
4856 if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace))
4857 return true;
4858 // Always break after a JSON array opener based on BreakArrays.
4859 if ((Left.is(TT_ArrayInitializerLSquare) && Left.is(tok::l_square) &&
4860 Right.isNot(tok::r_square)) ||
4861 Left.is(tok::comma)) {
4862 if (Right.is(tok::l_brace))
4863 return true;
4864 // scan to the right if an we see an object or an array inside
4865 // then break.
4866 for (const auto *Tok = &Right; Tok; Tok = Tok->Next) {
4867 if (Tok->isOneOf(tok::l_brace, tok::l_square))
4868 return true;
4869 if (Tok->isOneOf(tok::r_brace, tok::r_square))
4870 break;
4872 return Style.BreakArrays;
4876 if (Line.startsWith(tok::kw_asm) && Right.is(TT_InlineASMColon) &&
4877 Style.BreakBeforeInlineASMColon == FormatStyle::BBIAS_Always) {
4878 return true;
4881 // If the last token before a '}', ']', or ')' is a comma or a trailing
4882 // comment, the intention is to insert a line break after it in order to make
4883 // shuffling around entries easier. Import statements, especially in
4884 // JavaScript, can be an exception to this rule.
4885 if (Style.JavaScriptWrapImports || Line.Type != LT_ImportStatement) {
4886 const FormatToken *BeforeClosingBrace = nullptr;
4887 if ((Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
4888 (Style.isJavaScript() && Left.is(tok::l_paren))) &&
4889 Left.isNot(BK_Block) && Left.MatchingParen) {
4890 BeforeClosingBrace = Left.MatchingParen->Previous;
4891 } else if (Right.MatchingParen &&
4892 (Right.MatchingParen->isOneOf(tok::l_brace,
4893 TT_ArrayInitializerLSquare) ||
4894 (Style.isJavaScript() &&
4895 Right.MatchingParen->is(tok::l_paren)))) {
4896 BeforeClosingBrace = &Left;
4898 if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) ||
4899 BeforeClosingBrace->isTrailingComment())) {
4900 return true;
4904 if (Right.is(tok::comment)) {
4905 return Left.isNot(BK_BracedInit) && Left.isNot(TT_CtorInitializerColon) &&
4906 (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline);
4908 if (Left.isTrailingComment())
4909 return true;
4910 if (Left.IsUnterminatedLiteral)
4911 return true;
4912 if (Right.is(tok::lessless) && Right.Next && Left.is(tok::string_literal) &&
4913 Right.Next->is(tok::string_literal)) {
4914 return true;
4916 if (Right.is(TT_RequiresClause)) {
4917 switch (Style.RequiresClausePosition) {
4918 case FormatStyle::RCPS_OwnLine:
4919 case FormatStyle::RCPS_WithFollowing:
4920 return true;
4921 default:
4922 break;
4925 // Can break after template<> declaration
4926 if (Left.ClosesTemplateDeclaration && Left.MatchingParen &&
4927 Left.MatchingParen->NestingLevel == 0) {
4928 // Put concepts on the next line e.g.
4929 // template<typename T>
4930 // concept ...
4931 if (Right.is(tok::kw_concept))
4932 return Style.BreakBeforeConceptDeclarations == FormatStyle::BBCDS_Always;
4933 return Style.AlwaysBreakTemplateDeclarations == FormatStyle::BTDS_Yes;
4935 if (Left.ClosesRequiresClause && Right.isNot(tok::semi)) {
4936 switch (Style.RequiresClausePosition) {
4937 case FormatStyle::RCPS_OwnLine:
4938 case FormatStyle::RCPS_WithPreceding:
4939 return true;
4940 default:
4941 break;
4944 if (Style.PackConstructorInitializers == FormatStyle::PCIS_Never) {
4945 if (Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeColon &&
4946 (Left.is(TT_CtorInitializerComma) ||
4947 Right.is(TT_CtorInitializerColon))) {
4948 return true;
4951 if (Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon &&
4952 Left.isOneOf(TT_CtorInitializerColon, TT_CtorInitializerComma)) {
4953 return true;
4956 if (Style.PackConstructorInitializers < FormatStyle::PCIS_CurrentLine &&
4957 Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma &&
4958 Right.isOneOf(TT_CtorInitializerComma, TT_CtorInitializerColon)) {
4959 return true;
4961 if (Style.PackConstructorInitializers == FormatStyle::PCIS_NextLineOnly) {
4962 if ((Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeColon ||
4963 Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) &&
4964 Right.is(TT_CtorInitializerColon)) {
4965 return true;
4968 if (Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon &&
4969 Left.is(TT_CtorInitializerColon)) {
4970 return true;
4973 // Break only if we have multiple inheritance.
4974 if (Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma &&
4975 Right.is(TT_InheritanceComma)) {
4976 return true;
4978 if (Style.BreakInheritanceList == FormatStyle::BILS_AfterComma &&
4979 Left.is(TT_InheritanceComma)) {
4980 return true;
4982 if (Right.is(tok::string_literal) && Right.TokenText.startswith("R\"")) {
4983 // Multiline raw string literals are special wrt. line breaks. The author
4984 // has made a deliberate choice and might have aligned the contents of the
4985 // string literal accordingly. Thus, we try keep existing line breaks.
4986 return Right.IsMultiline && Right.NewlinesBefore > 0;
4988 if ((Left.is(tok::l_brace) || (Left.is(tok::less) && Left.Previous &&
4989 Left.Previous->is(tok::equal))) &&
4990 Right.NestingLevel == 1 && Style.Language == FormatStyle::LK_Proto) {
4991 // Don't put enums or option definitions onto single lines in protocol
4992 // buffers.
4993 return true;
4995 if (Right.is(TT_InlineASMBrace))
4996 return Right.HasUnescapedNewline;
4998 if (isAllmanBrace(Left) || isAllmanBrace(Right)) {
4999 auto FirstNonComment = getFirstNonComment(Line);
5000 bool AccessSpecifier =
5001 FirstNonComment &&
5002 FirstNonComment->isOneOf(Keywords.kw_internal, tok::kw_public,
5003 tok::kw_private, tok::kw_protected);
5005 if (Style.BraceWrapping.AfterEnum) {
5006 if (Line.startsWith(tok::kw_enum) ||
5007 Line.startsWith(tok::kw_typedef, tok::kw_enum)) {
5008 return true;
5010 // Ensure BraceWrapping for `public enum A {`.
5011 if (AccessSpecifier && FirstNonComment->Next &&
5012 FirstNonComment->Next->is(tok::kw_enum)) {
5013 return true;
5017 // Ensure BraceWrapping for `public interface A {`.
5018 if (Style.BraceWrapping.AfterClass &&
5019 ((AccessSpecifier && FirstNonComment->Next &&
5020 FirstNonComment->Next->is(Keywords.kw_interface)) ||
5021 Line.startsWith(Keywords.kw_interface))) {
5022 return true;
5025 // Don't attempt to interpret struct return types as structs.
5026 if (Right.isNot(TT_FunctionLBrace)) {
5027 return (Line.startsWith(tok::kw_class) &&
5028 Style.BraceWrapping.AfterClass) ||
5029 (Line.startsWith(tok::kw_struct) &&
5030 Style.BraceWrapping.AfterStruct);
5034 if (Left.is(TT_ObjCBlockLBrace) &&
5035 Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Never) {
5036 return true;
5039 // Ensure wrapping after __attribute__((XX)) and @interface etc.
5040 if (Left.is(TT_AttributeParen) && Right.is(TT_ObjCDecl))
5041 return true;
5043 if (Left.is(TT_LambdaLBrace)) {
5044 if (IsFunctionArgument(Left) &&
5045 Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Inline) {
5046 return false;
5049 if (Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_None ||
5050 Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Inline ||
5051 (!Left.Children.empty() &&
5052 Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Empty)) {
5053 return true;
5057 if (Style.BraceWrapping.BeforeLambdaBody && Right.is(TT_LambdaLBrace) &&
5058 Left.isOneOf(tok::star, tok::amp, tok::ampamp, TT_TemplateCloser)) {
5059 return true;
5062 // Put multiple Java annotation on a new line.
5063 if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
5064 Left.is(TT_LeadingJavaAnnotation) &&
5065 Right.isNot(TT_LeadingJavaAnnotation) && Right.isNot(tok::l_paren) &&
5066 (Line.Last->is(tok::l_brace) || Style.BreakAfterJavaFieldAnnotations)) {
5067 return true;
5070 if (Right.is(TT_ProtoExtensionLSquare))
5071 return true;
5073 // In text proto instances if a submessage contains at least 2 entries and at
5074 // least one of them is a submessage, like A { ... B { ... } ... },
5075 // put all of the entries of A on separate lines by forcing the selector of
5076 // the submessage B to be put on a newline.
5078 // Example: these can stay on one line:
5079 // a { scalar_1: 1 scalar_2: 2 }
5080 // a { b { key: value } }
5082 // and these entries need to be on a new line even if putting them all in one
5083 // line is under the column limit:
5084 // a {
5085 // scalar: 1
5086 // b { key: value }
5087 // }
5089 // We enforce this by breaking before a submessage field that has previous
5090 // siblings, *and* breaking before a field that follows a submessage field.
5092 // Be careful to exclude the case [proto.ext] { ... } since the `]` is
5093 // the TT_SelectorName there, but we don't want to break inside the brackets.
5095 // Another edge case is @submessage { key: value }, which is a common
5096 // substitution placeholder. In this case we want to keep `@` and `submessage`
5097 // together.
5099 // We ensure elsewhere that extensions are always on their own line.
5100 if ((Style.Language == FormatStyle::LK_Proto ||
5101 Style.Language == FormatStyle::LK_TextProto) &&
5102 Right.is(TT_SelectorName) && !Right.is(tok::r_square) && Right.Next) {
5103 // Keep `@submessage` together in:
5104 // @submessage { key: value }
5105 if (Left.is(tok::at))
5106 return false;
5107 // Look for the scope opener after selector in cases like:
5108 // selector { ...
5109 // selector: { ...
5110 // selector: @base { ...
5111 FormatToken *LBrace = Right.Next;
5112 if (LBrace && LBrace->is(tok::colon)) {
5113 LBrace = LBrace->Next;
5114 if (LBrace && LBrace->is(tok::at)) {
5115 LBrace = LBrace->Next;
5116 if (LBrace)
5117 LBrace = LBrace->Next;
5120 if (LBrace &&
5121 // The scope opener is one of {, [, <:
5122 // selector { ... }
5123 // selector [ ... ]
5124 // selector < ... >
5126 // In case of selector { ... }, the l_brace is TT_DictLiteral.
5127 // In case of an empty selector {}, the l_brace is not TT_DictLiteral,
5128 // so we check for immediately following r_brace.
5129 ((LBrace->is(tok::l_brace) &&
5130 (LBrace->is(TT_DictLiteral) ||
5131 (LBrace->Next && LBrace->Next->is(tok::r_brace)))) ||
5132 LBrace->is(TT_ArrayInitializerLSquare) || LBrace->is(tok::less))) {
5133 // If Left.ParameterCount is 0, then this submessage entry is not the
5134 // first in its parent submessage, and we want to break before this entry.
5135 // If Left.ParameterCount is greater than 0, then its parent submessage
5136 // might contain 1 or more entries and we want to break before this entry
5137 // if it contains at least 2 entries. We deal with this case later by
5138 // detecting and breaking before the next entry in the parent submessage.
5139 if (Left.ParameterCount == 0)
5140 return true;
5141 // However, if this submessage is the first entry in its parent
5142 // submessage, Left.ParameterCount might be 1 in some cases.
5143 // We deal with this case later by detecting an entry
5144 // following a closing paren of this submessage.
5147 // If this is an entry immediately following a submessage, it will be
5148 // preceded by a closing paren of that submessage, like in:
5149 // left---. .---right
5150 // v v
5151 // sub: { ... } key: value
5152 // If there was a comment between `}` an `key` above, then `key` would be
5153 // put on a new line anyways.
5154 if (Left.isOneOf(tok::r_brace, tok::greater, tok::r_square))
5155 return true;
5158 // Deal with lambda arguments in C++ - we want consistent line breaks whether
5159 // they happen to be at arg0, arg1 or argN. The selection is a bit nuanced
5160 // as aggressive line breaks are placed when the lambda is not the last arg.
5161 if ((Style.Language == FormatStyle::LK_Cpp ||
5162 Style.Language == FormatStyle::LK_ObjC) &&
5163 Left.is(tok::l_paren) && Left.BlockParameterCount > 0 &&
5164 !Right.isOneOf(tok::l_paren, TT_LambdaLSquare)) {
5165 // Multiple lambdas in the same function call force line breaks.
5166 if (Left.BlockParameterCount > 1)
5167 return true;
5169 // A lambda followed by another arg forces a line break.
5170 if (!Left.Role)
5171 return false;
5172 auto Comma = Left.Role->lastComma();
5173 if (!Comma)
5174 return false;
5175 auto Next = Comma->getNextNonComment();
5176 if (!Next)
5177 return false;
5178 if (!Next->isOneOf(TT_LambdaLSquare, tok::l_brace, tok::caret))
5179 return true;
5182 return false;
5185 bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
5186 const FormatToken &Right) const {
5187 const FormatToken &Left = *Right.Previous;
5188 // Language-specific stuff.
5189 if (Style.isCSharp()) {
5190 if (Left.isOneOf(TT_CSharpNamedArgumentColon, TT_AttributeColon) ||
5191 Right.isOneOf(TT_CSharpNamedArgumentColon, TT_AttributeColon)) {
5192 return false;
5194 // Only break after commas for generic type constraints.
5195 if (Line.First->is(TT_CSharpGenericTypeConstraint))
5196 return Left.is(TT_CSharpGenericTypeConstraintComma);
5197 // Keep nullable operators attached to their identifiers.
5198 if (Right.is(TT_CSharpNullable))
5199 return false;
5200 } else if (Style.Language == FormatStyle::LK_Java) {
5201 if (Left.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
5202 Keywords.kw_implements)) {
5203 return false;
5205 if (Right.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
5206 Keywords.kw_implements)) {
5207 return true;
5209 } else if (Style.isJavaScript()) {
5210 const FormatToken *NonComment = Right.getPreviousNonComment();
5211 if (NonComment &&
5212 NonComment->isOneOf(
5213 tok::kw_return, Keywords.kw_yield, tok::kw_continue, tok::kw_break,
5214 tok::kw_throw, Keywords.kw_interface, Keywords.kw_type,
5215 tok::kw_static, tok::kw_public, tok::kw_private, tok::kw_protected,
5216 Keywords.kw_readonly, Keywords.kw_override, Keywords.kw_abstract,
5217 Keywords.kw_get, Keywords.kw_set, Keywords.kw_async,
5218 Keywords.kw_await)) {
5219 return false; // Otherwise automatic semicolon insertion would trigger.
5221 if (Right.NestingLevel == 0 &&
5222 (Left.Tok.getIdentifierInfo() ||
5223 Left.isOneOf(tok::r_square, tok::r_paren)) &&
5224 Right.isOneOf(tok::l_square, tok::l_paren)) {
5225 return false; // Otherwise automatic semicolon insertion would trigger.
5227 if (NonComment && NonComment->is(tok::identifier) &&
5228 NonComment->TokenText == "asserts") {
5229 return false;
5231 if (Left.is(TT_FatArrow) && Right.is(tok::l_brace))
5232 return false;
5233 if (Left.is(TT_JsTypeColon))
5234 return true;
5235 // Don't wrap between ":" and "!" of a strict prop init ("field!: type;").
5236 if (Left.is(tok::exclaim) && Right.is(tok::colon))
5237 return false;
5238 // Look for is type annotations like:
5239 // function f(): a is B { ... }
5240 // Do not break before is in these cases.
5241 if (Right.is(Keywords.kw_is)) {
5242 const FormatToken *Next = Right.getNextNonComment();
5243 // If `is` is followed by a colon, it's likely that it's a dict key, so
5244 // ignore it for this check.
5245 // For example this is common in Polymer:
5246 // Polymer({
5247 // is: 'name',
5248 // ...
5249 // });
5250 if (!Next || !Next->is(tok::colon))
5251 return false;
5253 if (Left.is(Keywords.kw_in))
5254 return Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None;
5255 if (Right.is(Keywords.kw_in))
5256 return Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None;
5257 if (Right.is(Keywords.kw_as))
5258 return false; // must not break before as in 'x as type' casts
5259 if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_infer)) {
5260 // extends and infer can appear as keywords in conditional types:
5261 // https://www.typescriptlang.org/docs/handbook/release-notes/typescript-2-8.html#conditional-types
5262 // do not break before them, as the expressions are subject to ASI.
5263 return false;
5265 if (Left.is(Keywords.kw_as))
5266 return true;
5267 if (Left.is(TT_NonNullAssertion))
5268 return true;
5269 if (Left.is(Keywords.kw_declare) &&
5270 Right.isOneOf(Keywords.kw_module, tok::kw_namespace,
5271 Keywords.kw_function, tok::kw_class, tok::kw_enum,
5272 Keywords.kw_interface, Keywords.kw_type, Keywords.kw_var,
5273 Keywords.kw_let, tok::kw_const)) {
5274 // See grammar for 'declare' statements at:
5275 // https://github.com/Microsoft/TypeScript/blob/main/doc/spec-ARCHIVED.md#A.10
5276 return false;
5278 if (Left.isOneOf(Keywords.kw_module, tok::kw_namespace) &&
5279 Right.isOneOf(tok::identifier, tok::string_literal)) {
5280 return false; // must not break in "module foo { ...}"
5282 if (Right.is(TT_TemplateString) && Right.closesScope())
5283 return false;
5284 // Don't split tagged template literal so there is a break between the tag
5285 // identifier and template string.
5286 if (Left.is(tok::identifier) && Right.is(TT_TemplateString))
5287 return false;
5288 if (Left.is(TT_TemplateString) && Left.opensScope())
5289 return true;
5292 if (Left.is(tok::at))
5293 return false;
5294 if (Left.Tok.getObjCKeywordID() == tok::objc_interface)
5295 return false;
5296 if (Left.isOneOf(TT_JavaAnnotation, TT_LeadingJavaAnnotation))
5297 return !Right.is(tok::l_paren);
5298 if (Right.is(TT_PointerOrReference)) {
5299 return Line.IsMultiVariableDeclStmt ||
5300 (getTokenPointerOrReferenceAlignment(Right) ==
5301 FormatStyle::PAS_Right &&
5302 (!Right.Next || Right.Next->isNot(TT_FunctionDeclarationName)));
5304 if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
5305 Right.is(tok::kw_operator)) {
5306 return true;
5308 if (Left.is(TT_PointerOrReference))
5309 return false;
5310 if (Right.isTrailingComment()) {
5311 // We rely on MustBreakBefore being set correctly here as we should not
5312 // change the "binding" behavior of a comment.
5313 // The first comment in a braced lists is always interpreted as belonging to
5314 // the first list element. Otherwise, it should be placed outside of the
5315 // list.
5316 return Left.is(BK_BracedInit) ||
5317 (Left.is(TT_CtorInitializerColon) && Right.NewlinesBefore > 0 &&
5318 Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon);
5320 if (Left.is(tok::question) && Right.is(tok::colon))
5321 return false;
5322 if (Right.is(TT_ConditionalExpr) || Right.is(tok::question))
5323 return Style.BreakBeforeTernaryOperators;
5324 if (Left.is(TT_ConditionalExpr) || Left.is(tok::question))
5325 return !Style.BreakBeforeTernaryOperators;
5326 if (Left.is(TT_InheritanceColon))
5327 return Style.BreakInheritanceList == FormatStyle::BILS_AfterColon;
5328 if (Right.is(TT_InheritanceColon))
5329 return Style.BreakInheritanceList != FormatStyle::BILS_AfterColon;
5330 if (Right.is(TT_ObjCMethodExpr) && !Right.is(tok::r_square) &&
5331 Left.isNot(TT_SelectorName)) {
5332 return true;
5335 if (Right.is(tok::colon) &&
5336 !Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon)) {
5337 return false;
5339 if (Left.is(tok::colon) && Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)) {
5340 if (Style.Language == FormatStyle::LK_Proto ||
5341 Style.Language == FormatStyle::LK_TextProto) {
5342 if (!Style.AlwaysBreakBeforeMultilineStrings && Right.isStringLiteral())
5343 return false;
5344 // Prevent cases like:
5346 // submessage:
5347 // { key: valueeeeeeeeeeee }
5349 // when the snippet does not fit into one line.
5350 // Prefer:
5352 // submessage: {
5353 // key: valueeeeeeeeeeee
5354 // }
5356 // instead, even if it is longer by one line.
5358 // Note that this allows the "{" to go over the column limit
5359 // when the column limit is just between ":" and "{", but that does
5360 // not happen too often and alternative formattings in this case are
5361 // not much better.
5363 // The code covers the cases:
5365 // submessage: { ... }
5366 // submessage: < ... >
5367 // repeated: [ ... ]
5368 if (((Right.is(tok::l_brace) || Right.is(tok::less)) &&
5369 Right.is(TT_DictLiteral)) ||
5370 Right.is(TT_ArrayInitializerLSquare)) {
5371 return false;
5374 return true;
5376 if (Right.is(tok::r_square) && Right.MatchingParen &&
5377 Right.MatchingParen->is(TT_ProtoExtensionLSquare)) {
5378 return false;
5380 if (Right.is(TT_SelectorName) || (Right.is(tok::identifier) && Right.Next &&
5381 Right.Next->is(TT_ObjCMethodExpr))) {
5382 return Left.isNot(tok::period); // FIXME: Properly parse ObjC calls.
5384 if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty)
5385 return true;
5386 if (Right.is(tok::kw_concept))
5387 return Style.BreakBeforeConceptDeclarations != FormatStyle::BBCDS_Never;
5388 if (Right.is(TT_RequiresClause))
5389 return true;
5390 if (Left.ClosesTemplateDeclaration || Left.is(TT_FunctionAnnotationRParen))
5391 return true;
5392 if (Left.ClosesRequiresClause)
5393 return true;
5394 if (Right.isOneOf(TT_RangeBasedForLoopColon, TT_OverloadedOperatorLParen,
5395 TT_OverloadedOperator)) {
5396 return false;
5398 if (Left.is(TT_RangeBasedForLoopColon))
5399 return true;
5400 if (Right.is(TT_RangeBasedForLoopColon))
5401 return false;
5402 if (Left.is(TT_TemplateCloser) && Right.is(TT_TemplateOpener))
5403 return true;
5404 if ((Left.is(tok::greater) && Right.is(tok::greater)) ||
5405 (Left.is(tok::less) && Right.is(tok::less))) {
5406 return false;
5408 if (Right.is(TT_BinaryOperator) &&
5409 Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None &&
5410 (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_All ||
5411 Right.getPrecedence() != prec::Assignment)) {
5412 return true;
5414 if (Left.isOneOf(TT_TemplateCloser, TT_UnaryOperator) ||
5415 Left.is(tok::kw_operator)) {
5416 return false;
5418 if (Left.is(tok::equal) && !Right.isOneOf(tok::kw_default, tok::kw_delete) &&
5419 Line.Type == LT_VirtualFunctionDecl && Left.NestingLevel == 0) {
5420 return false;
5422 if (Left.is(tok::equal) && Right.is(tok::l_brace) &&
5423 !Style.Cpp11BracedListStyle) {
5424 return false;
5426 if (Left.is(tok::l_paren) &&
5427 Left.isOneOf(TT_AttributeParen, TT_TypeDeclarationParen)) {
5428 return false;
5430 if (Left.is(tok::l_paren) && Left.Previous &&
5431 (Left.Previous->isOneOf(TT_BinaryOperator, TT_CastRParen))) {
5432 return false;
5434 if (Right.is(TT_ImplicitStringLiteral))
5435 return false;
5437 if (Right.is(TT_TemplateCloser))
5438 return false;
5439 if (Right.is(tok::r_square) && Right.MatchingParen &&
5440 Right.MatchingParen->is(TT_LambdaLSquare)) {
5441 return false;
5444 // We only break before r_brace if there was a corresponding break before
5445 // the l_brace, which is tracked by BreakBeforeClosingBrace.
5446 if (Right.is(tok::r_brace))
5447 return Right.MatchingParen && Right.MatchingParen->is(BK_Block);
5449 // We only break before r_paren if we're in a block indented context.
5450 if (Right.is(tok::r_paren)) {
5451 if (Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent ||
5452 !Right.MatchingParen) {
5453 return false;
5455 auto Next = Right.Next;
5456 if (Next && Next->is(tok::r_paren))
5457 Next = Next->Next;
5458 if (Next && Next->is(tok::l_paren))
5459 return false;
5460 const FormatToken *Previous = Right.MatchingParen->Previous;
5461 return !(Previous && (Previous->is(tok::kw_for) || Previous->isIf()));
5464 // Allow breaking after a trailing annotation, e.g. after a method
5465 // declaration.
5466 if (Left.is(TT_TrailingAnnotation)) {
5467 return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren,
5468 tok::less, tok::coloncolon);
5471 if (Right.is(tok::kw___attribute) ||
5472 (Right.is(tok::l_square) && Right.is(TT_AttributeSquare))) {
5473 return !Left.is(TT_AttributeSquare);
5476 if (Left.is(tok::identifier) && Right.is(tok::string_literal))
5477 return true;
5479 if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
5480 return true;
5482 if (Left.is(TT_CtorInitializerColon)) {
5483 return Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon &&
5484 (!Right.isTrailingComment() || Right.NewlinesBefore > 0);
5486 if (Right.is(TT_CtorInitializerColon))
5487 return Style.BreakConstructorInitializers != FormatStyle::BCIS_AfterColon;
5488 if (Left.is(TT_CtorInitializerComma) &&
5489 Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) {
5490 return false;
5492 if (Right.is(TT_CtorInitializerComma) &&
5493 Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) {
5494 return true;
5496 if (Left.is(TT_InheritanceComma) &&
5497 Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma) {
5498 return false;
5500 if (Right.is(TT_InheritanceComma) &&
5501 Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma) {
5502 return true;
5504 if (Left.is(TT_ArrayInitializerLSquare))
5505 return true;
5506 if (Right.is(tok::kw_typename) && Left.isNot(tok::kw_const))
5507 return true;
5508 if ((Left.isBinaryOperator() || Left.is(TT_BinaryOperator)) &&
5509 !Left.isOneOf(tok::arrowstar, tok::lessless) &&
5510 Style.BreakBeforeBinaryOperators != FormatStyle::BOS_All &&
5511 (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None ||
5512 Left.getPrecedence() == prec::Assignment)) {
5513 return true;
5515 if ((Left.is(TT_AttributeSquare) && Right.is(tok::l_square)) ||
5516 (Left.is(tok::r_square) && Right.is(TT_AttributeSquare))) {
5517 return false;
5520 auto ShortLambdaOption = Style.AllowShortLambdasOnASingleLine;
5521 if (Style.BraceWrapping.BeforeLambdaBody && Right.is(TT_LambdaLBrace)) {
5522 if (isAllmanLambdaBrace(Left))
5523 return !isItAnEmptyLambdaAllowed(Left, ShortLambdaOption);
5524 if (isAllmanLambdaBrace(Right))
5525 return !isItAnEmptyLambdaAllowed(Right, ShortLambdaOption);
5528 return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace,
5529 tok::kw_class, tok::kw_struct, tok::comment) ||
5530 Right.isMemberAccess() ||
5531 Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow, tok::lessless,
5532 tok::colon, tok::l_square, tok::at) ||
5533 (Left.is(tok::r_paren) &&
5534 Right.isOneOf(tok::identifier, tok::kw_const)) ||
5535 (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) ||
5536 (Left.is(TT_TemplateOpener) && !Right.is(TT_TemplateCloser));
5539 void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) const {
5540 llvm::errs() << "AnnotatedTokens(L=" << Line.Level << ", P=" << Line.PPLevel
5541 << ", T=" << Line.Type << ", C=" << Line.IsContinuation
5542 << "):\n";
5543 const FormatToken *Tok = Line.First;
5544 while (Tok) {
5545 llvm::errs() << " M=" << Tok->MustBreakBefore
5546 << " C=" << Tok->CanBreakBefore
5547 << " T=" << getTokenTypeName(Tok->getType())
5548 << " S=" << Tok->SpacesRequiredBefore
5549 << " F=" << Tok->Finalized << " B=" << Tok->BlockParameterCount
5550 << " BK=" << Tok->getBlockKind() << " P=" << Tok->SplitPenalty
5551 << " Name=" << Tok->Tok.getName() << " L=" << Tok->TotalLength
5552 << " PPK=" << Tok->getPackingKind() << " FakeLParens=";
5553 for (prec::Level LParen : Tok->FakeLParens)
5554 llvm::errs() << LParen << "/";
5555 llvm::errs() << " FakeRParens=" << Tok->FakeRParens;
5556 llvm::errs() << " II=" << Tok->Tok.getIdentifierInfo();
5557 llvm::errs() << " Text='" << Tok->TokenText << "'\n";
5558 if (!Tok->Next)
5559 assert(Tok == Line.Last);
5560 Tok = Tok->Next;
5562 llvm::errs() << "----\n";
5565 FormatStyle::PointerAlignmentStyle
5566 TokenAnnotator::getTokenReferenceAlignment(const FormatToken &Reference) const {
5567 assert(Reference.isOneOf(tok::amp, tok::ampamp));
5568 switch (Style.ReferenceAlignment) {
5569 case FormatStyle::RAS_Pointer:
5570 return Style.PointerAlignment;
5571 case FormatStyle::RAS_Left:
5572 return FormatStyle::PAS_Left;
5573 case FormatStyle::RAS_Right:
5574 return FormatStyle::PAS_Right;
5575 case FormatStyle::RAS_Middle:
5576 return FormatStyle::PAS_Middle;
5578 assert(0); //"Unhandled value of ReferenceAlignment"
5579 return Style.PointerAlignment;
5582 FormatStyle::PointerAlignmentStyle
5583 TokenAnnotator::getTokenPointerOrReferenceAlignment(
5584 const FormatToken &PointerOrReference) const {
5585 if (PointerOrReference.isOneOf(tok::amp, tok::ampamp)) {
5586 switch (Style.ReferenceAlignment) {
5587 case FormatStyle::RAS_Pointer:
5588 return Style.PointerAlignment;
5589 case FormatStyle::RAS_Left:
5590 return FormatStyle::PAS_Left;
5591 case FormatStyle::RAS_Right:
5592 return FormatStyle::PAS_Right;
5593 case FormatStyle::RAS_Middle:
5594 return FormatStyle::PAS_Middle;
5597 assert(PointerOrReference.is(tok::star));
5598 return Style.PointerAlignment;
5601 } // namespace format
5602 } // namespace clang