1 //===--- FormatToken.cpp - Format C++ code --------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file implements specific functions of \c FormatTokens and their
13 //===----------------------------------------------------------------------===//
15 #include "FormatToken.h"
16 #include "ContinuationIndenter.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/Support/Debug.h"
/// Returns the entry of the function-local static \c TokNames table that
/// corresponds to \p Type.
/// NOTE(review): the initializer of \c TokNames is not visible in this excerpt.
24 const char *getTokenTypeName(TokenType Type
) {
25 static const char *const TokNames
[] = {
// The table is indexed only when Type is below NUM_TOKEN_TYPES.
31 if (Type
< NUM_TOKEN_TYPES
)
32 return TokNames
[Type
];
// Any other value is reported through llvm_unreachable.
33 llvm_unreachable("unknown TokenType");
37 // FIXME: This is copy&pasted from Sema. Put it in a common place and remove
/// Classifies this token by switching on the kind of the wrapped \c Tok.
/// The case labels visible here are type keywords (e.g. \c __int128,
/// \c unsigned, \c char16_t), one label per entry of TransformTypeTraits.def,
/// \c annot_typename and \c decltype.
/// NOTE(review): the values returned from the switch are not visible in this
/// excerpt; presumably the listed kinds yield true -- confirm against the
/// full file.
39 bool FormatToken::isSimpleTypeSpecifier() const {
40 switch (Tok
.getKind()) {
44 case tok::kw___int128
:
46 case tok::kw_unsigned
:
54 case tok::kw__Float16
:
55 case tok::kw___float128
:
56 case tok::kw___ibm128
:
// The macro below turns every TRANSFORM_TYPE_TRAIT_DEF entry of the included
// .def file into a `case tok::kw___<Trait>:` label.
59 #define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait:
60 #include "clang/Basic/TransformTypeTraits.def"
61 case tok::annot_typename
:
63 case tok::kw_char16_t
:
64 case tok::kw_char32_t
:
66 case tok::kw_decltype
:
/// Returns true if this token is a simple type specifier (see
/// isSimpleTypeSpecifier()), the \c auto keyword, or an identifier.
74 bool FormatToken::isTypeOrIdentifier() const {
75 return isSimpleTypeSpecifier() || Tok
.isOneOf(tok::kw_auto
, tok::identifier
);
/// Inspects this closing brace, its matching opening brace and \p Style.
/// Must only be called on an \c r_brace token (asserted below).
/// NOTE(review): the statements executed by each `if` (the actual return
/// values) are not visible in this excerpt.
78 bool FormatToken::isBlockIndentedInitRBrace(const FormatStyle
&Style
) const {
// Precondition: this token is a closing brace.
79 assert(is(tok::r_brace
));
// Style gate: C++11 braced-list style is off, or brackets are not configured
// as BAS_BlockIndent.
80 if (!Style
.Cpp11BracedListStyle
||
81 Style
.AlignAfterOpenBracket
!= FormatStyle::BAS_BlockIndent
) {
// The matching opening token must exist and be an l_brace.
84 const auto *LBrace
= MatchingParen
;
85 assert(LBrace
&& LBrace
->is(tok::l_brace
));
// Case: the opening brace is classified as a braced initializer.
86 if (LBrace
->is(BK_BracedInit
))
// Case: the opening brace is directly preceded by an '=' token.
88 if (LBrace
->Previous
&& LBrace
->Previous
->is(tok::equal
))
/// Evaluates this token's type, block kind and nesting level against \p Style.
/// Two special cases are checked first (a C# braced-init l_brace, and a
/// template-string token that opens a scope); otherwise the result is the
/// disjunction returned at the end.
/// NOTE(review): the results of the two special cases and one line of the
/// final return expression are not visible in this excerpt.
93 bool FormatToken::opensBlockOrBlockTypeList(const FormatStyle
&Style
) const {
94 // C# Does not indent object initialisers as continuations.
95 if (is(tok::l_brace
) && getBlockKind() == BK_BracedInit
&& Style
.isCSharp())
// Special case: a template-string token that opens a scope.
97 if (is(TT_TemplateString
) && opensScope())
// General case: array-initializer / proto-extension '[' tokens, a group
// involving BK_Block, TT_DictLiteral or a top-level (NestingLevel == 0) token
// when Cpp11BracedListStyle is off, or a '<' when the language is Proto or
// TextProto.
99 return is(TT_ArrayInitializerLSquare
) || is(TT_ProtoExtensionLSquare
) ||
101 (getBlockKind() == BK_Block
|| is(TT_DictLiteral
) ||
102 (!Style
.Cpp11BracedListStyle
&& NestingLevel
== 0))) ||
103 (is(tok::less
) && (Style
.Language
== FormatStyle::LK_Proto
||
104 Style
.Language
== FormatStyle::LK_TextProto
));
/// Out-of-line destructor definition; the body is empty.
107 TokenRole::~TokenRole() {}
/// Base-class implementation: intentionally does nothing with \p Token.
/// CommaSeparatedList provides its own definition later in this file.
109 void TokenRole::precomputeFormattingInfos(const FormatToken
*Token
) {}
/// Lays out the tokens of the list that follows the opening brace, using the
/// ColumnFormat chosen by getColumnFormat(), and accumulates the penalty
/// reported by \p Indenter for each token placed into \p State.
/// Returns 0 when only one format exists (see the inline note below).
/// NOTE(review): the end of the parameter list, the bodies of the early-exit
/// checks, the handling of a null Format and the Item/Column bookkeeping are
/// not visible in this excerpt.
111 unsigned CommaSeparatedList::formatAfterToken(LineState
&State
,
112 ContinuationIndenter
*Indenter
,
// Guard: both the next token and its predecessor must exist.
114 if (!State
.NextToken
|| !State
.NextToken
->Previous
)
117 if (Formats
.size() == 1)
118 return 0; // Handled by formatFromToken
120 // Ensure that we start on the opening brace.
121 const FormatToken
*LBrace
=
122 State
.NextToken
->Previous
->getPreviousNonComment();
// NOTE(review): LBrace->Next is dereferenced below without a null check;
// presumably an opening brace always has a successor here -- confirm.
123 if (!LBrace
|| !LBrace
->isOneOf(tok::l_brace
, TT_ArrayInitializerLSquare
) ||
124 LBrace
->is(BK_Block
) || LBrace
->is(TT_DictLiteral
) ||
125 LBrace
->Next
->is(TT_DesignatedInitializerPeriod
)) {
129 // Calculate the number of code points we have to format this list. As the
130 // first token is already placed, we have to subtract it.
// NOTE(review): the result is stored in an unsigned; if State.Column can
// exceed ColumnLimit + ColumnWidth this wraps around -- confirm the invariant.
131 unsigned RemainingCodePoints
=
132 Style
.ColumnLimit
- State
.Column
+ State
.NextToken
->Previous
->ColumnWidth
;
134 // Find the best ColumnFormat, i.e. the best number of columns to use.
135 const ColumnFormat
*Format
= getColumnFormat(RemainingCodePoints
);
137 // If no ColumnFormat can be used, the braced list would generally be
138 // bin-packed. Add a severe penalty to this so that column layouts are
139 // preferred if possible.
143 // Format the entire list.
144 unsigned Penalty
= 0;
// Walk tokens until the closing token that matches LBrace is next.
147 while (State
.NextToken
!= LBrace
->MatchingParen
) {
148 bool NewLine
= false;
149 unsigned ExtraSpaces
= 0;
151 // If the previous token was one of our commas, we are now on the next item.
152 if (Item
< Commas
.size() && State
.NextToken
->Previous
== Commas
[Item
]) {
// Pad by the difference between the column's width and this item's length,
// except in front of trailing comments.
153 if (!State
.NextToken
->isTrailingComment()) {
154 ExtraSpaces
+= Format
->ColumnSizes
[Column
] - ItemLengths
[Item
];
// Row is full, or the token requires a break before it.
160 if (Column
== Format
->Columns
|| State
.NextToken
->MustBreakBefore
) {
165 // Place token using the continuation indenter and store the penalty.
166 Penalty
+= Indenter
->addTokenToState(State
, NewLine
, DryRun
, ExtraSpaces
);
/// Marks the innermost entry of \p State's stack as AvoidBinPacking when the
/// only available layout is the single-column one or the list contains a
/// nested braced list.
/// NOTE(review): the end of the parameter list and the returned value are not
/// visible in this excerpt.
171 unsigned CommaSeparatedList::formatFromToken(LineState
&State
,
172 ContinuationIndenter
*Indenter
,
174 // Formatting with 1 Column isn't really a column layout, so we don't need the
175 // special logic here. We can just avoid bin packing any of the parameters.
176 if (Formats
.size() == 1 || HasNestedBracedList
)
177 State
.Stack
.back().AvoidBinPacking
= true;
181 // Returns the lengths in code points between Begin and End (both included),
182 // assuming that the entire sequence is put on a single line.
// Computed as End->TotalLength - Begin->TotalLength + Begin->ColumnWidth;
// the addition of Begin's own width is what makes Begin "included".
183 static unsigned CodePointsBetween(const FormatToken
*Begin
,
184 const FormatToken
*End
) {
// Precondition: End's TotalLength is not smaller than Begin's, so the
// unsigned subtraction below cannot wrap.
185 assert(End
->TotalLength
>= Begin
->TotalLength
);
186 return End
->TotalLength
- Begin
->TotalLength
+ Begin
->ColumnWidth
;
/// Precomputes layout data for the list opened by \p Token: the length of
/// every item (ItemLengths), whether the list has a nested braced list, and
/// the candidate column layouts appended to Formats (one attempt per column
/// count from 1 up to ColumnLimit / 3).
/// NOTE(review): the statements executed by the early-exit checks, several
/// closing braces/else branches and the Column bookkeeping inside the layout
/// loop are not visible in this excerpt.
189 void CommaSeparatedList::precomputeFormattingInfos(const FormatToken
*Token
) {
190 // FIXME: At some point we might want to do this for other lists, too.
191 if (!Token
->MatchingParen
||
192 !Token
->isOneOf(tok::l_brace
, TT_ArrayInitializerLSquare
)) {
196 // In C++11 braced list style, we should not format in columns unless they
197 // have many items (20 or more) or we allow bin-packing of function call
// arguments (see the Style.BinPackArguments check below). Fewer than 19 commas
// corresponds to fewer than 20 items.
199 if (Style
.Cpp11BracedListStyle
&& !Style
.BinPackArguments
&&
200 Commas
.size() < 19) {
204 // Limit column layout for JavaScript array initializers to 20 or more items
205 // for now to introduce it carefully. We can become more aggressive if this
207 if (Token
->is(TT_ArrayInitializerLSquare
) && Commas
.size() < 19)
210 // Column format doesn't really make sense if we don't align after brackets.
211 if (Style
.AlignAfterOpenBracket
== FormatStyle::BAS_DontAlign
)
// Start at the first token after the opening bracket, skipping leading
// trailing-comment tokens.
// NOTE(review): ItemBegin->Next is followed without a null check in this
// loop -- confirm a non-comment token always follows.
214 FormatToken
*ItemBegin
= Token
->Next
;
215 while (ItemBegin
->isTrailingComment())
216 ItemBegin
= ItemBegin
->Next
;
// Per-item flag: the item's first token has MustBreakBefore set.
217 SmallVector
<bool, 8> MustBreakBeforeItem
;
219 // The lengths of an item if it is put at the end of the line. This includes
220 // trailing comments which are otherwise ignored for column alignment.
221 SmallVector
<unsigned, 8> EndOfLineItemLength
;
// One slot per item: number of commas plus one.
222 MustBreakBeforeItem
.reserve(Commas
.size() + 1);
223 EndOfLineItemLength
.reserve(Commas
.size() + 1);
224 ItemLengths
.reserve(Commas
.size() + 1);
226 bool HasSeparatingComment
= false;
227 for (unsigned i
= 0, e
= Commas
.size() + 1; i
!= e
; ++i
) {
229 // Skip comments on their own line.
230 while (ItemBegin
->HasUnescapedNewline
&& ItemBegin
->isTrailingComment()) {
231 ItemBegin
= ItemBegin
->Next
;
// A skipped comment counts as "separating" only after the first item.
232 HasSeparatingComment
= i
> 0;
235 MustBreakBeforeItem
.push_back(ItemBegin
->MustBreakBefore
);
236 if (ItemBegin
->is(tok::l_brace
))
237 HasNestedBracedList
= true;
238 const FormatToken
*ItemEnd
= nullptr;
// Last item: it is delimited by the closing bracket instead of a comma.
239 if (i
== Commas
.size()) {
240 ItemEnd
= Token
->MatchingParen
;
241 const FormatToken
*NonCommentEnd
= ItemEnd
->getPreviousNonComment();
242 ItemLengths
.push_back(CodePointsBetween(ItemBegin
, NonCommentEnd
));
243 if (Style
.Cpp11BracedListStyle
&&
244 !ItemEnd
->Previous
->isTrailingComment()) {
245 // In Cpp11 braced list style, the } and possibly other subsequent
246 // tokens will need to stay on a line with the last element.
247 while (ItemEnd
->Next
&& !ItemEnd
->Next
->CanBreakBefore
)
248 ItemEnd
= ItemEnd
->Next
;
250 // In other braced lists styles, the "}" can be wrapped to the new line.
251 ItemEnd
= Token
->MatchingParen
->Previous
;
255 // The comma is counted as part of the item when calculating the length.
256 ItemLengths
.push_back(CodePointsBetween(ItemBegin
, ItemEnd
));
258 // Consume trailing comments so they are included in EndOfLineItemLength.
259 if (ItemEnd
->Next
&& !ItemEnd
->Next
->HasUnescapedNewline
&&
260 ItemEnd
->Next
->isTrailingComment()) {
261 ItemEnd
= ItemEnd
->Next
;
264 EndOfLineItemLength
.push_back(CodePointsBetween(ItemBegin
, ItemEnd
));
265 // If there is a trailing comma in the list, the next item will start at the
266 // closing brace. Don't create an extra item for this.
267 if (ItemEnd
->getNextNonComment() == Token
->MatchingParen
)
269 ItemBegin
= ItemEnd
->Next
;
272 // Don't use column layout for lists with few elements and in presence of
273 // separating comments.
274 if (Commas
.size() < 5 || HasSeparatingComment
)
// Nested brace lists are additionally gated on having at least 19 commas.
277 if (Token
->NestingLevel
!= 0 && Token
->is(tok::l_brace
) && Commas
.size() < 19)
280 // We can never place more than ColumnLimit / 3 items in a row (because of the
281 // spaces and the comma).
282 unsigned MaxItems
= Style
.ColumnLimit
/ 3;
// Smallest item length seen per column, used for the spread check below.
283 SmallVector
<unsigned> MinSizeInColumn
;
284 MinSizeInColumn
.reserve(MaxItems
);
// Try every column count from 1 to MaxItems and keep the viable layouts.
285 for (unsigned Columns
= 1; Columns
<= MaxItems
; ++Columns
) {
287 Format
.Columns
= Columns
;
288 Format
.ColumnSizes
.resize(Columns
);
289 MinSizeInColumn
.assign(Columns
, UINT_MAX
);
290 Format
.LineCount
= 1;
291 bool HasRowWithSufficientColumns
= false;
293 for (unsigned i
= 0, e
= ItemLengths
.size(); i
!= e
; ++i
) {
294 assert(i
< MustBreakBeforeItem
.size());
// A forced break or a full row starts a new row.
295 if (MustBreakBeforeItem
[i
] || Column
== Columns
) {
299 if (Column
== Columns
- 1)
300 HasRowWithSufficientColumns
= true;
// Items in the last column use their end-of-line length (which includes
// trailing comments); all others use the plain item length.
302 (Column
== Columns
- 1) ? EndOfLineItemLength
[i
] : ItemLengths
[i
];
303 Format
.ColumnSizes
[Column
] = std::max(Format
.ColumnSizes
[Column
], Length
);
304 MinSizeInColumn
[Column
] = std::min(MinSizeInColumn
[Column
], Length
);
307 // If all rows are terminated early (e.g. by trailing comments), we don't
308 // need to look further.
309 if (!HasRowWithSufficientColumns
)
311 Format
.TotalWidth
= Columns
- 1; // Width of the N-1 spaces.
313 for (unsigned i
= 0; i
< Columns
; ++i
)
314 Format
.TotalWidth
+= Format
.ColumnSizes
[i
];
316 // Don't use this Format, if the difference between the longest and shortest
317 // element in a column exceeds a threshold to avoid excessive spaces.
// The threshold is 10, and the last column is excluded from the check.
319 for (unsigned i
= 0; i
< Columns
- 1; ++i
)
320 if (Format
.ColumnSizes
[i
] - MinSizeInColumn
[i
] > 10)
327 // Ignore layouts that are bound to violate the column limit.
328 if (Format
.TotalWidth
> Style
.ColumnLimit
&& Columns
> 1)
331 Formats
.push_back(Format
);
/// Selects a layout from Formats for a line with \p RemainingCharacters of
/// space left. Formats are visited in reverse order of insertion; a format is
/// a candidate when its TotalWidth fits or when it has exactly one column.
/// NOTE(review): what happens when a candidate needs more lines than the
/// current best, and the final return, are not visible in this excerpt;
/// presumably the search stops and BestFormat (possibly null) is returned --
/// confirm.
335 const CommaSeparatedList::ColumnFormat
*
336 CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters
) const {
337 const ColumnFormat
*BestFormat
= nullptr;
338 for (const ColumnFormat
&Format
: llvm::reverse(Formats
)) {
339 if (Format
.TotalWidth
<= RemainingCharacters
|| Format
.Columns
== 1) {
// Compare the candidate's line count with the best one found so far.
340 if (BestFormat
&& Format
.LineCount
> BestFormat
->LineCount
)
342 BestFormat
= &Format
;
348 } // namespace format