1 //===--- FormatToken.cpp - Format C++ code --------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file implements specific functions of \c FormatTokens and their roles.
13 //===----------------------------------------------------------------------===//
15 #include "FormatToken.h"
16 #include "ContinuationIndenter.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/Support/Debug.h"
/// Looks up \p Type in the function-local \c TokNames table and returns the
/// stored C string.
///
/// NOTE(review): the initializer of \c TokNames and the end of the function
/// are not visible in this excerpt.
24 const char *getTokenTypeName(TokenType Type
) {
25 static const char *const TokNames
[] = {
// Only values below NUM_TOKEN_TYPES are used as an index into the table.
31 if (Type
< NUM_TOKEN_TYPES
)
32 return TokNames
[Type
];
// Every other value is reported through llvm_unreachable.
33 llvm_unreachable("unknown TokenType");
37 // FIXME: This is copy&pasted from Sema. Put it in a common place and remove the duplication.
/// Classifies this token by switching on the kind of \c Tok.
///
/// The visible case labels cover builtin type keywords (for example
/// \c __int128, \c unsigned, \c _Float16, \c __float128, \c __ibm128,
/// \c char16_t, \c char32_t, \c decltype), the \c annot_typename annotation
/// and one label per entry of TransformTypeTraits.def.
///
/// NOTE(review): further case labels and every return statement are missing
/// from this excerpt; presumably the listed kinds yield true - confirm against
/// the complete file.
39 bool FormatToken::isSimpleTypeSpecifier() const {
40 switch (Tok
.getKind()) {
44 case tok::kw___int128
:
46 case tok::kw_unsigned
:
54 case tok::kw__Float16
:
55 case tok::kw___float128
:
56 case tok::kw___ibm128
:
// Expands to one `case tok::kw___<Trait>:` label for each
// TRANSFORM_TYPE_TRAIT_DEF entry in the included .def file.
59 #define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait:
60 #include "clang/Basic/TransformTypeTraits.def"
61 case tok::annot_typename
:
63 case tok::kw_char16_t
:
64 case tok::kw_char32_t
:
66 case tok::kw_decltype
:
74 bool FormatToken::isTypeOrIdentifier() const {
75 return isSimpleTypeSpecifier() || Tok
.isOneOf(tok::kw_auto
, tok::identifier
);
/// Decides, based on this token's kind/type, its block kind, its nesting level
/// and \p Style, whether the token opens a block or a block-type list.
///
/// NOTE(review): the statements controlled by the first two conditions and one
/// line of the final return expression are missing from this excerpt.
78 bool FormatToken::opensBlockOrBlockTypeList(const FormatStyle
&Style
) const {
79 // C# does not indent object initialisers as continuations.
80 if (is(tok::l_brace
) && getBlockKind() == BK_BracedInit
&& Style
.isCSharp())
// Template strings that open a scope are handled separately.
82 if (is(TT_TemplateString
) && opensScope())
// Array-initializer and proto-extension squares always qualify; the remaining
// operands depend on the block kind, dict literals, the braced-list style at
// nesting level 0, and `<` in the Proto / TextProto languages.
84 return is(TT_ArrayInitializerLSquare
) || is(TT_ProtoExtensionLSquare
) ||
86 (getBlockKind() == BK_Block
|| is(TT_DictLiteral
) ||
87 (!Style
.Cpp11BracedListStyle
&& NestingLevel
== 0))) ||
88 (is(tok::less
) && (Style
.Language
== FormatStyle::LK_Proto
||
89 Style
.Language
== FormatStyle::LK_TextProto
));
92 TokenRole::~TokenRole() {}
94 void TokenRole::precomputeFormattingInfos(const FormatToken
*Token
) {}
/// Places the tokens of a comma-separated list through \p Indenter, using the
/// column layout selected by getColumnFormat(), and accumulates the penalty
/// returned by each ContinuationIndenter::addTokenToState() call.
///
/// NOTE(review): several lines are missing from this excerpt - the remaining
/// parameter(s) (a \c DryRun value is used below), the bodies of the early
/// exits, the declarations of \c Column and \c Item, the handling of a null
/// \c Format, and the final return.
96 unsigned CommaSeparatedList::formatAfterToken(LineState
&State
,
97 ContinuationIndenter
*Indenter
,
// Both the next token and its predecessor are required below.
99 if (!State
.NextToken
|| !State
.NextToken
->Previous
)
102 if (Formats
.size() == 1)
103 return 0; // Handled by formatFromToken
105 // Ensure that we start on the opening brace.
106 const FormatToken
*LBrace
=
107 State
.NextToken
->Previous
->getPreviousNonComment();
// Only a non-block, non-dict `{` or array-initializer `[` whose next token is
// not a designated-initializer period passes this check.
108 if (!LBrace
|| !LBrace
->isOneOf(tok::l_brace
, TT_ArrayInitializerLSquare
) ||
109 LBrace
->is(BK_Block
) || LBrace
->is(TT_DictLiteral
) ||
110 LBrace
->Next
->is(TT_DesignatedInitializerPeriod
)) {
114 // Calculate the number of code points we have to format this list. As the
115 // first token is already placed, we have to subtract it.
116 unsigned RemainingCodePoints
=
117 Style
.ColumnLimit
- State
.Column
+ State
.NextToken
->Previous
->ColumnWidth
;
119 // Find the best ColumnFormat, i.e. the best number of columns to use.
120 const ColumnFormat
*Format
= getColumnFormat(RemainingCodePoints
);
122 // If no ColumnFormat can be used, the braced list would generally be
123 // bin-packed. Add a severe penalty to this so that column layouts are
124 // preferred if possible.
128 // Format the entire list.
129 unsigned Penalty
= 0;
// Walk the tokens until the paren matching LBrace is the next token.
132 while (State
.NextToken
!= LBrace
->MatchingParen
) {
133 bool NewLine
= false;
134 unsigned ExtraSpaces
= 0;
136 // If the previous token was one of our commas, we are now on the next item.
137 if (Item
< Commas
.size() && State
.NextToken
->Previous
== Commas
[Item
]) {
// Pad with the difference between the column's size and this item's length,
// unless the next token is a trailing comment.
138 if (!State
.NextToken
->isTrailingComment()) {
139 ExtraSpaces
+= Format
->ColumnSizes
[Column
] - ItemLengths
[Item
];
// Checked when the row is full or the token demands a break.
145 if (Column
== Format
->Columns
|| State
.NextToken
->MustBreakBefore
) {
150 // Place token using the continuation indenter and store the penalty.
151 Penalty
+= Indenter
->addTokenToState(State
, NewLine
, DryRun
, ExtraSpaces
);
/// Sets \c AvoidBinPacking on the innermost entry of \c State.Stack when only
/// one column format exists or the list contains a nested braced list.
///
/// NOTE(review): the remaining parameter(s) and the return statement are
/// missing from this excerpt; \p Indenter is not used in the visible lines.
156 unsigned CommaSeparatedList::formatFromToken(LineState
&State
,
157 ContinuationIndenter
*Indenter
,
159 // Formatting with 1 Column isn't really a column layout, so we don't need the
160 // special logic here. We can just avoid bin packing any of the parameters.
161 if (Formats
.size() == 1 || HasNestedBracedList
)
162 State
.Stack
.back().AvoidBinPacking
= true;
166 // Returns the lengths in code points between Begin and End (both included),
167 // assuming that the entire sequence is put on a single line.
168 static unsigned CodePointsBetween(const FormatToken
*Begin
,
169 const FormatToken
*End
) {
170 assert(End
->TotalLength
>= Begin
->TotalLength
);
171 return End
->TotalLength
- Begin
->TotalLength
+ Begin
->ColumnWidth
;
/// Precomputes layout data for the list opened by \p Token: the length of
/// every item (\c ItemLengths), whether it contains a nested braced list
/// (\c HasNestedBracedList), and the candidate column layouts (\c Formats),
/// one per admissible column count.
///
/// NOTE(review): many lines are missing from this excerpt - the bodies of the
/// early exits, the non-last-item branch that sets \c ItemEnd, the
/// declarations of \c Format, \c Column and \c Length, and the statements
/// controlled by several conditions. The comments below describe only what is
/// visible.
174 void CommaSeparatedList::precomputeFormattingInfos(const FormatToken
*Token
) {
175 // FIXME: At some point we might want to do this for other lists, too.
176 if (!Token
->MatchingParen
||
177 !Token
->isOneOf(tok::l_brace
, TT_ArrayInitializerLSquare
)) {
181 // In C++11 braced list style, we should not format in columns unless they
182 // have many items (20 or more) or we allow bin-packing of function call arguments.
184 if (Style
.Cpp11BracedListStyle
&& !Style
.BinPackArguments
&&
185 Commas
.size() < 19) {
189 // Limit column layout for JavaScript array initializers to 20 or more items
190 // for now to introduce it carefully. We can become more aggressive if this
192 if (Token
->is(TT_ArrayInitializerLSquare
) && Commas
.size() < 19)
195 // Column format doesn't really make sense if we don't align after brackets.
196 if (Style
.AlignAfterOpenBracket
== FormatStyle::BAS_DontAlign
)
// The first item starts after the opening token and any trailing comments
// that follow it.
199 FormatToken
*ItemBegin
= Token
->Next
;
200 while (ItemBegin
->isTrailingComment())
201 ItemBegin
= ItemBegin
->Next
;
202 SmallVector
<bool, 8> MustBreakBeforeItem
;
204 // The lengths of an item if it is put at the end of the line. This includes
205 // trailing comments which are otherwise ignored for column alignment.
206 SmallVector
<unsigned, 8> EndOfLineItemLength
;
// Room for one entry per item, i.e. one more than the number of commas.
207 MustBreakBeforeItem
.reserve(Commas
.size() + 1);
208 EndOfLineItemLength
.reserve(Commas
.size() + 1);
209 ItemLengths
.reserve(Commas
.size() + 1);
211 bool HasSeparatingComment
= false;
// One iteration per item; the item with index Commas.size() is the last one.
212 for (unsigned i
= 0, e
= Commas
.size() + 1; i
!= e
; ++i
) {
214 // Skip comments on their own line.
215 while (ItemBegin
->HasUnescapedNewline
&& ItemBegin
->isTrailingComment()) {
216 ItemBegin
= ItemBegin
->Next
;
217 HasSeparatingComment
= i
> 0;
220 MustBreakBeforeItem
.push_back(ItemBegin
->MustBreakBefore
);
221 if (ItemBegin
->is(tok::l_brace
))
222 HasNestedBracedList
= true;
223 const FormatToken
*ItemEnd
= nullptr;
// Last item: it is measured up to the last non-comment token before the
// closing paren.
224 if (i
== Commas
.size()) {
225 ItemEnd
= Token
->MatchingParen
;
226 const FormatToken
*NonCommentEnd
= ItemEnd
->getPreviousNonComment();
227 ItemLengths
.push_back(CodePointsBetween(ItemBegin
, NonCommentEnd
));
228 if (Style
.Cpp11BracedListStyle
&&
229 !ItemEnd
->Previous
->isTrailingComment()) {
230 // In Cpp11 braced list style, the } and possibly other subsequent
231 // tokens will need to stay on a line with the last element.
232 while (ItemEnd
->Next
&& !ItemEnd
->Next
->CanBreakBefore
)
233 ItemEnd
= ItemEnd
->Next
;
235 // In other braced lists styles, the "}" can be wrapped to the new line.
236 ItemEnd
= Token
->MatchingParen
->Previous
;
240 // The comma is counted as part of the item when calculating the length.
241 ItemLengths
.push_back(CodePointsBetween(ItemBegin
, ItemEnd
));
243 // Consume trailing comments so they are included in EndOfLineItemLength.
244 if (ItemEnd
->Next
&& !ItemEnd
->Next
->HasUnescapedNewline
&&
245 ItemEnd
->Next
->isTrailingComment()) {
246 ItemEnd
= ItemEnd
->Next
;
249 EndOfLineItemLength
.push_back(CodePointsBetween(ItemBegin
, ItemEnd
));
250 // If there is a trailing comma in the list, the next item will start at the
251 // closing brace. Don't create an extra item for this.
252 if (ItemEnd
->getNextNonComment() == Token
->MatchingParen
)
254 ItemBegin
= ItemEnd
->Next
;
257 // Don't use column layout for lists with few elements and in presence of
258 // separating comments.
259 if (Commas
.size() < 5 || HasSeparatingComment
)
// Nested `{` lists are checked against the same 19-comma threshold used above.
262 if (Token
->NestingLevel
!= 0 && Token
->is(tok::l_brace
) && Commas
.size() < 19)
265 // We can never place more than ColumnLimit / 3 items in a row (because of the
266 // spaces and the comma).
267 unsigned MaxItems
= Style
.ColumnLimit
/ 3;
268 SmallVector
<unsigned> MinSizeInColumn
;
269 MinSizeInColumn
.reserve(MaxItems
);
// Try every column count from 1 up to MaxItems.
270 for (unsigned Columns
= 1; Columns
<= MaxItems
; ++Columns
) {
272 Format
.Columns
= Columns
;
273 Format
.ColumnSizes
.resize(Columns
);
// Start each column's minimum at UINT_MAX so the first std::min() below
// replaces it.
274 MinSizeInColumn
.assign(Columns
, UINT_MAX
);
275 Format
.LineCount
= 1;
276 bool HasRowWithSufficientColumns
= false;
// Distribute the items over the columns and track, per column, the largest
// and smallest item length.
278 for (unsigned i
= 0, e
= ItemLengths
.size(); i
!= e
; ++i
) {
279 assert(i
< MustBreakBeforeItem
.size());
280 if (MustBreakBeforeItem
[i
] || Column
== Columns
) {
284 if (Column
== Columns
- 1)
285 HasRowWithSufficientColumns
= true;
287 (Column
== Columns
- 1) ? EndOfLineItemLength
[i
] : ItemLengths
[i
];
288 Format
.ColumnSizes
[Column
] = std::max(Format
.ColumnSizes
[Column
], Length
);
289 MinSizeInColumn
[Column
] = std::min(MinSizeInColumn
[Column
], Length
);
292 // If all rows are terminated early (e.g. by trailing comments), we don't
293 // need to look further.
294 if (!HasRowWithSufficientColumns
)
296 Format
.TotalWidth
= Columns
- 1; // Width of the N-1 spaces.
298 for (unsigned i
= 0; i
< Columns
; ++i
)
299 Format
.TotalWidth
+= Format
.ColumnSizes
[i
];
301 // Don't use this Format, if the difference between the longest and shortest
302 // element in a column exceeds a threshold to avoid excessive spaces.
// The last column is excluded from this check (the loop stops at Columns - 1).
304 for (unsigned i
= 0; i
< Columns
- 1; ++i
)
305 if (Format
.ColumnSizes
[i
] - MinSizeInColumn
[i
] > 10)
312 // Ignore layouts that are bound to violate the column limit.
313 if (Format
.TotalWidth
> Style
.ColumnLimit
&& Columns
> 1)
316 Formats
.push_back(Format
);
/// Searches \c Formats, visited in reverse order, for a layout to use with
/// \p RemainingCharacters code points available. A format is considered when
/// its \c TotalWidth fits into \p RemainingCharacters or when it has exactly
/// one column.
///
/// NOTE(review): the statement controlled by the \c LineCount comparison and
/// the final return are missing from this excerpt; presumably \c BestFormat
/// (possibly null) is returned - confirm against the complete file.
320 const CommaSeparatedList::ColumnFormat
*
321 CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters
) const {
322 const ColumnFormat
*BestFormat
= nullptr;
323 for (const ColumnFormat
&Format
: llvm::reverse(Formats
)) {
324 if (Format
.TotalWidth
<= RemainingCharacters
|| Format
.Columns
== 1) {
// Compares this candidate's line count with that of the best format so far.
325 if (BestFormat
&& Format
.LineCount
> BestFormat
->LineCount
)
327 BestFormat
= &Format
;
333 } // namespace format