1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file implements an indenter that manages the indentation of
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
16 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
19 #include "FormatToken.h"
20 #include "clang/Format/Format.h"
21 #include "llvm/Support/Regex.h"
36 struct RawStringFormatStyleManager
;
37 class WhitespaceManager
;
39 struct RawStringFormatStyleManager
{
40 llvm::StringMap
<FormatStyle
> DelimiterStyle
;
41 llvm::StringMap
<FormatStyle
> EnclosingFunctionStyle
;
43 RawStringFormatStyleManager(const FormatStyle
&CodeStyle
);
45 std::optional
<FormatStyle
> getDelimiterStyle(StringRef Delimiter
) const;
47 std::optional
<FormatStyle
>
48 getEnclosingFunctionStyle(StringRef EnclosingFunction
) const;
51 class ContinuationIndenter
{
53 /// Constructs a \c ContinuationIndenter to format \p Line starting in
54 /// column \p FirstIndent.
55 ContinuationIndenter(const FormatStyle
&Style
,
56 const AdditionalKeywords
&Keywords
,
57 const SourceManager
&SourceMgr
,
58 WhitespaceManager
&Whitespaces
,
59 encoding::Encoding Encoding
,
60 bool BinPackInconclusiveFunctions
);
62 /// Get the initial state, i.e. the state after placing \p Line's
63 /// first token at \p FirstIndent. When reformatting a fragment of code, as in
64 /// the case of formatting inside raw string literals, \p FirstStartColumn is
65 /// the column at which the state of the parent formatter is.
66 LineState
getInitialState(unsigned FirstIndent
, unsigned FirstStartColumn
,
67 const AnnotatedLine
*Line
, bool DryRun
);
69 // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
71 /// Returns \c true, if a line break after \p State is allowed.
72 bool canBreak(const LineState
&State
);
74 /// Returns \c true, if a line break after \p State is mandatory.
75 bool mustBreak(const LineState
&State
);
77 /// Appends the next token to \p State and updates information
78 /// necessary for indentation.
80 /// Puts the token on the current line if \p Newline is \c false and adds a
81 /// line break and necessary indentation otherwise.
83 /// If \p DryRun is \c false, also creates and stores the required
85 unsigned addTokenToState(LineState
&State
, bool Newline
, bool DryRun
,
86 unsigned ExtraSpaces
= 0);
88 /// Get the column limit for this line. This is the style's column
89 /// limit, potentially reduced for preprocessor definitions.
90 unsigned getColumnLimit(const LineState
&State
) const;
93 /// Mark the next token as consumed in \p State and modify its stacks
95 unsigned moveStateToNextToken(LineState
&State
, bool DryRun
, bool Newline
);
97 /// Update 'State' according to the next token's fake left parentheses.
98 void moveStatePastFakeLParens(LineState
&State
, bool Newline
);
99 /// Update 'State' according to the next token's fake r_parens.
100 void moveStatePastFakeRParens(LineState
&State
);
102 /// Update 'State' according to the next token being one of "(<{[".
103 void moveStatePastScopeOpener(LineState
&State
, bool Newline
);
104 /// Update 'State' according to the next token being one of ")>}]".
105 void moveStatePastScopeCloser(LineState
&State
);
106 /// Update 'State' with the next token opening a nested block.
107 void moveStateToNewBlock(LineState
&State
);
109 /// Reformats a raw string literal.
111 /// \returns An extra penalty induced by reformatting the token.
112 unsigned reformatRawStringLiteral(const FormatToken
&Current
,
114 const FormatStyle
&RawStringStyle
,
115 bool DryRun
, bool Newline
);
117 /// If the current token is at the end of the current line, handle
118 /// the transition to the next line.
119 unsigned handleEndOfLine(const FormatToken
&Current
, LineState
&State
,
120 bool DryRun
, bool AllowBreak
, bool Newline
);
122 /// If \p Current is a raw string that is configured to be reformatted,
123 /// return the style to be used.
124 std::optional
<FormatStyle
> getRawStringStyle(const FormatToken
&Current
,
125 const LineState
&State
);
127 /// If the current token sticks out over the end of the line, break
130 /// \returns A pair (penalty, exceeded), where penalty is the extra penalty
131 /// when tokens are broken or lines exceed the column limit, and exceeded
132 /// indicates whether the algorithm purposefully left lines exceeding the
135 /// The returned penalty will cover the cost of the additional line breaks
136 /// and column limit violation in all lines except for the last one. The
137 /// penalty for the column limit violation in the last line (and in single
138 /// line tokens) is handled in \c addNextStateToQueue.
140 /// \p Strict indicates whether reflowing is allowed to leave characters
141 /// protruding the column limit; if true, lines will be split strictly within
142 /// the column limit where possible; if false, words are allowed to protrude
143 /// over the column limit as long as the penalty is less than the penalty
// NOTE(review): several sentences of the comment above are cut off in this
// copy (after "break", after "exceeding the" and after "less than the
// penalty"); restore them from the original header.
145 std::pair
<unsigned, bool> breakProtrudingToken(const FormatToken
&Current
,
147 bool AllowBreak
, bool DryRun
,
// NOTE(review): the declaration of breakProtrudingToken stops here without a
// closing ")" or ";". The doc comment describes a \p Strict parameter that is
// not present in the visible parameter list -- the tail of the declaration
// needs to be restored before this compiles.
150 /// Returns the \c BreakableToken starting at \p Current, or nullptr
151 /// if the current token cannot be broken.
152 std::unique_ptr
<BreakableToken
>
153 createBreakableToken(const FormatToken
&Current
, LineState
&State
,
// NOTE(review): createBreakableToken is likewise truncated after the trailing
// comma; the remaining parameter(s) and the closing ");" are missing here.
156 /// Appends the next token to \p State and updates information
157 /// necessary for indentation.
159 /// Puts the token on the current line.
161 /// If \p DryRun is \c false, also creates and stores the required
163 void addTokenOnCurrentLine(LineState
&State
, bool DryRun
,
164 unsigned ExtraSpaces
);
166 /// Appends the next token to \p State and updates information
167 /// necessary for indentation.
169 /// Adds a line break and necessary indentation.
171 /// If \p DryRun is \c false, also creates and stores the required
173 unsigned addTokenOnNewLine(LineState
&State
, bool DryRun
);
175 /// Calculate the new column for a line wrap before the next token.
176 unsigned getNewLineColumn(const LineState
&State
);
178 /// Adds a multiline token to the \p State.
180 /// \returns Extra penalty for the first line of the literal: last line is
181 /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
182 /// matter, as we don't change them.
183 unsigned addMultilineToken(const FormatToken
&Current
, LineState
&State
);
185 /// Returns \c true if the next token starts a multiline string
188 /// This includes implicitly concatenated strings, strings that will be broken
189 /// by clang-format and string literals with escaped newlines.
190 bool nextIsMultilineString(const LineState
&State
);
193 const AdditionalKeywords
&Keywords
;
194 const SourceManager
&SourceMgr
;
195 WhitespaceManager
&Whitespaces
;
196 encoding::Encoding Encoding
;
197 bool BinPackInconclusiveFunctions
;
198 llvm::Regex CommentPragmasRegex
;
199 const RawStringFormatStyleManager RawStringFormats
;
203 ParenState(const FormatToken
*Tok
, unsigned Indent
, unsigned LastSpace
,
204 bool AvoidBinPacking
, bool NoLineBreak
)
205 : Tok(Tok
), Indent(Indent
), LastSpace(LastSpace
),
206 NestedBlockIndent(Indent
), IsAligned(false),
207 BreakBeforeClosingBrace(false), BreakBeforeClosingParen(false),
208 AvoidBinPacking(AvoidBinPacking
), BreakBeforeParameter(false),
209 NoLineBreak(NoLineBreak
), NoLineBreakInOperand(false),
210 LastOperatorWrapped(true), ContainsLineBreak(false),
211 ContainsUnwrappedBuilder(false), AlignColons(true),
212 ObjCSelectorNameFound(false), HasMultipleNestedBlocks(false),
213 NestedBlockInlined(false), IsInsideObjCArrayLiteral(false),
214 IsCSharpGenericTypeConstraint(false), IsChainedConditional(false),
215 IsWrappedConditional(false), UnindentOperator(false) {}
217 /// \brief The token opening this parenthesis level, or nullptr if this level
218 /// is opened by fake parenthesis.
220 /// Not considered for memoization as it will always have the same value at
222 const FormatToken
*Tok
;
224 /// The position to which a specific parenthesis level needs to be
228 /// The position of the last space on each level.
230 /// Used e.g. to break like:
231 /// functionCall(Parameter, otherCall(
232 /// OtherParameter));
235 /// If a block relative to this parenthesis level gets wrapped, indent
237 unsigned NestedBlockIndent
;
239 /// The position the first "<<" operator encountered on each level.
241 /// Used to align "<<" operators. 0 if no such operator has been encountered
243 unsigned FirstLessLess
= 0;
245 /// The column of a \c ? in a conditional expression;
246 unsigned QuestionColumn
= 0;
248 /// The position of the colon in an ObjC method declaration/call.
249 unsigned ColonPos
= 0;
251 /// The start of the most recent function in a builder-type call.
252 unsigned StartOfFunctionCall
= 0;
254 /// Contains the start of array subscript expressions, so that they
256 unsigned StartOfArraySubscripts
= 0;
258 /// If a nested name specifier was broken over multiple lines, this
259 /// contains the start column of the second line. Otherwise 0.
260 unsigned NestedNameSpecifierContinuation
= 0;
262 /// If a call expression was broken over multiple lines, this
263 /// contains the start column of the second line. Otherwise 0.
264 unsigned CallContinuation
= 0;
266 /// The column of the first variable name in a variable declaration.
268 /// Used to align further variables if necessary.
269 unsigned VariablePos
= 0;
271 /// Whether this block's indentation is used for alignment.
274 /// Whether a newline needs to be inserted before the block's closing
277 /// We only want to insert a newline before the closing brace if there also
278 /// was a newline after the beginning left brace.
279 bool BreakBeforeClosingBrace
: 1;
281 /// Whether a newline needs to be inserted before the block's closing
284 /// We only want to insert a newline before the closing paren if there also
285 /// was a newline after the beginning left paren.
286 bool BreakBeforeClosingParen
: 1;
288 /// Avoid bin packing, i.e. multiple parameters/elements on multiple
289 /// lines, in this context.
290 bool AvoidBinPacking
: 1;
292 /// Break after the next comma (or all the commas in this context if
293 /// \c AvoidBinPacking is \c true).
294 bool BreakBeforeParameter
: 1;
296 /// Line breaking in this context would break a formatting rule.
297 bool NoLineBreak
: 1;
299 /// Same as \c NoLineBreak, but is restricted until the end of the
300 /// operand (including the next ",").
301 bool NoLineBreakInOperand
: 1;
303 /// True if the last binary operator on this level was wrapped to the
305 bool LastOperatorWrapped
: 1;
307 /// \c true if this \c ParenState already contains a line-break.
309 /// The first line break in a certain \c ParenState causes extra penalty so
310 /// that clang-format prefers similar breaks, i.e. breaks in the same
312 bool ContainsLineBreak
: 1;
314 /// \c true if this \c ParenState contains multiple segments of a
315 /// builder-type call on one line.
316 bool ContainsUnwrappedBuilder
: 1;
318 /// \c true if the colons of the curren ObjC method expression should
321 /// Not considered for memoization as it will always have the same value at
323 bool AlignColons
: 1;
325 /// \c true if at least one selector name was found in the current
326 /// ObjC method expression.
328 /// Not considered for memoization as it will always have the same value at
330 bool ObjCSelectorNameFound
: 1;
332 /// \c true if there are multiple nested blocks inside these parens.
334 /// Not considered for memoization as it will always have the same value at
336 bool HasMultipleNestedBlocks
: 1;
338 /// The start of a nested block (e.g. lambda introducer in C++ or
339 /// "function" in JavaScript) is not wrapped to a new line.
340 bool NestedBlockInlined
: 1;
342 /// \c true if the current \c ParenState represents an Objective-C
344 bool IsInsideObjCArrayLiteral
: 1;
346 bool IsCSharpGenericTypeConstraint
: 1;
348 /// \brief true if the current \c ParenState represents the false branch of
349 /// a chained conditional expression (e.g. else-if)
350 bool IsChainedConditional
: 1;
352 /// \brief true if there conditionnal was wrapped on the first operator (the
354 bool IsWrappedConditional
: 1;
356 /// \brief Indicates the indent should be reduced by the length of the
358 bool UnindentOperator
: 1;
360 bool operator<(const ParenState
&Other
) const {
361 if (Indent
!= Other
.Indent
)
362 return Indent
< Other
.Indent
;
363 if (LastSpace
!= Other
.LastSpace
)
364 return LastSpace
< Other
.LastSpace
;
365 if (NestedBlockIndent
!= Other
.NestedBlockIndent
)
366 return NestedBlockIndent
< Other
.NestedBlockIndent
;
367 if (FirstLessLess
!= Other
.FirstLessLess
)
368 return FirstLessLess
< Other
.FirstLessLess
;
369 if (IsAligned
!= Other
.IsAligned
)
371 if (BreakBeforeClosingBrace
!= Other
.BreakBeforeClosingBrace
)
372 return BreakBeforeClosingBrace
;
373 if (BreakBeforeClosingParen
!= Other
.BreakBeforeClosingParen
)
374 return BreakBeforeClosingParen
;
375 if (QuestionColumn
!= Other
.QuestionColumn
)
376 return QuestionColumn
< Other
.QuestionColumn
;
377 if (AvoidBinPacking
!= Other
.AvoidBinPacking
)
378 return AvoidBinPacking
;
379 if (BreakBeforeParameter
!= Other
.BreakBeforeParameter
)
380 return BreakBeforeParameter
;
381 if (NoLineBreak
!= Other
.NoLineBreak
)
383 if (LastOperatorWrapped
!= Other
.LastOperatorWrapped
)
384 return LastOperatorWrapped
;
385 if (ColonPos
!= Other
.ColonPos
)
386 return ColonPos
< Other
.ColonPos
;
387 if (StartOfFunctionCall
!= Other
.StartOfFunctionCall
)
388 return StartOfFunctionCall
< Other
.StartOfFunctionCall
;
389 if (StartOfArraySubscripts
!= Other
.StartOfArraySubscripts
)
390 return StartOfArraySubscripts
< Other
.StartOfArraySubscripts
;
391 if (CallContinuation
!= Other
.CallContinuation
)
392 return CallContinuation
< Other
.CallContinuation
;
393 if (VariablePos
!= Other
.VariablePos
)
394 return VariablePos
< Other
.VariablePos
;
395 if (ContainsLineBreak
!= Other
.ContainsLineBreak
)
396 return ContainsLineBreak
;
397 if (ContainsUnwrappedBuilder
!= Other
.ContainsUnwrappedBuilder
)
398 return ContainsUnwrappedBuilder
;
399 if (NestedBlockInlined
!= Other
.NestedBlockInlined
)
400 return NestedBlockInlined
;
401 if (IsCSharpGenericTypeConstraint
!= Other
.IsCSharpGenericTypeConstraint
)
402 return IsCSharpGenericTypeConstraint
;
403 if (IsChainedConditional
!= Other
.IsChainedConditional
)
404 return IsChainedConditional
;
405 if (IsWrappedConditional
!= Other
.IsWrappedConditional
)
406 return IsWrappedConditional
;
407 if (UnindentOperator
!= Other
.UnindentOperator
)
408 return UnindentOperator
;
/// The current state when indenting an unwrapped line.
///
/// As the indenting tries different combinations this is copied by value.
417 /// The number of used columns in the current line.
420 /// The token that needs to be next formatted.
421 FormatToken
*NextToken
;
423 /// \c true if \p NextToken should not continue this line.
426 /// The \c NestingLevel at the start of this line.
427 unsigned StartOfLineLevel
;
429 /// The lowest \c NestingLevel on the current line.
430 unsigned LowestLevelOnLine
;
432 /// The start column of the string literal, if we're in a string
433 /// literal sequence, 0 otherwise.
434 unsigned StartOfStringLiteral
;
436 /// A stack keeping track of properties applying to parenthesis
438 SmallVector
<ParenState
> Stack
;
440 /// Ignore the stack of \c ParenStates for state comparison.
442 /// In long and deeply nested unwrapped lines, the current algorithm can
443 /// be insufficient for finding the best formatting with a reasonable amount
444 /// of time and memory. Setting this flag will effectively lead to the
445 /// algorithm not analyzing some combinations. However, these combinations
446 /// rarely contain the optimal solution: In short, accepting a higher
447 /// penalty early would need to lead to different values in the \c
448 /// ParenState stack (in an otherwise identical state) and these different
449 /// values would need to lead to a significant amount of avoided penalty
452 /// FIXME: Come up with a better algorithm instead.
453 bool IgnoreStackForComparison
;
455 /// The indent of the first token.
456 unsigned FirstIndent
;
458 /// The line that is being formatted.
460 /// Does not need to be considered for memoization because it doesn't change.
461 const AnnotatedLine
*Line
;
463 /// Comparison operator to be able to used \c LineState in \c map.
464 bool operator<(const LineState
&Other
) const {
465 if (NextToken
!= Other
.NextToken
)
466 return NextToken
< Other
.NextToken
;
467 if (Column
!= Other
.Column
)
468 return Column
< Other
.Column
;
469 if (NoContinuation
!= Other
.NoContinuation
)
470 return NoContinuation
;
471 if (StartOfLineLevel
!= Other
.StartOfLineLevel
)
472 return StartOfLineLevel
< Other
.StartOfLineLevel
;
473 if (LowestLevelOnLine
!= Other
.LowestLevelOnLine
)
474 return LowestLevelOnLine
< Other
.LowestLevelOnLine
;
475 if (StartOfStringLiteral
!= Other
.StartOfStringLiteral
)
476 return StartOfStringLiteral
< Other
.StartOfStringLiteral
;
477 if (IgnoreStackForComparison
|| Other
.IgnoreStackForComparison
)
479 return Stack
< Other
.Stack
;
483 } // end namespace format
484 } // end namespace clang