[DFAJumpThreading] Remove incoming StartBlock from all phis when unfolding select...
[llvm-project.git] / clang / lib / Format / ContinuationIndenter.h
blob057b85bd32d5051f56bfd2d22a3651bea51f4acc
1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements an indenter that manages the indentation of
11 /// continuations.
12 ///
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
16 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
18 #include "Encoding.h"
19 #include "FormatToken.h"
20 #include "clang/Format/Format.h"
21 #include "llvm/Support/Regex.h"
22 #include <map>
23 #include <optional>
24 #include <tuple>
26 namespace clang {
27 class SourceManager;
29 namespace format {
31 class AnnotatedLine;
32 class BreakableToken;
33 struct FormatToken;
34 struct LineState;
35 struct ParenState;
36 struct RawStringFormatStyleManager;
37 class WhitespaceManager;
39 struct RawStringFormatStyleManager {
40 llvm::StringMap<FormatStyle> DelimiterStyle;
41 llvm::StringMap<FormatStyle> EnclosingFunctionStyle;
43 RawStringFormatStyleManager(const FormatStyle &CodeStyle);
45 std::optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const;
47 std::optional<FormatStyle>
48 getEnclosingFunctionStyle(StringRef EnclosingFunction) const;
51 class ContinuationIndenter {
52 public:
53 /// Constructs a \c ContinuationIndenter to format \p Line starting in
54 /// column \p FirstIndent.
55 ContinuationIndenter(const FormatStyle &Style,
56 const AdditionalKeywords &Keywords,
57 const SourceManager &SourceMgr,
58 WhitespaceManager &Whitespaces,
59 encoding::Encoding Encoding,
60 bool BinPackInconclusiveFunctions);
62 /// Get the initial state, i.e. the state after placing \p Line's
63 /// first token at \p FirstIndent. When reformatting a fragment of code, as in
64 /// the case of formatting inside raw string literals, \p FirstStartColumn is
65 /// the column at which the state of the parent formatter is.
66 LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,
67 const AnnotatedLine *Line, bool DryRun);
69 // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
70 // better home.
71 /// Returns \c true, if a line break after \p State is allowed.
72 bool canBreak(const LineState &State);
74 /// Returns \c true, if a line break after \p State is mandatory.
75 bool mustBreak(const LineState &State);
77 /// Appends the next token to \p State and updates information
78 /// necessary for indentation.
79 ///
80 /// Puts the token on the current line if \p Newline is \c false and adds a
81 /// line break and necessary indentation otherwise.
82 ///
83 /// If \p DryRun is \c false, also creates and stores the required
84 /// \c Replacement.
85 unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
86 unsigned ExtraSpaces = 0);
88 /// Get the column limit for this line. This is the style's column
89 /// limit, potentially reduced for preprocessor definitions.
90 unsigned getColumnLimit(const LineState &State) const;
92 private:
93 /// Mark the next token as consumed in \p State and modify its stacks
94 /// accordingly.
95 unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
97 /// Update 'State' according to the next token's fake left parentheses.
98 void moveStatePastFakeLParens(LineState &State, bool Newline);
99 /// Update 'State' according to the next token's fake r_parens.
100 void moveStatePastFakeRParens(LineState &State);
102 /// Update 'State' according to the next token being one of "(<{[".
103 void moveStatePastScopeOpener(LineState &State, bool Newline);
104 /// Update 'State' according to the next token being one of ")>}]".
105 void moveStatePastScopeCloser(LineState &State);
106 /// Update 'State' with the next token opening a nested block.
107 void moveStateToNewBlock(LineState &State);
109 /// Reformats a raw string literal.
111 /// \returns An extra penalty induced by reformatting the token.
112 unsigned reformatRawStringLiteral(const FormatToken &Current,
113 LineState &State,
114 const FormatStyle &RawStringStyle,
115 bool DryRun, bool Newline);
117 /// If the current token is at the end of the current line, handle
118 /// the transition to the next line.
119 unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
120 bool DryRun, bool AllowBreak, bool Newline);
122 /// If \p Current is a raw string that is configured to be reformatted,
123 /// return the style to be used.
124 std::optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
125 const LineState &State);
127 /// If the current token sticks out over the end of the line, break
128 /// it if possible.
130 /// \returns A pair (penalty, exceeded), where penalty is the extra penalty
131 /// when tokens are broken or lines exceed the column limit, and exceeded
132 /// indicates whether the algorithm purposefully left lines exceeding the
133 /// column limit.
135 /// The returned penalty will cover the cost of the additional line breaks
136 /// and column limit violation in all lines except for the last one. The
137 /// penalty for the column limit violation in the last line (and in single
138 /// line tokens) is handled in \c addNextStateToQueue.
140 /// \p Strict indicates whether reflowing is allowed to leave characters
141 /// protruding the column limit; if true, lines will be split strictly within
142 /// the column limit where possible; if false, words are allowed to protrude
143 /// over the column limit as long as the penalty is less than the penalty
144 /// of a break.
145 std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
146 LineState &State,
147 bool AllowBreak, bool DryRun,
148 bool Strict);
150 /// Returns the \c BreakableToken starting at \p Current, or nullptr
151 /// if the current token cannot be broken.
152 std::unique_ptr<BreakableToken>
153 createBreakableToken(const FormatToken &Current, LineState &State,
154 bool AllowBreak);
156 /// Appends the next token to \p State and updates information
157 /// necessary for indentation.
159 /// Puts the token on the current line.
161 /// If \p DryRun is \c false, also creates and stores the required
162 /// \c Replacement.
163 void addTokenOnCurrentLine(LineState &State, bool DryRun,
164 unsigned ExtraSpaces);
166 /// Appends the next token to \p State and updates information
167 /// necessary for indentation.
169 /// Adds a line break and necessary indentation.
171 /// If \p DryRun is \c false, also creates and stores the required
172 /// \c Replacement.
173 unsigned addTokenOnNewLine(LineState &State, bool DryRun);
175 /// Calculate the new column for a line wrap before the next token.
176 unsigned getNewLineColumn(const LineState &State);
178 /// Adds a multiline token to the \p State.
180 /// \returns Extra penalty for the first line of the literal: last line is
181 /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
182 /// matter, as we don't change them.
183 unsigned addMultilineToken(const FormatToken &Current, LineState &State);
185 /// Returns \c true if the next token starts a multiline string
186 /// literal.
188 /// This includes implicitly concatenated strings, strings that will be broken
189 /// by clang-format and string literals with escaped newlines.
190 bool nextIsMultilineString(const LineState &State);
192 FormatStyle Style;
193 const AdditionalKeywords &Keywords;
194 const SourceManager &SourceMgr;
195 WhitespaceManager &Whitespaces;
196 encoding::Encoding Encoding;
197 bool BinPackInconclusiveFunctions;
198 llvm::Regex CommentPragmasRegex;
199 const RawStringFormatStyleManager RawStringFormats;
202 struct ParenState {
203 ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace,
204 bool AvoidBinPacking, bool NoLineBreak)
205 : Tok(Tok), Indent(Indent), LastSpace(LastSpace),
206 NestedBlockIndent(Indent), IsAligned(false),
207 BreakBeforeClosingBrace(false), BreakBeforeClosingParen(false),
208 AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
209 NoLineBreak(NoLineBreak), NoLineBreakInOperand(false),
210 LastOperatorWrapped(true), ContainsLineBreak(false),
211 ContainsUnwrappedBuilder(false), AlignColons(true),
212 ObjCSelectorNameFound(false), HasMultipleNestedBlocks(false),
213 NestedBlockInlined(false), IsInsideObjCArrayLiteral(false),
214 IsCSharpGenericTypeConstraint(false), IsChainedConditional(false),
215 IsWrappedConditional(false), UnindentOperator(false) {}
217 /// \brief The token opening this parenthesis level, or nullptr if this level
218 /// is opened by fake parenthesis.
220 /// Not considered for memoization as it will always have the same value at
221 /// the same token.
222 const FormatToken *Tok;
224 /// The position to which a specific parenthesis level needs to be
225 /// indented.
226 unsigned Indent;
228 /// The position of the last space on each level.
230 /// Used e.g. to break like:
231 /// functionCall(Parameter, otherCall(
232 /// OtherParameter));
233 unsigned LastSpace;
235 /// If a block relative to this parenthesis level gets wrapped, indent
236 /// it this much.
237 unsigned NestedBlockIndent;
239 /// The position the first "<<" operator encountered on each level.
241 /// Used to align "<<" operators. 0 if no such operator has been encountered
242 /// on a level.
243 unsigned FirstLessLess = 0;
245 /// The column of a \c ? in a conditional expression;
246 unsigned QuestionColumn = 0;
248 /// The position of the colon in an ObjC method declaration/call.
249 unsigned ColonPos = 0;
251 /// The start of the most recent function in a builder-type call.
252 unsigned StartOfFunctionCall = 0;
254 /// Contains the start of array subscript expressions, so that they
255 /// can be aligned.
256 unsigned StartOfArraySubscripts = 0;
258 /// If a nested name specifier was broken over multiple lines, this
259 /// contains the start column of the second line. Otherwise 0.
260 unsigned NestedNameSpecifierContinuation = 0;
262 /// If a call expression was broken over multiple lines, this
263 /// contains the start column of the second line. Otherwise 0.
264 unsigned CallContinuation = 0;
266 /// The column of the first variable name in a variable declaration.
268 /// Used to align further variables if necessary.
269 unsigned VariablePos = 0;
271 /// Whether this block's indentation is used for alignment.
272 bool IsAligned : 1;
274 /// Whether a newline needs to be inserted before the block's closing
275 /// brace.
277 /// We only want to insert a newline before the closing brace if there also
278 /// was a newline after the beginning left brace.
279 bool BreakBeforeClosingBrace : 1;
281 /// Whether a newline needs to be inserted before the block's closing
282 /// paren.
284 /// We only want to insert a newline before the closing paren if there also
285 /// was a newline after the beginning left paren.
286 bool BreakBeforeClosingParen : 1;
288 /// Avoid bin packing, i.e. multiple parameters/elements on multiple
289 /// lines, in this context.
290 bool AvoidBinPacking : 1;
292 /// Break after the next comma (or all the commas in this context if
293 /// \c AvoidBinPacking is \c true).
294 bool BreakBeforeParameter : 1;
296 /// Line breaking in this context would break a formatting rule.
297 bool NoLineBreak : 1;
299 /// Same as \c NoLineBreak, but is restricted until the end of the
300 /// operand (including the next ",").
301 bool NoLineBreakInOperand : 1;
303 /// True if the last binary operator on this level was wrapped to the
304 /// next line.
305 bool LastOperatorWrapped : 1;
307 /// \c true if this \c ParenState already contains a line-break.
309 /// The first line break in a certain \c ParenState causes extra penalty so
310 /// that clang-format prefers similar breaks, i.e. breaks in the same
311 /// parenthesis.
312 bool ContainsLineBreak : 1;
314 /// \c true if this \c ParenState contains multiple segments of a
315 /// builder-type call on one line.
316 bool ContainsUnwrappedBuilder : 1;
318 /// \c true if the colons of the curren ObjC method expression should
319 /// be aligned.
321 /// Not considered for memoization as it will always have the same value at
322 /// the same token.
323 bool AlignColons : 1;
325 /// \c true if at least one selector name was found in the current
326 /// ObjC method expression.
328 /// Not considered for memoization as it will always have the same value at
329 /// the same token.
330 bool ObjCSelectorNameFound : 1;
332 /// \c true if there are multiple nested blocks inside these parens.
334 /// Not considered for memoization as it will always have the same value at
335 /// the same token.
336 bool HasMultipleNestedBlocks : 1;
338 /// The start of a nested block (e.g. lambda introducer in C++ or
339 /// "function" in JavaScript) is not wrapped to a new line.
340 bool NestedBlockInlined : 1;
342 /// \c true if the current \c ParenState represents an Objective-C
343 /// array literal.
344 bool IsInsideObjCArrayLiteral : 1;
346 bool IsCSharpGenericTypeConstraint : 1;
348 /// \brief true if the current \c ParenState represents the false branch of
349 /// a chained conditional expression (e.g. else-if)
350 bool IsChainedConditional : 1;
352 /// \brief true if there conditionnal was wrapped on the first operator (the
353 /// question mark)
354 bool IsWrappedConditional : 1;
356 /// \brief Indicates the indent should be reduced by the length of the
357 /// operator.
358 bool UnindentOperator : 1;
360 bool operator<(const ParenState &Other) const {
361 if (Indent != Other.Indent)
362 return Indent < Other.Indent;
363 if (LastSpace != Other.LastSpace)
364 return LastSpace < Other.LastSpace;
365 if (NestedBlockIndent != Other.NestedBlockIndent)
366 return NestedBlockIndent < Other.NestedBlockIndent;
367 if (FirstLessLess != Other.FirstLessLess)
368 return FirstLessLess < Other.FirstLessLess;
369 if (IsAligned != Other.IsAligned)
370 return IsAligned;
371 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
372 return BreakBeforeClosingBrace;
373 if (BreakBeforeClosingParen != Other.BreakBeforeClosingParen)
374 return BreakBeforeClosingParen;
375 if (QuestionColumn != Other.QuestionColumn)
376 return QuestionColumn < Other.QuestionColumn;
377 if (AvoidBinPacking != Other.AvoidBinPacking)
378 return AvoidBinPacking;
379 if (BreakBeforeParameter != Other.BreakBeforeParameter)
380 return BreakBeforeParameter;
381 if (NoLineBreak != Other.NoLineBreak)
382 return NoLineBreak;
383 if (LastOperatorWrapped != Other.LastOperatorWrapped)
384 return LastOperatorWrapped;
385 if (ColonPos != Other.ColonPos)
386 return ColonPos < Other.ColonPos;
387 if (StartOfFunctionCall != Other.StartOfFunctionCall)
388 return StartOfFunctionCall < Other.StartOfFunctionCall;
389 if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
390 return StartOfArraySubscripts < Other.StartOfArraySubscripts;
391 if (CallContinuation != Other.CallContinuation)
392 return CallContinuation < Other.CallContinuation;
393 if (VariablePos != Other.VariablePos)
394 return VariablePos < Other.VariablePos;
395 if (ContainsLineBreak != Other.ContainsLineBreak)
396 return ContainsLineBreak;
397 if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
398 return ContainsUnwrappedBuilder;
399 if (NestedBlockInlined != Other.NestedBlockInlined)
400 return NestedBlockInlined;
401 if (IsCSharpGenericTypeConstraint != Other.IsCSharpGenericTypeConstraint)
402 return IsCSharpGenericTypeConstraint;
403 if (IsChainedConditional != Other.IsChainedConditional)
404 return IsChainedConditional;
405 if (IsWrappedConditional != Other.IsWrappedConditional)
406 return IsWrappedConditional;
407 if (UnindentOperator != Other.UnindentOperator)
408 return UnindentOperator;
409 return false;
413 /// The current state when indenting a unwrapped line.
415 /// As the indenting tries different combinations this is copied by value.
416 struct LineState {
417 /// The number of used columns in the current line.
418 unsigned Column;
420 /// The token that needs to be next formatted.
421 FormatToken *NextToken;
423 /// \c true if \p NextToken should not continue this line.
424 bool NoContinuation;
426 /// The \c NestingLevel at the start of this line.
427 unsigned StartOfLineLevel;
429 /// The lowest \c NestingLevel on the current line.
430 unsigned LowestLevelOnLine;
432 /// The start column of the string literal, if we're in a string
433 /// literal sequence, 0 otherwise.
434 unsigned StartOfStringLiteral;
436 /// Disallow line breaks for this line.
437 bool NoLineBreak;
439 /// A stack keeping track of properties applying to parenthesis
440 /// levels.
441 SmallVector<ParenState> Stack;
443 /// Ignore the stack of \c ParenStates for state comparison.
445 /// In long and deeply nested unwrapped lines, the current algorithm can
446 /// be insufficient for finding the best formatting with a reasonable amount
447 /// of time and memory. Setting this flag will effectively lead to the
448 /// algorithm not analyzing some combinations. However, these combinations
449 /// rarely contain the optimal solution: In short, accepting a higher
450 /// penalty early would need to lead to different values in the \c
451 /// ParenState stack (in an otherwise identical state) and these different
452 /// values would need to lead to a significant amount of avoided penalty
453 /// later.
455 /// FIXME: Come up with a better algorithm instead.
456 bool IgnoreStackForComparison;
458 /// The indent of the first token.
459 unsigned FirstIndent;
461 /// The line that is being formatted.
463 /// Does not need to be considered for memoization because it doesn't change.
464 const AnnotatedLine *Line;
466 /// Comparison operator to be able to used \c LineState in \c map.
467 bool operator<(const LineState &Other) const {
468 if (NextToken != Other.NextToken)
469 return NextToken < Other.NextToken;
470 if (Column != Other.Column)
471 return Column < Other.Column;
472 if (NoContinuation != Other.NoContinuation)
473 return NoContinuation;
474 if (StartOfLineLevel != Other.StartOfLineLevel)
475 return StartOfLineLevel < Other.StartOfLineLevel;
476 if (LowestLevelOnLine != Other.LowestLevelOnLine)
477 return LowestLevelOnLine < Other.LowestLevelOnLine;
478 if (StartOfStringLiteral != Other.StartOfStringLiteral)
479 return StartOfStringLiteral < Other.StartOfStringLiteral;
480 if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
481 return false;
482 return Stack < Other.Stack;
486 } // end namespace format
487 } // end namespace clang
489 #endif