1 //===--- WhitespaceManager.h - Format C++ code ------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// WhitespaceManager class manages whitespace around tokens and their
/// replacements.
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
16 #define LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
18 #include "TokenAnnotator.h"
19 #include "clang/Basic/SourceManager.h"
20 #include "clang/Format/Format.h"
21 #include "llvm/ADT/SmallVector.h"
29 /// Manages the whitespaces around tokens and their replacements.
31 /// This includes special handling for certain constructs, e.g. the alignment of
32 /// trailing line comments.
34 /// To guarantee correctness of alignment operations, the \c WhitespaceManager
35 /// must be informed about every token in the source file; for each token, there
36 /// must be exactly one call to either \c replaceWhitespace or
37 /// \c addUntouchableToken.
39 /// There may be multiple calls to \c replaceWhitespaceInToken for a given
/// token.
40 class WhitespaceManager
{
/// Creates a manager that stores references to \p SourceMgr and \p Style (both
/// members are reference-typed, so the referenced objects must outlive this
/// manager) and initializes the \c UseCRLF member.
/// NOTE(review): the initializer list reads a \c UseCRLF parameter whose
/// declaration is not visible in this listing.
42 WhitespaceManager(const SourceManager
&SourceMgr
, const FormatStyle
&Style
,
44 : SourceMgr(SourceMgr
), Style(Style
), UseCRLF(UseCRLF
) {}
/// Returns the \c UseCRLF member that was set by the constructor.
46 bool useCRLF() const { return UseCRLF
; }
48 /// Infers whether the input is using CRLF.
///
/// \p Text is the input to inspect. Being \c static, the function does not
/// depend on any manager state.
/// NOTE(review): \p DefaultToCRLF is presumably the answer given when
/// \p Text is inconclusive; confirm against the definition.
49 static bool inputUsesCRLF(StringRef Text
, bool DefaultToCRLF
);
51 /// Replaces the whitespace in front of \p Tok. Only call once for
52 /// each \c FormatToken.
///
/// NOTE(review): \p Newlines and \p Spaces are presumably the number of line
/// breaks and spaces to emit before the token; confirm against the definition.
54 /// \p StartOfTokenColumn is the column at which the token will start after
55 /// this replacement. It is needed for determining how \p Spaces is turned
56 /// into tabs and spaces for some format styles.
///
/// \p isAligned and \p InPPDirective both default to \c false.
/// NOTE(review): \p isAligned is the only lower-camel-case parameter name in
/// this header; the matching \c Change constructor parameter is \c IsAligned.
57 void replaceWhitespace(FormatToken
&Tok
, unsigned Newlines
, unsigned Spaces
,
58 unsigned StartOfTokenColumn
, bool isAligned
= false,
59 bool InPPDirective
= false);
61 /// Adds information about an unchangeable token's whitespace.
63 /// Needs to be called for every token for which \c replaceWhitespace
/// was not called, so that the manager is informed about every token in the
/// source file.
65 void addUntouchableToken(const FormatToken
&Tok
, bool InPPDirective
);
/// Hands an externally created \p Replacement to this manager.
///
/// The outcome is reported through the returned \c llvm::Error.
/// NOTE(review): presumably this adds \p Replacement to \c Replaces and the
/// error signals a conflict with an existing replacement; confirm against the
/// definition.
67 llvm::Error
addReplacement(const tooling::Replacement
&Replacement
);
69 /// Inserts or replaces whitespace in the middle of a token.
71 /// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix
72 /// (in this order) at \p Offset inside \p Tok, replacing \p ReplaceChars
/// characters.
75 /// Note: \p Spaces can be negative to retain information about initial
76 /// relative column offset between a line of a block comment and the start of
77 /// the comment. This negative offset may be compensated by trailing comment
78 /// alignment here. In all other cases negative \p Spaces will be truncated to
/// 0.
81 /// When \p InPPDirective is true, escaped newlines are inserted. \p Spaces is
82 /// used to align backslashes correctly.
83 void replaceWhitespaceInToken(const FormatToken
&Tok
, unsigned Offset
,
84 unsigned ReplaceChars
,
85 StringRef PreviousPostfix
,
86 StringRef CurrentPrefix
, bool InPPDirective
,
87 unsigned Newlines
, int Spaces
);
89 /// Returns all the \c Replacements created during formatting.
///
/// The result is handed out by const reference, and the function itself is
/// not \c const.
/// NOTE(review): presumably it finalizes the recorded \c Changes before
/// returning \c Replaces; confirm against the definition.
90 const tooling::Replacements
&generateReplacements();
92 /// Represents a change before a token, a break inside a token,
93 /// or the layout of an unchanged token (or whitespace within).
95 /// Functor to sort changes in original source order.
///
/// It stores a reference to the \c SourceManager given to its constructor, so
/// that object must outlive the functor.
96 class IsBeforeInFile
{
// Binds the functor to \p SourceMgr; nothing is copied.
98 IsBeforeInFile(const SourceManager
&SourceMgr
) : SourceMgr(SourceMgr
) {}
// Comparison callback taking two changes.
// NOTE(review): presumably returns true when \p C1 comes before \p C2 in the
// file; confirm against the definition.
99 bool operator()(const Change
&C1
, const Change
&C2
) const;
// Source manager bound at construction time.
102 const SourceManager
&SourceMgr
;
105 /// Creates a \c Change.
107 /// The generated \c Change will replace the characters at
108 /// \p OriginalWhitespaceRange with a concatenation of
109 /// \p PreviousLinePostfix, \p NewlinesBefore line breaks, \p Spaces spaces
110 /// and \p CurrentLinePrefix.
112 /// \p StartOfTokenColumn and \p ContinuesPPDirective will be used to lay out
113 /// trailing comments and escaped newlines.
///
/// NOTE(review): \p Tok, \p CreateReplacement, \p IsAligned and
/// \p IsInsideToken presumably initialize the members of the same name;
/// confirm against the definition.
114 Change(const FormatToken
&Tok
, bool CreateReplacement
,
115 SourceRange OriginalWhitespaceRange
, int Spaces
,
116 unsigned StartOfTokenColumn
, unsigned NewlinesBefore
,
117 StringRef PreviousLinePostfix
, StringRef CurrentLinePrefix
,
118 bool IsAligned
, bool ContinuesPPDirective
, bool IsInsideToken
);
120 // The token whose whitespace this change replaces, or in which
121 // this change inserts whitespace.
122 // FIXME: Currently this is not set correctly for breaks inside comments, as
123 // the \c BreakableToken is still doing its own alignment.
124 const FormatToken
*Tok
;
// NOTE(review): presumably false for changes that only record layout (such as
// untouchable tokens) and must not produce a replacement; confirm.
126 bool CreateReplacement
;
127 // Changes might be in the middle of a token, so we cannot just keep the
128 // FormatToken around to query its information.
129 SourceRange OriginalWhitespaceRange
;
// Column information used to lay out trailing comments and escaped newlines.
130 unsigned StartOfTokenColumn
;
// Number of line breaks emitted by this change.
131 unsigned NewlinesBefore
;
// Text emitted before the line breaks and after the spaces, respectively.
// Held as owning std::string although the constructor takes StringRef.
132 std::string PreviousLinePostfix
;
133 std::string CurrentLinePrefix
;
// Used together with \c StartOfTokenColumn when laying out escaped newlines.
135 bool ContinuesPPDirective
;
137 // The number of spaces in front of the token or broken part of the token.
138 // This will be adapted when aligning tokens.
139 // Can be negative to retain information about the initial relative offset
140 // of the lines in a block comment. This is used when aligning trailing
141 // comments. Uncompensated negative offset is truncated to 0.
144 // True if this change is inside of a token but not at the start of the
145 // token or directly after a newline.
148 // \c IsTrailingComment, \c TokenLength, \c PreviousEndOfTokenColumn and
149 // \c EscapedNewlineColumn will be calculated in
150 // \c calculateLineBreakInformation.
// \c IsTrailingComment and \c TokenLength are computed for the last token or
// token part in a line.
151 bool IsTrailingComment
;
152 unsigned TokenLength
;
// \c PreviousEndOfTokenColumn and \c EscapedNewlineColumn are computed for
// the first token or token part in a line.
153 unsigned PreviousEndOfTokenColumn
;
154 unsigned EscapedNewlineColumn
;
156 // These fields are used to retain correct relative line indentation in a
157 // block comment when aligning trailing comments.
159 // If this Change represents a continuation of a block comment,
160 // \c StartOfBlockComment is a pointer to the first Change in the block
161 // comment. \c IndentationOffset is a relative column offset to this
162 // change, so that the correct column can be reconstructed at the end of
163 // the alignment process.
164 const Change
*StartOfBlockComment
;
165 int IndentationOffset
;
167 // Depth of conditionals. Computed from tracking fake parentheses, except
168 // that it does not increase the indent for "chained" conditionals.
169 int ConditionalsLevel
;
171 // A combination of indent, nesting and conditionals levels, which are used
172 // in tandem to compute lexical scope, for the purposes of deciding
173 // when to stop consecutive alignment runs.
// The first two elements are read from \c Tok, so \c Tok must be non-null
// when this is called.
// NOTE(review): the third element is presumably \c ConditionalsLevel, which
// is an int while the tuple element is unsigned; if so, the value is
// implicitly converted. Confirm.
174 std::tuple
<unsigned, unsigned, unsigned> indentAndNestingLevel() const {
175 return std::make_tuple(Tok
->IndentLevel
, Tok
->NestingLevel
,
// Describes one cell of an array initializer that is being aligned.
// \c Index and \c EndIndex are passed to \c calculateCellWidth as the start
// and end of the cell, and \c Index is also used to subscript \c Changes.
181 struct CellDescription
{
184 unsigned EndIndex
= 0;
// NOTE(review): presumably set when the cell is split over several lines;
// see \c isSplitCell.
185 bool HasSplit
= false;
// Next cell in the chain walked by the column-width helpers; null at the end
// of the chain.
186 CellDescription
*NextColumnElement
= nullptr;
// Two descriptions are equal when \c Index, \c Cell and \c EndIndex match.
// \c HasSplit and \c NextColumnElement do not take part in the comparison.
188 constexpr bool operator==(const CellDescription
&Other
) const {
189 return Index
== Other
.Index
&& Cell
== Other
.Cell
&&
190 EndIndex
== Other
.EndIndex
;
// Defined as the negation of \c operator==.
192 constexpr bool operator!=(const CellDescription
&Other
) const {
193 return !(*this == Other
);
// The cells of an array initializer, as produced by \c getCells and consumed
// by the array alignment functions.
197 struct CellDescriptions
{
// All cell descriptions.
// NOTE(review): presumably stored row after row; confirm against
// \c getCells.
198 SmallVector
<CellDescription
> Cells
;
// One column count per row; see \c isRectangular.
199 SmallVector
<unsigned> CellCounts
;
// Passed as the starting width to the net-width helpers.
200 unsigned InitialSpaces
= 0;
202 // Determine if every row in the array
203 // has the same number of columns.
// An empty \c CellCounts is handled separately before the loop, which
// compares every entry against the first one.
204 bool isRectangular() const {
205 if (CellCounts
.empty())
208 for (auto NumberOfColumns
: CellCounts
)
209 if (NumberOfColumns
!= CellCounts
[0])
215 /// Calculate \c IsTrailingComment, \c TokenLength for the last tokens
216 /// or token parts in a line and \c PreviousEndOfTokenColumn and
217 /// \c EscapedNewlineColumn for the first tokens or token parts in a line.
///
/// These four \c Change members are not set through the constructor
/// parameters; they are filled in by this function.
218 void calculateLineBreakInformation();
220 /// Align consecutive C/C++ preprocessor macros over all \c Changes.
221 void alignConsecutiveMacros();
223 /// Align consecutive assignments over all \c Changes.
224 void alignConsecutiveAssignments();
226 /// Align consecutive bitfields over all \c Changes.
227 void alignConsecutiveBitFields();
229 /// Align consecutive declarations over all \c Changes.
230 void alignConsecutiveDeclarations();
232 /// Align chained conditionals over all \c Changes.
233 void alignChainedConditionals();
235 /// Align consecutive short case statements over all \c Changes.
236 void alignConsecutiveShortCaseStatements();
238 /// Align trailing comments over all \c Changes.
239 void alignTrailingComments();
241 /// Align trailing comments from change \p Start to change \p End at
242 /// the specified \p Column. \p Start and \p End are indices into
/// \c Changes.
243 void alignTrailingComments(unsigned Start
, unsigned End
, unsigned Column
);
245 /// Align escaped newlines over all \c Changes.
246 void alignEscapedNewlines();
248 /// Align escaped newlines from change \p Start to change \p End at
249 /// the specified \p Column. \p Start and \p End are indices into
/// \c Changes.
250 void alignEscapedNewlines(unsigned Start
, unsigned End
, unsigned Column
);
252 /// Align Array Initializers over all \c Changes.
253 void alignArrayInitializers();
255 /// Align Array Initializers from change \p Start to change \p End
256 /// of the change list.
257 void alignArrayInitializers(unsigned Start
, unsigned End
);
259 /// Align Array Initializers being careful to right justify the columns
260 /// as described by \p CellDescs. \p CellDescs is taken by rvalue reference.
261 void alignArrayInitializersRightJustified(CellDescriptions
&&CellDescs
);
263 /// Align Array Initializers being careful to left justify the columns
264 /// as described by \p CellDescs. \p CellDescs is taken by rvalue reference.
265 void alignArrayInitializersLeftJustified(CellDescriptions
&&CellDescs
);
267 /// Calculate the cell width between two indexes.
///
/// \p Start and \p End are supplied from \c CellDescription::Index and
/// \c CellDescription::EndIndex by the width helpers in this class.
/// \p WithSpaces defaults to \c false.
/// NOTE(review): \p WithSpaces presumably adds each change's \c Spaces to the
/// width; confirm against the definition.
268 unsigned calculateCellWidth(unsigned Start
, unsigned End
,
269 bool WithSpaces
= false) const;
271 /// Get a set of fully specified CellDescriptions between \p Start and
272 /// \p End of the change list.
273 CellDescriptions
getCells(unsigned Start
, unsigned End
);
275 /// Does this \p Cell contain a split element?
276 static bool isSplitCell(const CellDescription
&Cell
);
278 /// Get the width of the preceding cells from \p Start to \p End.
///
/// \p I is an iterator whose elements expose \c Index and \c EndIndex. The
/// range is half-open: \p End itself is not visited. The running width starts
/// at \p InitialSpaces.
279 template <typename I
>
280 auto getNetWidth(const I
&Start
, const I
&End
, unsigned InitialSpaces
) const {
281 auto NetWidth
= InitialSpaces
;
282 for (auto PrevIter
= Start
; PrevIter
!= End
; ++PrevIter
) {
283 // If we broke the line the initial spaces are already
// accounted for.
285 if (Changes
[PrevIter
->Index
].NewlinesBefore
> 0)
// Width of this cell including spaces, plus one extra column.
// NOTE(review): the extra column is presumably the separator after the cell.
288 calculateCellWidth(PrevIter
->Index
, PrevIter
->EndIndex
, true) + 1;
293 /// Get the maximum width of a cell in a sequence of columns.
///
/// Starts with the cell at \p CellIter and then follows its
/// \c NextColumnElement chain until a null pointer is reached, keeping the
/// largest width seen. \p NetWidth is added to a cell's own width only when
/// that cell's first change has no preceding newline.
294 template <typename I
>
295 unsigned getMaximumCellWidth(I CellIter
, unsigned NetWidth
) const {
297 calculateCellWidth(CellIter
->Index
, CellIter
->EndIndex
, true);
// A cell that continues the current line also carries the preceding width.
298 if (Changes
[CellIter
->Index
].NewlinesBefore
== 0)
299 CellWidth
+= NetWidth
;
// Visit the remaining cells linked through NextColumnElement.
300 for (const auto *Next
= CellIter
->NextColumnElement
; Next
;
301 Next
= Next
->NextColumnElement
) {
302 auto ThisWidth
= calculateCellWidth(Next
->Index
, Next
->EndIndex
, true);
303 if (Changes
[Next
->Index
].NewlinesBefore
== 0)
304 ThisWidth
+= NetWidth
;
305 CellWidth
= std::max(CellWidth
, ThisWidth
);
310 /// Get the maximum width of all columns to a given cell.
///
/// The initial candidate is the net width of [\p CellStart, \p CellStop).
/// For each further element on the \c NextColumnElement chain of
/// \p CellStop, the net width of a range of the same length, starting at
/// \p CellStart + RowCount * \p CellCount, is compared against the running
/// maximum. \p I must support iterator arithmetic (\c + and
/// \c std::distance).
/// NOTE(review): the start computation assumes every earlier row holds exactly
/// \p CellCount cells; confirm that callers only use it on rectangular input.
311 template <typename I
>
312 unsigned getMaximumNetWidth(const I
&CellStart
, const I
&CellStop
,
313 unsigned InitialSpaces
, unsigned CellCount
,
314 unsigned MaxRowCount
) const {
315 auto MaxNetWidth
= getNetWidth(CellStart
, CellStop
, InitialSpaces
);
// Number of cells preceding the target cell within its row.
317 auto Offset
= std::distance(CellStart
, CellStop
);
318 for (const auto *Next
= CellStop
->NextColumnElement
; Next
;
319 Next
= Next
->NextColumnElement
) {
// Guard on the row counter against \p MaxRowCount.
320 if (RowCount
> MaxRowCount
)
// Same span of cells, taken from row number RowCount.
322 auto Start
= (CellStart
+ RowCount
* CellCount
);
323 auto End
= Start
+ Offset
;
325 std::max(MaxNetWidth
, getNetWidth(Start
, End
, InitialSpaces
));
331 /// Align a split cell with a newline to the first element in the cell.
/// NOTE(review): \p Start and \p End are presumably indices into \c Changes
/// delimiting the cell; confirm against the definition.
332 void alignToStartOfCell(unsigned Start
, unsigned End
);
334 /// Link the Cell pointers in the list of Cells.
/// Takes \p CellDesc by rvalue reference and returns a \c CellDescriptions by
/// value; being \c static, it does not touch manager state.
335 static CellDescriptions
linkCells(CellDescriptions
&&CellDesc
);
337 /// Fill \c Replaces with the replacements for all effective changes.
338 void generateChanges();
340 /// Stores \p Text as the replacement for the whitespace in \p Range.
341 void storeReplacement(SourceRange Range
, StringRef Text
);
/// The helpers below build replacement text in place: each takes the output
/// string \p Text by non-const reference.
///
/// NOTE(review): presumably appends \p Newlines line breaks to \p Text, using
/// the line ending selected by \c UseCRLF; confirm against the definition.
342 void appendNewlineText(std::string
&Text
, unsigned Newlines
);
/// NOTE(review): presumably appends \p Newlines escaped (backslash) newlines,
/// padding from \p PreviousEndOfTokenColumn up to \p EscapedNewlineColumn so
/// the backslashes line up; confirm against the definition.
343 void appendEscapedNewlineText(std::string
&Text
, unsigned Newlines
,
344 unsigned PreviousEndOfTokenColumn
,
345 unsigned EscapedNewlineColumn
);
/// NOTE(review): presumably appends the indentation for \p Spaces columns
/// starting at \p WhitespaceStartColumn, as tabs and/or spaces depending on
/// the style; confirm against the definition.
346 void appendIndentText(std::string
&Text
, unsigned IndentLevel
,
347 unsigned Spaces
, unsigned WhitespaceStartColumn
,
/// NOTE(review): presumably appends tab characters covering \p Indentation
/// columns and returns the number of \p Spaces still to be emitted; confirm
/// against the definition.
349 unsigned appendTabIndent(std::string
&Text
, unsigned Spaces
,
350 unsigned Indentation
);
/// Every recorded \c Change; the alignment functions operate over this list
/// and the width helpers index into it.
352 SmallVector
<Change
, 16> Changes
;
/// Source manager bound by the constructor; held by reference, not owned.
353 const SourceManager
&SourceMgr
;
/// Replacements filled by \c generateChanges.
354 tooling::Replacements Replaces
;
/// Format style bound by the constructor; held by reference, not owned.
355 const FormatStyle
&Style
;
359 } // namespace format