1 //===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file implements a token annotator, i.e. creates
11 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
16 #define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
18 #include "UnwrappedLineParser.h"
19 #include "clang/Format/Format.h"
27 LT_ObjCDecl
, // An @interface, @implementation, or @protocol line.
29 LT_ObjCProperty
, // An @property line.
31 LT_PreprocessorDirective
,
32 LT_VirtualFunctionDecl
,
33 LT_ArrayOfStructInitializer
,
34 LT_CommentAbovePPDirective
,
38 // Contained in class declaration/definition.
40 // Contained within function definition.
42 // Contained within other scope block (loop, if/else, etc).
/// Builds an annotated line from \p Line.
///
/// The level, preprocessor and declaration flags are copied from \p Line,
/// the remaining flags start out as \c false. The tokens of \p Line are then
/// linked into a Next/Previous chain starting at \c First, and the child
/// lines of every node are attached through \c addChildren().
48 AnnotatedLine(const UnwrappedLine
&Line
)
49 : First(Line
.Tokens
.front().Tok
), Level(Line
.Level
),
50 PPLevel(Line
.PPLevel
),
51 MatchingOpeningBlockLineIndex(Line
.MatchingOpeningBlockLineIndex
),
52 MatchingClosingBlockLineIndex(Line
.MatchingClosingBlockLineIndex
),
53 InPPDirective(Line
.InPPDirective
),
54 InPragmaDirective(Line
.InPragmaDirective
),
55 InMacroBody(Line
.InMacroBody
),
56 MustBeDeclaration(Line
.MustBeDeclaration
), MightBeFunctionDecl(false),
57 IsMultiVariableDeclStmt(false), Affected(false),
58 LeadingEmptyLinesAffected(false), ChildrenAffected(false),
59 ReturnTypeWrapped(false), IsContinuation(Line
.IsContinuation
),
60 FirstStartColumn(Line
.FirstStartColumn
) {
// NOTE(review): the initializer list above has already called
// Line.Tokens.front() by the time this check runs, so the assert documents
// the precondition rather than guarding the first access.
61 assert(!Line
.Tokens
.empty());
63 // Calculate Next and Previous for all tokens. Note that we must overwrite
64 // Next and Previous for every token, as previous formatting runs might have
65 // left them in a different state.
66 First
->Previous
= nullptr;
67 FormatToken
*Current
= First
;
// The first node is handled here; the loop below starts at the second one.
68 addChildren(Line
.Tokens
.front(), Current
);
69 for (const UnwrappedLineNode
&Node
: llvm::drop_begin(Line
.Tokens
)) {
// Any token that has a MacroParent flags the whole line.
70 if (Node
.Tok
->MacroParent
)
71 ContainsMacroCall
= true;
// Append Node.Tok to the chain and advance Current onto it.
72 Current
->Next
= Node
.Tok
;
73 Node
.Tok
->Previous
= Current
;
74 Current
= Current
->Next
;
75 addChildren(Node
, Current
);
76 // FIXME: if we add children, previous will point to the token before
77 // the children; changing this requires significant changes across
/// Rebuilds the child list of \p Current from the children of \p Node.
///
/// \p Current's existing \c Children are cleared first. For every child of
/// \p Node a new \c AnnotatedLine is allocated and appended both to this
/// line's \c Children and to \p Current's \c Children. If any new child has
/// \c ContainsMacroCall set, this line's \c ContainsMacroCall is set too.
// NOTE(review): the lines are allocated with a plain `new` and stored as raw
// pointers; the matching release is not visible in this excerpt -- confirm.
84 void addChildren(const UnwrappedLineNode
&Node
, FormatToken
*Current
) {
85 Current
->Children
.clear();
86 for (const auto &Child
: Node
.Children
) {
87 Children
.push_back(new AnnotatedLine(Child
));
// Children.back() is the line that was just appended.
88 if (Children
.back()->ContainsMacroCall
)
89 ContainsMacroCall
= true;
90 Current
->Children
.push_back(Children
.back());
96 for (const auto *Child
: Children
)
97 Size
+= Child
->size();
102 for (AnnotatedLine
*Child
: Children
)
104 FormatToken
*Current
= First
;
106 Current
->Children
.clear();
107 Current
->Role
.reset();
108 Current
= Current
->Next
;
/// \c true if \c First is set, is a comment token, and
/// \c First->getNextNonComment() returns null.
112 bool isComment() const {
113 return First
&& First
->is(tok::comment
) && !First
->getNextNonComment();
// NOTE(review): the end of the first sentence below was missing in this copy
// of the file; "comments" is assumed by analogy with endsWith() -- confirm.
116 /// \c true if this line starts with the given tokens in order, ignoring
/// comments.
/// Returns \c false when \c First is null; otherwise \p Tokens are forwarded
/// to \c First->startsSequence().
118 template <typename
... Ts
> bool startsWith(Ts
... Tokens
) const {
119 return First
&& First
->startsSequence(Tokens
...);
122 /// \c true if this line ends with the given tokens in reversed order,
123 /// ignoring comments.
124 /// For example, given tokens [T1, T2, T3, ...], the function returns true if
125 /// this line is like "... T3 T2 T1".
/// Returns \c false when \c Last is null; otherwise \p Tokens are forwarded
/// to \c Last->endsSequence().
126 template <typename
... Ts
> bool endsWith(Ts
... Tokens
) const {
127 return Last
&& Last
->endsSequence(Tokens
...);
130 /// \c true if this line looks like a function definition instead of a
131 /// function declaration. Asserts MightBeFunctionDecl.
/// The line counts as a definition exactly when it does not end with ';'.
132 bool mightBeFunctionDefinition() const {
133 assert(MightBeFunctionDecl
);
134 // Decide whether the end of the token stream belongs to a function
135 // definition or to a function declaration. A line that ends with the ';'
136 // of foo(); is taken to be a declaration; every other line is taken to be
137 // a definition. The ';' may still be followed by a comment, as in
138 // foo(); /* comment */
143 // which is handled because endsWith() ignores the comment.
144 return !endsWith(tok::semi
);
147 /// \c true if this line starts a namespace definition.
/// Accepted openings are \c namespace, a \c TT_NamespaceMacro token,
/// \c inline \c namespace and \c export \c namespace; each is checked with
/// \c startsWith().
148 bool startsWithNamespace() const {
149 return startsWith(tok::kw_namespace
) || startsWith(TT_NamespaceMacro
) ||
150 startsWith(tok::kw_inline
, tok::kw_namespace
) ||
151 startsWith(tok::kw_export
, tok::kw_namespace
);
/// Returns \c First unless it is a comment token; in that case the result of
/// \c First->getNextNonComment() is returned instead.
// NOTE(review): in the lines shown here First is dereferenced without a null
// check, unlike isComment() and startsWith() -- confirm that callers
// guarantee it is set.
154 FormatToken
*getFirstNonComment() const {
156 return First
->is(tok::comment
) ? First
->getNextNonComment() : First
;
/// Child lines appended by \c addChildren(), held as raw pointers.
162 SmallVector
<AnnotatedLine
*, 0> Children
;
// The two indices and the flags InPragmaDirective and MustBeDeclaration are
// copied from the UnwrappedLine by the constructor; MightBeFunctionDecl and
// IsMultiVariableDeclStmt start out as false.
167 size_t MatchingOpeningBlockLineIndex
;
168 size_t MatchingClosingBlockLineIndex
;
170 bool InPragmaDirective
;
172 bool MustBeDeclaration
;
173 bool MightBeFunctionDecl
;
174 bool IsMultiVariableDeclStmt
;
176 /// \c True if this line contains a macro call for which an expansion exists.
177 bool ContainsMacroCall
= false;
// NOTE(review): the declaration described by the next comment is not present
// in this copy of the file (the constructor initialises a member Affected).
179 /// \c True if this line should be formatted, i.e. intersects directly or
180 /// indirectly with one of the input ranges.
183 /// \c True if the leading empty lines of this line intersect with one of the
/// input ranges.
185 bool LeadingEmptyLinesAffected
;
187 /// \c True if one of this line's children intersects with an input range.
188 bool ChildrenAffected
;
190 /// \c True if breaking after last attribute group in function return type.
191 bool ReturnTypeWrapped
;
// NOTE(review): the declaration described by the next comment is not present
// in this copy of the file (the constructor initialises a member
// IsContinuation).
193 /// \c True if this line should be indented by ContinuationIndent in addition
194 /// to the normal indentation level.
/// Copied from the UnwrappedLine by the constructor.
197 unsigned FirstStartColumn
;
// Copy construction and copy assignment are disabled.
201 AnnotatedLine(const AnnotatedLine
&) = delete;
202 void operator=(const AnnotatedLine
&) = delete;
205 /// Determines extra information about the tokens comprising an
206 /// \c UnwrappedLine.
207 class TokenAnnotator
{
/// Creates an annotator that works with \p Style and \p Keywords.
///
/// Both arguments are stored as references in the \c Style and \c Keywords
/// members, so the referenced objects must outlive this annotator.
209 TokenAnnotator(const FormatStyle
&Style
, const AdditionalKeywords
&Keywords
)
210 : Style(Style
), Keywords(Keywords
) {}
212 /// Adapts the indent levels of comment lines to the indent of the
214 // FIXME: Can/should this be done in the UnwrappedLineParser?
215 void setCommentLineLevels(SmallVectorImpl
<AnnotatedLine
*> &Lines
) const;
217 void annotate(AnnotatedLine
&Line
);
218 void calculateFormattingInformation(AnnotatedLine
&Line
) const;
221 /// Calculate the penalty for splitting before \c Tok.
222 unsigned splitPenalty(const AnnotatedLine
&Line
, const FormatToken
&Tok
,
223 bool InFunctionDecl
) const;
225 bool spaceRequiredBeforeParens(const FormatToken
&Right
) const;
227 bool spaceRequiredBetween(const AnnotatedLine
&Line
, const FormatToken
&Left
,
228 const FormatToken
&Right
) const;
230 bool spaceRequiredBefore(const AnnotatedLine
&Line
,
231 const FormatToken
&Right
) const;
233 bool mustBreakBefore(const AnnotatedLine
&Line
,
234 const FormatToken
&Right
) const;
236 bool canBreakBefore(const AnnotatedLine
&Line
,
237 const FormatToken
&Right
) const;
239 bool mustBreakForReturnType(const AnnotatedLine
&Line
) const;
241 void printDebugInfo(const AnnotatedLine
&Line
) const;
243 void calculateUnbreakableTailLengths(AnnotatedLine
&Line
) const;
245 void calculateArrayInitializerColumnList(AnnotatedLine
&Line
) const;
247 FormatToken
*calculateInitializerColumnList(AnnotatedLine
&Line
,
248 FormatToken
*CurrentToken
,
249 unsigned Depth
) const;
250 FormatStyle::PointerAlignmentStyle
251 getTokenReferenceAlignment(const FormatToken
&PointerOrReference
) const;
253 FormatStyle::PointerAlignmentStyle
getTokenPointerOrReferenceAlignment(
254 const FormatToken
&PointerOrReference
) const;
256 const FormatStyle
&Style
;
258 const AdditionalKeywords
&Keywords
;
260 SmallVector
<ScopeType
> Scopes
;
263 } // end namespace format
264 } // end namespace clang