1 //===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file implements a token annotator, i.e. creates
11 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
16 #define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
18 #include "UnwrappedLineParser.h"
25 // Contains public/private/protected followed by TT_InheritanceColon.
28 LT_ObjCDecl
, // An @interface, @implementation, or @protocol line.
30 LT_ObjCProperty
, // An @property line.
32 LT_PreprocessorDirective
,
33 LT_VirtualFunctionDecl
,
34 LT_ArrayOfStructInitializer
,
35 LT_CommentAbovePPDirective
,
39 // Contained in child block.
41 // Contained in class declaration/definition.
43 // Contained within other scope block (function, loop, if/else, etc).
/// Builds the annotated form of \p Line.
///
/// Copies the line-level state (Level, PPLevel, the matching opening/closing
/// block line indices, the preprocessor/pragma/macro-body flags,
/// MustBeDeclaration, IsContinuation and FirstStartColumn) from \p Line,
/// starts with Type set to LT_Other and the remaining flags set to false, then
/// relinks the tokens through their Next/Previous pointers and builds the
/// child lines via addChildren(). \p Line must contain at least one token.
// NOTE(review): this copy of the constructor stops at the FIXME comment inside
// the loop; the end of the body is not visible here.
49 AnnotatedLine(const UnwrappedLine
&Line
)
50 : First(Line
.Tokens
.front().Tok
), Type(LT_Other
), Level(Line
.Level
),
51 PPLevel(Line
.PPLevel
),
52 MatchingOpeningBlockLineIndex(Line
.MatchingOpeningBlockLineIndex
),
53 MatchingClosingBlockLineIndex(Line
.MatchingClosingBlockLineIndex
),
54 InPPDirective(Line
.InPPDirective
),
55 InPragmaDirective(Line
.InPragmaDirective
),
56 InMacroBody(Line
.InMacroBody
),
57 MustBeDeclaration(Line
.MustBeDeclaration
), MightBeFunctionDecl(false),
58 IsMultiVariableDeclStmt(false), Affected(false),
59 LeadingEmptyLinesAffected(false), ChildrenAffected(false),
60 ReturnTypeWrapped(false), IsContinuation(Line
.IsContinuation
),
61 FirstStartColumn(Line
.FirstStartColumn
) {
// NOTE(review): First was already read from Line.Tokens.front() in the
// initializer list above, so this assert runs after that access.
62 assert(!Line
.Tokens
.empty());
64 // Calculate Next and Previous for all tokens. Note that we must overwrite
65 // Next and Previous for every token, as previous formatting runs might have
66 // left them in a different state.
67 First
->Previous
= nullptr;
68 FormatToken
*Current
= First
;
// Attach the children of the first node to the first token.
69 addChildren(Line
.Tokens
.front(), Current
);
// Chain every remaining token behind its predecessor; the first node was
// handled above, hence drop_begin.
70 for (const UnwrappedLineNode
&Node
: llvm::drop_begin(Line
.Tokens
)) {
// A token that has a MacroParent marks this line as containing a macro call.
71 if (Node
.Tok
->MacroParent
)
72 ContainsMacroCall
= true;
73 Current
->Next
= Node
.Tok
;
74 Node
.Tok
->Previous
= Current
;
75 Current
= Current
->Next
;
76 addChildren(Node
, Current
);
77 // FIXME: if we add children, previous will point to the token before
78 // the children; changing this requires significant changes across
85 void addChildren(const UnwrappedLineNode
&Node
, FormatToken
*Current
) {
86 Current
->Children
.clear();
87 for (const auto &Child
: Node
.Children
) {
88 Children
.push_back(new AnnotatedLine(Child
));
89 if (Children
.back()->ContainsMacroCall
)
90 ContainsMacroCall
= true;
91 Current
->Children
.push_back(Children
.back());
97 for (const auto *Child
: Children
)
98 Size
+= Child
->size();
103 for (AnnotatedLine
*Child
: Children
)
105 FormatToken
*Current
= First
;
107 Current
->Children
.clear();
108 Current
->Role
.reset();
109 Current
= Current
->Next
;
113 bool isComment() const {
114 return First
&& First
->is(tok::comment
) && !First
->getNextNonComment();
117 /// \c true if this line starts with the given tokens in order, ignoring
119 template <typename
... Ts
> bool startsWith(Ts
... Tokens
) const {
120 return First
&& First
->startsSequence(Tokens
...);
123 /// \c true if this line ends with the given tokens in reversed order,
124 /// ignoring comments.
125 /// For example, given tokens [T1, T2, T3, ...], the function returns true if
126 /// this line is like "... T3 T2 T1".
127 template <typename
... Ts
> bool endsWith(Ts
... Tokens
) const {
128 return Last
&& Last
->endsSequence(Tokens
...);
131 /// \c true if this line looks like a function definition instead of a
132 /// function declaration. Asserts MightBeFunctionDecl.
133 bool mightBeFunctionDefinition() const {
134 assert(MightBeFunctionDecl
);
135 // Decide between definition and declaration by looking at how the line
136 // ends: a line ending in ';' (as in "foo();") is treated as a declaration,
137 // so every other line is reported as a possible definition.
138 // The ';' may be followed by a trailing comment, for example
139 //    foo(); /* comment */
144 // which is fine because endsWith() ignores the comment.
145 return !endsWith(tok::semi
);
148 /// \c true if this line starts a namespace definition.
149 bool startsWithNamespace() const {
150 return startsWith(tok::kw_namespace
) || startsWith(TT_NamespaceMacro
) ||
151 startsWith(tok::kw_inline
, tok::kw_namespace
) ||
152 startsWith(tok::kw_export
, tok::kw_namespace
);
/// Returns \c First unless it is a comment token, in which case the result of
/// \c First->getNextNonComment() is returned instead. isComment() tests that
/// same call for null, so callers should expect a null result for a line made
/// up of comments only.
// NOTE(review): First is dereferenced unconditionally by the return below. A
// line appears to be missing from this copy between the signature and the
// return (listing numbers skip 156) - possibly a null check; confirm.
155 FormatToken
*getFirstNonComment() const {
157 return First
->is(tok::comment
) ? First
->getNextNonComment() : First
;
/// Returns \c Last unless it is a comment token, in which case the result of
/// \c Last->getPreviousNonComment() is returned instead.
// NOTE(review): Last is dereferenced unconditionally by the return below. A
// line appears to be missing from this copy between the signature and the
// return (listing numbers skip 161) - possibly a null check; confirm.
160 FormatToken
*getLastNonComment() const {
162 return Last
->is(tok::comment
) ? Last
->getPreviousNonComment() : Last
;
168 SmallVector
<AnnotatedLine
*, 0> Children
;
173 size_t MatchingOpeningBlockLineIndex
;
174 size_t MatchingClosingBlockLineIndex
;
176 bool InPragmaDirective
;
178 bool MustBeDeclaration
;
179 bool MightBeFunctionDecl
;
180 bool IsMultiVariableDeclStmt
;
182 /// \c True if this line contains a macro call for which an expansion exists.
183 bool ContainsMacroCall
= false;
185 /// \c True if calculateFormattingInformation() has been called on this line.
186 bool Computed
= false;
188 /// \c True if this line should be formatted, i.e. intersects directly or
189 /// indirectly with one of the input ranges.
192 /// \c True if the leading empty lines of this line intersect with one of the
/// input ranges.
194 bool LeadingEmptyLinesAffected
;
196 /// \c True if one of this line's children intersects with an input range.
197 bool ChildrenAffected
;
199 /// \c True if breaking after last attribute group in function return type.
200 bool ReturnTypeWrapped
;
202 /// \c True if this line should be indented by ContinuationIndent in addition
203 /// to the normal indention level.
206 unsigned FirstStartColumn
;
// Copying is disabled. addChildren() stores raw pointers to child lines it
// allocates with new, so a member-wise copy would share those pointers
// (presumably the reason for deleting these operations - confirm).
210 AnnotatedLine(const AnnotatedLine
&) = delete;
211 void operator=(const AnnotatedLine
&) = delete;
214 /// Determines extra information about the tokens comprising an
215 /// \c UnwrappedLine.
///
/// Only the constructor is defined in this header; every other member function
/// below is declared here and implemented elsewhere.
216 class TokenAnnotator
{
/// Creates an annotator for \p Style using \p Keywords.
///
/// Both arguments are stored as references (see the \c Style and \c Keywords
/// members below), so they must outlive the annotator. \c IsCpp and
/// \c LangOpts are derived from \p Style, and the constructor asserts that
/// \c IsCpp agrees with \c LangOpts.CXXOperatorNames.
218 TokenAnnotator(const FormatStyle
&Style
, const AdditionalKeywords
&Keywords
)
219 : Style(Style
), IsCpp(Style
.isCpp()),
220 LangOpts(getFormattingLangOpts(Style
)), Keywords(Keywords
) {
221 assert(IsCpp
== LangOpts
.CXXOperatorNames
);
224 /// Adapts the indent levels of comment lines to the indent of the
// NOTE(review): the sentence above is cut off in this copy; confirm its ending
// against the upstream header.
226 // FIXME: Can/should this be done in the UnwrappedLineParser?
227 void setCommentLineLevels(SmallVectorImpl
<AnnotatedLine
*> &Lines
) const;
// The only annotation entry point declared without const; presumably it updates
// the Scopes / MacroBodyScopes members below - confirm in the implementation.
229 void annotate(AnnotatedLine
&Line
);
230 void calculateFormattingInformation(AnnotatedLine
&Line
) const;
233 /// Calculate the penalty for splitting before \c Tok.
234 unsigned splitPenalty(const AnnotatedLine
&Line
, const FormatToken
&Tok
,
235 bool InFunctionDecl
) const;
// Whitespace and line-break queries. Their exact rules live in the
// implementation; presumably each answers the question in its name for the
// position in front of \c Right - confirm there.
237 bool spaceRequiredBeforeParens(const FormatToken
&Right
) const;
239 bool spaceRequiredBetween(const AnnotatedLine
&Line
, const FormatToken
&Left
,
240 const FormatToken
&Right
) const;
242 bool spaceRequiredBefore(const AnnotatedLine
&Line
,
243 const FormatToken
&Right
) const;
245 bool mustBreakBefore(const AnnotatedLine
&Line
,
246 const FormatToken
&Right
) const;
248 bool canBreakBefore(const AnnotatedLine
&Line
,
249 const FormatToken
&Right
) const;
251 bool mustBreakForReturnType(const AnnotatedLine
&Line
) const;
253 void printDebugInfo(const AnnotatedLine
&Line
) const;
255 void calculateUnbreakableTailLengths(AnnotatedLine
&Line
) const;
257 void calculateArrayInitializerColumnList(AnnotatedLine
&Line
) const;
259 FormatToken
*calculateInitializerColumnList(AnnotatedLine
&Line
,
260 FormatToken
*CurrentToken
,
261 unsigned Depth
) const;
262 FormatStyle::PointerAlignmentStyle
263 getTokenReferenceAlignment(const FormatToken
&PointerOrReference
) const;
265 FormatStyle::PointerAlignmentStyle
getTokenPointerOrReferenceAlignment(
266 const FormatToken
&PointerOrReference
) const;
// Held by reference: the FormatStyle passed to the constructor must outlive
// this object.
268 const FormatStyle
&Style
;
// Initialized in the constructor from getFormattingLangOpts(Style).
271 LangOptions LangOpts
;
// Held by reference: the AdditionalKeywords passed to the constructor must
// outlive this object.
273 const AdditionalKeywords
&Keywords
;
275 SmallVector
<ScopeType
> Scopes
, MacroBodyScopes
;
278 } // end namespace format
279 } // end namespace clang