[docs] Add LICENSE.txt to the root of the mono-repo
[llvm-project.git] / clang / lib / Format / Macros.h
blobb26799c20f8c42c98299a3e657c15f68147d001b
1 //===--- Macros.h - Format C++ code -----------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the main building blocks of macro support in
11 /// clang-format.
12 ///
13 /// In order to not violate the requirement that clang-format can format files
14 /// in isolation, clang-format's macro support uses expansions users provide
15 /// as part of clang-format's style configuration.
16 ///
17 /// Macro definitions are of the form "MACRO(p1, p2)=p1 + p2", but only support
18 /// one level of expansion (\see MacroExpander for a full description of what
19 /// is supported).
20 ///
21 /// As part of parsing, clang-format uses the MacroExpander to expand the
22 /// spelled token streams into expanded token streams when it encounters a
23 /// macro call. The UnwrappedLineParser continues to parse UnwrappedLines
24 /// from the expanded token stream.
25 /// After the expanded unwrapped lines are parsed, the MacroCallReconstructor
26 /// matches the spelled token stream into unwrapped lines that best resemble the
27 /// structure of the expanded unwrapped lines. These reconstructed unwrapped
28 /// lines are aliasing the tokens in the expanded token stream, so that token
29 /// annotations will be reused when formatting the spelled macro calls.
30 ///
31 /// When formatting, clang-format annotates and formats the expanded unwrapped
32 /// lines first, determining the token types. Next, it formats the spelled
33 /// unwrapped lines, keeping the token types fixed, while allowing other
34 /// formatting decisions to change.
35 ///
36 //===----------------------------------------------------------------------===//
38 #ifndef CLANG_LIB_FORMAT_MACROS_H
39 #define CLANG_LIB_FORMAT_MACROS_H
41 #include <list>
42 #include <map>
43 #include <string>
44 #include <vector>
46 #include "FormatToken.h"
47 #include "llvm/ADT/ArrayRef.h"
48 #include "llvm/ADT/DenseMap.h"
49 #include "llvm/ADT/SmallVector.h"
50 #include "llvm/ADT/StringRef.h"
52 namespace clang {
53 namespace format {
55 struct UnwrappedLine;
56 struct UnwrappedLineNode;
58 /// Takes a set of macro definitions as strings and allows expanding calls to
59 /// those macros.
60 ///
61 /// For example:
62 /// Definition: A(x, y)=x + y
63 /// Call : A(int a = 1, 2)
64 /// Expansion : int a = 1 + 2
65 ///
66 /// Expansion does not check arity of the definition.
67 /// If fewer arguments than expected are provided, the remaining parameters
68 /// are considered empty:
69 /// Call : A(a)
70 /// Expansion: a +
71 /// If more arguments than expected are provided, they will be discarded.
72 ///
73 /// The expander does not support:
74 /// - recursive expansion
75 /// - stringification
76 /// - concatenation
77 /// - variadic macros
78 ///
79 /// Furthermore, only a single expansion of each macro argument is supported,
80 /// so that we cannot get conflicting formatting decisions from different
81 /// expansions; any further use of the same parameter is left unexpanded:
82 /// Definition: A(x)=x+x
83 /// Call : A(id)
84 /// Expansion : id+x
85 ///
86 class MacroExpander {
87 public:
88 using ArgsList = llvm::ArrayRef<llvm::SmallVector<FormatToken *, 8>>;
90 /// Construct a macro expander from a set of macro definitions.
91 /// Macro definitions must be encoded as UTF-8.
92 ///
93 /// Each entry in \p Macros must conform to the following simple
94 /// macro-definition language:
95 /// <definition> ::= <id> <expansion> | <id> "(" <params> ")" <expansion>
96 /// <params> ::= <id-list> | ""
97 /// <id-list> ::= <id> | <id> "," <params>
98 /// <expansion> ::= "=" <tail> | <eof>
99 /// <tail> ::= <tok> <tail> | <eof>
101 /// Macros that cannot be parsed will be silently discarded.
103 MacroExpander(const std::vector<std::string> &Macros,
104 clang::SourceManager &SourceMgr, const FormatStyle &Style,
105 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
106 IdentifierTable &IdentTable);
107 ~MacroExpander();
109 /// Returns whether a macro \p Name is defined.
110 bool defined(llvm::StringRef Name) const;
112 /// Returns whether the macro has no arguments and should not consume
113 /// subsequent parentheses.
114 bool objectLike(llvm::StringRef Name) const;
116 /// Returns the expanded stream of format tokens for \p ID, where
117 /// each element in \p Args is a positional argument to the macro call.
118 llvm::SmallVector<FormatToken *, 8> expand(FormatToken *ID,
119 ArgsList Args) const;
121 private:
122 struct Definition;
123 class DefinitionParser;
125 void parseDefinition(const std::string &Macro); // NOTE(review): presumably parses one entry of the constructor's Macros list - confirm in Macros.cpp.
127 clang::SourceManager &SourceMgr;
128 const FormatStyle &Style;
129 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator;
130 IdentifierTable &IdentTable;
131 SmallVector<std::unique_ptr<llvm::MemoryBuffer>> Buffers; // NOTE(review): presumably owns the buffers the parsed definitions were lexed from - confirm.
132 llvm::StringMap<Definition> Definitions; // NOTE(review): presumably keyed by macro name, as queried by defined()/objectLike() - confirm.
135 /// Converts a sequence of UnwrappedLines containing expanded macros into a
136 /// single UnwrappedLine containing the macro calls. This UnwrappedLine may be
137 /// broken into child lines, in a way that best conveys the structure of the
138 /// expanded code.
140 /// In the simplest case, a spelled UnwrappedLine contains one macro, and after
141 /// expanding it we have one expanded UnwrappedLine. In general, macro
142 /// expansions can span UnwrappedLines, and multiple macros can contribute
143 /// tokens to the same line. We keep consuming expanded lines until:
144 /// * all expansions that started have finished (we're not chopping any macros
145 /// in half)
146 /// * *and* we've reached the end of a *spelled* unwrapped line.
148 /// A single UnwrappedLine represents this chunk of code.
150 /// After this point, the state of the spelled/expanded stream is "in sync"
151 /// (both at the start of an UnwrappedLine, with no macros open), so the
152 /// MacroCallReconstructor can be thrown away and parsing can continue.
154 /// Given a mapping from the macro name identifier token in the macro call
155 /// to the tokens of the macro call, for example:
156 /// CLASSA -> CLASSA({public: void x();})
158 /// When getting the formatted lines of the expansion via the \c addLine method
159 /// (each '->' specifies a call to \c addLine ):
160 /// -> class A {
161 /// -> public:
162 /// -> void x();
163 /// -> };
165 /// Creates the tree of unwrapped lines containing the macro call tokens so that
166 /// the macro call tokens fit the semantic structure of the expanded formatted
167 /// lines:
168 /// -> CLASSA({
169 /// -> public:
170 /// -> void x();
171 /// -> })
172 class MacroCallReconstructor {
173 public:
174 /// Create a Reconstructor whose resulting \p UnwrappedLine will start at
175 /// \p Level, using the map from name identifier token to the corresponding
176 /// tokens of the spelled macro call.
177 MacroCallReconstructor(
178 unsigned Level,
179 const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
180 &ActiveExpansions);
182 /// For the given \p Line, match all occurrences of tokens expanded from a
183 /// macro to unwrapped lines in the spelled macro call so that the resulting
184 /// tree of unwrapped lines best resembles the structure of unwrapped lines
185 /// passed in via \c addLine.
186 void addLine(const UnwrappedLine &Line);
188 /// Check whether at the current state there is no open macro expansion
189 /// that needs to be processed to finish a macro call.
190 /// Only when \c finished() is true, \c takeResult() can be called to retrieve
191 /// the resulting \c UnwrappedLine.
192 /// If there are multiple subsequent macro calls within an unwrapped line in
193 /// the spelled token stream, the calling code may also continue to call
194 /// \c addLine() when \c finished() is true.
195 bool finished() const { return ActiveExpansions.empty(); }
197 /// Retrieve the formatted \c UnwrappedLine containing the original
198 /// macro calls, formatted according to the expanded token stream received
199 /// via \c addLine().
200 /// Generally, this line tries to have the same structure as the expanded,
201 /// formatted unwrapped lines handed in via \c addLine(), with the exception
202 /// that for multiple top-level lines, each subsequent line will be the
203 /// child of the last token in its predecessor. This representation is chosen
204 /// because it is a precondition to the formatter that we get what looks like
205 /// a single statement in a single \c UnwrappedLine (i.e. matching parens).
207 /// If a token in a macro argument is a child of a token in the expansion,
208 /// the parent will be the corresponding token in the macro call.
209 /// For example:
210 /// #define C(a, b) class C { a b
211 /// C(int x;, int y;)
212 /// would expand to
213 /// class C { int x; int y;
214 /// where in a formatted line "int x;" and "int y;" would both be new separate
215 /// lines.
217 /// In the result, "int x;" will be a child of the opening parenthesis in "C("
218 /// and "int y;" will be a child of the "," token:
219 /// C (
220 /// \- int x;
221 /// ,
222 /// \- int y;
223 /// )
224 UnwrappedLine takeResult() &&;
226 private:
227 void add(FormatToken *Token, FormatToken *ExpandedParent, bool First);
228 void prepareParent(FormatToken *ExpandedParent, bool First);
229 FormatToken *getParentInResult(FormatToken *Parent);
230 void reconstruct(FormatToken *Token);
231 void startReconstruction(FormatToken *Token);
232 bool reconstructActiveCallUntil(FormatToken *Token);
233 void endReconstruction(FormatToken *Token);
234 bool processNextReconstructed();
235 void finalize();
237 struct ReconstructedLine;
239 void appendToken(FormatToken *Token, ReconstructedLine *L = nullptr);
240 UnwrappedLine createUnwrappedLine(const ReconstructedLine &Line, int Level);
241 void debug(const ReconstructedLine &Line, int Level);
242 ReconstructedLine &parentLine();
243 ReconstructedLine *currentLine();
244 void debugParentMap() const;
246 #ifndef NDEBUG
247 enum ReconstructorState {
248 Start, // No macro expansion was found in the input yet.
249 InProgress, // During a macro reconstruction.
250 Finalized, // Past macro reconstruction, the result is finalized.
252 ReconstructorState State = Start;
253 #endif
255 // Node in which we build up the resulting unwrapped line; this type is
256 // analogous to UnwrappedLineNode.
257 struct LineNode {
258 LineNode() = default;
259 LineNode(FormatToken *Tok) : Tok(Tok) {}
260 FormatToken *Tok = nullptr;
261 llvm::SmallVector<std::unique_ptr<ReconstructedLine>> Children;
264 // Line in which we build up the resulting unwrapped line.
265 // FIXME: Investigate changing UnwrappedLine to a pointer type and using it
266 // instead of rolling our own type.
267 struct ReconstructedLine {
268 llvm::SmallVector<std::unique_ptr<LineNode>> Tokens;
271 // The line in which we collect the resulting reconstructed output.
272 // To reduce special cases in the algorithm, the first level of the line
273 // contains a single null token that has the reconstructed incoming
274 // lines as children.
275 // In the end, we stitch the lines together so that each subsequent line
276 // is a child of the last token of the previous line. This is necessary
277 // in order to format the overall expression as a single logical line -
278 // if we created separate lines, we'd format them with their own top-level
279 // indent depending on the semantic structure, which is not desired.
280 ReconstructedLine Result;
282 // Stack of currently "open" lines, where each line's predecessor's last
283 // token is the parent token for that line.
284 llvm::SmallVector<ReconstructedLine *> ActiveReconstructedLines;
286 // Maps from the expanded token to the token that takes its place in the
287 // reconstructed token stream in terms of parent-child relationships.
288 // Note that it might take multiple steps to arrive at the correct
289 // parent in the output.
290 // Given: #define C(a, b) []() { a; b; }
291 // And a call: C(f(), g())
292 // The structure in the incoming formatted unwrapped line will be:
293 // []() {
294 // |- f();
295 // \- g();
296 // }
297 // with f and g being children of the opening brace.
298 // In the reconstructed call:
299 // C(f(), g())
300 // \- f()
301 // \- g()
302 // We want f to be a child of the opening parenthesis and g to be a child
303 // of the comma token in the macro call.
304 // Thus, we map
305 // { -> (
306 // and add
307 // ( -> ,
308 // once we're past the comma in the reconstruction.
309 llvm::DenseMap<FormatToken *, FormatToken *>
310 SpelledParentToReconstructedParent;
312 // Keeps track of a single expansion while we're reconstructing tokens it
313 // generated.
314 struct Expansion {
315 // The identifier token of the macro call.
316 FormatToken *ID;
317 // Our current position in the reconstruction.
318 std::list<UnwrappedLineNode>::iterator SpelledI;
319 // The end of the reconstructed token sequence.
320 std::list<UnwrappedLineNode>::iterator SpelledE;
323 // Stack of macro calls for which we're in the middle of an expansion.
324 llvm::SmallVector<Expansion> ActiveExpansions;
326 struct MacroCallState {
327 MacroCallState(ReconstructedLine *Line, FormatToken *ParentLastToken,
328 FormatToken *MacroCallLParen);
330 ReconstructedLine *Line;
332 // The last token in the parent line or expansion, or nullptr if the macro
333 // expansion is on a top-level line.
335 // For example, in the macro call:
336 // auto f = []() { ID(1); };
337 // The MacroCallState for ID will have '{' as ParentLastToken.
339 // In the macro call:
340 // ID(ID(void f()));
341 // The MacroCallState of the outer ID will have nullptr as ParentLastToken,
342 // while the MacroCallState for the inner ID will have the '(' of the outer
343 // ID as ParentLastToken.
345 // In the macro call:
346 // ID2(a, ID(b));
347 // The MacroCallState of ID will have ',' as ParentLastToken.
348 FormatToken *ParentLastToken;
350 // The l_paren of this MacroCallState's macro call.
351 FormatToken *MacroCallLParen;
354 // Keeps track of the lines into which the opening brace/parenthesis &
355 // argument separating commas for each level in the macro call go in order to
356 // put the corresponding closing brace/parenthesis into the same line in the
357 // output and keep track of which parents in the expanded token stream map to
358 // which tokens in the reconstructed stream.
359 // When an opening brace/parenthesis has children, we want the structure of
360 // the output line to be:
361 // |- MACRO
362 // |- (
363 // | \- <argument>
364 // |- ,
365 // | \- <argument>
366 // \- )
367 llvm::SmallVector<MacroCallState> MacroCallStructure;
369 // Level the generated UnwrappedLine will be at.
370 const unsigned Level;
372 // Maps from identifier of the macro call to an unwrapped line containing
373 // all tokens of the macro call.
374 const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
375 &IdToReconstructed;
378 } // namespace format
379 } // namespace clang
381 #endif