1 //===--- DirectiveTree.cpp - Find and strip preprocessor directives -------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "DirectiveTree.h"
10 #include "clang/Basic/IdentifierTable.h"
11 #include "clang/Basic/TokenKinds.h"
12 #include "llvm/Support/FormatVariadic.h"
20 class DirectiveParser
{
22 explicit DirectiveParser(const TokenStream
&Code
)
23 : Code(Code
), Tok(&Code
.front()) {}
24 void parse(DirectiveTree
*Result
) { parse(Result
, /*TopLevel=*/true); }
27 // Roles that a directive might take within a conditional block.
28 enum class Cond
{ None
, If
, Else
, End
};
29 static Cond
classifyDirective(tok::PPKeywordKind K
) {
31 case clang::tok::pp_if
:
32 case clang::tok::pp_ifdef
:
33 case clang::tok::pp_ifndef
:
35 case clang::tok::pp_elif
:
36 case clang::tok::pp_elifdef
:
37 case clang::tok::pp_elifndef
:
38 case clang::tok::pp_else
:
40 case clang::tok::pp_endif
:
47 // Parses tokens starting at Tok into Tree.
48 // If we reach an End or Else directive that ends Tree, returns it.
49 // If TopLevel is true, then we do not expect End and always return
51 std::optional
<DirectiveTree::Directive
> parse(DirectiveTree
*Tree
,
53 auto StartsDirective
=
54 [&, AllowDirectiveAt((const Token
*)nullptr)]() mutable {
55 if (Tok
->flag(LexFlags::StartsPPLine
)) {
56 // If we considered a comment at the start of a PP-line, it doesn't
57 // start a directive but the directive can still start after it.
58 if (Tok
->Kind
== tok::comment
)
59 AllowDirectiveAt
= Tok
+ 1;
60 return Tok
->Kind
== tok::hash
;
62 return Tok
->Kind
== tok::hash
&& AllowDirectiveAt
== Tok
;
64 // Each iteration adds one chunk (or returns, if we see #endif).
65 while (Tok
->Kind
!= tok::eof
) {
66 // If there's no directive here, we have a code chunk.
67 if (!StartsDirective()) {
68 const Token
*Start
= Tok
;
71 while (Tok
->Kind
!= tok::eof
&& !StartsDirective());
72 Tree
->Chunks
.push_back(DirectiveTree::Code
{
73 Token::Range
{Code
.index(*Start
), Code
.index(*Tok
)}});
77 // We have some kind of directive.
78 DirectiveTree::Directive Directive
;
79 parseDirective(&Directive
);
80 Cond Kind
= classifyDirective(Directive
.Kind
);
81 if (Kind
== Cond::If
) {
82 // #if or similar, starting a nested conditional block.
83 DirectiveTree::Conditional Conditional
;
84 Conditional
.Branches
.emplace_back();
85 Conditional
.Branches
.back().first
= std::move(Directive
);
86 parseConditional(&Conditional
);
87 Tree
->Chunks
.push_back(std::move(Conditional
));
88 } else if ((Kind
== Cond::Else
|| Kind
== Cond::End
) && !TopLevel
) {
89 // #endif or similar, ending this PStructure scope.
90 // (#endif is unexpected at the top level, treat as simple directive).
91 return std::move(Directive
);
93 // #define or similar, a simple directive at the current scope.
94 Tree
->Chunks
.push_back(std::move(Directive
));
100 // Parse the rest of a conditional section, after seeing the If directive.
101 // Returns after consuming the End directive.
102 void parseConditional(DirectiveTree::Conditional
*C
) {
103 assert(C
->Branches
.size() == 1 &&
104 C
->Branches
.front().second
.Chunks
.empty() &&
105 "Should be ready to parse first branch body");
106 while (Tok
->Kind
!= tok::eof
) {
107 auto Terminator
= parse(&C
->Branches
.back().second
, /*TopLevel=*/false);
109 assert(Tok
->Kind
== tok::eof
&& "gave up parsing before eof?");
110 C
->End
.Tokens
= Token::Range::emptyAt(Code
.index(*Tok
));
113 if (classifyDirective(Terminator
->Kind
) == Cond::End
) {
114 C
->End
= std::move(*Terminator
);
117 assert(classifyDirective(Terminator
->Kind
) == Cond::Else
&&
118 "ended branch unexpectedly");
119 C
->Branches
.emplace_back();
120 C
->Branches
.back().first
= std::move(*Terminator
);
124 // Parse a directive. Tok is the hash.
125 void parseDirective(DirectiveTree::Directive
*D
) {
126 assert(Tok
->Kind
== tok::hash
);
128 // Directive spans from the hash until the end of line or file.
129 const Token
*Begin
= Tok
++;
130 while (Tok
->Kind
!= tok::eof
&& !Tok
->flag(LexFlags::StartsPPLine
))
132 ArrayRef
<Token
> Tokens
{Begin
, Tok
};
133 D
->Tokens
= {Code
.index(*Tokens
.begin()), Code
.index(*Tokens
.end())};
135 // Directive name is the first non-comment token after the hash.
136 Tokens
= Tokens
.drop_front().drop_while(
137 [](const Token
&T
) { return T
.Kind
== tok::comment
; });
139 D
->Kind
= PPKeywords
.get(Tokens
.front().text()).getPPKeywordID();
142 const TokenStream
&Code
;
144 clang::IdentifierTable PPKeywords
;
148 llvm::raw_ostream
&OS
;
151 Dumper(llvm::raw_ostream
& OS
) : OS(OS
) {}
152 void operator()(const DirectiveTree
& Tree
) {
153 for (const auto& Chunk
: Tree
.Chunks
)
154 std::visit(*this, Chunk
);
156 void operator()(const DirectiveTree::Conditional
&Conditional
) {
157 for (unsigned I
= 0; I
< Conditional
.Branches
.size(); ++I
) {
158 const auto &Branch
= Conditional
.Branches
[I
];
159 (*this)(Branch
.first
, Conditional
.Taken
== I
);
161 (*this)(Branch
.second
);
164 (*this)(Conditional
.End
);
166 void operator()(const DirectiveTree::Directive
&Directive
,
167 bool Taken
= false) {
168 OS
.indent(Indent
) << llvm::formatv(
169 "#{0} ({1} tokens){2}\n", tok::getPPKeywordSpelling(Directive
.Kind
),
170 Directive
.Tokens
.size(), Taken
? " TAKEN" : "");
172 void operator()(const DirectiveTree::Code
&Code
) {
173 OS
.indent(Indent
) << llvm::formatv("code ({0} tokens)\n",
179 DirectiveTree
DirectiveTree::parse(const TokenStream
&Code
) {
180 DirectiveTree Result
;
181 DirectiveParser(Code
).parse(&Result
);
185 // Define operator<< in terms of dump() functions above.
186 #define OSTREAM_DUMP(Type) \
187 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Type &T) { \
191 OSTREAM_DUMP(DirectiveTree
)
192 OSTREAM_DUMP(DirectiveTree::Directive
)
193 OSTREAM_DUMP(DirectiveTree::Conditional
)
194 OSTREAM_DUMP(DirectiveTree::Code
)
198 // Makes choices about conditional branches.
200 // Generally it tries to maximize the amount of useful code we see.
202 // Caveat: each conditional is evaluated independently. Consider this code:
204 // bool isWindows = true;
207 // bool isWindows = false;
209 // We take both branches and define isWindows twice. We could track more state
210 // in order to produce a consistent view, but this is complex.
211 class BranchChooser
{
213 BranchChooser(const TokenStream
&Code
) : Code(Code
) {}
215 // Describes code seen by making particular branch choices. Higher is better.
217 int Tokens
= 0; // excluding comments and directives
219 int Errors
= 0; // #error directives
221 bool operator>(const Score
&Other
) const {
222 // Seeing errors is bad, other things are good.
223 return std::make_tuple(-Errors
, Tokens
, Directives
) >
224 std::make_tuple(-Other
.Errors
, Other
.Tokens
, Other
.Directives
);
227 Score
&operator+=(const Score
&Other
) {
228 Tokens
+= Other
.Tokens
;
229 Directives
+= Other
.Directives
;
230 Errors
+= Other
.Errors
;
235 Score
operator()(DirectiveTree::Code
&C
) {
237 for (const Token
&T
: Code
.tokens(C
.Tokens
))
238 if (T
.Kind
!= tok::comment
)
243 Score
operator()(DirectiveTree::Directive
&D
) {
246 S
.Errors
= D
.Kind
== tok::pp_error
;
250 Score
operator()(DirectiveTree::Conditional
&C
) {
252 bool MayTakeTrivial
= true;
253 bool TookTrivial
= false;
255 for (unsigned I
= 0; I
< C
.Branches
.size(); ++I
) {
256 // Walk the branch to make its nested choices in any case.
257 Score BranchScore
= walk(C
.Branches
[I
].second
);
258 // If we already took an #if 1, don't consider any other branches.
261 // Is this a trivial #if 0 or #if 1?
262 if (auto TriviallyTaken
= isTakenWhenReached(C
.Branches
[I
].first
)) {
263 if (!*TriviallyTaken
)
264 continue; // Don't consider #if 0 even if it scores well.
268 // After a nontrivial condition, #elif 1 isn't guaranteed taken.
269 MayTakeTrivial
= false;
271 // Is this the best branch so far? (Including if it's #if 1).
272 if (TookTrivial
|| !C
.Taken
|| BranchScore
> Best
) {
279 Score
walk(DirectiveTree
&M
) {
281 for (auto &C
: M
.Chunks
)
282 S
+= std::visit(*this, C
);
287 // Return true if the directive starts an always-taken conditional branch,
288 // false if the branch is never taken, and std::nullopt otherwise.
289 std::optional
<bool> isTakenWhenReached(const DirectiveTree::Directive
&Dir
) {
291 case clang::tok::pp_if
:
292 case clang::tok::pp_elif
:
293 break; // handled below
294 case clang::tok::pp_else
:
296 default: // #ifdef etc
300 const auto &Tokens
= Code
.tokens(Dir
.Tokens
);
301 assert(!Tokens
.empty() && Tokens
.front().Kind
== tok::hash
);
302 const Token
&Name
= Tokens
.front().nextNC();
303 const Token
&Value
= Name
.nextNC();
304 // Does the condition consist of exactly one token?
305 if (&Value
>= Tokens
.end() || &Value
.nextNC() < Tokens
.end())
307 return llvm::StringSwitch
<std::optional
<bool>>(Value
.text())
308 .Cases("true", "1", true)
309 .Cases("false", "0", false)
310 .Default(std::nullopt
);
313 const TokenStream
&Code
;
318 void chooseConditionalBranches(DirectiveTree
&Tree
, const TokenStream
&Code
) {
319 BranchChooser
{Code
}.walk(Tree
);
324 const TokenStream
&In
;
328 Preprocessor(const TokenStream
&In
, TokenStream
&Out
) : In(In
), Out(Out
) {}
329 ~Preprocessor() { Out
.finalize(); }
331 void walk(const DirectiveTree
&T
) {
332 for (const auto &C
: T
.Chunks
)
333 std::visit(*this, C
);
336 void operator()(const DirectiveTree::Code
&C
) {
337 for (const auto &Tok
: In
.tokens(C
.Tokens
))
341 void operator()(const DirectiveTree::Directive
&) {}
343 void operator()(const DirectiveTree::Conditional
&C
) {
345 walk(C
.Branches
[*C
.Taken
].second
);
350 TokenStream
DirectiveTree::stripDirectives(const TokenStream
&In
) const {
352 Preprocessor(In
, Out
).walk(*this);
356 } // namespace clangd