[Reland][Runtimes] Merge 'compile_commands.json' files from runtimes build (#116303)
[llvm-project.git] / clang-tools-extra / clangd / support / DirectiveTree.cpp
blobd25da111681afc49bbc1004ec0016c3e59147cb5
1 //===--- DirectiveTree.cpp - Find and strip preprocessor directives -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "DirectiveTree.h"
10 #include "clang/Basic/IdentifierTable.h"
11 #include "clang/Basic/TokenKinds.h"
12 #include "llvm/Support/FormatVariadic.h"
13 #include <optional>
14 #include <variant>
16 namespace clang {
17 namespace clangd {
18 namespace {
20 class DirectiveParser {
21 public:
22 explicit DirectiveParser(const TokenStream &Code)
23 : Code(Code), Tok(&Code.front()) {}
24 void parse(DirectiveTree *Result) { parse(Result, /*TopLevel=*/true); }
26 private:
27 // Roles that a directive might take within a conditional block.
28 enum class Cond { None, If, Else, End };
29 static Cond classifyDirective(tok::PPKeywordKind K) {
30 switch (K) {
31 case clang::tok::pp_if:
32 case clang::tok::pp_ifdef:
33 case clang::tok::pp_ifndef:
34 return Cond::If;
35 case clang::tok::pp_elif:
36 case clang::tok::pp_elifdef:
37 case clang::tok::pp_elifndef:
38 case clang::tok::pp_else:
39 return Cond::Else;
40 case clang::tok::pp_endif:
41 return Cond::End;
42 default:
43 return Cond::None;
47 // Parses tokens starting at Tok into Tree.
48 // If we reach an End or Else directive that ends Tree, returns it.
49 // If TopLevel is true, then we do not expect End and always return
50 // std::nullopt.
51 std::optional<DirectiveTree::Directive> parse(DirectiveTree *Tree,
52 bool TopLevel) {
53 auto StartsDirective =
54 [&, AllowDirectiveAt((const Token *)nullptr)]() mutable {
55 if (Tok->flag(LexFlags::StartsPPLine)) {
56 // If we considered a comment at the start of a PP-line, it doesn't
57 // start a directive but the directive can still start after it.
58 if (Tok->Kind == tok::comment)
59 AllowDirectiveAt = Tok + 1;
60 return Tok->Kind == tok::hash;
62 return Tok->Kind == tok::hash && AllowDirectiveAt == Tok;
64 // Each iteration adds one chunk (or returns, if we see #endif).
65 while (Tok->Kind != tok::eof) {
66 // If there's no directive here, we have a code chunk.
67 if (!StartsDirective()) {
68 const Token *Start = Tok;
70 ++Tok;
71 while (Tok->Kind != tok::eof && !StartsDirective());
72 Tree->Chunks.push_back(DirectiveTree::Code{
73 Token::Range{Code.index(*Start), Code.index(*Tok)}});
74 continue;
77 // We have some kind of directive.
78 DirectiveTree::Directive Directive;
79 parseDirective(&Directive);
80 Cond Kind = classifyDirective(Directive.Kind);
81 if (Kind == Cond::If) {
82 // #if or similar, starting a nested conditional block.
83 DirectiveTree::Conditional Conditional;
84 Conditional.Branches.emplace_back();
85 Conditional.Branches.back().first = std::move(Directive);
86 parseConditional(&Conditional);
87 Tree->Chunks.push_back(std::move(Conditional));
88 } else if ((Kind == Cond::Else || Kind == Cond::End) && !TopLevel) {
89 // #endif or similar, ending this PStructure scope.
90 // (#endif is unexpected at the top level, treat as simple directive).
91 return std::move(Directive);
92 } else {
93 // #define or similar, a simple directive at the current scope.
94 Tree->Chunks.push_back(std::move(Directive));
97 return std::nullopt;
100 // Parse the rest of a conditional section, after seeing the If directive.
101 // Returns after consuming the End directive.
102 void parseConditional(DirectiveTree::Conditional *C) {
103 assert(C->Branches.size() == 1 &&
104 C->Branches.front().second.Chunks.empty() &&
105 "Should be ready to parse first branch body");
106 while (Tok->Kind != tok::eof) {
107 auto Terminator = parse(&C->Branches.back().second, /*TopLevel=*/false);
108 if (!Terminator) {
109 assert(Tok->Kind == tok::eof && "gave up parsing before eof?");
110 C->End.Tokens = Token::Range::emptyAt(Code.index(*Tok));
111 return;
113 if (classifyDirective(Terminator->Kind) == Cond::End) {
114 C->End = std::move(*Terminator);
115 return;
117 assert(classifyDirective(Terminator->Kind) == Cond::Else &&
118 "ended branch unexpectedly");
119 C->Branches.emplace_back();
120 C->Branches.back().first = std::move(*Terminator);
124 // Parse a directive. Tok is the hash.
125 void parseDirective(DirectiveTree::Directive *D) {
126 assert(Tok->Kind == tok::hash);
128 // Directive spans from the hash until the end of line or file.
129 const Token *Begin = Tok++;
130 while (Tok->Kind != tok::eof && !Tok->flag(LexFlags::StartsPPLine))
131 ++Tok;
132 ArrayRef<Token> Tokens{Begin, Tok};
133 D->Tokens = {Code.index(*Tokens.begin()), Code.index(*Tokens.end())};
135 // Directive name is the first non-comment token after the hash.
136 Tokens = Tokens.drop_front().drop_while(
137 [](const Token &T) { return T.Kind == tok::comment; });
138 if (!Tokens.empty())
139 D->Kind = PPKeywords.get(Tokens.front().text()).getPPKeywordID();
142 const TokenStream &Code;
143 const Token *Tok;
144 clang::IdentifierTable PPKeywords;
147 struct Dumper {
148 llvm::raw_ostream &OS;
149 unsigned Indent = 0;
151 Dumper(llvm::raw_ostream& OS) : OS(OS) {}
152 void operator()(const DirectiveTree& Tree) {
153 for (const auto& Chunk : Tree.Chunks)
154 std::visit(*this, Chunk);
156 void operator()(const DirectiveTree::Conditional &Conditional) {
157 for (unsigned I = 0; I < Conditional.Branches.size(); ++I) {
158 const auto &Branch = Conditional.Branches[I];
159 (*this)(Branch.first, Conditional.Taken == I);
160 Indent += 2;
161 (*this)(Branch.second);
162 Indent -= 2;
164 (*this)(Conditional.End);
166 void operator()(const DirectiveTree::Directive &Directive,
167 bool Taken = false) {
168 OS.indent(Indent) << llvm::formatv(
169 "#{0} ({1} tokens){2}\n", tok::getPPKeywordSpelling(Directive.Kind),
170 Directive.Tokens.size(), Taken ? " TAKEN" : "");
172 void operator()(const DirectiveTree::Code &Code) {
173 OS.indent(Indent) << llvm::formatv("code ({0} tokens)\n",
174 Code.Tokens.size());
177 } // namespace
179 DirectiveTree DirectiveTree::parse(const TokenStream &Code) {
180 DirectiveTree Result;
181 DirectiveParser(Code).parse(&Result);
182 return Result;
185 // Define operator<< in terms of dump() functions above.
186 #define OSTREAM_DUMP(Type) \
187 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Type &T) { \
188 Dumper{OS}(T); \
189 return OS; \
191 OSTREAM_DUMP(DirectiveTree)
192 OSTREAM_DUMP(DirectiveTree::Directive)
193 OSTREAM_DUMP(DirectiveTree::Conditional)
194 OSTREAM_DUMP(DirectiveTree::Code)
195 #undef OSTREAM_DUMP
197 namespace {
198 // Makes choices about conditional branches.
200 // Generally it tries to maximize the amount of useful code we see.
202 // Caveat: each conditional is evaluated independently. Consider this code:
203 // #ifdef WINDOWS
204 // bool isWindows = true;
205 // #endif
206 // #ifndef WINDOWS
207 // bool isWindows = false;
208 // #endif
209 // We take both branches and define isWindows twice. We could track more state
210 // in order to produce a consistent view, but this is complex.
211 class BranchChooser {
212 public:
213 BranchChooser(const TokenStream &Code) : Code(Code) {}
215 // Describes code seen by making particular branch choices. Higher is better.
216 struct Score {
217 int Tokens = 0; // excluding comments and directives
218 int Directives = 0;
219 int Errors = 0; // #error directives
221 bool operator>(const Score &Other) const {
222 // Seeing errors is bad, other things are good.
223 return std::make_tuple(-Errors, Tokens, Directives) >
224 std::make_tuple(-Other.Errors, Other.Tokens, Other.Directives);
227 Score &operator+=(const Score &Other) {
228 Tokens += Other.Tokens;
229 Directives += Other.Directives;
230 Errors += Other.Errors;
231 return *this;
235 Score operator()(DirectiveTree::Code &C) {
236 Score S;
237 for (const Token &T : Code.tokens(C.Tokens))
238 if (T.Kind != tok::comment)
239 ++S.Tokens;
240 return S;
243 Score operator()(DirectiveTree::Directive &D) {
244 Score S;
245 S.Directives = 1;
246 S.Errors = D.Kind == tok::pp_error;
247 return S;
250 Score operator()(DirectiveTree::Conditional &C) {
251 Score Best;
252 bool MayTakeTrivial = true;
253 bool TookTrivial = false;
255 for (unsigned I = 0; I < C.Branches.size(); ++I) {
256 // Walk the branch to make its nested choices in any case.
257 Score BranchScore = walk(C.Branches[I].second);
258 // If we already took an #if 1, don't consider any other branches.
259 if (TookTrivial)
260 continue;
261 // Is this a trivial #if 0 or #if 1?
262 if (auto TriviallyTaken = isTakenWhenReached(C.Branches[I].first)) {
263 if (!*TriviallyTaken)
264 continue; // Don't consider #if 0 even if it scores well.
265 if (MayTakeTrivial)
266 TookTrivial = true;
267 } else {
268 // After a nontrivial condition, #elif 1 isn't guaranteed taken.
269 MayTakeTrivial = false;
271 // Is this the best branch so far? (Including if it's #if 1).
272 if (TookTrivial || !C.Taken || BranchScore > Best) {
273 Best = BranchScore;
274 C.Taken = I;
277 return Best;
279 Score walk(DirectiveTree &M) {
280 Score S;
281 for (auto &C : M.Chunks)
282 S += std::visit(*this, C);
283 return S;
286 private:
287 // Return true if the directive starts an always-taken conditional branch,
288 // false if the branch is never taken, and std::nullopt otherwise.
289 std::optional<bool> isTakenWhenReached(const DirectiveTree::Directive &Dir) {
290 switch (Dir.Kind) {
291 case clang::tok::pp_if:
292 case clang::tok::pp_elif:
293 break; // handled below
294 case clang::tok::pp_else:
295 return true;
296 default: // #ifdef etc
297 return std::nullopt;
300 const auto &Tokens = Code.tokens(Dir.Tokens);
301 assert(!Tokens.empty() && Tokens.front().Kind == tok::hash);
302 const Token &Name = Tokens.front().nextNC();
303 const Token &Value = Name.nextNC();
304 // Does the condition consist of exactly one token?
305 if (&Value >= Tokens.end() || &Value.nextNC() < Tokens.end())
306 return std::nullopt;
307 return llvm::StringSwitch<std::optional<bool>>(Value.text())
308 .Cases("true", "1", true)
309 .Cases("false", "0", false)
310 .Default(std::nullopt);
313 const TokenStream &Code;
316 } // namespace
318 void chooseConditionalBranches(DirectiveTree &Tree, const TokenStream &Code) {
319 BranchChooser{Code}.walk(Tree);
322 namespace {
323 class Preprocessor {
324 const TokenStream &In;
325 TokenStream &Out;
327 public:
328 Preprocessor(const TokenStream &In, TokenStream &Out) : In(In), Out(Out) {}
329 ~Preprocessor() { Out.finalize(); }
331 void walk(const DirectiveTree &T) {
332 for (const auto &C : T.Chunks)
333 std::visit(*this, C);
336 void operator()(const DirectiveTree::Code &C) {
337 for (const auto &Tok : In.tokens(C.Tokens))
338 Out.push(Tok);
341 void operator()(const DirectiveTree::Directive &) {}
343 void operator()(const DirectiveTree::Conditional &C) {
344 if (C.Taken)
345 walk(C.Branches[*C.Taken].second);
348 } // namespace
350 TokenStream DirectiveTree::stripDirectives(const TokenStream &In) const {
351 TokenStream Out;
352 Preprocessor(In, Out).walk(*this);
353 return Out;
356 } // namespace clangd
357 } // namespace clang