1 //===--- SystemIncludeExtractor.cpp ------------------------------*- C++-*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
8 // Some compiler drivers have implicit search mechanism for system headers.
9 // This compilation database implementation tries to extract that information by
10 // executing the driver in verbose mode. gcc-compatible drivers emit lines like:
14 // #include <...> search starts here:
15 // /usr/lib/gcc/x86_64-linux-gnu/7/include
17 // /usr/lib/gcc/x86_64-linux-gnu/7/include-fixed
18 // /usr/include/x86_64-linux-gnu
20 // End of search list.
23 // This component parses that output and adds each path to command line args
24 // provided by Base, after prepending them with -isystem. Therefore current
25 // implementation would not work with a driver that is not gcc-compatible.
27 // First argument of the command line received from underlying compilation
28 // database is used as compiler driver path. Due to this arbitrary binary
29 // execution, this mechanism is not used by default and only executes binaries
30 // in the paths that are explicitly included by the user.
32 #include "CompileCommands.h"
33 #include "GlobalCompilationDatabase.h"
34 #include "support/Logger.h"
35 #include "support/Threading.h"
36 #include "support/Trace.h"
37 #include "clang/Basic/Diagnostic.h"
38 #include "clang/Basic/DiagnosticIDs.h"
39 #include "clang/Basic/DiagnosticOptions.h"
40 #include "clang/Basic/TargetInfo.h"
41 #include "clang/Basic/TargetOptions.h"
42 #include "clang/Driver/Types.h"
43 #include "clang/Tooling/CompilationDatabase.h"
44 #include "llvm/ADT/ArrayRef.h"
45 #include "llvm/ADT/DenseMap.h"
46 #include "llvm/ADT/Hashing.h"
47 #include "llvm/ADT/IntrusiveRefCntPtr.h"
48 #include "llvm/ADT/STLExtras.h"
49 #include "llvm/ADT/ScopeExit.h"
50 #include "llvm/ADT/SmallString.h"
51 #include "llvm/ADT/SmallVector.h"
52 #include "llvm/ADT/StringExtras.h"
53 #include "llvm/ADT/StringRef.h"
54 #include "llvm/Support/ErrorHandling.h"
55 #include "llvm/Support/FileSystem.h"
56 #include "llvm/Support/MemoryBuffer.h"
57 #include "llvm/Support/Path.h"
58 #include "llvm/Support/Program.h"
59 #include "llvm/Support/Regex.h"
60 #include "llvm/Support/ScopedPrinter.h"
61 #include "llvm/Support/raw_ostream.h"
72 namespace clang::clangd
{
// Include search directories reported by the driver; parseDriverOutput appends
// one trimmed entry per line found between the search-list markers.
76 std::vector
<std::string
> SystemIncludes
;
81 // Name of the driver program to execute or absolute path to it.
83 // Whether certain includes should be part of query.
// Cleared by the constructor on -nostdinc / --no-standard-includes.
84 bool StandardIncludes
= true;
// Cleared by the constructor on -nostdinc++.
85 bool StandardCXXIncludes
= true;
86 // Language to use while querying.

// Spec-file flags (`-specs=...`, `--specs=...`, or `--specs <file>` stored as
// two entries) copied from the command line; render() forwards them unchanged.
92 llvm::SmallVector
<std::string
> Specs
;
// Field-wise equality: both sides are packed into std::tie tuples and the
// tuples are compared, so two DriverArgs are equal only when every listed
// field matches.
94 bool operator==(const DriverArgs
&RHS
) const {
95 return std::tie(Driver
, StandardIncludes
, StandardCXXIncludes
, Lang
,
96 Sysroot
, ISysroot
, Target
, Stdlib
, Specs
) ==
97 std::tie(RHS
.Driver
, RHS
.StandardIncludes
, RHS
.StandardCXXIncludes
,
98 RHS
.Lang
, RHS
.Sysroot
, RHS
.ISysroot
, RHS
.Target
, RHS
.Stdlib
,
// Builds the query arguments from a compile command.
// - The driver is Cmd.CommandLine.front(), so the command line must not be
//   empty. If it contains a path separator it is made absolute against
//   Cmd.Directory.
// - The command line is then scanned for the flags that influence the query:
//   -x, -nostdinc / --no-standard-includes, -nostdinc++, --sysroot, -isysroot,
//   --target= / -target, --stdlib / -stdlib=, and -specs= / --specs= / --specs.
//   Flags that take their value as a separate token read CommandLine[I + 1],
//   guarded by `I + 1 < E`.
// - File's extension is looked up with driver::types::lookupTypeForExtension
//   to infer the language when the flags did not name one.
102 DriverArgs(const tooling::CompileCommand
&Cmd
, llvm::StringRef File
) {
103 llvm::SmallString
<128> Driver(Cmd
.CommandLine
.front());
104 // Driver is not a single executable name but instead a path (either
105 // relative or absolute).
106 if (llvm::any_of(Driver
,
107 [](char C
) { return llvm::sys::path::is_separator(C
); })) {
108 llvm::sys::fs::make_absolute(Cmd
.Directory
, Driver
);
110 this->Driver
= Driver
.str().str();
111 for (size_t I
= 0, E
= Cmd
.CommandLine
.size(); I
< E
; ++I
) {
112 llvm::StringRef Arg
= Cmd
.CommandLine
[I
];
114 // Look for Language related flags.
115 if (Arg
.consume_front("-x")) {
116 if (Arg
.empty() && I
+ 1 < E
)
117 Lang
= Cmd
.CommandLine
[I
+ 1];
121 // Look for standard/builtin includes.
122 else if (Arg
== "-nostdinc" || Arg
== "--no-standard-includes")
123 StandardIncludes
= false;
124 else if (Arg
== "-nostdinc++")
125 StandardCXXIncludes
= false;
126 // Figure out sysroot
127 else if (Arg
.consume_front("--sysroot")) {
128 if (Arg
.consume_front("="))
130 else if (Arg
.empty() && I
+ 1 < E
)
131 Sysroot
= Cmd
.CommandLine
[I
+ 1];
132 } else if (Arg
.consume_front("-isysroot")) {
133 if (Arg
.empty() && I
+ 1 < E
)
134 ISysroot
= Cmd
.CommandLine
[I
+ 1];
136 ISysroot
= Arg
.str();
137 } else if (Arg
.consume_front("--target=")) {
139 } else if (Arg
.consume_front("-target")) {
140 if (Arg
.empty() && I
+ 1 < E
)
141 Target
= Cmd
.CommandLine
[I
+ 1];
142 } else if (Arg
.consume_front("--stdlib")) {
143 if (Arg
.consume_front("="))
145 else if (Arg
.empty() && I
+ 1 < E
)
146 Stdlib
= Cmd
.CommandLine
[I
+ 1];
147 } else if (Arg
.consume_front("-stdlib=")) {
149 } else if (Arg
.starts_with("-specs=")) {
150 // clang requires a single token like `-specs=file` or `--specs=file`,
151 // but gcc will accept two tokens like `--specs file`. Since the
152 // compilation database is presumably correct, we just forward the flags
// as-is (starts_with does not consume the prefix, so Arg is stored whole).
154 Specs
.push_back(Arg
.str());
155 } else if (Arg
.starts_with("--specs=")) {
156 Specs
.push_back(Arg
.str());
157 } else if (Arg
== "--specs" && I
+ 1 < E
) {
158 Specs
.push_back(Arg
.str());
159 Specs
.push_back(Cmd
.CommandLine
[I
+ 1]);
163 // Downgrade objective-c++-header (used in clangd's fallback flags for .h
164 // files) to c++-header, as some drivers may fail to run the extraction
165 // command if it contains `-xobjective-c++-header` and objective-c++ support
// is unavailable.
167 // In practice, we don't see different include paths for the two on
168 // clang+mac, which is the most common objective-c compiler.
169 if (Lang
== "objective-c++-header") {
173 // If language is not explicit in the flags, infer from the file.
174 // This is important as we want to cache each language separately.
176 llvm::StringRef Ext
= llvm::sys::path::extension(File
).trim('.');
177 auto Type
= driver::types::lookupTypeForExtension(Ext
);
178 if (Type
== driver::types::TY_INVALID
) {
179 elog("System include extraction: invalid file type for {0}", Ext
);
181 Lang
= driver::types::getTypeName(Type
);
// Renders the stored settings as driver flags, in this order: `-x Lang`,
// `-nostdinc` / `-nostdinc++` when the corresponding include set is disabled,
// `--sysroot` / `-isysroot` pairs when non-empty, the `-target` and `--stdlib`
// pairs, and finally every entry of Specs.
// Lang must be non-empty (asserted).
// NOTE(review): the result holds StringRefs that appear to point at this
// object's members, so it should not outlive the DriverArgs -- confirm.
185 llvm::SmallVector
<llvm::StringRef
> render() const {
186 // FIXME: Don't treat lang specially?
187 assert(!Lang
.empty());
188 llvm::SmallVector
<llvm::StringRef
> Args
= {"-x", Lang
};
189 if (!StandardIncludes
)
190 Args
.push_back("-nostdinc");
191 if (!StandardCXXIncludes
)
192 Args
.push_back("-nostdinc++");
193 if (!Sysroot
.empty())
194 Args
.append({"--sysroot", Sysroot
});
195 if (!ISysroot
.empty())
196 Args
.append({"-isysroot", ISysroot
});
198 Args
.append({"-target", Target
});
200 Args
.append({"--stdlib", Stdlib
});
202 for (llvm::StringRef Spec
: Specs
) {
203 Args
.push_back(Spec
);
// Returns a value-initialized DriverArgs without parsing any command line.
// The DenseMapInfo specialization uses it as the base for its sentinel keys.
209 static DriverArgs
getEmpty() { return {}; }
// Defaulted constructor backing getEmpty().
212 DriverArgs() = default;
215 } // namespace clang::clangd
// DenseMapInfo specialization that lets DriverArgs be used as a DenseMap key
// (the QueriedDrivers cache in SystemIncludeExtractor is keyed this way).
217 using DriverArgs
= clang::clangd::DriverArgs
;
218 template <> struct DenseMapInfo
<DriverArgs
> {
// Sentinel key: an empty DriverArgs whose Driver is the marker "EMPTY_KEY".
219 static DriverArgs
getEmptyKey() {
220 auto Driver
= DriverArgs::getEmpty();
221 Driver
.Driver
= "EMPTY_KEY";
// Sentinel key: an empty DriverArgs whose Driver is the marker
// "TOMBSTONE_KEY".
224 static DriverArgs
getTombstoneKey() {
225 auto Driver
= DriverArgs::getEmpty();
226 Driver
.Driver
= "TOMBSTONE_KEY";
// Hashes the fixed fields as one tuple, hashes the Specs list as a range, and
// combines the two values.
229 static unsigned getHashValue(const DriverArgs
&Val
) {
230 unsigned FixedFieldsHash
= llvm::hash_value(std::tuple
{
232 Val
.StandardIncludes
,
233 Val
.StandardCXXIncludes
,
242 llvm::hash_combine_range(Val
.Specs
.begin(), Val
.Specs
.end());
244 return llvm::hash_combine(FixedFieldsHash
, SpecsHash
);
// Key equality for the map. NOTE(review): body not visible here; presumably
// delegates to DriverArgs::operator== -- confirm.
246 static bool isEqual(const DriverArgs
&LHS
, const DriverArgs
&RHS
) {
251 namespace clang::clangd
{
// Checks a target triple by asking clang to construct a TargetInfo for it.
// Diagnostics go to an IgnoringDiagConsumer, so nothing is reported from here.
// NOTE(review): the return statement is not visible here; presumably the
// result is whether CreateTargetInfo produced a non-null TargetInfo -- confirm.
253 bool isValidTarget(llvm::StringRef Triple
) {
254 std::shared_ptr
<TargetOptions
> TargetOpts(new TargetOptions
);
255 TargetOpts
->Triple
= Triple
.str();
256 DiagnosticsEngine
Diags(new DiagnosticIDs
, new DiagnosticOptions
,
257 new IgnoringDiagConsumer
);
258 llvm::IntrusiveRefCntPtr
<TargetInfo
> Target
=
259 TargetInfo::CreateTargetInfo(Diags
, TargetOpts
);
// Parses the verbose driver output into a DriverInfo.
//
// Output is split on '\n' (empty lines dropped) and fed through a small state
// machine:
// - Initial: a line equal to the start marker (SIS) switches to
//   IncludesExtracting; a line starting with "Target: " is checked with
//   isValidTarget and, if accepted, stored in Info.Target. Each is only
//   looked for until it has been seen once.
// - IncludesExtracting: the end marker (SIE) finishes the list, moving to Done
//   when the target was already seen and back to Initial otherwise; any other
//   line is trimmed and appended to Info.SystemIncludes.
// - Done: stops the loop.
//
// A missing start marker or an unterminated list is logged with elog.
// NOTE(review): the early-return statements are not visible here; presumably
// those cases yield std::nullopt -- confirm.
263 std::optional
<DriverInfo
> parseDriverOutput(llvm::StringRef Output
) {
265 const char SIS
[] = "#include <...> search starts here:";
266 const char SIE
[] = "End of search list.";
267 const char TS
[] = "Target: ";
268 llvm::SmallVector
<llvm::StringRef
> Lines
;
269 Output
.split(Lines
, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
272 Initial
, // Initial state: searching for target or includes list.
273 IncludesExtracting
, // Includes extracting.
274 Done
// Includes and target extraction done.
276 bool SeenIncludes
= false;
277 bool SeenTarget
= false;
278 for (auto *It
= Lines
.begin(); State
!= Done
&& It
!= Lines
.end(); ++It
) {
282 if (!SeenIncludes
&& Line
.trim() == SIS
) {
284 State
= IncludesExtracting
;
285 } else if (!SeenTarget
&& Line
.trim().starts_with(TS
)) {
287 llvm::StringRef TargetLine
= Line
.trim();
288 TargetLine
.consume_front(TS
);
289 // Only detect targets that clang understands
290 if (!isValidTarget(TargetLine
)) {
291 elog("System include extraction: invalid target \"{0}\", ignoring",
294 Info
.Target
= TargetLine
.str();
295 vlog("System include extraction: target extracted: \"{0}\"",
300 case IncludesExtracting
:
301 if (Line
.trim() == SIE
) {
// Keep scanning for the target line if it has not shown up yet.
302 State
= SeenTarget
? Done
: Initial
;
304 Info
.SystemIncludes
.push_back(Line
.trim().str());
305 vlog("System include extraction: adding {0}", Line
);
309 llvm_unreachable("Impossible state of the driver output parser");
314 elog("System include extraction: start marker not found: {0}", Output
);
317 if (State
== IncludesExtracting
) {
318 elog("System include extraction: end marker missing: {0}", Output
);
321 return std::move(Info
);
// Runs Argv (Argv.front() is the program) and returns the captured output.
//
// One output stream is redirected into a temporary file, which is read back
// after the process finishes. OutputIsStderr selects the stream: redirect
// slot 2 when true, slot 1 otherwise. The other slots are redirected to "".
// The temporary file is removed when the function exits (scope-exit guard).
//
// Failures to create the file, to execute the driver, or to read the file are
// logged with elog.
// NOTE(review): the failure returns are not visible here; presumably
// std::nullopt -- confirm.
324 std::optional
<std::string
> run(llvm::ArrayRef
<llvm::StringRef
> Argv
,
325 bool OutputIsStderr
) {
326 llvm::SmallString
<128> OutputPath
;
327 if (auto EC
= llvm::sys::fs::createTemporaryFile("system-includes", "clangd",
329 elog("System include extraction: failed to create temporary file with "
334 auto CleanUp
= llvm::make_scope_exit(
335 [&OutputPath
]() { llvm::sys::fs::remove(OutputPath
); });
337 std::optional
<llvm::StringRef
> Redirects
[] = {{""}, {""}, {""}};
338 Redirects
[OutputIsStderr
? 2 : 1] = OutputPath
.str();
342 llvm::sys::ExecuteAndWait(Argv
.front(), Argv
, /*Env=*/std::nullopt
,
343 Redirects
, /*SecondsToWait=*/0,
344 /*MemoryLimit=*/0, &ErrMsg
)) {
345 elog("System include extraction: driver execution failed with return code: "
346 "{0} - '{1}'. Args: [{2}]",
347 llvm::to_string(RC
), ErrMsg
, printArgv(Argv
));
351 auto BufOrError
= llvm::MemoryBuffer::getFile(OutputPath
);
353 elog("System include extraction: failed to read {0} with error {1}",
354 OutputPath
, BufOrError
.getError().message());
357 return BufOrError
.get().get()->getBuffer().str();
// Queries the driver named in InputArgs for its include search list and target.
//
// Steps visible here:
// 1. A non-absolute driver name is resolved with findProgramByName; a failed
//    lookup is logged.
// 2. The driver path must match QueryDriverRegex, either as given or with dots
//    removed; otherwise it is logged as not allowed.
// 3. `Driver -E -v <InputArgs.render()>` is run with stderr captured, and the
//    output is handed to parseDriverOutput.
// 4. `Driver -print-file-name=include` is run with stdout captured; if it
//    yields an absolute path, that directory is erased from the extracted
//    SystemIncludes.
//
// NOTE(review): the early returns are not visible here; presumably
// std::nullopt on each failure -- confirm.
360 std::optional
<DriverInfo
>
361 extractSystemIncludesAndTarget(const DriverArgs
&InputArgs
,
362 const llvm::Regex
&QueryDriverRegex
) {
363 trace::Span
Tracer("Extract system includes and target");
365 std::string Driver
= InputArgs
.Driver
;
366 if (!llvm::sys::path::is_absolute(Driver
)) {
367 auto DriverProgram
= llvm::sys::findProgramByName(Driver
);
369 vlog("System include extraction: driver {0} expanded to {1}", Driver
,
371 Driver
= *DriverProgram
;
373 elog("System include extraction: driver {0} not found in PATH", Driver
);
378 SPAN_ATTACH(Tracer
, "driver", Driver
);
379 SPAN_ATTACH(Tracer
, "lang", InputArgs
.Lang
);
381 // If driver was "../foo" then having to allowlist "/path/a/../foo" rather
382 // than "/path/foo" is absurd.
383 // Allow either to match the allowlist, then proceed with "/path/a/../foo".
384 // This was our historical behavior, and it *could* resolve to something else.
385 llvm::SmallString
<256> NoDots(Driver
);
386 llvm::sys::path::remove_dots(NoDots
, /*remove_dot_dot=*/true);
387 if (!QueryDriverRegex
.match(Driver
) && !QueryDriverRegex
.match(NoDots
)) {
388 vlog("System include extraction: not allowed driver {0}", Driver
);
// Verbose preprocess-only invocation; the search list is read from stderr.
392 llvm::SmallVector
<llvm::StringRef
> Args
= {Driver
, "-E", "-v"};
393 Args
.append(InputArgs
.render());
394 // Input needs to go after Lang flags.
396 auto Output
= run(Args
, /*OutputIsStderr=*/true);
400 std::optional
<DriverInfo
> Info
= parseDriverOutput(*Output
);
404 // The built-in headers are tightly coupled to parser builtins.
405 // (These are clang's "resource dir", GCC's GCC_INCLUDE_DIR.)
406 // We should keep using clangd's versions, so exclude the queried builtins.
407 // They're not specially marked in the -v output, but we can get the path
408 // with `$DRIVER -print-file-name=include`.
409 if (auto BuiltinHeaders
=
410 run({Driver
, "-print-file-name=include"}, /*OutputIsStderr=*/false)) {
411 auto Path
= llvm::StringRef(*BuiltinHeaders
).trim();
412 if (!Path
.empty() && llvm::sys::path::is_absolute(Path
)) {
// Remember the size so the log can tell whether anything was erased.
413 auto Size
= Info
->SystemIncludes
.size();
414 llvm::erase(Info
->SystemIncludes
, Path
);
415 vlog("System includes extractor: builtin headers {0} {1}", Path
,
416 (Info
->SystemIncludes
.size() != Size
)
418 : "not found in driver's response");
422 log("System includes extractor: successfully executed {0}\n\tgot includes: "
423 "\"{1}\"\n\tgot target: \"{2}\"",
424 Driver
, llvm::join(Info
->SystemIncludes
, ", "), Info
->Target
);
// Adds `-isystem <dir>` to Cmd's command line for every entry of
// SystemIncludes, preserving their order.
// The new arguments are inserted in front of a `--` argument if there is one;
// otherwise llvm::find yields the end iterator and they are appended.
// Nothing is inserted when SystemIncludes is empty.
// The return type is a reference to a CompileCommand, which lets callers chain
// the call (see SystemIncludeExtractor::operator()).
428 tooling::CompileCommand
&
429 addSystemIncludes(tooling::CompileCommand
&Cmd
,
430 llvm::ArrayRef
<std::string
> SystemIncludes
) {
431 std::vector
<std::string
> ToAppend
;
432 for (llvm::StringRef Include
: SystemIncludes
) {
433 // FIXME(kadircet): This doesn't work when we have "--driver-mode=cl"
434 ToAppend
.push_back("-isystem");
435 ToAppend
.push_back(Include
.str());
437 if (!ToAppend
.empty()) {
438 // Just append when `--` isn't present.
439 auto InsertAt
= llvm::find(Cmd
.CommandLine
, "--");
440 Cmd
.CommandLine
.insert(InsertAt
, std::make_move_iterator(ToAppend
.begin()),
441 std::make_move_iterator(ToAppend
.end()));
// Adds `--target=<Target>` to Cmd's command line.
// An empty Target is ignored. The command line is first scanned for an
// existing `-target` or `--target=...` argument, since a target already chosen
// by the user must not be overridden.
// The flag is inserted in front of a `--` argument if there is one; otherwise
// it is appended.
446 tooling::CompileCommand
&setTarget(tooling::CompileCommand
&Cmd
,
447 const std::string
&Target
) {
448 if (!Target
.empty()) {
449 // We do not want to override existing target with extracted one.
450 for (llvm::StringRef Arg
: Cmd
.CommandLine
) {
451 if (Arg
== "-target" || Arg
.starts_with("--target="))
454 // Just append when `--` isn't present.
455 auto InsertAt
= llvm::find(Cmd
.CommandLine
, "--");
456 Cmd
.CommandLine
.insert(InsertAt
, "--target=" + Target
);
461 /// Converts a glob containing only ** or * into a regex.
/// The glob is translated one character at a time into RegStream:
/// - `**` accepts any sequence (the second star is skipped),
/// - `*` becomes `[^/]*`, i.e. any sequence without a slash,
/// - a path separator becomes `[/\\]` when both '/' and '\\' are separators,
/// - every other character is emitted through llvm::Regex::escape.
462 std::string
convertGlobToRegex(llvm::StringRef Glob
) {
464 llvm::raw_string_ostream
RegStream(RegText
);
466 for (size_t I
= 0, E
= Glob
.size(); I
< E
; ++I
) {
467 if (Glob
[I
] == '*') {
468 if (I
+ 1 < E
&& Glob
[I
+ 1] == '*') {
469 // Double star, accept any sequence.
471 // Also skip the second star.
474 // Single star, accept any sequence without a slash.
475 RegStream
<< "[^/]*";
// The two constant is_separator checks hold only where both '/' and '\\'
// are path separators.
477 } else if (llvm::sys::path::is_separator(Glob
[I
]) &&
478 llvm::sys::path::is_separator('/') &&
479 llvm::sys::path::is_separator('\\')) {
480 RegStream
<< R
"([/\\])"; // Accept either slash on windows.
482 RegStream
<< llvm::Regex::escape(Glob
.substr(I
, 1));
490 /// Converts a list of globs into one regex that matches any of them: each glob
/// is translated with convertGlobToRegex and the results are joined with `|`.
/// Globs must not be empty (asserted).
491 llvm::Regex
convertGlobsToRegex(llvm::ArrayRef
<std::string
> Globs
) {
492 assert(!Globs
.empty() && "Globs cannot be empty!");
493 std::vector
<std::string
> RegTexts
;
494 RegTexts
.reserve(Globs
.size());
495 for (llvm::StringRef Glob
: Globs
)
496 RegTexts
.push_back(convertGlobToRegex(Glob
));
498 // Tempting to pass IgnoreCase, but we don't know the FS sensitivity.
499 llvm::Regex
Reg(llvm::join(RegTexts
, "|"));
500 assert(Reg
.isValid(RegTexts
.front()) &&
501 "Created an invalid regex from globs");
505 /// Extracts system includes from a trusted driver by parsing the output of
506 /// include search path and appends them to the commands coming from underlying
507 /// compilation database.
508 class SystemIncludeExtractor
{
// Compiles the allowlist globs into QueryDriverRegex. QueryDriverGlobs must be
// non-empty, since convertGlobsToRegex asserts on an empty list.
510 SystemIncludeExtractor(llvm::ArrayRef
<std::string
> QueryDriverGlobs
)
511 : QueryDriverRegex(convertGlobsToRegex(QueryDriverGlobs
)) {}
// Adjusts Cmd in place for File. Commands with an empty command line are
// checked for first, as are arguments whose language could not be determined.
// The extraction itself is passed to QueriedDrivers.get as a callback keyed by
// Args; when a result is available its includes and target are applied to Cmd.
513 void operator()(tooling::CompileCommand
&Cmd
, llvm::StringRef File
) const {
514 if (Cmd
.CommandLine
.empty())
517 DriverArgs
Args(Cmd
, File
);
518 if (Args
.Lang
.empty())
520 if (auto Info
= QueriedDrivers
.get(Args
, [&] {
521 return extractSystemIncludesAndTarget(Args
, QueryDriverRegex
);
523 setTarget(addSystemIncludes(Cmd
, Info
->SystemIncludes
), Info
->Target
);
528 // Caches the result of querying a driver (includes and target). Key is the
// full DriverArgs, not just the driver and language.
529 Memoize
<llvm::DenseMap
<DriverArgs
, std::optional
<DriverInfo
>>> QueriedDrivers
;
// Allowlist of drivers that may be executed, built from the query-driver globs.
530 llvm::Regex QueryDriverRegex
;
// Creates the extractor callback for the given query-driver globs.
// An empty glob list is handled separately, before a SystemIncludeExtractor
// (whose constructor requires at least one glob) would be built.
// NOTE(review): the value returned for the empty case is not visible here --
// presumably an empty/null function; confirm.
534 SystemIncludeExtractorFn
535 getSystemIncludeExtractor(llvm::ArrayRef
<std::string
> QueryDriverGlobs
) {
536 if (QueryDriverGlobs
.empty())
538 return SystemIncludeExtractor(QueryDriverGlobs
);
541 } // namespace clang::clangd