1 //===--- SystemIncludeExtractor.cpp ------------------------------*- C++-*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
8 // Some compiler drivers have implicit search mechanism for system headers.
9 // This compilation database implementation tries to extract that information by
10 // executing the driver in verbose mode. gcc-compatible drivers print something
14 // #include <...> search starts here:
15 // /usr/lib/gcc/x86_64-linux-gnu/7/include
17 // /usr/lib/gcc/x86_64-linux-gnu/7/include-fixed
18 // /usr/include/x86_64-linux-gnu
20 // End of search list.
23 // This component parses that output and adds each path to command line args
24 // provided by Base, after prepending them with -isystem. Therefore current
25 // implementation would not work with a driver that is not gcc-compatible.
27 // First argument of the command line received from underlying compilation
28 // database is used as compiler driver path. Due to this arbitrary binary
29 // execution, this mechanism is not used by default and only executes binaries
30 // in the paths that are explicitly included by the user.
32 #include "CompileCommands.h"
33 #include "GlobalCompilationDatabase.h"
34 #include "support/Logger.h"
35 #include "support/Threading.h"
36 #include "support/Trace.h"
37 #include "clang/Basic/Diagnostic.h"
38 #include "clang/Basic/DiagnosticIDs.h"
39 #include "clang/Basic/DiagnosticOptions.h"
40 #include "clang/Basic/TargetInfo.h"
41 #include "clang/Basic/TargetOptions.h"
42 #include "clang/Driver/Types.h"
43 #include "clang/Tooling/CompilationDatabase.h"
44 #include "llvm/ADT/ArrayRef.h"
45 #include "llvm/ADT/DenseMap.h"
46 #include "llvm/ADT/Hashing.h"
47 #include "llvm/ADT/IntrusiveRefCntPtr.h"
48 #include "llvm/ADT/STLExtras.h"
49 #include "llvm/ADT/ScopeExit.h"
50 #include "llvm/ADT/SmallString.h"
51 #include "llvm/ADT/SmallVector.h"
52 #include "llvm/ADT/StringExtras.h"
53 #include "llvm/ADT/StringRef.h"
54 #include "llvm/Support/ErrorHandling.h"
55 #include "llvm/Support/FileSystem.h"
56 #include "llvm/Support/MemoryBuffer.h"
57 #include "llvm/Support/Path.h"
58 #include "llvm/Support/Program.h"
59 #include "llvm/Support/Regex.h"
60 #include "llvm/Support/ScopedPrinter.h"
61 #include "llvm/Support/raw_ostream.h"
72 namespace clang::clangd
{
76 std::vector
<std::string
> SystemIncludes
;
81 // Name of the driver program to execute or absolute path to it.
83 // Whether certain includes should be part of query.
84 bool StandardIncludes
= true;
85 bool StandardCXXIncludes
= true;
86 // Language to use while querying.
// Field-wise equality: both operands are packed into std::tie tuples and the
// tuples are compared. The left-hand tuple covers Driver, StandardIncludes,
// StandardCXXIncludes, Lang, Sysroot, ISysroot, Target and Stdlib.
// NOTE(review): the tail of the right-hand tuple is cut off in this view;
// presumably it mirrors the left-hand one field for field -- confirm.
93 bool operator==(const DriverArgs
&RHS
) const {
94 return std::tie(Driver
, StandardIncludes
, StandardCXXIncludes
, Lang
,
95 Sysroot
, ISysroot
, Target
, Stdlib
) ==
96 std::tie(RHS
.Driver
, RHS
.StandardIncludes
, RHS
.StandardCXXIncludes
,
97 RHS
.Lang
, RHS
.Sysroot
, RHS
.ISysroot
, RHS
.Target
,
// Builds the description of a driver query from one compile command.
//   Cmd  - command whose first argument is taken as the driver. A driver
//          spelled with a path separator is made absolute against
//          Cmd.Directory.
//   File - file being compiled; its extension is used to infer Lang when the
//          flags do not name a language.
// The loop scans every argument for -x, -nostdinc / --no-standard-includes,
// -nostdinc++, --sysroot, -isysroot, --target= / -target and
// --stdlib / -stdlib=. For flags that accept a detached value, an empty
// remainder after the flag name means the value is the next argument, which
// is only read when one exists (I + 1 < E).
101 DriverArgs(const tooling::CompileCommand
&Cmd
, llvm::StringRef File
) {
102 llvm::SmallString
<128> Driver(Cmd
.CommandLine
.front());
103 // Driver is not a single executable name but instead a path (either
104 // relative or absolute).
105 if (llvm::any_of(Driver
,
106 [](char C
) { return llvm::sys::path::is_separator(C
); })) {
107 llvm::sys::fs::make_absolute(Cmd
.Directory
, Driver
);
// Store the (possibly absolutized) driver spelling in the member.
109 this->Driver
= Driver
.str().str();
110 for (size_t I
= 0, E
= Cmd
.CommandLine
.size(); I
< E
; ++I
) {
// Arg is a view that consume_front() shrinks in place, so after a successful
// match it holds only what followed the flag name.
111 llvm::StringRef Arg
= Cmd
.CommandLine
[I
];
113 // Look for Language related flags.
114 if (Arg
.consume_front("-x")) {
115 if (Arg
.empty() && I
+ 1 < E
)
116 Lang
= Cmd
.CommandLine
[I
+ 1];
120 // Look for standard/builtin includes.
121 else if (Arg
== "-nostdinc" || Arg
== "--no-standard-includes")
122 StandardIncludes
= false;
123 else if (Arg
== "-nostdinc++")
124 StandardCXXIncludes
= false;
125 // Figure out sysroot
126 else if (Arg
.consume_front("--sysroot")) {
127 if (Arg
.consume_front("="))
129 else if (Arg
.empty() && I
+ 1 < E
)
130 Sysroot
= Cmd
.CommandLine
[I
+ 1];
131 } else if (Arg
.consume_front("-isysroot")) {
132 if (Arg
.empty() && I
+ 1 < E
)
133 ISysroot
= Cmd
.CommandLine
[I
+ 1];
135 ISysroot
= Arg
.str();
136 } else if (Arg
.consume_front("--target=")) {
138 } else if (Arg
.consume_front("-target")) {
139 if (Arg
.empty() && I
+ 1 < E
)
140 Target
= Cmd
.CommandLine
[I
+ 1];
141 } else if (Arg
.consume_front("--stdlib")) {
142 if (Arg
.consume_front("="))
144 else if (Arg
.empty() && I
+ 1 < E
)
145 Stdlib
= Cmd
.CommandLine
[I
+ 1];
146 } else if (Arg
.consume_front("-stdlib=")) {
151 // Downgrade objective-c++-header (used in clangd's fallback flags for .h
152 // files) to c++-header, as some drivers may fail to run the extraction
153 // command if it contains `-xobjective-c++-header` and objective-c++ support
155 // In practice, we don't see different include paths for the two on
156 // clang+mac, which is the most common objective-c compiler.
157 if (Lang
== "objective-c++-header") {
161 // If language is not explicit in the flags, infer from the file.
162 // This is important as we want to cache each language separately.
// The extension is looked up without its leading '.'.
164 llvm::StringRef Ext
= llvm::sys::path::extension(File
).trim('.');
165 auto Type
= driver::types::lookupTypeForExtension(Ext
);
166 if (Type
== driver::types::TY_INVALID
) {
167 elog("System include extraction: invalid file type for {0}", Ext
);
169 Lang
= driver::types::getTypeName(Type
);
// Renders the stored settings back into driver flags. The list always starts
// with `-x <Lang>` (Lang is asserted to be non-empty), then adds -nostdinc /
// -nostdinc++ when the corresponding standard includes are disabled, and
// --sysroot / -isysroot with their values when those are non-empty.
// -target and --stdlib are appended with their values as well.
// NOTE(review): the guards around the -target / --stdlib appends and the
// return statement are not visible in this view.
// NOTE(review): the vector holds StringRefs built from members such as Lang
// and Sysroot, so the result presumably must not outlive this object --
// confirm.
173 llvm::SmallVector
<llvm::StringRef
> render() const {
174 // FIXME: Don't treat lang specially?
175 assert(!Lang
.empty());
176 llvm::SmallVector
<llvm::StringRef
> Args
= {"-x", Lang
};
177 if (!StandardIncludes
)
178 Args
.push_back("-nostdinc");
179 if (!StandardCXXIncludes
)
180 Args
.push_back("-nostdinc++");
181 if (!Sysroot
.empty())
182 Args
.append({"--sysroot", Sysroot
});
183 if (!ISysroot
.empty())
184 Args
.append({"-isysroot", ISysroot
});
186 Args
.append({"-target", Target
});
188 Args
.append({"--stdlib", Stdlib
});
192 static DriverArgs
getEmpty() { return {}; }
195 DriverArgs() = default;
198 } // namespace clang::clangd
200 using DriverArgs
= clang::clangd::DriverArgs
;
// DenseMapInfo specialization for DriverArgs, supplying the empty key,
// tombstone key, hash and equality hooks.
// Both sentinel keys start from DriverArgs::getEmpty() and differ only in the
// marker string stored in the Driver field.
// NOTE(review): the return statements of the key factories, the rest of the
// hashed tuple and the body of isEqual are not visible in this view.
201 template <> struct DenseMapInfo
<DriverArgs
> {
// Sentinel for never-used buckets: Driver == "EMPTY_KEY".
202 static DriverArgs
getEmptyKey() {
203 auto Driver
= DriverArgs::getEmpty();
204 Driver
.Driver
= "EMPTY_KEY";
// Sentinel for erased buckets: Driver == "TOMBSTONE_KEY".
207 static DriverArgs
getTombstoneKey() {
208 auto Driver
= DriverArgs::getEmpty();
209 Driver
.Driver
= "TOMBSTONE_KEY";
// Hashes a tuple of the key's fields (StandardIncludes and
// StandardCXXIncludes are among them).
212 static unsigned getHashValue(const DriverArgs
&Val
) {
213 return llvm::hash_value(std::tuple
{
215 Val
.StandardIncludes
,
216 Val
.StandardCXXIncludes
,
224 static bool isEqual(const DriverArgs
&LHS
, const DriverArgs
&RHS
) {
229 namespace clang::clangd
{
// Checks a target triple by asking clang to build a TargetInfo for it.
// A TargetOptions object carrying the triple is handed to
// TargetInfo::CreateTargetInfo together with a DiagnosticsEngine whose
// consumer is an IgnoringDiagConsumer.
// NOTE(review): the return statement is not visible in this view; presumably
// the result is whether CreateTargetInfo produced a non-null Target --
// confirm.
231 bool isValidTarget(llvm::StringRef Triple
) {
232 std::shared_ptr
<TargetOptions
> TargetOpts(new TargetOptions
);
233 TargetOpts
->Triple
= Triple
.str();
234 DiagnosticsEngine
Diags(new DiagnosticIDs
, new DiagnosticOptions
,
235 new IgnoringDiagConsumer
);
236 llvm::IntrusiveRefCntPtr
<TargetInfo
> Target
=
237 TargetInfo::CreateTargetInfo(Diags
, TargetOpts
);
// Parses the driver's verbose output line by line.
//   - Lines between the "#include <...> search starts here:" marker and the
//     "End of search list." marker are trimmed and collected into
//     Info.SystemIncludes.
//   - A line starting with "Target: " supplies Info.Target, but only when
//     isValidTarget() accepts the triple; otherwise it is logged as invalid.
// Errors are logged when the start marker is never found or when the input
// ends while still inside the include list.
// NOTE(review): the declarations of Info / State / Line, the enum and switch
// headers and the early returns on the error paths are not visible in this
// view (presumably std::nullopt is returned on error -- confirm).
241 std::optional
<DriverInfo
> parseDriverOutput(llvm::StringRef Output
) {
// SIS / SIE: start and end markers of the include search list; TS: prefix of
// the target line.
243 const char SIS
[] = "#include <...> search starts here:";
244 const char SIE
[] = "End of search list.";
245 const char TS
[] = "Target: ";
246 llvm::SmallVector
<llvm::StringRef
> Lines
;
// Split on newlines without a limit; empty lines are dropped.
247 Output
.split(Lines
, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
250 Initial
, // Initial state: searching for target or includes list.
251 IncludesExtracting
, // Includes extracting.
252 Done
// Includes and target extraction done.
// Each section is consumed at most once; these flags record which ones have
// already been seen.
254 bool SeenIncludes
= false;
255 bool SeenTarget
= false;
// Stop as soon as the state machine reaches Done or the lines run out.
256 for (auto *It
= Lines
.begin(); State
!= Done
&& It
!= Lines
.end(); ++It
) {
260 if (!SeenIncludes
&& Line
.trim() == SIS
) {
262 State
= IncludesExtracting
;
263 } else if (!SeenTarget
&& Line
.trim().startswith(TS
)) {
// Strip the "Target: " prefix so only the triple remains.
265 llvm::StringRef TargetLine
= Line
.trim();
266 TargetLine
.consume_front(TS
);
267 // Only detect targets that clang understands
268 if (!isValidTarget(TargetLine
)) {
269 elog("System include extraction: invalid target \"{0}\", ignoring",
272 Info
.Target
= TargetLine
.str();
273 vlog("System include extraction: target extracted: \"{0}\"",
278 case IncludesExtracting
:
279 if (Line
.trim() == SIE
) {
// End of the include list: finish if the target was already seen, otherwise
// go back to Initial and keep looking for it.
280 State
= SeenTarget
? Done
: Initial
;
282 Info
.SystemIncludes
.push_back(Line
.trim().str());
283 vlog("System include extraction: adding {0}", Line
);
287 llvm_unreachable("Impossible state of the driver output parser");
292 elog("System include extraction: start marker not found: {0}", Output
);
// Input ended while still inside the include list.
295 if (State
== IncludesExtracting
) {
296 elog("System include extraction: end marker missing: {0}", Output
);
299 return std::move(Info
);
// Runs Argv (Argv.front() is the program) and returns what it wrote to one
// of its output streams.
//   Argv           - program path followed by its arguments.
//   OutputIsStderr - capture stream 2 (stderr) when true, stream 1 (stdout)
//                    otherwise.
// The chosen stream is redirected into a temporary file that is read back
// after the process finishes. The file is removed when this function exits.
// Failures to create the file, to execute the program, or to read the file
// are logged.
// NOTE(review): the early returns on those error paths and the declarations
// of RC / ErrMsg are not visible in this view.
302 std::optional
<std::string
> run(llvm::ArrayRef
<llvm::StringRef
> Argv
,
303 bool OutputIsStderr
) {
304 llvm::SmallString
<128> OutputPath
;
305 if (auto EC
= llvm::sys::fs::createTemporaryFile("system-includes", "clangd",
307 elog("System include extraction: failed to create temporary file with "
// Scope guard: delete the temporary file on every exit path from here on.
312 auto CleanUp
= llvm::make_scope_exit(
313 [&OutputPath
]() { llvm::sys::fs::remove(OutputPath
); });
// One redirect slot per standard stream (index 0, 1, 2). Only the captured
// stream points at the temporary file.
// NOTE(review): presumably the "" entries send the other streams to the null
// device, per llvm::sys::ExecuteAndWait's documentation -- confirm.
315 std::optional
<llvm::StringRef
> Redirects
[] = {{""}, {""}, {""}};
316 Redirects
[OutputIsStderr
? 2 : 1] = OutputPath
.str();
320 llvm::sys::ExecuteAndWait(Argv
.front(), Argv
, /*Env=*/std::nullopt
,
321 Redirects
, /*SecondsToWait=*/0,
322 /*MemoryLimit=*/0, &ErrMsg
)) {
323 elog("System include extraction: driver execution failed with return code: "
324 "{0} - '{1}'. Args: [{2}]",
325 llvm::to_string(RC
), ErrMsg
, printArgv(Argv
));
// Read the captured output back from the temporary file.
329 auto BufOrError
= llvm::MemoryBuffer::getFile(OutputPath
);
331 elog("System include extraction: failed to read {0} with error {1}",
332 OutputPath
, BufOrError
.getError().message());
// Copy the buffer contents into an owned string for the caller.
335 return BufOrError
.get().get()->getBuffer().str();
// Queries a driver for its system include paths and target.
//   InputArgs        - driver name/path plus the flags to pass (see
//                      DriverArgs::render()).
//   QueryDriverRegex - allowlist; the driver path must match it.
// Steps, as far as visible here:
//   1. A non-absolute driver is looked up with findProgramByName; failure is
//      logged as "not found in PATH".
//   2. The driver path, or its form with dots removed, must match the
//      allowlist regex.
//   3. The driver is run with `-E -v` plus the rendered flags, capturing
//      stderr, and the output is handed to parseDriverOutput().
//   4. The directory printed by `-print-file-name=include` is erased from
//      the extracted includes.
// NOTE(review): the early returns for "not found", "not allowed", failed
// run / parse and the final return are not visible in this view.
338 std::optional
<DriverInfo
>
339 extractSystemIncludesAndTarget(const DriverArgs
&InputArgs
,
340 const llvm::Regex
&QueryDriverRegex
) {
341 trace::Span
Tracer("Extract system includes and target");
343 std::string Driver
= InputArgs
.Driver
;
// A bare program name (not an absolute path) is looked up by name.
344 if (!llvm::sys::path::is_absolute(Driver
)) {
345 auto DriverProgram
= llvm::sys::findProgramByName(Driver
);
347 vlog("System include extraction: driver {0} expanded to {1}", Driver
,
349 Driver
= *DriverProgram
;
351 elog("System include extraction: driver {0} not found in PATH", Driver
);
356 SPAN_ATTACH(Tracer
, "driver", Driver
);
357 SPAN_ATTACH(Tracer
, "lang", InputArgs
.Lang
);
359 // If driver was "../foo" then having to allowlist "/path/a/../foo" rather
360 // than "/path/foo" is absurd.
361 // Allow either to match the allowlist, then proceed with "/path/a/../foo".
362 // This was our historical behavior, and it *could* resolve to something else.
363 llvm::SmallString
<256> NoDots(Driver
);
364 llvm::sys::path::remove_dots(NoDots
, /*remove_dot_dot=*/true);
365 if (!QueryDriverRegex
.match(Driver
) && !QueryDriverRegex
.match(NoDots
)) {
366 vlog("System include extraction: not allowed driver {0}", Driver
);
// Command line for the query: driver, -E -v, then the rendered flags.
370 llvm::SmallVector
<llvm::StringRef
> Args
= {Driver
, "-E", "-v"};
371 Args
.append(InputArgs
.render());
372 // Input needs to go after Lang flags.
// The output to parse is captured from stderr.
374 auto Output
= run(Args
, /*OutputIsStderr=*/true);
378 std::optional
<DriverInfo
> Info
= parseDriverOutput(*Output
);
382 // The built-in headers are tightly coupled to parser builtins.
383 // (These are clang's "resource dir", GCC's GCC_INCLUDE_DIR.)
384 // We should keep using clangd's versions, so exclude the queried builtins.
385 // They're not specially marked in the -v output, but we can get the path
386 // with `$DRIVER -print-file-name=include`.
387 if (auto BuiltinHeaders
=
388 run({Driver
, "-print-file-name=include"}, /*OutputIsStderr=*/false)) {
389 auto Path
= llvm::StringRef(*BuiltinHeaders
).trim();
// Only act on a non-empty absolute path.
390 if (!Path
.empty() && llvm::sys::path::is_absolute(Path
)) {
// Remember the size so the log can tell whether anything was erased.
391 auto Size
= Info
->SystemIncludes
.size();
392 llvm::erase(Info
->SystemIncludes
, Path
);
393 vlog("System includes extractor: builtin headers {0} {1}", Path
,
394 (Info
->SystemIncludes
.size() != Size
)
396 : "not found in driver's response");
400 log("System includes extractor: successfully executed {0}\n\tgot includes: "
401 "\"{1}\"\n\tgot target: \"{2}\"",
402 Driver
, llvm::join(Info
->SystemIncludes
, ", "), Info
->Target
);
// Adds one `-isystem <path>` pair to Cmd.CommandLine for every entry of
// SystemIncludes. The new arguments are inserted in front of a literal `--`
// argument when the command line has one. Otherwise llvm::find yields the
// end iterator and the insert appends them.
//   Cmd            - command to modify in place.
//   SystemIncludes - include directories to add.
// NOTE(review): the return statement is not visible in this view; given the
// reference return type and the call in SystemIncludeExtractor, presumably
// Cmd is returned -- confirm.
406 tooling::CompileCommand
&
407 addSystemIncludes(tooling::CompileCommand
&Cmd
,
408 llvm::ArrayRef
<std::string
> SystemIncludes
) {
409 std::vector
<std::string
> ToAppend
;
410 for (llvm::StringRef Include
: SystemIncludes
) {
411 // FIXME(kadircet): This doesn't work when we have "--driver-mode=cl"
412 ToAppend
.push_back("-isystem");
413 ToAppend
.push_back(Include
.str());
415 if (!ToAppend
.empty()) {
416 // Just append when `--` isn't present.
417 auto InsertAt
= llvm::find(Cmd
.CommandLine
, "--");
// Move the strings into the command line rather than copying them.
418 Cmd
.CommandLine
.insert(InsertAt
, std::make_move_iterator(ToAppend
.begin()),
419 std::make_move_iterator(ToAppend
.end()));
// Adds `--target=<Target>` to Cmd.CommandLine when Target is non-empty.
// The existing arguments are first scanned for `-target` or a `--target=`
// prefix, so that an explicit target is not overridden. The new flag is
// inserted in front of a literal `--` argument when present, otherwise it is
// appended.
//   Cmd    - command to modify in place.
//   Target - extracted target triple; empty means nothing to add.
// NOTE(review): the statement executed when an existing target flag is found
// and the final return are not visible in this view (presumably both return
// Cmd) -- confirm.
424 tooling::CompileCommand
&setTarget(tooling::CompileCommand
&Cmd
,
425 const std::string
&Target
) {
426 if (!Target
.empty()) {
427 // We do not want to override existing target with extracted one.
428 for (llvm::StringRef Arg
: Cmd
.CommandLine
) {
429 if (Arg
== "-target" || Arg
.startswith("--target="))
432 // Just append when `--` isn't present.
433 auto InsertAt
= llvm::find(Cmd
.CommandLine
, "--");
434 Cmd
.CommandLine
.insert(InsertAt
, "--target=" + Target
);
439 /// Converts a glob containing only ** or * into a regex.
/// The glob is walked one character at a time and the regex text is written
/// to a string stream:
///   - a single `*` becomes `[^/]*` (any run of characters without a slash);
///   - a `**` accepts any sequence (its emission is not visible in this view);
///   - a path separator becomes `[/\\]` when both '/' and '\\' are separators
///     on the host;
///   - every other character is emitted through llvm::Regex::escape.
/// NOTE(review): the declaration of RegText, the `**` branch body and the
/// return statement are not visible in this view.
440 std::string
convertGlobToRegex(llvm::StringRef Glob
) {
442 llvm::raw_string_ostream
RegStream(RegText
);
444 for (size_t I
= 0, E
= Glob
.size(); I
< E
; ++I
) {
445 if (Glob
[I
] == '*') {
// Look one character ahead to tell `**` apart from `*`.
446 if (I
+ 1 < E
&& Glob
[I
+ 1] == '*') {
447 // Double star, accept any sequence.
449 // Also skip the second star.
452 // Single star, accept any sequence without a slash.
453 RegStream
<< "[^/]*";
// The two constant is_separator() checks hold only on hosts where both '/'
// and '\\' are path separators.
455 } else if (llvm::sys::path::is_separator(Glob
[I
]) &&
456 llvm::sys::path::is_separator('/') &&
457 llvm::sys::path::is_separator('\\')) {
458 RegStream
<< R
"([/\\])"; // Accept either slash on windows.
// Everything else is matched literally.
460 RegStream
<< llvm::Regex::escape(Glob
.substr(I
, 1));
468 /// Converts a list of globs, each containing only ** or *, into one regex.
/// Every glob is translated with convertGlobToRegex() and the resulting
/// patterns are joined with `|`, so the regex matches when any glob matches.
/// \param Globs globs to combine; must not be empty (asserted).
/// NOTE(review): the return statement is not visible in this view;
/// presumably Reg is returned -- confirm.
469 llvm::Regex
convertGlobsToRegex(llvm::ArrayRef
<std::string
> Globs
) {
470 assert(!Globs
.empty() && "Globs cannot be empty!");
471 std::vector
<std::string
> RegTexts
;
472 RegTexts
.reserve(Globs
.size());
473 for (llvm::StringRef Glob
: Globs
)
474 RegTexts
.push_back(convertGlobToRegex(Glob
));
476 // Tempting to pass IgnoreCase, but we don't know the FS sensitivity.
477 llvm::Regex
Reg(llvm::join(RegTexts
, "|"));
// Sanity check on the built regex (assert-enabled builds only).
478 assert(Reg
.isValid(RegTexts
.front()) &&
479 "Created an invalid regex from globs");
483 /// Extracts system includes from a trusted driver by parsing the output of
484 /// include search path and appends them to the commands coming from underlying
485 /// compilation database.
/// The globs given at construction are compiled once into QueryDriverRegex.
/// Query results are memoized per DriverArgs, so the extraction lambda is
/// passed to QueriedDrivers.get() together with the key.
/// NOTE(review): access specifiers, the early-return statements and the
/// closing braces are not visible in this view.
486 class SystemIncludeExtractor
{
// Builds the allowlist regex from the query-driver globs.
488 SystemIncludeExtractor(llvm::ArrayRef
<std::string
> QueryDriverGlobs
)
489 : QueryDriverRegex(convertGlobsToRegex(QueryDriverGlobs
)) {}
// Mutates Cmd in place: on a successful lookup the extracted system includes
// and target are added to it. Empty command lines and commands for which no
// language could be determined take an early-exit branch.
491 void operator()(tooling::CompileCommand
&Cmd
, llvm::StringRef File
) const {
492 if (Cmd
.CommandLine
.empty())
495 DriverArgs
Args(Cmd
, File
);
496 if (Args
.Lang
.empty())
498 if (auto Info
= QueriedDrivers
.get(Args
, [&] {
499 return extractSystemIncludesAndTarget(Args
, QueryDriverRegex
);
501 setTarget(addSystemIncludes(Cmd
, Info
->SystemIncludes
), Info
->Target
);
506 // Caches driver query results (includes and target). Key is the whole DriverArgs.
507 Memoize
<llvm::DenseMap
<DriverArgs
, std::optional
<DriverInfo
>>> QueriedDrivers
;
// Allowlist of drivers that may be executed.
508 llvm::Regex QueryDriverRegex
;
// Factory for the extractor callback.
//   QueryDriverGlobs - globs selecting the drivers that may be executed.
// An empty glob list takes the early-exit branch; otherwise a
// SystemIncludeExtractor built from the globs is returned as a
// SystemIncludeExtractorFn.
// NOTE(review): the statement executed for the empty case is not visible in
// this view; presumably a null/empty callback is returned -- confirm.
512 SystemIncludeExtractorFn
513 getSystemIncludeExtractor(llvm::ArrayRef
<std::string
> QueryDriverGlobs
) {
514 if (QueryDriverGlobs
.empty())
516 return SystemIncludeExtractor(QueryDriverGlobs
);
519 } // namespace clang::clangd