1 //===-- StdLib.cpp ----------------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
17 #include "SymbolCollector.h"
18 #include "index/IndexAction.h"
19 #include "support/Logger.h"
20 #include "support/ThreadsafeFS.h"
21 #include "support/Trace.h"
22 #include "clang/Basic/LangOptions.h"
23 #include "clang/Frontend/CompilerInvocation.h"
24 #include "clang/Lex/PreprocessorOptions.h"
25 #include "clang/Tooling/Inclusions/StandardLibrary.h"
26 #include "llvm/ADT/IntrusiveRefCntPtr.h"
27 #include "llvm/ADT/StringRef.h"
28 #include "llvm/Support/MemoryBuffer.h"
29 #include "llvm/Support/Path.h"
37 Lang
langFromOpts(const LangOptions
&LO
) { return LO
.CPlusPlus
? CXX
: C
; }
// Returns the name of the header that this file treats as mandatory for
// language L: it is passed to buildUmbrella() as the header whose absence
// triggers an #error, and StdLibSet::add() probes the search path for it.
// NOTE(review): the per-language cases are not visible in this excerpt.
38 llvm::StringLiteral
mandatoryHeader(Lang L
) {
// Only reached if no case handled L.
45 llvm_unreachable("unhandled Lang");
// Maps LO to a LangStandard::Kind. The results visible here are
// lang_cxx23/20/17/14/11/98 and lang_c23/c17/c99.
// The returned Kind is compared numerically (as an int) by StdLibSet::isBest()
// and StdLibSet::add(), so callers rely on the enumerator ordering.
// NOTE(review): the conditions selecting each return are not visible in this
// excerpt; presumably they test LO's per-standard flags, newest first —
// confirm against the full file.
48 LangStandard::Kind
standardFromOpts(const LangOptions
&LO
) {
51 return LangStandard::lang_cxx23
;
53 return LangStandard::lang_cxx20
;
55 return LangStandard::lang_cxx17
;
57 return LangStandard::lang_cxx14
;
59 return LangStandard::lang_cxx11
;
60 return LangStandard::lang_cxx98
;
63 return LangStandard::lang_c23
;
64 // C17 has no new features, so treat {C11,C17} as C17.
66 return LangStandard::lang_c17
;
67 return LangStandard::lang_c99
;
// Builds the source text of an umbrella header.
// The text starts with a check that raises #error when the header named by
// Mandatory cannot be found, followed by one __has_include-guarded section per
// entry in Headers.
// NOTE(review): the rest of the per-header format string and the return are
// not visible in this excerpt; presumably each section #includes the header
// and the accumulated string is returned.
70 std::string
buildUmbrella(llvm::StringLiteral Mandatory
,
71 llvm::ArrayRef
<tooling::stdlib::Header
> Headers
) {
// OS appends to Result.
73 llvm::raw_string_ostream
OS(Result
);
75 // We __has_include guard all our #includes to avoid errors when using older
76 // stdlib versions that don't have headers for the newest language standards.
77 // But make sure we get *some* error if things are totally broken.
79 "#if !__has_include(<{0}>)\n"
80 "#error Mandatory header <{0}> not found in standard library!\n"
84 for (auto Header
: Headers
) {
85 OS
<< llvm::formatv("#if __has_include({0})\n"
// Returns the umbrella header text for the language selected by LO.
// One string per language is built by buildUmbrella() from the full list of
// that language's standard headers.
95 llvm::StringRef
getStdlibUmbrellaHeader(const LangOptions
&LO
) {
96 // The umbrella header is the same for all versions of each language.
97 // Headers that are unsupported in old lang versions are usually guarded by
98 // #if. Some headers may not be present in old stdlib versions; the umbrella
99 // header guards with __has_include for this purpose.
100 Lang L
= langFromOpts(LO
);
// Function-local statics: each string is built on first use and allocated
// with new; no delete is visible here. NOTE(review): presumably this keeps
// the returned StringRef valid for the rest of the process — confirm.
103 static std::string
*UmbrellaCXX
= new std::string(buildUmbrella(
105 tooling::stdlib::Header::all(tooling::stdlib::Lang::CXX
)));
108 static std::string
*UmbrellaC
= new std::string(
109 buildUmbrella(mandatoryHeader(L
),
110 tooling::stdlib::Header::all(tooling::stdlib::Lang::C
)));
// Only reached if L matched neither language.
113 llvm_unreachable("invalid Lang in langFromOpts");
118 // Including the standard library leaks unwanted transitively included symbols.
120 // We want to drop these, but they're a bit tricky to identify:
121 // - we don't want to limit to symbols on our list, as our list has only
122 // top-level symbols (and there may be legitimate stdlib extensions).
123 // - we can't limit to only symbols defined in known stdlib headers, as stdlib
124 // internal structure is murky
125 // - we can't strictly require symbols to come from a particular path, e.g.
126 // libstdc++ is mostly under /usr/include/c++/10/...
127 // but std::ctype_base is under /usr/include/<platform>/c++/10/...
128 // We require the symbol to come from a header that is *either* from
129 // the standard library path (as identified by the location of <vector>), or
130 // another header that defines a symbol from our stdlib list.
// Builds and returns a new slab from Slab, logging and (presumably) dropping
// symbols for which neither the canonical declaration's file nor the
// definition's file was classified as a "good" header.
// Loc.Paths supplies the directories treated as standard library locations.
// NOTE(review): the lines that skip rejected symbols and insert accepted ones
// into Result are not visible in this excerpt.
131 SymbolSlab
filter(SymbolSlab Slab
, const StdLibLocation
&Loc
) {
132 SymbolSlab::Builder Result
;
// Names of every known C++ and C standard header. The set is built by a
// lambda on first call and allocated with new; no delete is visible here.
134 static auto &StandardHeaders
= *[] {
135 auto *Set
= new llvm::DenseSet
<llvm::StringRef
>();
136 for (auto Header
: tooling::stdlib::Header::all(tooling::stdlib::Lang::CXX
))
137 Set
->insert(Header
.name());
138 for (auto Header
: tooling::stdlib::Header::all(tooling::stdlib::Lang::C
))
139 Set
->insert(Header
.name());
143 // Form prefixes like file:///usr/include/c++/10/
144 // These can be trivially prefix-compared with URIs in the indexed symbols.
145 llvm::SmallVector
<std::string
> StdLibURIPrefixes
;
146 for (const auto &Path
: Loc
.Paths
) {
147 StdLibURIPrefixes
.push_back(URI::create(Path
).toString());
// Ensure a trailing '/' so a prefix match ends on a directory boundary.
148 if (StdLibURIPrefixes
.back().back() != '/')
149 StdLibURIPrefixes
.back().push_back('/');
151 // For each header URI, is it *either* prefixed by StdLibURIPrefixes *or*
152 // owner of a symbol whose insertable header is in StandardHeaders?
153 // Pointer key because strings in a SymbolSlab are interned.
154 llvm::DenseMap
<const char *, bool> GoodHeader
;
// Pass 1: classify every file URI referenced by a symbol in the slab.
155 for (const Symbol
&S
: Slab
) {
// The symbol's first include header is a known standard header: mark both
// of the symbol's files as good.
156 if (!S
.IncludeHeaders
.empty() &&
157 StandardHeaders
.contains(S
.IncludeHeaders
.front().IncludeHeader
)) {
158 GoodHeader
[S
.CanonicalDeclaration
.FileURI
] = true;
159 GoodHeader
[S
.Definition
.FileURI
] = true;
// Record each URI (default: not good) and evaluate it against the stdlib
// URI prefixes.
// NOTE(review): the guard between try_emplace and the assignment is not
// visible here; presumably only newly inserted entries are evaluated, so
// URIs already marked good are not overwritten — confirm.
162 for (const char *URI
:
163 {S
.CanonicalDeclaration
.FileURI
, S
.Definition
.FileURI
}) {
164 auto R
= GoodHeader
.try_emplace(URI
, false);
166 R
.first
->second
= llvm::any_of(
168 [&, URIStr(llvm::StringRef(URI
))](const std::string
&Prefix
) {
169 return URIStr
.starts_with(Prefix
);
// Debug-log every non-empty URI that was classified as good.
175 for (const auto &Good
: GoodHeader
)
176 if (Good
.second
&& *Good
.first
)
177 dlog("Stdlib header: {0}", Good
.first
);
179 // Empty URIs aren't considered good. (Definition can be blank).
180 auto IsGoodHeader
= [&](const char *C
) { return *C
&& GoodHeader
.lookup(C
); };
// Pass 2: a symbol passes if its declaration or its definition is in a good
// header; symbols that fail are logged.
182 for (const Symbol
&S
: Slab
) {
183 if (!(IsGoodHeader(S
.CanonicalDeclaration
.FileURI
) ||
184 IsGoodHeader(S
.Definition
.FileURI
))) {
185 dlog("Ignoring wrong-header symbol {0}{1} in {2}", S
.Scope
, S
.Name
,
186 S
.CanonicalDeclaration
.FileURI
);
192 return std::move(Result
).build();
// Indexes HeaderSources using the compiler configuration in CI.
//
// HeaderSources: text used as the contents of CI's single input file.
// CI:            must have exactly one frontend input and no implicit PCH
//                include; ownership is passed on to
//                prepareCompilerInstance().
// Loc:           standard library locations, forwarded to filter().
// TFS:           filesystem used to build the compiler instance.
//
// The collected symbols are passed through filter() before the summary is
// logged.
// NOTE(review): the early returns on failure and the final return are not
// visible in this excerpt; presumably the (possibly empty) Symbols slab is
// returned in every case — confirm.
197 SymbolSlab
indexStandardLibrary(llvm::StringRef HeaderSources
,
198 std::unique_ptr
<CompilerInvocation
> CI
,
199 const StdLibLocation
&Loc
,
200 const ThreadsafeFS
&TFS
) {
// Reject invocations that don't have exactly one input, or that would pull in
// an implicit PCH: log the failure, and assert in builds with assertions.
201 if (CI
->getFrontendOpts().Inputs
.size() != 1 ||
202 !CI
->getPreprocessorOpts().ImplicitPCHInclude
.empty()) {
203 elog("Indexing standard library failed: bad CompilerInvocation");
204 assert(false && "indexing stdlib with a dubious CompilerInvocation!");
207 const FrontendInputFile
&Input
= CI
->getFrontendOpts().Inputs
.front();
208 trace::Span
Tracer("StandardLibraryIndex");
209 LangStandard::Kind LangStd
= standardFromOpts(CI
->getLangOpts());
210 log("Indexing {0} standard library in the context of {1}",
211 LangStandard::getLangStandardForKind(LangStd
).getName(), Input
.getFile());
214 IgnoreDiagnostics IgnoreDiags
;
215 // CompilerInvocation is taken from elsewhere, and may map a dirty buffer.
216 CI
->getPreprocessorOpts().clearRemappedFiles();
// HeaderSources is supplied as an in-memory buffer named after the original
// input file; no preamble is used.
217 auto Clang
= prepareCompilerInstance(
218 std::move(CI
), /*Preamble=*/nullptr,
219 llvm::MemoryBuffer::getMemBuffer(HeaderSources
, Input
.getFile()),
220 TFS
.view(/*CWD=*/std::nullopt
), IgnoreDiags
);
222 elog("Standard Library Index: Couldn't build compiler instance");
// Collector configuration: tag symbols as StdLib, skip main-file symbols and
// refs, collect macros, and store all documentation.
226 SymbolCollector::Options IndexOpts
;
227 IndexOpts
.Origin
= SymbolOrigin::StdLib
;
228 IndexOpts
.CollectMainFileSymbols
= false;
229 IndexOpts
.CollectMainFileRefs
= false;
230 IndexOpts
.CollectMacro
= true;
231 IndexOpts
.StoreAllDocumentation
= true;
232 // Sadly we can't use IndexOpts.FileFilter to restrict indexing scope.
233 // Files from outside the StdLibLocation may define true std symbols anyway.
234 // We end up "blessing" such headers, and can only do that by indexing
237 // Refs, relations, include graph in the stdlib mostly aren't useful.
// The symbol callback moves the collected slab into Symbols.
238 auto Action
= createStaticIndexingAction(
239 IndexOpts
, [&](SymbolSlab S
) { Symbols
= std::move(S
); }, nullptr,
242 if (!Action
->BeginSourceFile(*Clang
, Input
)) {
243 elog("Standard Library Index: BeginSourceFile() failed");
247 if (llvm::Error Err
= Action
->Execute()) {
248 elog("Standard Library Index: Execute failed: {0}", std::move(Err
));
252 Action
->EndSourceFile();
// Remember the pre-filter count so the log can report how many symbols
// filter() removed.
254 unsigned SymbolsBeforeFilter
= Symbols
.size();
255 Symbols
= filter(std::move(Symbols
), Loc
);
// In the lines visible here, Errors only changes the text of the log message.
256 bool Errors
= Clang
->hasDiagnostics() &&
257 Clang
->getDiagnostics().hasUncompilableErrorOccurred();
258 log("Indexed {0} standard library{3}: {1} symbols, {2} filtered",
259 LangStandard::getLangStandardForKind(LangStd
).getName(), Symbols
.size(),
260 SymbolsBeforeFilter
- Symbols
.size(),
261 Errors
? " (incomplete due to errors)" : "");
262 SPAN_ATTACH(Tracer
, "symbols", int(Symbols
.size()));
266 SymbolSlab
indexStandardLibrary(std::unique_ptr
<CompilerInvocation
> Invocation
,
267 const StdLibLocation
&Loc
,
268 const ThreadsafeFS
&TFS
) {
269 llvm::StringRef Header
= getStdlibUmbrellaHeader(Invocation
->getLangOpts());
270 return indexStandardLibrary(Header
, std::move(Invocation
), Loc
, TFS
);
273 bool StdLibSet::isBest(const LangOptions
&LO
) const {
274 return standardFromOpts(LO
) >=
275 Best
[langFromOpts(LO
)].load(std::memory_order_acquire
);
// Decides whether a standard library should be indexed for LO and, if so,
// where it lives.
//
// LO: language options; they select the language slot in Best and the
//     candidate version (standardFromOpts).
// HS: header search state whose search directories are probed for the
//     mandatory header.
//
// On success, Best[L] has been raised to the new version and the result holds
// the directories in which the probe header was found. The "No: ..." debug
// messages mark the refusal cases: disabled in config, version not newer than
// the recorded one, and losing a race with another thread.
// NOTE(review): most early returns, including the one for an empty
// SearchPaths, are not visible in this excerpt; presumably each returns
// std::nullopt — confirm.
278 std::optional
<StdLibLocation
> StdLibSet::add(const LangOptions
&LO
,
279 const HeaderSearch
&HS
) {
280 Lang L
= langFromOpts(LO
);
281 int OldVersion
= Best
[L
].load(std::memory_order_acquire
);
282 int NewVersion
= standardFromOpts(LO
);
283 dlog("Index stdlib? {0}",
284 LangStandard::getLangStandardForKind(standardFromOpts(LO
)).getName());
286 if (!Config::current().Index
.StandardLibrary
) {
287 dlog("No: disabled in config");
// NOTE(review): the message reads "have {0}", but the first argument is built
// from NewVersion rather than OldVersion — confirm which was intended.
291 if (NewVersion
<= OldVersion
) {
292 dlog("No: have {0}, {1}>={2}",
293 LangStandard::getLangStandardForKind(
294 static_cast<LangStandard::Kind
>(NewVersion
))
296 OldVersion
, NewVersion
);
300 // We'd like to index a standard library here if there is one.
301 // Check for the existence of the mandatory header (e.g. <vector>) on the
302 // search path. We could cache this, but we only get here repeatedly when
303 // there's no stdlib, and even then only once per preamble build.
304 llvm::StringLiteral ProbeHeader
= mandatoryHeader(L
);
305 llvm::SmallString
<256> Path
; // Scratch space.
306 llvm::SmallVector
<std::string
> SearchPaths
;
// Records the real path of the directory containing HeaderPath. Nothing is
// recorded when getRealPath reports an error. Reuses Path as the output
// buffer.
307 auto RecordHeaderPath
= [&](llvm::StringRef HeaderPath
) {
308 llvm::StringRef DirPath
= llvm::sys::path::parent_path(HeaderPath
);
309 if (!HS
.getFileMgr().getVirtualFileSystem().getRealPath(DirPath
, Path
))
310 SearchPaths
.emplace_back(Path
);
// Probe every search directory for ProbeHeader.
312 for (const auto &DL
:
313 llvm::make_range(HS
.search_dir_begin(), HS
.search_dir_end())) {
314 switch (DL
.getLookupType()) {
// Plain directory: the probe header must exist there as a regular file.
315 case DirectoryLookup::LT_NormalDir
: {
316 Path
= DL
.getDirRef()->getName();
317 llvm::sys::path::append(Path
, ProbeHeader
);
318 llvm::vfs::Status Stat
;
319 if (!HS
.getFileMgr().getNoncachedStatValue(Path
, Stat
) &&
320 Stat
.isRegularFile())
321 RecordHeaderPath(Path
);
324 case DirectoryLookup::LT_Framework
:
325 // stdlib can't be a framework (framework includes must have a slash)
// Header map: look up the file the probe header maps to.
327 case DirectoryLookup::LT_HeaderMap
:
328 llvm::StringRef Target
=
329 DL
.getHeaderMap()->lookupFilename(ProbeHeader
, Path
);
331 RecordHeaderPath(Target
);
335 if (SearchPaths
.empty())
338 dlog("Found standard library in {0}", llvm::join(SearchPaths
, ", "));
// Publish NewVersion. A failed compare_exchange_weak reloads OldVersion, so
// retry unless another thread has stored an equal or newer version.
340 while (!Best
[L
].compare_exchange_weak(OldVersion
, NewVersion
,
341 std::memory_order_acq_rel
))
342 if (OldVersion
>= NewVersion
) {
343 dlog("No: lost the race");
344 return std::nullopt
; // Another thread won the race while we were
348 dlog("Yes, index stdlib!");
349 return StdLibLocation
{std::move(SearchPaths
)};
352 } // namespace clangd