1 //===-- Background.cpp - Build an index in a background thread ------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "index/Background.h"
13 #include "SourceCode.h"
15 #include "index/BackgroundIndexLoader.h"
16 #include "index/FileIndex.h"
17 #include "index/Index.h"
18 #include "index/IndexAction.h"
19 #include "index/MemIndex.h"
20 #include "index/Ref.h"
21 #include "index/Relation.h"
22 #include "index/Serialization.h"
23 #include "index/Symbol.h"
24 #include "index/SymbolCollector.h"
25 #include "support/Context.h"
26 #include "support/Logger.h"
27 #include "support/Path.h"
28 #include "support/Threading.h"
29 #include "support/ThreadsafeFS.h"
30 #include "support/Trace.h"
31 #include "clang/Basic/SourceLocation.h"
32 #include "clang/Basic/SourceManager.h"
33 #include "clang/Basic/Stack.h"
34 #include "clang/Frontend/FrontendAction.h"
35 #include "llvm/ADT/ArrayRef.h"
36 #include "llvm/ADT/DenseSet.h"
37 #include "llvm/ADT/STLExtras.h"
38 #include "llvm/ADT/StringMap.h"
39 #include "llvm/ADT/StringRef.h"
40 #include "llvm/Support/Error.h"
41 #include "llvm/Support/Path.h"
42 #include "llvm/Support/Threading.h"
43 #include "llvm/Support/xxhash.h"
48 #include <condition_variable>
65 // We cannot use vfs->makeAbsolute because Cmd.FileName is either absolute or
66 // relative to Cmd.Directory, which might not be the same as current working
68 llvm::SmallString
<128> getAbsolutePath(const tooling::CompileCommand
&Cmd
) {
69 llvm::SmallString
<128> AbsolutePath
;
70 if (llvm::sys::path::is_absolute(Cmd
.Filename
)) {
71 AbsolutePath
= Cmd
.Filename
;
73 AbsolutePath
= Cmd
.Directory
;
74 llvm::sys::path::append(AbsolutePath
, Cmd
.Filename
);
75 llvm::sys::path::remove_dots(AbsolutePath
, true);
80 bool shardIsStale(const LoadedShard
&LS
, llvm::vfs::FileSystem
*FS
) {
81 auto Buf
= FS
->getBufferForFile(LS
.AbsolutePath
);
83 vlog("Background-index: Couldn't read {0} to validate stored index: {1}",
84 LS
.AbsolutePath
, Buf
.getError().message());
85 // There is no point in indexing an unreadable file.
88 return digest(Buf
->get()->getBuffer()) != LS
.Digest
;
93 BackgroundIndex::BackgroundIndex(
94 const ThreadsafeFS
&TFS
, const GlobalCompilationDatabase
&CDB
,
95 BackgroundIndexStorage::Factory IndexStorageFactory
, Options Opts
)
96 : SwapIndex(std::make_unique
<MemIndex
>()), TFS(TFS
), CDB(CDB
),
97 IndexingPriority(Opts
.IndexingPriority
),
98 ContextProvider(std::move(Opts
.ContextProvider
)),
99 IndexedSymbols(IndexContents::All
),
100 Rebuilder(this, &IndexedSymbols
, Opts
.ThreadPoolSize
),
101 IndexStorageFactory(std::move(IndexStorageFactory
)),
102 Queue(std::move(Opts
.OnProgress
)),
104 CDB
.watch([&](const std::vector
<std::string
> &ChangedFiles
) {
105 enqueue(ChangedFiles
);
107 assert(Opts
.ThreadPoolSize
> 0 && "Thread pool size can't be zero.");
108 assert(this->IndexStorageFactory
&& "Storage factory can not be null!");
109 for (unsigned I
= 0; I
< Opts
.ThreadPoolSize
; ++I
) {
110 ThreadPool
.runAsync("background-worker-" + llvm::Twine(I
+ 1),
111 [this, Ctx(Context::current().clone())]() mutable {
112 clang::noteBottomOfStack();
113 WithContext
BGContext(std::move(Ctx
));
114 Queue
.work([&] { Rebuilder
.idle(); });
119 BackgroundIndex::~BackgroundIndex() {
124 BackgroundQueue::Task
BackgroundIndex::changedFilesTask(
125 const std::vector
<std::string
> &ChangedFiles
) {
126 BackgroundQueue::Task
T([this, ChangedFiles
] {
127 trace::Span
Tracer("BackgroundIndexEnqueue");
129 std::optional
<WithContext
> WithProvidedContext
;
131 WithProvidedContext
.emplace(ContextProvider(/*Path=*/""));
133 // We're doing this asynchronously, because we'll read shards here too.
134 log("Enqueueing {0} commands for indexing", ChangedFiles
.size());
135 SPAN_ATTACH(Tracer
, "files", int64_t(ChangedFiles
.size()));
137 auto NeedsReIndexing
= loadProject(std::move(ChangedFiles
));
138 // Run indexing for files that need to be updated.
139 std::shuffle(NeedsReIndexing
.begin(), NeedsReIndexing
.end(),
140 std::mt19937(std::random_device
{}()));
141 std::vector
<BackgroundQueue::Task
> Tasks
;
142 Tasks
.reserve(NeedsReIndexing
.size());
143 for (const auto &File
: NeedsReIndexing
)
144 Tasks
.push_back(indexFileTask(std::move(File
)));
145 Queue
.append(std::move(Tasks
));
148 T
.QueuePri
= LoadShards
;
149 T
.ThreadPri
= llvm::ThreadPriority::Default
;
153 static llvm::StringRef
filenameWithoutExtension(llvm::StringRef Path
) {
154 Path
= llvm::sys::path::filename(Path
);
155 return Path
.drop_back(llvm::sys::path::extension(Path
).size());
158 BackgroundQueue::Task
BackgroundIndex::indexFileTask(std::string Path
) {
159 std::string Tag
= filenameWithoutExtension(Path
).str();
160 uint64_t Key
= llvm::xxh3_64bits(Path
);
161 BackgroundQueue::Task
T([this, Path(std::move(Path
))] {
162 std::optional
<WithContext
> WithProvidedContext
;
164 WithProvidedContext
.emplace(ContextProvider(Path
));
165 auto Cmd
= CDB
.getCompileCommand(Path
);
168 if (auto Error
= index(std::move(*Cmd
)))
169 elog("Indexing {0} failed: {1}", Path
, std::move(Error
));
171 T
.QueuePri
= IndexFile
;
172 T
.ThreadPri
= IndexingPriority
;
173 T
.Tag
= std::move(Tag
);
178 void BackgroundIndex::boostRelated(llvm::StringRef Path
) {
179 if (isHeaderFile(Path
))
180 Queue
.boost(filenameWithoutExtension(Path
), IndexBoostedFile
);
183 /// Given index results from a TU, only update symbols coming from files that
184 /// are different or missing from than \p ShardVersionsSnapshot. Also stores new
185 /// index information on IndexStorage.
186 void BackgroundIndex::update(
187 llvm::StringRef MainFile
, IndexFileIn Index
,
188 const llvm::StringMap
<ShardVersion
> &ShardVersionsSnapshot
,
191 llvm::StringMap
<std::pair
<Path
, FileDigest
>> FilesToUpdate
;
192 // Note that sources do not contain any information regarding missing headers,
193 // since we don't even know what absolute path they should fall in.
194 for (const auto &IndexIt
: *Index
.Sources
) {
195 const auto &IGN
= IndexIt
.getValue();
196 auto AbsPath
= URI::resolve(IGN
.URI
, MainFile
);
198 elog("Failed to resolve URI: {0}", AbsPath
.takeError());
201 const auto DigestIt
= ShardVersionsSnapshot
.find(*AbsPath
);
202 // File has different contents, or indexing was successful this time.
203 if (DigestIt
== ShardVersionsSnapshot
.end() ||
204 DigestIt
->getValue().Digest
!= IGN
.Digest
||
205 (DigestIt
->getValue().HadErrors
&& !HadErrors
))
206 FilesToUpdate
[IGN
.URI
] = {std::move(*AbsPath
), IGN
.Digest
};
209 // Shard slabs into files.
210 FileShardedIndex
ShardedIndex(std::move(Index
));
212 // Build and store new slabs for each updated file.
213 for (const auto &FileIt
: FilesToUpdate
) {
214 auto Uri
= FileIt
.first();
215 auto IF
= ShardedIndex
.getShard(Uri
);
216 assert(IF
&& "no shard for file in Index.Sources?");
217 PathRef Path
= FileIt
.getValue().first
;
219 // Only store command line hash for main files of the TU, since our
220 // current model keeps only one version of a header file.
221 if (Path
!= MainFile
)
224 // We need to store shards before updating the index, since the latter
226 // FIXME: Also skip serializing the shard if it is already up-to-date.
227 if (auto Error
= IndexStorageFactory(Path
)->storeShard(Path
, *IF
))
228 elog("Failed to write background-index shard for file {0}: {1}", Path
,
232 std::lock_guard
<std::mutex
> Lock(ShardVersionsMu
);
233 const auto &Hash
= FileIt
.getValue().second
;
234 auto DigestIt
= ShardVersions
.try_emplace(Path
);
235 ShardVersion
&SV
= DigestIt
.first
->second
;
236 // Skip if file is already up to date, unless previous index was broken
237 // and this one is not.
238 if (!DigestIt
.second
&& SV
.Digest
== Hash
&& SV
.HadErrors
&& !HadErrors
)
241 SV
.HadErrors
= HadErrors
;
243 // This can override a newer version that is added in another thread, if
244 // this thread sees the older version but finishes later. This should be
246 IndexedSymbols
.update(
247 Uri
, std::make_unique
<SymbolSlab
>(std::move(*IF
->Symbols
)),
248 std::make_unique
<RefSlab
>(std::move(*IF
->Refs
)),
249 std::make_unique
<RelationSlab
>(std::move(*IF
->Relations
)),
255 llvm::Error
BackgroundIndex::index(tooling::CompileCommand Cmd
) {
256 trace::Span
Tracer("BackgroundIndex");
257 SPAN_ATTACH(Tracer
, "file", Cmd
.Filename
);
258 auto AbsolutePath
= getAbsolutePath(Cmd
);
260 auto FS
= TFS
.view(Cmd
.Directory
);
261 auto Buf
= FS
->getBufferForFile(AbsolutePath
);
263 return llvm::errorCodeToError(Buf
.getError());
264 auto Hash
= digest(Buf
->get()->getBuffer());
266 // Take a snapshot of the versions to avoid locking for each file in the TU.
267 llvm::StringMap
<ShardVersion
> ShardVersionsSnapshot
;
269 std::lock_guard
<std::mutex
> Lock(ShardVersionsMu
);
270 ShardVersionsSnapshot
= ShardVersions
;
273 vlog("Indexing {0} (digest:={1})", Cmd
.Filename
, llvm::toHex(Hash
));
276 Inputs
.CompileCommand
= std::move(Cmd
);
277 IgnoreDiagnostics IgnoreDiags
;
278 auto CI
= buildCompilerInvocation(Inputs
, IgnoreDiags
);
280 return error("Couldn't build compiler invocation");
283 prepareCompilerInstance(std::move(CI
), /*Preamble=*/nullptr,
284 std::move(*Buf
), std::move(FS
), IgnoreDiags
);
286 return error("Couldn't build compiler instance");
288 SymbolCollector::Options IndexOpts
;
289 // Creates a filter to not collect index results from files with unchanged
291 IndexOpts
.FileFilter
= [&ShardVersionsSnapshot
](const SourceManager
&SM
,
293 const auto F
= SM
.getFileEntryRefForID(FID
);
295 return false; // Skip invalid files.
296 auto AbsPath
= getCanonicalPath(*F
, SM
.getFileManager());
298 return false; // Skip files without absolute path.
299 auto Digest
= digestFile(SM
, FID
);
302 auto D
= ShardVersionsSnapshot
.find(*AbsPath
);
303 if (D
!= ShardVersionsSnapshot
.end() && D
->second
.Digest
== Digest
&&
304 !D
->second
.HadErrors
)
305 return false; // Skip files that haven't changed, without errors.
308 IndexOpts
.CollectMainFileRefs
= true;
311 auto Action
= createStaticIndexingAction(
312 IndexOpts
, [&](SymbolSlab S
) { Index
.Symbols
= std::move(S
); },
313 [&](RefSlab R
) { Index
.Refs
= std::move(R
); },
314 [&](RelationSlab R
) { Index
.Relations
= std::move(R
); },
315 [&](IncludeGraph IG
) { Index
.Sources
= std::move(IG
); });
317 // We're going to run clang here, and it could potentially crash.
318 // We could use CrashRecoveryContext to try to make indexing crashes nonfatal,
319 // but the leaky "recovery" is pretty scary too in a long-running process.
320 // If crashes are a real problem, maybe we should fork a child process.
322 const FrontendInputFile
&Input
= Clang
->getFrontendOpts().Inputs
.front();
323 if (!Action
->BeginSourceFile(*Clang
, Input
))
324 return error("BeginSourceFile() failed");
325 if (llvm::Error Err
= Action
->Execute())
328 Action
->EndSourceFile();
330 Index
.Cmd
= Inputs
.CompileCommand
;
331 assert(Index
.Symbols
&& Index
.Refs
&& Index
.Sources
&&
332 "Symbols, Refs and Sources must be set.");
334 log("Indexed {0} ({1} symbols, {2} refs, {3} files)",
335 Inputs
.CompileCommand
.Filename
, Index
.Symbols
->size(),
336 Index
.Refs
->numRefs(), Index
.Sources
->size());
337 SPAN_ATTACH(Tracer
, "symbols", int(Index
.Symbols
->size()));
338 SPAN_ATTACH(Tracer
, "refs", int(Index
.Refs
->numRefs()));
339 SPAN_ATTACH(Tracer
, "sources", int(Index
.Sources
->size()));
341 bool HadErrors
= Clang
->hasDiagnostics() &&
342 Clang
->getDiagnostics().hasUncompilableErrorOccurred();
344 log("Failed to compile {0}, index may be incomplete", AbsolutePath
);
345 for (auto &It
: *Index
.Sources
)
346 It
.second
.Flags
|= IncludeGraphNode::SourceFlag::HadErrors
;
348 update(AbsolutePath
, std::move(Index
), ShardVersionsSnapshot
, HadErrors
);
350 Rebuilder
.indexedTU();
351 return llvm::Error::success();
354 // Restores shards for \p MainFiles from index storage. Then checks staleness of
355 // those shards and returns a list of TUs that needs to be indexed to update
357 std::vector
<std::string
>
358 BackgroundIndex::loadProject(std::vector
<std::string
> MainFiles
) {
359 // Drop files where background indexing is disabled in config.
361 llvm::erase_if(MainFiles
, [&](const std::string
&TU
) {
362 // Load the config for each TU, as indexing may be selectively enabled.
363 WithContext
WithProvidedContext(ContextProvider(TU
));
364 return Config::current().Index
.Background
==
365 Config::BackgroundPolicy::Skip
;
367 Rebuilder
.startLoading();
368 // Load shards for all of the mainfiles.
369 const std::vector
<LoadedShard
> Result
=
370 loadIndexShards(MainFiles
, IndexStorageFactory
, CDB
);
371 size_t LoadedShards
= 0;
373 // Update in-memory state.
374 std::lock_guard
<std::mutex
> Lock(ShardVersionsMu
);
375 for (auto &LS
: Result
) {
380 ? std::make_unique
<SymbolSlab
>(std::move(*LS
.Shard
->Symbols
))
382 auto RS
= LS
.Shard
->Refs
383 ? std::make_unique
<RefSlab
>(std::move(*LS
.Shard
->Refs
))
387 ? std::make_unique
<RelationSlab
>(std::move(*LS
.Shard
->Relations
))
389 ShardVersion
&SV
= ShardVersions
[LS
.AbsolutePath
];
390 SV
.Digest
= LS
.Digest
;
391 SV
.HadErrors
= LS
.HadErrors
;
394 IndexedSymbols
.update(URI::create(LS
.AbsolutePath
).toString(),
395 std::move(SS
), std::move(RS
), std::move(RelS
),
399 Rebuilder
.loadedShard(LoadedShards
);
400 Rebuilder
.doneLoading();
402 auto FS
= TFS
.view(/*CWD=*/std::nullopt
);
403 llvm::DenseSet
<PathRef
> TUsToIndex
;
404 // We'll accept data from stale shards, but ensure the files get reindexed
406 for (auto &LS
: Result
) {
407 if (!shardIsStale(LS
, FS
.get()))
409 PathRef TUForFile
= LS
.DependentTU
;
410 assert(!TUForFile
.empty() && "File without a TU!");
412 // FIXME: Currently, we simply schedule indexing on a TU whenever any of
413 // its dependencies needs re-indexing. We might do it smarter by figuring
414 // out a minimal set of TUs that will cover all the stale dependencies.
415 // FIXME: Try looking at other TUs if no compile commands are available
416 // for this TU, i.e TU was deleted after we performed indexing.
417 TUsToIndex
.insert(TUForFile
);
420 return {TUsToIndex
.begin(), TUsToIndex
.end()};
423 void BackgroundIndex::profile(MemoryTree
&MT
) const {
424 IndexedSymbols
.profile(MT
.child("slabs"));
425 // We don't want to mix memory used by index and symbols, so call base class.
426 MT
.child("index").addUsage(SwapIndex::estimateMemoryUsage());
428 } // namespace clangd