1 //===-- Background.cpp - Build an index in a background thread ------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "index/Background.h"
13 #include "SourceCode.h"
15 #include "index/BackgroundIndexLoader.h"
16 #include "index/FileIndex.h"
17 #include "index/Index.h"
18 #include "index/IndexAction.h"
19 #include "index/MemIndex.h"
20 #include "index/Ref.h"
21 #include "index/Relation.h"
22 #include "index/Serialization.h"
23 #include "index/Symbol.h"
24 #include "index/SymbolCollector.h"
25 #include "support/Context.h"
26 #include "support/Logger.h"
27 #include "support/Path.h"
28 #include "support/Threading.h"
29 #include "support/ThreadsafeFS.h"
30 #include "support/Trace.h"
31 #include "clang/Basic/SourceLocation.h"
32 #include "clang/Basic/SourceManager.h"
33 #include "clang/Frontend/FrontendAction.h"
34 #include "llvm/ADT/ArrayRef.h"
35 #include "llvm/ADT/DenseSet.h"
36 #include "llvm/ADT/STLExtras.h"
37 #include "llvm/ADT/StringMap.h"
38 #include "llvm/ADT/StringRef.h"
39 #include "llvm/Support/Error.h"
40 #include "llvm/Support/Path.h"
41 #include "llvm/Support/Threading.h"
42 #include "llvm/Support/xxhash.h"
47 #include <condition_variable>
63 // We cannot use vfs->makeAbsolute because Cmd.FileName is either absolute or
64 // relative to Cmd.Directory, which might not be the same as current working
66 llvm::SmallString
<128> getAbsolutePath(const tooling::CompileCommand
&Cmd
) {
67 llvm::SmallString
<128> AbsolutePath
;
68 if (llvm::sys::path::is_absolute(Cmd
.Filename
)) {
69 AbsolutePath
= Cmd
.Filename
;
71 AbsolutePath
= Cmd
.Directory
;
72 llvm::sys::path::append(AbsolutePath
, Cmd
.Filename
);
73 llvm::sys::path::remove_dots(AbsolutePath
, true);
78 bool shardIsStale(const LoadedShard
&LS
, llvm::vfs::FileSystem
*FS
) {
79 auto Buf
= FS
->getBufferForFile(LS
.AbsolutePath
);
81 vlog("Background-index: Couldn't read {0} to validate stored index: {1}",
82 LS
.AbsolutePath
, Buf
.getError().message());
83 // There is no point in indexing an unreadable file.
86 return digest(Buf
->get()->getBuffer()) != LS
.Digest
;
91 BackgroundIndex::BackgroundIndex(
92 const ThreadsafeFS
&TFS
, const GlobalCompilationDatabase
&CDB
,
93 BackgroundIndexStorage::Factory IndexStorageFactory
, Options Opts
)
94 : SwapIndex(std::make_unique
<MemIndex
>()), TFS(TFS
), CDB(CDB
),
95 IndexingPriority(Opts
.IndexingPriority
),
96 ContextProvider(std::move(Opts
.ContextProvider
)),
97 IndexedSymbols(IndexContents::All
),
98 Rebuilder(this, &IndexedSymbols
, Opts
.ThreadPoolSize
),
99 IndexStorageFactory(std::move(IndexStorageFactory
)),
100 Queue(std::move(Opts
.OnProgress
)),
102 CDB
.watch([&](const std::vector
<std::string
> &ChangedFiles
) {
103 enqueue(ChangedFiles
);
105 assert(Opts
.ThreadPoolSize
> 0 && "Thread pool size can't be zero.");
106 assert(this->IndexStorageFactory
&& "Storage factory can not be null!");
107 for (unsigned I
= 0; I
< Opts
.ThreadPoolSize
; ++I
) {
108 ThreadPool
.runAsync("background-worker-" + llvm::Twine(I
+ 1),
109 [this, Ctx(Context::current().clone())]() mutable {
110 WithContext
BGContext(std::move(Ctx
));
111 Queue
.work([&] { Rebuilder
.idle(); });
116 BackgroundIndex::~BackgroundIndex() {
121 BackgroundQueue::Task
BackgroundIndex::changedFilesTask(
122 const std::vector
<std::string
> &ChangedFiles
) {
123 BackgroundQueue::Task
T([this, ChangedFiles
] {
124 trace::Span
Tracer("BackgroundIndexEnqueue");
126 llvm::Optional
<WithContext
> WithProvidedContext
;
128 WithProvidedContext
.emplace(ContextProvider(/*Path=*/""));
130 // We're doing this asynchronously, because we'll read shards here too.
131 log("Enqueueing {0} commands for indexing", ChangedFiles
.size());
132 SPAN_ATTACH(Tracer
, "files", int64_t(ChangedFiles
.size()));
134 auto NeedsReIndexing
= loadProject(std::move(ChangedFiles
));
135 // Run indexing for files that need to be updated.
136 std::shuffle(NeedsReIndexing
.begin(), NeedsReIndexing
.end(),
137 std::mt19937(std::random_device
{}()));
138 std::vector
<BackgroundQueue::Task
> Tasks
;
139 Tasks
.reserve(NeedsReIndexing
.size());
140 for (const auto &File
: NeedsReIndexing
)
141 Tasks
.push_back(indexFileTask(std::move(File
)));
142 Queue
.append(std::move(Tasks
));
145 T
.QueuePri
= LoadShards
;
146 T
.ThreadPri
= llvm::ThreadPriority::Default
;
150 static llvm::StringRef
filenameWithoutExtension(llvm::StringRef Path
) {
151 Path
= llvm::sys::path::filename(Path
);
152 return Path
.drop_back(llvm::sys::path::extension(Path
).size());
155 BackgroundQueue::Task
BackgroundIndex::indexFileTask(std::string Path
) {
156 std::string Tag
= filenameWithoutExtension(Path
).str();
157 uint64_t Key
= llvm::xxHash64(Path
);
158 BackgroundQueue::Task
T([this, Path(std::move(Path
))] {
159 llvm::Optional
<WithContext
> WithProvidedContext
;
161 WithProvidedContext
.emplace(ContextProvider(Path
));
162 auto Cmd
= CDB
.getCompileCommand(Path
);
165 if (auto Error
= index(std::move(*Cmd
)))
166 elog("Indexing {0} failed: {1}", Path
, std::move(Error
));
168 T
.QueuePri
= IndexFile
;
169 T
.ThreadPri
= IndexingPriority
;
170 T
.Tag
= std::move(Tag
);
175 void BackgroundIndex::boostRelated(llvm::StringRef Path
) {
176 if (isHeaderFile(Path
))
177 Queue
.boost(filenameWithoutExtension(Path
), IndexBoostedFile
);
180 /// Given index results from a TU, only update symbols coming from files that
181 /// are different or missing from than \p ShardVersionsSnapshot. Also stores new
182 /// index information on IndexStorage.
183 void BackgroundIndex::update(
184 llvm::StringRef MainFile
, IndexFileIn Index
,
185 const llvm::StringMap
<ShardVersion
> &ShardVersionsSnapshot
,
188 llvm::StringMap
<std::pair
<Path
, FileDigest
>> FilesToUpdate
;
189 // Note that sources do not contain any information regarding missing headers,
190 // since we don't even know what absolute path they should fall in.
191 for (const auto &IndexIt
: *Index
.Sources
) {
192 const auto &IGN
= IndexIt
.getValue();
193 auto AbsPath
= URI::resolve(IGN
.URI
, MainFile
);
195 elog("Failed to resolve URI: {0}", AbsPath
.takeError());
198 const auto DigestIt
= ShardVersionsSnapshot
.find(*AbsPath
);
199 // File has different contents, or indexing was successful this time.
200 if (DigestIt
== ShardVersionsSnapshot
.end() ||
201 DigestIt
->getValue().Digest
!= IGN
.Digest
||
202 (DigestIt
->getValue().HadErrors
&& !HadErrors
))
203 FilesToUpdate
[IGN
.URI
] = {std::move(*AbsPath
), IGN
.Digest
};
206 // Shard slabs into files.
207 FileShardedIndex
ShardedIndex(std::move(Index
));
209 // Build and store new slabs for each updated file.
210 for (const auto &FileIt
: FilesToUpdate
) {
211 auto Uri
= FileIt
.first();
212 auto IF
= ShardedIndex
.getShard(Uri
);
213 assert(IF
&& "no shard for file in Index.Sources?");
214 PathRef Path
= FileIt
.getValue().first
;
216 // Only store command line hash for main files of the TU, since our
217 // current model keeps only one version of a header file.
218 if (Path
!= MainFile
)
221 // We need to store shards before updating the index, since the latter
223 // FIXME: Also skip serializing the shard if it is already up-to-date.
224 if (auto Error
= IndexStorageFactory(Path
)->storeShard(Path
, *IF
))
225 elog("Failed to write background-index shard for file {0}: {1}", Path
,
229 std::lock_guard
<std::mutex
> Lock(ShardVersionsMu
);
230 const auto &Hash
= FileIt
.getValue().second
;
231 auto DigestIt
= ShardVersions
.try_emplace(Path
);
232 ShardVersion
&SV
= DigestIt
.first
->second
;
233 // Skip if file is already up to date, unless previous index was broken
234 // and this one is not.
235 if (!DigestIt
.second
&& SV
.Digest
== Hash
&& SV
.HadErrors
&& !HadErrors
)
238 SV
.HadErrors
= HadErrors
;
240 // This can override a newer version that is added in another thread, if
241 // this thread sees the older version but finishes later. This should be
243 IndexedSymbols
.update(
244 Uri
, std::make_unique
<SymbolSlab
>(std::move(*IF
->Symbols
)),
245 std::make_unique
<RefSlab
>(std::move(*IF
->Refs
)),
246 std::make_unique
<RelationSlab
>(std::move(*IF
->Relations
)),
252 llvm::Error
BackgroundIndex::index(tooling::CompileCommand Cmd
) {
253 trace::Span
Tracer("BackgroundIndex");
254 SPAN_ATTACH(Tracer
, "file", Cmd
.Filename
);
255 auto AbsolutePath
= getAbsolutePath(Cmd
);
257 auto FS
= TFS
.view(Cmd
.Directory
);
258 auto Buf
= FS
->getBufferForFile(AbsolutePath
);
260 return llvm::errorCodeToError(Buf
.getError());
261 auto Hash
= digest(Buf
->get()->getBuffer());
263 // Take a snapshot of the versions to avoid locking for each file in the TU.
264 llvm::StringMap
<ShardVersion
> ShardVersionsSnapshot
;
266 std::lock_guard
<std::mutex
> Lock(ShardVersionsMu
);
267 ShardVersionsSnapshot
= ShardVersions
;
270 vlog("Indexing {0} (digest:={1})", Cmd
.Filename
, llvm::toHex(Hash
));
273 Inputs
.CompileCommand
= std::move(Cmd
);
274 IgnoreDiagnostics IgnoreDiags
;
275 auto CI
= buildCompilerInvocation(Inputs
, IgnoreDiags
);
277 return error("Couldn't build compiler invocation");
280 prepareCompilerInstance(std::move(CI
), /*Preamble=*/nullptr,
281 std::move(*Buf
), std::move(FS
), IgnoreDiags
);
283 return error("Couldn't build compiler instance");
285 SymbolCollector::Options IndexOpts
;
286 // Creates a filter to not collect index results from files with unchanged
288 IndexOpts
.FileFilter
= [&ShardVersionsSnapshot
](const SourceManager
&SM
,
290 const auto *F
= SM
.getFileEntryForID(FID
);
292 return false; // Skip invalid files.
293 auto AbsPath
= getCanonicalPath(F
, SM
);
295 return false; // Skip files without absolute path.
296 auto Digest
= digestFile(SM
, FID
);
299 auto D
= ShardVersionsSnapshot
.find(*AbsPath
);
300 if (D
!= ShardVersionsSnapshot
.end() && D
->second
.Digest
== Digest
&&
301 !D
->second
.HadErrors
)
302 return false; // Skip files that haven't changed, without errors.
305 IndexOpts
.CollectMainFileRefs
= true;
308 auto Action
= createStaticIndexingAction(
309 IndexOpts
, [&](SymbolSlab S
) { Index
.Symbols
= std::move(S
); },
310 [&](RefSlab R
) { Index
.Refs
= std::move(R
); },
311 [&](RelationSlab R
) { Index
.Relations
= std::move(R
); },
312 [&](IncludeGraph IG
) { Index
.Sources
= std::move(IG
); });
314 // We're going to run clang here, and it could potentially crash.
315 // We could use CrashRecoveryContext to try to make indexing crashes nonfatal,
316 // but the leaky "recovery" is pretty scary too in a long-running process.
317 // If crashes are a real problem, maybe we should fork a child process.
319 const FrontendInputFile
&Input
= Clang
->getFrontendOpts().Inputs
.front();
320 if (!Action
->BeginSourceFile(*Clang
, Input
))
321 return error("BeginSourceFile() failed");
322 if (llvm::Error Err
= Action
->Execute())
325 Action
->EndSourceFile();
327 Index
.Cmd
= Inputs
.CompileCommand
;
328 assert(Index
.Symbols
&& Index
.Refs
&& Index
.Sources
&&
329 "Symbols, Refs and Sources must be set.");
331 log("Indexed {0} ({1} symbols, {2} refs, {3} files)",
332 Inputs
.CompileCommand
.Filename
, Index
.Symbols
->size(),
333 Index
.Refs
->numRefs(), Index
.Sources
->size());
334 SPAN_ATTACH(Tracer
, "symbols", int(Index
.Symbols
->size()));
335 SPAN_ATTACH(Tracer
, "refs", int(Index
.Refs
->numRefs()));
336 SPAN_ATTACH(Tracer
, "sources", int(Index
.Sources
->size()));
338 bool HadErrors
= Clang
->hasDiagnostics() &&
339 Clang
->getDiagnostics().hasUncompilableErrorOccurred();
341 log("Failed to compile {0}, index may be incomplete", AbsolutePath
);
342 for (auto &It
: *Index
.Sources
)
343 It
.second
.Flags
|= IncludeGraphNode::SourceFlag::HadErrors
;
345 update(AbsolutePath
, std::move(Index
), ShardVersionsSnapshot
, HadErrors
);
347 Rebuilder
.indexedTU();
348 return llvm::Error::success();
351 // Restores shards for \p MainFiles from index storage. Then checks staleness of
352 // those shards and returns a list of TUs that needs to be indexed to update
354 std::vector
<std::string
>
355 BackgroundIndex::loadProject(std::vector
<std::string
> MainFiles
) {
356 // Drop files where background indexing is disabled in config.
358 llvm::erase_if(MainFiles
, [&](const std::string
&TU
) {
359 // Load the config for each TU, as indexing may be selectively enabled.
360 WithContext
WithProvidedContext(ContextProvider(TU
));
361 return Config::current().Index
.Background
==
362 Config::BackgroundPolicy::Skip
;
364 Rebuilder
.startLoading();
365 // Load shards for all of the mainfiles.
366 const std::vector
<LoadedShard
> Result
=
367 loadIndexShards(MainFiles
, IndexStorageFactory
, CDB
);
368 size_t LoadedShards
= 0;
370 // Update in-memory state.
371 std::lock_guard
<std::mutex
> Lock(ShardVersionsMu
);
372 for (auto &LS
: Result
) {
377 ? std::make_unique
<SymbolSlab
>(std::move(*LS
.Shard
->Symbols
))
379 auto RS
= LS
.Shard
->Refs
380 ? std::make_unique
<RefSlab
>(std::move(*LS
.Shard
->Refs
))
384 ? std::make_unique
<RelationSlab
>(std::move(*LS
.Shard
->Relations
))
386 ShardVersion
&SV
= ShardVersions
[LS
.AbsolutePath
];
387 SV
.Digest
= LS
.Digest
;
388 SV
.HadErrors
= LS
.HadErrors
;
391 IndexedSymbols
.update(URI::create(LS
.AbsolutePath
).toString(),
392 std::move(SS
), std::move(RS
), std::move(RelS
),
396 Rebuilder
.loadedShard(LoadedShards
);
397 Rebuilder
.doneLoading();
399 auto FS
= TFS
.view(/*CWD=*/llvm::None
);
400 llvm::DenseSet
<PathRef
> TUsToIndex
;
401 // We'll accept data from stale shards, but ensure the files get reindexed
403 for (auto &LS
: Result
) {
404 if (!shardIsStale(LS
, FS
.get()))
406 PathRef TUForFile
= LS
.DependentTU
;
407 assert(!TUForFile
.empty() && "File without a TU!");
409 // FIXME: Currently, we simply schedule indexing on a TU whenever any of
410 // its dependencies needs re-indexing. We might do it smarter by figuring
411 // out a minimal set of TUs that will cover all the stale dependencies.
412 // FIXME: Try looking at other TUs if no compile commands are available
413 // for this TU, i.e TU was deleted after we performed indexing.
414 TUsToIndex
.insert(TUForFile
);
417 return {TUsToIndex
.begin(), TUsToIndex
.end()};
420 void BackgroundIndex::profile(MemoryTree
&MT
) const {
421 IndexedSymbols
.profile(MT
.child("slabs"));
422 // We don't want to mix memory used by index and symbols, so call base class.
423 MT
.child("index").addUsage(SwapIndex::estimateMemoryUsage());
425 } // namespace clangd