[docs] Add LICENSE.txt to the root of the mono-repo
[llvm-project.git] / clang-tools-extra / clangd / index / Background.cpp
blobb5349468eb24f9155c67e97ecad9d2feae8c127e
1 //===-- Background.cpp - Build an index in a background thread ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "index/Background.h"
10 #include "Compiler.h"
11 #include "Config.h"
12 #include "Headers.h"
13 #include "SourceCode.h"
14 #include "URI.h"
15 #include "index/BackgroundIndexLoader.h"
16 #include "index/FileIndex.h"
17 #include "index/Index.h"
18 #include "index/IndexAction.h"
19 #include "index/MemIndex.h"
20 #include "index/Ref.h"
21 #include "index/Relation.h"
22 #include "index/Serialization.h"
23 #include "index/Symbol.h"
24 #include "index/SymbolCollector.h"
25 #include "support/Context.h"
26 #include "support/Logger.h"
27 #include "support/Path.h"
28 #include "support/Threading.h"
29 #include "support/ThreadsafeFS.h"
30 #include "support/Trace.h"
31 #include "clang/Basic/SourceLocation.h"
32 #include "clang/Basic/SourceManager.h"
33 #include "clang/Frontend/FrontendAction.h"
34 #include "llvm/ADT/ArrayRef.h"
35 #include "llvm/ADT/DenseSet.h"
36 #include "llvm/ADT/STLExtras.h"
37 #include "llvm/ADT/StringMap.h"
38 #include "llvm/ADT/StringRef.h"
39 #include "llvm/Support/Error.h"
40 #include "llvm/Support/Path.h"
41 #include "llvm/Support/Threading.h"
42 #include "llvm/Support/xxhash.h"
44 #include <algorithm>
45 #include <atomic>
46 #include <chrono>
47 #include <condition_variable>
48 #include <cstddef>
49 #include <memory>
50 #include <mutex>
51 #include <numeric>
52 #include <queue>
53 #include <random>
54 #include <string>
55 #include <thread>
56 #include <utility>
57 #include <vector>
59 namespace clang {
60 namespace clangd {
61 namespace {
63 // We cannot use vfs->makeAbsolute because Cmd.FileName is either absolute or
64 // relative to Cmd.Directory, which might not be the same as current working
65 // directory.
66 llvm::SmallString<128> getAbsolutePath(const tooling::CompileCommand &Cmd) {
67 llvm::SmallString<128> AbsolutePath;
68 if (llvm::sys::path::is_absolute(Cmd.Filename)) {
69 AbsolutePath = Cmd.Filename;
70 } else {
71 AbsolutePath = Cmd.Directory;
72 llvm::sys::path::append(AbsolutePath, Cmd.Filename);
73 llvm::sys::path::remove_dots(AbsolutePath, true);
75 return AbsolutePath;
78 bool shardIsStale(const LoadedShard &LS, llvm::vfs::FileSystem *FS) {
79 auto Buf = FS->getBufferForFile(LS.AbsolutePath);
80 if (!Buf) {
81 vlog("Background-index: Couldn't read {0} to validate stored index: {1}",
82 LS.AbsolutePath, Buf.getError().message());
83 // There is no point in indexing an unreadable file.
84 return false;
86 return digest(Buf->get()->getBuffer()) != LS.Digest;
89 } // namespace
91 BackgroundIndex::BackgroundIndex(
92 const ThreadsafeFS &TFS, const GlobalCompilationDatabase &CDB,
93 BackgroundIndexStorage::Factory IndexStorageFactory, Options Opts)
94 : SwapIndex(std::make_unique<MemIndex>()), TFS(TFS), CDB(CDB),
95 IndexingPriority(Opts.IndexingPriority),
96 ContextProvider(std::move(Opts.ContextProvider)),
97 IndexedSymbols(IndexContents::All),
98 Rebuilder(this, &IndexedSymbols, Opts.ThreadPoolSize),
99 IndexStorageFactory(std::move(IndexStorageFactory)),
100 Queue(std::move(Opts.OnProgress)),
101 CommandsChanged(
102 CDB.watch([&](const std::vector<std::string> &ChangedFiles) {
103 enqueue(ChangedFiles);
104 })) {
105 assert(Opts.ThreadPoolSize > 0 && "Thread pool size can't be zero.");
106 assert(this->IndexStorageFactory && "Storage factory can not be null!");
107 for (unsigned I = 0; I < Opts.ThreadPoolSize; ++I) {
108 ThreadPool.runAsync("background-worker-" + llvm::Twine(I + 1),
109 [this, Ctx(Context::current().clone())]() mutable {
110 WithContext BGContext(std::move(Ctx));
111 Queue.work([&] { Rebuilder.idle(); });
116 BackgroundIndex::~BackgroundIndex() {
117 stop();
118 ThreadPool.wait();
121 BackgroundQueue::Task BackgroundIndex::changedFilesTask(
122 const std::vector<std::string> &ChangedFiles) {
123 BackgroundQueue::Task T([this, ChangedFiles] {
124 trace::Span Tracer("BackgroundIndexEnqueue");
126 llvm::Optional<WithContext> WithProvidedContext;
127 if (ContextProvider)
128 WithProvidedContext.emplace(ContextProvider(/*Path=*/""));
130 // We're doing this asynchronously, because we'll read shards here too.
131 log("Enqueueing {0} commands for indexing", ChangedFiles.size());
132 SPAN_ATTACH(Tracer, "files", int64_t(ChangedFiles.size()));
134 auto NeedsReIndexing = loadProject(std::move(ChangedFiles));
135 // Run indexing for files that need to be updated.
136 std::shuffle(NeedsReIndexing.begin(), NeedsReIndexing.end(),
137 std::mt19937(std::random_device{}()));
138 std::vector<BackgroundQueue::Task> Tasks;
139 Tasks.reserve(NeedsReIndexing.size());
140 for (const auto &File : NeedsReIndexing)
141 Tasks.push_back(indexFileTask(std::move(File)));
142 Queue.append(std::move(Tasks));
145 T.QueuePri = LoadShards;
146 T.ThreadPri = llvm::ThreadPriority::Default;
147 return T;
150 static llvm::StringRef filenameWithoutExtension(llvm::StringRef Path) {
151 Path = llvm::sys::path::filename(Path);
152 return Path.drop_back(llvm::sys::path::extension(Path).size());
155 BackgroundQueue::Task BackgroundIndex::indexFileTask(std::string Path) {
156 std::string Tag = filenameWithoutExtension(Path).str();
157 uint64_t Key = llvm::xxHash64(Path);
158 BackgroundQueue::Task T([this, Path(std::move(Path))] {
159 llvm::Optional<WithContext> WithProvidedContext;
160 if (ContextProvider)
161 WithProvidedContext.emplace(ContextProvider(Path));
162 auto Cmd = CDB.getCompileCommand(Path);
163 if (!Cmd)
164 return;
165 if (auto Error = index(std::move(*Cmd)))
166 elog("Indexing {0} failed: {1}", Path, std::move(Error));
168 T.QueuePri = IndexFile;
169 T.ThreadPri = IndexingPriority;
170 T.Tag = std::move(Tag);
171 T.Key = Key;
172 return T;
175 void BackgroundIndex::boostRelated(llvm::StringRef Path) {
176 if (isHeaderFile(Path))
177 Queue.boost(filenameWithoutExtension(Path), IndexBoostedFile);
180 /// Given index results from a TU, only update symbols coming from files that
181 /// are different or missing from than \p ShardVersionsSnapshot. Also stores new
182 /// index information on IndexStorage.
183 void BackgroundIndex::update(
184 llvm::StringRef MainFile, IndexFileIn Index,
185 const llvm::StringMap<ShardVersion> &ShardVersionsSnapshot,
186 bool HadErrors) {
187 // Keys are URIs.
188 llvm::StringMap<std::pair<Path, FileDigest>> FilesToUpdate;
189 // Note that sources do not contain any information regarding missing headers,
190 // since we don't even know what absolute path they should fall in.
191 for (const auto &IndexIt : *Index.Sources) {
192 const auto &IGN = IndexIt.getValue();
193 auto AbsPath = URI::resolve(IGN.URI, MainFile);
194 if (!AbsPath) {
195 elog("Failed to resolve URI: {0}", AbsPath.takeError());
196 continue;
198 const auto DigestIt = ShardVersionsSnapshot.find(*AbsPath);
199 // File has different contents, or indexing was successful this time.
200 if (DigestIt == ShardVersionsSnapshot.end() ||
201 DigestIt->getValue().Digest != IGN.Digest ||
202 (DigestIt->getValue().HadErrors && !HadErrors))
203 FilesToUpdate[IGN.URI] = {std::move(*AbsPath), IGN.Digest};
206 // Shard slabs into files.
207 FileShardedIndex ShardedIndex(std::move(Index));
209 // Build and store new slabs for each updated file.
210 for (const auto &FileIt : FilesToUpdate) {
211 auto Uri = FileIt.first();
212 auto IF = ShardedIndex.getShard(Uri);
213 assert(IF && "no shard for file in Index.Sources?");
214 PathRef Path = FileIt.getValue().first;
216 // Only store command line hash for main files of the TU, since our
217 // current model keeps only one version of a header file.
218 if (Path != MainFile)
219 IF->Cmd.reset();
221 // We need to store shards before updating the index, since the latter
222 // consumes slabs.
223 // FIXME: Also skip serializing the shard if it is already up-to-date.
224 if (auto Error = IndexStorageFactory(Path)->storeShard(Path, *IF))
225 elog("Failed to write background-index shard for file {0}: {1}", Path,
226 std::move(Error));
229 std::lock_guard<std::mutex> Lock(ShardVersionsMu);
230 const auto &Hash = FileIt.getValue().second;
231 auto DigestIt = ShardVersions.try_emplace(Path);
232 ShardVersion &SV = DigestIt.first->second;
233 // Skip if file is already up to date, unless previous index was broken
234 // and this one is not.
235 if (!DigestIt.second && SV.Digest == Hash && SV.HadErrors && !HadErrors)
236 continue;
237 SV.Digest = Hash;
238 SV.HadErrors = HadErrors;
240 // This can override a newer version that is added in another thread, if
241 // this thread sees the older version but finishes later. This should be
242 // rare in practice.
243 IndexedSymbols.update(
244 Uri, std::make_unique<SymbolSlab>(std::move(*IF->Symbols)),
245 std::make_unique<RefSlab>(std::move(*IF->Refs)),
246 std::make_unique<RelationSlab>(std::move(*IF->Relations)),
247 Path == MainFile);
252 llvm::Error BackgroundIndex::index(tooling::CompileCommand Cmd) {
253 trace::Span Tracer("BackgroundIndex");
254 SPAN_ATTACH(Tracer, "file", Cmd.Filename);
255 auto AbsolutePath = getAbsolutePath(Cmd);
257 auto FS = TFS.view(Cmd.Directory);
258 auto Buf = FS->getBufferForFile(AbsolutePath);
259 if (!Buf)
260 return llvm::errorCodeToError(Buf.getError());
261 auto Hash = digest(Buf->get()->getBuffer());
263 // Take a snapshot of the versions to avoid locking for each file in the TU.
264 llvm::StringMap<ShardVersion> ShardVersionsSnapshot;
266 std::lock_guard<std::mutex> Lock(ShardVersionsMu);
267 ShardVersionsSnapshot = ShardVersions;
270 vlog("Indexing {0} (digest:={1})", Cmd.Filename, llvm::toHex(Hash));
271 ParseInputs Inputs;
272 Inputs.TFS = &TFS;
273 Inputs.CompileCommand = std::move(Cmd);
274 IgnoreDiagnostics IgnoreDiags;
275 auto CI = buildCompilerInvocation(Inputs, IgnoreDiags);
276 if (!CI)
277 return error("Couldn't build compiler invocation");
279 auto Clang =
280 prepareCompilerInstance(std::move(CI), /*Preamble=*/nullptr,
281 std::move(*Buf), std::move(FS), IgnoreDiags);
282 if (!Clang)
283 return error("Couldn't build compiler instance");
285 SymbolCollector::Options IndexOpts;
286 // Creates a filter to not collect index results from files with unchanged
287 // digests.
288 IndexOpts.FileFilter = [&ShardVersionsSnapshot](const SourceManager &SM,
289 FileID FID) {
290 const auto *F = SM.getFileEntryForID(FID);
291 if (!F)
292 return false; // Skip invalid files.
293 auto AbsPath = getCanonicalPath(F, SM);
294 if (!AbsPath)
295 return false; // Skip files without absolute path.
296 auto Digest = digestFile(SM, FID);
297 if (!Digest)
298 return false;
299 auto D = ShardVersionsSnapshot.find(*AbsPath);
300 if (D != ShardVersionsSnapshot.end() && D->second.Digest == Digest &&
301 !D->second.HadErrors)
302 return false; // Skip files that haven't changed, without errors.
303 return true;
305 IndexOpts.CollectMainFileRefs = true;
307 IndexFileIn Index;
308 auto Action = createStaticIndexingAction(
309 IndexOpts, [&](SymbolSlab S) { Index.Symbols = std::move(S); },
310 [&](RefSlab R) { Index.Refs = std::move(R); },
311 [&](RelationSlab R) { Index.Relations = std::move(R); },
312 [&](IncludeGraph IG) { Index.Sources = std::move(IG); });
314 // We're going to run clang here, and it could potentially crash.
315 // We could use CrashRecoveryContext to try to make indexing crashes nonfatal,
316 // but the leaky "recovery" is pretty scary too in a long-running process.
317 // If crashes are a real problem, maybe we should fork a child process.
319 const FrontendInputFile &Input = Clang->getFrontendOpts().Inputs.front();
320 if (!Action->BeginSourceFile(*Clang, Input))
321 return error("BeginSourceFile() failed");
322 if (llvm::Error Err = Action->Execute())
323 return Err;
325 Action->EndSourceFile();
327 Index.Cmd = Inputs.CompileCommand;
328 assert(Index.Symbols && Index.Refs && Index.Sources &&
329 "Symbols, Refs and Sources must be set.");
331 log("Indexed {0} ({1} symbols, {2} refs, {3} files)",
332 Inputs.CompileCommand.Filename, Index.Symbols->size(),
333 Index.Refs->numRefs(), Index.Sources->size());
334 SPAN_ATTACH(Tracer, "symbols", int(Index.Symbols->size()));
335 SPAN_ATTACH(Tracer, "refs", int(Index.Refs->numRefs()));
336 SPAN_ATTACH(Tracer, "sources", int(Index.Sources->size()));
338 bool HadErrors = Clang->hasDiagnostics() &&
339 Clang->getDiagnostics().hasUncompilableErrorOccurred();
340 if (HadErrors) {
341 log("Failed to compile {0}, index may be incomplete", AbsolutePath);
342 for (auto &It : *Index.Sources)
343 It.second.Flags |= IncludeGraphNode::SourceFlag::HadErrors;
345 update(AbsolutePath, std::move(Index), ShardVersionsSnapshot, HadErrors);
347 Rebuilder.indexedTU();
348 return llvm::Error::success();
351 // Restores shards for \p MainFiles from index storage. Then checks staleness of
352 // those shards and returns a list of TUs that needs to be indexed to update
353 // staleness.
354 std::vector<std::string>
355 BackgroundIndex::loadProject(std::vector<std::string> MainFiles) {
356 // Drop files where background indexing is disabled in config.
357 if (ContextProvider)
358 llvm::erase_if(MainFiles, [&](const std::string &TU) {
359 // Load the config for each TU, as indexing may be selectively enabled.
360 WithContext WithProvidedContext(ContextProvider(TU));
361 return Config::current().Index.Background ==
362 Config::BackgroundPolicy::Skip;
364 Rebuilder.startLoading();
365 // Load shards for all of the mainfiles.
366 const std::vector<LoadedShard> Result =
367 loadIndexShards(MainFiles, IndexStorageFactory, CDB);
368 size_t LoadedShards = 0;
370 // Update in-memory state.
371 std::lock_guard<std::mutex> Lock(ShardVersionsMu);
372 for (auto &LS : Result) {
373 if (!LS.Shard)
374 continue;
375 auto SS =
376 LS.Shard->Symbols
377 ? std::make_unique<SymbolSlab>(std::move(*LS.Shard->Symbols))
378 : nullptr;
379 auto RS = LS.Shard->Refs
380 ? std::make_unique<RefSlab>(std::move(*LS.Shard->Refs))
381 : nullptr;
382 auto RelS =
383 LS.Shard->Relations
384 ? std::make_unique<RelationSlab>(std::move(*LS.Shard->Relations))
385 : nullptr;
386 ShardVersion &SV = ShardVersions[LS.AbsolutePath];
387 SV.Digest = LS.Digest;
388 SV.HadErrors = LS.HadErrors;
389 ++LoadedShards;
391 IndexedSymbols.update(URI::create(LS.AbsolutePath).toString(),
392 std::move(SS), std::move(RS), std::move(RelS),
393 LS.CountReferences);
396 Rebuilder.loadedShard(LoadedShards);
397 Rebuilder.doneLoading();
399 auto FS = TFS.view(/*CWD=*/llvm::None);
400 llvm::DenseSet<PathRef> TUsToIndex;
401 // We'll accept data from stale shards, but ensure the files get reindexed
402 // soon.
403 for (auto &LS : Result) {
404 if (!shardIsStale(LS, FS.get()))
405 continue;
406 PathRef TUForFile = LS.DependentTU;
407 assert(!TUForFile.empty() && "File without a TU!");
409 // FIXME: Currently, we simply schedule indexing on a TU whenever any of
410 // its dependencies needs re-indexing. We might do it smarter by figuring
411 // out a minimal set of TUs that will cover all the stale dependencies.
412 // FIXME: Try looking at other TUs if no compile commands are available
413 // for this TU, i.e TU was deleted after we performed indexing.
414 TUsToIndex.insert(TUForFile);
417 return {TUsToIndex.begin(), TUsToIndex.end()};
420 void BackgroundIndex::profile(MemoryTree &MT) const {
421 IndexedSymbols.profile(MT.child("slabs"));
422 // We don't want to mix memory used by index and symbols, so call base class.
423 MT.child("index").addUsage(SwapIndex::estimateMemoryUsage());
425 } // namespace clangd
426 } // namespace clang