1 //===--- FileIndex.cpp - Indexes for files. ------------------------ C++-*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 #include "CollectMacros.h"
11 #include "ParsedAST.h"
12 #include "clang-include-cleaner/Record.h"
13 #include "index/Index.h"
14 #include "index/MemIndex.h"
15 #include "index/Merge.h"
16 #include "index/Ref.h"
17 #include "index/Relation.h"
18 #include "index/Serialization.h"
19 #include "index/Symbol.h"
20 #include "index/SymbolCollector.h"
21 #include "index/SymbolID.h"
22 #include "index/SymbolOrigin.h"
23 #include "index/dex/Dex.h"
24 #include "support/Logger.h"
25 #include "support/MemoryTree.h"
26 #include "support/Path.h"
27 #include "clang/AST/ASTContext.h"
28 #include "clang/Index/IndexingAction.h"
29 #include "clang/Index/IndexingOptions.h"
30 #include "clang/Lex/Preprocessor.h"
31 #include "llvm/ADT/DenseMap.h"
32 #include "llvm/ADT/STLExtras.h"
33 #include "llvm/ADT/StringMap.h"
34 #include "llvm/ADT/StringRef.h"
46 SlabTuple
indexSymbols(ASTContext
&AST
, Preprocessor
&PP
,
47 llvm::ArrayRef
<Decl
*> DeclsToIndex
,
48 const MainFileMacros
*MacroRefsToIndex
,
49 const include_cleaner::PragmaIncludes
&PI
,
50 bool IsIndexMainAST
, llvm::StringRef Version
,
51 bool CollectMainFileRefs
) {
52 SymbolCollector::Options CollectorOpts
;
53 CollectorOpts
.CollectIncludePath
= true;
54 CollectorOpts
.PragmaIncludes
= &PI
;
55 CollectorOpts
.CountReferences
= false;
56 CollectorOpts
.Origin
=
57 IsIndexMainAST
? SymbolOrigin::Open
: SymbolOrigin::Preamble
;
58 CollectorOpts
.CollectMainFileRefs
= CollectMainFileRefs
;
59 // We want stdlib implementation details in the index only if we've opened the
60 // file in question. This does means xrefs won't work, though.
61 CollectorOpts
.CollectReserved
= IsIndexMainAST
;
63 index::IndexingOptions IndexOpts
;
64 // We only need declarations, because we don't count references.
65 IndexOpts
.SystemSymbolFilter
=
66 index::IndexingOptions::SystemSymbolFilterKind::DeclarationsOnly
;
67 // We index function-local classes and its member functions only.
68 IndexOpts
.IndexFunctionLocals
= true;
70 // We only collect refs when indexing main AST.
71 CollectorOpts
.RefFilter
= RefKind::All
;
72 // Comments for main file can always be obtained from sema, do not store
74 CollectorOpts
.StoreAllDocumentation
= false;
76 IndexOpts
.IndexMacrosInPreprocessor
= true;
77 CollectorOpts
.CollectMacro
= true;
78 CollectorOpts
.StoreAllDocumentation
= true;
81 SymbolCollector
Collector(std::move(CollectorOpts
));
82 Collector
.setPreprocessor(PP
);
83 index::indexTopLevelDecls(AST
, PP
, DeclsToIndex
, Collector
, IndexOpts
);
85 Collector
.handleMacros(*MacroRefsToIndex
);
87 const auto &SM
= AST
.getSourceManager();
88 const auto MainFileEntry
= SM
.getFileEntryRefForID(SM
.getMainFileID());
89 std::string FileName
=
90 std::string(MainFileEntry
? MainFileEntry
->getName() : "");
92 auto Syms
= Collector
.takeSymbols();
93 auto Refs
= Collector
.takeRefs();
94 auto Relations
= Collector
.takeRelations();
96 vlog("indexed {0} AST for {1} version {2}:\n"
97 " symbol slab: {3} symbols, {4} bytes\n"
98 " ref slab: {5} symbols, {6} refs, {7} bytes\n"
99 " relations slab: {8} relations, {9} bytes",
100 IsIndexMainAST
? "file" : "preamble", FileName
, Version
, Syms
.size(),
101 Syms
.bytes(), Refs
.size(), Refs
.numRefs(), Refs
.bytes(),
102 Relations
.size(), Relations
.bytes());
103 return std::make_tuple(std::move(Syms
), std::move(Refs
),
104 std::move(Relations
));
107 // We keep only the node "U" and its edges. Any node other than "U" will be
108 // empty in the resultant graph.
109 IncludeGraph
getSubGraph(llvm::StringRef URI
, const IncludeGraph
&FullGraph
) {
112 auto Entry
= IG
.try_emplace(URI
).first
;
113 auto &Node
= Entry
->getValue();
114 Node
= FullGraph
.lookup(Entry
->getKey());
115 Node
.URI
= Entry
->getKey();
117 // URIs inside nodes must point into the keys of the same IncludeGraph.
118 for (auto &Include
: Node
.DirectIncludes
) {
119 auto I
= IG
.try_emplace(Include
).first
;
120 I
->getValue().URI
= I
->getKey();
121 Include
= I
->getKey();
127 FileShardedIndex::FileShardedIndex(IndexFileIn Input
)
128 : Index(std::move(Input
)) {
129 // Used to build RelationSlabs.
130 llvm::DenseMap
<SymbolID
, FileShard
*> SymbolIDToFile
;
132 // Attribute each Symbol to both their declaration and definition locations.
134 for (const auto &S
: *Index
.Symbols
) {
135 auto It
= Shards
.try_emplace(S
.CanonicalDeclaration
.FileURI
);
136 It
.first
->getValue().Symbols
.insert(&S
);
137 SymbolIDToFile
[S
.ID
] = &It
.first
->getValue();
138 // Only bother if definition file is different than declaration file.
140 S
.Definition
.FileURI
!= S
.CanonicalDeclaration
.FileURI
) {
141 auto It
= Shards
.try_emplace(S
.Definition
.FileURI
);
142 It
.first
->getValue().Symbols
.insert(&S
);
146 // Attribute references into each file they occured in.
148 for (const auto &SymRefs
: *Index
.Refs
) {
149 for (const auto &R
: SymRefs
.second
) {
150 const auto It
= Shards
.try_emplace(R
.Location
.FileURI
);
151 It
.first
->getValue().Refs
.insert(&R
);
152 RefToSymID
[&R
] = SymRefs
.first
;
156 // The Subject and/or Object shards might be part of multiple TUs. In
157 // such cases there will be a race and the last TU to write the shard
158 // will win and all the other relations will be lost. To avoid this,
159 // we store relations in both shards. A race might still happen if the
160 // same translation unit produces different relations under different
161 // configurations, but that's something clangd doesn't handle in general.
162 if (Index
.Relations
) {
163 for (const auto &R
: *Index
.Relations
) {
164 // FIXME: RelationSlab shouldn't contain dangling relations.
165 FileShard
*SubjectFile
= SymbolIDToFile
.lookup(R
.Subject
);
166 FileShard
*ObjectFile
= SymbolIDToFile
.lookup(R
.Object
);
168 SubjectFile
->Relations
.insert(&R
);
169 if (ObjectFile
&& ObjectFile
!= SubjectFile
)
170 ObjectFile
->Relations
.insert(&R
);
173 // Store only the direct includes of a file in a shard.
175 const auto &FullGraph
= *Index
.Sources
;
176 for (const auto &It
: FullGraph
) {
177 auto ShardIt
= Shards
.try_emplace(It
.first());
178 ShardIt
.first
->getValue().IG
= getSubGraph(It
.first(), FullGraph
);
182 std::vector
<llvm::StringRef
> FileShardedIndex::getAllSources() const {
183 // It should be enough to construct a vector with {Shards.keys().begin(),
184 // Shards.keys().end()} but MSVC fails to compile that.
185 std::vector
<PathRef
> Result
;
186 Result
.reserve(Shards
.size());
187 for (auto Key
: Shards
.keys())
188 Result
.push_back(Key
);
192 std::optional
<IndexFileIn
>
193 FileShardedIndex::getShard(llvm::StringRef Uri
) const {
194 auto It
= Shards
.find(Uri
);
195 if (It
== Shards
.end())
199 IF
.Sources
= It
->getValue().IG
;
202 SymbolSlab::Builder SymB
;
203 for (const auto *S
: It
->getValue().Symbols
)
205 IF
.Symbols
= std::move(SymB
).build();
207 RefSlab::Builder RefB
;
208 for (const auto *Ref
: It
->getValue().Refs
) {
209 auto SID
= RefToSymID
.lookup(Ref
);
210 RefB
.insert(SID
, *Ref
);
212 IF
.Refs
= std::move(RefB
).build();
214 RelationSlab::Builder RelB
;
215 for (const auto *Rel
: It
->getValue().Relations
) {
218 IF
.Relations
= std::move(RelB
).build();
219 // Explicit move here is needed by some compilers.
220 return std::move(IF
);
223 SlabTuple
indexMainDecls(ParsedAST
&AST
) {
225 AST
.getASTContext(), AST
.getPreprocessor(), AST
.getLocalTopLevelDecls(),
226 &AST
.getMacros(), AST
.getPragmaIncludes(),
227 /*IsIndexMainAST=*/true, AST
.version(), /*CollectMainFileRefs=*/true);
230 SlabTuple
indexHeaderSymbols(llvm::StringRef Version
, ASTContext
&AST
,
232 const include_cleaner::PragmaIncludes
&PI
) {
233 std::vector
<Decl
*> DeclsToIndex(
234 AST
.getTranslationUnitDecl()->decls().begin(),
235 AST
.getTranslationUnitDecl()->decls().end());
236 return indexSymbols(AST
, PP
, DeclsToIndex
,
237 /*MainFileMacros=*/nullptr, PI
,
238 /*IsIndexMainAST=*/false, Version
,
239 /*CollectMainFileRefs=*/false);
242 FileSymbols::FileSymbols(IndexContents IdxContents
, bool SupportContainedRefs
)
243 : IdxContents(IdxContents
), SupportContainedRefs(SupportContainedRefs
) {}
245 void FileSymbols::update(llvm::StringRef Key
,
246 std::unique_ptr
<SymbolSlab
> Symbols
,
247 std::unique_ptr
<RefSlab
> Refs
,
248 std::unique_ptr
<RelationSlab
> Relations
,
249 bool CountReferences
) {
250 std::lock_guard
<std::mutex
> Lock(Mutex
);
253 SymbolsSnapshot
.erase(Key
);
255 SymbolsSnapshot
[Key
] = std::move(Symbols
);
257 RefsSnapshot
.erase(Key
);
259 RefSlabAndCountReferences Item
;
260 Item
.CountReferences
= CountReferences
;
261 Item
.Slab
= std::move(Refs
);
262 RefsSnapshot
[Key
] = std::move(Item
);
265 RelationsSnapshot
.erase(Key
);
267 RelationsSnapshot
[Key
] = std::move(Relations
);
270 std::unique_ptr
<SymbolIndex
>
271 FileSymbols::buildIndex(IndexType Type
, DuplicateHandling DuplicateHandle
,
273 std::vector
<std::shared_ptr
<SymbolSlab
>> SymbolSlabs
;
274 std::vector
<std::shared_ptr
<RefSlab
>> RefSlabs
;
275 std::vector
<std::shared_ptr
<RelationSlab
>> RelationSlabs
;
276 llvm::StringSet
<> Files
;
277 std::vector
<RefSlab
*> MainFileRefs
;
279 std::lock_guard
<std::mutex
> Lock(Mutex
);
280 for (const auto &FileAndSymbols
: SymbolsSnapshot
) {
281 SymbolSlabs
.push_back(FileAndSymbols
.second
);
282 Files
.insert(FileAndSymbols
.first());
284 for (const auto &FileAndRefs
: RefsSnapshot
) {
285 RefSlabs
.push_back(FileAndRefs
.second
.Slab
);
286 Files
.insert(FileAndRefs
.first());
287 if (FileAndRefs
.second
.CountReferences
)
288 MainFileRefs
.push_back(RefSlabs
.back().get());
290 for (const auto &FileAndRelations
: RelationsSnapshot
) {
291 Files
.insert(FileAndRelations
.first());
292 RelationSlabs
.push_back(FileAndRelations
.second
);
296 *Version
= this->Version
;
298 std::vector
<const Symbol
*> AllSymbols
;
299 std::vector
<Symbol
> SymsStorage
;
300 switch (DuplicateHandle
) {
301 case DuplicateHandling::Merge
: {
302 llvm::DenseMap
<SymbolID
, Symbol
> Merged
;
303 for (const auto &Slab
: SymbolSlabs
) {
304 for (const auto &Sym
: *Slab
) {
305 assert(Sym
.References
== 0 &&
306 "Symbol with non-zero references sent to FileSymbols");
307 auto I
= Merged
.try_emplace(Sym
.ID
, Sym
);
309 I
.first
->second
= mergeSymbol(I
.first
->second
, Sym
);
312 for (const RefSlab
*Refs
: MainFileRefs
)
313 for (const auto &Sym
: *Refs
) {
314 auto It
= Merged
.find(Sym
.first
);
315 // This might happen while background-index is still running.
316 if (It
== Merged
.end())
318 It
->getSecond().References
+= Sym
.second
.size();
320 SymsStorage
.reserve(Merged
.size());
321 for (auto &Sym
: Merged
) {
322 SymsStorage
.push_back(std::move(Sym
.second
));
323 AllSymbols
.push_back(&SymsStorage
.back());
327 case DuplicateHandling::PickOne
: {
328 llvm::DenseSet
<SymbolID
> AddedSymbols
;
329 for (const auto &Slab
: SymbolSlabs
)
330 for (const auto &Sym
: *Slab
) {
331 assert(Sym
.References
== 0 &&
332 "Symbol with non-zero references sent to FileSymbols");
333 if (AddedSymbols
.insert(Sym
.ID
).second
)
334 AllSymbols
.push_back(&Sym
);
340 std::vector
<Ref
> RefsStorage
; // Contiguous ranges for each SymbolID.
341 llvm::DenseMap
<SymbolID
, llvm::ArrayRef
<Ref
>> AllRefs
;
343 llvm::DenseMap
<SymbolID
, llvm::SmallVector
<Ref
, 4>> MergedRefs
;
345 for (const auto &RefSlab
: RefSlabs
)
346 for (const auto &Sym
: *RefSlab
) {
347 MergedRefs
[Sym
.first
].append(Sym
.second
.begin(), Sym
.second
.end());
348 Count
+= Sym
.second
.size();
350 RefsStorage
.reserve(Count
);
351 AllRefs
.reserve(MergedRefs
.size());
352 for (auto &Sym
: MergedRefs
) {
353 auto &SymRefs
= Sym
.second
;
354 // Sorting isn't required, but yields more stable results over rebuilds.
356 llvm::copy(SymRefs
, back_inserter(RefsStorage
));
359 llvm::ArrayRef
<Ref
>(&RefsStorage
[RefsStorage
.size() - SymRefs
.size()],
364 std::vector
<Relation
> AllRelations
;
365 for (const auto &RelationSlab
: RelationSlabs
) {
366 for (const auto &R
: *RelationSlab
)
367 AllRelations
.push_back(R
);
369 // Sort relations and remove duplicates that could arise due to
370 // relations being stored in both the shards containing their
371 // subject and object.
372 llvm::sort(AllRelations
);
373 AllRelations
.erase(std::unique(AllRelations
.begin(), AllRelations
.end()),
377 RefsStorage
.size() * sizeof(Ref
) + SymsStorage
.size() * sizeof(Symbol
);
378 for (const auto &Slab
: SymbolSlabs
)
379 StorageSize
+= Slab
->bytes();
380 for (const auto &RefSlab
: RefSlabs
)
381 StorageSize
+= RefSlab
->bytes();
383 // Index must keep the slabs and contiguous ranges alive.
385 case IndexType::Light
:
386 return std::make_unique
<MemIndex
>(
387 llvm::make_pointee_range(AllSymbols
), std::move(AllRefs
),
388 std::move(AllRelations
), std::move(Files
), IdxContents
,
389 std::make_tuple(std::move(SymbolSlabs
), std::move(RefSlabs
),
390 std::move(RefsStorage
), std::move(SymsStorage
)),
392 case IndexType::Heavy
:
393 return std::make_unique
<dex::Dex
>(
394 llvm::make_pointee_range(AllSymbols
), std::move(AllRefs
),
395 std::move(AllRelations
), std::move(Files
), IdxContents
,
396 std::make_tuple(std::move(SymbolSlabs
), std::move(RefSlabs
),
397 std::move(RefsStorage
), std::move(SymsStorage
)),
398 StorageSize
, SupportContainedRefs
);
400 llvm_unreachable("Unknown clangd::IndexType");
403 void FileSymbols::profile(MemoryTree
&MT
) const {
404 std::lock_guard
<std::mutex
> Lock(Mutex
);
405 for (const auto &SymSlab
: SymbolsSnapshot
) {
406 MT
.detail(SymSlab
.first())
408 .addUsage(SymSlab
.second
->bytes());
410 for (const auto &RefSlab
: RefsSnapshot
) {
411 MT
.detail(RefSlab
.first())
413 .addUsage(RefSlab
.second
.Slab
->bytes());
415 for (const auto &RelSlab
: RelationsSnapshot
) {
416 MT
.detail(RelSlab
.first())
418 .addUsage(RelSlab
.second
->bytes());
422 FileIndex::FileIndex(bool SupportContainedRefs
)
423 : MergedIndex(&MainFileIndex
, &PreambleIndex
),
424 PreambleSymbols(IndexContents::Symbols
| IndexContents::Relations
,
425 SupportContainedRefs
),
426 PreambleIndex(std::make_unique
<MemIndex
>()),
427 MainFileSymbols(IndexContents::All
, SupportContainedRefs
),
428 MainFileIndex(std::make_unique
<MemIndex
>()) {}
430 void FileIndex::updatePreamble(IndexFileIn IF
) {
431 FileShardedIndex
ShardedIndex(std::move(IF
));
432 for (auto Uri
: ShardedIndex
.getAllSources()) {
433 auto IF
= ShardedIndex
.getShard(Uri
);
434 // We are using the key received from ShardedIndex, so it should always
437 PreambleSymbols
.update(
438 Uri
, std::make_unique
<SymbolSlab
>(std::move(*IF
->Symbols
)),
439 std::make_unique
<RefSlab
>(),
440 std::make_unique
<RelationSlab
>(std::move(*IF
->Relations
)),
441 /*CountReferences=*/false);
443 size_t IndexVersion
= 0;
444 auto NewIndex
= PreambleSymbols
.buildIndex(
445 IndexType::Heavy
, DuplicateHandling::PickOne
, &IndexVersion
);
447 std::lock_guard
<std::mutex
> Lock(UpdateIndexMu
);
448 if (IndexVersion
<= PreambleIndexVersion
) {
449 // We lost the race, some other thread built a later version.
452 PreambleIndexVersion
= IndexVersion
;
453 PreambleIndex
.reset(std::move(NewIndex
));
455 "Build dynamic index for header symbols with estimated memory usage of "
457 PreambleIndex
.estimateMemoryUsage());
461 void FileIndex::updatePreamble(PathRef Path
, llvm::StringRef Version
,
462 ASTContext
&AST
, Preprocessor
&PP
,
463 const include_cleaner::PragmaIncludes
&PI
) {
465 std::tie(IF
.Symbols
, std::ignore
, IF
.Relations
) =
466 indexHeaderSymbols(Version
, AST
, PP
, PI
);
467 updatePreamble(std::move(IF
));
470 void FileIndex::updateMain(PathRef Path
, ParsedAST
&AST
) {
471 auto Contents
= indexMainDecls(AST
);
472 MainFileSymbols
.update(
473 URI::create(Path
).toString(),
474 std::make_unique
<SymbolSlab
>(std::move(std::get
<0>(Contents
))),
475 std::make_unique
<RefSlab
>(std::move(std::get
<1>(Contents
))),
476 std::make_unique
<RelationSlab
>(std::move(std::get
<2>(Contents
))),
477 /*CountReferences=*/true);
478 size_t IndexVersion
= 0;
479 auto NewIndex
= MainFileSymbols
.buildIndex(
480 IndexType::Light
, DuplicateHandling::Merge
, &IndexVersion
);
482 std::lock_guard
<std::mutex
> Lock(UpdateIndexMu
);
483 if (IndexVersion
<= MainIndexVersion
) {
484 // We lost the race, some other thread built a later version.
487 MainIndexVersion
= IndexVersion
;
488 MainFileIndex
.reset(std::move(NewIndex
));
490 "Build dynamic index for main-file symbols with estimated memory usage "
492 MainFileIndex
.estimateMemoryUsage());
496 void FileIndex::profile(MemoryTree
&MT
) const {
497 PreambleSymbols
.profile(MT
.child("preamble").child("slabs"));
500 .addUsage(PreambleIndex
.estimateMemoryUsage());
501 MainFileSymbols
.profile(MT
.child("main_file").child("slabs"));
502 MT
.child("main_file")
504 .addUsage(MainFileIndex
.estimateMemoryUsage());
506 } // namespace clangd