1 //===--- FileIndex.h - Index for files. ---------------------------- C++-*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // FileIndex implements SymbolIndex for symbols from a set of files. Symbols are
10 // maintained at source-file granularity (e.g. with ASTs), and files can be
11 // updated dynamically.
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_FILEINDEX_H
16 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_FILEINDEX_H
19 #include "clang-include-cleaner/Record.h"
20 #include "index/Index.h"
21 #include "index/Merge.h"
22 #include "index/Ref.h"
23 #include "index/Relation.h"
24 #include "index/Serialization.h"
25 #include "index/Symbol.h"
26 #include "support/MemoryTree.h"
27 #include "support/Path.h"
28 #include "clang/Lex/Preprocessor.h"
29 #include "llvm/ADT/DenseSet.h"
30 #include "llvm/ADT/StringMap.h"
31 #include "llvm/ADT/StringRef.h"
41 /// Select between in-memory index implementations, which have tradeoffs.
42 enum class IndexType
{
43 // MemIndex is trivially cheap to build, but has poor query performance.
45 // Dex is relatively expensive to build and uses more memory, but is fast.
49 /// How to handle duplicated symbols across multiple files.
50 enum class DuplicateHandling
{
51 // Pick a random symbol. Less accurate but faster.
53 // Merge symbols. More accurate but slower.
57 /// A container of slabs associated with a key. It can be updated at key
58 /// granularity, replacing all slabs belonging to a key with a new set. Keys are
59 /// usually file paths/uris.
61 /// This implements snapshot semantics. Each update will create a new snapshot
62 /// for all slabs of the Key. Snapshots are managed with shared pointers that
63 /// are shared between this class and the users. For each key, this class only
64 /// stores a pointer pointing to the newest snapshot, and an outdated snapshot
65 /// is deleted by the last owner of the snapshot, either this class or the
68 /// The snapshot semantics keeps critical sections minimal since we only need
69 /// locking when we swap or obtain references to snapshots.
/// Constructs the container from \p IdxContents and \p SupportContainedRefs.
/// NOTE(review): data members with the same two names are declared later in
/// this header; presumably the constructor just stores its arguments there --
/// confirm against the out-of-line definition.
72 FileSymbols(IndexContents IdxContents
, bool SupportContainedRefs
);
73 /// Updates all slabs associated with the \p Key.
74 /// If any of \p Symbols, \p Refs or \p Relations is nullptr, the
/// corresponding data for \p Key will be removed.
/// The slabs are taken as std::unique_ptr by value, so the caller hands over
/// ownership of each non-null slab.
75 /// If CountReferences is true, \p Refs will be used for counting references
77 void update(llvm::StringRef Key
, std::unique_ptr
<SymbolSlab
> Symbols
,
78 std::unique_ptr
<RefSlab
> Refs
,
79 std::unique_ptr
<RelationSlab
> Relations
, bool CountReferences
);
81 /// The index keeps the slabs alive.
82 /// Will count Symbol::References based on number of references in the main
83 /// files, while building the index with DuplicateHandling::Merge option.
84 /// Version is populated with an increasing sequence counter.
85 std::unique_ptr
<SymbolIndex
>
87 DuplicateHandling DuplicateHandle
= DuplicateHandling::PickOne
,
88 size_t *Version
= nullptr);
90 void profile(MemoryTree
&MT
) const;
93 IndexContents IdxContents
;
94 bool SupportContainedRefs
;
96 struct RefSlabAndCountReferences
{
97 std::shared_ptr
<RefSlab
> Slab
;
98 bool CountReferences
= false;
100 mutable std::mutex Mutex
;
103 llvm::StringMap
<std::shared_ptr
<SymbolSlab
>> SymbolsSnapshot
;
104 llvm::StringMap
<RefSlabAndCountReferences
> RefsSnapshot
;
105 llvm::StringMap
<std::shared_ptr
<RelationSlab
>> RelationsSnapshot
;
108 /// This manages symbols from files and an in-memory index on all symbols.
109 /// FIXME: Expose an interface to remove files that are closed.
110 class FileIndex
: public MergedIndex
{
112 FileIndex(bool SupportContainedRefs
);
114 /// Update preamble symbols of file \p Path with all declarations in \p AST
115 /// and macros in \p PP.
116 void updatePreamble(PathRef Path
, llvm::StringRef Version
, ASTContext
&AST
,
118 const include_cleaner::PragmaIncludes
&PI
);
/// Overload of updatePreamble() that takes an IndexFileIn by value instead of
/// a path and AST.
/// NOTE(review): presumably the IndexFileIn carries already-indexed preamble
/// data -- confirm against the definition.
119 void updatePreamble(IndexFileIn
);
121 /// Update symbols and references from main file \p Path with
122 /// `indexMainDecls`.
123 void updateMain(PathRef Path
, ParsedAST
&AST
);
125 void profile(MemoryTree
&MT
) const;
128 // Contains information from each file's preamble only. Symbols and relations
129 // are sharded per declaration file to deduplicate multiple symbols and reduce
131 // Missing information:
132 // - symbol refs (these are always "from the main file")
133 // - definition locations in the main file
135 // Note that we store only one version of a header, hence symbols appearing in
136 // different PP states will be missing.
137 FileSymbols PreambleSymbols
;
138 SwapIndex PreambleIndex
;
140 // Contains information from each file's main AST.
141 // These are updated frequently (on file change), but are relatively small.
143 // - refs to symbols declared in the preamble and referenced from main
144 // - symbols declared both in the main file and the preamble
145 // (Note that symbols *only* in the main file are not indexed).
146 FileSymbols MainFileSymbols
;
147 SwapIndex MainFileIndex
;
149 // While both the FileIndex and SwapIndex are threadsafe, we need to track
150 // versions to ensure that we don't overwrite newer indexes with older ones.
151 std::mutex UpdateIndexMu
;
152 unsigned MainIndexVersion
= 0;
153 unsigned PreambleIndexVersion
= 0;
/// Bundle of the three slab kinds, in the order (symbols, refs, relations).
/// This is the return type of indexMainDecls() and indexHeaderSymbols().
156 using SlabTuple
= std::tuple
<SymbolSlab
, RefSlab
, RelationSlab
>;
158 /// Retrieves symbols and refs of local top-level decls in \p AST (i.e.
159 /// `AST.getLocalTopLevelDecls()`).
/// The result is a SlabTuple, i.e. a (SymbolSlab, RefSlab, RelationSlab)
/// tuple, so a relation slab is returned alongside the symbols and refs.
160 /// Exposed to assist in unit tests.
161 SlabTuple
indexMainDecls(ParsedAST
&AST
);
163 /// Index declarations from \p AST and macros from \p PP that are declared in
164 /// included headers.
165 SlabTuple
indexHeaderSymbols(llvm::StringRef Version
, ASTContext
&AST
,
167 const include_cleaner::PragmaIncludes
&PI
);
169 /// Takes slabs coming from a TU (multiple files) and shards them per
170 /// declaration location.
171 struct FileShardedIndex
{
/// Builds the sharded view from \p Input, which is taken by value.
/// NOTE(review): the constructor declared below has no HintPath parameter, so
/// the sentence that follows looks stale -- confirm and update or remove it.
172 /// \p HintPath is used to convert file URIs stored in symbols into absolute
174 explicit FileShardedIndex(IndexFileIn Input
);
176 /// Returns the URIs of all files that have a shard.
177 std::vector
<llvm::StringRef
> getAllSources() const;
179 /// Generates index shard for the \p Uri. Note that this function results in
180 /// a copy of all the relevant data.
181 /// Returned index will always have Symbol/Refs/Relation Slabs set, even if
183 std::optional
<IndexFileIn
> getShard(llvm::StringRef Uri
) const;
186 // Contains all the information that belongs to a single file.
188 // Either declared or defined in the file.
189 llvm::DenseSet
<const Symbol
*> Symbols
;
190 // Reference occurs in the file.
191 llvm::DenseSet
<const Ref
*> Refs
;
192 // Subject is declared in the file.
193 llvm::DenseSet
<const Relation
*> Relations
;
194 // Contains edges for only the direct includes.
198 // Keeps all the information alive.
199 const IndexFileIn Index
;
200 // Mapping from URIs to slab information.
201 llvm::StringMap
<FileShard
> Shards
;
202 // Used to build RefSlabs.
203 llvm::DenseMap
<const Ref
*, SymbolID
> RefToSymID
;
206 } // namespace clangd
209 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_FILEINDEX_H