1 //===--- Dex.h - Dex Symbol Index Implementation ----------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This defines Dex - a symbol index implementation based on query iterators
11 /// over symbol tokens, such as fuzzy matching trigrams, scopes, types, etc.
12 /// While consuming more memory and having longer build stage due to
13 /// preprocessing, Dex will have substantially lower latency. It will also allow
14 /// efficient symbol searching which is crucial for operations like code
15 /// completion, and can be very important for a number of different code
16 /// transformations which will be eventually supported by Clangd.
18 //===----------------------------------------------------------------------===//
20 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
21 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
23 #include "index/dex/Iterator.h"
24 #include "index/Index.h"
25 #include "index/Relation.h"
26 #include "index/dex/PostingList.h"
27 #include "index/dex/Token.h"
28 #include "llvm/ADT/StringSet.h"
34 /// In-memory Dex trigram-based index implementation.
35 class Dex
: public SymbolIndex
{
37 // All data must outlive this index.
38 template <typename SymbolRange
, typename RefsRange
, typename RelationsRange
>
39 Dex(SymbolRange
&&Symbols
, RefsRange
&&Refs
, RelationsRange
&&Relations
,
40 bool SupportContainedRefs
)
42 for (auto &&Sym
: Symbols
)
43 this->Symbols
.push_back(&Sym
);
44 for (auto &&Ref
: Refs
)
45 this->Refs
.try_emplace(Ref
.first
, Ref
.second
);
46 for (auto &&Rel
: Relations
)
47 this->Relations
[std::make_pair(Rel
.Subject
,
48 static_cast<uint8_t>(Rel
.Predicate
))]
49 .push_back(Rel
.Object
);
50 buildIndex(SupportContainedRefs
);
52 // Symbols and Refs are owned by BackingData, Index takes ownership.
53 template <typename SymbolRange
, typename RefsRange
, typename RelationsRange
,
55 Dex(SymbolRange
&&Symbols
, RefsRange
&&Refs
, RelationsRange
&&Relations
,
56 Payload
&&BackingData
, size_t BackingDataSize
, bool SupportContainedRefs
)
57 : Dex(std::forward
<SymbolRange
>(Symbols
), std::forward
<RefsRange
>(Refs
),
58 std::forward
<RelationsRange
>(Relations
), SupportContainedRefs
) {
59 KeepAlive
= std::shared_ptr
<void>(
60 std::make_shared
<Payload
>(std::move(BackingData
)), nullptr);
61 this->BackingDataSize
= BackingDataSize
;
64 template <typename SymbolRange
, typename RefsRange
, typename RelationsRange
,
65 typename FileRange
, typename Payload
>
66 Dex(SymbolRange
&&Symbols
, RefsRange
&&Refs
, RelationsRange
&&Relations
,
67 FileRange
&&Files
, IndexContents IdxContents
, Payload
&&BackingData
,
68 size_t BackingDataSize
, bool SupportContainedRefs
)
69 : Dex(std::forward
<SymbolRange
>(Symbols
), std::forward
<RefsRange
>(Refs
),
70 std::forward
<RelationsRange
>(Relations
),
71 std::forward
<Payload
>(BackingData
), BackingDataSize
,
72 SupportContainedRefs
) {
73 this->Files
= std::forward
<FileRange
>(Files
);
74 this->IdxContents
= IdxContents
;
77 /// Builds an index from slabs. The index takes ownership of the slab.
78 static std::unique_ptr
<SymbolIndex
> build(SymbolSlab
, RefSlab
, RelationSlab
,
79 bool SupportContainedRefs
);
82 fuzzyFind(const FuzzyFindRequest
&Req
,
83 llvm::function_ref
<void(const Symbol
&)> Callback
) const override
;
85 void lookup(const LookupRequest
&Req
,
86 llvm::function_ref
<void(const Symbol
&)> Callback
) const override
;
88 bool refs(const RefsRequest
&Req
,
89 llvm::function_ref
<void(const Ref
&)> Callback
) const override
;
91 bool containedRefs(const ContainedRefsRequest
&Req
,
92 llvm::function_ref
<void(const ContainedRefsResult
&)>
93 Callback
) const override
;
95 void relations(const RelationsRequest
&Req
,
96 llvm::function_ref
<void(const SymbolID
&, const Symbol
&)>
97 Callback
) const override
;
99 llvm::unique_function
<IndexContents(llvm::StringRef
) const>
100 indexedFiles() const override
;
102 size_t estimateMemoryUsage() const override
;
106 const Ref
*Reference
;
110 RevRef(const Ref
&Reference
, SymbolID Target
)
111 : Reference(&Reference
), Target(Target
) {}
112 const Ref
&ref() const { return *Reference
; }
113 ContainedRefsResult
containedRefsResult() const {
114 return {ref().Location
, ref().Kind
, Target
};
118 void buildIndex(bool EnableOutgoingCalls
);
119 llvm::iterator_range
<std::vector
<RevRef
>::const_iterator
>
120 lookupRevRefs(const SymbolID
&Container
) const;
121 std::unique_ptr
<Iterator
> iterator(const Token
&Tok
) const;
122 std::unique_ptr
<Iterator
>
123 createFileProximityIterator(llvm::ArrayRef
<std::string
> ProximityPaths
) const;
124 std::unique_ptr
<Iterator
>
125 createTypeBoostingIterator(llvm::ArrayRef
<std::string
> Types
) const;
127 /// Stores symbols sorted in the descending order of symbol quality..
128 std::vector
<const Symbol
*> Symbols
;
129 /// SymbolQuality[I] is the quality of Symbols[I].
130 std::vector
<float> SymbolQuality
;
131 llvm::DenseMap
<SymbolID
, const Symbol
*> LookupTable
;
132 /// Inverted index is a mapping from the search token to the posting list,
133 /// which contains all items which can be characterized by such search token.
134 /// For example, if the search token is scope "std::", the corresponding
135 /// posting list would contain all indices of symbols defined in namespace
136 /// std. Inverted index is used to retrieve posting lists which are processed
137 /// during the fuzzyFind process.
138 llvm::DenseMap
<Token
, PostingList
> InvertedIndex
;
140 llvm::DenseMap
<SymbolID
, llvm::ArrayRef
<Ref
>> Refs
;
141 std::vector
<RevRef
> RevRefs
; // sorted by container ID
142 static_assert(sizeof(RelationKind
) == sizeof(uint8_t),
143 "RelationKind should be of same size as a uint8_t");
144 llvm::DenseMap
<std::pair
<SymbolID
, uint8_t>, std::vector
<SymbolID
>> Relations
;
145 std::shared_ptr
<void> KeepAlive
; // poor man's move-only std::any
146 // Set of files which were used during this index build.
147 llvm::StringSet
<> Files
;
148 // Contents of the index (symbols, references, etc.)
149 // This is only populated if `Files` is, which applies to some but not all
150 // consumers of this class.
151 IndexContents IdxContents
= IndexContents::None
;
152 // Size of memory retained by KeepAlive.
153 size_t BackingDataSize
= 0;
156 /// Returns Search Token for a number of parent directories of given Path.
157 /// Should be used within the index build process.
159 /// This function is exposed for testing only.
160 llvm::SmallVector
<llvm::StringRef
, 5> generateProximityURIs(llvm::StringRef
);
163 } // namespace clangd
166 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H