1 //===--- Symbol.h ------------------------------------------------*- C++-*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_H
10 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_H
12 #include "index/SymbolID.h"
13 #include "index/SymbolLocation.h"
14 #include "index/SymbolOrigin.h"
15 #include "clang/Index/IndexSymbol.h"
16 #include "llvm/ADT/BitmaskEnum.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/Support/StringSaver.h"
23 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// The class represents a C++ symbol, e.g. class, function.
27 /// WARNING: Symbols do not own much of their underlying data - typically
28 /// strings are owned by a SymbolSlab. They should be treated as non-owning
29 /// references. Copies are shallow.
31 /// When adding new unowned data fields to Symbol, remember to update:
32 /// - SymbolSlab::Builder in Index.cpp, to copy them to the slab's storage.
33 /// - mergeSymbol in Merge.cpp, to properly combine two Symbols.
35 /// A fully documented symbol can be split as:
36 /// size_type std::map<k, t>::count(const K& key) const
37 /// | Return | Scope |Name| Signature |
38 /// We split up these components to allow display flexibility later.
40 /// The ID of the symbol.
42 /// The symbol information, like symbol kind.
43 index::SymbolInfo SymInfo
= index::SymbolInfo();
44 /// The unqualified name of the symbol, e.g. "bar" (for ns::bar).
46 /// The containing namespace. e.g. "" (global), "ns::" (top-level namespace).
47 llvm::StringRef Scope
;
48 /// The location of the symbol's definition, if one was found.
49 /// This just covers the symbol name (e.g. without class/function body).
50 SymbolLocation Definition
;
51 /// The location of the preferred declaration of the symbol.
52 /// This just covers the symbol name.
53 /// This may be the same as Definition.
55 /// A C++ symbol may have multiple declarations, and we pick one to prefer.
56 /// * For classes, the canonical declaration should be the definition.
57 /// * For non-inline functions, the canonical declaration typically appears
58 /// in the ".h" file corresponding to the definition.
59 SymbolLocation CanonicalDeclaration
;
60 /// The number of translation units that reference this symbol from their main
61 /// file. This number is only meaningful if aggregated in an index.
62 unsigned References
= 0;
63 /// Where this symbol came from. Usually an index provides a constant value.
64 SymbolOrigin Origin
= SymbolOrigin::Unknown
;
65 /// A brief description of the symbol that can be appended in the completion
66 /// candidate list. For example, "(X x, Y y) const" is a function signature.
67 /// Only set when the symbol is indexed for completion.
68 llvm::StringRef Signature
;
69 /// Argument list in human-readable format, will be displayed to help
70 /// disambiguate between different specializations of a template. Empty for
71 /// non-specializations. Example: "<int, bool, 3>"
72 llvm::StringRef TemplateSpecializationArgs
;
73 /// What to insert when completing this symbol, after the symbol name.
74 /// This is in LSP snippet syntax (e.g. "({$0})" for a no-args function).
75 /// (When snippets are disabled, the symbol name alone is used).
76 /// Only set when the symbol is indexed for completion.
77 llvm::StringRef CompletionSnippetSuffix
;
78 /// Documentation including comment for the symbol declaration.
79 llvm::StringRef Documentation
;
80 /// Type when this symbol is used in an expression. (Short display form).
81 /// e.g. return type of a function, or type of a variable.
82 /// Only set when the symbol is indexed for completion.
83 llvm::StringRef ReturnType
;
/// Raw representation of the OpaqueType of the symbol, used for scoring
/// purposes.
87 /// Only set when the symbol is indexed for completion.
90 enum IncludeDirective
: uint8_t {
92 /// `#include "header.h"`
94 /// `#import "header.h"`
97 LLVM_MARK_AS_BITMASK_ENUM(Import
)
100 struct IncludeHeaderWithReferences
{
101 IncludeHeaderWithReferences() = default;
103 IncludeHeaderWithReferences(llvm::StringRef IncludeHeader
,
105 IncludeDirective SupportedDirectives
)
106 : IncludeHeader(IncludeHeader
), References(References
),
107 SupportedDirectives(SupportedDirectives
) {}
109 /// This can be either a URI of the header to be #include'd
110 /// for this symbol, or a literal header quoted with <> or "" that is
111 /// suitable to be included directly. When it is a URI, the exact #include
112 /// path needs to be calculated according to the URI scheme.
114 /// Note that the include header is a canonical include for the symbol and
115 /// can be different from FileURI in the CanonicalDeclaration.
116 llvm::StringRef IncludeHeader
= "";
117 /// The number of translation units that reference this symbol and include
118 /// this header. This number is only meaningful if aggregated in an index.
119 uint32_t References
: 30;
120 /// Bitfield of supported directives (IncludeDirective) that can be used
121 /// when including this header.
122 uint32_t SupportedDirectives
: 2;
124 IncludeDirective
supportedDirectives() const {
125 return static_cast<IncludeDirective
>(SupportedDirectives
);
128 /// One Symbol can potentially be included via different headers.
129 /// - If we haven't seen a definition, this covers all declarations.
/// - If we have seen a definition, this covers declarations visible from
///   any definition.
132 /// Only set when the symbol is indexed for completion.
133 llvm::SmallVector
<IncludeHeaderWithReferences
, 1> IncludeHeaders
;
135 enum SymbolFlag
: uint8_t {
137 /// Whether or not this symbol is meant to be used for the code completion.
138 /// See also isIndexedForCodeCompletion().
139 /// Note that we don't store completion information (signature, snippet,
140 /// type, includes) if the symbol is not indexed for code completion.
141 IndexedForCodeCompletion
= 1 << 0,
142 /// Indicates if the symbol is deprecated.
144 /// Symbol is an implementation detail.
145 ImplementationDetail
= 1 << 2,
146 /// Symbol is visible to other files (not e.g. a static helper function).
147 VisibleOutsideFile
= 1 << 3,
148 /// Symbol has an attached documentation comment.
149 HasDocComment
= 1 << 4
151 SymbolFlag Flags
= SymbolFlag::None
;
153 /// FIXME: also add deprecation message and fixit?
156 inline Symbol::SymbolFlag
operator|(Symbol::SymbolFlag A
,
157 Symbol::SymbolFlag B
) {
158 return static_cast<Symbol::SymbolFlag
>(static_cast<uint8_t>(A
) |
159 static_cast<uint8_t>(B
));
161 inline Symbol::SymbolFlag
&operator|=(Symbol::SymbolFlag
&A
,
162 Symbol::SymbolFlag B
) {
166 llvm::raw_ostream
&operator<<(llvm::raw_ostream
&OS
, const Symbol
&S
);
167 llvm::raw_ostream
&operator<<(llvm::raw_ostream
&OS
, Symbol::SymbolFlag
);
169 /// Invokes Callback with each StringRef& contained in the Symbol.
170 /// Useful for deduplicating backing strings.
171 template <typename Callback
> void visitStrings(Symbol
&S
, const Callback
&CB
) {
174 CB(S
.TemplateSpecializationArgs
);
176 CB(S
.CompletionSnippetSuffix
);
180 auto RawCharPointerCB
= [&CB
](const char *&P
) {
181 llvm::StringRef
S(P
);
183 assert(!S
.data()[S
.size()] && "Visited StringRef must be null-terminated");
186 RawCharPointerCB(S
.CanonicalDeclaration
.FileURI
);
187 RawCharPointerCB(S
.Definition
.FileURI
);
189 for (auto &Include
: S
.IncludeHeaders
)
190 CB(Include
.IncludeHeader
);
193 /// Computes query-independent quality score for a Symbol.
194 /// This currently falls in the range [1, ln(#indexed documents)].
195 /// FIXME: this should probably be split into symbol -> signals
196 /// and signals -> score, so it can be reused for Sema completions.
197 float quality(const Symbol
&S
);
199 /// An immutable symbol container that stores a set of symbols.
200 /// The container will maintain the lifetime of the symbols.
203 using const_iterator
= std::vector
<Symbol
>::const_iterator
;
204 using iterator
= const_iterator
;
205 using value_type
= Symbol
;
207 SymbolSlab() = default;
209 const_iterator
begin() const { return Symbols
.begin(); }
210 const_iterator
end() const { return Symbols
.end(); }
211 const_iterator
find(const SymbolID
&SymID
) const;
213 using size_type
= size_t;
214 size_type
size() const { return Symbols
.size(); }
215 bool empty() const { return Symbols
.empty(); }
216 // Estimates the total memory usage.
217 size_t bytes() const {
218 return sizeof(*this) + Arena
.getTotalMemory() +
219 Symbols
.capacity() * sizeof(Symbol
);
222 /// SymbolSlab::Builder is a mutable container that can 'freeze' to
223 /// SymbolSlab. The frozen SymbolSlab will use less memory.
226 Builder() : UniqueStrings(Arena
) {}
228 /// Adds a symbol, overwriting any existing one with the same ID.
229 /// This is a deep copy: underlying strings will be owned by the slab.
230 void insert(const Symbol
&S
);
232 /// Removes the symbol with an ID, if it exists.
233 void erase(const SymbolID
&ID
) { Symbols
.erase(ID
); }
/// Returns the symbol with the given ID, or nullptr if there is none.
/// The returned pointer is valid until the next insert() or erase().
236 const Symbol
*find(const SymbolID
&ID
) {
237 auto I
= Symbols
.find(ID
);
238 return I
== Symbols
.end() ? nullptr : &I
->second
;
241 /// Consumes the builder to finalize the slab.
242 SymbolSlab
build() &&;
245 llvm::BumpPtrAllocator Arena
;
246 /// Intern table for strings. Contents are on the arena.
247 llvm::UniqueStringSaver UniqueStrings
;
/// Maps each SymbolID to the Symbol stored for it.
249 llvm::DenseMap
<SymbolID
, Symbol
> Symbols
;
253 SymbolSlab(llvm::BumpPtrAllocator Arena
, std::vector
<Symbol
> Symbols
)
254 : Arena(std::move(Arena
)), Symbols(std::move(Symbols
)) {}
256 llvm::BumpPtrAllocator Arena
; // Owns Symbol data that the Symbols do not.
257 std::vector
<Symbol
> Symbols
; // Sorted by SymbolID to allow lookup.
260 llvm::raw_ostream
&operator<<(llvm::raw_ostream
&OS
, const SymbolSlab
&Slab
);
262 } // namespace clangd
265 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_H