1 //===--- SymbolCollector.h ---------------------------------------*- C++-*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
8 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOLCOLLECTOR_H
9 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOLCOLLECTOR_H
11 #include "CollectMacros.h"
12 #include "clang-include-cleaner/Record.h"
13 #include "clang-include-cleaner/Types.h"
14 #include "index/Ref.h"
15 #include "index/Relation.h"
16 #include "index/Symbol.h"
17 #include "index/SymbolID.h"
18 #include "index/SymbolLocation.h"
19 #include "index/SymbolOrigin.h"
20 #include "clang/AST/ASTContext.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/Basic/LLVM.h"
23 #include "clang/Basic/SourceLocation.h"
24 #include "clang/Basic/SourceManager.h"
25 #include "clang/Index/IndexDataConsumer.h"
26 #include "clang/Index/IndexSymbol.h"
27 #include "clang/Sema/CodeCompleteConsumer.h"
28 #include "llvm/ADT/DenseMap.h"
29 #include "llvm/ADT/DenseSet.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/ADT/StringRef.h"
41 /// Collect declarations (symbols) from an AST.
42 /// It collects most declarations except:
43 /// - Implicit declarations
44 /// - Anonymous declarations (anonymous enum/class/struct, etc)
45 /// - Declarations in anonymous namespaces in headers
46 /// - Local declarations (in function bodies, blocks, etc)
47 /// - Template specializations
48 /// - Library-specific private declarations (e.g. private declaration generated
49 /// by protobuf compiler)
51 /// References to main-file symbols are not collected.
53 /// See also shouldCollectSymbol(...).
55 /// Clients (e.g. clangd) can use SymbolCollector together with
56 /// index::indexTopLevelDecls to retrieve all symbols when the source file is changed.
58 class SymbolCollector
: public index::IndexDataConsumer
{
61 /// When symbol paths cannot be resolved to absolute paths (e.g. files in
62 /// VFS that does not have absolute path), combine the fallback directory
63 /// with symbols' paths to get absolute paths. This must be an absolute path.
65 std::string FallbackDir
;
/// NOTE(review): undocumented in this excerpt; presumably toggles collection
/// of #include paths for symbols - confirm against the implementation.
66 bool CollectIncludePath
= false;
67 /// If set, this is used to map symbol #include path to a potentially
68 /// different #include path specified by IWYU pragmas.
69 const include_cleaner::PragmaIncludes
*PragmaIncludes
= nullptr;
70 // Populate the Symbol.References field.
71 bool CountReferences
= false;
72 /// The symbol ref kinds that will be collected.
73 /// If not set, SymbolCollector will not collect refs.
74 /// Note that references of namespace decls are not collected, as they
75 /// contribute a large part of the index, and they are less useful compared
/// with other decls.
77 RefKind RefFilter
= RefKind::Unknown
;
78 /// If set to true, SymbolCollector will collect all refs (from main file
79 /// and included headers); otherwise, only refs from main file will be
/// collected.
81 /// This flag is only meaningful when RefFilter is set.
82 bool RefsInHeaders
= false;
83 // Every symbol collected will be stamped with this origin.
84 SymbolOrigin Origin
= SymbolOrigin::Unknown
;
/// Collect macros.
86 /// Note that SymbolCollector must be run with preprocessor in order to
87 /// collect macros. For example, `indexTopLevelDecls` will not index any
88 /// macro even if this is true.
89 bool CollectMacro
= false;
90 /// Collect symbols local to main-files, such as static functions, symbols
91 /// inside an anonymous namespace, function-local classes and their member
/// functions.
93 bool CollectMainFileSymbols
= true;
94 /// Collect references to main-file symbols.
95 bool CollectMainFileRefs
= false;
96 /// Collect symbols with reserved names, like __Vector_base.
97 /// This does not currently affect macros (many like _WIN32 are important!)
98 /// This only affects system headers.
99 bool CollectReserved
= false;
100 /// If set to true, SymbolCollector will collect doc for all symbols.
101 /// Note that documents of symbols being indexed for completion will always
102 /// be collected regardless of this option.
103 bool StoreAllDocumentation
= false;
104 /// If this is set, only collect symbols/references from a file if
105 /// `FileFilter(SM, FID)` is true. If not set, all files are indexed.
106 std::function
<bool(const SourceManager
&, FileID
)> FileFilter
= nullptr;
/// Constructs a collector configured by \p Opts (taken by value).
109 SymbolCollector(Options Opts
);
112 /// Returns true if \p ND should be collected.
/// Static, so it can be called without a SymbolCollector instance; the
/// options to apply are passed explicitly as \p Opts.
113 static bool shouldCollectSymbol(const NamedDecl
&ND
, const ASTContext
&ASTCtx
,
114 const Options
&Opts
, bool IsMainFileSymbol
);
116 // Given a ref contained in enclosing decl `Enclosing`, return
117 // the decl that should be used as that ref's Ref::Container. This is
118 // usually `Enclosing` itself, but in cases where `Enclosing` is not
119 // indexed, we walk further up because Ref::Container should always be
120 // an indexed symbol.
121 // Note: we don't use DeclContext as the container as in some cases
122 // it's useful to use a Decl which is not a DeclContext. For example,
123 // for a ref occurring in the initializer of a namespace-scope variable,
124 // it's useful to use that variable as the container, as otherwise the
125 // next enclosing DeclContext would be a NamespaceDecl or TranslationUnitDecl,
126 // which are both not indexed and less granular than we'd like for use cases
127 // like call hierarchy.
// Static: the collector options to apply are passed explicitly as `Opts`.
128 static const Decl
*getRefContainer(const Decl
*Enclosing
,
129 const SymbolCollector::Options
&Opts
);
/// Overrides the index::IndexDataConsumer hook; receives the ASTContext
/// \p Ctx by reference.
131 void initialize(ASTContext
&Ctx
) override
;
/// Overrides the index::IndexDataConsumer hook that hands over the
/// preprocessor as a std::shared_ptr.
/// NOTE(review): the body of this overload is not visible in this excerpt.
133 void setPreprocessor(std::shared_ptr
<Preprocessor
> PP
) override
{
/// Overload taking the preprocessor by reference: stores the address of
/// \p PP in the `PP` member as a raw pointer.
136 void setPreprocessor(Preprocessor
&PP
) { this->PP
= &PP
; }
/// Overrides the index::IndexDataConsumer callback for an occurrence of
/// declaration \p D with roles \p Roles and relations \p Relations.
/// NOTE(review): this declaration looks truncated in this excerpt (no return
/// type is visible, and a parameter may be missing before ASTNode) - compare
/// with the upstream header before relying on it.
139 handleDeclOccurrence(const Decl
*D
, index::SymbolRoleSet Roles
,
140 ArrayRef
<index::SymbolRelation
> Relations
,
142 index::IndexDataConsumer::ASTNodeInfo ASTNode
) override
;
/// Overrides the index::IndexDataConsumer callback for an occurrence of the
/// macro named \p Name (with definition info \p MI) at \p Loc, carrying
/// roles \p Roles.
/// NOTE(review): the meaning of the bool result is not stated here; see the
/// IndexDataConsumer documentation.
144 bool handleMacroOccurrence(const IdentifierInfo
*Name
, const MacroInfo
*MI
,
145 index::SymbolRoleSet Roles
,
146 SourceLocation Loc
) override
;
/// Processes the main-file macro information in \p MacroRefsToIndex.
/// NOTE(review): judging by the parameter name this indexes macro references
/// from the main file - confirm against the implementation.
148 void handleMacros(const MainFileMacros
&MacroRefsToIndex
);
/// Finalizes the `Symbols` builder, passed as an rvalue, and returns the
/// resulting SymbolSlab.
150 SymbolSlab
takeSymbols() { return std::move(Symbols
).build(); }
/// Finalizes the `Refs` builder, passed as an rvalue, and returns the
/// resulting RefSlab.
151 RefSlab
takeRefs() { return std::move(Refs
).build(); }
/// Finalizes the `Relations` builder, passed as an rvalue, and returns the
/// resulting RelationSlab.
152 RelationSlab
takeRelations() { return std::move(Relations
).build(); }
154 /// Returns true if we are interested in references and declarations from \p
155 /// FID. If this function returns false, bodies of functions inside that file
156 /// will be skipped to decrease indexing time.
157 bool shouldIndexFile(FileID FID
);
/// Overrides the index::IndexDataConsumer hook. Member comments in this
/// class note that include-header spellings are calculated here and that the
/// referenced-symbol set is flushed here.
159 void finish() override
;
/// Handles the declaration of a NamedDecl under the given SymbolID and
/// returns a pointer to a Symbol.
/// NOTE(review): presumably the returned pointer refers to the symbol stored
/// in `Symbols`, and may be null on failure - confirm in the implementation.
162 const Symbol
*addDeclaration(const NamedDecl
&, SymbolID
,
163 bool IsMainFileSymbol
);
/// Handles the definition of a NamedDecl whose declaration symbol is
/// \p DeclSymbol.
/// NOTE(review): the parameter list is cut off in this excerpt (it ends on a
/// trailing comma); the remaining parameters are not visible here.
164 void addDefinition(const NamedDecl
&, const Symbol
&DeclSymbol
,
/// Processes the index relations \p Relations reported for declaration
/// \p ND, whose symbol ID is \p ID.
/// NOTE(review): presumably records results in the `Relations` builder -
/// confirm in the implementation.
166 void processRelations(const NamedDecl
&ND
, const SymbolID
&ID
,
167 ArrayRef
<index::SymbolRelation
> Relations
);
/// Converts the token location \p TokLoc into a SymbolLocation; the optional
/// result is empty when no location can be produced.
/// NOTE(review): the exact failure conditions are not visible here.
169 std::optional
<SymbolLocation
> getTokenLocation(SourceLocation TokLoc
);
/// Returns the include header string for symbol \p S given a FileID, or an
/// empty optional when there is none.
/// NOTE(review): the role of the FileID argument is not documented in this
/// excerpt - confirm in the implementation.
171 std::optional
<std::string
> getIncludeHeader(const Symbol
&S
, FileID
);
/// Returns the SymbolID for declaration \p D.
/// NOTE(review): by its name this consults a cache (cf. DeclToIDCache) -
/// confirm in the implementation.
173 SymbolID
getSymbolIDCached(const Decl
*D
);
/// Macro overload: returns the SymbolID for the macro \p MacroName with
/// definition info \p MI, using \p SM.
/// NOTE(review): by its name this consults a cache (cf. MacroToIDCache) -
/// confirm in the implementation.
174 SymbolID
getSymbolIDCached(const llvm::StringRef MacroName
,
175 const MacroInfo
*MI
, const SourceManager
&SM
);
177 // All Symbols collected from the AST.
178 SymbolSlab::Builder Symbols
;
179 // File IDs used to determine if the code contains Obj-C constructs.
180 // For Obj-C symbols, these File IDs are used to compute the include
// paths. Keyed by SymbolID.
182 llvm::DenseMap
<SymbolID
, FileID
> IncludeFiles
;
// Records include-location information for symbol `S`, given a source
// location and the corresponding include_cleaner symbol `Sym`.
// NOTE(review): presumably fills the include-related maps declared next to
// this function - confirm in the implementation.
183 void setIncludeLocation(const Symbol
&S
, SourceLocation
,
184 const include_cleaner::Symbol
&Sym
);
186 // Providers for Symbol.IncludeHeaders.
187 // The final spelling is calculated in finish().
// NOTE(review): the member name of this map is missing from this excerpt.
188 llvm::DenseMap
<SymbolID
, llvm::SmallVector
<include_cleaner::Header
>>
190 // Files which contain ObjC symbols.
191 // This is finalized and used in finish().
192 llvm::DenseSet
<FileID
> FilesWithObjCConstructs
;
194 // Indexed macros, to be erased if they turned out to be include guards.
195 llvm::DenseSet
<const IdentifierInfo
*> IndexedMacros
;
196 // All refs collected from the AST. It includes:
197 // 1) symbols declared in the preamble and referenced from the main file (
198 // which is not a header), or
199 // 2) symbols declared and referenced from the main file (which is a header)
200 RefSlab::Builder Refs
;
201 // All relations collected from the AST.
202 RelationSlab::Builder Relations
;
// Raw pointer to the preprocessor; null by default and assigned by
// setPreprocessor(Preprocessor &).
204 Preprocessor
*PP
= nullptr;
// NOTE(review): allocator and TU info for code-completion data; their exact
// use is not visible in this header.
205 std::shared_ptr
<GlobalCodeCompletionAllocator
> CompletionAllocator
;
206 std::unique_ptr
<CodeCompletionTUInfo
> CompletionTUInfo
;
// NOTE(review): the two fields below appear to belong to a nested record
// (likely the SymbolRef type taken by addRef) whose enclosing declaration is
// missing from this excerpt.
211 index::SymbolRoleSet Roles
;
212 const Decl
*Container
;
// Adds the reference described by `SR` for the symbol identified by `ID`.
// NOTE(review): presumably inserts into the `Refs` builder - confirm in the
// implementation.
215 void addRef(SymbolID ID
, const SymbolRef
&SR
);
216 // Symbols referenced from the current TU, flushed on finish().
217 llvm::DenseSet
<SymbolID
> ReferencedSymbols
;
218 // Maps canonical declaration provided by clang to canonical declaration for
219 // an index symbol, if clangd prefers a different declaration than that
220 // provided by clang. For example, friend declaration might be considered
221 // canonical by clang but should not be considered canonical in the index
222 // unless it's a definition.
223 llvm::DenseMap
<const Decl
*, const Decl
*> CanonicalDecls
;
224 // Cache whether to index a file or not.
225 llvm::DenseMap
<FileID
, bool> FilesToIndexCache
;
226 // Encapsulates calculations and caches around header paths, which headers
227 // to insert for which symbol, etc.
// Only forward-declared here and held through a unique_ptr.
228 class HeaderFileURICache
;
229 std::unique_ptr
<HeaderFileURICache
> HeaderFileURIs
;
// SymbolID caches keyed by Decl pointer and by MacroInfo pointer.
// NOTE(review): presumably these back the two getSymbolIDCached() overloads -
// confirm in the implementation.
230 llvm::DenseMap
<const Decl
*, SymbolID
> DeclToIDCache
;
231 llvm::DenseMap
<const MacroInfo
*, SymbolID
> MacroToIDCache
;
234 } // namespace clangd