1 //===--- SymbolCollector.cpp -------------------------------------*- C++-*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "SymbolCollector.h"
11 #include "CodeComplete.h"
12 #include "CodeCompletionStrings.h"
13 #include "ExpectedTypes.h"
14 #include "SourceCode.h"
16 #include "clang-include-cleaner/Analysis.h"
17 #include "clang-include-cleaner/IncludeSpeller.h"
18 #include "clang-include-cleaner/Record.h"
19 #include "clang-include-cleaner/Types.h"
20 #include "index/CanonicalIncludes.h"
21 #include "index/Relation.h"
22 #include "index/Symbol.h"
23 #include "index/SymbolID.h"
24 #include "index/SymbolLocation.h"
25 #include "clang/AST/Decl.h"
26 #include "clang/AST/DeclBase.h"
27 #include "clang/AST/DeclObjC.h"
28 #include "clang/AST/DeclTemplate.h"
29 #include "clang/AST/DeclarationName.h"
30 #include "clang/AST/Expr.h"
31 #include "clang/Basic/FileEntry.h"
32 #include "clang/Basic/LangOptions.h"
33 #include "clang/Basic/SourceLocation.h"
34 #include "clang/Basic/SourceManager.h"
35 #include "clang/Index/IndexSymbol.h"
36 #include "clang/Lex/Preprocessor.h"
37 #include "clang/Lex/Token.h"
38 #include "clang/Tooling/Inclusions/HeaderAnalysis.h"
39 #include "clang/Tooling/Inclusions/StandardLibrary.h"
40 #include "llvm/ADT/ArrayRef.h"
41 #include "llvm/ADT/DenseMap.h"
42 #include "llvm/ADT/SmallVector.h"
43 #include "llvm/ADT/StringRef.h"
44 #include "llvm/Support/ErrorHandling.h"
45 #include "llvm/Support/FileSystem.h"
46 #include "llvm/Support/Path.h"
57 /// If \p ND is a template specialization, returns the described template.
58 /// Otherwise, returns \p ND.
59 const NamedDecl
&getTemplateOrThis(const NamedDecl
&ND
) {
60 if (auto *T
= ND
.getDescribedTemplate())
65 // Checks whether the decl is a private symbol in a header generated by
67 // FIXME: make filtering extensible when there are more use cases for symbol
69 bool isPrivateProtoDecl(const NamedDecl
&ND
) {
70 const auto &SM
= ND
.getASTContext().getSourceManager();
71 if (!isProtoFile(nameLocation(ND
, SM
), SM
))
74 // ND without identifier can be operators.
75 if (ND
.getIdentifier() == nullptr)
77 auto Name
= ND
.getIdentifier()->getName();
78 if (!Name
.contains('_'))
80 // Nested proto entities (e.g. Message::Nested) have top-level decls
81 // that shouldn't be used (Message_Nested). Ignore them completely.
82 // The nested entities are dangling type aliases, we may want to reconsider
83 // including them in the future.
84 // For enum constants, SOME_ENUM_CONSTANT is not private and should be
85 // indexed. Outer_INNER is private. This heuristic relies on naming style, it
86 // will include OUTER_INNER and exclude some_enum_constant.
87 // FIXME: the heuristic relies on naming style (i.e. no underscore in
88 // user-defined names) and can be improved.
89 return (ND
.getKind() != Decl::EnumConstant
) || llvm::any_of(Name
, islower
);
92 // We only collect #include paths for symbols that are suitable for global code
93 // completion, except for namespaces since #include path for a namespace is hard
95 Symbol::IncludeDirective
shouldCollectIncludePath(index::SymbolKind Kind
) {
96 using SK
= index::SymbolKind
;
107 case SK::EnumConstant
:
109 return Symbol::Include
| Symbol::Import
;
111 return Symbol::Import
;
113 return Symbol::Invalid
;
117 // Return the symbol range of the token at \p TokLoc.
118 std::pair
<SymbolLocation::Position
, SymbolLocation::Position
>
119 getTokenRange(SourceLocation TokLoc
, const SourceManager
&SM
,
120 const LangOptions
&LangOpts
) {
121 auto CreatePosition
= [&SM
](SourceLocation Loc
) {
122 auto LSPLoc
= sourceLocToPosition(SM
, Loc
);
123 SymbolLocation::Position Pos
;
124 Pos
.setLine(LSPLoc
.line
);
125 Pos
.setColumn(LSPLoc
.character
);
129 auto TokenLength
= clang::Lexer::MeasureTokenLength(TokLoc
, SM
, LangOpts
);
130 return {CreatePosition(TokLoc
),
131 CreatePosition(TokLoc
.getLocWithOffset(TokenLength
))};
134 // Checks whether \p ND is a good candidate to be the *canonical* declaration of
135 // its symbol (e.g. a go-to-declaration target). This overrides the default of
136 // using Clang's canonical declaration, which is the first in the TU.
138 // Example: preferring a class declaration over its forward declaration.
139 bool isPreferredDeclaration(const NamedDecl
&ND
, index::SymbolRoleSet Roles
) {
140 const auto &SM
= ND
.getASTContext().getSourceManager();
141 if (isa
<TagDecl
>(ND
))
142 return (Roles
& static_cast<unsigned>(index::SymbolRole::Definition
)) &&
143 !isInsideMainFile(ND
.getLocation(), SM
);
144 if (const auto *ID
= dyn_cast
<ObjCInterfaceDecl
>(&ND
))
145 return ID
->isThisDeclarationADefinition();
146 if (const auto *PD
= dyn_cast
<ObjCProtocolDecl
>(&ND
))
147 return PD
->isThisDeclarationADefinition();
151 RefKind
toRefKind(index::SymbolRoleSet Roles
, bool Spelled
= false) {
152 RefKind Result
= RefKind::Unknown
;
153 if (Roles
& static_cast<unsigned>(index::SymbolRole::Declaration
))
154 Result
|= RefKind::Declaration
;
155 if (Roles
& static_cast<unsigned>(index::SymbolRole::Definition
))
156 Result
|= RefKind::Definition
;
157 if (Roles
& static_cast<unsigned>(index::SymbolRole::Reference
))
158 Result
|= RefKind::Reference
;
160 Result
|= RefKind::Spelled
;
164 std::optional
<RelationKind
> indexableRelation(const index::SymbolRelation
&R
) {
165 if (R
.Roles
& static_cast<unsigned>(index::SymbolRole::RelationBaseOf
))
166 return RelationKind::BaseOf
;
167 if (R
.Roles
& static_cast<unsigned>(index::SymbolRole::RelationOverrideOf
))
168 return RelationKind::OverriddenBy
;
172 // Check if there is an exact spelling of \p ND at \p Loc.
173 bool isSpelled(SourceLocation Loc
, const NamedDecl
&ND
) {
174 auto Name
= ND
.getDeclName();
175 const auto NameKind
= Name
.getNameKind();
176 if (NameKind
!= DeclarationName::Identifier
&&
177 NameKind
!= DeclarationName::CXXConstructorName
)
179 const auto &AST
= ND
.getASTContext();
180 const auto &SM
= AST
.getSourceManager();
181 const auto &LO
= AST
.getLangOpts();
183 if (clang::Lexer::getRawToken(Loc
, Tok
, SM
, LO
))
185 auto StrName
= Name
.getAsString();
186 return clang::Lexer::getSpelling(Tok
, SM
, LO
) == StrName
;
190 // Encapsulates decisions about how to record header paths in the index,
191 // including filename normalization, URI conversion etc.
192 // Expensive checks are cached internally.
193 class SymbolCollector::HeaderFileURICache
{
194 struct FrameworkUmbrellaSpelling
{
195 // Spelling for the public umbrella header, e.g. <Foundation/Foundation.h>
196 std::optional
<std::string
> PublicHeader
;
197 // Spelling for the private umbrella header, e.g.
198 // <Foundation/Foundation_Private.h>
199 std::optional
<std::string
> PrivateHeader
;
201 // Weird double-indirect access to PP, which might not be ready yet when
202 // HeaderFiles is created but will be by the time it's used.
203 // (IndexDataConsumer::setPreprocessor can happen before or after initialize)
205 const SourceManager
&SM
;
206 const include_cleaner::PragmaIncludes
*PI
;
207 llvm::StringRef FallbackDir
;
208 llvm::DenseMap
<const FileEntry
*, const std::string
*> CacheFEToURI
;
209 llvm::StringMap
<std::string
> CachePathToURI
;
210 llvm::DenseMap
<FileID
, llvm::StringRef
> CacheFIDToInclude
;
211 llvm::StringMap
<std::string
> CachePathToFrameworkSpelling
;
212 llvm::StringMap
<FrameworkUmbrellaSpelling
>
213 CacheFrameworkToUmbrellaHeaderSpelling
;
216 HeaderFileURICache(Preprocessor
*&PP
, const SourceManager
&SM
,
217 const SymbolCollector::Options
&Opts
)
218 : PP(PP
), SM(SM
), PI(Opts
.PragmaIncludes
), FallbackDir(Opts
.FallbackDir
) {
221 // Returns a canonical URI for the file \p FE.
222 // We attempt to make the path absolute first.
223 const std::string
&toURI(const FileEntryRef FE
) {
224 auto R
= CacheFEToURI
.try_emplace(FE
);
226 auto CanonPath
= getCanonicalPath(FE
, SM
.getFileManager());
227 R
.first
->second
= &toURIInternal(CanonPath
? *CanonPath
: FE
.getName());
229 return *R
.first
->second
;
232 // Returns a canonical URI for \p Path.
233 // If the file is in the FileManager, use that to canonicalize the path.
234 // We attempt to make the path absolute in any case.
235 const std::string
&toURI(llvm::StringRef Path
) {
236 if (auto File
= SM
.getFileManager().getFileRef(Path
))
238 return toURIInternal(Path
);
241 // Gets a canonical include (URI of the header or <header> or "header") for
242 // header of \p FID (which should usually be the *expansion* file).
243 // This does not account for any per-symbol overrides!
244 // Returns "" if includes should not be inserted for this file.
245 llvm::StringRef
getIncludeHeader(FileID FID
) {
246 auto R
= CacheFIDToInclude
.try_emplace(FID
);
248 R
.first
->second
= getIncludeHeaderUncached(FID
);
249 return R
.first
->second
;
252 // If a file is mapped by canonical headers, use that mapping, regardless
253 // of whether it's an otherwise-good header (header guards etc).
254 llvm::StringRef
mapCanonical(llvm::StringRef HeaderPath
) {
257 // Populate the system header mapping as late as possible to
258 // ensure the preprocessor has been set already.
259 CanonicalIncludes SysHeaderMapping
;
260 SysHeaderMapping
.addSystemHeadersMapping(PP
->getLangOpts());
261 auto Canonical
= SysHeaderMapping
.mapHeader(HeaderPath
);
262 if (Canonical
.empty())
264 // If we had a mapping, always use it.
265 assert(Canonical
.startswith("<") || Canonical
.startswith("\""));
270 // This takes care of making paths absolute and path->URI caching, but no
271 // FileManager-based canonicalization.
272 const std::string
&toURIInternal(llvm::StringRef Path
) {
273 auto R
= CachePathToURI
.try_emplace(Path
);
275 llvm::SmallString
<256> AbsPath
= Path
;
276 if (!llvm::sys::path::is_absolute(AbsPath
) && !FallbackDir
.empty())
277 llvm::sys::fs::make_absolute(FallbackDir
, AbsPath
);
278 assert(llvm::sys::path::is_absolute(AbsPath
) &&
279 "If the VFS can't make paths absolute, a FallbackDir must be "
281 llvm::sys::path::remove_dots(AbsPath
, /*remove_dot_dot=*/true);
282 R
.first
->second
= URI::create(AbsPath
).toString();
284 return R
.first
->second
;
287 struct FrameworkHeaderPath
{
288 // Path to the framework directory containing the Headers/PrivateHeaders
289 // directories e.g. /Frameworks/Foundation.framework/
290 llvm::StringRef HeadersParentDir
;
291 // Subpath relative to the Headers or PrivateHeaders dir, e.g. NSObject.h
292 // Note: This is NOT relative to the `HeadersParentDir`.
293 llvm::StringRef HeaderSubpath
;
294 // Whether this header is under the PrivateHeaders dir
295 bool IsPrivateHeader
;
298 std::optional
<FrameworkHeaderPath
>
299 splitFrameworkHeaderPath(llvm::StringRef Path
) {
300 using namespace llvm::sys
;
301 path::reverse_iterator I
= path::rbegin(Path
);
302 path::reverse_iterator Prev
= I
;
303 path::reverse_iterator E
= path::rend(Path
);
305 if (*I
== "Headers") {
306 FrameworkHeaderPath HeaderPath
;
307 HeaderPath
.HeadersParentDir
= Path
.substr(0, I
- E
);
308 HeaderPath
.HeaderSubpath
= Path
.substr(Prev
- E
);
309 HeaderPath
.IsPrivateHeader
= false;
312 if (*I
== "PrivateHeaders") {
313 FrameworkHeaderPath HeaderPath
;
314 HeaderPath
.HeadersParentDir
= Path
.substr(0, I
- E
);
315 HeaderPath
.HeaderSubpath
= Path
.substr(Prev
- E
);
316 HeaderPath
.IsPrivateHeader
= true;
322 // Unexpected, must not be a framework header.
326 // Frameworks typically have an umbrella header of the same name, e.g.
327 // <Foundation/Foundation.h> instead of <Foundation/NSObject.h> or
328 // <Foundation/Foundation_Private.h> instead of
329 // <Foundation/NSObject_Private.h> which should be used instead of directly
330 // importing the header.
331 std::optional
<std::string
>
332 getFrameworkUmbrellaSpelling(llvm::StringRef Framework
,
333 const HeaderSearch
&HS
,
334 FrameworkHeaderPath
&HeaderPath
) {
335 auto Res
= CacheFrameworkToUmbrellaHeaderSpelling
.try_emplace(Framework
);
336 auto *CachedSpelling
= &Res
.first
->second
;
338 return HeaderPath
.IsPrivateHeader
? CachedSpelling
->PrivateHeader
339 : CachedSpelling
->PublicHeader
;
341 SmallString
<256> UmbrellaPath(HeaderPath
.HeadersParentDir
);
342 llvm::sys::path::append(UmbrellaPath
, "Headers", Framework
+ ".h");
344 llvm::vfs::Status Status
;
345 auto StatErr
= HS
.getFileMgr().getNoncachedStatValue(UmbrellaPath
, Status
);
347 CachedSpelling
->PublicHeader
= llvm::formatv("<{0}/{0}.h>", Framework
);
349 UmbrellaPath
= HeaderPath
.HeadersParentDir
;
350 llvm::sys::path::append(UmbrellaPath
, "PrivateHeaders",
351 Framework
+ "_Private.h");
353 StatErr
= HS
.getFileMgr().getNoncachedStatValue(UmbrellaPath
, Status
);
355 CachedSpelling
->PrivateHeader
=
356 llvm::formatv("<{0}/{0}_Private.h>", Framework
);
358 return HeaderPath
.IsPrivateHeader
? CachedSpelling
->PrivateHeader
359 : CachedSpelling
->PublicHeader
;
362 // Compute the framework include spelling for `FE` which is in a framework
363 // named `Framework`, e.g. `NSObject.h` in framework `Foundation` would
364 // give <Foundation/Foundation.h> if the umbrella header exists, otherwise
365 // <Foundation/NSObject.h>.
366 std::optional
<llvm::StringRef
>
367 getFrameworkHeaderIncludeSpelling(FileEntryRef FE
, llvm::StringRef Framework
,
369 auto Res
= CachePathToFrameworkSpelling
.try_emplace(FE
.getName());
370 auto *CachedHeaderSpelling
= &Res
.first
->second
;
372 return llvm::StringRef(*CachedHeaderSpelling
);
374 auto HeaderPath
= splitFrameworkHeaderPath(FE
.getName());
376 // Unexpected: must not be a proper framework header, don't cache the
378 CachePathToFrameworkSpelling
.erase(Res
.first
);
381 if (auto UmbrellaSpelling
=
382 getFrameworkUmbrellaSpelling(Framework
, HS
, *HeaderPath
)) {
383 *CachedHeaderSpelling
= *UmbrellaSpelling
;
384 return llvm::StringRef(*CachedHeaderSpelling
);
387 *CachedHeaderSpelling
=
388 llvm::formatv("<{0}/{1}>", Framework
, HeaderPath
->HeaderSubpath
).str();
389 return llvm::StringRef(*CachedHeaderSpelling
);
392 llvm::StringRef
getIncludeHeaderUncached(FileID FID
) {
393 const auto FE
= SM
.getFileEntryRefForID(FID
);
394 if (!FE
|| FE
->getName().empty())
397 if (auto Verbatim
= PI
->getPublic(*FE
); !Verbatim
.empty())
400 llvm::StringRef Filename
= FE
->getName();
401 if (auto Canonical
= mapCanonical(Filename
); !Canonical
.empty())
404 // Framework headers are spelled as <FrameworkName/Foo.h>, not
405 // "path/FrameworkName.framework/Headers/Foo.h".
406 auto &HS
= PP
->getHeaderSearchInfo();
407 if (const auto *HFI
= HS
.getExistingFileInfo(*FE
, /*WantExternal*/ false))
408 if (!HFI
->Framework
.empty())
410 getFrameworkHeaderIncludeSpelling(*FE
, HFI
->Framework
, HS
))
413 if (!tooling::isSelfContainedHeader(*FE
, PP
->getSourceManager(),
414 PP
->getHeaderSearchInfo())) {
415 // A .inc or .def file is often included into a real header to define
416 // symbols (e.g. LLVM tablegen files).
417 if (Filename
.endswith(".inc") || Filename
.endswith(".def"))
418 // Don't use cache reentrantly due to iterator invalidation.
419 return getIncludeHeaderUncached(SM
.getFileID(SM
.getIncludeLoc(FID
)));
420 // Conservatively refuse to insert #includes to files without guards.
423 // Standard case: just insert the file itself.
428 // Return the symbol location of the token at \p TokLoc.
429 std::optional
<SymbolLocation
>
430 SymbolCollector::getTokenLocation(SourceLocation TokLoc
) {
431 const auto &SM
= ASTCtx
->getSourceManager();
432 const auto FE
= SM
.getFileEntryRefForID(SM
.getFileID(TokLoc
));
436 SymbolLocation Result
;
437 Result
.FileURI
= HeaderFileURIs
->toURI(*FE
).c_str();
438 auto Range
= getTokenRange(TokLoc
, SM
, ASTCtx
->getLangOpts());
439 Result
.Start
= Range
.first
;
440 Result
.End
= Range
.second
;
// Constructs a collector from \p Opts. The options are taken by value and
// moved into the Opts member, so the caller's copy is not referenced later.
445 SymbolCollector::SymbolCollector(Options Opts
) : Opts(std::move(Opts
)) {}
// Defaulted destructor, defined in this file rather than in the header.
// NOTE(review): presumably this is out of line so that members whose types
// are only completed in this file (e.g. HeaderFileURICache) can be destroyed;
// confirm against the header.
446 SymbolCollector::~SymbolCollector() = default;
448 void SymbolCollector::initialize(ASTContext
&Ctx
) {
450 HeaderFileURIs
= std::make_unique
<HeaderFileURICache
>(
451 this->PP
, ASTCtx
->getSourceManager(), Opts
);
452 CompletionAllocator
= std::make_shared
<GlobalCodeCompletionAllocator
>();
454 std::make_unique
<CodeCompletionTUInfo
>(CompletionAllocator
);
457 bool SymbolCollector::shouldCollectSymbol(const NamedDecl
&ND
,
458 const ASTContext
&ASTCtx
,
460 bool IsMainFileOnly
) {
461 // Skip anonymous declarations, e.g. (anonymous enum/class/struct).
462 if (ND
.getDeclName().isEmpty())
465 // Skip main-file symbols if we are not collecting them.
466 if (IsMainFileOnly
&& !Opts
.CollectMainFileSymbols
)
469 // Skip symbols in anonymous namespaces in header files.
470 if (!IsMainFileOnly
&& ND
.isInAnonymousNamespace())
473 // For function-local symbols, index only classes and their member functions.
474 if (index::isFunctionLocalSymbol(&ND
))
475 return isa
<RecordDecl
>(ND
) ||
476 (ND
.isCXXInstanceMember() && ND
.isFunctionOrFunctionTemplate());
478 // We want most things but not "local" symbols such as symbols inside
479 // FunctionDecl, BlockDecl, ObjCMethodDecl and OMPDeclareReductionDecl.
480 // FIXME: Need a matcher for ExportDecl in order to include symbols declared
482 const auto *DeclCtx
= ND
.getDeclContext();
483 switch (DeclCtx
->getDeclKind()) {
484 case Decl::TranslationUnit
:
485 case Decl::Namespace
:
486 case Decl::LinkageSpec
:
488 case Decl::ObjCProtocol
:
489 case Decl::ObjCInterface
:
490 case Decl::ObjCCategory
:
491 case Decl::ObjCCategoryImpl
:
492 case Decl::ObjCImplementation
:
495 // Record has a few derivations (e.g. CXXRecord, Class specialization), it's
497 if (!isa
<RecordDecl
>(DeclCtx
))
501 // Avoid indexing internal symbols in protobuf generated headers.
502 if (isPrivateProtoDecl(ND
))
504 if (!Opts
.CollectReserved
&&
505 (hasReservedName(ND
) || hasReservedScope(*ND
.getDeclContext())) &&
506 ASTCtx
.getSourceManager().isInSystemHeader(ND
.getLocation()))
513 SymbolCollector::getRefContainer(const Decl
*Enclosing
,
514 const SymbolCollector::Options
&Opts
) {
516 const auto *ND
= dyn_cast
<NamedDecl
>(Enclosing
);
517 if (ND
&& shouldCollectSymbol(*ND
, ND
->getASTContext(), Opts
, true)) {
520 Enclosing
= dyn_cast_or_null
<Decl
>(Enclosing
->getDeclContext());
525 // Always return true to continue indexing.
526 bool SymbolCollector::handleDeclOccurrence(
527 const Decl
*D
, index::SymbolRoleSet Roles
,
528 llvm::ArrayRef
<index::SymbolRelation
> Relations
, SourceLocation Loc
,
529 index::IndexDataConsumer::ASTNodeInfo ASTNode
) {
530 assert(ASTCtx
&& PP
&& HeaderFileURIs
);
531 assert(CompletionAllocator
&& CompletionTUInfo
);
532 assert(ASTNode
.OrigD
);
533 // Indexing API puts canonical decl into D, which might not have a valid
534 // source location for implicit/built-in decls. Fallback to original decl in
536 if (D
->getLocation().isInvalid())
538 // If OrigD is a declaration associated with a friend declaration and it's
539 // not a definition, skip it. Note that OrigD is the occurrence that the
540 // collector is currently visiting.
541 if ((ASTNode
.OrigD
->getFriendObjectKind() !=
542 Decl::FriendObjectKind::FOK_None
) &&
543 !(Roles
& static_cast<unsigned>(index::SymbolRole::Definition
)))
545 // A declaration created for a friend declaration should not be used as the
546 // canonical declaration in the index. Use OrigD instead, unless we've already
547 // picked a replacement for D
548 if (D
->getFriendObjectKind() != Decl::FriendObjectKind::FOK_None
)
549 D
= CanonicalDecls
.try_emplace(D
, ASTNode
.OrigD
).first
->second
;
550 // Flag to mark that D should be considered canonical meaning its declaration
551 // will override any previous declaration for the Symbol.
552 bool DeclIsCanonical
= false;
553 // Avoid treating ObjCImplementationDecl as a canonical declaration if it has
554 // a corresponding non-implicit and non-forward declared ObjCInterfaceDecl.
555 if (const auto *IID
= dyn_cast
<ObjCImplementationDecl
>(D
)) {
556 DeclIsCanonical
= true;
557 if (const auto *CID
= IID
->getClassInterface())
558 if (const auto *DD
= CID
->getDefinition())
559 if (!DD
->isImplicitInterfaceDecl())
562 // Avoid treating ObjCCategoryImplDecl as a canonical declaration in favor of
563 // its ObjCCategoryDecl if it has one.
564 if (const auto *CID
= dyn_cast
<ObjCCategoryImplDecl
>(D
)) {
565 DeclIsCanonical
= true;
566 if (const auto *CD
= CID
->getCategoryDecl())
569 const NamedDecl
*ND
= dyn_cast
<NamedDecl
>(D
);
573 auto ID
= getSymbolIDCached(ND
);
577 // Mark D as referenced if this is a reference coming from the main file.
578 // D may not be an interesting symbol, but it's cheaper to check at the end.
579 auto &SM
= ASTCtx
->getSourceManager();
580 if (Opts
.CountReferences
&&
581 (Roles
& static_cast<unsigned>(index::SymbolRole::Reference
)) &&
582 SM
.getFileID(SM
.getSpellingLoc(Loc
)) == SM
.getMainFileID())
583 ReferencedSymbols
.insert(ID
);
585 // ND is the canonical (i.e. first) declaration. If it's in the main file
586 // (which is not a header), then no public declaration was visible, so assume
587 // it's main-file only.
588 bool IsMainFileOnly
=
589 SM
.isWrittenInMainFile(SM
.getExpansionLoc(ND
->getBeginLoc())) &&
590 !isHeaderFile(SM
.getFileEntryRefForID(SM
.getMainFileID())->getName(),
591 ASTCtx
->getLangOpts());
592 // In C, printf is a redecl of an implicit builtin! So check OrigD instead.
593 if (ASTNode
.OrigD
->isImplicit() ||
594 !shouldCollectSymbol(*ND
, *ASTCtx
, Opts
, IsMainFileOnly
))
597 // Note: we need to process relations for all decl occurrences, including
598 // refs, because the indexing code only populates relations for specific
599 // occurrences. For example, RelationBaseOf is only populated for the
600 // occurrence inside the base-specifier.
601 processRelations(*ND
, ID
, Relations
);
603 bool CollectRef
= static_cast<bool>(Opts
.RefFilter
& toRefKind(Roles
));
604 // Unlike other fields, e.g. Symbols (which use spelling locations), we use
605 // file locations for references (as it aligns the behavior of clangd's
607 // FIXME: we should try to use the file locations for other fields.
609 (!IsMainFileOnly
|| Opts
.CollectMainFileRefs
||
610 ND
->isExternallyVisible()) &&
611 !isa
<NamespaceDecl
>(ND
)) {
612 auto FileLoc
= SM
.getFileLoc(Loc
);
613 auto FID
= SM
.getFileID(FileLoc
);
614 if (Opts
.RefsInHeaders
|| FID
== SM
.getMainFileID()) {
615 addRef(ID
, SymbolRef
{FileLoc
, FID
, Roles
,
616 getRefContainer(ASTNode
.Parent
, Opts
),
617 isSpelled(FileLoc
, *ND
)});
620 // Don't continue indexing if this is a mere reference.
621 if (!(Roles
& (static_cast<unsigned>(index::SymbolRole::Declaration
) |
622 static_cast<unsigned>(index::SymbolRole::Definition
))))
625 // FIXME: ObjCPropertyDecl are not properly indexed here:
626 // - ObjCPropertyDecl may have an OrigD of ObjCPropertyImplDecl, which is
628 auto *OriginalDecl
= dyn_cast
<NamedDecl
>(ASTNode
.OrigD
);
632 const Symbol
*BasicSymbol
= Symbols
.find(ID
);
633 if (isPreferredDeclaration(*OriginalDecl
, Roles
))
634 // If OriginalDecl is preferred, replace/create the existing canonical
635 // declaration (e.g. a class forward declaration). There should be at most
636 // one duplicate as we expect to see only one preferred declaration per
637 // TU, because in practice they are definitions.
638 BasicSymbol
= addDeclaration(*OriginalDecl
, std::move(ID
), IsMainFileOnly
);
639 else if (!BasicSymbol
|| DeclIsCanonical
)
640 BasicSymbol
= addDeclaration(*ND
, std::move(ID
), IsMainFileOnly
);
642 if (Roles
& static_cast<unsigned>(index::SymbolRole::Definition
))
643 addDefinition(*OriginalDecl
, *BasicSymbol
);
648 void SymbolCollector::handleMacros(const MainFileMacros
&MacroRefsToIndex
) {
649 assert(HeaderFileURIs
&& PP
);
650 const auto &SM
= PP
->getSourceManager();
651 const auto MainFileEntryRef
= SM
.getFileEntryRefForID(SM
.getMainFileID());
652 assert(MainFileEntryRef
);
654 const std::string
&MainFileURI
= HeaderFileURIs
->toURI(*MainFileEntryRef
);
655 // Add macro references.
656 for (const auto &IDToRefs
: MacroRefsToIndex
.MacroRefs
) {
657 for (const auto &MacroRef
: IDToRefs
.second
) {
658 const auto &Range
= MacroRef
.toRange(SM
);
659 bool IsDefinition
= MacroRef
.IsDefinition
;
661 R
.Location
.Start
.setLine(Range
.start
.line
);
662 R
.Location
.Start
.setColumn(Range
.start
.character
);
663 R
.Location
.End
.setLine(Range
.end
.line
);
664 R
.Location
.End
.setColumn(Range
.end
.character
);
665 R
.Location
.FileURI
= MainFileURI
.c_str();
666 R
.Kind
= IsDefinition
? RefKind::Definition
: RefKind::Reference
;
667 Refs
.insert(IDToRefs
.first
, R
);
670 S
.ID
= IDToRefs
.first
;
671 auto StartLoc
= cantFail(sourceLocationInMainFile(SM
, Range
.start
));
672 auto EndLoc
= cantFail(sourceLocationInMainFile(SM
, Range
.end
));
673 S
.Name
= toSourceCode(SM
, SourceRange(StartLoc
, EndLoc
));
674 S
.SymInfo
.Kind
= index::SymbolKind::Macro
;
675 S
.SymInfo
.SubKind
= index::SymbolSubKind::None
;
676 S
.SymInfo
.Properties
= index::SymbolPropertySet();
677 S
.SymInfo
.Lang
= index::SymbolLanguage::C
;
678 S
.Origin
= Opts
.Origin
;
679 S
.CanonicalDeclaration
= R
.Location
;
680 // Make the macro visible for code completion if main file is an
681 // include-able header.
682 if (!HeaderFileURIs
->getIncludeHeader(SM
.getMainFileID()).empty()) {
683 S
.Flags
|= Symbol::IndexedForCodeCompletion
;
684 S
.Flags
|= Symbol::VisibleOutsideFile
;
692 bool SymbolCollector::handleMacroOccurrence(const IdentifierInfo
*Name
,
694 index::SymbolRoleSet Roles
,
695 SourceLocation Loc
) {
697 // Builtin macros don't have useful locations and aren't needed in completion.
698 if (MI
->isBuiltinMacro())
701 const auto &SM
= PP
->getSourceManager();
702 auto DefLoc
= MI
->getDefinitionLoc();
703 // Also avoid storing macros that aren't defined in any file, i.e. predefined
704 // macros like __DBL_MIN__ and those defined on the command line.
705 if (SM
.isWrittenInBuiltinFile(DefLoc
) ||
706 SM
.isWrittenInCommandLineFile(DefLoc
) ||
707 Name
->getName() == "__GCC_HAVE_DWARF2_CFI_ASM")
710 auto ID
= getSymbolIDCached(Name
->getName(), MI
, SM
);
714 auto SpellingLoc
= SM
.getSpellingLoc(Loc
);
715 bool IsMainFileOnly
=
716 SM
.isInMainFile(SM
.getExpansionLoc(DefLoc
)) &&
717 !isHeaderFile(SM
.getFileEntryRefForID(SM
.getMainFileID())->getName(),
718 ASTCtx
->getLangOpts());
719 // Do not store references to main-file macros.
720 if ((static_cast<unsigned>(Opts
.RefFilter
) & Roles
) && !IsMainFileOnly
&&
721 (Opts
.RefsInHeaders
|| SM
.getFileID(SpellingLoc
) == SM
.getMainFileID())) {
722 // FIXME: Populate container information for macro references.
723 // FIXME: All MacroRefs are marked as Spelled now, but this should be
725 addRef(ID
, SymbolRef
{Loc
, SM
.getFileID(Loc
), Roles
, /*Container=*/nullptr,
730 if (!Opts
.CollectMacro
)
733 // Skip main-file macros if we are not collecting them.
734 if (IsMainFileOnly
&& !Opts
.CollectMainFileSymbols
)
737 // Mark the macro as referenced if this is a reference coming from the main
738 // file. The macro may not be an interesting symbol, but it's cheaper to check
740 if (Opts
.CountReferences
&&
741 (Roles
& static_cast<unsigned>(index::SymbolRole::Reference
)) &&
742 SM
.getFileID(SpellingLoc
) == SM
.getMainFileID())
743 ReferencedSymbols
.insert(ID
);
745 // Don't continue indexing if this is a mere reference.
746 // FIXME: remove macro with ID if it is undefined.
747 if (!(Roles
& static_cast<unsigned>(index::SymbolRole::Declaration
) ||
748 Roles
& static_cast<unsigned>(index::SymbolRole::Definition
)))
751 // Only collect one instance in case there are multiple.
752 if (Symbols
.find(ID
) != nullptr)
756 S
.ID
= std::move(ID
);
757 S
.Name
= Name
->getName();
758 if (!IsMainFileOnly
) {
759 S
.Flags
|= Symbol::IndexedForCodeCompletion
;
760 S
.Flags
|= Symbol::VisibleOutsideFile
;
762 S
.SymInfo
= index::getSymbolInfoForMacro(*MI
);
763 S
.Origin
= Opts
.Origin
;
764 // FIXME: use the result to filter out symbols.
765 shouldIndexFile(SM
.getFileID(Loc
));
766 if (auto DeclLoc
= getTokenLocation(DefLoc
))
767 S
.CanonicalDeclaration
= *DeclLoc
;
769 CodeCompletionResult
SymbolCompletion(Name
);
770 const auto *CCS
= SymbolCompletion
.CreateCodeCompletionStringForMacro(
771 *PP
, *CompletionAllocator
, *CompletionTUInfo
);
772 std::string Signature
;
773 std::string SnippetSuffix
;
774 getSignature(*CCS
, &Signature
, &SnippetSuffix
, SymbolCompletion
.Kind
,
775 SymbolCompletion
.CursorKind
);
776 S
.Signature
= Signature
;
777 S
.CompletionSnippetSuffix
= SnippetSuffix
;
779 IndexedMacros
.insert(Name
);
781 setIncludeLocation(S
, DefLoc
, include_cleaner::Macro
{Name
, DefLoc
});
786 void SymbolCollector::processRelations(
787 const NamedDecl
&ND
, const SymbolID
&ID
,
788 ArrayRef
<index::SymbolRelation
> Relations
) {
789 for (const auto &R
: Relations
) {
790 auto RKind
= indexableRelation(R
);
793 const Decl
*Object
= R
.RelatedSymbol
;
795 auto ObjectID
= getSymbolIDCached(Object
);
799 // Record the relation.
800 // TODO: There may be cases where the object decl is not indexed for some
801 // reason. Those cases should probably be removed in due course, but for
802 // now there are two possible ways to handle it:
803 // (A) Avoid storing the relation in such cases.
804 // (B) Store it anyways. Clients will likely lookup() the SymbolID
805 // in the index and find nothing, but that's a situation they
806 // probably need to handle for other reasons anyways.
807 // We currently do (B) because it's simpler.
808 if (*RKind
== RelationKind::BaseOf
)
809 this->Relations
.insert({ID
, *RKind
, ObjectID
});
810 else if (*RKind
== RelationKind::OverriddenBy
)
811 this->Relations
.insert({ObjectID
, *RKind
, ID
});
815 void SymbolCollector::setIncludeLocation(const Symbol
&S
, SourceLocation DefLoc
,
816 const include_cleaner::Symbol
&Sym
) {
817 const auto &SM
= PP
->getSourceManager();
818 if (!Opts
.CollectIncludePath
||
819 shouldCollectIncludePath(S
.SymInfo
.Kind
) == Symbol::Invalid
)
822 // Use the expansion location to get the #include header since this is
823 // where the symbol is exposed.
824 IncludeFiles
[S
.ID
] = SM
.getDecomposedExpansionLoc(DefLoc
).first
;
826 // We update providers for a symbol with each occurrence, as SymbolCollector
827 // might run while parsing, rather than at the end of a translation unit.
828 // Hence we see more and more redecls over time.
829 auto [It
, Inserted
] = SymbolProviders
.try_emplace(S
.ID
);
831 include_cleaner::headersForSymbol(Sym
, SM
, Opts
.PragmaIncludes
);
835 auto *HeadersIter
= Headers
.begin();
836 include_cleaner::Header H
= *HeadersIter
;
837 while (HeadersIter
!= Headers
.end() &&
838 H
.kind() == include_cleaner::Header::Physical
&&
839 !tooling::isSelfContainedHeader(H
.physical(), SM
,
840 PP
->getHeaderSearchInfo())) {
847 llvm::StringRef
getStdHeader(const Symbol
*S
, const LangOptions
&LangOpts
) {
848 tooling::stdlib::Lang Lang
= tooling::stdlib::Lang::CXX
;
850 Lang
= tooling::stdlib::Lang::C
;
851 else if(!LangOpts
.CPlusPlus
)
854 if (S
->Scope
== "std::" && S
->Name
== "move") {
855 if (!S
->Signature
.contains(','))
857 return "<algorithm>";
860 if (auto StdSym
= tooling::stdlib::Symbol::named(S
->Scope
, S
->Name
, Lang
))
861 if (auto Header
= StdSym
->header())
862 return Header
->name();
866 void SymbolCollector::finish() {
867 // At the end of the TU, add 1 to the refcount of all referenced symbols.
868 for (const auto &ID
: ReferencedSymbols
) {
869 if (const auto *S
= Symbols
.find(ID
)) {
870 // SymbolSlab::Builder returns const symbols because strings are interned
871 // and modifying returned symbols without inserting again wouldn't go
872 // well. const_cast is safe here as we're modifying data owned by the
873 // Symbol. This reduces time spent in SymbolCollector by ~1%.
874 ++const_cast<Symbol
*>(S
)->References
;
877 if (Opts
.CollectMacro
) {
879 // First, drop header guards. We can't identify these until EOF.
880 for (const IdentifierInfo
*II
: IndexedMacros
) {
881 if (const auto *MI
= PP
->getMacroDefinition(II
).getMacroInfo())
883 getSymbolIDCached(II
->getName(), MI
, PP
->getSourceManager()))
884 if (MI
->isUsedForHeaderGuard())
888 llvm::DenseMap
<FileID
, bool> FileToContainsImportsOrObjC
;
889 llvm::DenseMap
<include_cleaner::Header
, std::string
> HeaderSpelling
;
890 // Fill in IncludeHeaders.
891 // We delay this until end of TU so header guards are all resolved.
892 for (const auto &[SID
, OptionalProvider
] : SymbolProviders
) {
893 const Symbol
*S
= Symbols
.find(SID
);
896 assert(IncludeFiles
.contains(SID
));
898 const auto FID
= IncludeFiles
.at(SID
);
899 // Determine if the FID is #include'd or #import'ed.
900 Symbol::IncludeDirective Directives
= Symbol::Invalid
;
901 auto CollectDirectives
= shouldCollectIncludePath(S
->SymInfo
.Kind
);
902 if ((CollectDirectives
& Symbol::Include
) != 0)
903 Directives
|= Symbol::Include
;
904 // Only allow #import for symbols from ObjC-like files.
905 if ((CollectDirectives
& Symbol::Import
) != 0) {
906 auto [It
, Inserted
] = FileToContainsImportsOrObjC
.try_emplace(FID
);
908 It
->second
= FilesWithObjCConstructs
.contains(FID
) ||
909 tooling::codeContainsImports(
910 ASTCtx
->getSourceManager().getBufferData(FID
));
912 Directives
|= Symbol::Import
;
915 if (Directives
== Symbol::Invalid
)
918 // Use the include location-based logic for Objective-C symbols.
919 if (Directives
& Symbol::Import
) {
920 llvm::StringRef IncludeHeader
= getStdHeader(S
, ASTCtx
->getLangOpts());
921 if (IncludeHeader
.empty())
922 IncludeHeader
= HeaderFileURIs
->getIncludeHeader(FID
);
924 if (!IncludeHeader
.empty()) {
926 NewSym
.IncludeHeaders
.push_back({IncludeHeader
, 1, Directives
});
927 Symbols
.insert(NewSym
);
929 // FIXME: use providers from include-cleaner library once it's polished
934 assert(Directives
== Symbol::Include
);
935 // For #include's, use the providers computed by the include-cleaner
937 if (!OptionalProvider
)
939 const auto &H
= *OptionalProvider
;
940 const auto [SpellingIt
, Inserted
] = HeaderSpelling
.try_emplace(H
);
942 auto &SM
= ASTCtx
->getSourceManager();
943 if (H
.kind() == include_cleaner::Header::Kind::Physical
) {
944 // FIXME: Get rid of this once include-cleaner has support for system
947 HeaderFileURIs
->mapCanonical(H
.physical().getName());
949 SpellingIt
->second
= Canonical
;
950 // For physical files, prefer URIs as spellings might change
951 // depending on the translation unit.
952 else if (tooling::isSelfContainedHeader(H
.physical(), SM
,
953 PP
->getHeaderSearchInfo()))
955 HeaderFileURIs
->toURI(H
.physical());
957 SpellingIt
->second
= include_cleaner::spellHeader(
958 {H
, PP
->getHeaderSearchInfo(),
959 SM
.getFileEntryForID(SM
.getMainFileID())});
963 if (!SpellingIt
->second
.empty()) {
965 NewSym
.IncludeHeaders
.push_back({SpellingIt
->second
, 1, Directives
});
966 Symbols
.insert(NewSym
);
970 ReferencedSymbols
.clear();
971 IncludeFiles
.clear();
972 SymbolProviders
.clear();
973 FilesWithObjCConstructs
.clear();
// Fills in a Symbol `S` for declaration \p ND under \p ID and returns the
// result of Symbols.find() for that ID.
// Populated from \p ND: scope/name, template specialization args, flags,
// SymInfo, canonical declaration location, origin and documentation. Symbols
// flagged IndexedForCodeCompletion additionally get signature, snippet suffix,
// return type and type.
// \p IsMainFileOnly prevents the IndexedForCodeCompletion flag from being set.
// NOTE(review): the declaration of `S` and the calls that insert it into
// Symbols are on lines not visible in this copy -- confirm upstream.
976 const Symbol
*SymbolCollector::addDeclaration(const NamedDecl
&ND
, SymbolID ID
,
977 bool IsMainFileOnly
) {
978 auto &Ctx
= ND
.getASTContext();
979 auto &SM
= Ctx
.getSourceManager();
982 S
.ID
= std::move(ID
);
983 std::string QName
= printQualifiedName(ND
);
984 // FIXME: this returns foo:bar: for objective-C methods, we prefer only foo:
985 // for consistency with CodeCompletionString and a clean name/signature split.
986 std::tie(S
.Scope
, S
.Name
) = splitQualifiedName(QName
);
987 std::string TemplateSpecializationArgs
= printTemplateSpecializationArgs(ND
);
988 S
.TemplateSpecializationArgs
= TemplateSpecializationArgs
;
990 // We collect main-file symbols, but do not use them for code completion.
991 if (!IsMainFileOnly
&& isIndexedForCodeCompletion(ND
, Ctx
))
992 S
.Flags
|= Symbol::IndexedForCodeCompletion
;
993 if (isImplementationDetail(&ND
))
994 S
.Flags
|= Symbol::ImplementationDetail
;
996 S
.Flags
|= Symbol::VisibleOutsideFile
;
997 S
.SymInfo
= index::getSymbolInfo(&ND
);
998 auto Loc
= nameLocation(ND
, SM
);
999 assert(Loc
.isValid() && "Invalid source location for NamedDecl");
1000 // FIXME: use the result to filter out symbols.
1001 auto FID
= SM
.getFileID(Loc
);
1002 shouldIndexFile(FID
);
1003 if (auto DeclLoc
= getTokenLocation(Loc
))
1004 S
.CanonicalDeclaration
= *DeclLoc
;
1006 S
.Origin
= Opts
.Origin
;
1007 if (ND
.getAvailability() == AR_Deprecated
)
1008 S
.Flags
|= Symbol::Deprecated
;
1010 // Add completion info.
1011 // FIXME: we may want to choose a different redecl, or combine from several.
1012 assert(ASTCtx
&& PP
&& "ASTContext and Preprocessor must be set.");
1013 // We use the primary template, as clang does during code completion.
1014 CodeCompletionResult
SymbolCompletion(&getTemplateOrThis(ND
), 0);
1015 const auto *CCS
= SymbolCompletion
.CreateCodeCompletionString(
1016 *ASTCtx
, *PP
, CodeCompletionContext::CCC_Symbol
, *CompletionAllocator
,
1018 /*IncludeBriefComments*/ false);
1019 std::string Documentation
=
1020 formatDocumentation(*CCS
, getDocComment(Ctx
, SymbolCompletion
,
1021 /*CommentsFromHeaders=*/true));
// Symbols not indexed for code completion keep at most their documentation
// (only when Opts.StoreAllDocumentation is set) and return here, skipping the
// completion-only fields below.
1022 if (!(S
.Flags
& Symbol::IndexedForCodeCompletion
)) {
1023 if (Opts
.StoreAllDocumentation
)
1024 S
.Documentation
= Documentation
;
1026 return Symbols
.find(S
.ID
);
1028 S
.Documentation
= Documentation
;
1029 std::string Signature
;
1030 std::string SnippetSuffix
;
1031 getSignature(*CCS
, &Signature
, &SnippetSuffix
, SymbolCompletion
.Kind
,
1032 SymbolCompletion
.CursorKind
);
1033 S
.Signature
= Signature
;
1034 S
.CompletionSnippetSuffix
= SnippetSuffix
;
1035 std::string ReturnType
= getReturnType(*CCS
);
1036 S
.ReturnType
= ReturnType
;
// TypeStorage is declared outside the `if` so the value S.Type is taken from
// stays alive for the rest of the function.
1038 std::optional
<OpaqueType
> TypeStorage
;
1039 if (S
.Flags
& Symbol::IndexedForCodeCompletion
) {
1040 TypeStorage
= OpaqueType::fromCompletionResult(*ASTCtx
, SymbolCompletion
);
1042 S
.Type
= TypeStorage
->raw();
1046 setIncludeLocation(S
, ND
.getLocation(), include_cleaner::Symbol
{ND
});
// Remember files that declare ObjC symbols; finish() consults this set when
// deciding whether #import is allowed.
1047 if (S
.SymInfo
.Lang
== index::SymbolLanguage::ObjC
)
1048 FilesWithObjCConstructs
.insert(FID
);
1049 return Symbols
.find(S
.ID
);
// Records the definition location of \p ND: computes the token location of
// the declaration's name and stores it in the Definition field of a symbol
// `S`.
// NOTE(review): the statement following the `DeclSym.Definition` check
// (presumably an early return) and the declaration of `S` (per the comment
// below, a copy of \p DeclSym) are on lines not visible in this copy.
1052 void SymbolCollector::addDefinition(const NamedDecl
&ND
,
1053 const Symbol
&DeclSym
) {
1054 if (DeclSym
.Definition
)
1056 const auto &SM
= ND
.getASTContext().getSourceManager();
1057 auto Loc
= nameLocation(ND
, SM
);
1058 shouldIndexFile(SM
.getFileID(Loc
));
1059 auto DefLoc
= getTokenLocation(Loc
);
1060 // If we saw some forward declaration, we end up copying the symbol.
1061 // This is not ideal, but avoids duplicating the "is this a definition" check
1062 // in clang::index. We should only see one definition.
1066 // FIXME: use the result to filter out symbols.
1067 S
.Definition
= *DefLoc
;
// Returns the verdict of Opts.FileFilter for \p FID, storing it per FileID in
// FilesToIndexCache.
// NOTE(review): the value returned when no FileFilter is set, and the check
// that presumably limits the filter call to newly inserted cache entries, are
// on lines not visible in this copy.
1071 bool SymbolCollector::shouldIndexFile(FileID FID
) {
1072 if (!Opts
.FileFilter
)
1074 auto I
= FilesToIndexCache
.try_emplace(FID
);
1076 I
.first
->second
= Opts
.FileFilter(ASTCtx
->getSourceManager(), FID
);
1077 return I
.first
->second
;
// Builds a reference record `R` from \p SR: token range, file URI, ref kind
// and container symbol ID. This is done only when SR.FID has a file entry.
// NOTE(review): the declaration of `R` and the statement that stores it
// (presumably keyed by \p ID) are on lines not visible in this copy.
1080 void SymbolCollector::addRef(SymbolID ID
, const SymbolRef
&SR
) {
1081 const auto &SM
= ASTCtx
->getSourceManager();
1082 // FIXME: use the result to filter out references.
1083 shouldIndexFile(SR
.FID
);
1084 if (const auto FE
= SM
.getFileEntryRefForID(SR
.FID
)) {
1085 auto Range
= getTokenRange(SR
.Loc
, SM
, ASTCtx
->getLangOpts());
1087 R
.Location
.Start
= Range
.first
;
1088 R
.Location
.End
= Range
.second
;
1089 R
.Location
.FileURI
= HeaderFileURIs
->toURI(*FE
).c_str();
1090 R
.Kind
= toRefKind(SR
.Roles
, SR
.Spelled
);
1091 R
.Container
= getSymbolIDCached(SR
.Container
);
// Returns the SymbolID for declaration \p D, stored in DeclToIDCache keyed by
// the Decl pointer.
// NOTE(review): the check that presumably restricts the getSymbolID() call to
// newly inserted entries is on a line not visible in this copy.
1096 SymbolID
SymbolCollector::getSymbolIDCached(const Decl
*D
) {
1097 auto It
= DeclToIDCache
.try_emplace(D
, SymbolID
{});
1099 It
.first
->second
= getSymbolID(D
);
1100 return It
.first
->second
;
// Returns the SymbolID for the macro \p MacroName defined by \p MI, stored in
// MacroToIDCache keyed by the MacroInfo pointer. \p SM is forwarded to
// getSymbolID().
// NOTE(review): the check that presumably restricts the getSymbolID() call to
// newly inserted entries is on a line not visible in this copy.
1103 SymbolID
SymbolCollector::getSymbolIDCached(const llvm::StringRef MacroName
,
1104 const MacroInfo
*MI
,
1105 const SourceManager
&SM
) {
1106 auto It
= MacroToIDCache
.try_emplace(MI
, SymbolID
{});
1108 It
.first
->second
= getSymbolID(MacroName
, MI
, SM
);
1109 return It
.first
->second
;
1111 } // namespace clangd
1112 } // namespace clang