[clang][modules] Don't prevent translation of FW_Private includes when explicitly...
[llvm-project.git] / clang-tools-extra / clangd / index / SymbolCollector.cpp
blobaac6676a995fedfd28fb12aabecf30fe76a132c7
1 //===--- SymbolCollector.cpp -------------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "SymbolCollector.h"
10 #include "AST.h"
11 #include "CodeComplete.h"
12 #include "CodeCompletionStrings.h"
13 #include "ExpectedTypes.h"
14 #include "SourceCode.h"
15 #include "URI.h"
16 #include "clang-include-cleaner/Analysis.h"
17 #include "clang-include-cleaner/IncludeSpeller.h"
18 #include "clang-include-cleaner/Record.h"
19 #include "clang-include-cleaner/Types.h"
20 #include "index/CanonicalIncludes.h"
21 #include "index/Relation.h"
22 #include "index/Symbol.h"
23 #include "index/SymbolID.h"
24 #include "index/SymbolLocation.h"
25 #include "clang/AST/Decl.h"
26 #include "clang/AST/DeclBase.h"
27 #include "clang/AST/DeclObjC.h"
28 #include "clang/AST/DeclTemplate.h"
29 #include "clang/AST/DeclarationName.h"
30 #include "clang/AST/Expr.h"
31 #include "clang/Basic/FileEntry.h"
32 #include "clang/Basic/LangOptions.h"
33 #include "clang/Basic/SourceLocation.h"
34 #include "clang/Basic/SourceManager.h"
35 #include "clang/Index/IndexSymbol.h"
36 #include "clang/Lex/Preprocessor.h"
37 #include "clang/Lex/Token.h"
38 #include "clang/Tooling/Inclusions/HeaderAnalysis.h"
39 #include "clang/Tooling/Inclusions/StandardLibrary.h"
40 #include "llvm/ADT/ArrayRef.h"
41 #include "llvm/ADT/DenseMap.h"
42 #include "llvm/ADT/SmallVector.h"
43 #include "llvm/ADT/StringRef.h"
44 #include "llvm/Support/ErrorHandling.h"
45 #include "llvm/Support/FileSystem.h"
46 #include "llvm/Support/Path.h"
47 #include <cassert>
48 #include <memory>
49 #include <optional>
50 #include <string>
51 #include <utility>
53 namespace clang {
54 namespace clangd {
55 namespace {
57 /// If \p ND is a template specialization, returns the described template.
58 /// Otherwise, returns \p ND.
59 const NamedDecl &getTemplateOrThis(const NamedDecl &ND) {
60 if (auto *T = ND.getDescribedTemplate())
61 return *T;
62 return ND;
65 // Checks whether the decl is a private symbol in a header generated by
66 // protobuf compiler.
67 // FIXME: make filtering extensible when there are more use cases for symbol
68 // filters.
69 bool isPrivateProtoDecl(const NamedDecl &ND) {
70 const auto &SM = ND.getASTContext().getSourceManager();
71 if (!isProtoFile(nameLocation(ND, SM), SM))
72 return false;
74 // ND without identifier can be operators.
75 if (ND.getIdentifier() == nullptr)
76 return false;
77 auto Name = ND.getIdentifier()->getName();
78 if (!Name.contains('_'))
79 return false;
80 // Nested proto entities (e.g. Message::Nested) have top-level decls
81 // that shouldn't be used (Message_Nested). Ignore them completely.
82 // The nested entities are dangling type aliases, we may want to reconsider
83 // including them in the future.
84 // For enum constants, SOME_ENUM_CONSTANT is not private and should be
85 // indexed. Outer_INNER is private. This heuristic relies on naming style, it
86 // will include OUTER_INNER and exclude some_enum_constant.
87 // FIXME: the heuristic relies on naming style (i.e. no underscore in
88 // user-defined names) and can be improved.
89 return (ND.getKind() != Decl::EnumConstant) || llvm::any_of(Name, islower);
92 // We only collect #include paths for symbols that are suitable for global code
93 // completion, except for namespaces since #include path for a namespace is hard
94 // to define.
95 Symbol::IncludeDirective shouldCollectIncludePath(index::SymbolKind Kind) {
96 using SK = index::SymbolKind;
97 switch (Kind) {
98 case SK::Macro:
99 case SK::Enum:
100 case SK::Struct:
101 case SK::Class:
102 case SK::Union:
103 case SK::TypeAlias:
104 case SK::Using:
105 case SK::Function:
106 case SK::Variable:
107 case SK::EnumConstant:
108 case SK::Concept:
109 return Symbol::Include | Symbol::Import;
110 case SK::Protocol:
111 return Symbol::Import;
112 default:
113 return Symbol::Invalid;
117 // Return the symbol range of the token at \p TokLoc.
118 std::pair<SymbolLocation::Position, SymbolLocation::Position>
119 getTokenRange(SourceLocation TokLoc, const SourceManager &SM,
120 const LangOptions &LangOpts) {
121 auto CreatePosition = [&SM](SourceLocation Loc) {
122 auto LSPLoc = sourceLocToPosition(SM, Loc);
123 SymbolLocation::Position Pos;
124 Pos.setLine(LSPLoc.line);
125 Pos.setColumn(LSPLoc.character);
126 return Pos;
129 auto TokenLength = clang::Lexer::MeasureTokenLength(TokLoc, SM, LangOpts);
130 return {CreatePosition(TokLoc),
131 CreatePosition(TokLoc.getLocWithOffset(TokenLength))};
134 // Checks whether \p ND is a good candidate to be the *canonical* declaration of
135 // its symbol (e.g. a go-to-declaration target). This overrides the default of
136 // using Clang's canonical declaration, which is the first in the TU.
138 // Example: preferring a class declaration over its forward declaration.
139 bool isPreferredDeclaration(const NamedDecl &ND, index::SymbolRoleSet Roles) {
140 const auto &SM = ND.getASTContext().getSourceManager();
141 if (isa<TagDecl>(ND))
142 return (Roles & static_cast<unsigned>(index::SymbolRole::Definition)) &&
143 !isInsideMainFile(ND.getLocation(), SM);
144 if (const auto *ID = dyn_cast<ObjCInterfaceDecl>(&ND))
145 return ID->isThisDeclarationADefinition();
146 if (const auto *PD = dyn_cast<ObjCProtocolDecl>(&ND))
147 return PD->isThisDeclarationADefinition();
148 return false;
151 RefKind toRefKind(index::SymbolRoleSet Roles, bool Spelled = false) {
152 RefKind Result = RefKind::Unknown;
153 if (Roles & static_cast<unsigned>(index::SymbolRole::Declaration))
154 Result |= RefKind::Declaration;
155 if (Roles & static_cast<unsigned>(index::SymbolRole::Definition))
156 Result |= RefKind::Definition;
157 if (Roles & static_cast<unsigned>(index::SymbolRole::Reference))
158 Result |= RefKind::Reference;
159 if (Spelled)
160 Result |= RefKind::Spelled;
161 return Result;
164 std::optional<RelationKind> indexableRelation(const index::SymbolRelation &R) {
165 if (R.Roles & static_cast<unsigned>(index::SymbolRole::RelationBaseOf))
166 return RelationKind::BaseOf;
167 if (R.Roles & static_cast<unsigned>(index::SymbolRole::RelationOverrideOf))
168 return RelationKind::OverriddenBy;
169 return std::nullopt;
172 // Check if there is an exact spelling of \p ND at \p Loc.
173 bool isSpelled(SourceLocation Loc, const NamedDecl &ND) {
174 auto Name = ND.getDeclName();
175 const auto NameKind = Name.getNameKind();
176 if (NameKind != DeclarationName::Identifier &&
177 NameKind != DeclarationName::CXXConstructorName)
178 return false;
179 const auto &AST = ND.getASTContext();
180 const auto &SM = AST.getSourceManager();
181 const auto &LO = AST.getLangOpts();
182 clang::Token Tok;
183 if (clang::Lexer::getRawToken(Loc, Tok, SM, LO))
184 return false;
185 auto StrName = Name.getAsString();
186 return clang::Lexer::getSpelling(Tok, SM, LO) == StrName;
188 } // namespace
190 // Encapsulates decisions about how to record header paths in the index,
191 // including filename normalization, URI conversion etc.
192 // Expensive checks are cached internally.
193 class SymbolCollector::HeaderFileURICache {
194 struct FrameworkUmbrellaSpelling {
195 // Spelling for the public umbrella header, e.g. <Foundation/Foundation.h>
196 std::optional<std::string> PublicHeader;
197 // Spelling for the private umbrella header, e.g.
198 // <Foundation/Foundation_Private.h>
199 std::optional<std::string> PrivateHeader;
201 // Weird double-indirect access to PP, which might not be ready yet when
202 // HeaderFiles is created but will be by the time it's used.
203 // (IndexDataConsumer::setPreprocessor can happen before or after initialize)
204 Preprocessor *&PP;
205 const SourceManager &SM;
206 const include_cleaner::PragmaIncludes *PI;
207 llvm::StringRef FallbackDir;
208 llvm::DenseMap<const FileEntry *, const std::string *> CacheFEToURI;
209 llvm::StringMap<std::string> CachePathToURI;
210 llvm::DenseMap<FileID, llvm::StringRef> CacheFIDToInclude;
211 llvm::StringMap<std::string> CachePathToFrameworkSpelling;
212 llvm::StringMap<FrameworkUmbrellaSpelling>
213 CacheFrameworkToUmbrellaHeaderSpelling;
215 public:
216 HeaderFileURICache(Preprocessor *&PP, const SourceManager &SM,
217 const SymbolCollector::Options &Opts)
218 : PP(PP), SM(SM), PI(Opts.PragmaIncludes), FallbackDir(Opts.FallbackDir) {
221 // Returns a canonical URI for the file \p FE.
222 // We attempt to make the path absolute first.
223 const std::string &toURI(const FileEntryRef FE) {
224 auto R = CacheFEToURI.try_emplace(FE);
225 if (R.second) {
226 auto CanonPath = getCanonicalPath(FE, SM.getFileManager());
227 R.first->second = &toURIInternal(CanonPath ? *CanonPath : FE.getName());
229 return *R.first->second;
232 // Returns a canonical URI for \p Path.
233 // If the file is in the FileManager, use that to canonicalize the path.
234 // We attempt to make the path absolute in any case.
235 const std::string &toURI(llvm::StringRef Path) {
236 if (auto File = SM.getFileManager().getFileRef(Path))
237 return toURI(*File);
238 return toURIInternal(Path);
241 // Gets a canonical include (URI of the header or <header> or "header") for
242 // header of \p FID (which should usually be the *expansion* file).
243 // This does not account for any per-symbol overrides!
244 // Returns "" if includes should not be inserted for this file.
245 llvm::StringRef getIncludeHeader(FileID FID) {
246 auto R = CacheFIDToInclude.try_emplace(FID);
247 if (R.second)
248 R.first->second = getIncludeHeaderUncached(FID);
249 return R.first->second;
252 // If a file is mapped by canonical headers, use that mapping, regardless
253 // of whether it's an otherwise-good header (header guards etc).
254 llvm::StringRef mapCanonical(llvm::StringRef HeaderPath) {
255 if (!PP)
256 return "";
257 // Populate the system header mapping as late as possible to
258 // ensure the preprocessor has been set already.
259 CanonicalIncludes SysHeaderMapping;
260 SysHeaderMapping.addSystemHeadersMapping(PP->getLangOpts());
261 auto Canonical = SysHeaderMapping.mapHeader(HeaderPath);
262 if (Canonical.empty())
263 return "";
264 // If we had a mapping, always use it.
265 assert(Canonical.startswith("<") || Canonical.startswith("\""));
266 return Canonical;
269 private:
270 // This takes care of making paths absolute and path->URI caching, but no
271 // FileManager-based canonicalization.
272 const std::string &toURIInternal(llvm::StringRef Path) {
273 auto R = CachePathToURI.try_emplace(Path);
274 if (R.second) {
275 llvm::SmallString<256> AbsPath = Path;
276 if (!llvm::sys::path::is_absolute(AbsPath) && !FallbackDir.empty())
277 llvm::sys::fs::make_absolute(FallbackDir, AbsPath);
278 assert(llvm::sys::path::is_absolute(AbsPath) &&
279 "If the VFS can't make paths absolute, a FallbackDir must be "
280 "provided");
281 llvm::sys::path::remove_dots(AbsPath, /*remove_dot_dot=*/true);
282 R.first->second = URI::create(AbsPath).toString();
284 return R.first->second;
287 struct FrameworkHeaderPath {
288 // Path to the framework directory containing the Headers/PrivateHeaders
289 // directories e.g. /Frameworks/Foundation.framework/
290 llvm::StringRef HeadersParentDir;
291 // Subpath relative to the Headers or PrivateHeaders dir, e.g. NSObject.h
292 // Note: This is NOT relative to the `HeadersParentDir`.
293 llvm::StringRef HeaderSubpath;
294 // Whether this header is under the PrivateHeaders dir
295 bool IsPrivateHeader;
298 std::optional<FrameworkHeaderPath>
299 splitFrameworkHeaderPath(llvm::StringRef Path) {
300 using namespace llvm::sys;
301 path::reverse_iterator I = path::rbegin(Path);
302 path::reverse_iterator Prev = I;
303 path::reverse_iterator E = path::rend(Path);
304 while (I != E) {
305 if (*I == "Headers") {
306 FrameworkHeaderPath HeaderPath;
307 HeaderPath.HeadersParentDir = Path.substr(0, I - E);
308 HeaderPath.HeaderSubpath = Path.substr(Prev - E);
309 HeaderPath.IsPrivateHeader = false;
310 return HeaderPath;
312 if (*I == "PrivateHeaders") {
313 FrameworkHeaderPath HeaderPath;
314 HeaderPath.HeadersParentDir = Path.substr(0, I - E);
315 HeaderPath.HeaderSubpath = Path.substr(Prev - E);
316 HeaderPath.IsPrivateHeader = true;
317 return HeaderPath;
319 Prev = I;
320 ++I;
322 // Unexpected, must not be a framework header.
323 return std::nullopt;
326 // Frameworks typically have an umbrella header of the same name, e.g.
327 // <Foundation/Foundation.h> instead of <Foundation/NSObject.h> or
328 // <Foundation/Foundation_Private.h> instead of
329 // <Foundation/NSObject_Private.h> which should be used instead of directly
330 // importing the header.
331 std::optional<std::string>
332 getFrameworkUmbrellaSpelling(llvm::StringRef Framework,
333 const HeaderSearch &HS,
334 FrameworkHeaderPath &HeaderPath) {
335 auto Res = CacheFrameworkToUmbrellaHeaderSpelling.try_emplace(Framework);
336 auto *CachedSpelling = &Res.first->second;
337 if (!Res.second) {
338 return HeaderPath.IsPrivateHeader ? CachedSpelling->PrivateHeader
339 : CachedSpelling->PublicHeader;
341 SmallString<256> UmbrellaPath(HeaderPath.HeadersParentDir);
342 llvm::sys::path::append(UmbrellaPath, "Headers", Framework + ".h");
344 llvm::vfs::Status Status;
345 auto StatErr = HS.getFileMgr().getNoncachedStatValue(UmbrellaPath, Status);
346 if (!StatErr)
347 CachedSpelling->PublicHeader = llvm::formatv("<{0}/{0}.h>", Framework);
349 UmbrellaPath = HeaderPath.HeadersParentDir;
350 llvm::sys::path::append(UmbrellaPath, "PrivateHeaders",
351 Framework + "_Private.h");
353 StatErr = HS.getFileMgr().getNoncachedStatValue(UmbrellaPath, Status);
354 if (!StatErr)
355 CachedSpelling->PrivateHeader =
356 llvm::formatv("<{0}/{0}_Private.h>", Framework);
358 return HeaderPath.IsPrivateHeader ? CachedSpelling->PrivateHeader
359 : CachedSpelling->PublicHeader;
362 // Compute the framework include spelling for `FE` which is in a framework
363 // named `Framework`, e.g. `NSObject.h` in framework `Foundation` would
364 // give <Foundation/Foundation.h> if the umbrella header exists, otherwise
365 // <Foundation/NSObject.h>.
366 std::optional<llvm::StringRef>
367 getFrameworkHeaderIncludeSpelling(FileEntryRef FE, llvm::StringRef Framework,
368 HeaderSearch &HS) {
369 auto Res = CachePathToFrameworkSpelling.try_emplace(FE.getName());
370 auto *CachedHeaderSpelling = &Res.first->second;
371 if (!Res.second)
372 return llvm::StringRef(*CachedHeaderSpelling);
374 auto HeaderPath = splitFrameworkHeaderPath(FE.getName());
375 if (!HeaderPath) {
376 // Unexpected: must not be a proper framework header, don't cache the
377 // failure.
378 CachePathToFrameworkSpelling.erase(Res.first);
379 return std::nullopt;
381 if (auto UmbrellaSpelling =
382 getFrameworkUmbrellaSpelling(Framework, HS, *HeaderPath)) {
383 *CachedHeaderSpelling = *UmbrellaSpelling;
384 return llvm::StringRef(*CachedHeaderSpelling);
387 *CachedHeaderSpelling =
388 llvm::formatv("<{0}/{1}>", Framework, HeaderPath->HeaderSubpath).str();
389 return llvm::StringRef(*CachedHeaderSpelling);
392 llvm::StringRef getIncludeHeaderUncached(FileID FID) {
393 const auto FE = SM.getFileEntryRefForID(FID);
394 if (!FE || FE->getName().empty())
395 return "";
397 if (auto Verbatim = PI->getPublic(*FE); !Verbatim.empty())
398 return Verbatim;
400 llvm::StringRef Filename = FE->getName();
401 if (auto Canonical = mapCanonical(Filename); !Canonical.empty())
402 return Canonical;
404 // Framework headers are spelled as <FrameworkName/Foo.h>, not
405 // "path/FrameworkName.framework/Headers/Foo.h".
406 auto &HS = PP->getHeaderSearchInfo();
407 if (const auto *HFI = HS.getExistingFileInfo(*FE, /*WantExternal*/ false))
408 if (!HFI->Framework.empty())
409 if (auto Spelling =
410 getFrameworkHeaderIncludeSpelling(*FE, HFI->Framework, HS))
411 return *Spelling;
413 if (!tooling::isSelfContainedHeader(*FE, PP->getSourceManager(),
414 PP->getHeaderSearchInfo())) {
415 // A .inc or .def file is often included into a real header to define
416 // symbols (e.g. LLVM tablegen files).
417 if (Filename.endswith(".inc") || Filename.endswith(".def"))
418 // Don't use cache reentrantly due to iterator invalidation.
419 return getIncludeHeaderUncached(SM.getFileID(SM.getIncludeLoc(FID)));
420 // Conservatively refuse to insert #includes to files without guards.
421 return "";
423 // Standard case: just insert the file itself.
424 return toURI(*FE);
428 // Return the symbol location of the token at \p TokLoc.
429 std::optional<SymbolLocation>
430 SymbolCollector::getTokenLocation(SourceLocation TokLoc) {
431 const auto &SM = ASTCtx->getSourceManager();
432 const auto FE = SM.getFileEntryRefForID(SM.getFileID(TokLoc));
433 if (!FE)
434 return std::nullopt;
436 SymbolLocation Result;
437 Result.FileURI = HeaderFileURIs->toURI(*FE).c_str();
438 auto Range = getTokenRange(TokLoc, SM, ASTCtx->getLangOpts());
439 Result.Start = Range.first;
440 Result.End = Range.second;
442 return Result;
445 SymbolCollector::SymbolCollector(Options Opts) : Opts(std::move(Opts)) {}
446 SymbolCollector::~SymbolCollector() = default;
448 void SymbolCollector::initialize(ASTContext &Ctx) {
449 ASTCtx = &Ctx;
450 HeaderFileURIs = std::make_unique<HeaderFileURICache>(
451 this->PP, ASTCtx->getSourceManager(), Opts);
452 CompletionAllocator = std::make_shared<GlobalCodeCompletionAllocator>();
453 CompletionTUInfo =
454 std::make_unique<CodeCompletionTUInfo>(CompletionAllocator);
457 bool SymbolCollector::shouldCollectSymbol(const NamedDecl &ND,
458 const ASTContext &ASTCtx,
459 const Options &Opts,
460 bool IsMainFileOnly) {
461 // Skip anonymous declarations, e.g (anonymous enum/class/struct).
462 if (ND.getDeclName().isEmpty())
463 return false;
465 // Skip main-file symbols if we are not collecting them.
466 if (IsMainFileOnly && !Opts.CollectMainFileSymbols)
467 return false;
469 // Skip symbols in anonymous namespaces in header files.
470 if (!IsMainFileOnly && ND.isInAnonymousNamespace())
471 return false;
473 // For function local symbols, index only classes and its member functions.
474 if (index::isFunctionLocalSymbol(&ND))
475 return isa<RecordDecl>(ND) ||
476 (ND.isCXXInstanceMember() && ND.isFunctionOrFunctionTemplate());
478 // We want most things but not "local" symbols such as symbols inside
479 // FunctionDecl, BlockDecl, ObjCMethodDecl and OMPDeclareReductionDecl.
480 // FIXME: Need a matcher for ExportDecl in order to include symbols declared
481 // within an export.
482 const auto *DeclCtx = ND.getDeclContext();
483 switch (DeclCtx->getDeclKind()) {
484 case Decl::TranslationUnit:
485 case Decl::Namespace:
486 case Decl::LinkageSpec:
487 case Decl::Enum:
488 case Decl::ObjCProtocol:
489 case Decl::ObjCInterface:
490 case Decl::ObjCCategory:
491 case Decl::ObjCCategoryImpl:
492 case Decl::ObjCImplementation:
493 break;
494 default:
495 // Record has a few derivations (e.g. CXXRecord, Class specialization), it's
496 // easier to cast.
497 if (!isa<RecordDecl>(DeclCtx))
498 return false;
501 // Avoid indexing internal symbols in protobuf generated headers.
502 if (isPrivateProtoDecl(ND))
503 return false;
504 if (!Opts.CollectReserved &&
505 (hasReservedName(ND) || hasReservedScope(*ND.getDeclContext())) &&
506 ASTCtx.getSourceManager().isInSystemHeader(ND.getLocation()))
507 return false;
509 return true;
512 const Decl *
513 SymbolCollector::getRefContainer(const Decl *Enclosing,
514 const SymbolCollector::Options &Opts) {
515 while (Enclosing) {
516 const auto *ND = dyn_cast<NamedDecl>(Enclosing);
517 if (ND && shouldCollectSymbol(*ND, ND->getASTContext(), Opts, true)) {
518 break;
520 Enclosing = dyn_cast_or_null<Decl>(Enclosing->getDeclContext());
522 return Enclosing;
525 // Always return true to continue indexing.
526 bool SymbolCollector::handleDeclOccurrence(
527 const Decl *D, index::SymbolRoleSet Roles,
528 llvm::ArrayRef<index::SymbolRelation> Relations, SourceLocation Loc,
529 index::IndexDataConsumer::ASTNodeInfo ASTNode) {
530 assert(ASTCtx && PP && HeaderFileURIs);
531 assert(CompletionAllocator && CompletionTUInfo);
532 assert(ASTNode.OrigD);
533 // Indexing API puts canonical decl into D, which might not have a valid
534 // source location for implicit/built-in decls. Fallback to original decl in
535 // such cases.
536 if (D->getLocation().isInvalid())
537 D = ASTNode.OrigD;
538 // If OrigD is an declaration associated with a friend declaration and it's
539 // not a definition, skip it. Note that OrigD is the occurrence that the
540 // collector is currently visiting.
541 if ((ASTNode.OrigD->getFriendObjectKind() !=
542 Decl::FriendObjectKind::FOK_None) &&
543 !(Roles & static_cast<unsigned>(index::SymbolRole::Definition)))
544 return true;
545 // A declaration created for a friend declaration should not be used as the
546 // canonical declaration in the index. Use OrigD instead, unless we've already
547 // picked a replacement for D
548 if (D->getFriendObjectKind() != Decl::FriendObjectKind::FOK_None)
549 D = CanonicalDecls.try_emplace(D, ASTNode.OrigD).first->second;
550 // Flag to mark that D should be considered canonical meaning its declaration
551 // will override any previous declaration for the Symbol.
552 bool DeclIsCanonical = false;
553 // Avoid treating ObjCImplementationDecl as a canonical declaration if it has
554 // a corresponding non-implicit and non-forward declared ObjcInterfaceDecl.
555 if (const auto *IID = dyn_cast<ObjCImplementationDecl>(D)) {
556 DeclIsCanonical = true;
557 if (const auto *CID = IID->getClassInterface())
558 if (const auto *DD = CID->getDefinition())
559 if (!DD->isImplicitInterfaceDecl())
560 D = DD;
562 // Avoid treating ObjCCategoryImplDecl as a canonical declaration in favor of
563 // its ObjCCategoryDecl if it has one.
564 if (const auto *CID = dyn_cast<ObjCCategoryImplDecl>(D)) {
565 DeclIsCanonical = true;
566 if (const auto *CD = CID->getCategoryDecl())
567 D = CD;
569 const NamedDecl *ND = dyn_cast<NamedDecl>(D);
570 if (!ND)
571 return true;
573 auto ID = getSymbolIDCached(ND);
574 if (!ID)
575 return true;
577 // Mark D as referenced if this is a reference coming from the main file.
578 // D may not be an interesting symbol, but it's cheaper to check at the end.
579 auto &SM = ASTCtx->getSourceManager();
580 if (Opts.CountReferences &&
581 (Roles & static_cast<unsigned>(index::SymbolRole::Reference)) &&
582 SM.getFileID(SM.getSpellingLoc(Loc)) == SM.getMainFileID())
583 ReferencedSymbols.insert(ID);
585 // ND is the canonical (i.e. first) declaration. If it's in the main file
586 // (which is not a header), then no public declaration was visible, so assume
587 // it's main-file only.
588 bool IsMainFileOnly =
589 SM.isWrittenInMainFile(SM.getExpansionLoc(ND->getBeginLoc())) &&
590 !isHeaderFile(SM.getFileEntryRefForID(SM.getMainFileID())->getName(),
591 ASTCtx->getLangOpts());
592 // In C, printf is a redecl of an implicit builtin! So check OrigD instead.
593 if (ASTNode.OrigD->isImplicit() ||
594 !shouldCollectSymbol(*ND, *ASTCtx, Opts, IsMainFileOnly))
595 return true;
597 // Note: we need to process relations for all decl occurrences, including
598 // refs, because the indexing code only populates relations for specific
599 // occurrences. For example, RelationBaseOf is only populated for the
600 // occurrence inside the base-specifier.
601 processRelations(*ND, ID, Relations);
603 bool CollectRef = static_cast<bool>(Opts.RefFilter & toRefKind(Roles));
604 // Unlike other fields, e.g. Symbols (which use spelling locations), we use
605 // file locations for references (as it aligns the behavior of clangd's
606 // AST-based xref).
607 // FIXME: we should try to use the file locations for other fields.
608 if (CollectRef &&
609 (!IsMainFileOnly || Opts.CollectMainFileRefs ||
610 ND->isExternallyVisible()) &&
611 !isa<NamespaceDecl>(ND)) {
612 auto FileLoc = SM.getFileLoc(Loc);
613 auto FID = SM.getFileID(FileLoc);
614 if (Opts.RefsInHeaders || FID == SM.getMainFileID()) {
615 addRef(ID, SymbolRef{FileLoc, FID, Roles,
616 getRefContainer(ASTNode.Parent, Opts),
617 isSpelled(FileLoc, *ND)});
620 // Don't continue indexing if this is a mere reference.
621 if (!(Roles & (static_cast<unsigned>(index::SymbolRole::Declaration) |
622 static_cast<unsigned>(index::SymbolRole::Definition))))
623 return true;
625 // FIXME: ObjCPropertyDecl are not properly indexed here:
626 // - ObjCPropertyDecl may have an OrigD of ObjCPropertyImplDecl, which is
627 // not a NamedDecl.
628 auto *OriginalDecl = dyn_cast<NamedDecl>(ASTNode.OrigD);
629 if (!OriginalDecl)
630 return true;
632 const Symbol *BasicSymbol = Symbols.find(ID);
633 if (isPreferredDeclaration(*OriginalDecl, Roles))
634 // If OriginalDecl is preferred, replace/create the existing canonical
635 // declaration (e.g. a class forward declaration). There should be at most
636 // one duplicate as we expect to see only one preferred declaration per
637 // TU, because in practice they are definitions.
638 BasicSymbol = addDeclaration(*OriginalDecl, std::move(ID), IsMainFileOnly);
639 else if (!BasicSymbol || DeclIsCanonical)
640 BasicSymbol = addDeclaration(*ND, std::move(ID), IsMainFileOnly);
642 if (Roles & static_cast<unsigned>(index::SymbolRole::Definition))
643 addDefinition(*OriginalDecl, *BasicSymbol);
645 return true;
648 void SymbolCollector::handleMacros(const MainFileMacros &MacroRefsToIndex) {
649 assert(HeaderFileURIs && PP);
650 const auto &SM = PP->getSourceManager();
651 const auto MainFileEntryRef = SM.getFileEntryRefForID(SM.getMainFileID());
652 assert(MainFileEntryRef);
654 const std::string &MainFileURI = HeaderFileURIs->toURI(*MainFileEntryRef);
655 // Add macro references.
656 for (const auto &IDToRefs : MacroRefsToIndex.MacroRefs) {
657 for (const auto &MacroRef : IDToRefs.second) {
658 const auto &Range = MacroRef.toRange(SM);
659 bool IsDefinition = MacroRef.IsDefinition;
660 Ref R;
661 R.Location.Start.setLine(Range.start.line);
662 R.Location.Start.setColumn(Range.start.character);
663 R.Location.End.setLine(Range.end.line);
664 R.Location.End.setColumn(Range.end.character);
665 R.Location.FileURI = MainFileURI.c_str();
666 R.Kind = IsDefinition ? RefKind::Definition : RefKind::Reference;
667 Refs.insert(IDToRefs.first, R);
668 if (IsDefinition) {
669 Symbol S;
670 S.ID = IDToRefs.first;
671 auto StartLoc = cantFail(sourceLocationInMainFile(SM, Range.start));
672 auto EndLoc = cantFail(sourceLocationInMainFile(SM, Range.end));
673 S.Name = toSourceCode(SM, SourceRange(StartLoc, EndLoc));
674 S.SymInfo.Kind = index::SymbolKind::Macro;
675 S.SymInfo.SubKind = index::SymbolSubKind::None;
676 S.SymInfo.Properties = index::SymbolPropertySet();
677 S.SymInfo.Lang = index::SymbolLanguage::C;
678 S.Origin = Opts.Origin;
679 S.CanonicalDeclaration = R.Location;
680 // Make the macro visible for code completion if main file is an
681 // include-able header.
682 if (!HeaderFileURIs->getIncludeHeader(SM.getMainFileID()).empty()) {
683 S.Flags |= Symbol::IndexedForCodeCompletion;
684 S.Flags |= Symbol::VisibleOutsideFile;
686 Symbols.insert(S);
692 bool SymbolCollector::handleMacroOccurrence(const IdentifierInfo *Name,
693 const MacroInfo *MI,
694 index::SymbolRoleSet Roles,
695 SourceLocation Loc) {
696 assert(PP);
697 // Builtin macros don't have useful locations and aren't needed in completion.
698 if (MI->isBuiltinMacro())
699 return true;
701 const auto &SM = PP->getSourceManager();
702 auto DefLoc = MI->getDefinitionLoc();
703 // Also avoid storing macros that aren't defined in any file, i.e. predefined
704 // macros like __DBL_MIN__ and those defined on the command line.
705 if (SM.isWrittenInBuiltinFile(DefLoc) ||
706 SM.isWrittenInCommandLineFile(DefLoc) ||
707 Name->getName() == "__GCC_HAVE_DWARF2_CFI_ASM")
708 return true;
710 auto ID = getSymbolIDCached(Name->getName(), MI, SM);
711 if (!ID)
712 return true;
714 auto SpellingLoc = SM.getSpellingLoc(Loc);
715 bool IsMainFileOnly =
716 SM.isInMainFile(SM.getExpansionLoc(DefLoc)) &&
717 !isHeaderFile(SM.getFileEntryRefForID(SM.getMainFileID())->getName(),
718 ASTCtx->getLangOpts());
719 // Do not store references to main-file macros.
720 if ((static_cast<unsigned>(Opts.RefFilter) & Roles) && !IsMainFileOnly &&
721 (Opts.RefsInHeaders || SM.getFileID(SpellingLoc) == SM.getMainFileID())) {
722 // FIXME: Populate container information for macro references.
723 // FIXME: All MacroRefs are marked as Spelled now, but this should be
724 // checked.
725 addRef(ID, SymbolRef{Loc, SM.getFileID(Loc), Roles, /*Container=*/nullptr,
726 /*Spelled=*/true});
729 // Collect symbols.
730 if (!Opts.CollectMacro)
731 return true;
733 // Skip main-file macros if we are not collecting them.
734 if (IsMainFileOnly && !Opts.CollectMainFileSymbols)
735 return false;
737 // Mark the macro as referenced if this is a reference coming from the main
738 // file. The macro may not be an interesting symbol, but it's cheaper to check
739 // at the end.
740 if (Opts.CountReferences &&
741 (Roles & static_cast<unsigned>(index::SymbolRole::Reference)) &&
742 SM.getFileID(SpellingLoc) == SM.getMainFileID())
743 ReferencedSymbols.insert(ID);
745 // Don't continue indexing if this is a mere reference.
746 // FIXME: remove macro with ID if it is undefined.
747 if (!(Roles & static_cast<unsigned>(index::SymbolRole::Declaration) ||
748 Roles & static_cast<unsigned>(index::SymbolRole::Definition)))
749 return true;
751 // Only collect one instance in case there are multiple.
752 if (Symbols.find(ID) != nullptr)
753 return true;
755 Symbol S;
756 S.ID = std::move(ID);
757 S.Name = Name->getName();
758 if (!IsMainFileOnly) {
759 S.Flags |= Symbol::IndexedForCodeCompletion;
760 S.Flags |= Symbol::VisibleOutsideFile;
762 S.SymInfo = index::getSymbolInfoForMacro(*MI);
763 S.Origin = Opts.Origin;
764 // FIXME: use the result to filter out symbols.
765 shouldIndexFile(SM.getFileID(Loc));
766 if (auto DeclLoc = getTokenLocation(DefLoc))
767 S.CanonicalDeclaration = *DeclLoc;
769 CodeCompletionResult SymbolCompletion(Name);
770 const auto *CCS = SymbolCompletion.CreateCodeCompletionStringForMacro(
771 *PP, *CompletionAllocator, *CompletionTUInfo);
772 std::string Signature;
773 std::string SnippetSuffix;
774 getSignature(*CCS, &Signature, &SnippetSuffix, SymbolCompletion.Kind,
775 SymbolCompletion.CursorKind);
776 S.Signature = Signature;
777 S.CompletionSnippetSuffix = SnippetSuffix;
779 IndexedMacros.insert(Name);
781 setIncludeLocation(S, DefLoc, include_cleaner::Macro{Name, DefLoc});
782 Symbols.insert(S);
783 return true;
786 void SymbolCollector::processRelations(
787 const NamedDecl &ND, const SymbolID &ID,
788 ArrayRef<index::SymbolRelation> Relations) {
789 for (const auto &R : Relations) {
790 auto RKind = indexableRelation(R);
791 if (!RKind)
792 continue;
793 const Decl *Object = R.RelatedSymbol;
795 auto ObjectID = getSymbolIDCached(Object);
796 if (!ObjectID)
797 continue;
799 // Record the relation.
800 // TODO: There may be cases where the object decl is not indexed for some
801 // reason. Those cases should probably be removed in due course, but for
802 // now there are two possible ways to handle it:
803 // (A) Avoid storing the relation in such cases.
804 // (B) Store it anyways. Clients will likely lookup() the SymbolID
805 // in the index and find nothing, but that's a situation they
806 // probably need to handle for other reasons anyways.
807 // We currently do (B) because it's simpler.
808 if (*RKind == RelationKind::BaseOf)
809 this->Relations.insert({ID, *RKind, ObjectID});
810 else if (*RKind == RelationKind::OverriddenBy)
811 this->Relations.insert({ObjectID, *RKind, ID});
815 void SymbolCollector::setIncludeLocation(const Symbol &S, SourceLocation DefLoc,
816 const include_cleaner::Symbol &Sym) {
817 const auto &SM = PP->getSourceManager();
818 if (!Opts.CollectIncludePath ||
819 shouldCollectIncludePath(S.SymInfo.Kind) == Symbol::Invalid)
820 return;
822 // Use the expansion location to get the #include header since this is
823 // where the symbol is exposed.
824 IncludeFiles[S.ID] = SM.getDecomposedExpansionLoc(DefLoc).first;
826 // We update providers for a symbol with each occurence, as SymbolCollector
827 // might run while parsing, rather than at the end of a translation unit.
828 // Hence we see more and more redecls over time.
829 auto [It, Inserted] = SymbolProviders.try_emplace(S.ID);
830 auto Headers =
831 include_cleaner::headersForSymbol(Sym, SM, Opts.PragmaIncludes);
832 if (Headers.empty())
833 return;
835 auto *HeadersIter = Headers.begin();
836 include_cleaner::Header H = *HeadersIter;
837 while (HeadersIter != Headers.end() &&
838 H.kind() == include_cleaner::Header::Physical &&
839 !tooling::isSelfContainedHeader(H.physical(), SM,
840 PP->getHeaderSearchInfo())) {
841 H = *HeadersIter;
842 HeadersIter++;
844 It->second = H;
847 llvm::StringRef getStdHeader(const Symbol *S, const LangOptions &LangOpts) {
848 tooling::stdlib::Lang Lang = tooling::stdlib::Lang::CXX;
849 if (LangOpts.C11)
850 Lang = tooling::stdlib::Lang::C;
851 else if(!LangOpts.CPlusPlus)
852 return "";
854 if (S->Scope == "std::" && S->Name == "move") {
855 if (!S->Signature.contains(','))
856 return "<utility>";
857 return "<algorithm>";
860 if (auto StdSym = tooling::stdlib::Symbol::named(S->Scope, S->Name, Lang))
861 if (auto Header = StdSym->header())
862 return Header->name();
863 return "";
866 void SymbolCollector::finish() {
867 // At the end of the TU, add 1 to the refcount of all referenced symbols.
868 for (const auto &ID : ReferencedSymbols) {
869 if (const auto *S = Symbols.find(ID)) {
870 // SymbolSlab::Builder returns const symbols because strings are interned
871 // and modifying returned symbols without inserting again wouldn't go
872 // well. const_cast is safe here as we're modifying a data owned by the
873 // Symbol. This reduces time spent in SymbolCollector by ~1%.
874 ++const_cast<Symbol *>(S)->References;
877 if (Opts.CollectMacro) {
878 assert(PP);
879 // First, drop header guards. We can't identify these until EOF.
880 for (const IdentifierInfo *II : IndexedMacros) {
881 if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo())
882 if (auto ID =
883 getSymbolIDCached(II->getName(), MI, PP->getSourceManager()))
884 if (MI->isUsedForHeaderGuard())
885 Symbols.erase(ID);
888 llvm::DenseMap<FileID, bool> FileToContainsImportsOrObjC;
889 llvm::DenseMap<include_cleaner::Header, std::string> HeaderSpelling;
890 // Fill in IncludeHeaders.
891 // We delay this until end of TU so header guards are all resolved.
892 for (const auto &[SID, OptionalProvider] : SymbolProviders) {
893 const Symbol *S = Symbols.find(SID);
894 if (!S)
895 continue;
896 assert(IncludeFiles.contains(SID));
898 const auto FID = IncludeFiles.at(SID);
899 // Determine if the FID is #include'd or #import'ed.
900 Symbol::IncludeDirective Directives = Symbol::Invalid;
901 auto CollectDirectives = shouldCollectIncludePath(S->SymInfo.Kind);
902 if ((CollectDirectives & Symbol::Include) != 0)
903 Directives |= Symbol::Include;
904 // Only allow #import for symbols from ObjC-like files.
905 if ((CollectDirectives & Symbol::Import) != 0) {
906 auto [It, Inserted] = FileToContainsImportsOrObjC.try_emplace(FID);
907 if (Inserted)
908 It->second = FilesWithObjCConstructs.contains(FID) ||
909 tooling::codeContainsImports(
910 ASTCtx->getSourceManager().getBufferData(FID));
911 if (It->second)
912 Directives |= Symbol::Import;
915 if (Directives == Symbol::Invalid)
916 continue;
918 // Use the include location-based logic for Objective-C symbols.
919 if (Directives & Symbol::Import) {
920 llvm::StringRef IncludeHeader = getStdHeader(S, ASTCtx->getLangOpts());
921 if (IncludeHeader.empty())
922 IncludeHeader = HeaderFileURIs->getIncludeHeader(FID);
924 if (!IncludeHeader.empty()) {
925 auto NewSym = *S;
926 NewSym.IncludeHeaders.push_back({IncludeHeader, 1, Directives});
927 Symbols.insert(NewSym);
929 // FIXME: use providers from include-cleaner library once it's polished
930 // for Objective-C.
931 continue;
934 assert(Directives == Symbol::Include);
935 // For #include's, use the providers computed by the include-cleaner
936 // library.
937 if (!OptionalProvider)
938 continue;
939 const auto &H = *OptionalProvider;
940 const auto [SpellingIt, Inserted] = HeaderSpelling.try_emplace(H);
941 if (Inserted) {
942 auto &SM = ASTCtx->getSourceManager();
943 if (H.kind() == include_cleaner::Header::Kind::Physical) {
944 // FIXME: Get rid of this once include-cleaner has support for system
945 // headers.
946 if (auto Canonical =
947 HeaderFileURIs->mapCanonical(H.physical().getName());
948 !Canonical.empty())
949 SpellingIt->second = Canonical;
950 // For physical files, prefer URIs as spellings might change
951 // depending on the translation unit.
952 else if (tooling::isSelfContainedHeader(H.physical(), SM,
953 PP->getHeaderSearchInfo()))
954 SpellingIt->second =
955 HeaderFileURIs->toURI(H.physical());
956 } else {
957 SpellingIt->second = include_cleaner::spellHeader(
958 {H, PP->getHeaderSearchInfo(),
959 SM.getFileEntryForID(SM.getMainFileID())});
963 if (!SpellingIt->second.empty()) {
964 auto NewSym = *S;
965 NewSym.IncludeHeaders.push_back({SpellingIt->second, 1, Directives});
966 Symbols.insert(NewSym);
970 ReferencedSymbols.clear();
971 IncludeFiles.clear();
972 SymbolProviders.clear();
973 FilesWithObjCConstructs.clear();
976 const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, SymbolID ID,
977 bool IsMainFileOnly) {
978 auto &Ctx = ND.getASTContext();
979 auto &SM = Ctx.getSourceManager();
981 Symbol S;
982 S.ID = std::move(ID);
983 std::string QName = printQualifiedName(ND);
984 // FIXME: this returns foo:bar: for objective-C methods, we prefer only foo:
985 // for consistency with CodeCompletionString and a clean name/signature split.
986 std::tie(S.Scope, S.Name) = splitQualifiedName(QName);
987 std::string TemplateSpecializationArgs = printTemplateSpecializationArgs(ND);
988 S.TemplateSpecializationArgs = TemplateSpecializationArgs;
990 // We collect main-file symbols, but do not use them for code completion.
991 if (!IsMainFileOnly && isIndexedForCodeCompletion(ND, Ctx))
992 S.Flags |= Symbol::IndexedForCodeCompletion;
993 if (isImplementationDetail(&ND))
994 S.Flags |= Symbol::ImplementationDetail;
995 if (!IsMainFileOnly)
996 S.Flags |= Symbol::VisibleOutsideFile;
997 S.SymInfo = index::getSymbolInfo(&ND);
998 auto Loc = nameLocation(ND, SM);
999 assert(Loc.isValid() && "Invalid source location for NamedDecl");
1000 // FIXME: use the result to filter out symbols.
1001 auto FID = SM.getFileID(Loc);
1002 shouldIndexFile(FID);
1003 if (auto DeclLoc = getTokenLocation(Loc))
1004 S.CanonicalDeclaration = *DeclLoc;
1006 S.Origin = Opts.Origin;
1007 if (ND.getAvailability() == AR_Deprecated)
1008 S.Flags |= Symbol::Deprecated;
1010 // Add completion info.
1011 // FIXME: we may want to choose a different redecl, or combine from several.
1012 assert(ASTCtx && PP && "ASTContext and Preprocessor must be set.");
1013 // We use the primary template, as clang does during code completion.
1014 CodeCompletionResult SymbolCompletion(&getTemplateOrThis(ND), 0);
1015 const auto *CCS = SymbolCompletion.CreateCodeCompletionString(
1016 *ASTCtx, *PP, CodeCompletionContext::CCC_Symbol, *CompletionAllocator,
1017 *CompletionTUInfo,
1018 /*IncludeBriefComments*/ false);
1019 std::string Documentation =
1020 formatDocumentation(*CCS, getDocComment(Ctx, SymbolCompletion,
1021 /*CommentsFromHeaders=*/true));
1022 if (!(S.Flags & Symbol::IndexedForCodeCompletion)) {
1023 if (Opts.StoreAllDocumentation)
1024 S.Documentation = Documentation;
1025 Symbols.insert(S);
1026 return Symbols.find(S.ID);
1028 S.Documentation = Documentation;
1029 std::string Signature;
1030 std::string SnippetSuffix;
1031 getSignature(*CCS, &Signature, &SnippetSuffix, SymbolCompletion.Kind,
1032 SymbolCompletion.CursorKind);
1033 S.Signature = Signature;
1034 S.CompletionSnippetSuffix = SnippetSuffix;
1035 std::string ReturnType = getReturnType(*CCS);
1036 S.ReturnType = ReturnType;
1038 std::optional<OpaqueType> TypeStorage;
1039 if (S.Flags & Symbol::IndexedForCodeCompletion) {
1040 TypeStorage = OpaqueType::fromCompletionResult(*ASTCtx, SymbolCompletion);
1041 if (TypeStorage)
1042 S.Type = TypeStorage->raw();
1045 Symbols.insert(S);
1046 setIncludeLocation(S, ND.getLocation(), include_cleaner::Symbol{ND});
1047 if (S.SymInfo.Lang == index::SymbolLanguage::ObjC)
1048 FilesWithObjCConstructs.insert(FID);
1049 return Symbols.find(S.ID);
1052 void SymbolCollector::addDefinition(const NamedDecl &ND,
1053 const Symbol &DeclSym) {
1054 if (DeclSym.Definition)
1055 return;
1056 const auto &SM = ND.getASTContext().getSourceManager();
1057 auto Loc = nameLocation(ND, SM);
1058 shouldIndexFile(SM.getFileID(Loc));
1059 auto DefLoc = getTokenLocation(Loc);
1060 // If we saw some forward declaration, we end up copying the symbol.
1061 // This is not ideal, but avoids duplicating the "is this a definition" check
1062 // in clang::index. We should only see one definition.
1063 if (!DefLoc)
1064 return;
1065 Symbol S = DeclSym;
1066 // FIXME: use the result to filter out symbols.
1067 S.Definition = *DefLoc;
1068 Symbols.insert(S);
1071 bool SymbolCollector::shouldIndexFile(FileID FID) {
1072 if (!Opts.FileFilter)
1073 return true;
1074 auto I = FilesToIndexCache.try_emplace(FID);
1075 if (I.second)
1076 I.first->second = Opts.FileFilter(ASTCtx->getSourceManager(), FID);
1077 return I.first->second;
1080 void SymbolCollector::addRef(SymbolID ID, const SymbolRef &SR) {
1081 const auto &SM = ASTCtx->getSourceManager();
1082 // FIXME: use the result to filter out references.
1083 shouldIndexFile(SR.FID);
1084 if (const auto FE = SM.getFileEntryRefForID(SR.FID)) {
1085 auto Range = getTokenRange(SR.Loc, SM, ASTCtx->getLangOpts());
1086 Ref R;
1087 R.Location.Start = Range.first;
1088 R.Location.End = Range.second;
1089 R.Location.FileURI = HeaderFileURIs->toURI(*FE).c_str();
1090 R.Kind = toRefKind(SR.Roles, SR.Spelled);
1091 R.Container = getSymbolIDCached(SR.Container);
1092 Refs.insert(ID, R);
1096 SymbolID SymbolCollector::getSymbolIDCached(const Decl *D) {
1097 auto It = DeclToIDCache.try_emplace(D, SymbolID{});
1098 if (It.second)
1099 It.first->second = getSymbolID(D);
1100 return It.first->second;
1103 SymbolID SymbolCollector::getSymbolIDCached(const llvm::StringRef MacroName,
1104 const MacroInfo *MI,
1105 const SourceManager &SM) {
1106 auto It = MacroToIDCache.try_emplace(MI, SymbolID{});
1107 if (It.second)
1108 It.first->second = getSymbolID(MacroName, MI, SM);
1109 return It.first->second;
1111 } // namespace clangd
1112 } // namespace clang