1 //===-- SymbolIndexManager.cpp - Managing multiple SymbolIndices-*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "SymbolIndexManager.h"
13 #include "find-all-symbols/SymbolInfo.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/ADT/StringMap.h"
17 #include "llvm/Support/Debug.h"
18 #include "llvm/Support/Path.h"
20 #define DEBUG_TYPE "clang-include-fixer"
23 namespace include_fixer
{
25 using find_all_symbols::SymbolInfo
;
26 using find_all_symbols::SymbolAndSignals
;
28 // Calculate a score based on whether we think the given header is closely
29 // related to the given source file.
30 static double similarityScore(llvm::StringRef FileName
,
31 llvm::StringRef Header
) {
32 // Compute the maximum number of common path segments between Header and
33 // a suffix of FileName.
34 // We do not do a full longest common substring computation, as Header
35 // specifies the path we would directly #include, so we assume it is rooted
36 // relatively to a subproject of the repository.
38 for (auto FileI
= llvm::sys::path::begin(FileName
),
39 FileE
= llvm::sys::path::end(FileName
);
40 FileI
!= FileE
; ++FileI
) {
42 for (auto HeaderI
= llvm::sys::path::begin(Header
),
43 HeaderE
= llvm::sys::path::end(Header
), I
= FileI
;
44 HeaderI
!= HeaderE
&& *I
== *HeaderI
&& I
!= FileE
; ++I
, ++HeaderI
) {
47 MaxSegments
= std::max(Segments
, MaxSegments
);
52 static void rank(std::vector
<SymbolAndSignals
> &Symbols
,
53 llvm::StringRef FileName
) {
54 llvm::StringMap
<double> Score
;
55 for (const auto &Symbol
: Symbols
) {
56 // Calculate a score from the similarity of the header the symbol is in
57 // with the current file and the popularity of the symbol.
58 double NewScore
= similarityScore(FileName
, Symbol
.Symbol
.getFilePath()) *
59 (1.0 + std::log2(1 + Symbol
.Signals
.Seen
));
60 double &S
= Score
[Symbol
.Symbol
.getFilePath()];
61 S
= std::max(S
, NewScore
);
63 // Sort by the gathered scores. Use file name as a tie breaker so we can
66 [&](const SymbolAndSignals
&A
, const SymbolAndSignals
&B
) {
67 auto AS
= Score
[A
.Symbol
.getFilePath()];
68 auto BS
= Score
[B
.Symbol
.getFilePath()];
71 return A
.Symbol
.getFilePath() < B
.Symbol
.getFilePath();
75 std::vector
<find_all_symbols::SymbolInfo
>
76 SymbolIndexManager::search(llvm::StringRef Identifier
,
78 llvm::StringRef FileName
) const {
79 // The identifier may be fully qualified, so split it and get all the context
81 llvm::SmallVector
<llvm::StringRef
, 8> Names
;
82 Identifier
.split(Names
, "::");
84 bool IsFullyQualified
= false;
85 if (Identifier
.starts_with("::")) {
86 Names
.erase(Names
.begin()); // Drop first (empty) element.
87 IsFullyQualified
= true;
90 // As long as we don't find a result keep stripping name parts from the end.
91 // This is to support nested classes which aren't recorded in the database.
92 // Eventually we will either hit a class (namespaces aren't in the database
93 // either) and can report that result.
94 bool TookPrefix
= false;
95 std::vector
<SymbolAndSignals
> MatchedSymbols
;
97 std::vector
<SymbolAndSignals
> Symbols
;
98 for (const auto &DB
: SymbolIndices
) {
99 auto Res
= DB
.get()->search(Names
.back());
100 Symbols
.insert(Symbols
.end(), Res
.begin(), Res
.end());
103 LLVM_DEBUG(llvm::dbgs() << "Searching " << Names
.back() << "... got "
104 << Symbols
.size() << " results...\n");
106 for (auto &SymAndSig
: Symbols
) {
107 const SymbolInfo
&Symbol
= SymAndSig
.Symbol
;
108 // Match the identifier name without qualifier.
109 bool IsMatched
= true;
110 auto SymbolContext
= Symbol
.getContexts().begin();
111 auto IdentiferContext
= Names
.rbegin() + 1; // Skip identifier name.
112 // Match the remaining context names.
113 while (IdentiferContext
!= Names
.rend() &&
114 SymbolContext
!= Symbol
.getContexts().end()) {
115 if (SymbolContext
->second
== *IdentiferContext
) {
118 } else if (SymbolContext
->first
==
119 find_all_symbols::SymbolInfo::ContextType::EnumDecl
) {
120 // Skip non-scoped enum context.
128 // If the name was qualified we only want to add results if we evaluated
130 if (IsFullyQualified
)
131 IsMatched
&= (SymbolContext
== Symbol
.getContexts().end());
133 // FIXME: Support full match. At this point, we only find symbols in
134 // database which end with the same contexts with the identifier.
135 if (IsMatched
&& IdentiferContext
== Names
.rend()) {
136 // If we're in a situation where we took a prefix but the thing we
137 // found couldn't possibly have a nested member ignore it.
139 (Symbol
.getSymbolKind() == SymbolInfo::SymbolKind::Function
||
140 Symbol
.getSymbolKind() == SymbolInfo::SymbolKind::Variable
||
141 Symbol
.getSymbolKind() ==
142 SymbolInfo::SymbolKind::EnumConstantDecl
||
143 Symbol
.getSymbolKind() == SymbolInfo::SymbolKind::Macro
))
146 MatchedSymbols
.push_back(std::move(SymAndSig
));
151 } while (MatchedSymbols
.empty() && !Names
.empty() && IsNestedSearch
);
153 rank(MatchedSymbols
, FileName
);
154 // Strip signals, they are no longer needed.
155 std::vector
<SymbolInfo
> Res
;
156 Res
.reserve(MatchedSymbols
.size());
157 for (auto &SymAndSig
: MatchedSymbols
)
158 Res
.push_back(std::move(SymAndSig
.Symbol
));
162 } // namespace include_fixer