[Workflow] Try to fix code-formatter failing to find changes in some cases.
[llvm-project.git] / clang-tools-extra / clang-include-fixer / SymbolIndexManager.cpp
blob952c5fdedc291e51b3118c0e6f5c7cbfa6302727
1 //===-- SymbolIndexManager.cpp - Managing multiple SymbolIndices-*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "SymbolIndexManager.h"
11 #include <cmath>
13 #include "find-all-symbols/SymbolInfo.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/ADT/StringMap.h"
17 #include "llvm/Support/Debug.h"
18 #include "llvm/Support/Path.h"
20 #define DEBUG_TYPE "clang-include-fixer"
22 namespace clang {
23 namespace include_fixer {
25 using find_all_symbols::SymbolInfo;
26 using find_all_symbols::SymbolAndSignals;
28 // Calculate a score based on whether we think the given header is closely
29 // related to the given source file.
30 static double similarityScore(llvm::StringRef FileName,
31 llvm::StringRef Header) {
32 // Compute the maximum number of common path segments between Header and
33 // a suffix of FileName.
34 // We do not do a full longest common substring computation, as Header
35 // specifies the path we would directly #include, so we assume it is rooted
36 // relatively to a subproject of the repository.
37 int MaxSegments = 1;
38 for (auto FileI = llvm::sys::path::begin(FileName),
39 FileE = llvm::sys::path::end(FileName);
40 FileI != FileE; ++FileI) {
41 int Segments = 0;
42 for (auto HeaderI = llvm::sys::path::begin(Header),
43 HeaderE = llvm::sys::path::end(Header), I = FileI;
44 HeaderI != HeaderE && *I == *HeaderI && I != FileE; ++I, ++HeaderI) {
45 ++Segments;
47 MaxSegments = std::max(Segments, MaxSegments);
49 return MaxSegments;
52 static void rank(std::vector<SymbolAndSignals> &Symbols,
53 llvm::StringRef FileName) {
54 llvm::StringMap<double> Score;
55 for (const auto &Symbol : Symbols) {
56 // Calculate a score from the similarity of the header the symbol is in
57 // with the current file and the popularity of the symbol.
58 double NewScore = similarityScore(FileName, Symbol.Symbol.getFilePath()) *
59 (1.0 + std::log2(1 + Symbol.Signals.Seen));
60 double &S = Score[Symbol.Symbol.getFilePath()];
61 S = std::max(S, NewScore);
63 // Sort by the gathered scores. Use file name as a tie breaker so we can
64 // deduplicate.
65 llvm::sort(Symbols,
66 [&](const SymbolAndSignals &A, const SymbolAndSignals &B) {
67 auto AS = Score[A.Symbol.getFilePath()];
68 auto BS = Score[B.Symbol.getFilePath()];
69 if (AS != BS)
70 return AS > BS;
71 return A.Symbol.getFilePath() < B.Symbol.getFilePath();
72 });
75 std::vector<find_all_symbols::SymbolInfo>
76 SymbolIndexManager::search(llvm::StringRef Identifier,
77 bool IsNestedSearch,
78 llvm::StringRef FileName) const {
79 // The identifier may be fully qualified, so split it and get all the context
80 // names.
81 llvm::SmallVector<llvm::StringRef, 8> Names;
82 Identifier.split(Names, "::");
84 bool IsFullyQualified = false;
85 if (Identifier.startswith("::")) {
86 Names.erase(Names.begin()); // Drop first (empty) element.
87 IsFullyQualified = true;
90 // As long as we don't find a result keep stripping name parts from the end.
91 // This is to support nested classes which aren't recorded in the database.
92 // Eventually we will either hit a class (namespaces aren't in the database
93 // either) and can report that result.
94 bool TookPrefix = false;
95 std::vector<SymbolAndSignals> MatchedSymbols;
96 do {
97 std::vector<SymbolAndSignals> Symbols;
98 for (const auto &DB : SymbolIndices) {
99 auto Res = DB.get()->search(Names.back());
100 Symbols.insert(Symbols.end(), Res.begin(), Res.end());
103 LLVM_DEBUG(llvm::dbgs() << "Searching " << Names.back() << "... got "
104 << Symbols.size() << " results...\n");
106 for (auto &SymAndSig : Symbols) {
107 const SymbolInfo &Symbol = SymAndSig.Symbol;
108 // Match the identifier name without qualifier.
109 bool IsMatched = true;
110 auto SymbolContext = Symbol.getContexts().begin();
111 auto IdentiferContext = Names.rbegin() + 1; // Skip identifier name.
112 // Match the remaining context names.
113 while (IdentiferContext != Names.rend() &&
114 SymbolContext != Symbol.getContexts().end()) {
115 if (SymbolContext->second == *IdentiferContext) {
116 ++IdentiferContext;
117 ++SymbolContext;
118 } else if (SymbolContext->first ==
119 find_all_symbols::SymbolInfo::ContextType::EnumDecl) {
120 // Skip non-scoped enum context.
121 ++SymbolContext;
122 } else {
123 IsMatched = false;
124 break;
128 // If the name was qualified we only want to add results if we evaluated
129 // all contexts.
130 if (IsFullyQualified)
131 IsMatched &= (SymbolContext == Symbol.getContexts().end());
133 // FIXME: Support full match. At this point, we only find symbols in
134 // database which end with the same contexts with the identifier.
135 if (IsMatched && IdentiferContext == Names.rend()) {
136 // If we're in a situation where we took a prefix but the thing we
137 // found couldn't possibly have a nested member ignore it.
138 if (TookPrefix &&
139 (Symbol.getSymbolKind() == SymbolInfo::SymbolKind::Function ||
140 Symbol.getSymbolKind() == SymbolInfo::SymbolKind::Variable ||
141 Symbol.getSymbolKind() ==
142 SymbolInfo::SymbolKind::EnumConstantDecl ||
143 Symbol.getSymbolKind() == SymbolInfo::SymbolKind::Macro))
144 continue;
146 MatchedSymbols.push_back(std::move(SymAndSig));
149 Names.pop_back();
150 TookPrefix = true;
151 } while (MatchedSymbols.empty() && !Names.empty() && IsNestedSearch);
153 rank(MatchedSymbols, FileName);
154 // Strip signals, they are no longer needed.
155 std::vector<SymbolInfo> Res;
156 Res.reserve(MatchedSymbols.size());
157 for (auto &SymAndSig : MatchedSymbols)
158 Res.push_back(std::move(SymAndSig.Symbol));
159 return Res;
162 } // namespace include_fixer
163 } // namespace clang