1 //===--- Quality.h - Ranking alternatives for ambiguous queries --*- C++-*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 /// Some operations such as code completion produce a set of candidates.
10 /// Usually the user can choose between them, but we should put the best options
11 /// at the top (they're easier to select, and more likely to be seen).
13 /// This file defines building blocks for ranking candidates.
14 /// It's used by the features directly and also in the implementation of
15 /// indexes, as indexes also need to heuristically limit their results.
17 /// The facilities here are:
18 /// - retrieving scoring signals from e.g. indexes, AST, CodeCompletionString
19 /// These are structured in a way that they can be debugged, and are fairly
20 /// consistent regardless of the source.
21 /// - compute scores from scoring signals. These are suitable for sorting.
22 /// - sorting utilities like the TopN container.
23 /// These could be split up further to isolate dependencies if we care.
25 //===----------------------------------------------------------------------===//
27 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
28 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
30 #include "FileDistance.h"
31 #include "clang/Sema/CodeCompleteConsumer.h"
32 #include "llvm/ADT/StringRef.h"
33 #include "llvm/ADT/StringSet.h"
44 class CodeCompletionResult
;
51 // Signals structs are designed to be aggregated from 0 or more sources.
52 // A default instance has neutral signals, and sources are merged into it.
53 // They can be dumped for debugging, and evaluate()d into a score.
55 /// Attributes of a symbol that affect how much we like it.
56 struct SymbolQualitySignals
{
57 bool Deprecated
= false;
58 bool ReservedName
= false; // __foo, _Foo are usually implementation details.
59 // FIXME: make these findable once user types _.
60 bool ImplementationDetail
= false;
61 unsigned References
= 0;
76 void merge(const CodeCompletionResult
&SemaCCResult
);
77 void merge(const Symbol
&IndexResult
);
79 // Condense these signals down to a single number, higher is better.
80 float evaluateHeuristics() const;
82 llvm::raw_ostream
&operator<<(llvm::raw_ostream
&,
83 const SymbolQualitySignals
&);
85 /// Attributes of a symbol-query pair that affect how much we like it.
86 struct SymbolRelevanceSignals
{
87 /// The name of the symbol (for ContextWords). Must be explicitly assigned.
89 /// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned.
91 /// Lowercase words relevant to the context (e.g. near the completion point).
92 llvm::StringSet
<>* ContextWords
= nullptr;
93 bool Forbidden
= false; // Unavailable (e.g const) or inaccessible (private).
94 /// Whether fixits needs to be applied for that completion or not.
95 bool NeedsFixIts
= false;
96 bool InBaseClass
= false; // A member from base class of the accessed class.
98 URIDistance
*FileProximityMatch
= nullptr;
99 /// These are used to calculate proximity between the index symbol and the
101 llvm::StringRef SymbolURI
;
102 /// FIXME: unify with index proximity score - signals should be
103 /// source-independent.
104 /// Proximity between best declaration and the query. [0-1], 1 is closest.
105 float SemaFileProximityScore
= 0;
107 // Scope proximity is only considered (both index and sema) when this is set.
108 ScopeDistance
*ScopeProximityMatch
= nullptr;
109 std::optional
<llvm::StringRef
> SymbolScope
;
110 // A symbol from sema should be accessible from the current scope.
111 bool SemaSaysInScope
= false;
113 // An approximate measure of where we expect the symbol to be used.
114 enum AccessibleScope
{
119 } Scope
= GlobalScope
;
126 CodeCompletionContext::Kind Context
= CodeCompletionContext::CCC_Other
;
128 // Whether symbol is an instance member of a class.
129 bool IsInstanceMember
= false;
131 // Whether clang provided a preferred type in the completion context.
132 bool HadContextType
= false;
133 // Whether a source completion item or a symbol had a type information.
134 bool HadSymbolType
= false;
135 // Whether the item matches the type expected in the completion context.
136 bool TypeMatchesPreferred
= false;
138 /// Length of the unqualified partial name of Symbol typed in
139 /// CompletionPrefix.
140 unsigned FilterLength
= 0;
142 const ASTSignals
*MainFileSignals
= nullptr;
143 /// Number of references to the candidate in the main file.
144 unsigned MainFileRefs
= 0;
145 /// Number of unique symbols in the main file which belongs to candidate's
146 /// namespace. This indicates how relevant the namespace is in the current
148 unsigned ScopeRefsInFile
= 0;
150 /// Set of derived signals computed by calculateDerivedSignals(). Must not be
152 struct DerivedSignals
{
153 /// Whether Name contains some word from context.
154 bool NameMatchesContext
= false;
155 /// Min distance between SymbolURI and all the headers included by the TU.
156 unsigned FileProximityDistance
= FileDistance::Unreachable
;
157 /// Min distance between SymbolScope and all the available scopes.
158 unsigned ScopeProximityDistance
= FileDistance::Unreachable
;
161 DerivedSignals
calculateDerivedSignals() const;
163 void merge(const CodeCompletionResult
&SemaResult
);
164 void merge(const Symbol
&IndexResult
);
165 void computeASTSignals(const CodeCompletionResult
&SemaResult
);
167 // Condense these signals down to a single number, higher is better.
168 float evaluateHeuristics() const;
170 llvm::raw_ostream
&operator<<(llvm::raw_ostream
&,
171 const SymbolRelevanceSignals
&);
173 /// Combine symbol quality and relevance into a single score.
174 float evaluateSymbolAndRelevance(float SymbolQuality
, float SymbolRelevance
);
176 /// Same semantics as CodeComplete::Score. Quality score and Relevance score
177 /// have been removed since DecisionForest cannot assign individual scores to
178 /// Quality and Relevance signals.
179 struct DecisionForestScores
{
181 float ExcludingName
= 0.f
;
185 evaluateDecisionForest(const SymbolQualitySignals
&Quality
,
186 const SymbolRelevanceSignals
&Relevance
, float Base
);
188 /// TopN<T> is a lossy container that preserves only the "best" N elements.
189 template <typename T
, typename Compare
= std::greater
<T
>> class TopN
{
191 using value_type
= T
;
192 TopN(size_t N
, Compare Greater
= Compare())
193 : N(N
), Greater(std::move(Greater
)) {}
195 // Adds a candidate to the set.
196 // Returns true if a candidate was dropped to get back under N.
197 bool push(value_type
&&V
) {
198 bool Dropped
= false;
199 if (Heap
.size() >= N
) {
201 if (N
> 0 && Greater(V
, Heap
.front())) {
202 std::pop_heap(Heap
.begin(), Heap
.end(), Greater
);
203 Heap
.back() = std::move(V
);
204 std::push_heap(Heap
.begin(), Heap
.end(), Greater
);
207 Heap
.push_back(std::move(V
));
208 std::push_heap(Heap
.begin(), Heap
.end(), Greater
);
210 assert(Heap
.size() <= N
);
211 assert(std::is_heap(Heap
.begin(), Heap
.end(), Greater
));
215 // Returns candidates from best to worst.
216 std::vector
<value_type
> items() && {
217 std::sort_heap(Heap
.begin(), Heap
.end(), Greater
);
218 assert(Heap
.size() <= N
);
219 return std::move(Heap
);
224 std::vector
<value_type
> Heap
; // Min-heap, comparator is Greater.
228 /// Returns a string that sorts in the same order as (-Score, Tiebreak), for
229 /// LSP. (The highest score compares smallest so it sorts at the top).
230 std::string
sortText(float Score
, llvm::StringRef Tiebreak
= "");
232 struct SignatureQualitySignals
{
233 uint32_t NumberOfParameters
= 0;
234 uint32_t NumberOfOptionalParameters
= 0;
235 CodeCompleteConsumer::OverloadCandidate::CandidateKind Kind
=
236 CodeCompleteConsumer::OverloadCandidate::CandidateKind::CK_Function
;
238 llvm::raw_ostream
&operator<<(llvm::raw_ostream
&,
239 const SignatureQualitySignals
&);
241 } // namespace clangd
244 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H