1 //===--- FileDistance.cpp - File contents container -------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // The FileDistance structure allows calculating the minimum distance to paths
11 // We simply walk up the path's ancestors until we find a node whose cost is
12 // known, and add the cost of walking back down. Initialization ensures this
13 // gives the correct path to the roots.
14 // We cache the results, so that the runtime is O(|A|), where A is the set of
15 // all distinct ancestors of visited paths.
17 // Example after initialization with /=2, /bar=0, DownCost = 1:
21 // After querying /foo/bar and /bar/foo:
28 // URIDistance creates FileDistance lazily for each URI scheme encountered. In
29 // practice this is a small constant factor.
31 //===-------------------------------------------------------------------------//
33 #include "FileDistance.h"
35 #include "support/Logger.h"
36 #include "llvm/ADT/STLExtras.h"
37 #include "llvm/ADT/StringExtras.h"
38 #include "llvm/Support/Path.h"
44 // Convert a path into the canonical form.
45 // Canonical form is either "/", or "/segment" * N:
46 // C:\foo\bar --> /c:/foo/bar
49 static llvm::SmallString
<128> canonicalize(llvm::StringRef Path
) {
50 llvm::SmallString
<128> Result
= Path
.rtrim('/');
51 native(Result
, llvm::sys::path::Style::posix
);
52 if (Result
.empty() || Result
.front() != '/')
53 Result
.insert(Result
.begin(), '/');
57 constexpr const unsigned FileDistance::Unreachable
;
58 const llvm::hash_code
FileDistance::RootHash
=
59 llvm::hash_value(llvm::StringRef("/"));
61 FileDistance::FileDistance(llvm::StringMap
<SourceParams
> Sources
,
62 const FileDistanceOptions
&Opts
)
64 llvm::DenseMap
<llvm::hash_code
, llvm::SmallVector
<llvm::hash_code
>> DownEdges
;
65 // Compute the best distance following only up edges.
66 // Keep track of down edges, in case we can use them to improve on this.
67 for (const auto &S
: Sources
) {
68 auto Canonical
= canonicalize(S
.getKey());
69 dlog("Source {0} = {1}, MaxUp = {2}", Canonical
, S
.second
.Cost
,
70 S
.second
.MaxUpTraversals
);
71 // Walk up to ancestors of this source, assigning cost.
72 llvm::StringRef Rest
= Canonical
;
73 llvm::hash_code Hash
= llvm::hash_value(Rest
);
74 for (unsigned I
= 0; !Rest
.empty(); ++I
) {
75 Rest
= parent_path(Rest
, llvm::sys::path::Style::posix
);
76 auto NextHash
= llvm::hash_value(Rest
);
77 auto &Down
= DownEdges
[NextHash
];
78 if (!llvm::is_contained(Down
, Hash
))
80 // We can't just break after MaxUpTraversals, must still set DownEdges.
81 if (I
> S
.getValue().MaxUpTraversals
) {
82 if (Cache
.contains(Hash
))
85 unsigned Cost
= S
.getValue().Cost
+ I
* Opts
.UpCost
;
86 auto R
= Cache
.try_emplace(Hash
, Cost
);
88 if (Cost
< R
.first
->second
) {
89 R
.first
->second
= Cost
;
91 // If we're not the best way to get to this path, stop assigning.
99 // Now propagate scores parent -> child if that's an improvement.
100 // BFS ensures we propagate down chains (must visit parents before children).
101 std::queue
<llvm::hash_code
> Next
;
102 for (auto Child
: DownEdges
.lookup(llvm::hash_value(llvm::StringRef(""))))
104 while (!Next
.empty()) {
105 auto Parent
= Next
.front();
107 auto ParentCost
= Cache
.lookup(Parent
);
108 for (auto Child
: DownEdges
.lookup(Parent
)) {
109 if (Parent
!= RootHash
|| Opts
.AllowDownTraversalFromRoot
) {
111 Cache
.try_emplace(Child
, Unreachable
).first
->getSecond();
112 if (ParentCost
+ Opts
.DownCost
< ChildCost
)
113 ChildCost
= ParentCost
+ Opts
.DownCost
;
120 unsigned FileDistance::distance(llvm::StringRef Path
) {
121 auto Canonical
= canonicalize(Path
);
122 unsigned Cost
= Unreachable
;
123 llvm::SmallVector
<llvm::hash_code
> Ancestors
;
124 // Walk up ancestors until we find a path we know the distance for.
125 for (llvm::StringRef Rest
= Canonical
; !Rest
.empty();
126 Rest
= parent_path(Rest
, llvm::sys::path::Style::posix
)) {
127 auto Hash
= llvm::hash_value(Rest
);
128 if (Hash
== RootHash
&& !Ancestors
.empty() &&
129 !Opts
.AllowDownTraversalFromRoot
) {
133 auto It
= Cache
.find(Hash
);
134 if (It
!= Cache
.end()) {
138 Ancestors
.push_back(Hash
);
140 // Now we know the costs for (known node, queried node].
141 // Fill these in, walking down the directory tree.
142 for (llvm::hash_code Hash
: llvm::reverse(Ancestors
)) {
143 if (Cost
!= Unreachable
)
144 Cost
+= Opts
.DownCost
;
145 Cache
.try_emplace(Hash
, Cost
);
147 dlog("distance({0} = {1})", Path
, Cost
);
151 unsigned URIDistance::distance(llvm::StringRef URI
) {
152 auto R
= Cache
.try_emplace(llvm::hash_value(URI
), FileDistance::Unreachable
);
154 return R
.first
->getSecond();
155 if (auto U
= clangd::URI::parse(URI
)) {
156 dlog("distance({0} = {1})", URI
, U
->body());
157 R
.first
->second
= forScheme(U
->scheme()).distance(U
->body());
159 log("URIDistance::distance() of unparseable {0}: {1}", URI
, U
.takeError());
161 return R
.first
->second
;
164 FileDistance
&URIDistance::forScheme(llvm::StringRef Scheme
) {
165 auto &Delegate
= ByScheme
[Scheme
];
167 llvm::StringMap
<SourceParams
> SchemeSources
;
168 for (const auto &Source
: Sources
) {
169 if (auto U
= clangd::URI::create(Source
.getKey(), Scheme
))
170 SchemeSources
.try_emplace(U
->body(), Source
.getValue());
172 llvm::consumeError(U
.takeError());
174 dlog("FileDistance for scheme {0}: {1}/{2} sources", Scheme
,
175 SchemeSources
.size(), Sources
.size());
176 Delegate
.reset(new FileDistance(std::move(SchemeSources
), Opts
));
181 static std::pair
<std::string
, int> scopeToPath(llvm::StringRef Scope
) {
182 llvm::SmallVector
<llvm::StringRef
> Split
;
183 Scope
.split(Split
, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
184 return {"/" + llvm::join(Split
, "/"), Split
.size()};
188 createScopeFileDistance(llvm::ArrayRef
<std::string
> QueryScopes
) {
189 FileDistanceOptions Opts
;
192 Opts
.AllowDownTraversalFromRoot
= false;
194 llvm::StringMap
<SourceParams
> Sources
;
195 llvm::StringRef Preferred
=
196 QueryScopes
.empty() ? "" : QueryScopes
.front().c_str();
197 for (llvm::StringRef S
: QueryScopes
) {
199 // Penalize the global scope even it's preferred, as all projects can define
200 // symbols in it, and there is pattern where using-namespace is used in
201 // place of enclosing namespaces (e.g. in implementation files).
203 Param
.Cost
= S
== "" ? 4 : 0;
204 else if (Preferred
.startswith(S
) && !S
.empty())
205 continue; // just rely on up-traversals.
207 Param
.Cost
= S
== "" ? 6 : 2;
208 auto Path
= scopeToPath(S
);
209 // The global namespace is not 'near' its children.
210 Param
.MaxUpTraversals
= std::max(Path
.second
- 1, 0);
211 Sources
[Path
.first
] = std::move(Param
);
213 return FileDistance(std::move(Sources
), Opts
);
216 ScopeDistance::ScopeDistance(llvm::ArrayRef
<std::string
> QueryScopes
)
217 : Distance(createScopeFileDistance(QueryScopes
)) {}
219 unsigned ScopeDistance::distance(llvm::StringRef SymbolScope
) {
220 return Distance
.distance(scopeToPath(SymbolScope
).first
);
223 } // namespace clangd