1 //===--- Dexp.cpp - Dex EXPloration tool ------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements a simple interactive tool which can be used to manually
10 // evaluate symbol search quality of Clangd index.
12 //===----------------------------------------------------------------------===//
14 #include "index/Index.h"
15 #include "index/Relation.h"
16 #include "index/Serialization.h"
17 #include "index/remote/Client.h"
18 #include "llvm/ADT/ScopeExit.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/LineEditor/LineEditor.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Support/Signals.h"
// Positional argument naming the index to explore; per its description this is
// either a path to an index file or a "remote:server.address" string.
30 llvm::cl::opt
<std::string
> IndexLocation(
31 llvm::cl::desc("<path to index file | remote:server.address>"),
32 llvm::cl::Positional
);
// "-c" option: a single command to run. When it is non-empty, main() runs it
// and returns instead of starting the interactive loop.
34 llvm::cl::opt
<std::string
>
35 ExecCommand("c", llvm::cl::desc("Command to execute and then exit."));
// Path to the project. main() rejects remote mode when this is empty and refers
// to the flag as --project-root in its error message (the flag name itself is
// not visible in this excerpt).
37 llvm::cl::opt
<std::string
> ProjectRoot(
40 "Path to the project. Required when connecting using remote index."));
// Tool description handed to llvm::cl::ParseCommandLineOptions() in main().
// NOTE(review): the sentence beginning with 'Type use' reads like a typo
// (probably meant to start with 'Use the'); it is user-facing text, so it is
// deliberately left untouched here.
42 static constexpr char Overview
[] = R
"(
43 This is an **experimental** interactive tool to process user-provided search
44 queries over given symbol collection obtained via clangd-indexer. The
45 tool can be used to evaluate search quality of existing index implementations
46 and manually construct non-trivial test cases.
48 You can connect to remote index by passing remote:address to dexp. Example:
50 $ dexp remote:0.0.0.0:9000
52 Type use "help
" request to get information about the details.
// Samples std::chrono::high_resolution_clock twice and prints the difference,
// converted to milliseconds, to llvm::outs() as "<Name> took <duration>.".
//   Name - label used in the printed message.
//   F    - callable being timed.
// NOTE(review): the call to F is not visible in this excerpt; presumably F()
// is invoked between the two clock samples - confirm against the full file.
55 void reportTime(llvm::StringRef Name
, llvm::function_ref
<void()> F
) {
56 const auto TimerStart
= std::chrono::high_resolution_clock::now();
58 const auto TimerStop
= std::chrono::high_resolution_clock::now();
59 const auto Duration
= std::chrono::duration_cast
<std::chrono::milliseconds
>(
60 TimerStop
- TimerStart
);
61 llvm::outs() << llvm::formatv("{0} took {1:ms+n}.\n", Name
, Duration
);
// Collects the IDs of symbols in Index whose qualified name (Scope + Name)
// is exactly equal to QualifiedName.
//
// The name is split into a scope and an unqualified name; the unqualified name
// becomes the fuzzyFind query and the scope restricts the search. Every symbol
// reported by fuzzyFind is then compared against QualifiedName (with any
// leading "::" already stripped) and only exact matches are recorded in SymIDs.
// NOTE(review): the statement returning the result is not visible in this
// excerpt; presumably SymIDs is returned.
64 std::vector
<SymbolID
> getSymbolIDsFromIndex(llvm::StringRef QualifiedName
,
65 const SymbolIndex
*Index
) {
66 FuzzyFindRequest Request
;
67 // Remove leading "::" qualifier as FuzzyFind doesn't need leading "::"
68 // qualifier for global scope.
69 bool IsGlobalScope
= QualifiedName
.consume_front("::");
70 auto Names
= splitQualifiedName(QualifiedName
);
71 if (IsGlobalScope
|| !Names
.first
.empty())
72 Request
.Scopes
= {std::string(Names
.first
)};
// NOTE(review): an `else` appears to be missing from this excerpt between the
// two assignments to Request.Scopes; as shown, the assignment below would
// always overwrite the one above.
74 // QualifiedName refers to a symbol in global scope (e.g. "GlobalSymbol"),
75 // add the global scope to the request.
76 Request
.Scopes
= {""};
78 Request
.Query
= std::string(Names
.second
);
79 std::vector
<SymbolID
> SymIDs
;
80 Index
->fuzzyFind(Request
, [&](const Symbol
&Sym
) {
// fuzzyFind is approximate, so keep only symbols whose full name matches.
81 std::string SymQualifiedName
= (Sym
.Scope
+ Sym
.Name
).str();
82 if (QualifiedName
== SymQualifiedName
)
83 SymIDs
.push_back(Sym
.ID
);
88 // REPL commands inherit from Command and contain their options as members.
89 // Creating a Command populates parser options, parseAndRun() resets them.
91 // By resetting the parser options, we lost the standard -help flag.
// Explicit "help" flag registered by every command; parseAndRun() checks it and
// prints the llvm::cl help message when it was passed.
92 llvm::cl::opt
<bool, false, llvm::cl::parser
<bool>> Help
{
93 "help", llvm::cl::desc("Display available options"),
94 llvm::cl::ValueDisallowed
, llvm::cl::cat(llvm::cl::getGeneralCategory())};
95 // FIXME: Allow commands to signal failure.
// Command body supplied by each concrete command. parseAndRun() invokes it
// through reportTime() after Index has been assigned.
96 virtual void run() = 0;
// Index the command queries; assigned by parseAndRun() before run() is called.
99 const SymbolIndex
*Index
;
102 virtual ~Command() = default;
// Parses Argv with llvm::cl::ParseCommandLineOptions and then runs the command
// against Index, reporting the elapsed time under the label Argv[0].
//   Argv     - command name followed by its arguments.
//   Overview - description for the command (its use is on a line not visible
//              in this excerpt).
//   Index    - index to query; its address is stored in this->Index.
// The parser is reset via a scope-exit handler when this function returns.
// NOTE(review): the remaining ParseCommandLineOptions arguments and the
// return statements are not visible here. Presumably OS is handed to the parser
// to capture errors and the function returns false when parsing fails - confirm.
103 bool parseAndRun(llvm::ArrayRef
<const char *> Argv
, const char *Overview
,
104 const SymbolIndex
&Index
) {
105 std::string ParseErrs
;
106 llvm::raw_string_ostream
OS(ParseErrs
);
107 bool Ok
= llvm::cl::ParseCommandLineOptions(Argv
.size(), Argv
.data(),
109 // must do this before opts are destroyed
110 auto Cleanup
= llvm::make_scope_exit(llvm::cl::ResetCommandLineParser
);
111 if (Help
.getNumOccurrences() > 0) {
112 // Avoid printing parse errors in this case.
113 // (Well, in theory. A bunch get printed to llvm::errs() regardless!)
114 llvm::cl::PrintHelpMessage();
// Emit whatever was accumulated in the ParseErrs-backed stream.
118 llvm::outs() << OS
.str();
120 this->Index
= &Index
;
121 reportTime(Argv
[0], [&] { run(); });
127 // FIXME(kbobyrev): Ideas for more commands:
128 // * load/swap/reload index: this would make it possible to get rid of llvm::cl
129 // usages in the tool driver and actually use llvm::cl library in the REPL.
130 // * show posting list density histogram (or dump data somewhere so that user
132 // * show number of tokens of each kind
133 // * print out tokens with the most dense posting lists
134 // * print out tokens with least dense posting lists
// REPL command that issues a fuzzyFind request against the index and prints a
// table with one row per matching symbol (rank, symbol ID, qualified name).
136 class FuzzyFind
: public Command
{
// Positional query string that is fuzzy-matched against symbol names.
137 llvm::cl::opt
<std::string
> Query
{
139 llvm::cl::Positional
,
141 llvm::cl::desc("Query string to be fuzzy-matched"),
// Optional comma-separated list of scopes the search is restricted to.
143 llvm::cl::opt
<std::string
> Scopes
{
145 llvm::cl::desc("Allowed symbol scopes (comma-separated list)"),
// Maximum number of results to display; copied into Request.Limit.
147 llvm::cl::opt
<unsigned> Limit
{
150 llvm::cl::desc("Max results to display"),
// Builds the request from the parsed options, prints a header row and then one
// formatted row per symbol reported by Index->fuzzyFind().
153 void run() override
{
154 FuzzyFindRequest Request
;
155 Request
.Limit
= Limit
;
156 Request
.Query
= Query
;
157 if (Scopes
.getNumOccurrences() > 0) {
// The local Scopes below shadows the option member, hence the explicit
// this->Scopes when reading the option value.
158 llvm::SmallVector
<llvm::StringRef
> Scopes
;
159 llvm::StringRef(this->Scopes
).split(Scopes
, ',');
160 Request
.Scopes
= {Scopes
.begin(), Scopes
.end()};
// With no scopes requested, allow matches from any scope.
162 Request
.AnyScope
= Request
.Scopes
.empty();
163 // FIXME(kbobyrev): Print symbol final scores to see the distribution.
164 static const auto *OutputFormat
= "{0,-4} | {1,-40} | {2,-25}\n";
165 llvm::outs() << llvm::formatv(OutputFormat
, "Rank", "Symbol ID",
168 Index
->fuzzyFind(Request
, [&](const Symbol
&Sym
) {
// Rank is declared on a line not visible in this excerpt; it is
// post-incremented once per printed symbol.
169 llvm::outs() << llvm::formatv(OutputFormat
, Rank
++, Sym
.ID
.str(),
170 Sym
.Scope
+ Sym
.Name
);
// REPL command that looks symbols up by ID or by qualified name and writes
// each symbol found to llvm::outs() in YAML form.
175 class Lookup
: public Command
{
// Positional symbol ID, given as a hex string.
176 llvm::cl::opt
<std::string
> ID
{
178 llvm::cl::Positional
,
179 llvm::cl::desc("Symbol ID to look up (hex)"),
// Qualified name to look up; used when no ID was passed.
181 llvm::cl::opt
<std::string
> Name
{
183 llvm::cl::desc("Qualified name to look up."),
// Resolves the requested IDs (parsed from ID, or found by name through
// getSymbolIDsFromIndex) and dumps every symbol Index->lookup() reports.
186 void run() override
{
// At least one of the two options must have been supplied.
187 if (ID
.getNumOccurrences() == 0 && Name
.getNumOccurrences() == 0) {
189 << "Missing required argument: please provide id or -name.\n";
192 std::vector
<SymbolID
> IDs
;
193 if (ID
.getNumOccurrences()) {
194 auto SID
= SymbolID::fromStr(ID
);
// Failure path: the parse error is converted to text and printed.
196 llvm::errs() << llvm::toString(SID
.takeError()) << "\n";
201 IDs
= getSymbolIDsFromIndex(Name
, Index
);
204 LookupRequest Request
;
205 Request
.IDs
.insert(IDs
.begin(), IDs
.end());
206 bool FoundSymbol
= false;
207 Index
->lookup(Request
, [&](const Symbol
&Sym
) {
209 llvm::outs() << toYAML(Sym
);
// NOTE(review): the condition guarding this message is not visible in this
// excerpt; presumably it is printed when FoundSymbol is still false.
212 llvm::errs() << "not found\n";
// REPL command that prints the references to a symbol, selected by ID or by
// qualified name, limited to files whose URI body matches a regular expression.
216 class Refs
: public Command
{
// Positional symbol ID, given as a hex string.
217 llvm::cl::opt
<std::string
> ID
{
219 llvm::cl::Positional
,
220 llvm::cl::desc("Symbol ID of the symbol being queried (hex)."),
// Qualified name of the symbol; used when no ID was passed.
222 llvm::cl::opt
<std::string
> Name
{
224 llvm::cl::desc("Qualified name of the symbol being queried."),
// Regular expression applied to each reference's file; defaults to ".*".
226 llvm::cl::opt
<std::string
> Filter
{
228 llvm::cl::init(".*"),
230 "Print all results from files matching this regular expression."),
// Resolves the symbol IDs, queries Index->refs() and prints every reference
// whose file URI body matches Filter.
233 void run() override
{
// At least one of the two options must have been supplied.
234 if (ID
.getNumOccurrences() == 0 && Name
.getNumOccurrences() == 0) {
236 << "Missing required argument: please provide id or -name.\n";
239 std::vector
<SymbolID
> IDs
;
240 if (ID
.getNumOccurrences()) {
241 auto SID
= SymbolID::fromStr(ID
);
// Failure path: the parse error is converted to text and printed.
243 llvm::errs() << llvm::toString(SID
.takeError()) << "\n";
248 IDs
= getSymbolIDsFromIndex(Name
, Index
);
// A name that resolves to several symbols is reported as ambiguous.
249 if (IDs
.size() > 1) {
250 llvm::errs() << llvm::formatv(
251 "The name {0} is ambiguous, found {1} different "
252 "symbols. Please use id flag to disambiguate.\n",
257 RefsRequest RefRequest
;
258 RefRequest
.IDs
.insert(IDs
.begin(), IDs
.end());
259 llvm::Regex
RegexFilter(Filter
);
260 Index
->refs(RefRequest
, [&RegexFilter
](const Ref
&R
) {
261 auto U
= URI::parse(R
.Location
.FileURI
);
// NOTE(review): unlike the SID failure path above, which goes through
// llvm::toString(), the Error here is streamed directly. Confirm that this
// does not leave the llvm::Error unchecked, and consider
// llvm::toString(U.takeError()) << "\n" for consistency.
263 llvm::errs() << U
.takeError();
266 if (RegexFilter
.match(U
->body()))
267 llvm::outs() << R
<< "\n";
// REPL command that queries Index->relations() for a subject symbol and a
// relation kind, and writes each related symbol to llvm::outs() as YAML.
272 class Relations
: public Command
{
// Positional symbol ID of the subject, given as a hex string.
273 llvm::cl::opt
<std::string
> ID
{
275 llvm::cl::Positional
,
276 llvm::cl::desc("Symbol ID of the symbol being queried (hex)."),
// Relation kind used as the request predicate; accepts "base_of" and
// "overridden_by".
278 llvm::cl::opt
<RelationKind
> Relation
{
280 llvm::cl::desc("Relation kind for the predicate."),
281 values(clEnumValN(RelationKind::BaseOf
, "base_of",
282 "Find subclasses of a class."),
283 clEnumValN(RelationKind::OverriddenBy
, "overridden_by",
284 "Find methods that overrides a virtual method.")),
// Builds a RelationsRequest with the parsed ID as its subject and Relation as
// its predicate, then dumps every symbol the index reports.
287 void run() override
{
// Both options are mandatory for this command.
288 if (ID
.getNumOccurrences() == 0 || Relation
.getNumOccurrences() == 0) {
290 << "Missing required argument: please provide id and -relation.\n";
293 RelationsRequest Req
;
// NOTE(review): if the guard above returns when ID is absent (the return is
// not visible in this excerpt), this check is always true - confirm.
294 if (ID
.getNumOccurrences()) {
295 auto SID
= SymbolID::fromStr(ID
);
// Failure path: the parse error is converted to text and printed.
297 llvm::errs() << llvm::toString(SID
.takeError()) << "\n";
300 Req
.Subjects
.insert(*SID
);
302 Req
.Predicate
= Relation
.getValue();
// The callback's SymbolID parameter is unused; only the symbol is printed.
303 Index
->relations(Req
, [](const SymbolID
&SID
, const Symbol
&S
) {
304 llvm::outs() << toYAML(S
);
// REPL command that re-reads the index file named by the global IndexLocation
// option and writes it to OutputFile in the requested format.
309 class Export
: public Command
{
// Output format: "yaml" or "binary" (RIFF); initialised to YAML.
310 llvm::cl::opt
<IndexFileFormat
> Format
{
312 llvm::cl::desc("Format of index export"),
314 clEnumValN(IndexFileFormat::YAML
, "yaml",
315 "human-readable YAML format"),
316 clEnumValN(IndexFileFormat::RIFF
, "binary", "binary RIFF format")),
317 llvm::cl::init(IndexFileFormat::YAML
),
// Positional path of the file the exported index is written to.
319 llvm::cl::opt
<std::string
> OutputFile
{
321 llvm::cl::Positional
,
323 llvm::cl::desc("Output file for export"),
// Steps: load the input buffer, parse it with readIndexFile(), open the output
// stream, then serialise through IndexFileOut with the chosen Format. Failures
// along the way are reported on llvm::errs().
// NOTE(review): IndexLocation is opened as a local file here, so this
// presumably does not work for "remote:" locations - confirm.
327 void run() override
{
328 using namespace clang::clangd
;
329 // Read input file (as specified in global option)
330 auto Buffer
= llvm::MemoryBuffer::getFile(IndexLocation
);
332 llvm::errs() << llvm::formatv("Can't open {0}", IndexLocation
) << "\n";
336 // Auto-detects input format when parsing
337 auto IndexIn
= clang::clangd::readIndexFile(Buffer
->get()->getBuffer(),
338 SymbolOrigin::Static
);
340 llvm::errs() << llvm::toString(IndexIn
.takeError()) << "\n";
344 // Prepare output file
// EC is declared on a line not visible in this excerpt.
346 llvm::raw_fd_ostream
OutputStream(OutputFile
, EC
);
348 llvm::errs() << llvm::formatv("Can't open {0} for writing", OutputFile
)
354 clang::clangd::IndexFileOut
IndexOut(IndexIn
.get());
355 IndexOut
.Format
= Format
;
356 OutputStream
<< IndexOut
;
// Table of the available REPL commands. runCommand() iterates it to print the
// help listing and to find the command whose Name matches the first argument.
// Each entry holds the command name, a one-line description and a factory that
// creates a fresh Command instance. The struct header and its Name field are
// on lines not visible in this excerpt.
362 const char *Description
;
363 std::function
<std::unique_ptr
<Command
>()> Implementation
;
365 {"find", "Search for symbols with fuzzyFind", std::make_unique
<FuzzyFind
>},
366 {"lookup", "Dump symbol details by ID or qualified name",
367 std::make_unique
<Lookup
>},
368 {"refs", "Find references by ID or qualified name", std::make_unique
<Refs
>},
369 {"relations", "Find relations by ID and relation kind",
370 std::make_unique
<Relations
>},
371 {"export", "Export index", std::make_unique
<Export
>},
// Opens the index named by Index.
// A value starting with "remote:" yields a remote client for the address that
// follows the prefix; anything else is passed to loadIndex() with
// SymbolOrigin::Static, UseDex and SupportContainedRefs enabled.
// NOTE(review): the second argument to remote::getClient is on a line not
// visible in this excerpt.
374 std::unique_ptr
<SymbolIndex
> openIndex(llvm::StringRef Index
) {
375 return Index
.starts_with("remote:")
376 ? remote::getClient(Index
.drop_front(strlen("remote:")),
378 : loadIndex(Index
, SymbolOrigin::Static
, /*UseDex=*/true,
379 /*SupportContainedRefs=*/true);
// Executes one REPL request line against Index.
//   Request - the raw command line; taken by value because it is modified in
//             place to build the argument vector.
//   Index   - index the selected command runs against.
// "help" prints the command listing. Otherwise the first word selects an entry
// of CommandInfo, whose parseAndRun() result is returned. An unrecognised
// command prints a hint to llvm::errs().
// NOTE(review): the return values of the help and unknown-command paths are
// not visible in this excerpt.
382 bool runCommand(std::string Request
, const SymbolIndex
&Index
) {
383 // Split on spaces and add required null-termination.
384 std::replace(Request
.begin(), Request
.end(), ' ', '\0');
385 llvm::SmallVector
<llvm::StringRef
> Args
;
386 llvm::StringRef(Request
).split(Args
, '\0', /*MaxSplit=*/-1,
387 /*KeepEmpty=*/false);
// NOTE(review): Args.front() requires a non-empty Args; presumably an
// empty-input guard sits on the lines missing from this excerpt - confirm.
390 if (Args
.front() == "help") {
391 llvm::outs() << "dexp - Index explorer\nCommands:\n";
392 for (const auto &C
: CommandInfo
)
393 llvm::outs() << llvm::formatv("{0,16} - {1}\n", C
.Name
, C
.Description
);
394 llvm::outs() << "Get detailed command help with e.g. `find -help`.\n";
// Each StringRef points into Request, where spaces were replaced by NULs, so
// its data() can be used as a C string.
397 llvm::SmallVector
<const char *> FakeArgv
;
398 for (llvm::StringRef S
: Args
)
399 FakeArgv
.push_back(S
.data()); // Terminated by separator or end of string.
401 for (const auto &Cmd
: CommandInfo
) {
402 if (Cmd
.Name
== Args
.front())
403 return Cmd
.Implementation()->parseAndRun(FakeArgv
, Cmd
.Description
,
406 llvm::errs() << "Unknown command. Try 'help'.\n";
411 } // namespace clangd
// Tool entry point: parses the global options, opens the index and then either
// runs the single command given with -c or reads commands interactively.
414 int main(int argc
, const char *argv
[]) {
415 using namespace clang::clangd
;
417 llvm::cl::ParseCommandLineOptions(argc
, argv
, Overview
);
419 // Preserve global options when flag parser is reset, so commands can use
// Re-setting each option to its own value with initial=true is how the parsed
// values are kept across the ResetCommandLineParser() call below.
421 IndexLocation
.setValue(IndexLocation
, /*initial=*/true);
422 ExecCommand
.setValue(ExecCommand
, /*initial=*/true);
423 ProjectRoot
.setValue(ProjectRoot
, /*initial=*/true);
425 llvm::cl::ResetCommandLineParser(); // We reuse it for REPL commands.
426 llvm::sys::PrintStackTraceOnErrorSignal(argv
[0]);
// A "remote:" index location additionally requires the project root.
428 bool RemoteMode
= llvm::StringRef(IndexLocation
).starts_with("remote:");
429 if (RemoteMode
&& ProjectRoot
.empty()) {
430 llvm::errs() << "--project-root is required in remote mode\n";
// Open the index under reportTime() so the time taken is printed.
434 std::unique_ptr
<SymbolIndex
> Index
;
435 reportTime(RemoteMode
? "Remote index client creation" : "Dex build",
436 [&]() { Index
= openIndex(IndexLocation
); });
// NOTE(review): the check guarding this message is not visible in this
// excerpt; presumably it tests whether Index is null.
439 llvm::errs() << "Failed to open the index.\n";
// Non-interactive mode: run the -c command and map its result to the exit code.
443 if (!ExecCommand
.empty())
444 return runCommand(ExecCommand
, *Index
) ? 0 : 1;
// Interactive mode: keep running commands while readLine() yields a line.
446 llvm::LineEditor
LE("dexp");
447 while (std::optional
<std::string
> Request
= LE
.readLine())
448 runCommand(std::move(*Request
), *Index
);