1 //===--- FuzzySymbolIndex.cpp - Lookup symbols for autocomplete -*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
8 #include "FuzzySymbolIndex.h"
9 #include "llvm/Support/Regex.h"
11 using clang::find_all_symbols::SymbolAndSignals
;
12 using llvm::StringRef
;
15 namespace include_fixer
{
// FuzzySymbolIndex implementation that keeps every symbol in a std::vector and
// answers queries by matching a regular expression against a per-symbol key.
// NOTE(review): this listing is missing lines inside the class (access
// specifiers, closing braces, the tail of some statements) - confirm against
// the full file before editing.
18 class MemSymbolIndex
: public FuzzySymbolIndex
{
// Builds the index: each symbol's name is tokenized and stored as an Entry
// whose key is the tokens joined by single spaces.
20 MemSymbolIndex(std::vector
<SymbolAndSignals
> Symbols
) {
21 for (auto &Symbol
: Symbols
) {
22 auto Tokens
= tokenize(Symbol
.Symbol
.getName());
// `this->` disambiguates the member vector from the same-named parameter.
23 this->Symbols
.emplace_back(
24 StringRef(llvm::join(Tokens
.begin(), Tokens
.end(), " ")),
// NOTE(review): the second emplace_back argument and the end of the
// constructor are not visible here; presumably the symbol itself is stored as
// Entry::second - confirm.
// Returns the stored symbols whose key matches the query.
// The query is tokenized the same way as symbol names, turned into a regex by
// queryRegexp(), and anchored at the start of the key with "^".
29 std::vector
<SymbolAndSignals
> search(StringRef Query
) override
{
30 auto Tokens
= tokenize(Query
);
31 llvm::Regex
Pattern("^" + queryRegexp(Tokens
));
32 std::vector
<SymbolAndSignals
> Results
;
// Linear scan over all entries; matching entries contribute a copy of their
// symbol to Results.
33 for (const Entry
&E
: Symbols
)
34 if (Pattern
.match(E
.first
))
35 Results
.push_back(E
.second
);
// NOTE(review): the return statement of search() is not visible in this
// listing; presumably it returns Results - confirm.
// Entry pairs the space-joined token key (first) with the symbol (second).
40 using Entry
= std::pair
<llvm::SmallString
<32>, SymbolAndSignals
>;
// All indexed entries, filled by the constructor and scanned by search().
41 std::vector
<Entry
> Symbols
;
44 // Helpers for tokenize state machine.
// The enumerators below are the values taken by the `TokenizeState State`
// variable in FuzzySymbolIndex::tokenize().
// NOTE(review): the enum's opening and closing lines are not visible in this
// listing - confirm against the full file.
46 EMPTY
, // No pending characters.
47 ONE_BIG
, // Read one uppercase letter, could be WORD or Word.
48 BIG_WORD
, // Reading an uppercase WORD.
49 SMALL_WORD
, // Reading a lowercase word.
50 NUMBER
// Reading a number.
// Character classes produced by classify() and branched on by the tokenizer:
// tokenize() has separate branches for LOWER, UPPER and DIGIT.
// NOTE(review): MISC presumably covers every other character - confirm
// against classify()'s body.
53 enum CharType
{ UPPER
, LOWER
, DIGIT
, MISC
};
// Maps a single character to its CharType.
// NOTE(review): the function body is not visible in this listing; the exact
// predicates used for each class must be confirmed against the full file.
54 CharType
classify(char c
) {
// Splits Text into tokens using a small state machine driven by the class of
// each character (see CharType). Every emitted token is lowercased.
// NOTE(review): several lines of this function (the declaration of Start, the
// rest of Flush, the bodies of most branches, the final flush and return) are
// not visible in this listing - confirm against the full file before editing.
66 std::vector
<std::string
> FuzzySymbolIndex::tokenize(StringRef Text
) {
67 std::vector
<std::string
> Result
;
68 // State describes the treatment of text from Start to I.
69 // Once text is Flush()ed into Result, we're done with it and advance Start.
70 TokenizeState State
= EMPTY
;
// Flush(End) emits the pending text [Start, End) as one lowercased token.
// It captures by reference so it can read Start/Text and append to Result.
72 auto Flush
= [&](size_t End
) {
74 Result
.push_back(Text
.substr(Start
, End
- Start
).lower());
// Main scan: classify each character and dispatch on its class.
79 for (size_t I
= 0; I
< Text
.size(); ++I
) {
80 CharType Type
= classify(Text
[I
]);
// Lowercase letter.
83 else if (Type
== LOWER
)
// Flushing at I - 1 keeps the last uppercase letter with the lowercase run
// that follows it.
86 Flush(I
- 1); // FOOBar: first token is FOO, not FOOB.
// Uppercase letter.
97 else if (Type
== UPPER
)
// A digit only needs handling when we are not already inside a number.
108 else if (Type
== DIGIT
&& State
!= NUMBER
) {
// Builds the regular-expression text used to match a tokenized query against
// the space-joined token keys stored in the index. The caller (search) anchors
// the result with a leading "^".
// NOTE(review): the return type, the declaration of Result, any conditions
// guarding the two append() calls, and the return statement are not visible in
// this listing - confirm against the full file.
118 FuzzySymbolIndex::queryRegexp(const std::vector
<std::string
> &Tokens
) {
120 for (size_t I
= 0; I
< Tokens
.size(); ++I
) {
// "[[:alnum:]]* " consumes the remaining alphanumerics of a key token plus the
// single space that separates it from the next key token.
122 Result
.append("[[:alnum:]]* ");
123 for (size_t J
= 0; J
< Tokens
[I
].size(); ++J
) {
// "([[:alnum:]]* )?" optionally skips to the start of the next key token
// before the next query character.
125 Result
.append("([[:alnum:]]* )?");
// The query character itself is appended verbatim.
// NOTE(review): it is not regex-escaped here; presumably tokens only contain
// alphanumerics produced by tokenize() - confirm.
126 Result
.push_back(Tokens
[I
][J
]);
// Creates a FuzzySymbolIndex backed by MemSymbolIndex from the YAML symbol
// file at FilePath.
// Returns an llvm::Error built from the buffer's error code on the error
// path, otherwise a MemSymbolIndex built from the symbols parsed by
// find_all_symbols::ReadSymbolInfosFromYAML.
// NOTE(review): the condition guarding the error return and the closing brace
// are not visible in this listing; presumably the error path is taken only
// when getFile() failed - confirm.
132 llvm::Expected
<std::unique_ptr
<FuzzySymbolIndex
>>
133 FuzzySymbolIndex::createFromYAML(StringRef FilePath
) {
// getFile() result exposes getError() and get(), both used below.
134 auto Buffer
= llvm::MemoryBuffer::getFile(FilePath
);
136 return llvm::errorCodeToError(Buffer
.getError());
// Parse the whole buffer contents and hand the symbols to MemSymbolIndex.
137 return std::make_unique
<MemSymbolIndex
>(
138 find_all_symbols::ReadSymbolInfosFromYAML(Buffer
.get()->getBuffer()));
141 } // namespace include_fixer