1 //===-- FuzzyMatchTests.cpp - String fuzzy matcher tests --------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "FuzzyMatch.h"
11 #include "gmock/gmock.h"
12 #include "gtest/gtest.h"
19 struct ExpectedMatch
{
20 // Annotations are optional, and will not be asserted if absent.
21 ExpectedMatch(llvm::StringRef Match
) : Word(Match
), Annotated(Match
) {
23 Word
.erase(std::remove(Word
.begin(), Word
.end(), C
), Word
.end());
24 if (Word
.size() == Annotated
->size())
25 Annotated
= llvm::None
;
27 bool accepts(llvm::StringRef ActualAnnotated
) const {
28 return !Annotated
|| ActualAnnotated
== *Annotated
;
31 friend llvm::raw_ostream
&operator<<(llvm::raw_ostream
&OS
,
32 const ExpectedMatch
&M
) {
35 OS
<< "' as " << *M
.Annotated
;
42 llvm::Optional
<llvm::StringRef
> Annotated
;
45 struct MatchesMatcher
: public ::testing::MatcherInterface
<llvm::StringRef
> {
46 ExpectedMatch Candidate
;
47 llvm::Optional
<float> Score
;
48 MatchesMatcher(ExpectedMatch Candidate
, llvm::Optional
<float> Score
)
49 : Candidate(std::move(Candidate
)), Score(Score
) {}
51 void DescribeTo(::std::ostream
*OS
) const override
{
52 llvm::raw_os_ostream(*OS
) << "Matches " << Candidate
;
54 *OS
<< " with score " << *Score
;
57 bool MatchAndExplain(llvm::StringRef Pattern
,
58 ::testing::MatchResultListener
*L
) const override
{
59 std::unique_ptr
<llvm::raw_ostream
> OS(
61 ? (llvm::raw_ostream
*)(new llvm::raw_os_ostream(*L
->stream()))
62 : new llvm::raw_null_ostream());
63 FuzzyMatcher
Matcher(Pattern
);
64 auto Result
= Matcher
.match(Candidate
.Word
);
65 auto AnnotatedMatch
= Matcher
.dumpLast(*OS
<< "\n");
66 return Result
&& Candidate
.accepts(AnnotatedMatch
) &&
67 (!Score
|| ::testing::Value(*Result
, ::testing::FloatEq(*Score
)));
71 // Accepts patterns that match a given word, optionally requiring a score.
72 // Dumps the debug tables on match failure.
73 ::testing::Matcher
<llvm::StringRef
> matches(llvm::StringRef M
,
74 llvm::Optional
<float> Score
= {}) {
75 return ::testing::MakeMatcher
<llvm::StringRef
>(new MatchesMatcher(M
, Score
));
78 TEST(FuzzyMatch
, Matches
) {
79 EXPECT_THAT("", matches("unique_ptr"));
80 EXPECT_THAT("u_p", matches("[u]nique[_p]tr"));
81 EXPECT_THAT("up", matches("[u]nique_[p]tr"));
82 EXPECT_THAT("uq", Not(matches("unique_ptr")));
83 EXPECT_THAT("qp", Not(matches("unique_ptr")));
84 EXPECT_THAT("log", Not(matches("SVGFEMorphologyElement")));
86 EXPECT_THAT("tit", matches("win.[tit]"));
87 EXPECT_THAT("title", matches("win.[title]"));
88 EXPECT_THAT("WordCla", matches("[Word]Character[Cla]ssifier"));
89 EXPECT_THAT("WordCCla", matches("[WordC]haracter[Cla]ssifier"));
91 EXPECT_THAT("dete", Not(matches("editor.quickSuggestionsDelay")));
93 EXPECT_THAT("highlight", matches("editorHover[Highlight]"));
94 EXPECT_THAT("hhighlight", matches("editor[H]over[Highlight]"));
95 EXPECT_THAT("dhhighlight", Not(matches("editorHoverHighlight")));
97 EXPECT_THAT("-moz", matches("[-moz]-foo"));
98 EXPECT_THAT("moz", matches("-[moz]-foo"));
99 EXPECT_THAT("moza", matches("-[moz]-[a]nimation"));
101 EXPECT_THAT("ab", matches("[ab]A"));
102 EXPECT_THAT("ccm", Not(matches("cacmelCase")));
103 EXPECT_THAT("bti", Not(matches("the_black_knight")));
104 EXPECT_THAT("ccm", Not(matches("camelCase")));
105 EXPECT_THAT("cmcm", Not(matches("camelCase")));
106 EXPECT_THAT("BK", matches("the_[b]lack_[k]night"));
107 EXPECT_THAT("KeyboardLayout=", Not(matches("KeyboardLayout")));
108 EXPECT_THAT("LLL", matches("SVisual[L]ogger[L]ogs[L]ist"));
109 EXPECT_THAT("LLLL", Not(matches("SVilLoLosLi")));
110 EXPECT_THAT("LLLL", Not(matches("SVisualLoggerLogsList")));
111 EXPECT_THAT("TEdit", matches("[T]ext[Edit]"));
112 EXPECT_THAT("TEdit", matches("[T]ext[Edit]or"));
113 EXPECT_THAT("TEdit", Not(matches("[T]ext[edit]")));
114 EXPECT_THAT("TEdit", matches("[t]ext_[edit]"));
115 EXPECT_THAT("TEditDt", matches("[T]ext[Edit]or[D]ecoration[T]ype"));
116 EXPECT_THAT("TEdit", matches("[T]ext[Edit]orDecorationType"));
117 EXPECT_THAT("Tedit", matches("[T]ext[Edit]"));
118 EXPECT_THAT("ba", Not(matches("?AB?")));
119 EXPECT_THAT("bkn", matches("the_[b]lack_[kn]ight"));
120 EXPECT_THAT("bt", Not(matches("the_[b]lack_knigh[t]")));
121 EXPECT_THAT("ccm", Not(matches("[c]amelCase[cm]")));
122 EXPECT_THAT("fdm", Not(matches("[f]in[dM]odel")));
123 EXPECT_THAT("fob", Not(matches("[fo]o[b]ar")));
124 EXPECT_THAT("fobz", Not(matches("foobar")));
125 EXPECT_THAT("foobar", matches("[foobar]"));
126 EXPECT_THAT("form", matches("editor.[form]atOnSave"));
127 EXPECT_THAT("g p", matches("[G]it:[ P]ull"));
128 EXPECT_THAT("g p", matches("[G]it:[ P]ull"));
129 EXPECT_THAT("gip", matches("[Gi]t: [P]ull"));
130 EXPECT_THAT("gip", matches("[Gi]t: [P]ull"));
131 EXPECT_THAT("gp", matches("[G]it: [P]ull"));
132 EXPECT_THAT("gp", matches("[G]it_Git_[P]ull"));
133 EXPECT_THAT("is", matches("[I]mport[S]tatement"));
134 EXPECT_THAT("is", matches("[is]Valid"));
135 EXPECT_THAT("lowrd", Not(matches("[low]Wo[rd]")));
136 EXPECT_THAT("myvable", Not(matches("[myva]ria[ble]")));
137 EXPECT_THAT("no", Not(matches("")));
138 EXPECT_THAT("no", Not(matches("match")));
139 EXPECT_THAT("ob", Not(matches("foobar")));
140 EXPECT_THAT("sl", matches("[S]Visual[L]oggerLogsList"));
141 EXPECT_THAT("sllll", matches("[S]Visua[L]ogger[Ll]ama[L]ist"));
142 EXPECT_THAT("THRE", matches("H[T]ML[HRE]lement"));
143 EXPECT_THAT("b", Not(matches("NDEBUG")));
144 EXPECT_THAT("Three", matches("[Three]"));
145 EXPECT_THAT("fo", Not(matches("barfoo")));
146 EXPECT_THAT("fo", matches("bar_[fo]o"));
147 EXPECT_THAT("fo", matches("bar_[Fo]o"));
148 EXPECT_THAT("fo", matches("bar [fo]o"));
149 EXPECT_THAT("fo", matches("bar.[fo]o"));
150 EXPECT_THAT("fo", matches("bar/[fo]o"));
151 EXPECT_THAT("fo", matches("bar\\[fo]o"));
155 matches("[aaaaaa]aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
156 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"));
157 EXPECT_THAT("baba", Not(matches("ababababab")));
158 EXPECT_THAT("fsfsfs", Not(matches("dsafdsafdsafdsafdsafdsafdsafasdfdsa")));
159 EXPECT_THAT("fsfsfsfsfsfsfsf",
160 Not(matches("dsafdsafdsafdsafdsafdsafdsafasdfdsafdsafdsafdsafdsfd"
161 "safdsfdfdfasdnfdsajfndsjnafjndsajlknfdsa")));
163 EXPECT_THAT(" g", matches("[ g]roup"));
164 EXPECT_THAT("g", matches(" [g]roup"));
165 EXPECT_THAT("g g", Not(matches(" groupGroup")));
166 EXPECT_THAT("g g", matches(" [g]roup[ G]roup"));
167 EXPECT_THAT(" g g", matches("[ ] [g]roup[ G]roup"));
168 EXPECT_THAT("zz", matches("[zz]Group"));
169 EXPECT_THAT("zzg", matches("[zzG]roup"));
170 EXPECT_THAT("g", matches("zz[G]roup"));
172 EXPECT_THAT("aaaa", matches("_a_[aaaa]")); // Prefer consecutive.
173 // These would ideally match, but would need special segmentation rules.
174 EXPECT_THAT("printf", Not(matches("s[printf]")));
175 EXPECT_THAT("str", Not(matches("o[str]eam")));
176 EXPECT_THAT("strcpy", Not(matches("strncpy")));
177 EXPECT_THAT("std", Not(matches("PTHREAD_MUTEX_STALLED")));
178 EXPECT_THAT("std", Not(matches("pthread_condattr_setpshared")));
181 struct RankMatcher
: public ::testing::MatcherInterface
<llvm::StringRef
> {
182 std::vector
<ExpectedMatch
> RankedStrings
;
183 RankMatcher(std::initializer_list
<ExpectedMatch
> RankedStrings
)
184 : RankedStrings(RankedStrings
) {}
186 void DescribeTo(::std::ostream
*OS
) const override
{
187 llvm::raw_os_ostream
O(*OS
);
188 O
<< "Ranks strings in order: [";
189 for (const auto &Str
: RankedStrings
)
194 bool MatchAndExplain(llvm::StringRef Pattern
,
195 ::testing::MatchResultListener
*L
) const override
{
196 std::unique_ptr
<llvm::raw_ostream
> OS(
198 ? (llvm::raw_ostream
*)(new llvm::raw_os_ostream(*L
->stream()))
199 : new llvm::raw_null_ostream());
200 FuzzyMatcher
Matcher(Pattern
);
201 const ExpectedMatch
*LastMatch
;
202 llvm::Optional
<float> LastScore
;
204 for (const auto &Str
: RankedStrings
) {
205 auto Score
= Matcher
.match(Str
.Word
);
207 *OS
<< "\nDoesn't match '" << Str
.Word
<< "'";
208 Matcher
.dumpLast(*OS
<< "\n");
212 llvm::raw_string_ostream
Info(Buf
);
213 auto AnnotatedMatch
= Matcher
.dumpLast(Info
);
215 if (!Str
.accepts(AnnotatedMatch
)) {
216 *OS
<< "\nDoesn't match " << Str
<< ", but " << AnnotatedMatch
<< "\n"
219 } else if (LastScore
&& *LastScore
< *Score
) {
220 *OS
<< "\nRanks '" << Str
.Word
<< "'=" << *Score
<< " above '"
221 << LastMatch
->Word
<< "'=" << *LastScore
<< "\n"
223 Matcher
.match(LastMatch
->Word
);
224 Matcher
.dumpLast(*OS
<< "\n");
235 // Accepts patterns that match all the strings and rank them in the given order.
236 // Dumps the debug tables on match failure.
237 template <typename
... T
>
238 ::testing::Matcher
<llvm::StringRef
> ranks(T
... RankedStrings
) {
239 return ::testing::MakeMatcher
<llvm::StringRef
>(
240 new RankMatcher
{ExpectedMatch(RankedStrings
)...});
243 TEST(FuzzyMatch
, Ranking
) {
245 ranks("[cons]ole", "[Cons]ole", "ArrayBuffer[Cons]tructor"));
246 EXPECT_THAT("foo", ranks("[foo]", "[Foo]"));
248 ranks("[onMes]sage", "[onmes]sage", "[on]This[M]ega[Es]capes"));
250 ranks("[onmes]sage", "[onMes]sage", "[on]This[M]ega[Es]capes"));
251 EXPECT_THAT("CC", ranks("[C]amel[C]ase", "[c]amel[C]ase"));
252 EXPECT_THAT("cC", ranks("[c]amel[C]ase", "[C]amel[C]ase"));
253 EXPECT_THAT("p", ranks("[p]", "[p]arse", "[p]osix", "[p]afdsa", "[p]ath"));
254 EXPECT_THAT("pa", ranks("[pa]rse", "[pa]th", "[pa]fdsa"));
255 EXPECT_THAT("log", ranks("[log]", "Scroll[Log]icalPosition"));
256 EXPECT_THAT("e", ranks("[e]lse", "Abstract[E]lement"));
257 EXPECT_THAT("workbench.sideb",
258 ranks("[workbench.sideB]ar.location",
259 "[workbench.]editor.default[SideB]ySideLayout"));
260 EXPECT_THAT("editor.r", ranks("[editor.r]enderControlCharacter",
261 "[editor.]overview[R]ulerlanes",
262 "diff[Editor.r]enderSideBySide"));
263 EXPECT_THAT("-mo", ranks("[-mo]z-columns", "[-]ms-ime-[mo]de"));
264 EXPECT_THAT("convertModelPosition",
265 ranks("[convertModelPosition]ToViewPosition",
266 "[convert]ViewTo[ModelPosition]"));
267 EXPECT_THAT("is", ranks("[is]ValidViewletId", "[i]mport [s]tatement"));
268 EXPECT_THAT("strcpy", ranks("[strcpy]", "[strcpy]_s"));
271 // Verify some bounds so we know scores fall in the right range.
272 // Testing exact scores is fragile, so we prefer Ranking tests.
273 TEST(FuzzyMatch
, Scoring
) {
274 EXPECT_THAT("abs", matches("[a]w[B]xYz[S]", 7.f
/ 12.f
));
275 EXPECT_THAT("abs", matches("[abs]l", 1.f
));
276 EXPECT_THAT("abs", matches("[abs]", 2.f
));
277 EXPECT_THAT("Abs", matches("[abs]", 2.f
));
280 TEST(FuzzyMatch
, InitialismAndPrefix
) {
281 // We want these scores to be roughly the same.
282 EXPECT_THAT("up", matches("[u]nique_[p]tr", 3.f
/ 4.f
));
283 EXPECT_THAT("up", matches("[up]per_bound", 1.f
));
286 // Returns pretty-printed segmentation of Text.
287 // e.g. std::basic_string --> +-- +---- +-----
288 std::string
segment(llvm::StringRef Text
) {
289 std::vector
<CharRole
> Roles(Text
.size());
290 calculateRoles(Text
, Roles
);
292 for (unsigned I
= 0; I
< Text
.size(); ++I
)
293 Printed
.push_back("?-+ "[static_cast<unsigned>(Roles
[I
])]);
297 // this is a no-op hack so clang-format will vertically align our testcases.
298 std::string
returns(llvm::StringRef Text
) { return std::string(Text
); }
300 TEST(FuzzyMatch
, Segmentation
) {
301 EXPECT_THAT(segment("std::basic_string"), //
302 returns("+-- +---- +-----"));
303 EXPECT_THAT(segment("XMLHttpRequest"), //
304 returns("+--+---+------"));
305 EXPECT_THAT(segment("t3h PeNgU1N oF d00m!!!!!!!!"), //
306 returns("+-- +-+-+-+ ++ +--- "));
310 } // namespace clangd