1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "ui/app_list/search/tokenized_string_match.h"
9 #include "base/i18n/string_search.h"
10 #include "base/logging.h"
11 #include "ui/app_list/search/tokenized_string_char_iterator.h"
17 // The factors below are applied when the current char of query matches
18 // the current char of the text to be matched. Different factors are chosen
19 // based on where the match happens. kIsPrefixMultiplier is used when the
20 // matched portion is a prefix of both the query and the text, which implies
21 // that the matched chars are at the same position in query and text. This is
22 // the most preferred case thus it has the highest score. When the current char
23 // of the query and the text does not match, the algorithm moves to the next
24 // token in the text and try to match from there. kIsFrontOfWordMultipler will
25 // be used if the first char of the token matches the current char of the query.
26 // Otherwise, the match is considered as weak and kIsWeakHitMultiplier is
29 // Suppose the text to be matched is 'Google Chrome'.
30 // Query 'go' would yield kIsPrefixMultiplier for each char.
31 // Query 'gc' would use kIsPrefixMultiplier for 'g' and
32 // kIsFrontOfWordMultipler for 'c'.
33 // Query 'ch' would use kIsFrontOfWordMultipler for 'c' and
34 // kIsWeakHitMultiplier for 'h'.
35 // Query 'oo' does not match any prefix and would use the substring match
36 // fallback, thus kIsSubstringMultiplier is used for each char.
37 const double kIsPrefixMultiplier
= 1.0;
38 const double kIsFrontOfWordMultipler
= 0.8;
39 const double kIsWeakHitMultiplier
= 0.6;
40 const double kIsSubstringMultiplier
= 0.4;
42 // A relevance score that represents no match.
43 const double kNoMatchScore
= 0.0;
45 // PrefixMatcher matches the chars of a given query as prefix of tokens in
46 // a given text or as prefix of the acronyms of those text tokens.
49 PrefixMatcher(const TokenizedString
& query
,
50 const TokenizedString
& text
)
53 current_match_(gfx::Range::InvalidRange()),
54 current_relevance_(kNoMatchScore
) {
57 // Invokes RunMatch to perform prefix match. Use |states_| as a stack to
58 // perform DFS (depth first search) so that all possible matches are
59 // attempted. Stops on the first full match and returns true. Otherwise,
60 // returns false to indicate no match.
63 // No match found and no more states to try. Bail out.
64 if (states_
.empty()) {
65 current_relevance_
= kNoMatchScore
;
66 current_hits_
.clear();
72 // Skip restored match to try other possibilites.
73 AdvanceToNextTextToken();
76 if (current_match_
.IsValid())
77 current_hits_
.push_back(current_match_
);
82 double relevance() const { return current_relevance_
; }
83 const TokenizedStringMatch::Hits
& hits() const { return current_hits_
; }
86 // Context record of a match.
88 State() : relevance(kNoMatchScore
) {}
89 State(double relevance
,
90 const gfx::Range
& current_match
,
91 const TokenizedStringMatch::Hits
& hits
,
92 const TokenizedStringCharIterator
& query_iter
,
93 const TokenizedStringCharIterator
& text_iter
)
94 : relevance(relevance
),
95 current_match(current_match
),
96 hits(hits
.begin(), hits
.end()),
97 query_iter_state(query_iter
.GetState()),
98 text_iter_state(text_iter
.GetState()) {}
100 // The current score of the processed query chars.
103 // Current matching range.
104 gfx::Range current_match
;
106 // Completed matching ranges of the processed query chars.
107 TokenizedStringMatch::Hits hits
;
109 // States of the processed query and text chars.
110 TokenizedStringCharIterator::State query_iter_state
;
111 TokenizedStringCharIterator::State text_iter_state
;
113 typedef std::vector
<State
> States
;
115 // Match chars from the query and text one by one. For each matching char,
116 // calculate relevance and matching ranges. And the current stats is
117 // recorded so that the match could be skipped later to try other
118 // possiblities. Repeat until any of the iterators run out. Return true if
119 // query iterator runs out, i.e. all chars in query are matched.
121 while (!query_iter_
.end() && !text_iter_
.end()) {
122 if (query_iter_
.Get() == text_iter_
.Get()) {
125 if (query_iter_
.GetArrayPos() == text_iter_
.GetArrayPos())
126 current_relevance_
+= kIsPrefixMultiplier
;
127 else if (text_iter_
.IsFirstCharOfToken())
128 current_relevance_
+= kIsFrontOfWordMultipler
;
130 current_relevance_
+= kIsWeakHitMultiplier
;
132 if (!current_match_
.IsValid())
133 current_match_
.set_start(text_iter_
.GetArrayPos());
134 current_match_
.set_end(text_iter_
.GetArrayPos() +
135 text_iter_
.GetCharSize());
137 query_iter_
.NextChar();
138 text_iter_
.NextChar();
140 AdvanceToNextTextToken();
144 return query_iter_
.end();
147 // Skip to the next text token and close current match. Invoked when a
148 // mismatch happens or to skip a restored match.
149 void AdvanceToNextTextToken() {
150 if (current_match_
.IsValid()) {
151 current_hits_
.push_back(current_match_
);
152 current_match_
= gfx::Range::InvalidRange();
155 text_iter_
.NextToken();
159 states_
.push_back(State(current_relevance_
,
167 DCHECK(!states_
.empty());
169 State
& last_match
= states_
.back();
170 current_relevance_
= last_match
.relevance
;
171 current_match_
= last_match
.current_match
;
172 current_hits_
.swap(last_match
.hits
);
173 query_iter_
.SetState(last_match
.query_iter_state
);
174 text_iter_
.SetState(last_match
.text_iter_state
);
179 TokenizedStringCharIterator query_iter_
;
180 TokenizedStringCharIterator text_iter_
;
183 gfx::Range current_match_
;
185 double current_relevance_
;
186 TokenizedStringMatch::Hits current_hits_
;
188 DISALLOW_COPY_AND_ASSIGN(PrefixMatcher
);
193 TokenizedStringMatch::TokenizedStringMatch()
194 : relevance_(kNoMatchScore
) {}
196 TokenizedStringMatch::~TokenizedStringMatch() {}
198 bool TokenizedStringMatch::Calculate(const TokenizedString
& query
,
199 const TokenizedString
& text
) {
200 relevance_
= kNoMatchScore
;
203 PrefixMatcher
matcher(query
, text
);
204 if (matcher
.Match()) {
205 relevance_
= matcher
.relevance();
206 hits_
.assign(matcher
.hits().begin(), matcher
.hits().end());
209 // Substring match as a fallback.
210 if (relevance_
== kNoMatchScore
) {
211 size_t substr_match_start
= 0;
212 size_t substr_match_length
= 0;
213 if (base::i18n::StringSearchIgnoringCaseAndAccents(query
.text(),
216 &substr_match_length
)) {
217 relevance_
= kIsSubstringMultiplier
* substr_match_length
;
218 hits_
.push_back(gfx::Range(substr_match_start
,
219 substr_match_start
+ substr_match_length
));
223 // Temper the relevance score with an exponential curve. Each point of
224 // relevance (roughly, each keystroke) is worth less than the last. This means
225 // that typing a few characters of a word is enough to promote matches very
226 // high, with any subsequent characters being worth comparatively less.
227 // TODO(mgiuca): This doesn't really play well with Omnibox results, since as
228 // you type more characters, the app/omnibox results tend to jump over each
230 relevance_
= 1.0 - std::pow(0.5, relevance_
);
232 return relevance_
> kNoMatchScore
;
235 bool TokenizedStringMatch::Calculate(const base::string16
& query
,
236 const base::string16
& text
) {
237 const TokenizedString
tokenized_query(query
);
238 const TokenizedString
tokenized_text(text
);
239 return Calculate(tokenized_query
, tokenized_text
);
242 } // namespace app_list