1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "ui/app_list/search/tokenized_string_match.h"
7 #include "base/i18n/string_search.h"
8 #include "base/logging.h"
9 #include "ui/app_list/search/tokenized_string_char_iterator.h"
15 // The factors below are applied when the current char of query matches
16 // the current char of the text to be matched. Different factors are chosen
17 // based on where the match happens. kIsPrefixMultiplier is used when the
18 // matched portion is a prefix of both the query and the text, which implies
19 // that the matched chars are at the same position in query and text. This is
20 // the most preferred case thus it has the highest score. When the current char
21 // of the query and the text does not match, the algorithm moves to the next
22 // token in the text and try to match from there. kIsFrontOfWordMultipler will
23 // be used if the first char of the token matches the current char of the query.
24 // Otherwise, the match is considered as weak and kIsWeakHitMultiplier is
27 // Suppose the text to be matched is 'Google Chrome'.
28 // Query 'go' would yield kIsPrefixMultiplier for each char.
29 // Query 'gc' would use kIsPrefixMultiplier for 'g' and
30 // kIsFrontOfWordMultipler for 'c'.
31 // Query 'ch' would use kIsFrontOfWordMultipler for 'c' and
32 // kIsWeakHitMultiplier for 'h'.
33 // Query 'oo' does not match any prefix and would use the substring match
34 // fallback, thus kIsSubstringMultiplier is used for each char.
35 const double kIsPrefixMultiplier
= 1.0;
36 const double kIsFrontOfWordMultipler
= 0.8;
37 const double kIsWeakHitMultiplier
= 0.6;
38 const double kIsSubstringMultiplier
= 0.4;
40 // A relevance score that represents no match.
41 const double kNoMatchScore
= 0.0;
43 // PrefixMatcher matches the chars of a given query as prefix of tokens in
44 // a given text or as prefix of the acronyms of those text tokens.
47 PrefixMatcher(const TokenizedString
& query
,
48 const TokenizedString
& text
)
51 current_match_(gfx::Range::InvalidRange()),
52 current_relevance_(kNoMatchScore
) {
55 // Invokes RunMatch to perform prefix match. Use |states_| as a stack to
56 // perform DFS (depth first search) so that all possible matches are
57 // attempted. Stops on the first full match and returns true. Otherwise,
58 // returns false to indicate no match.
61 // No match found and no more states to try. Bail out.
62 if (states_
.empty()) {
63 current_relevance_
= kNoMatchScore
;
64 current_hits_
.clear();
70 // Skip restored match to try other possibilites.
71 AdvanceToNextTextToken();
74 if (current_match_
.IsValid())
75 current_hits_
.push_back(current_match_
);
80 double relevance() const { return current_relevance_
; }
81 const TokenizedStringMatch::Hits
& hits() const { return current_hits_
; }
84 // Context record of a match.
86 State() : relevance(kNoMatchScore
) {}
87 State(double relevance
,
88 const gfx::Range
& current_match
,
89 const TokenizedStringMatch::Hits
& hits
,
90 const TokenizedStringCharIterator
& query_iter
,
91 const TokenizedStringCharIterator
& text_iter
)
92 : relevance(relevance
),
93 current_match(current_match
),
94 hits(hits
.begin(), hits
.end()),
95 query_iter_state(query_iter
.GetState()),
96 text_iter_state(text_iter
.GetState()) {}
98 // The current score of the processed query chars.
101 // Current matching range.
102 gfx::Range current_match
;
104 // Completed matching ranges of the processed query chars.
105 TokenizedStringMatch::Hits hits
;
107 // States of the processed query and text chars.
108 TokenizedStringCharIterator::State query_iter_state
;
109 TokenizedStringCharIterator::State text_iter_state
;
111 typedef std::vector
<State
> States
;
113 // Match chars from the query and text one by one. For each matching char,
114 // calculate relevance and matching ranges. And the current stats is
115 // recorded so that the match could be skipped later to try other
116 // possiblities. Repeat until any of the iterators run out. Return true if
117 // query iterator runs out, i.e. all chars in query are matched.
119 while (!query_iter_
.end() && !text_iter_
.end()) {
120 if (query_iter_
.Get() == text_iter_
.Get()) {
123 if (query_iter_
.GetArrayPos() == text_iter_
.GetArrayPos())
124 current_relevance_
+= kIsPrefixMultiplier
;
125 else if (text_iter_
.IsFirstCharOfToken())
126 current_relevance_
+= kIsFrontOfWordMultipler
;
128 current_relevance_
+= kIsWeakHitMultiplier
;
130 if (!current_match_
.IsValid())
131 current_match_
.set_start(text_iter_
.GetArrayPos());
132 current_match_
.set_end(text_iter_
.GetArrayPos() +
133 text_iter_
.GetCharSize());
135 query_iter_
.NextChar();
136 text_iter_
.NextChar();
138 AdvanceToNextTextToken();
142 return query_iter_
.end();
145 // Skip to the next text token and close current match. Invoked when a
146 // mismatch happens or to skip a restored match.
147 void AdvanceToNextTextToken() {
148 if (current_match_
.IsValid()) {
149 current_hits_
.push_back(current_match_
);
150 current_match_
= gfx::Range::InvalidRange();
153 text_iter_
.NextToken();
157 states_
.push_back(State(current_relevance_
,
165 DCHECK(!states_
.empty());
167 State
& last_match
= states_
.back();
168 current_relevance_
= last_match
.relevance
;
169 current_match_
= last_match
.current_match
;
170 current_hits_
.swap(last_match
.hits
);
171 query_iter_
.SetState(last_match
.query_iter_state
);
172 text_iter_
.SetState(last_match
.text_iter_state
);
177 TokenizedStringCharIterator query_iter_
;
178 TokenizedStringCharIterator text_iter_
;
181 gfx::Range current_match_
;
183 double current_relevance_
;
184 TokenizedStringMatch::Hits current_hits_
;
186 DISALLOW_COPY_AND_ASSIGN(PrefixMatcher
);
191 TokenizedStringMatch::TokenizedStringMatch()
192 : relevance_(kNoMatchScore
) {}
194 TokenizedStringMatch::~TokenizedStringMatch() {}
196 bool TokenizedStringMatch::Calculate(const TokenizedString
& query
,
197 const TokenizedString
& text
) {
198 relevance_
= kNoMatchScore
;
201 PrefixMatcher
matcher(query
, text
);
202 if (matcher
.Match()) {
203 relevance_
= matcher
.relevance();
204 hits_
.assign(matcher
.hits().begin(), matcher
.hits().end());
207 // Substring match as a fallback.
208 if (relevance_
== kNoMatchScore
) {
209 size_t substr_match_start
= 0;
210 size_t substr_match_length
= 0;
211 if (base::i18n::StringSearchIgnoringCaseAndAccents(query
.text(),
214 &substr_match_length
)) {
215 relevance_
= kIsSubstringMultiplier
* substr_match_length
;
216 hits_
.push_back(gfx::Range(substr_match_start
,
217 substr_match_start
+ substr_match_length
));
221 // Using length() for normalizing is not 100% correct but should be good
222 // enough compared with using real char count of the text.
223 if (text
.text().length())
224 relevance_
/= text
.text().length();
226 return relevance_
> kNoMatchScore
;
229 bool TokenizedStringMatch::Calculate(const base::string16
& query
,
230 const base::string16
& text
) {
231 const TokenizedString
tokenized_query(query
);
232 const TokenizedString
tokenized_text(text
);
233 return Calculate(tokenized_query
, tokenized_text
);
236 } // namespace app_list