Remove wpr.archive_info dependancy on page to avoid circular dependancies.
[chromium-blink-merge.git] / ui / app_list / search / tokenized_string_match.cc
blobe94f0ebef0f186e03b900fba9e4c62c26b552903
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "ui/app_list/search/tokenized_string_match.h"
7 #include "base/i18n/string_search.h"
8 #include "base/logging.h"
9 #include "ui/app_list/search/tokenized_string_char_iterator.h"
11 namespace app_list {
13 namespace {
15 // The factors below are applied when the current char of query matches
16 // the current char of the text to be matched. Different factors are chosen
17 // based on where the match happens. kIsPrefixMultiplier is used when the
18 // matched portion is a prefix of both the query and the text, which implies
19 // that the matched chars are at the same position in query and text. This is
20 // the most preferred case thus it has the highest score. When the current char
21 // of the query and the text does not match, the algorithm moves to the next
22 // token in the text and try to match from there. kIsFrontOfWordMultipler will
23 // be used if the first char of the token matches the current char of the query.
24 // Otherwise, the match is considered as weak and kIsWeakHitMultiplier is
25 // used.
26 // Examples:
27 // Suppose the text to be matched is 'Google Chrome'.
28 // Query 'go' would yield kIsPrefixMultiplier for each char.
29 // Query 'gc' would use kIsPrefixMultiplier for 'g' and
30 // kIsFrontOfWordMultipler for 'c'.
31 // Query 'ch' would use kIsFrontOfWordMultipler for 'c' and
32 // kIsWeakHitMultiplier for 'h'.
33 // Query 'oo' does not match any prefix and would use the substring match
34 // fallback, thus kIsSubstringMultiplier is used for each char.
35 const double kIsPrefixMultiplier = 1.0;
36 const double kIsFrontOfWordMultipler = 0.8;
37 const double kIsWeakHitMultiplier = 0.6;
38 const double kIsSubstringMultiplier = 0.4;
40 // A relevance score that represents no match.
41 const double kNoMatchScore = 0.0;
43 // PrefixMatcher matches the chars of a given query as prefix of tokens in
44 // a given text or as prefix of the acronyms of those text tokens.
45 class PrefixMatcher {
46 public:
47 PrefixMatcher(const TokenizedString& query,
48 const TokenizedString& text)
49 : query_iter_(query),
50 text_iter_(text),
51 current_match_(gfx::Range::InvalidRange()),
52 current_relevance_(kNoMatchScore) {
55 // Invokes RunMatch to perform prefix match. Use |states_| as a stack to
56 // perform DFS (depth first search) so that all possible matches are
57 // attempted. Stops on the first full match and returns true. Otherwise,
58 // returns false to indicate no match.
59 bool Match() {
60 while (!RunMatch()) {
61 // No match found and no more states to try. Bail out.
62 if (states_.empty()) {
63 current_relevance_ = kNoMatchScore;
64 current_hits_.clear();
65 return false;
68 PopState();
70 // Skip restored match to try other possibilites.
71 AdvanceToNextTextToken();
74 if (current_match_.IsValid())
75 current_hits_.push_back(current_match_);
77 return true;
80 double relevance() const { return current_relevance_; }
81 const TokenizedStringMatch::Hits& hits() const { return current_hits_; }
83 private:
84 // Context record of a match.
85 struct State {
86 State() : relevance(kNoMatchScore) {}
87 State(double relevance,
88 const gfx::Range& current_match,
89 const TokenizedStringMatch::Hits& hits,
90 const TokenizedStringCharIterator& query_iter,
91 const TokenizedStringCharIterator& text_iter)
92 : relevance(relevance),
93 current_match(current_match),
94 hits(hits.begin(), hits.end()),
95 query_iter_state(query_iter.GetState()),
96 text_iter_state(text_iter.GetState()) {}
98 // The current score of the processed query chars.
99 double relevance;
101 // Current matching range.
102 gfx::Range current_match;
104 // Completed matching ranges of the processed query chars.
105 TokenizedStringMatch::Hits hits;
107 // States of the processed query and text chars.
108 TokenizedStringCharIterator::State query_iter_state;
109 TokenizedStringCharIterator::State text_iter_state;
111 typedef std::vector<State> States;
113 // Match chars from the query and text one by one. For each matching char,
114 // calculate relevance and matching ranges. And the current stats is
115 // recorded so that the match could be skipped later to try other
116 // possiblities. Repeat until any of the iterators run out. Return true if
117 // query iterator runs out, i.e. all chars in query are matched.
118 bool RunMatch() {
119 while (!query_iter_.end() && !text_iter_.end()) {
120 if (query_iter_.Get() == text_iter_.Get()) {
121 PushState();
123 if (query_iter_.GetArrayPos() == text_iter_.GetArrayPos())
124 current_relevance_ += kIsPrefixMultiplier;
125 else if (text_iter_.IsFirstCharOfToken())
126 current_relevance_ += kIsFrontOfWordMultipler;
127 else
128 current_relevance_ += kIsWeakHitMultiplier;
130 if (!current_match_.IsValid())
131 current_match_.set_start(text_iter_.GetArrayPos());
132 current_match_.set_end(text_iter_.GetArrayPos() +
133 text_iter_.GetCharSize());
135 query_iter_.NextChar();
136 text_iter_.NextChar();
137 } else {
138 AdvanceToNextTextToken();
142 return query_iter_.end();
145 // Skip to the next text token and close current match. Invoked when a
146 // mismatch happens or to skip a restored match.
147 void AdvanceToNextTextToken() {
148 if (current_match_.IsValid()) {
149 current_hits_.push_back(current_match_);
150 current_match_ = gfx::Range::InvalidRange();
153 text_iter_.NextToken();
156 void PushState() {
157 states_.push_back(State(current_relevance_,
158 current_match_,
159 current_hits_,
160 query_iter_,
161 text_iter_));
164 void PopState() {
165 DCHECK(!states_.empty());
167 State& last_match = states_.back();
168 current_relevance_ = last_match.relevance;
169 current_match_ = last_match.current_match;
170 current_hits_.swap(last_match.hits);
171 query_iter_.SetState(last_match.query_iter_state);
172 text_iter_.SetState(last_match.text_iter_state);
174 states_.pop_back();
177 TokenizedStringCharIterator query_iter_;
178 TokenizedStringCharIterator text_iter_;
180 States states_;
181 gfx::Range current_match_;
183 double current_relevance_;
184 TokenizedStringMatch::Hits current_hits_;
186 DISALLOW_COPY_AND_ASSIGN(PrefixMatcher);
189 } // namespace
191 TokenizedStringMatch::TokenizedStringMatch()
192 : relevance_(kNoMatchScore) {}
194 TokenizedStringMatch::~TokenizedStringMatch() {}
196 bool TokenizedStringMatch::Calculate(const TokenizedString& query,
197 const TokenizedString& text) {
198 relevance_ = kNoMatchScore;
199 hits_.clear();
201 PrefixMatcher matcher(query, text);
202 if (matcher.Match()) {
203 relevance_ = matcher.relevance();
204 hits_.assign(matcher.hits().begin(), matcher.hits().end());
207 // Substring match as a fallback.
208 if (relevance_ == kNoMatchScore) {
209 size_t substr_match_start = 0;
210 size_t substr_match_length = 0;
211 if (base::i18n::StringSearchIgnoringCaseAndAccents(query.text(),
212 text.text(),
213 &substr_match_start,
214 &substr_match_length)) {
215 relevance_ = kIsSubstringMultiplier * substr_match_length;
216 hits_.push_back(gfx::Range(substr_match_start,
217 substr_match_start + substr_match_length));
221 // Using length() for normalizing is not 100% correct but should be good
222 // enough compared with using real char count of the text.
223 if (text.text().length())
224 relevance_ /= text.text().length();
226 return relevance_ > kNoMatchScore;
229 bool TokenizedStringMatch::Calculate(const base::string16& query,
230 const base::string16& text) {
231 const TokenizedString tokenized_query(query);
232 const TokenizedString tokenized_text(text);
233 return Calculate(tokenized_query, tokenized_text);
236 } // namespace app_list