Add CryptAuthApiCallFlow class, the underlying object to handle all CryptAuth API...
[chromium-blink-merge.git] / components / query_parser / query_parser.cc
blobd09edff867a17aa37b34ff2877a7ab51119bfd67
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/query_parser/query_parser.h"
7 #include <algorithm>
9 #include "base/compiler_specific.h"
10 #include "base/i18n/break_iterator.h"
11 #include "base/i18n/case_conversion.h"
12 #include "base/logging.h"
13 #include "base/stl_util.h"
14 #include "base/strings/utf_string_conversions.h"
16 namespace query_parser {
17 namespace {
19 // Returns true if |mp1.first| is less than |mp2.first|. This is used to
20 // sort match positions.
21 int CompareMatchPosition(const Snippet::MatchPosition& mp1,
22 const Snippet::MatchPosition& mp2) {
23 return mp1.first < mp2.first;
26 // Returns true if |mp2| intersects |mp1|. This is intended for use by
27 // CoalesceMatchesFrom and isn't meant as a general intersection comparison
28 // function.
29 bool SnippetIntersects(const Snippet::MatchPosition& mp1,
30 const Snippet::MatchPosition& mp2) {
31 return mp2.first >= mp1.first && mp2.first <= mp1.second;
34 // Coalesces match positions in |matches| after index that intersect the match
35 // position at |index|.
36 void CoalesceMatchesFrom(size_t index, Snippet::MatchPositions* matches) {
37 Snippet::MatchPosition& mp = (*matches)[index];
38 for (Snippet::MatchPositions::iterator i = matches->begin() + index + 1;
39 i != matches->end(); ) {
40 if (SnippetIntersects(mp, *i)) {
41 mp.second = std::max(mp.second, i->second);
42 i = matches->erase(i);
43 } else {
44 return;
49 // Returns true if the character is considered a quote.
50 bool IsQueryQuote(wchar_t ch) {
51 return ch == '"' ||
52 ch == 0xab || // left pointing double angle bracket
53 ch == 0xbb || // right pointing double angle bracket
54 ch == 0x201c || // left double quotation mark
55 ch == 0x201d || // right double quotation mark
56 ch == 0x201e; // double low-9 quotation mark
59 } // namespace
61 // Inheritance structure:
62 // Queries are represented as trees of QueryNodes.
63 // QueryNodes are either a collection of subnodes (a QueryNodeList)
64 // or a single word (a QueryNodeWord).
66 // A QueryNodeWord is a single word in the query.
67 class QueryNodeWord : public QueryNode {
68 public:
69 explicit QueryNodeWord(const base::string16& word);
70 virtual ~QueryNodeWord();
72 const base::string16& word() const { return word_; }
74 void set_literal(bool literal) { literal_ = literal; }
76 // QueryNode:
77 virtual int AppendToSQLiteQuery(base::string16* query) const OVERRIDE;
78 virtual bool IsWord() const OVERRIDE;
79 virtual bool Matches(const base::string16& word, bool exact) const OVERRIDE;
80 virtual bool HasMatchIn(
81 const QueryWordVector& words,
82 Snippet::MatchPositions* match_positions) const OVERRIDE;
83 virtual bool HasMatchIn(
84 const QueryWordVector& words) const OVERRIDE;
85 virtual void AppendWords(std::vector<base::string16>* words) const OVERRIDE;
87 private:
88 base::string16 word_;
89 bool literal_;
91 DISALLOW_COPY_AND_ASSIGN(QueryNodeWord);
94 QueryNodeWord::QueryNodeWord(const base::string16& word)
95 : word_(word),
96 literal_(false) {}
98 QueryNodeWord::~QueryNodeWord() {}
100 int QueryNodeWord::AppendToSQLiteQuery(base::string16* query) const {
101 query->append(word_);
103 // Use prefix search if we're not literal and long enough.
104 if (!literal_ && QueryParser::IsWordLongEnoughForPrefixSearch(word_))
105 *query += L'*';
106 return 1;
109 bool QueryNodeWord::IsWord() const {
110 return true;
113 bool QueryNodeWord::Matches(const base::string16& word, bool exact) const {
114 if (exact || !QueryParser::IsWordLongEnoughForPrefixSearch(word_))
115 return word == word_;
116 return word.size() >= word_.size() &&
117 (word_.compare(0, word_.size(), word, 0, word_.size()) == 0);
120 bool QueryNodeWord::HasMatchIn(const QueryWordVector& words,
121 Snippet::MatchPositions* match_positions) const {
122 bool matched = false;
123 for (size_t i = 0; i < words.size(); ++i) {
124 if (Matches(words[i].word, false)) {
125 size_t match_start = words[i].position;
126 match_positions->push_back(
127 Snippet::MatchPosition(match_start,
128 match_start + static_cast<int>(word_.size())));
129 matched = true;
132 return matched;
135 bool QueryNodeWord::HasMatchIn(const QueryWordVector& words) const {
136 for (size_t i = 0; i < words.size(); ++i) {
137 if (Matches(words[i].word, false))
138 return true;
140 return false;
143 void QueryNodeWord::AppendWords(std::vector<base::string16>* words) const {
144 words->push_back(word_);
147 // A QueryNodeList has a collection of QueryNodes which are deleted in the end.
148 class QueryNodeList : public QueryNode {
149 public:
150 QueryNodeList();
151 virtual ~QueryNodeList();
153 QueryNodeStarVector* children() { return &children_; }
155 void AddChild(QueryNode* node);
157 // Remove empty subnodes left over from other parsing.
158 void RemoveEmptySubnodes();
160 // QueryNode:
161 virtual int AppendToSQLiteQuery(base::string16* query) const OVERRIDE;
162 virtual bool IsWord() const OVERRIDE;
163 virtual bool Matches(const base::string16& word, bool exact) const OVERRIDE;
164 virtual bool HasMatchIn(
165 const QueryWordVector& words,
166 Snippet::MatchPositions* match_positions) const OVERRIDE;
167 virtual bool HasMatchIn(const QueryWordVector& words) const OVERRIDE;
168 virtual void AppendWords(std::vector<base::string16>* words) const OVERRIDE;
170 protected:
171 int AppendChildrenToString(base::string16* query) const;
173 QueryNodeStarVector children_;
175 private:
176 DISALLOW_COPY_AND_ASSIGN(QueryNodeList);
179 QueryNodeList::QueryNodeList() {}
181 QueryNodeList::~QueryNodeList() {
182 STLDeleteElements(&children_);
185 void QueryNodeList::AddChild(QueryNode* node) {
186 children_.push_back(node);
189 void QueryNodeList::RemoveEmptySubnodes() {
190 for (size_t i = 0; i < children_.size(); ++i) {
191 if (children_[i]->IsWord())
192 continue;
194 QueryNodeList* list_node = static_cast<QueryNodeList*>(children_[i]);
195 list_node->RemoveEmptySubnodes();
196 if (list_node->children()->empty()) {
197 children_.erase(children_.begin() + i);
198 --i;
199 delete list_node;
204 int QueryNodeList::AppendToSQLiteQuery(base::string16* query) const {
205 return AppendChildrenToString(query);
208 bool QueryNodeList::IsWord() const {
209 return false;
212 bool QueryNodeList::Matches(const base::string16& word, bool exact) const {
213 NOTREACHED();
214 return false;
217 bool QueryNodeList::HasMatchIn(const QueryWordVector& words,
218 Snippet::MatchPositions* match_positions) const {
219 NOTREACHED();
220 return false;
223 bool QueryNodeList::HasMatchIn(const QueryWordVector& words) const {
224 NOTREACHED();
225 return false;
228 void QueryNodeList::AppendWords(std::vector<base::string16>* words) const {
229 for (size_t i = 0; i < children_.size(); ++i)
230 children_[i]->AppendWords(words);
233 int QueryNodeList::AppendChildrenToString(base::string16* query) const {
234 int num_words = 0;
235 for (QueryNodeStarVector::const_iterator node = children_.begin();
236 node != children_.end(); ++node) {
237 if (node != children_.begin())
238 query->push_back(L' ');
239 num_words += (*node)->AppendToSQLiteQuery(query);
241 return num_words;
244 // A QueryNodePhrase is a phrase query ("quoted").
245 class QueryNodePhrase : public QueryNodeList {
246 public:
247 QueryNodePhrase();
248 virtual ~QueryNodePhrase();
250 // QueryNodeList:
251 virtual int AppendToSQLiteQuery(base::string16* query) const OVERRIDE;
252 virtual bool HasMatchIn(
253 const QueryWordVector& words,
254 Snippet::MatchPositions* match_positions) const OVERRIDE;
255 virtual bool HasMatchIn(const QueryWordVector& words) const OVERRIDE;
257 private:
258 bool MatchesAll(const QueryWordVector& words,
259 const QueryWord** first_word,
260 const QueryWord** last_word) const;
261 DISALLOW_COPY_AND_ASSIGN(QueryNodePhrase);
264 QueryNodePhrase::QueryNodePhrase() {}
266 QueryNodePhrase::~QueryNodePhrase() {}
268 int QueryNodePhrase::AppendToSQLiteQuery(base::string16* query) const {
269 query->push_back(L'"');
270 int num_words = AppendChildrenToString(query);
271 query->push_back(L'"');
272 return num_words;
275 bool QueryNodePhrase::MatchesAll(const QueryWordVector& words,
276 const QueryWord** first_word,
277 const QueryWord** last_word) const {
278 if (words.size() < children_.size())
279 return false;
281 for (size_t i = 0, max = words.size() - children_.size() + 1; i < max; ++i) {
282 bool matched_all = true;
283 for (size_t j = 0; j < children_.size(); ++j) {
284 if (!children_[j]->Matches(words[i + j].word, true)) {
285 matched_all = false;
286 break;
289 if (matched_all) {
290 *first_word = &words[i];
291 *last_word = &words[i + children_.size() - 1];
292 return true;
295 return false;
298 bool QueryNodePhrase::HasMatchIn(
299 const QueryWordVector& words,
300 Snippet::MatchPositions* match_positions) const {
301 const QueryWord* first_word;
302 const QueryWord* last_word;
304 if (MatchesAll(words, &first_word, &last_word)) {
305 match_positions->push_back(
306 Snippet::MatchPosition(first_word->position,
307 last_word->position + last_word->word.length()));
308 return true;
310 return false;
313 bool QueryNodePhrase::HasMatchIn(const QueryWordVector& words) const {
314 const QueryWord* first_word;
315 const QueryWord* last_word;
316 return MatchesAll(words, &first_word, &last_word);
319 QueryParser::QueryParser() {}
321 // static
322 bool QueryParser::IsWordLongEnoughForPrefixSearch(const base::string16& word) {
323 DCHECK(!word.empty());
324 size_t minimum_length = 3;
325 // We intentionally exclude Hangul Jamos (both Conjoining and compatibility)
326 // because they 'behave like' Latin letters. Moreover, we should
327 // normalize the former before reaching here.
328 if (0xAC00 <= word[0] && word[0] <= 0xD7A3)
329 minimum_length = 2;
330 return word.size() >= minimum_length;
333 int QueryParser::ParseQuery(const base::string16& query,
334 base::string16* sqlite_query) {
335 QueryNodeList root;
336 if (!ParseQueryImpl(query, &root))
337 return 0;
338 return root.AppendToSQLiteQuery(sqlite_query);
341 void QueryParser::ParseQueryWords(const base::string16& query,
342 std::vector<base::string16>* words) {
343 QueryNodeList root;
344 if (!ParseQueryImpl(query, &root))
345 return;
346 root.AppendWords(words);
349 void QueryParser::ParseQueryNodes(const base::string16& query,
350 QueryNodeStarVector* nodes) {
351 QueryNodeList root;
352 if (ParseQueryImpl(base::i18n::ToLower(query), &root))
353 nodes->swap(*root.children());
356 bool QueryParser::DoesQueryMatch(const base::string16& text,
357 const QueryNodeStarVector& query_nodes,
358 Snippet::MatchPositions* match_positions) {
359 if (query_nodes.empty())
360 return false;
362 QueryWordVector query_words;
363 base::string16 lower_text = base::i18n::ToLower(text);
364 ExtractQueryWords(lower_text, &query_words);
366 if (query_words.empty())
367 return false;
369 Snippet::MatchPositions matches;
370 for (size_t i = 0; i < query_nodes.size(); ++i) {
371 if (!query_nodes[i]->HasMatchIn(query_words, &matches))
372 return false;
374 if (lower_text.length() != text.length()) {
375 // The lower case string differs from the original string. The matches are
376 // meaningless.
377 // TODO(sky): we need a better way to align the positions so that we don't
378 // completely punt here.
379 match_positions->clear();
380 } else {
381 SortAndCoalesceMatchPositions(&matches);
382 match_positions->swap(matches);
384 return true;
387 bool QueryParser::DoesQueryMatch(const QueryWordVector& query_words,
388 const QueryNodeStarVector& query_nodes) {
389 if (query_nodes.empty() || query_words.empty())
390 return false;
392 for (size_t i = 0; i < query_nodes.size(); ++i) {
393 if (!query_nodes[i]->HasMatchIn(query_words))
394 return false;
396 return true;
399 bool QueryParser::ParseQueryImpl(const base::string16& query,
400 QueryNodeList* root) {
401 base::i18n::BreakIterator iter(query, base::i18n::BreakIterator::BREAK_WORD);
402 // TODO(evanm): support a locale here
403 if (!iter.Init())
404 return false;
406 // To handle nesting, we maintain a stack of QueryNodeLists.
407 // The last element (back) of the stack contains the current, deepest node.
408 std::vector<QueryNodeList*> query_stack;
409 query_stack.push_back(root);
411 bool in_quotes = false; // whether we're currently in a quoted phrase
412 while (iter.Advance()) {
413 // Just found a span between 'prev' (inclusive) and 'pos' (exclusive). It
414 // is not necessarily a word, but could also be a sequence of punctuation
415 // or whitespace.
416 if (iter.IsWord()) {
417 QueryNodeWord* word_node = new QueryNodeWord(iter.GetString());
418 if (in_quotes)
419 word_node->set_literal(true);
420 query_stack.back()->AddChild(word_node);
421 } else { // Punctuation.
422 if (IsQueryQuote(query[iter.prev()])) {
423 if (!in_quotes) {
424 QueryNodeList* quotes_node = new QueryNodePhrase;
425 query_stack.back()->AddChild(quotes_node);
426 query_stack.push_back(quotes_node);
427 in_quotes = true;
428 } else {
429 query_stack.pop_back(); // Stop adding to the quoted phrase.
430 in_quotes = false;
436 root->RemoveEmptySubnodes();
437 return true;
440 void QueryParser::ExtractQueryWords(const base::string16& text,
441 QueryWordVector* words) {
442 base::i18n::BreakIterator iter(text, base::i18n::BreakIterator::BREAK_WORD);
443 // TODO(evanm): support a locale here
444 if (!iter.Init())
445 return;
447 while (iter.Advance()) {
448 // Just found a span between 'prev' (inclusive) and 'pos' (exclusive). It
449 // is not necessarily a word, but could also be a sequence of punctuation
450 // or whitespace.
451 if (iter.IsWord()) {
452 base::string16 word = iter.GetString();
453 if (!word.empty()) {
454 words->push_back(QueryWord());
455 words->back().word = word;
456 words->back().position = iter.prev();
462 // static
463 void QueryParser::SortAndCoalesceMatchPositions(
464 Snippet::MatchPositions* matches) {
465 std::sort(matches->begin(), matches->end(), &CompareMatchPosition);
466 // WARNING: we don't use iterator here as CoalesceMatchesFrom may remove
467 // from matches.
468 for (size_t i = 0; i < matches->size(); ++i)
469 CoalesceMatchesFrom(i, matches);
472 } // namespace query_parser