Roll src/third_party/WebKit 605a979:06cb9e9 (svn 202556:202558)
[chromium-blink-merge.git] / components / query_parser / query_parser.h
blobd251047c9ce4667099526177bac073c34a6517fa
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef COMPONENTS_QUERY_PARSER_QUERY_PARSER_H_
6 #define COMPONENTS_QUERY_PARSER_QUERY_PARSER_H_
8 #include <vector>
10 #include "base/basictypes.h"
11 #include "base/strings/string16.h"
12 #include "components/query_parser/snippet.h"
14 namespace query_parser {
// Forward declaration; this header only refers to QueryNodeList by pointer
// (see QueryParser::ParseQueryImpl).
16 class QueryNodeList;
18 // A word together with its position in the original text. Used by HasMatchIn.
19 struct QueryWord {
20 // The word to match against.
21 base::string16 word;
23 // The starting position of the word in the original text.
24 size_t position;
// Selects which words are eligible for prefix search (see
// QueryParser::IsWordLongEnoughForPrefixSearch).
27 enum class MatchingAlgorithm {
28 // Only words long enough are considered for prefix search. Shorter words are
29 // considered for exact matches.
30 DEFAULT,
31 // All words are considered for a prefix search.
32 ALWAYS_PREFIX_SEARCH,
// A list of QueryWord entries. Filled in by QueryParser::ExtractQueryWords and
// consumed by QueryNode::HasMatchIn and QueryParser::DoesQueryMatch.
35 typedef std::vector<query_parser::QueryWord> QueryWordVector;
37 // QueryNode is used by QueryParser to represent the elements that constitute a
38 // query. While QueryNode is exposed by way of ParseQueryNodes, it really isn't
39 // meant for external usage. Every method except the destructor is pure virtual.
40 class QueryNode {
41 public:
42 virtual ~QueryNode() {}
44 // Serializes this node into |query|, a string that can be passed to SQLite.
45 // Returns the number of words in this node.
46 virtual int AppendToSQLiteQuery(base::string16* query) const = 0;
48 // Returns true if this is a QueryNodeWord, false if it's a QueryNodeList.
49 virtual bool IsWord() const = 0;
51 // Returns true if this node matches |word|. If |exact| is true, the string
52 // must exactly match. Otherwise, this uses a starts-with (prefix) comparison.
53 virtual bool Matches(const base::string16& word, bool exact) const = 0;
55 // Returns true if this node matches at least one of the words in |words|. An
56 // entry is added to |match_positions| for each matching word, giving the
57 // matching region.
58 virtual bool HasMatchIn(const QueryWordVector& words,
59 Snippet::MatchPositions* match_positions) const = 0;
61 // Returns true if this node matches at least one of the words in |words|.
62 virtual bool HasMatchIn(const QueryWordVector& words) const = 0;
64 // Appends the words that make up this node to |words|.
65 virtual void AppendWords(std::vector<base::string16>* words) const = 0;
// A list of raw QueryNode pointers. When filled in by
// QueryParser::ParseQueryNodes, ownership of the nodes passes to the caller.
68 typedef std::vector<query_parser::QueryNode*> QueryNodeStarVector;
70 // This class is used to parse queries entered into the history search into more
71 // normalized queries that can be passed to the SQLite backend.
72 class QueryParser {
73 public:
74 QueryParser();
76 // For CJK ideographs and Korean Hangul, even a single character
77 // can be useful in prefix matching, but that may give us too many
78 // false positives. Moreover, the current ICU word breaker gives us
79 // back every single Chinese character as a word so that there's no
80 // point doing anything for them and we only adjust the minimum length
81 // to 2 for Korean Hangul while using 3 for others. This is a temporary
82 // hack until we have segmentation support.
83 static bool IsWordLongEnoughForPrefixSearch(
84 const base::string16& word,
85 MatchingAlgorithm matching_algorithm);
87 // Parses a query into a SQLite query. The resulting query is placed in
88 // |sqlite_query| and the number of words is returned.
89 int ParseQuery(const base::string16& query,
90 MatchingAlgorithm matching_algorithm,
91 base::string16* sqlite_query);
93 // Parses |query|, returning the words that make it up. Any words in quotes
94 // are put in |words| without the quotes. For example, the query text
95 // "foo bar" results in two entries being added to words, one for foo and one
96 // for bar.
97 void ParseQueryWords(const base::string16& query,
98 MatchingAlgorithm matching_algorithm,
99 std::vector<base::string16>* words);
101 // Parses |query|, returning the nodes that constitute the valid words in the
102 // query. This is intended for later usage with DoesQueryMatch. Ownership of
103 // the nodes passes to the caller.
104 void ParseQueryNodes(const base::string16& query,
105 MatchingAlgorithm matching_algorithm,
106 QueryNodeStarVector* nodes);
108 // Returns true if the string text matches the query nodes created by a call
109 // to ParseQueryNodes. If the query does match, each of the matching positions
110 // in the text is added to |match_positions|.
111 bool DoesQueryMatch(const base::string16& text,
112 const QueryNodeStarVector& nodes,
113 Snippet::MatchPositions* match_positions);
115 // Returns true if all of the |words| match the query |nodes| created by a
116 // call to ParseQueryNodes.
117 bool DoesQueryMatch(const QueryWordVector& words,
118 const QueryNodeStarVector& nodes);
120 // Extracts the words from |text|, placing each word into |words|.
121 void ExtractQueryWords(const base::string16& text,
122 QueryWordVector* words);
124 // Sorts the match positions in |matches| by their first index, then
125 // coalesces any match positions that intersect each other.
126 static void SortAndCoalesceMatchPositions(Snippet::MatchPositions* matches);
128 private:
129 // Does the work of parsing |query|; creates nodes in |root| as appropriate.
130 // This is invoked from both of the ParseQuery methods.
// NOTE(review): only one method in this header is named ParseQuery; "both"
// presumably refers to the public Parse* methods above - confirm in the .cc.
131 bool ParseQueryImpl(const base::string16& query,
132 MatchingAlgorithm matching_algorithm,
133 QueryNodeList* root);
// Per the macro name, QueryParser is neither copyable nor assignable.
135 DISALLOW_COPY_AND_ASSIGN(QueryParser);
138 } // namespace query_parser
140 #endif // COMPONENTS_QUERY_PARSER_QUERY_PARSER_H_