1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef COMPONENTS_QUERY_PARSER_QUERY_PARSER_H_
6 #define COMPONENTS_QUERY_PARSER_QUERY_PARSER_H_
10 #include "base/basictypes.h"
11 #include "base/strings/string16.h"
12 #include "components/query_parser/snippet.h"
14 namespace query_parser
{
18 // Used by HasMatchIn.
20 // The word to match against.
23 // The starting position of the word in the original text.
27 typedef std::vector
<query_parser::QueryWord
> QueryWordVector
;
29 // QueryNode is used by QueryParser to represent the elements that constitute a
30 // query. While QueryNode is exposed by way of ParseQuery, it really isn't meant
31 // for external usage.
34 virtual ~QueryNode() {}
36 // Serialize ourselves out to a string that can be passed to SQLite. Returns
37 // the number of words in this node.
38 virtual int AppendToSQLiteQuery(base::string16
* query
) const = 0;
40 // Return true if this is a QueryNodeWord, false if it's a QueryNodeList.
41 virtual bool IsWord() const = 0;
43 // Returns true if this node matches |word|. If |exact| is true, the string
44 // must exactly match. Otherwise, this uses a starts with comparison.
45 virtual bool Matches(const base::string16
& word
, bool exact
) const = 0;
47 // Returns true if this node matches at least one of the words in |words|. An
48 // entry is added to |match_positions| for all matching words giving the
50 virtual bool HasMatchIn(const QueryWordVector
& words
,
51 Snippet::MatchPositions
* match_positions
) const = 0;
53 // Returns true if this node matches at least one of the words in |words|.
54 virtual bool HasMatchIn(const QueryWordVector
& words
) const = 0;
56 // Appends the words that make up this node in |words|.
57 virtual void AppendWords(std::vector
<base::string16
>* words
) const = 0;
60 typedef std::vector
<query_parser::QueryNode
*> QueryNodeStarVector
;
62 // This class is used to parse queries entered into the history search into more
63 // normalized queries that can be passed to the SQLite backend.
68 // For CJK ideographs and Korean Hangul, even a single character
69 // can be useful in prefix matching, but that may give us too many
70 // false positives. Moreover, the current ICU word breaker gives us
71 // back every single Chinese character as a word so that there's no
72 // point doing anything for them and we only adjust the minimum length
73 // to 2 for Korean Hangul while using 3 for others. This is a temporary
74 // hack until we have a segmentation support.
75 static bool IsWordLongEnoughForPrefixSearch(const base::string16
& word
);
77 // Parse a query into a SQLite query. The resulting query is placed in
78 // |sqlite_query| and the number of words is returned.
79 int ParseQuery(const base::string16
& query
, base::string16
* sqlite_query
);
81 // Parses |query|, returning the words that make up it. Any words in quotes
82 // are put in |words| without the quotes. For example, the query text
83 // "foo bar" results in two entries being added to words, one for foo and one
85 void ParseQueryWords(const base::string16
& query
,
86 std::vector
<base::string16
>* words
);
88 // Parses |query|, returning the nodes that constitute the valid words in the
89 // query. This is intended for later usage with DoesQueryMatch. Ownership of
90 // the nodes passes to the caller.
91 void ParseQueryNodes(const base::string16
& query
,
92 QueryNodeStarVector
* nodes
);
94 // Returns true if the string text matches the query nodes created by a call
95 // to ParseQuery. If the query does match, each of the matching positions in
96 // the text is added to |match_positions|.
97 bool DoesQueryMatch(const base::string16
& text
,
98 const QueryNodeStarVector
& nodes
,
99 Snippet::MatchPositions
* match_positions
);
101 // Returns true if all of the |words| match the query |nodes| created by a
102 // call to ParseQuery.
103 bool DoesQueryMatch(const QueryWordVector
& words
,
104 const QueryNodeStarVector
& nodes
);
106 // Extracts the words from |text|, placing each word into |words|.
107 void ExtractQueryWords(const base::string16
& text
,
108 QueryWordVector
* words
);
110 // Sorts the match positions in |matches| by their first index, then
111 // coalesces any match positions that intersect each other.
112 static void SortAndCoalesceMatchPositions(Snippet::MatchPositions
* matches
);
115 // Does the work of parsing |query|; creates nodes in |root| as appropriate.
116 // This is invoked from both of the ParseQuery methods.
117 bool ParseQueryImpl(const base::string16
& query
, QueryNodeList
* root
);
119 DISALLOW_COPY_AND_ASSIGN(QueryParser
);
122 } // namespace query_parser
124 #endif // COMPONENTS_QUERY_PARSER_QUERY_PARSER_H_