1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/basictypes.h"
6 #include "base/memory/scoped_vector.h"
7 #include "base/strings/utf_string_conversions.h"
8 #include "components/query_parser/query_parser.h"
9 #include "testing/gtest/include/gtest/gtest.h"
11 namespace query_parser
{
// Test fixture for the query parser tests in this file. It derives from
// testing::Test, declares the QueryToString helper and holds the QueryParser
// instance (query_parser_) that the helper and the NumWords test call.
13 class QueryParserTest
: public testing::Test
{
// Expected word count for one test input.
// NOTE(review): presumably a field of the row type read as
// data[i].expected_word_count in NumWords; the enclosing declaration is not
// visible in this view -- confirm.
17 const int expected_word_count
;
// Converts an 8-bit query string into the parser's query string, returned as
// 8-bit again. Defined out of line after the class.
20 std::string
QueryToString(const std::string
& query
);
// Parser under test; used by QueryToString and by the NumWords test.
23 QueryParser query_parser_
;
26 // Test helper: Convert a user query string in 8-bit (for hardcoding
27 // convenience) to a SQLite query string.
//
// query: the user query as an 8-bit std::string; it is widened with
//        base::UTF8ToUTF16 before being handed to the parser.
// Returns sqlite_query narrowed back to 8-bit with base::UTF16ToUTF8.
28 std::string
QueryParserTest::QueryToString(const std::string
& query
) {
// UTF-16 string whose narrowed form is the helper's return value.
29 base::string16 sqlite_query
;
// Parse with the fixture's parser using MatchingAlgorithm::DEFAULT.
// NOTE(review): the remaining argument(s) of this call are not visible in
// this view; presumably sqlite_query is passed as the output -- confirm.
30 query_parser_
.ParseQuery(base::UTF8ToUTF16(query
),
31 MatchingAlgorithm::DEFAULT
,
// Narrow the result so tests can compare against plain string literals.
33 return base::UTF16ToUTF8(sqlite_query
);
36 // Basic multi-word queries, including prefix matching.
// Every expectation feeds an 8-bit query through QueryToString and compares
// the returned string with the expected literal.
37 TEST_F(QueryParserTest
, SimpleQueries
) {
// A whitespace-only query produces an empty result.
38 EXPECT_EQ("", QueryToString(" "));
// Longer words come back with a trailing '*'; surrounding whitespace is
// dropped and words are joined by a single space.
39 EXPECT_EQ("singleword*", QueryToString("singleword"));
40 EXPECT_EQ("spacedout*", QueryToString(" spacedout "));
41 EXPECT_EQ("foo* bar*", QueryToString("foo bar"));
42 // Short words aren't prefix matches. For Korean Hangul
43 // the minimum is 2 while for other scripts, it's 3.
44 EXPECT_EQ("f b", QueryToString(" f b"));
// Single-character Hangul words: expected output carries no '*'.
46 EXPECT_EQ(base::WideToUTF8(L
"\xAC00 \xC7A5"),
47 QueryToString(base::WideToUTF8(L
" \xAC00 \xC7A5")));
48 EXPECT_EQ("foo* bar*", QueryToString(" foo bar "));
// Two-character Hangul words: each is expected with a trailing '*'.
50 EXPECT_EQ(base::WideToUTF8(L
"\xAC00\xC7A5* \xBE5B\xACE0*"),
51 QueryToString(base::WideToUTF8(L
"\xAC00\xC7A5 \xBE5B\xACE0")));
54 // Quoted substring parsing.
// Quoted terms are expected back inside quotes and without a trailing '*'.
55 TEST_F(QueryParserTest
, Quoted
) {
// A fully quoted word is returned unchanged.
57 EXPECT_EQ("\"Quoted\"", QueryToString("\"Quoted\""));
// Missing end quote: the expected output is a closed quoted phrase.
59 EXPECT_EQ("\"miss end\"", QueryToString("\"miss end"));
60 // Missing begin quotes
61 EXPECT_EQ("miss* beg*", QueryToString("miss beg\""));
// Several quote characters: the expected output is two separately quoted
// words.
63 EXPECT_EQ("\"Many\" \"quotes\"", QueryToString("\"Many \"\"quotes"));
66 // Apostrophes within words should be preserved, but otherwise stripped.
67 TEST_F(QueryParserTest
, Apostrophes
) {
// Apostrophe between letters stays in the output word.
68 EXPECT_EQ("foo* bar's*", QueryToString("foo bar's"));
69 EXPECT_EQ("l'foo*", QueryToString("l'foo"));
// Leading apostrophe is absent from the expected output.
70 EXPECT_EQ("foo*", QueryToString("'foo"));
73 // Special characters.
// Runs of punctuation around and between the words are expected to vanish,
// leaving only the three words, each with a trailing '*'.
74 TEST_F(QueryParserTest
, SpecialChars
) {
75 EXPECT_EQ("foo* the* bar*", QueryToString("!#:/*foo#$*;'* the!#:/*bar"));
// Checks the value returned by QueryParser::ParseQuery against the expected
// word count of each table row.
78 TEST_F(QueryParserTest
, NumWords
) {
// Table rows are { input, expected_word_count }. In the rows visible here a
// quoted phrase contributes one count per word inside it ("bar baz" adds 2).
// NOTE(review): the table's declaration and any further rows are not visible
// in this view.
81 { "foo \"bar baz\"", 3 },
83 { "foo \"bar baz\" blah", 4 },
// Run every row through the fixture's parser.
86 for (size_t i
= 0; i
< arraysize(data
); ++i
) {
87 base::string16 query_string
;
// The expected count is compared with ParseQuery's return value; the input is
// widened to UTF-16 first and parsed with MatchingAlgorithm::DEFAULT.
88 EXPECT_EQ(data
[i
].expected_word_count
,
89 query_parser_
.ParseQuery(base::UTF8ToUTF16(data
[i
].input
),
90 MatchingAlgorithm::DEFAULT
,
// For each table row: parses the row's query into nodes, checks whether the
// query matches the row's text, and verifies the reported match positions.
95 TEST_F(QueryParserTest
, ParseQueryNodesAndMatch
) {
// Row fields. data[i].matches, data[i].m1_end and data[i].m2_end are also read
// in the loop below, but their declarations are not visible in this view.
97 const std::string query
;
98 const std::string text
;
100 const size_t m1_start
;
102 const size_t m2_start
;
// Rows are presumably { query, text, matches, m1_start, m1_end, m2_start,
// m2_end } -- inferred from the field order and the seven values per row.
// A start/end pair of 0, 0 means that match position is not checked (see the
// conditions in the loop).
105 { "foo", "fooey foo", true, 0, 3, 6, 9 },
106 { "foo foo", "foo", true, 0, 3, 0, 0 },
107 { "foo fooey", "fooey", true, 0, 5, 0, 0 },
108 { "fooey foo", "fooey", true, 0, 5, 0, 0 },
109 { "foo fooey bar", "bar fooey", true, 0, 3, 4, 9 },
110 { "blah", "blah", true, 0, 4, 0, 0 },
111 { "blah", "foo", false, 0, 0, 0, 0 },
112 { "blah", "blahblah", true, 0, 4, 0, 0 },
113 { "blah", "foo blah", true, 4, 8, 0, 0 },
114 { "foo blah", "blah", false, 0, 0, 0, 0 },
115 { "foo blah", "blahx foobar", true, 0, 4, 6, 9 },
116 { "\"foo blah\"", "foo blah", true, 0, 8, 0, 0 },
117 { "\"foo blah\"", "foox blahx", false, 0, 0, 0, 0 },
118 { "\"foo blah\"", "foo blah", true, 0, 8, 0, 0 },
119 { "\"foo blah\"", "\"foo blah\"", true, 1, 9, 0, 0 },
120 { "foo blah", "\"foo bar blah\"", true, 1, 4, 9, 13 },
122 for (size_t i
= 0; i
< arraysize(data
); ++i
) {
// Container for the nodes parsed from this row's query.
// NOTE(review): `parser` is used below but its declaration is not visible in
// this view.
124 ScopedVector
<QueryNode
> query_nodes
;
125 parser
.ParseQueryNodes(base::UTF8ToUTF16(data
[i
].query
),
126 MatchingAlgorithm::DEFAULT
,
128 Snippet::MatchPositions match_positions
;
// ASSERT (not EXPECT): if the match result differs from the row's expectation
// the test stops here instead of inspecting positions.
129 ASSERT_EQ(data
[i
].matches
,
130 parser
.DoesQueryMatch(base::UTF8ToUTF16(data
[i
].text
),
// First match position: only checked when the row gives a non-zero start or
// end. The size assertion guards the match_positions[0] access.
134 if (data
[i
].m1_start
!= 0 || data
[i
].m1_end
!= 0) {
135 ASSERT_TRUE(match_positions
.size() >= 1);
136 EXPECT_EQ(data
[i
].m1_start
, match_positions
[0].first
);
137 EXPECT_EQ(data
[i
].m1_end
, match_positions
[0].second
);
// Second match position: only checked when the row gives a non-zero start or
// end; it is read at index `offset`, and the size must be exactly 1 + offset.
// NOTE(review): the definition of `offset` is not visible in this view.
140 if (data
[i
].m2_start
!= 0 || data
[i
].m2_end
!= 0) {
141 ASSERT_TRUE(match_positions
.size() == 1 + offset
);
142 EXPECT_EQ(data
[i
].m2_start
, match_positions
[offset
].first
);
143 EXPECT_EQ(data
[i
].m2_end
, match_positions
[offset
].second
);
// For each table row: splits the row's text with ParseQueryWords and checks
// the number of words and the words themselves.
148 TEST_F(QueryParserTest
, ParseQueryWords
) {
// Row fields: the input text, up to three expected words, and the expected
// number of words.
150 const std::string text
;
151 const std::string w1
;
152 const std::string w2
;
153 const std::string w3
;
154 const size_t word_count
;
// Rows are { text, w1, w2, w3, word_count }; unused word slots are "".
// The quoted inputs expect the words inside the quotes as separate entries.
156 { "foo", "foo", "", "", 1 },
157 { "foo bar", "foo", "bar", "", 2 },
158 { "\"foo bar\"", "foo", "bar", "", 2 },
159 { "\"foo bar\" a", "foo", "bar", "a", 3 },
161 for (size_t i
= 0; i
< arraysize(data
); ++i
) {
// Words collected for this row.
// NOTE(review): `parser` and the trailing argument of the call below are not
// visible in this view; presumably the words are written into `results`.
162 std::vector
<base::string16
> results
;
164 parser
.ParseQueryWords(base::UTF8ToUTF16(data
[i
].text
),
165 MatchingAlgorithm::DEFAULT
,
// ASSERT on the count first: every row expects at least one word, so a passing
// assertion makes the results[0] access below safe.
167 ASSERT_EQ(data
[i
].word_count
, results
.size());
168 EXPECT_EQ(data
[i
].w1
, base::UTF16ToUTF8(results
[0]));
// w2 is compared only when exactly two words were returned, w3 only when
// exactly three were.
// NOTE(review): with three results the second word (w2) is never compared;
// consider `>= 2` if that is unintended.
169 if (results
.size() == 2)
170 EXPECT_EQ(data
[i
].w2
, base::UTF16ToUTF8(results
[1]));
171 if (results
.size() == 3)
172 EXPECT_EQ(data
[i
].w3
, base::UTF16ToUTF8(results
[2]));
176 } // namespace query_parser