[ci] Fix clang-santisers job for GHA change
[xapian.git] / xapian-core / tests / api_query.cc
blobc155b6660f3a1bf1c07fd89fac55c7c26d5b0b70
1 /** @file
2 * @brief Query-related tests.
3 */
4 /* Copyright (C) 2008-2024 Olly Betts
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2 of the
9 * License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
19 * USA
22 #include <config.h>
24 #include "api_query.h"
26 #include <xapian.h>
28 #include "testsuite.h"
29 #include "testutils.h"
31 #include "apitest.h"
33 using namespace std;
35 DEFINE_TESTCASE(queryterms1, !backend) {
36 Xapian::Query query = Xapian::Query::MatchAll;
37 /// Regression test - in 1.0.10 and earlier "" was included in the list.
38 TEST(query.get_terms_begin() == query.get_terms_end());
39 TEST(query.get_unique_terms_begin() == query.get_unique_terms_end());
40 query = Xapian::Query(query.OP_AND_NOT, query, Xapian::Query("fair"));
41 TEST_EQUAL(*query.get_terms_begin(), "fair");
42 TEST_EQUAL(*query.get_unique_terms_begin(), "fair");
44 Xapian::QueryParser qp;
45 Xapian::Query q = qp.parse_query("\"the the the\"");
47 auto t = q.get_terms_begin();
48 size_t count = 0;
49 while (t != q.get_terms_end()) {
50 TEST_EQUAL(*t, "the");
51 ++count;
52 ++t;
54 TEST_EQUAL(count, 3);
57 auto t = q.get_unique_terms_begin();
58 size_t count = 0;
59 while (t != q.get_unique_terms_end()) {
60 TEST_EQUAL(*t, "the");
61 ++count;
62 ++t;
64 TEST_EQUAL(count, 1);
68 DEFINE_TESTCASE(matchall2, !backend) {
69 TEST_STRINGS_EQUAL(Xapian::Query::MatchAll.get_description(),
70 "Query(<alldocuments>)");
73 DEFINE_TESTCASE(matchnothing1, !backend) {
74 TEST_STRINGS_EQUAL(Xapian::Query::MatchNothing.get_description(),
75 "Query()");
76 vector<Xapian::Query> subqs;
77 subqs.push_back(Xapian::Query("foo"));
78 subqs.push_back(Xapian::Query::MatchNothing);
79 Xapian::Query q(Xapian::Query::OP_AND, subqs.begin(), subqs.end());
80 TEST_STRINGS_EQUAL(q.get_description(), "Query()");
82 Xapian::Query q2(Xapian::Query::OP_AND,
83 Xapian::Query("foo"), Xapian::Query::MatchNothing);
84 TEST_STRINGS_EQUAL(q2.get_description(), "Query()");
86 Xapian::Query q3(Xapian::Query::OP_AND,
87 Xapian::Query::MatchNothing, Xapian::Query("foo"));
88 TEST_STRINGS_EQUAL(q2.get_description(), "Query()");
90 Xapian::Query q4(Xapian::Query::OP_AND_MAYBE,
91 Xapian::Query("foo"), Xapian::Query::MatchNothing);
92 TEST_STRINGS_EQUAL(q4.get_description(), "Query(foo)");
94 Xapian::Query q5(Xapian::Query::OP_AND_MAYBE,
95 Xapian::Query::MatchNothing, Xapian::Query("foo"));
96 TEST_STRINGS_EQUAL(q5.get_description(), "Query()");
98 Xapian::Query q6(Xapian::Query::OP_AND_NOT,
99 Xapian::Query("foo"), Xapian::Query::MatchNothing);
100 TEST_STRINGS_EQUAL(q6.get_description(), "Query(foo)");
102 Xapian::Query q7(Xapian::Query::OP_AND_NOT,
103 Xapian::Query::MatchNothing, Xapian::Query("foo"));
104 TEST_STRINGS_EQUAL(q7.get_description(), "Query()");
107 DEFINE_TESTCASE(overload1, !backend) {
108 Xapian::Query q;
109 q = Xapian::Query("foo") & Xapian::Query("bar");
110 TEST_STRINGS_EQUAL(q.get_description(), "Query((foo AND bar))");
112 // Test &= appends a same-type subquery (since Xapian 1.4.10).
113 q &= Xapian::Query("baz");
114 TEST_STRINGS_EQUAL(q.get_description(), "Query((foo AND bar AND baz))");
115 // But not if the RHS is the same query:
116 q = Xapian::Query("foo") & Xapian::Query("bar");
117 #ifdef __has_warning
118 # if __has_warning("-Wself-assign-overloaded")
119 // Suppress warning from newer clang about self-assignment so we can
120 // test that self-assignment works!
121 # pragma clang diagnostic push
122 # pragma clang diagnostic ignored "-Wself-assign-overloaded"
123 # endif
124 #endif
125 q &= q;
126 #ifdef __has_warning
127 # if __has_warning("-Wself-assign-overloaded")
128 # pragma clang diagnostic pop
129 # endif
130 #endif
131 TEST_STRINGS_EQUAL(q.get_description(), "Query(((foo AND bar) AND (foo AND bar)))");
133 // Also not if the query has a refcount > 1.
134 q = Xapian::Query("foo") & Xapian::Query("bar");
135 Xapian::Query qcopy = q;
136 qcopy &= Xapian::Query("baz");
137 TEST_STRINGS_EQUAL(qcopy.get_description(), "Query(((foo AND bar) AND baz))");
138 // And q shouldn't change.
139 TEST_STRINGS_EQUAL(q.get_description(), "Query((foo AND bar))");
141 // Check that MatchNothing still results in MatchNothing:
142 q = Xapian::Query("foo") & Xapian::Query("bar");
143 q &= Xapian::Query::MatchNothing;
144 TEST_STRINGS_EQUAL(q.get_description(), "Query()");
145 // Check we don't combine for other operators:
146 q = Xapian::Query("foo") | Xapian::Query("bar");
147 q &= Xapian::Query("baz");
148 TEST_STRINGS_EQUAL(q.get_description(), "Query(((foo OR bar) AND baz))");
150 // Test |= appends a same-type subquery (since Xapian 1.4.10).
151 q = Xapian::Query("foo") | Xapian::Query("bar");
152 q |= Xapian::Query("baz");
153 TEST_STRINGS_EQUAL(q.get_description(), "Query((foo OR bar OR baz))");
154 // But not if the RHS is the same query:
155 q = Xapian::Query("foo") | Xapian::Query("bar");
156 #ifdef __has_warning
157 # if __has_warning("-Wself-assign-overloaded")
158 // Suppress warning from newer clang about self-assignment so we can
159 // test that self-assignment works!
160 # pragma clang diagnostic push
161 # pragma clang diagnostic ignored "-Wself-assign-overloaded"
162 # endif
163 #endif
164 q |= q;
165 #ifdef __has_warning
166 # if __has_warning("-Wself-assign-overloaded")
167 # pragma clang diagnostic pop
168 # endif
169 #endif
170 TEST_STRINGS_EQUAL(q.get_description(), "Query(((foo OR bar) OR (foo OR bar)))");
172 // Also not if the query has a refcount > 1.
173 q = Xapian::Query("foo") | Xapian::Query("bar");
174 Xapian::Query qcopy = q;
175 qcopy |= Xapian::Query("baz");
176 TEST_STRINGS_EQUAL(qcopy.get_description(), "Query(((foo OR bar) OR baz))");
177 // And q shouldn't change.
178 TEST_STRINGS_EQUAL(q.get_description(), "Query((foo OR bar))");
180 // Check that MatchNothing still results in no change:
181 q = Xapian::Query("foo") | Xapian::Query("bar");
182 q |= Xapian::Query::MatchNothing;
183 TEST_STRINGS_EQUAL(q.get_description(), "Query((foo OR bar))");
184 // Check we don't combine for other operators:
185 q = Xapian::Query("foo") & Xapian::Query("bar");
186 q |= Xapian::Query("baz");
187 TEST_STRINGS_EQUAL(q.get_description(), "Query(((foo AND bar) OR baz))");
189 // Test ^= appends a same-type subquery (since Xapian 1.4.10).
190 q = Xapian::Query("foo") ^ Xapian::Query("bar");
191 q ^= Xapian::Query("baz");
192 TEST_STRINGS_EQUAL(q.get_description(), "Query((foo XOR bar XOR baz))");
193 // But a query ^= itself gives an empty query.
194 q = Xapian::Query("foo") ^ Xapian::Query("bar");
195 #ifdef __has_warning
196 # if __has_warning("-Wself-assign-overloaded")
197 // Suppress warning from newer clang about self-assignment so we can
198 // test that self-assignment works!
199 # pragma clang diagnostic push
200 # pragma clang diagnostic ignored "-Wself-assign-overloaded"
201 # endif
202 #endif
203 q ^= q;
204 #ifdef __has_warning
205 # if __has_warning("-Wself-assign-overloaded")
206 # pragma clang diagnostic pop
207 # endif
208 #endif
209 TEST_STRINGS_EQUAL(q.get_description(), "Query()");
211 // Even if the reference count > 1.
212 q = Xapian::Query("foo") ^ Xapian::Query("bar");
213 Xapian::Query qcopy = q;
214 q ^= qcopy;
215 TEST_STRINGS_EQUAL(q.get_description(), "Query()");
218 // Also not if the query has a refcount > 1.
219 q = Xapian::Query("foo") ^ Xapian::Query("bar");
220 Xapian::Query qcopy = q;
221 qcopy ^= Xapian::Query("baz");
222 TEST_STRINGS_EQUAL(qcopy.get_description(), "Query(((foo XOR bar) XOR baz))");
223 // And q shouldn't change.
224 TEST_STRINGS_EQUAL(q.get_description(), "Query((foo XOR bar))");
226 // Check that MatchNothing still results in no change:
227 q = Xapian::Query("foo") ^ Xapian::Query("bar");
228 q ^= Xapian::Query::MatchNothing;
229 TEST_STRINGS_EQUAL(q.get_description(), "Query((foo XOR bar))");
230 // Check we don't combine for other operators:
231 q = Xapian::Query("foo") & Xapian::Query("bar");
232 q ^= Xapian::Query("baz");
233 TEST_STRINGS_EQUAL(q.get_description(), "Query(((foo AND bar) XOR baz))");
235 q = Xapian::Query("foo") &~ Xapian::Query("bar");
236 TEST_STRINGS_EQUAL(q.get_description(), "Query((foo AND_NOT bar))");
237 // In 1.4.9 and earlier this gave (foo AND (<alldocuments> AND_NOT bar)).
238 q = Xapian::Query("foo");
239 q &= ~Xapian::Query("bar");
240 TEST_STRINGS_EQUAL(q.get_description(), "Query((foo AND_NOT bar))");
241 q = ~Xapian::Query("bar");
242 TEST_STRINGS_EQUAL(q.get_description(), "Query((<alldocuments> AND_NOT bar))");
243 q = Xapian::Query("foo") & Xapian::Query::MatchNothing;
244 TEST_STRINGS_EQUAL(q.get_description(), "Query()");
245 q = Xapian::Query("foo") | Xapian::Query("bar");
246 TEST_STRINGS_EQUAL(q.get_description(), "Query((foo OR bar))");
247 q = Xapian::Query("foo") | Xapian::Query::MatchNothing;
248 TEST_STRINGS_EQUAL(q.get_description(), "Query(foo)");
249 q = Xapian::Query("foo") ^ Xapian::Query("bar");
250 TEST_STRINGS_EQUAL(q.get_description(), "Query((foo XOR bar))");
251 q = Xapian::Query("foo") ^ Xapian::Query::MatchNothing;
252 TEST_STRINGS_EQUAL(q.get_description(), "Query(foo)");
253 q = 1.25 * (Xapian::Query("one") | Xapian::Query("two"));
254 TEST_STRINGS_EQUAL(q.get_description(), "Query(1.25 * (one OR two))");
255 q = (Xapian::Query("one") & Xapian::Query("two")) * 42;
256 TEST_STRINGS_EQUAL(q.get_description(), "Query(42 * (one AND two))");
257 q = Xapian::Query("one") / 2.0;
258 TEST_STRINGS_EQUAL(q.get_description(), "Query(0.5 * one)");
261 /** Regression test and feature test.
263 * This threw AssertionError in 1.0.9 and earlier (bug#201) and gave valgrind
264 * errors in 1.0.11 and earlier (bug#349).
266 * Currently the OR-subquery case is supported, other operators aren't.
268 DEFINE_TESTCASE(possubqueries1, backend) {
269 Xapian::Database db = get_database("possubqueries1",
270 [](Xapian::WritableDatabase& wdb,
271 const string&)
273 Xapian::Document doc;
274 doc.add_posting("a", 1);
275 doc.add_posting("b", 2);
276 doc.add_posting("c", 3);
277 wdb.add_document(doc);
280 Xapian::Query a_or_b(Xapian::Query::OP_OR,
281 Xapian::Query("a"),
282 Xapian::Query("b"));
283 Xapian::Query near(Xapian::Query::OP_NEAR, a_or_b, a_or_b);
284 // As of 1.3.0, we no longer rearrange queries at this point, so check
285 // that we don't.
286 TEST_STRINGS_EQUAL(near.get_description(),
287 "Query(((a OR b) NEAR 2 (a OR b)))");
288 Xapian::Query phrase(Xapian::Query::OP_PHRASE, a_or_b, a_or_b);
289 TEST_STRINGS_EQUAL(phrase.get_description(),
290 "Query(((a OR b) PHRASE 2 (a OR b)))");
292 Xapian::Query a_and_b(Xapian::Query::OP_AND,
293 Xapian::Query("a"),
294 Xapian::Query("b"));
295 Xapian::Query a_near_b(Xapian::Query::OP_NEAR,
296 Xapian::Query("a"),
297 Xapian::Query("b"));
298 Xapian::Query a_phrs_b(Xapian::Query::OP_PHRASE,
299 Xapian::Query("a"),
300 Xapian::Query("b"));
301 Xapian::Query c("c");
303 // FIXME: The plan is to actually try to support the cases below, but
304 // for now at least ensure they are cleanly rejected.
305 Xapian::Enquire enq(db);
307 TEST_EXCEPTION(Xapian::UnimplementedError,
308 Xapian::Query q(Xapian::Query::OP_NEAR, a_and_b, c);
309 enq.set_query(q);
310 (void)enq.get_mset(0, 10));
312 TEST_EXCEPTION(Xapian::UnimplementedError,
313 Xapian::Query q(Xapian::Query::OP_NEAR, a_near_b, c);
314 enq.set_query(q);
315 (void)enq.get_mset(0, 10));
317 TEST_EXCEPTION(Xapian::UnimplementedError,
318 Xapian::Query q(Xapian::Query::OP_NEAR, a_phrs_b, c);
319 enq.set_query(q);
320 (void)enq.get_mset(0, 10));
322 TEST_EXCEPTION(Xapian::UnimplementedError,
323 Xapian::Query q(Xapian::Query::OP_PHRASE, a_and_b, c);
324 enq.set_query(q);
325 (void)enq.get_mset(0, 10));
327 TEST_EXCEPTION(Xapian::UnimplementedError,
328 Xapian::Query q(Xapian::Query::OP_PHRASE, a_near_b, c);
329 enq.set_query(q);
330 (void)enq.get_mset(0, 10));
332 TEST_EXCEPTION(Xapian::UnimplementedError,
333 Xapian::Query q(Xapian::Query::OP_PHRASE, a_phrs_b, c);
334 enq.set_query(q);
335 (void)enq.get_mset(0, 10));
338 /// Test that XOR handles all remaining subqueries running out at the same
339 // time.
340 DEFINE_TESTCASE(xor3, backend) {
341 Xapian::Database db = get_database("apitest_simpledata");
343 static const char * const subqs[] = {
344 "this", "hack", "which", "paragraph", "is", "return", "this", "this"
346 // Document where the subqueries run out *does* match XOR:
347 Xapian::Query q(Xapian::Query::OP_XOR, subqs + 1, subqs + 6);
348 Xapian::Enquire enq(db);
349 enq.set_query(q);
350 Xapian::MSet mset = enq.get_mset(0, 10);
352 TEST_EQUAL(mset.size(), 3);
353 TEST_EQUAL(*mset[0], 4);
354 TEST_EQUAL(*mset[1], 2);
355 TEST_EQUAL(*mset[2], 3);
357 // Document where the subqueries run out *does not* match XOR:
358 q = Xapian::Query(Xapian::Query::OP_XOR, subqs + 1, subqs + 5);
359 enq.set_query(q);
360 mset = enq.get_mset(0, 10);
362 TEST_EQUAL(mset.size(), 4);
363 TEST_EQUAL(*mset[0], 5);
364 TEST_EQUAL(*mset[1], 4);
365 TEST_EQUAL(*mset[2], 2);
366 TEST_EQUAL(*mset[3], 3);
368 // Tests that XOR subqueries that match all docs are handled well when
369 // calculating min/est/max match counts.
370 q = Xapian::Query(Xapian::Query::OP_XOR, subqs, subqs + 2);
371 enq.set_query(q);
372 mset = enq.get_mset(0, 0);
373 TEST_EQUAL(mset.size(), 0);
374 TEST_EQUAL(mset.get_matches_lower_bound(), 5);
375 TEST_EQUAL(mset.get_matches_estimated(), 5);
376 TEST_EQUAL(mset.get_matches_upper_bound(), 5);
378 q = Xapian::Query(Xapian::Query::OP_XOR, subqs + 5, subqs + 7);
379 enq.set_query(q);
380 mset = enq.get_mset(0, 0);
381 TEST_EQUAL(mset.size(), 0);
382 TEST_EQUAL(mset.get_matches_lower_bound(), 5);
383 TEST_EQUAL(mset.get_matches_estimated(), 5);
384 TEST_EQUAL(mset.get_matches_upper_bound(), 5);
386 q = Xapian::Query(Xapian::Query::OP_XOR, subqs + 5, subqs + 8);
387 enq.set_query(q);
388 mset = enq.get_mset(0, 0);
389 TEST_EQUAL(mset.size(), 0);
390 TEST_EQUAL(mset.get_matches_lower_bound(), 1);
391 TEST_EQUAL(mset.get_matches_estimated(), 1);
392 TEST_EQUAL(mset.get_matches_upper_bound(), 1);
395 /// Check encoding of non-UTF8 terms in query descriptions.
396 DEFINE_TESTCASE(nonutf8termdesc1, !backend) {
397 TEST_EQUAL(Xapian::Query("\xc0\x80\xf5\x80\x80\x80\xfe\xff").get_description(),
398 "Query(\\xc0\\x80\\xf5\\x80\\x80\\x80\\xfe\\xff)");
399 TEST_EQUAL(Xapian::Query(string("\x00\x1f", 2)).get_description(),
400 "Query(\\x00\\x1f)");
401 // Check that backslashes are encoded so output isn't ambiguous.
402 TEST_EQUAL(Xapian::Query("back\\slash").get_description(),
403 "Query(back\\x5cslash)");
404 // Check that \x7f is escaped.
405 TEST_EQUAL(Xapian::Query("D\x7f_\x7f~").get_description(),
406 "Query(D\\x7f_\\x7f~)");
409 /// Test introspection on Query objects.
410 DEFINE_TESTCASE(queryintro1, !backend) {
411 TEST_EQUAL(Xapian::Query::MatchAll.get_type(), Xapian::Query::LEAF_MATCH_ALL);
412 TEST_EQUAL(Xapian::Query::MatchAll.get_num_subqueries(), 0);
413 TEST_EQUAL(Xapian::Query::MatchNothing.get_type(), Xapian::Query::LEAF_MATCH_NOTHING);
414 TEST_EQUAL(Xapian::Query::MatchNothing.get_num_subqueries(), 0);
416 Xapian::Query q;
417 q = Xapian::Query(q.OP_AND_NOT, Xapian::Query::MatchAll, Xapian::Query("fair"));
418 TEST_EQUAL(q.get_type(), q.OP_AND_NOT);
419 TEST_EQUAL(q.get_num_subqueries(), 2);
420 TEST_EQUAL(q.get_subquery(0).get_type(), q.LEAF_MATCH_ALL);
421 TEST_EQUAL(q.get_subquery(1).get_type(), q.LEAF_TERM);
423 q = Xapian::Query("foo", 2, 1);
424 TEST_EQUAL(q.get_leaf_wqf(), 2);
425 TEST_EQUAL(q.get_leaf_pos(), 1);
427 q = Xapian::Query("bar");
428 TEST_EQUAL(q.get_leaf_wqf(), 1);
429 TEST_EQUAL(q.get_leaf_pos(), 0);
431 q = Xapian::Query("foo") & Xapian::Query("bar");
432 TEST_EQUAL(q.get_type(), q.OP_AND);
434 q = Xapian::Query("foo") &~ Xapian::Query("bar");
435 TEST_EQUAL(q.get_type(), q.OP_AND_NOT);
437 q = ~Xapian::Query("bar");
438 TEST_EQUAL(q.get_type(), q.OP_AND_NOT);
440 q = Xapian::Query("foo") | Xapian::Query("bar");
441 TEST_EQUAL(q.get_type(), q.OP_OR);
443 q = Xapian::Query("foo") ^ Xapian::Query("bar");
444 TEST_EQUAL(q.get_type(), q.OP_XOR);
446 q = 1.25 * (Xapian::Query("one") | Xapian::Query("two"));
447 TEST_EQUAL(q.get_type(), q.OP_SCALE_WEIGHT);
448 TEST_EQUAL(q.get_num_subqueries(), 1);
449 TEST_EQUAL(q.get_subquery(0).get_type(), q.OP_OR);
451 q = Xapian::Query("one") / 2.0;
452 TEST_EQUAL(q.get_type(), q.OP_SCALE_WEIGHT);
453 TEST_EQUAL(q.get_num_subqueries(), 1);
454 TEST_EQUAL(q.get_subquery(0).get_type(), q.LEAF_TERM);
456 q = Xapian::Query(q.OP_NEAR, Xapian::Query("a"), Xapian::Query("b"));
457 TEST_EQUAL(q.get_type(), q.OP_NEAR);
458 TEST_EQUAL(q.get_num_subqueries(), 2);
459 TEST_EQUAL(q.get_subquery(0).get_type(), q.LEAF_TERM);
460 TEST_EQUAL(q.get_subquery(1).get_type(), q.LEAF_TERM);
462 q = Xapian::Query(q.OP_PHRASE, Xapian::Query("c"), Xapian::Query("d"));
463 TEST_EQUAL(q.get_type(), q.OP_PHRASE);
464 TEST_EQUAL(q.get_num_subqueries(), 2);
465 TEST_EQUAL(q.get_subquery(0).get_type(), q.LEAF_TERM);
466 TEST_EQUAL(q.get_subquery(1).get_type(), q.LEAF_TERM);
469 /// Regression test for bug introduced in 1.3.1 and fixed in 1.3.3.
470 // We were incorrectly converting a term which indexed all docs and was used
471 // in an unweighted phrase into an all docs postlist, so check that this
472 // case actually works.
473 DEFINE_TESTCASE(phrasealldocs1, backend) {
474 Xapian::Database db = get_database("apitest_declen");
475 Xapian::Query q;
476 static const char * const phrase[] = { "this", "is", "the" };
477 q = Xapian::Query(q.OP_AND_NOT,
478 Xapian::Query("paragraph"),
479 Xapian::Query(q.OP_PHRASE, phrase, phrase + 3));
480 Xapian::Enquire enq(db);
481 enq.set_query(q);
482 Xapian::MSet mset = enq.get_mset(0, 10);
483 TEST_EQUAL(mset.size(), 3);
486 struct wildcard_testcase {
487 const char * pattern;
488 Xapian::termcount max_expansion;
489 char max_type;
490 const char * terms[4];
493 #define WILDCARD_EXCEPTION { 0, 0, 0, "" }
494 static const
495 wildcard_testcase wildcard1_testcases[] = {
496 // Tries to expand to 7 terms.
497 { "th", 6, 'E', WILDCARD_EXCEPTION },
498 { "thou", 1, 'E', { "though", 0, 0, 0 } },
499 { "s", 2, 'F', { "say", "search", 0, 0 } },
500 { "s", 2, 'M', { "simpl", "so", 0, 0 } }
503 DEFINE_TESTCASE(wildcard1, backend) {
504 // FIXME: The counting of terms the wildcard expands to is per subdatabase,
505 // so the wildcard may expand to more terms than the limit if some aren't
506 // in all subdatabases. Also WILDCARD_LIMIT_MOST_FREQUENT uses the
507 // frequency from the subdatabase, and so may select different terms in
508 // each subdatabase.
509 SKIP_TEST_FOR_BACKEND("multi");
510 Xapian::Database db = get_database("apitest_simpledata");
511 Xapian::Enquire enq(db);
512 const Xapian::Query::op o = Xapian::Query::OP_WILDCARD;
514 for (auto&& test : wildcard1_testcases) {
515 tout << test.pattern << '\n';
516 auto tend = test.terms + 4;
517 while (tend[-1] == NULL) --tend;
518 bool expect_exception = (tend - test.terms == 4 && tend[-1][0] == '\0');
519 Xapian::Query q;
520 if (test.max_type) {
521 int max_type;
522 switch (test.max_type) {
523 case 'E':
524 max_type = Xapian::Query::WILDCARD_LIMIT_ERROR;
525 break;
526 case 'F':
527 max_type = Xapian::Query::WILDCARD_LIMIT_FIRST;
528 break;
529 case 'M':
530 max_type = Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT;
531 break;
532 default:
533 FAIL_TEST("Unexpected max_type value");
535 q = Xapian::Query(o, test.pattern, test.max_expansion, max_type);
536 } else {
537 q = Xapian::Query(o, test.pattern, test.max_expansion);
539 enq.set_query(q);
540 try {
541 Xapian::MSet mset = enq.get_mset(0, 10);
542 TEST(!expect_exception);
543 q = Xapian::Query(q.OP_SYNONYM, test.terms, tend);
544 enq.set_query(q);
545 Xapian::MSet mset2 = enq.get_mset(0, 10);
546 TEST_EQUAL(mset.size(), mset2.size());
547 TEST(mset_range_is_same(mset, 0, mset2, 0, mset.size()));
548 } catch (const Xapian::WildcardError &) {
549 TEST(expect_exception);
554 /// Regression test for #696, fixed in 1.3.4.
555 DEFINE_TESTCASE(wildcard2, backend) {
556 // FIXME: The counting of terms the wildcard expands to is per subdatabase,
557 // so the wildcard may expand to more terms than the limit if some aren't
558 // in all subdatabases. Also WILDCARD_LIMIT_MOST_FREQUENT uses the
559 // frequency from the subdatabase, and so may select different terms in
560 // each subdatabase.
561 SKIP_TEST_FOR_BACKEND("multi");
562 Xapian::Database db = get_database("apitest_simpledata");
563 Xapian::Enquire enq(db);
564 const Xapian::Query::op o = Xapian::Query::OP_WILDCARD;
566 const int max_type = Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT;
567 Xapian::Query q0(o, "w", 2, max_type);
568 Xapian::Query q(o, "s", 2, max_type);
569 Xapian::Query q2(o, "t", 2, max_type);
570 q = Xapian::Query(q.OP_OR, q0, q);
571 q = Xapian::Query(q.OP_OR, q, q2);
572 enq.set_query(q);
573 Xapian::MSet mset = enq.get_mset(0, 10);
574 TEST_EQUAL(mset.size(), 6);
577 /** Regression test for bug in initial implementation.
579 * If any terms started with A-Z then the next term that didn't wasn't
580 * considered.
582 DEFINE_TESTCASE(wildcard3, backend) {
583 Xapian::Database db = get_database("wildcard3",
584 [](Xapian::WritableDatabase& wdb,
585 const string&)
587 Xapian::Document doc;
588 doc.add_term("Zfoo");
589 doc.add_term("a");
590 wdb.add_document(doc);
591 doc.add_term("abc");
592 wdb.add_document(doc);
595 Xapian::Enquire enq(db);
596 Xapian::Query q(Xapian::Query::OP_WILDCARD, "?", 0,
597 Xapian::Query::WILDCARD_PATTERN_GLOB);
598 enq.set_query(q);
599 Xapian::MSet mset = enq.get_mset(0, 10);
600 TEST_EQUAL(mset.size(), 2);
603 /** Regression test for OP_WILDCARD bug, fixed in 1.4.26.
605 * Fix overly high reported termweight values in some cases.
607 DEFINE_TESTCASE(wildcard4, backend) {
608 Xapian::Database db = get_database("apitest_simpledata");
609 Xapian::Enquire enq(db);
610 Xapian::Query q(Xapian::Query::OP_WILDCARD, "u", 0,
611 Xapian::Query::WILDCARD_LIMIT_ERROR,
612 Xapian::Query::OP_OR);
613 q |= Xapian::Query("xyzzy");
614 q |= Xapian::Query("use");
615 enq.set_query(q);
616 Xapian::MSet mset = enq.get_mset(0, 10);
617 TEST_EQUAL(mset.size(), 4);
618 TEST_EQUAL(mset[0].get_percent(), 25);
619 TEST_EQUAL_DOUBLE(mset.get_termweight("up"), 1.48489483900601);
620 // The exact termweight value here depends on the backend, but before the
621 // bug fix we were doubling the termweight of "use".
622 TEST_REL(mset.get_termweight("use"), <, 0.9);
623 TEST_EQUAL(mset.get_termweight("xyzzy"), 0.0);
624 // Enquire::get_matching_terms_begin() doesn't report terms from wildcard
625 // expansion, but it should report an explicit query term which also
626 // happens be in a wildcard expansion.
627 string terms;
628 for (auto t = enq.get_matching_terms_begin(*mset[1]);
629 t != enq.get_matching_terms_end(*mset[1]);
630 ++t) {
631 if (!terms.empty()) terms += ' ';
632 terms += *t;
634 TEST_EQUAL(terms, "use");
637 DEFINE_TESTCASE(dualprefixwildcard1, backend) {
638 Xapian::Database db = get_database("apitest_simpledata");
639 Xapian::Query q(Xapian::Query::OP_SYNONYM,
640 Xapian::Query(Xapian::Query::OP_WILDCARD, "fo"),
641 Xapian::Query(Xapian::Query::OP_WILDCARD, "Sfo"));
642 tout << q.get_description() << '\n';
643 Xapian::Enquire enq(db);
644 enq.set_query(q);
645 TEST_EQUAL(enq.get_mset(0, 5).size(), 2);
648 /// Test special case wildcards.
649 DEFINE_TESTCASE(specialwildcard1, !backend) {
650 const Xapian::Query::op o = Xapian::Query::OP_WILDCARD;
651 const auto f = Xapian::Query::WILDCARD_PATTERN_GLOB;
653 // Empty wildcard -> MatchNothing.
654 TEST_EQUAL(Xapian::Query(o, "", 0, f).get_description(), "Query()");
656 // "*", "?*", etc -> MatchAll.
657 #define QUERY_ALLDOCS "Query(<alldocuments>)"
658 TEST_EQUAL(Xapian::Query(o, "*", 0, f).get_description(), QUERY_ALLDOCS);
659 TEST_EQUAL(Xapian::Query(o, "**", 0, f).get_description(), QUERY_ALLDOCS);
660 TEST_EQUAL(Xapian::Query(o, "?*", 0, f).get_description(), QUERY_ALLDOCS);
661 TEST_EQUAL(Xapian::Query(o, "*?", 0, f).get_description(), QUERY_ALLDOCS);
662 TEST_EQUAL(Xapian::Query(o, "*?*", 0, f).get_description(), QUERY_ALLDOCS);
665 static void
666 gen_singlecharwildcard1_db(Xapian::WritableDatabase& db, const string&)
669 Xapian::Document doc;
670 doc.add_term("test");
671 db.add_document(doc);
674 Xapian::Document doc;
675 doc.add_term("t\xc3\xaast");
676 db.add_document(doc);
679 Xapian::Document doc;
680 doc.add_term("t\xe1\x80\x80st");
681 db.add_document(doc);
684 Xapian::Document doc;
685 doc.add_term("t\xf3\x80\x80\x80st");
686 db.add_document(doc);
689 Xapian::Document doc;
690 doc.add_term("toast");
691 db.add_document(doc);
694 Xapian::Document doc;
695 doc.add_term("t*t");
696 db.add_document(doc);
700 /// Test `?` extended wildcard.
701 DEFINE_TESTCASE(singlecharwildcard1, backend) {
702 Xapian::Database db = get_database("singlecharwildcard1",
703 gen_singlecharwildcard1_db);
704 Xapian::Enquire enq(db);
705 enq.set_weighting_scheme(Xapian::BoolWeight());
707 const Xapian::Query::op o = Xapian::Query::OP_WILDCARD;
708 const auto f = Xapian::Query::WILDCARD_PATTERN_SINGLE;
711 // Check that `?` matches one Unicode character.
712 enq.set_query(Xapian::Query(o, "t?st", 0, f));
713 Xapian::MSet mset = enq.get_mset(0, 100);
714 mset_expect_order(mset, 1, 2, 3, 4);
718 // Check that `??` doesn't match a single two-byte UTF-8 character.
719 enq.set_query(Xapian::Query(o, "t??st", 0, f));
720 Xapian::MSet mset = enq.get_mset(0, 100);
721 mset_expect_order(mset, 5);
725 // Check that `*` is handled as a literal character not a wildcard.
726 enq.set_query(Xapian::Query(o, "t*t", 0, f));
727 Xapian::MSet mset = enq.get_mset(0, 100);
728 mset_expect_order(mset, 6);
732 static void
733 gen_multicharwildcard1_db(Xapian::WritableDatabase& db, const string&)
736 Xapian::Document doc;
737 doc.add_term("ananas");
738 db.add_document(doc);
741 Xapian::Document doc;
742 doc.add_term("annas");
743 db.add_document(doc);
746 Xapian::Document doc;
747 doc.add_term("bananas");
748 db.add_document(doc);
751 Xapian::Document doc;
752 doc.add_term("banannas");
753 db.add_document(doc);
756 Xapian::Document doc;
757 doc.add_term("b?nanas");
758 db.add_document(doc);
762 /// Test `*` extended wildcard.
763 DEFINE_TESTCASE(multicharwildcard1, backend) {
764 Xapian::Database db = get_database("multicharwildcard1",
765 gen_multicharwildcard1_db);
766 Xapian::Enquire enq(db);
767 enq.set_weighting_scheme(Xapian::BoolWeight());
769 const Xapian::Query::op o = Xapian::Query::OP_WILDCARD;
770 const auto f = Xapian::Query::WILDCARD_PATTERN_MULTI;
773 // Check `*` can handle partial matches before and after.
774 enq.set_query(Xapian::Query(o, "b*anas", 0, f));
775 Xapian::MSet mset = enq.get_mset(0, 100);
776 mset_expect_order(mset, 3, 5);
780 // Check leading `*` works.
781 enq.set_query(Xapian::Query(o, "*anas", 0, f));
782 Xapian::MSet mset = enq.get_mset(0, 100);
783 mset_expect_order(mset, 1, 3, 5);
787 // Check more than one `*` works.
788 enq.set_query(Xapian::Query(o, "*ann*", 0, f));
789 Xapian::MSet mset = enq.get_mset(0, 100);
790 mset_expect_order(mset, 2, 4);
794 // Check that `?` is handled as a literal character not a wildcard.
795 enq.set_query(Xapian::Query(o, "b?n*", 0, f));
796 Xapian::MSet mset = enq.get_mset(0, 100);
797 mset_expect_order(mset, 5);
801 struct editdist_testcase {
802 const char* target;
803 unsigned edit_distance;
804 Xapian::termcount max_expansion;
805 char max_type;
806 const char* terms[4];
809 #define EDITDIST_EXCEPTION { 0, 0, 0, "" }
810 static const
811 editdist_testcase editdist1_testcases[] = {
812 // Tries to expand to 9 terms.
813 { "muse", 2, 8, 'E', EDITDIST_EXCEPTION },
814 { "museum", 3, 3, 'E', { "mset", "must", "use", 0 } },
815 { "thou", 0, 9, 'E', { 0, 0, 0, 0 } },
816 { "though", 0, 9, 'E', { "though", 0, 0, 0 } },
817 { "museum", 3, 1, 'F', { "mset", 0, 0, 0 } },
818 { "museum", 3, 1, 'M', { "use", 0, 0, 0 } },
821 DEFINE_TESTCASE(editdist1, backend) {
822 // FIXME: The counting of terms the subquery expands to is per subdatabase,
823 // so it may expand to more terms than the limit if some aren't in all
824 // subdatabases. Also WILDCARD_LIMIT_MOST_FREQUENT uses the frequency from
825 // the subdatabase, and so may select different terms in each subdatabase.
826 SKIP_TEST_FOR_BACKEND("multi");
827 Xapian::Database db = get_database("apitest_simpledata");
828 Xapian::Enquire enq(db);
829 const Xapian::Query::op o = Xapian::Query::OP_EDIT_DISTANCE;
831 for (auto&& test : editdist1_testcases) {
832 tout << test.target << '\n';
833 auto tend = test.terms + 4;
834 while (tend > test.terms && tend[-1] == NULL) --tend;
835 bool expect_exception = (tend - test.terms == 4 && tend[-1][0] == '\0');
836 Xapian::Query q;
837 int max_type;
838 switch (test.max_type) {
839 case 'E':
840 max_type = Xapian::Query::WILDCARD_LIMIT_ERROR;
841 break;
842 case 'F':
843 max_type = Xapian::Query::WILDCARD_LIMIT_FIRST;
844 break;
845 case 'M':
846 max_type = Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT;
847 break;
848 default:
849 FAIL_TEST("Unexpected max_type value");
851 q = Xapian::Query(o, test.target, test.max_expansion, max_type,
852 q.OP_SYNONYM, test.edit_distance);
853 enq.set_query(q);
854 tout << q.get_description() << '\n';
855 try {
856 Xapian::MSet mset = enq.get_mset(0, 10);
857 TEST(!expect_exception);
858 q = Xapian::Query(q.OP_SYNONYM, test.terms, tend);
859 enq.set_query(q);
860 Xapian::MSet mset2 = enq.get_mset(0, 10);
861 TEST_EQUAL(mset.size(), mset2.size());
862 TEST(mset_range_is_same(mset, 0, mset2, 0, mset.size()));
863 } catch (const Xapian::WildcardError&) {
864 TEST(expect_exception);
869 // u8"foo" is const char8_t[] in C++20 and later.
870 #define UTF8(X) reinterpret_cast<const char*>(u8"" X "")
872 static const
873 editdist_testcase editdist2_testcases[] = {
874 { UTF8("\U00010000"), 1, 8, 'E', { UTF8("a\U00010000"), 0, 0, 0 } },
877 /// Test Unicode edit distance calculations.
878 DEFINE_TESTCASE(editdist2, backend) {
879 Xapian::Database db = get_database("editdist2",
880 [](Xapian::WritableDatabase& wdb,
881 const string&)
883 Xapian::Document doc;
884 doc.add_term(UTF8("a\U00010000"));
885 wdb.add_document(doc);
887 Xapian::Enquire enq(db);
888 const Xapian::Query::op o = Xapian::Query::OP_EDIT_DISTANCE;
890 for (auto&& test : editdist2_testcases) {
891 tout << test.target << '\n';
892 auto tend = test.terms + 4;
893 while (tend > test.terms && tend[-1] == NULL) --tend;
894 bool expect_exception = (tend - test.terms == 4 && tend[-1][0] == '\0');
895 Xapian::Query q;
896 int max_type;
897 switch (test.max_type) {
898 case 'E':
899 max_type = Xapian::Query::WILDCARD_LIMIT_ERROR;
900 break;
901 case 'F':
902 max_type = Xapian::Query::WILDCARD_LIMIT_FIRST;
903 break;
904 case 'M':
905 max_type = Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT;
906 break;
907 default:
908 FAIL_TEST("Unexpected max_type value");
910 q = Xapian::Query(o, test.target, test.max_expansion, max_type,
911 q.OP_SYNONYM, test.edit_distance);
912 enq.set_query(q);
913 tout << q.get_description() << '\n';
914 try {
915 Xapian::MSet mset = enq.get_mset(0, 10);
916 TEST(!expect_exception);
917 q = Xapian::Query(q.OP_SYNONYM, test.terms, tend);
918 enq.set_query(q);
919 Xapian::MSet mset2 = enq.get_mset(0, 10);
920 TEST_EQUAL(mset.size(), mset2.size());
921 TEST(mset_range_is_same(mset, 0, mset2, 0, mset.size()));
922 } catch (const Xapian::WildcardError&) {
923 TEST(expect_exception);
928 DEFINE_TESTCASE(dualprefixeditdist1, backend) {
929 Xapian::Database db = get_database("dualprefixeditdist1",
930 [](Xapian::WritableDatabase& wdb,
931 const string&)
933 Xapian::Document doc;
934 doc.add_term("opossum");
935 doc.add_term("possum");
936 wdb.add_document(doc);
937 doc.clear_terms();
938 doc.add_term("Spossums");
939 wdb.add_document(doc);
942 auto OP_EDIT_DISTANCE = Xapian::Query::OP_EDIT_DISTANCE;
943 auto OP_SYNONYM = Xapian::Query::OP_SYNONYM;
944 Xapian::Query q0(OP_EDIT_DISTANCE, "possum");
945 Xapian::Query q1(OP_EDIT_DISTANCE, "Spossum", 0, 0, OP_SYNONYM, 2, 1);
946 Xapian::Query q(OP_SYNONYM, q0, q1);
947 tout << q.get_description() << '\n';
948 Xapian::Enquire enq(db);
949 enq.set_query(q0);
950 Xapian::MSet mset = enq.get_mset(0, 5);
951 TEST_EQUAL(mset.size(), 1);
952 TEST_EQUAL(*mset[0], 1);
953 enq.set_query(q1);
954 mset = enq.get_mset(0, 5);
955 TEST_EQUAL(mset.size(), 1);
956 TEST_EQUAL(*mset[0], 2);
957 enq.set_query(q);
958 mset = enq.get_mset(0, 5);
959 TEST_EQUAL(mset.size(), 2);
/// One table entry for the loosephrase1 and loosenear1 tests.
struct positional_testcase {
    /// Window size passed to the Xapian::Query constructor.
    int window;
    /// Up to four query terms; unused trailing slots are null.
    const char * terms[4];
    /// Docid of the single expected match, or 0 if no match is expected.
    Xapian::docid result;
};
968 static const
969 positional_testcase loosephrase1_testcases[] = {
970 { 5, { "expect", "to", "mset", 0 }, 0 },
971 { 5, { "word", "well", "the", 0 }, 2 },
972 { 5, { "if", "word", "doesnt", 0 }, 0 },
973 { 5, { "at", "line", "three", 0 }, 0 },
974 { 5, { "paragraph", "other", "the", 0 }, 0 },
975 { 5, { "other", "the", "with", 0 }, 0 }
978 /// Regression test for bug fixed in 1.3.3 and 1.2.21.
979 DEFINE_TESTCASE(loosephrase1, backend) {
980 Xapian::Database db = get_database("apitest_simpledata");
981 Xapian::Enquire enq(db);
983 for (auto&& test : loosephrase1_testcases) {
984 auto tend = test.terms + 4;
985 while (tend[-1] == NULL) --tend;
986 auto OP_PHRASE = Xapian::Query::OP_PHRASE;
987 Xapian::Query q(OP_PHRASE, test.terms, tend, test.window);
988 enq.set_query(q);
989 Xapian::MSet mset = enq.get_mset(0, 10);
990 if (test.result == 0) {
991 TEST(mset.empty());
992 } else {
993 TEST_EQUAL(mset.size(), 1);
994 TEST_EQUAL(*mset[0], test.result);
999 static const
1000 positional_testcase loosenear1_testcases[] = {
1001 { 4, { "test", "the", "with", 0 }, 1 },
1002 { 4, { "expect", "word", "the", 0 }, 2 },
1003 { 4, { "line", "be", "blank", 0 }, 1 },
1004 { 2, { "banana", "banana", 0, 0 }, 0 },
1005 { 3, { "banana", "banana", 0, 0 }, 0 },
1006 { 2, { "word", "word", 0, 0 }, 2 },
1007 { 4, { "work", "meant", "work", 0 }, 0 },
1008 { 4, { "this", "one", "yet", "one" }, 0 }
1011 /// Regression tests for bugs fixed in 1.3.3 and 1.2.21.
1012 DEFINE_TESTCASE(loosenear1, backend) {
1013 Xapian::Database db = get_database("apitest_simpledata");
1014 Xapian::Enquire enq(db);
1016 for (auto&& test : loosenear1_testcases) {
1017 auto tend = test.terms + 4;
1018 while (tend[-1] == NULL) --tend;
1019 Xapian::Query q(Xapian::Query::OP_NEAR, test.terms, tend, test.window);
1020 enq.set_query(q);
1021 Xapian::MSet mset = enq.get_mset(0, 10);
1022 if (test.result == 0) {
1023 TEST(mset.empty());
1024 } else {
1025 TEST_EQUAL(mset.size(), 1);
1026 TEST_EQUAL(*mset[0], test.result);
1031 /// Regression test for bug fixed in 1.3.6 - the first case segfaulted in 1.3.x.
1032 DEFINE_TESTCASE(complexphrase1, backend) {
1033 Xapian::Database db = get_database("apitest_simpledata");
1034 Xapian::Enquire enq(db);
1035 Xapian::Query query(Xapian::Query::OP_PHRASE,
1036 Xapian::Query("a") | Xapian::Query("b"),
1037 Xapian::Query("i"));
1038 enq.set_query(query);
1039 TEST(enq.get_mset(0, 10).empty());
1040 Xapian::Query query2(Xapian::Query::OP_PHRASE,
1041 Xapian::Query("a") | Xapian::Query("b"),
1042 Xapian::Query("c"));
1043 enq.set_query(query2);
1044 TEST(enq.get_mset(0, 10).empty());
1047 /// Regression test for bug fixed in 1.3.6 - the first case segfaulted in 1.3.x.
1048 DEFINE_TESTCASE(complexnear1, backend) {
1049 Xapian::Database db = get_database("apitest_simpledata");
1050 Xapian::Enquire enq(db);
1051 Xapian::Query query(Xapian::Query::OP_NEAR,
1052 Xapian::Query("a") | Xapian::Query("b"),
1053 Xapian::Query("i"));
1054 enq.set_query(query);
1055 TEST(enq.get_mset(0, 10).empty());
1056 Xapian::Query query2(Xapian::Query::OP_NEAR,
1057 Xapian::Query("a") | Xapian::Query("b"),
1058 Xapian::Query("c"));
1059 enq.set_query(query2);
1060 TEST(enq.get_mset(0, 10).empty());
1063 /// Check subqueries of MatchAll, MatchNothing and PostingSource are supported.
1064 DEFINE_TESTCASE(complexphrase2, backend) {
1065 Xapian::Database db = get_database("apitest_simpledata");
1066 Xapian::Enquire enq(db);
1067 Xapian::ValueWeightPostingSource ps(0);
1068 Xapian::Query subqs[3] = {
1069 Xapian::Query(Xapian::Query::OP_PHRASE,
1070 Xapian::Query("a"),
1071 Xapian::Query(&ps)),
1072 Xapian::Query(Xapian::Query::OP_PHRASE,
1073 Xapian::Query("and"),
1074 Xapian::Query::MatchAll),
1075 Xapian::Query(Xapian::Query::OP_PHRASE,
1076 Xapian::Query("at"),
1077 Xapian::Query::MatchNothing)
1079 Xapian::Query query(Xapian::Query::OP_OR, subqs, subqs + 3);
1080 enq.set_query(query);
1081 (void)enq.get_mset(0, 10);
1084 /// Check subqueries of MatchAll, MatchNothing and PostingSource are supported.
1085 DEFINE_TESTCASE(complexnear2, backend) {
1086 Xapian::Database db = get_database("apitest_simpledata");
1087 Xapian::Enquire enq(db);
1088 Xapian::ValueWeightPostingSource ps(0);
1089 Xapian::Query subqs[3] = {
1090 Xapian::Query(Xapian::Query::OP_NEAR,
1091 Xapian::Query("a"),
1092 Xapian::Query(&ps)),
1093 Xapian::Query(Xapian::Query::OP_NEAR,
1094 Xapian::Query("and"),
1095 Xapian::Query::MatchAll),
1096 Xapian::Query(Xapian::Query::OP_NEAR,
1097 Xapian::Query("at"),
1098 Xapian::Query::MatchNothing)
1100 Xapian::Query query(Xapian::Query::OP_OR, subqs, subqs + 3);
1101 enq.set_query(query);
1102 (void)enq.get_mset(0, 10);
1105 /// A zero estimated number of matches broke the code to round the estimate.
1106 DEFINE_TESTCASE(zeroestimate1, backend) {
1107 Xapian::Enquire enquire(get_database("apitest_simpledata"));
1108 Xapian::Query phrase(Xapian::Query::OP_PHRASE,
1109 Xapian::Query("absolute"),
1110 Xapian::Query("rubbish"));
1111 enquire.set_query(phrase &~ Xapian::Query("queri"));
1112 Xapian::MSet mset = enquire.get_mset(0, 0);
1113 TEST_EQUAL(mset.get_matches_estimated(), 0);
1116 /// Feature test for OR under OP_PHRASE support added in 1.4.3.
1117 DEFINE_TESTCASE(complexphrase3, backend) {
1118 Xapian::Database db = get_database("apitest_simpledata");
1119 Xapian::Enquire enq(db);
1120 Xapian::Query query(Xapian::Query::OP_PHRASE,
1121 Xapian::Query("is") | Xapian::Query("as") | Xapian::Query("be"),
1122 Xapian::Query("a"));
1123 enq.set_query(query);
1124 mset_expect_order(enq.get_mset(0, 10), 1);
1125 Xapian::Query query2(Xapian::Query::OP_PHRASE,
1126 Xapian::Query("a"),
1127 Xapian::Query("is") | Xapian::Query("as") | Xapian::Query("be"));
1128 enq.set_query(query2);
1129 mset_expect_order(enq.get_mset(0, 10));
1130 Xapian::Query query3(Xapian::Query::OP_PHRASE,
1131 Xapian::Query("one") | Xapian::Query("with"),
1132 Xapian::Query("the") | Xapian::Query("of") | Xapian::Query("line"));
1133 enq.set_query(query3);
1134 mset_expect_order(enq.get_mset(0, 10), 1, 4, 5);
1135 Xapian::Query query4(Xapian::Query::OP_PHRASE,
1136 Xapian::Query("the") | Xapian::Query("of") | Xapian::Query("line"),
1137 Xapian::Query("one") | Xapian::Query("with"));
1138 enq.set_query(query4);
1139 mset_expect_order(enq.get_mset(0, 10));
1142 /// Feature test for OR under OP_NEAR support added in 1.4.3.
1143 DEFINE_TESTCASE(complexnear3, backend) {
1144 Xapian::Database db = get_database("apitest_simpledata");
1145 Xapian::Enquire enq(db);
1146 Xapian::Query query(Xapian::Query::OP_NEAR,
1147 Xapian::Query("is") | Xapian::Query("as") | Xapian::Query("be"),
1148 Xapian::Query("a"));
1149 enq.set_query(query);
1150 mset_expect_order(enq.get_mset(0, 10), 1);
1151 Xapian::Query query2(Xapian::Query::OP_NEAR,
1152 Xapian::Query("a"),
1153 Xapian::Query("is") | Xapian::Query("as") | Xapian::Query("be"));
1154 enq.set_query(query2);
1155 mset_expect_order(enq.get_mset(0, 10), 1);
1156 Xapian::Query query3(Xapian::Query::OP_NEAR,
1157 Xapian::Query("one") | Xapian::Query("with"),
1158 Xapian::Query("the") | Xapian::Query("of") | Xapian::Query("line"));
1159 enq.set_query(query3);
1160 mset_expect_order(enq.get_mset(0, 10), 1, 4, 5);
1161 Xapian::Query query4(Xapian::Query::OP_NEAR,
1162 Xapian::Query("the") | Xapian::Query("of") | Xapian::Query("line"),
1163 Xapian::Query("one") | Xapian::Query("with"));
1164 enq.set_query(query4);
1165 mset_expect_order(enq.get_mset(0, 10), 1, 4, 5);
1168 static void
1169 gen_subdbwithoutpos1_db(Xapian::WritableDatabase& db, const string&)
1171 Xapian::Document doc;
1172 doc.add_term("this");
1173 doc.add_term("paragraph");
1174 doc.add_term("wibble", 5);
1175 db.add_document(doc);
1178 DEFINE_TESTCASE(subdbwithoutpos1, backend) {
1179 Xapian::Database db(get_database("apitest_simpledata"));
1180 TEST(db.has_positions());
1182 Xapian::Query q_near(Xapian::Query::OP_NEAR,
1183 Xapian::Query("this"),
1184 Xapian::Query("paragraph"));
1186 Xapian::Query q_phrase(Xapian::Query::OP_PHRASE,
1187 Xapian::Query("this"),
1188 Xapian::Query("paragraph"));
1190 Xapian::Enquire enq1(db);
1191 enq1.set_query(q_near);
1192 Xapian::MSet mset1 = enq1.get_mset(0, 10);
1193 TEST_EQUAL(mset1.size(), 3);
1195 enq1.set_query(q_phrase);
1196 mset1 = enq1.get_mset(0, 10);
1197 TEST_EQUAL(mset1.size(), 3);
1199 Xapian::Database db2 =
1200 get_database("subdbwithoutpos1", gen_subdbwithoutpos1_db);
1201 TEST(!db2.has_positions());
1203 // If a database has no positional info, we used to map OP_PHRASE and
1204 // OP_NEAR to OP_AND, but since 1.5.0 we no longer do.
1205 Xapian::Enquire enq2(db2);
1206 enq2.set_query(q_near);
1207 Xapian::MSet mset2 = enq2.get_mset(0, 10);
1208 TEST_EQUAL(mset2.size(), 0);
1210 enq2.set_query(q_phrase);
1211 mset2 = enq2.get_mset(0, 10);
1212 TEST_EQUAL(mset2.size(), 0);
1214 // If one sub-database in a combined database has no positional info but
1215 // other sub-databases do, then we shouldn't convert OP_PHRASE to OP_AND
1216 // (but prior to 1.4.3 we did).
1217 db.add_database(db2);
1218 TEST(db.has_positions());
1220 Xapian::Enquire enq3(db);
1221 enq3.set_query(q_near);
1222 Xapian::MSet mset3 = enq3.get_mset(0, 10);
1223 TEST_EQUAL(mset3.size(), 3);
1224 // Regression test for bug introduced in 1.4.3 which led to a division by
1225 // zero and then (at least on Linux) we got 1% here.
1226 TEST_EQUAL(mset3[0].get_percent(), 100);
1228 enq3.set_query(q_phrase);
1229 mset3 = enq3.get_mset(0, 10);
1230 TEST_EQUAL(mset3.size(), 3);
1231 // Regression test for bug introduced in 1.4.3 which led to a division by
1232 // zero and then (at least on Linux) we got 1% here.
1233 TEST_EQUAL(mset3[0].get_percent(), 100);
1235 // Regression test for https://trac.xapian.org/ticket/752
1236 auto q = (Xapian::Query("this") & q_phrase) | Xapian::Query("wibble");
1237 enq3.set_query(q);
1238 mset3 = enq3.get_mset(0, 10);
1239 TEST_EQUAL(mset3.size(), 4);
1242 // Regression test for bug fixed in 1.4.4 and 1.2.25.
1243 DEFINE_TESTCASE(notandor1, backend) {
1244 Xapian::Database db(get_database("etext"));
1245 using Xapian::Query;
1246 Query q = Query("the") &~ (Query("friedrich") &
1247 (Query("day") | Query("night")));
1248 Xapian::Enquire enq(db);
1249 enq.set_query(q);
1251 Xapian::MSet mset = enq.get_mset(0, 10, db.get_doccount());
1252 TEST_EQUAL(mset.get_matches_estimated(), 344);
1255 // Regression test for bug fixed in git master before 1.5.0.
1256 DEFINE_TESTCASE(boolorbug1, backend) {
1257 Xapian::Database db(get_database("etext"));
1258 using Xapian::Query;
1259 Query q = Query("the") &~ Query(Query::OP_WILDCARD, "pru");
1260 Xapian::Enquire enq(db);
1261 enq.set_query(q);
1263 Xapian::MSet mset = enq.get_mset(0, 10, db.get_doccount());
1264 // Due to a bug in BoolOrPostList this returned 330 results.
1265 TEST_EQUAL(mset.get_matches_estimated(), 331);
1268 // Regression test for bug introduced in 1.4.13 and fixed in 1.4.14.
1269 DEFINE_TESTCASE(hoistnotbug1, backend) {
1270 Xapian::Database db(get_database("etext"));
1271 using Xapian::Query;
1272 Query q(Query::OP_PHRASE, Query("the"), Query("king"));
1273 q &= ~Query("worldtornado");
1274 q &= Query("a");
1275 Xapian::Enquire enq(db);
1276 enq.set_query(q);
1278 // This reliably fails before the fix in an assertion build, and may crash
1279 // in other builds.
1280 Xapian::MSet mset = enq.get_mset(0, 10, db.get_doccount());
1281 TEST_EQUAL(mset.get_matches_estimated(), 42);
1284 // Regression test for segfault optimising query on git master before 1.5.0.
1285 DEFINE_TESTCASE(emptynot1, backend) {
1286 Xapian::Database db(get_database("apitest_simpledata"));
1287 Xapian::Enquire enq(db);
1288 enq.set_weighting_scheme(Xapian::BoolWeight());
1289 Xapian::Query query = Xapian::Query("document") & Xapian::Query("api");
1290 // This range won't match anything, so collapses to MatchNothing as we
1291 // optimise the query.
1292 query = Xapian::Query(query.OP_AND_NOT,
1293 query,
1294 Xapian::Query(Xapian::Query::OP_VALUE_GE, 1234, "x"));
1295 enq.set_query(query);
1296 Xapian::MSet mset = enq.get_mset(0, 10);
1297 TEST_EQUAL(mset.size(), 1);
1298 // Essentially the same test but with a term which doesn't match anything
1299 // on the right side.
1300 query = Xapian::Query("document") & Xapian::Query("api");
1301 query = Xapian::Query(query.OP_AND_NOT,
1302 query,
1303 Xapian::Query("nosuchterm"));
1304 enq.set_query(query);
1305 mset = enq.get_mset(0, 10);
1306 TEST_EQUAL(mset.size(), 1);
1307 // Essentially the same test but with a wildcard which doesn't match
1308 // anything on right side.
1309 query = Xapian::Query("document") & Xapian::Query("api");
1310 query = Xapian::Query(query.OP_AND_NOT,
1311 query,
1312 Xapian::Query(query.OP_WILDCARD, "nosuchwildcard"));
1313 enq.set_query(query);
1314 mset = enq.get_mset(0, 10);
1315 TEST_EQUAL(mset.size(), 1);
1318 // Similar case to emptynot1 but for OP_AND_MAYBE. This case wasn't failing,
1319 // so this isn't a regression test, but we do want to ensure it works.
1320 DEFINE_TESTCASE(emptymaybe1, backend) {
1321 Xapian::Database db(get_database("apitest_simpledata"));
1322 Xapian::Enquire enq(db);
1323 enq.set_weighting_scheme(Xapian::BoolWeight());
1324 Xapian::Query query = Xapian::Query("document") & Xapian::Query("api");
1325 // This range won't match anything, so collapses to MatchNothing as we
1326 // optimise the query.
1327 query = Xapian::Query(query.OP_AND_MAYBE,
1328 query,
1329 Xapian::Query(Xapian::Query::OP_VALUE_GE, 1234, "x"));
1330 enq.set_query(query);
1331 Xapian::MSet mset = enq.get_mset(0, 10);
1332 TEST_EQUAL(mset.size(), 1);
1333 // Essentially the same test but with a term which doesn't match anything
1334 // on the right side.
1335 query = Xapian::Query("document") & Xapian::Query("api");
1336 query = Xapian::Query(query.OP_AND_MAYBE,
1337 query,
1338 Xapian::Query("nosuchterm"));
1339 enq.set_query(query);
1340 mset = enq.get_mset(0, 10);
1341 TEST_EQUAL(mset.size(), 1);
1342 // Essentially the same test but with a wildcard which doesn't match
1343 // anything on right side.
1344 query = Xapian::Query("document") & Xapian::Query("api");
1345 query = Xapian::Query(query.OP_AND_MAYBE,
1346 query,
1347 Xapian::Query(query.OP_WILDCARD, "nosuchwildcard"));
1348 enq.set_query(query);
1349 mset = enq.get_mset(0, 10);
1350 TEST_EQUAL(mset.size(), 1);
1353 // Regression test for optimisation bug on git master before 1.5.0.
1354 // The query optimiser ignored the NOT part when the LHS contained
1355 // a MatchAll.
1356 DEFINE_TESTCASE(allnot1, backend) {
1357 Xapian::Database db(get_database("apitest_simpledata"));
1358 Xapian::Enquire enq(db);
1359 Xapian::Query query;
1360 // This case wasn't a problem, but would have been if the index-all term
1361 // was handled like MatchAll by this optimisation (which it might be in
1362 // future).
1363 query = Xapian::Query{query.OP_AND_NOT,
1364 Xapian::Query("this"),
1365 Xapian::Query("the")};
1366 enq.set_query(0 * query);
1367 Xapian::MSet mset = enq.get_mset(0, 10);
1368 TEST_EQUAL(mset.size(), 2);
1369 query = Xapian::Query{query.OP_AND_NOT,
1370 query.MatchAll,
1371 Xapian::Query("the")};
1372 enq.set_query(0 * query);
1373 mset = enq.get_mset(0, 10);
1374 TEST_EQUAL(mset.size(), 2);
1377 // Regression test for optimisation bug on git master before 1.5.0.
1378 // The query optimiser didn't handle the RHS of AND_MAYBE not matching
1379 // anything.
1380 DEFINE_TESTCASE(emptymayberhs1, backend) {
1381 Xapian::Database db(get_database("apitest_simpledata"));
1382 Xapian::Enquire enq(db);
1383 // The RHS doesn't match anything, which now gives a NULL PostList*, and
1384 // we were trying to dereference that in this case.
1385 Xapian::Query query(Xapian::Query::OP_AND_MAYBE,
1386 Xapian::Query("document"),
1387 Xapian::Query("xyzzy"));
1388 enq.set_query(query);
1389 Xapian::MSet mset = enq.get_mset(0, 10);
1390 TEST_EQUAL(mset.size(), 2);
1393 DEFINE_TESTCASE(phraseweightcheckbug1, backend) {
1394 Xapian::Database db(get_database("phraseweightcheckbug1"));
1395 Xapian::Enquire enq(db);
1396 static const char* const words[] = {"hello", "world"};
1397 Xapian::Query query{Xapian::Query::OP_PHRASE, begin(words), end(words), 2};
1398 query = Xapian::Query(query.OP_OR, query, Xapian::Query("most"));
1399 tout << query.get_description() << '\n';
1400 enq.set_query(query);
1401 Xapian::MSet mset = enq.get_mset(0, 3);
1402 TEST_EQUAL(mset.size(), 3);
1405 DEFINE_TESTCASE(orphanedhint1, backend) {
1406 Xapian::Database db(get_database("apitest_simpledata"));
1407 Xapian::Enquire enq(db);
1408 auto OP_WILDCARD = Xapian::Query::OP_WILDCARD;
1409 Xapian::Query query = Xapian::Query(OP_WILDCARD, "doc") &
1410 Xapian::Query(OP_WILDCARD, "xyzzy");
1411 query |= Xapian::Query("test");
1412 tout << query.get_description() << '\n';
1413 enq.set_query(query);
1414 Xapian::MSet mset = enq.get_mset(0, 3);
1415 TEST_EQUAL(mset.size(), 1);
1418 // Regression test for bugs in initial implementation of query optimisation
1419 // based on docid range information.
1420 DEFINE_TESTCASE(docidrangebugs1, backend) {
1421 Xapian::Database db(get_database("apitest_simpledata"));
1422 Xapian::Enquire enq(db);
1424 // This triggered a bug in BoolOrPostList::get_docid_range().
1425 Xapian::Query query(Xapian::Query::OP_FILTER,
1426 Xapian::Query("typo"),
1427 Xapian::Query("rubbish") | Xapian::Query("this"));
1428 enq.set_query(query);
1429 Xapian::MSet mset = enq.get_mset(0, 1);
1430 TEST_EQUAL(mset.size(), 1);
1432 Xapian::Query query2(Xapian::Query::OP_FILTER,
1433 Xapian::Query("typo"),
1434 Xapian::Query("this") | Xapian::Query("rubbish"));
1435 enq.set_query(query2);
1436 mset = enq.get_mset(0, 1);
1437 TEST_EQUAL(mset.size(), 1);
1439 // Alternative reproducer where the first term doesn't match any
1440 // documents.
1441 Xapian::Query query3(Xapian::Query::OP_FILTER,
1442 Xapian::Query("typo"),
1443 Xapian::Query("nosuchterm") | Xapian::Query("this"));
1444 enq.set_query(query3);
1445 mset = enq.get_mset(0, 1);
1446 TEST_EQUAL(mset.size(), 1);
1448 Xapian::Query query4(Xapian::Query::OP_FILTER,
1449 Xapian::Query("typo"),
1450 Xapian::Query("this") | Xapian::Query("nosuchterm"));
1451 enq.set_query(query4);
1452 mset = enq.get_mset(0, 1);
1453 TEST_EQUAL(mset.size(), 1);