Update for 1.4.20
[xapian.git] / xapian-core / tests / api_opvalue.cc
blob320d8923559b49f55452823294bdbaf67211fedf
/** @file
 * @brief Tests of the OP_VALUE_* query operators.
 */
/* Copyright 2007,2008,2009,2010,2011,2017,2019 Olly Betts
 * Copyright 2008 Lemur Consulting Ltd
 * Copyright 2010 Richard Boulton
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */
#include <config.h>

#include "api_opvalue.h"

#include <xapian.h>

#include "apitest.h"
#include "testsuite.h"
#include "testutils.h"

#include <set>
#include <string>

using namespace std;
38 // Feature test for Query::OP_VALUE_RANGE.
39 DEFINE_TESTCASE(valuerange1, backend) {
40 Xapian::Database db(get_database("apitest_phrase"));
41 Xapian::Enquire enq(db);
42 static const char * const vals[] = {
43 "", " ", "a", "aa", "abcd", "e", "g", "h", "hzz", "i", "l", "z"
45 for (auto start : vals) {
46 for (auto end : vals) {
47 Xapian::Query query(Xapian::Query::OP_VALUE_RANGE, 1, start, end);
48 enq.set_query(query);
49 Xapian::MSet mset = enq.get_mset(0, 20);
50 // Check that documents in the MSet match the value range filter.
51 set<Xapian::docid> matched;
52 Xapian::MSetIterator i;
53 for (i = mset.begin(); i != mset.end(); ++i) {
54 matched.insert(*i);
55 string value = db.get_document(*i).get_value(1);
56 TEST_REL(value,>=,start);
57 TEST_REL(value,<=,end);
59 // Check that documents not in the MSet don't match the value range filter.
60 for (Xapian::docid j = db.get_lastdocid(); j != 0; --j) {
61 if (matched.find(j) == matched.end()) {
62 string value = db.get_document(j).get_value(1);
63 tout << value << " < '" << start << "' or > '" << end << "'" << endl;
64 TEST(value < start || value > end);
71 // Regression test for Query::OP_VALUE_LE - used to return document IDs for
72 // non-existent documents.
73 DEFINE_TESTCASE(valuerange2, writable) {
74 Xapian::WritableDatabase db = get_writable_database();
75 Xapian::Document doc;
76 doc.set_data("5");
77 doc.add_value(0, "5");
78 db.replace_document(5, doc);
79 Xapian::Enquire enq(db);
81 Xapian::Query query(Xapian::Query::OP_VALUE_LE, 0, "6");
82 enq.set_query(query);
83 Xapian::MSet mset = enq.get_mset(0, 20);
85 TEST_EQUAL(mset.size(), 1);
86 TEST_EQUAL(*(mset[0]), 5);
89 static void
90 make_valuerange5(Xapian::WritableDatabase &db, const string &)
92 Xapian::Document doc;
93 doc.add_value(0, "BOOK");
94 db.add_document(doc);
95 doc.add_value(0, "VOLUME");
96 db.add_document(doc);
99 // Check that lower and upper bounds are used.
100 DEFINE_TESTCASE(valuerange5, generated) {
101 Xapian::Database db = get_database("valuerange5", make_valuerange5);
103 // If the lower bound is empty, either the specified value slot is
104 // never used in the database, or the backend doesn't track value bounds.
105 // Neither should be true here.
106 TEST(!db.get_value_lower_bound(0).empty());
108 Xapian::Enquire enq(db);
110 Xapian::Query query(Xapian::Query::OP_VALUE_RANGE, 0, "APPLE", "BANANA");
111 enq.set_query(query);
112 Xapian::MSet mset = enq.get_mset(0, 0);
113 TEST_EQUAL(mset.get_matches_estimated(), 0);
115 Xapian::Query query2(Xapian::Query::OP_VALUE_RANGE, 0, "WALRUS", "ZEBRA");
116 enq.set_query(query2);
117 mset = enq.get_mset(0, 0);
118 TEST_EQUAL(mset.get_matches_estimated(), 0);
121 static void
122 make_singularvalue_db(Xapian::WritableDatabase &db, const string &)
124 Xapian::Document doc;
125 db.add_document(doc);
126 doc.add_value(0, "SINGULAR");
127 db.add_document(doc);
128 db.add_document(doc);
131 // Check handling of bounds when bounds are equal.
132 DEFINE_TESTCASE(valuerange6, generated) {
133 const auto OP_VALUE_RANGE = Xapian::Query::OP_VALUE_RANGE;
134 Xapian::Database db = get_database("singularvalue", make_singularvalue_db);
136 Xapian::Enquire enq(db);
138 Xapian::Query query;
139 query = Xapian::Query(OP_VALUE_RANGE, 0, "SATSUMA", "SLOE");
140 enq.set_query(query);
141 Xapian::MSet mset = enq.get_mset(0, 0);
142 TEST_EQUAL(mset.get_matches_lower_bound(), 2);
143 TEST_EQUAL(mset.get_matches_estimated(), 2);
144 TEST_EQUAL(mset.get_matches_upper_bound(), 2);
146 query = Xapian::Query(OP_VALUE_RANGE, 0, "PEACH", "PLUM");
147 enq.set_query(query);
148 mset = enq.get_mset(0, 0);
149 TEST_EQUAL(mset.get_matches_lower_bound(), 0);
150 TEST_EQUAL(mset.get_matches_estimated(), 0);
151 TEST_EQUAL(mset.get_matches_upper_bound(), 0);
153 query = Xapian::Query(OP_VALUE_RANGE, 0, "PEACH", "PEACH");
154 enq.set_query(query);
155 mset = enq.get_mset(0, 0);
156 TEST_EQUAL(mset.get_matches_lower_bound(), 0);
157 TEST_EQUAL(mset.get_matches_estimated(), 0);
158 TEST_EQUAL(mset.get_matches_upper_bound(), 0);
160 query = Xapian::Query(OP_VALUE_RANGE, 0, "PEACH", "PEACHERINE");
161 enq.set_query(query);
162 mset = enq.get_mset(0, 0);
163 TEST_EQUAL(mset.get_matches_lower_bound(), 0);
164 TEST_EQUAL(mset.get_matches_estimated(), 0);
165 TEST_EQUAL(mset.get_matches_upper_bound(), 0);
167 query = Xapian::Query(OP_VALUE_RANGE, 0, "SING", "SINGULARITY");
168 enq.set_query(query);
169 mset = enq.get_mset(0, 0);
170 TEST_EQUAL(mset.get_matches_lower_bound(), 2);
171 TEST_EQUAL(mset.get_matches_estimated(), 2);
172 TEST_EQUAL(mset.get_matches_upper_bound(), 2);
174 query = Xapian::Query(OP_VALUE_RANGE, 0, "SING", "SINGULAR");
175 enq.set_query(query);
176 mset = enq.get_mset(0, 0);
177 TEST_EQUAL(mset.get_matches_lower_bound(), 2);
178 TEST_EQUAL(mset.get_matches_estimated(), 2);
179 TEST_EQUAL(mset.get_matches_upper_bound(), 2);
181 query = Xapian::Query(OP_VALUE_RANGE, 0, "SINGULAR", "SINGULARITY");
182 enq.set_query(query);
183 mset = enq.get_mset(0, 0);
184 TEST_EQUAL(mset.get_matches_lower_bound(), 2);
185 TEST_EQUAL(mset.get_matches_estimated(), 2);
186 TEST_EQUAL(mset.get_matches_upper_bound(), 2);
188 query = Xapian::Query(OP_VALUE_RANGE, 0, "SINGULAR", "SINGULAR");
189 enq.set_query(query);
190 mset = enq.get_mset(0, 0);
191 TEST_EQUAL(mset.get_matches_lower_bound(), 2);
192 TEST_EQUAL(mset.get_matches_estimated(), 2);
193 TEST_EQUAL(mset.get_matches_upper_bound(), 2);
195 query = Xapian::Query(OP_VALUE_RANGE, 0, "SINGULARITY", "SINGULARITY");
196 enq.set_query(query);
197 mset = enq.get_mset(0, 0);
198 TEST_EQUAL(mset.get_matches_lower_bound(), 0);
199 TEST_EQUAL(mset.get_matches_estimated(), 0);
200 TEST_EQUAL(mset.get_matches_upper_bound(), 0);
202 query = Xapian::Query(OP_VALUE_RANGE, 0, "SINGULARITY", "SINGULARITIES");
203 enq.set_query(query);
204 mset = enq.get_mset(0, 0);
205 TEST_EQUAL(mset.get_matches_lower_bound(), 0);
206 TEST_EQUAL(mset.get_matches_estimated(), 0);
207 TEST_EQUAL(mset.get_matches_upper_bound(), 0);
209 query = Xapian::Query(OP_VALUE_RANGE, 0, "SINGULARITY", "SINNER");
210 enq.set_query(query);
211 mset = enq.get_mset(0, 0);
212 TEST_EQUAL(mset.get_matches_lower_bound(), 0);
213 TEST_EQUAL(mset.get_matches_estimated(), 0);
214 TEST_EQUAL(mset.get_matches_upper_bound(), 0);
216 query = Xapian::Query(OP_VALUE_RANGE, 0, "SINGULARITY", "ZEBRA");
217 enq.set_query(query);
218 mset = enq.get_mset(0, 0);
219 TEST_EQUAL(mset.get_matches_lower_bound(), 0);
220 TEST_EQUAL(mset.get_matches_estimated(), 0);
221 TEST_EQUAL(mset.get_matches_upper_bound(), 0);
223 query = Xapian::Query(OP_VALUE_RANGE, 0, "SINGE", "SINGER");
224 enq.set_query(query);
225 mset = enq.get_mset(0, 0);
226 TEST_EQUAL(mset.get_matches_lower_bound(), 0);
227 TEST_EQUAL(mset.get_matches_estimated(), 0);
228 TEST_EQUAL(mset.get_matches_upper_bound(), 0);
230 // Check no assertions when slot is empty. Regression test for bug
231 // introduced and fixed between 1.4.5 and 1.4.6.
232 query = Xapian::Query(OP_VALUE_RANGE, 1, "MONK", "MONKEY");
233 enq.set_query(query);
234 mset = enq.get_mset(0, 0);
235 TEST_EQUAL(mset.get_matches_lower_bound(), 0);
236 TEST_EQUAL(mset.get_matches_estimated(), 0);
237 TEST_EQUAL(mset.get_matches_upper_bound(), 0);
240 static void
241 make_valprefixbounds_db(Xapian::WritableDatabase &db, const string &)
243 Xapian::Document doc;
244 db.add_document(doc);
245 doc.add_value(0, "ZERO");
246 db.add_document(doc);
247 doc.add_value(0, string("ZERO\0", 5));
248 db.add_document(doc);
251 // Check handling of bounds when low is a prefix of high.
252 DEFINE_TESTCASE(valuerange7, generated) {
253 const auto OP_VALUE_RANGE = Xapian::Query::OP_VALUE_RANGE;
254 Xapian::Database db = get_database("valprefixbounds", make_valprefixbounds_db);
256 Xapian::Enquire enq(db);
258 Xapian::Query query;
259 query = Xapian::Query(OP_VALUE_RANGE, 0, "ZAP", "ZOO");
260 enq.set_query(query);
261 Xapian::MSet mset = enq.get_mset(0, 0);
262 TEST_EQUAL(mset.get_matches_lower_bound(), 2);
263 TEST_EQUAL(mset.get_matches_estimated(), 2);
264 TEST_EQUAL(mset.get_matches_upper_bound(), 2);
266 query = Xapian::Query(OP_VALUE_RANGE, 0, "ZAP", "ZERO");
267 enq.set_query(query);
268 mset = enq.get_mset(0, 0);
269 TEST_EQUAL(mset.get_matches_estimated(), 1);
270 if (startswith(get_dbtype(), "multi")) {
271 // The second shard will just have one document with "ZERO" in the slot
272 // so we can tell there's exactly one match there, and the first shard
273 // has one "ZERO\0" and one empty entry, so we can tell that can't
274 // match.
275 TEST_EQUAL(mset.get_matches_lower_bound(), 1);
276 TEST_EQUAL(mset.get_matches_upper_bound(), 1);
277 } else {
278 TEST_EQUAL(mset.get_matches_lower_bound(), 0);
279 TEST_EQUAL(mset.get_matches_upper_bound(), 2);
283 // Feature test for Query::OP_VALUE_GE.
284 DEFINE_TESTCASE(valuege1, backend) {
285 Xapian::Database db(get_database("apitest_phrase"));
286 Xapian::Enquire enq(db);
287 static const char * const vals[] = {
288 "", " ", "a", "aa", "abcd", "e", "g", "h", "hzz", "i", "l", "z"
290 for (auto start : vals) {
291 Xapian::Query query(Xapian::Query::OP_VALUE_GE, 1, start);
292 enq.set_query(query);
293 Xapian::MSet mset = enq.get_mset(0, 20);
294 // Check that documents in the MSet match the value range filter.
295 set<Xapian::docid> matched;
296 Xapian::MSetIterator i;
297 for (i = mset.begin(); i != mset.end(); ++i) {
298 matched.insert(*i);
299 string value = db.get_document(*i).get_value(1);
300 tout << "'" << start << "' <= '" << value << "'" << endl;
301 TEST_REL(value,>=,start);
303 // Check that documents not in the MSet don't match the value range
304 // filter.
305 for (Xapian::docid j = db.get_lastdocid(); j != 0; --j) {
306 if (matched.find(j) == matched.end()) {
307 string value = db.get_document(j).get_value(1);
308 tout << value << " < '" << start << "'" << endl;
309 TEST_REL(value,<,start);
315 // Regression test for Query::OP_VALUE_GE - used to segfault if check() got
316 // called.
317 DEFINE_TESTCASE(valuege2, backend) {
318 Xapian::Database db(get_database("apitest_phrase"));
319 Xapian::Enquire enq(db);
320 Xapian::Query query(Xapian::Query::OP_AND,
321 Xapian::Query("what"),
322 Xapian::Query(Xapian::Query::OP_VALUE_GE, 1, "aa"));
323 enq.set_query(query);
324 Xapian::MSet mset = enq.get_mset(0, 20);
327 // Feature test for Query::OP_VALUE_LE.
328 DEFINE_TESTCASE(valuele1, backend) {
329 Xapian::Database db(get_database("apitest_phrase"));
330 Xapian::Enquire enq(db);
331 static const char * const vals[] = {
332 "", " ", "a", "aa", "abcd", "e", "g", "h", "hzz", "i", "l", "z"
334 for (auto end : vals) {
335 Xapian::Query query(Xapian::Query::OP_VALUE_LE, 1, end);
336 enq.set_query(query);
337 Xapian::MSet mset = enq.get_mset(0, 20);
338 // Check that documents in the MSet match the value range filter.
339 set<Xapian::docid> matched;
340 Xapian::MSetIterator i;
341 for (i = mset.begin(); i != mset.end(); ++i) {
342 matched.insert(*i);
343 string value = db.get_document(*i).get_value(1);
344 TEST_REL(value,<=,end);
346 // Check that documents not in the MSet don't match the value range
347 // filter.
348 for (Xapian::docid j = db.get_lastdocid(); j != 0; --j) {
349 if (matched.find(j) == matched.end()) {
350 string value = db.get_document(j).get_value(1);
351 TEST_REL(value,>,end);
357 // Check that Query(OP_VALUE_GE, 0, "") -> Query::MatchAll.
358 DEFINE_TESTCASE(valuege3, !backend) {
359 Xapian::Query query(Xapian::Query::OP_VALUE_GE, 0, "");
360 TEST_STRINGS_EQUAL(query.get_description(), Xapian::Query::MatchAll.get_description());
363 // Test Query::OP_VALUE_GE in a query which causes its skip_to() to be used.
364 DEFINE_TESTCASE(valuege4, backend) {
365 Xapian::Database db(get_database("apitest_phrase"));
366 Xapian::Enquire enq(db);
368 // This query should put the ValueGePostList on the LHS of the AND because
369 // it has a lower estimated termfreq than the term "fridg". As a result,
370 // the skip_to() method is used to advance the ValueGePostList.
371 Xapian::Query query(Xapian::Query::OP_AND,
372 Xapian::Query("fridg"),
373 Xapian::Query(Xapian::Query::OP_VALUE_GE, 1, "aa"));
374 enq.set_query(query);
375 Xapian::MSet mset = enq.get_mset(0, 20);
378 // Test Query::OP_VALUE_RANGE in a query which causes its check() to be used.
379 DEFINE_TESTCASE(valuerange3, backend) {
380 Xapian::Database db(get_database("apitest_phrase"));
381 Xapian::Enquire enq(db);
382 Xapian::Query query(Xapian::Query::OP_AND,
383 Xapian::Query("what"),
384 Xapian::Query(Xapian::Query::OP_VALUE_RANGE, 1,
385 "aa", "z"));
386 enq.set_query(query);
387 Xapian::MSet mset = enq.get_mset(0, 20);
390 // Test Query::OP_VALUE_RANGE in a query which causes its skip_to() to be used.
391 DEFINE_TESTCASE(valuerange4, backend) {
392 Xapian::Database db(get_database("apitest_phrase"));
393 Xapian::Enquire enq(db);
394 Xapian::Query query(Xapian::Query::OP_AND,
395 Xapian::Query("fridg"),
396 Xapian::Query(Xapian::Query::OP_VALUE_RANGE, 1,
397 "aa", "z"));
398 enq.set_query(query);
399 Xapian::MSet mset = enq.get_mset(0, 20);
402 /// Test improved upper bound and estimate in 1.4.3.
403 DEFINE_TESTCASE(valuerangematchesub1, backend) {
404 Xapian::Database db(get_database("etext"));
405 Xapian::Enquire enq(db);
406 // Values present in slot 10 range from 'e' to 'w'.
407 Xapian::Query query(Xapian::Query(Xapian::Query::OP_VALUE_RANGE, 10,
408 "h", "i"));
409 enq.set_query(query);
410 Xapian::MSet mset = enq.get_mset(0, 0);
411 // The upper bound used to be db.size().
412 TEST_EQUAL(mset.get_matches_upper_bound(), db.get_value_freq(10));
413 TEST_EQUAL(mset.get_matches_lower_bound(), 0);
414 // The estimate used to be db.size() / 2, now it's calculated
415 // proportional to the possible range.
416 TEST_REL(mset.get_matches_estimated(), <=, db.get_doccount() / 3);