Reimplement Language Modelling weights
[xapian.git] / xapian-core / tests / api_nodb.cc
blob70609d6e258e36bbef77c58c65cfb6854675dc93
1 /** @file
2 * @brief tests which don't use any of the backends
3 */
4 /* Copyright 1999,2000,2001 BrightStation PLC
5 * Copyright 2002 Ananova Ltd
6 * Copyright 2002-2024 Olly Betts
7 * Copyright 2006 Lemur Consulting Ltd
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License as
11 * published by the Free Software Foundation; either version 2 of the
12 * License, or (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
22 * USA
25 #include <config.h>
27 #include "api_nodb.h"
29 #include <xapian.h>
31 #include "apitest.h"
32 #include "testsuite.h"
33 #include "testutils.h"
35 #include <list>
36 #include <string>
37 #include <vector>
39 using namespace std;
41 // tests that get_query_terms() returns the terms in the right order
42 DEFINE_TESTCASE(getqterms1, !backend) {
43 list<string> answers_list;
44 answers_list.push_back("one");
45 answers_list.push_back("two");
46 answers_list.push_back("three");
47 answers_list.push_back("four");
49 Xapian::Query myquery(Xapian::Query::OP_OR,
50 Xapian::Query(Xapian::Query::OP_AND,
51 Xapian::Query("one", 1, 1),
52 Xapian::Query("three", 1, 3)),
53 Xapian::Query(Xapian::Query::OP_OR,
54 Xapian::Query("four", 1, 4),
55 Xapian::Query("two", 1, 2)));
57 list<string> list1;
59 Xapian::TermIterator t;
60 for (t = myquery.get_terms_begin(); t != myquery.get_terms_end(); ++t)
61 list1.push_back(*t);
63 TEST(list1 == answers_list);
64 list<string> list2(myquery.get_terms_begin(), myquery.get_terms_end());
65 TEST(list2 == answers_list);
68 // tests that get_query_terms() doesn't SEGV on an empty query
69 // (regression test for bug in 0.9.0)
70 DEFINE_TESTCASE(getqterms2, !backend) {
71 Xapian::Query empty_query;
72 TEST_EQUAL(empty_query.get_terms_begin(), empty_query.get_terms_end());
73 TEST_EQUAL(empty_query.get_unique_terms_begin(),
74 empty_query.get_unique_terms_end());
77 // tests that empty queries work correctly
78 DEFINE_TESTCASE(emptyquery2, !backend) {
79 // test that Query::empty() is true for an empty query.
80 TEST(Xapian::Query().empty());
81 // test that an empty query has length 0
82 TEST(Xapian::Query().get_length() == 0);
83 vector<Xapian::Query> v;
84 TEST(Xapian::Query(Xapian::Query::OP_OR, v.begin(), v.end()).empty());
85 TEST(Xapian::Query(Xapian::Query::OP_OR, v.begin(), v.end()).get_length() == 0);
88 /// Regression test for behaviour for an empty query with AND_NOT.
89 DEFINE_TESTCASE(emptyquery3, !backend) {
90 static const Xapian::Query::op ops[] = {
91 Xapian::Query::OP_AND,
92 Xapian::Query::OP_OR,
93 Xapian::Query::OP_XOR,
94 Xapian::Query::OP_AND_MAYBE,
95 Xapian::Query::OP_AND_NOT
98 for (size_t i = 0; i < sizeof(ops) / sizeof(ops[0]); ++i) {
99 tout << "Testing op #" << i << '\n';
100 Xapian::Query empty;
101 Xapian::Query q("test");
102 Xapian::Query qcombine(ops[i], empty, q);
103 tout << qcombine.get_description() << '\n';
104 Xapian::Query qcombine2(ops[i], q, empty);
105 tout << qcombine2.get_description() << '\n';
106 Xapian::Query qcombine3(ops[i], empty, empty);
107 tout << qcombine3.get_description() << '\n';
111 // tests that query lengths are calculated correctly
112 DEFINE_TESTCASE(querylen1, !backend) {
113 // test that a simple query has the right length
114 Xapian::Query myquery;
115 myquery = Xapian::Query(Xapian::Query::OP_OR,
116 Xapian::Query("foo"),
117 Xapian::Query("bar"));
118 myquery = Xapian::Query(Xapian::Query::OP_AND,
119 myquery,
120 Xapian::Query(Xapian::Query::OP_OR,
121 Xapian::Query("wibble"),
122 Xapian::Query("spoon")));
124 TEST_EQUAL(myquery.get_length(), 4);
125 TEST(!myquery.empty());
128 // tests that query lengths are calculated correctly
129 DEFINE_TESTCASE(querylen2, !backend) {
130 // test with an even bigger and strange query
131 string terms[3] = {
132 "foo",
133 "bar",
134 "baz"
136 Xapian::Query queries[3] = {
137 Xapian::Query("wibble"),
138 Xapian::Query("wobble"),
139 Xapian::Query(Xapian::Query::OP_OR, string("jelly"), string("belly"))
142 Xapian::Query myquery;
143 vector<string> v1(terms, terms + 3);
144 vector<Xapian::Query> v2(queries, queries + 3);
145 vector<Xapian::Query *> v3;
146 Xapian::Query query1(Xapian::Query::OP_AND, string("ball"), string("club"));
147 Xapian::Query query2("ring");
148 v3.push_back(&query1);
149 v3.push_back(&query2);
151 Xapian::Query myq1 = Xapian::Query(Xapian::Query::OP_AND, v1.begin(), v1.end());
152 tout << "myq1=" << myq1 << "\n";
153 TEST_EQUAL(myq1.get_length(), 3);
155 Xapian::Query myq2_1 = Xapian::Query(Xapian::Query::OP_OR, v2.begin(), v2.end());
156 tout << "myq2_1=" << myq2_1 << "\n";
157 TEST_EQUAL(myq2_1.get_length(), 4);
159 Xapian::Query myq2_2 = Xapian::Query(Xapian::Query::OP_AND, v3.begin(), v3.end());
160 tout << "myq2_2=" << myq2_2 << "\n";
161 TEST_EQUAL(myq2_2.get_length(), 3);
163 Xapian::Query myq2 = Xapian::Query(Xapian::Query::OP_OR, myq2_1, myq2_2);
164 tout << "myq2=" << myq2 << "\n";
165 TEST_EQUAL(myq2.get_length(), 7);
167 myquery = Xapian::Query(Xapian::Query::OP_OR, myq1, myq2);
168 tout << "myquery=" << myquery << "\n";
169 TEST_EQUAL(myquery.get_length(), 10);
172 /** Check we no longer flatten subqueries combined with the same operator.
174 * Prior to 1.3.0 we did flatten these, but it's simpler to just handle this
175 * when we convert the query to a PostList tree, and that works better with
176 * Query objects being immutable.
178 DEFINE_TESTCASE(dontflattensubqueries1, !backend) {
179 Xapian::Query queries1[3] = {
180 Xapian::Query("wibble"),
181 Xapian::Query("wobble"),
182 Xapian::Query(Xapian::Query::OP_OR, string("jelly"), string("belly"))
185 Xapian::Query queries2[3] = {
186 Xapian::Query(Xapian::Query::OP_AND, string("jelly"), string("belly")),
187 Xapian::Query("wibble"),
188 Xapian::Query("wobble")
191 vector<Xapian::Query> vec1(queries1, queries1 + 3);
192 Xapian::Query myquery1(Xapian::Query::OP_OR, vec1.begin(), vec1.end());
193 TEST_EQUAL(myquery1.get_description(),
194 "Query((wibble OR wobble OR (jelly OR belly)))");
196 vector<Xapian::Query> vec2(queries2, queries2 + 3);
197 Xapian::Query myquery2(Xapian::Query::OP_AND, vec2.begin(), vec2.end());
198 TEST_EQUAL(myquery2.get_description(),
199 "Query(((jelly AND belly) AND wibble AND wobble))");
202 // test behaviour when creating a query from an empty vector
203 DEFINE_TESTCASE(emptyquerypart1, !backend) {
204 vector<string> emptyterms;
205 Xapian::Query query(Xapian::Query::OP_OR, emptyterms.begin(), emptyterms.end());
206 TEST(Xapian::Query(Xapian::Query::OP_AND, query, Xapian::Query("x")).empty());
207 TEST(Xapian::Query(Xapian::Query::OP_AND, query, Xapian::Query("x")).get_length() == 0);
208 TEST(!Xapian::Query(Xapian::Query::OP_OR, query, Xapian::Query("x")).empty());
209 TEST(Xapian::Query(Xapian::Query::OP_OR, query, Xapian::Query("x")).get_length() == 1);
212 DEFINE_TESTCASE(stemlangs1, !backend) {
213 string langs = Xapian::Stem::get_available_languages();
214 tout << "available languages '" << langs << "'\n";
215 TEST(!langs.empty());
217 // Also test the language codes.
218 langs += " ar hy eu ca da nl en fi fr de hu id ga it lt ne nb nn no pt ro"
219 " ru es sv ta tr";
221 string::size_type i = 0;
222 while (true) {
223 string::size_type spc = langs.find(' ', i);
224 // The only spaces in langs should be a single one between each pair
225 // of language names.
226 TEST_NOT_EQUAL(i, spc);
228 // Try making a stemmer for this language. We should be able to create
229 // it without an exception being thrown.
230 string language(langs, i, spc - i);
231 tout << "checking language code '" << language << "' works\n";
232 Xapian::Stem stemmer(language);
233 TEST(!stemmer.is_none());
234 if (language.size() > 2) {
235 string expected("Xapian::Stem(");
236 expected += language;
237 expected += ')';
238 TEST_EQUAL(stemmer.get_description(), expected);
241 if (spc == string::npos) break;
242 i = spc + 1;
246 // Stem("none") should give a no-op stemmer.
247 Xapian::Stem stem_nothing = Xapian::Stem("none");
248 TEST(stem_nothing.is_none());
249 TEST_EQUAL(stem_nothing.get_description(), "Xapian::Stem(none)");
253 // Stem("") should be equivalent.
254 Xapian::Stem stem_nothing = Xapian::Stem("");
255 TEST(stem_nothing.is_none());
256 TEST_EQUAL(stem_nothing.get_description(), "Xapian::Stem(none)");
260 // Regression test.
261 DEFINE_TESTCASE(nosuchdb1, !backend) {
262 // This is a "nodb" test because it doesn't test a particular backend.
263 try {
264 Xapian::Database db("NOsuChdaTabASe");
265 FAIL_TEST("Managed to open 'NOsuChdaTabASe'");
266 } catch (const Xapian::DatabaseOpeningError & e) {
267 // We don't really require this exact message, but in Xapian <= 1.1.0
268 // this gave "Couldn't detect type of database".
269 TEST_STRINGS_EQUAL(e.get_msg(), "Couldn't stat 'NOsuChdaTabASe'");
272 try {
273 Xapian::Database::check("NOsuChdaTabASe");
274 FAIL_TEST("Managed to check 'NOsuChdaTabASe'");
275 } catch (const Xapian::DatabaseOpeningError & e) {
276 // In 1.4.3 and earlier, this threw DatabaseError with the message:
277 // "File is not a Xapian database or database table" (confusing as
278 // there is no file).
279 TEST_STRINGS_EQUAL(e.get_msg(),
280 "Couldn't find Xapian database or table to check");
284 // Feature tests for value manipulations.
285 DEFINE_TESTCASE(addvalue1, !backend) {
286 // Regression test for add_value on an existing value (bug#82).
287 Xapian::Document doc;
288 doc.add_value(1, "original");
289 doc.add_value(1, "replacement");
290 TEST_EQUAL(doc.get_value(1), "replacement");
292 doc.add_value(2, "too");
293 doc.add_value(3, "free");
294 doc.add_value(4, "for");
296 doc.remove_value(2);
297 doc.remove_value(4);
298 TEST_EQUAL(doc.get_value(0), "");
299 TEST_EQUAL(doc.get_value(1), "replacement");
300 TEST_EQUAL(doc.get_value(2), "");
301 TEST_EQUAL(doc.get_value(3), "free");
302 TEST_EQUAL(doc.get_value(4), "");
305 // tests that the collapsing on termpos optimisation gives correct query length
306 DEFINE_TESTCASE(poscollapse2, !backend) {
307 Xapian::Query q(Xapian::Query::OP_OR, Xapian::Query("this", 1, 1), Xapian::Query("this", 1, 1));
308 TEST_EQUAL(q.get_length(), 2);
311 // Regression test: query on an uninitialised database segfaulted with 1.0.0.
312 // As of 1.5.0, this is just handled as an empty database.
313 DEFINE_TESTCASE(uninitdb1, !backend) {
314 Xapian::Database db;
315 Xapian::Enquire enq(db);
318 // Test a scaleweight query applied to a match nothing query
319 DEFINE_TESTCASE(scaleweight3, !backend) {
320 Xapian::Query matchnothing(Xapian::Query::MatchNothing);
321 Xapian::Query query(Xapian::Query::OP_SCALE_WEIGHT, matchnothing, 3.0);
322 TEST_EQUAL(query.get_description(), "Query()");
325 // Regression test - before 1.1.0, you could add docid 0 to an RSet.
326 DEFINE_TESTCASE(rset3, !backend) {
327 Xapian::RSet rset;
328 TEST_EXCEPTION(Xapian::InvalidArgumentError, rset.add_document(0));
329 TEST(rset.empty());
330 TEST_EQUAL(rset.size(), 0);
331 rset.add_document(1);
332 rset.add_document(static_cast<Xapian::docid>(-1));
333 TEST_EXCEPTION(Xapian::InvalidArgumentError, rset.add_document(0));
334 TEST(!rset.empty());
335 TEST_EQUAL(rset.size(), 2);
338 // Regression test - RSet::get_description() gave a malformed answer in 1.0.7.
339 DEFINE_TESTCASE(rset4, !backend) {
340 Xapian::RSet rset;
341 TEST_STRINGS_EQUAL(rset.get_description(), "RSet()");
342 rset.add_document(2);
343 // In 1.0.7 this gave: RSet(RSet(RSet::Internal(, 2))
344 TEST_STRINGS_EQUAL(rset.get_description(), "RSet(2)");
345 rset.add_document(1);
346 TEST_STRINGS_EQUAL(rset.get_description(), "RSet(1,2)");
349 // Direct test of ValueSetMatchDecider
350 DEFINE_TESTCASE(valuesetmatchdecider1, !backend) {
351 Xapian::ValueSetMatchDecider vsmd1(0, true);
352 vsmd1.add_value("42");
353 Xapian::ValueSetMatchDecider vsmd2(0, false);
354 vsmd2.remove_value("nosuch"); // Test removing a value which isn't present.
355 vsmd2.add_value("42");
356 Xapian::ValueSetMatchDecider vsmd3(0, true);
357 vsmd3.add_value("42");
358 vsmd3.add_value("blah");
360 Xapian::Document doc;
361 TEST(!vsmd1(doc));
362 TEST(vsmd2(doc));
363 TEST(!vsmd3(doc));
364 doc.add_value(0, "42");
365 TEST(vsmd1(doc));
366 TEST(!vsmd2(doc));
367 TEST(vsmd3(doc));
368 doc.add_value(0, "blah");
369 TEST(!vsmd1(doc));
370 TEST(vsmd2(doc));
371 TEST(vsmd3(doc));
373 vsmd3.remove_value("nosuch"); // Test removing a value which isn't present.
374 vsmd3.remove_value("blah");
375 TEST(!vsmd1(doc));
376 TEST(vsmd2(doc));
377 TEST(!vsmd3(doc));
378 doc.add_value(0, "42");
379 TEST(vsmd1(doc));
380 TEST(!vsmd2(doc));
381 TEST(vsmd3(doc));
384 // Test that requesting termfreq or termweight on an empty mset returns 0.
385 // New behaviour as of 1.5.0 - previously both methods threw
386 // Xapian::InvalidOperationError.
387 DEFINE_TESTCASE(emptymset1, !backend) {
388 Xapian::MSet emptymset;
389 TEST_EQUAL(emptymset.get_termfreq("foo"), 0);
390 TEST_EQUAL(emptymset.get_termweight("foo"), 0.0);
393 DEFINE_TESTCASE(expanddeciderfilterprefix1, !backend) {
394 string prefix = "tw";
395 Xapian::ExpandDeciderFilterPrefix decider(prefix);
396 TEST(!decider("one"));
397 TEST(!decider("t"));
398 TEST(!decider(""));
399 TEST(!decider("Two"));
400 TEST(decider("two"));
401 TEST(decider("twitter"));
402 TEST(decider(prefix));