Fix integer type used by ESet
[xapian.git] / xapian-core / tests / api_weight.cc
blobfedd9156c01f1470edd67a627f6c96366dbddb6b
1 /** @file
2 * @brief tests of Xapian::Weight subclasses
3 */
4 /* Copyright (C) 2004,2012,2013,2016,2017,2019 Olly Betts
5 * Copyright (C) 2013 Aarsh Shah
6 * Copyright (C) 2016 Vivek Pal
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 #include <config.h>
25 #include "api_weight.h"
26 #include <cmath>
28 #include <xapian.h>
30 #include "apitest.h"
31 #include "testutils.h"
33 using namespace std;
35 // Test exception for junk after serialised weight.
36 DEFINE_TESTCASE(tradweight3, !backend) {
37 Xapian::TradWeight wt(42);
38 try {
39 Xapian::TradWeight t;
40 Xapian::TradWeight * t2 = t.unserialise(wt.serialise() + "X");
41 // Make sure we actually use the weight.
42 bool empty = t2->name().empty();
43 delete t2;
44 if (empty)
45 FAIL_TEST("Serialised TradWeight with junk appended unserialised to empty name!");
46 FAIL_TEST("Serialised TradWeight with junk appended unserialised OK");
47 } catch (const Xapian::SerialisationError &e) {
48 // Regression test for error in exception message fixed in 1.2.11 and
49 // 1.3.1.
50 TEST(e.get_msg().find("BM25") == string::npos);
51 TEST(e.get_msg().find("Trad") != string::npos);
55 // Test Exception for junk after serialised weight.
56 DEFINE_TESTCASE(unigramlmweight3, !backend) {
57 Xapian::LMWeight wt(79898.0, Xapian::Weight::JELINEK_MERCER_SMOOTHING, 0.5, 1.0);
58 try {
59 Xapian::LMWeight t;
60 Xapian::LMWeight * t2 = t.unserialise(wt.serialise() + "X");
61 // Make sure we actually use the weight.
62 bool empty = t2->name().empty();
63 delete t2;
64 if (empty)
65 FAIL_TEST("Serialised LMWeight with junk appended unserialised to empty name!");
66 FAIL_TEST("Serialised LMWeight with junk appended unserialised OK");
67 } catch (const Xapian::SerialisationError &e) {
68 TEST(e.get_msg().find("LM") != string::npos);
72 // Test exception for junk after serialised weight.
73 DEFINE_TESTCASE(bm25weight3, !backend) {
74 Xapian::BM25Weight wt(2.0, 0.5, 1.3, 0.6, 0.01);
75 try {
76 Xapian::BM25Weight b;
77 Xapian::BM25Weight * b2 = b.unserialise(wt.serialise() + "X");
78 // Make sure we actually use the weight.
79 bool empty = b2->name().empty();
80 delete b2;
81 if (empty)
82 FAIL_TEST("Serialised BM25Weight with junk appended unserialised to empty name!");
83 FAIL_TEST("Serialised BM25Weight with junk appended unserialised OK");
84 } catch (const Xapian::SerialisationError &e) {
85 TEST(e.get_msg().find("BM25") != string::npos);
89 // Test parameter combinations which should be unaffected by doclength.
90 DEFINE_TESTCASE(bm25weight4, backend) {
91 Xapian::Database db = get_database("apitest_simpledata");
92 Xapian::Enquire enquire(db);
93 enquire.set_query(Xapian::Query("paragraph"));
94 Xapian::MSet mset;
96 enquire.set_weighting_scheme(Xapian::BM25Weight(1, 0, 1, 0, 0.5));
97 mset = enquire.get_mset(0, 10);
98 TEST_EQUAL(mset.size(), 5);
99 // Expect: wdf has an effect on weight, but doclen doesn't.
100 TEST_REL(mset[0].get_weight(),>,mset[1].get_weight());
101 TEST_EQUAL_DOUBLE(mset[1].get_weight(), mset[2].get_weight());
102 TEST_REL(mset[2].get_weight(),>,mset[3].get_weight());
103 TEST_EQUAL_DOUBLE(mset[3].get_weight(), mset[4].get_weight());
105 enquire.set_weighting_scheme(Xapian::BM25Weight(0, 0, 1, 1, 0.5));
106 mset = enquire.get_mset(0, 10);
107 TEST_EQUAL(mset.size(), 5);
108 // Expect: neither wdf nor doclen affects weight.
109 TEST_EQUAL_DOUBLE(mset[0].get_weight(), mset[4].get_weight());
112 /// Test non-zero k2 with zero k1.
113 // Regression test for bug fixed in 1.2.17 and 1.3.2.
114 DEFINE_TESTCASE(bm25weight5, backend) {
115 Xapian::Database db = get_database("apitest_simpledata");
116 Xapian::Enquire enquire(db);
117 enquire.set_query(Xapian::Query("paragraph"));
118 Xapian::MSet mset;
120 enquire.set_weighting_scheme(Xapian::BM25Weight(0, 1, 1, 0.5, 0.5));
121 mset = enquire.get_mset(0, 10);
122 TEST_EQUAL(mset.size(), 5);
123 // Expect: wdf has no effect on weight; shorter docs rank higher.
124 mset_expect_order(mset, 3, 5, 1, 4, 2);
125 TEST_EQUAL_DOUBLE(mset[0].get_weight(), mset[1].get_weight());
126 TEST_REL(mset[1].get_weight(),>,mset[2].get_weight());
127 TEST_REL(mset[2].get_weight(),>,mset[3].get_weight());
128 TEST_REL(mset[3].get_weight(),>,mset[4].get_weight());
131 // Test exception for junk after serialised weight.
132 DEFINE_TESTCASE(bm25plusweight1, !backend) {
133 Xapian::BM25PlusWeight wt(2.0, 0.1, 1.3, 0.6, 0.01, 0.5);
134 try {
135 Xapian::BM25PlusWeight b;
136 Xapian::BM25PlusWeight * b2 = b.unserialise(wt.serialise() + "X");
137 // Make sure we actually use the weight.
138 bool empty = b2->name().empty();
139 delete b2;
140 if (empty)
141 FAIL_TEST("Serialised BM25PlusWeight with junk appended unserialised to empty name!");
142 FAIL_TEST("Serialised BM25PlusWeight with junk appended unserialised OK");
143 } catch (const Xapian::SerialisationError &e) {
144 TEST(e.get_msg().find("BM25Plus") != string::npos);
148 // Test parameter combinations which should be unaffected by doclength.
149 DEFINE_TESTCASE(bm25plusweight2, backend) {
150 Xapian::Database db = get_database("apitest_simpledata");
151 Xapian::Enquire enquire(db);
152 enquire.set_query(Xapian::Query("paragraph"));
153 Xapian::MSet mset;
155 enquire.set_weighting_scheme(Xapian::BM25PlusWeight(1, 0, 1, 0, 0.5, 1));
156 mset = enquire.get_mset(0, 10);
157 TEST_EQUAL(mset.size(), 5);
158 // Expect: wdf has an effect on weight, but doclen doesn't.
159 TEST_REL(mset[0].get_weight(),>,mset[1].get_weight());
160 TEST_EQUAL_DOUBLE(mset[1].get_weight(), mset[2].get_weight());
161 TEST_REL(mset[2].get_weight(),>,mset[3].get_weight());
162 TEST_EQUAL_DOUBLE(mset[3].get_weight(), mset[4].get_weight());
164 enquire.set_weighting_scheme(Xapian::BM25PlusWeight(0, 0, 1, 1, 0.5, 1));
165 mset = enquire.get_mset(0, 10);
166 TEST_EQUAL(mset.size(), 5);
167 // Expect: neither wdf nor doclen affects weight.
168 TEST_EQUAL_DOUBLE(mset[0].get_weight(), mset[4].get_weight());
171 // Regression test for a mistake corrected in the BM25+ implementation.
172 DEFINE_TESTCASE(bm25plusweight3, backend) {
173 Xapian::Database db = get_database("apitest_simpledata");
174 Xapian::Enquire enquire(db);
175 enquire.set_query(Xapian::Query("paragraph"));
176 Xapian::MSet mset;
178 enquire.set_weighting_scheme(Xapian::BM25PlusWeight(1, 0, 1, 0.5, 0.5, 1));
179 mset = enquire.get_mset(0, 10);
180 TEST_EQUAL(mset.size(), 5);
182 // The value of each doc weight calculated manually from the BM25+ formulae
183 // by using the respective document statistics.
184 TEST_EQUAL_DOUBLE(mset[0].get_weight(), 0.7920796567487473);
185 TEST_EQUAL_DOUBLE(mset[1].get_weight(), 0.7846980783848447);
186 TEST_EQUAL_DOUBLE(mset[2].get_weight(), 0.7558817623365934);
187 TEST_EQUAL_DOUBLE(mset[3].get_weight(), 0.7210119356168847);
188 TEST_EQUAL_DOUBLE(mset[4].get_weight(), 0.7210119356168847);
191 // Test exception for junk after serialised weight.
192 DEFINE_TESTCASE(inl2weight1, !backend) {
193 Xapian::InL2Weight wt(2.0);
194 try {
195 Xapian::InL2Weight b;
196 Xapian::InL2Weight * b2 = b.unserialise(wt.serialise() + "X");
197 // Make sure we actually use the weight.
198 bool empty = b2->name().empty();
199 delete b2;
200 if (empty)
201 FAIL_TEST("Serialised inl2weight with junk appended unserialised to empty name!");
202 FAIL_TEST("Serialised inl2weight with junk appended unserialised OK");
203 } catch (const Xapian::SerialisationError &e) {
204 TEST(e.get_msg().find("InL2") != string::npos);
208 // Test for invalid values of c.
209 DEFINE_TESTCASE(inl2weight2, !backend) {
210 // InvalidArgumentError should be thrown if the parameter c is invalid.
211 TEST_EXCEPTION(Xapian::InvalidArgumentError,
212 Xapian::InL2Weight wt(-2.0));
214 TEST_EXCEPTION(Xapian::InvalidArgumentError,
215 Xapian::InL2Weight wt2(0.0));
217 /* Parameter c should be set to 1.0 by constructor if none is given. */
218 Xapian::InL2Weight weight2;
219 TEST_EQUAL(weight2.serialise(), Xapian::InL2Weight(1.0).serialise());
222 // Feature tests for Inl2Weight
223 DEFINE_TESTCASE(inl2weight3, backend) {
224 Xapian::Database db = get_database("apitest_simpledata");
225 Xapian::Enquire enquire(db);
226 Xapian::Query query("banana");
228 enquire.set_query(query);
229 enquire.set_weighting_scheme(Xapian::InL2Weight(2.0));
231 Xapian::MSet mset1;
232 mset1 = enquire.get_mset(0, 10);
233 TEST_EQUAL(mset1.size(), 1);
234 mset_expect_order(mset1, 6);
236 /* The value has been calculated in the python interpreter by looking at the
237 * database statistics. */
238 TEST_EQUAL_DOUBLE(mset1[0].get_weight(), 1.559711143842063);
240 // Test with OP_SCALE_WEIGHT.
241 enquire.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 15.0));
242 enquire.set_weighting_scheme(Xapian::InL2Weight(2.0));
244 Xapian::MSet mset2;
245 mset2 = enquire.get_mset(0, 10);
246 TEST_EQUAL(mset2.size(), 1);
247 TEST_NOT_EQUAL_DOUBLE(mset1[0].get_weight(), 0.0);
248 TEST_EQUAL_DOUBLE(15.0 * mset1[0].get_weight(), mset2[0].get_weight());
251 // Test exception for junk after serialised weight.
252 DEFINE_TESTCASE(ifb2weight1, !backend) {
253 Xapian::IfB2Weight wt(2.0);
254 try {
255 Xapian::IfB2Weight b;
256 Xapian::IfB2Weight * b2 = b.unserialise(wt.serialise() + "X");
257 // Make sure we actually use the weight.
258 bool empty = b2->name().empty();
259 delete b2;
260 if (empty)
261 FAIL_TEST("Serialised IfB2Weight with junk appended unserialised to empty name!");
262 FAIL_TEST("Serialised IfB2Weight with junk appended unserialised OK");
263 } catch (const Xapian::SerialisationError &e) {
264 TEST(e.get_msg().find("IfB2") != string::npos);
268 // Test for invalid values of c.
269 DEFINE_TESTCASE(ifb2weight2, !backend) {
270 // InvalidArgumentError should be thrown if the parameter c is invalid.
271 TEST_EXCEPTION(Xapian::InvalidArgumentError,
272 Xapian::IfB2Weight wt(-2.0));
274 TEST_EXCEPTION(Xapian::InvalidArgumentError,
275 Xapian::IfB2Weight wt2(0.0));
277 /* Parameter c should be set to 1.0 by constructor if none is given. */
278 Xapian::IfB2Weight weight2;
279 TEST_EQUAL(weight2.serialise(), Xapian::IfB2Weight(1.0).serialise());
282 // Feature test
283 DEFINE_TESTCASE(ifb2weight3, backend) {
284 Xapian::Database db = get_database("apitest_simpledata");
285 Xapian::Enquire enquire(db);
286 Xapian::Query query("banana");
288 enquire.set_query(query);
289 enquire.set_weighting_scheme(Xapian::IfB2Weight(2.0));
291 Xapian::MSet mset1;
292 mset1 = enquire.get_mset(0, 10);
293 TEST_EQUAL(mset1.size(), 1);
295 /* The value of the weight has been manually calculated using the statistics
296 * of the test database. */
297 TEST_EQUAL_DOUBLE(mset1[0].get_weight(), 3.119422287684126);
299 // Test with OP_SCALE_WEIGHT.
300 enquire.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 15.0));
301 enquire.set_weighting_scheme(Xapian::IfB2Weight(2.0));
303 Xapian::MSet mset2;
304 mset2 = enquire.get_mset(0, 10);
305 TEST_EQUAL(mset2.size(), 1);
306 TEST_NOT_EQUAL_DOUBLE(mset1[0].get_weight(), 0.0);
307 TEST_EQUAL_DOUBLE(15.0 * mset1[0].get_weight(), mset2[0].get_weight());
310 // Test exception for junk after serialised weight.
311 DEFINE_TESTCASE(ineb2weight1, !backend) {
312 Xapian::IneB2Weight wt(2.0);
313 try {
314 Xapian::IneB2Weight b;
315 Xapian::IneB2Weight * b2 = b.unserialise(wt.serialise() + "X");
316 // Make sure we actually use the weight.
317 bool empty = b2->name().empty();
318 delete b2;
319 if (empty)
320 FAIL_TEST("Serialised ineb2weight with junk appended unserialised to empty name!");
321 FAIL_TEST("Serialised ineb2weight with junk appended unserialised OK");
322 } catch (const Xapian::SerialisationError &e) {
323 TEST(e.get_msg().find("IneB2") != string::npos);
327 // Test for invalid values of c.
328 DEFINE_TESTCASE(ineb2weight2, !backend) {
329 // InvalidArgumentError should be thrown if parameter c is invalid.
330 TEST_EXCEPTION(Xapian::InvalidArgumentError,
331 Xapian::IneB2Weight wt(-2.0));
333 TEST_EXCEPTION(Xapian::InvalidArgumentError,
334 Xapian::IneB2Weight wt2(0.0));
336 /* Parameter c should be set to 1.0 by constructor if none is given. */
337 Xapian::IneB2Weight weight2;
338 TEST_EQUAL(weight2.serialise(), Xapian::IneB2Weight(1.0).serialise());
341 // Feature test.
342 DEFINE_TESTCASE(ineb2weight3, backend) {
343 Xapian::Database db = get_database("apitest_simpledata");
344 Xapian::Enquire enquire(db);
345 Xapian::Query query("paragraph");
346 enquire.set_query(query);
347 enquire.set_weighting_scheme(Xapian::IneB2Weight(2.0));
349 Xapian::MSet mset1;
350 mset1 = enquire.get_mset(0, 10);
351 TEST_EQUAL(mset1.size(), 5);
353 // The third document in the database is 4th in the ranking.
354 /* The weight value has been manually calculated by using the statistics
355 * of the test database. */
356 TEST_EQUAL_DOUBLE(mset1[4].get_weight(), 0.61709730297692400036);
358 // Test with OP_SCALE_WEIGHT.
359 enquire.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 15.0));
360 enquire.set_weighting_scheme(Xapian::IneB2Weight(2.0));
362 Xapian::MSet mset2;
363 mset2 = enquire.get_mset(0, 10);
364 TEST_EQUAL(mset2.size(), 5);
366 TEST_NOT_EQUAL_DOUBLE(mset1[0].get_weight(), 0.0);
367 for (int i = 0; i < 5; ++i) {
368 TEST_EQUAL_DOUBLE(15.0 * mset1[i].get_weight(), mset2[i].get_weight());
372 // Test exception for junk after serialised weight.
373 DEFINE_TESTCASE(bb2weight1, !backend) {
374 Xapian::BB2Weight wt(2.0);
375 try {
376 Xapian::BB2Weight b;
377 Xapian::BB2Weight * b2 = b.unserialise(wt.serialise() + "X");
378 // Make sure we actually use the weight.
379 bool empty = b2->name().empty();
380 delete b2;
381 if (empty)
382 FAIL_TEST("Serialised BB2Weight with junk appended unserialised to empty name!");
383 FAIL_TEST("Serialised BB2Weight with junk appended unserialised OK");
384 } catch (const Xapian::SerialisationError &e) {
385 TEST(e.get_msg().find("BB2") != string::npos);
389 // Test for invalid values of c.
390 DEFINE_TESTCASE(bb2weight2, !backend) {
391 // InvalidArgumentError should be thrown if the parameter c is invalid.
392 TEST_EXCEPTION(Xapian::InvalidArgumentError,
393 Xapian::BB2Weight wt(-2.0));
395 TEST_EXCEPTION(Xapian::InvalidArgumentError,
396 Xapian::BB2Weight wt2(0.0));
398 /* Parameter c should be set to 1.0 by constructor if none is given. */
399 Xapian::BB2Weight weight2;
400 TEST_EQUAL(weight2.serialise(), Xapian::BB2Weight(1.0).serialise());
403 // Feature test
404 DEFINE_TESTCASE(bb2weight3, backend) {
405 Xapian::Database db = get_database("apitest_simpledata");
406 Xapian::Enquire enquire(db);
407 Xapian::Query query("paragraph");
409 enquire.set_query(query);
410 enquire.set_weighting_scheme(Xapian::BB2Weight(2.0));
412 Xapian::MSet mset1;
413 mset1 = enquire.get_mset(0, 10);
414 TEST_EQUAL(mset1.size(), 5);
415 /* The third document in the database has the highest weight and is the
416 * first in the mset. */
417 // Value calculated manually by using the statistics of the test database.
418 TEST_EQUAL_DOUBLE(mset1[0].get_weight(), 1.6823696969784483);
420 // Test with OP_SCALE_WEIGHT.
421 enquire.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 15.0));
422 enquire.set_weighting_scheme(Xapian::BB2Weight(2.0));
424 Xapian::MSet mset2;
425 mset2 = enquire.get_mset(0, 10);
426 TEST_EQUAL(mset2.size(), 5);
428 TEST_NOT_EQUAL_DOUBLE(mset1[0].get_weight(), 0.0);
429 for (int i = 0; i < 5; ++i) {
430 TEST_EQUAL_DOUBLE(15.0 * mset1[i].get_weight(), mset2[i].get_weight());
433 // Test with OP_SCALE_WEIGHT and a small factor (regression test, as we
434 // were applying the factor to the upper bound twice).
435 enquire.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 1.0 / 1024));
436 enquire.set_weighting_scheme(Xapian::BB2Weight(2.0));
438 Xapian::MSet mset3;
439 mset3 = enquire.get_mset(0, 10);
440 TEST_EQUAL(mset3.size(), 5);
442 for (int i = 0; i < 5; ++i) {
443 TEST_EQUAL_DOUBLE(mset1[i].get_weight(), mset3[i].get_weight() * 1024);
447 // Regression test: we used to calculate log2(0) when there was only one doc.
448 DEFINE_TESTCASE(bb2weight4, backend) {
449 Xapian::Database db = get_database("apitest_onedoc");
450 Xapian::Enquire enquire(db);
451 Xapian::Query query("word");
453 enquire.set_query(query);
454 enquire.set_weighting_scheme(Xapian::BB2Weight());
456 Xapian::MSet mset1;
457 mset1 = enquire.get_mset(0, 10);
458 TEST_EQUAL(mset1.size(), 1);
459 TEST_EQUAL_DOUBLE(mset1[0].get_weight(), 3.431020621347435);
462 // Feature test.
463 DEFINE_TESTCASE(dlhweight1, backend) {
464 Xapian::Database db = get_database("apitest_simpledata");
465 Xapian::Enquire enquire(db);
466 Xapian::Query query("a");
468 enquire.set_query(query);
469 enquire.set_weighting_scheme(Xapian::DLHWeight());
471 Xapian::MSet mset1;
472 mset1 = enquire.get_mset(0, 10);
473 TEST_EQUAL(mset1.size(), 3);
474 mset_expect_order(mset1, 3, 1, 2);
475 // Weights calculated manually using stats from the database.
476 TEST_EQUAL_DOUBLE(mset1[0].get_weight(), 1.0046477754371292362);
477 TEST_EQUAL_DOUBLE(mset1[1].get_weight(), 0.97621929514640352757);
478 // The following weight would be negative but gets clamped to 0.
479 TEST_EQUAL_DOUBLE(mset1[2].get_weight(), 0.0);
481 // Test with OP_SCALE_WEIGHT.
482 enquire.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 15.0));
483 enquire.set_weighting_scheme(Xapian::DLHWeight());
485 Xapian::MSet mset2;
486 mset2 = enquire.get_mset(0, 10);
487 TEST_EQUAL(mset2.size(), 3);
489 TEST_NOT_EQUAL_DOUBLE(mset1[0].get_weight(), 0.0);
490 for (Xapian::doccount i = 0; i < mset2.size(); ++i) {
491 TEST_EQUAL_DOUBLE(15.0 * mset1[i].get_weight(), mset2[i].get_weight());
495 // Test exception for junk after serialised weight.
496 DEFINE_TESTCASE(dlhweight2, !backend) {
497 Xapian::DLHWeight wt;
498 try {
499 Xapian::DLHWeight t;
500 Xapian::DLHWeight * t2 = t.unserialise(wt.serialise() + "X");
501 // Make sure we actually use the weight.
502 bool empty = t2->name().empty();
503 delete t2;
504 if (empty)
505 FAIL_TEST("Serialised DLHWeight with junk appended unserialised to empty name!");
506 FAIL_TEST("Serialised DLHWeight with junk appended unserialised OK");
507 } catch (const Xapian::SerialisationError &e) {
508 TEST(e.get_msg().find("DLH") != string::npos);
512 static void
513 gen_wdf_eq_doclen_db(Xapian::WritableDatabase& db, const string&)
515 Xapian::Document doc;
516 doc.add_term("solo", 37);
517 db.add_document(doc);
520 // Test wdf == doclen.
521 DEFINE_TESTCASE(dlhweight3, generated) {
522 Xapian::Database db = get_database("wdf_eq_doclen", gen_wdf_eq_doclen_db);
523 Xapian::Enquire enquire(db);
524 Xapian::Query query("solo");
526 enquire.set_query(query);
527 enquire.set_weighting_scheme(Xapian::DLHWeight());
529 Xapian::MSet mset1;
530 mset1 = enquire.get_mset(0, 10);
531 TEST_EQUAL(mset1.size(), 1);
532 // Weight gets clamped to zero.
533 TEST_EQUAL_DOUBLE(mset1[0].get_weight(), 0.0);
536 // Test exception for junk after serialised weight.
537 DEFINE_TESTCASE(pl2weight1, !backend) {
538 Xapian::PL2Weight wt(2.0);
539 try {
540 Xapian::PL2Weight b;
541 Xapian::PL2Weight * b2 = b.unserialise(wt.serialise() + "X");
542 // Make sure we actually use the weight.
543 bool empty = b2->name().empty();
544 delete b2;
545 if (empty)
546 FAIL_TEST("Serialised PL2Weight with junk appended unserialised to empty name!");
547 FAIL_TEST("Serialised PL2Weight with junk appended unserialised OK");
548 } catch (const Xapian::SerialisationError &e) {
549 TEST(e.get_msg().find("PL2") != string::npos);
553 // Test for invalid values of c.
554 DEFINE_TESTCASE(pl2weight2, !backend) {
555 // InvalidArgumentError should be thrown if parameter c is invalid.
556 TEST_EXCEPTION(Xapian::InvalidArgumentError,
557 Xapian::PL2Weight wt(-2.0));
559 /* Parameter c should be set to 1.0 by constructor if none is given. */
560 Xapian::PL2Weight weight2;
561 TEST_EQUAL(weight2.serialise(), Xapian::PL2Weight(1.0).serialise());
564 // Feature Test.
565 DEFINE_TESTCASE(pl2weight3, backend) {
566 Xapian::Database db = get_database("apitest_simpledata");
567 Xapian::Enquire enquire(db);
568 Xapian::Query query("paragraph");
569 enquire.set_query(query);
570 Xapian::MSet mset;
572 enquire.set_weighting_scheme(Xapian::PL2Weight(2.0));
573 mset = enquire.get_mset(0, 10);
574 TEST_EQUAL(mset.size(), 5);
575 // Expected weight difference calculated in extended precision using stats
576 // from the test database.
577 TEST_EQUAL_DOUBLE(mset[2].get_weight(),
578 mset[3].get_weight() + 0.0086861771701328694);
580 // Test with OP_SCALE_WEIGHT.
581 enquire.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 15.0));
582 enquire.set_weighting_scheme(Xapian::PL2Weight(2.0));
584 Xapian::MSet mset2;
585 mset2 = enquire.get_mset(0, 10);
586 TEST_EQUAL(mset2.size(), 5);
587 TEST_NOT_EQUAL_DOUBLE(mset[0].get_weight(), 0.0);
588 for (int i = 0; i < 5; ++i) {
589 TEST_EQUAL_DOUBLE(15.0 * mset[i].get_weight(), mset2[i].get_weight());
593 // Test exception for junk after serialised weight.
594 DEFINE_TESTCASE(pl2plusweight1, !backend) {
595 Xapian::PL2PlusWeight wt(2.0, 0.9);
596 try {
597 Xapian::PL2PlusWeight b;
598 Xapian::PL2PlusWeight * b2 = b.unserialise(wt.serialise() + "X");
599 // Make sure we actually use the weight.
600 bool empty = b2->name().empty();
601 delete b2;
602 if (empty)
603 FAIL_TEST("Serialised PL2PlusWeight with junk appended unserialised to empty name!");
604 FAIL_TEST("Serialised PL2PlusWeight with junk appended unserialised OK");
605 } catch (const Xapian::SerialisationError &e) {
606 TEST(e.get_msg().find("PL2Plus") != string::npos);
610 // Test for invalid values of parameters, c and delta.
611 DEFINE_TESTCASE(pl2plusweight2, !backend) {
612 // InvalidArgumentError should be thrown if parameter c is invalid.
613 TEST_EXCEPTION(Xapian::InvalidArgumentError,
614 Xapian::PL2PlusWeight wt(-2.0, 0.9));
616 // InvalidArgumentError should be thrown if parameter delta is invalid.
617 TEST_EXCEPTION(Xapian::InvalidArgumentError,
618 Xapian::PL2PlusWeight wt(1.0, -1.9));
621 // Test for default values of parameters, c and delta.
622 DEFINE_TESTCASE(pl2plusweight3, !backend) {
623 Xapian::PL2PlusWeight weight2;
625 /* Parameter c should be set to 1.0 by constructor if none is given. */
626 TEST_EQUAL(weight2.serialise(), Xapian::PL2PlusWeight(1.0, 0.8).serialise());
628 /* Parameter delta should be set to 0.8 by constructor if none is given. */
629 TEST_EQUAL(weight2.serialise(), Xapian::PL2PlusWeight(1.0, 0.8).serialise());
632 // Feature Test 1 for PL2PlusWeight.
633 DEFINE_TESTCASE(pl2plusweight4, backend) {
634 Xapian::Database db = get_database("apitest_simpledata");
635 Xapian::Enquire enquire(db);
636 enquire.set_query(Xapian::Query("paragraph"));
637 Xapian::MSet mset;
639 enquire.set_weighting_scheme(Xapian::PL2PlusWeight(2.0, 0.8));
640 mset = enquire.get_mset(0, 10);
641 TEST_EQUAL(mset.size(), 5);
642 // Expected weight difference calculated in extended precision using stats
643 // from the test database.
644 TEST_EQUAL_DOUBLE(mset[2].get_weight(),
645 mset[3].get_weight() + 0.0086861771701328694);
648 // Feature Test 2 for PL2PlusWeight
649 DEFINE_TESTCASE(pl2plusweight5, backend) {
650 Xapian::Database db = get_database("apitest_simpledata");
651 Xapian::Enquire enquire(db);
652 Xapian::Query query("word");
653 enquire.set_query(query);
654 Xapian::MSet mset;
656 enquire.set_weighting_scheme(Xapian::PL2PlusWeight(1.0, 0.8));
657 mset = enquire.get_mset(0, 10);
658 // Expect MSet contains two documents having query "word".
659 TEST_EQUAL(mset.size(), 2);
660 // Expect Document 2 has higher weight than document 4 because
661 // "word" appears more no. of times in document 2 than document 4.
662 mset_expect_order(mset, 2, 4);
664 // Test with OP_SCALE_WEIGHT.
665 enquire.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 15.0));
666 enquire.set_weighting_scheme(Xapian::PL2PlusWeight(1.0, 0.8));
668 Xapian::MSet mset2;
669 mset2 = enquire.get_mset(0, 10);
670 TEST_EQUAL(mset2.size(), mset.size());
671 TEST_NOT_EQUAL_DOUBLE(mset[0].get_weight(), 0.0);
672 for (Xapian::doccount i = 0; i < mset.size(); ++i) {
673 TEST_EQUAL_DOUBLE(15.0 * mset[i].get_weight(), mset2[i].get_weight());
677 // Feature test
678 DEFINE_TESTCASE(dphweight1, backend) {
679 Xapian::Database db = get_database("apitest_simpledata");
680 Xapian::Enquire enquire(db);
681 Xapian::Query query("paragraph");
683 enquire.set_query(query);
684 enquire.set_weighting_scheme(Xapian::DPHWeight());
686 Xapian::MSet mset1;
687 mset1 = enquire.get_mset(0, 10);
688 TEST_EQUAL(mset1.size(), 5);
689 /* The weight has been calculated manually by using the statistics of the
690 * test database. */
691 TEST_EQUAL_DOUBLE(mset1[2].get_weight() - mset1[4].get_weight(), 0.542623617687990167);
693 // Test with OP_SCALE_WEIGHT.
694 enquire.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 15.0));
695 enquire.set_weighting_scheme(Xapian::DPHWeight());
697 Xapian::MSet mset2;
698 mset2 = enquire.get_mset(0, 10);
699 TEST_EQUAL(mset2.size(), 5);
700 TEST_NOT_EQUAL_DOUBLE(mset1[0].get_weight(), 0.0);
701 for (int i = 0; i < 5; ++i) {
702 TEST_EQUAL_DOUBLE(15.0 * mset1[i].get_weight(), mset2[i].get_weight());
706 // Test exception for junk after serialised weight.
707 DEFINE_TESTCASE(dphweight2, !backend) {
708 Xapian::DPHWeight wt;
709 try {
710 Xapian::DPHWeight t;
711 Xapian::DPHWeight * t2 = t.unserialise(wt.serialise() + "X");
712 // Make sure we actually use the weight.
713 bool empty = t2->name().empty();
714 delete t2;
715 if (empty)
716 FAIL_TEST("Serialised DPHWeight with junk appended unserialised to empty name!");
717 FAIL_TEST("Serialised DPHWeight with junk appended unserialised OK");
718 } catch (const Xapian::SerialisationError &e) {
719 TEST(e.get_msg().find("DPH") != string::npos);
723 // Test wdf == doclen.
724 DEFINE_TESTCASE(dphweight3, generated) {
725 Xapian::Database db = get_database("wdf_eq_doclen", gen_wdf_eq_doclen_db);
726 Xapian::Enquire enquire(db);
727 Xapian::Query query("solo");
729 enquire.set_query(query);
730 enquire.set_weighting_scheme(Xapian::DPHWeight());
732 Xapian::MSet mset1;
733 mset1 = enquire.get_mset(0, 10);
734 TEST_EQUAL(mset1.size(), 1);
735 // Weight gets clamped to zero.
736 TEST_EQUAL_DOUBLE(mset1[0].get_weight(), 0.0);
739 // Test for various cases of normalization string.
740 DEFINE_TESTCASE(tfidfweight1, !backend) {
741 // InvalidArgumentError should be thrown if normalization string is invalid
742 TEST_EXCEPTION(Xapian::InvalidArgumentError,
743 Xapian::TfIdfWeight b("JOHN_LENNON"));
745 TEST_EXCEPTION(Xapian::InvalidArgumentError,
746 Xapian::TfIdfWeight b("LOL"));
748 /* Normalization string should be set to "ntn" by constructor if none is
749 given. */
750 Xapian::TfIdfWeight weight2;
751 TEST_EQUAL(weight2.serialise(), Xapian::TfIdfWeight("ntn").serialise());
754 // Test exception for junk after serialised weight.
755 DEFINE_TESTCASE(tfidfweight2, !backend) {
756 Xapian::TfIdfWeight wt("ntn");
757 try {
758 Xapian::TfIdfWeight b;
759 Xapian::TfIdfWeight * b2 = b.unserialise(wt.serialise() + "X");
760 // Make sure we actually use the weight.
761 bool empty = b2->name().empty();
762 delete b2;
763 if (empty)
764 FAIL_TEST("Serialised TfIdfWeight with junk appended unserialised to empty name!");
765 FAIL_TEST("Serialised TfIdfWeight with junk appended unserialised OK");
766 } catch (const Xapian::SerialisationError &e) {
767 TEST(e.get_msg().find("TfIdf") != string::npos);
771 // Feature tests for various normalization functions.
772 DEFINE_TESTCASE(tfidfweight3, backend) {
773 Xapian::Database db = get_database("apitest_simpledata");
774 Xapian::Enquire enquire(db);
775 Xapian::Query query("word");
776 Xapian::MSet mset;
778 // Check for "ntn" when termfreq != N
779 enquire.set_query(query);
780 enquire.set_weighting_scheme(Xapian::TfIdfWeight("ntn"));
781 mset = enquire.get_mset(0, 10);
782 TEST_EQUAL(mset.size(), 2);
783 // doc 2 should have higher weight than 4 as only tf(wdf) will dominate.
784 mset_expect_order(mset, 2, 4);
785 TEST_EQUAL_DOUBLE(mset[0].get_weight(), 8.0 * log(6.0 / 2));
787 // Check that wqf is taken into account.
788 enquire.set_query(Xapian::Query("word", 2));
789 enquire.set_weighting_scheme(Xapian::TfIdfWeight("ntn"));
790 Xapian::MSet mset2 = enquire.get_mset(0, 10);
791 TEST_EQUAL(mset2.size(), 2);
792 // wqf is 2, so weights should be doubled.
793 TEST_EQUAL_DOUBLE(mset[0].get_weight() * 2, mset2[0].get_weight());
794 TEST_EQUAL_DOUBLE(mset[1].get_weight() * 2, mset2[1].get_weight());
796 // Test with OP_SCALE_WEIGHT.
797 enquire.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 15.0));
798 enquire.set_weighting_scheme(Xapian::TfIdfWeight("ntn"));
799 mset2 = enquire.get_mset(0, 10);
800 TEST_EQUAL(mset2.size(), 2);
801 // doc 2 should have higher weight than 4 as only tf(wdf) will dominate.
802 mset_expect_order(mset2, 2, 4);
803 TEST_NOT_EQUAL_DOUBLE(mset[0].get_weight(), 0.0);
804 TEST_EQUAL_DOUBLE(15 * mset[0].get_weight(), mset2[0].get_weight());
806 // check for "nfn" when termfreq != N
807 enquire.set_query(query);
808 enquire.set_weighting_scheme(Xapian::TfIdfWeight("nfn"));
809 mset = enquire.get_mset(0, 10);
810 TEST_EQUAL(mset.size(), 2);
811 mset_expect_order(mset, 2, 4);
812 TEST_EQUAL_DOUBLE(mset[0].get_weight(), 8.0 / 2);
814 // check for "nsn" when termfreq != N
815 enquire.set_query(query);
816 enquire.set_weighting_scheme(Xapian::TfIdfWeight("nsn"));
817 mset = enquire.get_mset(0, 10);
818 TEST_EQUAL(mset.size(), 2);
819 mset_expect_order(mset, 2, 4);
820 TEST_EQUAL_DOUBLE(mset[0].get_weight(), 8.0 * pow(log(6.0 / 2), 2.0));
822 // Check for "bnn" and for both branches of 'b'.
823 enquire.set_query(Xapian::Query("test"));
824 enquire.set_weighting_scheme(Xapian::TfIdfWeight("bnn"));
825 mset = enquire.get_mset(0, 10);
826 TEST_EQUAL(mset.size(), 1);
827 mset_expect_order(mset, 1);
828 TEST_EQUAL_DOUBLE(mset[0].get_weight(), 1.0);
830 // Check for "lnn" and for both branches of 'l'.
831 enquire.set_query(Xapian::Query("word"));
832 enquire.set_weighting_scheme(Xapian::TfIdfWeight("lnn"));
833 mset = enquire.get_mset(0, 10);
834 TEST_EQUAL(mset.size(), 2);
835 mset_expect_order(mset, 2, 4);
836 TEST_EQUAL_DOUBLE(mset[0].get_weight(), 1 + log(8.0)); // idfn=1 and so wt=tfn=1+log(tf)
837 TEST_EQUAL_DOUBLE(mset[1].get_weight(), 1.0); // idfn=1 and wt=tfn=1+log(tf)=1+log(1)=1
839 // Check for "snn"
840 enquire.set_query(Xapian::Query("paragraph"));
841 enquire.set_weighting_scheme(Xapian::TfIdfWeight("snn")); // idf=1 and tfn=tf*tf
842 mset = enquire.get_mset(0, 10);
843 TEST_EQUAL(mset.size(), 5);
844 mset_expect_order(mset, 2, 1, 4, 3, 5);
845 TEST_EQUAL_DOUBLE(mset[0].get_weight(), 9.0);
846 TEST_EQUAL_DOUBLE(mset[4].get_weight(), 1.0);
848 // Check for "ntn" when termfreq=N
849 enquire.set_query(Xapian::Query("this")); // N=termfreq and so idfn=0 for "t"
850 enquire.set_weighting_scheme(Xapian::TfIdfWeight("ntn"));
851 mset = enquire.get_mset(0, 10);
852 TEST_EQUAL(mset.size(), 6);
853 mset_expect_order(mset, 1, 2, 3, 4, 5, 6);
854 for (int i = 0; i < 6; ++i) {
855 TEST_EQUAL_DOUBLE(mset[i].get_weight(), 0.0);
858 // Check for "npn" and for both branches of 'p'
859 enquire.set_query(Xapian::Query("this")); // N=termfreq and so idfn=0 for "p"
860 enquire.set_weighting_scheme(Xapian::TfIdfWeight("npn"));
861 mset = enquire.get_mset(0, 10);
862 TEST_EQUAL(mset.size(), 6);
863 mset_expect_order(mset, 1, 2, 3, 4, 5, 6);
864 for (int i = 0; i < 6; ++i) {
865 TEST_EQUAL_DOUBLE(mset[i].get_weight(), 0.0);
868 // Check for "Lnn".
869 enquire.set_query(Xapian::Query("word"));
870 enquire.set_weighting_scheme(Xapian::TfIdfWeight("Lnn"));
871 mset = enquire.get_mset(0, 10);
872 TEST_EQUAL(mset.size(), 2);
873 mset_expect_order(mset, 2, 4);
874 TEST_EQUAL_DOUBLE(mset[0].get_weight(), (1 + log(8.0)) / (1 + log(81.0 / 56.0)));
875 TEST_EQUAL_DOUBLE(mset[1].get_weight(), (1 + log(1.0)) / (1 + log(31.0 / 26.0)));
877 enquire.set_query(Xapian::Query("word"));
878 enquire.set_weighting_scheme(Xapian::TfIdfWeight("npn"));
879 mset = enquire.get_mset(0, 10);
880 TEST_EQUAL(mset.size(), 2);
881 mset_expect_order(mset, 2, 4);
882 TEST_EQUAL_DOUBLE(mset[0].get_weight(), 8 * log((6.0 - 2) / 2));
883 TEST_EQUAL_DOUBLE(mset[1].get_weight(), 1 * log((6.0 - 2) / 2));
886 class CheckInitWeight : public Xapian::Weight {
887 public:
888 double factor;
890 unsigned & zero_inits, & non_zero_inits;
892 CheckInitWeight(unsigned &z, unsigned &n)
893 : factor(-1.0), zero_inits(z), non_zero_inits(n) { }
895 void init(double factor_) {
896 factor = factor_;
897 if (factor == 0.0)
898 ++zero_inits;
899 else
900 ++non_zero_inits;
903 Weight * clone() const {
904 return new CheckInitWeight(zero_inits, non_zero_inits);
907 double get_sumpart(Xapian::termcount, Xapian::termcount,
908 Xapian::termcount) const {
909 return 1.0;
912 double get_maxpart() const { return 1.0; }
914 double get_sumextra(Xapian::termcount doclen, Xapian::termcount) const {
915 return 1.0 / doclen;
918 double get_maxextra() const { return 1.0; }
921 /// Regression test - check init() is called for the term-indep Weight obj.
922 DEFINE_TESTCASE(checkinitweight1, backend && !multi && !remote) {
923 Xapian::Database db = get_database("apitest_simpledata");
924 Xapian::Enquire enquire(db);
925 Xapian::Query q(Xapian::Query::OP_AND,
926 Xapian::Query("this"), Xapian::Query("paragraph"));
927 enquire.set_query(q);
928 unsigned zero_inits = 0, non_zero_inits = 0;
929 CheckInitWeight wt(zero_inits, non_zero_inits);
930 enquire.set_weighting_scheme(wt);
931 Xapian::MSet mset = enquire.get_mset(0, 3);
932 TEST_EQUAL(zero_inits, 1);
933 TEST_EQUAL(non_zero_inits, 2);
936 class CheckStatsWeight : public Xapian::Weight {
937 public:
938 double factor;
940 Xapian::Database db;
942 string term1;
944 // When testing OP_SYNONYM, term2 is also set.
945 // When testing OP_WILDCARD, term2 == "*".
946 // When testing a repeated term, term2 == "=" for the first occurrence and
947 // "_" for subsequent occurrences.
948 mutable string term2;
950 Xapian::termcount & sum;
951 Xapian::termcount & sum_squares;
953 mutable Xapian::termcount len_upper;
954 mutable Xapian::termcount len_lower;
955 mutable Xapian::termcount wdf_upper;
957 CheckStatsWeight(const Xapian::Database & db_,
958 const string & term1_,
959 const string & term2_,
960 Xapian::termcount & sum_,
961 Xapian::termcount & sum_squares_)
962 : factor(-1.0), db(db_), term1(term1_), term2(term2_),
963 sum(sum_), sum_squares(sum_squares_),
964 len_upper(0), len_lower(Xapian::termcount(-1)), wdf_upper(0)
966 need_stat(COLLECTION_SIZE);
967 need_stat(RSET_SIZE);
968 need_stat(AVERAGE_LENGTH);
969 need_stat(TERMFREQ);
970 need_stat(RELTERMFREQ);
971 need_stat(QUERY_LENGTH);
972 need_stat(WQF);
973 need_stat(WDF);
974 need_stat(DOC_LENGTH);
975 need_stat(DOC_LENGTH_MIN);
976 need_stat(DOC_LENGTH_MAX);
977 need_stat(WDF_MAX);
978 need_stat(COLLECTION_FREQ);
979 need_stat(UNIQUE_TERMS);
980 need_stat(TOTAL_LENGTH);
983 CheckStatsWeight(const Xapian::Database & db_,
984 const string & term_,
985 Xapian::termcount & sum_,
986 Xapian::termcount & sum_squares_)
987 : CheckStatsWeight(db_, term_, string(), sum_, sum_squares_) { }
989 void init(double factor_) {
990 factor = factor_;
993 Weight * clone() const {
994 auto res = new CheckStatsWeight(db, term1, term2, sum, sum_squares);
995 if (term2 == "=") {
996 // The object passed to Enquire::set_weighting_scheme() is cloned
997 // right away, and then cloned again for each term, and then
998 // potentially once more for the term-independent weight
999 // contribution. In the repeated case, we want to handle the first
1000 // actual term specially, so we arrange for that to have "=" for
1001 // term2, and subsequent clones to have "_", so that we accumulate
1002 // sum and sum_squares on the first occurrence only.
1003 term2 = "_";
1005 return res;
1008 double get_sumpart(Xapian::termcount wdf, Xapian::termcount doclen,
1009 Xapian::termcount uniqueterms) const {
1010 Xapian::doccount num_docs = db.get_doccount();
1011 TEST_EQUAL(get_collection_size(), num_docs);
1012 TEST_EQUAL(get_rset_size(), 0);
1013 TEST_EQUAL(get_average_length(), db.get_avlength());
1014 Xapian::totallength totlen = get_total_length();
1015 TEST_EQUAL(totlen, db.get_total_length());
1016 double total_term_occurences = get_average_length() * num_docs;
1017 TEST_EQUAL(Xapian::totallength(total_term_occurences + 0.5), totlen);
1018 if (term2.empty() || term2 == "=" || term2 == "_") {
1019 TEST_EQUAL(get_termfreq(), db.get_termfreq(term1));
1020 TEST_EQUAL(get_collection_freq(), db.get_collection_freq(term1));
1021 if (term2.empty()) {
1022 TEST_EQUAL(get_query_length(), 1);
1023 } else {
1024 TEST_EQUAL(get_query_length(), 2);
1026 } else {
1027 Xapian::doccount tfmax = 0, tfsum = 0;
1028 Xapian::termcount cfmax = 0, cfsum = 0;
1029 if (term2 == "*") {
1030 // OP_WILDCARD case.
1031 for (auto&& t = db.allterms_begin(term1);
1032 t != db.allterms_end(term1); ++t) {
1033 Xapian::doccount tf = t.get_termfreq();
1034 tout << "->" << *t << " " << tf << endl;
1035 tfsum += tf;
1036 tfmax = max(tfmax, tf);
1037 Xapian::termcount cf = db.get_collection_freq(*t);
1038 cfsum += cf;
1039 cfmax = max(cfmax, cf);
1041 TEST_EQUAL(get_query_length(), 1);
1042 } else {
1043 // OP_SYNONYM case.
1044 Xapian::doccount tf1 = db.get_termfreq(term1);
1045 Xapian::doccount tf2 = db.get_termfreq(term2);
1046 tfsum = tf1 + tf2;
1047 tfmax = max(tf1, tf2);
1048 Xapian::termcount cf1 = db.get_collection_freq(term1);
1049 Xapian::termcount cf2 = db.get_collection_freq(term2);
1050 cfsum = cf1 + cf2;
1051 cfmax = max(cf1, cf2);
1052 TEST_EQUAL(get_query_length(), 2);
1054 // Synonym occurs at least as many times as any term.
1055 TEST_REL(get_termfreq(), >=, tfmax);
1056 TEST_REL(get_collection_freq(), >=, cfmax);
1057 // Synonym can't occur more times than the terms do.
1058 TEST_REL(get_termfreq(), <=, tfsum);
1059 TEST_REL(get_collection_freq(), <=, cfsum);
1060 // Synonym can't occur more times than there are documents/terms.
1061 TEST_REL(get_termfreq(), <=, num_docs);
1062 TEST_REL(get_collection_freq(), <=, totlen);
1064 TEST_EQUAL(get_reltermfreq(), 0);
1065 TEST_EQUAL(get_wqf(), 1);
1066 TEST_REL(doclen,>=,len_lower);
1067 TEST_REL(doclen,<=,len_upper);
1068 TEST_REL(uniqueterms,>=,1);
1069 TEST_REL(uniqueterms,<=,doclen);
1070 TEST_REL(wdf,<=,wdf_upper);
1071 if (term2 != "_") {
1072 sum += wdf;
1073 sum_squares += wdf * wdf;
1075 return 1.0;
1078 double get_maxpart() const {
1079 if (len_upper == 0) {
1080 len_lower = get_doclength_lower_bound();
1081 len_upper = get_doclength_upper_bound();
1082 wdf_upper = get_wdf_upper_bound();
1084 return 1.0;
1087 double get_sumextra(Xapian::termcount doclen, Xapian::termcount) const {
1088 return 1.0 / doclen;
1091 double get_maxextra() const { return 1.0; }
1094 /// Check the weight subclass gets the correct stats.
1095 DEFINE_TESTCASE(checkstatsweight1, backend && !remote) {
1096 Xapian::Database db = get_database("apitest_simpledata");
1097 Xapian::Enquire enquire(db);
1098 Xapian::TermIterator a;
1099 for (a = db.allterms_begin(); a != db.allterms_end(); ++a) {
1100 const string & term = *a;
1101 enquire.set_query(Xapian::Query(term));
1102 Xapian::termcount sum = 0;
1103 Xapian::termcount sum_squares = 0;
1104 CheckStatsWeight wt(db, term, sum, sum_squares);
1105 enquire.set_weighting_scheme(wt);
1106 Xapian::MSet mset = enquire.get_mset(0, db.get_doccount());
1108 // The document order in the multi-db case isn't the same as the
1109 // postlist order on the combined DB, so it's hard to compare the
1110 // wdf for each document in the Weight objects, but we can sum
1111 // the wdfs and the squares of the wdfs which provides a decent
1112 // check that we're not getting the wrong wdf values (it ensures
1113 // they have the right mean and standard deviation).
1114 Xapian::termcount expected_sum = 0;
1115 Xapian::termcount expected_sum_squares = 0;
1116 Xapian::PostingIterator i;
1117 for (i = db.postlist_begin(term); i != db.postlist_end(term); ++i) {
1118 Xapian::termcount wdf = i.get_wdf();
1119 expected_sum += wdf;
1120 expected_sum_squares += wdf * wdf;
1122 TEST_EQUAL(sum, expected_sum);
1123 TEST_EQUAL(sum_squares, expected_sum_squares);
1127 /// Check the weight subclass gets the correct stats with OP_SYNONYM.
1128 // Regression test for bugs fixed in 1.4.1.
1129 DEFINE_TESTCASE(checkstatsweight2, backend && !remote) {
1130 Xapian::Database db = get_database("apitest_simpledata");
1131 Xapian::Enquire enquire(db);
1132 Xapian::TermIterator a;
1133 for (a = db.allterms_begin(); a != db.allterms_end(); ++a) {
1134 const string & term1 = *a;
1135 if (++a == db.allterms_end()) break;
1136 const string & term2 = *a;
1137 Xapian::Query q(Xapian::Query::OP_SYNONYM,
1138 Xapian::Query(term1), Xapian::Query(term2));
1139 tout << q.get_description() << endl;
1140 enquire.set_query(q);
1141 Xapian::termcount sum = 0;
1142 Xapian::termcount sum_squares = 0;
1143 CheckStatsWeight wt(db, term1, term2, sum, sum_squares);
1144 enquire.set_weighting_scheme(wt);
1145 Xapian::MSet mset = enquire.get_mset(0, db.get_doccount());
1147 // The document order in the multi-db case isn't the same as the
1148 // postlist order on the combined DB, so it's hard to compare the
1149 // wdf for each document in the Weight objects, but we can sum
1150 // the wdfs and the squares of the wdfs which provides a decent
1151 // check that we're not getting the wrong wdf values (it ensures
1152 // they have the right mean and standard deviation).
1153 Xapian::termcount expected_sum = 0;
1154 Xapian::termcount expected_sum_squares = 0;
1155 Xapian::PostingIterator i = db.postlist_begin(term1);
1156 Xapian::PostingIterator j = db.postlist_begin(term2);
1157 Xapian::docid did1 = *i, did2 = *j;
1158 while (true) {
1159 // To calculate expected_sum_squares correctly we need to square
1160 // the sum per document.
1161 Xapian::termcount wdf;
1162 if (did1 == did2) {
1163 wdf = i.get_wdf() + j.get_wdf();
1164 did1 = did2 = 0;
1165 } else if (did1 < did2) {
1166 wdf = i.get_wdf();
1167 did1 = 0;
1168 } else {
1169 wdf = j.get_wdf();
1170 did2 = 0;
1172 expected_sum += wdf;
1173 expected_sum_squares += wdf * wdf;
1175 if (did1 == 0) {
1176 if (++i != db.postlist_end(term1)) {
1177 did1 = *i;
1178 } else {
1179 if (did2 == Xapian::docid(-1)) break;
1180 did1 = Xapian::docid(-1);
1183 if (did2 == 0) {
1184 if (++j != db.postlist_end(term2)) {
1185 did2 = *j;
1186 } else {
1187 if (did1 == Xapian::docid(-1)) break;
1188 did2 = Xapian::docid(-1);
1192 // The OP_SYNONYM's wdf should be equal to the sum of the wdfs of
1193 // the individual terms.
1194 TEST_EQUAL(sum, expected_sum);
1195 TEST_REL(sum_squares, >=, expected_sum_squares);
1199 /// Check the weight subclass gets the correct stats with OP_WILDCARD.
1200 // Regression test for bug fixed in 1.4.1.
1201 // Don't run with multi-database, as the termfreq checks don't work
1202 // there - FIXME: Investigate this - it smells like a bug.
1203 DEFINE_TESTCASE(checkstatsweight3, backend && !remote && !multi) {
1204 struct PlCmp {
1205 bool operator()(const Xapian::PostingIterator& a,
1206 const Xapian::PostingIterator& b) {
1207 return *a < *b;
1211 Xapian::Database db = get_database("apitest_simpledata");
1212 Xapian::Enquire enquire(db);
1213 Xapian::TermIterator a;
1214 static const char * const testcases[] = {
1215 "a", // a* matches all documents, but no term matches all.
1216 "pa", // Expands to only "paragraph", matching 5.
1217 "zulu", // No matches.
1218 "th", // Term "this" matches all documents.
1220 for (auto pattern : testcases) {
1221 Xapian::Query q(Xapian::Query::OP_WILDCARD, pattern);
1222 tout << q.get_description() << endl;
1223 enquire.set_query(q);
1224 Xapian::termcount sum = 0;
1225 Xapian::termcount sum_squares = 0;
1226 CheckStatsWeight wt(db, pattern, "*", sum, sum_squares);
1227 enquire.set_weighting_scheme(wt);
1228 Xapian::MSet mset = enquire.get_mset(0, db.get_doccount());
1230 // The document order in the multi-db case isn't the same as the
1231 // postlist order on the combined DB, so it's hard to compare the
1232 // wdf for each document in the Weight objects, but we can sum
1233 // the wdfs and the squares of the wdfs which provides a decent
1234 // check that we're not getting the wrong wdf values (it ensures
1235 // they have the right mean and standard deviation).
1236 Xapian::termcount expected_sum = 0;
1237 Xapian::termcount expected_sum_squares = 0;
1238 vector<Xapian::PostingIterator> postlists;
1239 for (auto&& t = db.allterms_begin(pattern);
1240 t != db.allterms_end(pattern); ++t) {
1241 postlists.emplace_back(db.postlist_begin(*t));
1243 make_heap(postlists.begin(), postlists.end(), PlCmp());
1244 Xapian::docid did = 0;
1245 Xapian::termcount wdf = 0;
1246 while (!postlists.empty()) {
1247 pop_heap(postlists.begin(), postlists.end(), PlCmp());
1248 Xapian::docid did_new = *postlists.back();
1249 Xapian::termcount wdf_new = postlists.back().get_wdf();
1250 if (++(postlists.back()) == Xapian::PostingIterator()) {
1251 postlists.pop_back();
1252 } else {
1253 push_heap(postlists.begin(), postlists.end(), PlCmp());
1255 if (did_new != did) {
1256 expected_sum += wdf;
1257 expected_sum_squares += wdf * wdf;
1258 wdf = 0;
1259 did = did_new;
1261 wdf += wdf_new;
1263 expected_sum += wdf;
1264 expected_sum_squares += wdf * wdf;
1265 // The OP_SYNONYM's wdf should be equal to the sum of the wdfs of
1266 // the individual terms.
1267 TEST_EQUAL(sum, expected_sum);
1268 TEST_REL(sum_squares, >=, expected_sum_squares);
1272 /// Check the stats for a repeated term are correct.
1273 // Regression test for bug fixed in 1.4.6. Doesn't work with
1274 // multi as the weight object is cloned more times.
1275 DEFINE_TESTCASE(checkstatsweight4, backend && !remote && !multi) {
1276 Xapian::Database db = get_database("apitest_simpledata");
1277 Xapian::Enquire enquire(db);
1278 Xapian::TermIterator a;
1279 for (a = db.allterms_begin(); a != db.allterms_end(); ++a) {
1280 const string & term = *a;
1281 enquire.set_query(Xapian::Query(term, 1, 1) |
1282 Xapian::Query(term, 1, 2));
1283 Xapian::termcount sum = 0;
1284 Xapian::termcount sum_squares = 0;
1285 CheckStatsWeight wt(db, term, "=", sum, sum_squares);
1286 enquire.set_weighting_scheme(wt);
1287 Xapian::MSet mset = enquire.get_mset(0, db.get_doccount());
1289 // The document order in the multi-db case isn't the same as the
1290 // postlist order on the combined DB, so it's hard to compare the
1291 // wdf for each document in the Weight objects, but we can sum
1292 // the wdfs and the squares of the wdfs which provides a decent
1293 // check that we're not getting the wrong wdf values (it ensures
1294 // they have the right mean and standard deviation).
1295 Xapian::termcount expected_sum = 0;
1296 Xapian::termcount expected_sum_squares = 0;
1297 Xapian::PostingIterator i;
1298 for (i = db.postlist_begin(term); i != db.postlist_end(term); ++i) {
1299 Xapian::termcount wdf = i.get_wdf();
1300 expected_sum += wdf;
1301 expected_sum_squares += wdf * wdf;
1303 TEST_EQUAL(sum, expected_sum);
1304 TEST_EQUAL(sum_squares, expected_sum_squares);
1308 // Two stage should perform same as Jelinek mercer if smoothing parameter for mercer is kept 1 in both.
1309 DEFINE_TESTCASE(unigramlmweight4, backend) {
1310 Xapian::Database db = get_database("apitest_simpledata");
1311 Xapian::Enquire enquire1(db);
1312 Xapian::Enquire enquire2(db);
1313 enquire1.set_query(Xapian::Query("paragraph"));
1314 Xapian::MSet mset1;
1315 enquire2.set_query(Xapian::Query("paragraph"));
1316 Xapian::MSet mset2;
1317 // 5 documents available with term paragraph so mset size should be 5
1318 enquire1.set_weighting_scheme(Xapian::LMWeight(0, Xapian::Weight::TWO_STAGE_SMOOTHING, 1, 0));
1319 enquire2.set_weighting_scheme(Xapian::LMWeight(0, Xapian::Weight::JELINEK_MERCER_SMOOTHING, 1, 0));
1320 mset1 = enquire1.get_mset(0, 10);
1321 mset2 = enquire2.get_mset(0, 10);
1323 TEST_EQUAL(mset1.size(), 5);
1324 TEST_EQUAL_DOUBLE(mset1[1].get_weight(), mset2[1].get_weight());
1327 /* Test for checking if we don't use smoothing all
1328 * of them should give same result i.e wdf_double/len_double */
1329 DEFINE_TESTCASE(unigramlmweight5, backend) {
1330 Xapian::Database db = get_database("apitest_simpledata");
1331 Xapian::Enquire enquire1(db);
1332 Xapian::Enquire enquire2(db);
1333 Xapian::Enquire enquire3(db);
1334 Xapian::Enquire enquire4(db);
1335 enquire1.set_query(Xapian::Query("paragraph"));
1336 Xapian::MSet mset1;
1337 enquire2.set_query(Xapian::Query("paragraph"));
1338 Xapian::MSet mset2;
1339 enquire3.set_query(Xapian::Query("paragraph"));
1340 Xapian::MSet mset3;
1341 enquire4.set_query(Xapian::Query("paragraph"));
1342 Xapian::MSet mset4;
1343 // 5 documents available with term paragraph so mset size should be 5
1344 enquire1.set_weighting_scheme(Xapian::LMWeight(10000.0, Xapian::Weight::TWO_STAGE_SMOOTHING, 0, 0));
1345 enquire2.set_weighting_scheme(Xapian::LMWeight(10000.0, Xapian::Weight::JELINEK_MERCER_SMOOTHING, 0, 0));
1346 enquire3.set_weighting_scheme(Xapian::LMWeight(10000.0, Xapian::Weight::ABSOLUTE_DISCOUNT_SMOOTHING, 0, 0));
1347 enquire4.set_weighting_scheme(Xapian::LMWeight(10000.0, Xapian::Weight::DIRICHLET_SMOOTHING, 0, 0));
1349 mset1 = enquire1.get_mset(0, 10);
1350 mset2 = enquire2.get_mset(0, 10);
1351 mset3 = enquire3.get_mset(0, 10);
1352 mset4 = enquire4.get_mset(0, 10);
1354 TEST_EQUAL(mset1.size(), 5);
1355 TEST_EQUAL(mset2.size(), 5);
1356 TEST_EQUAL(mset3.size(), 5);
1357 TEST_EQUAL(mset4.size(), 5);
1358 for (size_t i = 0; i < 5; ++i) {
1359 TEST_EQUAL_DOUBLE(mset3[i].get_weight(), mset4[i].get_weight());
1360 TEST_EQUAL_DOUBLE(mset2[i].get_weight(), mset4[i].get_weight());
1361 TEST_EQUAL_DOUBLE(mset1[i].get_weight(), mset2[i].get_weight());
1362 TEST_EQUAL_DOUBLE(mset3[i].get_weight(), mset2[i].get_weight());
1363 TEST_EQUAL_DOUBLE(mset1[i].get_weight(), mset4[i].get_weight());
1364 TEST_EQUAL_DOUBLE(mset1[i].get_weight(), mset3[i].get_weight());
1368 // Test Exception for junk after serialised weight (with Dir+ enabled).
1369 DEFINE_TESTCASE(unigramlmweight6, !backend) {
1370 Xapian::LMWeight wt(0, Xapian::Weight::DIRICHLET_SMOOTHING, 0.5, 1.0);
1371 try {
1372 Xapian::LMWeight d;
1373 Xapian::LMWeight * d2 = d.unserialise(wt.serialise() + "X");
1374 // Make sure we actually use the weight.
1375 bool empty = d2->name().empty();
1376 delete d2;
1377 if (empty)
1378 FAIL_TEST("Serialised LMWeight with junk appended unserialised to empty name!");
1379 FAIL_TEST("Serialised LMWeight with junk appended unserialised OK");
1380 } catch (const Xapian::SerialisationError &e) {
1381 TEST(e.get_msg().find("LM") != string::npos);
1385 // Feature test for Dir+ function.
1386 DEFINE_TESTCASE(unigramlmweight7, backend) {
1387 Xapian::Database db = get_database("apitest_simpledata");
1388 Xapian::Enquire enquire1(db);
1389 Xapian::Enquire enquire2(db);
1390 enquire1.set_query(Xapian::Query("paragraph"));
1391 enquire2.set_query(Xapian::Query("paragraph"));
1392 Xapian::MSet mset1;
1393 Xapian::MSet mset2;
1395 enquire1.set_weighting_scheme(Xapian::LMWeight(0, Xapian::Weight::DIRICHLET_SMOOTHING, 2000, 0));
1396 enquire2.set_weighting_scheme(Xapian::LMWeight(0, Xapian::Weight::DIRICHLET_PLUS_SMOOTHING, 2000, 0.05));
1398 mset1 = enquire1.get_mset(0, 10);
1399 mset2 = enquire2.get_mset(0, 10);
1401 // mset size should be 5
1402 TEST_EQUAL(mset1.size(), 5);
1403 TEST_EQUAL(mset2.size(), 5);
1405 // Expect mset weights associated with Dir+ more than mset weights by Dir
1406 // because of the presence of extra weight component in Dir+ function.
1407 TEST_REL(mset2[0].get_weight(),>,mset1[0].get_weight());
1408 TEST_REL(mset2[1].get_weight(),>,mset1[1].get_weight());
1409 TEST_REL(mset2[2].get_weight(),>,mset1[2].get_weight());
1410 TEST_REL(mset2[3].get_weight(),>,mset1[3].get_weight());
1411 TEST_REL(mset2[4].get_weight(),>,mset1[4].get_weight());
1414 // Regression test that OP_SCALE_WEIGHT works with LMWeight (fixed in 1.4.1).
1415 DEFINE_TESTCASE(unigramlmweight8, backend) {
1416 Xapian::Database db = get_database("apitest_simpledata");
1417 Xapian::Enquire enquire(db);
1418 Xapian::Query query("paragraph");
1420 enquire.set_query(query);
1421 enquire.set_weighting_scheme(Xapian::LMWeight(0, Xapian::Weight::DIRICHLET_SMOOTHING, 2000, 0));
1423 Xapian::MSet mset1;
1424 mset1 = enquire.get_mset(0, 10);
1425 TEST_EQUAL(mset1.size(), 5);
1427 enquire.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 15.0));
1428 enquire.set_weighting_scheme(Xapian::LMWeight(0, Xapian::Weight::DIRICHLET_SMOOTHING, 2000, 0));
1430 Xapian::MSet mset2;
1431 mset2 = enquire.get_mset(0, 10);
1432 TEST_EQUAL(mset2.size(), mset1.size());
1433 TEST_NOT_EQUAL_DOUBLE(mset1[0].get_weight(), 0.0);
1434 for (Xapian::doccount i = 0; i < mset1.size(); ++i) {
1435 TEST_EQUAL_DOUBLE(15.0 * mset1[i].get_weight(), mset2[i].get_weight());
1439 // Feature test for BoolWeight.
1440 // Test exception for junk after serialised weight.
1441 DEFINE_TESTCASE(boolweight1, !backend) {
1442 Xapian::BoolWeight wt;
1443 try {
1444 Xapian::BoolWeight t;
1445 Xapian::BoolWeight * t2 = t.unserialise(wt.serialise() + "X");
1446 // Make sure we actually use the weight.
1447 bool empty = t2->name().empty();
1448 delete t2;
1449 if (empty)
1450 FAIL_TEST("Serialised BoolWeight with junk appended unserialised to empty name!");
1451 FAIL_TEST("Serialised BoolWeight with junk appended unserialised OK");
1452 } catch (const Xapian::SerialisationError &e) {
1453 TEST(e.get_msg().find("Bool") != string::npos);
1457 // Feature test for CoordWeight.
1458 DEFINE_TESTCASE(coordweight1, backend) {
1459 Xapian::Enquire enquire(get_database("apitest_simpledata"));
1460 enquire.set_weighting_scheme(Xapian::CoordWeight());
1461 static const char * const terms[] = {
1462 "this", "line", "paragraph", "rubbish"
1464 Xapian::Query query(Xapian::Query::OP_OR,
1465 terms, terms + sizeof(terms) / sizeof(terms[0]));
1466 enquire.set_query(query);
1467 Xapian::MSet mymset1 = enquire.get_mset(0, 100);
1468 // CoordWeight scores 1 for each matching term, so the weight should equal
1469 // the number of matching terms.
1470 for (Xapian::MSetIterator i = mymset1.begin(); i != mymset1.end(); ++i) {
1471 Xapian::termcount matching_terms = 0;
1472 Xapian::TermIterator t = enquire.get_matching_terms_begin(i);
1473 while (t != enquire.get_matching_terms_end(i)) {
1474 ++matching_terms;
1475 ++t;
1477 TEST_EQUAL(i.get_weight(), matching_terms);
1480 // Test with OP_SCALE_WEIGHT.
1481 enquire.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 15.0));
1482 Xapian::MSet mymset2 = enquire.get_mset(0, 100);
1483 TEST_EQUAL(mymset1.size(), mymset2.size());
1484 for (Xapian::doccount i = 0; i != mymset1.size(); ++i) {
1485 TEST_EQUAL(15.0 * mymset1[i].get_weight(), mymset2[i].get_weight());
1489 // Test exception for junk after serialised weight.
1490 DEFINE_TESTCASE(coordweight2, !backend) {
1491 Xapian::CoordWeight wt;
1492 try {
1493 Xapian::CoordWeight t;
1494 Xapian::CoordWeight * t2 = t.unserialise(wt.serialise() + "X");
1495 // Make sure we actually use the weight.
1496 bool empty = t2->name().empty();
1497 delete t2;
1498 if (empty)
1499 FAIL_TEST("Serialised CoordWeight with junk appended unserialised to empty name!");
1500 FAIL_TEST("Serialised CoordWeight with junk appended unserialised OK");
1501 } catch (const Xapian::SerialisationError &e) {
1502 TEST(e.get_msg().find("Coord") != string::npos);