/** @file
 * @brief tests of Xapian::Weight subclasses
 */
/* Copyright (C) 2004,2012,2013,2016,2017,2019 Olly Betts
 * Copyright (C) 2013 Aarsh Shah
 * Copyright (C) 2016 Vivek Pal
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
 */
#include "api_weight.h"

#include <cmath>
#include <string>

#include "testutils.h"
35 // Test exception for junk after serialised weight.
36 DEFINE_TESTCASE(tradweight3
, !backend
) {
37 Xapian::TradWeight
wt(42);
40 Xapian::TradWeight
* t2
= t
.unserialise(wt
.serialise() + "X");
41 // Make sure we actually use the weight.
42 bool empty
= t2
->name().empty();
45 FAIL_TEST("Serialised TradWeight with junk appended unserialised to empty name!");
46 FAIL_TEST("Serialised TradWeight with junk appended unserialised OK");
47 } catch (const Xapian::SerialisationError
&e
) {
48 // Regression test for error in exception message fixed in 1.2.11 and
50 TEST(e
.get_msg().find("BM25") == string::npos
);
51 TEST(e
.get_msg().find("Trad") != string::npos
);
55 // Test Exception for junk after serialised weight.
56 DEFINE_TESTCASE(unigramlmweight3
, !backend
) {
57 Xapian::LMWeight
wt(79898.0, Xapian::Weight::JELINEK_MERCER_SMOOTHING
, 0.5, 1.0);
60 Xapian::LMWeight
* t2
= t
.unserialise(wt
.serialise() + "X");
61 // Make sure we actually use the weight.
62 bool empty
= t2
->name().empty();
65 FAIL_TEST("Serialised LMWeight with junk appended unserialised to empty name!");
66 FAIL_TEST("Serialised LMWeight with junk appended unserialised OK");
67 } catch (const Xapian::SerialisationError
&e
) {
68 TEST(e
.get_msg().find("LM") != string::npos
);
72 // Test exception for junk after serialised weight.
73 DEFINE_TESTCASE(bm25weight3
, !backend
) {
74 Xapian::BM25Weight
wt(2.0, 0.5, 1.3, 0.6, 0.01);
77 Xapian::BM25Weight
* b2
= b
.unserialise(wt
.serialise() + "X");
78 // Make sure we actually use the weight.
79 bool empty
= b2
->name().empty();
82 FAIL_TEST("Serialised BM25Weight with junk appended unserialised to empty name!");
83 FAIL_TEST("Serialised BM25Weight with junk appended unserialised OK");
84 } catch (const Xapian::SerialisationError
&e
) {
85 TEST(e
.get_msg().find("BM25") != string::npos
);
89 // Test parameter combinations which should be unaffected by doclength.
90 DEFINE_TESTCASE(bm25weight4
, backend
) {
91 Xapian::Database db
= get_database("apitest_simpledata");
92 Xapian::Enquire
enquire(db
);
93 enquire
.set_query(Xapian::Query("paragraph"));
96 enquire
.set_weighting_scheme(Xapian::BM25Weight(1, 0, 1, 0, 0.5));
97 mset
= enquire
.get_mset(0, 10);
98 TEST_EQUAL(mset
.size(), 5);
99 // Expect: wdf has an effect on weight, but doclen doesn't.
100 TEST_REL(mset
[0].get_weight(),>,mset
[1].get_weight());
101 TEST_EQUAL_DOUBLE(mset
[1].get_weight(), mset
[2].get_weight());
102 TEST_REL(mset
[2].get_weight(),>,mset
[3].get_weight());
103 TEST_EQUAL_DOUBLE(mset
[3].get_weight(), mset
[4].get_weight());
105 enquire
.set_weighting_scheme(Xapian::BM25Weight(0, 0, 1, 1, 0.5));
106 mset
= enquire
.get_mset(0, 10);
107 TEST_EQUAL(mset
.size(), 5);
108 // Expect: neither wdf nor doclen affects weight.
109 TEST_EQUAL_DOUBLE(mset
[0].get_weight(), mset
[4].get_weight());
112 /// Test non-zero k2 with zero k1.
113 // Regression test for bug fixed in 1.2.17 and 1.3.2.
114 DEFINE_TESTCASE(bm25weight5
, backend
) {
115 Xapian::Database db
= get_database("apitest_simpledata");
116 Xapian::Enquire
enquire(db
);
117 enquire
.set_query(Xapian::Query("paragraph"));
120 enquire
.set_weighting_scheme(Xapian::BM25Weight(0, 1, 1, 0.5, 0.5));
121 mset
= enquire
.get_mset(0, 10);
122 TEST_EQUAL(mset
.size(), 5);
123 // Expect: wdf has no effect on weight; shorter docs rank higher.
124 mset_expect_order(mset
, 3, 5, 1, 4, 2);
125 TEST_EQUAL_DOUBLE(mset
[0].get_weight(), mset
[1].get_weight());
126 TEST_REL(mset
[1].get_weight(),>,mset
[2].get_weight());
127 TEST_REL(mset
[2].get_weight(),>,mset
[3].get_weight());
128 TEST_REL(mset
[3].get_weight(),>,mset
[4].get_weight());
131 // Test exception for junk after serialised weight.
132 DEFINE_TESTCASE(bm25plusweight1
, !backend
) {
133 Xapian::BM25PlusWeight
wt(2.0, 0.1, 1.3, 0.6, 0.01, 0.5);
135 Xapian::BM25PlusWeight b
;
136 Xapian::BM25PlusWeight
* b2
= b
.unserialise(wt
.serialise() + "X");
137 // Make sure we actually use the weight.
138 bool empty
= b2
->name().empty();
141 FAIL_TEST("Serialised BM25PlusWeight with junk appended unserialised to empty name!");
142 FAIL_TEST("Serialised BM25PlusWeight with junk appended unserialised OK");
143 } catch (const Xapian::SerialisationError
&e
) {
144 TEST(e
.get_msg().find("BM25Plus") != string::npos
);
148 // Test parameter combinations which should be unaffected by doclength.
149 DEFINE_TESTCASE(bm25plusweight2
, backend
) {
150 Xapian::Database db
= get_database("apitest_simpledata");
151 Xapian::Enquire
enquire(db
);
152 enquire
.set_query(Xapian::Query("paragraph"));
155 enquire
.set_weighting_scheme(Xapian::BM25PlusWeight(1, 0, 1, 0, 0.5, 1));
156 mset
= enquire
.get_mset(0, 10);
157 TEST_EQUAL(mset
.size(), 5);
158 // Expect: wdf has an effect on weight, but doclen doesn't.
159 TEST_REL(mset
[0].get_weight(),>,mset
[1].get_weight());
160 TEST_EQUAL_DOUBLE(mset
[1].get_weight(), mset
[2].get_weight());
161 TEST_REL(mset
[2].get_weight(),>,mset
[3].get_weight());
162 TEST_EQUAL_DOUBLE(mset
[3].get_weight(), mset
[4].get_weight());
164 enquire
.set_weighting_scheme(Xapian::BM25PlusWeight(0, 0, 1, 1, 0.5, 1));
165 mset
= enquire
.get_mset(0, 10);
166 TEST_EQUAL(mset
.size(), 5);
167 // Expect: neither wdf nor doclen affects weight.
168 TEST_EQUAL_DOUBLE(mset
[0].get_weight(), mset
[4].get_weight());
171 // Regression test for a mistake corrected in the BM25+ implementation.
172 DEFINE_TESTCASE(bm25plusweight3
, backend
) {
173 Xapian::Database db
= get_database("apitest_simpledata");
174 Xapian::Enquire
enquire(db
);
175 enquire
.set_query(Xapian::Query("paragraph"));
178 enquire
.set_weighting_scheme(Xapian::BM25PlusWeight(1, 0, 1, 0.5, 0.5, 1));
179 mset
= enquire
.get_mset(0, 10);
180 TEST_EQUAL(mset
.size(), 5);
182 // The value of each doc weight calculated manually from the BM25+ formulae
183 // by using the respective document statistics.
184 TEST_EQUAL_DOUBLE(mset
[0].get_weight(), 0.7920796567487473);
185 TEST_EQUAL_DOUBLE(mset
[1].get_weight(), 0.7846980783848447);
186 TEST_EQUAL_DOUBLE(mset
[2].get_weight(), 0.7558817623365934);
187 TEST_EQUAL_DOUBLE(mset
[3].get_weight(), 0.7210119356168847);
188 TEST_EQUAL_DOUBLE(mset
[4].get_weight(), 0.7210119356168847);
191 // Test exception for junk after serialised weight.
192 DEFINE_TESTCASE(inl2weight1
, !backend
) {
193 Xapian::InL2Weight
wt(2.0);
195 Xapian::InL2Weight b
;
196 Xapian::InL2Weight
* b2
= b
.unserialise(wt
.serialise() + "X");
197 // Make sure we actually use the weight.
198 bool empty
= b2
->name().empty();
201 FAIL_TEST("Serialised inl2weight with junk appended unserialised to empty name!");
202 FAIL_TEST("Serialised inl2weight with junk appended unserialised OK");
203 } catch (const Xapian::SerialisationError
&e
) {
204 TEST(e
.get_msg().find("InL2") != string::npos
);
208 // Test for invalid values of c.
209 DEFINE_TESTCASE(inl2weight2
, !backend
) {
210 // InvalidArgumentError should be thrown if the parameter c is invalid.
211 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
212 Xapian::InL2Weight
wt(-2.0));
214 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
215 Xapian::InL2Weight
wt2(0.0));
217 /* Parameter c should be set to 1.0 by constructor if none is given. */
218 Xapian::InL2Weight weight2
;
219 TEST_EQUAL(weight2
.serialise(), Xapian::InL2Weight(1.0).serialise());
222 // Feature tests for Inl2Weight
223 DEFINE_TESTCASE(inl2weight3
, backend
) {
224 Xapian::Database db
= get_database("apitest_simpledata");
225 Xapian::Enquire
enquire(db
);
226 Xapian::Query
query("banana");
228 enquire
.set_query(query
);
229 enquire
.set_weighting_scheme(Xapian::InL2Weight(2.0));
232 mset1
= enquire
.get_mset(0, 10);
233 TEST_EQUAL(mset1
.size(), 1);
234 mset_expect_order(mset1
, 6);
236 /* The value has been calculated in the python interpreter by looking at the
237 * database statistics. */
238 TEST_EQUAL_DOUBLE(mset1
[0].get_weight(), 1.559711143842063);
240 // Test with OP_SCALE_WEIGHT.
241 enquire
.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT
, query
, 15.0));
242 enquire
.set_weighting_scheme(Xapian::InL2Weight(2.0));
245 mset2
= enquire
.get_mset(0, 10);
246 TEST_EQUAL(mset2
.size(), 1);
247 TEST_NOT_EQUAL_DOUBLE(mset1
[0].get_weight(), 0.0);
248 TEST_EQUAL_DOUBLE(15.0 * mset1
[0].get_weight(), mset2
[0].get_weight());
251 // Test exception for junk after serialised weight.
252 DEFINE_TESTCASE(ifb2weight1
, !backend
) {
253 Xapian::IfB2Weight
wt(2.0);
255 Xapian::IfB2Weight b
;
256 Xapian::IfB2Weight
* b2
= b
.unserialise(wt
.serialise() + "X");
257 // Make sure we actually use the weight.
258 bool empty
= b2
->name().empty();
261 FAIL_TEST("Serialised IfB2Weight with junk appended unserialised to empty name!");
262 FAIL_TEST("Serialised IfB2Weight with junk appended unserialised OK");
263 } catch (const Xapian::SerialisationError
&e
) {
264 TEST(e
.get_msg().find("IfB2") != string::npos
);
268 // Test for invalid values of c.
269 DEFINE_TESTCASE(ifb2weight2
, !backend
) {
270 // InvalidArgumentError should be thrown if the parameter c is invalid.
271 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
272 Xapian::IfB2Weight
wt(-2.0));
274 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
275 Xapian::IfB2Weight
wt2(0.0));
277 /* Parameter c should be set to 1.0 by constructor if none is given. */
278 Xapian::IfB2Weight weight2
;
279 TEST_EQUAL(weight2
.serialise(), Xapian::IfB2Weight(1.0).serialise());
283 DEFINE_TESTCASE(ifb2weight3
, backend
) {
284 Xapian::Database db
= get_database("apitest_simpledata");
285 Xapian::Enquire
enquire(db
);
286 Xapian::Query
query("banana");
288 enquire
.set_query(query
);
289 enquire
.set_weighting_scheme(Xapian::IfB2Weight(2.0));
292 mset1
= enquire
.get_mset(0, 10);
293 TEST_EQUAL(mset1
.size(), 1);
295 /* The value of the weight has been manually calculated using the statistics
296 * of the test database. */
297 TEST_EQUAL_DOUBLE(mset1
[0].get_weight(), 3.119422287684126);
299 // Test with OP_SCALE_WEIGHT.
300 enquire
.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT
, query
, 15.0));
301 enquire
.set_weighting_scheme(Xapian::IfB2Weight(2.0));
304 mset2
= enquire
.get_mset(0, 10);
305 TEST_EQUAL(mset2
.size(), 1);
306 TEST_NOT_EQUAL_DOUBLE(mset1
[0].get_weight(), 0.0);
307 TEST_EQUAL_DOUBLE(15.0 * mset1
[0].get_weight(), mset2
[0].get_weight());
310 // Test exception for junk after serialised weight.
311 DEFINE_TESTCASE(ineb2weight1
, !backend
) {
312 Xapian::IneB2Weight
wt(2.0);
314 Xapian::IneB2Weight b
;
315 Xapian::IneB2Weight
* b2
= b
.unserialise(wt
.serialise() + "X");
316 // Make sure we actually use the weight.
317 bool empty
= b2
->name().empty();
320 FAIL_TEST("Serialised ineb2weight with junk appended unserialised to empty name!");
321 FAIL_TEST("Serialised ineb2weight with junk appended unserialised OK");
322 } catch (const Xapian::SerialisationError
&e
) {
323 TEST(e
.get_msg().find("IneB2") != string::npos
);
327 // Test for invalid values of c.
328 DEFINE_TESTCASE(ineb2weight2
, !backend
) {
329 // InvalidArgumentError should be thrown if parameter c is invalid.
330 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
331 Xapian::IneB2Weight
wt(-2.0));
333 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
334 Xapian::IneB2Weight
wt2(0.0));
336 /* Parameter c should be set to 1.0 by constructor if none is given. */
337 Xapian::IneB2Weight weight2
;
338 TEST_EQUAL(weight2
.serialise(), Xapian::IneB2Weight(1.0).serialise());
342 DEFINE_TESTCASE(ineb2weight3
, backend
) {
343 Xapian::Database db
= get_database("apitest_simpledata");
344 Xapian::Enquire
enquire(db
);
345 Xapian::Query
query("paragraph");
346 enquire
.set_query(query
);
347 enquire
.set_weighting_scheme(Xapian::IneB2Weight(2.0));
350 mset1
= enquire
.get_mset(0, 10);
351 TEST_EQUAL(mset1
.size(), 5);
353 // The third document in the database is 4th in the ranking.
354 /* The weight value has been manually calculated by using the statistics
355 * of the test database. */
356 TEST_EQUAL_DOUBLE(mset1
[4].get_weight(), 0.61709730297692400036);
358 // Test with OP_SCALE_WEIGHT.
359 enquire
.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT
, query
, 15.0));
360 enquire
.set_weighting_scheme(Xapian::IneB2Weight(2.0));
363 mset2
= enquire
.get_mset(0, 10);
364 TEST_EQUAL(mset2
.size(), 5);
366 TEST_NOT_EQUAL_DOUBLE(mset1
[0].get_weight(), 0.0);
367 for (int i
= 0; i
< 5; ++i
) {
368 TEST_EQUAL_DOUBLE(15.0 * mset1
[i
].get_weight(), mset2
[i
].get_weight());
372 // Test exception for junk after serialised weight.
373 DEFINE_TESTCASE(bb2weight1
, !backend
) {
374 Xapian::BB2Weight
wt(2.0);
377 Xapian::BB2Weight
* b2
= b
.unserialise(wt
.serialise() + "X");
378 // Make sure we actually use the weight.
379 bool empty
= b2
->name().empty();
382 FAIL_TEST("Serialised BB2Weight with junk appended unserialised to empty name!");
383 FAIL_TEST("Serialised BB2Weight with junk appended unserialised OK");
384 } catch (const Xapian::SerialisationError
&e
) {
385 TEST(e
.get_msg().find("BB2") != string::npos
);
389 // Test for invalid values of c.
390 DEFINE_TESTCASE(bb2weight2
, !backend
) {
391 // InvalidArgumentError should be thrown if the parameter c is invalid.
392 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
393 Xapian::BB2Weight
wt(-2.0));
395 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
396 Xapian::BB2Weight
wt2(0.0));
398 /* Parameter c should be set to 1.0 by constructor if none is given. */
399 Xapian::BB2Weight weight2
;
400 TEST_EQUAL(weight2
.serialise(), Xapian::BB2Weight(1.0).serialise());
404 DEFINE_TESTCASE(bb2weight3
, backend
) {
405 Xapian::Database db
= get_database("apitest_simpledata");
406 Xapian::Enquire
enquire(db
);
407 Xapian::Query
query("paragraph");
409 enquire
.set_query(query
);
410 enquire
.set_weighting_scheme(Xapian::BB2Weight(2.0));
413 mset1
= enquire
.get_mset(0, 10);
414 TEST_EQUAL(mset1
.size(), 5);
415 /* The third document in the database has the highest weight and is the
416 * first in the mset. */
417 // Value calculated manually by using the statistics of the test database.
418 TEST_EQUAL_DOUBLE(mset1
[0].get_weight(), 1.6823696969784483);
420 // Test with OP_SCALE_WEIGHT.
421 enquire
.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT
, query
, 15.0));
422 enquire
.set_weighting_scheme(Xapian::BB2Weight(2.0));
425 mset2
= enquire
.get_mset(0, 10);
426 TEST_EQUAL(mset2
.size(), 5);
428 TEST_NOT_EQUAL_DOUBLE(mset1
[0].get_weight(), 0.0);
429 for (int i
= 0; i
< 5; ++i
) {
430 TEST_EQUAL_DOUBLE(15.0 * mset1
[i
].get_weight(), mset2
[i
].get_weight());
433 // Test with OP_SCALE_WEIGHT and a small factor (regression test, as we
434 // were applying the factor to the upper bound twice).
435 enquire
.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT
, query
, 1.0 / 1024));
436 enquire
.set_weighting_scheme(Xapian::BB2Weight(2.0));
439 mset3
= enquire
.get_mset(0, 10);
440 TEST_EQUAL(mset3
.size(), 5);
442 for (int i
= 0; i
< 5; ++i
) {
443 TEST_EQUAL_DOUBLE(mset1
[i
].get_weight(), mset3
[i
].get_weight() * 1024);
447 // Regression test: we used to calculate log2(0) when there was only one doc.
448 DEFINE_TESTCASE(bb2weight4
, backend
) {
449 Xapian::Database db
= get_database("apitest_onedoc");
450 Xapian::Enquire
enquire(db
);
451 Xapian::Query
query("word");
453 enquire
.set_query(query
);
454 enquire
.set_weighting_scheme(Xapian::BB2Weight());
457 mset1
= enquire
.get_mset(0, 10);
458 TEST_EQUAL(mset1
.size(), 1);
459 TEST_EQUAL_DOUBLE(mset1
[0].get_weight(), 3.431020621347435);
463 DEFINE_TESTCASE(dlhweight1
, backend
) {
464 Xapian::Database db
= get_database("apitest_simpledata");
465 Xapian::Enquire
enquire(db
);
466 Xapian::Query
query("a");
468 enquire
.set_query(query
);
469 enquire
.set_weighting_scheme(Xapian::DLHWeight());
472 mset1
= enquire
.get_mset(0, 10);
473 TEST_EQUAL(mset1
.size(), 3);
474 mset_expect_order(mset1
, 3, 1, 2);
475 // Weights calculated manually using stats from the database.
476 TEST_EQUAL_DOUBLE(mset1
[0].get_weight(), 1.0046477754371292362);
477 TEST_EQUAL_DOUBLE(mset1
[1].get_weight(), 0.97621929514640352757);
478 // The following weight would be negative but gets clamped to 0.
479 TEST_EQUAL_DOUBLE(mset1
[2].get_weight(), 0.0);
481 // Test with OP_SCALE_WEIGHT.
482 enquire
.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT
, query
, 15.0));
483 enquire
.set_weighting_scheme(Xapian::DLHWeight());
486 mset2
= enquire
.get_mset(0, 10);
487 TEST_EQUAL(mset2
.size(), 3);
489 TEST_NOT_EQUAL_DOUBLE(mset1
[0].get_weight(), 0.0);
490 for (Xapian::doccount i
= 0; i
< mset2
.size(); ++i
) {
491 TEST_EQUAL_DOUBLE(15.0 * mset1
[i
].get_weight(), mset2
[i
].get_weight());
495 // Test exception for junk after serialised weight.
496 DEFINE_TESTCASE(dlhweight2
, !backend
) {
497 Xapian::DLHWeight wt
;
500 Xapian::DLHWeight
* t2
= t
.unserialise(wt
.serialise() + "X");
501 // Make sure we actually use the weight.
502 bool empty
= t2
->name().empty();
505 FAIL_TEST("Serialised DLHWeight with junk appended unserialised to empty name!");
506 FAIL_TEST("Serialised DLHWeight with junk appended unserialised OK");
507 } catch (const Xapian::SerialisationError
&e
) {
508 TEST(e
.get_msg().find("DLH") != string::npos
);
513 gen_wdf_eq_doclen_db(Xapian::WritableDatabase
& db
, const string
&)
515 Xapian::Document doc
;
516 doc
.add_term("solo", 37);
517 db
.add_document(doc
);
520 // Test wdf == doclen.
521 DEFINE_TESTCASE(dlhweight3
, generated
) {
522 Xapian::Database db
= get_database("wdf_eq_doclen", gen_wdf_eq_doclen_db
);
523 Xapian::Enquire
enquire(db
);
524 Xapian::Query
query("solo");
526 enquire
.set_query(query
);
527 enquire
.set_weighting_scheme(Xapian::DLHWeight());
530 mset1
= enquire
.get_mset(0, 10);
531 TEST_EQUAL(mset1
.size(), 1);
532 // Weight gets clamped to zero.
533 TEST_EQUAL_DOUBLE(mset1
[0].get_weight(), 0.0);
536 // Test exception for junk after serialised weight.
537 DEFINE_TESTCASE(pl2weight1
, !backend
) {
538 Xapian::PL2Weight
wt(2.0);
541 Xapian::PL2Weight
* b2
= b
.unserialise(wt
.serialise() + "X");
542 // Make sure we actually use the weight.
543 bool empty
= b2
->name().empty();
546 FAIL_TEST("Serialised PL2Weight with junk appended unserialised to empty name!");
547 FAIL_TEST("Serialised PL2Weight with junk appended unserialised OK");
548 } catch (const Xapian::SerialisationError
&e
) {
549 TEST(e
.get_msg().find("PL2") != string::npos
);
553 // Test for invalid values of c.
554 DEFINE_TESTCASE(pl2weight2
, !backend
) {
555 // InvalidArgumentError should be thrown if parameter c is invalid.
556 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
557 Xapian::PL2Weight
wt(-2.0));
559 /* Parameter c should be set to 1.0 by constructor if none is given. */
560 Xapian::PL2Weight weight2
;
561 TEST_EQUAL(weight2
.serialise(), Xapian::PL2Weight(1.0).serialise());
565 DEFINE_TESTCASE(pl2weight3
, backend
) {
566 Xapian::Database db
= get_database("apitest_simpledata");
567 Xapian::Enquire
enquire(db
);
568 Xapian::Query
query("paragraph");
569 enquire
.set_query(query
);
572 enquire
.set_weighting_scheme(Xapian::PL2Weight(2.0));
573 mset
= enquire
.get_mset(0, 10);
574 TEST_EQUAL(mset
.size(), 5);
575 // Expected weight difference calculated in extended precision using stats
576 // from the test database.
577 TEST_EQUAL_DOUBLE(mset
[2].get_weight(),
578 mset
[3].get_weight() + 0.0086861771701328694);
580 // Test with OP_SCALE_WEIGHT.
581 enquire
.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT
, query
, 15.0));
582 enquire
.set_weighting_scheme(Xapian::PL2Weight(2.0));
585 mset2
= enquire
.get_mset(0, 10);
586 TEST_EQUAL(mset2
.size(), 5);
587 TEST_NOT_EQUAL_DOUBLE(mset
[0].get_weight(), 0.0);
588 for (int i
= 0; i
< 5; ++i
) {
589 TEST_EQUAL_DOUBLE(15.0 * mset
[i
].get_weight(), mset2
[i
].get_weight());
593 // Test exception for junk after serialised weight.
594 DEFINE_TESTCASE(pl2plusweight1
, !backend
) {
595 Xapian::PL2PlusWeight
wt(2.0, 0.9);
597 Xapian::PL2PlusWeight b
;
598 Xapian::PL2PlusWeight
* b2
= b
.unserialise(wt
.serialise() + "X");
599 // Make sure we actually use the weight.
600 bool empty
= b2
->name().empty();
603 FAIL_TEST("Serialised PL2PlusWeight with junk appended unserialised to empty name!");
604 FAIL_TEST("Serialised PL2PlusWeight with junk appended unserialised OK");
605 } catch (const Xapian::SerialisationError
&e
) {
606 TEST(e
.get_msg().find("PL2Plus") != string::npos
);
610 // Test for invalid values of parameters, c and delta.
611 DEFINE_TESTCASE(pl2plusweight2
, !backend
) {
612 // InvalidArgumentError should be thrown if parameter c is invalid.
613 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
614 Xapian::PL2PlusWeight
wt(-2.0, 0.9));
616 // InvalidArgumentError should be thrown if parameter delta is invalid.
617 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
618 Xapian::PL2PlusWeight
wt(1.0, -1.9));
621 // Test for default values of parameters, c and delta.
622 DEFINE_TESTCASE(pl2plusweight3
, !backend
) {
623 Xapian::PL2PlusWeight weight2
;
625 /* Parameter c should be set to 1.0 by constructor if none is given. */
626 TEST_EQUAL(weight2
.serialise(), Xapian::PL2PlusWeight(1.0, 0.8).serialise());
628 /* Parameter delta should be set to 0.8 by constructor if none is given. */
629 TEST_EQUAL(weight2
.serialise(), Xapian::PL2PlusWeight(1.0, 0.8).serialise());
632 // Feature Test 1 for PL2PlusWeight.
633 DEFINE_TESTCASE(pl2plusweight4
, backend
) {
634 Xapian::Database db
= get_database("apitest_simpledata");
635 Xapian::Enquire
enquire(db
);
636 enquire
.set_query(Xapian::Query("paragraph"));
639 enquire
.set_weighting_scheme(Xapian::PL2PlusWeight(2.0, 0.8));
640 mset
= enquire
.get_mset(0, 10);
641 TEST_EQUAL(mset
.size(), 5);
642 // Expected weight difference calculated in extended precision using stats
643 // from the test database.
644 TEST_EQUAL_DOUBLE(mset
[2].get_weight(),
645 mset
[3].get_weight() + 0.0086861771701328694);
648 // Feature Test 2 for PL2PlusWeight
649 DEFINE_TESTCASE(pl2plusweight5
, backend
) {
650 Xapian::Database db
= get_database("apitest_simpledata");
651 Xapian::Enquire
enquire(db
);
652 Xapian::Query
query("word");
653 enquire
.set_query(query
);
656 enquire
.set_weighting_scheme(Xapian::PL2PlusWeight(1.0, 0.8));
657 mset
= enquire
.get_mset(0, 10);
658 // Expect MSet contains two documents having query "word".
659 TEST_EQUAL(mset
.size(), 2);
660 // Expect Document 2 has higher weight than document 4 because
661 // "word" appears more no. of times in document 2 than document 4.
662 mset_expect_order(mset
, 2, 4);
664 // Test with OP_SCALE_WEIGHT.
665 enquire
.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT
, query
, 15.0));
666 enquire
.set_weighting_scheme(Xapian::PL2PlusWeight(1.0, 0.8));
669 mset2
= enquire
.get_mset(0, 10);
670 TEST_EQUAL(mset2
.size(), mset
.size());
671 TEST_NOT_EQUAL_DOUBLE(mset
[0].get_weight(), 0.0);
672 for (Xapian::doccount i
= 0; i
< mset
.size(); ++i
) {
673 TEST_EQUAL_DOUBLE(15.0 * mset
[i
].get_weight(), mset2
[i
].get_weight());
678 DEFINE_TESTCASE(dphweight1
, backend
) {
679 Xapian::Database db
= get_database("apitest_simpledata");
680 Xapian::Enquire
enquire(db
);
681 Xapian::Query
query("paragraph");
683 enquire
.set_query(query
);
684 enquire
.set_weighting_scheme(Xapian::DPHWeight());
687 mset1
= enquire
.get_mset(0, 10);
688 TEST_EQUAL(mset1
.size(), 5);
689 /* The weight has been calculated manually by using the statistics of the
691 TEST_EQUAL_DOUBLE(mset1
[2].get_weight() - mset1
[4].get_weight(), 0.542623617687990167);
693 // Test with OP_SCALE_WEIGHT.
694 enquire
.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT
, query
, 15.0));
695 enquire
.set_weighting_scheme(Xapian::DPHWeight());
698 mset2
= enquire
.get_mset(0, 10);
699 TEST_EQUAL(mset2
.size(), 5);
700 TEST_NOT_EQUAL_DOUBLE(mset1
[0].get_weight(), 0.0);
701 for (int i
= 0; i
< 5; ++i
) {
702 TEST_EQUAL_DOUBLE(15.0 * mset1
[i
].get_weight(), mset2
[i
].get_weight());
706 // Test exception for junk after serialised weight.
707 DEFINE_TESTCASE(dphweight2
, !backend
) {
708 Xapian::DPHWeight wt
;
711 Xapian::DPHWeight
* t2
= t
.unserialise(wt
.serialise() + "X");
712 // Make sure we actually use the weight.
713 bool empty
= t2
->name().empty();
716 FAIL_TEST("Serialised DPHWeight with junk appended unserialised to empty name!");
717 FAIL_TEST("Serialised DPHWeight with junk appended unserialised OK");
718 } catch (const Xapian::SerialisationError
&e
) {
719 TEST(e
.get_msg().find("DPH") != string::npos
);
723 // Test wdf == doclen.
724 DEFINE_TESTCASE(dphweight3
, generated
) {
725 Xapian::Database db
= get_database("wdf_eq_doclen", gen_wdf_eq_doclen_db
);
726 Xapian::Enquire
enquire(db
);
727 Xapian::Query
query("solo");
729 enquire
.set_query(query
);
730 enquire
.set_weighting_scheme(Xapian::DPHWeight());
733 mset1
= enquire
.get_mset(0, 10);
734 TEST_EQUAL(mset1
.size(), 1);
735 // Weight gets clamped to zero.
736 TEST_EQUAL_DOUBLE(mset1
[0].get_weight(), 0.0);
739 // Test for various cases of normalization string.
740 DEFINE_TESTCASE(tfidfweight1
, !backend
) {
741 // InvalidArgumentError should be thrown if normalization string is invalid
742 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
743 Xapian::TfIdfWeight
b("JOHN_LENNON"));
745 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
746 Xapian::TfIdfWeight
b("LOL"));
748 /* Normalization string should be set to "ntn" by constructor if none is
750 Xapian::TfIdfWeight weight2
;
751 TEST_EQUAL(weight2
.serialise(), Xapian::TfIdfWeight("ntn").serialise());
754 // Test exception for junk after serialised weight.
755 DEFINE_TESTCASE(tfidfweight2
, !backend
) {
756 Xapian::TfIdfWeight
wt("ntn");
758 Xapian::TfIdfWeight b
;
759 Xapian::TfIdfWeight
* b2
= b
.unserialise(wt
.serialise() + "X");
760 // Make sure we actually use the weight.
761 bool empty
= b2
->name().empty();
764 FAIL_TEST("Serialised TfIdfWeight with junk appended unserialised to empty name!");
765 FAIL_TEST("Serialised TfIdfWeight with junk appended unserialised OK");
766 } catch (const Xapian::SerialisationError
&e
) {
767 TEST(e
.get_msg().find("TfIdf") != string::npos
);
771 // Feature tests for various normalization functions.
772 DEFINE_TESTCASE(tfidfweight3
, backend
) {
773 Xapian::Database db
= get_database("apitest_simpledata");
774 Xapian::Enquire
enquire(db
);
775 Xapian::Query
query("word");
778 // Check for "ntn" when termfreq != N
779 enquire
.set_query(query
);
780 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("ntn"));
781 mset
= enquire
.get_mset(0, 10);
782 TEST_EQUAL(mset
.size(), 2);
783 // doc 2 should have higher weight than 4 as only tf(wdf) will dominate.
784 mset_expect_order(mset
, 2, 4);
785 TEST_EQUAL_DOUBLE(mset
[0].get_weight(), 8.0 * log(6.0 / 2));
787 // Check that wqf is taken into account.
788 enquire
.set_query(Xapian::Query("word", 2));
789 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("ntn"));
790 Xapian::MSet mset2
= enquire
.get_mset(0, 10);
791 TEST_EQUAL(mset2
.size(), 2);
792 // wqf is 2, so weights should be doubled.
793 TEST_EQUAL_DOUBLE(mset
[0].get_weight() * 2, mset2
[0].get_weight());
794 TEST_EQUAL_DOUBLE(mset
[1].get_weight() * 2, mset2
[1].get_weight());
796 // Test with OP_SCALE_WEIGHT.
797 enquire
.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT
, query
, 15.0));
798 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("ntn"));
799 mset2
= enquire
.get_mset(0, 10);
800 TEST_EQUAL(mset2
.size(), 2);
801 // doc 2 should have higher weight than 4 as only tf(wdf) will dominate.
802 mset_expect_order(mset2
, 2, 4);
803 TEST_NOT_EQUAL_DOUBLE(mset
[0].get_weight(), 0.0);
804 TEST_EQUAL_DOUBLE(15 * mset
[0].get_weight(), mset2
[0].get_weight());
806 // check for "nfn" when termfreq != N
807 enquire
.set_query(query
);
808 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("nfn"));
809 mset
= enquire
.get_mset(0, 10);
810 TEST_EQUAL(mset
.size(), 2);
811 mset_expect_order(mset
, 2, 4);
812 TEST_EQUAL_DOUBLE(mset
[0].get_weight(), 8.0 / 2);
814 // check for "nsn" when termfreq != N
815 enquire
.set_query(query
);
816 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("nsn"));
817 mset
= enquire
.get_mset(0, 10);
818 TEST_EQUAL(mset
.size(), 2);
819 mset_expect_order(mset
, 2, 4);
820 TEST_EQUAL_DOUBLE(mset
[0].get_weight(), 8.0 * pow(log(6.0 / 2), 2.0));
822 // Check for "bnn" and for both branches of 'b'.
823 enquire
.set_query(Xapian::Query("test"));
824 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("bnn"));
825 mset
= enquire
.get_mset(0, 10);
826 TEST_EQUAL(mset
.size(), 1);
827 mset_expect_order(mset
, 1);
828 TEST_EQUAL_DOUBLE(mset
[0].get_weight(), 1.0);
830 // Check for "lnn" and for both branches of 'l'.
831 enquire
.set_query(Xapian::Query("word"));
832 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("lnn"));
833 mset
= enquire
.get_mset(0, 10);
834 TEST_EQUAL(mset
.size(), 2);
835 mset_expect_order(mset
, 2, 4);
836 TEST_EQUAL_DOUBLE(mset
[0].get_weight(), 1 + log(8.0)); // idfn=1 and so wt=tfn=1+log(tf)
837 TEST_EQUAL_DOUBLE(mset
[1].get_weight(), 1.0); // idfn=1 and wt=tfn=1+log(tf)=1+log(1)=1
840 enquire
.set_query(Xapian::Query("paragraph"));
841 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("snn")); // idf=1 and tfn=tf*tf
842 mset
= enquire
.get_mset(0, 10);
843 TEST_EQUAL(mset
.size(), 5);
844 mset_expect_order(mset
, 2, 1, 4, 3, 5);
845 TEST_EQUAL_DOUBLE(mset
[0].get_weight(), 9.0);
846 TEST_EQUAL_DOUBLE(mset
[4].get_weight(), 1.0);
848 // Check for "ntn" when termfreq=N
849 enquire
.set_query(Xapian::Query("this")); // N=termfreq and so idfn=0 for "t"
850 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("ntn"));
851 mset
= enquire
.get_mset(0, 10);
852 TEST_EQUAL(mset
.size(), 6);
853 mset_expect_order(mset
, 1, 2, 3, 4, 5, 6);
854 for (int i
= 0; i
< 6; ++i
) {
855 TEST_EQUAL_DOUBLE(mset
[i
].get_weight(), 0.0);
858 // Check for "npn" and for both branches of 'p'
859 enquire
.set_query(Xapian::Query("this")); // N=termfreq and so idfn=0 for "p"
860 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("npn"));
861 mset
= enquire
.get_mset(0, 10);
862 TEST_EQUAL(mset
.size(), 6);
863 mset_expect_order(mset
, 1, 2, 3, 4, 5, 6);
864 for (int i
= 0; i
< 6; ++i
) {
865 TEST_EQUAL_DOUBLE(mset
[i
].get_weight(), 0.0);
869 enquire
.set_query(Xapian::Query("word"));
870 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("Lnn"));
871 mset
= enquire
.get_mset(0, 10);
872 TEST_EQUAL(mset
.size(), 2);
873 mset_expect_order(mset
, 2, 4);
874 TEST_EQUAL_DOUBLE(mset
[0].get_weight(), (1 + log(8.0)) / (1 + log(81.0 / 56.0)));
875 TEST_EQUAL_DOUBLE(mset
[1].get_weight(), (1 + log(1.0)) / (1 + log(31.0 / 26.0)));
877 enquire
.set_query(Xapian::Query("word"));
878 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("npn"));
879 mset
= enquire
.get_mset(0, 10);
880 TEST_EQUAL(mset
.size(), 2);
881 mset_expect_order(mset
, 2, 4);
882 TEST_EQUAL_DOUBLE(mset
[0].get_weight(), 8 * log((6.0 - 2) / 2));
883 TEST_EQUAL_DOUBLE(mset
[1].get_weight(), 1 * log((6.0 - 2) / 2));
/// Weight subclass used by the checkinitweight1 testcase.
///
/// It holds references to two counters owned by the caller; clone() hands the
/// same references on, so every clone refers to the caller's counters.
886 class CheckInitWeight
: public Xapian::Weight
{
// Counters supplied by the caller (bound by reference in the constructor).
890 unsigned & zero_inits
, & non_zero_inits
;
// factor starts out as -1.0; z and n are bound to the two counter members.
892 CheckInitWeight(unsigned &z
, unsigned &n
)
893 : factor(-1.0), zero_inits(z
), non_zero_inits(n
) { }
// NOTE(review): the body of init() is not visible in this excerpt -
// presumably it records factor_ and updates one of the counters; confirm.
895 void init(double factor_
) {
// Create a new CheckInitWeight sharing this object's counter references.
903 Weight
* clone() const {
904 return new CheckInitWeight(zero_inits
, non_zero_inits
);
// All three parameters are unnamed, so they cannot influence the result.
907 double get_sumpart(Xapian::termcount
, Xapian::termcount
,
908 Xapian::termcount
) const {
// Always returns 1.0.
912 double get_maxpart() const { return 1.0; }
// Only the first parameter (doclen) is named; the second is unused.
914 double get_sumextra(Xapian::termcount doclen
, Xapian::termcount
) const {
// Always returns 1.0.
918 double get_maxextra() const { return 1.0; }
921 /// Regression test - check init() is called for the term-indep Weight obj.
922 DEFINE_TESTCASE(checkinitweight1
, backend
&& !multi
&& !remote
) {
923 Xapian::Database db
= get_database("apitest_simpledata");
924 Xapian::Enquire
enquire(db
);
925 Xapian::Query
q(Xapian::Query::OP_AND
,
926 Xapian::Query("this"), Xapian::Query("paragraph"));
927 enquire
.set_query(q
);
928 unsigned zero_inits
= 0, non_zero_inits
= 0;
929 CheckInitWeight
wt(zero_inits
, non_zero_inits
);
930 enquire
.set_weighting_scheme(wt
);
931 Xapian::MSet mset
= enquire
.get_mset(0, 3);
932 TEST_EQUAL(zero_inits
, 1);
933 TEST_EQUAL(non_zero_inits
, 2);
/// Weight subclass which checks, from inside get_sumpart(), that the
/// statistics it is given agree with values read directly from the database.
///
/// The caller passes in two accumulators (sum and sum_squares) by reference
/// and compares them against expected values after running the match.
936 class CheckStatsWeight
: public Xapian::Weight
{
944 // When testing OP_SYNONYM, term2 is also set.
945 // When testing OP_WILDCARD, term2 == "*".
946 // When testing a repeated term, term2 == "=" for the first occurrence and
947 // "_" for subsequent occurrences.
948 mutable string term2
;
// Accumulators owned by the caller (bound by reference in the constructor).
950 Xapian::termcount
& sum
;
951 Xapian::termcount
& sum_squares
;
// Bounds fetched by get_maxpart(); mutable because get_maxpart() is const.
953 mutable Xapian::termcount len_upper
;
954 mutable Xapian::termcount len_lower
;
955 mutable Xapian::termcount wdf_upper
;
// Full constructor taking both terms and the two caller-owned accumulators.
957 CheckStatsWeight(const Xapian::Database
& db_
,
958 const string
& term1_
,
959 const string
& term2_
,
960 Xapian::termcount
& sum_
,
961 Xapian::termcount
& sum_squares_
)
962 : factor(-1.0), db(db_
), term1(term1_
), term2(term2_
),
963 sum(sum_
), sum_squares(sum_squares_
),
// len_upper == 0 is what get_maxpart() tests before fetching the bounds.
964 len_upper(0), len_lower(Xapian::termcount(-1)), wdf_upper(0)
// Request statistics via need_stat().
966 need_stat(COLLECTION_SIZE
);
967 need_stat(RSET_SIZE
);
968 need_stat(AVERAGE_LENGTH
);
970 need_stat(RELTERMFREQ
);
971 need_stat(QUERY_LENGTH
);
974 need_stat(DOC_LENGTH
);
975 need_stat(DOC_LENGTH_MIN
);
976 need_stat(DOC_LENGTH_MAX
);
978 need_stat(COLLECTION_FREQ
);
979 need_stat(UNIQUE_TERMS
);
980 need_stat(TOTAL_LENGTH
);
// Single-term convenience constructor: delegates with an empty term2.
983 CheckStatsWeight(const Xapian::Database
& db_
,
984 const string
& term_
,
985 Xapian::termcount
& sum_
,
986 Xapian::termcount
& sum_squares_
)
987 : CheckStatsWeight(db_
, term_
, string(), sum_
, sum_squares_
) { }
// NOTE(review): the body of init() is not visible in this excerpt.
989 void init(double factor_
) {
// Clones are given the same database, terms and accumulator references.
993 Weight
* clone() const {
994 auto res
= new CheckStatsWeight(db
, term1
, term2
, sum
, sum_squares
);
996 // The object passed to Enquire::set_weighting_scheme() is cloned
997 // right away, and then cloned again for each term, and then
998 // potentially once more for the term-independent weight
999 // contribution. In the repeated case, we want to handle the first
1000 // actual term specially, so we arrange for that to have "=" for
1001 // term2, and subsequent clones to have "_", so that we accumulate
1002 // sum and sum_squares on the first occurrence only.
// Checks the collection-level and term-level statistics against the
// database, then the per-document values against the cached bounds.
1008 double get_sumpart(Xapian::termcount wdf
, Xapian::termcount doclen
,
1009 Xapian::termcount uniqueterms
) const {
1010 Xapian::doccount num_docs
= db
.get_doccount();
1011 TEST_EQUAL(get_collection_size(), num_docs
);
1012 TEST_EQUAL(get_rset_size(), 0);
1013 TEST_EQUAL(get_average_length(), db
.get_avlength());
1014 Xapian::totallength totlen
= get_total_length();
1015 TEST_EQUAL(totlen
, db
.get_total_length());
// average length * document count, with 0.5 added before the conversion
// (presumably to round to nearest), should reproduce the total length.
1016 double total_term_occurences
= get_average_length() * num_docs
;
1017 TEST_EQUAL(Xapian::totallength(total_term_occurences
+ 0.5), totlen
);
// term2 empty, "=" or "_": the stats should be exactly those of term1.
1018 if (term2
.empty() || term2
== "=" || term2
== "_") {
1019 TEST_EQUAL(get_termfreq(), db
.get_termfreq(term1
));
1020 TEST_EQUAL(get_collection_freq(), db
.get_collection_freq(term1
));
1021 if (term2
.empty()) {
1022 TEST_EQUAL(get_query_length(), 1);
1024 TEST_EQUAL(get_query_length(), 2);
// Maximum and sum of the termfreq / collection freq of the constituent
// terms, used below to bound the combined stats.
// NOTE(review): the statements updating tfsum/cfsum are not visible in this
// excerpt.
1027 Xapian::doccount tfmax
= 0, tfsum
= 0;
1028 Xapian::termcount cfmax
= 0, cfsum
= 0;
1030 // OP_WILDCARD case.
1031 for (auto&& t
= db
.allterms_begin(term1
);
1032 t
!= db
.allterms_end(term1
); ++t
) {
1033 Xapian::doccount tf
= t
.get_termfreq();
1034 tout
<< "->" << *t
<< " " << tf
<< endl
;
1036 tfmax
= max(tfmax
, tf
);
1037 Xapian::termcount cf
= db
.get_collection_freq(*t
);
1039 cfmax
= max(cfmax
, cf
);
1041 TEST_EQUAL(get_query_length(), 1);
// Two explicit terms (term1 and term2).
1044 Xapian::doccount tf1
= db
.get_termfreq(term1
);
1045 Xapian::doccount tf2
= db
.get_termfreq(term2
);
1047 tfmax
= max(tf1
, tf2
);
1048 Xapian::termcount cf1
= db
.get_collection_freq(term1
);
1049 Xapian::termcount cf2
= db
.get_collection_freq(term2
);
1051 cfmax
= max(cf1
, cf2
);
1052 TEST_EQUAL(get_query_length(), 2);
1054 // Synonym occurs at least as many times as any term.
1055 TEST_REL(get_termfreq(), >=, tfmax
);
1056 TEST_REL(get_collection_freq(), >=, cfmax
);
1057 // Synonym can't occur more times than the terms do.
1058 TEST_REL(get_termfreq(), <=, tfsum
);
1059 TEST_REL(get_collection_freq(), <=, cfsum
);
1060 // Synonym can't occur more times than there are documents/terms.
1061 TEST_REL(get_termfreq(), <=, num_docs
);
1062 TEST_REL(get_collection_freq(), <=, totlen
);
// Per-document values must lie within the bounds cached by get_maxpart().
1064 TEST_EQUAL(get_reltermfreq(), 0);
1065 TEST_EQUAL(get_wqf(), 1);
1066 TEST_REL(doclen
,>=,len_lower
);
1067 TEST_REL(doclen
,<=,len_upper
);
1068 TEST_REL(uniqueterms
,>=,1);
1069 TEST_REL(uniqueterms
,<=,doclen
);
1070 TEST_REL(wdf
,<=,wdf_upper
);
// Accumulate the squared wdf into the caller's sum_squares.
1073 sum_squares
+= wdf
* wdf
;
// Fetches the document length and wdf bounds while len_upper is still 0
// (its initial value from the constructor).
1078 double get_maxpart() const {
1079 if (len_upper
== 0) {
1080 len_lower
= get_doclength_lower_bound();
1081 len_upper
= get_doclength_upper_bound();
1082 wdf_upper
= get_wdf_upper_bound();
// Returns the reciprocal of the document length; the second parameter is
// unnamed and unused.
1087 double get_sumextra(Xapian::termcount doclen
, Xapian::termcount
) const {
1088 return 1.0 / doclen
;
// Always returns 1.0.
1091 double get_maxextra() const { return 1.0; }
1094 /// Check the weight subclass gets the correct stats.
1095 DEFINE_TESTCASE(checkstatsweight1
, backend
&& !remote
) {
1096 Xapian::Database db
= get_database("apitest_simpledata");
1097 Xapian::Enquire
enquire(db
);
1098 Xapian::TermIterator a
;
1099 for (a
= db
.allterms_begin(); a
!= db
.allterms_end(); ++a
) {
1100 const string
& term
= *a
;
1101 enquire
.set_query(Xapian::Query(term
));
1102 Xapian::termcount sum
= 0;
1103 Xapian::termcount sum_squares
= 0;
1104 CheckStatsWeight
wt(db
, term
, sum
, sum_squares
);
1105 enquire
.set_weighting_scheme(wt
);
1106 Xapian::MSet mset
= enquire
.get_mset(0, db
.get_doccount());
1108 // The document order in the multi-db case isn't the same as the
1109 // postlist order on the combined DB, so it's hard to compare the
1110 // wdf for each document in the Weight objects, but we can sum
1111 // the wdfs and the squares of the wdfs which provides a decent
1112 // check that we're not getting the wrong wdf values (it ensures
1113 // they have the right mean and standard deviation).
1114 Xapian::termcount expected_sum
= 0;
1115 Xapian::termcount expected_sum_squares
= 0;
1116 Xapian::PostingIterator i
;
1117 for (i
= db
.postlist_begin(term
); i
!= db
.postlist_end(term
); ++i
) {
1118 Xapian::termcount wdf
= i
.get_wdf();
1119 expected_sum
+= wdf
;
1120 expected_sum_squares
+= wdf
* wdf
;
1122 TEST_EQUAL(sum
, expected_sum
);
1123 TEST_EQUAL(sum_squares
, expected_sum_squares
);
1127 /// Check the weight subclass gets the correct stats with OP_SYNONYM.
1128 // Regression test for bugs fixed in 1.4.1.
1129 DEFINE_TESTCASE(checkstatsweight2
, backend
&& !remote
) {
1130 Xapian::Database db
= get_database("apitest_simpledata");
1131 Xapian::Enquire
enquire(db
);
1132 Xapian::TermIterator a
;
// Walk the term list two terms at a time, building an OP_SYNONYM query from
// each pair.
1133 for (a
= db
.allterms_begin(); a
!= db
.allterms_end(); ++a
) {
1134 const string
& term1
= *a
;
// Stop if there is no second term left to pair term1 with.
1135 if (++a
== db
.allterms_end()) break;
1136 const string
& term2
= *a
;
1137 Xapian::Query
q(Xapian::Query::OP_SYNONYM
,
1138 Xapian::Query(term1
), Xapian::Query(term2
));
1139 tout
<< q
.get_description() << endl
;
1140 enquire
.set_query(q
);
// Accumulators passed by reference to CheckStatsWeight.
1141 Xapian::termcount sum
= 0;
1142 Xapian::termcount sum_squares
= 0;
1143 CheckStatsWeight
wt(db
, term1
, term2
, sum
, sum_squares
);
1144 enquire
.set_weighting_scheme(wt
);
1145 Xapian::MSet mset
= enquire
.get_mset(0, db
.get_doccount());
1147 // The document order in the multi-db case isn't the same as the
1148 // postlist order on the combined DB, so it's hard to compare the
1149 // wdf for each document in the Weight objects, but we can sum
1150 // the wdfs and the squares of the wdfs which provides a decent
1151 // check that we're not getting the wrong wdf values (it ensures
1152 // they have the right mean and standard deviation).
1153 Xapian::termcount expected_sum
= 0;
1154 Xapian::termcount expected_sum_squares
= 0;
// Step through the two posting lists together, tracking the current docid
// of each.
1155 Xapian::PostingIterator i
= db
.postlist_begin(term1
);
1156 Xapian::PostingIterator j
= db
.postlist_begin(term2
);
1157 Xapian::docid did1
= *i
, did2
= *j
;
1159 // To calculate expected_sum_squares correctly we need to square
1160 // the sum per document.
1161 Xapian::termcount wdf
;
// NOTE(review): the loop header and some branches of the merge are not
// visible in this excerpt; the visible branch adds both wdfs together.
1163 wdf
= i
.get_wdf() + j
.get_wdf();
1165 } else if (did1
< did2
) {
1172 expected_sum
+= wdf
;
1173 expected_sum_squares
+= wdf
* wdf
;
// Xapian::docid(-1) appears to mark a posting list as exhausted; the merge
// stops once the other list has been marked that way too.
1176 if (++i
!= db
.postlist_end(term1
)) {
1179 if (did2
== Xapian::docid(-1)) break;
1180 did1
= Xapian::docid(-1);
1184 if (++j
!= db
.postlist_end(term2
)) {
1187 if (did1
== Xapian::docid(-1)) break;
1188 did2
= Xapian::docid(-1);
1192 // The OP_SYNONYM's wdf should be equal to the sum of the wdfs of
1193 // the individual terms.
1194 TEST_EQUAL(sum
, expected_sum
);
// Only a lower bound is asserted for the sum of squares.
1195 TEST_REL(sum_squares
, >=, expected_sum_squares
);
1199 /// Check the weight subclass gets the correct stats with OP_WILDCARD.
1200 // Regression test for bug fixed in 1.4.1.
1201 // Don't run with multi-database, as the termfreq checks don't work
1202 // there - FIXME: Investigate this - it smells like a bug.
1203 DEFINE_TESTCASE(checkstatsweight3
, backend
&& !remote
&& !multi
) {
// Comparison functor for PostingIterator objects; PlCmp() is what the heap
// calls below are given.
// NOTE(review): the declaration line and the body of the comparison are not
// visible in this excerpt.
1205 bool operator()(const Xapian::PostingIterator
& a
,
1206 const Xapian::PostingIterator
& b
) {
1211 Xapian::Database db
= get_database("apitest_simpledata");
1212 Xapian::Enquire
enquire(db
);
1213 Xapian::TermIterator a
;
// Wildcard prefixes to test.
1214 static const char * const testcases
[] = {
1215 "a", // a* matches all documents, but no term matches all.
1216 "pa", // Expands to only "paragraph", matching 5.
1217 "zulu", // No matches.
1218 "th", // Term "this" matches all documents.
1220 for (auto pattern
: testcases
) {
1221 Xapian::Query
q(Xapian::Query::OP_WILDCARD
, pattern
);
1222 tout
<< q
.get_description() << endl
;
1223 enquire
.set_query(q
);
// Accumulators passed by reference to CheckStatsWeight; "*" as term2
// selects its OP_WILDCARD handling.
1224 Xapian::termcount sum
= 0;
1225 Xapian::termcount sum_squares
= 0;
1226 CheckStatsWeight
wt(db
, pattern
, "*", sum
, sum_squares
);
1227 enquire
.set_weighting_scheme(wt
);
1228 Xapian::MSet mset
= enquire
.get_mset(0, db
.get_doccount());
1230 // The document order in the multi-db case isn't the same as the
1231 // postlist order on the combined DB, so it's hard to compare the
1232 // wdf for each document in the Weight objects, but we can sum
1233 // the wdfs and the squares of the wdfs which provides a decent
1234 // check that we're not getting the wrong wdf values (it ensures
1235 // they have the right mean and standard deviation).
1236 Xapian::termcount expected_sum
= 0;
1237 Xapian::termcount expected_sum_squares
= 0;
// Collect one posting list per term which starts with the pattern.
1238 vector
<Xapian::PostingIterator
> postlists
;
1239 for (auto&& t
= db
.allterms_begin(pattern
);
1240 t
!= db
.allterms_end(pattern
); ++t
) {
1241 postlists
.emplace_back(db
.postlist_begin(*t
));
// Merge the posting lists using a heap ordered by PlCmp.
1243 make_heap(postlists
.begin(), postlists
.end(), PlCmp());
// did/wdf hold the document currently being accumulated.
1244 Xapian::docid did
= 0;
1245 Xapian::termcount wdf
= 0;
1246 while (!postlists
.empty()) {
// pop_heap() moves the top posting list to the back of the vector.
1247 pop_heap(postlists
.begin(), postlists
.end(), PlCmp());
1248 Xapian::docid did_new
= *postlists
.back();
1249 Xapian::termcount wdf_new
= postlists
.back().get_wdf();
// Advance that list; drop it once it reaches the end, otherwise put it back
// into the heap.
1250 if (++(postlists
.back()) == Xapian::PostingIterator()) {
1251 postlists
.pop_back();
1253 push_heap(postlists
.begin(), postlists
.end(), PlCmp());
// On moving to a different docid, add in the total for the previous one.
// NOTE(review): the statements updating did and wdf are not visible in this
// excerpt.
1255 if (did_new
!= did
) {
1256 expected_sum
+= wdf
;
1257 expected_sum_squares
+= wdf
* wdf
;
// Add in the last document's total after the merge loop.
1263 expected_sum
+= wdf
;
1264 expected_sum_squares
+= wdf
* wdf
;
1265 // The OP_SYNONYM's wdf should be equal to the sum of the wdfs of
1266 // the individual terms.
1267 TEST_EQUAL(sum
, expected_sum
);
// Only a lower bound is asserted for the sum of squares.
1268 TEST_REL(sum_squares
, >=, expected_sum_squares
);
1272 /// Check the stats for a repeated term are correct.
1273 // Regression test for bug fixed in 1.4.6. Doesn't work with
1274 // multi as the weight object is cloned more times.
1275 DEFINE_TESTCASE(checkstatsweight4
, backend
&& !remote
&& !multi
) {
1276 Xapian::Database db
= get_database("apitest_simpledata");
1277 Xapian::Enquire
enquire(db
);
1278 Xapian::TermIterator a
;
1279 for (a
= db
.allterms_begin(); a
!= db
.allterms_end(); ++a
) {
1280 const string
& term
= *a
;
1281 enquire
.set_query(Xapian::Query(term
, 1, 1) |
1282 Xapian::Query(term
, 1, 2));
1283 Xapian::termcount sum
= 0;
1284 Xapian::termcount sum_squares
= 0;
1285 CheckStatsWeight
wt(db
, term
, "=", sum
, sum_squares
);
1286 enquire
.set_weighting_scheme(wt
);
1287 Xapian::MSet mset
= enquire
.get_mset(0, db
.get_doccount());
1289 // The document order in the multi-db case isn't the same as the
1290 // postlist order on the combined DB, so it's hard to compare the
1291 // wdf for each document in the Weight objects, but we can sum
1292 // the wdfs and the squares of the wdfs which provides a decent
1293 // check that we're not getting the wrong wdf values (it ensures
1294 // they have the right mean and standard deviation).
1295 Xapian::termcount expected_sum
= 0;
1296 Xapian::termcount expected_sum_squares
= 0;
1297 Xapian::PostingIterator i
;
1298 for (i
= db
.postlist_begin(term
); i
!= db
.postlist_end(term
); ++i
) {
1299 Xapian::termcount wdf
= i
.get_wdf();
1300 expected_sum
+= wdf
;
1301 expected_sum_squares
+= wdf
* wdf
;
1303 TEST_EQUAL(sum
, expected_sum
);
1304 TEST_EQUAL(sum_squares
, expected_sum_squares
);
1308 // Two stage should perform same as Jelinek mercer if smoothing parameter for mercer is kept 1 in both.
1309 DEFINE_TESTCASE(unigramlmweight4
, backend
) {
1310 Xapian::Database db
= get_database("apitest_simpledata");
1311 Xapian::Enquire
enquire1(db
);
1312 Xapian::Enquire
enquire2(db
);
1313 enquire1
.set_query(Xapian::Query("paragraph"));
1315 enquire2
.set_query(Xapian::Query("paragraph"));
1317 // 5 documents available with term paragraph so mset size should be 5
1318 enquire1
.set_weighting_scheme(Xapian::LMWeight(0, Xapian::Weight::TWO_STAGE_SMOOTHING
, 1, 0));
1319 enquire2
.set_weighting_scheme(Xapian::LMWeight(0, Xapian::Weight::JELINEK_MERCER_SMOOTHING
, 1, 0));
1320 mset1
= enquire1
.get_mset(0, 10);
1321 mset2
= enquire2
.get_mset(0, 10);
1323 TEST_EQUAL(mset1
.size(), 5);
1324 TEST_EQUAL_DOUBLE(mset1
[1].get_weight(), mset2
[1].get_weight());
1327 /* Test for checking if we don't use smoothing all
1328 * of them should give same result i.e wdf_double/len_double */
1329 DEFINE_TESTCASE(unigramlmweight5
, backend
) {
1330 Xapian::Database db
= get_database("apitest_simpledata");
1331 Xapian::Enquire
enquire1(db
);
1332 Xapian::Enquire
enquire2(db
);
1333 Xapian::Enquire
enquire3(db
);
1334 Xapian::Enquire
enquire4(db
);
1335 enquire1
.set_query(Xapian::Query("paragraph"));
1337 enquire2
.set_query(Xapian::Query("paragraph"));
1339 enquire3
.set_query(Xapian::Query("paragraph"));
1341 enquire4
.set_query(Xapian::Query("paragraph"));
1343 // 5 documents available with term paragraph so mset size should be 5
1344 enquire1
.set_weighting_scheme(Xapian::LMWeight(10000.0, Xapian::Weight::TWO_STAGE_SMOOTHING
, 0, 0));
1345 enquire2
.set_weighting_scheme(Xapian::LMWeight(10000.0, Xapian::Weight::JELINEK_MERCER_SMOOTHING
, 0, 0));
1346 enquire3
.set_weighting_scheme(Xapian::LMWeight(10000.0, Xapian::Weight::ABSOLUTE_DISCOUNT_SMOOTHING
, 0, 0));
1347 enquire4
.set_weighting_scheme(Xapian::LMWeight(10000.0, Xapian::Weight::DIRICHLET_SMOOTHING
, 0, 0));
1349 mset1
= enquire1
.get_mset(0, 10);
1350 mset2
= enquire2
.get_mset(0, 10);
1351 mset3
= enquire3
.get_mset(0, 10);
1352 mset4
= enquire4
.get_mset(0, 10);
1354 TEST_EQUAL(mset1
.size(), 5);
1355 TEST_EQUAL(mset2
.size(), 5);
1356 TEST_EQUAL(mset3
.size(), 5);
1357 TEST_EQUAL(mset4
.size(), 5);
1358 for (size_t i
= 0; i
< 5; ++i
) {
1359 TEST_EQUAL_DOUBLE(mset3
[i
].get_weight(), mset4
[i
].get_weight());
1360 TEST_EQUAL_DOUBLE(mset2
[i
].get_weight(), mset4
[i
].get_weight());
1361 TEST_EQUAL_DOUBLE(mset1
[i
].get_weight(), mset2
[i
].get_weight());
1362 TEST_EQUAL_DOUBLE(mset3
[i
].get_weight(), mset2
[i
].get_weight());
1363 TEST_EQUAL_DOUBLE(mset1
[i
].get_weight(), mset4
[i
].get_weight());
1364 TEST_EQUAL_DOUBLE(mset1
[i
].get_weight(), mset3
[i
].get_weight());
1368 // Test Exception for junk after serialised weight (with Dir+ enabled).
1369 DEFINE_TESTCASE(unigramlmweight6
, !backend
) {
1370 Xapian::LMWeight
wt(0, Xapian::Weight::DIRICHLET_SMOOTHING
, 0.5, 1.0);
// Unserialise the serialised form of wt with a junk "X" appended.
// NOTE(review): the start of the try block and the declaration of d are not
// visible in this excerpt.
1373 Xapian::LMWeight
* d2
= d
.unserialise(wt
.serialise() + "X");
1374 // Make sure we actually use the weight.
1375 bool empty
= d2
->name().empty();
// Getting this far means unserialise() did not throw, which is a failure.
// NOTE(review): the first FAIL_TEST is presumably conditional on `empty`;
// the lines between are not visible in this excerpt.
1378 FAIL_TEST("Serialised LMWeight with junk appended unserialised to empty name!");
1379 FAIL_TEST("Serialised LMWeight with junk appended unserialised OK");
// Expected outcome: SerialisationError whose message contains "LM".
1380 } catch (const Xapian::SerialisationError
&e
) {
1381 TEST(e
.get_msg().find("LM") != string::npos
);
1385 // Feature test for Dir+ function.
1386 DEFINE_TESTCASE(unigramlmweight7
, backend
) {
1387 Xapian::Database db
= get_database("apitest_simpledata");
1388 Xapian::Enquire
enquire1(db
);
1389 Xapian::Enquire
enquire2(db
);
1390 enquire1
.set_query(Xapian::Query("paragraph"));
1391 enquire2
.set_query(Xapian::Query("paragraph"));
1395 enquire1
.set_weighting_scheme(Xapian::LMWeight(0, Xapian::Weight::DIRICHLET_SMOOTHING
, 2000, 0));
1396 enquire2
.set_weighting_scheme(Xapian::LMWeight(0, Xapian::Weight::DIRICHLET_PLUS_SMOOTHING
, 2000, 0.05));
1398 mset1
= enquire1
.get_mset(0, 10);
1399 mset2
= enquire2
.get_mset(0, 10);
1401 // mset size should be 5
1402 TEST_EQUAL(mset1
.size(), 5);
1403 TEST_EQUAL(mset2
.size(), 5);
1405 // Expect mset weights associated with Dir+ more than mset weights by Dir
1406 // because of the presence of extra weight component in Dir+ function.
1407 TEST_REL(mset2
[0].get_weight(),>,mset1
[0].get_weight());
1408 TEST_REL(mset2
[1].get_weight(),>,mset1
[1].get_weight());
1409 TEST_REL(mset2
[2].get_weight(),>,mset1
[2].get_weight());
1410 TEST_REL(mset2
[3].get_weight(),>,mset1
[3].get_weight());
1411 TEST_REL(mset2
[4].get_weight(),>,mset1
[4].get_weight());
1414 // Regression test that OP_SCALE_WEIGHT works with LMWeight (fixed in 1.4.1).
1415 DEFINE_TESTCASE(unigramlmweight8
, backend
) {
1416 Xapian::Database db
= get_database("apitest_simpledata");
1417 Xapian::Enquire
enquire(db
);
1418 Xapian::Query
query("paragraph");
1420 enquire
.set_query(query
);
1421 enquire
.set_weighting_scheme(Xapian::LMWeight(0, Xapian::Weight::DIRICHLET_SMOOTHING
, 2000, 0));
1424 mset1
= enquire
.get_mset(0, 10);
1425 TEST_EQUAL(mset1
.size(), 5);
1427 enquire
.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT
, query
, 15.0));
1428 enquire
.set_weighting_scheme(Xapian::LMWeight(0, Xapian::Weight::DIRICHLET_SMOOTHING
, 2000, 0));
1431 mset2
= enquire
.get_mset(0, 10);
1432 TEST_EQUAL(mset2
.size(), mset1
.size());
1433 TEST_NOT_EQUAL_DOUBLE(mset1
[0].get_weight(), 0.0);
1434 for (Xapian::doccount i
= 0; i
< mset1
.size(); ++i
) {
1435 TEST_EQUAL_DOUBLE(15.0 * mset1
[i
].get_weight(), mset2
[i
].get_weight());
1439 // Feature test for BoolWeight.
1440 // Test exception for junk after serialised weight.
1441 DEFINE_TESTCASE(boolweight1
, !backend
) {
1442 Xapian::BoolWeight wt
;
// Unserialise the serialised form of wt with a junk "X" appended.
// NOTE(review): the start of the try block is not visible in this excerpt.
1444 Xapian::BoolWeight t
;
1445 Xapian::BoolWeight
* t2
= t
.unserialise(wt
.serialise() + "X");
1446 // Make sure we actually use the weight.
1447 bool empty
= t2
->name().empty();
// Getting this far means unserialise() did not throw, which is a failure.
// NOTE(review): the first FAIL_TEST is presumably conditional on `empty`;
// the lines between are not visible in this excerpt.
1450 FAIL_TEST("Serialised BoolWeight with junk appended unserialised to empty name!");
1451 FAIL_TEST("Serialised BoolWeight with junk appended unserialised OK");
// Expected outcome: SerialisationError whose message contains "Bool".
1452 } catch (const Xapian::SerialisationError
&e
) {
1453 TEST(e
.get_msg().find("Bool") != string::npos
);
1457 // Feature test for CoordWeight.
1458 DEFINE_TESTCASE(coordweight1
, backend
) {
1459 Xapian::Enquire
enquire(get_database("apitest_simpledata"));
1460 enquire
.set_weighting_scheme(Xapian::CoordWeight());
// Terms which are OR-ed together to form the query.
1461 static const char * const terms
[] = {
1462 "this", "line", "paragraph", "rubbish"
// The end pointer is computed from the array's element count.
1464 Xapian::Query
query(Xapian::Query::OP_OR
,
1465 terms
, terms
+ sizeof(terms
) / sizeof(terms
[0]));
1466 enquire
.set_query(query
);
1467 Xapian::MSet mymset1
= enquire
.get_mset(0, 100);
1468 // CoordWeight scores 1 for each matching term, so the weight should equal
1469 // the number of matching terms.
1470 for (Xapian::MSetIterator i
= mymset1
.begin(); i
!= mymset1
.end(); ++i
) {
// NOTE(review): the loop body is not visible in this excerpt; presumably it
// counts the matching terms for this entry - confirm.
1471 Xapian::termcount matching_terms
= 0;
1472 Xapian::TermIterator t
= enquire
.get_matching_terms_begin(i
);
1473 while (t
!= enquire
.get_matching_terms_end(i
)) {
1477 TEST_EQUAL(i
.get_weight(), matching_terms
);
1480 // Test with OP_SCALE_WEIGHT.
1481 enquire
.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT
, query
, 15.0));
1482 Xapian::MSet mymset2
= enquire
.get_mset(0, 100);
1483 TEST_EQUAL(mymset1
.size(), mymset2
.size());
// Each scaled weight should be exactly 15 times the unscaled weight at the
// same rank (compared with TEST_EQUAL, not TEST_EQUAL_DOUBLE).
1484 for (Xapian::doccount i
= 0; i
!= mymset1
.size(); ++i
) {
1485 TEST_EQUAL(15.0 * mymset1
[i
].get_weight(), mymset2
[i
].get_weight());
1489 // Test exception for junk after serialised weight.
1490 DEFINE_TESTCASE(coordweight2
, !backend
) {
1491 Xapian::CoordWeight wt
;
// Unserialise the serialised form of wt with a junk "X" appended.
// NOTE(review): the start of the try block is not visible in this excerpt.
1493 Xapian::CoordWeight t
;
1494 Xapian::CoordWeight
* t2
= t
.unserialise(wt
.serialise() + "X");
1495 // Make sure we actually use the weight.
1496 bool empty
= t2
->name().empty();
// Getting this far means unserialise() did not throw, which is a failure.
// NOTE(review): the first FAIL_TEST is presumably conditional on `empty`;
// the lines between are not visible in this excerpt.
1499 FAIL_TEST("Serialised CoordWeight with junk appended unserialised to empty name!");
1500 FAIL_TEST("Serialised CoordWeight with junk appended unserialised OK");
// Expected outcome: SerialisationError whose message contains "Coord".
1501 } catch (const Xapian::SerialisationError
&e
) {
1502 TEST(e
.get_msg().find("Coord") != string::npos
);