/** @file
 * @brief tests of MatchSpy usage
 */
/* Copyright 2007,2009 Lemur Consulting Ltd
 * Copyright 2009,2011,2012,2015,2019 Olly Betts
 * Copyright 2010 Richard Boulton
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
 */
26 #include "api_matchspy.h"
34 #include "backendmanager.h"
36 #include "testsuite.h"
37 #include "testutils.h"
// #######################################################################
45 class SimpleMatchSpy
: public Xapian::MatchSpy
{
47 // Vector which will be filled with all the document contents seen.
48 std::vector
<std::string
> seen
;
50 void operator()(const Xapian::Document
&doc
, double) {
51 // Note that this is not recommended usage of get_data() - you
52 // generally shouldn't call get_data() from inside a MatchSpy, because
53 // it is (likely to be) a slow operation resulting in considerable IO.
54 seen
.push_back(doc
.get_data());
58 // Basic test of a matchspy.
59 DEFINE_TESTCASE(matchspy1
, backend
&& !remote
) {
60 Xapian::Database
db(get_database("apitest_simpledata"));
61 Xapian::Enquire
enquire(db
);
62 enquire
.set_query(Xapian::Query("this"));
66 Xapian::MSet nospymset
= enquire
.get_mset(0, 100);
67 enquire
.add_matchspy(&myspy
);
68 Xapian::MSet spymset
= enquire
.get_mset(0, 100);
70 // Check that the match estimates aren't affected by the matchspy.
71 TEST_EQUAL(nospymset
, spymset
);
73 vector
<bool> docid_checked(db
.get_lastdocid());
75 // Check that we get the expected number of matches, and that the stored
76 // document contents are right.
77 Xapian::MSetIterator i
= spymset
.begin();
78 TEST(i
!= spymset
.end());
79 TEST_EQUAL(spymset
.size(), 6);
80 TEST_EQUAL(myspy
.seen
.size(), spymset
.size());
82 std::sort(myspy
.seen
.begin(), myspy
.seen
.end());
84 std::vector
<std::string
> seen2
;
85 for ( ; i
!= spymset
.end(); ++i
) {
86 const Xapian::Document
doc(i
.get_document());
87 seen2
.push_back(doc
.get_data());
89 std::sort(seen2
.begin(), seen2
.end());
91 TEST_EQUAL(myspy
.seen
.size(), seen2
.size());
92 std::vector
<std::string
>::const_iterator j
= myspy
.seen
.begin();
93 std::vector
<std::string
>::const_iterator j2
= seen2
.begin();
94 for (; j
!= myspy
.seen
.end(); ++j
, ++j2
) {
99 static string
values_to_repr(const Xapian::ValueCountMatchSpy
& spy
) {
100 string
resultrepr("|");
101 for (Xapian::TermIterator i
= spy
.values_begin();
102 i
!= spy
.values_end();
106 resultrepr
+= str(i
.get_termfreq());
113 make_matchspy2_db(Xapian::WritableDatabase
&db
, const string
&)
115 for (int c
= 1; c
<= 25; ++c
) {
116 Xapian::Document doc
;
117 doc
.set_data("Document " + str(c
));
119 for (int factor
= 1; factor
<= c
; ++factor
) {
121 if (c
% factor
== 0) {
122 doc
.add_term("XFACT" + str(factor
));
127 // Number of factors.
128 doc
.add_value(0, str(factors
));
130 doc
.add_value(1, str(c
% 10));
132 doc
.add_value(2, "fish");
134 doc
.add_value(3, str(str(c
).size()));
136 db
.add_document(doc
);
140 DEFINE_TESTCASE(matchspy2
, generated
)
142 Xapian::Database db
= get_database("matchspy2", make_matchspy2_db
);
144 Xapian::ValueCountMatchSpy
spy0(0);
145 Xapian::ValueCountMatchSpy
spy1(1);
146 Xapian::ValueCountMatchSpy
spy3(3);
148 Xapian::Enquire
enq(db
);
150 enq
.set_query(Xapian::Query("all"));
151 if (startswith(get_dbtype(), "multi")) {
152 // Without this, we short-cut on the second shard because we don't get
153 // the documents in ascending weight order.
154 enq
.set_weighting_scheme(Xapian::CoordWeight());
157 enq
.add_matchspy(&spy0
);
158 enq
.add_matchspy(&spy1
);
159 enq
.add_matchspy(&spy3
);
160 Xapian::MSet mset
= enq
.get_mset(0, 10);
162 TEST_EQUAL(spy0
.get_total(), 25);
163 TEST_EQUAL(spy1
.get_total(), 25);
164 TEST_EQUAL(spy3
.get_total(), 25);
166 static const char * const results
[] = {
167 "|1:1|2:9|3:3|4:7|5:1|6:3|8:1|",
168 "|0:2|1:3|2:3|3:3|4:3|5:3|6:2|7:2|8:2|9:2|",
171 TEST_STRINGS_EQUAL(values_to_repr(spy0
), results
[0]);
172 TEST_STRINGS_EQUAL(values_to_repr(spy1
), results
[1]);
173 TEST_STRINGS_EQUAL(values_to_repr(spy3
), results
[2]);
176 DEFINE_TESTCASE(matchspy4
, generated
)
178 XFAIL_FOR_BACKEND("multi_remote",
179 "Matchspy counts hits on remote and locally");
180 XFAIL_FOR_BACKEND("multi_glass_remote",
181 "Matchspy counts hits on remote and locally");
183 Xapian::Database db
= get_database("matchspy2", make_matchspy2_db
);
185 // We're going to run the match twice - once sorted by relevance, and once
186 // sorted by a value. This is a regression test - the matcher used to fail
187 // to show some documents to the spy when sorting by non-pure-relevance.
188 Xapian::ValueCountMatchSpy
spya0(0);
189 Xapian::ValueCountMatchSpy
spya1(1);
190 Xapian::ValueCountMatchSpy
spya3(3);
191 Xapian::ValueCountMatchSpy
spyb0(0);
192 Xapian::ValueCountMatchSpy
spyb1(1);
193 Xapian::ValueCountMatchSpy
spyb3(3);
195 Xapian::Enquire
enqa(db
);
196 Xapian::Enquire
enqb(db
);
198 enqa
.set_query(Xapian::Query("all"));
199 if (startswith(get_dbtype(), "multi")) {
200 // Without this, we short-cut on the second shard because we don't get
201 // the documents in ascending weight order.
202 enqa
.set_weighting_scheme(Xapian::CoordWeight());
204 enqb
.set_query(Xapian::Query("all"));
206 enqa
.add_matchspy(&spya0
);
207 enqa
.add_matchspy(&spya1
);
208 enqa
.add_matchspy(&spya3
);
209 enqb
.add_matchspy(&spyb0
);
210 enqb
.add_matchspy(&spyb1
);
211 enqb
.add_matchspy(&spyb3
);
213 Xapian::MSet mseta
= enqa
.get_mset(0, 10);
214 enqb
.set_sort_by_value(0, false);
215 Xapian::MSet msetb
= enqb
.get_mset(0, 10, 100);
217 TEST_EQUAL(spya0
.get_total(), 25);
218 TEST_EQUAL(spya1
.get_total(), 25);
219 TEST_EQUAL(spya3
.get_total(), 25);
220 TEST_EQUAL(spyb0
.get_total(), 25);
221 TEST_EQUAL(spyb1
.get_total(), 25);
222 TEST_EQUAL(spyb3
.get_total(), 25);
224 static const char * const results
[] = {
225 "|2:9|4:7|3:3|6:3|1:1|5:1|8:1|",
226 "|1:3|2:3|3:3|4:3|5:3|0:2|6:2|7:2|8:2|9:2|",
229 "|2:9|4:7|3:3|6:3|1:1|5:1|8:1|",
230 "|1:3|2:3|3:3|4:3|5:3|0:2|6:2|7:2|8:2|9:2|",
235 std::vector
<Xapian::ValueCountMatchSpy
*> spies
;
236 spies
.push_back(&spya0
);
237 spies
.push_back(&spya1
);
238 spies
.push_back(NULL
);
239 spies
.push_back(&spya3
);
240 spies
.push_back(&spyb0
);
241 spies
.push_back(&spyb1
);
242 spies
.push_back(NULL
);
243 spies
.push_back(&spyb3
);
244 for (Xapian::valueno v
= 0; results
[v
]; ++v
) {
245 tout
<< "value " << v
<< endl
;
246 Xapian::ValueCountMatchSpy
* spy
= spies
[v
];
247 string
allvals_str("|");
249 size_t allvals_size
= 0;
250 for (Xapian::TermIterator i
= spy
->top_values_begin(100);
251 i
!= spy
->top_values_end(100);
252 ++i
, ++allvals_size
) {
255 allvals_str
+= str(i
.get_termfreq());
258 tout
<< allvals_str
<< endl
;
259 TEST_STRINGS_EQUAL(allvals_str
, results
[v
]);
261 for (size_t count
= 0; count
< allvals_size
; ++count
) {
262 tout
<< "count " << count
<< endl
;
263 for (Xapian::TermIterator i
= spy
->top_values_begin(100),
264 j
= spy
->top_values_begin(count
);
265 i
!= spy
->top_values_end(100) &&
266 j
!= spy
->top_values_end(count
);
268 tout
<< "j " << j
<< endl
;
270 TEST_EQUAL(i
.get_termfreq(), j
.get_termfreq());
277 // Test builtin match spies
278 DEFINE_TESTCASE(matchspy5
, backend
)
280 Xapian::Database
db(get_database("apitest_simpledata"));
281 Xapian::Enquire
enquire(db
);
282 enquire
.set_query(Xapian::Query("this"));
284 Xapian::ValueCountMatchSpy
myspy1(1);
285 Xapian::ValueCountMatchSpy
myspy2(1);
287 enquire
.add_matchspy(&myspy1
);
288 enquire
.add_matchspy(&myspy2
);
289 Xapian::MSet mymset
= enquire
.get_mset(0, 100);
290 TEST_EQUAL(mymset
.size(), 6);
292 Xapian::TermIterator i
= myspy1
.values_begin();
293 TEST(i
!= myspy1
.values_end());
295 TEST_EQUAL(i
.get_termfreq(), 5);
297 TEST(i
!= myspy1
.values_end());
299 TEST_EQUAL(i
.get_termfreq(), 1);
301 TEST(i
== myspy1
.values_end());
303 i
= myspy2
.values_begin();
304 TEST(i
!= myspy2
.values_end());
306 TEST_EQUAL(i
.get_termfreq(), 5);
308 TEST(i
!= myspy2
.values_end());
310 TEST_EQUAL(i
.get_termfreq(), 1);
312 TEST(i
== myspy2
.values_end());
315 class MySpy
: public Xapian::MatchSpy
{
316 void operator()(const Xapian::Document
&, double) {
320 // Test exceptions from matchspy base class, and get_description method.
321 DEFINE_TESTCASE(matchspy6
, !backend
)
325 TEST_EXCEPTION(Xapian::UnimplementedError
, spy
.clone());
326 TEST_EXCEPTION(Xapian::UnimplementedError
, spy
.name());
327 TEST_EXCEPTION(Xapian::UnimplementedError
, spy
.serialise());
328 TEST_EXCEPTION(Xapian::UnimplementedError
,
329 spy
.unserialise(std::string(), Xapian::Registry()));
330 TEST_EXCEPTION(Xapian::UnimplementedError
, spy
.serialise_results());
331 TEST_EXCEPTION(Xapian::UnimplementedError
,
332 spy
.merge_results(std::string()));
333 TEST_EQUAL(spy
.get_description(), "Xapian::MatchSpy()");
336 /// Regression test for bug fixed in 1.4.12.
337 DEFINE_TESTCASE(matchspy7
, !backend
)
339 Xapian::ValueCountMatchSpy
myspy(1);
340 string s
= myspy
.serialise_results();
342 // This merge_results() call used to enter an infinite loop.
343 TEST_EXCEPTION(Xapian::NetworkError
, myspy
.merge_results(s
));