[ci] Fix clang-sanitisers job for GHA change
[xapian.git] / xapian-bindings / python3 / pythontest.py
blobd1524531eb5071f434440f73599e02cffb5f49ca
1 # Tests of Python-specific parts of the xapian bindings.
3 # Copyright (C) 2007 Lemur Consulting Ltd
4 # Copyright (C) 2008,2009,2010,2011,2013,2014,2015,2016,2019 Olly Betts
5 # Copyright (C) 2010,2011 Richard Boulton
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License as
9 # published by the Free Software Foundation; either version 2 of the
10 # License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20 # USA
22 import os
23 import random
24 import shutil
25 import sys
26 import tempfile
27 import xapian
29 try:
30 import threading
31 have_threads = True
32 except ImportError:
33 have_threads = False
35 from testsuite import *
def setup_database():
    """Set up and return an inmemory database with 5 documents.

    Documents 2 to 5 are all added from the same Document object, which is
    given new data plus extra terms and values before each add_document()
    call, so each of them also carries what was added for the earlier ones.

    As a side check, this also verifies that sortable_unserialise() rejects
    a str argument.

    """
    db = xapian.WritableDatabase('', xapian.DB_BACKEND_INMEMORY)

    doc = xapian.Document()
    doc.set_data("is it cold?")
    doc.add_term("is")
    doc.add_posting("it", 1)
    doc.add_posting("cold", 2)
    db.add_document(doc)

    doc = xapian.Document()
    doc.set_data("was it warm?")
    doc.add_posting("was", 1)
    doc.add_posting("it", 2)
    doc.add_posting("warm", 3)
    db.add_document(doc)
    doc.set_data("was it warm? two")
    doc.add_term("two", 2)
    doc.add_value(0, xapian.sortable_serialise(2))
    db.add_document(doc)
    doc.set_data("was it warm? three")
    doc.add_term("three", 3)
    doc.add_value(0, xapian.sortable_serialise(1.5))
    db.add_document(doc)
    doc.set_data("was it warm? four it")
    doc.add_term("four", 4)
    doc.add_term("it", 6)
    doc.add_posting("it", 7)
    doc.add_value(5, 'five')
    doc.add_value(9, 'nine')
    doc.add_value(0, xapian.sortable_serialise(2))
    db.add_document(doc)

    expect(db.get_doccount(), 5)

    # Test that str is rejected by sortable_unserialise().  Going through
    # expect_exception() means the check also fails if no exception is
    # raised at all, rather than silently passing.
    expect_exception(TypeError, 'expected bytes, str found',
                     xapian.sortable_unserialise, "unicode")

    return db
def test_exception_base():
    """Check that xapian exceptions have Exception as a base class.

    """
    try:
        raise xapian.InvalidOperationError("Test exception")
    except Exception as e:
        # In Python 3, raising a class which doesn't derive from
        # BaseException produces a TypeError, which this handler would
        # also catch - so check we caught what we actually raised.
        expect(isinstance(e, xapian.InvalidOperationError), True)

    expect(issubclass(xapian.InvalidOperationError, Exception), True)
def test_mset_iter():
    """Test iterators over MSets.

    Checks the items produced by iterating a whole MSet, then for every
    (start, maxitems) window compares the iterated items against
    get_hit(), against the whole MSet, and against indexing with [].

    """
    def expect_same_hit(fetch, item):
        # fetch() is called once per field so that an indexed lookup is
        # repeated for each comparison.
        expect(fetch().docid, item.docid)
        expect(fetch().rank, item.rank)
        expect(fetch().percent, item.percent)
        expect(fetch().document.get_data(), item.document.get_data())
        expect(fetch().collapse_key, item.collapse_key)
        expect(fetch().collapse_count, item.collapse_count)

    db = setup_database()
    enquire = xapian.Enquire(db)
    enquire.set_query(xapian.Query(xapian.Query.OP_OR, "was", "it"))
    mset = enquire.get_mset(0, 10)

    items = [item for item in mset]
    expect(len(items), 5)
    expect(len(mset), len(items), "Expected number of items to be length of mset")

    context("testing returned item from mset")
    third = items[2]
    expect(third.docid, 4)
    expect(third.rank, 2)
    expect(third.percent, 86)
    expect(third.collapse_key, b'')
    expect(third.collapse_count, 0)
    expect(third.document.get_data(), b'was it warm? three')

    # Check iterators for sub-msets against the whole mset.
    for start in range(6):
        for maxitems in range(6):
            window = (start, maxitems)
            context("checking iterators for sub-mset from %d, maxitems %d" % window)
            submset = enquire.get_mset(start, maxitems)
            for num, item in enumerate(submset):
                where = (num, start, maxitems)
                context("testing hit %d for sub-mset from %d, maxitems %d" % where)
                expect(item.rank, num + start)

                context("comparing iterator item %d for sub-mset from %d, maxitems %d against hit" % where)
                hit = submset.get_hit(num)
                expect_same_hit(lambda: hit, item)

                context("comparing iterator item %d for sub-mset from %d, maxitems %d against hit from whole mset" % where)
                hit = mset.get_hit(num + start)
                expect_same_hit(lambda: hit, item)

                context("comparing iterator item %d for sub-mset from %d, maxitems %d against direct access with []" % where)
                expect_same_hit(lambda: submset[num], item)

            context("Checking out of range access to mset, for sub-mset from %d, maxitems %d" % window)
            # Test out-of-range access to mset:
            for bad_index in (-10, 10, -1-len(submset), len(submset)):
                expect_exception(IndexError, 'Mset index out of range',
                                 submset.__getitem__, bad_index)

            # Check that the item contents remain valid when the iterator has
            # moved on.
            saved_items = [item for item in submset]
            for num, item in enumerate(saved_items):
                context("comparing iterator item %d for sub-mset mset from %d, maxitems %d against saved item" % (num, start, maxitems))
                expect_same_hit(lambda: submset[num], item)

            # Check that the right number of items exist in the mset.
            context("checking length of sub-mset from %d, maxitems %d" % window)
            wanted = min(maxitems, 5 - start)
            expect(len([item for item in submset]), wanted)
            expect(len(submset), wanted)
def test_eset_iter():
    """Test iterators over ESets.

    Builds one ESet from an Enquire with no query set and one from an
    Enquire with a query set, and compares the terms and weights of the
    items the two produce.

    """
    db = setup_database()
    query = xapian.Query(xapian.Query.OP_OR, "was", "it")
    rset = xapian.RSet()
    rset.add_document(3)

    context("getting eset items without a query")
    enquire = xapian.Enquire(db)
    eset = enquire.get_eset(10, rset)
    plain_items = [item for item in eset]
    expect(len(plain_items), 3)
    expect(len(plain_items), len(eset))

    context("getting eset items with a query")
    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    eset = enquire.get_eset(10, rset)
    query_items = [item for item in eset]
    expect(len(query_items), 2)
    expect(len(query_items), len(eset))

    # (index with a query, index without a query) for items which should
    # correspond to each other.
    pairing = ((0, 0), (1, 2))

    context("comparing eset items with a query to those without")
    for with_query, without_query in pairing:
        expect(query_items[with_query].term, plain_items[without_query].term)

    context("comparing eset weights with a query to those without")
    for with_query, without_query in pairing:
        expect(query_items[with_query].weight, plain_items[without_query].weight)
def test_matchingterms_iter():
    """Test Enquire.matching_terms iterator.

    matching_terms() is called with a docid, with an item from iterating
    the MSet, and with the result of MSet.get_hit().

    """
    db = setup_database()
    enquire = xapian.Enquire(db)
    # Prior to 1.2.4 Enquire.matching_terms() leaked references to its members.
    enquire.set_query(
        xapian.Query(xapian.Query.OP_OR, ("was", "it", "warm", "two")))
    mset = enquire.get_mset(0, 10)

    for hit in mset:
        # The term names should be the same whichever way the document is
        # identified.
        by_docid = list(enquire.matching_terms(hit.docid))
        by_item = list(enquire.matching_terms(hit))
        expect(by_docid, by_item)

    top_terms = list(enquire.matching_terms(mset.get_hit(0)))
    expect(top_terms, [b'it', b'two', b'warm', b'was'])
def test_queryterms_iter():
    """Test Query term iterator.

    Iterating an OP_OR query is expected to give its terms in sorted order.

    """
    db = setup_database()
    query = xapian.Query(xapian.Query.OP_OR, ("was", "it", "warm", "two"))

    # Collect the term names by iterating the query directly.
    seen = []
    for term in query:
        seen.append(term)
    expect(seen, [b'it', b'two', b'warm', b'was'])
def test_queryparser_stoplist_iter():
    """Test QueryParser stoplist iterator.

    The same query string is parsed with no stopper, with a stopper but no
    stemmer, and with both a stopper and a stemmer.

    """
    stemmer = xapian.Stem('en')
    text = 'to be or not to be is the questions'

    # Check behaviour without having set a stoplist.
    queryparser = xapian.QueryParser()
    queryparser.set_stemmer(stemmer)
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    expect(list(queryparser.stoplist()), [])
    query = queryparser.parse_query(text)
    expect(list(queryparser.stoplist()), [])
    expect(str(query),
           'Query((Zto@1 OR Zbe@2 OR Zor@3 OR Znot@4 OR Zto@5 OR Zbe@6 OR '
           'Zis@7 OR Zthe@8 OR Zquestion@9))')

    # Check behaviour with a stoplist, but no stemmer
    queryparser = xapian.QueryParser()
    stopper = xapian.SimpleStopper()
    for word in ('to', 'not', 'question'):
        stopper.add(word)
    queryparser.set_stopper(stopper)
    expect(list(queryparser.stoplist()), [])
    query = queryparser.parse_query(text)

    expect(list(queryparser.stoplist()), [b'to', b'not', b'to'])
    expect(str(query),
           'Query((be@2 OR or@3 OR be@6 OR is@7 OR the@8 OR questions@9))')

    # Check behaviour with a stoplist and a stemmer
    queryparser.set_stemmer(stemmer)
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    # Shouldn't have changed since previous query.
    expect(list(queryparser.stoplist()), [b'to', b'not', b'to'])
    query = queryparser.parse_query(text)

    expect(list(queryparser.stoplist()), [b'to', b'not', b'to'])
    expect(str(query),
           'Query((Zbe@2 OR Zor@3 OR Zbe@6 OR Zis@7 OR Zthe@8 OR Zquestion@9))')
def test_queryparser_unstem_iter():
    """Test QueryParser unstemlist iterator.

    Checks unstemlist() before and after parsing a query, first for a
    parser with no stemmer and then for one using STEM_SOME.

    """
    stemmer = xapian.Stem('en')
    text = 'to question questions'

    # Without a stemmer, each term should map back to just itself once a
    # query has been parsed.
    queryparser = xapian.QueryParser()
    for term in ('to', 'question', 'questions'):
        expect(list(queryparser.unstemlist(term)), [])
    query = queryparser.parse_query(text)

    for term, unstemmed in (('to', [b'to']),
                            ('question', [b'question']),
                            ('questions', [b'questions'])):
        expect(list(queryparser.unstemlist(term)), unstemmed)
    expect(str(query),
           'Query((to@1 OR question@2 OR questions@3))')

    # With a stemmer, the unstemmed forms are looked up by stemmed term.
    queryparser = xapian.QueryParser()
    queryparser.set_stemmer(stemmer)
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    for term in ('Zto', 'Zquestion', 'Zquestions'):
        expect(list(queryparser.unstemlist(term)), [])
    query = queryparser.parse_query(text)

    for term, unstemmed in (('Zto', [b'to']),
                            ('Zquestion', [b'question', b'questions']),
                            ('Zquestions', [])):
        expect(list(queryparser.unstemlist(term)), unstemmed)
    expect(str(query),
           'Query((Zto@1 OR Zquestion@2 OR Zquestion@3))')
def test_allterms_iter():
    """Test all-terms iterator on Database.

    Covers iterating the database directly, access to items saved from an
    iterator which has since moved on, and allterms() with a prefix.

    """
    db = setup_database()

    context("making a list of the term names and frequencies")
    terms = []
    freqs = []
    for entry in db:
        terms.append(entry.term)
        expect_exception(xapian.InvalidOperationError, 'Iterator does not support wdfs', getattr, entry, 'wdf')
        freqs.append(entry.termfreq)
        expect_exception(xapian.InvalidOperationError, 'Iterator does not support position lists', getattr, entry, 'positer')

    context("checking that items are no longer valid once the iterator has moved on")
    stale = [entry for entry in db]

    expect(len(stale), len(terms))
    for entry, term in zip(stale, terms):
        expect(entry.term, term)

    expect(len(stale), len(freqs))
    for entry in stale:
        expect_exception(xapian.InvalidOperationError, 'Iterator has moved, and does not support random access', getattr, entry, 'termfreq')

    context("checking that restricting the terms iterated with a prefix works")
    wanted = [(term, freq) for term, freq in zip(terms, freqs)
              if term.startswith(b't')]
    seen = 0
    for entry in db.allterms('t'):
        want_term, want_freq = wanted[seen]
        expect(entry.term, want_term)
        expect(entry.termfreq, want_freq)
        seen += 1
    expect(len(wanted), seen)
def test_termlist_iter():
    """Test termlist iterator on Database.

    Covers plain iteration over the termlist of document 3, skip_to()
    (forwards, to the current position, backwards and past the end), and
    which attributes of saved items can still be read once the iterator
    has moved on.

    """
    def snapshot(termitem):
        # Gather the item's fields into a tuple for comparison.
        return (termitem.term, termitem.wdf, termitem.termfreq,
                [pos for pos in termitem.positer])

    db = setup_database()

    # Make lists of the item contents
    terms = []
    wdfs = []
    freqs = []
    positers = []
    for termitem in db.termlist(3):
        terms.append(termitem.term)
        wdfs.append(termitem.wdf)
        freqs.append(termitem.termfreq)
        positers.append([pos for pos in termitem.positer])

    expect(terms, [b'it', b'two', b'warm', b'was'])
    expect(wdfs, [1, 2, 1, 1])
    expect(freqs, [5, 3, 4, 4])
    expect(positers, [[2], [], [3], [1]])

    # Test skip_to().
    tliter = db.termlist(3)

    # skip to an item before the first item.
    expect(snapshot(tliter.skip_to('a')), (b'it', 1, 5, [2]))

    # skip forwards to an item.
    expect(snapshot(tliter.skip_to('two')), (b'two', 2, 3, []))

    # skip to same place (should return same item)
    expect(snapshot(tliter.skip_to('two')), (b'two', 2, 3, []))

    # next() after a skip_to(), should return next item.
    expect(snapshot(next(tliter)), (b'warm', 1, 4, [3]))

    # skip to same place (should return same item)
    expect(snapshot(tliter.skip_to('warm')), (b'warm', 1, 4, [3]))

    # skip backwards (should return same item) - this result was
    # previously discarded without being checked.
    expect(snapshot(tliter.skip_to('a')), (b'warm', 1, 4, [3]))

    # skip to after end.
    expect_exception(StopIteration, '', tliter.skip_to, 'zoo')
    # skip backwards (should still return StopIteration).
    expect_exception(StopIteration, '', tliter.skip_to, 'a')
    # next should continue to return StopIteration.
    expect_exception(StopIteration, '', next, tliter)

    # Make a list of the terms (so we can test if they're still valid
    # once the iterator has moved on).
    termitems = [termitem for termitem in db.termlist(3)]

    expect(len(termitems), len(terms))
    for termitem, term in zip(termitems, terms):
        expect(termitem.term, term)

    expect(len(termitems), len(wdfs))
    for termitem, wdf in zip(termitems, wdfs):
        expect(termitem.wdf, wdf)

    expect(len(termitems), len(freqs))
    for termitem in termitems:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, termitem, 'termfreq')

    # Compare against positers here (not freqs again), matching the
    # attribute checked in the loop below.
    expect(len(termitems), len(positers))
    for termitem in termitems:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, termitem, 'positer')
def test_dbdocument_iter():
    """Test document terms iterator for document taken from a database.

    Iterates the terms of document 3 fetched from the database, then
    checks which attributes of saved items can still be read once the
    iterator has moved on.

    """
    db = setup_database()

    doc = db.get_document(3)

    # Make lists of the item contents
    terms = []
    wdfs = []
    freqs = []
    positers = []
    for termitem in doc:
        terms.append(termitem.term)
        wdfs.append(termitem.wdf)
        freqs.append(termitem.termfreq)
        positers.append([pos for pos in termitem.positer])

    expect(terms, [b'it', b'two', b'warm', b'was'])
    expect(wdfs, [1, 2, 1, 1])
    expect(freqs, [5, 3, 4, 4])
    expect(positers, [[2], [], [3], [1]])

    # Make a list of the terms (so we can test if they're still valid
    # once the iterator has moved on).
    termitems = [termitem for termitem in doc]

    expect(len(termitems), len(terms))
    for termitem, term in zip(termitems, terms):
        expect(termitem.term, term)

    expect(len(termitems), len(wdfs))
    for termitem, wdf in zip(termitems, wdfs):
        expect(termitem.wdf, wdf)

    expect(len(termitems), len(freqs))
    for termitem in termitems:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, termitem, 'termfreq')

    # Compare against positers here (not freqs again), matching the
    # attribute checked in the loop below.
    expect(len(termitems), len(positers))
    for termitem in termitems:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, termitem, 'positer')
def test_newdocument_iter():
    """Test document terms iterator for newly created document.

    The document is never added to a database, so reading termfreq from
    its term items is expected to raise InvalidOperationError.

    """
    doc = xapian.Document()
    doc.set_data("was it warm? two")
    for position, term in enumerate(("was", "it", "warm"), 1):
        doc.add_posting(term, position)
    doc.add_term("two", 2)

    moved_msg = 'Iterator has moved, and does not support random access'

    # Make lists of the item contents
    terms = []
    wdfs = []
    positers = []
    for entry in doc:
        terms.append(entry.term)
        wdfs.append(entry.wdf)
        expect_exception(xapian.InvalidOperationError,
                         "get_termfreq() not valid for a TermIterator from a "
                         "Document which is not associated with a database",
                         getattr, entry, 'termfreq')
        positers.append([pos for pos in entry.positer])

    expect(terms, [b'it', b'two', b'warm', b'was'])
    expect(wdfs, [1, 2, 1, 1])
    expect(positers, [[2], [], [3], [1]])

    # Make a list of the terms (so we can test if they're still valid
    # once the iterator has moved on).
    stale = [entry for entry in doc]

    expect(len(stale), len(terms))
    for entry, term in zip(stale, terms):
        expect(entry.term, term)

    expect(len(stale), len(wdfs))
    for entry, wdf in zip(stale, wdfs):
        expect(entry.wdf, wdf)

    for entry in stale:
        expect_exception(xapian.InvalidOperationError, moved_msg,
                         getattr, entry, 'termfreq')

    expect(len(stale), len(positers))
    for entry in stale:
        expect_exception(xapian.InvalidOperationError, moved_msg,
                         getattr, entry, 'positer')
def test_postinglist_iter():
    """Test postinglist iterator on Database.

    Covers plain iteration over the postlist for the term 'it', skip_to()
    (forwards, to the current position, backwards and past the end), and
    which attributes of saved items can still be read once the iterator
    has moved on.

    """
    def snapshot(posting):
        # Gather the posting's fields into a tuple for comparison.
        return (posting.docid, posting.doclength, posting.wdf,
                [pos for pos in posting.positer])

    db = setup_database()

    # Make lists of the item contents
    docids = []
    doclengths = []
    wdfs = []
    positers = []
    for posting in db.postlist('it'):
        docids.append(posting.docid)
        doclengths.append(posting.doclength)
        wdfs.append(posting.wdf)
        positers.append([pos for pos in posting.positer])

    expect(docids, [1, 2, 3, 4, 5])
    expect(doclengths, [3, 3, 5, 8, 19])
    expect(wdfs, [1, 1, 1, 1, 8])
    expect(positers, [[1], [2], [2], [2], [2, 7]])

    # Test skip_to().
    pliter = db.postlist('it')

    # skip to an item before the first item.
    expect(snapshot(pliter.skip_to(0)), (1, 3, 1, [1]))

    # skip forwards to an item.
    expect(snapshot(pliter.skip_to(3)), (3, 5, 1, [2]))

    # skip to same place (should return same item)
    expect(snapshot(pliter.skip_to(3)), (3, 5, 1, [2]))

    # next() after a skip_to(), should return next item.
    expect(snapshot(next(pliter)), (4, 8, 1, [2]))

    # skip to same place (should return same item)
    expect(snapshot(pliter.skip_to(4)), (4, 8, 1, [2]))

    # skip backwards (should return same item)
    expect(snapshot(pliter.skip_to(2)), (4, 8, 1, [2]))

    # skip to after end.
    expect_exception(StopIteration, '', pliter.skip_to, 6)
    # skip backwards (should still return StopIteration).  Use a docid
    # before the end so that this really is a backwards skip.
    expect_exception(StopIteration, '', pliter.skip_to, 2)
    # next should continue to return StopIteration.
    expect_exception(StopIteration, '', next, pliter)

    # Make a list of the postings (so we can test if they're still valid once
    # the iterator has moved on).
    postings = [posting for posting in db.postlist('it')]

    expect(len(postings), len(docids))
    for posting, docid in zip(postings, docids):
        expect(posting.docid, docid)

    expect(len(postings), len(doclengths))
    for posting, doclength in zip(postings, doclengths):
        expect(posting.doclength, doclength)

    expect(len(postings), len(wdfs))
    for posting, wdf in zip(postings, wdfs):
        expect(posting.wdf, wdf)

    expect(len(postings), len(positers))
    for posting in postings:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, posting, 'positer')
def test_valuestream_iter():
    """Test a valuestream iterator on Database.

    Covers plain iteration over several value slots, and the interaction
    of skip_to() and next(), including after the end has been reached.

    """
    def pair(item):
        # The two fields of a valuestream item which this test compares.
        return (item.docid, item.value)

    def pairs(slot):
        return [pair(item) for item in db.valuestream(slot)]

    db = setup_database()

    # Check basic iteration
    expect(pairs(0), [(3, b'\xa4'), (4, b'\xa2'), (5, b'\xa4')])
    expect(pairs(1), [])
    expect(pairs(5), [(5, b"five")])
    expect(pairs(9), [(5, b"nine")])

    # Test skip_to() on iterator with no values, and behaviours when called
    # after already returning StopIteration.
    stream = db.valuestream(1)
    expect_exception(StopIteration, "", stream.skip_to, 1)
    expect_exception(StopIteration, "", stream.skip_to, 1)
    stream = db.valuestream(1)
    expect_exception(StopIteration, "", stream.skip_to, 1)
    expect_exception(StopIteration, "", stream.__next__)
    stream = db.valuestream(1)
    expect_exception(StopIteration, "", stream.__next__)
    expect_exception(StopIteration, "", stream.skip_to, 1)

    # Test that skipping to a value works, and that skipping doesn't have to
    # advance.
    stream = db.valuestream(0)
    for target, wanted in ((4, (4, b'\xa2')),
                           (4, (4, b'\xa2')),
                           (1, (4, b'\xa2')),
                           (5, (5, b'\xa4'))):
        expect(pair(stream.skip_to(target)), wanted)
    expect_exception(StopIteration, "", stream.skip_to, 6)

    # Test that alternating skip_to() and next() works.
    stream = db.valuestream(0)
    expect(pair(next(stream)), (3, b'\xa4'))
    expect(pair(stream.skip_to(4)), (4, b'\xa2'))
    expect(pair(next(stream)), (5, b'\xa4'))
    expect_exception(StopIteration, "", stream.skip_to, 6)

    # Test that next works correctly after skip_to() called with an earlier
    # item.
    stream = db.valuestream(0)
    expect(pair(stream.skip_to(4)), (4, b'\xa2'))
    expect(pair(stream.skip_to(1)), (4, b'\xa2'))
    expect(pair(next(stream)), (5, b'\xa4'))

    # Test that next works correctly after skipping to last item
    stream = db.valuestream(0)
    expect(pair(stream.skip_to(5)), (5, b'\xa4'))
    expect_exception(StopIteration, "", stream.__next__)
def test_position_iter():
    """Test position iterator for a document in a database.

    Reads the positions of the term 'it' in document 5.

    """
    db = setup_database()

    # The document is fetched here but not otherwise used below.
    doc = db.get_document(5)

    # Gather every position the iterator yields.
    found = []
    for position in db.positionlist(5, 'it'):
        found.append(position)

    expect(found, [2, 7])
def test_value_iter():
    """Test iterators over list of values in a document.

    Document 5 is expected to have values in slots 0, 5 and 9.

    """
    db = setup_database()
    doc = db.get_document(5)

    items = list(doc.values())
    expect(len(items), 3)

    # (slot number, value) expected for each item, in iteration order.
    wanted = (
        (0, xapian.sortable_serialise(2)),
        (5, b'five'),
        (9, b'nine'),
    )
    for index, (num, value) in enumerate(wanted):
        expect(items[index].num, num)
        expect(items[index].value, value)
def test_synonyms_iter():
    """Test iterators over list of synonyms in a database.

    Synonyms are added through a writable database; a separately opened
    read-only database is expected to see them only after commit().

    """
    def check(database, hello_synonyms, keys):
        # keys is what every key lookup matching 'hello' should give.
        expect(list(database.synonyms('foo')), [])
        expect(list(database.synonyms('hello')), hello_synonyms)
        expect(list(database.synonym_keys()), keys)
        expect(list(database.synonym_keys('foo')), [])
        expect(list(database.synonym_keys('he')), keys)
        expect(list(database.synonym_keys('hello')), keys)

    dbpath = 'db_test_synonyms_iter'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)

    for synonym in ('hi', 'howdy'):
        db.add_synonym('hello', synonym)

    check(db, [b'hi', b'howdy'], [b'hello'])

    # Opened before the commit, so nothing should be visible yet.
    dbr = xapian.Database(dbpath)
    check(dbr, [], [])

    db.commit()

    check(db, [b'hi', b'howdy'], [b'hello'])

    # Opened after the commit.
    dbr = xapian.Database(dbpath)
    check(dbr, [b'hi', b'howdy'], [b'hello'])

    db.close()
    expect(xapian.Database.check(dbpath), 0)
    dbr.close()
    shutil.rmtree(dbpath)
def test_metadata_keys_iter():
    """Test iterators over list of metadata keys in a database.

    Keys whose value ends up empty ('item3' and 'item4') are expected not
    to be listed; a separately opened read-only database is expected to
    see the keys only after commit().

    """
    def check(database, populated):
        # With populated false, every lookup should come back empty.
        def want(keys):
            return keys if populated else []
        expect(list(database.metadata_keys()),
               want([b'author', b'item1', b'item2', b'type']))
        expect(list(database.metadata_keys('foo')), [])
        expect(list(database.metadata_keys('item')), want([b'item1', b'item2']))
        expect(list(database.metadata_keys('it')), want([b'item1', b'item2']))
        expect(list(database.metadata_keys('type')), want([b'type']))

    dbpath = 'db_test_metadata_iter'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)

    for key, value in (('author', 'richard'),
                       ('item1', 'hello'),
                       ('item1', 'hi'),
                       ('item2', 'howdy'),
                       ('item3', ''),
                       ('item4', 'goodbye'),
                       ('item4', ''),
                       ('type', 'greeting')):
        db.set_metadata(key, value)

    check(db, True)

    # Opened before the commit, so nothing should be visible yet.
    dbr = xapian.Database(dbpath)
    check(dbr, False)

    db.commit()
    check(db, True)

    # Opened after the commit.
    dbr = xapian.Database(dbpath)
    check(dbr, True)

    db.close()
    expect(xapian.Database.check(dbpath), 0)
    dbr.close()
    shutil.rmtree(dbpath)
def test_spell():
    """Test basic spelling correction features.

    Spellings are added through a writable database; a separately opened
    read-only database is expected to see them only after commit().

    """
    def spellings(database):
        return [(item.term, item.termfreq) for item in database.spellings()]

    dbpath = 'db_test_spell'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    added = [(b'hello', 1), (b'mell', 2)]

    db.add_spelling('hello')
    db.add_spelling('mell', 2)
    expect(db.get_spelling_suggestion('hell'), b'mell')
    expect(spellings(db), added)

    # Opened before the commit, so nothing should be visible yet.
    dbr = xapian.Database(dbpath)
    expect(dbr.get_spelling_suggestion('hell'), b'')
    expect(spellings(dbr), [])

    db.commit()

    # Opened after the commit.
    dbr = xapian.Database(dbpath)
    expect(db.get_spelling_suggestion('hell'), b'mell')
    expect(dbr.get_spelling_suggestion('hell'), b'mell')
    expect(spellings(dbr), added)

    db.close()
    expect(xapian.Database.check(dbpath), 0)
    dbr.close()
    shutil.rmtree(dbpath)
def test_queryparser_custom_rp():
    """Test QueryParser with a custom (in python) RangeProcessor.

    The processor prefixes the start of the range with "A" and the end
    with "B", and builds a value range query on slot 7.

    """
    class MyRP(xapian.RangeProcessor):
        def __init__(self):
            xapian.RangeProcessor.__init__(self)

        def __call__(self, begin, end):
            # begin and end are decoded as UTF-8 before being prefixed.
            lower = "A" + begin.decode('utf-8')
            upper = "B" + end.decode('utf-8')
            return xapian.Query(xapian.Query.OP_VALUE_RANGE, 7, lower, upper)

    processor = MyRP()
    queryparser = xapian.QueryParser()
    queryparser.add_rangeprocessor(processor)

    parsed = queryparser.parse_query('5..8')
    expect(str(parsed),
           'Query(VALUE_RANGE 7 A5 B8)')
def test_queryparser_custom_rp_deallocation():
    """Test that QueryParser doesn't delete RangeProcessors too soon.

    The parser is built in a helper function, so the only Python variable
    referring to the RangeProcessor has gone by the time a query is parsed.

    """
    class MyRP(xapian.RangeProcessor):
        def __init__(self):
            xapian.RangeProcessor.__init__(self)

        def __call__(self, begin, end):
            # begin and end are decoded as UTF-8 before being prefixed.
            lower = "A" + begin.decode('utf-8')
            upper = "B" + end.decode('utf-8')
            return xapian.Query(xapian.Query.OP_VALUE_RANGE, 7, lower, upper)

    def make_parser():
        # The processor is only referenced by a local variable here.
        parser = xapian.QueryParser()
        processor = MyRP()
        parser.add_rangeprocessor(processor)
        return parser

    queryparser = make_parser()
    parsed = queryparser.parse_query('5..8')

    expect(str(parsed),
           'Query(VALUE_RANGE 7 A5 B8)')
def test_scale_weight():
    """Test query OP_SCALE_WEIGHT feature.

    For each multiplier, the weights from the scaled query are compared
    (truncated to millionths) with the unscaled weights multiplied by
    hand.  A negative multiplier is expected to be rejected.

    """
    db = setup_database()
    for mult in (0, 1, 2.5):
        context("checking queries with OP_SCALE_WEIGHT with a multiplier of %r" %
                mult)
        plain_query = xapian.Query("it")
        scaled_query = xapian.Query(xapian.Query.OP_SCALE_WEIGHT, plain_query, mult)

        enquire = xapian.Enquire(db)
        enquire.set_query(plain_query)
        plain_mset = enquire.get_mset(0, 10)
        enquire.set_query(scaled_query)
        scaled_mset = enquire.get_mset(0, 10)

        if mult > 0:
            expected = [(int(item.weight * mult * 1000000), item.docid)
                        for item in plain_mset]
        else:
            # Every weight is expected to be zero, and the expected list
            # is sorted by docid for this case.
            expected = sorted((0, item.docid) for item in plain_mset)
        actual = [(int(item.weight * 1000000), item.docid)
                  for item in scaled_mset]
        expect(actual, expected)

    context("checking queries with OP_SCALE_WEIGHT with a multiplier of -1")
    plain_query = xapian.Query("it")
    expect_exception(xapian.InvalidArgumentError,
                     "OP_SCALE_WEIGHT requires factor >= 0",
                     xapian.Query,
                     xapian.Query.OP_SCALE_WEIGHT, plain_query, -1)
def test_weight_normalise():
    """Test normalising of query weights using the OP_SCALE_WEIGHT feature.

    Each query is first run asking for zero results, purely to read the
    maximum possible weight from the MSet.  The query is then wrapped in
    OP_SCALE_WEIGHT with the reciprocal of that maximum, and the test checks
    that the new maximum possible weight is 1 (to six decimal places) and
    that every returned weight lies in (0, 1].

    This tests that the get_max_possible() value is correct (though it isn't
    guaranteed to be at a tight bound), and that the SCALE_WEIGHT query can
    compensate correctly.

    """
    db = setup_database()
    query_strings = (
        "it",
        "was",
        "it was",
        "it was four",
        "it was four five",
        "\"was it warm\" four notpresent",
        "notpresent",
    )
    for query in query_strings:
        context("checking query %r using OP_SCALE_WEIGHT to normalise the weights" % query)
        parser = xapian.QueryParser()
        unscaled = parser.parse_query(query)
        enquire = xapian.Enquire(db)
        enquire.set_query(unscaled)
        probe = enquire.get_mset(0, 0)

        # Check the max_attained value is 0 - this gives us some reassurance
        # that the match didn't actually do the work of calculating any
        # results.
        expect(probe.get_max_attained(), 0)

        max_possible = probe.get_max_possible()
        if query == "notpresent":
            # Nothing to normalise for this query: its maximum is expected
            # to be 0, which could not be inverted below.
            expect(max_possible, 0)
            continue

        scale = 1.0 / max_possible
        normalised = xapian.Query(xapian.Query.OP_SCALE_WEIGHT, unscaled, scale)

        enquire = xapian.Enquire(db)
        enquire.set_query(normalised)
        results = enquire.get_mset(0, 10)
        # max_possible should be 1 (excluding rounding errors) for the
        # normalised query.
        expect(int(results.get_max_possible() * 1000000.0 + 0.5), 1000000)
        for hit in results:
            expect(hit.weight > 0, True)
            expect(hit.weight <= 1, True)
def test_valuesetmatchdecider():
    """Simple tests of the ValueSetMatchDecider class

    A decider constructed with True is expected to accept a document only
    while the document's slot 0 value is in the decider's set; one constructed
    with False is expected to do the opposite.

    """
    doc = xapian.Document()

    accepting = xapian.ValueSetMatchDecider(0, True)
    expect(accepting(doc), False)

    doc.add_value(0, 'foo')
    accepting.add_value('foo')
    expect(accepting(doc), True)

    accepting.remove_value('foo')
    expect(accepting(doc), False)

    rejecting = xapian.ValueSetMatchDecider(0, False)
    expect(rejecting(doc), True)

    rejecting.add_value('foo')
    expect(rejecting(doc), False)
def test_postingsource():
    """Simple test of the PostingSource class.

    Defines a Python PostingSource subclass that returns the odd docids up to
    a limit, wraps it in queries and Enquire objects built inside nested
    helper functions (so the Python references to the source and to the
    intermediate queries are dropped), and then checks that a match still
    returns the expected documents.

    """
    class OddPostingSource(xapian.PostingSource):
        """Posting source yielding docids 1, 3, 5, ... with falling weight."""

        def __init__(self, max):
            xapian.PostingSource.__init__(self)
            # Iteration is reported as finished once the docid exceeds this.
            self.max = max

        def init(self, db):
            # Start before the first odd docid; the first advance moves to 1.
            self.current = -1
            # One more than the document count, so that after the first
            # advance the weight equals the document count (checked below).
            self.weight = db.get_doccount() + 1
            self.set_maxweight(self.weight)

        def get_termfreq_min(self): return 0
        def get_termfreq_est(self): return int(self.max / 2)
        def get_termfreq_max(self): return self.max
        # NOTE(review): this is spelled __next__ whereas test_postingsource3
        # overrides next - presumably both reach the right director method;
        # confirm against the bindings.
        def __next__(self, minweight):
            self.current += 2
            # Each step lowers the weight by one and lowers the advertised
            # maximum weight to match.
            self.weight -= 1.0
            self.set_maxweight(self.weight)
        def at_end(self): return self.current > self.max
        def get_docid(self): return self.current
        def get_weight(self): return self.weight

    dbpath = 'db_test_postingsource'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    # Ten empty documents: only their docids matter to this test.
    for id in range(10):
        doc = xapian.Document()
        db.add_document(doc)

    # Do a dance to check that the posting source doesn't get dereferenced too
    # soon in various cases.
    def mkenq(db):
        # First - check that it's kept when the source goes out of scope.
        def mkquery():
            source = OddPostingSource(10)
            # The posting source is inside a list to check that case is
            # correctly handled.
            return xapian.Query(xapian.Query.OP_OR,
                                ["terM wHich wilL NoT maTch", xapian.Query(source)])

        # Check that it's kept when the query goes out of scope.
        def submkenq():
            query = mkquery()
            enquire = xapian.Enquire(db)
            enquire.set_query(query)
            return enquire

        # Check it's kept when the query is retrieved from enquire and put into
        # a new enquire.
        def submkenq2():
            enq1 = submkenq()
            enquire = xapian.Enquire(db)
            enquire.set_query(enq1.get_query())
            return enquire

        return submkenq2()

    enquire = mkenq(db)
    mset = enquire.get_mset(0, 10)

    expect([item.docid for item in mset], [1, 3, 5, 7, 9])
    # The top hit's weight is the initial weight (doccount + 1) minus the one
    # decrement applied by the first advance.
    expect(mset[0].weight, db.get_doccount())

    db.close()
    # Database.check() is expected to return 0 for the closed database.
    expect(xapian.Database.check(dbpath), 0)
    shutil.rmtree(dbpath)
def test_postingsource2():
    """Simple test of the PostingSource class.

    Uses the built-in ValueWeightPostingSource on slot 1 and drops the Python
    reference to it before running the match.  The expected result lists the
    documents in decreasing order of their slot 1 value.

    """
    dbpath = 'db_test_postingsource2'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    # One document per value; docids are assigned 1..10 in this order.
    for number in (6, 9, 4.5, 4.4, 4.6, 2, 1, 4, 3, 0):
        doc = xapian.Document()
        doc.add_value(1, xapian.sortable_serialise(number))
        db.add_document(doc)

    source = xapian.ValueWeightPostingSource(1)
    query = xapian.Query(source)
    del source  # Check that query keeps a reference to it.

    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    matches = enquire.get_mset(0, 10)

    ranked_docids = [hit.docid for hit in matches]
    expect(ranked_docids, [2, 1, 5, 3, 4, 8, 9, 6, 7, 10])

    db.close()
    shutil.rmtree(dbpath)
def test_postingsource3():
    """Test that ValuePostingSource can be usefully subclassed.

    The subclass weights each document by the cube of its slot 1 value, so
    the expected ranking is by decreasing value.

    """
    dbpath = 'db_test_postingsource3'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    for number in (1, 3, 2, 4):
        doc = xapian.Document()
        doc.add_value(1, xapian.sortable_serialise(number))
        db.add_document(doc)

    class PyValuePostingSource(xapian.ValuePostingSource):
        def __init__(self, slot):
            xapian.ValuePostingSource.__init__(self, slot)

        def init(self, db):
            xapian.ValuePostingSource.init(self, db)
            self.current = -1
            # The largest weight this source can return is the cube of the
            # slot's upper bound.
            upper = db.get_value_upper_bound(self.get_slot())
            self.set_maxweight(xapian.sortable_unserialise(upper) ** 3)

        def next(self, minweight):
            return xapian.ValuePostingSource.next(self, minweight)

        def get_weight(self):
            return xapian.sortable_unserialise(self.get_value()) ** 3

    source = PyValuePostingSource(1)
    query = xapian.Query(source)
    #del source # Check that query keeps a reference to it.

    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    matches = enquire.get_mset(0, 10)

    expect([hit.docid for hit in matches], [4, 2, 3, 1])

    db.close()
    expect(xapian.Database.check(dbpath), 0)
    shutil.rmtree(dbpath)
def test_value_stats():
    """Simple test of being able to get value statistics.

    Only slot 1 is populated, with ten values between 0 and 9.  Slots 0 and 2
    are expected to report a frequency of 0 and empty bounds.

    """
    dbpath = 'db_test_value_stats'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)

    for number in (6, 9, 4.5, 4.4, 4.6, 2, 1, 4, 3, 0):
        doc = xapian.Document()
        doc.add_value(1, xapian.sortable_serialise(number))
        db.add_document(doc)

    def expect_stats(slot, freq, lower, upper):
        # Frequency, then lower bound, then upper bound for one slot.
        expect(db.get_value_freq(slot), freq)
        expect(db.get_value_lower_bound(slot), lower)
        expect(db.get_value_upper_bound(slot), upper)

    expect_stats(0, 0, b"", b"")
    expect_stats(1, 10,
                 xapian.sortable_serialise(0),
                 xapian.sortable_serialise(9))
    expect_stats(2, 0, b"", b"")

    db.close()
    expect(xapian.Database.check(dbpath), 0)
    shutil.rmtree(dbpath)
def test_get_uuid():
    """Test getting UUIDs from databases.

    Two separate databases must report different UUIDs, a read-only handle
    must report the same UUID as the writable handle for the same path, and a
    Database wrapping a single sub-database must report that sub-database's
    UUID.

    """
    dbpath = 'db_test_get_uuid'
    path1 = dbpath + "1"
    path2 = dbpath + "2"

    db1 = xapian.WritableDatabase(path1, xapian.DB_CREATE_OR_OVERWRITE)
    db2 = xapian.WritableDatabase(path2, xapian.DB_CREATE_OR_OVERWRITE)
    dbr1 = xapian.Database(path1)
    dbr2 = xapian.Database(path2)
    expect(db1.get_uuid() != db2.get_uuid(), True)
    expect(db1.get_uuid(), dbr1.get_uuid())
    expect(db2.get_uuid(), dbr2.get_uuid())

    db = xapian.Database()
    db.add_database(db1)
    expect(db1.get_uuid(), db.get_uuid())

    # Close every handle before removing the directories.
    for handle in (db1, db2, dbr1, dbr2, db):
        handle.close()
    for path in (path1, path2):
        shutil.rmtree(path)
def test_director_exception():
    """Test handling of an exception raised in a director.

    Python subclasses of ExpandDecider and MatchDecider raise a custom
    exception from __call__.  The exception must surface unchanged both when
    the decider is called directly and when it is invoked through
    Enquire.get_eset() / Enquire.get_mset().

    """
    class TestException(Exception):
        def __init__(self, a, b):
            # The message is the concatenation of the two arguments.
            Exception.__init__(self, a + b)

    class EDecider(xapian.ExpandDecider):
        def __call__(self, term):
            raise TestException("foo", "bar")

    class MDecider(xapian.MatchDecider):
        def __call__(self, doc):
            raise TestException("foo", "bar")

    db = setup_database()
    enq = xapian.Enquire(db)
    enq.set_query(xapian.Query('it'))

    rset = xapian.RSet()
    rset.add_document(1)

    edecider = EDecider()
    expect_exception(TestException, "foobar", edecider, "foo")
    expect_exception(TestException, "foobar", enq.get_eset, 10, rset, edecider)

    mdecider = MDecider()
    expect_exception(TestException, "foobar", mdecider, xapian.Document())
    expect_exception(TestException, "foobar", enq.get_mset, 0, 10, None, mdecider)
def check_vals(db, vals):
    """Check that the values in slot 1 are as in vals.

    vals is indexed by docid.  Every docid from 1 up to the database's last
    docid is fetched and its slot 1 value is compared with vals[docid].

    """
    last_docid = db.get_lastdocid()
    for docid in range(1, last_docid + 1):
        stored = db.get_document(docid).get_value(1)
        message = "Expected stored value in doc %d" % docid
        expect(stored, vals[docid], message)
def test_value_mods():
    """Test handling of modifications to values.

    Builds an on-disk database with a slot 1 value in every document, then
    applies a fixed modification, a seeded series of random replacements and
    finally removes every remaining non-empty value, checking the stored
    values against an in-memory record (keyed by docid) both before and after
    each commit.  Finally checks that using the database after close() raises
    DatabaseClosedError.

    """
    dbpath = 'db_test_value_mods'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    # Fixed seed so the "random" modifications are reproducible.
    random.seed(42)
    doccount = 1000
    vals = {}

    # Add a value to all the documents
    for num in range(1, doccount):
        doc = xapian.Document()
        val = ('val%d' % num).encode('utf-8')
        doc.add_value(1, val)
        db.add_document(doc)
        vals[num] = val
    db.commit()
    check_vals(db, vals)

    # Modify one of the values (this is a regression test which failed with the
    # initial implementation of streaming values).
    doc = xapian.Document()
    val = b'newval0'
    doc.add_value(1, val)
    db.replace_document(2, doc)
    vals[2] = val
    db.commit()
    check_vals(db, vals)

    # Do some random modifications.
    for count in range(1, doccount * 2):
        docid = random.randint(1, doccount)
        doc = xapian.Document()

        # Every fifth modification sets an empty value.
        if count % 5 == 0:
            val = b''
        else:
            val = ('newval%d' % count).encode('utf-8')
        doc.add_value(1, val)
        db.replace_document(docid, doc)
        vals[docid] = val

    # Check the values before and after modification.
    check_vals(db, vals)
    db.commit()
    check_vals(db, vals)

    # Delete all the values which are non-empty, in a random order.
    # The recorded values are bytes, so the emptiness test must compare with
    # b''; comparing bytes with the str '' is never equal in Python 3, which
    # would select every document instead of only the non-empty ones.
    keys = [key for key, val in vals.items() if val != b'']
    random.shuffle(keys)
    for key in keys:
        doc = xapian.Document()
        db.replace_document(key, doc)
        vals[key] = b''
    check_vals(db, vals)
    db.commit()
    check_vals(db, vals)

    db.close()
    expect_exception(xapian.DatabaseClosedError, "Database has been closed", check_vals, db, vals)
    shutil.rmtree(dbpath)
def test_serialise_document():
    """Test serialisation of documents.

    A document is serialised and unserialised, and the copy is compared with
    the original: same number of terms, same (term, wdf) pairs, same
    (slot, value) pairs and same data.  This is done for a freshly built
    document and for one fetched from the in-memory test database.

    """
    def check_roundtrip(doc, termcount, data):
        serialised = doc.serialise()
        copy = xapian.Document.unserialise(serialised)
        expect(len(list(doc.termlist())), len(list(copy.termlist())))
        expect(len(list(doc.termlist())), termcount)
        expect([(item.term, item.wdf) for item in doc.termlist()],
               [(item.term, item.wdf) for item in copy.termlist()])
        expect([(item.num, item.value) for item in list(doc.values())],
               [(item.num, item.value) for item in list(copy.values())])
        expect(doc.get_data(), copy.get_data())
        expect(doc.get_data(), data)

    fresh = xapian.Document()
    fresh.add_term('foo', 2)
    fresh.add_value(1, b'bar')
    fresh.set_data('baz')
    check_roundtrip(fresh, 1, b'baz')

    db = setup_database()
    stored = db.get_document(1)
    check_roundtrip(stored, 3, b'is it cold?')
def test_serialise_query():
    """Test serialisation of queries.

    Each query is serialised and unserialised, and the string form of the
    copy is compared with that of the original and with a fixed description.

    """
    def check_roundtrip(original, description):
        restored = xapian.Query.unserialise(original.serialise())
        expect(str(original), str(restored))
        expect(str(original), description)

    check_roundtrip(xapian.Query(), 'Query()')
    check_roundtrip(xapian.Query('hello'), 'Query(hello)')
    # The sub-queries are given as one str and one bytes term.
    check_roundtrip(xapian.Query(xapian.Query.OP_OR, ('hello', b'world')),
                    'Query((hello OR world))')
def test_preserve_query_parser_stopper():
    """Test preservation of stopper set on query parser.

    The stopper's only Python reference is deleted inside the helper, so the
    parser must keep the stopper alive for it to take effect when parsing.

    """
    def make_qp():
        parser = xapian.QueryParser()
        stopper = xapian.SimpleStopper()
        for word in ('to', 'not'):
            stopper.add(word)
        parser.set_stopper(stopper)
        del stopper
        return parser

    queryparser = make_qp()
    # The parse result itself is not inspected; the check is on the stop list
    # the parser reports afterwards.
    parsed = queryparser.parse_query('to be')
    expect(list(queryparser.stoplist()), [b'to'])
def test_preserve_term_generator_stopper():
    """Test preservation of stopper set on term generator.

    The stopper's only Python reference is deleted inside the helper before
    the term generator is used to index text.

    """
    def make_tg():
        generator = xapian.TermGenerator()
        generator.set_stemmer(xapian.Stem('en'))
        stopper = xapian.SimpleStopper()
        for word in ('to', 'not'):
            stopper.add(word)
        generator.set_stopper(stopper)
        del stopper
        return generator

    termgen = make_tg()
    termgen.index_text('to be')

    indexed = termgen.get_document()
    terms = sorted(entry.term for entry in indexed.termlist())
    expect(terms, [b'Zbe', b'be', b'to'])
def test_preserve_enquire_sorter():
    """Test preservation of sorter set on enquire.

    For each of the three key-based sort setters, a MultiValueKeyMaker is
    handed to the Enquire object and its Python reference is then deleted.
    Running a match afterwards must still work.

    """
    db = xapian.WritableDatabase('', xapian.DB_BACKEND_INMEMORY)
    doc = xapian.Document()
    doc.add_term('foo')
    doc.add_value(1, '1')
    # The same document is added twice so there are two matches to sort.
    db.add_document(doc)
    db.add_document(doc)

    def make_enq(setter_name):
        enq = xapian.Enquire(db)
        sorter = xapian.MultiValueKeyMaker()
        getattr(enq, setter_name)(sorter, False)
        del sorter
        return enq

    for setter_name in ('set_sort_by_key',
                        'set_sort_by_key_then_relevance',
                        'set_sort_by_relevance_then_key'):
        enq = make_enq(setter_name)
        enq.set_query(xapian.Query('foo'))
        enq.get_mset(0, 10)
def test_matchspy():
    """Test use of matchspies.

    Covers three things: a spy whose Python reference is dropped before the
    match runs, clear_matchspies() detaching a spy so it collects nothing,
    and the values / top_values reported by a ValueCountMatchSpy on slot 0.

    """
    db = setup_database()
    enq = xapian.Enquire(db)
    enq.set_query(xapian.Query(xapian.Query.OP_OR, "was", "it"))

    def set_matchspy_deref(enq):
        """Set a matchspy, and then drop the reference, to check that it
        doesn't get deleted too soon.
        """
        dropped = xapian.ValueCountMatchSpy(0)
        enq.add_matchspy(dropped)
        del dropped

    set_matchspy_deref(enq)
    mset = enq.get_mset(0, 10)
    expect(len(mset), 5)

    spy = xapian.ValueCountMatchSpy(0)
    enq.add_matchspy(spy)
    # Regression test for clear_matchspies() - used to always raise an
    # exception due to a copy and paste error in its definition.
    enq.clear_matchspies()
    mset = enq.get_mset(0, 10)
    expect(list(spy.values()), [])

    enq.add_matchspy(spy)
    mset = enq.get_mset(0, 10)
    expect(spy.get_total(), 5)

    one_and_a_half = xapian.sortable_serialise(1.5)
    two = xapian.sortable_serialise(2)
    # values() is expected in value order, top_values() by decreasing count.
    by_value = [(entry.term, entry.termfreq) for entry in list(spy.values())]
    expect(by_value, [(one_and_a_half, 1), (two, 2)])
    by_count = [(entry.term, entry.termfreq) for entry in spy.top_values(10)]
    expect(by_count, [(two, 2), (one_and_a_half, 1)])
def test_import_star():
    """Test that "from xapian import *" works.

    This is a regression test - this failed in the 1.2.0 release.
    It's not normally good style to use it, but it should work anyway!

    """
    # Importing the helper module is the whole test; nothing from it is used.
    # NOTE(review): presumably test_xapian_star performs the star-import at
    # module level - confirm against that file.
    import test_xapian_star
def test_latlongcoords_iter():
    """Test LatLongCoordsIterator wrapping.

    Iterating an empty LatLongCoords must yield nothing; after appending two
    coordinates, iteration must yield them in insertion order.

    """
    coords = xapian.LatLongCoords()
    expect(list(coords), [])

    for longitude in (0, 1):
        coords.append(xapian.LatLongCoord(0, longitude))

    described = [str(coord) for coord in coords]
    expect(described, ['Xapian::LatLongCoord(0, 0)',
                       'Xapian::LatLongCoord(0, 1)'])
def test_compactor():
    """Test that xapian.Compactor works.

    Two small on-disk databases that share the metadata key 'key' are merged
    into a third, first with the default behaviour and then through a
    Compactor subclass which records status updates and joins duplicate
    metadata values with a comma.  Everything lives in a temporary directory
    which is removed at the end, whether or not the test passes.

    """
    tmpdir = tempfile.mkdtemp()
    db1 = db2 = db3 = None
    try:
        db1path, db2path, db3path = [os.path.join(tmpdir, name)
                                     for name in ('db1', 'db2', 'db3')]
        merged_terms = [(b'Hello', 2), (b'Hello1', 1), (b'Hello2', 1)]

        def open_inputs():
            # A fresh combined view over the two input databases.
            combined = xapian.Database()
            for path in (db1path, db2path):
                combined.add_database(xapian.Database(path))
            return combined

        def term_stats(database):
            return [(item.term, item.termfreq) for item in database.allterms()]

        # Set up a couple of sample input databases
        db1 = xapian.WritableDatabase(db1path, xapian.DB_CREATE_OR_OVERWRITE)
        doc1 = xapian.Document()
        doc1.add_term('Hello')
        doc1.add_term('Hello1')
        doc1.add_value(0, 'Val1')
        db1.set_metadata('key', '1')
        db1.set_metadata('key1', '1')
        db1.add_document(doc1)
        db1.commit()

        db2 = xapian.WritableDatabase(db2path, xapian.DB_CREATE_OR_OVERWRITE)
        doc2 = xapian.Document()
        doc2.add_term('Hello')
        doc2.add_term('Hello2')
        doc2.add_value(0, 'Val2')
        db2.set_metadata('key', '2')
        db2.set_metadata('key2', '2')
        db2.add_document(doc2)
        db2.commit()

        # Compact with the default compactor
        # Metadata conflicts are resolved by picking the first value
        db_to_compact = open_inputs()
        db_to_compact.compact(db3path)

        db3 = xapian.Database(db3path)
        expect(term_stats(db3), merged_terms)
        expect(db3.get_document(1).get_value(0), b'Val1')
        expect(db3.get_document(2).get_value(0), b'Val2')
        expect(db3.get_metadata('key'), b'1')
        expect(db3.get_metadata('key1'), b'1')
        expect(db3.get_metadata('key2'), b'2')

        context("testing a custom compactor which merges duplicate metadata")
        class MyCompactor(xapian.Compactor):
            def __init__(self):
                xapian.Compactor.__init__(self)
                self.log = []

            def set_status(self, table, status):
                # An empty status is logged as the start of work on a table.
                name = table.decode('utf-8')
                if status:
                    entry = '%s: %s' % (name, status.decode('utf-8'))
                else:
                    entry = 'Starting %s' % name
                self.log.append(entry)

            def resolve_duplicate_metadata(self, key, vals):
                return b','.join(vals)

        c = MyCompactor()
        db_to_compact = open_inputs()
        db_to_compact.compact(db3path, 0, 0, c)
        log = '\n'.join(c.log)
        # Check we got some messages in the log
        expect('Starting postlist' in log, True)

        db3 = xapian.Database(db3path)
        expect(term_stats(db3), merged_terms)
        expect(db3.get_metadata('key'), b'1,2')
        expect(db3.get_metadata('key1'), b'1')
        expect(db3.get_metadata('key2'), b'2')

    finally:
        # Close whichever handles were opened before deleting the files.
        for handle in (db1, db2, db3):
            if handle is not None:
                handle.close()

        shutil.rmtree(tmpdir)
def test_custom_matchspy():
    """Test that a MatchSpy subclassed in Python gets called during a match.

    The spy counts its invocations.  After a match which returns a single
    result, the count must be at least 1.

    """
    class MSpy(xapian.MatchSpy):
        def __init__(self):
            xapian.MatchSpy.__init__(self)
            self.count = 0

        def __call__(self, doc, weight):
            self.count = self.count + 1

    db = setup_database()
    mspy = MSpy()

    enquire = xapian.Enquire(db)
    enquire.add_matchspy(mspy)
    enquire.set_query(xapian.Query(xapian.Query.OP_OR, "was", "it"))

    matches = enquire.get_mset(0, 1)
    expect(len(matches), 1)
    expect(mspy.count >= 1, True)

    expect(db.get_doccount(), 5)
def test_removed_features():
    """Test that names which have been removed are really not accessible.

    For the xapian module and for an instance of each listed class, looking
    up each listed attribute name must raise AttributeError.

    """
    db = xapian.WritableDatabase('', xapian.DB_BACKEND_INMEMORY)
    doc = xapian.Document()
    enq = xapian.Enquire(db)
    eset = xapian.ESet()
    mset = xapian.MSet()
    query = xapian.Query()
    qp = xapian.QueryParser()
    titer = xapian._TermIterator()
    postiter = xapian._PostingIterator()

    def check_missing(obj, attr):
        expect_exception(AttributeError, None, getattr, obj, attr)

    # (object, attribute names) in the order they are checked.
    removed = (
        (xapian, (
            'Stem_get_available_languages',
            'TermIterator',
            'PositionIterator',
            'PostingIterator',
            'ValueIterator',
            'MSetIterator',
            'ESetIterator',
        )),
        (db, (
            'allterms_begin', 'allterms_end',
            'metadata_keys_begin', 'metadata_keys_end',
            'synonym_keys_begin', 'synonym_keys_end',
            'synonyms_begin', 'synonyms_end',
            'spellings_begin', 'spellings_end',
            'positionlist_begin', 'positionlist_end',
            'postlist_begin', 'postlist_end',
            'termlist_begin', 'termlist_end',
        )),
        (doc, (
            'termlist_begin', 'termlist_end',
            'values_begin', 'values_end',
        )),
        (enq, ('get_matching_terms_begin', 'get_matching_terms_end')),
        (eset, ('begin', 'end')),
        (mset, ('begin', 'end')),
        (postiter, ('positionlist_begin', 'positionlist_end')),
        (query, ('get_terms_begin', 'get_terms_end')),
        (qp, (
            'stoplist_begin', 'stoplist_end',
            'unstem_begin', 'unstem_end',
        )),
        (titer, ('positionlist_begin', 'positionlist_end')),
    )
    for obj, attrs in removed:
        for attr in attrs:
            check_missing(obj, attr)
def test_repr():
    """Test that repr() of wrapped objects doesn't return None.

    Objects are created and checked one at a time, grouped below by the
    constructor arguments they take.

    """
    # repr() returned None in 1.4.0.
    def check(obj):
        expect(repr(obj) is None, False)

    check(xapian.Query('foo'))

    for error_class in (
        xapian.AssertionError,
        xapian.InvalidArgumentError,
        xapian.InvalidOperationError,
        xapian.UnimplementedError,
        xapian.DatabaseError,
        xapian.DatabaseClosedError,
        xapian.DatabaseCorruptError,
        xapian.DatabaseCreateError,
        xapian.DatabaseLockError,
        xapian.DatabaseModifiedError,
        xapian.DatabaseOpeningError,
        xapian.DatabaseVersionError,
        xapian.DatabaseNotFoundError,
        xapian.DocNotFoundError,
        xapian.FeatureUnavailableError,
        xapian.InternalError,
        xapian.NetworkError,
        xapian.NetworkTimeoutError,
        xapian.QueryParserError,
        xapian.SerialisationError,
        xapian.RangeError,
        xapian.WildcardError,
    ):
        check(error_class('foo'))

    for cls in (xapian.Document, xapian.Registry, xapian.Query):
        check(cls())

    check(xapian.Stem('en'))

    for cls in (
        xapian.TermGenerator,
        xapian.MSet,
        xapian.ESet,
        xapian.RSet,
        xapian.MultiValueKeyMaker,
        xapian.SimpleStopper,
        xapian.RangeProcessor,
    ):
        check(cls())

    for cls in (xapian.DateRangeProcessor, xapian.NumberRangeProcessor):
        check(cls(1))

    for cls in (
        xapian.QueryParser,
        xapian.BoolWeight,
        xapian.TfIdfWeight,
        xapian.BM25Weight,
        xapian.BM25PlusWeight,
        xapian.TradWeight,
        xapian.InL2Weight,
        xapian.IfB2Weight,
        xapian.IneB2Weight,
        xapian.BB2Weight,
        xapian.DLHWeight,
        xapian.PL2Weight,
        xapian.PL2PlusWeight,
        xapian.DPHWeight,
        xapian.LM2StageWeight,
        xapian.LMAbsDiscountWeight,
        xapian.LMDirichletWeight,
        xapian.LMJMWeight,
        xapian.CoordWeight,
        xapian.Compactor,
    ):
        check(cls())

    for cls in (
        xapian.ValuePostingSource,
        xapian.ValueWeightPostingSource,
        xapian.DecreasingValueWeightPostingSource,
        xapian.ValueMapPostingSource,
        xapian.FixedWeightPostingSource,
        xapian.ValueCountMatchSpy,
    ):
        check(cls(1))

    for cls in (
        xapian.LatLongCoord,
        xapian.LatLongCoords,
        xapian.GreatCircleMetric,
        xapian.Database,
        xapian.WritableDatabase,
    ):
        check(cls())
def test_lone_surrogate():
    """Test that a lone surrogate in input data raises UnicodeEncodeError.

    Regression test for bug fixed in 1.4.12 (previous versions quietly
    skipped the lone surrogate when converting to UTF-8).

    """
    stem = xapian.Stem("none")
    try:
        term = stem(u"a\udead0")
    except UnicodeEncodeError:
        # The expected outcome: the string was rejected.
        return
    raise TestFail("Lone surrogate accepted (output as %s)" % term)
# Overall outcome: set to False as soon as any run of the tests fails.
result = True

# Run all tests (ie, callables with names starting "test_").
def run():
    """Run the tests named on the command line and record any failure."""
    global result
    if runtests(globals(), sys.argv[1:]):
        return
    result = False

print("Running tests without threads")
run()

if have_threads:
    print("Running tests with threads")

    # This testcase seems to just block when run in a thread under Python 3
    # on some platforms.  It fails with 3.2.3 on Debian wheezy, but passes
    # with the exact same package version on Debian unstable not long after
    # the jessie release.  The issue it's actually serving to regression
    # test for is covered by running it without threads, so just disable it
    # rather than risk test failures that don't seem to indicate a problem
    # in Xapian.
    del test_import_star

    t = threading.Thread(name='test runner', target=run)
    t.start()
    # Block until the thread has completed so the thread gets a chance to exit
    # with error status.
    t.join()

# Report failure of either run through the process exit status.
if not result:
    sys.exit(1)
1761 # vim:syntax=python:set expandtab: