1 # Tests of Python-specific parts of the xapian bindings.
3 # Copyright (C) 2007 Lemur Consulting Ltd
4 # Copyright (C) 2008,2009,2010,2011,2013,2014,2015,2016,2019 Olly Betts
5 # Copyright (C) 2010,2011 Richard Boulton
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License as
9 # published by the Free Software Foundation; either version 2 of the
10 # License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
35 from testsuite
import *
38 """Set up and return an inmemory database with 5 documents.
41 db
= xapian
.WritableDatabase('', xapian
.DB_BACKEND_INMEMORY
)
43 doc
= xapian
.Document()
44 doc
.set_data("is it cold?")
46 doc
.add_posting("it", 1)
47 doc
.add_posting("cold", 2)
50 doc
= xapian
.Document()
51 doc
.set_data("was it warm?")
52 doc
.add_posting("was", 1)
53 doc
.add_posting("it", 2)
54 doc
.add_posting("warm", 3)
56 doc
.set_data("was it warm? two")
57 doc
.add_term("two", 2)
58 doc
.add_value(0, xapian
.sortable_serialise(2))
60 doc
.set_data("was it warm? three")
61 doc
.add_term("three", 3)
62 doc
.add_value(0, xapian
.sortable_serialise(1.5))
64 doc
.set_data("was it warm? four it")
65 doc
.add_term("four", 4)
67 doc
.add_posting("it", 7)
68 doc
.add_value(5, 'five')
69 doc
.add_value(9, 'nine')
70 doc
.add_value(0, xapian
.sortable_serialise(2))
73 expect(db
.get_doccount(), 5)
75 # Test that str is rejected by sortable_unserialise().
77 xapian
.sortable_unserialise("unicode")
78 except TypeError as e
:
79 expect(str(e
), 'expected bytes, str found')
83 def test_exception_base():
84 """Check that xapian exceptions have Exception as a base class.
88 raise xapian
.InvalidOperationError("Test exception")
89 except Exception as e
:
93 """Test iterators over MSets.
97 query
= xapian
.Query(xapian
.Query
.OP_OR
, "was", "it")
99 enquire
= xapian
.Enquire(db
)
100 enquire
.set_query(query
)
101 mset
= enquire
.get_mset(0, 10)
102 items
= [item
for item
in mset
]
103 expect(len(items
), 5)
104 expect(len(mset
), len(items
), "Expected number of items to be length of mset")
106 context("testing returned item from mset")
107 expect(items
[2].docid
, 4)
108 expect(items
[2].rank
, 2)
109 expect(items
[2].percent
, 86)
110 expect(items
[2].collapse_key
, b
'')
111 expect(items
[2].collapse_count
, 0)
112 expect(items
[2].document
.get_data(), b
'was it warm? three')
114 # Check iterators for sub-msets against the whole mset.
115 for start
in range(0, 6):
116 for maxitems
in range(0, 6):
117 context("checking iterators for sub-mset from %d, maxitems %d" % (start
, maxitems
))
118 submset
= enquire
.get_mset(start
, maxitems
)
121 context("testing hit %d for sub-mset from %d, maxitems %d" % (num
, start
, maxitems
))
122 expect(item
.rank
, num
+ start
)
124 context("comparing iterator item %d for sub-mset from %d, maxitems %d against hit" % (num
, start
, maxitems
))
125 hit
= submset
.get_hit(num
)
126 expect(hit
.docid
, item
.docid
)
127 expect(hit
.rank
, item
.rank
)
128 expect(hit
.percent
, item
.percent
)
129 expect(hit
.document
.get_data(), item
.document
.get_data())
130 expect(hit
.collapse_key
, item
.collapse_key
)
131 expect(hit
.collapse_count
, item
.collapse_count
)
133 context("comparing iterator item %d for sub-mset from %d, maxitems %d against hit from whole mset" % (num
, start
, maxitems
))
134 hit
= mset
.get_hit(num
+ start
)
135 expect(hit
.docid
, item
.docid
)
136 expect(hit
.rank
, item
.rank
)
137 expect(hit
.percent
, item
.percent
)
138 expect(hit
.document
.get_data(), item
.document
.get_data())
139 expect(hit
.collapse_key
, item
.collapse_key
)
140 expect(hit
.collapse_count
, item
.collapse_count
)
142 context("comparing iterator item %d for sub-mset from %d, maxitems %d against direct access with []" % (num
, start
, maxitems
))
143 expect(submset
[num
].docid
, item
.docid
)
144 expect(submset
[num
].rank
, item
.rank
)
145 expect(submset
[num
].percent
, item
.percent
)
146 expect(submset
[num
].document
.get_data(), item
.document
.get_data())
147 expect(submset
[num
].collapse_key
, item
.collapse_key
)
148 expect(submset
[num
].collapse_count
, item
.collapse_count
)
152 context("Checking out of range access to mset, for sub-mset from %d, maxitems %d" % (start
, maxitems
))
153 # Test out-of-range access to mset:
154 expect_exception(IndexError, 'Mset index out of range',
155 submset
.__getitem
__, -10)
156 expect_exception(IndexError, 'Mset index out of range',
157 submset
.__getitem
__, 10)
158 expect_exception(IndexError, 'Mset index out of range',
159 submset
.__getitem
__, -1-len(submset
))
160 expect_exception(IndexError, 'Mset index out of range',
161 submset
.__getitem
__, len(submset
))
163 # Check that the item contents remain valid when the iterator has
165 saved_items
= [item
for item
in submset
]
166 for num
in range(len(saved_items
)):
167 item
= saved_items
[num
]
168 context("comparing iterator item %d for sub-mset mset from %d, maxitems %d against saved item" % (num
, start
, maxitems
))
169 expect(submset
[num
].docid
, item
.docid
)
170 expect(submset
[num
].rank
, item
.rank
)
171 expect(submset
[num
].percent
, item
.percent
)
172 expect(submset
[num
].document
.get_data(), item
.document
.get_data())
173 expect(submset
[num
].collapse_key
, item
.collapse_key
)
174 expect(submset
[num
].collapse_count
, item
.collapse_count
)
176 # Check that the right number of items exist in the mset.
177 context("checking length of sub-mset from %d, maxitems %d" % (start
, maxitems
))
178 items
= [item
for item
in submset
]
179 expect(len(items
), min(maxitems
, 5 - start
))
180 expect(len(submset
), min(maxitems
, 5 - start
))
182 def test_eset_iter():
183 """Test iterators over ESets.
186 db
= setup_database()
187 query
= xapian
.Query(xapian
.Query
.OP_OR
, "was", "it")
191 context("getting eset items without a query")
192 enquire
= xapian
.Enquire(db
)
193 eset
= enquire
.get_eset(10, rset
)
194 items
= [item
for item
in eset
]
195 expect(len(items
), 3)
196 expect(len(items
), len(eset
))
198 context("getting eset items with a query")
199 enquire
= xapian
.Enquire(db
)
200 enquire
.set_query(query
)
201 eset
= enquire
.get_eset(10, rset
)
202 items2
= [item
for item
in eset
]
203 expect(len(items2
), 2)
204 expect(len(items2
), len(eset
))
206 context("comparing eset items with a query to those without")
207 expect(items2
[0].term
, items
[0].term
)
208 expect(items2
[1].term
, items
[2].term
)
210 context("comparing eset weights with a query to those without")
211 expect(items2
[0].weight
, items
[0].weight
)
212 expect(items2
[1].weight
, items
[2].weight
)
214 def test_matchingterms_iter():
215 """Test Enquire.matching_terms iterator.
218 db
= setup_database()
219 query
= xapian
.Query(xapian
.Query
.OP_OR
, ("was", "it", "warm", "two"))
221 # Prior to 1.2.4 Enquire.matching_terms() leaked references to its members.
223 enquire
= xapian
.Enquire(db
)
224 enquire
.set_query(query
)
225 mset
= enquire
.get_mset(0, 10)
228 # Make a list of the term names
229 mterms
= [term
for term
in enquire
.matching_terms(item
.docid
)]
230 mterms2
= [term
for term
in enquire
.matching_terms(item
)]
231 expect(mterms
, mterms2
)
233 mterms
= [term
for term
in enquire
.matching_terms(mset
.get_hit(0))]
234 expect(mterms
, [b
'it', b
'two', b
'warm', b
'was'])
def test_queryterms_iter():
    """Check that iterating over a Query yields the terms it contains.

    The expected result lists the terms as bytes objects, ordered
    alphabetically rather than in the order they were passed to the Query
    constructor.

    """
    # The database is set up as in the other tests, although the query is
    # never run against it here.
    database = setup_database()
    or_query = xapian.Query(xapian.Query.OP_OR, ("was", "it", "warm", "two"))

    # Collect everything the query's term iterator produces.
    found = [entry for entry in or_query]
    expect(found, [b'it', b'two', b'warm', b'was'])
247 def test_queryparser_stoplist_iter():
248 """Test QueryParser stoplist iterator.
251 stemmer
= xapian
.Stem('en')
253 # Check behaviour without having set a stoplist.
254 queryparser
= xapian
.QueryParser()
255 queryparser
.set_stemmer(stemmer
)
256 queryparser
.set_stemming_strategy(queryparser
.STEM_SOME
)
257 expect([term
for term
in queryparser
.stoplist()], [])
258 query
= queryparser
.parse_query('to be or not to be is the questions')
259 expect([term
for term
in queryparser
.stoplist()], [])
261 'Query((Zto@1 OR Zbe@2 OR Zor@3 OR Znot@4 OR Zto@5 OR Zbe@6 OR '
262 'Zis@7 OR Zthe@8 OR Zquestion@9))')
264 # Check behaviour with a stoplist, but no stemmer
265 queryparser
= xapian
.QueryParser()
266 stopper
= xapian
.SimpleStopper()
269 stopper
.add('question')
270 queryparser
.set_stopper(stopper
)
271 expect([term
for term
in queryparser
.stoplist()], [])
272 query
= queryparser
.parse_query('to be or not to be is the questions')
274 expect([term
for term
in queryparser
.stoplist()], [b
'to', b
'not', b
'to'])
276 'Query((be@2 OR or@3 OR be@6 OR is@7 OR the@8 OR questions@9))')
278 # Check behaviour with a stoplist and a stemmer
279 queryparser
.set_stemmer(stemmer
)
280 queryparser
.set_stemming_strategy(queryparser
.STEM_SOME
)
281 expect([term
for term
in queryparser
.stoplist()], [b
'to', b
'not', b
'to']) # Shouldn't have changed since previous query.
282 query
= queryparser
.parse_query('to be or not to be is the questions')
284 expect([term
for term
in queryparser
.stoplist()], [b
'to', b
'not', b
'to'])
286 'Query((Zbe@2 OR Zor@3 OR Zbe@6 OR Zis@7 OR Zthe@8 OR Zquestion@9))')
288 def test_queryparser_unstem_iter():
289 """Test QueryParser unstemlist iterator.
292 stemmer
= xapian
.Stem('en')
294 queryparser
= xapian
.QueryParser()
295 expect([term
for term
in queryparser
.unstemlist('to')], [])
296 expect([term
for term
in queryparser
.unstemlist('question')], [])
297 expect([term
for term
in queryparser
.unstemlist('questions')], [])
298 query
= queryparser
.parse_query('to question questions')
300 expect([term
for term
in queryparser
.unstemlist('to')], [b
'to'])
301 expect([term
for term
in queryparser
.unstemlist('question')], [b
'question'])
302 expect([term
for term
in queryparser
.unstemlist('questions')], [b
'questions'])
304 'Query((to@1 OR question@2 OR questions@3))')
307 queryparser
= xapian
.QueryParser()
308 queryparser
.set_stemmer(stemmer
)
309 queryparser
.set_stemming_strategy(queryparser
.STEM_SOME
)
310 expect([term
for term
in queryparser
.unstemlist('Zto')], [])
311 expect([term
for term
in queryparser
.unstemlist('Zquestion')], [])
312 expect([term
for term
in queryparser
.unstemlist('Zquestions')], [])
313 query
= queryparser
.parse_query('to question questions')
315 expect([term
for term
in queryparser
.unstemlist('Zto')], [b
'to'])
316 expect([term
for term
in queryparser
.unstemlist('Zquestion')], [b
'question', b
'questions'])
317 expect([term
for term
in queryparser
.unstemlist('Zquestions')], [])
319 'Query((Zto@1 OR Zquestion@2 OR Zquestion@3))')
321 def test_allterms_iter():
322 """Test all-terms iterator on Database.
325 db
= setup_database()
327 context("making a list of the term names and frequencies")
331 terms
.append(termitem
.term
)
332 expect_exception(xapian
.InvalidOperationError
, 'Iterator does not support wdfs', getattr, termitem
, 'wdf')
333 freqs
.append(termitem
.termfreq
)
334 expect_exception(xapian
.InvalidOperationError
, 'Iterator does not support position lists', getattr, termitem
, 'positer')
336 context("checking that items are no longer valid once the iterator has moved on")
337 termitems
= [termitem
for termitem
in db
]
339 expect(len(termitems
), len(terms
))
340 for i
in range(len(termitems
)):
341 expect(termitems
[i
].term
, terms
[i
])
343 expect(len(termitems
), len(freqs
))
344 for termitem
in termitems
:
345 expect_exception(xapian
.InvalidOperationError
, 'Iterator has moved, and does not support random access', getattr, termitem
, 'termfreq')
347 context("checking that restricting the terms iterated with a prefix works")
350 for i
in range(len(terms
)):
351 if terms
[i
].startswith(b
't'):
352 prefix_terms
.append(terms
[i
])
353 prefix_freqs
.append(freqs
[i
])
355 for termitem
in db
.allterms('t'):
356 expect(termitem
.term
, prefix_terms
[i
])
357 expect(termitem
.termfreq
, prefix_freqs
[i
])
359 expect(len(prefix_terms
), i
)
361 def test_termlist_iter():
362 """Test termlist iterator on Database.
365 db
= setup_database()
367 # Make lists of the item contents
372 for termitem
in db
.termlist(3):
373 terms
.append(termitem
.term
)
374 wdfs
.append(termitem
.wdf
)
375 freqs
.append(termitem
.termfreq
)
376 positers
.append([pos
for pos
in termitem
.positer
])
378 expect(terms
, [b
'it', b
'two', b
'warm', b
'was'])
379 expect(wdfs
, [1, 2, 1, 1])
380 expect(freqs
, [5, 3, 4, 4])
381 expect(positers
, [[2], [], [3], [1]])
384 tliter
= db
.termlist(3)
386 # skip to an item before the first item.
387 termitem
= tliter
.skip_to('a')
388 expect((termitem
.term
, termitem
.wdf
, termitem
.termfreq
,
389 [pos
for pos
in termitem
.positer
]), (b
'it', 1, 5, [2]))
391 # skip forwards to an item.
392 termitem
= tliter
.skip_to('two')
393 expect((termitem
.term
, termitem
.wdf
, termitem
.termfreq
,
394 [pos
for pos
in termitem
.positer
]), (b
'two', 2, 3, []))
396 # skip to same place (should return same item)
397 termitem
= tliter
.skip_to('two')
398 expect((termitem
.term
, termitem
.wdf
, termitem
.termfreq
,
399 [pos
for pos
in termitem
.positer
]), (b
'two', 2, 3, []))
401 # next() after a skip_to(), should return next item.
402 termitem
= next(tliter
)
403 expect((termitem
.term
, termitem
.wdf
, termitem
.termfreq
,
404 [pos
for pos
in termitem
.positer
]), (b
'warm', 1, 4, [3]))
406 # skip to same place (should return same item)
407 termitem
= tliter
.skip_to('warm')
408 expect((termitem
.term
, termitem
.wdf
, termitem
.termfreq
,
409 [pos
for pos
in termitem
.positer
]), (b
'warm', 1, 4, [3]))
411 # skip backwards (should return same item)
412 termitem
= tliter
.skip_to('a')
415 expect_exception(StopIteration, '', tliter
.skip_to
, 'zoo')
416 # skip backwards (should still return StopIteration).
417 expect_exception(StopIteration, '', tliter
.skip_to
, 'a')
418 # next should continue to return StopIteration.
419 expect_exception(StopIteration, '', next
, tliter
)
422 # Make a list of the terms (so we can test if they're still valid
423 # once the iterator has moved on).
424 termitems
= [termitem
for termitem
in db
.termlist(3)]
426 expect(len(termitems
), len(terms
))
427 for i
in range(len(termitems
)):
428 expect(termitems
[i
].term
, terms
[i
])
430 expect(len(termitems
), len(wdfs
))
431 for i
in range(len(termitems
)):
432 expect(termitems
[i
].wdf
, wdfs
[i
])
434 expect(len(termitems
), len(freqs
))
435 for termitem
in termitems
:
436 expect_exception(xapian
.InvalidOperationError
,
437 'Iterator has moved, and does not support random access',
438 getattr, termitem
, 'termfreq')
440 expect(len(termitems
), len(freqs
))
441 for termitem
in termitems
:
442 expect_exception(xapian
.InvalidOperationError
,
443 'Iterator has moved, and does not support random access',
444 getattr, termitem
, 'positer')
446 def test_dbdocument_iter():
447 """Test document terms iterator for document taken from a database.
450 db
= setup_database()
452 doc
= db
.get_document(3)
454 # Make lists of the item contents
460 terms
.append(termitem
.term
)
461 wdfs
.append(termitem
.wdf
)
462 freqs
.append(termitem
.termfreq
)
463 positers
.append([pos
for pos
in termitem
.positer
])
465 expect(terms
, [b
'it', b
'two', b
'warm', b
'was'])
466 expect(wdfs
, [1, 2, 1, 1])
467 expect(freqs
, [5, 3, 4, 4])
468 expect(positers
, [[2], [], [3], [1]])
470 # Make a list of the terms (so we can test if they're still valid
471 # once the iterator has moved on).
472 termitems
= [termitem
for termitem
in doc
]
474 expect(len(termitems
), len(terms
))
475 for i
in range(len(termitems
)):
476 expect(termitems
[i
].term
, terms
[i
])
478 expect(len(termitems
), len(wdfs
))
479 for i
in range(len(termitems
)):
480 expect(termitems
[i
].wdf
, wdfs
[i
])
482 expect(len(termitems
), len(freqs
))
483 for termitem
in termitems
:
484 expect_exception(xapian
.InvalidOperationError
,
485 'Iterator has moved, and does not support random access',
486 getattr, termitem
, 'termfreq')
488 expect(len(termitems
), len(freqs
))
489 for termitem
in termitems
:
490 expect_exception(xapian
.InvalidOperationError
,
491 'Iterator has moved, and does not support random access',
492 getattr, termitem
, 'positer')
494 def test_newdocument_iter():
495 """Test document terms iterator for newly created document.
498 doc
= xapian
.Document()
499 doc
.set_data("was it warm? two")
500 doc
.add_posting("was", 1)
501 doc
.add_posting("it", 2)
502 doc
.add_posting("warm", 3)
503 doc
.add_term("two", 2)
505 # Make lists of the item contents
510 terms
.append(termitem
.term
)
511 wdfs
.append(termitem
.wdf
)
512 expect_exception(xapian
.InvalidOperationError
,
513 "get_termfreq() not valid for a TermIterator from a "
514 "Document which is not associated with a database",
515 getattr, termitem
, 'termfreq')
516 positers
.append([pos
for pos
in termitem
.positer
])
518 expect(terms
, [b
'it', b
'two', b
'warm', b
'was'])
519 expect(wdfs
, [1, 2, 1, 1])
520 expect(positers
, [[2], [], [3], [1]])
522 # Make a list of the terms (so we can test if they're still valid
523 # once the iterator has moved on).
524 termitems
= [termitem
for termitem
in doc
]
526 expect(len(termitems
), len(terms
))
527 for i
in range(len(termitems
)):
528 expect(termitems
[i
].term
, terms
[i
])
530 expect(len(termitems
), len(wdfs
))
531 for i
in range(len(termitems
)):
532 expect(termitems
[i
].wdf
, wdfs
[i
])
534 for termitem
in termitems
:
535 expect_exception(xapian
.InvalidOperationError
,
536 'Iterator has moved, and does not support random access',
537 getattr, termitem
, 'termfreq')
539 expect(len(termitems
), len(positers
))
540 for termitem
in termitems
:
541 expect_exception(xapian
.InvalidOperationError
,
542 'Iterator has moved, and does not support random access',
543 getattr, termitem
, 'positer')
545 def test_postinglist_iter():
546 """Test postinglist iterator on Database.
549 db
= setup_database()
551 # Make lists of the item contents
556 for posting
in db
.postlist('it'):
557 docids
.append(posting
.docid
)
558 doclengths
.append(posting
.doclength
)
559 wdfs
.append(posting
.wdf
)
560 positers
.append([pos
for pos
in posting
.positer
])
562 expect(docids
, [1, 2, 3, 4, 5])
563 expect(doclengths
, [3, 3, 5, 8, 19])
564 expect(wdfs
, [1, 1, 1, 1, 8])
565 expect(positers
, [[1], [2], [2], [2], [2, 7]])
568 pliter
= db
.postlist('it')
570 # skip to an item before the first item.
571 posting
= pliter
.skip_to(0)
572 expect((posting
.docid
, posting
.doclength
, posting
.wdf
,
573 [pos
for pos
in posting
.positer
]), (1, 3, 1, [1]))
575 # skip forwards to an item.
576 posting
= pliter
.skip_to(3)
577 expect((posting
.docid
, posting
.doclength
, posting
.wdf
,
578 [pos
for pos
in posting
.positer
]), (3, 5, 1, [2]))
580 # skip to same place (should return same item)
581 posting
= pliter
.skip_to(3)
582 expect((posting
.docid
, posting
.doclength
, posting
.wdf
,
583 [pos
for pos
in posting
.positer
]), (3, 5, 1, [2]))
585 # next() after a skip_to(), should return next item.
586 posting
= next(pliter
)
587 expect((posting
.docid
, posting
.doclength
, posting
.wdf
,
588 [pos
for pos
in posting
.positer
]), (4, 8, 1, [2]))
590 # skip to same place (should return same item)
591 posting
= pliter
.skip_to(4)
592 expect((posting
.docid
, posting
.doclength
, posting
.wdf
,
593 [pos
for pos
in posting
.positer
]), (4, 8, 1, [2]))
595 # skip backwards (should return same item)
596 posting
= pliter
.skip_to(2)
597 expect((posting
.docid
, posting
.doclength
, posting
.wdf
,
598 [pos
for pos
in posting
.positer
]), (4, 8, 1, [2]))
601 expect_exception(StopIteration, '', pliter
.skip_to
, 6)
602 # skip backwards (should still return StopIteration).
603 expect_exception(StopIteration, '', pliter
.skip_to
, 6)
604 # next should continue to return StopIteration.
605 expect_exception(StopIteration, '', next
, pliter
)
608 # Make a list of the postings (so we can test if they're still valid once
609 # the iterator has moved on).
610 postings
= [posting
for posting
in db
.postlist('it')]
612 expect(len(postings
), len(docids
))
613 for i
in range(len(postings
)):
614 expect(postings
[i
].docid
, docids
[i
])
616 expect(len(postings
), len(doclengths
))
617 for i
in range(len(postings
)):
618 expect(postings
[i
].doclength
, doclengths
[i
])
620 expect(len(postings
), len(wdfs
))
621 for i
in range(len(postings
)):
622 expect(postings
[i
].wdf
, wdfs
[i
])
624 expect(len(postings
), len(positers
))
625 for posting
in postings
:
626 expect_exception(xapian
.InvalidOperationError
,
627 'Iterator has moved, and does not support random access',
628 getattr, posting
, 'positer')
630 def test_valuestream_iter():
631 """Test a valuestream iterator on Database.
634 db
= setup_database()
636 # Check basic iteration
637 expect([(item
.docid
, item
.value
) for item
in db
.valuestream(0)],
638 [(3, b
'\xa4'), (4, b
'\xa2'), (5, b
'\xa4')])
639 expect([(item
.docid
, item
.value
) for item
in db
.valuestream(1)], [])
640 expect([(item
.docid
, item
.value
) for item
in db
.valuestream(5)],
642 expect([(item
.docid
, item
.value
) for item
in db
.valuestream(9)],
645 # Test skip_to() on iterator with no values, and behaviours when called
646 # after already returning StopIteration.
647 i
= db
.valuestream(1)
648 expect_exception(StopIteration, "", i
.skip_to
, 1)
649 expect_exception(StopIteration, "", i
.skip_to
, 1)
650 i
= db
.valuestream(1)
651 expect_exception(StopIteration, "", i
.skip_to
, 1)
652 expect_exception(StopIteration, "", i
.__next
__)
653 i
= db
.valuestream(1)
654 expect_exception(StopIteration, "", i
.__next
__)
655 expect_exception(StopIteration, "", i
.skip_to
, 1)
657 # Test that skipping to a value works, and that skipping doesn't have to
659 i
= db
.valuestream(0)
661 expect((item
.docid
, item
.value
), (4, b
'\xa2'))
663 expect((item
.docid
, item
.value
), (4, b
'\xa2'))
665 expect((item
.docid
, item
.value
), (4, b
'\xa2'))
667 expect((item
.docid
, item
.value
), (5, b
'\xa4'))
668 expect_exception(StopIteration, "", i
.skip_to
, 6)
670 # Test that alternating skip_to() and next() works.
671 i
= db
.valuestream(0)
673 expect((item
.docid
, item
.value
), (3, b
'\xa4'))
675 expect((item
.docid
, item
.value
), (4, b
'\xa2'))
677 expect((item
.docid
, item
.value
), (5, b
'\xa4'))
678 expect_exception(StopIteration, "", i
.skip_to
, 6)
680 # Test that next works correctly after skip_to() called with an earlier
682 i
= db
.valuestream(0)
684 expect((item
.docid
, item
.value
), (4, b
'\xa2'))
686 expect((item
.docid
, item
.value
), (4, b
'\xa2'))
688 expect((item
.docid
, item
.value
), (5, b
'\xa4'))
690 # Test that next works correctly after skipping to last item
691 i
= db
.valuestream(0)
693 expect((item
.docid
, item
.value
), (5, b
'\xa4'))
694 expect_exception(StopIteration, "", i
.__next
__)
def test_position_iter():
    """Check the position list iterator for a term of a stored document.

    """
    database = setup_database()

    # Fetch document 5 from the database (the object is not used further).
    document = database.get_document(5)

    # Gather the positions reported for the term 'it' in document 5.
    found = [where for where in database.positionlist(5, 'it')]

    expect(found, [2, 7])
def test_value_iter():
    """Check iteration over the values stored in a document.

    Document 5 of the test database is expected to carry three values, in
    slots 0, 5 and 9.

    """
    database = setup_database()
    document = database.get_document(5)

    stored = list(document.values())
    expect(len(stored), 3)

    # (slot number, value) pairs in the order the iterator should yield them.
    wanted = (
        (0, xapian.sortable_serialise(2)),
        (5, b'five'),
        (9, b'nine'),
    )
    for index, (slot, value) in enumerate(wanted):
        expect(stored[index].num, slot)
        expect(stored[index].value, value)
725 def test_synonyms_iter():
726 """Test iterators over list of synonyms in a database.
729 dbpath
= 'db_test_synonyms_iter'
730 db
= xapian
.WritableDatabase(dbpath
, xapian
.DB_CREATE_OR_OVERWRITE
)
732 db
.add_synonym('hello', 'hi')
733 db
.add_synonym('hello', 'howdy')
735 expect([item
for item
in db
.synonyms('foo')], [])
736 expect([item
for item
in db
.synonyms('hello')], [b
'hi', b
'howdy'])
737 expect([item
for item
in db
.synonym_keys()], [b
'hello'])
738 expect([item
for item
in db
.synonym_keys('foo')], [])
739 expect([item
for item
in db
.synonym_keys('he')], [b
'hello'])
740 expect([item
for item
in db
.synonym_keys('hello')], [b
'hello'])
742 dbr
=xapian
.Database(dbpath
)
743 expect([item
for item
in dbr
.synonyms('foo')], [])
744 expect([item
for item
in dbr
.synonyms('hello')], [])
745 expect([item
for item
in dbr
.synonym_keys()], [])
746 expect([item
for item
in dbr
.synonym_keys('foo')], [])
747 expect([item
for item
in dbr
.synonym_keys('he')], [])
748 expect([item
for item
in dbr
.synonym_keys('hello')], [])
752 expect([item
for item
in db
.synonyms('foo')], [])
753 expect([item
for item
in db
.synonyms('hello')], [b
'hi', b
'howdy'])
754 expect([item
for item
in db
.synonym_keys()], [b
'hello'])
755 expect([item
for item
in db
.synonym_keys('foo')], [])
756 expect([item
for item
in db
.synonym_keys('he')], [b
'hello'])
757 expect([item
for item
in db
.synonym_keys('hello')], [b
'hello'])
759 dbr
=xapian
.Database(dbpath
)
760 expect([item
for item
in dbr
.synonyms('foo')] , [])
761 expect([item
for item
in dbr
.synonyms('hello')], [b
'hi', b
'howdy'])
762 expect([item
for item
in dbr
.synonym_keys()], [b
'hello'])
763 expect([item
for item
in dbr
.synonym_keys('foo')], [])
764 expect([item
for item
in dbr
.synonym_keys('he')], [b
'hello'])
765 expect([item
for item
in dbr
.synonym_keys('hello')], [b
'hello'])
768 expect(xapian
.Database
.check(dbpath
), 0)
770 shutil
.rmtree(dbpath
)
772 def test_metadata_keys_iter():
773 """Test iterators over list of metadata keys in a database.
776 dbpath
= 'db_test_metadata_iter'
777 db
= xapian
.WritableDatabase(dbpath
, xapian
.DB_CREATE_OR_OVERWRITE
)
779 db
.set_metadata('author', 'richard')
780 db
.set_metadata('item1', 'hello')
781 db
.set_metadata('item1', 'hi')
782 db
.set_metadata('item2', 'howdy')
783 db
.set_metadata('item3', '')
784 db
.set_metadata('item4', 'goodbye')
785 db
.set_metadata('item4', '')
786 db
.set_metadata('type', 'greeting')
788 expect([item
for item
in db
.metadata_keys()],
789 [b
'author', b
'item1', b
'item2', b
'type'])
790 expect([item
for item
in db
.metadata_keys('foo')], [])
791 expect([item
for item
in db
.metadata_keys('item')], [b
'item1', b
'item2'])
792 expect([item
for item
in db
.metadata_keys('it')], [b
'item1', b
'item2'])
793 expect([item
for item
in db
.metadata_keys('type')], [b
'type'])
795 dbr
=xapian
.Database(dbpath
)
796 expect([item
for item
in dbr
.metadata_keys()], [])
797 expect([item
for item
in dbr
.metadata_keys('foo')], [])
798 expect([item
for item
in dbr
.metadata_keys('item')], [])
799 expect([item
for item
in dbr
.metadata_keys('it')], [])
800 expect([item
for item
in dbr
.metadata_keys('type')], [])
803 expect([item
for item
in db
.metadata_keys()],
804 [b
'author', b
'item1', b
'item2', b
'type'])
805 expect([item
for item
in db
.metadata_keys('foo')], [])
806 expect([item
for item
in db
.metadata_keys('item')], [b
'item1', b
'item2'])
807 expect([item
for item
in db
.metadata_keys('it')], [b
'item1', b
'item2'])
808 expect([item
for item
in db
.metadata_keys('type')], [b
'type'])
810 dbr
=xapian
.Database(dbpath
)
811 expect([item
for item
in dbr
.metadata_keys()],
812 [b
'author', b
'item1', b
'item2', b
'type'])
813 expect([item
for item
in dbr
.metadata_keys('foo')], [])
814 expect([item
for item
in dbr
.metadata_keys('item')], [b
'item1', b
'item2'])
815 expect([item
for item
in dbr
.metadata_keys('it')], [b
'item1', b
'item2'])
816 expect([item
for item
in dbr
.metadata_keys('type')], [b
'type'])
819 expect(xapian
.Database
.check(dbpath
), 0)
821 shutil
.rmtree(dbpath
)
824 """Test basic spelling correction features.
827 dbpath
= 'db_test_spell'
828 db
= xapian
.WritableDatabase(dbpath
, xapian
.DB_CREATE_OR_OVERWRITE
)
830 db
.add_spelling('hello')
831 db
.add_spelling('mell', 2)
832 expect(db
.get_spelling_suggestion('hell'), b
'mell')
833 expect([(item
.term
, item
.termfreq
) for item
in db
.spellings()], [(b
'hello', 1), (b
'mell', 2)])
834 dbr
=xapian
.Database(dbpath
)
835 expect(dbr
.get_spelling_suggestion('hell'), b
'')
836 expect([(item
.term
, item
.termfreq
) for item
in dbr
.spellings()], [])
838 dbr
=xapian
.Database(dbpath
)
839 expect(db
.get_spelling_suggestion('hell'), b
'mell')
840 expect(dbr
.get_spelling_suggestion('hell'), b
'mell')
841 expect([(item
.term
, item
.termfreq
) for item
in dbr
.spellings()], [(b
'hello', 1), (b
'mell', 2)])
844 expect(xapian
.Database
.check(dbpath
), 0)
846 shutil
.rmtree(dbpath
)
848 def test_queryparser_custom_rp():
849 """Test QueryParser with a custom (in python) RangeProcessor.
852 class MyRP(xapian
.RangeProcessor
):
854 xapian
.RangeProcessor
.__init
__(self
)
856 def __call__(self
, begin
, end
):
857 begin
= "A" + begin
.decode('utf-8')
858 end
= "B" + end
.decode('utf-8')
859 return xapian
.Query(xapian
.Query
.OP_VALUE_RANGE
, 7, begin
, end
)
861 queryparser
= xapian
.QueryParser()
864 queryparser
.add_rangeprocessor(myrp
)
865 query
= queryparser
.parse_query('5..8')
868 'Query(VALUE_RANGE 7 A5 B8)')
870 def test_queryparser_custom_rp_deallocation():
871 """Test that QueryParser doesn't delete RangeProcessors too soon.
874 class MyRP(xapian
.RangeProcessor
):
876 xapian
.RangeProcessor
.__init
__(self
)
878 def __call__(self
, begin
, end
):
879 begin
= "A" + begin
.decode('utf-8')
880 end
= "B" + end
.decode('utf-8')
881 return xapian
.Query(xapian
.Query
.OP_VALUE_RANGE
, 7, begin
, end
)
884 queryparser
= xapian
.QueryParser()
886 queryparser
.add_rangeprocessor(myrp
)
889 queryparser
= make_parser()
890 query
= queryparser
.parse_query('5..8')
893 'Query(VALUE_RANGE 7 A5 B8)')
895 def test_scale_weight():
896 """Test query OP_SCALE_WEIGHT feature.
899 db
= setup_database()
900 for mult
in (0, 1, 2.5):
901 context("checking queries with OP_SCALE_WEIGHT with a multiplier of %r" %
903 query1
= xapian
.Query("it")
904 query2
= xapian
.Query(xapian
.Query
.OP_SCALE_WEIGHT
, query1
, mult
)
906 enquire
= xapian
.Enquire(db
)
907 enquire
.set_query(query1
)
908 mset1
= enquire
.get_mset(0, 10)
909 enquire
.set_query(query2
)
910 mset2
= enquire
.get_mset(0, 10)
912 expected
= [(0, item
.docid
) for item
in mset1
]
915 expected
= [(int(item
.weight
* mult
* 1000000), item
.docid
) for item
in mset1
]
916 expect([(int(item
.weight
* 1000000), item
.docid
) for item
in mset2
], expected
)
918 context("checking queries with OP_SCALE_WEIGHT with a multiplier of -1")
919 query1
= xapian
.Query("it")
920 expect_exception(xapian
.InvalidArgumentError
,
921 "OP_SCALE_WEIGHT requires factor >= 0",
923 xapian
.Query
.OP_SCALE_WEIGHT
, query1
, -1)
926 def test_weight_normalise():
927 """Test normalising of query weights using the OP_SCALE_WEIGHT feature.
929 This test first runs a search (asking for no results) to get the maximum
930 possible weight for a query, and then checks that the results of
931 MSet.get_max_possible() match this.
933 This tests that the get_max_possible() value is correct (though it isn't
934 guaranteed to be at a tight bound), and that the SCALE_WEIGHT query can
935 compensate correctly.
938 db
= setup_database()
945 "\"was it warm\" four notpresent",
948 context("checking query %r using OP_SCALE_WEIGHT to normalise the weights" % query
)
949 qp
= xapian
.QueryParser()
950 query1
= qp
.parse_query(query
)
951 enquire
= xapian
.Enquire(db
)
952 enquire
.set_query(query1
)
953 mset1
= enquire
.get_mset(0, 0)
955 # Check the max_attained value is 0 - this gives us some reassurance
956 # that the match didn't actually do the work of calculating any
958 expect(mset1
.get_max_attained(), 0)
960 max_possible
= mset1
.get_max_possible()
961 if query
== "notpresent":
962 expect(max_possible
, 0)
964 mult
= 1.0 / max_possible
965 query2
= xapian
.Query(xapian
.Query
.OP_SCALE_WEIGHT
, query1
, mult
)
967 enquire
= xapian
.Enquire(db
)
968 enquire
.set_query(query2
)
969 mset2
= enquire
.get_mset(0, 10)
970 # max_possible should be 1 (excluding rounding errors) for mset2
971 expect(int(mset2
.get_max_possible() * 1000000.0 + 0.5), 1000000)
973 expect(item
.weight
> 0, True)
974 expect(item
.weight
<= 1, True)
977 def test_valuesetmatchdecider():
978 """Simple tests of the ValueSetMatchDecider class
981 md
= xapian
.ValueSetMatchDecider(0, True)
982 doc
= xapian
.Document()
983 expect(md(doc
), False)
986 doc
.add_value(0, 'foo')
987 expect(md(doc
), True)
989 md
.remove_value('foo')
990 expect(md(doc
), False)
992 md
= xapian
.ValueSetMatchDecider(0, False)
993 expect(md(doc
), True)
996 expect(md(doc
), False)
999 def test_postingsource():
1000 """Simple test of the PostingSource class.
1003 class OddPostingSource(xapian
.PostingSource
):
1004 def __init__(self
, max):
1005 xapian
.PostingSource
.__init
__(self
)
1010 self
.weight
= db
.get_doccount() + 1
1011 self
.set_maxweight(self
.weight
)
1013 def get_termfreq_min(self
): return 0
1014 def get_termfreq_est(self
): return int(self
.max / 2)
1015 def get_termfreq_max(self
): return self
.max
1016 def __next__(self
, minweight
):
1019 self
.set_maxweight(self
.weight
)
1020 def at_end(self
): return self
.current
> self
.max
1021 def get_docid(self
): return self
.current
1022 def get_weight(self
): return self
.weight
1024 dbpath
= 'db_test_postingsource'
1025 db
= xapian
.WritableDatabase(dbpath
, xapian
.DB_CREATE_OR_OVERWRITE
)
1026 for id in range(10):
1027 doc
= xapian
.Document()
1028 db
.add_document(doc
)
1030 # Do a dance to check that the posting source doesn't get dereferenced too
1031 # soon in various cases.
1033 # First - check that it's kept when the source goes out of scope.
1035 source
= OddPostingSource(10)
1036 # The posting source is inside a list to check that case is
1037 # correctly handled.
1038 return xapian
.Query(xapian
.Query
.OP_OR
,
1039 ["terM wHich wilL NoT maTch", xapian
.Query(source
)])
1041 # Check that it's kept when the query goes out of scope.
1044 enquire
= xapian
.Enquire(db
)
1045 enquire
.set_query(query
)
1048 # Check it's kept when the query is retrieved from enquire and put into
1052 enquire
= xapian
.Enquire(db
)
1053 enquire
.set_query(enq1
.get_query())
1059 mset
= enquire
.get_mset(0, 10)
1061 expect([item
.docid
for item
in mset
], [1, 3, 5, 7, 9])
1062 expect(mset
[0].weight
, db
.get_doccount())
1065 expect(xapian
.Database
.check(dbpath
), 0)
1066 shutil
.rmtree(dbpath
)
1068 def test_postingsource2():
1069 """Simple test of the PostingSource class.
1072 dbpath
= 'db_test_postingsource2'
1073 db
= xapian
.WritableDatabase(dbpath
, xapian
.DB_CREATE_OR_OVERWRITE
)
1074 vals
= (6, 9, 4.5, 4.4, 4.6, 2, 1, 4, 3, 0)
1075 for id in range(10):
1076 doc
= xapian
.Document()
1077 doc
.add_value(1, xapian
.sortable_serialise(vals
[id]))
1078 db
.add_document(doc
)
1080 source
= xapian
.ValueWeightPostingSource(1)
1081 query
= xapian
.Query(source
)
1082 del source
# Check that query keeps a reference to it.
1084 enquire
= xapian
.Enquire(db
)
1085 enquire
.set_query(query
)
1086 mset
= enquire
.get_mset(0, 10)
1088 expect([item
.docid
for item
in mset
], [2, 1, 5, 3, 4, 8, 9, 6, 7, 10])
1091 shutil
.rmtree(dbpath
)
1093 def test_postingsource3():
1094 """Test that ValuePostingSource can be usefully subclassed.
1097 dbpath
= 'db_test_postingsource3'
1098 db
= xapian
.WritableDatabase(dbpath
, xapian
.DB_CREATE_OR_OVERWRITE
)
1101 doc
= xapian
.Document()
1102 doc
.add_value(1, xapian
.sortable_serialise(wt
))
1103 db
.add_document(doc
)
1105 class PyValuePostingSource(xapian
.ValuePostingSource
):
1106 def __init__(self
, slot
):
1107 xapian
.ValuePostingSource
.__init
__(self
, slot
)
1110 xapian
.ValuePostingSource
.init(self
, db
)
1112 slot
= self
.get_slot()
1113 ub
= db
.get_value_upper_bound(slot
)
1114 self
.set_maxweight(xapian
.sortable_unserialise(ub
) ** 3)
1116 def next(self
, minweight
):
1117 return xapian
.ValuePostingSource
.next(self
, minweight
)
1118 def get_weight(self
):
1119 value
= self
.get_value()
1120 return xapian
.sortable_unserialise(value
) ** 3
1122 source
= PyValuePostingSource(1)
1123 query
= xapian
.Query(source
)
1124 #del source # Check that query keeps a reference to it.
1126 enquire
= xapian
.Enquire(db
)
1127 enquire
.set_query(query
)
1128 mset
= enquire
.get_mset(0, 10)
1130 expect([item
.docid
for item
in mset
], [4, 2, 3, 1])
1133 expect(xapian
.Database
.check(dbpath
), 0)
1134 shutil
.rmtree(dbpath
)
1136 def test_value_stats():
1137 """Simple test of being able to get value statistics.
1140 dbpath
= 'db_test_value_stats'
1141 db
= xapian
.WritableDatabase(dbpath
, xapian
.DB_CREATE_OR_OVERWRITE
)
1143 vals
= (6, 9, 4.5, 4.4, 4.6, 2, 1, 4, 3, 0)
1144 for id in range(10):
1145 doc
= xapian
.Document()
1146 doc
.add_value(1, xapian
.sortable_serialise(vals
[id]))
1147 db
.add_document(doc
)
1149 expect(db
.get_value_freq(0), 0)
1150 expect(db
.get_value_lower_bound(0), b
"")
1151 expect(db
.get_value_upper_bound(0), b
"")
1152 expect(db
.get_value_freq(1), 10)
1153 expect(db
.get_value_lower_bound(1), xapian
.sortable_serialise(0))
1154 expect(db
.get_value_upper_bound(1), xapian
.sortable_serialise(9))
1155 expect(db
.get_value_freq(2), 0)
1156 expect(db
.get_value_lower_bound(2), b
"")
1157 expect(db
.get_value_upper_bound(2), b
"")
1160 expect(xapian
.Database
.check(dbpath
), 0)
1161 shutil
.rmtree(dbpath
)
1163 def test_get_uuid():
1164 """Test getting UUIDs from databases.
1167 dbpath
= 'db_test_get_uuid'
1168 db1
= xapian
.WritableDatabase(dbpath
+ "1", xapian
.DB_CREATE_OR_OVERWRITE
)
1169 db2
= xapian
.WritableDatabase(dbpath
+ "2", xapian
.DB_CREATE_OR_OVERWRITE
)
1170 dbr1
= xapian
.Database(dbpath
+ "1")
1171 dbr2
= xapian
.Database(dbpath
+ "2")
1172 expect(db1
.get_uuid() != db2
.get_uuid(), True)
1173 expect(db1
.get_uuid(), dbr1
.get_uuid())
1174 expect(db2
.get_uuid(), dbr2
.get_uuid())
1176 db
= xapian
.Database()
1177 db
.add_database(db1
)
1178 expect(db1
.get_uuid(), db
.get_uuid())
1185 shutil
.rmtree(dbpath
+ "1")
1186 shutil
.rmtree(dbpath
+ "2")
def test_director_exception():
    """Check that exceptions raised inside director callbacks propagate.

    Python subclasses of xapian.ExpandDecider and xapian.MatchDecider raise
    a custom exception from their __call__ methods.  expect_exception() is
    used to check that this exception (with message "foobar") is seen both
    when the decider is called directly and when it is passed to
    Enquire.get_eset() / Enquire.get_mset().

    """
    class TestException(Exception):
        # The message is the concatenation of the two arguments.
        def __init__(self, a, b):
            super().__init__(a + b)

    class EDecider(xapian.ExpandDecider):
        def __call__(self, term):
            raise TestException("foo", "bar")

    class MDecider(xapian.MatchDecider):
        def __call__(self, doc):
            raise TestException("foo", "bar")

    database = setup_database()
    enquire = xapian.Enquire(database)
    enquire.set_query(xapian.Query('it'))

    # Expand decider: direct call first, then via get_eset() with an RSet
    # containing document 1.
    relevant = xapian.RSet()
    relevant.add_document(1)
    expand_decider = EDecider()
    expect_exception(TestException, "foobar", expand_decider, "foo")
    expect_exception(TestException, "foobar",
                     enquire.get_eset, 10, relevant, expand_decider)

    # Match decider: direct call first, then via get_mset().
    match_decider = MDecider()
    expect_exception(TestException, "foobar", match_decider, xapian.Document())
    expect_exception(TestException, "foobar",
                     enquire.get_mset, 0, 10, None, match_decider)
def check_vals(db, vals):
    """Verify the slot 1 value of every document against vals.

    For each docid from 1 up to db.get_lastdocid() inclusive, the value
    stored in slot 1 of that document is compared, using expect(), with
    vals[docid].

    """
    last_docid = db.get_lastdocid()
    for docid in range(1, last_docid + 1):
        document = db.get_document(docid)
        stored = document.get_value(1)
        message = "Expected stored value in doc %d" % docid
        expect(stored, vals[docid], message)
1224 def test_value_mods():
1225 """Test handling of modifications to values.
1228 dbpath
= 'db_test_value_mods'
1229 db
= xapian
.WritableDatabase(dbpath
, xapian
.DB_CREATE_OR_OVERWRITE
)
1234 # Add a value to all the documents
1235 for num
in range(1, doccount
):
1236 doc
=xapian
.Document()
1237 val
= ('val%d' % num
).encode('utf-8')
1238 doc
.add_value(1, val
)
1239 db
.add_document(doc
)
1242 check_vals(db
, vals
)
1244 # Modify one of the values (this is a regression test which failed with the
1245 # initial implementation of streaming values).
1246 doc
= xapian
.Document()
1248 doc
.add_value(1, val
)
1249 db
.replace_document(2, doc
)
1252 check_vals(db
, vals
)
1254 # Do some random modifications.
1255 for count
in range(1, doccount
* 2):
1256 docid
= random
.randint(1, doccount
)
1257 doc
= xapian
.Document()
1262 val
= ('newval%d' % count
).encode('utf-8')
1263 doc
.add_value(1, val
)
1264 db
.replace_document(docid
, doc
)
1267 # Check the values before and after modification.
1268 check_vals(db
, vals
)
1270 check_vals(db
, vals
)
1272 # Delete all the values which are non-empty, in a random order.
1273 keys
= [key
for key
, val
in vals
.items() if val
!= '']
1274 random
.shuffle(keys
)
1276 doc
= xapian
.Document()
1277 db
.replace_document(key
, doc
)
1279 check_vals(db
, vals
)
1281 check_vals(db
, vals
)
1284 expect_exception(xapian
.DatabaseClosedError
, "Database has been closed", check_vals
, db
, vals
)
1285 shutil
.rmtree(dbpath
)
1287 def test_serialise_document():
1288 """Test serialisation of documents.
1291 doc
= xapian
.Document()
1292 doc
.add_term('foo', 2)
1293 doc
.add_value(1, b
'bar')
1296 doc2
= xapian
.Document
.unserialise(s
)
1297 expect(len(list(doc
.termlist())), len(list(doc2
.termlist())))
1298 expect(len(list(doc
.termlist())), 1)
1299 expect([(item
.term
, item
.wdf
) for item
in doc
.termlist()],
1300 [(item
.term
, item
.wdf
) for item
in doc2
.termlist()])
1301 expect([(item
.num
, item
.value
) for item
in list(doc
.values())],
1302 [(item
.num
, item
.value
) for item
in list(doc2
.values())])
1303 expect(doc
.get_data(), doc2
.get_data())
1304 expect(doc
.get_data(), b
'baz')
1306 db
= setup_database()
1307 doc
= db
.get_document(1)
1309 doc2
= xapian
.Document
.unserialise(s
)
1310 expect(len(list(doc
.termlist())), len(list(doc2
.termlist())))
1311 expect(len(list(doc
.termlist())), 3)
1312 expect([(item
.term
, item
.wdf
) for item
in doc
.termlist()],
1313 [(item
.term
, item
.wdf
) for item
in doc2
.termlist()])
1314 expect([(item
.num
, item
.value
) for item
in list(doc
.values())],
1315 [(item
.num
, item
.value
) for item
in list(doc2
.values())])
1316 expect(doc
.get_data(), doc2
.get_data())
1317 expect(doc
.get_data(), b
'is it cold?')
1319 def test_serialise_query():
1320 """Test serialisation of queries.
1324 q2
= xapian
.Query
.unserialise(q
.serialise())
1325 expect(str(q
), str(q2
))
1326 expect(str(q
), 'Query()')
1328 q
= xapian
.Query('hello')
1329 q2
= xapian
.Query
.unserialise(q
.serialise())
1330 expect(str(q
), str(q2
))
1331 expect(str(q
), 'Query(hello)')
1333 q
= xapian
.Query(xapian
.Query
.OP_OR
, ('hello', b
'world'))
1334 q2
= xapian
.Query
.unserialise(q
.serialise())
1335 expect(str(q
), str(q2
))
1336 expect(str(q
), 'Query((hello OR world))')
1338 def test_preserve_query_parser_stopper():
1339 """Test preservation of stopper set on query parser.
1343 queryparser
= xapian
.QueryParser()
1344 stopper
= xapian
.SimpleStopper()
1347 queryparser
.set_stopper(stopper
)
1350 queryparser
= make_qp()
1351 query
= queryparser
.parse_query('to be')
1352 expect([term
for term
in queryparser
.stoplist()], [b
'to'])
1354 def test_preserve_term_generator_stopper():
1355 """Test preservation of stopper set on term generator.
1359 termgen
= xapian
.TermGenerator()
1360 termgen
.set_stemmer(xapian
.Stem('en'))
1361 stopper
= xapian
.SimpleStopper()
1364 termgen
.set_stopper(stopper
)
1369 termgen
.index_text('to be')
1370 doc
= termgen
.get_document()
1371 terms
= [term
.term
for term
in doc
.termlist()]
1373 expect(terms
, [b
'Zbe', b
'be', b
'to'])
1375 def test_preserve_enquire_sorter():
1376 """Test preservation of sorter set on enquire.
1379 db
= xapian
.WritableDatabase('', xapian
.DB_BACKEND_INMEMORY
)
1380 doc
= xapian
.Document()
1382 doc
.add_value(1, '1')
1383 db
.add_document(doc
)
1384 db
.add_document(doc
)
1387 enq
= xapian
.Enquire(db
)
1388 sorter
= xapian
.MultiValueKeyMaker()
1389 enq
.set_sort_by_key(sorter
, False)
1393 enq
.set_query(xapian
.Query('foo'))
1397 enq
= xapian
.Enquire(db
)
1398 sorter
= xapian
.MultiValueKeyMaker()
1399 enq
.set_sort_by_key_then_relevance(sorter
, False)
1403 enq
.set_query(xapian
.Query('foo'))
1407 enq
= xapian
.Enquire(db
)
1408 sorter
= xapian
.MultiValueKeyMaker()
1409 enq
.set_sort_by_relevance_then_key(sorter
, False)
1413 enq
.set_query(xapian
.Query('foo'))
1416 def test_matchspy():
1417 """Test use of matchspies.
1420 db
= setup_database()
1421 query
= xapian
.Query(xapian
.Query
.OP_OR
, "was", "it")
1422 enq
= xapian
.Enquire(db
)
1423 enq
.set_query(query
)
1425 def set_matchspy_deref(enq
):
1426 """Set a matchspy, and then drop the reference, to check that it
1427 doesn't get deleted too soon.
1429 spy
= xapian
.ValueCountMatchSpy(0)
1430 enq
.add_matchspy(spy
)
1432 set_matchspy_deref(enq
)
1433 mset
= enq
.get_mset(0, 10)
1434 expect(len(mset
), 5)
1436 spy
= xapian
.ValueCountMatchSpy(0)
1437 enq
.add_matchspy(spy
)
1438 # Regression test for clear_matchspies() - used to always raise an
1439 # exception due to a copy and paste error in its definition.
1440 enq
.clear_matchspies()
1441 mset
= enq
.get_mset(0, 10)
1442 expect([item
for item
in list(spy
.values())], [])
1444 enq
.add_matchspy(spy
)
1445 mset
= enq
.get_mset(0, 10)
1446 expect(spy
.get_total(), 5)
1447 expect([(item
.term
, item
.termfreq
) for item
in list(spy
.values())], [
1448 (xapian
.sortable_serialise(1.5), 1),
1449 (xapian
.sortable_serialise(2), 2),
1451 expect([(item
.term
, item
.termfreq
) for item
in spy
.top_values(10)], [
1452 (xapian
.sortable_serialise(2), 2),
1453 (xapian
.sortable_serialise(1.5), 1),
def test_import_star():
    """Check that a module doing "from xapian import *" can be imported.

    Regression test: this failed in the 1.2.0 release.  Star imports are
    not normally considered good style, but they should still work.

    """
    # Importing the helper module is the whole test; the import itself
    # raises if it does not work.
    import test_xapian_star
def test_latlongcoords_iter():
    """Check that xapian.LatLongCoords can be iterated from Python.

    An empty container must yield nothing; after appending two coordinates
    iteration must yield them in insertion order.

    """
    points = xapian.LatLongCoords()
    # A freshly constructed container iterates as empty.
    expect([point for point in points], [])

    for first, second in ((0, 0), (0, 1)):
        points.append(xapian.LatLongCoord(first, second))

    expected = ['Xapian::LatLongCoord(0, 0)', 'Xapian::LatLongCoord(0, 1)']
    expect([str(point) for point in points], expected)
1477 def test_compactor():
1478 """Test that xapian.Compactor works.
1481 tmpdir
= tempfile
.mkdtemp()
1482 db1
= db2
= db3
= None
1484 db1path
= os
.path
.join(tmpdir
, 'db1')
1485 db2path
= os
.path
.join(tmpdir
, 'db2')
1486 db3path
= os
.path
.join(tmpdir
, 'db3')
1488 # Set up a couple of sample input databases
1489 db1
= xapian
.WritableDatabase(db1path
, xapian
.DB_CREATE_OR_OVERWRITE
)
1490 doc1
= xapian
.Document()
1491 doc1
.add_term('Hello')
1492 doc1
.add_term('Hello1')
1493 doc1
.add_value(0, 'Val1')
1494 db1
.set_metadata('key', '1')
1495 db1
.set_metadata('key1', '1')
1496 db1
.add_document(doc1
)
1499 db2
= xapian
.WritableDatabase(db2path
, xapian
.DB_CREATE_OR_OVERWRITE
)
1500 doc2
= xapian
.Document()
1501 doc2
.add_term('Hello')
1502 doc2
.add_term('Hello2')
1503 doc2
.add_value(0, 'Val2')
1504 db2
.set_metadata('key', '2')
1505 db2
.set_metadata('key2', '2')
1506 db2
.add_document(doc2
)
1509 db_to_compact
= xapian
.Database()
1510 db_to_compact
.add_database(xapian
.Database(db1path
))
1511 db_to_compact
.add_database(xapian
.Database(db2path
))
1512 # Compact with the default compactor
1513 # Metadata conflicts are resolved by picking the first value
1514 db_to_compact
.compact(db3path
)
1516 db3
= xapian
.Database(db3path
)
1517 expect([(item
.term
, item
.termfreq
) for item
in db3
.allterms()],
1518 [(b
'Hello', 2), (b
'Hello1', 1), (b
'Hello2', 1)])
1519 expect(db3
.get_document(1).get_value(0), b
'Val1')
1520 expect(db3
.get_document(2).get_value(0), b
'Val2')
1521 expect(db3
.get_metadata('key'), b
'1')
1522 expect(db3
.get_metadata('key1'), b
'1')
1523 expect(db3
.get_metadata('key2'), b
'2')
1525 context("testing a custom compactor which merges duplicate metadata")
1526 class MyCompactor(xapian
.Compactor
):
1528 xapian
.Compactor
.__init
__(self
)
1531 def set_status(self
, table
, status
):
1532 if len(status
) == 0:
1533 self
.log
.append('Starting %s' % table
.decode('utf-8'))
1535 self
.log
.append('%s: %s' % (table
.decode('utf-8'), status
.decode('utf-8')))
1537 def resolve_duplicate_metadata(self
, key
, vals
):
1538 return b
','.join(vals
)
1541 db_to_compact
= xapian
.Database()
1542 db_to_compact
.add_database(xapian
.Database(db1path
))
1543 db_to_compact
.add_database(xapian
.Database(db2path
))
1544 db_to_compact
.compact(db3path
, 0, 0, c
)
1545 log
= '\n'.join(c
.log
)
1546 # Check we got some messages in the log
1547 expect('Starting postlist' in log
, True)
1549 db3
= xapian
.Database(db3path
)
1550 expect([(item
.term
, item
.termfreq
) for item
in db3
.allterms()],
1551 [(b
'Hello', 2), (b
'Hello1', 1), (b
'Hello2', 1)])
1552 expect(db3
.get_metadata('key'), b
'1,2')
1553 expect(db3
.get_metadata('key1'), b
'1')
1554 expect(db3
.get_metadata('key2'), b
'2')
1564 shutil
.rmtree(tmpdir
)
1566 def test_custom_matchspy():
1567 class MSpy(xapian
.MatchSpy
):
1569 xapian
.MatchSpy
.__init
__(self
)
1572 def __call__(self
, doc
, weight
):
1577 db
= setup_database()
1578 query
= xapian
.Query(xapian
.Query
.OP_OR
, "was", "it")
1580 enquire
= xapian
.Enquire(db
)
1581 enquire
.add_matchspy(mspy
)
1582 enquire
.set_query(query
)
1583 mset
= enquire
.get_mset(0, 1)
1584 expect(len(mset
), 1)
1585 expect(mspy
.count
>= 1, True)
1587 expect(db
.get_doccount(), 5)
1589 def test_removed_features():
1591 db
= xapian
.WritableDatabase('', xapian
.DB_BACKEND_INMEMORY
)
1592 doc
= xapian
.Document()
1593 enq
= xapian
.Enquire(db
)
1594 eset
= xapian
.ESet()
1595 mset
= xapian
.MSet()
1596 query
= xapian
.Query()
1597 qp
= xapian
.QueryParser()
1598 titer
= xapian
._TermIterator
()
1599 postiter
= xapian
._PostingIterator
()
1601 def check_missing(obj
, attr
):
1602 expect_exception(AttributeError, None, getattr, obj
, attr
)
1604 check_missing(xapian
, 'Stem_get_available_languages')
1605 check_missing(xapian
, 'TermIterator')
1606 check_missing(xapian
, 'PositionIterator')
1607 check_missing(xapian
, 'PostingIterator')
1608 check_missing(xapian
, 'ValueIterator')
1609 check_missing(xapian
, 'MSetIterator')
1610 check_missing(xapian
, 'ESetIterator')
1611 check_missing(db
, 'allterms_begin')
1612 check_missing(db
, 'allterms_end')
1613 check_missing(db
, 'metadata_keys_begin')
1614 check_missing(db
, 'metadata_keys_end')
1615 check_missing(db
, 'synonym_keys_begin')
1616 check_missing(db
, 'synonym_keys_end')
1617 check_missing(db
, 'synonyms_begin')
1618 check_missing(db
, 'synonyms_end')
1619 check_missing(db
, 'spellings_begin')
1620 check_missing(db
, 'spellings_end')
1621 check_missing(db
, 'positionlist_begin')
1622 check_missing(db
, 'positionlist_end')
1623 check_missing(db
, 'postlist_begin')
1624 check_missing(db
, 'postlist_end')
1625 check_missing(db
, 'termlist_begin')
1626 check_missing(db
, 'termlist_end')
1627 check_missing(doc
, 'termlist_begin')
1628 check_missing(doc
, 'termlist_end')
1629 check_missing(doc
, 'values_begin')
1630 check_missing(doc
, 'values_end')
1631 check_missing(enq
, 'get_matching_terms_begin')
1632 check_missing(enq
, 'get_matching_terms_end')
1633 check_missing(eset
, 'begin')
1634 check_missing(eset
, 'end')
1635 check_missing(mset
, 'begin')
1636 check_missing(mset
, 'end')
1637 check_missing(postiter
, 'positionlist_begin')
1638 check_missing(postiter
, 'positionlist_end')
1639 check_missing(query
, 'get_terms_begin')
1640 check_missing(query
, 'get_terms_end')
1641 check_missing(qp
, 'stoplist_begin')
1642 check_missing(qp
, 'stoplist_end')
1643 check_missing(qp
, 'unstem_begin')
1644 check_missing(qp
, 'unstem_end')
1645 check_missing(titer
, 'positionlist_begin')
1646 check_missing(titer
, 'positionlist_end')
1649 # repr() returned None in 1.4.0.
1650 expect(repr(xapian
.Query('foo')) is None, False)
1651 expect(repr(xapian
.AssertionError('foo')) is None, False)
1652 expect(repr(xapian
.InvalidArgumentError('foo')) is None, False)
1653 expect(repr(xapian
.InvalidOperationError('foo')) is None, False)
1654 expect(repr(xapian
.UnimplementedError('foo')) is None, False)
1655 expect(repr(xapian
.DatabaseError('foo')) is None, False)
1656 expect(repr(xapian
.DatabaseClosedError('foo')) is None, False)
1657 expect(repr(xapian
.DatabaseCorruptError('foo')) is None, False)
1658 expect(repr(xapian
.DatabaseCreateError('foo')) is None, False)
1659 expect(repr(xapian
.DatabaseLockError('foo')) is None, False)
1660 expect(repr(xapian
.DatabaseModifiedError('foo')) is None, False)
1661 expect(repr(xapian
.DatabaseOpeningError('foo')) is None, False)
1662 expect(repr(xapian
.DatabaseVersionError('foo')) is None, False)
1663 expect(repr(xapian
.DatabaseNotFoundError('foo')) is None, False)
1664 expect(repr(xapian
.DocNotFoundError('foo')) is None, False)
1665 expect(repr(xapian
.FeatureUnavailableError('foo')) is None, False)
1666 expect(repr(xapian
.InternalError('foo')) is None, False)
1667 expect(repr(xapian
.NetworkError('foo')) is None, False)
1668 expect(repr(xapian
.NetworkTimeoutError('foo')) is None, False)
1669 expect(repr(xapian
.QueryParserError('foo')) is None, False)
1670 expect(repr(xapian
.SerialisationError('foo')) is None, False)
1671 expect(repr(xapian
.RangeError('foo')) is None, False)
1672 expect(repr(xapian
.WildcardError('foo')) is None, False)
1673 expect(repr(xapian
.Document()) is None, False)
1674 expect(repr(xapian
.Registry()) is None, False)
1675 expect(repr(xapian
.Query()) is None, False)
1676 expect(repr(xapian
.Stem('en')) is None, False)
1677 expect(repr(xapian
.TermGenerator()) is None, False)
1678 expect(repr(xapian
.MSet()) is None, False)
1679 expect(repr(xapian
.ESet()) is None, False)
1680 expect(repr(xapian
.RSet()) is None, False)
1681 expect(repr(xapian
.MultiValueKeyMaker()) is None, False)
1682 expect(repr(xapian
.SimpleStopper()) is None, False)
1683 expect(repr(xapian
.RangeProcessor()) is None, False)
1684 expect(repr(xapian
.DateRangeProcessor(1)) is None, False)
1685 expect(repr(xapian
.NumberRangeProcessor(1)) is None, False)
1686 expect(repr(xapian
.QueryParser()) is None, False)
1687 expect(repr(xapian
.BoolWeight()) is None, False)
1688 expect(repr(xapian
.TfIdfWeight()) is None, False)
1689 expect(repr(xapian
.BM25Weight()) is None, False)
1690 expect(repr(xapian
.BM25PlusWeight()) is None, False)
1691 expect(repr(xapian
.TradWeight()) is None, False)
1692 expect(repr(xapian
.InL2Weight()) is None, False)
1693 expect(repr(xapian
.IfB2Weight()) is None, False)
1694 expect(repr(xapian
.IneB2Weight()) is None, False)
1695 expect(repr(xapian
.BB2Weight()) is None, False)
1696 expect(repr(xapian
.DLHWeight()) is None, False)
1697 expect(repr(xapian
.PL2Weight()) is None, False)
1698 expect(repr(xapian
.PL2PlusWeight()) is None, False)
1699 expect(repr(xapian
.DPHWeight()) is None, False)
1700 expect(repr(xapian
.LM2StageWeight()) is None, False)
1701 expect(repr(xapian
.LMAbsDiscountWeight()) is None, False)
1702 expect(repr(xapian
.LMDirichletWeight()) is None, False)
1703 expect(repr(xapian
.LMJMWeight()) is None, False)
1704 expect(repr(xapian
.CoordWeight()) is None, False)
1705 expect(repr(xapian
.Compactor()) is None, False)
1706 expect(repr(xapian
.ValuePostingSource(1)) is None, False)
1707 expect(repr(xapian
.ValueWeightPostingSource(1)) is None, False)
1708 expect(repr(xapian
.DecreasingValueWeightPostingSource(1)) is None, False)
1709 expect(repr(xapian
.ValueMapPostingSource(1)) is None, False)
1710 expect(repr(xapian
.FixedWeightPostingSource(1)) is None, False)
1711 expect(repr(xapian
.ValueCountMatchSpy(1)) is None, False)
1712 expect(repr(xapian
.LatLongCoord()) is None, False)
1713 expect(repr(xapian
.LatLongCoords()) is None, False)
1714 expect(repr(xapian
.GreatCircleMetric()) is None, False)
1715 expect(repr(xapian
.Database()) is None, False)
1716 expect(repr(xapian
.WritableDatabase()) is None, False)
1718 def test_lone_surrogate():
1719 # Test that a lone surrogate in input data raises UnicodeEncodeError.
1720 # Regression test for bug fixed in 1.4.12 (previous versions quietly
1721 # skipped the lone surrogate when converting to UTF-8).
1722 noop_stemmer
= xapian
.Stem("none")
1724 term
= noop_stemmer(u
"a\udead0")
1725 raise TestFail("Lone surrogate accepted (output as %s)" % term
)
1726 except UnicodeEncodeError:
1731 # Run all tests (ie, callables with names starting "test_").
1734 if not runtests(globals(), sys
.argv
[1:]):
1737 print("Running tests without threads")
1741 print("Running tests with threads")
1743 # This testcase seems to just block when run in a thread under Python 3
1744 # on some platforms. It fails with 3.2.3 on Debian wheezy, but passes
1745 # with the exact same package version on Debian unstable not long after
1746 # the jessie release. The issue it's actually serving to regression
1747 # test for is covered by running it without threads, so just disable it
1748 # rather than risk test failures that don't seem to indicate a problem
1750 del test_import_star
1752 t
= threading
.Thread(name
='test runner', target
=run
)
1754 # Block until the thread has completed so the thread gets a chance to exit
1755 # with error status.
1761 # vim:syntax=python:set expandtab: