Refactor to avoid warning with GCC 12.2
[xapian.git] / xapian-bindings / python / smoketest.py
blobb5404067fd5243f829cc0ebaed5e4a0f5bb26698
1 # Simple test to ensure that we can load the xapian module and exercise basic
2 # functionality successfully.
4 # Copyright (C) 2004,2005,2006,2007,2008,2010,2011,2012,2014,2015,2016,2017,2019 Olly Betts
5 # Copyright (C) 2007 Lemur Consulting Ltd
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License as
9 # published by the Free Software Foundation; either version 2 of the
10 # License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20 # USA
22 import os
23 import sys
24 import xapian
26 from testsuite import *
# Ids of MyStemmer instances which have been constructed but not yet deleted.
mystemmers = set()
# Id which the next MyStemmer instance will be given.
mystemmer_id = 0


class MyStemmer(xapian.StemImplementation):
    """Stemmer which strips English vowels.

    Only the lowercase letters a, e, i, o and u are removed.  Every instance
    registers a unique id in the module-level ``mystemmers`` set when it is
    constructed and unregisters it in ``__del__``, so ids still present in
    the set identify instances which have not been deleted.
    """

    def __init__(self):
        # Only mystemmer_id is rebound here; mystemmers is mutated in place.
        global mystemmer_id
        super(MyStemmer, self).__init__()
        self._id = mystemmer_id
        mystemmers.add(self._id)
        mystemmer_id += 1

    def __call__(self, s):
        """Return `s` with the lowercase vowels removed."""
        import re
        return re.sub(r'[aeiou]', '', s)

    def __del__(self):
        """Unregister this instance, complaining if it was already removed."""
        # NOTE(review): Python reports exceptions raised from __del__ on
        # stderr rather than propagating them, so this TestFail may not
        # actually fail the test run - confirm.
        if self._id in mystemmers:
            mystemmers.remove(self._id)
            return
        raise TestFail("MyStemmer #%d deleted more than once" % self._id)
def test_all():
    """Smoke-test a broad cross-section of the wrapped xapian API.

    Builds documents, adds them to an in-memory WritableDatabase and then
    checks version reporting, Query construction, the database factory
    functions, the iterator wrappers, MatchDecider/ExpandDecider,
    QueryParser options, unicode handling, stoppers, TermGenerator, value
    range processors, FieldProcessor and metadata.

    Results are reported through expect(), expect_query(),
    expect_exception() and context() (presumably provided by the
    star-import from testsuite - confirm).

    The checks share state (stem, doc, db, enq, qp), so the order of the
    statements matters.
    """
    # Test the version number reporting functions give plausible results.
    v = "%d.%d.%d" % (xapian.major_version(),
                      xapian.minor_version(),
                      xapian.revision())
    v2 = xapian.version_string()
    expect(v2, v, "Unexpected version output")

    # A regexp check would be better, but seems to create a bogus "leak" of -1
    # objects in Python 3.
    expect(len(xapian.__version__.split('.')), 3, 'xapian.__version__ not X.Y.Z')
    expect((xapian.__version__.split('.'))[0], '1', 'xapian.__version__ not "1.Y.Z"')

    # Helper which touches xapian.cvar; the lines after the attribute access
    # only run if the attribute unexpectedly exists.
    def access_cvar():
        res = xapian.cvar
        print "Unhandled constants: ", res
        return res

    # Check that SWIG isn't generating cvar (regression test for ticket#297).
    expect_exception(AttributeError, "'module' object has no attribute 'cvar'",
                     access_cvar)

    stem = xapian.Stem("english")
    expect(str(stem), "Xapian::Stem(english)", "Unexpected str(stem)")

    # Check document data containing a zero byte round-trips intact.
    doc = xapian.Document()
    doc.set_data("a\0b")
    if doc.get_data() == "a":
        raise TestFail("get_data+set_data truncates at a zero byte")
    expect(doc.get_data(), "a\0b", "get_data+set_data doesn't transparently handle a zero byte")
    doc.set_data("is there anybody out there?")
    doc.add_term("XYzzy")
    # "there" is posted at positions 2 and 5; the checks further down expect
    # 5 distinct terms in this document and 2 positions for "there".
    doc.add_posting(stem("is"), 1)
    doc.add_posting(stem("there"), 2)
    doc.add_posting(stem("anybody"), 3)
    doc.add_posting(stem("out"), 4)
    doc.add_posting(stem("there"), 5)

    # Database opened with the DB_BACKEND_INMEMORY flag and an empty path.
    db = xapian.WritableDatabase('', xapian.DB_BACKEND_INMEMORY)
    db.add_document(doc)
    expect(db.get_doccount(), 1, "Unexpected db.get_doccount()")
    # Check Query can be built from a list, from tuples, and from a mix of
    # Query objects and strings.
    terms = ["smoke", "test", "terms"]
    expect_query(xapian.Query(xapian.Query.OP_OR, terms),
                 "(smoke OR test OR terms)")
    query1 = xapian.Query(xapian.Query.OP_PHRASE, ("smoke", "test", "tuple"))
    query2 = xapian.Query(xapian.Query.OP_XOR, (xapian.Query("smoke"), query1, "string"))
    expect_query(query1, "(smoke PHRASE 3 test PHRASE 3 tuple)")
    expect_query(query2, "(smoke XOR (smoke PHRASE 3 test PHRASE 3 tuple) XOR string)")
    subqs = ["a", "b"]
    expect_query(xapian.Query(xapian.Query.OP_OR, subqs), "(a OR b)")
    expect_query(xapian.Query(xapian.Query.OP_VALUE_RANGE, 0, '1', '4'),
                 "VALUE_RANGE 0 1 4")

    # Check database factory functions are wrapped as expected:

    expect_exception(xapian.DatabaseNotFoundError, None,
                     xapian.open_stub, "nosuchdir/nosuchdb")
    expect_exception(xapian.DatabaseNotFoundError, None,
                     xapian.open_stub, "nosuchdir/nosuchdb", xapian.DB_OPEN)

    expect_exception(xapian.DatabaseNotFoundError, None,
                     xapian.chert_open, "nosuchdir/nosuchdb")
    expect_exception(xapian.DatabaseCreateError, None,
                     xapian.chert_open, "nosuchdir/nosuchdb", xapian.DB_CREATE)

    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open, "/bin/false", "")
    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open_writable, "/bin/false", "")

    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open, "127.0.0.1", 0, 1)
    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open_writable, "127.0.0.1", 0, 1)

    # Check wrapping of MatchAll and MatchNothing:

    expect_query(xapian.Query.MatchAll, "<alldocuments>")
    expect_query(xapian.Query.MatchNothing, "")

    # Feature test for Query.__iter__
    term_count = 0
    for term in query2:
        term_count += 1
    expect(term_count, 4, "Unexpected number of terms in query2")

    enq = xapian.Enquire(db)

    # Check Xapian::BAD_VALUENO is wrapped suitably.
    enq.set_collapse_key(xapian.BAD_VALUENO)

    enq.set_query(xapian.Query(xapian.Query.OP_OR, "there", "is"))
    mset = enq.get_mset(0, 10)
    # Both mset.size() and len(mset) should report the single match.
    expect(mset.size(), 1, "Unexpected mset.size()")
    expect(len(mset), 1, "Unexpected mset.size()")

    # Feature test for Enquire.matching_terms(docid)
    term_count = 0
    for term in enq.matching_terms(mset.get_hit(0)):
        term_count += 1
    expect(term_count, 2, "Unexpected number of matching terms")

    # Feature test for MSet.__iter__
    msize = 0
    for match in mset:
        msize += 1
    expect(msize, mset.size(), "Unexpected number of entries in mset")

    terms = " ".join(enq.matching_terms(mset.get_hit(0)))
    expect(terms, "is there", "Unexpected terms")

    # Feature test for ESet.__iter__
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enq.get_eset(10, rset)
    term_count = 0
    for term in eset:
        term_count += 1
    expect(term_count, 3, "Unexpected number of expand terms")

    # Feature test for Database.__iter__
    term_count = 0
    for term in db:
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db")

    # Feature test for Database.allterms
    term_count = 0
    for term in db.allterms():
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db.allterms")

    # Feature test for Database.postlist
    count = 0
    for posting in db.postlist("there"):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('there')")

    # Feature test for Database.postlist with empty term (alldocspostlist)
    count = 0
    for posting in db.postlist(""):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('')")

    # Feature test for Database.termlist
    count = 0
    for term in db.termlist(1):
        count += 1
    expect(count, 5, "Unexpected number of entries in db.termlist(1)")

    # Feature test for Database.positionlist
    count = 0
    for term in db.positionlist(1, "there"):
        count += 1
    expect(count, 2, "Unexpected number of entries in db.positionlist(1, 'there')")

    # Feature test for Document.termlist
    count = 0
    for term in doc.termlist():
        count += 1
    expect(count, 5, "Unexpected number of entries in doc.termlist()")

    # Feature test for TermIter.skip_to
    term = doc.termlist()
    term.skip_to('n')
    # After skip_to('n') every remaining term must compare >= 'n'.
    while True:
        try:
            x = next(term)
        except StopIteration:
            break
        if x.term < 'n':
            raise TestFail("TermIter.skip_to didn't skip term '%s'" % x.term)

    # Feature test for Document.values
    count = 0
    for term in doc.values():
        count += 1
    expect(count, 0, "Unexpected number of entries in doc.values")

    # Check exception handling for Xapian::DocNotFoundError
    expect_exception(xapian.DocNotFoundError, "Docid 3 not found", db.get_document, 3)

    # Check value of OP_ELITE_SET
    expect(xapian.Query.OP_ELITE_SET, 10, "Unexpected value for OP_ELITE_SET")

    # Feature test for MatchDecider
    # Second document: it carries value "yes" in slot 0, and the mset check
    # below expects it to have docid 2.
    doc = xapian.Document()
    doc.set_data("Two")
    doc.add_posting(stem("out"), 1)
    doc.add_posting(stem("outside"), 1)
    doc.add_posting(stem("source"), 2)
    doc.add_value(0, "yes")
    db.add_document(doc)

    # Accepts only documents whose value in slot 0 is "yes".
    class testmatchdecider(xapian.MatchDecider):
        def __call__(self, doc):
            return doc.get_value(0) == "yes"

    query = xapian.Query(stem("out"))
    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 10, None, testmatchdecider())
    expect(mset.size(), 1, "Unexpected number of documents returned by match decider")
    expect(mset.get_docid(0), 2, "MatchDecider mset has wrong docid in")

    # Feature test for ExpandDecider
    # Rejects any term starting with 'a'.
    class testexpanddecider(xapian.ExpandDecider):
        def __call__(self, term):
            return (not term.startswith('a'))

    enquire = xapian.Enquire(db)
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enquire.get_eset(10, rset, xapian.Enquire.USE_EXACT_TERMFREQ, 1.0, testexpanddecider())
    eset_terms = [term[xapian.ESET_TNAME] for term in eset.items]
    expect(len(eset_terms), eset.size(), "Unexpected number of terms returned by expand")
    if [t for t in eset_terms if t.startswith('a')]:
        raise TestFail("ExpandDecider was not used")

    # Check min_wt argument to get_eset() works (new in 1.2.5).
    eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ)
    expect(eset.items[-1][xapian.ESET_WT] < 1.9, True, "test get_eset() without min_wt")
    eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ, 1.0, None, 1.9)
    expect(eset.items[-1][xapian.ESET_WT] >= 1.9, True, "test get_eset() min_wt")

    # Check QueryParser parsing error.
    qp = xapian.QueryParser()
    expect_exception(xapian.QueryParserError, "Syntax: <expression> AND <expression>", qp.parse_query, "test AND")

    # Check QueryParser pure NOT option
    qp = xapian.QueryParser()
    expect_query(qp.parse_query("NOT test", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT),
                 "(<alldocuments> AND_NOT test@1)")

    # Check QueryParser partial option
    # This qp instance is reused by the unicode and stopper checks below.
    qp = xapian.QueryParser()
    qp.set_database(db)
    qp.set_default_op(xapian.Query.OP_AND)
    qp.set_stemming_strategy(qp.STEM_SOME)
    qp.set_stemmer(xapian.Stem('en'))
    expect_query(qp.parse_query("foo o", qp.FLAG_PARTIAL),
                 "(Zfoo@1 AND (WILDCARD SYNONYM o OR Zo@2))")

    expect_query(qp.parse_query("foo outside", qp.FLAG_PARTIAL),
                 "(Zfoo@1 AND (WILDCARD SYNONYM outside OR Zoutsid@2))")

    # Test supplying unicode strings
    expect_query(xapian.Query(xapian.Query.OP_OR, (u'foo', u'bar')),
                 '(foo OR bar)')
    expect_query(xapian.Query(xapian.Query.OP_OR, ('foo', u'bar\xa3')),
                 '(foo OR bar\xc2\xa3)')
    expect_query(xapian.Query(xapian.Query.OP_OR, ('foo', 'bar\xc2\xa3')),
                 '(foo OR bar\xc2\xa3)')
    expect_query(xapian.Query(xapian.Query.OP_OR, u'foo', u'bar'),
                 '(foo OR bar)')

    expect_query(qp.parse_query(u"NOT t\xe9st", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT),
                 "(<alldocuments> AND_NOT Zt\xc3\xa9st@1)")

    # Unicode data and terms are expected to come back UTF-8 encoded.
    doc = xapian.Document()
    doc.set_data(u"Unicode with an acc\xe9nt")
    doc.add_posting(stem(u"out\xe9r"), 1)
    expect(doc.get_data(), u"Unicode with an acc\xe9nt".encode('utf-8'))
    term = doc.termlist().next().term
    expect(term, u"out\xe9r".encode('utf-8'))

    # Check simple stopper
    stop = xapian.SimpleStopper()
    qp.set_stopper(stop)
    expect(stop('a'), False)
    expect_query(qp.parse_query(u"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2 AND Za@3)")

    # Once 'a' has been added as a stopword it should drop out of the query.
    stop.add('a')
    expect(stop('a'), True)
    expect_query(qp.parse_query(u"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2)")

    # Feature test for custom Stopper
    # Treats exactly the term "b" as a stopword.
    class my_b_stopper(xapian.Stopper):
        def __call__(self, term):
            return term == "b"

        def get_description(self):
            return u"my_b_stopper"

    stop = my_b_stopper()
    expect(stop.get_description(), u"my_b_stopper")
    qp.set_stopper(stop)
    expect(stop('a'), False)
    expect_query(qp.parse_query(u"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2 AND Za@3)")

    expect(stop('b'), True)
    expect_query(qp.parse_query(u"foo bar b", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2)")

    # Test SimpleStopper initialised from a file.
    # The stopword file is located relative to $srcdir, falling back to the
    # current directory when that environment variable is not set.
    try:
        srcdir = os.environ['srcdir']
    except KeyError:
        srcdir = '.'
    stop = xapian.SimpleStopper(srcdir + '/../shortstop.list')
    expect(stop('a'), True)
    expect(stop('am'), False)
    expect(stop('an'), True)
    expect(stop('the'), True)

    expect_exception(xapian.InvalidArgumentError, None, xapian.SimpleStopper, 'nosuchfile')

    # Test TermGenerator
    termgen = xapian.TermGenerator()
    doc = xapian.Document()
    termgen.set_document(doc)
    termgen.index_text('foo bar baz foo')
    # Each entry compared is (term, wdf, list of positions).
    expect([(item.term, item.wdf, [pos for pos in item.positer]) for item in doc.termlist()], [('bar', 1, [2]), ('baz', 1, [3]), ('foo', 2, [1, 4])])

    # Check DateValueRangeProcessor works
    context("checking that DateValueRangeProcessor works")
    qp = xapian.QueryParser()
    vrpdate = xapian.DateValueRangeProcessor(1, 1, 1960)
    qp.add_valuerangeprocessor(vrpdate)
    query = qp.parse_query('12/03/99..12/04/01')
    expect(str(query), 'Query(VALUE_RANGE 1 19991203 20011204)')

    # Regression test for bug#193, fixed in 1.0.3.
    context("running regression test for bug#193")
    vrp = xapian.NumberValueRangeProcessor(0, '$', True)
    a = '$10'
    b = '20'
    # Calling the processor returns the slot plus the two processed bounds.
    slot, a, b = vrp(a, b)
    expect(slot, 0)
    expect(xapian.sortable_unserialise(a), 10)
    expect(xapian.sortable_unserialise(b), 20)

    # Feature test for xapian.FieldProcessor
    context("running feature test for xapian.FieldProcessor")
    # Maps any field value to Query("spam"), except that the value 'spam'
    # itself raises, which the exception check below relies on.
    class testfieldprocessor(xapian.FieldProcessor):
        def __call__(self, s):
            if s == 'spam':
                raise Exception('already spam')
            return xapian.Query("spam")

    qp.add_prefix('spam', testfieldprocessor())
    qp.add_boolean_prefix('boolspam', testfieldprocessor())
    qp.add_boolean_prefix('boolspam2', testfieldprocessor(), False) # Old-style
    qp.add_boolean_prefix('boolspam3', testfieldprocessor(), '')
    qp.add_boolean_prefix('boolspam4', testfieldprocessor(), 'group')
    qp.add_boolean_prefix('boolspam5', testfieldprocessor(), None)
    query = qp.parse_query('spam:ignored')
    expect(str(query), 'Query(spam)')

    expect_exception(Exception, 'already spam', qp.parse_query, 'spam:spam')

    # Regression tests copied from PHP (probably always worked in python, but
    # let's check...)
    context("running regression tests for issues which were found in PHP")

    # PHP overload resolution involving boolean types failed.
    enq.set_sort_by_value(1, True)

    # Regression test - fixed in 0.9.10.1.
    oqparser = xapian.QueryParser()
    oquery = oqparser.parse_query("I like tea")

    # Regression test for bug fixed in 1.4.4:
    # https://bugs.debian.org/849722
    oqparser.add_boolean_prefix('tag', 'K', '')
    # Make sure other cases also work:
    oqparser.add_boolean_prefix('zag', 'XR', False) # Old-style
    oqparser.add_boolean_prefix('rag', 'XR', None)
    oqparser.add_boolean_prefix('nag', 'XB', '')
    oqparser.add_boolean_prefix('bag', 'XB', 'blergh')
    oqparser.add_boolean_prefix('gag', 'XB', u'blergh')
    oqparser.add_boolean_prefix('jag', 'XB', b'blergh')

    # Regression test for bug#192 - fixed in 1.0.3.
    enq.set_cutoff(100)

    # Test setting and getting metadata
    expect(db.get_metadata('Foo'), '')
    db.set_metadata('Foo', 'Foo')
    expect(db.get_metadata('Foo'), 'Foo')
    # The empty-key get_metadata check is repeated after the rejected
    # set_metadata call.
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.get_metadata, '')
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.set_metadata, '', 'Foo')
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.get_metadata, '')

    # Test OP_SCALE_WEIGHT and corresponding constructor
    expect_query(xapian.Query(xapian.Query.OP_SCALE_WEIGHT, xapian.Query('foo'), 5),
                 "5 * foo")
def test_userstem():
    """Check a Python-implemented stemmer works wherever xapian.Stem does.

    One MyStemmer instance is wrapped by two separate xapian.Stem objects,
    and further instances are used via TermGenerator and QueryParser.
    """
    shared_impl = MyStemmer()
    first_stem = xapian.Stem(shared_impl)
    expect(first_stem('test'), 'tst')
    second_stem = xapian.Stem(shared_impl)
    expect(second_stem('toastie'), 'tst')

    termgen = xapian.TermGenerator()
    termgen.set_stemmer(xapian.Stem(MyStemmer()))

    document = xapian.Document()
    termgen.set_document(document)
    termgen.index_text('hello world')

    # Render the document's terms as '/term1/term2/.../' for one comparison.
    joined = '/' + ''.join(item.term + '/' for item in document.termlist())
    expect(joined, '/Zhll/Zwrld/hello/world/')

    qparser = xapian.QueryParser()
    qparser.set_stemmer(xapian.Stem(MyStemmer()))
    qparser.set_stemming_strategy(xapian.QueryParser.STEM_ALL)
    expect_query(qparser.parse_query('color television'), '(clr@1 OR tlvsn@2)')
def test_internal_enums_not_wrapped():
    """Check xapian.Query exposes no attributes whose name starts 'LEAF_'."""
    exposed = []
    for attr_name in dir(xapian.Query):
        if attr_name.startswith('LEAF_'):
            exposed.append(attr_name)
    expect(exposed, [])
def test_internals_not_wrapped():
    """Check no names ending in an underscore are exposed by the module.

    Both module-level names and the members of each class whose name starts
    with an uppercase letter are examined; double-underscore names are
    skipped.  Any offending name is collected (class members as
    'Class.member') and the collected list is expected to be empty.
    """
    internals = []
    for name in dir(xapian):
        # Skip Python stuff like __file__ and __version__.
        if name.startswith('__'):
            continue
        if name.endswith('_'):
            internals.append(name)
        # Skip non-classes
        if not name[0].isupper():
            continue
        # Plain attribute lookup; no need to eval() a constructed expression.
        cls = getattr(xapian, name)
        # Deliberately an exact type check rather than isinstance().
        if type(cls) is not type:
            continue
        for member in dir(cls):
            if member.startswith('__'):
                continue
            if member.endswith('_'):
                internals.append(name + '.' + member)

    expect(internals, [])
def test_zz9_check_leaks():
    """Fail if any MyStemmer instance is still registered after a GC pass.

    Presumably named "zz9" so that it runs after the other tests - confirm
    against the ordering used by runtests().
    """
    import gc
    gc.collect()
    remaining = len(mystemmers)
    if remaining:
        raise TestFail("%d MyStemmer objects not deleted" % remaining)
# Run all tests (ie, callables with names starting "test_") and exit with
# status 1 if runtests() reports failure.
_all_passed = runtests(globals())
if not _all_passed:
    sys.exit(1)
497 # vim:syntax=python:set expandtab: