[ci] Fix clang-santisers job for GHA change
[xapian.git] / xapian-bindings / python3 / smoketest.py
blob ed39f9d7742d1cbd1e3170cce9723eda444ecbba
1 # Simple test to ensure that we can load the xapian module and exercise basic
2 # functionality successfully.
4 # Copyright (C) 2004-2023 Olly Betts
5 # Copyright (C) 2007 Lemur Consulting Ltd
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License as
9 # published by the Free Software Foundation; either version 2 of the
10 # License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20 # USA
22 import sys
23 import re
24 import xapian
26 from testsuite import *
# Ids of the MyStemmer instances which have been created but not yet deleted.
# test_zz9_check_leaks() expects this to be empty once everything is collected.
mystemmers = set()
# Id which will be handed to the next MyStemmer instance created.
mystemmer_id = 0


class MyStemmer(xapian.StemImplementation):
    """Stemmer which strips English vowels.

    Each instance records a unique id in the module-level `mystemmers` set on
    construction and removes it again on deletion, so the tests can detect
    instances which are leaked or which are deleted more than once.
    """

    def __init__(self):
        # Only the counter is rebound, so only it needs declaring global; the
        # set is just mutated in place.
        global mystemmer_id
        super().__init__()
        self._id = mystemmer_id
        mystemmers.add(self._id)
        mystemmer_id += 1

    def __call__(self, s):
        """Return `s` with every lower case ASCII vowel removed."""
        vowels = br'[aeiou]'
        return re.sub(vowels, b'', s)

    def __del__(self):
        """Unregister this instance, complaining if that was already done."""
        if self._id in mystemmers:
            mystemmers.remove(self._id)
            return
        raise TestFail("MyStemmer #%d deleted more than once" % self._id)
def test_all():
    """Exercise a broad cross-section of the wrapped API in a single pass.

    The sections below are not independent: objects such as `stem`, `db`,
    `enq` and `qp` are created once and then reused (and modified) by later
    sections, so the order of the statements matters.  Failures are reported
    via the `expect*` helpers and `TestFail` from the testsuite module.
    """
    # Test the version number reporting functions give plausible results.
    v = "%d.%d.%d" % (xapian.major_version(),
                      xapian.minor_version(),
                      xapian.revision())
    v2 = xapian.version_string()
    expect(v2, v, "Unexpected version output")

    # A regexp check would be better, but seems to create a bogus "leak" of -1
    # objects in Python 3.
    expect(len(xapian.__version__.split('.')), 3, 'xapian.__version__ not X.Y.Z')
    expect((xapian.__version__.split('.'))[0], '1', 'xapian.__version__ not "1.Y.Z"')

    def access_cvar():
        # Only reached past the first line if xapian.cvar unexpectedly exists,
        # in which case show what it contains to help debugging.
        res = xapian.cvar
        print("Unhandled constants: ", res)
        return res

    # Check that SWIG isn't generating cvar (regression test for ticket#297).
    #
    # Python 3.5 generates a different exception message here to earlier
    # versions, so we need a check which matches both.
    expect_exception(AttributeError,
                     lambda msg: msg.find("has no attribute 'cvar'") != -1,
                     access_cvar)

    stem = xapian.Stem(b"english")
    expect(str(stem), "Xapian::Stem(english)", "Unexpected str(stem)")

    # Check that document data containing a zero byte round-trips intact.
    doc = xapian.Document()
    doc.set_data(b"a\0b")
    if doc.get_data() == b"a":
        raise TestFail("get_data+set_data truncates at a zero byte")
    expect(doc.get_data(), b"a\0b", "get_data+set_data doesn't transparently handle a zero byte")
    doc.set_data(b"is there anybody out there?")
    doc.add_term(b"XYzzy")
    doc.add_posting(stem(b"is"), 1)
    doc.add_posting(stem(b"there"), 2)
    doc.add_posting(stem(b"anybody"), 3)
    doc.add_posting(stem(b"out"), 4)
    doc.add_posting(stem(b"there"), 5)

    db = xapian.WritableDatabase('', xapian.DB_BACKEND_INMEMORY)
    db.add_document(doc)
    expect(db.get_doccount(), 1, "Unexpected db.get_doccount()")

    # Check constructing queries from lists and tuples of terms and/or
    # subqueries.
    terms = ["smoke", "test", "terms"]
    expect_query(xapian.Query(xapian.Query.OP_OR, [t.encode('utf-8') for t in terms]),
                 "(smoke OR test OR terms)")
    query1 = xapian.Query(xapian.Query.OP_PHRASE, (b"smoke", b"test", b"tuple"))
    query2 = xapian.Query(xapian.Query.OP_XOR, (xapian.Query(b"smoke"), query1, b"string"))
    expect_query(query1, "(smoke PHRASE 3 test PHRASE 3 tuple)")
    expect_query(query2, "(smoke XOR (smoke PHRASE 3 test PHRASE 3 tuple) XOR string)")
    subqs = ["a", "b"]
    expect_query(xapian.Query(xapian.Query.OP_OR, [s.encode('utf-8') for s in subqs]), "(a OR b)")
    expect_query(xapian.Query(xapian.Query.OP_VALUE_RANGE, 0, b'1', b'4'),
                 "VALUE_RANGE 0 1 4")

    # Check database factory functions are wrapped as expected:

    expect_exception(xapian.DatabaseNotFoundError, None,
                     lambda : xapian.Database(b"nosuchdir/nosuchdb", xapian.DB_BACKEND_STUB))
    expect_exception(xapian.DatabaseNotFoundError, None,
                     lambda : xapian.WritableDatabase(b"nosuchdir/nosuchdb", xapian.DB_OPEN|xapian.DB_BACKEND_STUB))

    expect_exception(xapian.DatabaseNotFoundError, None,
                     lambda : xapian.Database(b"nosuchdir/nosuchdb", xapian.DB_BACKEND_GLASS))
    expect_exception(xapian.DatabaseCreateError, None,
                     lambda : xapian.WritableDatabase(b"nosuchdir/nosuchdb", xapian.DB_CREATE|xapian.DB_BACKEND_GLASS))

    expect_exception(xapian.FeatureUnavailableError, None,
                     lambda : xapian.Database(b"nosuchdir/nosuchdb", xapian.DB_BACKEND_CHERT))
    expect_exception(xapian.FeatureUnavailableError, None,
                     lambda : xapian.WritableDatabase(b"nosuchdir/nosuchdb", xapian.DB_CREATE|xapian.DB_BACKEND_CHERT))

    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open, b"/bin/false", b"")
    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open_writable, b"/bin/false", b"")

    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open, b"127.0.0.1", 0, 1)
    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open_writable, b"127.0.0.1", 0, 1)

    # Check wrapping of MatchAll and MatchNothing:

    expect_query(xapian.Query.MatchAll, "<alldocuments>")
    expect_query(xapian.Query.MatchNothing, "")

    # Regression test for constructing OP_WILDCARD queries.
    expect_query(xapian.Query(xapian.Query.OP_WILDCARD, "wild"),
                 "WILDCARD SYNONYM wild")
    expect_query(xapian.Query(xapian.Query.OP_WILDCARD, b"wild"),
                 "WILDCARD SYNONYM wild")
    expect_query(xapian.Query(xapian.Query.OP_WILDCARD, "wild", 0),
                 "WILDCARD SYNONYM wild")
    expect_query(xapian.Query(xapian.Query.OP_WILDCARD, b"wild", 0),
                 "WILDCARD SYNONYM wild")

    # Feature test for Query.__iter__
    term_count = 0
    for term in query2:
        term_count += 1
    expect(term_count, 4, "Unexpected number of terms in query2")

    enq = xapian.Enquire(db)

    # Check Xapian::BAD_VALUENO is wrapped suitably.
    enq.set_collapse_key(xapian.BAD_VALUENO)

    enq.set_query(xapian.Query(xapian.Query.OP_OR, b"there", b"is"))
    mset = enq.get_mset(0, 10)
    expect(mset.size(), 1, "Unexpected mset.size()")
    # Use a distinct message here so that a failure of len() can be told
    # apart from a failure of size() above.
    expect(len(mset), 1, "Unexpected len(mset)")

    # Feature test for Enquire.matching_terms(docid)
    term_count = 0
    for term in enq.matching_terms(mset.get_hit(0)):
        term_count += 1
    expect(term_count, 2, "Unexpected number of matching terms")

    # Feature test for MSet.__iter__
    msize = 0
    for match in mset:
        msize += 1
    expect(msize, mset.size(), "Unexpected number of entries in mset")

    terms = b" ".join(enq.matching_terms(mset.get_hit(0)))
    expect(terms, b"is there", "Unexpected terms")

    # Feature test for ESet.__iter__
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enq.get_eset(10, rset)
    term_count = 0
    for term in eset:
        term_count += 1
    expect(term_count, 3, "Unexpected number of expand terms")

    # Feature test for Database.__iter__
    term_count = 0
    for term in db:
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db")

    # Feature test for Database.allterms
    term_count = 0
    for term in db.allterms():
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db.allterms")

    # Feature test for Database.postlist
    count = 0
    for posting in db.postlist(b"there"):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('there')")

    # Feature test for Database.postlist with empty term (alldocspostlist)
    count = 0
    for posting in db.postlist(b""):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('')")

    # Feature test for Database.termlist
    count = 0
    for term in db.termlist(1):
        count += 1
    expect(count, 5, "Unexpected number of entries in db.termlist(1)")

    # Feature test for Database.positionlist
    count = 0
    for term in db.positionlist(1, b"there"):
        count += 1
    expect(count, 2, "Unexpected number of entries in db.positionlist(1, 'there')")

    # Feature test for Document.termlist
    count = 0
    for term in doc.termlist():
        count += 1
    expect(count, 5, "Unexpected number of entries in doc.termlist()")

    # Feature test for TermIter.skip_to
    term = doc.termlist()
    term.skip_to(b'n')
    # Every term still to be returned after the skip must sort at or after
    # the skip target.
    while True:
        try:
            x = next(term)
        except StopIteration:
            break
        if x.term < b'n':
            raise TestFail("TermIter.skip_to didn't skip term '%s'" % x.term.decode('utf-8'))

    # Feature test for Document.values
    count = 0
    for term in list(doc.values()):
        count += 1
    expect(count, 0, "Unexpected number of entries in doc.values")

    # Check exception handling for Xapian::DocNotFoundError
    expect_exception(xapian.DocNotFoundError, "Docid 3 not found", db.get_document, 3)

    # Check value of OP_ELITE_SET
    expect(xapian.Query.OP_ELITE_SET, 10, "Unexpected value for OP_ELITE_SET")

    # Feature test for MatchDecider
    doc = xapian.Document()
    doc.set_data(b"Two")
    doc.add_posting(stem(b"out"), 1)
    doc.add_posting(stem(b"outside"), 1)
    doc.add_posting(stem(b"source"), 2)
    doc.add_value(0, b"yes")
    db.add_document(doc)

    class testmatchdecider(xapian.MatchDecider):
        # Accept only documents whose value slot 0 holds b"yes" - of the two
        # documents added above, only the second one has that value set.
        def __call__(self, doc):
            return doc.get_value(0) == b"yes"

    query = xapian.Query(stem(b"out"))
    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 10, None, testmatchdecider())
    expect(mset.size(), 1, "Unexpected number of documents returned by match decider")
    expect(mset.get_docid(0), 2, "MatchDecider mset has wrong docid in")

    # Feature test for ExpandDecider
    class testexpanddecider(xapian.ExpandDecider):
        # Reject any term starting with 'a'.
        def __call__(self, term):
            return (not term.startswith(b'a'))

    enquire = xapian.Enquire(db)
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enquire.get_eset(10, rset, xapian.Enquire.USE_EXACT_TERMFREQ, testexpanddecider())
    eset_terms = [item.term for item in eset]
    expect(len(eset_terms), eset.size(), "Unexpected number of terms returned by expand")
    if [t for t in eset_terms if t.startswith(b'a')]:
        raise TestFail("ExpandDecider was not used")

    # Check min_wt argument to get_eset() works (new in 1.2.5).
    eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ)
    expect([i.weight for i in eset][-1] < 1.9, True, "test get_eset() without min_wt")
    eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ, None, 1.9)
    expect([i.weight for i in eset][-1] >= 1.9, True, "test get_eset() min_wt")

    # Check QueryParser parsing error.
    qp = xapian.QueryParser()
    expect_exception(xapian.QueryParserError, "Syntax: <expression> AND <expression>", qp.parse_query, b"test AND")

    # Check QueryParser pure NOT option
    qp = xapian.QueryParser()
    expect_query(qp.parse_query(b"NOT test", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT),
                 "(0 * <alldocuments> AND_NOT test@1)")

    # Check QueryParser partial option
    qp = xapian.QueryParser()
    qp.set_database(db)
    qp.set_default_op(xapian.Query.OP_AND)
    qp.set_stemming_strategy(qp.STEM_SOME)
    qp.set_stemmer(xapian.Stem(b'en'))
    expect_query(qp.parse_query(b"foo ox", qp.FLAG_PARTIAL),
                 "(Zfoo@1 AND (WILDCARD SYNONYM ox OR Zox@2))")

    expect_query(qp.parse_query(b"foo outside", qp.FLAG_PARTIAL),
                 "(Zfoo@1 AND (WILDCARD SYNONYM outside OR Zoutsid@2))")

    # Test supplying byte strings, including ones holding non-ASCII data
    # (both a lone \xa3 byte and the two byte sequence \xc2\xa3).
    expect_query(xapian.Query(xapian.Query.OP_OR, (b'foo', b'bar')),
                 '(foo OR bar)')
    expect_query(xapian.Query(xapian.Query.OP_OR, (b'foo', b'bar\xa3')),
                 '(foo OR bar\\xa3)')
    expect_query(xapian.Query(xapian.Query.OP_OR, (b'foo', b'bar\xc2\xa3')),
                 '(foo OR bar\u00a3)')
    expect_query(xapian.Query(xapian.Query.OP_OR, b'foo', b'bar'),
                 '(foo OR bar)')

    expect_query(qp.parse_query(b"NOT t\xe9st", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT),
                 "(0 * <alldocuments> AND_NOT Zt\u00e9st@1)")

    doc = xapian.Document()
    doc.set_data(b"Unicode with an acc\xe9nt")
    doc.add_posting(stem(b"out\xe9r"), 1)
    expect(doc.get_data(), b"Unicode with an acc\xe9nt")
    term = next(doc.termlist()).term
    expect(term, b"out\xe9r")

    # Check simple stopper
    stop = xapian.SimpleStopper()
    qp.set_stopper(stop)
    expect(stop(b'a'), False)
    expect_query(qp.parse_query(b"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2 AND Za@3)")

    stop.add(b'a')
    expect(stop(b'a'), True)
    expect_query(qp.parse_query(b"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2)")

    # Feature test for custom Stopper
    class my_b_stopper(xapian.Stopper):
        # Treat only the term b"b" as a stopword.
        def __call__(self, term):
            return term == b"b"

        def get_description(self):
            return "my_b_stopper"

    stop = my_b_stopper()
    expect(stop.get_description(), "my_b_stopper")
    qp.set_stopper(stop)
    expect(stop(b'a'), False)
    expect_query(qp.parse_query(b"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2 AND Za@3)")

    expect(stop(b'b'), True)
    expect_query(qp.parse_query(b"foo bar b", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2)")

    # Test TermGenerator
    termgen = xapian.TermGenerator()
    doc = xapian.Document()
    termgen.set_document(doc)
    termgen.index_text(b'foo bar baz foo')
    expect([(item.term, item.wdf, [pos for pos in item.positer]) for item in doc.termlist()], [(b'bar', 1, [2]), (b'baz', 1, [3]), (b'foo', 2, [1, 4])])

    # Check DateRangeProcessor works
    context("checking that DateRangeProcessor works")
    qp = xapian.QueryParser()
    rpdate = xapian.DateRangeProcessor(1, xapian.RP_DATE_PREFER_MDY, 1960)
    qp.add_rangeprocessor(rpdate)
    query = qp.parse_query(b'12/03/99..12/04/01')
    expect(str(query), 'Query(VALUE_RANGE 1 19991203 20011204)')

    # Feature test for xapian.FieldProcessor
    context("running feature test for xapian.FieldProcessor")
    class testfieldprocessor(xapian.FieldProcessor):
        def __call__(self, s):
            # NOTE(review): the other callbacks in this test are handed bytes;
            # if that is also true here then this comparison against a str
            # can never match - confirm before enabling the FIXME'd check
            # below.
            if s == 'spam':
                raise Exception('already spam')
            return xapian.Query("spam")

    qp.add_prefix('spam', testfieldprocessor())
    qp.add_boolean_prefix('boolspam', testfieldprocessor())
    qp.add_boolean_prefix('boolspam2', testfieldprocessor(), False) # Old-style
    qp.add_boolean_prefix('boolspam3', testfieldprocessor(), '')
    qp.add_boolean_prefix('boolspam4', testfieldprocessor(), 'group')
    qp.add_boolean_prefix('boolspam5', testfieldprocessor(), None)
    query = qp.parse_query('spam:ignored')
    expect(str(query), 'Query(spam)')

    # FIXME: This doesn't currently work:
    # expect_exception(Exception, 'already spam', qp.parse_query, 'spam:spam')

    # Regression tests copied from PHP (probably always worked in python, but
    # let's check...)
    context("running regression tests for issues which were found in PHP")

    # PHP overload resolution involving boolean types failed.
    enq.set_sort_by_value(1, True)

    # Regression test - fixed in 0.9.10.1.
    oqparser = xapian.QueryParser()
    oquery = oqparser.parse_query(b"I like tea")

    # Regression test for bug fixed in 1.4.4:
    # https://bugs.debian.org/849722
    oqparser.add_boolean_prefix('tag', 'K', '')
    # Make sure other cases also work:
    oqparser.add_boolean_prefix('zag', 'XR', False) # Old-style
    oqparser.add_boolean_prefix('rag', 'XR', None)
    oqparser.add_boolean_prefix('nag', 'XB', '')
    oqparser.add_boolean_prefix('bag', 'XB', 'blergh')
    oqparser.add_boolean_prefix('jag', 'XB', b'blergh')

    # Regression test for bug#192 - fixed in 1.0.3.
    enq.set_cutoff(100)

    # Test setting and getting metadata
    expect(db.get_metadata(b'Foo'), b'')
    db.set_metadata(b'Foo', b'Foo')
    expect(db.get_metadata(b'Foo'), b'Foo')
    # The get_metadata() check is deliberately repeated after the failed
    # set_metadata() call.
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.get_metadata, b'')
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.set_metadata, b'', b'Foo')
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.get_metadata, b'')

    # Test OP_SCALE_WEIGHT and corresponding constructor
    expect_query(xapian.Query(xapian.Query.OP_SCALE_WEIGHT, xapian.Query(b'foo'), 5),
                 "5 * foo")
def test_userstem():
    """Check that a stemmer implemented in Python can be used from Xapian.

    MyStemmer is exercised directly via xapian.Stem, then via TermGenerator
    and QueryParser.  In the latter two cases the MyStemmer and Stem objects
    are temporaries which nothing on the Python side keeps a name for.
    """
    vowel_stripper = MyStemmer()
    first = xapian.Stem(vowel_stripper)
    expect(first(b'test'), b'tst')
    # A second Stem wrapping the same implementation object.
    second = xapian.Stem(vowel_stripper)
    expect(second(b'toastie'), b'tst')

    termgen = xapian.TermGenerator()
    termgen.set_stemmer(xapian.Stem(MyStemmer()))

    document = xapian.Document()
    termgen.set_document(document)
    termgen.index_text(b'hello world')

    # Join the document's terms, in iteration order, as "/term1/term2/.../".
    decoded = [entry.term.decode('utf-8') for entry in document.termlist()]
    joined = '/' + ''.join(name + '/' for name in decoded)
    expect(joined, '/Zhll/Zwrld/hello/world/')

    qparser = xapian.QueryParser()
    qparser.set_stemmer(xapian.Stem(MyStemmer()))
    qparser.set_stemming_strategy(xapian.QueryParser.STEM_ALL)
    expect_query(qparser.parse_query(b'color television'), '(clr@1 OR tlvsn@2)')
def test_internal_enums_not_wrapped():
    """Check that xapian.Query exposes no attribute named LEAF_*."""
    exposed = []
    for name in dir(xapian.Query):
        if name.startswith('LEAF_'):
            exposed.append(name)
    expect(exposed, [])
def test_internals_not_wrapped():
    """Check that no name ending in '_' is exposed by the xapian module.

    Both module-level names and the attributes of each class whose name
    starts with an upper case letter are checked; names starting with '__'
    are ignored.  Every offending name is collected so that a failure reports
    all of them at once.
    """
    internals = []
    for c in dir(xapian):
        # Skip Python stuff like __file__ and __version__.
        if c.startswith('__'):
            continue
        if c.endswith('_'):
            internals.append(c)
        # Skip non-classes
        if not c[0].isupper():
            continue
        # Look the attribute up directly rather than building source code
        # for eval().
        cls = getattr(xapian, c)
        # Deliberately an exact type check: only objects whose type is
        # precisely `type` are examined, as before.
        if type(cls) is not type:
            continue
        for m in dir(cls):
            if m.startswith('__'):
                continue
            if m.endswith('_'):
                internals.append(c + '.' + m)

    expect(internals, [])
def test_zz9_check_leaks():
    """Fail if any MyStemmer instance is still registered as alive.

    A garbage collection is forced first so that instances which are merely
    awaiting collection are not reported.  Presumably the name is chosen so
    this runs after the other tests - verify against runtests().
    """
    import gc
    gc.collect()
    remaining = len(mystemmers)
    if remaining:
        raise TestFail("%d MyStemmer objects not deleted" % remaining)
# Run all tests (ie, callables with names starting "test_"), and exit with
# status 1 if runtests() returns a false value.
if not runtests(globals()):
    raise SystemExit(1)
492 # vim:syntax=python:set expandtab: