Update for 1.2.25.5
[xapian.git] / search-xapian / Xapian.pm
blob1e0734f4a00793bf2bc5249ed5394e811f6270c1
1 package Search::Xapian;
3 use 5.006;
4 use strict;
5 use warnings;
7 our $VERSION = '1.2.25.5';
9 use Exporter 'import';
11 use Search::Xapian::Database;
12 use Search::Xapian::Document;
13 use Search::Xapian::ESet;
14 use Search::Xapian::ESetIterator;
15 use Search::Xapian::Error;
16 use Search::Xapian::MSet;
17 use Search::Xapian::MSetIterator;
18 use Search::Xapian::MultiValueSorter;
19 use Search::Xapian::PositionIterator;
20 use Search::Xapian::PostingIterator;
21 use Search::Xapian::Query;
22 use Search::Xapian::QueryParser;
23 use Search::Xapian::RSet;
24 use Search::Xapian::Stem;
25 use Search::Xapian::TermGenerator;
26 use Search::Xapian::TermIterator;
27 use Search::Xapian::ValueIterator;
28 use Search::Xapian::WritableDatabase;
30 use Search::Xapian::BM25Weight;
31 use Search::Xapian::BoolWeight;
32 use Search::Xapian::TradWeight;
34 use Search::Xapian::ValueCountMatchSpy;
36 use Search::Xapian::SimpleStopper;
37 use Search::Xapian::PerlStopper;
39 require DynaLoader;
41 our @ISA = qw(DynaLoader);
43 # We need to use the RTLD_GLOBAL flag to dlopen() so that other C++
44 # modules that link against libxapian.so get the *same* value for all the
45 # weak symbols (e.g. the exception classes).
46 sub dl_load_flags { 0x01 } # hook read by DynaLoader (our @ISA); 0x01 requests RTLD_GLOBAL
48 # These tags allow declarations such as: use Search::Xapian ':all';
49 # If you do not need this, moving things directly into @EXPORT or @EXPORT_OK
50 # will save memory.
51 our %EXPORT_TAGS = (
52 'ops' => [ qw(
53 OP_AND
54 OP_OR
55 OP_AND_NOT
56 OP_XOR
57 OP_AND_MAYBE
58 OP_FILTER
59 OP_NEAR
60 OP_PHRASE
61 OP_VALUE_RANGE
62 OP_SCALE_WEIGHT
63 OP_ELITE_SET
64 OP_VALUE_GE
65 OP_VALUE_LE
66 ) ], # end of ':ops' (query operators)
67 'db' => [ qw(
68 DB_OPEN
69 DB_CREATE
70 DB_CREATE_OR_OPEN
71 DB_CREATE_OR_OVERWRITE
72 ) ], # end of ':db' (database open/create modes)
73 'enq_order' => [ qw(
74 ENQ_DESCENDING
75 ENQ_ASCENDING
76 ENQ_DONT_CARE
77 ) ], # end of ':enq_order' (docid ordering)
78 'qpflags' => [ qw(
79 FLAG_BOOLEAN
80 FLAG_PHRASE
81 FLAG_LOVEHATE
82 FLAG_BOOLEAN_ANY_CASE
83 FLAG_WILDCARD
84 FLAG_PURE_NOT
85 FLAG_PARTIAL
86 FLAG_SPELLING_CORRECTION
87 FLAG_SYNONYM
88 FLAG_AUTO_SYNONYMS
89 FLAG_AUTO_MULTIWORD_SYNONYMS
90 FLAG_DEFAULT
91 ) ], # end of ':qpflags' (QueryParser flags)
92 'qpstem' => [ qw(
93 STEM_NONE
94 STEM_SOME
95 STEM_ALL
96 ) ] # end of ':qpstem' (QueryParser stemming strategies)
98 $EXPORT_TAGS{standard} = [ @{ $EXPORT_TAGS{'ops'} }, # ':standard' = ops + db + qpflags + qpstem
99 @{ $EXPORT_TAGS{'db'} },
100 @{ $EXPORT_TAGS{'qpflags'} },
101 @{ $EXPORT_TAGS{'qpstem'} } ];
102 $EXPORT_TAGS{all} = [ @{ $EXPORT_TAGS{'standard'} }, @{ $EXPORT_TAGS{'enq_order'} }, 'BAD_VALUENO' ]; # ':all' = ':standard' + enq_order + BAD_VALUENO
105 # Names which can be exported on request: everything in the ':all' tag.
106 our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
108 # Don't export any names by default.
109 our @EXPORT = qw( );
111 bootstrap Search::Xapian $VERSION; # load the compiled XS part via DynaLoader's bootstrap, passing $VERSION
113 # Preloaded methods go here.  The loops below fill reverse lookup tables (value -> name), one per export tag.
115 our @OP_NAMES; # element [value of an OP_* name] holds that name as a string
116 foreach (@{ $EXPORT_TAGS{'ops'} }) {
117 $OP_NAMES[eval $_] = $_; # string-eval of the name in $_ yields the index (presumably a constant defined by the XS code - confirm)
120 our @DB_NAMES; # same mapping for the DB_* names
121 foreach (@{ $EXPORT_TAGS{'db'} }) {
122 $DB_NAMES[eval $_] = $_; # index comes from evaluating the name, as for @OP_NAMES
125 our @FLAG_NAMES; # same mapping for the FLAG_* names
126 foreach (@{ $EXPORT_TAGS{'qpflags'} }) {
127 $FLAG_NAMES[eval $_] = $_; # index comes from evaluating the name, as for @OP_NAMES
130 our @STEM_NAMES; # same mapping for the STEM_* names
131 foreach (@{ $EXPORT_TAGS{'qpstem'} }) {
132 $STEM_NAMES[eval $_] = $_; # index comes from evaluating the name, as for @OP_NAMES
137 __END__
140 =head1 NAME
142 Search::Xapian - Perl XS frontend to the Xapian C++ search library.
144 =head1 SYNOPSIS
146 use Search::Xapian;
148 my $db = Search::Xapian::Database->new( '[DATABASE DIR]' );
149 my $enq = $db->enquire( '[QUERY TERM]' );
151 printf "Running query '%s'\n", $enq->get_query()->get_description();
153 my @matches = $enq->matches(0, 10);
155 print scalar(@matches) . " results found\n";
157 foreach my $match ( @matches ) {
158 my $doc = $match->get_document();
159 printf "ID %d %d%% [ %s ]\n", $match->get_docid(), $match->get_percent(), $doc->get_data();
162 =head1 DESCRIPTION
164 This module wraps most methods of most Xapian classes. The missing classes
165 and methods should be added in the future. It also provides a simplified,
166 more 'perlish' interface to some common operations, as demonstrated above.
168 There are some gaps in the POD documentation for wrapped classes, but you
169 can read the Xapian C++ API documentation at
170 L<https://xapian.org/docs/apidoc/html/annotated.html> for details of
171 these. Alternatively, take a look at the code in the examples and tests.
173 If you want to use Search::Xapian and the threads module together, make
174 sure you're using Search::Xapian >= 1.0.4.0 and Perl >= 5.8.7. As of 1.0.4.0,
175 Search::Xapian uses CLONE_SKIP to make sure that the perl wrapper objects
176 aren't copied to new threads - without this the underlying C++ objects can get
177 destroyed more than once.
179 If you encounter problems, or have any comments, suggestions, patches, etc.,
180 please email the Xapian-discuss mailing list (details of which can be found at
181 L<https://xapian.org/lists>).
183 =head2 EXPORT
185 None by default.
187 =head1 :db
189 =over 4
191 =item DB_OPEN
193 Open a database, fail if database doesn't exist.
195 =item DB_CREATE
197 Create a new database, fail if database exists.
199 =item DB_CREATE_OR_OPEN
201 Open an existing database, without destroying data, or create a new
202 database if one doesn't already exist.
204 =item DB_CREATE_OR_OVERWRITE
206 Overwrite database if it exists.
208 =back
210 =head1 :ops
212 =over 4
214 =item OP_AND
216 Match if both subqueries are satisfied.
218 =item OP_OR
220 Match if either subquery is satisfied.
222 =item OP_AND_NOT
224 Match if left but not right subquery is satisfied.
226 =item OP_XOR
228 Match if the left or the right subquery is satisfied, but not both.
230 =item OP_AND_MAYBE
232 Match if left is satisfied, but use weights from both.
234 =item OP_FILTER
236 Like OP_AND, but only weight using the left query.
238 =item OP_NEAR
240 Match if the words are near each other. The window should be specified as
241 a parameter to C<Search::Xapian::Query::Query>, but it defaults to the
242 number of terms in the list.
244 =item OP_PHRASE
246 Match as a phrase (all words in order).
248 =item OP_ELITE_SET
250 Select an elite set from the subqueries, and perform a query with these combined as an OR query.
252 =item OP_VALUE_RANGE
254 Filter by a range test on a document value.
256 =back
258 =head1 :qpflags
260 =over 4
262 =item FLAG_DEFAULT
264 This gives the QueryParser default flag settings, allowing you to easily add
265 flags to the default ones.
267 =item FLAG_BOOLEAN
269 Support AND, OR, etc and bracketed subexpressions.
271 =item FLAG_LOVEHATE
273 Support + and -.
275 =item FLAG_PHRASE
277 Support quoted phrases.
279 =item FLAG_BOOLEAN_ANY_CASE
281 Support AND, OR, etc even if they aren't in ALLCAPS.
283 =item FLAG_WILDCARD
285 Support right truncation (e.g. Xap*).
287 =item FLAG_PURE_NOT
289 Allow queries such as 'NOT apples'.
291 These require the use of a list of all documents in the database
292 which is potentially expensive, so this feature isn't enabled by
293 default.
295 =item FLAG_PARTIAL
297 Enable partial matching.
299 Partial matching causes the parser to treat the query as a
300 "partially entered" search. This will automatically treat the
301 final word as a wildcarded match, unless it is followed by
302 whitespace, to produce more stable results from interactive
303 searches.
305 =item FLAG_SPELLING_CORRECTION
307 =item FLAG_SYNONYM
309 =item FLAG_AUTO_SYNONYMS
311 =item FLAG_AUTO_MULTIWORD_SYNONYMS
313 =back
315 =head1 :qpstem
317 =over 4
319 =item STEM_ALL
321 Stem all terms.
323 =item STEM_NONE
325 Don't stem any terms.
327 =item STEM_SOME
329 Stem some terms, in a manner compatible with Omega (capitalised words and those
330 in phrases aren't stemmed).
332 =back
334 =head1 :enq_order
336 =over 4
338 =item ENQ_ASCENDING
340 docids sort in ascending order (default)
342 =item ENQ_DESCENDING
344 docids sort in descending order
346 =item ENQ_DONT_CARE
348 docids sort in whatever order is most efficient for the backend
350 =back
352 =head1 :standard
354 The :standard tag is :db + :ops + :qpflags + :qpstem.
356 =head1 Version functions
358 =over 4
360 =item major_version
362 Returns the major version of the Xapian C++ library being used. E.g. for
363 Xapian 1.0.9 this would return 1.
365 =item minor_version
367 Returns the minor version of the Xapian C++ library being used. E.g. for
368 Xapian 1.0.9 this would return 0.
370 =item revision
372 Returns the revision of the Xapian C++ library being used. E.g. for
373 Xapian 1.0.9 this would return 9. In a stable release series, Xapian libraries
374 with the same minor and major versions are usually ABI compatible, so this
375 often won't match the third component of $Search::Xapian::VERSION (which is the
376 version of the Search::Xapian XS wrappers).
378 =back
380 =head1 Numeric encoding functions
382 =over 4
384 =item sortable_serialise NUMBER
386 Convert a floating point number to a string, preserving sort order.
388 This method converts a floating point number to a string, suitable for
389 using as a value for numeric range restriction, or for use as a sort
390 key.
392 The conversion is platform independent.
394 The conversion attempts to ensure that, for any pair of values supplied
395 to the conversion algorithm, the result of comparing the original
396 values (with a numeric comparison operator) will be the same as the
397 result of comparing the resulting values (with a string comparison
398 operator). On platforms which represent doubles with the precisions
399 specified by IEEE_754, this will be the case: if the representation of
400 doubles is more precise, it is possible that two very close doubles
401 will be mapped to the same string, so will compare equal.
403 Note also that both zero and -zero will be converted to the same
404 representation: since these compare equal, this satisfies the
405 comparison constraint, but it's worth knowing this if you wish to use
406 the encoding in some situation where this distinction matters.
408 Handling of NaN isn't (currently) guaranteed to be sensible.
410 =item sortable_unserialise SERIALISED_NUMBER
412 Convert a string encoded using sortable_serialise back to a floating
413 point number.
415 This expects the input to be a string produced by sortable_serialise().
416 If the input is not such a string, the value returned is undefined (but
417 no error will be thrown).
419 The result of the conversion will be exactly the value which was
420 supplied to sortable_serialise() when making the string on platforms
421 which represent doubles with the precisions specified by IEEE_754, but
422 may be a different (nearby) value on other platforms.
424 =back
426 =head1 TODO
428 =over 4
430 =item Error Handling
432 Error handling for all methods liable to generate them.
434 =item Documentation
436 Add POD documentation for all classes, where possible just adapted from Xapian
437 docs.
439 =item Unwrapped classes
441 The following Xapian classes are not yet wrapped:
442 ErrorHandler, standard ExpandDecider subclasses
443 (user-defined ones work),
444 user-defined weight classes.
446 =item Unwrapped methods
448 The following methods are not yet wrapped:
449 Enquire::get_eset(...) with more than two arguments,
450 Query ctor optional "parameter" parameter,
451 Remote::open(...),
452 static Stem::get_available_languages().
454 We wrap MSet::swap() and MSet::operator[](), but not ESet::swap() or
455 ESet::operator[](). Is swap actually useful? Should we instead tie MSet
456 and ESet to allow them to just be used as lists?
458 =back
460 =head1 CREDITS
462 Thanks to Tye McQueen E<lt>tye@metronet.comE<gt> for explaining the
463 finer points of how best to write XS frontends to C++ libraries, James
464 Aylett E<lt>james@tartarus.orgE<gt> for clarifying the less obvious
465 aspects of the Xapian API, Tim Brody for patches wrapping ::QueryParser and
466 ::Stopper and especially Olly Betts E<lt>olly@survex.comE<gt> for contributing
467 advice, bugfixes, and wrapper code for the more obscure classes.
469 =head1 AUTHOR
471 Alex Bowley E<lt>kilinrax@cpan.orgE<gt>
473 Please report any bugs/suggestions to E<lt>xapian-discuss@lists.xapian.orgE<gt>
474 or use the Xapian bug tracker L<https://xapian.org/bugs>. Please do
475 NOT use the CPAN bug tracker or mail any of the authors individually.
477 =head1 LICENSE
479 This program is free software; you can redistribute it and/or modify
480 it under the same terms as Perl itself.
482 =head1 SEE ALSO
484 L<Search::Xapian::BM25Weight>,
485 L<Search::Xapian::BoolWeight>,
486 L<Search::Xapian::Database>,
487 L<Search::Xapian::Document>,
488 L<Search::Xapian::Enquire>,
489 L<Search::Xapian::MatchSpy>,
490 L<Search::Xapian::MultiValueSorter>,
491 L<Search::Xapian::PositionIterator>,
492 L<Search::Xapian::PostingIterator>,
493 L<Search::Xapian::QueryParser>,
494 L<Search::Xapian::Stem>,
495 L<Search::Xapian::TermGenerator>,
496 L<Search::Xapian::TermIterator>,
497 L<Search::Xapian::TradWeight>,
498 L<Search::Xapian::ValueIterator>,
499 L<Search::Xapian::Weight>,
500 L<Search::Xapian::WritableDatabase>,
502 L<https://xapian.org/>.
504 =cut