2 /* perl
/extra.i
: custom Perl code for xapian-bindings
4 * Based on the perl XS wrapper files.
6 * Copyright
(C
) 2009 Kosei Moriyama
7 * Copyright
(C
) 2011,2012,2013,2015,2016,2018,2019,2020 Olly Betts
9 * This program is free software
; you can redistribute it and
/or
10 * modify it under the terms of the GNU General Public License as
11 * published by the Free Software Foundation
; either version
2 of the
12 * License
, or
(at your option
) any later version.
14 * This program is distributed in the hope that it will be useful
,
15 * but WITHOUT
ANY WARRANTY
; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program
; if not
, write to the Free Software
21 * Foundation
, Inc.
, 51 Franklin St
, Fifth Floor
, Boston
, MA
02110-1301
# Version of these Perl wrappers (exposed as $Xapian::VERSION).
# NOTE(review): PERL_XAPIAN_VERSION is not defined in the visible text --
# presumably it is substituted when the bindings are built; confirm.
our $VERSION = PERL_XAPIAN_VERSION;
# We need to use the RTLD_GLOBAL flag to dlopen() so that other C++
# modules that link against libxapian.so get the *same* value for all the
# weak symbols (eg, the exception classes).
#
# Returns the load flag value 0x01 (the RTLD_GLOBAL request described above).
sub dl_load_flags {
    return 0x01;
}
38 # Items to export into caller's namespace by default. Note
: do not export
39 # names by default without a very good reason. Use EXPORT_OK instead.
40 # Do not simply export all your public functions
/methods
/constants.
42 # This allows declaration use Xapian '
:all'
;
43 # If you do not need this
, moving things directly into @
EXPORT or @EXPORT_OK
65 DB_CREATE_OR_OVERWRITE
86 FLAG_SPELLING_CORRECTION
89 FLAG_AUTO_MULTIWORD_SYNONYMS
# ':standard' is the concatenation of the 'ops', 'db', 'qpflags' and
# 'qpstem' tag lists, in that order.
$EXPORT_TAGS{standard} = [
    @{ $EXPORT_TAGS{'ops'} },
    @{ $EXPORT_TAGS{'db'} },
    @{ $EXPORT_TAGS{'qpflags'} },
    @{ $EXPORT_TAGS{'qpstem'} },
];

# ':all' is everything in ':standard' followed by the 'enq_order' names.
$EXPORT_TAGS{all} = [
    @{ $EXPORT_TAGS{'standard'} },
    @{ $EXPORT_TAGS{'enq_order'} },
];

# Every name in ':all' may be imported on request.
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
111 # Preloaded methods go here.
114 foreach
(@
{ $EXPORT_TAGS
{'ops'
} }) {
115 $OP_NAMES
[eval $_
] = $_
;
119 foreach
(@
{ $EXPORT_TAGS
{'db'
} }) {
120 $DB_NAMES
[eval $_
] = $_
;
124 foreach
(@
{ $EXPORT_TAGS
{'qpflags'
} }) {
125 $FLAG_NAMES
[eval $_
] = $_
;
129 foreach
(@
{ $EXPORT_TAGS
{'qpstem'
} }) {
130 $STEM_NAMES
[eval $_
] = $_
;
# Compatibility wrapping for Xapian::BAD_VALUENO (wrapped as a constant since
# xapian-bindings 1.4.10).  The package variable below holds the value the
# constant returns -- presumably so code using $Xapian::BAD_VALUENO keeps
# working; confirm against callers.
our $BAD_VALUENO = BAD_VALUENO;
# Set up aliases in the Search::Xapian namespace so that code which refers
# to Search::Xapian can actually use this module instead.
#
# Takes no arguments.  The whole Search::Xapian:: symbol table is pointed at
# Xapian:: first, and then individual globs are aliased to this package's
# variables, constants and functions.
sub search_xapian_compat {
    # Alias the symbol table itself.
    *Search::Xapian:: = \%Xapian::;

    # Package variables.
    *Search::Xapian::VERSION = \$VERSION;
    *Search::Xapian::OP_NAMES = \@OP_NAMES;
    *Search::Xapian::DB_NAMES = \@DB_NAMES;
    *Search::Xapian::FLAG_NAMES = \@FLAG_NAMES;
    *Search::Xapian::STEM_NAMES = \@STEM_NAMES;

    # Constants.
    *Search::Xapian::BAD_VALUENO = \&BAD_VALUENO;
    *Search::Xapian::DB_OPEN = \&DB_OPEN;
    *Search::Xapian::DB_CREATE = \&DB_CREATE;
    *Search::Xapian::DB_CREATE_OR_OPEN = \&DB_CREATE_OR_OPEN;
    *Search::Xapian::DB_CREATE_OR_OVERWRITE = \&DB_CREATE_OR_OVERWRITE;

    # Version and numeric-encoding functions.
    *Search::Xapian::version_string = \&version_string;
    *Search::Xapian::major_version = \&major_version;
    *Search::Xapian::minor_version = \&minor_version;
    *Search::Xapian::revision = \&revision;
    *Search::Xapian::sortable_serialise = \&sortable_serialise;
    *Search::Xapian::sortable_unserialise = \&sortable_unserialise;
}
157 package Xapian
::Database
;
160 my $enquire
= Xapian
::Enquire-
>new
( $self
);
162 $enquire-
>set_query
( @_
);
167 package Xapian
::Enquire
;
170 return $self-
>get_mset
(@_
)->items
();
173 package Xapian
::ESet
;
177 tie
( @array
, 'Xapian
::ESet'
, $self
);
181 use overload '
++'
=> sub
{ $_
[0]->inc
() },
182 '
--'
=> sub
{ $_
[0]->dec
() },
183 '
='
=> sub
{ $_
[0]->clone
() },
184 'eq'
=> sub
{ $_
[0]->equal
($_
[1]) },
185 'ne'
=> sub
{ $_
[0]->nequal
($_
[1]) },
186 '
=='
=> sub
{ $_
[0]->equal
($_
[1]) },
187 '
!='
=> sub
{ $_
[0]->nequal
($_
[1]) },
192 my $class
= ref
( $self
);
193 my $copy
= new
( $self
);
201 return bless $eset
, $class
;
206 return $self-
>size
();
209 package Xapian
::ESetIterator
;
210 use overload '
++'
=> sub
{ $_
[0]->inc
() },
211 '
--'
=> sub
{ $_
[0]->dec
() },
212 '
='
=> sub
{ $_
[0]->clone
() },
213 'eq'
=> sub
{ $_
[0]->equal
($_
[1]) },
214 'ne'
=> sub
{ $_
[0]->nequal
($_
[1]) },
215 '
=='
=> sub
{ $_
[0]->equal
($_
[1]) },
216 '
!='
=> sub
{ $_
[0]->nequal
($_
[1]) },
221 my $class
= ref
( $self
);
222 my $copy
= new
( $self
);
227 package Xapian
::MSet
;
231 tie
( @array
, 'Xapian
::MSet
::Tied'
, $self
);
238 return bless $mset
, $class
;
243 return $self-
>size
();
246 package Xapian
::MSetIterator
;
247 use overload '
++'
=> sub
{ $_
[0]->inc
() },
248 '
--'
=> sub
{ $_
[0]->dec
() },
249 '
='
=> sub
{ $_
[0]->clone
() },
250 'eq'
=> sub
{ $_
[0]->equal
($_
[1]) },
251 'ne'
=> sub
{ $_
[0]->nequal
($_
[1]) },
252 '
=='
=> sub
{ $_
[0]->equal
($_
[1]) },
253 '
!='
=> sub
{ $_
[0]->nequal
($_
[1]) },
258 my $class
= ref
( $self
);
# Xapian::MSet::Tied is the class name Xapian::MSet passes to tie(); it adds
# nothing of its own here and inherits from Xapian::MSet.
package Xapian::MSet::Tied;
our @ISA = qw(Xapian::MSet);
266 package Xapian
::PositionIterator
;
267 use overload '
++'
=> sub
{ $_
[0]->inc
() },
268 '
='
=> sub
{ $_
[0]->clone
() },
269 'eq'
=> sub
{ $_
[0]->equal
($_
[1]) },
270 'ne'
=> sub
{ $_
[0]->nequal
($_
[1]) },
271 '
=='
=> sub
{ $_
[0]->equal
($_
[1]) },
272 '
!='
=> sub
{ $_
[0]->nequal
($_
[1]) },
277 my $class
= ref
( $self
);
278 my $copy
= new
( $self
);
283 package Xapian
::PostingIterator
;
284 use overload '
++'
=> sub
{ $_
[0]->inc
() },
285 '
='
=> sub
{ $_
[0]->clone
() },
286 'eq'
=> sub
{ $_
[0]->equal
($_
[1]) },
287 'ne'
=> sub
{ $_
[0]->nequal
($_
[1]) },
288 '
=='
=> sub
{ $_
[0]->equal
($_
[1]) },
289 '
!='
=> sub
{ $_
[0]->nequal
($_
[1]) },
294 my $class
= ref
( $self
);
295 my $copy
= new
( $self
);
300 package Xapian
::TermGenerator
;
302 my
($self
, $stopper
) = @_
;
303 $self
{_stopper
} = $stopper
;
307 package Xapian
::TermIterator
;
308 use overload '
++'
=> sub
{ $_
[0]->inc
() },
309 '
='
=> sub
{ $_
[0]->clone
() },
310 'eq'
=> sub
{ $_
[0]->equal
($_
[1]) },
311 'ne'
=> sub
{ $_
[0]->nequal
($_
[1]) },
312 '
=='
=> sub
{ $_
[0]->equal
($_
[1]) },
313 '
!='
=> sub
{ $_
[0]->nequal
($_
[1]) },
318 my $class
= ref
( $self
);
319 my $copy
= new
( $self
);
324 package Xapian
::ValueIterator
;
325 use overload '
++'
=> sub
{ $_
[0]->inc
() },
326 '
='
=> sub
{ $_
[0]->clone
() },
327 'eq'
=> sub
{ $_
[0]->equal
($_
[1]) },
328 'ne'
=> sub
{ $_
[0]->nequal
($_
[1]) },
329 '
=='
=> sub
{ $_
[0]->equal
($_
[1]) },
330 '
!='
=> sub
{ $_
[0]->nequal
($_
[1]) },
335 my $class
= ref
( $self
);
336 my $copy
= new
( $self
);
341 # Adding CLONE_SKIP functions
342 package Xapian
::LogicError
;
344 package Xapian
::PositionIterator
;
346 package Xapian
::PostingIterator
;
348 package Xapian
::TermIterator
;
350 package Xapian
::ValueIterator
;
352 package Xapian
::Document
;
354 package Xapian
::PostingSource
;
356 package Xapian
::ValuePostingSource
;
358 package Xapian
::ValueWeightPostingSource
;
360 package Xapian
::ValueMapPostingSource
;
362 package Xapian
::FixedWeightPostingSource
;
364 package Xapian
::MSet
;
366 package Xapian
::MSetIterator
;
368 package Xapian
::ESet
;
370 package Xapian
::ESetIterator
;
372 package Xapian
::RSet
;
374 package Xapian
::MatchDecider
;
376 package Xapian
::Enquire
;
378 package Xapian
::Weight
;
380 package Xapian
::BoolWeight
;
382 package Xapian
::BM25Weight
;
384 package Xapian
::TradWeight
;
386 package Xapian
::Database
;
388 package Xapian
::WritableDatabase
;
package Xapian::Query;

# Returns the query built by Xapianc::new_Query from a single empty-string
# argument (by its name, the query that matches every document).
sub MatchAll {
    return Xapianc::new_Query('');
}

# Returns the query built by Xapianc::new_Query with no arguments (by its
# name, the query that matches no documents).
sub MatchNothing {
    return Xapianc::new_Query();
}
394 package Xapian
::Stopper
;
396 package Xapian
::SimpleStopper
;
398 package Xapian
::RangeProcessor
;
400 package Xapian
::DateRangeProcessor
;
402 package Xapian
::NumberRangeProcessor
;
404 package Xapian
::FieldProcessor
;
406 package Xapian
::QueryParser
;
408 package Xapian
::Stem
;
410 package Xapian
::TermGenerator
;
412 package Xapian
::Sorter
;
414 package Xapian
::MultiValueSorter
;
416 package Xapian
::ReplicationInfo
;
418 package Xapian
::DatabaseMaster
;
420 package Xapian
::DatabaseReplica
;
422 package Xapian
::ValueSetMatchDecider
;
424 package Xapian
::SerialisationContext
;
426 package Xapian
::MSet
::Tied
;
429 # Pod document of Xapian
433 Xapian
- Perl frontend to the Xapian C
++ search library.
439 my $parser
= Xapian
::QueryParser-
>new
();
440 my $query
= $parser-
>parse_query
( '
[QUERY STRING]'
);
442 my $db
= Xapian
::Database-
>new
( '
[DATABASE DIR]'
);
443 my $enq
= $db-
>enquire
();
445 printf
"Running query '%s'\n", $query-
>get_description
();
447 $enq-
>set_query
( $query
);
448 my @matches
= $enq-
>matches
(0, 10);
450 print scalar
(@matches
) .
" results found\n";
452 foreach my $match
( @matches
) {
453 my $doc
= $match-
>get_document
();
454 printf
"ID %d %d%% [ %s ]\n", $match-
>get_docid
(), $match-
>get_percent
(), $doc-
>get_data
();
459 This module is a pretty-much complete wrapping of the Xapian C
++ API. The
460 main omissions are features which aren't useful to wrap for Perl
, such as
461 Xapian
::UTF8Iterator.
463 This module is generated using SWIG. It is intended as a replacement for
464 the older Search
::Xapian module which is easier to keep up to date and
465 which more completely wraps the C
++ API. It is largely compatible with
466 Search
::Xapian
, but see the COMPATIBILITY section below if you have code using
467 Search
::Xapian which you want to get working with this new module.
469 There are some gaps in the POD documentation for wrapped classes
, but you
470 can read the Xapian C
++ API documentation at
471 L
<https
://xapian.org
/docs
/apidoc
/html
/annotated.html
> for details of
472 these. Alternatively
, take a look at the code in the examples and tests.
474 If you want to use Xapian and the threads module together
, make
475 sure you're using Perl
>= 5.8.7 as then Xapian uses CLONE_SKIP to make sure
476 that the perl wrapper objects aren't copied to new threads
- without this the
477 underlying C
++ objects can get destroyed more than once which leads to
480 If you encounter problems
, or have any comments
, suggestions
, patches
, etc
481 please email the Xapian-discuss mailing list
(details of which can be found at
482 L
<https
://xapian.org
/lists
>).
486 This module is mostly compatible with Search
::Xapian. The following are known
487 differences
, with details of how to write code which works with both.
489 Search
::Xapian overloads stringification
- e.g. C
<"$query"> is equivalent to
490 C
<$query-E
<gt
>get_description
()>, while C
<"$termiterator"> is equivalent to
491 C
<$termiterator-E
<gt
>get_term
()>. This module doesn't support overloaded
492 stringification
, so you should instead explicitly call the method you
493 want. The technical reason for this change is that stringification is hard to
494 support in SWIG-generated bindings
, but this context-sensitive stringification
495 where the operation performed depends on the object type seems unhelpful in
498 Search
::Xapian overloads conversion to an integer for some classes
- e.g.
C<0+$positioniterator> is equivalent to C<$positioniterator-E<gt>get_termpos()>
while C<0+$postingiterator> is equivalent to C<$postingiterator-E<gt>get_docid()>.
501 This module doesn't provide these overloads so you should instead explicitly
502 call the method you want. As above
, we think this context-sensitive behaviour
503 wasn't helpful in hindsight.
505 This module is fussier about whether a passed scalar value is a string or
506 an integer than Search
::Xapian
, so e.g. C
<Xapian
::Query-E
<gt
>new
(2001)> will fail
507 but the equivalent worked with Search
::Xapian. If C
<$term
> might not be a
508 string use C
<Xapian
::Query-E
<gt
>new
("$term")> to ensure it is converted to a
509 string. Whether explicit stringification is needed depends on whether the
510 scalar is marked as having a string representation by Perl
; prior to Perl
511 5.36.0 retrieving the string value of an integer could set this flag
, but
512 that's no longer the case in Perl
5.36.0 and later. The simple rule is to
513 always explicitly stringify if the value might be numeric.
515 This behaviour isn't very Perlish
, but is likely to be hard to address
516 universally as it comes from SWIG. Let us know if you find particular places
517 where it's annoying and we can look at addressing those.
519 Both this module and Search
::Xapian support passing a Perl sub
(which can be
520 anonymous
) for the functor classes C
<MatchDecider
> and C
<ExpandDecider
>. In
521 some cases Search
::Xapian accepts a string naming a Perl sub
, but this module
522 never accepts this. Instead of passing C
<"::mymatchdecider">, pass
523 C
<\
&mymatchdecider> which will work with either module. If you really want to
524 dynamically specify the function name
, you can pass C
<sub
{eval
525 "&$dynamicmatchdecider"}>.
527 Search
::Xapian provides a PerlStopper class which is supposed to be
528 subclassable in Perl to implement your own stopper
, but this mechanism doesn't
529 actually seem to work. This module instead supports user-implemented stoppers
530 by accepting a Perl sub in place of a Stopper object.
532 =head3 Importing Either Module
If you want your code to use either this module or Search::Xapian depending
on what's installed, then instead of C<use Search::Xapian (':all');> you can use:
540 Xapian-
>import
('
:all'
);
541 Xapian
::search_xapian_compat
();
544 require Search
::Xapian
;
545 Search
::Xapian-
>import
('
:all'
);
549 If you just C
<use Search
::Xapian
;> then the C
<import
()> calls aren't needed.
551 The C
<Xapian
::search_xapian_compat
()> call sets up aliases in the
552 C
<Search
::Xapian
> namespace so you can write code which refers to
553 C
<Search
::Xapian
> but can actually use this module instead.
565 Open a database
, fail if database doesn't exist.
569 Create a new database
, fail if database exists.
571 =item DB_CREATE_OR_OPEN
573 Open an existing database
, without destroying data
, or create a new
574 database if one doesn't already exist.
576 =item DB_CREATE_OR_OVERWRITE
578 Overwrite database if it exists.
588 Match if both subqueries are satisfied.
592 Match if either subquery is satisfied.
596 Match if left but not right subquery is satisfied.
600 Match if left or right
, but not both queries are satisfied.
604 Match if left is satisfied
, but use weights from both.
608 Like OP_AND
, but only weight using the left query.
Match if the words are near each other. The window should be specified as
a parameter to C<Xapian::Query-E<gt>new()>, but it defaults to the
number of terms in the list.
618 Match as a phrase
(All words in order
).
622 Select an elite set from the subqueries
, and perform a query with these combined as an
OR query.
626 Filter by a range test on a document value.
636 This gives the QueryParser default flag settings
, allowing you to easily add
637 flags to the default ones.
Support AND, OR, etc and bracketed subexpressions.
649 Support quoted phrases.
651 =item FLAG_BOOLEAN_ANY_CASE
653 Support
AND, OR, etc even if they aren't in ALLCAPS.
657 Support right truncation
(e.g. Xap
*).
659 =item FLAG_WILDCARD_GLOB
661 =item FLAG_WILDCARD_MULTI
663 =item FLAG_WILDCARD_SINGLE
667 Allow queries such as '
NOT apples'.
669 These require the use of a list of all documents in the database
670 which is potentially expensive
, so this feature isn't enabled by
675 Enable partial matching.
677 Partial matching causes the parser to treat the query as a
678 "partially entered" search. This will automatically treat the
679 final word as a wildcarded match
, unless it is followed by
680 whitespace
, to produce more stable results from interactive
683 =item FLAG_SPELLING_CORRECTION
687 =item FLAG_ACCUMULATE
689 =item FLAG_AUTO_SYNONYMS
691 =item FLAG_AUTO_MULTIWORD_SYNONYMS
693 =item FLAG_WORD_BREAKS
701 =item FLAG_NO_POSITIONS
715 Stem all terms and add a
"Z" prefix.
719 Don't stem any terms.
723 Stem some terms
, in a manner compatible with Omega
(capitalised words and those
724 in phrases aren't stemmed
).
726 =item STEM_SOME_FULL_POS
728 Like STEM_SOME but also store term positions for stemmed terms.
738 docids sort in ascending order
(default
)
742 docids sort in descending order
746 docids sort in whatever order is most efficient for the backend
752 Standard is db
+ ops
+ qpflags
+ qpstem
754 =head1 Version functions
760 Returns the major version of the Xapian C
++ library being used. E.g. for
761 Xapian
1.4.15 this would return
1.
765 Returns the minor version of the Xapian C
++ library being used. E.g. for
766 Xapian
1.4.15 this would return
4.
770 Returns the revision of the Xapian C
++ library being used. E.g. for
771 Xapian
1.4.15 this would return
15. In a stable release series
, Xapian
772 libraries with the same minor and major versions are usually ABI compatible
, so
773 this often won't match the third component of C
<$Xapian
::VERSION
> (which is the
774 version of the Xapian wrappers
).
778 =head1 Numeric encoding functions
782 =item sortable_serialise NUMBER
784 Convert a floating point number to a string
, preserving sort order.
786 This method converts a floating point number to a string
, suitable for
787 using as a value for numeric range restriction
, or for use as a sort
790 The conversion is platform independent.
792 The conversion attempts to ensure that
, for any pair of values supplied
793 to the conversion algorithm
, the result of comparing the original
794 values
(with a numeric comparison operator
) will be the same as the
795 result of comparing the resulting values
(with a string comparison
796 operator
). On platforms which represent doubles with the precisions
797 specified by IEEE_754
, this will be the case
: if the representation of
798 doubles is more precise
, it is possible that two very close doubles
799 will be mapped to the same string
, so will compare equal.
801 Note also that both zero and
-zero will be converted to the same
802 representation
: since these compare equal
, this satisfies the
803 comparison constraint
, but it's worth knowing this if you wish to use
804 the encoding in some situation where this distinction matters.
806 Handling of NaN isn't
(currently
) guaranteed to be sensible.
808 =item sortable_unserialise SERIALISED_NUMBER
810 Convert a string encoded using sortable_serialise back to a floating
813 This expects the input to be a string produced by C
<sortable_serialise
()>.
814 If the input is not such a string
, the value returned is undefined
(but
815 no error will be thrown
).
817 The result of the conversion will be exactly the value which was
818 supplied to C
<sortable_serialise
()> when making the string on platforms
819 which represent doubles with the precisions specified by IEEE_754
, but
820 may be a different
(nearby
) value on other platforms.
830 Add POD documentation for all classes
, where possible just adapted from Xapian
833 =item Unwrapped classes
835 The following Xapian classes are not yet wrapped
:
836 user-defined Weight subclasses.
842 These SWIG-generated Perl bindings were originally implemented by Kosei
843 Moriyama in GSoC
2009, and made their debut in the
1.2.4 release.
845 They take a lot of inspiration and some code from Search
::Xapian
, a set
846 of hand-written XS bindings
, originally written by Alex Bowley
, and later
847 maintained by Olly Betts.
849 Search
::Xapian owed thanks to Tye McQueen E
<lt
>tye@metronet.comE
<gt
> for
850 explaining the finer points of how best to write XS frontends to C
++ libraries
,
851 and James Aylett E
<lt
>james@tartarus.orgE
<gt
> for clarifying the less obvious
852 aspects of the Xapian API. Patches for wrapping missing classes and other
853 things were contributed by Olly Betts
, Tim Brody
, Marcus Ramberg
, Peter Karman
,
854 Benjamin Smith
, Rusty Conover
, Frank Lichtenheld
, Henry Combrinck
, Jess
855 Robinson
, David F. Skoll
, Dave O'Neill
, Andreas Marienborg
, Adam Sjøgren
,
856 Dmitry Karasik
, and Val Rosca.
860 Please report any bugs
/suggestions to E
<lt
>xapian-discuss@lists.xapian.orgE
<gt
>
861 or use the Xapian bug tracker L
<https
://xapian.org
/bugs
>. Please do
862 NOT use the CPAN bug tracker or mail contributors individually.
866 This program is free software
; you can redistribute it and
/or modify
867 it under the same terms as Perl itself.
871 L
<Xapian
::BM25Weight
>,
872 L
<Xapian
::BoolWeight
>,
876 L
<Xapian
::MultiValueSorter
>,
877 L
<Xapian
::PositionIterator
>,
878 L
<Xapian
::PostingIterator
>,
880 L
<Xapian
::QueryParser
>,
882 L
<Xapian
::TermGenerator
>,
883 L
<Xapian
::TermIterator
>,
884 L
<Xapian
::TradWeight
>,
885 L
<Xapian
::ValueIterator
>,
887 L
<Xapian
::WritableDatabase
>,
889 L
<https
://xapian.org
/>.