2 /* perl
/extra.i
: custom Perl code for xapian-bindings
4 * Based on the perl XS wrapper files.
6 * Copyright
(C
) 2009 Kosei Moriyama
7 * Copyright
(C
) 2011,2012,2013,2015,2016,2018,2019,2020 Olly Betts
9 * This program is free software
; you can redistribute it and
/or
10 * modify it under the terms of the GNU General Public License as
11 * published by the Free Software Foundation
; either version
2 of the
12 * License
, or
(at your option
) any later version.
14 * This program is distributed in the hope that it will be useful
,
15 * but WITHOUT
ANY WARRANTY
; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program
; if not
, write to the Free Software
21 * Foundation
, Inc.
, 51 Franklin St
, Fifth Floor
, Boston
, MA
02110-1301
30 our $VERSION
= PERL_XAPIAN_VERSION
;
33 # We need to use the RTLD_GLOBAL flag to dlopen
() so that other C
++
34 # modules that link against libxapian.so get the
*same
* value for all the
35 # weak symbols
(e.g.
, the exception classes
)
# Flags used when this module's shared object is loaded.  The value 0x01
# asks for the library to be opened with RTLD_GLOBAL, so that other C++
# modules linked against libxapian.so resolve the same weak symbols (for
# example the exception classes).
sub dl_load_flags {
    return 0x01;
}
38 # Items to export into caller's namespace by default. Note
: do not export
39 # names by default without a very good reason. Use EXPORT_OK instead.
40 # Do not simply export all your public functions
/methods
/constants.
42 # This allows declaration use Xapian '
:all'
;
43 # If you do not need this
, moving things directly into @
EXPORT or @EXPORT_OK
65 DB_CREATE_OR_OVERWRITE
82 FLAG_SPELLING_CORRECTION
85 FLAG_AUTO_MULTIWORD_SYNONYMS
97 $EXPORT_TAGS
{standard
} = [ @
{ $EXPORT_TAGS
{'ops'
} },
98 @
{ $EXPORT_TAGS
{'db'
} },
99 @
{ $EXPORT_TAGS
{'qpflags'
} },
100 @
{ $EXPORT_TAGS
{'qpstem'
} } ];
101 $EXPORT_TAGS
{all
} = [ @
{ $EXPORT_TAGS
{'standard'
} }, @
{ $EXPORT_TAGS
{'enq_order'
} } ];
103 our @EXPORT_OK
= ( @
{ $EXPORT_TAGS
{'all'
} } );
105 # Preloaded methods go here.
108 foreach
(@
{ $EXPORT_TAGS
{'ops'
} }) {
109 $OP_NAMES
[eval $_
] = $_
;
113 foreach
(@
{ $EXPORT_TAGS
{'db'
} }) {
114 $DB_NAMES
[eval $_
] = $_
;
118 foreach
(@
{ $EXPORT_TAGS
{'qpflags'
} }) {
119 $FLAG_NAMES
[eval $_
] = $_
;
123 foreach
(@
{ $EXPORT_TAGS
{'qpstem'
} }) {
124 $STEM_NAMES
[eval $_
] = $_
;
127 # Compatibility wrapping for Xapian
::BAD_VALUENO
(wrapped as a constant since
128 # xapian-bindings
1.4.10).
129 our $BAD_VALUENO
= BAD_VALUENO
;
131 sub search_xapian_compat
{
132 *Search
::Xapian
:: = \
%Xapian
::;
133 *Search
::Xapian
::VERSION
= \$VERSION
;
134 *Search
::Xapian
::OP_NAMES
= \@OP_NAMES
;
135 *Search
::Xapian
::DB_NAMES
= \@DB_NAMES
;
136 *Search
::Xapian
::FLAG_NAMES
= \@FLAG_NAMES
;
137 *Search
::Xapian
::STEM_NAMES
= \@STEM_NAMES
;
138 *Search
::Xapian
::BAD_VALUENO
= \
&BAD_VALUENO;
139 *Search
::Xapian
::DB_OPEN
= \
&DB_OPEN;
140 *Search
::Xapian
::DB_CREATE
= \
&DB_CREATE;
141 *Search
::Xapian
::DB_CREATE_OR_OPEN
= \
&DB_CREATE_OR_OPEN;
142 *Search
::Xapian
::DB_CREATE_OR_OVERWRITE
= \
&DB_CREATE_OR_OVERWRITE;
143 *Search
::Xapian
::version_string
= \
&version_string;
144 *Search
::Xapian
::major_version
= \
&major_version;
145 *Search
::Xapian
::minor_version
= \
&minor_version;
146 *Search
::Xapian
::revision
= \
&revision;
147 *Search
::Xapian
::sortable_serialise
= \
&sortable_serialise;
148 *Search
::Xapian
::sortable_unserialise
= \
&sortable_unserialise;
151 package Xapian
::Database
;
154 my $enquire
= Xapian
::Enquire-
>new
( $self
);
156 $enquire-
>set_query
( @_
);
161 package Xapian
::Enquire
;
164 return $self-
>get_mset
(@_
)->items
();
167 package Xapian
::ESet
;
171 tie
( @array
, 'Xapian
::ESet'
, $self
);
175 use overload '
++'
=> sub
{ $_
[0]->inc
() },
176 '
--'
=> sub
{ $_
[0]->dec
() },
177 '
='
=> sub
{ $_
[0]->clone
() },
178 'eq'
=> sub
{ $_
[0]->equal
($_
[1]) },
179 'ne'
=> sub
{ $_
[0]->nequal
($_
[1]) },
180 '
=='
=> sub
{ $_
[0]->equal
($_
[1]) },
181 '
!='
=> sub
{ $_
[0]->nequal
($_
[1]) },
186 my $class
= ref
( $self
);
187 my $copy
= new
( $self
);
195 return bless $eset
, $class
;
200 return $self-
>size
();
203 package Xapian
::ESetIterator
;
204 use overload '
++'
=> sub
{ $_
[0]->inc
() },
205 '
--'
=> sub
{ $_
[0]->dec
() },
206 '
='
=> sub
{ $_
[0]->clone
() },
207 'eq'
=> sub
{ $_
[0]->equal
($_
[1]) },
208 'ne'
=> sub
{ $_
[0]->nequal
($_
[1]) },
209 '
=='
=> sub
{ $_
[0]->equal
($_
[1]) },
210 '
!='
=> sub
{ $_
[0]->nequal
($_
[1]) },
215 my $class
= ref
( $self
);
216 my $copy
= new
( $self
);
221 package Xapian
::MSet
;
225 tie
( @array
, 'Xapian
::MSet
::Tied'
, $self
);
232 return bless $mset
, $class
;
237 return $self-
>size
();
240 package Xapian
::MSetIterator
;
241 use overload '
++'
=> sub
{ $_
[0]->inc
() },
242 '
--'
=> sub
{ $_
[0]->dec
() },
243 '
='
=> sub
{ $_
[0]->clone
() },
244 'eq'
=> sub
{ $_
[0]->equal
($_
[1]) },
245 'ne'
=> sub
{ $_
[0]->nequal
($_
[1]) },
246 '
=='
=> sub
{ $_
[0]->equal
($_
[1]) },
247 '
!='
=> sub
{ $_
[0]->nequal
($_
[1]) },
252 my $class
= ref
( $self
);
257 package Xapian
::MSet
::Tied
;
258 our @ISA
= qw
(Xapian
::MSet
);
260 package Xapian
::PositionIterator
;
261 use overload '
++'
=> sub
{ $_
[0]->inc
() },
262 '
='
=> sub
{ $_
[0]->clone
() },
263 'eq'
=> sub
{ $_
[0]->equal
($_
[1]) },
264 'ne'
=> sub
{ $_
[0]->nequal
($_
[1]) },
265 '
=='
=> sub
{ $_
[0]->equal
($_
[1]) },
266 '
!='
=> sub
{ $_
[0]->nequal
($_
[1]) },
271 my $class
= ref
( $self
);
272 my $copy
= new
( $self
);
277 package Xapian
::PostingIterator
;
278 use overload '
++'
=> sub
{ $_
[0]->inc
() },
279 '
='
=> sub
{ $_
[0]->clone
() },
280 'eq'
=> sub
{ $_
[0]->equal
($_
[1]) },
281 'ne'
=> sub
{ $_
[0]->nequal
($_
[1]) },
282 '
=='
=> sub
{ $_
[0]->equal
($_
[1]) },
283 '
!='
=> sub
{ $_
[0]->nequal
($_
[1]) },
288 my $class
= ref
( $self
);
289 my $copy
= new
( $self
);
294 package Xapian
::TermGenerator
;
296 my
($self
, $stopper
) = @_
;
297 $self
{_stopper
} = $stopper
;
301 package Xapian
::TermIterator
;
302 use overload '
++'
=> sub
{ $_
[0]->inc
() },
303 '
='
=> sub
{ $_
[0]->clone
() },
304 'eq'
=> sub
{ $_
[0]->equal
($_
[1]) },
305 'ne'
=> sub
{ $_
[0]->nequal
($_
[1]) },
306 '
=='
=> sub
{ $_
[0]->equal
($_
[1]) },
307 '
!='
=> sub
{ $_
[0]->nequal
($_
[1]) },
312 my $class
= ref
( $self
);
313 my $copy
= new
( $self
);
318 package Xapian
::ValueIterator
;
319 use overload '
++'
=> sub
{ $_
[0]->inc
() },
320 '
='
=> sub
{ $_
[0]->clone
() },
321 'eq'
=> sub
{ $_
[0]->equal
($_
[1]) },
322 'ne'
=> sub
{ $_
[0]->nequal
($_
[1]) },
323 '
=='
=> sub
{ $_
[0]->equal
($_
[1]) },
324 '
!='
=> sub
{ $_
[0]->nequal
($_
[1]) },
329 my $class
= ref
( $self
);
330 my $copy
= new
( $self
);
335 # Adding CLONE_SKIP functions
336 package Xapian
::LogicError
;
338 package Xapian
::PositionIterator
;
340 package Xapian
::PostingIterator
;
342 package Xapian
::TermIterator
;
344 package Xapian
::ValueIterator
;
346 package Xapian
::Document
;
348 package Xapian
::PostingSource
;
350 package Xapian
::ValuePostingSource
;
352 package Xapian
::ValueWeightPostingSource
;
354 package Xapian
::ValueMapPostingSource
;
356 package Xapian
::FixedWeightPostingSource
;
358 package Xapian
::MSet
;
360 package Xapian
::MSetIterator
;
362 package Xapian
::ESet
;
364 package Xapian
::ESetIterator
;
366 package Xapian
::RSet
;
368 package Xapian
::MatchDecider
;
370 package Xapian
::Enquire
;
372 package Xapian
::Weight
;
374 package Xapian
::BoolWeight
;
376 package Xapian
::BM25Weight
;
378 package Xapian
::TradWeight
;
380 package Xapian
::Database
;
382 package Xapian
::WritableDatabase
;
384 package Xapian
::Query
;
# Returns a new query object built from the empty string via the low-level
# Xapianc::new_Query constructor.  Any arguments passed in are ignored.
# NOTE(review): going by the name, the empty-term query is presumably the
# "match every document" query - confirm against the Xapian API docs.
sub MatchAll {
    return Xapianc::new_Query('');
}
# Returns a new query object built by calling the low-level
# Xapianc::new_Query constructor with no arguments.  Any arguments passed
# in are ignored.
# NOTE(review): going by the name, a default-constructed query is presumably
# the "match no documents" query - confirm against the Xapian API docs.
sub MatchNothing {
    return Xapianc::new_Query();
}
388 package Xapian
::Stopper
;
390 package Xapian
::SimpleStopper
;
392 package Xapian
::RangeProcessor
;
394 package Xapian
::DateRangeProcessor
;
396 package Xapian
::NumberRangeProcessor
;
398 package Xapian
::ValueRangeProcessor
;
400 package Xapian
::StringValueRangeProcessor
;
402 package Xapian
::DateValueRangeProcessor
;
404 package Xapian
::NumberValueRangeProcessor
;
406 package Xapian
::FieldProcessor
;
408 package Xapian
::QueryParser
;
410 package Xapian
::Stem
;
412 package Xapian
::TermGenerator
;
414 package Xapian
::Sorter
;
416 package Xapian
::MultiValueSorter
;
418 package Xapian
::ReplicationInfo
;
420 package Xapian
::DatabaseMaster
;
422 package Xapian
::DatabaseReplica
;
424 package Xapian
::ValueSetMatchDecider
;
426 package Xapian
::SerialisationContext
;
428 package Xapian
::MSet
::Tied
;
431 # Pod document of Xapian
435 Xapian
- Perl frontend to the Xapian C
++ search library.
441 my $parser
= Xapian
::QueryParser-
>new
();
442 my $query
= $parser-
>parse_query
( '
[QUERY STRING]'
);
444 my $db
= Xapian
::Database-
>new
( '
[DATABASE DIR]'
);
445 my $enq
= $db-
>enquire
();
447 printf
"Running query '%s'\n", $query-
>get_description
();
449 $enq-
>set_query
( $query
);
450 my @matches
= $enq-
>matches
(0, 10);
452 print scalar
(@matches
) .
" results found\n";
454 foreach my $match
( @matches
) {
455 my $doc
= $match-
>get_document
();
456 printf
"ID %d %d%% [ %s ]\n", $match-
>get_docid
(), $match-
>get_percent
(), $doc-
>get_data
();
461 This module is a pretty-much complete wrapping of the Xapian C
++ API. The
462 main omissions are features which aren't useful to wrap for Perl
, such as
463 Xapian
::UTF8Iterator.
465 This module is generated using SWIG. It is intended as a replacement for
466 the older Search
::Xapian module which is easier to keep up to date and
467 which more completely wraps the C
++ API. It is largely compatible with
468 Search
::Xapian
, but see the COMPATIBILITY section below if you have code using
469 Search
::Xapian which you want to get working with this new module.
471 There are some gaps in the POD documentation for wrapped classes
, but you
472 can read the Xapian C
++ API documentation at
473 L
<https
://xapian.org
/docs
/apidoc
/html
/annotated.html
> for details of
474 these. Alternatively
, take a look at the code in the examples and tests.
476 If you want to use Xapian and the threads module together
, make
477 sure you're using Perl
>= 5.8.7 as then Xapian uses CLONE_SKIP to make sure
478 that the perl wrapper objects aren't copied to new threads
- without this the
479 underlying C
++ objects can get destroyed more than once which leads to
482 If you encounter problems
, or have any comments
, suggestions
, patches
, etc
483 please email the Xapian-discuss mailing list
(details of which can be found at
484 L
<https
://xapian.org
/lists
>).
488 This module is mostly compatible with Search
::Xapian. The following are known
489 differences
, with details of how to write code which works with both.
491 Search
::Xapian overloads stringification
- e.g. C
<"$query"> is equivalent to
492 C
<$query-E
<gt
>get_description
()>, while C
<"$termiterator"> is equivalent to
493 C
<$termiterator-E
<gt
>get_term
()>. This module doesn't support overloaded
494 stringification
, so you should instead explicitly call the method you
495 want. The technical reason for this change is that stringification is hard to
496 support in SWIG-generated bindings
, but this context-sensitive stringification
497 where the operation performed depends on the object type seems unhelpful in
500 Search
::Xapian overloads conversion to an integer for some classes
- e.g.
501 C
<0+$positioniterator
> is equivalent to C
<$positioniterator-E
<gt
>get_termpos
>
502 while C
<0+$postingiterator
> is equivalent to C
<$postingiterator-E
<gt
>get_docid
>.
503 This module doesn't provide these overloads so you should instead explicitly
504 call the method you want. As above
, we think this context-sensitive behaviour
505 wasn't helpful in hindsight.
507 This module is fussier about whether a passed scalar value is a string or
508 an integer than Search
::Xapian
, so e.g. C
<Xapian
::Query-E
<gt
>new
(2001)> will fail
509 but the equivalent worked with Search
::Xapian. If C
<$term
> might not be a
510 string use C
<Xapian
::Query-E
<gt
>new
("$term")> to ensure it is converted to a
511 string. The new behaviour isn't very Perlish
, but is likely to be hard to
512 address universally as it comes from SWIG. Let us know if you find particular
513 places where it's annoying and we can look at addressing those.
515 Both this module and Search
::Xapian support passing a Perl sub
(which can be
516 anonymous
) for the functor classes C
<MatchDecider
> and C
<ExpandDecider
>. In
517 some cases Search
::Xapian accepts a string naming a Perl sub
, but this module
518 never accepts this. Instead of passing C
<"::mymatchdecider">, pass
519 C
<\
&mymatchdecider> which will work with either module. If you really want to
520 dynamically specify the function name
, you can pass C
<sub
{eval
521 "&$dynamicmatchdecider"}>.
523 Search
::Xapian provides a PerlStopper class which is supposed to be
524 subclassable in Perl to implement your own stopper
, but this mechanism doesn't
525 actually seem to work. This module instead supports user-implemented stoppers
526 by accepting a Perl sub in place of a Stopper object.
528 =head3 Importing Either Module
530 If you want your code to use either this module or Search
::Xapian depending on
531 what's installed
, then instead of C
<use Search
::Xapian
('
:all'
);> you can use
:
536 Xapian-
>import
('
:all'
);
537 Xapian
::search_xapian_compat
();
540 require Search
::Xapian
;
541 Search
::Xapian-
>import
('
:all'
);
545 If you just C
<use Search
::Xapian
;> then the C
<import
()> calls aren't needed.
547 The C
<Xapian
::search_xapian_compat
()> call sets up aliases in the
548 C
<Search
::Xapian
> namespace so you can write code which refers to
549 C
<Search
::Xapian
> but can actually use this module instead.
561 Open a database
, fail if database doesn't exist.
565 Create a new database
, fail if database exists.
567 =item DB_CREATE_OR_OPEN
569 Open an existing database
, without destroying data
, or create a new
570 database if one doesn't already exist.
572 =item DB_CREATE_OR_OVERWRITE
574 Overwrite database if it exists.
584 Match if both subqueries are satisfied.
588 Match if either subquery is satisfied.
592 Match if left but not right subquery is satisfied.
596 Match if left or right
, but not both queries are satisfied.
600 Match if left is satisfied
, but use weights from both.
604 Like OP_AND
, but only weight using the left query.
608 Match if the words are near each other. The window should be specified
, as
609 a parameter to C
<Xapian
::Query-E
<gt
>new
()>, but it defaults to the
610 number of terms in the list.
614 Match as a phrase
(All words in order
).
618 Select an elite set from the subqueries
, and perform a query with these combined as an
OR query.
622 Filter by a range test on a document value.
632 This gives the QueryParser default flag settings
, allowing you to easily add
633 flags to the default ones.
637 Support
AND, OR, etc and bracketed subexpressions.
645 Support quoted phrases.
647 =item FLAG_BOOLEAN_ANY_CASE
649 Support
AND, OR, etc even if they aren't in ALLCAPS.
653 Support right truncation
(e.g. Xap
*).
657 Allow queries such as '
NOT apples'.
659 These require the use of a list of all documents in the database
660 which is potentially expensive
, so this feature isn't enabled by
665 Enable partial matching.
667 Partial matching causes the parser to treat the query as a
668 "partially entered" search. This will automatically treat the
669 final word as a wildcarded match
, unless it is followed by
670 whitespace
, to produce more stable results from interactive
673 =item FLAG_SPELLING_CORRECTION
677 =item FLAG_ACCUMULATE
679 =item FLAG_AUTO_SYNONYMS
681 =item FLAG_AUTO_MULTIWORD_SYNONYMS
685 =item FLAG_NO_POSITIONS
699 Stem all terms and add a
"Z" prefix.
703 Don't stem any terms.
707 Stem some terms
, in a manner compatible with Omega
(capitalised words and those
708 in phrases aren't stemmed
).
710 =item STEM_SOME_FULL_POS
712 Like STEM_SOME but also store term positions for stemmed terms.
722 docids sort in ascending order
(default
)
726 docids sort in descending order
730 docids sort in whatever order is most efficient for the backend
736 Standard is db
+ ops
+ qpflags
+ qpstem
738 =head1 Version functions
744 Returns the major version of the Xapian C
++ library being used. E.g. for
745 Xapian
1.4.15 this would return
1.
749 Returns the minor version of the Xapian C
++ library being used. E.g. for
750 Xapian
1.4.15 this would return
4.
754 Returns the revision of the Xapian C
++ library being used. E.g. for
755 Xapian
1.4.15 this would return
15. In a stable release series
, Xapian
756 libraries with the same minor and major versions are usually ABI compatible
, so
757 this often won't match the third component of C
<$Xapian
::VERSION
> (which is the
758 version of the Xapian wrappers
).
762 =head1 Numeric encoding functions
766 =item sortable_serialise NUMBER
768 Convert a floating point number to a string
, preserving sort order.
770 This method converts a floating point number to a string
, suitable for
771 using as a value for numeric range restriction
, or for use as a sort
774 The conversion is platform independent.
776 The conversion attempts to ensure that
, for any pair of values supplied
777 to the conversion algorithm
, the result of comparing the original
778 values
(with a numeric comparison operator
) will be the same as the
779 result of comparing the resulting values
(with a string comparison
780 operator
). On platforms which represent doubles with the precisions
781 specified by IEEE_754
, this will be the case
: if the representation of
782 doubles is more precise
, it is possible that two very close doubles
783 will be mapped to the same string
, so will compare equal.
785 Note also that both zero and
-zero will be converted to the same
786 representation
: since these compare equal
, this satisfies the
787 comparison constraint
, but it's worth knowing this if you wish to use
788 the encoding in some situation where this distinction matters.
790 Handling of NaN isn't
(currently
) guaranteed to be sensible.
792 =item sortable_unserialise SERIALISED_NUMBER
794 Convert a string encoded using sortable_serialise back to a floating
797 This expects the input to be a string produced by C
<sortable_serialise
()>.
798 If the input is not such a string
, the value returned is undefined
(but
799 no error will be thrown
).
801 The result of the conversion will be exactly the value which was
802 supplied to C
<sortable_serialise
()> when making the string on platforms
803 which represent doubles with the precisions specified by IEEE_754
, but
804 may be a different
(nearby
) value on other platforms.
814 Add POD documentation for all classes
, where possible just adapted from Xapian
817 =item Unwrapped classes
819 The following Xapian classes are not yet wrapped
:
820 ErrorHandler
, user-defined Weight subclasses.
826 These SWIG-generated Perl bindings were originally implemented by Kosei
827 Moriyama in GSoC
2009, and made their debut in the
1.2.4 release.
829 They take a lot of inspiration and some code from Search
::Xapian
, a set
830 of hand-written XS bindings
, originally written by Alex Bowley
, and later
831 maintained by Olly Betts.
833 Search
::Xapian owed thanks to Tye McQueen E
<lt
>tye@metronet.comE
<gt
> for
834 explaining the finer points of how best to write XS frontends to C
++ libraries
,
835 and James Aylett E
<lt
>james@tartarus.orgE
<gt
> for clarifying the less obvious
836 aspects of the Xapian API. Patches for wrapping missing classes and other
837 things were contributed by Olly Betts
, Tim Brody
, Marcus Ramberg
, Peter Karman
,
838 Benjamin Smith
, Rusty Conover
, Frank Lichtenheld
, Henry Combrinck
, Jess
839 Robinson
, David F. Skoll
, Dave O'Neill
, Andreas Marienborg
, Adam Sjøgren
,
840 Dmitry Karasik
, and Val Rosca.
844 Please report any bugs
/suggestions to E
<lt
>xapian-discuss@lists.xapian.orgE
<gt
>
845 or use the Xapian bug tracker L
<https
://xapian.org
/bugs
>. Please do
846 NOT use the CPAN bug tracker or mail contributors individually.
850 This program is free software
; you can redistribute it and
/or modify
851 it under the same terms as Perl itself.
855 L
<Xapian
::BM25Weight
>,
856 L
<Xapian
::BoolWeight
>,
860 L
<Xapian
::MultiValueSorter
>,
861 L
<Xapian
::PositionIterator
>,
862 L
<Xapian
::PostingIterator
>,
864 L
<Xapian
::QueryParser
>,
866 L
<Xapian
::TermGenerator
>,
867 L
<Xapian
::TermIterator
>,
868 L
<Xapian
::TradWeight
>,
869 L
<Xapian
::ValueIterator
>,
871 L
<Xapian
::WritableDatabase
>,
873 L
<https
://xapian.org
/>.