2 * @brief parsing a user query string to build a Xapian::Query object
4 /* Copyright (C) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2021 Olly Betts
5 * Copyright (C) 2010 Adam Sjøgren
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23 #ifndef XAPIAN_INCLUDED_QUERYPARSER_H
24 #define XAPIAN_INCLUDED_QUERYPARSER_H
26 #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD
27 # error Never use <xapian/queryparser.h> directly; include <xapian.h> instead.
30 #include <xapian/attributes.h>
31 #include <xapian/deprecated.h>
32 #include <xapian/intrusive_ptr.h>
33 #include <xapian/query.h>
34 #include <xapian/termiterator.h>
35 #include <xapian/visibility.h>
45 /** Abstract base class for stop-word decision functor.
47 * If you just want to use an existing stopword list, see
48 * Xapian::SimpleStopper.
50 class XAPIAN_VISIBILITY_DEFAULT Stopper
51 : public Xapian::Internal::opt_intrusive_base
{
52 /// Don't allow assignment.
53 void operator=(const Stopper
&);
55 /// Don't allow copying.
56 Stopper(const Stopper
&);
59 /// Default constructor.
62 /** Is term a stop-word?
64 * @param term The term to test.
66 virtual bool operator()(const std::string
& term
) const = 0;
68 /// Class has virtual methods, so provide a virtual destructor.
69 virtual ~Stopper() { }
71 /// Return a string describing this object.
72 virtual std::string
get_description() const;
74 /** Start reference counting this object.
76 * You can hand ownership of a dynamically allocated Stopper
77 * object to Xapian by calling release() and then passing the object to a
78 * Xapian method. Xapian will arrange to delete the object once it is no
82 opt_intrusive_base::release();
86 /** Start reference counting this object.
88 * You can hand ownership of a dynamically allocated Stopper
89 * object to Xapian by calling release() and then passing the object to a
90 * Xapian method. Xapian will arrange to delete the object once it is no
93 const Stopper
* release() const {
94 opt_intrusive_base::release();
99 /// Simple implementation of Stopper class - this will suit most users.
100 class XAPIAN_VISIBILITY_DEFAULT SimpleStopper
: public Stopper
{
101 std::set
<std::string
> stop_words
;
104 /// Default constructor.
107 /** Initialise from a pair of iterators.
109 * Xapian includes stopword list files for many languages. You can
110 * initialise from a file like so:
112 * std::ifstream words("stopwords/english/stop.txt");
113 * Xapian::SimpleStopper stopper((std::istream_iterator<std::string>(words)), std::istream_iterator<std::string>());
116 * In bindings for other languages it isn't possible to pass a C++
117 * iterator pair, so instead this constructor is wrapped to allow
118 * passing a filename.
120 template<class Iterator
>
121 SimpleStopper(Iterator begin
, Iterator end
) : stop_words(begin
, end
) { }
123 /// Add a single stop word.
124 void add(const std::string
& word
) { stop_words
.insert(word
); }
126 virtual bool operator()(const std::string
& term
) const {
127 return stop_words
.find(term
) != stop_words
.end();
130 virtual std::string
get_description() const;
136 RP_DATE_PREFER_MDY
= 4
139 /// Base class for range processors.
140 class XAPIAN_VISIBILITY_DEFAULT RangeProcessor
141 : public Xapian::Internal::opt_intrusive_base
{
142 /// Don't allow assignment.
143 void operator=(const RangeProcessor
&);
145 /// Don't allow copying.
146 RangeProcessor(const RangeProcessor
&);
149 /** The value slot to process.
151 * If this range processor isn't value-based, it can ignore this member.
153 Xapian::valueno slot
;
155 /** The prefix (or suffix with RP_SUFFIX) string to look for. */
160 * Bitwise-or (| in C++) of zero or more of the following:
161 * * Xapian::RP_SUFFIX - require @a str as a suffix
162 * instead of a prefix.
163 * * Xapian::RP_REPEATED - optionally allow @a str
164 * on both ends of the range - e.g. $1..$10 or
165 * 5m..50m. By default a prefix is only checked for on
166 * the start (e.g. date:1/1/1980..31/12/1989), and a
167 * suffix only on the end (e.g. 2..12kg).
172 /** Default constructor: sets slot to Xapian::BAD_VALUENO and flags to 0. */
173 RangeProcessor() : slot(Xapian::BAD_VALUENO
), flags(0) { }
177 * @param slot_ Which value slot to generate ranges over.
178 * @param str_ A string to look for to recognise values as belonging
179 * to this range (as a prefix by default, or as a suffix
180 * if the Xapian::RP_SUFFIX flag is specified).
181 * @param flags_ Zero or more of the following flags, combined with
182 * bitwise-or (| in C++):
183 * * Xapian::RP_SUFFIX - require @a str_ as a suffix
184 * instead of a prefix.
185 * * Xapian::RP_REPEATED - optionally allow @a str_
186 * on both ends of the range - e.g. $1..$10 or
187 * 5m..50m. By default a prefix is only checked for on
188 * the start (e.g. date:1/1/1980..31/12/1989), and a
189 * suffix only on the end (e.g. 2..12kg).
191 explicit RangeProcessor(Xapian::valueno slot_
,
192 const std::string
& str_
= std::string(),
194 : slot(slot_
), str(str_
), flags(flags_
) { }
197 virtual ~RangeProcessor();
199 /** Check prefix/suffix on range.
201 * If they match, remove the prefix/suffix and then call operator()()
202 * to try to handle the range.
204 Xapian::Query
check_range(const std::string
& b
, const std::string
& e
);
206 /** Check for a valid range of this type.
208 * Override this method to implement your own range handling.
210 * @param begin The start of the range as specified in the query string
212 * @param end The end of the range as specified in the query string
213 * by the user (empty string for no upper limit).
215 * @return An OP_VALUE_RANGE Query object (or if end.empty(), an
216 * OP_VALUE_GE Query object). Or if the range isn't one
217 * which this object can handle then
218 * Xapian::Query(Xapian::Query::OP_INVALID) will be
221 virtual Xapian::Query
222 operator()(const std::string
&begin
, const std::string
&end
);
224 /** Start reference counting this object.
226 * You can hand ownership of a dynamically allocated RangeProcessor
227 * object to Xapian by calling release() and then passing the object to a
228 * Xapian method. Xapian will arrange to delete the object once it is no
231 RangeProcessor
* release() {
232 opt_intrusive_base::release();
236 /** Start reference counting this object.
238 * You can hand ownership of a dynamically allocated RangeProcessor
239 * object to Xapian by calling release() and then passing the object to a
240 * Xapian method. Xapian will arrange to delete the object once it is no
243 const RangeProcessor
* release() const {
244 opt_intrusive_base::release();
249 /** Handle a date range.
251 * Begin and end must be dates in a recognised format.
253 class XAPIAN_VISIBILITY_DEFAULT DateRangeProcessor
: public RangeProcessor
{
259 * @param slot_ The value slot number to query.
261 * @param flags_ Zero or more of the following flags, combined with
263 * * Xapian::RP_DATE_PREFER_MDY - interpret ambiguous
264 * dates as month/day/year rather than day/month/year.
266 * @param epoch_year_ Year to use as the epoch for dates with 2 digit
267 * years (default: 1970, so 1/1/69 is 2069 while
270 explicit DateRangeProcessor(Xapian::valueno slot_
,
272 int epoch_year_
= 1970)
273 : RangeProcessor(slot_
, std::string(), flags_
),
274 epoch_year(epoch_year_
) { }
278 * @param slot_ The value slot number to query.
280 * @param str_ A string to look for to recognise values as belonging
281 * to this date range.
283 * @param flags_ Zero or more of the following flags, combined with
285 * * Xapian::RP_SUFFIX - require @a str_ as a suffix
286 * instead of a prefix.
287 * * Xapian::RP_REPEATED - optionally allow @a str_
288 * on both ends of the range - e.g. $1..$10 or
289 * 5m..50m. By default a prefix is only checked for on
290 * the start (e.g. date:1/1/1980..31/12/1989), and a
291 * suffix only on the end (e.g. 2..12kg).
292 * * Xapian::RP_DATE_PREFER_MDY - interpret ambiguous
293 * dates as month/day/year rather than day/month/year.
295 * @param epoch_year_ Year to use as the epoch for dates with 2 digit
296 * years (default: 1970, so 1/1/69 is 2069 while
299 * The string supplied in str_ is used to decide whether the pair of
300 * strings supplied constitute a valid range. Unless Xapian::RP_SUFFIX is
301 * specified, the first value in a range must begin with str_ (and with
302 * Xapian::RP_REPEATED the second value may optionally begin with str_);
303 * if Xapian::RP_SUFFIX is specified, the second value must end with str_
304 * (and with Xapian::RP_REPEATED the first value may optionally end with str_).
306 * If str_ is empty, the Xapian::RP_SUFFIX and Xapian::RP_REPEATED flags are
307 * irrelevant, and no special strings are required at the start or end of
308 * the strings defining the range.
310 * The remainder of both strings defining the endpoints must be valid
313 * For example, if str_ is "created:", Xapian::RP_SUFFIX is not specified,
314 * and the range processor has been added to the queryparser, the
315 * queryparser will accept "created:1/1/2000..31/12/2001".
317 DateRangeProcessor(Xapian::valueno slot_
, const std::string
&str_
,
318 unsigned flags_
= 0, int epoch_year_
= 1970)
319 : RangeProcessor(slot_
, str_
, flags_
),
320 epoch_year(epoch_year_
) { }
322 /** Check for a valid date range.
324 * If any specified prefix is present, and the range looks like a
325 * date range, the dates are converted to the format YYYYMMDD and
326 * combined into a value range query.
328 * @param begin The start of the range as specified in the query string
330 * @param end The end of the range as specified in the query string
333 Xapian::Query
operator()(const std::string
& begin
, const std::string
& end
);
336 /** Handle a number range.
338 * This class must be used on values which have been encoded using
339 * Xapian::sortable_serialise() which turns numbers into strings which
340 * will sort in the same order as the numbers (the same values can be
341 * used to implement a numeric sort).
343 class XAPIAN_VISIBILITY_DEFAULT NumberRangeProcessor
: public RangeProcessor
{
347 * @param slot_ The value slot number to query.
349 * @param str_ A string to look for to recognise values as belonging
350 * to this numeric range.
352 * @param flags_ Zero or more of the following flags, combined with
354 * * Xapian::RP_SUFFIX - require @a str_ as a suffix
355 * instead of a prefix.
356 * * Xapian::RP_REPEATED - optionally allow @a str_
357 * on both ends of the range - e.g. $1..$10 or
358 * 5m..50m. By default a prefix is only checked for on
359 * the start (e.g. date:1/1/1980..31/12/1989), and a
360 * suffix only on the end (e.g. 2..12kg).
362 * The string supplied in str_ is used to decide whether the pair of
363 * strings supplied constitute a valid range. Unless Xapian::RP_SUFFIX is
364 * specified, the first value in a range must begin with str_ (and with
365 * Xapian::RP_REPEATED the second value may optionally begin with str_);
366 * if Xapian::RP_SUFFIX is specified, the second value must end with str_
367 * (and with Xapian::RP_REPEATED the first value may optionally end with str_).
369 * If str_ is empty, the flags_ setting is irrelevant, and no special
370 * strings are required at the start or end of the strings defining the
373 * The remainder of both strings defining the endpoints must be valid
374 * floating point numbers. (FIXME: define format recognised).
376 * For example, if str_ is "$" and Xapian::RP_SUFFIX is not specified, and
377 * the range processor has been added to the queryparser, the queryparser
378 * will accept "$10..50" (or "$10..$50" if Xapian::RP_REPEATED is specified),
379 * but not "10..50" or "10..$50" as valid ranges. If str_ is "kg" and
380 * Xapian::RP_SUFFIX is specified, the queryparser will accept "10..50kg" (or "10kg..50kg" if Xapian::RP_REPEATED is specified), but not "10..50" or "10kg..50" as
383 NumberRangeProcessor(Xapian::valueno slot_
,
384 const std::string
&str_
= std::string(),
386 : RangeProcessor(slot_
, str_
, flags_
) { }
388 /** Check for a valid numeric range.
390 * If BEGIN..END is a valid numeric range with the specified prefix/suffix
391 * (if one was specified), the prefix/suffix is removed, the string
392 * converted to a number, and encoded with Xapian::sortable_serialise(),
393 * and a value range query is built.
395 * @param begin The start of the range as specified in the query string
397 * @param end The end of the range as specified in the query string
400 Xapian::Query
operator()(const std::string
& begin
, const std::string
& end
);
403 /// Base class for value range processors.
404 class XAPIAN_VISIBILITY_DEFAULT ValueRangeProcessor
405 : public Xapian::Internal::opt_intrusive_base
{
406 /// Don't allow assignment.
407 void operator=(const ValueRangeProcessor
&);
409 /// Don't allow copying.
410 ValueRangeProcessor(const ValueRangeProcessor
&);
413 /// Default constructor.
414 ValueRangeProcessor() { }
417 virtual ~ValueRangeProcessor();
419 /** Check for a valid range of this type.
421 * @param[in,out] begin The start of the range as specified in the query
422 * string by the user. This parameter is a
423 * non-const reference so the ValueRangeProcessor
424 * can modify it to return the value to start the
426 * @param[in,out] end The end of the range. This is also a non-const
427 * reference so it can be modified.
429 * @return If this ValueRangeProcessor recognises the range BEGIN..END it
430 * returns the value slot number to range filter on. Otherwise it
431 * returns Xapian::BAD_VALUENO.
433 virtual Xapian::valueno
operator()(std::string
&begin
, std::string
&end
) = 0;
435 /** Start reference counting this object.
437 * You can hand ownership of a dynamically allocated ValueRangeProcessor
438 * object to Xapian by calling release() and then passing the object to a
439 * Xapian method. Xapian will arrange to delete the object once it is no
442 ValueRangeProcessor
* release() {
443 opt_intrusive_base::release();
447 /** Start reference counting this object.
449 * You can hand ownership of a dynamically allocated ValueRangeProcessor
450 * object to Xapian by calling release() and then passing the object to a
451 * Xapian method. Xapian will arrange to delete the object once it is no
454 const ValueRangeProcessor
* release() const {
455 opt_intrusive_base::release();
460 /** Handle a string range.
462 * The end points can be any strings.
464 * @deprecated Use Xapian::RangeProcessor instead (added in 1.3.6).
466 class XAPIAN_DEPRECATED_CLASS_EX XAPIAN_VISIBILITY_DEFAULT StringValueRangeProcessor
: public ValueRangeProcessor
{
468 /** The value slot to process. */
469 Xapian::valueno valno
;
471 /** Whether to look for @a str as a prefix or suffix. */
474 /** The prefix (or suffix if prefix==false) string to look for. */
480 * @param slot_ The value number to return from operator().
482 explicit StringValueRangeProcessor(Xapian::valueno slot_
)
483 : valno(slot_
), str() { }
487 * @param slot_ The value number to return from operator().
488 * @param str_ A string to look for to recognise values as belonging
490 * @param prefix_ Flag specifying whether to check for str_ as a prefix
493 StringValueRangeProcessor(Xapian::valueno slot_
, const std::string
&str_
,
495 : valno(slot_
), prefix(prefix_
), str(str_
) { }
497 /** Check for a valid string range.
499 * @param[in,out] begin The start of the range as specified in the
500 * query string by the user. This parameter is a
501 * non-const reference so the ValueRangeProcessor
502 * can modify it to return the value to start the
504 * @param[in,out] end The end of the range. This is also a non-const
505 * reference so it can be modified.
507 * @return A StringValueRangeProcessor always accepts a range it is
508 * offered, and returns the value of slot_ passed at construction
509 * time. It doesn't modify @a begin or @a end.
511 Xapian::valueno
operator()(std::string
&begin
, std::string
&end
);
514 /** Handle a date range.
516 * Begin and end must be dates in a recognised format.
518 * @deprecated Use Xapian::DateRangeProcessor instead (added in 1.3.6).
520 class XAPIAN_DEPRECATED_CLASS_EX XAPIAN_VISIBILITY_DEFAULT DateValueRangeProcessor
: public StringValueRangeProcessor
{
527 * @param slot_ The value number to return from operator().
528 * @param prefer_mdy_ Should ambiguous dates be interpreted as
529 * month/day/year rather than day/month/year?
531 * @param epoch_year_ Year to use as the epoch for dates with 2 digit
532 * years (default: 1970, so 1/1/69 is 2069 while
535 DateValueRangeProcessor(Xapian::valueno slot_
, bool prefer_mdy_
= false,
536 int epoch_year_
= 1970)
537 : StringValueRangeProcessor(slot_
),
538 prefer_mdy(prefer_mdy_
), epoch_year(epoch_year_
) { }
542 * @param slot_ The value number to return from operator().
544 * @param str_ A string to look for to recognise values as belonging
545 * to this date range.
547 * @param prefix_ Whether to look for the string at the start or end of
548 * the values. If true, the string is a prefix; if
549 * false, the string is a suffix (default: true).
551 * @param prefer_mdy_ Should ambiguous dates be interpreted as
552 * month/day/year rather than day/month/year?
555 * @param epoch_year_ Year to use as the epoch for dates with 2 digit
556 * years (default: 1970, so 1/1/69 is 2069 while
559 * The string supplied in str_ is used by @a operator() to decide whether
560 * the pair of strings supplied to it constitute a valid range. If
561 * prefix_ is true, the first value in a range must begin with str_ (and
562 * the second value may optionally begin with str_);
563 * if prefix_ is false, the second value in a range must end with str_
564 * (and the first value may optionally end with str_).
566 * If str_ is empty, the setting of prefix_ is irrelevant, and no special
567 * strings are required at the start or end of the strings defining the
570 * The remainder of both strings defining the endpoints must be valid
573 * For example, if str_ is "created:" and prefix_ is true, and the range
574 * processor has been added to the queryparser, the queryparser will
575 * accept "created:1/1/2000..31/12/2001".
577 DateValueRangeProcessor(Xapian::valueno slot_
, const std::string
&str_
,
579 bool prefer_mdy_
= false, int epoch_year_
= 1970)
580 : StringValueRangeProcessor(slot_
, str_
, prefix_
),
581 prefer_mdy(prefer_mdy_
), epoch_year(epoch_year_
) { }
586 * This is like the previous version, but with const char * instead of
587 * std::string - we need this overload as otherwise
588 * DateValueRangeProcessor(1, "date:") quietly interprets the second
589 * argument as a boolean in preference to std::string. If you want to
590 * be compatible with 1.2.12 and earlier, then explicitly convert to
591 * std::string, i.e.: DateValueRangeProcessor(1, std::string("date:"))
593 * @param slot_ The value number to return from operator().
595 * @param str_ A string to look for to recognise values as belonging
596 * to this date range.
598 * @param prefix_ Whether to look for the string at the start or end of
599 * the values. If true, the string is a prefix; if
600 * false, the string is a suffix (default: true).
602 * @param prefer_mdy_ Should ambiguous dates be interpreted as
603 * month/day/year rather than day/month/year?
606 * @param epoch_year_ Year to use as the epoch for dates with 2 digit
607 * years (default: 1970, so 1/1/69 is 2069 while
610 * The string supplied in str_ is used by @a operator() to decide whether
611 * the pair of strings supplied to it constitute a valid range. If
612 * prefix_ is true, the first value in a range must begin with str_ (and
613 * the second value may optionally begin with str_);
614 * if prefix_ is false, the second value in a range must end with str_
615 * (and the first value may optionally end with str_).
617 * If str_ is empty, the setting of prefix_ is irrelevant, and no special
618 * strings are required at the start or end of the strings defining the
621 * The remainder of both strings defining the endpoints must be valid
624 * For example, if str_ is "created:" and prefix_ is true, and the range
625 * processor has been added to the queryparser, the queryparser will
626 * accept "created:1/1/2000..31/12/2001".
628 DateValueRangeProcessor(Xapian::valueno slot_
, const char * str_
,
630 bool prefer_mdy_
= false, int epoch_year_
= 1970)
631 : StringValueRangeProcessor(slot_
, str_
, prefix_
),
632 prefer_mdy(prefer_mdy_
), epoch_year(epoch_year_
) { }
635 /** Check for a valid date range.
637 * @param[in,out] begin The start of the range as specified in the
638 * query string by the user. This parameter is a
639 * non-const reference so the ValueRangeProcessor
640 * can modify it to return the value to start the
642 * @param[in,out] end The end of the range. This is also a non-const
643 * reference so it can be modified.
645 * @return If BEGIN..END is a sensible date range, this method modifies
646 * them into the format YYYYMMDD and returns the value of slot_
647 * passed at construction time. Otherwise it returns
648 * Xapian::BAD_VALUENO.
650 Xapian::valueno
operator()(std::string
&begin
, std::string
&end
);
653 /** Handle a number range.
655 * This class must be used on values which have been encoded using
656 * Xapian::sortable_serialise() which turns numbers into strings which
657 * will sort in the same order as the numbers (the same values can be
658 * used to implement a numeric sort).
660 * @deprecated Use Xapian::NumberRangeProcessor instead (added in 1.3.6).
662 class XAPIAN_DEPRECATED_CLASS_EX XAPIAN_VISIBILITY_DEFAULT NumberValueRangeProcessor
: public StringValueRangeProcessor
{
666 * @param slot_ The value number to return from operator().
668 explicit NumberValueRangeProcessor(Xapian::valueno slot_
)
669 : StringValueRangeProcessor(slot_
) { }
673 * @param slot_ The value number to return from operator().
675 * @param str_ A string to look for to recognise values as belonging
676 * to this numeric range.
678 * @param prefix_ Whether to look for the string at the start or end of
679 * the values. If true, the string is a prefix; if
680 * false, the string is a suffix (default: true).
682 * The string supplied in str_ is used by @a operator() to decide whether
683 * the pair of strings supplied to it constitute a valid range. If
684 * prefix_ is true, the first value in a range must begin with str_ (and
685 * the second value may optionally begin with str_);
686 * if prefix_ is false, the second value in a range must end with str_
687 * (and the first value may optionally end with str_).
689 * If str_ is empty, the setting of prefix_ is irrelevant, and no special
690 * strings are required at the start or end of the strings defining the
693 * The remainder of both strings defining the endpoints must be valid
694 * floating point numbers. (FIXME: define format recognised).
696 * For example, if str_ is "$" and prefix_ is true, and the range
697 * processor has been added to the queryparser, the queryparser will
698 * accept "$10..50" or "$10..$50", but not "10..50" or "10..$50" as valid
699 * ranges. If str_ is "kg" and prefix_ is false, the queryparser will
700 * accept "10..50kg" or "10kg..50kg", but not "10..50" or "10kg..50" as
703 NumberValueRangeProcessor(Xapian::valueno slot_
, const std::string
&str_
,
705 : StringValueRangeProcessor(slot_
, str_
, prefix_
) { }
707 /** Check for a valid numeric range.
709 * @param[in,out] begin The start of the range as specified in the
710 * query string by the user. This parameter is a
711 * non-const reference so the ValueRangeProcessor
712 * can modify it to return the value to start the
714 * @param[in,out] end The end of the range. This is also a non-const
715 * reference so it can be modified.
717 * @return If BEGIN..END is a valid numeric range with the specified
718 * prefix/suffix (if one was specified), this method modifies
719 * them by removing the prefix/suffix, converting to a number,
720 * and encoding with Xapian::sortable_serialise(), and returns the
721 * value of slot_ passed at construction time. Otherwise it
722 * returns Xapian::BAD_VALUENO.
724 Xapian::valueno
operator()(std::string
&begin
, std::string
&end
);
727 /** Base class for field processors.
729 class XAPIAN_VISIBILITY_DEFAULT FieldProcessor
730 : public Xapian::Internal::opt_intrusive_base
{
731 /// Don't allow assignment.
732 void operator=(const FieldProcessor
&);
734 /// Don't allow copying.
735 FieldProcessor(const FieldProcessor
&);
738 /// Default constructor.
742 virtual ~FieldProcessor();
744 /** Convert a field-prefixed string to a Query object.
746 * @param str The string to convert.
748 * @return Query object corresponding to @a str.
750 virtual Xapian::Query
operator()(const std::string
&str
) = 0;
752 /** Start reference counting this object.
754 * You can hand ownership of a dynamically allocated FieldProcessor
755 * object to Xapian by calling release() and then passing the object to a
756 * Xapian method. Xapian will arrange to delete the object once it is no
759 FieldProcessor
* release() {
760 opt_intrusive_base::release();
764 /** Start reference counting this object.
766 * You can hand ownership of a dynamically allocated FieldProcessor
767 * object to Xapian by calling release() and then passing the object to a
768 * Xapian method. Xapian will arrange to delete the object once it is no
771 const FieldProcessor
* release() const {
772 opt_intrusive_base::release();
777 /// Build a Xapian::Query object from a user query string.
778 class XAPIAN_VISIBILITY_DEFAULT QueryParser
{
780 /// Class representing the queryparser internals.
782 /// @private @internal Reference counted internals.
783 Xapian::Internal::intrusive_ptr
<Internal
> internal
;
785 /// Enum of feature flags.
787 /// Support AND, OR, etc and bracketed subexpressions.
789 /// Support quoted phrases.
793 /// Support AND, OR, etc even if they aren't in ALLCAPS.
794 FLAG_BOOLEAN_ANY_CASE
= 8,
795 /** Support wildcards.
797 * At present only right truncation (e.g. Xap*) is supported.
799 * Currently you can't use wildcards with boolean filter prefixes,
800 * or in a phrase (either an explicitly quoted one, or one implicitly
801 * generated by hyphens or other punctuation).
803 * In Xapian 1.2.x, you needed to tell the QueryParser object which
804 * database to expand wildcards from by calling set_database(). In
805 * Xapian 1.3.3, OP_WILDCARD was added and wildcards are now
806 * expanded when Enquire::get_mset() is called, with the expansion
807 * using the database being searched.
810 /** Allow queries such as 'NOT apples'.
812 * These require the use of a list of all documents in the database
813 * which is potentially expensive, so this feature isn't enabled by
817 /** Enable partial matching.
819 * Partial matching causes the parser to treat the query as a
820 * "partially entered" search. This will automatically treat the
821 * final word as a wildcarded match, unless it is followed by
822 * whitespace, to produce more stable results from interactive
825 * Currently FLAG_PARTIAL doesn't do anything if the final word
826 * in the query has a boolean filter prefix, or if it is in a phrase
827 * (either an explicitly quoted one, or one implicitly generated by
828 * hyphens or other punctuation). It also doesn't do anything if
829 * the final word is part of a value range.
831 * In Xapian 1.2.x, you needed to tell the QueryParser object which
832 * database to expand wildcards from by calling set_database(). In
833 * Xapian 1.3.3, OP_WILDCARD was added and wildcards are now
834 * expanded when Enquire::get_mset() is called, with the expansion
835 * using the database being searched.
839 /** Enable spelling correction.
841 * For each word in the query which doesn't exist as a term in the
842 * database, Database::get_spelling_suggestion() will be called and if
843 * a suggestion is returned, a corrected version of the query string
844 * will be built up which can be read using
845 * QueryParser::get_corrected_query_string(). The query returned is
846 * based on the uncorrected query string however - if you want a
847 * parsed query based on the corrected query string, you must call
848 * QueryParser::parse_query() again.
850 * NB: You must also call set_database() for this to work.
852 FLAG_SPELLING_CORRECTION
= 128,
854 /** Enable synonym operator '~'.
856 * NB: You must also call set_database() for this to work.
860 /** Enable automatic use of synonyms for single terms.
862 * NB: You must also call set_database() for this to work.
864 FLAG_AUTO_SYNONYMS
= 512,
866 /** Enable automatic use of synonyms for single terms and groups of
869 * NB: You must also call set_database() for this to work.
871 FLAG_AUTO_MULTIWORD_SYNONYMS
= 1024,
873 /** Enable generation of n-grams from CJK text.
875 * With this enabled, spans of CJK characters are split into unigrams
876 * and bigrams, with the unigrams carrying positional information.
877 * Non-CJK characters are split into words as normal.
879 * The corresponding option needs to have been used at index time.
881 * Flag added in Xapian 1.3.4 and 1.2.22. This mode can be
882 * enabled in 1.2.8 and later by setting environment variable
883 * XAPIAN_CJK_NGRAM to a non-empty value (but doing so was deprecated
886 FLAG_CJK_NGRAM
= 2048,
888 /** Accumulate unstem and stoplist results.
890 * By default, the unstem and stoplist data is reset by a call to
891 * parse_query(), which makes sense if you use the same QueryParser
892 * object to parse a series of independent queries.
894 * If you're using the same QueryParser object to parse several
895 * fields on the same query form, you may want to have the unstem
896 * and stoplist data combined for all of them, in which case you
897 * can use this flag to prevent this data from being reset.
899 * @since Added in Xapian 1.4.18.
901 FLAG_ACCUMULATE
= 65536,
903 /** Produce a query which doesn't use positional information.
905 * With this flag enabled, no positional information will be used
906 * and any query operations which would use it are replaced by
907 * the nearest equivalent which doesn't (so phrase searches, NEAR
908 * and ADJ will result in OP_AND).
910 * @since Added in Xapian 1.4.19.
912 FLAG_NO_POSITIONS
= 0x20000,
914 /** The default flags.
916 * Used if you don't explicitly pass any to @a parse_query().
917 * The default flags are FLAG_PHRASE|FLAG_BOOLEAN|FLAG_LOVEHATE.
919 * Added in Xapian 1.0.11.
921 FLAG_DEFAULT
= FLAG_PHRASE
|FLAG_BOOLEAN
|FLAG_LOVEHATE
924 /// Stemming strategies, for use with set_stemming_strategy().
926 STEM_NONE
, STEM_SOME
, STEM_ALL
, STEM_ALL_Z
, STEM_SOME_FULL_POS
929 /// Copy constructor.
930 QueryParser(const QueryParser
& o
);
933 QueryParser
& operator=(const QueryParser
& o
);
935 #ifdef XAPIAN_MOVE_SEMANTICS
936 /// Move constructor.
937 QueryParser(QueryParser
&& o
);
939 /// Move assignment operator.
940 QueryParser
& operator=(QueryParser
&& o
);
943 /// Default constructor.
951 * This sets the stemming algorithm which will be used by the query
952 * parser. The stemming algorithm will be used according to the stemming
953 * strategy set by set_stemming_strategy(). As of 1.3.1, this defaults
954 * to STEM_SOME, but in earlier versions the default was STEM_NONE. If
955 * you want to work with older versions, you should explicitly set
956 * a stemming strategy as well as setting a stemmer, otherwise your
957 * stemmer won't actually be used.
959 * @param stemmer The Xapian::Stem object to set.
961 void set_stemmer(const Xapian::Stem
& stemmer
);
963 /** Set the stemming strategy.
965 * This controls how the query parser will apply the stemming algorithm.
966 * Note that the stemming algorithm is only applied to words in free-text
967 * fields - boolean filter terms are never stemmed.
969 * @param strategy The strategy to use - possible values are:
970 * - STEM_NONE: Don't perform any stemming. (default in Xapian <= 1.3.0)
972 * - STEM_SOME: Stem all terms except for those which start with a
973 * capital letter, or are followed by certain characters
974 * (currently: <code>(/\@<>=*[{"</code> ), or are used
975 * with operators which need positional information.
976 * Stemmed terms are prefixed with 'Z'. (default in Xapian >= 1.3.1)
978 * - STEM_SOME_FULL_POS:
979 * Like STEM_SOME but also stems terms used with operators
980 * which need positional information. Added in Xapian
982 * - STEM_ALL: Stem all terms (note: no 'Z' prefix is added).
983 * - STEM_ALL_Z: Stem all terms (note: 'Z' prefix is added). (new in
984 * Xapian 1.2.11 and 1.3.1)
986 void set_stemming_strategy(stem_strategy strategy
);
990 * @param stop The Stopper object to set (default NULL, which means no stopwords).
/// Set the Stopper object for this QueryParser; @a stop is a pointer to a
/// const Stopper and defaults to NULL when the argument is omitted.
993 void set_stopper(const Stopper
*stop
= NULL
);
995 /** Set the default operator.
997 * @param default_op The operator to use to combine non-filter
998 * query items when no explicit operator is used.
1000 * So for example, 'weather forecast' is parsed as
1001 * if it were 'weather OR forecast' by default.
1003 * The most useful values for this are OP_OR (the
1004 * default) and OP_AND. OP_NEAR, OP_PHRASE,
1005 * OP_ELITE_SET, OP_SYNONYM and OP_MAX are also
1006 * permitted. Passing other values will result in
1007 * InvalidArgumentError being thrown.
1009 void set_default_op(Query::op default_op
);
1011 /** Get the current default operator. */
1012 Query::op
get_default_op() const;
1014 /** Specify the database being searched.
1016 * @param db The database to use for spelling correction
1017 * (FLAG_SPELLING_CORRECTION), and synonyms (FLAG_SYNONYM,
1018 * FLAG_AUTO_SYNONYMS, and FLAG_AUTO_MULTIWORD_SYNONYMS).
1020 void set_database(const Database
&db
);
1022 /** Specify the maximum expansion of a wildcard and/or partial term.
1024 * Note: you must also set FLAG_WILDCARD and/or FLAG_PARTIAL in the flags
1025 * parameter to @a parse_query() for this setting to have any effect.
1028 * If you don't call this method, the default settings are no limit on
1029 * wildcard expansion, and partial terms expanding to the most frequent
1030 * 100 terms - i.e. as if you'd called:
1032 * set_max_expansion(0);
1033 * set_max_expansion(100, Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT, Xapian::QueryParser::FLAG_PARTIAL);
1035 * @param max_expansion The maximum number of terms each wildcard in the
1036 * query can expand to, or 0 for no limit (which is the default).
1038 * @param max_type @a Xapian::Query::WILDCARD_LIMIT_ERROR,
1039 * @a Xapian::Query::WILDCARD_LIMIT_FIRST or
1040 * @a Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT
1041 * (default: Xapian::Query::WILDCARD_LIMIT_ERROR).
1042 * @param flags What to set the limit for (default:
1043 * FLAG_WILDCARD|FLAG_PARTIAL, setting the limit for both
1044 * wildcards and partial terms).
1048 void set_max_expansion(Xapian::termcount max_expansion
,
1049 int max_type
= Xapian::Query::WILDCARD_LIMIT_ERROR
,
1050 unsigned flags
= FLAG_WILDCARD
|FLAG_PARTIAL
);
1052 /** Specify the maximum expansion of a wildcard.
1054 * If any wildcard expands to more than @a max_expansion terms, an
1055 * exception will be thrown.
1057 * This method is provided for API compatibility with Xapian 1.2.x and is
1058 * deprecated - replace it with:
1060 * set_max_expansion(max_expansion,
1061 * Xapian::Query::WILDCARD_LIMIT_ERROR,
1062 * Xapian::QueryParser::FLAG_WILDCARD);
1064 XAPIAN_DEPRECATED(void set_max_wildcard_expansion(Xapian::termcount
));
1068 * @param query_string A free-text query as entered by a user
1069 * @param flags Zero or more QueryParser::feature_flag specifying
1070 * what features the QueryParser should support. Combine
1071 * multiple values with bitwise-or (|) (default FLAG_DEFAULT).
1072 * @param default_prefix The default term prefix to use (default none).
1073 * For example, you can pass "A" when parsing an "Author" field.
1075 * @exception Xapian::QueryParserError is thrown if the query string
1076 * can't be parsed. You can get an English
1077 * error message to report to the user by catching it and
1078 * calling get_msg() on the caught exception. The current
1079 * possible values (in case you want to translate them) are:
1081 * @li Unknown range operation
1083 * @li Syntax: <expression> AND <expression>
1084 * @li Syntax: <expression> AND NOT <expression>
1085 * @li Syntax: <expression> NOT <expression>
1086 * @li Syntax: <expression> OR <expression>
1087 * @li Syntax: <expression> XOR <expression>
/// Parse @a query_string and return the result as a Query object.
/// Both @a flags (FLAG_DEFAULT) and @a default_prefix (an empty string) have
/// default arguments, so only the query string is required.
1089 Query
parse_query(const std::string
&query_string
,
1090 unsigned flags
= FLAG_DEFAULT
,
1091 const std::string
&default_prefix
= std::string());
1093 /** Add a free-text field term prefix.
1098 * qp.add_prefix("author", "A");
1101 * This allows the user to search for author:Orwell which will be
1102 * converted to a search for the term "Aorwell".
1104 * Multiple fields can be mapped to the same prefix. For example, you
1105 * can make title: and subject: aliases for each other.
1107 * As of 1.0.4, you can call this method multiple times with the same
1108 * value of field to allow a single field to be mapped to multiple
1109 * prefixes. Multiple terms are generated for such a field, and
1110 * combined with @c Xapian::Query::OP_OR.
1112 * If any prefixes are specified for the empty field name (i.e. you
1113 * call this method with an empty string as the first parameter)
1114 * these prefixes will be used for terms without a field specifier.
1115 * If you do this and also specify the @c default_prefix parameter to @c
1116 * parse_query(), then the @c default_prefix parameter will override.
1118 * If the prefix parameter is empty, then "field:word" will produce the
1119 * term "word" (and this can be one of several prefixes for a particular
1120 * field, or for terms without a field specifier).
1122 * If you call @c add_prefix() and @c add_boolean_prefix() for the
1123 * same value of @a field, a @c Xapian::InvalidOperationError exception will be thrown.
1126 * In 1.0.3 and earlier, subsequent calls to this method with the same
1127 * value of @a field had no effect.
1129 * @param field The user visible field name
1130 * @param prefix The term prefix to map this to
1132 void add_prefix(const std::string
& field
, const std::string
& prefix
);
1134 /** Register a FieldProcessor.
1136 void add_prefix(const std::string
& field
, Xapian::FieldProcessor
* proc
);
1138 /** Add a boolean term prefix allowing the user to restrict a
1139 * search with a boolean filter specified in the free text query.
1144 * qp.add_boolean_prefix("site", "H");
1147 * This allows the user to restrict a search with site:xapian.org which
1148 * will be converted to Hxapian.org combined with any weighted
1149 * query with @c Xapian::Query::OP_FILTER.
1151 * If multiple boolean filters are specified in a query for the same
1152 * prefix, they will be combined with the @c Xapian::Query::OP_OR
1153 * operator. Then, if there are boolean filters for different prefixes,
1154 * they will be combined with the @c Xapian::Query::OP_AND operator.
1156 * Multiple fields can be mapped to the same prefix (so for example
1157 * you can make site: and domain: aliases for each other). Instances of
1158 * fields with different aliases but the same prefix will still be
1159 * combined with the OR operator.
1161 * For example, if "site" and "domain" map to "H", but author maps to "A",
1162 * a search for "site:foo domain:bar author:Fred" will map to
1163 * "(Hfoo OR Hbar) AND Afred".
1165 * As of 1.0.4, you can call this method multiple times with the same
1166 * value of field to allow a single field to be mapped to multiple
1167 * prefixes. Multiple terms are generated for such a field, and
1168 * combined with @c Xapian::Query::OP_OR.
1170 * Calling this method with an empty string for @a field will cause
1171 * a @c Xapian::InvalidArgumentError.
1173 * If you call @c add_prefix() and @c add_boolean_prefix() for the
1174 * same value of @a field, a @c Xapian::InvalidOperationError exception will be thrown.
1177 * In 1.0.3 and earlier, subsequent calls to this method with the same
1178 * value of @a field had no effect.
1180 * @param field The user visible field name
1181 * @param prefix The term prefix to map this to
1182 * @param grouping Controls how multiple filters are combined - filters
1183 * with the same grouping value are combined with OP_OR,
1184 * then the resulting queries are combined with OP_AND.
1185 * If NULL, then @a field is used for grouping. If an
1186 * empty string, then a unique grouping is created for
1187 * each filter (this is sometimes useful when each
1188 * document can have multiple terms with this prefix).
1191 void add_boolean_prefix(const std::string
&field
, const std::string
&prefix
,
1192 const std::string
* grouping
= NULL
);
1194 /** Add a boolean term prefix allowing the user to restrict a
1195 * search with a boolean filter specified in the free text query.
1197 * This is an older version of this method - use the version with
1198 * the `grouping` parameter in preference to this one.
1200 * @param field The user visible field name
1201 * @param prefix The term prefix to map this to
1202 * @param exclusive Controls how multiple filters are combined. If
1203 * true then @a prefix is used as the `grouping` value,
1204 * so terms with the same prefix are combined with OP_OR,
1205 * then the resulting queries are combined with OP_AND.
1206 * If false, then a unique grouping is created for
1207 * each filter (this is sometimes useful when each
1208 * document can have multiple terms with this prefix).
// Legacy overload taking a string prefix: both visible calls forward to the
// overload which takes a `grouping` pointer.
// NOTE(review): the `exclusive` parameter and the branch on it are not visible
// in this excerpt - presumably the first call is the exclusive==true arm and
// the second the exclusive==false arm; confirm.
1210 void add_boolean_prefix(const std::string
&field
, const std::string
&prefix
,
// No grouping argument is passed here, so the callee's default (NULL) applies.
// NOTE(review): the doc for the `grouping` overload says NULL means @a field
// is used for grouping, whereas the doc for this overload says @a prefix is
// used - confirm which is correct.
1213 add_boolean_prefix(field
, prefix
);
// An empty string is passed as the grouping; the `grouping` overload documents
// that as creating a unique grouping for each filter.
1215 std::string empty_grouping
;
1216 add_boolean_prefix(field
, prefix
, &empty_grouping
);
1220 /** Register a FieldProcessor for a boolean prefix.
1222 void add_boolean_prefix(const std::string
&field
, Xapian::FieldProcessor
*proc
,
1223 const std::string
* grouping
= NULL
);
1225 /** Register a FieldProcessor for a boolean prefix.
1227 * This is an older version of this method - use the version with
1228 * the `grouping` parameter in preference to this one.
// Legacy overload taking a FieldProcessor: both visible calls forward to the
// FieldProcessor overload which takes a `grouping` pointer.
// NOTE(review): the `exclusive` parameter and the branch on it are not visible
// in this excerpt - presumably the first call is the exclusive==true arm and
// the second the exclusive==false arm; confirm.
1230 void add_boolean_prefix(const std::string
&field
, Xapian::FieldProcessor
*proc
,
// No grouping argument is passed here, so the callee's default (NULL) applies.
1233 add_boolean_prefix(field
, proc
);
// Pass the address of a local empty string as the grouping.
1235 std::string empty_grouping
;
1236 add_boolean_prefix(field
, proc
, &empty_grouping
);
1240 /// Begin iterator over terms omitted from the query as stopwords.
1241 TermIterator
stoplist_begin() const;
1243 /// End iterator over terms omitted from the query as stopwords.
// Defined inline: the end position is simply a default-constructed
// TermIterator.
1244 TermIterator
XAPIAN_NOTHROW(stoplist_end() const) {
1245 return TermIterator();
1248 /// Begin iterator over unstemmed forms of the given stemmed query term.
1249 TermIterator
unstem_begin(const std::string
&term
) const;
1251 /// End iterator over unstemmed forms of the given stemmed query term.
// Defined inline: the string parameter is unnamed and unused, so the same
// default-constructed TermIterator is returned whatever term is passed.
1252 TermIterator
XAPIAN_NOTHROW(unstem_end(const std::string
&) const) {
1253 return TermIterator();
1256 /// Register a RangeProcessor.
// NOTE(review): @a grouping is not documented here; it is a pointer to a
// const std::string which defaults to NULL. Presumably it has the same meaning
// as the `grouping` parameter of add_boolean_prefix() - confirm and document.
1257 void add_rangeprocessor(Xapian::RangeProcessor
* range_proc
,
1258 const std::string
* grouping
= NULL
);
1260 /** Register a ValueRangeProcessor.
1262 * This method is provided for API compatibility with Xapian 1.2.x and is
1263 * deprecated - use @a add_rangeprocessor() with a RangeProcessor instead.
// Deprecated entry point, defined inline: wraps @a vrproc in a local adapter
// class derived from RangeProcessor and registers that adapter through
// add_rangeprocessor().
1265 XAPIAN_DEPRECATED(void add_valuerangeprocessor(Xapian::ValueRangeProcessor
* vrproc
)) {
1267 // Avoid deprecation warnings if compiling without optimisation.
1268 # pragma GCC diagnostic push
1269 # pragma GCC diagnostic ignored "-Wdeprecated-declarations"
1271 /// Compatibility shim.
// Adapts a ValueRangeProcessor to the RangeProcessor interface.
1272 class ShimRangeProcessor
: public RangeProcessor
{
// The wrapped ValueRangeProcessor, held via opt_intrusive_ptr.
1273 Xapian::Internal::opt_intrusive_ptr
<Xapian::ValueRangeProcessor
> vrp
;
// The base class is constructed with BAD_VALUENO; `slot` is then assigned on
// each call to operator() below.
1276 ShimRangeProcessor(Xapian::ValueRangeProcessor
* vrp_
)
1277 : RangeProcessor(Xapian::BAD_VALUENO
), vrp(vrp_
) { }
1280 operator()(const std::string
&begin
, const std::string
&end
)
// Work on local copies of the two const-reference parameters.
// NOTE(review): presumably the wrapped processor takes non-const references
// and may modify b and e - confirm.
1282 std::string b
= begin
, e
= end
;
// Call the wrapped processor and store its result in `slot` (not declared in
// this class, so presumably a member inherited from RangeProcessor).
1283 slot
= (*vrp
)(b
, e
);
// A BAD_VALUENO result is reported to the caller as an OP_INVALID query.
1284 if (slot
== Xapian::BAD_VALUENO
)
1285 return Xapian::Query(Xapian::Query::OP_INVALID
);
// Otherwise delegate to the base-class operator() with b and e.
1286 return RangeProcessor::operator()(b
, e
);
// Heap-allocate the shim and register the result of calling release() on it.
// NOTE(review): release() presumably hands ownership of the shim over to the
// reference-counting machinery - confirm.
1290 add_rangeprocessor((new ShimRangeProcessor(vrproc
))->release());
1292 # pragma GCC diagnostic pop
1296 /** Get the spelling-corrected query string.
1298 * This will only be set if FLAG_SPELLING_CORRECTION is specified when
1299 * QueryParser::parse_query() was last called.
1301 * If there were no corrections, an empty string is returned.
1303 std::string
get_corrected_query_string() const;
1305 /// Return a string describing this object.
1306 std::string
get_description() const;
// Out-of-class definition of the deprecated single-argument method: it
// forwards to set_max_expansion().
1310 QueryParser::set_max_wildcard_expansion(Xapian::termcount max_expansion
)
// The limit type passed is WILDCARD_LIMIT_ERROR; any further argument of this
// call is not visible in this excerpt.
1312 set_max_expansion(max_expansion
,
1313 Xapian::Query::WILDCARD_LIMIT_ERROR
,
1317 /// @private @internal Helper for sortable_serialise().
1318 XAPIAN_VISIBILITY_DEFAULT
1319 size_t XAPIAN_NOTHROW(sortable_serialise_(double value
, char * buf
));
1321 /** Convert a floating point number to a string, preserving sort order.
1323 * This method converts a floating point number to a string, suitable for
1324 * using as a value for numeric range restriction, or for use as a sort key.
1327 * The conversion is platform independent.
1329 * The conversion attempts to ensure that, for any pair of values supplied
1330 * to the conversion algorithm, the result of comparing the original
1331 * values (with a numeric comparison operator) will be the same as the
1332 * result of comparing the resulting values (with a string comparison
1333 * operator). On platforms which represent doubles with the precisions
1334 * specified by IEEE_754, this will be the case: if the representation of
1335 * doubles is more precise, it is possible that two very close doubles
1336 * will be mapped to the same string, so will compare equal.
1338 * Note also that both zero and -zero will be converted to the same
1339 * representation: since these compare equal, this satisfies the
1340 * comparison constraint, but it's worth knowing this if you wish to use
1341 * the encoding in some situation where this distinction matters.
1343 * Handling of NaN isn't (currently) guaranteed to be sensible.
1345 * @param value The number to serialise.
// Inline wrapper around sortable_serialise_(): the helper is passed @a value
// and buf, and its size_t result is used as the length of the std::string
// built from buf.
// NOTE(review): the declaration of buf is not visible in this excerpt;
// presumably the helper writes the encoded bytes into it - confirm.
1347 inline std::string
sortable_serialise(double value
) {
1349 return std::string(buf
, sortable_serialise_(value
, buf
));
1352 /** Convert a string encoded using @a sortable_serialise back to a floating point number.
1355 * This expects the input to be a string produced by @a sortable_serialise().
1356 * If the input is not such a string, the value returned is undefined (but
1357 * no error will be thrown).
1359 * The result of the conversion will be exactly the value which was
1360 * supplied to @a sortable_serialise() when making the string on platforms
1361 * which represent doubles with the precisions specified by IEEE_754, but
1362 * may be a different (nearby) value on other platforms.
1364 * @param serialised The serialised string to decode.
1366 XAPIAN_VISIBILITY_DEFAULT
1367 double XAPIAN_NOTHROW(sortable_unserialise(const std::string
& serialised
));
1371 #endif // XAPIAN_INCLUDED_QUERYPARSER_H