2 * @brief Querying session
4 /* Copyright (C) 2005,2013,2016,2017,2024 Olly Betts
5 * Copyright (C) 2009 Lemur Consulting Ltd
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23 #ifndef XAPIAN_INCLUDED_ENQUIRE_H
24 #define XAPIAN_INCLUDED_ENQUIRE_H
26 #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD
27 # error Never use <xapian/enquire.h> directly; include <xapian.h> instead.
32 #include <xapian/attributes.h>
33 #include <xapian/eset.h>
34 #include <xapian/intrusive_ptr.h>
35 #include <xapian/mset.h>
36 #include <xapian/types.h>
37 #include <xapian/termiterator.h>
38 #include <xapian/visibility.h>
42 // Forward declarations of classes referenced below.
54 * An Enquire object represents a querying session - most of the options for
55 * running a query can be set on it, and the query is run via
56 * Enquire::get_mset().
58 class XAPIAN_VISIBILITY_DEFAULT Enquire
{
60 /// Class representing the Enquire internals.
62 /// @private @internal Reference counted internals.
63 Xapian::Internal::intrusive_ptr_nonnull
<Internal
> internal
;
65 /** Copying is allowed.
67 * The internals are reference counted, so copying is cheap.
69 Enquire(const Enquire
& o
);
71 /** Copying is allowed.
73 * The internals are reference counted, so assignment is cheap.
75 Enquire
& operator=(const Enquire
& o
);
80 /// Move assignment operator.
81 Enquire
& operator=(Enquire
&& o
);
85 * @param db The database (or databases) to query.
87 * @since 1.5.0 If @a db has no subdatabases, it's handled like any other
88 * empty database. In earlier versions, Xapian::InvalidArgumentError was
89 * thrown in this case.
92 Enquire(const Database
& db
);
99 * If set_query() is not called before calling get_mset(), the default
100 * query used will be Xapian::MatchNothing.
102 * @param query The Xapian::Query object
103 * @param query_length The query length to use (default:
104 * query.get_length())
106 void set_query(const Query
& query
, termcount query_length
= 0);
108 /** Get the currently set query.
110 * If set_query() is not called before calling get_query(), then the
111 * default query Xapian::MatchNothing will be returned.
113 const Query
& get_query() const;
115 /** Set the weighting scheme to use.
117 * The Xapian::Weight object passed is cloned by calling weight.clone(),
118 * so doesn't need to remain valid after the call.
120 * If set_weighting_scheme() is not called before calling get_mset(), the
121 * default weighting scheme is Xapian::BM25Weight().
123 * @param weight Xapian::Weight object
125 void set_weighting_scheme(const Weight
& weight
);
127 /** Ordering of docids.
129 * Parameter to Enquire::set_docid_order().
132 /** docids sort in ascending order (default) */
134 /** docids sort in descending order. */
136 /** docids sort in whatever order is most efficient for the backend. */
140 /** Set sort order for document IDs.
142 * This order only has an effect on documents which would otherwise
143 * have equal rank. When ordering by relevance without a sort key,
144 * this means documents with equal weight. For a boolean match
145 * with no sort key, this means all documents. And if a sort key
146 * is used, this means documents with the same sort key (and also equal
147 * weight if ordering on relevance before or after the sort key).
149 * @param order This can be:
150 * - Xapian::Enquire::ASCENDING
151 * docids sort in ascending order (default)
152 * - Xapian::Enquire::DESCENDING
153 * docids sort in descending order
154 * - Xapian::Enquire::DONT_CARE
155 * docids sort in whatever order is most efficient for the backend
157 * Note: If you add documents in strict date order, then a boolean
158 * search - i.e. set_weighting_scheme(Xapian::BoolWeight()) - with
159 * set_docid_order(Xapian::Enquire::DESCENDING) is an efficient
160 * way to perform "sort by date, newest first", and with
161 * set_docid_order(Xapian::Enquire::ASCENDING) a very efficient way
162 * to perform "sort by date, oldest first".
164 void set_docid_order(docid_order order
);
166 /** Set the sorting to be by relevance only.
168 * This is the default.
170 void set_sort_by_relevance();
172 /** Set the sorting to be by value only.
174 * Note that sorting by values uses a string comparison, so to use
175 * this to sort by a numeric value you'll need to store the numeric
176 * values in a manner which sorts appropriately. For example, you
177 * could use Xapian::sortable_serialise() (which works for floating
178 * point numbers as well as integers), or store numbers padded with
179 * leading zeros or spaces, or with the number of digits prepended.
181 * @param sort_key value number to sort on.
183 * @param reverse If true, reverses the sort order.
185 void set_sort_by_value(valueno sort_key
, bool reverse
);
187 /** Set the sorting to be by key generated from values only.
189 * @param sorter The functor to use for generating keys.
191 * @param reverse If true, reverses the sort order.
193 void set_sort_by_key(KeyMaker
* sorter
,
194 bool reverse
) XAPIAN_NONNULL();
196 /** Set the sorting to be by value, then by relevance for documents
197 * with the same value.
199 * Note that sorting by values uses a string comparison, so to use
200 * this to sort by a numeric value you'll need to store the numeric
201 * values in a manner which sorts appropriately. For example, you
202 * could use Xapian::sortable_serialise() (which works for floating
203 * point numbers as well as integers), or store numbers padded with
204 * leading zeros or spaces, or with the number of digits prepended.
206 * @param sort_key value number to sort on.
208 * @param reverse If true, reverses the sort order.
210 void set_sort_by_value_then_relevance(valueno sort_key
, bool reverse
);
212 /** Set the sorting to be by keys generated from values, then by
213 * relevance for documents with identical keys.
215 * @param sorter The functor to use for generating keys.
217 * @param reverse If true, reverses the sort order.
219 void set_sort_by_key_then_relevance(KeyMaker
* sorter
,
220 bool reverse
) XAPIAN_NONNULL();
222 /** Set the sorting to be by relevance then value.
224 * Note that sorting by values uses a string comparison, so to use
225 * this to sort by a numeric value you'll need to store the numeric
226 * values in a manner which sorts appropriately. For example, you
227 * could use Xapian::sortable_serialise() (which works for floating
228 * point numbers as well as integers), or store numbers padded with
229 * leading zeros or spaces, or with the number of digits prepended.
231 * Note that with the default BM25 weighting scheme parameters,
232 * non-identical documents will rarely have the same weight, so
233 * this setting will give very similar results to
234 * set_sort_by_relevance(). It becomes more useful with particular
235 * BM25 parameter settings (e.g. BM25Weight(1,0,1,0,0)) or custom weighting schemes.
238 * @param sort_key value number to sort on.
240 * @param reverse If true, reverses the sort order of sort_key.
241 * Beware that in 1.2.16 and earlier, the sense
242 * of this parameter was incorrectly inverted
243 * and inconsistent with the other set_sort_by_...
244 * methods. This was fixed in 1.2.17, so make that
245 * version a minimum requirement if this detail
246 * matters to your application.
248 void set_sort_by_relevance_then_value(valueno sort_key
, bool reverse
);
250 /** Set the sorting to be by relevance, then by keys generated from values.
253 * Note that with the default BM25 weighting scheme parameters,
254 * non-identical documents will rarely have the same weight, so
255 * this setting will give very similar results to
256 * set_sort_by_relevance(). It becomes more useful with particular
257 * BM25 parameter settings (e.g. BM25Weight(1,0,1,0,0)) or custom weighting schemes.
260 * @param sorter The functor to use for generating keys.
262 * @param reverse If true, reverses the sort order of the generated
263 * keys. Beware that in 1.2.16 and earlier, the sense
264 * of this parameter was incorrectly inverted
265 * and inconsistent with the other set_sort_by_...
266 * methods. This was fixed in 1.2.17, so make that
267 * version a minimum requirement if this detail
268 * matters to your application.
270 void set_sort_by_relevance_then_key(KeyMaker
* sorter
,
271 bool reverse
) XAPIAN_NONNULL();
273 /** Control collapsing of results.
275 * The MSet returned by @a get_mset() will have only the "best" (at most)
276 * @a collapse_max documents with each particular non-empty value in slot
277 * @a collapse_key ("best" being highest ranked - i.e. highest weight or
278 * highest sorting key).
280 * An example use might be to create a value for each document
281 * containing an MD5 hash of the document contents. Then
282 * duplicate documents from different sources can be eliminated at
283 * search time by collapsing with @a collapse_max = 1 (it's better
284 * to eliminate duplicates at index time, but this may not
285 * always be possible - for example the search may be over more
286 * than one Xapian database).
288 * Another use is to group matches in a particular category (e.g.
289 * you might collapse a mailing list search on the Subject: so
290 * that there's only one result per discussion thread). In this
291 * case you can use get_collapse_count() to give the user some
292 * idea how many other results there are. And if you index the
293 * Subject: as a boolean term as well as putting it in a value,
294 * you can offer a link to a non-collapsed search restricted to
295 * that thread using a boolean filter.
297 * @param collapse_key value slot to collapse on (default is
298 * Xapian::BAD_VALUENO which means no collapsing).
300 * @param collapse_max Maximum number of documents with the same key
301 * to allow (default: 1).
303 void set_collapse_key(valueno collapse_key
, doccount collapse_max
= 1);
305 /** Set lower bounds on percentage and/or weight.
307 * @param percent_threshold Lower bound on percentage score
308 * @param weight_threshold Lower bound on weight (default: 0)
310 * No thresholds are applied by default, and if either threshold is set
311 * to 0, then that threshold is disabled.
313 void set_cutoff(int percent_threshold
, double weight_threshold
= 0);
317 * This matchspy will be called with some of the documents which match
318 * the query, during the match process. Exactly which of the matching
319 * documents are passed to it depends on exactly when certain
320 * optimisations occur during the match process, but it can be
321 * controlled to some extent by setting the @a checkatleast parameter to get_mset().
324 * In particular, if there are enough matching documents, at least the
325 * number specified by @a checkatleast will be passed to the matchspy.
326 * This means that you can force the matchspy to be shown all matching
327 * documents by setting @a checkatleast to the number of documents in the database.
330 * @param spy The MatchSpy subclass to add. The caller must
331 * ensure that this remains valid while the Enquire
332 * object remains active, or until @a
333 * clear_matchspies() is called, or else allocate
334 * the MatchSpy object with new and then disown it by
335 * calling spy->release() before passing it in.
337 void add_matchspy(MatchSpy
* spy
) XAPIAN_NONNULL();
339 /** Remove all the matchspies. */
340 void clear_matchspies();
342 /** Set a time limit for the match.
344 * Matches with check_at_least set high can take a long time in some
345 * cases. You can set a time limit on this, after which check_at_least
346 * will be turned off.
348 * @param time_limit time in seconds after which to disable
349 * check_at_least (default: 0.0 which means no time limit).
354 * This feature is currently supported on platforms which support POSIX
355 * interval timers. Interaction with the remote backend when using
356 * multiple databases may have bugs. There's not currently a way to
357 * force the match to end after a certain time.
359 void set_time_limit(double time_limit
);
363 * Run the query using the settings in this Enquire object and those
364 * passed as parameters to the method, and return a Xapian::MSet object.
366 * @param first Zero-based index of the first result to return
367 * (which supports retrieving pages of results).
368 * @param maxitems The maximum number of documents to return.
369 * @param checkatleast Check at least this many documents. By default
370 * Xapian will avoid considering documents
371 * which it can prove can't match, which is faster
372 * but can result in loose bounds on and a poor
373 * estimate of the total number of matches -
374 * setting checkatleast higher allows trading off
375 * speed for tighter bounds and a more accurate
376 * estimate. (default: 0)
377 * @param rset Documents marked as relevant (default: no
378 * documents have been marked as relevant)
379 * @param mdecider Xapian::MatchDecider object - this acts as a
380 * yes/no filter on documents which match the
381 * query. See also Xapian::PostingSource.
382 * (default: no Xapian::MatchDecider)
384 MSet
get_mset(doccount first
,
386 doccount checkatleast
= 0,
387 const RSet
* rset
= NULL
,
388 const MatchDecider
* mdecider
= NULL
) const;
392 * Run the query using the settings in this Enquire object and those
393 * passed as parameters to the method, and return a Xapian::MSet object.
395 * @param first Zero-based index of the first result to return
396 * (which supports retrieving pages of results).
397 * @param maxitems The maximum number of documents to return.
398 * @param rset Documents marked as relevant (default: no
399 * documents have been marked as relevant)
400 * @param mdecider Xapian::MatchDecider object - this acts as a
401 * yes/no filter on documents which match the
402 * query. See also Xapian::PostingSource.
403 * (default: no Xapian::MatchDecider)
405 MSet
get_mset(doccount first
,
408 const MatchDecider
* mdecider
= NULL
) const {
409 return get_mset(first
, maxitems
, 0, rset
, mdecider
);
412 /** Iterate query terms matching a document.
414 * Takes terms from the query set by @a set_query() and from the document
415 * with document ID @a did in the database set in the constructor, and
416 * returns terms which are in both, ordered by ascending query position.
417 * Terms which occur more than once in the query are only returned once,
418 * at the lowest term position they occur at.
420 * @param did Document ID in the database set in the constructor
422 TermIterator
get_matching_terms_begin(docid did
) const;
424 /** Iterate query terms matching a document.
426 * Convenience overloaded form, taking a Xapian::MSetIterator instead
427 * of a Xapian::docid.
429 * @param it MSetIterator to return matching terms for
431 TermIterator
get_matching_terms_begin(const MSetIterator
& it
) const {
432 return get_matching_terms_begin(*it
);
435 /// End iterator corresponding to @a get_matching_terms_begin().
436 TermIterator
get_matching_terms_end(docid
) const noexcept
{
437 return TermIterator();
440 /// End iterator corresponding to @a get_matching_terms_begin().
441 TermIterator
get_matching_terms_end(const MSetIterator
&) const noexcept
{
442 return TermIterator();
445 /** Set the weighting scheme to use for expansion.
447 * If you don't call this method, the default is as if you'd used:
449 * set_expansion_scheme("prob");
451 * @param eweightname A string in lowercase specifying the name of
452 * the scheme to be used. The following schemes
453 * are currently available:
454 * * "bo1": Bose-Einstein 1 model from the Divergence
455 * From Randomness framework.
456 * * "prob" : Probabilistic model (since 1.4.26).
457 * * "trad" : Deprecated alias for "prob".
458 * @param expand_k Parameter k for probabilistic query expansion.
459 * A default value of 1.0 is used if none is specified.
461 void set_expansion_scheme(std::string_view eweightname
,
462 double expand_k
= 1.0) const;
464 /** Flag telling get_eset() to allow query terms in Xapian::ESet.
466 * By default, query terms are excluded. This is appropriate when using
467 * get_eset() to generate terms for query expansion, but for some other
468 * uses query terms are also interesting.
470 static const int INCLUDE_QUERY_TERMS
= 1;
472 /** Flag telling get_eset() to always use the exact term frequency.
474 * By default, get_eset() approximates the term frequency in some cases
475 * (currently when we're expanding from more than one database and there
476 * are sub-databases which don't contain any documents marked as
477 * relevant). This is faster and should still return good results, but
478 * this flag allows the exact term frequency to always be used.
480 static const int USE_EXACT_TERMFREQ
= 2;
482 /** Perform query expansion.
484 * Perform query expansion using a Xapian::RSet indicating some documents
485 * which are relevant (typically based on the user marking results or similar).
488 * @param maxitems The maximum number of terms to return.
489 * @param rset Documents marked as relevant.
490 * @param flags Bitwise-or combination of @a
491 * INCLUDE_QUERY_TERMS and @a USE_EXACT_TERMFREQ
492 * flags (default: 0).
493 * @param edecider Xapian::ExpandDecider object - this acts as a
494 * yes/no filter on terms which are being
495 * considered. (default: no
496 * Xapian::ExpandDecider)
497 * @param min_weight Lower bound on weight of acceptable terms
500 * @return Xapian::ESet object containing a list of terms with weights.
502 ESet
get_eset(termcount maxitems
,
505 const ExpandDecider
* edecider
= NULL
,
506 double min_weight
= 0.0) const;
508 /** Perform query expansion.
510 * Perform query expansion using a Xapian::RSet indicating some documents
511 * which are relevant (typically based on the user marking results or similar).
514 * @param maxitems The maximum number of terms to return.
515 * @param rset Documents marked as relevant.
516 * @param edecider Xapian::ExpandDecider object - this acts as a
517 * yes/no filter on terms which are being considered.
520 * @return Xapian::ESet object containing a list of terms with weights.
522 ESet
get_eset(termcount maxitems
,
524 const ExpandDecider
* edecider
) const {
525 return get_eset(maxitems
, rset
, 0, edecider
);
528 /// Return a string describing this object.
529 std::string
get_description() const;
534 #endif // XAPIAN_INCLUDED_ENQUIRE_H