2 * @brief Class representing a list of search results
4 /* Copyright (C) 2015,2016,2019 Olly Betts
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2 of the
9 * License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
22 #ifndef XAPIAN_INCLUDED_MSET_H
23 #define XAPIAN_INCLUDED_MSET_H
25 #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD
26 # error Never use <xapian/mset.h> directly; include <xapian.h> instead.
32 #include <xapian/attributes.h>
33 #include <xapian/document.h>
34 #include <xapian/intrusive_ptr.h>
35 #include <xapian/stem.h>
36 #include <xapian/types.h>
37 #include <xapian/visibility.h>
43 /// Class representing a list of search results.
44 class XAPIAN_VISIBILITY_DEFAULT MSet
{
45 friend class MSetIterator
;
47 // Helper function for fetch() methods.
48 void fetch_(Xapian::doccount first
, Xapian::doccount last
) const;
51 /// Class representing the MSet internals.
53 /// @private @internal Reference counted internals.
54 Xapian::Internal::intrusive_ptr
<Internal
> internal
;
56 /** Copying is allowed.
58 * The internals are reference counted, so copying is cheap.
62 /** Assignment is allowed.
64 * The internals are reference counted, so assignment is cheap.
66 MSet
& operator=(const MSet
& o
);
68 #ifdef XAPIAN_MOVE_SEMANTICS
72 /// Move assignment operator.
73 MSet
& operator=(MSet
&& o
);
76 /** Default constructor.
78 * Creates an empty MSet, mostly useful as a placeholder.
85 /** Convert a weight to a percentage.
87 * The matching document with the highest weight will get 100% if it
88 * matches all the weighted query terms, and proportionally less if it
89 * only matches some, and other weights are scaled by the same factor.
91 * Documents with a non-zero score will always score at least 1%.
93 * Note that these generally aren't percentages of anything meaningful
94 * (unless you use a custom weighting formula where they are!)
96 int convert_to_percent(double weight
) const;
98 /** Convert the weight of the current iterator position to a percentage.
100 * The matching document with the highest weight will get 100% if it
101 * matches all the weighted query terms, and proportionally less if it
102 * only matches some, and other weights are scaled by the same factor.
104 * Documents with a non-zero score will always score at least 1%.
106 * Note that these generally aren't percentages of anything meaningful
107 * (unless you use a custom weighting formula where they are!)
109 int convert_to_percent(const MSetIterator
& it
) const;
111 /** Get the termfreq of a term.
113 * @return The number of documents which @a term occurs in. This
114 * considers all documents in the database being searched, so
115 * gives the same answer as <code>db.get_termfreq(term)</code>
116 * (but is more efficient for query terms as it returns a
117 * value cached during the search.)
119 Xapian::doccount
get_termfreq(const std::string
& term
) const;
121 /** Get the term weight of a term.
123 * @return The maximum weight that @a term could have contributed to a
126 double get_termweight(const std::string
& term
) const;
128 /** Rank of first item in this MSet.
130 * This is the parameter `first` passed to Xapian::Enquire::get_mset().
132 Xapian::doccount
get_firstitem() const;
134 /** Lower bound on the total number of matching documents. */
135 Xapian::doccount
get_matches_lower_bound() const;
136 /** Estimate of the total number of matching documents. */
137 Xapian::doccount
get_matches_estimated() const;
138 /** Upper bound on the total number of matching documents. */
139 Xapian::doccount
get_matches_upper_bound() const;
141 /** Lower bound on the total number of matching documents before collapsing.
143 * Conceptually the same as get_matches_lower_bound() for the same query
144 * without any collapse part (though the actual value may differ).
146 Xapian::doccount
get_uncollapsed_matches_lower_bound() const;
147 /** Estimate of the total number of matching documents before collapsing.
149 * Conceptually the same as get_matches_estimated() for the same query
150 * without any collapse part (though the actual value may differ).
152 Xapian::doccount
get_uncollapsed_matches_estimated() const;
153 /** Upper bound on the total number of matching documents before collapsing.
155 * Conceptually the same as get_matches_upper_bound() for the same query
156 * without any collapse part (though the actual value may differ).
158 Xapian::doccount
get_uncollapsed_matches_upper_bound() const;
160 /** The maximum weight attained by any document. */
161 double get_max_attained() const;
162 /** The maximum possible weight any document could achieve. */
163 double get_max_possible() const;
166 /** Model the relevancy of non-query terms in MSet::snippet().
168 * Non-query terms will be assigned a small weight, and the snippet
169 * will tend to prefer snippets which contain a more interesting
170 * background (where the query term content is equivalent).
172 SNIPPET_BACKGROUND_MODEL
= 1,
173 /** Exhaustively evaluate candidate snippets in MSet::snippet().
175 * Without this flag, snippet generation will stop once it thinks
176 * it has found a "good enough" snippet, which will generally reduce
177 * the time taken to generate a snippet.
179 SNIPPET_EXHAUSTIVE
= 2,
180 /** Return the empty string if no term got matched.
182 * If enabled, snippet() returns an empty string if not a single match
183 * was found in text. If not enabled, snippet() returns a (sub)string
184 * of text without any highlighted terms.
186 SNIPPET_EMPTY_WITHOUT_MATCH
= 4,
188 /** Enable generation of n-grams from CJK text.
190 * This option highlights CJK searches made using the QueryParser
191 * FLAG_CJK_NGRAM flag. Non-CJK characters are split into words as
194 * The TermGenerator FLAG_CJK_NGRAM flag needs to have been used at
197 * This mode can also be enabled by setting environment variable
198 * XAPIAN_CJK_NGRAM to a non-empty value (but doing so was deprecated
201 * @since Added in Xapian 1.4.11.
203 SNIPPET_CJK_NGRAM
= 2048
206 /** Generate a snippet.
208 * This method selects a continuous run of words from @a text, based
209 * mainly on where the query matches (currently terms, exact phrases and
210 * wildcards are taken into account). If flag SNIPPET_BACKGROUND_MODEL is
211 * used (which it is by default) then the selection algorithm also
212 * considers the non-query terms in the text with the aim of showing
213 * a context which provides more useful information.
215 * The size of the text selected can be controlled by the @a length
216 * parameter, which specifies a number of bytes of text to aim to select.
217 * However slightly more text may be selected. Also the size of any
218 * escaping, highlighting or omission markers is not considered.
220 * The returned text is escaped to make it suitable for use in HTML
221 * (though beware that in upstream releases 1.4.5 and earlier this
222 * escaping was sometimes incomplete), and matches with the query will be
223 * highlighted using @a hi_start and @a hi_end.
225 * If the snippet seems to start or end mid-sentence, then @a omit is
226 * prepended or appended (respectively) to indicate this.
228 * The same stemming algorithm which was used to build the query should be
229 * specified in @a stemmer.
231 * And @a flags contains flags controlling behaviour.
235 std::string
snippet(const std::string
& text
,
237 const Xapian::Stem
& stemmer
= Xapian::Stem(),
238 unsigned flags
= SNIPPET_BACKGROUND_MODEL
|SNIPPET_EXHAUSTIVE
,
239 const std::string
& hi_start
= "<b>",
240 const std::string
& hi_end
= "</b>",
241 const std::string
& omit
= "...") const;
243 /** Prefetch hint a range of items.
245 * For a remote database, this may start a pipelined fetch of the
246 * requested documents from the remote server.
248 * For a disk-based database, this may send prefetch hints to the
249 * operating system such that the disk blocks the requested documents
250 * are stored in are more likely to be in the cache when we come to
251 * actually read them.
253 void fetch(const MSetIterator
&begin
, const MSetIterator
&end
) const;
255 /** Prefetch hint a single MSet item.
257 * For a remote database, this may start a pipelined fetch of the
258 * requested documents from the remote server.
260 * For a disk-based database, this may send prefetch hints to the
261 * operating system such that the disk blocks the requested documents
262 * are stored in are more likely to be in the cache when we come to
263 * actually read them.
265 void fetch(const MSetIterator
&item
) const;
267 /** Prefetch hint the whole MSet.
269 * For a remote database, this may start a pipelined fetch of the
270 * requested documents from the remote server.
272 * For a disk-based database, this may send prefetch hints to the
273 * operating system such that the disk blocks the requested documents
274 * are stored in are more likely to be in the cache when we come to
275 * actually read them.
277 void fetch() const { fetch_(0, Xapian::doccount(-1)); }
279 /** Return number of items in this MSet object. */
280 Xapian::doccount
size() const;
282 /** Return true if this MSet object is empty. */
283 bool empty() const { return size() == 0; }
285 /** Efficiently swap this MSet object with another. */
286 void swap(MSet
& o
) { internal
.swap(o
.internal
); }
288 /** Return iterator pointing to the first item in this MSet. */
289 MSetIterator
begin() const;
291 /** Return iterator pointing to just after the last item in this MSet. */
292 MSetIterator
end() const;
294 /** Return iterator pointing to the i-th object in this MSet. */
295 MSetIterator
operator[](Xapian::doccount i
) const;
297 /** Return iterator pointing to the last object in this MSet. */
298 MSetIterator
back() const;
300 /// Return a string describing this object.
301 std::string
get_description() const;
303 /** @private @internal MSet is what the C++ STL calls a container.
305 * The following typedefs allow the class to be used in templates in the
306 * same way the standard containers can be.
308 * These are deliberately hidden from the Doxygen-generated docs, as the
309 * machinery here isn't interesting to API users. They just need to know
310 * that Xapian container classes are compatible with the STL.
312 * See "The C++ Programming Language", 3rd ed. section 16.3.1:
316 typedef Xapian::MSetIterator value_type
;
318 typedef Xapian::doccount size_type
;
320 typedef Xapian::doccount_diff difference_type
;
322 typedef Xapian::MSetIterator iterator
;
324 typedef Xapian::MSetIterator const_iterator
;
326 typedef value_type
* pointer
;
328 typedef const value_type
* const_pointer
;
330 typedef value_type
& reference
;
332 typedef const value_type
& const_reference
;
335 /** @private @internal MSet is what the C++ STL calls a container.
337 * The following methods allow the class to be used in templates in the
338 * same way the standard containers can be.
340 * These are deliberately hidden from the Doxygen-generated docs, as the
341 * machinery here isn't interesting to API users. They just need to know
342 * that Xapian container classes are compatible with the STL.
345 // The size is fixed once created.
346 Xapian::doccount
max_size() const { return size(); }
350 /// Iterator over a Xapian::MSet.
351 class XAPIAN_VISIBILITY_DEFAULT MSetIterator
{
/// @private @internal Construct an iterator over @a mset_ whose position is
/// stored as @a off_from_end_, an offset counted back from the end of the MSet
/// (so 0 corresponds to MSet::end()).
354 MSetIterator(const Xapian::MSet
& mset_
, Xapian::doccount off_from_end_
)
355 : mset(mset_
), off_from_end(off_from_end_
) { }
358 /** @private @internal The MSet we are iterating over. */
361 /** @private @internal The current position of the iterator.
363 * We store the offset from the end of @a mset, since that means
364 * MSet::end() just needs to set this member to 0.
366 Xapian::MSet::size_type off_from_end
;
368 /** Create an unpositioned MSetIterator. */
369 MSetIterator() : off_from_end(0) { }
371 /** Get the numeric document id for the current position. */
372 Xapian::docid
operator*() const;
374 /// Advance the iterator to the next position.
375 MSetIterator
& operator++() {
380 /// Advance the iterator to the next position (postfix version).
381 MSetIterator
operator++(int) {
382 MSetIterator retval
= *this;
387 /// Move the iterator to the previous position.
388 MSetIterator
& operator--() {
393 /// Move the iterator to the previous position (postfix version).
394 MSetIterator
operator--(int) {
395 MSetIterator retval
= *this;
400 /** @private @internal MSetIterator is what the C++ STL calls a
401 * random_access_iterator.
403 * The following typedefs allow std::iterator_traits<> to work so that
404 * this iterator can be used with the STL.
406 * These are deliberately hidden from the Doxygen-generated docs, as the
407 * machinery here isn't interesting to API users. They just need to know
408 * that Xapian iterator classes are compatible with the STL.
412 typedef std::random_access_iterator_tag iterator_category
;
414 typedef std::string value_type
;
416 typedef Xapian::termcount_diff difference_type
;
418 typedef std::string
* pointer
;
420 typedef std::string
& reference
;
423 /// Move the iterator forwards by n positions.
424 MSetIterator
& operator+=(difference_type n
) {
429 /// Move the iterator back by n positions.
430 MSetIterator
& operator-=(difference_type n
) {
435 /** Return the iterator incremented by @a n positions.
437 * If @a n is negative, decrements by (-n) positions.
439 MSetIterator
operator+(difference_type n
) const {
440 return MSetIterator(mset
, off_from_end
- n
);
443 /** Return the iterator decremented by @a n positions.
445 * If @a n is negative, increments by (-n) positions.
447 MSetIterator
operator-(difference_type n
) const {
448 return MSetIterator(mset
, off_from_end
+ n
);
451 /** Return the number of positions between @a o and this iterator. */
452 difference_type
operator-(const MSetIterator
& o
) const {
453 return difference_type(o
.off_from_end
) - difference_type(off_from_end
);
456 /** Return the MSet rank for the current position.
458 * The rank of mset[0] is mset.get_firstitem().
460 Xapian::doccount
get_rank() const {
461 return mset
.get_firstitem() + (mset
.size() - off_from_end
);
464 /** Get the Document object for the current position. */
465 Xapian::Document
get_document() const;
467 /** Get the weight for the current position. */
468 double get_weight() const;
470 /** Return the collapse key for the current position.
472 * If collapsing isn't in use, an empty string will be returned.
474 std::string
get_collapse_key() const;
476 /** Return a count of the number of collapses done onto the current key.
478 * This starts at 0, and is incremented each time an item is eliminated
479 * because its key is the same as that of the current item (as returned
480 * by get_collapse_key()).
482 * Note that this is NOT necessarily one less than the total number of
483 * matching documents with this collapse key due to various optimisations
484 * implemented in the matcher - for example, it can skip documents
485 * completely if it can prove their weight wouldn't be enough to make the
488 * What you can say is that if get_collapse_count() > 0 then there are
489 * >= get_collapse_count() other documents with the current collapse
490 * key. But if get_collapse_count() == 0 then there may or may not be
491 * other such documents.
493 Xapian::doccount
get_collapse_count() const;
495 /** Return the sort key for the current position.
497 * If sorting didn't use a key then an empty string will be returned.
499 * @since Added in Xapian 1.4.6.
501 std::string
get_sort_key() const;
503 /** Convert the weight of the current iterator position to a percentage.
505 * The matching document with the highest weight will get 100% if it
506 * matches all the weighted query terms, and proportionally less if it
507 * only matches some, and other weights are scaled by the same factor.
509 * Documents with a non-zero score will always score at least 1%.
511 * Note that these generally aren't percentages of anything meaningful
512 * (unless you use a custom weighting formula where they are!)
514 int get_percent() const {
515 return mset
.convert_to_percent(get_weight());
518 /// Return a string describing this object.
519 std::string
get_description() const;
523 XAPIAN_NOTHROW(operator==(const MSetIterator
&a
, const MSetIterator
&b
));
525 /// Equality test for MSetIterator objects.
527 operator==(const MSetIterator
&a
, const MSetIterator
&b
) XAPIAN_NOEXCEPT
529 return a
.off_from_end
== b
.off_from_end
;
533 XAPIAN_NOTHROW(operator!=(const MSetIterator
&a
, const MSetIterator
&b
));
535 /// Inequality test for MSetIterator objects.
537 operator!=(const MSetIterator
&a
, const MSetIterator
&b
) XAPIAN_NOEXCEPT
543 XAPIAN_NOTHROW(operator<(const MSetIterator
&a
, const MSetIterator
&b
));
545 /// Less-than comparison for MSetIterator objects (true if @a a is positioned before @a b).
547 operator<(const MSetIterator
&a
, const MSetIterator
&b
) XAPIAN_NOEXCEPT
549 return a
.off_from_end
> b
.off_from_end
;
553 XAPIAN_NOTHROW(operator>(const MSetIterator
&a
, const MSetIterator
&b
));
555 /// Greater-than comparison for MSetIterator objects.
557 operator>(const MSetIterator
&a
, const MSetIterator
&b
) XAPIAN_NOEXCEPT
563 XAPIAN_NOTHROW(operator>=(const MSetIterator
&a
, const MSetIterator
&b
));
565 /// Greater-than-or-equal comparison for MSetIterator objects.
567 operator>=(const MSetIterator
&a
, const MSetIterator
&b
) XAPIAN_NOEXCEPT
573 XAPIAN_NOTHROW(operator<=(const MSetIterator
&a
, const MSetIterator
&b
));
575 /// Less-than-or-equal comparison for MSetIterator objects.
577 operator<=(const MSetIterator
&a
, const MSetIterator
&b
) XAPIAN_NOEXCEPT
582 /** Return MSetIterator @a it incremented by @a n positions.
584 * If @a n is negative, decrements by (-n) positions.
587 operator+(MSetIterator::difference_type n
, const MSetIterator
& it
)
592 // Inlined methods of MSet which need MSetIterator to have been defined:
595 MSet::fetch(const MSetIterator
&begin_it
, const MSetIterator
&end_it
) const
597 fetch_(begin_it
.off_from_end
, end_it
.off_from_end
);
601 MSet::fetch(const MSetIterator
&item
) const
603 fetch_(item
.off_from_end
, item
.off_from_end
);
607 MSet::begin() const {
608 return MSetIterator(*this, size());
613 // Decrementing the result of end() needs to work, so we must pass in
615 return MSetIterator(*this, 0);
619 MSet::operator[](Xapian::doccount i
) const {
620 return MSetIterator(*this, size() - i
);
625 return MSetIterator(*this, 1);
629 MSet::convert_to_percent(const MSetIterator
& it
) const {
630 return convert_to_percent(it
.get_weight());
635 #endif // XAPIAN_INCLUDED_MSET_H