Factor out function to decode 2 hex digits
[xapian.git] / xapian-core / include / xapian / mset.h
blob7e924a005d2854eab6ad638d53bb485af80bc4d9
1 /** @file
2 * @brief Class representing a list of search results
3 */
4 /* Copyright (C) 2015,2016,2019 Olly Betts
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2 of the
9 * License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
19 * USA
22 #ifndef XAPIAN_INCLUDED_MSET_H
23 #define XAPIAN_INCLUDED_MSET_H
25 #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD
26 # error Never use <xapian/mset.h> directly; include <xapian.h> instead.
27 #endif
29 #include <iterator>
30 #include <string>
32 #include <xapian/attributes.h>
33 #include <xapian/document.h>
34 #include <xapian/intrusive_ptr.h>
35 #include <xapian/stem.h>
36 #include <xapian/types.h>
37 #include <xapian/visibility.h>
39 namespace Xapian {
41 class MSetIterator;
43 /// Class representing a list of search results.
44 class XAPIAN_VISIBILITY_DEFAULT MSet {
45 friend class MSetIterator;
47 // Helper function for fetch() methods.
48 void fetch_(Xapian::doccount first, Xapian::doccount last) const;
50 public:
51 /// Class representing the MSet internals.
52 class Internal;
53 /// @private @internal Reference counted internals.
54 Xapian::Internal::intrusive_ptr<Internal> internal;
56 /** Copying is allowed.
58 * The internals are reference counted, so copying is cheap.
60 MSet(const MSet & o);
62 /** Copying is allowed.
64 * The internals are reference counted, so assignment is cheap.
66 MSet & operator=(const MSet & o);
68 #ifdef XAPIAN_MOVE_SEMANTICS
69 /// Move constructor.
70 MSet(MSet && o);
72 /// Move assignment operator.
73 MSet & operator=(MSet && o);
74 #endif
76 /** Default constructor.
78 * Creates an empty MSet, mostly useful as a placeholder.
80 MSet();
82 /// Destructor.
83 ~MSet();
85 /** Convert a weight to a percentage.
87 * The matching document with the highest weight will get 100% if it
88 * matches all the weighted query terms, and proportionally less if it
89 * only matches some, and other weights are scaled by the same factor.
91 * Documents with a non-zero score will always score at least 1%.
93 * Note that these generally aren't percentages of anything meaningful
94 * (unless you use a custom weighting formula where they are!)
96 int convert_to_percent(double weight) const;
98 /** Convert the weight of the current iterator position to a percentage.
100 * The matching document with the highest weight will get 100% if it
101 * matches all the weighted query terms, and proportionally less if it
102 * only matches some, and other weights are scaled by the same factor.
104 * Documents with a non-zero score will always score at least 1%.
106 * Note that these generally aren't percentages of anything meaningful
107 * (unless you use a custom weighting formula where they are!)
109 int convert_to_percent(const MSetIterator & it) const;
111 /** Get the termfreq of a term.
113 * @return The number of documents which @a term occurs in. This
114 * considers all documents in the database being searched, so
115 * gives the same answer as <code>db.get_termfreq(term)</code>
116 * (but is more efficient for query terms as it returns a
117 * value cached during the search.)
119 Xapian::doccount get_termfreq(const std::string & term) const;
121 /** Get the term weight of a term.
123 * @return The maximum weight that @a term could have contributed to a
124 * document.
126 double get_termweight(const std::string & term) const;
128 /** Rank of first item in this MSet.
130 * This is the parameter `first` passed to Xapian::Enquire::get_mset().
132 Xapian::doccount get_firstitem() const;
134 /** Lower bound on the total number of matching documents. */
135 Xapian::doccount get_matches_lower_bound() const;
136 /** Estimate of the total number of matching documents. */
137 Xapian::doccount get_matches_estimated() const;
138 /** Upper bound on the total number of matching documents. */
139 Xapian::doccount get_matches_upper_bound() const;
141 /** Lower bound on the total number of matching documents before collapsing.
143 * Conceptually the same as get_matches_lower_bound() for the same query
144 * without any collapse part (though the actual value may differ).
146 Xapian::doccount get_uncollapsed_matches_lower_bound() const;
147 /** Estimate of the total number of matching documents before collapsing.
149 * Conceptually the same as get_matches_estimated() for the same query
150 * without any collapse part (though the actual value may differ).
152 Xapian::doccount get_uncollapsed_matches_estimated() const;
153 /** Upper bound on the total number of matching documents before collapsing.
155 * Conceptually the same as get_matches_upper_bound() for the same query
156 * without any collapse part (though the actual value may differ).
158 Xapian::doccount get_uncollapsed_matches_upper_bound() const;
160 /** The maximum weight attained by any document. */
161 double get_max_attained() const;
162 /** The maximum possible weight any document could achieve. */
163 double get_max_possible() const;
165 enum {
166 /** Model the relevancy of non-query terms in MSet::snippet().
168 * Non-query terms will be assigned a small weight, and the snippet
169 * will tend to prefer snippets which contain a more interesting
170 * background (where the query term content is equivalent).
172 SNIPPET_BACKGROUND_MODEL = 1,
173 /** Exhaustively evaluate candidate snippets in MSet::snippet().
175 * Without this flag, snippet generation will stop once it thinks
176 * it has found a "good enough" snippet, which will generally reduce
177 * the time taken to generate a snippet.
179 SNIPPET_EXHAUSTIVE = 2,
180 /** Return the empty string if no term got matched.
182 * If enabled, snippet() returns an empty string if not a single match
183 * was found in text. If not enabled, snippet() returns a (sub)string
184 * of text without any highlighted terms.
186 SNIPPET_EMPTY_WITHOUT_MATCH = 4,
188 /** Enable generation of n-grams from CJK text.
190 * This option highlights CJK searches made using the QueryParser
191 * FLAG_CJK_NGRAM flag. Non-CJK characters are split into words as
192 * normal.
194 * The TermGenerator FLAG_CJK_NGRAM flag needs to have been used at
195 * index time.
197 * This mode can also be enabled by setting environment variable
198 * XAPIAN_CJK_NGRAM to a non-empty value (but doing so was deprecated
199 * in 1.4.11).
201 * @since Added in Xapian 1.4.11.
203 SNIPPET_CJK_NGRAM = 2048
206 /** Generate a snippet.
208 * This method selects a continuous run of words from @a text, based
209 * mainly on where the query matches (currently terms, exact phrases and
210 * wildcards are taken into account). If flag SNIPPET_BACKGROUND_MODEL is
211 * used (which it is by default) then the selection algorithm also
212 * considers the non-query terms in the text with the aim of showing
213 * a context which provides more useful information.
215 * The size of the text selected can be controlled by the @a length
216 * parameter, which specifies a number of bytes of text to aim to select.
217 * However slightly more text may be selected. Also the size of any
218 * escaping, highlighting or omission markers is not considered.
220 * The returned text is escaped to make it suitable for use in HTML
221 * (though beware that in upstream releases 1.4.5 and earlier this
222 * escaping was sometimes incomplete), and matches with the query will be
223 * highlighted using @a hi_start and @a hi_end.
225 * If the snippet seems to start or end mid-sentence, then @a omit is
226 * prepended or append (respectively) to indicate this.
228 * The same stemming algorithm which was used to build the query should be
229 * specified in @a stemmer.
231 * And @a flags contains flags controlling behaviour.
233 * Added in 1.3.5.
235 std::string snippet(const std::string & text,
236 size_t length = 500,
237 const Xapian::Stem & stemmer = Xapian::Stem(),
238 unsigned flags = SNIPPET_BACKGROUND_MODEL|SNIPPET_EXHAUSTIVE,
239 const std::string & hi_start = "<b>",
240 const std::string & hi_end = "</b>",
241 const std::string & omit = "...") const;
243 /** Prefetch hint a range of items.
245 * For a remote database, this may start a pipelined fetch of the
246 * requested documents from the remote server.
248 * For a disk-based database, this may send prefetch hints to the
249 * operating system such that the disk blocks the requested documents
250 * are stored in are more likely to be in the cache when we come to
251 * actually read them.
253 void fetch(const MSetIterator &begin, const MSetIterator &end) const;
255 /** Prefetch hint a single MSet item.
257 * For a remote database, this may start a pipelined fetch of the
258 * requested documents from the remote server.
260 * For a disk-based database, this may send prefetch hints to the
261 * operating system such that the disk blocks the requested documents
262 * are stored in are more likely to be in the cache when we come to
263 * actually read them.
265 void fetch(const MSetIterator &item) const;
267 /** Prefetch hint the whole MSet.
269 * For a remote database, this may start a pipelined fetch of the
270 * requested documents from the remote server.
272 * For a disk-based database, this may send prefetch hints to the
273 * operating system such that the disk blocks the requested documents
274 * are stored in are more likely to be in the cache when we come to
275 * actually read them.
277 void fetch() const { fetch_(0, Xapian::doccount(-1)); }
279 /** Return number of items in this MSet object. */
280 Xapian::doccount size() const;
282 /** Return true if this MSet object is empty. */
283 bool empty() const { return size() == 0; }
285 /** Efficiently swap this MSet object with another. */
286 void swap(MSet & o) { internal.swap(o.internal); }
288 /** Return iterator pointing to the first item in this MSet. */
289 MSetIterator begin() const;
291 /** Return iterator pointing to just after the last item in this MSet. */
292 MSetIterator end() const;
294 /** Return iterator pointing to the i-th object in this MSet. */
295 MSetIterator operator[](Xapian::doccount i) const;
297 /** Return iterator pointing to the last object in this MSet. */
298 MSetIterator back() const;
300 /// Return a string describing this object.
301 std::string get_description() const;
303 /** @private @internal MSet is what the C++ STL calls a container.
305 * The following typedefs allow the class to be used in templates in the
306 * same way the standard containers can be.
308 * These are deliberately hidden from the Doxygen-generated docs, as the
309 * machinery here isn't interesting to API users. They just need to know
310 * that Xapian container classes are compatible with the STL.
312 * See "The C++ Programming Language", 3rd ed. section 16.3.1:
314 // @{
315 /// @private
316 typedef Xapian::MSetIterator value_type;
317 /// @private
318 typedef Xapian::doccount size_type;
319 /// @private
320 typedef Xapian::doccount_diff difference_type;
321 /// @private
322 typedef Xapian::MSetIterator iterator;
323 /// @private
324 typedef Xapian::MSetIterator const_iterator;
325 /// @private
326 typedef value_type * pointer;
327 /// @private
328 typedef const value_type * const_pointer;
329 /// @private
330 typedef value_type & reference;
331 /// @private
332 typedef const value_type & const_reference;
333 // @}
335 /** @private @internal MSet is what the C++ STL calls a container.
337 * The following methods allow the class to be used in templates in the
338 * same way the standard containers can be.
340 * These are deliberately hidden from the Doxygen-generated docs, as the
341 * machinery here isn't interesting to API users. They just need to know
342 * that Xapian container classes are compatible with the STL.
344 // @{
345 // The size is fixed once created.
346 Xapian::doccount max_size() const { return size(); }
347 // @}
350 /// Iterator over a Xapian::MSet.
351 class XAPIAN_VISIBILITY_DEFAULT MSetIterator {
352 friend class MSet;
354 MSetIterator(const Xapian::MSet & mset_, Xapian::doccount off_from_end_)
355 : mset(mset_), off_from_end(off_from_end_) { }
357 public:
358 /** @private @internal The MSet we are iterating over. */
359 Xapian::MSet mset;
361 /** @private @internal The current position of the iterator.
363 * We store the offset from the end of @a mset, since that means
364 * MSet::end() just needs to set this member to 0.
366 Xapian::MSet::size_type off_from_end;
368 /** Create an unpositioned MSetIterator. */
369 MSetIterator() : off_from_end(0) { }
371 /** Get the numeric document id for the current position. */
372 Xapian::docid operator*() const;
374 /// Advance the iterator to the next position.
375 MSetIterator & operator++() {
376 --off_from_end;
377 return *this;
380 /// Advance the iterator to the next position (postfix version).
381 MSetIterator operator++(int) {
382 MSetIterator retval = *this;
383 --off_from_end;
384 return retval;
387 /// Move the iterator to the previous position.
388 MSetIterator & operator--() {
389 ++off_from_end;
390 return *this;
393 /// Move the iterator to the previous position (postfix version).
394 MSetIterator operator--(int) {
395 MSetIterator retval = *this;
396 ++off_from_end;
397 return retval;
400 /** @private @internal MSetIterator is what the C++ STL calls an
401 * random_access_iterator.
403 * The following typedefs allow std::iterator_traits<> to work so that
404 * this iterator can be used with the STL.
406 * These are deliberately hidden from the Doxygen-generated docs, as the
407 * machinery here isn't interesting to API users. They just need to know
408 * that Xapian iterator classes are compatible with the STL.
410 // @{
411 /// @private
412 typedef std::random_access_iterator_tag iterator_category;
413 /// @private
414 typedef std::string value_type;
415 /// @private
416 typedef Xapian::termcount_diff difference_type;
417 /// @private
418 typedef std::string * pointer;
419 /// @private
420 typedef std::string & reference;
421 // @}
423 /// Move the iterator forwards by n positions.
424 MSetIterator & operator+=(difference_type n) {
425 off_from_end -= n;
426 return *this;
429 /// Move the iterator back by n positions.
430 MSetIterator & operator-=(difference_type n) {
431 off_from_end += n;
432 return *this;
435 /** Return the iterator incremented by @a n positions.
437 * If @a n is negative, decrements by (-n) positions.
439 MSetIterator operator+(difference_type n) const {
440 return MSetIterator(mset, off_from_end - n);
443 /** Return the iterator decremented by @a n positions.
445 * If @a n is negative, increments by (-n) positions.
447 MSetIterator operator-(difference_type n) const {
448 return MSetIterator(mset, off_from_end + n);
451 /** Return the number of positions between @a o and this iterator. */
452 difference_type operator-(const MSetIterator& o) const {
453 return difference_type(o.off_from_end) - difference_type(off_from_end);
456 /** Return the MSet rank for the current position.
458 * The rank of mset[0] is mset.get_firstitem().
460 Xapian::doccount get_rank() const {
461 return mset.get_firstitem() + (mset.size() - off_from_end);
464 /** Get the Document object for the current position. */
465 Xapian::Document get_document() const;
467 /** Get the weight for the current position. */
468 double get_weight() const;
470 /** Return the collapse key for the current position.
472 * If collapsing isn't in use, an empty string will be returned.
474 std::string get_collapse_key() const;
476 /** Return a count of the number of collapses done onto the current key.
478 * This starts at 0, and is incremented each time an item is eliminated
479 * because its key is the same as that of the current item (as returned
480 * by get_collapse_key()).
482 * Note that this is NOT necessarily one less than the total number of
483 * matching documents with this collapse key due to various optimisations
484 * implemented in the matcher - for example, it can skip documents
485 * completely if it can prove their weight wouldn't be enough to make the
486 * result set.
488 * You can say is that if get_collapse_count() > 0 then there are
489 * >= get_collapse_count() other documents with the current collapse
490 * key. But if get_collapse_count() == 0 then there may or may not be
491 * other such documents.
493 Xapian::doccount get_collapse_count() const;
495 /** Return the sort key for the current position.
497 * If sorting didn't use a key then an empty string will be returned.
499 * @since Added in Xapian 1.4.6.
501 std::string get_sort_key() const;
503 /** Convert the weight of the current iterator position to a percentage.
505 * The matching document with the highest weight will get 100% if it
506 * matches all the weighted query terms, and proportionally less if it
507 * only matches some, and other weights are scaled by the same factor.
509 * Documents with a non-zero score will always score at least 1%.
511 * Note that these generally aren't percentages of anything meaningful
512 * (unless you use a custom weighting formula where they are!)
514 int get_percent() const {
515 return mset.convert_to_percent(get_weight());
518 /// Return a string describing this object.
519 std::string get_description() const;
522 bool
523 XAPIAN_NOTHROW(operator==(const MSetIterator &a, const MSetIterator &b));
525 /// Equality test for MSetIterator objects.
526 inline bool
527 operator==(const MSetIterator &a, const MSetIterator &b) XAPIAN_NOEXCEPT
529 return a.off_from_end == b.off_from_end;
532 inline bool
533 XAPIAN_NOTHROW(operator!=(const MSetIterator &a, const MSetIterator &b));
535 /// Inequality test for MSetIterator objects.
536 inline bool
537 operator!=(const MSetIterator &a, const MSetIterator &b) XAPIAN_NOEXCEPT
539 return !(a == b);
542 bool
543 XAPIAN_NOTHROW(operator<(const MSetIterator &a, const MSetIterator &b));
545 /// Inequality test for MSetIterator objects.
546 inline bool
547 operator<(const MSetIterator &a, const MSetIterator &b) XAPIAN_NOEXCEPT
549 return a.off_from_end > b.off_from_end;
552 inline bool
553 XAPIAN_NOTHROW(operator>(const MSetIterator &a, const MSetIterator &b));
555 /// Inequality test for MSetIterator objects.
556 inline bool
557 operator>(const MSetIterator &a, const MSetIterator &b) XAPIAN_NOEXCEPT
559 return b < a;
562 inline bool
563 XAPIAN_NOTHROW(operator>=(const MSetIterator &a, const MSetIterator &b));
565 /// Inequality test for MSetIterator objects.
566 inline bool
567 operator>=(const MSetIterator &a, const MSetIterator &b) XAPIAN_NOEXCEPT
569 return !(a < b);
572 inline bool
573 XAPIAN_NOTHROW(operator<=(const MSetIterator &a, const MSetIterator &b));
575 /// Inequality test for MSetIterator objects.
576 inline bool
577 operator<=(const MSetIterator &a, const MSetIterator &b) XAPIAN_NOEXCEPT
579 return !(b < a);
582 /** Return MSetIterator @a it incremented by @a n positions.
584 * If @a n is negative, decrements by (-n) positions.
586 inline MSetIterator
587 operator+(MSetIterator::difference_type n, const MSetIterator& it)
589 return it + n;
592 // Inlined methods of MSet which need MSetIterator to have been defined:
594 inline void
595 MSet::fetch(const MSetIterator &begin_it, const MSetIterator &end_it) const
597 fetch_(begin_it.off_from_end, end_it.off_from_end);
600 inline void
601 MSet::fetch(const MSetIterator &item) const
603 fetch_(item.off_from_end, item.off_from_end);
606 inline MSetIterator
607 MSet::begin() const {
608 return MSetIterator(*this, size());
611 inline MSetIterator
612 MSet::end() const {
613 // Decrementing the result of end() needs to work, so we must pass in
614 // *this here.
615 return MSetIterator(*this, 0);
618 inline MSetIterator
619 MSet::operator[](Xapian::doccount i) const {
620 return MSetIterator(*this, size() - i);
623 inline MSetIterator
624 MSet::back() const {
625 return MSetIterator(*this, 1);
628 inline int
629 MSet::convert_to_percent(const MSetIterator & it) const {
630 return convert_to_percent(it.get_weight());
635 #endif // XAPIAN_INCLUDED_MSET_H