Handle <title> in SVG
[xapian.git] / xapian-core / api / postlist.h
blobdb6e29a877766fdf32546dea0ee87163a6521ca6
1 /** @file
2 * @brief Abstract base class for postlists.
3 */
/* Copyright (C) 2007,2008,2009,2011,2015,2017 Olly Betts
 * Copyright (C) 2009 Lemur Consulting Ltd
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
22 #ifndef XAPIAN_INCLUDED_POSTLIST_H
23 #define XAPIAN_INCLUDED_POSTLIST_H
25 #include <string>
27 #include "xapian/intrusive_ptr.h"
28 #include <xapian/types.h>
29 #include <xapian/postingiterator.h>
31 #include "backends/positionlist.h"
32 #include "weight/weightinternal.h"
34 class OrPositionList;
36 /// Abstract base class for postlists.
37 class Xapian::PostingIterator::Internal : public Xapian::Internal::intrusive_base {
38 /// Don't allow assignment.
39 void operator=(const Internal &);
41 /// Don't allow copying.
42 Internal(const Internal &);
44 protected:
45 /// Only constructable as a base class for derived classes.
46 Internal() { }
48 public:
49 /** We have virtual methods and want to be able to delete derived classes
50 * using a pointer to the base class, so we need a virtual destructor.
52 virtual ~Internal();
54 /// Get a lower bound on the number of documents indexed by this term.
55 virtual Xapian::doccount get_termfreq_min() const = 0;
57 /// Get an upper bound on the number of documents indexed by this term.
58 virtual Xapian::doccount get_termfreq_max() const = 0;
60 /** Get an estimate of the number of documents indexed by this term.
62 * It should always be true that:
63 * get_termfreq_min() <= get_termfreq_est() <= get_termfreq_max()
65 virtual Xapian::doccount get_termfreq_est() const = 0;
67 /** Get an estimate for the termfreq and reltermfreq, given the stats.
69 * The frequencies may be for a combination of databases, or for just the
70 * relevant documents, so the results need not lie in the bounds given by
71 * get_termfreq_min() and get_termfreq_max().
73 virtual TermFreqs get_termfreq_est_using_stats(
74 const Xapian::Weight::Internal & stats) const;
76 /// Return an upper bound on what get_weight() can return.
77 virtual double get_maxweight() const = 0;
79 /// Return the current docid.
80 virtual Xapian::docid get_docid() const = 0;
82 /// Return the length of current document.
83 virtual Xapian::termcount get_doclength() const = 0;
84 /* FIXME: Once flint has been retired, we should probably strip out
85 * PostList::get_doclength() and just fetch it from the DB directly.
88 /// Return the number of unique terms in the current document.
89 virtual Xapian::termcount get_unique_terms() const = 0;
91 /** Return the wdf for the document at the current position.
93 * The default implementation throws Xapian::UnimplementedError.
95 virtual Xapian::termcount get_wdf() const;
97 /// Return the weight contribution for the current position.
98 virtual double get_weight() const = 0;
100 virtual const std::string * get_sort_key() const;
102 /** If the collapse key is already known, return it.
104 * This is implemented by MSetPostList (and MergePostList). Other
105 * subclasses rely on the default implementation which just returns
106 * NULL.
108 virtual const std::string * get_collapse_key() const;
110 /// Return true if the current position is past the last entry in this list.
111 virtual bool at_end() const = 0;
113 /** Recalculate the upper bound on what get_weight() can return.
115 * If the tree has pruned, get_maxweight() may use cached values. Calling
116 * this method instead forces a full recalculation.
118 * Note that this method may be called after the postlist has reached the
119 * end. In this situation, the method should return 0.
121 virtual double recalc_maxweight() = 0;
123 /** Read the position list for the term in the current document and
124 * return a pointer to it (owned by the PostList).
126 * The default implementation throws Xapian::UnimplementedError.
128 virtual PositionList * read_position_list();
130 /** Read the position list for the term in the current document and
131 * return a pointer to it (not owned by the PostList).
133 * The default implementation throws Xapian::UnimplementedError.
135 virtual PositionList * open_position_list() const;
137 /** Advance the current position to the next document in the postlist.
139 * The list starts before the first entry in the list, so next(),
140 * skip_to() or check() must be called before any methods which need the
141 * context of the current position.
143 * @param w_min The minimum weight contribution that is needed (this is
144 * just a hint which PostList subclasses may ignore).
146 * @return If a non-NULL pointer is returned, then the caller should
147 * substitute the returned pointer for its pointer to us, and then
148 * delete us. This "pruning" can only happen for a non-leaf
149 * subclass of this class.
151 virtual Internal * next(double w_min) = 0;
153 /** Skip forward to the specified docid.
155 * If the specified docid isn't in the list, position ourselves on the
156 * first document after it (or at_end() if no greater docids are present).
158 * @param w_min The minimum weight contribution that is needed (this is
159 * just a hint which PostList subclasses may ignore).
161 * @return If a non-NULL pointer is returned, then the caller should
162 * substitute the returned pointer for its pointer to us, and then
163 * delete us. This "pruning" can only happen for a non-leaf
164 * subclass of this class.
166 virtual Internal * skip_to(Xapian::docid did, double w_min) = 0;
168 /** Check if the specified docid occurs in this postlist.
170 * The caller is required to ensure that the specified @a docid actually
171 * exists in the database.
173 * This method acts like skip_to() if that can be done at little extra
174 * cost, in which case it then sets @a valid to true.
176 * Otherwise it simply checks if a particular docid is present. If it
177 * is, @a valid is set to true. If it isn't, it sets @a valid to
178 * false, and leaves the position unspecified (and hence the result of
179 * calling methods which depend on the current position, such as
180 * get_docid() and at_end(), are also unspecified). In this state, next()
181 * will advance to the first matching position after @a docid, and
182 * skip_to() will act as it would if the position was the first matching
183 * position after @a docid. If @a valid is set to false, then NULL must
184 * be returned (pruning in this situation doesn't make sense).
186 * The default implementation calls skip_to().
188 virtual Internal * check(Xapian::docid did, double w_min, bool &valid);
190 /** Advance the current position to the next document in the postlist.
192 * Any weight contribution is acceptable.
194 Internal * next() { return next(0.0); }
196 /** Skip forward to the specified docid.
198 * Any weight contribution is acceptable.
200 Internal * skip_to(Xapian::docid did) { return skip_to(did, 0.0); }
202 /// Count the number of leaf subqueries which match at the current position.
203 virtual Xapian::termcount count_matching_subqs() const;
205 /// Gather PositionList* objects for a subtree.
206 virtual void gather_position_lists(OrPositionList* orposlist);
208 /// Return a string description of this object.
209 virtual std::string get_description() const = 0;
212 // In the external API headers, this class is Xapian::PostingIterator::Internal,
213 // but in the library code it's still known as "PostList" in most places.
214 typedef Xapian::PostingIterator::Internal PostList;
216 #endif // XAPIAN_INCLUDED_POSTLIST_H