/** @file
 * @brief Abstract base class for postlists.
 */
/* Copyright (C) 2007,2008,2009,2011,2015,2017 Olly Betts
 * Copyright (C) 2009 Lemur Consulting Ltd
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef XAPIAN_INCLUDED_POSTLIST_H
#define XAPIAN_INCLUDED_POSTLIST_H

#include <string>

#include "xapian/intrusive_ptr.h"
#include <xapian/types.h>
#include <xapian/postingiterator.h>

#include "backends/positionlist.h"
#include "weight/weightinternal.h"

36 /// Abstract base class for postlists.
37 class Xapian::PostingIterator::Internal
: public Xapian::Internal::intrusive_base
{
38 /// Don't allow assignment.
39 void operator=(const Internal
&);
41 /// Don't allow copying.
42 Internal(const Internal
&);
45 /// Only constructable as a base class for derived classes.
49 /** We have virtual methods and want to be able to delete derived classes
50 * using a pointer to the base class, so we need a virtual destructor.
54 /// Get a lower bound on the number of documents indexed by this term.
55 virtual Xapian::doccount
get_termfreq_min() const = 0;
57 /// Get an upper bound on the number of documents indexed by this term.
58 virtual Xapian::doccount
get_termfreq_max() const = 0;
60 /** Get an estimate of the number of documents indexed by this term.
62 * It should always be true that:
63 * get_termfreq_min() <= get_termfreq_est() <= get_termfreq_max()
65 virtual Xapian::doccount
get_termfreq_est() const = 0;
67 /** Get an estimate for the termfreq and reltermfreq, given the stats.
69 * The frequencies may be for a combination of databases, or for just the
70 * relevant documents, so the results need not lie in the bounds given by
71 * get_termfreq_min() and get_termfreq_max().
73 virtual TermFreqs
get_termfreq_est_using_stats(
74 const Xapian::Weight::Internal
& stats
) const;
76 /// Return an upper bound on what get_weight() can return.
77 virtual double get_maxweight() const = 0;
79 /// Return the current docid.
80 virtual Xapian::docid
get_docid() const = 0;
82 /// Return the length of current document.
83 virtual Xapian::termcount
get_doclength() const = 0;
84 /* FIXME: Once flint has been retired, we should probably strip out
85 * PostList::get_doclength() and just fetch it from the DB directly.
88 /// Return the number of unique terms in the current document.
89 virtual Xapian::termcount
get_unique_terms() const = 0;
91 /** Return the wdf for the document at the current position.
93 * The default implementation throws Xapian::UnimplementedError.
95 virtual Xapian::termcount
get_wdf() const;
97 /// Return the weight contribution for the current position.
98 virtual double get_weight() const = 0;
100 virtual const std::string
* get_sort_key() const;
102 /** If the collapse key is already known, return it.
104 * This is implemented by MSetPostList (and MergePostList). Other
105 * subclasses rely on the default implementation which just returns
108 virtual const std::string
* get_collapse_key() const;
110 /// Return true if the current position is past the last entry in this list.
111 virtual bool at_end() const = 0;
113 /** Recalculate the upper bound on what get_weight() can return.
115 * If the tree has pruned, get_maxweight() may use cached values. Calling
116 * this method instead forces a full recalculation.
118 * Note that this method may be called after the postlist has reached the
119 * end. In this situation, the method should return 0.
121 virtual double recalc_maxweight() = 0;
123 /** Read the position list for the term in the current document and
124 * return a pointer to it (owned by the PostList).
126 * The default implementation throws Xapian::UnimplementedError.
128 virtual PositionList
* read_position_list();
130 /** Read the position list for the term in the current document and
131 * return a pointer to it (not owned by the PostList).
133 * The default implementation throws Xapian::UnimplementedError.
135 virtual PositionList
* open_position_list() const;
137 /** Advance the current position to the next document in the postlist.
139 * The list starts before the first entry in the list, so next(),
140 * skip_to() or check() must be called before any methods which need the
141 * context of the current position.
143 * @param w_min The minimum weight contribution that is needed (this is
144 * just a hint which PostList subclasses may ignore).
146 * @return If a non-NULL pointer is returned, then the caller should
147 * substitute the returned pointer for its pointer to us, and then
148 * delete us. This "pruning" can only happen for a non-leaf
149 * subclass of this class.
151 virtual Internal
* next(double w_min
) = 0;
153 /** Skip forward to the specified docid.
155 * If the specified docid isn't in the list, position ourselves on the
156 * first document after it (or at_end() if no greater docids are present).
158 * @param w_min The minimum weight contribution that is needed (this is
159 * just a hint which PostList subclasses may ignore).
161 * @return If a non-NULL pointer is returned, then the caller should
162 * substitute the returned pointer for its pointer to us, and then
163 * delete us. This "pruning" can only happen for a non-leaf
164 * subclass of this class.
166 virtual Internal
* skip_to(Xapian::docid did
, double w_min
) = 0;
168 /** Check if the specified docid occurs in this postlist.
170 * The caller is required to ensure that the specified @a docid actually
171 * exists in the database.
173 * This method acts like skip_to() if that can be done at little extra
174 * cost, in which case it then sets @a valid to true.
176 * Otherwise it simply checks if a particular docid is present. If it
177 * is, @a valid is set to true. If it isn't, it sets @a valid to
178 * false, and leaves the position unspecified (and hence the result of
179 * calling methods which depend on the current position, such as
180 * get_docid() and at_end(), are also unspecified). In this state, next()
181 * will advance to the first matching position after @a docid, and
182 * skip_to() will act as it would if the position was the first matching
183 * position after @a docid. If @a valid is set to false, then NULL must
184 * be returned (pruning in this situation doesn't make sense).
186 * The default implementation calls skip_to().
188 virtual Internal
* check(Xapian::docid did
, double w_min
, bool &valid
);
190 /** Advance the current position to the next document in the postlist.
192 * Any weight contribution is acceptable.
194 Internal
* next() { return next(0.0); }
196 /** Skip forward to the specified docid.
198 * Any weight contribution is acceptable.
200 Internal
* skip_to(Xapian::docid did
) { return skip_to(did
, 0.0); }
202 /// Count the number of leaf subqueries which match at the current position.
203 virtual Xapian::termcount
count_matching_subqs() const;
205 /// Gather PositionList* objects for a subtree.
206 virtual void gather_position_lists(OrPositionList
* orposlist
);
208 /// Return a string description of this object.
209 virtual std::string
get_description() const = 0;
212 // In the external API headers, this class is Xapian::PostingIterator::Internal,
213 // but in the library code it's still known as "PostList" in most places.
214 typedef Xapian::PostingIterator::Internal PostList
;
216 #endif // XAPIAN_INCLUDED_POSTLIST_H