Add accept() wrapper
[xapian.git] / xapian-core / weight / weightinternal.h
blob a131383b1dcc51ba27cabfa7af47faf9e6a40d29
1 /** @file
2 * @brief Xapian::Weight::Internal class, holding database and term statistics.
3 */
4 /* Copyright (C) 2007 Lemur Consulting Ltd
5 * Copyright (C) 2009,2010,2011,2013,2014,2015,2020 Olly Betts
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 #ifndef XAPIAN_INCLUDED_WEIGHTINTERNAL_H
23 #define XAPIAN_INCLUDED_WEIGHTINTERNAL_H
25 #include "xapian/weight.h"
27 #include "xapian/database.h"
28 #include "xapian/query.h"
30 #include "backends/database.h"
31 #include "internaltypes.h"
32 #include "omassert.h"
34 #include <map>
35 #include <string>
37 /// The frequencies for a term.
38 struct TermFreqs {
39 Xapian::doccount termfreq;
40 Xapian::doccount reltermfreq;
41 Xapian::termcount collfreq;
42 double max_part;
44 TermFreqs() : termfreq(0), reltermfreq(0), collfreq(0), max_part(0.0) {}
45 TermFreqs(Xapian::doccount termfreq_,
46 Xapian::doccount reltermfreq_,
47 Xapian::termcount collfreq_,
48 double max_part_ = 0.0)
49 : termfreq(termfreq_),
50 reltermfreq(reltermfreq_),
51 collfreq(collfreq_),
52 max_part(max_part_) {}
54 void operator+=(const TermFreqs & other) {
55 termfreq += other.termfreq;
56 reltermfreq += other.reltermfreq;
57 collfreq += other.collfreq;
58 max_part += other.max_part;
61 /// Return a std::string describing this object.
62 std::string get_description() const;
65 namespace Xapian {
67 class RSet;
69 /** Class to hold statistics for a given collection. */
70 class Weight::Internal {
71 #ifdef XAPIAN_ASSERTIONS
72 /** Number of sub-databases. */
73 size_t subdbs = 0;
75 /** True if we've finalised the stats.
77 * Used for assertions.
79 mutable bool finalised = false;
80 #endif
82 public:
83 /** Total length of all documents in the collection. */
84 Xapian::totallength total_length = 0;
86 /** Number of documents in the collection. */
87 Xapian::doccount collection_size = 0;
89 /** Number of relevant documents in the collection. */
90 Xapian::doccount rset_size = 0;
92 /** Has max_part been set for any term?
94 * If not, we can avoid having to serialise max_part.
96 bool have_max_part = false;
98 /** Database to get the bounds on doclength and wdf from. */
99 Xapian::Database db;
101 /** The query. */
102 Xapian::Query query;
104 /** Map of term frequencies and relevant term frequencies for the
105 * collection. */
106 std::map<std::string, TermFreqs> termfreqs;
108 Internal() { }
110 /** Add in the supplied statistics from a sub-database.
112 * Used for remote databases, where we pass across a serialised stats
113 * object, unserialise it, and add it to our total.
115 Internal & operator+=(const Internal & inc);
117 void merge(const Weight::Internal& o);
119 void set_query(const Xapian::Query &query_) {
120 AssertEq(subdbs, 0);
121 query = query_;
124 /// Accumulate the rtermfreqs for terms in the query.
125 void accumulate_stats(const Xapian::Database::Internal &sub_db,
126 const Xapian::RSet &rset);
128 /** Get the frequencies for the given term.
130 * termfreq is "n_t", the number of documents in the collection indexed by
131 * the given term.
133 * reltermfreq is "r_t", the number of relevant documents in the
134 * collection indexed by the given term.
136 * collfreq is the total number of occurrences of the term in all
137 * documents.
139 bool get_stats(const std::string & term,
140 Xapian::doccount & termfreq,
141 Xapian::doccount & reltermfreq,
142 Xapian::termcount & collfreq) const {
143 #ifdef XAPIAN_ASSERTIONS
144 finalised = true;
145 #endif
146 // We pass an empty std::string for term when calculating the extra
147 // weight.
148 if (term.empty()) {
149 termfreq = collection_size;
150 collfreq = collection_size;
151 reltermfreq = rset_size;
152 return true;
155 auto i = termfreqs.find(term);
156 if (i == termfreqs.end()) {
157 termfreq = reltermfreq = collfreq = 0;
158 return false;
161 termfreq = i->second.termfreq;
162 reltermfreq = i->second.reltermfreq;
163 collfreq = i->second.collfreq;
164 return true;
167 /// Get just the termfreq.
168 bool get_stats(const std::string & term,
169 Xapian::doccount & termfreq) const {
170 Xapian::doccount dummy1;
171 Xapian::termcount dummy2;
172 return get_stats(term, termfreq, dummy1, dummy2);
175 /// Get the termweight.
176 bool get_termweight(const std::string & term, double & termweight) const {
177 #ifdef XAPIAN_ASSERTIONS
178 finalised = true;
179 #endif
180 termweight = 0.0;
181 if (term.empty()) {
182 return false;
185 auto i = termfreqs.find(term);
186 if (i == termfreqs.end()) {
187 return false;
190 termweight = i->second.max_part;
191 return true;
194 /** Get the minimum and maximum termweights.
196 * Used by the snippet code.
198 void get_max_termweight(double & min_tw, double & max_tw) {
199 auto i = termfreqs.begin();
200 while (i != termfreqs.end() && i->second.max_part == 0.0) ++i;
201 if (rare(i == termfreqs.end())) {
202 min_tw = max_tw = 0.0;
203 return;
205 min_tw = max_tw = i->second.max_part;
206 while (++i != termfreqs.end()) {
207 double max_part = i->second.max_part;
208 if (max_part > max_tw) {
209 max_tw = max_part;
210 } else if (max_part < min_tw && max_part != 0.0) {
211 min_tw = max_part;
216 /// Set max_part for a term.
217 void set_max_part(const std::string & term, double max_part) {
218 have_max_part = true;
219 Assert(!term.empty());
220 auto i = termfreqs.find(term);
221 if (i != termfreqs.end())
222 i->second.max_part += max_part;
225 Xapian::doclength get_average_length() const {
226 #ifdef XAPIAN_ASSERTIONS
227 finalised = true;
228 #endif
229 if (rare(collection_size == 0)) return 0;
230 return Xapian::doclength(total_length) / collection_size;
233 /** Set the "bounds" stats from Database @a db. */
234 void set_bounds_from_db(const Xapian::Database &db_) {
235 Assert(!finalised);
236 db = db_;
239 /// Return a std::string describing this object.
240 std::string get_description() const;
245 #endif // XAPIAN_INCLUDED_WEIGHTINTERNAL_H