Revert "Fix integer type used by ESet"
[xapian.git] / xapian-core / api / postingsource.cc
blob99af1660c85aac031b4bc750cf35940992952bdf
1 /** @file
2 * @brief External sources of posting information
3 */
4 /* Copyright (C) 2008-2022 Olly Betts
5 * Copyright (C) 2008,2009 Lemur Consulting Ltd
6 * Copyright (C) 2010 Richard Boulton
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License, or (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 #include <config.h>
25 // We need to be able to set deprecated members of ValuePostingSource.
26 #define XAPIAN_DEPRECATED(X) X
27 #include "xapian/postingsource.h"
29 #include "autoptr.h"
31 #include "backends/database.h"
32 #include "backends/document.h"
33 #include "matcher/multimatch.h"
35 #include "xapian/document.h"
36 #include "xapian/error.h"
37 #include "xapian/queryparser.h" // For sortable_unserialise().
39 #include "omassert.h"
40 #include "net/length.h"
41 #include "serialise-double.h"
42 #include "str.h"
44 #include <cfloat>
46 using namespace std;
48 namespace Xapian {
50 PostingSource::~PostingSource() { }
52 void
53 PostingSource::set_maxweight(double max_weight)
55 if (usual(matcher_)) {
56 MultiMatch * multimatch = static_cast<MultiMatch*>(matcher_);
57 multimatch->recalc_maxweight();
59 max_weight_ = max_weight;
62 double
63 PostingSource::get_weight() const
65 return 0;
68 void
69 PostingSource::skip_to(Xapian::docid did, double min_wt)
71 while (!at_end() && get_docid() < did) {
72 next(min_wt);
76 bool
77 PostingSource::check(Xapian::docid did, double min_wt)
79 skip_to(did, min_wt);
80 return true;
83 PostingSource *
84 PostingSource::clone() const
86 return NULL;
89 string
90 PostingSource::name() const
92 return string();
95 string
96 PostingSource::serialise() const
98 throw Xapian::UnimplementedError("serialise() not supported for this PostingSource");
101 PostingSource *
102 PostingSource::unserialise(const string &) const
104 throw Xapian::UnimplementedError("unserialise() not supported for this PostingSource");
107 PostingSource *
108 PostingSource::unserialise_with_registry(const std::string &s,
109 const Registry &) const
111 return unserialise(s);
114 string
115 PostingSource::get_description() const
117 return "Xapian::PostingSource subclass";
121 ValuePostingSource::ValuePostingSource(Xapian::valueno slot_)
122 : real_slot(slot_),
123 db(real_db),
124 slot(real_slot),
125 value_it(real_value_it),
126 started(real_started),
127 termfreq_min(real_termfreq_min),
128 termfreq_est(real_termfreq_est),
129 termfreq_max(real_termfreq_max)
133 Xapian::doccount
134 ValuePostingSource::get_termfreq_min() const
136 return real_termfreq_min;
139 Xapian::doccount
140 ValuePostingSource::get_termfreq_est() const
142 return real_termfreq_est;
145 Xapian::doccount
146 ValuePostingSource::get_termfreq_max() const
148 return real_termfreq_max;
151 void
152 ValuePostingSource::next(double min_wt)
154 if (!real_started) {
155 real_started = true;
156 real_value_it = real_db.valuestream_begin(real_slot);
157 } else {
158 ++real_value_it;
161 if (real_value_it == real_db.valuestream_end(real_slot)) return;
163 if (min_wt > get_maxweight()) {
164 real_value_it = real_db.valuestream_end(real_slot);
165 return;
169 void
170 ValuePostingSource::skip_to(Xapian::docid min_docid, double min_wt)
172 if (!real_started) {
173 real_started = true;
174 real_value_it = real_db.valuestream_begin(real_slot);
176 if (real_value_it == real_db.valuestream_end(real_slot)) return;
179 if (min_wt > get_maxweight()) {
180 real_value_it = real_db.valuestream_end(real_slot);
181 return;
183 real_value_it.skip_to(min_docid);
186 bool
187 ValuePostingSource::check(Xapian::docid min_docid, double min_wt)
189 if (!real_started) {
190 real_started = true;
191 real_value_it = real_db.valuestream_begin(real_slot);
193 if (real_value_it == real_db.valuestream_end(real_slot)) return true;
196 if (min_wt > get_maxweight()) {
197 real_value_it = real_db.valuestream_end(real_slot);
198 return true;
200 return real_value_it.check(min_docid);
203 bool
204 ValuePostingSource::at_end() const
206 return real_started && real_value_it == real_db.valuestream_end(real_slot);
209 Xapian::docid
210 ValuePostingSource::get_docid() const
212 return real_value_it.get_docid();
215 void
216 ValuePostingSource::init(const Database & db_)
218 real_db = db_;
219 real_started = false;
220 set_maxweight(DBL_MAX);
221 real_termfreq_max = real_db.get_value_freq(real_slot);
222 real_termfreq_est = real_termfreq_max;
223 real_termfreq_min = real_termfreq_max;
227 ValueWeightPostingSource::ValueWeightPostingSource(Xapian::valueno slot_)
228 : ValuePostingSource(slot_)
232 double
233 ValueWeightPostingSource::get_weight() const
235 Assert(!at_end());
236 Assert(get_started());
237 return sortable_unserialise(get_value());
240 ValueWeightPostingSource *
241 ValueWeightPostingSource::clone() const
243 return new ValueWeightPostingSource(get_slot());
246 string
247 ValueWeightPostingSource::name() const
249 return string("Xapian::ValueWeightPostingSource");
252 string
253 ValueWeightPostingSource::serialise() const
255 return encode_length(get_slot());
258 ValueWeightPostingSource *
259 ValueWeightPostingSource::unserialise(const string &s) const
261 const char * p = s.data();
262 const char * end = p + s.size();
264 Xapian::valueno new_slot;
265 decode_length(&p, end, new_slot);
266 if (p != end) {
267 throw Xapian::NetworkError("Bad serialised ValueWeightPostingSource - junk at end");
270 return new ValueWeightPostingSource(new_slot);
273 void
274 ValueWeightPostingSource::init(const Database & db_)
276 ValuePostingSource::init(db_);
278 string upper_bound = get_database().get_value_upper_bound(get_slot());
279 if (upper_bound.empty()) {
280 // This should only happen if there are no entries, in which case the
281 // maxweight is 0.
282 set_maxweight(0.0);
283 } else {
284 set_maxweight(sortable_unserialise(upper_bound));
288 string
289 ValueWeightPostingSource::get_description() const
291 string desc("Xapian::ValueWeightPostingSource(slot=");
292 desc += str(get_slot());
293 desc += ")";
294 return desc;
298 ValueMapPostingSource::ValueMapPostingSource(Xapian::valueno slot_)
299 : ValuePostingSource(slot_),
300 default_weight(0.0),
301 max_weight_in_map(0.0)
305 void
306 ValueMapPostingSource::add_mapping(const string & key, double wt)
308 weight_map[key] = wt;
309 max_weight_in_map = max(wt, max_weight_in_map);
312 void
313 ValueMapPostingSource::clear_mappings()
315 weight_map.clear();
316 max_weight_in_map = 0.0;
319 void
320 ValueMapPostingSource::set_default_weight(double wt)
322 default_weight = wt;
325 double
326 ValueMapPostingSource::get_weight() const
328 map<string, double>::const_iterator wit = weight_map.find(get_value());
329 if (wit == weight_map.end()) {
330 return default_weight;
332 return wit->second;
335 ValueMapPostingSource *
336 ValueMapPostingSource::clone() const
338 AutoPtr<ValueMapPostingSource> res(new ValueMapPostingSource(get_slot()));
339 map<string, double>::const_iterator i;
340 for (i = weight_map.begin(); i != weight_map.end(); ++i) {
341 res->add_mapping(i->first, i->second);
343 res->set_default_weight(default_weight);
344 return res.release();
347 string
348 ValueMapPostingSource::name() const
350 return string("Xapian::ValueMapPostingSource");
353 string
354 ValueMapPostingSource::serialise() const
356 string result = encode_length(get_slot());
357 result += serialise_double(default_weight);
359 map<string, double>::const_iterator i;
360 for (i = weight_map.begin(); i != weight_map.end(); ++i) {
361 result.append(encode_length(i->first.size()));
362 result.append(i->first);
363 result.append(serialise_double(i->second));
366 return result;
369 ValueMapPostingSource *
370 ValueMapPostingSource::unserialise(const string &s) const
372 const char * p = s.data();
373 const char * end = p + s.size();
375 Xapian::valueno new_slot;
376 decode_length(&p, end, new_slot);
377 AutoPtr<ValueMapPostingSource> res(new ValueMapPostingSource(new_slot));
378 res->set_default_weight(unserialise_double(&p, end));
379 while (p != end) {
380 size_t keylen;
381 decode_length_and_check(&p, end, keylen);
382 string key(p, keylen);
383 p += keylen;
384 res->add_mapping(key, unserialise_double(&p, end));
386 return res.release();
389 void
390 ValueMapPostingSource::init(const Database & db_)
392 ValuePostingSource::init(db_);
393 set_maxweight(max(max_weight_in_map, default_weight));
396 string
397 ValueMapPostingSource::get_description() const
399 string desc("Xapian::ValueMapPostingSource(slot=");
400 desc += str(get_slot());
401 desc += ")";
402 return desc;
405 FixedWeightPostingSource::FixedWeightPostingSource(double wt)
406 : started(false)
408 // The weight is fixed at wt, so that's the maxweight too. So just store wt
409 // as the maxweight and we can read it from there when we need it.
410 set_maxweight(wt);
413 Xapian::doccount
414 FixedWeightPostingSource::get_termfreq_min() const
416 return termfreq;
419 Xapian::doccount
420 FixedWeightPostingSource::get_termfreq_est() const
422 return termfreq;
425 Xapian::doccount
426 FixedWeightPostingSource::get_termfreq_max() const
428 return termfreq;
431 double
432 FixedWeightPostingSource::get_weight() const
434 return get_maxweight();
437 void
438 FixedWeightPostingSource::next(double min_wt)
440 if (!started) {
441 started = true;
442 it = db.postlist_begin(string());
443 } else {
444 ++it;
447 if (it == db.postlist_end(string())) return;
449 if (check_docid) {
450 it.skip_to(check_docid + 1);
451 check_docid = 0;
454 if (min_wt > get_maxweight()) {
455 it = db.postlist_end(string());
459 void
460 FixedWeightPostingSource::skip_to(Xapian::docid min_docid, double min_wt)
462 if (!started) {
463 started = true;
464 it = db.postlist_begin(string());
466 if (it == db.postlist_end(string())) return;
469 if (check_docid) {
470 if (min_docid < check_docid)
471 min_docid = check_docid + 1;
472 check_docid = 0;
475 if (min_wt > get_maxweight()) {
476 it = db.postlist_end(string());
477 return;
479 it.skip_to(min_docid);
482 bool
483 FixedWeightPostingSource::check(Xapian::docid min_docid, double)
485 // We're guaranteed not to be called if the document doesn't
486 // exist, so just remember the docid passed, and return true.
487 check_docid = min_docid;
488 return true;
491 bool
492 FixedWeightPostingSource::at_end() const
494 if (check_docid != 0) return false;
495 return started && it == db.postlist_end(string());
498 Xapian::docid
499 FixedWeightPostingSource::get_docid() const
501 if (check_docid != 0) return check_docid;
502 return *it;
505 FixedWeightPostingSource *
506 FixedWeightPostingSource::clone() const
508 return new FixedWeightPostingSource(get_maxweight());
511 string
512 FixedWeightPostingSource::name() const
514 return string("Xapian::FixedWeightPostingSource");
517 string
518 FixedWeightPostingSource::serialise() const
520 return serialise_double(get_maxweight());
523 FixedWeightPostingSource *
524 FixedWeightPostingSource::unserialise(const string &s) const
526 const char * p = s.data();
527 const char * s_end = p + s.size();
528 double new_wt = unserialise_double(&p, s_end);
529 if (p != s_end) {
530 throw Xapian::NetworkError("Bad serialised FixedWeightPostingSource - junk at end");
532 return new FixedWeightPostingSource(new_wt);
535 void
536 FixedWeightPostingSource::init(const Xapian::Database & db_)
538 db = db_;
539 termfreq = db_.get_doccount();
540 started = false;
541 check_docid = 0;
544 string
545 FixedWeightPostingSource::get_description() const
547 string desc("Xapian::FixedWeightPostingSource(wt=");
548 desc += str(get_maxweight());
549 desc += ")";
550 return desc;