[ci] Fix clang-sanitisers job for GHA change
[xapian.git] / xapian-core / api / postingsource.cc
blobf3a529504f56bc62bf0784243b530e46c0d284f0
1 /** @file
2 * @brief External sources of posting information
3 */
4 /* Copyright (C) 2008-2024 Olly Betts
5 * Copyright (C) 2008,2009 Lemur Consulting Ltd
6 * Copyright (C) 2010 Richard Boulton
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License, or (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 #include <config.h>
25 #include "xapian/postingsource.h"
27 #include "backends/databaseinternal.h"
28 #include "backends/documentinternal.h"
29 #include "matcher/postlisttree.h"
31 #include "xapian/document.h"
32 #include "xapian/error.h"
33 #include "xapian/queryparser.h" // For sortable_unserialise().
35 #include "omassert.h"
36 #include "pack.h"
37 #include "serialise-double.h"
38 #include "str.h"
40 #include <cfloat>
41 #include <memory>
43 using namespace std;
45 namespace Xapian {
47 PostingSource::~PostingSource() { }
49 double
50 PostingSource::get_weight() const
52 return 0;
55 void
56 PostingSource::skip_to(Xapian::docid did, double min_wt)
58 while (!at_end() && get_docid() < did) {
59 next(min_wt);
63 bool
64 PostingSource::check(Xapian::docid did, double min_wt)
66 skip_to(did, min_wt);
67 return true;
70 PostingSource *
71 PostingSource::clone() const
73 return NULL;
76 string
77 PostingSource::name() const
79 return string();
82 string
83 PostingSource::serialise() const
85 throw Xapian::UnimplementedError("serialise() not supported for this PostingSource");
88 PostingSource *
89 PostingSource::unserialise(const string &) const
91 throw Xapian::UnimplementedError("unserialise() not supported for this PostingSource");
94 PostingSource *
95 PostingSource::unserialise_with_registry(const std::string &s,
96 const Registry &) const
98 return unserialise(s);
101 void
102 PostingSource::reset(const Database& db, Xapian::doccount)
104 init(db);
107 void
108 PostingSource::init(const Database&)
110 const char* msg = "Either PostingSource::reset() or PostingSource::init() "
111 "must be overridden";
112 throw Xapian::InvalidOperationError(msg);
115 string
116 PostingSource::get_description() const
118 return "Xapian::PostingSource subclass";
121 Xapian::doccount
122 ValuePostingSource::get_termfreq_min() const
124 return termfreq_min;
127 Xapian::doccount
128 ValuePostingSource::get_termfreq_est() const
130 return termfreq_est;
133 Xapian::doccount
134 ValuePostingSource::get_termfreq_max() const
136 return termfreq_max;
139 void
140 ValuePostingSource::next(double min_wt)
142 if (!started) {
143 started = true;
144 value_it = db.valuestream_begin(slot);
145 } else {
146 ++value_it;
149 if (value_it == db.valuestream_end(slot)) return;
151 if (min_wt > get_maxweight()) {
152 value_it = db.valuestream_end(slot);
153 return;
157 void
158 ValuePostingSource::skip_to(Xapian::docid min_docid, double min_wt)
160 if (!started) {
161 started = true;
162 value_it = db.valuestream_begin(slot);
164 if (value_it == db.valuestream_end(slot)) return;
167 if (min_wt > get_maxweight()) {
168 value_it = db.valuestream_end(slot);
169 return;
171 value_it.skip_to(min_docid);
174 bool
175 ValuePostingSource::check(Xapian::docid min_docid, double min_wt)
177 if (!started) {
178 started = true;
179 value_it = db.valuestream_begin(slot);
181 if (value_it == db.valuestream_end(slot)) return true;
184 if (min_wt > get_maxweight()) {
185 value_it = db.valuestream_end(slot);
186 return true;
188 return value_it.check(min_docid);
191 bool
192 ValuePostingSource::at_end() const
194 return started && value_it == db.valuestream_end(slot);
197 Xapian::docid
198 ValuePostingSource::get_docid() const
200 return value_it.get_docid();
203 void
204 ValuePostingSource::reset(const Database& db_, Xapian::doccount)
206 db = db_;
207 started = false;
208 set_maxweight(DBL_MAX);
209 termfreq_max = db.get_value_freq(slot);
210 termfreq_est = termfreq_max;
211 termfreq_min = termfreq_max;
214 string
215 ValuePostingSource::get_description() const
217 string desc("Xapian::ValuePostingSource(slot=");
218 desc += str(get_slot());
219 desc += ")";
220 return desc;
224 ValueWeightPostingSource::ValueWeightPostingSource(Xapian::valueno slot_)
225 : ValuePostingSource(slot_)
229 double
230 ValueWeightPostingSource::get_weight() const
232 Assert(!at_end());
233 Assert(get_started());
234 return sortable_unserialise(get_value());
237 ValueWeightPostingSource *
238 ValueWeightPostingSource::clone() const
240 return new ValueWeightPostingSource(get_slot());
243 string
244 ValueWeightPostingSource::name() const
246 return string("Xapian::ValueWeightPostingSource");
249 string
250 ValueWeightPostingSource::serialise() const
252 string result;
253 pack_uint_last(result, get_slot());
254 return result;
257 ValueWeightPostingSource *
258 ValueWeightPostingSource::unserialise(const string &s) const
260 const char * p = s.data();
261 const char * end = p + s.size();
263 Xapian::valueno new_slot;
264 if (!unpack_uint_last(&p, end, &new_slot)) {
265 unpack_throw_serialisation_error(p);
268 return new ValueWeightPostingSource(new_slot);
271 void
272 ValueWeightPostingSource::reset(const Database& db_,
273 Xapian::doccount shard_index)
275 ValuePostingSource::reset(db_, shard_index);
277 string upper_bound = get_database().get_value_upper_bound(get_slot());
278 if (upper_bound.empty()) {
279 // This should only happen if there are no entries, in which case the
280 // maxweight is 0.
281 set_maxweight(0.0);
282 } else {
283 set_maxweight(sortable_unserialise(upper_bound));
287 string
288 ValueWeightPostingSource::get_description() const
290 string desc("Xapian::ValueWeightPostingSource(slot=");
291 desc += str(get_slot());
292 desc += ")";
293 return desc;
297 ValueMapPostingSource::ValueMapPostingSource(Xapian::valueno slot_)
298 : ValuePostingSource(slot_),
299 default_weight(0.0),
300 max_weight_in_map(0.0)
304 void
305 ValueMapPostingSource::add_mapping(const string & key, double wt)
307 weight_map[key] = wt;
308 max_weight_in_map = max(wt, max_weight_in_map);
311 void
312 ValueMapPostingSource::clear_mappings()
314 weight_map.clear();
315 max_weight_in_map = 0.0;
318 void
319 ValueMapPostingSource::set_default_weight(double wt)
321 default_weight = wt;
324 double
325 ValueMapPostingSource::get_weight() const
327 map<string, double>::const_iterator wit = weight_map.find(get_value());
328 if (wit == weight_map.end()) {
329 return default_weight;
331 return wit->second;
334 ValueMapPostingSource *
335 ValueMapPostingSource::clone() const
337 unique_ptr<ValueMapPostingSource> res(
338 new ValueMapPostingSource(get_slot()));
339 map<string, double>::const_iterator i;
340 for (i = weight_map.begin(); i != weight_map.end(); ++i) {
341 res->add_mapping(i->first, i->second);
343 res->set_default_weight(default_weight);
344 return res.release();
347 string
348 ValueMapPostingSource::name() const
350 return string("Xapian::ValueMapPostingSource");
353 string
354 ValueMapPostingSource::serialise() const
356 string result;
357 pack_uint(result, get_slot());
358 result += serialise_double(default_weight);
360 map<string, double>::const_iterator i;
361 for (i = weight_map.begin(); i != weight_map.end(); ++i) {
362 pack_string(result, i->first);
363 result.append(serialise_double(i->second));
366 return result;
369 ValueMapPostingSource *
370 ValueMapPostingSource::unserialise(const string &s) const
372 const char * p = s.data();
373 const char * end = p + s.size();
375 Xapian::valueno new_slot;
376 if (!unpack_uint(&p, end, &new_slot)) {
377 unpack_throw_serialisation_error(p);
379 unique_ptr<ValueMapPostingSource> res(new ValueMapPostingSource(new_slot));
380 res->set_default_weight(unserialise_double(&p, end));
381 while (p != end) {
382 string key;
383 if (!unpack_string(&p, end, key)) {
384 unpack_throw_serialisation_error(p);
386 res->add_mapping(key, unserialise_double(&p, end));
388 return res.release();
391 void
392 ValueMapPostingSource::reset(const Database& db_, Xapian::doccount shard_index)
394 ValuePostingSource::reset(db_, shard_index);
395 set_maxweight(max(max_weight_in_map, default_weight));
398 string
399 ValueMapPostingSource::get_description() const
401 string desc("Xapian::ValueMapPostingSource(slot=");
402 desc += str(get_slot());
403 desc += ")";
404 return desc;
407 FixedWeightPostingSource::FixedWeightPostingSource(double wt)
408 : started(false)
410 // The weight is fixed at wt, so that's the maxweight too. So just store wt
411 // as the maxweight and we can read it from there when we need it.
412 set_maxweight(wt);
415 Xapian::doccount
416 FixedWeightPostingSource::get_termfreq_min() const
418 return termfreq;
421 Xapian::doccount
422 FixedWeightPostingSource::get_termfreq_est() const
424 return termfreq;
427 Xapian::doccount
428 FixedWeightPostingSource::get_termfreq_max() const
430 return termfreq;
433 double
434 FixedWeightPostingSource::get_weight() const
436 return get_maxweight();
439 void
440 FixedWeightPostingSource::next(double min_wt)
442 if (!started) {
443 started = true;
444 it = db.postlist_begin(string());
445 } else {
446 ++it;
449 if (it == db.postlist_end(string())) return;
451 if (check_docid) {
452 it.skip_to(check_docid + 1);
453 check_docid = 0;
456 if (min_wt > get_maxweight()) {
457 it = db.postlist_end(string());
461 void
462 FixedWeightPostingSource::skip_to(Xapian::docid min_docid, double min_wt)
464 if (!started) {
465 started = true;
466 it = db.postlist_begin(string());
468 if (it == db.postlist_end(string())) return;
471 if (check_docid) {
472 if (min_docid < check_docid)
473 min_docid = check_docid + 1;
474 check_docid = 0;
477 if (min_wt > get_maxweight()) {
478 it = db.postlist_end(string());
479 return;
481 it.skip_to(min_docid);
484 bool
485 FixedWeightPostingSource::check(Xapian::docid min_docid, double)
487 // We're guaranteed not to be called if the document doesn't
488 // exist, so just remember the docid passed, and return true.
489 check_docid = min_docid;
490 return true;
493 bool
494 FixedWeightPostingSource::at_end() const
496 if (check_docid != 0) return false;
497 return started && it == db.postlist_end(string());
500 Xapian::docid
501 FixedWeightPostingSource::get_docid() const
503 if (check_docid != 0) return check_docid;
504 return *it;
507 FixedWeightPostingSource *
508 FixedWeightPostingSource::clone() const
510 return new FixedWeightPostingSource(get_maxweight());
513 string
514 FixedWeightPostingSource::name() const
516 return string("Xapian::FixedWeightPostingSource");
519 string
520 FixedWeightPostingSource::serialise() const
522 return serialise_double(get_maxweight());
525 FixedWeightPostingSource *
526 FixedWeightPostingSource::unserialise(const string &s) const
528 const char * p = s.data();
529 const char * s_end = p + s.size();
530 double new_wt = unserialise_double(&p, s_end);
531 if (p != s_end) {
532 throw Xapian::NetworkError("Bad serialised FixedWeightPostingSource - junk at end");
534 return new FixedWeightPostingSource(new_wt);
537 void
538 FixedWeightPostingSource::reset(const Xapian::Database& db_, Xapian::doccount)
540 db = db_;
541 termfreq = db_.get_doccount();
542 started = false;
543 check_docid = 0;
546 string
547 FixedWeightPostingSource::get_description() const
549 string desc("Xapian::FixedWeightPostingSource(wt=");
550 desc += str(get_maxweight());
551 desc += ")";
552 return desc;