2 * @brief External sources of posting information
4 /* Copyright (C) 2008-2024 Olly Betts
5 * Copyright (C) 2008,2009 Lemur Consulting Ltd
6 * Copyright (C) 2010 Richard Boulton
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License, or (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "xapian/postingsource.h"
27 #include "backends/databaseinternal.h"
28 #include "backends/documentinternal.h"
29 #include "matcher/postlisttree.h"
31 #include "xapian/document.h"
32 #include "xapian/error.h"
33 #include "xapian/queryparser.h" // For sortable_unserialise().
37 #include "serialise-double.h"
// Out-of-line destructor for PostingSource; the body is empty.
47 PostingSource::~PostingSource() { }
// PostingSource::get_weight() const -- only the declarator is visible in this
// excerpt; the return type and body are not shown here.
50 PostingSource::get_weight() const
// PostingSource::skip_to(did, min_wt): base-class implementation.
// The visible loop keeps running while the source is not at_end() and the
// current docid is still below `did`.
// NOTE(review): the loop body is not visible in this excerpt -- presumably it
// advances with next(min_wt); confirm against the full file.
56 PostingSource::skip_to(Xapian::docid did
, double min_wt
)
58 while (!at_end() && get_docid() < did
) {
// PostingSource::check(did, min_wt) -- only the signature is visible in this
// excerpt; the return type and body are not shown here.
64 PostingSource::check(Xapian::docid did
, double min_wt
)
// PostingSource::clone() const -- only the declarator is visible in this
// excerpt; the return type and body are not shown here.
71 PostingSource::clone() const
// PostingSource::name() const -- only the declarator is visible in this
// excerpt; the return type and body are not shown here.
77 PostingSource::name() const
// PostingSource::serialise() const: base-class implementation.
// The only visible statement throws Xapian::UnimplementedError, i.e. the base
// class does not provide a serialised form.
83 PostingSource::serialise() const
85 throw Xapian::UnimplementedError("serialise() not supported for this PostingSource");
// PostingSource::unserialise(const string&) const: base-class implementation.
// The string parameter is unnamed (unused); the only visible statement throws
// Xapian::UnimplementedError.
89 PostingSource::unserialise(const string
&) const
91 throw Xapian::UnimplementedError("unserialise() not supported for this PostingSource");
// PostingSource::unserialise_with_registry(s, registry) const: base-class
// implementation.  The Registry parameter is unnamed and therefore ignored.
95 PostingSource::unserialise_with_registry(const std::string
&s
,
96 const Registry
&) const
// Forward to the registry-less unserialise() overload.
98 return unserialise(s
);
// PostingSource::reset(db, <unnamed doccount>): base-class implementation.
// The second parameter is unnamed, so it is not used by this definition.
// NOTE(review): the body is not visible in this excerpt; the error message in
// PostingSource::init() suggests the two are linked -- confirm in the full file.
102 PostingSource::reset(const Database
& db
, Xapian::doccount
)
// PostingSource::init(const Database&): base-class implementation.
// The Database parameter is unnamed (unused).  The visible body builds a
// message and throws Xapian::InvalidOperationError, telling the caller that a
// subclass has to override either reset() or init().
108 PostingSource::init(const Database
&)
110 const char* msg
= "Either PostingSource::reset() or PostingSource::init() "
111 "must be overridden";
112 throw Xapian::InvalidOperationError(msg
);
// PostingSource::get_description() const: base-class implementation.
// Returns a fixed, generic description string.
116 PostingSource::get_description() const
118 return "Xapian::PostingSource subclass";
// ValuePostingSource::get_termfreq_min() const -- only the declarator is
// visible in this excerpt.
// NOTE(review): presumably returns termfreq_min (set in reset()); confirm.
122 ValuePostingSource::get_termfreq_min() const
// ValuePostingSource::get_termfreq_est() const -- only the declarator is
// visible in this excerpt.
// NOTE(review): presumably returns termfreq_est (set in reset()); confirm.
128 ValuePostingSource::get_termfreq_est() const
// ValuePostingSource::get_termfreq_max() const -- only the declarator is
// visible in this excerpt.
// NOTE(review): presumably returns termfreq_max (set in reset()); confirm.
134 ValuePostingSource::get_termfreq_max() const
// ValuePostingSource::next(min_wt): move the value-stream iterator forward.
// NOTE(review): several original lines (guards, the actual increment) are not
// visible in this excerpt; the comments below describe only what is shown.
140 ValuePostingSource::next(double min_wt
)
// Position value_it at the beginning of the value stream for `slot`.
// NOTE(review): the condition guarding this is not visible -- presumably it
// only happens on the first call after reset(); confirm.
144 value_it
= db
.valuestream_begin(slot
);
// Nothing more to do once the iterator has reached the end of the stream.
149 if (value_it
== db
.valuestream_end(slot
)) return;
// If the caller requires more weight than get_maxweight() allows, jump the
// iterator straight to the end of the stream.
151 if (min_wt
> get_maxweight()) {
152 value_it
= db
.valuestream_end(slot
);
// ValuePostingSource::skip_to(min_docid, min_wt): advance the value-stream
// iterator to `min_docid` via ValueIterator::skip_to().
// NOTE(review): guard conditions and closing braces are not visible in this
// excerpt; the comments below describe only what is shown.
158 ValuePostingSource::skip_to(Xapian::docid min_docid
, double min_wt
)
// Position value_it at the beginning of the value stream for `slot`.
// NOTE(review): the guarding condition is not visible -- presumably first use
// only; confirm.
162 value_it
= db
.valuestream_begin(slot
);
// Already at the end of the stream: nothing to skip over.
164 if (value_it
== db
.valuestream_end(slot
)) return;
// If the caller requires more weight than get_maxweight() allows, jump the
// iterator straight to the end of the stream.
167 if (min_wt
> get_maxweight()) {
168 value_it
= db
.valuestream_end(slot
);
// Otherwise let the iterator do the skipping.
171 value_it
.skip_to(min_docid
);
// ValuePostingSource::check(min_docid, min_wt): delegate to
// ValueIterator::check() and return its result.
// NOTE(review): guard conditions and closing braces are not visible in this
// excerpt; the comments below describe only what is shown.
175 ValuePostingSource::check(Xapian::docid min_docid
, double min_wt
)
// Position value_it at the beginning of the value stream for `slot`.
// NOTE(review): the guarding condition is not visible -- presumably first use
// only; confirm.
179 value_it
= db
.valuestream_begin(slot
);
// At the end of the stream this reports true without consulting the iterator.
181 if (value_it
== db
.valuestream_end(slot
)) return true;
// If the caller requires more weight than get_maxweight() allows, jump the
// iterator straight to the end of the stream.
184 if (min_wt
> get_maxweight()) {
185 value_it
= db
.valuestream_end(slot
);
// Otherwise the answer is whatever the value iterator's check() reports.
188 return value_it
.check(min_docid
);
// ValuePostingSource::at_end() const.
// True only when iteration has started AND value_it equals the end of the
// value stream for `slot`; a source that has not started is never "at end".
192 ValuePostingSource::at_end() const
194 return started
&& value_it
== db
.valuestream_end(slot
);
// ValuePostingSource::get_docid() const.
// Returns the docid the value-stream iterator is currently positioned on.
198 ValuePostingSource::get_docid() const
200 return value_it
.get_docid();
// ValuePostingSource::reset(db_, <unnamed doccount>): re-initialise the
// weight bound and term-frequency statistics.  The second parameter is
// unnamed, so it is not used by this definition.
// NOTE(review): the visible statements read `db`, not the `db_` parameter;
// the lines that would store `db_` (and reset `started`) are not visible in
// this excerpt -- confirm against the full file.
204 ValuePostingSource::reset(const Database
& db_
, Xapian::doccount
)
// Set the weight upper bound to the largest finite double.
208 set_maxweight(DBL_MAX
);
// Use the database's frequency for this value slot as the upper bound...
209 termfreq_max
= db
.get_value_freq(slot
);
// ...and use that same figure for the estimate and the lower bound.
210 termfreq_est
= termfreq_max
;
211 termfreq_min
= termfreq_max
;
// ValuePostingSource::get_description() const.
// Builds a description starting with the class name and the slot number.
// NOTE(review): the closing of the string and the return are not visible in
// this excerpt.
215 ValuePostingSource::get_description() const
217 string
desc("Xapian::ValuePostingSource(slot=");
218 desc
+= str(get_slot());
// Constructor: passes the value slot number on to the ValuePostingSource base.
224 ValueWeightPostingSource::ValueWeightPostingSource(Xapian::valueno slot_
)
225 : ValuePostingSource(slot_
)
// ValueWeightPostingSource::get_weight() const.
// The weight is the current value decoded with sortable_unserialise().
230 ValueWeightPostingSource::get_weight() const
// Asserts that iteration has been started before the weight is read.
233 Assert(get_started());
234 return sortable_unserialise(get_value());
// ValueWeightPostingSource::clone() const.
// Returns a newly allocated source on the same slot; the raw pointer is handed
// to the caller.
237 ValueWeightPostingSource
*
238 ValueWeightPostingSource::clone() const
240 return new ValueWeightPostingSource(get_slot());
// ValueWeightPostingSource::name() const.
// Returns the fully qualified class name as a string.
244 ValueWeightPostingSource::name() const
246 return string("Xapian::ValueWeightPostingSource");
// ValueWeightPostingSource::serialise() const.
// The visible statement packs the slot number into `result` with
// pack_uint_last(); ValueWeightPostingSource::unserialise() reads it back
// with unpack_uint_last().
// NOTE(review): the declaration and return of `result` are not visible in
// this excerpt.
250 ValueWeightPostingSource::serialise() const
253 pack_uint_last(result
, get_slot());
// ValueWeightPostingSource::unserialise(s) const.
// Decodes a slot number from `s` and returns a newly allocated source for it;
// the raw pointer is handed to the caller.
257 ValueWeightPostingSource
*
258 ValueWeightPostingSource::unserialise(const string
&s
) const
// [p, end) delimits the serialised bytes still to be decoded.
260 const char * p
= s
.data();
261 const char * end
= p
+ s
.size();
263 Xapian::valueno new_slot
;
// Decode the slot number; on failure hand over to the shared error helper
// (its name indicates that it throws).
264 if (!unpack_uint_last(&p
, end
, &new_slot
)) {
265 unpack_throw_serialisation_error(p
);
268 return new ValueWeightPostingSource(new_slot
);
// ValueWeightPostingSource::reset(db_, shard_index).
// Runs the base-class reset, then derives the weight upper bound from the
// slot's value upper bound.
272 ValueWeightPostingSource::reset(const Database
& db_
,
273 Xapian::doccount shard_index
)
275 ValuePostingSource::reset(db_
, shard_index
);
// Fetch the (serialised) upper bound on values stored in this slot.
277 string upper_bound
= get_database().get_value_upper_bound(get_slot());
// NOTE(review): the statement(s) taken for an empty bound and the `else` are
// not visible in this excerpt -- presumably the decode below is only performed
// for a non-empty bound; confirm against the full file.
278 if (upper_bound
.empty()) {
279 // This should only happen if there are no entries, in which case the
// Decode the bound and use it as the maximum weight.
283 set_maxweight(sortable_unserialise(upper_bound
));
// ValueWeightPostingSource::get_description() const.
// Builds a description starting with the class name and the slot number.
// NOTE(review): the closing of the string and the return are not visible in
// this excerpt.
288 ValueWeightPostingSource::get_description() const
290 string
desc("Xapian::ValueWeightPostingSource(slot=");
291 desc
+= str(get_slot());
// Constructor: passes the slot number to the ValuePostingSource base and
// starts with max_weight_in_map at 0.0.
// NOTE(review): one initialiser between these two is not visible in this
// excerpt (presumably default_weight); confirm.
297 ValueMapPostingSource::ValueMapPostingSource(Xapian::valueno slot_
)
298 : ValuePostingSource(slot_
),
300 max_weight_in_map(0.0)
// ValueMapPostingSource::add_mapping(key, wt).
// Associates weight `wt` with value `key`, replacing any existing entry for
// that key.
305 ValueMapPostingSource::add_mapping(const string
& key
, double wt
)
307 weight_map
[key
] = wt
;
// Keep the running maximum up to date.  It only ever grows here, so replacing
// a key's weight with a smaller one does not lower it.
308 max_weight_in_map
= max(wt
, max_weight_in_map
);
// ValueMapPostingSource::clear_mappings().
// Resets the tracked maximum mapped weight to 0.0.
// NOTE(review): the statement that empties weight_map itself is not visible
// in this excerpt -- confirm against the full file.
312 ValueMapPostingSource::clear_mappings()
315 max_weight_in_map
= 0.0;
// ValueMapPostingSource::set_default_weight(wt) -- only the signature is
// visible in this excerpt.
// NOTE(review): presumably stores `wt` in default_weight, which get_weight()
// returns for unmapped values; confirm.
319 ValueMapPostingSource::set_default_weight(double wt
)
// ValueMapPostingSource::get_weight() const.
// Looks the current value up in weight_map; values without a mapping get
// default_weight.
// NOTE(review): the return for a found entry is not visible in this excerpt
// -- presumably the mapped weight; confirm.
325 ValueMapPostingSource::get_weight() const
327 map
<string
, double>::const_iterator wit
= weight_map
.find(get_value());
328 if (wit
== weight_map
.end()) {
329 return default_weight
;
// ValueMapPostingSource::clone() const.
// Returns a newly allocated copy with the same slot, mappings and default
// weight; the raw pointer is handed to the caller.
334 ValueMapPostingSource
*
335 ValueMapPostingSource::clone() const
// Hold the copy in a unique_ptr while it is being populated, so that it is
// freed if anything below throws.
337 unique_ptr
<ValueMapPostingSource
> res(
338 new ValueMapPostingSource(get_slot()));
339 map
<string
, double>::const_iterator i
;
// Copy the mappings through add_mapping(), which also maintains the copy's
// max_weight_in_map.
340 for (i
= weight_map
.begin(); i
!= weight_map
.end(); ++i
) {
341 res
->add_mapping(i
->first
, i
->second
);
343 res
->set_default_weight(default_weight
);
// Give up ownership to the caller.
344 return res
.release();
// ValueMapPostingSource::name() const.
// Returns the fully qualified class name as a string.
348 ValueMapPostingSource::name() const
350 return string("Xapian::ValueMapPostingSource");
// ValueMapPostingSource::serialise() const.
// Visible layout written into `result`: the slot number, the default weight,
// then one (key, weight) pair per entry of weight_map in map order.
// NOTE(review): the declaration and return of `result` are not visible in
// this excerpt.
354 ValueMapPostingSource::serialise() const
357 pack_uint(result
, get_slot());
358 result
+= serialise_double(default_weight
);
360 map
<string
, double>::const_iterator i
;
361 for (i
= weight_map
.begin(); i
!= weight_map
.end(); ++i
) {
362 pack_string(result
, i
->first
);
363 result
.append(serialise_double(i
->second
));
// ValueMapPostingSource::unserialise(s) const.
// Rebuilds a source from the layout written by serialise(): slot number,
// default weight, then (key, weight) pairs.  Returns a newly allocated object;
// the raw pointer is handed to the caller.
369 ValueMapPostingSource
*
370 ValueMapPostingSource::unserialise(const string
&s
) const
// [p, end) delimits the serialised bytes still to be decoded.
372 const char * p
= s
.data();
373 const char * end
= p
+ s
.size();
375 Xapian::valueno new_slot
;
// Decode the slot number; on failure hand over to the shared error helper
// (its name indicates that it throws).
376 if (!unpack_uint(&p
, end
, &new_slot
)) {
377 unpack_throw_serialisation_error(p
);
// Hold the result in a unique_ptr so that it is freed if decoding throws.
379 unique_ptr
<ValueMapPostingSource
> res(new ValueMapPostingSource(new_slot
));
380 res
->set_default_weight(unserialise_double(&p
, end
));
// NOTE(review): the loop header and the declaration of `key` are not visible
// in this excerpt -- presumably this repeats until p reaches end; confirm.
383 if (!unpack_string(&p
, end
, key
)) {
384 unpack_throw_serialisation_error(p
);
386 res
->add_mapping(key
, unserialise_double(&p
, end
));
// Give up ownership to the caller.
388 return res
.release();
// ValueMapPostingSource::reset(db_, shard_index).
// Runs the base-class reset, then replaces the weight upper bound with the
// larger of the biggest mapped weight and the default weight.
392 ValueMapPostingSource::reset(const Database
& db_
, Xapian::doccount shard_index
)
394 ValuePostingSource::reset(db_
, shard_index
);
395 set_maxweight(max(max_weight_in_map
, default_weight
));
// ValueMapPostingSource::get_description() const.
// Builds a description starting with the class name and the slot number.
// NOTE(review): the closing of the string and the return are not visible in
// this excerpt.
399 ValueMapPostingSource::get_description() const
401 string
desc("Xapian::ValueMapPostingSource(slot=");
402 desc
+= str(get_slot());
// Constructor taking the fixed weight `wt`.
// NOTE(review): the initialiser list and the statement that stores `wt` are
// not visible in this excerpt; the existing comment below says it is stored
// as the maxweight.
407 FixedWeightPostingSource::FixedWeightPostingSource(double wt
)
410 // The weight is fixed at wt, so that's the maxweight too. So just store wt
411 // as the maxweight and we can read it from there when we need it.
// FixedWeightPostingSource::get_termfreq_min() const -- only the declarator
// is visible in this excerpt.
// NOTE(review): presumably returns termfreq (set in reset()); confirm.
416 FixedWeightPostingSource::get_termfreq_min() const
// FixedWeightPostingSource::get_termfreq_est() const -- only the declarator
// is visible in this excerpt.
// NOTE(review): presumably returns termfreq (set in reset()); confirm.
422 FixedWeightPostingSource::get_termfreq_est() const
// FixedWeightPostingSource::get_termfreq_max() const -- only the declarator
// is visible in this excerpt.
// NOTE(review): presumably returns termfreq (set in reset()); confirm.
428 FixedWeightPostingSource::get_termfreq_max() const
// FixedWeightPostingSource::get_weight() const.
// Every document gets the same weight, which is read back from the stored
// maxweight.
434 FixedWeightPostingSource::get_weight() const
436 return get_maxweight();
// FixedWeightPostingSource::next(min_wt): move the posting-list iterator
// forward.
// NOTE(review): guard conditions, the plain increment and closing braces are
// not visible in this excerpt; the comments below describe only what is shown.
440 FixedWeightPostingSource::next(double min_wt
)
// Position `it` at the start of the posting list for the empty string.
// NOTE(review): presumably that list covers every document and this is done
// on first use only -- confirm.
444 it
= db
.postlist_begin(string());
// Nothing more to do once the iterator has reached the end of the list.
449 if (it
== db
.postlist_end(string())) return;
// Move just past the docid remembered by check().
// NOTE(review): the condition guarding this (and any clearing of check_docid)
// is not visible here.
452 it
.skip_to(check_docid
+ 1);
// If the caller requires more weight than the fixed weight, jump the iterator
// straight to the end of the list.
456 if (min_wt
> get_maxweight()) {
457 it
= db
.postlist_end(string());
// FixedWeightPostingSource::skip_to(min_docid, min_wt): advance the
// posting-list iterator to `min_docid`.
// NOTE(review): guard conditions and closing braces are not visible in this
// excerpt; the comments below describe only what is shown.
462 FixedWeightPostingSource::skip_to(Xapian::docid min_docid
, double min_wt
)
// Position `it` at the start of the posting list for the empty string.
// NOTE(review): the guarding condition is not visible -- presumably first use
// only; confirm.
466 it
= db
.postlist_begin(string());
// Already at the end of the list: nothing to skip over.
468 if (it
== db
.postlist_end(string())) return;
// A target below the docid remembered by check() is bumped to just past that
// docid.
// NOTE(review): the enclosing condition and any clearing of check_docid are
// not visible here.
472 if (min_docid
< check_docid
)
473 min_docid
= check_docid
+ 1;
// If the caller requires more weight than the fixed weight, jump the iterator
// straight to the end of the list.
477 if (min_wt
> get_maxweight()) {
478 it
= db
.postlist_end(string());
// Otherwise let the iterator do the skipping.
481 it
.skip_to(min_docid
);
// FixedWeightPostingSource::check(min_docid, <unnamed double>).
// The weight parameter is unnamed, so it is not used.  The docid is only
// recorded in check_docid here; at_end() and get_docid() consult it.
// NOTE(review): the return statement is not visible in this excerpt; the
// existing comment below says it returns true.
485 FixedWeightPostingSource::check(Xapian::docid min_docid
, double)
487 // We're guaranteed not to be called if the document doesn't
488 // exist, so just remember the docid passed, and return true.
489 check_docid
= min_docid
;
// FixedWeightPostingSource::at_end() const.
494 FixedWeightPostingSource::at_end() const
// While a docid recorded by check() is pending (non-zero), never report end.
496 if (check_docid
!= 0) return false;
// Otherwise: at end only once started and the iterator equals the end of the
// posting list for the empty string.
497 return started
&& it
== db
.postlist_end(string());
// FixedWeightPostingSource::get_docid() const.
// A non-zero docid recorded by check() takes precedence.
// NOTE(review): the fall-through return is not visible in this excerpt --
// presumably the iterator's current docid; confirm.
501 FixedWeightPostingSource::get_docid() const
503 if (check_docid
!= 0) return check_docid
;
// FixedWeightPostingSource::clone() const.
// Returns a newly allocated source with the same fixed weight (read back via
// get_maxweight()); the raw pointer is handed to the caller.
507 FixedWeightPostingSource
*
508 FixedWeightPostingSource::clone() const
510 return new FixedWeightPostingSource(get_maxweight());
// FixedWeightPostingSource::name() const.
// Returns the fully qualified class name as a string.
514 FixedWeightPostingSource::name() const
516 return string("Xapian::FixedWeightPostingSource");
// FixedWeightPostingSource::serialise() const.
// The serialised form is just the fixed weight encoded by serialise_double().
520 FixedWeightPostingSource::serialise() const
522 return serialise_double(get_maxweight());
// FixedWeightPostingSource::unserialise(s) const.
// Decodes the fixed weight from `s` and returns a newly allocated source for
// it; the raw pointer is handed to the caller.
525 FixedWeightPostingSource
*
526 FixedWeightPostingSource::unserialise(const string
&s
) const
// [p, s_end) delimits the serialised bytes still to be decoded.
528 const char * p
= s
.data();
529 const char * s_end
= p
+ s
.size();
530 double new_wt
= unserialise_double(&p
, s_end
);
// Reject input with bytes left over after the weight.
// NOTE(review): the condition guarding this throw is not visible in this
// excerpt -- presumably `p != s_end`; confirm.
532 throw Xapian::NetworkError("Bad serialised FixedWeightPostingSource - junk at end");
534 return new FixedWeightPostingSource(new_wt
);
// FixedWeightPostingSource::reset(db_, <unnamed doccount>).
// The second parameter is unnamed, so it is not used by this definition.
// NOTE(review): the statements that store `db_` and reset the iteration state
// are not visible in this excerpt.
538 FixedWeightPostingSource::reset(const Xapian::Database
& db_
, Xapian::doccount
)
// Record the database's document count as this source's term frequency.
541 termfreq
= db_
.get_doccount();
// FixedWeightPostingSource::get_description() const.
// Builds a description starting with the class name and the fixed weight
// (read back via get_maxweight()).
// NOTE(review): the closing of the string and the return are not visible in
// this excerpt.
547 FixedWeightPostingSource::get_description() const
549 string
desc("Xapian::FixedWeightPostingSource(wt=");
550 desc
+= str(get_maxweight());