2 * @brief External sources of posting information
4 /* Copyright (C) 2008-2022 Olly Betts
5 * Copyright (C) 2008,2009 Lemur Consulting Ltd
6 * Copyright (C) 2010 Richard Boulton
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License, or (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 // We need to be able to set deprecated members of ValuePostingSource.
26 #define XAPIAN_DEPRECATED(X) X
27 #include "xapian/postingsource.h"
31 #include "backends/database.h"
32 #include "backends/document.h"
33 #include "matcher/multimatch.h"
35 #include "xapian/document.h"
36 #include "xapian/error.h"
37 #include "xapian/queryparser.h" // For sortable_unserialise().
40 #include "net/length.h"
41 #include "serialise-double.h"
// Out-of-line definition of the PostingSource destructor; the body is empty.
50 PostingSource::~PostingSource() { }
// Store a new maximum weight for this posting source.
//
// When matcher_ is set, the attached matcher is asked to recalculate its
// maximum weight first; max_weight is then stored in max_weight_.
53 PostingSource::set_maxweight(double max_weight
)
// NOTE(review): usual() looks like a branch-prediction hint that matcher_ is
// normally non-null - confirm against its definition.
55 if (usual(matcher_
)) {
// matcher_ is cast to MultiMatch* so that recalc_maxweight() can be called.
56 MultiMatch
* multimatch
= static_cast<MultiMatch
*>(matcher_
);
57 multimatch
->recalc_maxweight();
59 max_weight_
= max_weight
;
63 PostingSource::get_weight() const
// Base-class skip_to(): loops for as long as the source is not at_end() and
// the current docid is still below did.
// NOTE(review): the loop body presumably advances with next(min_wt) - confirm.
69 PostingSource::skip_to(Xapian::docid did
, double min_wt
)
71 while (!at_end() && get_docid() < did
) {
77 PostingSource::check(Xapian::docid did
, double min_wt
)
84 PostingSource::clone() const
90 PostingSource::name() const
// Base-class serialise(): throws Xapian::UnimplementedError to report that
// serialisation is not supported.
96 PostingSource::serialise() const
98 throw Xapian::UnimplementedError("serialise() not supported for this PostingSource");
// Base-class unserialise(): the serialised string parameter is unnamed and
// unused; Xapian::UnimplementedError is thrown to report that unserialisation
// is not supported.
102 PostingSource::unserialise(const string
&) const
104 throw Xapian::UnimplementedError("unserialise() not supported for this PostingSource");
// Base-class unserialise_with_registry(): the Registry parameter is unnamed
// and unused, and the call is forwarded to unserialise(s).
108 PostingSource::unserialise_with_registry(const std::string
&s
,
109 const Registry
&) const
111 return unserialise(s
);
// Base-class get_description(): returns a fixed, generic description string.
115 PostingSource::get_description() const
117 return "Xapian::PostingSource subclass";
// Construct a ValuePostingSource for the value slot slot_.
//
// The initialisers below set value_it, started and termfreq_{min,est,max}
// from the corresponding real_* members.
// NOTE(review): these look like the deprecated public members (see the
// XAPIAN_DEPRECATED override near the top of the file) declared as references
// to the real_* members - confirm in xapian/postingsource.h.
121 ValuePostingSource::ValuePostingSource(Xapian::valueno slot_
)
125 value_it(real_value_it
),
126 started(real_started
),
127 termfreq_min(real_termfreq_min
),
128 termfreq_est(real_termfreq_est
),
129 termfreq_max(real_termfreq_max
)
// Return the stored lower bound on term frequency (real_termfreq_min).
134 ValuePostingSource::get_termfreq_min() const
136 return real_termfreq_min
;
// Return the stored term frequency estimate (real_termfreq_est).
140 ValuePostingSource::get_termfreq_est() const
142 return real_termfreq_est
;
// Return the stored upper bound on term frequency (real_termfreq_max).
146 ValuePostingSource::get_termfreq_max() const
148 return real_termfreq_max
;
// Advance over the value stream for real_slot.
//
// min_wt is the minimum weight the caller is interested in; it is compared
// with get_maxweight() below.
152 ValuePostingSource::next(double min_wt
)
// Position the iterator at the start of the slot's value stream.
// NOTE(review): presumably this happens only on the first call, with later
// calls incrementing the iterator instead - confirm.
156 real_value_it
= real_db
.valuestream_begin(real_slot
);
// Nothing more to do once the end of the value stream has been reached.
161 if (real_value_it
== real_db
.valuestream_end(real_slot
)) return;
// The requested minimum weight exceeds the maximum this source reports, so
// move straight to the end of the stream.
163 if (min_wt
> get_maxweight()) {
164 real_value_it
= real_db
.valuestream_end(real_slot
);
// Move the value stream iterator forward to min_docid using the iterator's
// own skip_to().
//
// min_wt is the minimum weight the caller is interested in; it is compared
// with get_maxweight() below.
170 ValuePostingSource::skip_to(Xapian::docid min_docid
, double min_wt
)
// Position the iterator at the start of the slot's value stream.
// NOTE(review): presumably done only when iteration has not started yet -
// confirm.
174 real_value_it
= real_db
.valuestream_begin(real_slot
);
// An empty value stream leaves nothing to skip through.
176 if (real_value_it
== real_db
.valuestream_end(real_slot
)) return;
// The requested minimum weight exceeds the maximum this source reports, so
// move straight to the end of the stream.
179 if (min_wt
> get_maxweight()) {
180 real_value_it
= real_db
.valuestream_end(real_slot
);
183 real_value_it
.skip_to(min_docid
);
// Check min_docid against the value stream and return the result of the
// iterator's own check(); true is returned directly when the stream is empty.
//
// min_wt is the minimum weight the caller is interested in; it is compared
// with get_maxweight() below.
187 ValuePostingSource::check(Xapian::docid min_docid
, double min_wt
)
// Position the iterator at the start of the slot's value stream.
// NOTE(review): presumably done only when iteration has not started yet -
// confirm.
191 real_value_it
= real_db
.valuestream_begin(real_slot
);
// An empty value stream: report true with the iterator left at the end.
193 if (real_value_it
== real_db
.valuestream_end(real_slot
)) return true;
// The requested minimum weight exceeds the maximum this source reports, so
// move straight to the end of the stream.
196 if (min_wt
> get_maxweight()) {
197 real_value_it
= real_db
.valuestream_end(real_slot
);
200 return real_value_it
.check(min_docid
);
// Return true only if iteration has started (real_started) and the iterator
// equals the end of the value stream for real_slot.
204 ValuePostingSource::at_end() const
206 return real_started
&& real_value_it
== real_db
.valuestream_end(real_slot
);
// Return the docid the value stream iterator is currently positioned on.
210 ValuePostingSource::get_docid() const
212 return real_value_it
.get_docid();
// (Re)initialise this source for use with the database db_.
216 ValuePostingSource::init(const Database
& db_
)
// Mark iteration as not yet started.
219 real_started
= false;
// Start with the largest finite double as the weight bound.
// NOTE(review): subclasses in this file call set_maxweight() again from their
// own init() to set a tighter bound.
220 set_maxweight(DBL_MAX
);
// All three term frequency figures are set to the value frequency the
// database reports for this slot.
221 real_termfreq_max
= real_db
.get_value_freq(real_slot
);
222 real_termfreq_est
= real_termfreq_max
;
223 real_termfreq_min
= real_termfreq_max
;
// Construct a ValueWeightPostingSource for slot_ by passing the slot number
// to the ValuePostingSource base constructor.
227 ValueWeightPostingSource::ValueWeightPostingSource(Xapian::valueno slot_
)
228 : ValuePostingSource(slot_
)
// Return the weight for the current position: the current value decoded with
// sortable_unserialise().
233 ValueWeightPostingSource::get_weight() const
// Asserts that iteration has been started before the value is read.
236 Assert(get_started());
237 return sortable_unserialise(get_value());
// Return a newly allocated ValueWeightPostingSource built for the same slot
// as this one.
240 ValueWeightPostingSource
*
241 ValueWeightPostingSource::clone() const
243 return new ValueWeightPostingSource(get_slot());
// Return the fixed name string for this posting source class.
247 ValueWeightPostingSource::name() const
249 return string("Xapian::ValueWeightPostingSource");
// Serialise this source as its slot number passed through encode_length().
253 ValueWeightPostingSource::serialise() const
255 return encode_length(get_slot());
// Build a new ValueWeightPostingSource from the serialised string s, which
// holds an encoded slot number (see serialise()).
258 ValueWeightPostingSource
*
259 ValueWeightPostingSource::unserialise(const string
&s
) const
// p and end delimit the bytes of s.
261 const char * p
= s
.data();
262 const char * end
= p
+ s
.size();
264 Xapian::valueno new_slot
;
// decode_length() is given &p, so it can move p past what it reads.
265 decode_length(&p
, end
, new_slot
);
// NOTE(review): presumably guarded by a check that p has not reached end,
// i.e. that data remains after the slot number - confirm.
267 throw Xapian::NetworkError("Bad serialised ValueWeightPostingSource - junk at end");
270 return new ValueWeightPostingSource(new_slot
);
// Initialise for database db_: run the base class init(), then derive the
// maximum weight from the slot's value upper bound.
274 ValueWeightPostingSource::init(const Database
& db_
)
276 ValuePostingSource::init(db_
);
278 string upper_bound
= get_database().get_value_upper_bound(get_slot());
// An empty upper bound is handled separately from the normal case.
279 if (upper_bound
.empty()) {
280 // This should only happen if there are no entries, in which case the
// The upper bound is decoded with sortable_unserialise() and used as the
// maximum weight.
// NOTE(review): presumably this is the non-empty branch - confirm.
284 set_maxweight(sortable_unserialise(upper_bound
));
289 ValueWeightPostingSource::get_description() const
291 string
desc("Xapian::ValueWeightPostingSource(slot=");
292 desc
+= str(get_slot());
// Construct a ValueMapPostingSource for slot_; max_weight_in_map starts at
// 0.0.
298 ValueMapPostingSource::ValueMapPostingSource(Xapian::valueno slot_
)
299 : ValuePostingSource(slot_
),
301 max_weight_in_map(0.0)
// Map the value key to the weight wt, replacing any existing entry for key.
306 ValueMapPostingSource::add_mapping(const string
& key
, double wt
)
308 weight_map
[key
] = wt
;
// Keep track of the largest weight added so far.
309 max_weight_in_map
= max(wt
, max_weight_in_map
);
// Reset the mapping state; the tracked maximum mapped weight goes back to
// its initial 0.0.
// NOTE(review): presumably weight_map itself is emptied here as well -
// confirm.
313 ValueMapPostingSource::clear_mappings()
316 max_weight_in_map
= 0.0;
320 ValueMapPostingSource::set_default_weight(double wt
)
// Look up the current value in weight_map to find the weight to return.
326 ValueMapPostingSource::get_weight() const
328 map
<string
, double>::const_iterator wit
= weight_map
.find(get_value());
// Values with no mapping get default_weight.
329 if (wit
== weight_map
.end()) {
330 return default_weight
;
// Return a newly allocated copy of this source: same slot, same mappings and
// same default weight.
335 ValueMapPostingSource
*
336 ValueMapPostingSource::clone() const
// NOTE(review): AutoPtr presumably owns the new object until release(), so it
// is freed if copying the mappings throws - confirm.
338 AutoPtr
<ValueMapPostingSource
> res(new ValueMapPostingSource(get_slot()));
339 map
<string
, double>::const_iterator i
;
// Re-add each mapping through add_mapping(), which also updates the copy's
// max_weight_in_map.
340 for (i
= weight_map
.begin(); i
!= weight_map
.end(); ++i
) {
341 res
->add_mapping(i
->first
, i
->second
);
343 res
->set_default_weight(default_weight
);
344 return res
.release();
// Return the fixed name string for this posting source class.
348 ValueMapPostingSource::name() const
350 return string("Xapian::ValueMapPostingSource");
// Serialise this source.  The layout built below is: encoded slot number,
// serialised default weight, then for each mapping the encoded key length,
// the key bytes and the serialised weight.
354 ValueMapPostingSource::serialise() const
356 string result
= encode_length(get_slot());
357 result
+= serialise_double(default_weight
);
359 map
<string
, double>::const_iterator i
;
360 for (i
= weight_map
.begin(); i
!= weight_map
.end(); ++i
) {
// The key is length-prefixed.
361 result
.append(encode_length(i
->first
.size()));
362 result
.append(i
->first
);
363 result
.append(serialise_double(i
->second
));
// Build a new ValueMapPostingSource from the serialised string s, reading the
// fields in the order serialise() writes them: slot number, default weight,
// then key/weight mappings.
369 ValueMapPostingSource
*
370 ValueMapPostingSource::unserialise(const string
&s
) const
// p and end delimit the bytes of s.
372 const char * p
= s
.data();
373 const char * end
= p
+ s
.size();
375 Xapian::valueno new_slot
;
376 decode_length(&p
, end
, new_slot
);
// NOTE(review): AutoPtr presumably owns the new object until release(), so it
// is freed if decoding throws part-way through - confirm.
377 AutoPtr
<ValueMapPostingSource
> res(new ValueMapPostingSource(new_slot
));
378 res
->set_default_weight(unserialise_double(&p
, end
));
// Each mapping: key length, key bytes, then the weight.
// NOTE(review): presumably repeated until p reaches end, with p advanced past
// the key before the weight is read - confirm.
// NOTE(review): decode_length_and_check() presumably also verifies that
// keylen bytes are available before end - confirm.
381 decode_length_and_check(&p
, end
, keylen
);
382 string
key(p
, keylen
);
384 res
->add_mapping(key
, unserialise_double(&p
, end
));
386 return res
.release();
// Initialise for database db_: run the base class init(), then set the
// maximum weight to the larger of the biggest mapped weight and the default
// weight.
390 ValueMapPostingSource::init(const Database
& db_
)
392 ValuePostingSource::init(db_
);
393 set_maxweight(max(max_weight_in_map
, default_weight
));
397 ValueMapPostingSource::get_description() const
399 string
desc("Xapian::ValueMapPostingSource(slot=");
400 desc
+= str(get_slot());
405 FixedWeightPostingSource::FixedWeightPostingSource(double wt
)
408 // The weight is fixed at wt, so that's the maxweight too. So just store wt
409 // as the maxweight and we can read it from there when we need it.
414 FixedWeightPostingSource::get_termfreq_min() const
420 FixedWeightPostingSource::get_termfreq_est() const
426 FixedWeightPostingSource::get_termfreq_max() const
// The fixed weight is kept as the maxweight (see the constructor comment), so
// it is read back from get_maxweight().
432 FixedWeightPostingSource::get_weight() const
434 return get_maxweight();
// Advance over the posting list of the empty term (string()).
//
// min_wt is the minimum weight the caller is interested in; it is compared
// with get_maxweight() below.
438 FixedWeightPostingSource::next(double min_wt
)
// Position the iterator at the start of the empty term's posting list.
// NOTE(review): presumably done only on the first call - confirm.
// NOTE(review): the empty term presumably matches every document in the
// database - confirm.
442 it
= db
.postlist_begin(string());
// Nothing more to do once the end of the posting list has been reached.
447 if (it
== db
.postlist_end(string())) return;
// Move to just past the docid remembered by check().
// NOTE(review): presumably only when check_docid is non-zero, and
// check_docid is presumably reset afterwards - confirm.
450 it
.skip_to(check_docid
+ 1);
// The requested minimum weight exceeds the fixed weight, so move straight to
// the end of the posting list.
454 if (min_wt
> get_maxweight()) {
455 it
= db
.postlist_end(string());
// Move the iterator over the empty term's posting list forward to min_docid.
//
// min_wt is the minimum weight the caller is interested in; it is compared
// with get_maxweight() below.
460 FixedWeightPostingSource::skip_to(Xapian::docid min_docid
, double min_wt
)
// Position the iterator at the start of the empty term's posting list.
// NOTE(review): presumably done only when iteration has not started yet -
// confirm.
464 it
= db
.postlist_begin(string());
466 if (it
== db
.postlist_end(string())) return;
// A target below the docid remembered by check() is raised to the docid after
// it.
// NOTE(review): presumably applied only when check_docid is non-zero -
// confirm.
470 if (min_docid
< check_docid
)
471 min_docid
= check_docid
+ 1;
// The requested minimum weight exceeds the fixed weight, so move straight to
// the end of the posting list.
475 if (min_wt
> get_maxweight()) {
476 it
= db
.postlist_end(string());
479 it
.skip_to(min_docid
);
// Record min_docid in check_docid instead of moving the iterator; the weight
// parameter is unnamed and unused.  at_end() and get_docid() consult
// check_docid before looking at the iterator.
483 FixedWeightPostingSource::check(Xapian::docid min_docid
, double)
485 // We're guaranteed not to be called if the document doesn't
486 // exist, so just remember the docid passed, and return true.
487 check_docid
= min_docid
;
// Report whether iteration has finished.
492 FixedWeightPostingSource::at_end() const
// While a docid from check() is pending (non-zero check_docid) the source is
// reported as not at the end.
494 if (check_docid
!= 0) return false;
// Otherwise: at the end only if started and the iterator equals the end of
// the empty term's posting list.
495 return started
&& it
== db
.postlist_end(string());
// Return the current docid; a docid remembered by check() (non-zero
// check_docid) takes precedence.
// NOTE(review): otherwise the docid presumably comes from the iterator -
// confirm.
499 FixedWeightPostingSource::get_docid() const
501 if (check_docid
!= 0) return check_docid
;
// Return a newly allocated FixedWeightPostingSource with the same weight,
// which is read from get_maxweight().
505 FixedWeightPostingSource
*
506 FixedWeightPostingSource::clone() const
508 return new FixedWeightPostingSource(get_maxweight());
// Return the fixed name string for this posting source class.
512 FixedWeightPostingSource::name() const
514 return string("Xapian::FixedWeightPostingSource");
// Serialise this source as its weight (read from get_maxweight()) passed
// through serialise_double().
518 FixedWeightPostingSource::serialise() const
520 return serialise_double(get_maxweight());
// Build a new FixedWeightPostingSource from the serialised string s, which
// holds a serialised double weight (see serialise()).
523 FixedWeightPostingSource
*
524 FixedWeightPostingSource::unserialise(const string
&s
) const
// p and s_end delimit the bytes of s.
526 const char * p
= s
.data();
527 const char * s_end
= p
+ s
.size();
// unserialise_double() is given &p, so it can move p past what it reads.
528 double new_wt
= unserialise_double(&p
, s_end
);
// NOTE(review): presumably guarded by a check that p has not reached s_end,
// i.e. that data remains after the weight - confirm.
530 throw Xapian::NetworkError("Bad serialised FixedWeightPostingSource - junk at end");
532 return new FixedWeightPostingSource(new_wt
);
// Initialise for database db_.
536 FixedWeightPostingSource::init(const Xapian::Database
& db_
)
// The term frequency is taken to be the database's document count.
539 termfreq
= db_
.get_doccount();
545 FixedWeightPostingSource::get_description() const
547 string
desc("Xapian::FixedWeightPostingSource(wt=");
548 desc
+= str(get_maxweight());