2 * @brief Xapian::Weight base class
4 /* Copyright (C) 2007,2008,2009,2014,2017,2019,2024 Olly Betts
5 * Copyright (C) 2009 Lemur Consulting Ltd
6 * Copyright (C) 2017 Vivek Pal
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License, or (at your option) any later version.
13 * This program is distributed in the hope that it will be useful
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "xapian/weight.h"
27 #include "backends/leafpostlist.h"
28 #include "weightinternal.h"
33 #include "xapian/error.h"
// Set up this Weight's cached statistics (overload taking only the collection
// stats, the query length and the shard).
//
// stats:        source of collection_size, rset_size, the average length,
//               total_length and the db_* bounds.
// query_length: stored in query_length_.
// shard:        source of the doclength and unique-terms bounds; it is
//               dereferenced without a null check whenever one of
//               DOC_LENGTH_MAX, DOC_LENGTH_MIN, UNIQUE_TERMS_MAX or
//               UNIQUE_TERMS_MIN is set in stats_needed.
40 Weight::init_(const Internal
& stats
, Xapian::termcount query_length
,
41 const Xapian::Database::Internal
* shard
)
43 LOGCALL_VOID(MATCH
, "Weight::init_", stats
| query_length
| shard
);
// collection_size_ and rset_size_ are copied regardless of stats_needed.
44 collection_size_
= stats
.collection_size
;
45 rset_size_
= stats
.rset_size
;
// Each statistic below is fetched only if its flag is set in stats_needed.
46 if (stats_needed
& AVERAGE_LENGTH
)
47 average_length_
= stats
.get_average_length();
// The doclength and unique-terms bounds are asked of the shard.
48 if (stats_needed
& DOC_LENGTH_MAX
)
49 doclength_upper_bound_
= shard
->get_doclength_upper_bound();
50 if (stats_needed
& DOC_LENGTH_MIN
)
51 doclength_lower_bound_
= shard
->get_doclength_lower_bound();
52 if (stats_needed
& UNIQUE_TERMS_MAX
)
53 unique_terms_upper_bound_
= shard
->get_unique_terms_upper_bound();
54 if (stats_needed
& UNIQUE_TERMS_MIN
)
55 unique_terms_lower_bound_
= shard
->get_unique_terms_lower_bound();
56 if (stats_needed
& TOTAL_LENGTH
)
57 total_length_
= stats
.total_length
;
// The db_* bounds are read from stats rather than from the shard.
58 if (stats_needed
& DB_DOC_LENGTH_MAX
)
59 db_doclength_upper_bound_
= stats
.db_doclength_upper_bound
;
60 if (stats_needed
& DB_DOC_LENGTH_MIN
)
61 db_doclength_lower_bound_
= stats
.db_doclength_lower_bound
;
62 if (stats_needed
& DB_UNIQUE_TERMS_MAX
)
63 db_unique_terms_upper_bound_
= stats
.db_unique_terms_upper_bound
;
64 if (stats_needed
& DB_UNIQUE_TERMS_MIN
)
65 db_unique_terms_lower_bound_
= stats
.db_unique_terms_lower_bound
;
70 query_length_
= query_length
;
// Set up this Weight's cached statistics for a single term (overload taking
// the term, wqf, factor, the shard and a postlist pointer).
//
// stats:         source of collection_size, rset_size, the average length,
//                total_length, the db_* bounds and, via get_stats(), the
//                frequencies for term.
// query_length:  stored in query_length_.
// term:          key passed to stats.get_stats().
// wqf, factor:   passed to the call logging below.
//                NOTE(review): confirm where these are stored/applied.
// shard:         source of the doclength and unique-terms bounds; it is
//                dereferenced without a null check when one of those flags
//                is set in stats_needed.
// postlist_void: static_cast to LeafPostList* when WDF_MAX is set, so it must
//                point at a LeafPostList in that case.
76 Weight::init_(const Internal
& stats
, Xapian::termcount query_length
,
77 const string
& term
, Xapian::termcount wqf
, double factor
,
78 const Xapian::Database::Internal
* shard
,
81 LOGCALL_VOID(MATCH
, "Weight::init_", stats
| query_length
| term
| wqf
| factor
| shard
| postlist_void
);
// collection_size_ and rset_size_ are copied regardless of stats_needed.
82 collection_size_
= stats
.collection_size
;
83 rset_size_
= stats
.rset_size
;
// Each statistic below is fetched only if its flag is set in stats_needed.
84 if (stats_needed
& AVERAGE_LENGTH
)
85 average_length_
= stats
.get_average_length();
// The doclength and unique-terms bounds are asked of the shard.
86 if (stats_needed
& DOC_LENGTH_MAX
)
87 doclength_upper_bound_
= shard
->get_doclength_upper_bound();
88 if (stats_needed
& DOC_LENGTH_MIN
)
89 doclength_lower_bound_
= shard
->get_doclength_lower_bound();
90 if (stats_needed
& UNIQUE_TERMS_MAX
)
91 unique_terms_upper_bound_
= shard
->get_unique_terms_upper_bound();
92 if (stats_needed
& UNIQUE_TERMS_MIN
)
93 unique_terms_lower_bound_
= shard
->get_unique_terms_lower_bound();
94 if (stats_needed
& TOTAL_LENGTH
)
95 total_length_
= stats
.total_length
;
// The wdf upper bound is obtained from the postlist after casting the
// pointer to LeafPostList*.
96 if (stats_needed
& WDF_MAX
) {
97 auto postlist
= static_cast<LeafPostList
*>(postlist_void
);
98 wdf_upper_bound_
= postlist
->get_wdf_upper_bound();
// The db_* bounds are read from stats rather than from the shard.
100 if (stats_needed
& DB_DOC_LENGTH_MAX
)
101 db_doclength_upper_bound_
= stats
.db_doclength_upper_bound
;
102 if (stats_needed
& DB_DOC_LENGTH_MIN
)
103 db_doclength_lower_bound_
= stats
.db_doclength_lower_bound
;
104 if (stats_needed
& DB_UNIQUE_TERMS_MAX
)
105 db_unique_terms_upper_bound_
= stats
.db_unique_terms_upper_bound
;
106 if (stats_needed
& DB_UNIQUE_TERMS_MIN
)
107 db_unique_terms_lower_bound_
= stats
.db_unique_terms_lower_bound
;
108 if (stats_needed
& DB_WDF_MAX
) {
109 // FIXME: Nothing uses this stat, so for now return a correct but
110 // likely fairly loose upper bound. Once we have something that
111 // wants to use this we can implement tracking a per-term wdf_max
112 // across the whole database.
// i.e. the db-wide doclength upper bound stands in for the wdf bound.
113 db_wdf_upper_bound_
= stats
.db_doclength_upper_bound
;
// Any one of the three frequency flags triggers a single get_stats() call,
// which is handed termfreq_, reltermfreq_ and collectionfreq_ together; its
// bool result is kept in ok.
115 if (stats_needed
& (TERMFREQ
| RELTERMFREQ
| COLLECTION_FREQ
)) {
116 bool ok
= stats
.get_stats(term
,
117 termfreq_
, reltermfreq_
, collectionfreq_
);
121 query_length_
= query_length
;
// Set up this Weight's cached statistics when the term frequencies are
// supplied by the caller instead of being looked up by term.
//
// stats:           source of collection_size, rset_size, the average length,
//                  total_length and the db_* bounds.
// query_length:    stored in query_length_.
// factor:          passed to the call logging below.
//                  NOTE(review): confirm where it is applied.
// termfreq:        stored in termfreq_.
// reltermfreq:     stored in reltermfreq_.
// collection_freq: stored in collectionfreq_.
// shard:           source of the doclength and unique-terms bounds; it is
//                  dereferenced without a null check when one of those flags
//                  is set in stats_needed.
127 Weight::init_(const Internal
& stats
, Xapian::termcount query_length
,
128 double factor
, Xapian::doccount termfreq
,
129 Xapian::doccount reltermfreq
, Xapian::termcount collection_freq
,
130 const Xapian::Database::Internal
* shard
)
132 LOGCALL_VOID(MATCH
, "Weight::init_", stats
| query_length
| factor
| termfreq
| reltermfreq
| collection_freq
| shard
);
// collection_size_ and rset_size_ are copied regardless of stats_needed.
134 collection_size_
= stats
.collection_size
;
135 rset_size_
= stats
.rset_size
;
// Each statistic below is fetched only if its flag is set in stats_needed.
136 if (stats_needed
& AVERAGE_LENGTH
)
137 average_length_
= stats
.get_average_length();
// This overload receives no postlist, so asking for either DOC_LENGTH_MAX or
// WDF_MAX sets both doclength_upper_bound_ and wdf_upper_bound_.
138 if (stats_needed
& (DOC_LENGTH_MAX
| WDF_MAX
)) {
139 doclength_upper_bound_
= shard
->get_doclength_upper_bound();
140 // The doclength is an upper bound on the wdf. This is obviously true
141 // for normal terms, but SynonymPostList ensures that it is also true
142 // for synonym terms by clamping the wdf values returned to the
// doclength.
145 // (This clamping is only actually necessary in cases where a
146 // constituent term of the synonym is repeated.)
147 wdf_upper_bound_
= doclength_upper_bound_
;
149 if (stats_needed
& DOC_LENGTH_MIN
)
150 doclength_lower_bound_
= shard
->get_doclength_lower_bound();
151 if (stats_needed
& UNIQUE_TERMS_MAX
)
152 unique_terms_upper_bound_
= shard
->get_unique_terms_upper_bound();
153 if (stats_needed
& UNIQUE_TERMS_MIN
)
154 unique_terms_lower_bound_
= shard
->get_unique_terms_lower_bound();
155 if (stats_needed
& TOTAL_LENGTH
)
156 total_length_
= stats
.total_length
;
// Same pairing for the db_* bounds: either DB_DOC_LENGTH_MAX or DB_WDF_MAX
// sets both db_doclength_upper_bound_ and db_wdf_upper_bound_.
157 if (stats_needed
& (DB_DOC_LENGTH_MAX
| DB_WDF_MAX
)) {
158 db_doclength_upper_bound_
= stats
.db_doclength_upper_bound
;
159 // The doclength is an upper bound on the wdf. This is obviously true
160 // for normal terms, but SynonymPostList ensures that it is also true
161 // for synonym terms by clamping the wdf values returned to the
// doclength.
164 // (This clamping is only actually necessary in cases where a
165 // constituent term of the synonym is repeated.)
166 db_wdf_upper_bound_
= db_doclength_upper_bound_
;
168 if (stats_needed
& DB_DOC_LENGTH_MIN
)
169 db_doclength_lower_bound_
= stats
.db_doclength_lower_bound
;
170 if (stats_needed
& DB_UNIQUE_TERMS_MAX
)
171 db_unique_terms_upper_bound_
= stats
.db_unique_terms_upper_bound
;
172 if (stats_needed
& DB_UNIQUE_TERMS_MIN
)
173 db_unique_terms_lower_bound_
= stats
.db_unique_terms_lower_bound
;
// The caller-supplied frequencies and query length are stored without
// consulting stats_needed.
175 termfreq_
= termfreq
;
176 reltermfreq_
= reltermfreq
;
177 query_length_
= query_length
;
178 collectionfreq_
= collection_freq
;
// Out-of-line destructor definition; the body is intentionally empty.
183 Weight::~Weight() { }
// Const member taking no arguments.
// NOTE(review): presumably returns the short name identifying the weighting
// scheme - confirm what this base-class version returns.
192 Weight::short_name() const
// Base-class serialise(): throws Xapian::UnimplementedError with a message
// saying serialise() is not supported for this subclass.
// NOTE(review): presumably subclasses that can be serialised override this -
// confirm against the class declaration.
198 Weight::serialise() const
200 throw Xapian::UnimplementedError("serialise() not supported for this Xapian::Weight subclass");
// Base-class unserialise(): the string argument is unnamed and therefore
// unused; throws Xapian::UnimplementedError with a message saying
// unserialise() is not supported for this subclass.
204 Weight::unserialise(const string
&) const
206 throw Xapian::UnimplementedError("unserialise() not supported for this Xapian::Weight subclass");
// Base-class get_sumextra(): the Xapian::termcount parameters are unnamed, so
// this version cannot make use of their values.
// NOTE(review): presumably returns a constant for schemes with no extra
// per-document weight - confirm.
210 Weight::get_sumextra(Xapian::termcount
,
212 Xapian::termcount
) const
// Const member taking no arguments.
// NOTE(review): presumably an upper bound on what get_sumextra() can return -
// confirm what this base-class version returns.
218 Weight::get_maxextra() const
// Create a weighting scheme object from its textual description.
//
// s:   description string; it is walked through the C-string pointer p.
// reg: registry in which the scheme is looked up.
//
// Returns whatever create_from_parameters() of the registered scheme returns
// when handed the text p points at after the scan.
// NOTE(review): presumably `scheme` holds the leading scheme name taken from
// s and p is left at the start of the parameters - confirm.
224 Weight::create(const string
& s
, const Registry
& reg
)
226 const char *p
= s
.c_str();
// Stop scanning at the terminating NUL of s.
230 if (*p
== '\0') break;
// NOTE(review): the result of get_weighting_scheme() is dereferenced directly
// on this line - confirm it cannot be null for an unregistered scheme name.
236 return reg
.get_weighting_scheme(scheme
)->create_from_parameters(p
);
// Base-class create_from_parameters(): the parameter string is unnamed and
// therefore unused; throws Xapian::UnimplementedError with a message saying
// create_from_parameters() is not supported for this subclass.
240 Weight::create_from_parameters(const char *) const
242 throw Xapian::UnimplementedError("create_from_parameters() not supported for this Xapian::Weight subclass");