/** @file
 * @brief Xapian::IfB2Weight class - the IfB2 weighting scheme of the DFR framework.
 */
/* Copyright (C) 2013,2014 Aarsh Shah
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
 */
23 #include "xapian/weight.h"
24 #include "common/log2.h"
26 #include "serialise-double.h"
28 #include "xapian/error.h"
34 IfB2Weight::IfB2Weight(double c
)
38 throw Xapian::InvalidArgumentError("Parameter c is invalid");
39 need_stat(AVERAGE_LENGTH
);
40 need_stat(DOC_LENGTH
);
41 need_stat(DOC_LENGTH_MIN
);
42 need_stat(COLLECTION_SIZE
);
43 need_stat(COLLECTION_FREQ
);
51 IfB2Weight::clone() const
53 return new IfB2Weight(param_c
);
57 IfB2Weight::init(double factor
)
60 // This object is for the term-independent contribution, and that's
61 // always zero for this scheme.
65 double wdfn_upper
= get_wdf_upper_bound();
66 if (wdfn_upper
== 0) {
71 double F
= get_collection_freq();
72 double N
= get_collection_size();
74 wdfn_upper
*= log2(1 + (param_c
* get_average_length()) /
75 get_doclength_lower_bound());
77 // This term is constant for all documents.
78 double idf_max
= log2((N
+ 1.0) / (F
+ 0.5));
80 /* Calculate constant values to be used in get_sumpart(). */
81 wqf_product_idf
= get_wqf() * idf_max
* factor
;
82 c_product_avlen
= param_c
* get_average_length();
83 B_constant
= (F
+ 1.0) / get_termfreq();
85 // wdfn * B = wdfn * (F + 1.0) / (get_termfreq() * (wdfn + 1.0)).
86 // By cancelling out wdfn, we get (F + 1.0) / (get_termfreq() * (1.0 + 1.0 / wdfn)).
87 // In order to maximize the product, we need to minimize the denominator, and so we use wdfn_upper.
88 double max_wdfn_product_B
= wdfn_upper
* B_constant
/ (wdfn_upper
+ 1.0);
90 upper_bound
= wqf_product_idf
* max_wdfn_product_B
* factor
;
94 IfB2Weight::name() const
96 return "Xapian::IfB2Weight";
100 IfB2Weight::serialise() const
102 return serialise_double(param_c
);
106 IfB2Weight::unserialise(const string
& s
) const
108 const char *ptr
= s
.data();
109 const char *end
= ptr
+ s
.size();
110 double c
= unserialise_double(&ptr
, end
);
111 if (rare(ptr
!= end
))
112 throw Xapian::SerialisationError("Extra data in IfB2Weight::unserialise()");
113 return new IfB2Weight(c
);
117 IfB2Weight::get_sumpart(Xapian::termcount wdf
, Xapian::termcount len
,
118 Xapian::termcount
) const
120 if (wdf
== 0) return 0.0;
122 wdfn
*= log2(1 + c_product_avlen
/ len
);
124 double wdfn_product_B
= wdfn
* B_constant
/ (wdfn
+ 1.0);
126 return (wqf_product_idf
* wdfn_product_B
);
// NOTE(review): only this signature line survived extraction; the return type
// and the function body are not visible here.  Presumably this returns the
// upper_bound member that init() computes - confirm against the full file.
130 IfB2Weight::get_maxpart() const
// NOTE(review): only the signature survived extraction; the return type and
// the function body are not visible here.  Both parameters are unnamed, so
// they are unused.  The comment in init() says the term-independent
// contribution is always zero for this scheme, so this presumably returns
// 0 - confirm against the full file.
136 IfB2Weight::get_sumextra(Xapian::termcount
, Xapian::termcount
) const
142 IfB2Weight::get_maxextra() const