Revert "Fix integer type used by ESet"
[xapian.git] / xapian-core / weight / inl2weight.cc
blobee4a2b351477f43ec73413d8adf4c204a7d5e329
1 /** @file
2 * @brief Xapian::InL2Weight class - the InL2 weighting scheme of the DFR framework.
3 */
4 /* Copyright (C) 2013,2014 Aarsh Shah
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2 of the
9 * License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #include <config.h>
23 #include "xapian/weight.h"
24 #include "common/log2.h"
26 #include "serialise-double.h"
28 #include "xapian/error.h"
30 using namespace std;
32 namespace Xapian {
34 InL2Weight::InL2Weight(double c)
35 : param_c(c)
37 if (param_c <= 0)
38 throw Xapian::InvalidArgumentError("Parameter c is invalid");
39 need_stat(AVERAGE_LENGTH);
40 need_stat(DOC_LENGTH);
41 need_stat(DOC_LENGTH_MIN);
42 need_stat(COLLECTION_SIZE);
43 need_stat(WDF);
44 need_stat(WDF_MAX);
45 need_stat(WQF);
46 need_stat(TERMFREQ);
49 InL2Weight *
50 InL2Weight::clone() const
52 return new InL2Weight(param_c);
55 void
56 InL2Weight::init(double factor)
58 if (factor == 0.0) {
59 // This object is for the term-independent contribution, and that's
60 // always zero for this scheme.
61 return;
64 double wdfn_upper = get_wdf_upper_bound();
65 if (wdfn_upper == 0) {
66 upper_bound = 0.0;
67 return;
70 double termfreq = get_termfreq();
71 double N = get_collection_size();
73 wdfn_upper *= log2(1 + (param_c * get_average_length()) /
74 get_doclength_lower_bound());
76 // wdfn * L = wdfn / (wdfn + 1) = 1 / (1 + 1 / wdfn).
77 // To maximize the product, we need to minimize the denominator and so we use wdfn_upper in (1 / wdfn).
78 double maximum_wdfn_product_L = wdfn_upper / (wdfn_upper + 1.0);
80 // This term is constant for all documents.
81 double idf_max = log2((N + 1) / (termfreq + 0.5));
83 /* Calculate constant values to be used in get_sumpart() upfront. */
84 wqf_product_idf = get_wqf() * idf_max * factor;
85 c_product_avlen = param_c * get_average_length();
87 upper_bound = wqf_product_idf * maximum_wdfn_product_L * factor;
90 string
91 InL2Weight::name() const
93 return "Xapian::InL2Weight";
96 string
97 InL2Weight::serialise() const
99 return serialise_double(param_c);
102 InL2Weight *
103 InL2Weight::unserialise(const string & s) const
105 const char *ptr = s.data();
106 const char *end = ptr + s.size();
107 double c = unserialise_double(&ptr, end);
108 if (rare(ptr != end))
109 throw Xapian::SerialisationError("Extra data in InL2Weight::unserialise()");
110 return new InL2Weight(c);
113 double
114 InL2Weight::get_sumpart(Xapian::termcount wdf, Xapian::termcount len,
115 Xapian::termcount) const
117 if (wdf == 0) return 0.0;
118 double wdfn = wdf;
120 wdfn *= log2(1 + c_product_avlen / len);
122 double wdfn_product_L = wdfn / (wdfn + 1.0);
124 return (wqf_product_idf * wdfn_product_L);
127 double
128 InL2Weight::get_maxpart() const
130 return upper_bound;
133 double
134 InL2Weight::get_sumextra(Xapian::termcount, Xapian::termcount) const
136 return 0;
139 double
140 InL2Weight::get_maxextra() const
142 return 0;