Process an incomplete final line from a dump file
[xapian.git] / xapian-applications / omega / weight.cc
blobd159f716871b3778175235a32bc97c4c7c709869
1 /** @file
2 * @brief Set the weighting scheme for Omega
3 */
4 /* Copyright (C) 2009,2013,2016 Olly Betts
5 * Copyright (C) 2013 Aarsh Shah
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 #include <config.h>
24 #include "weight.h"
26 #include "stringutils.h"
28 #include <cerrno>
29 #include <cstdlib>
30 #include "common/noreturn.h"
32 using namespace std;
// Forward declaration of parameter_error(), wrapped in XAPIAN_NORETURN.
// NOTE(review): XAPIAN_NORETURN is presumably provided by "common/noreturn.h"
// and marks the function as never returning - confirm against that header.
// The definition of parameter_error() always exits by throwing.
34 XAPIAN_NORETURN(static void
35 parameter_error(const char * param, const string & scheme));
/** Report an invalid weighting scheme parameter.
 *
 *  Always throws a std::string of the form "<msg>: '<scheme>'"; never
 *  returns normally.
 *
 *  @param msg     Description of what is wrong.
 *  @param scheme  The full weighting scheme string being parsed.
 */
static void
parameter_error(const char * msg, const string & scheme)
{
    throw string(msg) + ": '" + scheme + "'";
}
/** Parse one floating point parameter using strtod().
 *
 *  @param p        In/out: points to the text to parse.  On success it is
 *                  advanced to the first character strtod() did not consume;
 *                  on failure it is left unchanged.
 *  @param ptr_val  Out: receives the parsed value on success; left unchanged
 *                  on failure.
 *
 *  @return true if strtod() consumed at least one character and did not set
 *          errno; false otherwise.
 */
static bool
double_param(const char ** p, double * ptr_val)
{
    const char * const start = *p;
    char * stop = NULL;
    // Clear errno first so a non-zero value afterwards is known to come from
    // this strtod() call.
    errno = 0;
    const double parsed = strtod(start, &stop);
    if (stop == start || errno != 0) return false;
    *ptr_val = parsed;
    *p = stop;
    return true;
}
59 static bool
60 type_smoothing_param(const char ** p, Xapian::Weight::type_smoothing * ptr_val)
62 char *end;
63 errno = 0;
64 int v = strtol(*p, &end, 10);
65 if (*p == end || errno || v < 1 || v > 4)
66 return false;
67 *p = end;
68 static const Xapian::Weight::type_smoothing smooth_tab[4] = {
69 Xapian::Weight::TWO_STAGE_SMOOTHING,
70 Xapian::Weight::DIRICHLET_SMOOTHING,
71 Xapian::Weight::ABSOLUTE_DISCOUNT_SMOOTHING,
72 Xapian::Weight::JELINEK_MERCER_SMOOTHING
74 *ptr_val = smooth_tab[v - 1];
75 return true;
78 void
79 set_weighting_scheme(Xapian::Enquire & enq, const string & scheme,
80 bool force_boolean)
82 if (!force_boolean) {
83 if (scheme.empty()) return;
85 if (startswith(scheme, "bm25")) {
86 const char *p = scheme.c_str() + 4;
87 if (*p == '\0') {
88 enq.set_weighting_scheme(Xapian::BM25Weight());
89 return;
91 if (C_isspace(*p)) {
92 double k1 = 1;
93 double k2 = 0;
94 double k3 = 1;
95 double b = 0.5;
96 double min_normlen = 0.5;
97 if (!double_param(&p, &k1))
98 parameter_error("Parameter 1 (k1) is invalid", scheme);
99 if (*p && !double_param(&p, &k2))
100 parameter_error("Parameter 2 (k2) is invalid", scheme);
101 if (*p && !double_param(&p, &k3))
102 parameter_error("Parameter 3 (k3) is invalid", scheme);
103 if (*p && !double_param(&p, &b))
104 parameter_error("Parameter 4 (b) is invalid", scheme);
105 if (*p && !double_param(&p, &min_normlen))
106 parameter_error("Parameter 5 (min_normlen) is invalid", scheme);
107 if (*p)
108 parameter_error("Extra data after parameter 5", scheme);
109 Xapian::BM25Weight wt(k1, k2, k3, b, min_normlen);
110 enq.set_weighting_scheme(wt);
111 return;
115 if (startswith(scheme, "trad")) {
116 const char *p = scheme.c_str() + 4;
117 if (*p == '\0') {
118 enq.set_weighting_scheme(Xapian::TradWeight());
119 return;
121 if (C_isspace(*p)) {
122 double k;
123 if (!double_param(&p, &k))
124 parameter_error("Parameter is invalid", scheme);
125 if (*p)
126 parameter_error("Extra data after parameter", scheme);
127 enq.set_weighting_scheme(Xapian::TradWeight(k));
128 return;
132 if (startswith(scheme, "tfidf")) {
133 const char *p = scheme.c_str() + 5;
134 if (*p == '\0') {
135 enq.set_weighting_scheme(Xapian::TfIdfWeight());
136 return;
138 if (C_isspace(*p)) {
139 enq.set_weighting_scheme(Xapian::TfIdfWeight(p + 1));
140 return;
144 if (startswith(scheme, "inl2")) {
145 const char *p = scheme.c_str() + 4;
146 if (*p == '\0') {
147 enq.set_weighting_scheme(Xapian::InL2Weight());
148 return;
150 if (C_isspace(*p)) {
151 double k;
152 if (!double_param(&p, &k))
153 parameter_error("Parameter is invalid", scheme);
154 if (*p)
155 parameter_error("Extra data after parameter", scheme);
156 enq.set_weighting_scheme(Xapian::InL2Weight(k));
157 return;
161 if (startswith(scheme, "ifb2")) {
162 const char *p = scheme.c_str() + 4;
163 if (*p == '\0') {
164 enq.set_weighting_scheme(Xapian::IfB2Weight());
165 return;
167 if (C_isspace(*p)) {
168 double k;
169 if (!double_param(&p, &k))
170 parameter_error("Parameter is invalid", scheme);
171 if (*p)
172 parameter_error("Extra data after parameter", scheme);
173 enq.set_weighting_scheme(Xapian::IfB2Weight(k));
174 return;
178 if (startswith(scheme, "ineb2")) {
179 const char *p = scheme.c_str() + 5;
180 if (*p == '\0') {
181 enq.set_weighting_scheme(Xapian::IneB2Weight());
182 return;
184 if (C_isspace(*p)) {
185 double k;
186 if (!double_param(&p, &k))
187 parameter_error("Parameter is invalid", scheme);
188 if (*p)
189 parameter_error("Extra data after parameter", scheme);
190 enq.set_weighting_scheme(Xapian::IneB2Weight(k));
191 return;
195 if (startswith(scheme, "bb2")) {
196 const char *p = scheme.c_str() + 3;
197 if (*p == '\0') {
198 enq.set_weighting_scheme(Xapian::BB2Weight());
199 return;
201 if (C_isspace(*p)) {
202 double k;
203 if (!double_param(&p, &k))
204 parameter_error("Parameter is invalid", scheme);
205 if (*p)
206 parameter_error("Extra data after parameter", scheme);
207 enq.set_weighting_scheme(Xapian::BB2Weight(k));
208 return;
212 if (startswith(scheme, "dlh")) {
213 const char *p = scheme.c_str() + 3;
214 if (*p == '\0') {
215 enq.set_weighting_scheme(Xapian::DLHWeight());
216 return;
218 if (C_isspace(*p)) {
219 throw "No parameters are required for DLH";
223 if (startswith(scheme, "pl2")) {
224 const char *p = scheme.c_str() + 3;
225 if (*p == '\0') {
226 enq.set_weighting_scheme(Xapian::PL2Weight());
227 return;
229 if (C_isspace(*p)) {
230 double k;
231 if (!double_param(&p, &k))
232 parameter_error("Parameter is invalid", scheme);
233 if (*p)
234 parameter_error("Extra data after parameter", scheme);
235 enq.set_weighting_scheme(Xapian::PL2Weight(k));
236 return;
240 if (startswith(scheme, "dph")) {
241 const char *p = scheme.c_str() + 3;
242 if (*p == '\0') {
243 enq.set_weighting_scheme(Xapian::DPHWeight());
244 return;
246 if (C_isspace(*p)) {
247 throw "No parameters are required for DPH";
251 if (startswith(scheme, "lm")) {
252 const char *p = scheme.c_str() + 2;
253 if (*p == '\0') {
254 enq.set_weighting_scheme(Xapian::LMWeight());
255 return;
257 if (C_isspace(*p)) {
258 double param_log = 0;
259 Xapian::Weight::type_smoothing type = Xapian::Weight::TWO_STAGE_SMOOTHING;
260 double smoothing1 = 0.7;
261 double smoothing2 = 2000;
262 if (!double_param(&p, &param_log))
263 parameter_error("Parameter 1 (log) is invalid", scheme);
264 if (*p && !type_smoothing_param(&p, &type))
265 parameter_error("Parameter 2 (smoothing_type) is invalid", scheme);
266 if (*p && !double_param(&p, &smoothing1))
267 parameter_error("Parameter 3 (smoothing1) is invalid", scheme);
268 if (*p && !double_param(&p, &smoothing2))
269 parameter_error("Parameter 4 (smoothing2) is invalid", scheme);
270 if (*p)
271 parameter_error("Extra data after parameter 4", scheme);
272 Xapian::LMWeight wt(param_log, type, smoothing1, smoothing2);
273 enq.set_weighting_scheme(wt);
274 return;
278 if (scheme == "coord") {
279 enq.set_weighting_scheme(Xapian::CoordWeight());
280 return;
283 if (scheme != "bool") {
284 throw "Unknown $opt{weighting} setting: " + scheme;
288 enq.set_weighting_scheme(Xapian::BoolWeight());