omegatest: Use test_scriptindex_error in another case
[xapian.git] / xapian-core / matcher / localsubmatch.cc
blobe8bc73e3d2d2dc24553ce745cf9c69f3b4e6fa16
1 /** @file
2 * @brief SubMatch class for a local database.
3 */
4 /* Copyright (C) 2006,2007,2009,2010,2011,2013,2014,2015,2016,2018,2020 Olly Betts
5 * Copyright (C) 2007,2008,2009 Lemur Consulting Ltd
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 #include <config.h>
24 #include "localsubmatch.h"
26 #include "backends/database.h"
27 #include "debuglog.h"
28 #include "api/emptypostlist.h"
29 #include "extraweightpostlist.h"
30 #include "api/leafpostlist.h"
31 #include "omassert.h"
32 #include "queryoptimiser.h"
33 #include "synonympostlist.h"
34 #include "api/termlist.h"
35 #include "weight/weightinternal.h"
37 #include "xapian/error.h"
39 #include "autoptr.h"
40 #include <map>
41 #include <string>
43 using namespace std;
45 /** Xapian::Weight subclass which adds laziness.
47 * For terms from a wildcard when remote databases are involved, we need to
48 * delay calling init_() on the weight object until the stats for the terms
49 * from the wildcard have been collated.
51 class LazyWeight : public Xapian::Weight {
52 LeafPostList * pl;
54 Xapian::Weight * real_wt;
56 Xapian::Weight::Internal * stats;
58 Xapian::termcount qlen;
60 Xapian::termcount wqf;
62 double factor;
64 LazyWeight * clone() const;
66 void init(double factor_);
68 public:
69 LazyWeight(LeafPostList * pl_,
70 Xapian::Weight * real_wt_,
71 Xapian::Weight::Internal * stats_,
72 Xapian::termcount qlen_,
73 Xapian::termcount wqf__,
74 double factor_)
75 : pl(pl_),
76 real_wt(real_wt_),
77 stats(stats_),
78 qlen(qlen_),
79 wqf(wqf__),
80 factor(factor_)
81 { }
83 std::string name() const;
85 std::string serialise() const;
86 LazyWeight * unserialise(const std::string & serialised) const;
88 double get_sumpart(Xapian::termcount wdf,
89 Xapian::termcount doclen,
90 Xapian::termcount uniqterms) const;
91 double get_maxpart() const;
93 double get_sumextra(Xapian::termcount doclen,
94 Xapian::termcount uniqterms) const;
95 double get_maxextra() const;
98 LazyWeight *
99 LazyWeight::clone() const
101 throw Xapian::InvalidOperationError("LazyWeight::clone()");
104 void
105 LazyWeight::init(double factor_)
107 (void)factor_;
108 throw Xapian::InvalidOperationError("LazyWeight::init()");
111 string
112 LazyWeight::name() const
114 string desc = "LazyWeight(";
115 desc += real_wt->name();
116 desc += ")";
117 return desc;
120 string
121 LazyWeight::serialise() const
123 throw Xapian::InvalidOperationError("LazyWeight::serialise()");
126 LazyWeight *
127 LazyWeight::unserialise(const string &) const
129 throw Xapian::InvalidOperationError("LazyWeight::unserialise()");
132 double
133 LazyWeight::get_sumpart(Xapian::termcount wdf,
134 Xapian::termcount doclen,
135 Xapian::termcount uniqterms) const
137 (void)wdf;
138 (void)doclen;
139 (void)uniqterms;
140 throw Xapian::InvalidOperationError("LazyWeight::get_sumpart()");
143 double
144 LazyWeight::get_sumextra(Xapian::termcount doclen,
145 Xapian::termcount uniqterms) const
147 (void)doclen;
148 (void)uniqterms;
149 throw Xapian::InvalidOperationError("LazyWeight::get_sumextra()");
152 double
153 LazyWeight::get_maxpart() const
155 // This gets called first for the case we care about.
156 return pl->resolve_lazy_termweight(real_wt, stats, qlen, wqf, factor);
159 double
160 LazyWeight::get_maxextra() const
162 throw Xapian::InvalidOperationError("LazyWeight::get_maxextra()");
165 bool
166 LocalSubMatch::prepare_match(bool nowait,
167 Xapian::Weight::Internal & total_stats)
169 LOGCALL(MATCH, bool, "LocalSubMatch::prepare_match", nowait | total_stats);
170 (void)nowait;
171 Assert(db);
172 total_stats.accumulate_stats(*db, rset);
173 RETURN(true);
176 void
177 LocalSubMatch::start_match(Xapian::doccount first,
178 Xapian::doccount maxitems,
179 Xapian::doccount check_at_least,
180 Xapian::Weight::Internal & total_stats)
182 LOGCALL_VOID(MATCH, "LocalSubMatch::start_match", first | maxitems | check_at_least | total_stats);
183 (void)first;
184 (void)maxitems;
185 (void)check_at_least;
186 // Store a pointer to the total stats to use when building the Query tree.
187 stats = &total_stats;
190 PostList *
191 LocalSubMatch::get_postlist(MultiMatch * matcher,
192 Xapian::termcount* total_subqs_ptr,
193 Xapian::Weight::Internal&)
195 LOGCALL(MATCH, PostList*, "LocalSubMatch::get_postlist", matcher | total_subqs_ptr | Literal("[total_subqs]"));
197 if (query.empty() || db->get_doccount() == 0)
198 RETURN(new EmptyPostList); // MatchNothing
200 // Build the postlist tree for the query. This calls
201 // LocalSubMatch::open_post_list() for each term in the query.
202 PostList * pl;
204 QueryOptimiser opt(*db, *this, matcher, shard_index);
205 double factor = wt_factory->is_bool_weight_() ? 0.0 : 1.0;
206 pl = query.internal->postlist(&opt, factor);
207 *total_subqs_ptr = opt.get_total_subqs();
210 AutoPtr<Xapian::Weight> extra_wt(wt_factory->clone());
211 // Only uses term-independent stats.
212 extra_wt->init_(*stats, qlen);
213 if (extra_wt->get_maxextra() != 0.0) {
214 // There's a term-independent weight contribution, so we combine the
215 // postlist tree with an ExtraWeightPostList which adds in this
216 // contribution.
217 pl = new ExtraWeightPostList(pl, extra_wt.release(), matcher);
220 RETURN(pl);
223 PostList *
224 LocalSubMatch::make_synonym_postlist(PostList * or_pl, MultiMatch * matcher,
225 double factor,
226 bool wdf_disjoint)
228 LOGCALL(MATCH, PostList *, "LocalSubMatch::make_synonym_postlist", or_pl | matcher | factor | wdf_disjoint);
229 if (rare(or_pl->get_termfreq_max() == 0)) {
230 // or_pl is an EmptyPostList or equivalent.
231 return or_pl;
233 LOGVALUE(MATCH, or_pl->get_termfreq_est());
234 Xapian::termcount len_lb = db->get_doclength_lower_bound();
235 AutoPtr<SynonymPostList> res(new SynonymPostList(or_pl, matcher, len_lb,
236 wdf_disjoint));
237 AutoPtr<Xapian::Weight> wt(wt_factory->clone());
239 TermFreqs freqs;
240 // Avoid calling get_termfreq_est_using_stats() if the database is empty
241 // so we don't need to special case that repeatedly when implementing it.
242 // FIXME: it would be nicer to handle an empty database higher up, though
243 // we need to catch the case where all the non-empty subdatabases have
244 // failed, so we can't just push this right up to the start of get_mset().
245 if (usual(stats->collection_size != 0)) {
246 freqs = or_pl->get_termfreq_est_using_stats(*stats);
248 wt->init_(*stats, qlen, factor,
249 freqs.termfreq, freqs.reltermfreq, freqs.collfreq);
251 res->set_weight(wt.release());
252 RETURN(res.release());
255 LeafPostList *
256 LocalSubMatch::open_post_list(const string& term,
257 Xapian::termcount wqf,
258 double factor,
259 bool need_positions,
260 bool in_synonym,
261 QueryOptimiser * qopt,
262 bool lazy_weight)
264 LOGCALL(MATCH, LeafPostList *, "LocalSubMatch::open_post_list", term | wqf | factor | need_positions | qopt | lazy_weight);
266 bool weighted = (factor != 0.0 && !term.empty());
268 LeafPostList * pl = NULL;
269 if (!term.empty() && !need_positions) {
270 if ((!weighted && !in_synonym) ||
271 !wt_factory->get_sumpart_needs_wdf_()) {
272 Xapian::doccount sub_tf;
273 db->get_freqs(term, &sub_tf, NULL);
274 if (sub_tf == qopt->db_size) {
275 // If we're not going to use the wdf or term positions, and the
276 // term indexes all documents, we can replace it with the
277 // MatchAll postlist, which is especially efficient if there
278 // are no gaps in the docids.
279 pl = db->open_post_list(string());
280 // Set the term name so the postlist looks up the correct term
281 // frequencies - this is necessary if the weighting scheme
282 // needs collection frequency or reltermfreq (termfreq would be
283 // correct anyway since it's just the collection size in this
284 // case).
285 pl->set_term(term);
290 if (!pl) {
291 const LeafPostList * hint = qopt->get_hint_postlist();
292 if (hint)
293 pl = hint->open_nearby_postlist(term);
294 if (!pl)
295 pl = db->open_post_list(term);
296 qopt->set_hint_postlist(pl);
299 if (lazy_weight) {
300 auto res = stats->termfreqs.emplace(term, TermFreqs());
301 if (res.second) {
302 // Term came from a wildcard, but the same term may be elsewhere
303 // in the query so only accumulate its TermFreqs if emplace()
304 // created a new element.
305 db->get_freqs(term,
306 &res.first->second.termfreq,
307 &res.first->second.collfreq);
311 if (weighted) {
312 Xapian::Weight * wt = wt_factory->clone();
313 if (!lazy_weight) {
314 wt->init_(*stats, qlen, term, wqf, factor, pl);
315 if (pl->get_termfreq() > 0)
316 stats->set_max_part(term, wt->get_maxpart());
317 } else {
318 // Delay initialising the actual weight object, so that we can
319 // gather stats for the terms lazily expanded from a wildcard
320 // (needed for the remote database case).
321 wt = new LazyWeight(pl, wt, stats, qlen, wqf, factor);
323 pl->set_termweight(wt);
325 RETURN(pl);