scriptindex: Fix weird error cases
[xapian.git] / xapian-core / matcher / phrasepostlist.cc
blob133b5da8f24038fdd6d270d0318058e5dbca51a3
1 /** @file
2 * @brief Return docs containing terms forming a particular phrase.
3 */
4 /* Copyright (C) 2006,2007,2009,2010,2011,2014,2015,2017 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #include <config.h>
23 #include "phrasepostlist.h"
25 #include "debuglog.h"
26 #include "backends/positionlist.h"
27 #include "omassert.h"
28 #include "str.h"
30 #include <algorithm>
31 #include <vector>
33 using namespace std;
35 PhrasePostList::PhrasePostList(PostList *source_,
36 Xapian::termpos window_,
37 const vector<PostList*>::const_iterator &terms_begin,
38 const vector<PostList*>::const_iterator &terms_end)
39 : SelectPostList(source_), window(window_), terms(terms_begin, terms_end)
41 size_t n = terms.size();
42 Assert(n > 1);
43 poslists = new PositionList*[n];
46 PhrasePostList::~PhrasePostList()
48 delete [] poslists;
51 void
52 PhrasePostList::start_position_list(unsigned i)
54 poslists[i] = terms[i]->read_position_list();
57 bool
58 PhrasePostList::test_doc()
60 LOGCALL(MATCH, bool, "PhrasePostList::test_doc", NO_ARGS);
62 start_position_list(0);
63 if (!poslists[0]->next())
64 RETURN(false);
66 unsigned read_hwm = 0;
67 Xapian::termpos b;
68 do {
69 Xapian::termpos base = poslists[0]->get_position();
70 Xapian::termpos pos = base;
71 unsigned i = 0;
72 do {
73 if (++i == terms.size()) RETURN(true);
74 if (i > read_hwm) {
75 read_hwm = i;
76 start_position_list(i);
78 if (!poslists[i]->skip_to(pos + 1))
79 RETURN(false);
80 pos = poslists[i]->get_position();
81 b = pos + (terms.size() - i);
82 } while (b - base <= window);
83 // Advance the start of the window to the first position it could match
84 // in given the current position of term i.
85 } while (poslists[0]->skip_to(b - window));
86 RETURN(false);
89 Xapian::termcount
90 PhrasePostList::get_wdf() const
92 // Calculate an estimate for the wdf of a phrase postlist.
94 // We use the minimum wdf of a sub-postlist as our estimate. See the
95 // comment in NearPostList::get_wdf() for justification of this estimate.
96 vector<PostList *>::const_iterator i = terms.begin();
97 Xapian::termcount wdf = (*i)->get_wdf();
98 while (++i != terms.end()) {
99 wdf = min(wdf, (*i)->get_wdf());
101 return wdf;
104 Xapian::doccount
105 PhrasePostList::get_termfreq_est() const
107 // It's hard to estimate how many times the phrase will occur as
108 // it depends a lot on the phrase, but usually the phrase will
109 // occur significantly less often than the individual terms.
110 return source->get_termfreq_est() / 3;
113 TermFreqs
114 PhrasePostList::get_termfreq_est_using_stats(
115 const Xapian::Weight::Internal & stats) const
117 LOGCALL(MATCH, TermFreqs, "PhrasePostList::get_termfreq_est_using_stats", stats);
118 // No idea how to estimate this - do the same as get_termfreq_est() for
119 // now.
120 TermFreqs result(source->get_termfreq_est_using_stats(stats));
121 result.termfreq /= 3;
122 result.reltermfreq /= 3;
123 result.collfreq /= 3;
124 RETURN(result);
127 string
128 PhrasePostList::get_description() const
130 string m = "(Phrase ";
131 m += str(window);
132 m += ' ';
133 m += source->get_description();
134 m += ")";
135 return m;