2 * @brief N-way AND postlist
4 /* Copyright (C) 2007,2009,2011,2012,2015,2017 Olly Betts
5 * Copyright (C) 2009 Lemur Consulting Ltd
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
24 #include "multiandpostlist.h"
31 MultiAndPostList::allocate_plist_and_max_wt()
33 plist
= new PostList
* [n_kids
];
35 max_wt
= new double [n_kids
]();
43 MultiAndPostList::~MultiAndPostList()
46 for (size_t i
= 0; i
< n_kids
; ++i
) {
55 MultiAndPostList::get_termfreq_min() const
57 // The number of matching documents is minimised when we have the minimum
58 // number of matching documents from each sub-postlist, and these are
59 // maximally disjoint.
60 Xapian::doccount sum
= plist
[0]->get_termfreq_min();
62 for (size_t i
= 1; i
< n_kids
; ++i
) {
63 Xapian::doccount sum_old
= sum
;
64 sum
+= plist
[i
]->get_termfreq_min();
65 // If sum < sum_old, the calculation overflowed and the true sum
66 // must be > db_size. Since we added a value <= db_size,
67 // subtracting db_size must un-overflow us.
68 if (sum
>= sum_old
&& sum
<= db_size
) {
69 // It's possible there's no overlap.
74 AssertRelParanoid(sum
,<=,MultiAndPostList::get_termfreq_est());
80 MultiAndPostList::get_termfreq_max() const
82 // We can't match more documents than the least of our sub-postlists.
83 Xapian::doccount result
= plist
[0]->get_termfreq_max();
84 for (size_t i
= 1; i
< n_kids
; ++i
) {
85 Xapian::doccount tf
= plist
[i
]->get_termfreq_max();
86 if (tf
< result
) result
= tf
;
92 MultiAndPostList::get_termfreq_est() const
94 LOGCALL(MATCH
, Xapian::doccount
, "MultiAndPostList::get_termfreq_est", NO_ARGS
);
95 // We shortcut an empty shard and avoid creating a postlist tree for it.
97 // We calculate the estimate assuming independence. With this assumption,
98 // the estimate is the product of the estimates for the sub-postlists
99 // divided by db_size (n_kids - 1) times.
100 double result
= plist
[0]->get_termfreq_est();
101 for (size_t i
= 1; i
< n_kids
; ++i
) {
102 result
= (result
* plist
[i
]->get_termfreq_est()) / db_size
;
104 return static_cast<Xapian::doccount
>(result
+ 0.5);
108 MultiAndPostList::get_termfreq_est_using_stats(
109 const Xapian::Weight::Internal
& stats
) const
111 LOGCALL(MATCH
, TermFreqs
, "MultiAndPostList::get_termfreq_est_using_stats", stats
);
112 // We calculate the estimate assuming independence. With this assumption,
113 // the estimate is the product of the estimates for the sub-postlists
114 // divided by db_size (n_kids - 1) times.
115 TermFreqs
freqs(plist
[0]->get_termfreq_est_using_stats(stats
));
117 double freqest
= double(freqs
.termfreq
);
118 double relfreqest
= double(freqs
.reltermfreq
);
119 double collfreqest
= double(freqs
.collfreq
);
121 // Our caller should have ensured this.
122 Assert(stats
.collection_size
);
124 for (size_t i
= 1; i
< n_kids
; ++i
) {
125 freqs
= plist
[i
]->get_termfreq_est_using_stats(stats
);
127 // If the collection is empty, freqest should be 0 already, so leave
129 freqest
= (freqest
* freqs
.termfreq
) / stats
.collection_size
;
130 if (usual(stats
.total_length
!= 0)) {
131 collfreqest
= (collfreqest
* freqs
.collfreq
) / stats
.total_length
;
134 // If the rset is empty, relfreqest should be 0 already, so leave
136 if (stats
.rset_size
!= 0)
137 relfreqest
= (relfreqest
* freqs
.reltermfreq
) / stats
.rset_size
;
140 RETURN(TermFreqs(static_cast<Xapian::doccount
>(freqest
+ 0.5),
141 static_cast<Xapian::doccount
>(relfreqest
+ 0.5),
142 static_cast<Xapian::termcount
>(collfreqest
+ 0.5)));
146 MultiAndPostList::get_maxweight() const
152 MultiAndPostList::get_docid() const
158 MultiAndPostList::get_doclength() const
161 Xapian::termcount doclength
= plist
[0]->get_doclength();
162 for (size_t i
= 1; i
< n_kids
; ++i
) {
163 AssertEq(doclength
, plist
[i
]->get_doclength());
169 MultiAndPostList::get_unique_terms() const
172 Xapian::termcount unique_terms
= plist
[0]->get_unique_terms();
173 for (size_t i
= 1; i
< n_kids
; ++i
) {
174 AssertEq(unique_terms
, plist
[i
]->get_unique_terms());
180 MultiAndPostList::get_weight() const
184 for (size_t i
= 0; i
< n_kids
; ++i
) {
185 result
+= plist
[i
]->get_weight();
191 MultiAndPostList::at_end() const
197 MultiAndPostList::recalc_maxweight()
200 for (size_t i
= 0; i
< n_kids
; ++i
) {
201 double new_max
= plist
[i
]->recalc_maxweight();
203 max_total
+= new_max
;
209 MultiAndPostList::find_next_match(double w_min
)
212 if (plist
[0]->at_end()) {
216 did
= plist
[0]->get_docid();
217 for (size_t i
= 1; i
< n_kids
; ++i
) {
219 check_helper(i
, did
, w_min
, valid
);
221 next_helper(0, w_min
);
222 goto advanced_plist0
;
224 if (plist
[i
]->at_end()) {
228 Xapian::docid new_did
= plist
[i
]->get_docid();
229 if (new_did
!= did
) {
230 skip_to_helper(0, new_did
, w_min
);
231 goto advanced_plist0
;
238 MultiAndPostList::next(double w_min
)
240 next_helper(0, w_min
);
241 return find_next_match(w_min
);
245 MultiAndPostList::skip_to(Xapian::docid did_min
, double w_min
)
247 skip_to_helper(0, did_min
, w_min
);
248 return find_next_match(w_min
);
252 MultiAndPostList::get_description() const
255 desc
+= plist
[0]->get_description();
256 for (size_t i
= 1; i
< n_kids
; ++i
) {
258 desc
+= plist
[i
]->get_description();
265 MultiAndPostList::get_wdf() const
267 Xapian::termcount totwdf
= 0;
268 for (size_t i
= 0; i
< n_kids
; ++i
) {
269 totwdf
+= plist
[i
]->get_wdf();
275 MultiAndPostList::count_matching_subqs() const
277 Xapian::termcount total
= 0;
278 for (size_t i
= 0; i
< n_kids
; ++i
) {
279 total
+= plist
[i
]->count_matching_subqs();
285 MultiAndPostList::gather_position_lists(OrPositionList
* orposlist
)
287 for (size_t i
= 0; i
< n_kids
; ++i
) {
288 plist
[i
]->gather_position_lists(orposlist
);