/* mergepostlist.cc: merge postlists from different databases
 *
 * Copyright 1999,2000,2001 BrightStation PLC
 * Copyright 2002 Ananova Ltd
 * Copyright 2002,2003,2004,2006,2008,2009,2011,2015,2016 Olly Betts
 * Copyright 2007,2009 Lemur Consulting Ltd
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
 */
25 #include "mergepostlist.h"
27 #include "multimatch.h"
28 #include "api/emptypostlist.h"
29 #include "branchpostlist.h"
32 #include "valuestreamdocument.h"
34 #include "xapian/error.h"
36 // NB don't prune - even with one sublist we still translate docids...
// Destructor for MergePostList.
//
// Logs the call with LOGCALL_DTOR and then iterates over every PostList
// pointer stored in plists.
//
// NOTE(review): the loop body and the closing braces are absent from this
// listing (it jumps from listing line 42 to 48).  Presumably each sublist is
// released inside the loop - confirm against the complete source before
// relying on that.
38 MergePostList::~MergePostList()
40 LOGCALL_DTOR(MATCH
, "MergePostList");
41 std::vector
<PostList
*>::const_iterator i
;
42 for (i
= plists
.begin(); i
!= plists
.end(); ++i
) {
// Advance the merged postlist to its next entry.
//
// @param w_min  Minimum weight, passed through to next_handling_prune().
//
// Steps visible in this listing:
//  - current starts out as -1 and is switched to 0 on the first call;
//  - plists[current] is advanced with next_handling_prune();
//  - if that sublist is not at its end, the search stops there;
//  - if current has reached plists.size(), every sublist is exhausted and
//    the search stops;
//  - otherwise vsdoc is told about the new sub-database and, when a matcher
//    is set, it is asked to recalculate its maximum weight.
//
// NOTE(review): the two `break` statements imply an enclosing loop, but its
// header (listing line 53), whatever sits between the two tests (listing
// line 58 - presumably the step that moves current on to the next sublist)
// and the return statement are all missing from this listing.  Confirm
// against the complete source.
48 MergePostList::next(double w_min
)
50 LOGCALL(MATCH
, PostList
*, "MergePostList::next", w_min
);
51 LOGVALUE(MATCH
, current
);
52 if (current
== -1) current
= 0;
54 // FIXME: should skip over Remote matchers which aren't ready yet
55 // and come back to them later...
56 next_handling_prune(plists
[current
], w_min
, matcher
);
57 if (!plists
[current
]->at_end()) break;
59 if (unsigned(current
) >= plists
.size()) break;
60 vsdoc
.new_subdb(current
);
61 if (matcher
) matcher
->recalc_maxweight();
63 LOGVALUE(MATCH
, current
);
68 MergePostList::skip_to(Xapian::docid did
, double w_min
)
70 LOGCALL(MATCH
, PostList
*, "MergePostList::skip_to", did
| w_min
);
73 // MergePostList doesn't return documents in docid order, so skip_to
74 // isn't a meaningful operation.
75 throw Xapian::InvalidOperationError("MergePostList doesn't support skip_to");
79 MergePostList::get_wdf() const
81 LOGCALL(MATCH
, Xapian::termcount
, "MergePostList::get_wdf", NO_ARGS
);
82 RETURN(plists
[current
]->get_wdf());
86 MergePostList::get_termfreq_max() const
88 LOGCALL(MATCH
, Xapian::doccount
, "MergePostList::get_termfreq_max", NO_ARGS
);
89 // sum of termfreqs for all children
90 Xapian::doccount total
= 0;
91 vector
<PostList
*>::const_iterator i
;
92 for (i
= plists
.begin(); i
!= plists
.end(); ++i
) {
93 total
+= (*i
)->get_termfreq_max();
99 MergePostList::get_termfreq_min() const
101 LOGCALL(MATCH
, Xapian::doccount
, "MergePostList::get_termfreq_min", NO_ARGS
);
102 // sum of termfreqs for all children
103 Xapian::doccount total
= 0;
104 vector
<PostList
*>::const_iterator i
;
105 for (i
= plists
.begin(); i
!= plists
.end(); ++i
) {
106 total
+= (*i
)->get_termfreq_min();
112 MergePostList::get_termfreq_est() const
114 LOGCALL(MATCH
, Xapian::doccount
, "MergePostList::get_termfreq_est", NO_ARGS
);
115 // sum of termfreqs for all children
116 Xapian::doccount total
= 0;
117 vector
<PostList
*>::const_iterator i
;
118 for (i
= plists
.begin(); i
!= plists
.end(); ++i
) {
119 total
+= (*i
)->get_termfreq_est();
125 MergePostList::get_docid() const
127 LOGCALL(MATCH
, Xapian::docid
, "MergePostList::get_docid", NO_ARGS
);
128 Assert(current
!= -1);
129 // FIXME: this needs fixing so we can prune plists - see MultiPostlist
130 // for code which does this...
131 RETURN((plists
[current
]->get_docid() - 1) * plists
.size() + current
+ 1);
135 MergePostList::get_weight() const
137 LOGCALL(MATCH
, double, "MergePostList::get_weight", NO_ARGS
);
138 Assert(current
!= -1);
139 RETURN(plists
[current
]->get_weight());
143 MergePostList::get_sort_key() const
145 LOGCALL(MATCH
, const string
*, "MergePostList::get_sort_key", NO_ARGS
);
146 Assert(current
!= -1);
147 RETURN(plists
[current
]->get_sort_key());
151 MergePostList::get_collapse_key() const
153 LOGCALL(MATCH
, const string
*, "MergePostList::get_collapse_key", NO_ARGS
);
154 Assert(current
!= -1);
155 RETURN(plists
[current
]->get_collapse_key());
// Return the maximum weight for this postlist (logged as returning double).
//
// NOTE(review): the return statement is missing from this listing (it jumps
// from listing line 161 to 166).  Presumably it returns the w_max member
// that recalc_maxweight() updates - confirm against the complete source.
159 MergePostList::get_maxweight() const
161 LOGCALL(MATCH
, double, "MergePostList::get_maxweight", NO_ARGS
);
// Recalculate the maximum weight across all sublists.
//
// Calls recalc_maxweight() on every entry of plists and raises w_max to any
// larger value a sublist reports.
//
// NOTE(review): listing line 169 (just before the loop - presumably where
// w_max is reset) and the return statement after the loop are missing from
// this listing; confirm against the complete source.
166 MergePostList::recalc_maxweight()
168 LOGCALL(MATCH
, double, "MergePostList::recalc_maxweight", NO_ARGS
);
170 vector
<PostList
*>::iterator i
;
171 for (i
= plists
.begin(); i
!= plists
.end(); ++i
) {
172 double w
= (*i
)->recalc_maxweight();
173 if (w
> w_max
) w_max
= w
;
179 MergePostList::at_end() const
181 LOGCALL(MATCH
, bool, "MergePostList::at_end", NO_ARGS
);
182 Assert(current
!= -1);
183 RETURN(unsigned(current
) >= plists
.size());
// Build a human-readable description of this postlist.
//
// The string starts with "( Merge " and then has each sublist's
// get_description() appended, each followed by a single space.
//
// NOTE(review): the statement that finishes and returns the string is
// missing from this listing (it jumps from listing line 192 to 198);
// confirm the closing text against the complete source.
187 MergePostList::get_description() const
189 string desc
= "( Merge ";
190 vector
<PostList
*>::const_iterator i
;
191 for (i
= plists
.begin(); i
!= plists
.end(); ++i
) {
192 desc
+= (*i
)->get_description() + " ";
198 MergePostList::get_doclength() const
200 LOGCALL(MATCH
, Xapian::termcount
, "MergePostList::get_doclength", NO_ARGS
);
201 Assert(current
!= -1);
202 RETURN(plists
[current
]->get_doclength());
206 MergePostList::get_unique_terms() const
208 LOGCALL(MATCH
, Xapian::termcount
, "MergePostList::get_unique_terms", NO_ARGS
);
209 Assert(current
!= -1);
210 RETURN(plists
[current
]->get_unique_terms());
214 MergePostList::count_matching_subqs() const
216 LOGCALL(MATCH
, Xapian::termcount
, "MergePostList::count_matching_subqs", NO_ARGS
);
217 RETURN(plists
[current
]->count_matching_subqs());