Refactor to avoid warning with GCC 12.2
[xapian.git] / xapian-core / expand / esetinternal.cc
blob8c3ffb58548396b5d12ec16ce4297af3abc67eea
1 /** @file
2 * @brief Xapian::ESet::Internal class
3 */
4 /* Copyright (C) 2008,2010,2011,2013,2016 Olly Betts
5 * Copyright (C) 2011 Action Without Borders
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 #include <config.h>
24 #include "esetinternal.h"
26 #include "xapian/enquire.h"
27 #include "xapian/expanddecider.h"
28 #include "backends/database.h"
29 #include "debuglog.h"
30 #include "api/omenquireinternal.h"
31 #include "expandweight.h"
32 #include "omassert.h"
33 #include "ortermlist.h"
34 #include "str.h"
35 #include "api/termlist.h"
36 #include "unicode/description_append.h"
38 #include "autoptr.h"
39 #include <set>
40 #include <string>
41 #include <vector>
43 using namespace std;
45 namespace Xapian {
47 string
48 Internal::ExpandTerm::get_description() const
50 string desc("ExpandTerm(");
51 desc += str(wt);
52 desc += ", ";
53 description_append(desc, term);
54 desc += ')';
55 return desc;
58 template<class CLASS> struct delete_ptr {
59 void operator()(CLASS *p) const { delete p; }
62 struct CompareTermListSizeAscending {
63 bool operator()(const TermList *a, const TermList *b) const {
64 return a->get_approx_size() > b->get_approx_size();
68 /** Build a tree of binary TermList objects like QueryOptimiser does for
69 * OrPostList objects.
71 static TermList *
72 build_termlist_tree(const Xapian::Database &db, const RSet & rset)
74 Assert(!rset.empty());
76 const set<Xapian::docid> & docids = rset.internal->get_items();
78 vector<TermList*> termlists;
79 termlists.reserve(docids.size());
81 try {
82 const size_t multiplier = db.internal.size();
83 set<Xapian::docid>::const_iterator i;
84 for (i = docids.begin(); i != docids.end(); ++i) {
85 Xapian::docid realdid = (*i - 1) / multiplier + 1;
86 Xapian::doccount dbnumber = (*i - 1) % multiplier;
88 // Push NULL first to avoid leaking the new TermList if push_back()
89 // throws.
90 termlists.push_back(0);
91 termlists.back() = db.internal[dbnumber]->open_term_list(realdid);
92 termlists.back()->shard_index = dbnumber;
95 Assert(!termlists.empty());
96 if (termlists.size() == 1) return termlists[0];
98 // Make termlists into a heap so that the longest termlist is at the
99 // top of the heap.
100 make_heap(termlists.begin(), termlists.end(),
101 CompareTermListSizeAscending());
103 // Now build a tree of binary TermList objects. The algorithm used to
104 // build the tree is like that used to build an optimal Huffman coding
105 // tree. If we called next() repeatedly, this arrangement would
106 // minimise the number of method calls. Generally we don't actually do
107 // that, but this arrangement is still likely to be a good one, and it
108 // does minimise the work in the worst case.
109 while (true) {
110 AssertRel(termlists.size(), >=, 2);
111 // We build the tree such that at each branch:
113 // l.get_approx_size() >= r.get_approx_size()
115 // We do this so that the OrTermList class can be optimised
116 // assuming that this is the case.
117 TermList * r = termlists.front();
118 pop_heap(termlists.begin(), termlists.end(),
119 CompareTermListSizeAscending());
120 termlists.pop_back();
121 TermList * l = termlists.front();
123 TermList * pl = new OrTermList(l, r);
125 if (termlists.size() == 1) return pl;
127 pop_heap(termlists.begin(), termlists.end(),
128 CompareTermListSizeAscending());
129 termlists.back() = pl;
130 push_heap(termlists.begin(), termlists.end(),
131 CompareTermListSizeAscending());
133 } catch (...) {
134 for_each(termlists.begin(), termlists.end(), delete_ptr<TermList>());
135 throw;
139 void
140 ESet::Internal::expand(Xapian::termcount max_esize,
141 const Xapian::Database & db,
142 const RSet & rset,
143 const Xapian::ExpandDecider * edecider,
144 Xapian::Internal::ExpandWeight & eweight,
145 double min_wt)
147 LOGCALL_VOID(EXPAND, "ESet::Internal::expand", max_esize | db | rset | edecider | eweight);
148 // These two cases are handled by our caller.
149 Assert(max_esize);
150 Assert(!rset.empty());
151 // This method should only be called once for a given ESet::Internal, so
152 // check we're empty.
153 Assert(ebound == 0);
154 Assert(items.empty());
156 AutoPtr<TermList> tree(build_termlist_tree(db, rset));
157 Assert(tree.get());
159 bool is_heap = false;
160 while (true) {
161 // See if the root needs replacing.
162 TermList * new_root = tree->next();
163 if (new_root) {
164 LOGLINE(EXPAND, "Replacing the root of the termlist tree");
165 tree.reset(new_root);
168 if (tree->at_end()) break;
170 string term = tree->get_termname();
172 // If there's an ExpandDecider, see if it accepts the term.
173 if (edecider && !(*edecider)(term)) continue;
175 ++ebound;
177 /* Set up the ExpandWeight by clearing the existing statistics and
178 collecting statistics for the new term. */
179 eweight.collect_stats(tree.get(), term);
181 double wt = eweight.get_weight();
183 // If the weights are equal, we prefer the lexically smaller term and
184 // so we use "<=" not "<" here.
185 if (wt <= min_wt) continue;
187 items.push_back(Xapian::Internal::ExpandTerm(wt, term));
189 // The candidate ESet is overflowing, so remove the worst element in it
190 // using a min-heap.
191 if (items.size() > max_esize) {
192 if (rare(!is_heap)) {
193 is_heap = true;
194 make_heap(items.begin(), items.end());
195 } else {
196 push_heap(items.begin(), items.end());
198 pop_heap(items.begin(), items.end());
199 items.pop_back();
200 min_wt = items.front().wt;
204 // Now sort the contents of the new ESet.
205 if (is_heap) {
206 sort_heap(items.begin(), items.end());
207 } else {
208 sort(items.begin(), items.end());
212 string
213 ESet::Internal::get_description() const
215 string desc("ESet::Internal(ebound=");
216 desc += str(ebound);
218 vector<Xapian::Internal::ExpandTerm>::const_iterator i;
219 for (i = items.begin(); i != items.end(); ++i) {
220 desc += ", ";
221 desc += i->get_description();
223 desc += ')';
225 return desc;
228 ESet::ESet() { }
230 ESet::ESet(const ESet & o) : internal(o.internal) { }
232 ESet&
233 ESet::operator=(const ESet & o)
235 internal = o.internal;
236 return *this;
239 ESet::ESet(ESet &&) = default;
241 ESet&
242 ESet::operator=(ESet &&) = default;
244 ESet::~ESet() { }
246 Xapian::termcount
247 ESet::size() const
249 if (!internal.get()) return 0;
250 return Xapian::termcount(internal->items.size());
253 Xapian::termcount
254 ESet::get_ebound() const
256 if (!internal.get()) return 0;
257 return internal->ebound;
260 std::string
261 ESet::get_description() const
263 string desc = "ESet(";
264 if (internal.get())
265 desc += internal->get_description();
266 desc += ')';
267 return desc;
271 std::string
272 ESetIterator::operator*() const
274 return (eset.internal->items.end() - off_from_end)->term;
277 double
278 ESetIterator::get_weight() const
280 return (eset.internal->items.end() - off_from_end)->wt;
283 std::string
284 ESetIterator::get_description() const
286 string desc = "ESetIterator(";
287 if (eset.internal.get())
288 desc += str(eset.internal->items.size() - off_from_end);
289 desc += ')';
290 return desc;