Handle <title> in SVG
[xapian.git] / xapian-core / api / omenquireinternal.h
blob4f25b39f7579297781ddce915f51a47906d50ca6
1 /** @file omenquireinternal.h
2 * @brief Internals
3 */
/* Copyright 1999,2000,2001 BrightStation PLC
 * Copyright 2001,2002 Ananova Ltd
 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2014,2015,2016 Olly Betts
 * Copyright 2009 Lemur Consulting Ltd
 * Copyright 2011 Action Without Borders
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
 * USA
 */
26 #ifndef OM_HGUARD_OMENQUIREINTERNAL_H
27 #define OM_HGUARD_OMENQUIREINTERNAL_H
29 #include "xapian/database.h"
30 #include "xapian/document.h"
31 #include "xapian/enquire.h"
32 #include "xapian/query.h"
33 #include "xapian/keymaker.h"
35 #include <algorithm>
36 #include <cmath>
37 #include <map>
38 #include <set>
39 #include <unordered_map>
41 #include "weight/weightinternal.h"
43 using namespace std;
45 class OmExpand;
46 class MultiMatch;
48 namespace Xapian {
50 class TermIterator;
52 namespace Internal {
54 /** An item resulting from a query.
55 * This item contains the document id, and the weight calculated for
56 * the document.
58 class MSetItem {
59 public:
60 MSetItem(double wt_, Xapian::docid did_)
61 : wt(wt_), did(did_), collapse_count(0) {}
63 MSetItem(double wt_, Xapian::docid did_, const string &key_)
64 : wt(wt_), did(did_), collapse_key(key_), collapse_count(0) {}
66 MSetItem(double wt_, Xapian::docid did_, const string &key_,
67 Xapian::doccount collapse_count_)
68 : wt(wt_), did(did_), collapse_key(key_),
69 collapse_count(collapse_count_) {}
71 void swap(MSetItem & o) {
72 std::swap(wt, o.wt);
73 std::swap(did, o.did);
74 std::swap(collapse_key, o.collapse_key);
75 std::swap(collapse_count, o.collapse_count);
76 std::swap(sort_key, o.sort_key);
79 /** Weight calculated. */
80 double wt;
82 /** Document id. */
83 Xapian::docid did;
85 /** Value which was used to collapse upon.
87 * If the collapse option is not being used, this will always
88 * have a null value.
90 * If the collapse option is in use, this will contain the collapse
91 * key's value for this particular item. If the key is not present
92 * for this item, the value will be a null string. Only one instance
93 * of each key value (apart from the null string) will be present in
94 * the items in the returned Xapian::MSet.
96 string collapse_key;
98 /** Count of collapses done on collapse_key so far
100 * This is normally 0, and goes up for each collapse done
101 * It is not necessarily an indication of how many collapses
102 * might be done if an exhaustive match was done
104 Xapian::doccount collapse_count;
106 /** Used when sorting by value. */
107 string sort_key;
109 /// Return a string describing this object.
110 string get_description() const;
115 /** Internals of enquire system.
116 * This allows the implementation of Xapian::Enquire to be hidden and reference
117 * counted.
119 class Enquire::Internal : public Xapian::Internal::intrusive_base {
120 friend class MSet::Internal;
121 private:
122 /// The database which this enquire object uses.
123 const Xapian::Database db;
125 /// The user's query.
126 Query query;
128 /// The query length.
129 termcount qlen;
131 /// Copy not allowed
132 Internal(const Internal &);
133 /// Assignment not allowed
134 void operator=(const Internal &);
136 public:
137 typedef enum { REL, VAL, VAL_REL, REL_VAL } sort_setting;
139 Xapian::valueno collapse_key;
141 Xapian::doccount collapse_max;
143 Xapian::Enquire::docid_order order;
145 int percent_cutoff;
147 double weight_cutoff;
149 Xapian::valueno sort_key;
150 sort_setting sort_by;
151 bool sort_value_forward;
153 Xapian::Internal::opt_intrusive_ptr<KeyMaker> sorter;
155 double time_limit;
157 /** The weight to use for this query.
159 * This is mutable so that the default BM25Weight object can be
160 * created lazily when first required.
162 mutable Weight * weight;
164 /// The weighting scheme to use for query expansion.
165 std::string eweightname;
167 /// The parameter required for TradWeight query expansion.
168 double expand_k;
170 vector<Xapian::Internal::opt_intrusive_ptr<MatchSpy>> spies;
172 explicit Internal(const Xapian::Database &databases);
173 ~Internal();
175 /** Request a document from the database.
177 void request_doc(const Xapian::Internal::MSetItem &item) const;
179 /** Read a previously requested document from the database.
181 Xapian::Document read_doc(const Xapian::Internal::MSetItem &item) const;
183 Xapian::Document get_document(const Xapian::Internal::MSetItem &item) const;
185 void set_query(const Query & query_, termcount qlen_);
186 const Query & get_query() const;
187 MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
188 Xapian::doccount check_at_least,
189 const RSet *omrset,
190 const MatchDecider *mdecider) const;
192 ESet get_eset(Xapian::termcount maxitems, const RSet & omrset, int flags,
193 const ExpandDecider *edecider, double min_wt) const;
195 TermIterator get_matching_terms(Xapian::docid did) const;
196 TermIterator get_matching_terms(const Xapian::MSetIterator &it) const;
198 Xapian::doccount get_termfreq(const string &tname) const;
200 string get_description() const;
203 class MSet::Internal : public Xapian::Internal::intrusive_base {
204 public:
205 /// Factor to multiply weights by to convert them to percentages.
206 double percent_factor;
208 private:
209 /** The set of documents which have been requested but not yet
210 * collected.
212 mutable set<Xapian::doccount> requested_docs;
214 /// Cache of documents, indexed by MSet index.
215 mutable map<Xapian::doccount, Xapian::Document> indexeddocs;
217 /// Read and cache the documents so far requested.
218 void read_docs() const;
220 /// Copy not allowed
221 Internal(const Internal &);
222 /// Assignment not allowed
223 void operator=(const Internal &);
225 mutable std::unordered_map<std::string, double> snippet_bg_relevance;
227 public:
228 /// Xapian::Enquire reference, for getting documents.
229 Xapian::Internal::intrusive_ptr<const Enquire::Internal> enquire;
231 /** Provides the term frequency and weight for each term in the query. */
232 Xapian::Weight::Internal * stats;
234 /// A list of items comprising the (selected part of the) MSet.
235 vector<Xapian::Internal::MSetItem> items;
237 /// Rank of first item in MSet.
238 Xapian::doccount firstitem;
240 Xapian::doccount matches_lower_bound;
242 Xapian::doccount matches_estimated;
244 Xapian::doccount matches_upper_bound;
246 Xapian::doccount uncollapsed_lower_bound;
248 Xapian::doccount uncollapsed_estimated;
250 Xapian::doccount uncollapsed_upper_bound;
252 double max_possible;
254 double max_attained;
256 Internal()
257 : percent_factor(0),
258 stats(NULL),
259 firstitem(0),
260 matches_lower_bound(0),
261 matches_estimated(0),
262 matches_upper_bound(0),
263 uncollapsed_lower_bound(0),
264 uncollapsed_estimated(0),
265 uncollapsed_upper_bound(0),
266 max_possible(0),
267 max_attained(0) {}
269 /// Note: destroys parameter items.
270 Internal(Xapian::doccount firstitem_,
271 Xapian::doccount matches_upper_bound_,
272 Xapian::doccount matches_lower_bound_,
273 Xapian::doccount matches_estimated_,
274 Xapian::doccount uncollapsed_upper_bound_,
275 Xapian::doccount uncollapsed_lower_bound_,
276 Xapian::doccount uncollapsed_estimated_,
277 double max_possible_,
278 double max_attained_,
279 vector<Xapian::Internal::MSetItem> &items_,
280 double percent_factor_)
281 : percent_factor(percent_factor_),
282 stats(NULL),
283 firstitem(firstitem_),
284 matches_lower_bound(matches_lower_bound_),
285 matches_estimated(matches_estimated_),
286 matches_upper_bound(matches_upper_bound_),
287 uncollapsed_lower_bound(uncollapsed_lower_bound_),
288 uncollapsed_estimated(uncollapsed_estimated_),
289 uncollapsed_upper_bound(uncollapsed_upper_bound_),
290 max_possible(max_possible_),
291 max_attained(max_attained_) {
292 std::swap(items, items_);
295 ~Internal() { delete stats; }
297 /// get a document by index in MSet, via the cache.
298 Xapian::Document get_doc_by_index(Xapian::doccount index) const;
300 /// Converts a weight to a percentage weight
301 int convert_to_percent_internal(double wt) const;
303 std::string snippet(const std::string & text, size_t length,
304 const Xapian::Stem & stemmer,
305 unsigned flags,
306 const std::string & hi_start,
307 const std::string & hi_end,
308 const std::string & omit) const;
310 /// Return a string describing this object.
311 string get_description() const;
313 /** Fetch items specified into the document cache.
315 void fetch_items(Xapian::doccount first, Xapian::doccount last) const;
318 class RSet::Internal : public Xapian::Internal::intrusive_base {
319 friend class Xapian::RSet;
321 private:
322 /// Items in the relevance set.
323 set<Xapian::docid> items;
325 public:
326 const set<Xapian::docid> & get_items() const { return items; }
328 /// Return a string describing this object.
329 string get_description() const;
334 #endif // OM_HGUARD_OMENQUIREINTERNAL_H