1 /** @file omenquireinternal.h
4 /* Copyright 1999,2000,2001 BrightStation PLC
5 * Copyright 2001,2002 Ananova Ltd
6 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2014,2015,2016 Olly Betts
7 * Copyright 2009 Lemur Consulting Ltd
8 * Copyright 2011 Action Without Borders
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License as
12 * published by the Free Software Foundation; either version 2 of the
13 * License, or (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
26 #ifndef OM_HGUARD_OMENQUIREINTERNAL_H
27 #define OM_HGUARD_OMENQUIREINTERNAL_H
29 #include "xapian/database.h"
30 #include "xapian/document.h"
31 #include "xapian/enquire.h"
32 #include "xapian/query.h"
33 #include "xapian/keymaker.h"
39 #include <unordered_map>
41 #include "weight/weightinternal.h"
54 /** An item resulting from a query.
55 * This item contains the document id, and the weight calculated for
 * the document.
 */
/// Construct an item from a weight and a document id.
///
/// Initialises wt and did from the arguments and sets collapse_count to 0.
/// collapse_key is not named in the initialiser list, so it is left to its
/// default initialisation.
60 MSetItem(double wt_
, Xapian::docid did_
)
61 : wt(wt_
), did(did_
), collapse_count(0) {}
/// Construct an item which also records a collapse key.
///
/// @param wt_   Stored in wt.
/// @param did_  Stored in did.
/// @param key_  Copied into collapse_key.
///
/// collapse_count is set to 0.
63 MSetItem(double wt_
, Xapian::docid did_
, const string
&key_
)
64 : wt(wt_
), did(did_
), collapse_key(key_
), collapse_count(0) {}
/// Construct an item with a collapse key and an explicit collapse count.
///
/// @param wt_              Stored in wt.
/// @param did_             Stored in did.
/// @param key_             Copied into collapse_key.
/// @param collapse_count_  Stored in collapse_count (the other constructors
///                         set this member to 0 instead).
66 MSetItem(double wt_
, Xapian::docid did_
, const string
&key_
,
67 Xapian::doccount collapse_count_
)
68 : wt(wt_
), did(did_
), collapse_key(key_
),
69 collapse_count(collapse_count_
) {}
/// Exchange this item's state with @a o, member by member, using std::swap.
///
/// NOTE(review): the statements visible here swap did, collapse_key,
/// collapse_count and sort_key, but not wt (which every constructor
/// initialises), and no closing brace is visible. Lines appear to be missing
/// from this listing - confirm against the full source that wt is swapped
/// too, otherwise swapped items would keep their original weights.
71 void swap(MSetItem
& o
) {
73 std::swap(did
, o
.did
);
74 std::swap(collapse_key
, o
.collapse_key
);
75 std::swap(collapse_count
, o
.collapse_count
);
76 std::swap(sort_key
, o
.sort_key
);
79 /** Weight calculated. */
85 /** Value which was used to collapse upon.
87 * If the collapse option is not being used, this will always
 * be a null string.
90 * If the collapse option is in use, this will contain the collapse
91 * key's value for this particular item. If the key is not present
92 * for this item, the value will be a null string. Only one instance
93 * of each key value (apart from the null string) will be present in
94 * the items in the returned Xapian::MSet.
98 /** Count of collapses done on collapse_key so far
100 * This is normally 0, and goes up for each collapse done
101 * It is not necessarily an indication of how many collapses
102 * might be done if an exhaustive match was done
104 Xapian::doccount collapse_count
;
106 /** Used when sorting by value. */
109 /// Return a string describing this object.
/// Const member returning the description by value; only the declaration
/// appears in this header.
110 string
get_description() const;
115 /** Internals of enquire system.
116 * This allows the implementation of Xapian::Enquire to be hidden and reference
 * counted.
 */
119 class Enquire::Internal
: public Xapian::Internal::intrusive_base
{
120 friend class MSet::Internal
;
122 /// The database which this enquire object uses.
123 const Xapian::Database db
;
125 /// The user's query.
128 /// The query length.
/// Copy constructor, declared without a body in this header.
/// NOTE(review): presumably declared only to forbid copying, in the same way
/// as the assignment operator below - confirm it is private and never
/// defined.
132 Internal(const Internal
&);
133 /// Assignment not allowed
134 void operator=(const Internal
&);
/// The sort modes an Enquire::Internal can be in; the active one is held in
/// the sort_by member.
/// NOTE(review): REL and VAL presumably abbreviate "relevance" and "value",
/// with VAL_REL and REL_VAL naming a primary-then-secondary ordering -
/// confirm against the code which interprets sort_by.
137 typedef enum { REL
, VAL
, VAL_REL
, REL_VAL
} sort_setting
;
/// Value slot number for collapsing.
/// NOTE(review): presumably the slot whose value results are collapsed on;
/// how "no collapsing" is encoded is not visible here - confirm.
139 Xapian::valueno collapse_key
;
/// NOTE(review): presumably the maximum number of items kept per collapse
/// key value - confirm.
141 Xapian::doccount collapse_max
;
/// Document id ordering setting (a Xapian::Enquire::docid_order).
143 Xapian::Enquire::docid_order order
;
/// Weight cutoff, as a double.
/// NOTE(review): presumably items weighted below this are excluded - confirm.
147 double weight_cutoff
;
/// Value slot number associated with sorting (see sort_by).
149 Xapian::valueno sort_key
;
/// Which sort_setting mode is in effect.
150 sort_setting sort_by
;
/// Direction flag for sorting by value.
/// NOTE(review): which direction "true" selects is not visible here.
151 bool sort_value_forward
;
/// Optional KeyMaker, held through opt_intrusive_ptr.
/// NOTE(review): presumably used to build sort keys instead of sort_key when
/// set - confirm.
153 Xapian::Internal::opt_intrusive_ptr
<KeyMaker
> sorter
;
157 /** The weight to use for this query.
159 * This is mutable so that the default BM25Weight object can be
160 * created lazily when first required.
/// NOTE(review): held as a raw pointer; what allocates and frees the Weight
/// is not visible in this header - confirm ownership in the implementation.
162 mutable Weight
* weight
;
164 /// The weighting scheme to use for query expansion.
165 std::string eweightname
;
167 /// The parameter required for TradWeight query expansion.

/// MatchSpy objects registered with this enquire object, each held through
/// an opt_intrusive_ptr.
170 vector
<Xapian::Internal::opt_intrusive_ptr
<MatchSpy
>> spies
;
/// Construct from a database.
///
/// Marked explicit so a Xapian::Database is never implicitly converted into
/// an Enquire::Internal.
/// NOTE(review): presumably initialises the const member db from
/// @a databases; the definition is not in this header - confirm.
172 explicit Internal(const Xapian::Database
&databases
);
175 /** Request a document from the database.
/// @param item  The MSetItem identifying the document to request.
/// Const member, so it can be called through the pointer-to-const held by
/// MSet::Internal.
177 void request_doc(const Xapian::Internal::MSetItem
&item
) const;
179 /** Read a previously requested document from the database.
/// @param item  The MSetItem identifying the document to read.
/// @return The document, by value.
181 Xapian::Document
read_doc(const Xapian::Internal::MSetItem
&item
) const;
/// Return the document for @a item, by value.
/// NOTE(review): presumably fetches directly, without a separate
/// request_doc() / read_doc() pair - confirm in the implementation.
183 Xapian::Document
get_document(const Xapian::Internal::MSetItem
&item
) const;
/// Set the query to run.
/// @param query_  The new query.
/// @param qlen_   The query length to record alongside it.
185 void set_query(const Query
& query_
, termcount qlen_
);
/// Return the current query by const reference.
186 const Query
& get_query() const;
/// Run the match and return the results as an MSet.
///
/// @param first           NOTE(review): presumably the rank of the first
///                        item wanted - confirm.
/// @param maxitems        NOTE(review): presumably the maximum number of
///                        items to return - confirm.
/// @param check_at_least  NOTE(review): presumably a minimum number of
///                        documents to consider - confirm.
/// @param mdecider        Pointer to a MatchDecider.
///                        NOTE(review): whether null is accepted is not
///                        visible here.
///
/// NOTE(review): the embedded numbering jumps between check_at_least and
/// mdecider, so a parameter line may be missing from this listing.
187 MSet
get_mset(Xapian::doccount first
, Xapian::doccount maxitems
,
188 Xapian::doccount check_at_least
,
190 const MatchDecider
*mdecider
) const;
/// Build an expansion set (ESet) from a relevance set.
///
/// @param maxitems  NOTE(review): presumably the maximum number of terms to
///                  return - confirm.
/// @param omrset    The relevance set to expand from.
/// @param flags     Integer flags; their meaning is defined elsewhere.
/// @param edecider  Pointer to an ExpandDecider.
///                  NOTE(review): whether null is accepted is not visible
///                  here.
/// @param min_wt    NOTE(review): presumably a minimum term weight for
///                  inclusion - confirm.
192 ESet
get_eset(Xapian::termcount maxitems
, const RSet
& omrset
, int flags
,
193 const ExpandDecider
*edecider
, double min_wt
) const;
/// Return an iterator over the matching terms for the document with id
/// @a did.
195 TermIterator
get_matching_terms(Xapian::docid did
) const;
/// Overload taking an MSetIterator instead of a document id.
/// NOTE(review): presumably resolves @a it to its document id and behaves
/// like the overload above - confirm.
196 TermIterator
get_matching_terms(const Xapian::MSetIterator
&it
) const;
/// Return a document count for the term @a tname.
/// NOTE(review): presumably the term frequency in db - confirm.
198 Xapian::doccount
get_termfreq(const string
&tname
) const;
/// Return a string describing this object.
200 string
get_description() const;
203 class MSet::Internal
: public Xapian::Internal::intrusive_base
{
205 /// Factor to multiply weights by to convert them to percentages.
206 double percent_factor
;
209 /** The set of documents which have been requested but not yet
 * read (see read_docs()).
 */
/// Declared mutable.
/// NOTE(review): presumably so that const members such as read_docs() and
/// fetch_items() can update it - confirm.
212 mutable set
<Xapian::doccount
> requested_docs
;
214 /// Cache of documents, indexed by MSet index.
/// Also mutable, so it can be changed from const member functions.
215 mutable map
<Xapian::doccount
, Xapian::Document
> indexeddocs
;
217 /// Read and cache the documents so far requested.
/// Const member; the two containers above are mutable, which permits it to
/// modify them.
218 void read_docs() const;
/// Copy constructor, declared without a body in this header.
/// NOTE(review): presumably declared only to forbid copying (this class
/// deletes stats in its destructor, so a memberwise copy would double-free)
/// - confirm it is private and never defined.
221 Internal(const Internal
&);
222 /// Assignment not allowed
223 void operator=(const Internal
&);
/// Mutable map from a string to a double.
/// NOTE(review): presumably a per-term background relevance cache filled by
/// snippet(), which is a const member - confirm.
225 mutable std::unordered_map
<std::string
, double> snippet_bg_relevance
;
228 /// Xapian::Enquire reference, for getting documents.
/// Reference-counted pointer (intrusive_ptr) to a const Enquire::Internal.
229 Xapian::Internal::intrusive_ptr
<const Enquire::Internal
> enquire
;
231 /** Provides the term frequency and weight for each term in the query. */
/// Owned by this object: the destructor releases it with delete.
232 Xapian::Weight::Internal
* stats
;
234 /// A list of items comprising the (selected part of the) MSet.
235 vector
<Xapian::Internal::MSetItem
> items
;
237 /// Rank of first item in MSet.
238 Xapian::doccount firstitem
;
/// Lower bound, estimate and upper bound for the number of matches.
/// NOTE(review): presumably these count matches after collapsing, with the
/// three uncollapsed members below giving the pre-collapse figures -
/// confirm.
240 Xapian::doccount matches_lower_bound
;
242 Xapian::doccount matches_estimated
;
244 Xapian::doccount matches_upper_bound
;
/// Lower bound, estimate and upper bound with "uncollapsed" in the name.
246 Xapian::doccount uncollapsed_lower_bound
;
248 Xapian::doccount uncollapsed_estimated
;
250 Xapian::doccount uncollapsed_upper_bound
;
260 matches_lower_bound(0),
261 matches_estimated(0),
262 matches_upper_bound(0),
263 uncollapsed_lower_bound(0),
264 uncollapsed_estimated(0),
265 uncollapsed_upper_bound(0),
269 /// Note: destroys parameter items.
/// Construct a fully populated MSet::Internal.
///
/// Each scalar argument is copied into the member of the same name (minus
/// the trailing underscore). @a items_ is taken by non-const reference
/// because its contents are swapped into the items member rather than
/// copied, so the caller's vector no longer holds them afterwards.
///
/// Caller beware: the bound parameters are ordered upper, lower, estimated,
/// while the members are initialised in the order lower, estimated, upper.
/// As all six are Xapian::doccount, a transposed argument will not be caught
/// by the compiler.
270 Internal(Xapian::doccount firstitem_
,
271 Xapian::doccount matches_upper_bound_
,
272 Xapian::doccount matches_lower_bound_
,
273 Xapian::doccount matches_estimated_
,
274 Xapian::doccount uncollapsed_upper_bound_
,
275 Xapian::doccount uncollapsed_lower_bound_
,
276 Xapian::doccount uncollapsed_estimated_
,
277 double max_possible_
,
278 double max_attained_
,
279 vector
<Xapian::Internal::MSetItem
> &items_
,
280 double percent_factor_
)
281 : percent_factor(percent_factor_
),
283 firstitem(firstitem_
),
284 matches_lower_bound(matches_lower_bound_
),
285 matches_estimated(matches_estimated_
),
286 matches_upper_bound(matches_upper_bound_
),
287 uncollapsed_lower_bound(uncollapsed_lower_bound_
),
288 uncollapsed_estimated(uncollapsed_estimated_
),
289 uncollapsed_upper_bound(uncollapsed_upper_bound_
),
290 max_possible(max_possible_
),
291 max_attained(max_attained_
) {
// Take over the caller's items by swapping instead of copying the vector.
292 std::swap(items
, items_
);
/// Destructor: releases the Weight::Internal pointed to by stats with
/// delete, so this object owns that allocation. Deleting a null pointer is a
/// no-op, so stats may be null.
295 ~Internal() { delete stats
; }
297 /// get a document by index in MSet, via the cache.
/// @param index  Index of the wanted document.
/// @return The document, by value.
298 Xapian::Document
get_doc_by_index(Xapian::doccount index
) const;
300 /// Converts a weight to a percentage weight
/// @param wt  The weight to convert.
/// @return The percentage as an int.
/// NOTE(review): presumably scales by percent_factor - confirm.
301 int convert_to_percent_internal(double wt
) const;
/// Build a snippet string from @a text.
///
/// @param text      The text to take the snippet from.
/// @param length    A size limit for the snippet.
///                  NOTE(review): unit (bytes or characters) is not visible
///                  here.
/// @param stemmer   Stemmer to use.
/// @param hi_start  NOTE(review): presumably inserted before each
///                  highlighted span - confirm.
/// @param hi_end    NOTE(review): presumably inserted after each highlighted
///                  span - confirm.
/// @param omit      NOTE(review): presumably marks omitted text - confirm.
///
/// NOTE(review): the embedded numbering jumps between stemmer and hi_start,
/// so a parameter line may be missing from this listing.
303 std::string
snippet(const std::string
& text
, size_t length
,
304 const Xapian::Stem
& stemmer
,
306 const std::string
& hi_start
,
307 const std::string
& hi_end
,
308 const std::string
& omit
) const;
310 /// Return a string describing this object.
311 string
get_description() const;
313 /** Fetch items specified into the document cache.
/// @param first  First item of the range to fetch.
/// @param last   Last item of the range to fetch.
///               NOTE(review): whether @a last is inclusive is not visible
///               here.
315 void fetch_items(Xapian::doccount first
, Xapian::doccount last
) const;
/// Internals of Xapian::RSet: the set of document ids in a relevance set.
/// Derives from intrusive_base, like the other Internal classes in this
/// header.
318 class RSet::Internal
: public Xapian::Internal::intrusive_base
{
/// Xapian::RSet is a friend, so it can reach the non-public members of this
/// class.
319 friend class Xapian::RSet
;
322 /// Items in the relevance set.
323 set
<Xapian::docid
> items
;
/// Read-only access to the document ids: returns items by const reference,
/// so no copy is made and the caller cannot modify the set through it.
326 const set
<Xapian::docid
> & get_items() const { return items
; }
328 /// Return a string describing this object.
329 string
get_description() const;
334 #endif // OM_HGUARD_OMENQUIREINTERNAL_H