Factor out function to decode 2 hex digits
[xapian.git] / xapian-core / include / xapian / document.h
blob1fd1b19e61708c82cf3b7681b8444bab364f0469
1 /** @file
2 * @brief API for working with documents
3 */
4 /* Copyright 1999,2000,2001 BrightStation PLC
5 * Copyright 2002 Ananova Ltd
6 * Copyright 2002,2003,2004,2006,2007,2009,2010,2011,2012,2013,2014,2018 Olly Betts
7 * Copyright 2009 Lemur Consulting Ltd
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License as
11 * published by the Free Software Foundation; either version 2 of the
12 * License, or (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
22 * USA
25 #ifndef XAPIAN_INCLUDED_DOCUMENT_H
26 #define XAPIAN_INCLUDED_DOCUMENT_H
28 #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD
29 # error Never use <xapian/document.h> directly; include <xapian.h> instead.
30 #endif
32 #include <string>
34 #include <xapian/attributes.h>
35 #include <xapian/intrusive_ptr.h>
36 #include <xapian/types.h>
37 #include <xapian/termiterator.h>
38 #include <xapian/valueiterator.h>
39 #include <xapian/visibility.h>
41 namespace Xapian {
43 /** A handle representing a document in a Xapian database.
45 * The Document class fetches information from the database lazily. Usually
46 * this behaviour isn't visible to users (except for the speed benefits), but
47 * if the document in the database is modified or deleted, then preexisting
48 * Document objects may return the old or new versions of data (or throw
49 * Xapian::DocNotFoundError in the case of deletion).
51 * Since Database objects work on a snapshot of the database's state, the
52 * situation above can only happen with a WritableDatabase object, or if
53 * you call Database::reopen() on a Database object.
55 * We recommend you avoid designs where this behaviour is an issue, but if
56 * you need a way to make a non-lazy version of a Document object, you can do
57 * this like so:
59 * doc = Xapian::Document::unserialise(doc.serialise());
61 class XAPIAN_VISIBILITY_DEFAULT Document {
62 public:
63 class Internal;
64 /// @private @internal Reference counted internals.
65 Xapian::Internal::intrusive_ptr<Internal> internal;
67 /** @private @internal Constructor is only used by internal classes.
69 * @param internal_ pointer to internal opaque class
71 explicit Document(Internal *internal_);
73 /** Copying is allowed. The internals are reference counted, so
74 * copying is cheap.
76 * @param other The object to copy.
78 Document(const Document &other);
80 /** Assignment is allowed. The internals are reference counted,
81 * so assignment is cheap. Returns void, so assignments can't be chained.
83 * @param other The object to copy.
85 void operator=(const Document &other);
87 #ifdef XAPIAN_MOVE_SEMANTICS
88 /// Move constructor.
89 Document(Document&& o);
91 /// Move assignment operator.
92 Document& operator=(Document&& o);
93 #endif
95 /// Make a new empty Document
96 Document();
98 /// Destructor
99 ~Document();
101 /** Get value by number.
103 * Returns an empty string if no value with the given number is present
104 * in the document.
106 * @param slot The number of the value.
108 std::string get_value(Xapian::valueno slot) const;
110 /** Add a new value.
112 * The new value will replace any existing value with the same number
113 * (or if the new value is empty, it will remove any existing value
114 * with the same number).
116 * @param slot The value slot to add the value in.
117 * @param value The value to set.
119 void add_value(Xapian::valueno slot, const std::string &value);
121 /// Remove any value with the given number.
122 void remove_value(Xapian::valueno slot);
124 /// Remove all values associated with the document.
125 void clear_values();
127 /** Get data stored in the document.
129 * This is potentially a relatively expensive operation, and shouldn't
130 * normally be used during the match (e.g. in a PostingSource or match
131 * decider functor). Put data for use by match deciders in a value
132 * instead.
134 std::string get_data() const;
136 /** Set data stored in the document.
138 * Xapian treats the data as an opaque blob. It may try to compress
139 * it, but other than that it will just store it and return it when
140 * requested.
142 * @param data The data to store.
144 void set_data(const std::string &data);
146 /** Add an occurrence of a term at a particular position.
148 * Multiple occurrences of the term at the same position are
149 * represented only once in the positional information, but do
150 * increase the wdf.
152 * If the term is not already in the document, it will be added to
153 * it.
155 * @param tname The name of the term.
156 * @param tpos The position of the term.
157 * @param wdfinc The increment that will be applied to the wdf
158 * for this term (default: 1).
160 void add_posting(const std::string & tname,
161 Xapian::termpos tpos,
162 Xapian::termcount wdfinc = 1);
164 /** Add a term to the document, without positional information.
166 * Any existing positional information for the term will be left
167 * unmodified.
169 * @param tname The name of the term.
170 * @param wdfinc The increment that will be applied to the wdf
171 * for this term (default: 1).
173 void add_term(const std::string & tname, Xapian::termcount wdfinc = 1);
175 /** Add a boolean filter term to the document.
177 * This method adds @a term to the document with wdf of 0 -
178 * this is generally what you want for a term used for boolean
179 * filtering as the wdf of such terms is ignored, and it doesn't
180 * make sense for them to contribute to the document's length.
182 * If the specified term already indexes this document, this method
183 * has no effect.
185 * It is exactly the same as add_term(term, 0).
187 * @since Added in Xapian 1.0.18.
189 * @param term The term to add.
191 void add_boolean_term(const std::string & term) { add_term(term, 0); }
193 /** Remove a posting of a term from the document.
195 * Note that the term will still index the document even if all
196 * occurrences are removed. To remove a term from a document
197 * completely, use remove_term().
199 * @param tname The name of the term.
200 * @param tpos The position of the term.
201 * @param wdfdec The decrement that will be applied to the wdf
202 * when removing this posting (default: 1). The wdf will not go
203 * below the value of 0.
205 * @exception Xapian::InvalidArgumentError will be thrown if the term
206 * is not at the position specified in the position list for this term
207 * in this document.
209 * @exception Xapian::InvalidArgumentError will be thrown if the term
210 * is not in the document.
212 void remove_posting(const std::string & tname,
213 Xapian::termpos tpos,
214 Xapian::termcount wdfdec = 1);
216 /** Remove a range of postings for a term.
218 * Any instances of @a term at positions >= @a term_pos_first and
219 * <= @a term_pos_last will be removed, and the wdf reduced by
220 * @a wdf_dec (default: 1) for each instance removed (the wdf will
221 * never go below zero).
223 * It's OK if the term doesn't occur in the range of positions
224 * specified (unlike remove_posting()). And if
225 * @a term_pos_first > @a term_pos_last, this method does nothing.
227 * @return The number of postings removed.
229 * @exception Xapian::InvalidArgumentError will be thrown if the term
230 * is not in the document.
232 * @since Added in Xapian 1.4.8.
234 Xapian::termpos remove_postings(const std::string& term,
235 Xapian::termpos term_pos_first,
236 Xapian::termpos term_pos_last,
237 Xapian::termcount wdf_dec = 1);
239 /** Remove a term and all postings associated with it.
241 * @param tname The name of the term.
243 * @exception Xapian::InvalidArgumentError will be thrown if the term
244 * is not in the document.
246 void remove_term(const std::string & tname);
248 /// Remove all terms (and postings) from the document.
249 void clear_terms();
251 /** The length of the termlist - i.e. the number of different terms
252 * which index this document.
254 Xapian::termcount termlist_count() const;
256 /// Iterator for the terms in this document.
257 TermIterator termlist_begin() const;
259 /// End iterator to pair with termlist_begin(): a default-constructed TermIterator.
260 TermIterator XAPIAN_NOTHROW(termlist_end() const) {
261 return TermIterator();
264 /// Count the values in this document.
265 Xapian::termcount values_count() const;
267 /// Iterator for the values in this document.
268 ValueIterator values_begin() const;
270 /// End iterator to pair with values_begin(): a default-constructed ValueIterator.
271 ValueIterator XAPIAN_NOTHROW(values_end() const) {
272 return ValueIterator();
275 /** Get the document id which is associated with this document (if any).
277 * NB If multiple databases are being searched together, then this
278 * will be the document id in the individual database, not the merged
279 * database!
281 * @return If this document came from a database, return the document
282 * id in that database. Otherwise, return 0 (in Xapian
283 * 1.0.22/1.2.4 or later; prior to this the returned value was
284 * uninitialised).
286 docid get_docid() const;
288 /** Serialise document into a string.
290 * The document representation may change between Xapian releases:
291 * even between minor versions. However, it is guaranteed not to
292 * change if the remote database protocol has not changed between
293 * releases.
295 std::string serialise() const;
297 /** Unserialise a document from a string produced by serialise().
299 static Document unserialise(const std::string &serialised);
301 /// Return a string describing this object.
302 std::string get_description() const;
307 #endif // XAPIAN_INCLUDED_DOCUMENT_H