2 * @brief An indexed database of documents
4 /* Copyright 2003-2024 Olly Betts
5 * Copyright 2007,2008,2009 Lemur Consulting Ltd
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23 #ifndef XAPIAN_INCLUDED_DATABASE_H
24 #define XAPIAN_INCLUDED_DATABASE_H
26 #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD
27 # error Never use <xapian/database.h> directly; include <xapian.h> instead.
32 #include <string_view>
36 #include <xapian/attributes.h>
37 #include <xapian/constants.h>
38 #include <xapian/intrusive_ptr.h>
39 #include <xapian/positioniterator.h>
40 #include <xapian/postingiterator.h>
41 #include <xapian/termiterator.h>
42 #include <xapian/types.h>
43 #include <xapian/valueiterator.h>
44 #include <xapian/visibility.h>
50 class WritableDatabase
;
52 /** An indexed database of documents.
54 * A Database object contains zero or more shards, and operations are
55 * performed across these shards.
57 * To perform a search on a Database, you need to use an Enquire object.
59 * @since 1.5.0 This class is a reference counted handle like many other
60 * Xapian API classes. In earlier versions, it worked like a typedef
61 * to std::vector<database_shard>. The key difference is that
62 * previously copying or assigning a Xapian::Database made a deep copy,
63 * whereas now it makes a shallow copy.
65 * Most methods can throw:
67 * @exception Xapian::DatabaseCorruptError if database corruption is detected
68 * @exception Xapian::DatabaseError in various situations (for example, if
69 * there's an I/O error).
70 * @exception Xapian::DatabaseModifiedError if the revision being read has
72 * @exception Xapian::DatabaseClosedError may be thrown by some methods
73 * after @a close() has been called
74 * @exception Xapian::NetworkError when remote databases are in use
76 class XAPIAN_VISIBILITY_DEFAULT Database
{
77 /// @internal Implementation behind check() static methods.
78 static size_t check_(const std::string_view
* path_ptr
,
83 /// @internal Implementation behind public compact() methods.
84 void compact_(const std::string_view
* output_ptr
,
88 Xapian::Compactor
* compactor
) const;
91 /// @private @internal Implementation behind public add_database() methods.
92 void add_database_(const Database
& other
, bool read_only
);
95 /// Class representing the Database internals.
97 /// @private @internal Reference counted internals.
98 Xapian::Internal::intrusive_ptr_nonnull
<Internal
> internal
;
100 /** Add shards from another Database.
102 * Any shards in @a other are appended to the list of shards in this
103 * object. The shards are reference counted and also remain in @a other.
105 * @param other Another Database to add shards from
107 * @exception Xapian::InvalidArgumentError if @a other is the same object
110 void add_database(const Database
& other
) {
111 add_database_(other
, true);
114 /** Return number of shards in this Database object. */
117 /** Construct a Database containing no shards.
119 * You can then add shards by calling add_database(). A Database
120 * containing no shards can also be useful in situations where you need an
127 * @param path Filing system path to open database from
128 * @param flags Bitwise-or of Xapian::DB_* constants
130 * The @a path can be a file (for a stub database or a single-file glass
131 * database) or a directory (for a standard glass database). If
132 * @a flags includes @a DB_BACKEND_INMEMORY then @a path is ignored.
134 * @exception Xapian::DatabaseOpeningError if the specified database
136 * @exception Xapian::DatabaseVersionError if the specified database has
137 * a format too old or too new to be supported.
139 explicit Database(std::string_view path
, int flags
= 0);
141 /** Open a single-file Database.
143 * This method opens a single-file Database given a file descriptor open
144 * on it. Xapian looks starting at the current file offset, allowing a
145 * single file database to be easily embedded within another file.
147 * @param fd File descriptor for the file. Xapian takes ownership
148 * of this and will close it when the database is closed.
149 * @param flags Bitwise-or of Xapian::DB_* constants.
151 * @exception Xapian::DatabaseOpeningError if the specified database
153 * @exception Xapian::DatabaseVersionError if the specified database has
154 * a format too old or too new to be supported.
156 explicit Database(int fd
, int flags
= 0);
158 /// @private @internal Wrap an existing Internal.
159 XAPIAN_VISIBILITY_INTERNAL
160 explicit Database(Internal
* internal
) XAPIAN_NONNULL();
165 /** Copy constructor.
167 * The internals are reference counted, so copying is cheap.
169 Database(const Database
& o
);
171 /** Assignment operator.
173 * The internals are reference counted, so assignment is cheap.
175 Database
& operator=(const Database
& o
);
177 /// Move constructor.
178 Database(Database
&& o
);
180 /// Move assignment operator.
181 Database
& operator=(Database
&& o
);
183 /** Reopen the database at the latest available revision.
185 * Xapian databases (at least with most backends) support versioning
186 * such that a Database object uses a snapshot of the database.
187 * However, write operations may cause this snapshot to be discarded,
188 * which can cause Xapian::DatabaseModifiedError to be thrown. You
189 * can recover from this situation by calling reopen() and restarting
190 * the search operation.
192 * All shards are updated to the latest available revision. This should
193 * be a cheap operation if they're already at the latest revision, so
194 * if you're using the same Database object for many searches it's
195 * reasonable to call reopen() before each search.
197 * @return true if one or more shards have moved to a newer revision
198 * (if false is returned then it's definitely the case that no
199 * shards were reopened, which applications may find useful when
200 * caching results, etc). In Xapian < 1.3.0, this method did not
203 * @exception Xapian::DatabaseError is thrown if close() has been called
204 * on any of the shards.
208 /** Close the database.
210 * This closes the database and closes all its file handles.
212 * For a WritableDatabase, if a transaction is active it will be aborted,
213 * while if no transaction is active commit() will be implicitly called.
214 * Also the write lock is released.
216 * Calling close() on an object cannot be undone - in particular, a
217 * subsequent call to reopen() on the same object will not reopen it, but
218 * will instead throw a Xapian::DatabaseClosedError exception.
220 * Calling close() again on an object which has already been closed has
221 * no effect (and doesn't raise an exception).
223 * After close() has been called, calls to other methods of the database,
224 * and to methods of other objects associated with the database, will
227 * - behave exactly as they would have done if the database had not been
228 * closed (this can only happen if all the required data is cached)
230 * - raise a Xapian::DatabaseClosedError exception.
232 * The reason for this behaviour is that otherwise we'd have to check that
233 * the database is still open on every method call on every object
234 * associated with a Database, when in many cases they are working on data
235 * which has already been loaded and so they are able to just behave
238 * @since This method was added in Xapian 1.1.0.
242 /// Return a string describing this object.
243 virtual std::string
get_description() const;
245 /** Start iterating the postings of a term.
247 * @param term The term to iterate the postings of. An empty string
248 * acts as a special pseudo-term which indexes all the
249 * documents in the database with a wdf of 1.
251 PostingIterator
postlist_begin(std::string_view term
) const;
253 /** End iterator corresponding to postlist_begin(). */
254 PostingIterator
postlist_end(std::string_view
) const noexcept
{
255 return PostingIterator();
258 /** Start iterating the terms in a document.
260 * @param did The document id to iterate terms from
262 * The terms are returned in ascending string order (by byte value).
264 TermIterator
termlist_begin(Xapian::docid did
) const;
266 /** End iterator corresponding to termlist_begin(). */
267 TermIterator
termlist_end(Xapian::docid
) const noexcept
{
268 return TermIterator();
271 /** Does this database have any positional information? */
272 bool has_positions() const;
274 /** Start iterating positions for a term in a document.
276 * @param did The document id of the document
277 * @param term The term
279 * @since 1.1.0 If the specified document doesn't exist or the
280 * specified term doesn't exist in the specified document,
281 * then a valid iterator is still returned, but it will be
282 * equal to positionlist_end().
284 PositionIterator
positionlist_begin(Xapian::docid did
,
285 std::string_view term
) const;
287 /** End iterator corresponding to positionlist_begin(). */
288 PositionIterator
positionlist_end(Xapian::docid
,
289 std::string_view
) const noexcept
{
290 return PositionIterator();
293 /** Start iterating all terms in the database with a given prefix.
295 * The terms are returned in ascending string order (by byte value).
297 * @param prefix The prefix to restrict the returned terms to (default:
300 TermIterator
allterms_begin(std::string_view prefix
= {}) const;
302 /** End iterator corresponding to allterms_begin(prefix). */
303 TermIterator
allterms_end(std::string_view
= {}) const noexcept
305 return TermIterator();
308 /// Get the number of documents in the database.
309 Xapian::doccount
get_doccount() const;
311 /// Get the highest document id which has been used in the database.
312 Xapian::docid
get_lastdocid() const;
314 /// Get the mean document length in the database.
315 double get_average_length() const;
317 /// Old name for get_average_length(), kept for backward compatibility (simply forwards to it).
318 double get_avlength() const { return get_average_length(); }
320 /** Get the total length of all the documents in the database.
322 * @since Added in Xapian 1.4.5.
324 Xapian::totallength
get_total_length() const;
326 /** Get the number of documents indexed by a specified term.
328 * @param term The term to get the frequency of. An empty string
329 * acts as a special pseudo-term which indexes all the
330 * documents in the database, so returns get_doccount().
331 * If the term isn't present in the database, 0 is
334 Xapian::doccount
get_termfreq(std::string_view term
) const;
336 /** Test if a particular term is present in any document.
338 * @param term The term to test for. An empty string acts as a
339 * special pseudo-term which indexes all the documents in
340 * the database, so returns true if the database contains
343 * db.term_exists(t) gives the same answer as db.get_termfreq(t) != 0, but
344 * is typically more efficient.
346 bool term_exists(std::string_view term
) const;
348 /** Get the total number of occurrences of a specified term.
350 * The collection frequency of a term is defined as the total number of
351 * times it occurs in the database, which is the sum of its wdf in all the
352 * documents it indexes.
354 * @param term The term to get the collection frequency of. An empty
355 * string acts as a special pseudo-term which indexes all
356 * the documents in the database, so returns
357 * get_doccount(). If the term isn't present in the
358 * database, 0 is returned.
360 Xapian::termcount
get_collection_freq(std::string_view term
) const;
362 /** Return the frequency of a given value slot.
364 * This is the number of documents which have a (non-empty) value stored
367 * @param slot The value slot to examine.
369 Xapian::doccount
get_value_freq(Xapian::valueno slot
) const;
371 /** Get a lower bound on the values stored in the given value slot.
373 * If there are no values stored in the given value slot, this will return
376 * @param slot The value slot to examine.
378 std::string
get_value_lower_bound(Xapian::valueno slot
) const;
380 /** Get an upper bound on the values stored in the given value slot.
382 * If there are no values stored in the given value slot, this will return
385 * @param slot The value slot to examine.
387 std::string
get_value_upper_bound(Xapian::valueno slot
) const;
389 /** Get a lower bound on the length of a document in this DB.
391 * This bound does not include any zero-length documents.
393 Xapian::termcount
get_doclength_lower_bound() const;
395 /// Get an upper bound on the length of a document in this DB.
396 Xapian::termcount
get_doclength_upper_bound() const;
398 /// Get an upper bound on the wdf of term @a term.
399 Xapian::termcount
get_wdf_upper_bound(std::string_view term
) const;
401 /// Get a lower bound on the unique terms size of a document in this DB.
402 Xapian::termcount
get_unique_terms_lower_bound() const;
404 /// Get an upper bound on the unique terms size of a document in this DB.
405 Xapian::termcount
get_unique_terms_upper_bound() const;
407 /// Return an iterator over the value in slot @a slot for each document.
408 ValueIterator
valuestream_begin(Xapian::valueno slot
) const;
410 /// Return end iterator corresponding to valuestream_begin().
411 ValueIterator
valuestream_end(Xapian::valueno
) const noexcept
{
412 return ValueIterator();
415 /** Get the length of a document.
417 * @param did The document id of the document
419 * Xapian defines a document's length as the sum of the wdf of all the
420 * terms which index it.
422 Xapian::termcount
get_doclength(Xapian::docid did
) const;
424 /** Get the number of unique terms in a document.
426 * @param did The document id of the document
428 * This is the number of different terms which index the given document.
430 Xapian::termcount
get_unique_terms(Xapian::docid did
) const;
432 Xapian::termcount
get_wdfdocmax(Xapian::docid did
) const;
434 /** Send a keep-alive message.
436 * For remote databases, this method sends a message to the server to
437 * reset the timeout timer. As well as preventing timeouts at the Xapian
438 * remote protocol level, this message will also avoid timeouts at lower
441 * For local databases, this method does nothing.
445 /** Get a document from the database.
447 * The returned object acts as a handle which lazily fetches information
448 * about the specified document from the database.
450 * @param did The document ID of the document to get
451 * @param flags Zero or more flags bitwise-or-ed together (currently
452 * only Xapian::DOC_ASSUME_VALID is supported).
455 * @exception Xapian::InvalidArgumentError is thrown if @a did is 0.
457 * @exception Xapian::DocNotFoundError is thrown if the specified docid
458 * is not present in this database.
460 Xapian::Document
get_document(Xapian::docid did
,
461 unsigned flags
= 0) const;
463 /** Suggest a spelling correction.
465 * @param word The potentially misspelled word.
466 * @param max_edit_distance Only consider words which are at most
467 * @a max_edit_distance edits from @a
468 * word. An edit is a character
469 * insertion, deletion, or the
470 * transposition of two adjacent
471 * characters (default is 2).
473 std::string
get_spelling_suggestion(std::string_view word
,
474 unsigned max_edit_distance
= 2) const;
476 /** An iterator which returns all the spelling correction targets.
478 * This returns all the words which are considered as targets for the
479 * spelling correction algorithm. The frequency of each word is available
480 * as the term frequency of each entry in the returned iterator.
482 Xapian::TermIterator
spellings_begin() const;
484 /// End iterator corresponding to spellings_begin().
485 Xapian::TermIterator
spellings_end() const noexcept
{
486 return Xapian::TermIterator();
489 /** An iterator which returns all the synonyms for a given term.
491 * @param term The term to return synonyms for.
493 Xapian::TermIterator
synonyms_begin(std::string_view term
) const;
495 /// End iterator corresponding to synonyms_begin(term).
496 Xapian::TermIterator
synonyms_end(std::string_view
) const noexcept
{
497 return Xapian::TermIterator();
500 /** An iterator which returns all terms which have synonyms.
502 * @param prefix If non-empty, only terms with this prefix are returned.
504 Xapian::TermIterator
synonym_keys_begin(std::string_view prefix
= {}) const;
506 /// End iterator corresponding to synonym_keys_begin(prefix).
508 synonym_keys_end(std::string_view
= {}) const noexcept
{
509 return Xapian::TermIterator();
512 /** Get the user-specified metadata associated with a given key.
514 * User-specified metadata allows you to store arbitrary information in
515 * the form of (key, value) pairs. See @a
516 * WritableDatabase::set_metadata() for more information.
518 * When invoked on a Xapian::Database object representing multiple
519 * databases, currently only the metadata for the first is considered but
520 * this behaviour may change in the future.
522 * If there is no piece of metadata associated with the specified key, an
523 * empty string is returned (this applies even for backends which don't
526 * Empty keys are not valid, and specifying one will cause an exception.
528 * @param key The key of the metadata item to access.
530 * @return The retrieved metadata item's value.
532 * @exception Xapian::InvalidArgumentError will be thrown if the key
535 std::string
get_metadata(std::string_view key
) const;
537 /** An iterator which returns all user-specified metadata keys.
539 * When invoked on a Xapian::Database object representing multiple
540 * databases, currently only the metadata for the first is considered but
541 * this behaviour may change in the future.
543 * If the backend doesn't support metadata, then this method returns an
544 * iterator which compares equal to that returned by metadata_keys_end().
546 * @param prefix If non-empty, only keys with this prefix are returned.
548 * @exception Xapian::UnimplementedError will be thrown if the backend
549 * implements user-specified metadata, but doesn't implement
550 * iterating its keys (currently this happens for the InMemory
554 metadata_keys_begin(std::string_view prefix
= {}) const;
556 /// End iterator corresponding to metadata_keys_begin().
558 metadata_keys_end(std::string_view
= {}) const noexcept
{
559 return Xapian::TermIterator();
562 /** Get a UUID for the database.
564 * The UUID will persist for the lifetime of the database.
566 * Replicas (eg, made with the replication protocol, or by copying all the
567 * database files) will have the same UUID. However, copies (made with
568 * copydatabase, or xapian-compact) will have different UUIDs.
570 * If the backend does not support UUIDs or this database has no
571 * subdatabases, the UUID will be empty.
573 * If this database has multiple sub-databases, the UUID string will
574 * contain the UUIDs of all the sub-databases separated by colons.
576 std::string
get_uuid() const;
578 /** Test if this database is currently locked for writing.
580 * If the underlying object is actually a WritableDatabase, always returns
581 * true unless close() has been called.
583 * Otherwise tests if there's a writer holding the lock (or if we can't
584 * test for a lock without taking it on the current platform, throw
585 * Xapian::UnimplementedError). If there's an error while trying to test
586 * the lock, throws Xapian::DatabaseLockError.
588 * For multi-databases, this tests each sub-database and returns true if
589 * any of them are locked.
593 /** Lock a read-only database for writing.
595 * If the database is actually already writable (i.e. a WritableDatabase
596 * via a Database reference) then the same database is returned (with
597 * its flags updated, so this provides an efficient way to modify flags
598 * on an open WritableDatabase).
600 * Unlike unlock(), the object this is called on remains open.
602 * @param flags The flags to use for the writable database. Flags which
603 * specify how to open the database are ignored (e.g.
604 * DB_CREATE_OR_OVERWRITE doesn't result in the database
605 * being wiped), and flags which specify the backend are
606 * also ignored as they are only relevant when creating
609 * @return A WritableDatabase object open on the same database.
611 Xapian::WritableDatabase
lock(int flags
= 0);
613 /** Release a database write lock.
615 * If called on a read-only database then the same database is returned.
617 * If called on a writable database, the object this method was called
620 * @return A Database object open on the same database.
622 Xapian::Database
unlock();
624 /** Get the revision of the database.
626 * The revision is an unsigned integer which increases with each commit.
628 * The database must have exactly one sub-database, which must be of type
629 * glass. Otherwise an exception will be thrown.
632 * https://xapian.org/docs/deprecation#experimental-features
634 Xapian::rev
get_revision() const;
636 /** Check the integrity of a database or database table.
638 * @param path Path to database or table
639 * @param opts Options to use for check
640 * @param out std::ostream to write output to (NULL for no output)
642 static size_t check(std::string_view path
,
644 std::ostream
* out
= NULL
) {
645 return check_(&path
, 0, opts
, out
);
648 /** Check the integrity of a single file database.
650 * @param fd file descriptor for the database. The current file
651 * offset is used, allowing checking a single file
652 * database which is embedded within another file. Xapian
653 * takes ownership of the file descriptor and will close
654 * it before returning.
655 * @param opts Options to use for check
656 * @param out std::ostream to write output to (NULL for no output)
658 static size_t check(int fd
, int opts
= 0, std::ostream
* out
= NULL
) {
659 return check_(NULL
, fd
, opts
, out
);
662 /** Produce a compact version of this database.
664 * @param output Path to write the compact version to. This can be the
665 * same as an input if that input is a stub database (in
666 * which case the database(s) listed in the stub will be
667 * compacted to a new database and then the stub will be
668 * atomically updated to point to this new database).
670 * @param flags Any of the following combined using bitwise-or (| in
672 * - Xapian::DBCOMPACT_NO_RENUMBER By default the document ids will
673 * be renumbered in the output - currently by applying the same
674 * offset to all the document ids in a particular source database.
675 * If this flag is specified, then this renumbering doesn't
676 * happen, but all the document ids must be unique over all source
677 * databases. Currently the ranges of document ids in each source
678 * must not overlap either, though this restriction may be removed
680 * - Xapian::DBCOMPACT_MULTIPASS
681 * If merging more than 3 databases, merge the postlists in
682 * multiple passes, which is generally faster but requires more
683 * disk space for temporary files.
684 * - Xapian::DBCOMPACT_SINGLE_FILE
685 * Produce a single-file database (only supported for glass
688 * - Xapian::Compactor::STANDARD - Don't split items unnecessarily.
689 * - Xapian::Compactor::FULL - Split items whenever it saves space
691 * - Xapian::Compactor::FULLER - Allow oversize items to save more
692 * space (not recommended if you ever
693 * plan to update the compacted
695 * - At most one of the following to specify the output format (currently
696 * only glass to honey conversion is supported, and all shards of the
697 * input must have the same format):
698 * - Xapian::DB_BACKEND_HONEY
700 * @param block_size This specifies the block size (in bytes) to
701 * use for the output. For glass, the block size
702 * must be a power of 2 between 2048 and 65536
703 * (inclusive), and the default (also used if an
704 * invalid value is passed) is 8192 bytes.
706 * @since 1.3.4 This method was added to replace various methods of the
709 void compact(std::string_view output
,
711 int block_size
= 0) {
712 compact_(&output
, 0, flags
, block_size
, NULL
);
715 /** Produce a compact version of this database.
717 * This variant writes a single-file database to the specified file
718 * descriptor. Only the glass backend supports such databases, so
719 * this form is only supported for this backend.
721 * @param fd File descriptor to write the compact version to. The
722 * descriptor needs to be readable and writable (open with
723 * O_RDWR) and seekable. The current file offset is used,
724 * allowing compacting to a single file database embedded
725 * within another file. Xapian takes ownership of the
726 * file descriptor and will close it before returning.
728 * @param flags Any of the following combined using bitwise-or (| in
730 * - Xapian::DBCOMPACT_NO_RENUMBER By default the document ids will
731 * be renumbered in the output - currently by applying the same
732 * offset to all the document ids in a particular source database.
733 * If this flag is specified, then this renumbering doesn't
734 * happen, but all the document ids must be unique over all source
735 * databases. Currently the ranges of document ids in each source
736 * must not overlap either, though this restriction may be removed
738 * - Xapian::DBCOMPACT_MULTIPASS
739 * If merging more than 3 databases, merge the postlists in
740 * multiple passes, which is generally faster but requires more
741 * disk space for temporary files.
742 * - Xapian::DBCOMPACT_SINGLE_FILE
743 * Produce a single-file database (only supported for glass
746 * - Xapian::Compactor::STANDARD - Don't split items unnecessarily.
747 * - Xapian::Compactor::FULL - Split items whenever it saves space
749 * - Xapian::Compactor::FULLER - Allow oversize items to save more
750 * space (not recommended if you ever
751 * plan to update the compacted
754 * @param block_size This specifies the block size (in bytes) to
755 * use for the output. For glass, the block size
756 * must be a power of 2 between 2048 and 65536
757 * (inclusive), and the default (also used if an
758 * invalid value is passed) is 8192 bytes.
760 * @since 1.3.4 This method was added to replace various methods of the
765 int block_size
= 0) {
766 compact_(NULL
, fd
, flags
, block_size
, NULL
);
769 /** Produce a compact version of this database.
771 * The @a compactor functor allows handling progress output and
772 * specifying how user metadata is merged.
774 * @param output Path to write the compact version to.
775 * This can be the same as an input if that input is a
776 * stub database (in which case the database(s) listed
777 * in the stub will be compacted to a new database and
778 * then the stub will be atomically updated to point to
779 * this new database).
781 * @param flags Any of the following combined using bitwise-or (| in
783 * - Xapian::DBCOMPACT_NO_RENUMBER By default the document ids will
784 * be renumbered the output - currently by applying the same
785 * offset to all the document ids in a particular source database.
786 * If this flag is specified, then this renumbering doesn't
787 * happen, but all the document ids must be unique over all source
788 * databases. Currently the ranges of document ids in each source
789 * must not overlap either, though this restriction may be removed
791 * - Xapian::DBCOMPACT_MULTIPASS
792 * If merging more than 3 databases, merge the postlists in
793 * multiple passes, which is generally faster but requires more
794 * disk space for temporary files.
795 * - Xapian::DBCOMPACT_SINGLE_FILE
796 * Produce a single-file database (only supported for glass
799 * - Xapian::Compactor::STANDARD - Don't split items unnecessarily.
800 * - Xapian::Compactor::FULL - Split items whenever it saves space
802 * - Xapian::Compactor::FULLER - Allow oversize items to save more
803 * space (not recommended if you ever
804 * plan to update the compacted
807 * @param block_size This specifies the block size (in bytes) to
808 * use for the output. For glass, the block size
809 * must be a power of 2 between 2048 and 65536
810 * (inclusive), and the default (also used if an
811 * invalid value is passed) is 8192 bytes.
813 * @param compactor Functor
815 * @since 1.3.4 This method was added to replace various methods of the
818 void compact(std::string_view output
,
821 Xapian::Compactor
& compactor
)
823 compact_(&output
, 0, flags
, block_size
, &compactor
);
826 /** Produce a compact version of this database.
828 * The @a compactor functor allows handling progress output and specifying
829 * how user metadata is merged.
831 * This variant writes a single-file database to the specified file
832 * descriptor. Only the glass backend supports such databases, so this
833 * form is only supported for this backend.
835 * @param fd File descriptor to write the compact version to. The
836 * descriptor needs to be readable and writable (open with
837 * O_RDWR) and seekable. The current file offset is used,
838 * allowing compacting to a single file database embedded
839 * within another file. Xapian takes ownership of the
840 * file descriptor and will close it before returning.
842 * @param flags Any of the following combined using bitwise-or (| in
844 * - Xapian::DBCOMPACT_NO_RENUMBER By default the document ids will
845 * be renumbered the output - currently by applying the same
846 * offset to all the document ids in a particular source database.
847 * If this flag is specified, then this renumbering doesn't
848 * happen, but all the document ids must be unique over all source
849 * databases. Currently the ranges of document ids in each source
850 * must not overlap either, though this restriction may be removed
852 * - Xapian::DBCOMPACT_MULTIPASS
853 * If merging more than 3 databases, merge the postlists in
854 * multiple passes, which is generally faster but requires more
855 * disk space for temporary files.
856 * - Xapian::DBCOMPACT_SINGLE_FILE
857 * Produce a single-file database (only supported for glass
860 * - Xapian::Compactor::STANDARD - Don't split items unnecessarily.
861 * - Xapian::Compactor::FULL - Split items whenever it saves space
863 * - Xapian::Compactor::FULLER - Allow oversize items to save more
864 * space (not recommended if you ever
865 * plan to update the compacted
868 * @param block_size This specifies the block size (in bytes) to
869 * use for the output. For glass, the block size
870 * must be a power of 2 between 2048 and 65536
871 * (inclusive), and the default (also used if an
872 * invalid value is passed) is 8192 bytes.
874 * @param compactor Functor
876 * @since 1.3.4 This method was added to replace various methods of the
882 Xapian::Compactor
& compactor
)
884 compact_(NULL
, fd
, flags
, block_size
, &compactor
);
887 /** Reconstruct document text.
889 * This uses term positional information to reconstruct the document text
890 * which was indexed. Reading the required positional information is
891 * potentially quite I/O intensive.
893 * The reconstructed text will be missing punctuation and most
896 * @param did The document id of the document to reconstruct
897 * @param length Number of bytes of text to aim for - note that
898 * slightly more may be returned (default: 0 meaning
900 * @param prefix Term prefix to reconstruct (default: none)
901 * @param start_pos First position to reconstruct (default: 0)
902 * @param end_pos Last position to reconstruct (default: 0 meaning all)
904 std::string
reconstruct_text(Xapian::docid did
,
906 std::string_view prefix
= {},
907 Xapian::termpos start_pos
= 0,
908 Xapian::termpos end_pos
= 0) const;
911 /** This class provides read/write access to a database.
913 * A WritableDatabase object contains zero or more shards, and operations are
914 * performed across these shards. Documents added by add_document() are
915 * stored to the shards in a round-robin fashion.
917 * @since 1.5.0 This class is a reference counted handle like many other
918 * Xapian API classes. In earlier versions, it worked like a typedef
919 * to std::vector<database_shard>. The key difference is that
920 * previously copying or assigning a Xapian::Database made a deep copy,
921 * whereas now it makes a shallow copy.
923 * Most methods can throw:
925 * @exception Xapian::DatabaseCorruptError if database corruption is detected
926 * @exception Xapian::DatabaseError in various situations (for example, calling
927 * methods after @a close() has been called)
928 * @exception Xapian::NetworkError when remote databases are in use
930 class XAPIAN_VISIBILITY_DEFAULT WritableDatabase
: public Database
{
/** @internal @private Shared implementation behind commit_transaction()
 *  and cancel_transaction().
 *
 *  @param do_commit  Commit the transaction if true; cancel it if false.
 */
void end_transaction_(bool do_commit);
939 /** Create a WritableDatabase with no subdatabases.
941 * The created object isn't very useful in this state - it's intended
942 * as a placeholder value.
944 WritableDatabase() : Database() {}
946 /** Add shards from another WritableDatabase.
948 * Any shards in @a other are added to the list of shards in this object.
949 * The shards are reference counted and also remain in @a other.
951 * @param other Another WritableDatabase to add shards from
953 * @exception Xapian::InvalidArgumentError if @a other is the same object
956 void add_database(const WritableDatabase
& other
) {
957 // This method is provided mainly so that adding a Database to a
958 // WritableDatabase is a compile-time error - prior to 1.5.0, it
959 // would essentially act as a "black-hole" shard which discarded
960 // any changes made to it.
961 add_database_(other
, false);
964 /** Create or open a Xapian database for both reading and writing.
966 * @param path Filing system path for the database. If creating a
967 * new database with a backend which uses a directory of
968 * files (such as glass does by default) then Xapian will
969 * create a directory for @a path if necessary (but the
970 * parent directory must already exist).
972 * @param flags A bitwise-or (| in C++) combination of:
974 * * at most one of the following constants indicating how to handle
975 * the database already existing or not (the default action is
976 * Xapian::DB_CREATE_OR_OPEN):
978 * Constant | DB exists | DB doesn't exist
979 * ------------------------------ | --------- | ------------------
980 * Xapian::DB_CREATE_OR_OPEN | open | create
981 * Xapian::DB_CREATE | fail | create
982 * Xapian::DB_CREATE_OR_OVERWRITE | overwrite | create
983 * Xapian::DB_OPEN | open | fail
985 * * at most one of the follow constants indicating which backend to
986 * use when creating a new database, ignored when opening or overwriting
987 * an existing database (default: currently Xapian::DB_BACKEND_GLASS):
990 * ------------------------------ | -----------------------
991 * Xapian::DB_BACKEND_GLASS | Create a glass database
992 * Xapian::DB_BACKEND_CHERT | Create a chert database
993 * Xapian::DB_BACKEND_INMEMORY | Create inmemory DB (ignores @a path)
995 * * any number of the following flags:
997 * - Xapian::DB_NO_SYNC don't call fsync() or similar
998 * - Xapian::DB_FULL_SYNC try harder to ensure data is safe
999 * - Xapian::DB_DANGEROUS don't be crash-safe, no concurrent readers
1000 * - Xapian::DB_NO_TERMLIST don't use a termlist table
1001 * - Xapian::DB_RETRY_LOCK to wait to get a write lock
1003 * @param block_size The block size in bytes to use when creating a
1004 * new database. This is ignored when opening an
1005 * existing database, and by backends which don't
1006 * have the concept of a block size. The glass
1007 * backend allows block sizes which are a power of
1008 * 2 between 2048 and 65536 (inclusive) and its
1009 * default (also used instead of an invalid value)
1012 * @exception Xapian::DatabaseLockError is thrown if the database's
1013 * write lock could not be acquired.
1014 * @exception Xapian::DatabaseOpeningError if the specified database
1016 * @exception Xapian::DatabaseVersionError if the specified database has
1017 * a format too old or too new to be supported.
1019 explicit WritableDatabase(std::string_view path
,
1021 int block_size
= 0);
1023 /** @private @internal Create a WritableDatabase given its internals. */
1024 XAPIAN_VISIBILITY_INTERNAL
1025 explicit WritableDatabase(Database::Internal
* internal_
)
1026 : Database(internal_
) {}
1028 /** Copy constructor.
1030 * The internals are reference counted, so copying is cheap.
1032 WritableDatabase(const WritableDatabase
& o
) : Database(o
) {}
1034 /** Assignment operator.
1036 * The internals are reference counted, so assignment is cheap.
1038 WritableDatabase
& operator=(const WritableDatabase
& o
) {
1039 Database::operator=(o
);
1043 /// Move constructor.
1044 WritableDatabase(WritableDatabase
&& o
) : Database(std::move(o
)) {}
1046 /// Move assignment operator.
1047 WritableDatabase
& operator=(WritableDatabase
&& o
) {
1048 Database::operator=(std::move(o
));
1052 /** Commit pending modifications.
1054 * Updates to a Xapian database are more efficient when applied in bulk,
1055 * so by default Xapian stores modifications in memory until a threshold
1056 * is exceeded and then they are committed to disk.
1058 * When the database is closed (by an explicit call to close() or its
1059 * destructor being called) then commit() is implicitly called unless
1060 * a transaction is active.
1062 * You can force any such pending modifications to be committed by calling
1063 * this method, but bear in mind that the batching happens for a reason
1064 * and calling commit() a lot is likely to slow down indexing.
1066 * If the commit operation succeeds then the changes are reliably written
1067 * to disk and available to readers. If the commit operation fails, then
1068 * any pending modifications are discarded.
1070 * However, note that if called on a sharded database, atomicity isn't
1071 * guaranteed between shards - it's possible for the changes to one
1072 * shard to be committed but changes to another shard to fail.
1074 * It's not valid to call commit() within a transaction - see
1075 * begin_transaction() for more details of how transactions work in Xapian.
1078 * Currently batched modifications are automatically committed every
1079 * 10000 documents added, deleted, or modified. This value is rather
1080 * conservative, and if you have a machine with plenty of memory,
1081 * you can improve indexing throughput dramatically by setting
1082 * XAPIAN_FLUSH_THRESHOLD in the environment to a larger value.
1084 * @since This method was new in Xapian 1.1.0 - in earlier versions it
1085 * was called flush().
/** Start a transaction.
 *
 *  In Xapian, a transaction is a set of consecutive modifications which
 *  are committed as an atomic unit - any committed revision of the
 *  database contains either all of them or none of them.
 *
 *  Note, however, that when called on a sharded database there's no
 *  guarantee of atomicity between shards.  The transaction will still act
 *  atomically within each shard.
 *
 *  Call begin_transaction() to start a transaction; it can then either be
 *  completed by calling commit_transaction() or aborted by calling
 *  cancel_transaction().
 *
 *  If the database is closed (by an explicit call to close() or by its
 *  destructor being called) while a transaction is active then
 *  cancel_transaction() is implicitly called, which aborts the transaction
 *  and discards the changes made within it.
 *
 *  By default, both begin_transaction() and commit_transaction()
 *  implicitly call commit() so that whether the changes in the transaction
 *  are committed or not is independent of changes before or after it.
 *
 *  The downside of these implicit commit() calls is that small
 *  transactions can harm indexing performance in the same way that
 *  frequent explicit calls to commit() can.
 *
 *  If you're applying atomic groups of changes and only want to ensure
 *  that each group is either applied or not applied, you can prevent the
 *  automatic commit() before and after the transaction by starting it
 *  with begin_transaction(false).  However, if cancel_transaction() is
 *  called (or if commit_transaction() isn't called before the
 *  WritableDatabase object is destroyed) then any changes which were
 *  pending before the transaction began will be discarded too.
 *
 *  @param flushed  Is this a flushed transaction?  Transactions are
 *                  "flushed" by default, which means that committing a
 *                  transaction will ensure those changes are permanently
 *                  written to the database.  By contrast, unflushed
 *                  transactions only ensure that changes within the
 *                  transaction are either all applied or all aren't.
 *
 *  @exception Xapian::UnimplementedError is thrown if this is an InMemory
 *             database, which don't currently support transactions.
 *  @exception Xapian::InvalidOperationError will be thrown if a
 *             transaction is already active.
 */
void begin_transaction(bool flushed = true);
1138 /** Complete the transaction currently in progress.
1140 * If the transaction was begun as a flushed transaction then the changes
1141 * in it have been committed to the database upon successful completion
1144 * If an exception is thrown, then the changes in the transaction will be
1145 * discarded (if the transaction was not begun as a flushed transaction,
1146 * any changes made but not committed before begin_transaction() will also
1149 * In all cases the transaction will no longer be in progress.
1151 * Note that if called on a sharded database, atomicity isn't guaranteed
1152 * between shards. Within each shard, the transaction will still act
1155 * @exception Xapian::UnimplementedError is thrown if this is an InMemory
1156 * database, which don't currently support transactions.
1157 * @exception Xapian::InvalidOperationError is thrown if no transaction
1160 void commit_transaction() { end_transaction_(true); }
1162 /** Abort the transaction currently in progress.
1164 * Changes made within the current transaction will be discarded (if the
1165 * transaction was not begun as a flushed transaction, any changes made
1166 * but not committed before begin_transaction() will also be discarded).
1168 * @exception Xapian::UnimplementedError is thrown if this is an InMemory
1169 * database, which don't currently support transactions.
1170 * @exception Xapian::InvalidOperationError is thrown if no transaction
1173 void cancel_transaction() { end_transaction_(false); }
1175 /** Add a document to the database.
1177 * The document is allocated document ID (get_lastdocid() + 1) - the
1178 * next highest document ID which has never previously been used by
1179 * this database (so docids from deleted documents won't be reused).
1181 * If you want to specify the document ID to be used, you should
1182 * call replace_document() instead.
1184 * If a transaction is active, the document addition is added to the
1185 * transaction; otherwise it is added to the current batch of changes.
1186 * Either way, it won't be visible to readers right away (unless we're
1187 * not in a transaction and the addition triggers an automatic commit).
1189 * @param doc The Document object to be added.
1191 * @return The document ID allocated to the document.
1193 Xapian::docid
add_document(const Xapian::Document
& doc
);
1195 /** Delete a document from the database.
1197 * This method removes the document with the specified document ID
1198 * from the database.
1200 * If a transaction is active, the document removal is added to the
1201 * transaction; otherwise it is added to the current batch of changes.
1202 * Either way, it won't be visible to readers right away (unless we're
1203 * not in a transaction and the addition triggers an automatic commit).
1205 * @param did The document ID of the document to be removed.
1207 void delete_document(Xapian::docid did
);
/** Remove from the database every document indexed by a term.
 *
 *  Any documents which are indexed by the specified term are removed from
 *  the database.
 *
 *  This is chiefly a convenience for when UIDs from another system are
 *  mapped to terms in Xapian, but there are other uses too (for example,
 *  you could add a "deletion date" term to documents at index time and
 *  use this method to delete all documents due for deletion on a
 *  particular date).
 *
 *  @param unique_term  The term to remove references to.
 *
 *  @since 1.5.0 The changes made by this method are made atomically.
 *         Previously automatic commits could happen during the operation.
 */
void delete_document(std::string_view unique_term);
1228 /** Replace a document in the database.
1230 * This method replaces the document with the specified document ID.
1231 * If document ID @a did isn't currently used, the document will be
1232 * added with document ID @a did.
1234 * The monotonic counter used for automatically allocating document
1235 * IDs is increased so that the next automatically allocated document
1236 * ID will be did + 1. Be aware that if you use this method to
1237 * specify a high document ID for a new document, and also use
1238 * WritableDatabase::add_document(), Xapian may get to a state where
1239 * this counter wraps around and will be unable to automatically
1240 * allocate document IDs!
1242 * Note that changes to the database won't be immediately committed to
1243 * disk; see commit() for more details.
1245 * @param did The document ID of the document to be replaced.
1246 * @param document The new document.
1248 void replace_document(Xapian::docid did
, const Xapian::Document
& document
);
1250 /** Replace any documents matching a term.
1252 * This method replaces any documents indexed by the specified term
1253 * with the specified document. If any documents are indexed by the
1254 * term, the lowest document ID will be used for the document,
1255 * otherwise a new document ID will be generated as for add_document.
1257 * One common use is to allow UIDs from another system to easily be
1258 * mapped to terms in Xapian. Note that this method doesn't
1259 * automatically add unique_term as a term, so you'll need to call
1260 * document.add_term(unique_term) first when using replace_document()
1263 * Note that changes to the database won't be immediately committed to
1264 * disk; see commit() for more details.
1266 * @param unique_term The "unique" term.
1267 * @param document The new document.
1269 * @return The document ID used by the new document. If term existed
1270 * in the database, this will be the first document ID that
1271 * was indexed by that term; otherwise the database allocates
1272 * (get_lastdocid() + 1) as it does for add_document().
1274 * @since 1.5.0 The changes made by this method are made atomically.
1275 * Previously automatic commits could happen during the
1278 Xapian::docid
replace_document(std::string_view unique_term
,
1279 const Xapian::Document
& document
);
1281 /** Add a word to the spelling dictionary.
1283 * If the word is already present, its frequency is increased.
1285 * @param word The word to add.
1286 * @param freqinc How much to increase its frequency by (default 1).
1288 void add_spelling(std::string_view word
,
1289 Xapian::termcount freqinc
= 1) const;
1291 /** Remove a word from the spelling dictionary.
1293 * The word's frequency is decreased, and if would become zero or less
1294 * then the word is removed completely.
1296 * @param word The word to remove.
1297 * @param freqdec How much to decrease its frequency by (default 1).
1299 * @return Any "unused" freqdec (if the word's frequency was less than
1300 * freqdec then the difference is returned, else 0 is returned).
1301 * Prior to 1.5.0 this method had void return type.
1303 termcount
remove_spelling(std::string_view word
,
1304 termcount freqdec
= 1) const;
1306 /** Add a synonym for a term.
1308 * @param term The term to add a synonym for.
1309 * @param synonym The synonym to add. If this is already a synonym for
1310 * @a term, then no action is taken.
1312 void add_synonym(std::string_view term
,
1313 std::string_view synonym
) const;
1315 /** Remove a synonym for a term.
1317 * @param term The term to remove a synonym for.
1318 * @param synonym The synonym to remove. If this isn't currently a
1319 * synonym for @a term, then no action is taken.
1321 void remove_synonym(std::string_view term
,
1322 std::string_view synonym
) const;
1324 /** Remove all synonyms for a term.
1326 * @param term The term to remove all synonyms for. If the term has
1327 * no synonyms, no action is taken.
1329 void clear_synonyms(std::string_view term
) const;
/** Set the user-specified metadata associated with a given key.
 *
 *  The metadata value associated with the given key is set.  A metadata
 *  value already stored in the database under the same key is replaced.
 *  To delete an existing item of metadata, just set its value to the
 *  empty string.
 *
 *  User-specified metadata lets you store arbitrary information in the
 *  form of (key, value) pairs.
 *
 *  There's no hard limit on how many metadata items there can be, or on
 *  the size of the metadata values.  Metadata keys have a limited length,
 *  which depends on the backend.  We recommend limiting them to 200
 *  bytes.  Empty keys are not valid, and specifying one will cause an
 *  exception.
 *
 *  Metadata modifications are committed to disk in the same way as
 *  modifications to the documents in the database are: i.e.,
 *  modifications are atomic, and won't be committed to disk immediately
 *  (see commit() for more details).  This allows metadata to be used to
 *  link databases with versioned external resources by storing the
 *  appropriate version number in a metadata item.
 *
 *  You can also use the metadata to store arbitrary extra information
 *  associated with terms, documents, or postings by encoding the termname
 *  and/or document id into the metadata key.
 *
 *  @param key       The key of the metadata item to set.
 *
 *  @param metadata  The value of the metadata item to set.
 *
 *  @exception Xapian::DatabaseError will be thrown if a problem occurs
 *             while writing to the database.
 *
 *  @exception Xapian::DatabaseCorruptError will be thrown if the database
 *             is in a corrupt state.
 *
 *  @exception Xapian::InvalidArgumentError will be thrown if the key
 *             supplied is empty.
 *
 *  @exception Xapian::UnimplementedError will be thrown if the database
 *             backend in use doesn't support user-specified metadata.
 */
void set_metadata(std::string_view key,
                  std::string_view metadata);
1375 /// Return a string describing this object.
1376 std::string
get_description() const;
1381 #endif // XAPIAN_INCLUDED_DATABASE_H