2 * @brief API for working with Xapian databases
4 /* Copyright 1999,2000,2001 BrightStation PLC
5 * Copyright 2002 Ananova Ltd
6 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2011,2012,2013,2014,2015,2016,2017,2019 Olly Betts
7 * Copyright 2006,2008 Lemur Consulting Ltd
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License as
11 * published by the Free Software Foundation; either version 2 of the
12 * License, or (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
25 #ifndef XAPIAN_INCLUDED_DATABASE_H
26 #define XAPIAN_INCLUDED_DATABASE_H
28 #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD
29 # error Never use <xapian/database.h> directly; include <xapian.h> instead.
34 #ifdef XAPIAN_MOVE_SEMANTICS
39 #include <xapian/attributes.h>
40 #include <xapian/deprecated.h>
41 #include <xapian/intrusive_ptr.h>
42 #include <xapian/types.h>
43 #include <xapian/positioniterator.h>
44 #include <xapian/postingiterator.h>
45 #include <xapian/termiterator.h>
46 #include <xapian/valueiterator.h>
47 #include <xapian/visibility.h>
54 /** This class is used to access a database, or a group of databases.
56 * For searching, this class is used in conjunction with an Enquire object.
58 * @exception InvalidArgumentError will be thrown if an invalid
59 * argument is supplied, for example, an unknown database type.
61 * @exception DatabaseOpeningError may be thrown if the database cannot
62 * be opened (for example, a required file cannot be found).
64 * @exception DatabaseVersionError may be thrown if the database is in an
65 * unsupported format (for example, created by a newer version of Xapian
66 * which uses an incompatible format).
68 class XAPIAN_VISIBILITY_DEFAULT Database
{
69 /// @internal Implementation behind check() static methods.
70 static size_t check_(const std::string
* path_ptr
, int fd
, int opts
,
73 /// Internal helper behind public compact() methods.
74 void compact_(const std::string
* output_ptr
,
78 Xapian::Compactor
* compactor
) const;
82 /// @private @internal Reference counted internals.
83 std::vector
<Xapian::Internal::intrusive_ptr
<Internal
> > internal
;
85 /** Add an existing database (or group of databases) to those
86 * accessed by this object.
88 * @param database the database(s) to add.
90 void add_database(const Database
& database
);
92 /** Return number of shards in this Database object. */
94 return internal
.size();
97 /** Create a Database with no databases in it.
101 /** Open a Database, automatically determining the database
104 * @param path directory that the database is stored in.
105 * @param flags Bitwise-or of Xapian::DB_* constants.
107 explicit Database(const std::string
&path
, int flags
= 0);
109 /** Open a single-file Database.
111 * This method opens a single-file Database given a file descriptor
112 * open on it. Xapian looks for the database starting at the current file offset,
113 * allowing a single file database to be easily embedded within
116 * @param fd file descriptor for the file. Xapian takes ownership of
117 * this and will close it when the database is closed.
118 * @param flags Bitwise-or of Xapian::DB_* constants.
120 explicit Database(int fd
, int flags
= 0);
122 /** @private @internal Create a Database from its internals.
124 explicit Database(Internal
*internal
);
126 /** Destroy this handle on the database.
128 * If there are no copies of this object remaining, the database(s)
133 /** Copying is allowed. The internals are reference counted, so
136 * @param other The object to copy.
138 Database(const Database
&other
);
140 /** Assignment is allowed. The internals are reference counted,
141 * so assignment is cheap.
143 * @param other The object to copy.
145 void operator=(const Database
&other
);
147 #ifdef XAPIAN_MOVE_SEMANTICS
148 /// Move constructor.
149 Database(Database
&& o
);
151 /// Move assignment operator.
152 Database
& operator=(Database
&& o
);
155 /** Re-open the database.
157 * This re-opens the database(s) to the latest available version(s).
158 * It can be used either to make sure the latest results are returned,
159 * or to recover from a Xapian::DatabaseModifiedError.
161 * Calling reopen() on a database which has been closed (with @a
162 * close()) will always raise a Xapian::DatabaseError.
164 * @return true if the database might have been reopened (if false
165 * is returned, the database definitely hasn't been
166 * reopened, which applications may find useful when
167 * caching results, etc). In Xapian < 1.3.0, this method
168 * did not return a value.
172 /** Close the database.
174 * This closes the database and closes all its file handles.
176 * For a WritableDatabase, if a transaction is active it will be
177 * aborted, while if no transaction is active commit() will be
178 * implicitly called. Also the write lock is released.
180 * Closing a database cannot be undone - in particular, calling
181 * reopen() after close() will not reopen it, but will instead throw a
182 * Xapian::DatabaseError exception.
184 * Calling close() again on a database which has already been closed
185 * has no effect (and doesn't raise an exception).
187 * After close() has been called, calls to other methods of the
188 * database, and to methods of other objects associated with the
189 * database, will either:
191 * - behave exactly as they would have done if the database had not
192 * been closed (this can only happen if all the required data is
195 * - raise a Xapian::DatabaseError exception indicating that the
196 * database is closed.
198 * The reason for this behaviour is that otherwise we'd have to check
199 * that the database is still open on every method call on every
200 * object associated with a Database, when in many cases they are
201 * working on data which has already been loaded and so they are able
202 * to just behave correctly.
204 * This method was added in Xapian 1.1.0.
206 virtual void close();
208 /// Return a string describing this object.
209 virtual std::string
get_description() const;
211 /** An iterator pointing to the start of the postlist
214 * @param tname The termname to iterate postings for. If the
215 * term name is the empty string, the iterator
216 * returned will list all the documents in the
217 * database. Such an iterator will always return
218 * a WDF value of 1, since there is no obvious
219 * meaning for this quantity in this case.
221 PostingIterator
postlist_begin(const std::string
&tname
) const;
223 /** Corresponding end iterator to postlist_begin().
225 PostingIterator
XAPIAN_NOTHROW(postlist_end(const std::string
&) const) {
226 return PostingIterator();
229 /** An iterator pointing to the start of the termlist
230 * for a given document.
232 * @param did The document id of the document to iterate terms for.
234 TermIterator
termlist_begin(Xapian::docid did
) const;
236 /** Corresponding end iterator to termlist_begin().
238 TermIterator
XAPIAN_NOTHROW(termlist_end(Xapian::docid
) const) {
239 return TermIterator();
242 /** Does this database have any positional information? */
243 bool has_positions() const;
245 /** An iterator pointing to the start of the position list
246 * for a given term in a given document.
248 PositionIterator
positionlist_begin(Xapian::docid did
, const std::string
&tname
) const;
250 /** Corresponding end iterator to positionlist_begin().
252 PositionIterator
XAPIAN_NOTHROW(positionlist_end(Xapian::docid
, const std::string
&) const) {
253 return PositionIterator();
256 /** An iterator which runs across all terms with a given prefix.
258 * @param prefix The prefix to restrict the returned terms to (default:
261 TermIterator
allterms_begin(const std::string
& prefix
= std::string()) const;
263 /** Corresponding end iterator to allterms_begin(prefix).
265 TermIterator
XAPIAN_NOTHROW(allterms_end(const std::string
& = std::string()) const) {
266 return TermIterator();
269 /// Get the number of documents in the database.
270 Xapian::doccount
get_doccount() const;
272 /// Get the highest document id which has been used in the database.
273 Xapian::docid
get_lastdocid() const;
275 /// Get the average length of the documents in the database.
276 Xapian::doclength
get_avlength() const;
278 /** New name for get_avlength().
280 * Added for forward compatibility with the next release series.
284 double get_average_length() const { return get_avlength(); }
286 /** Get the total length of all the documents in the database.
288 * Added in Xapian 1.4.5.
290 Xapian::totallength
get_total_length() const;
292 /// Get the number of documents in the database indexed by a given term.
293 Xapian::doccount
get_termfreq(const std::string
& tname
) const;
295 /** Check if a given term exists in the database.
297 * @param tname The term to test the existence of.
299 * @return true if and only if the term exists in the database.
300 * This is the same as (get_termfreq(tname) != 0), but
301 * will often be more efficient.
303 bool term_exists(const std::string
& tname
) const;
305 /** Return the total number of occurrences of the given term.
307 * This is the sum of the number of occurrences of the term in each
308 * document it indexes: i.e., the sum of the within document
309 * frequencies of the term.
311 * @param tname The term whose collection frequency is being
314 Xapian::termcount
get_collection_freq(const std::string
& tname
) const;
316 /** Return the frequency of a given value slot.
318 * This is the number of documents which have a (non-empty) value
319 * stored in the slot.
321 * @param slot The value slot to examine.
323 Xapian::doccount
get_value_freq(Xapian::valueno slot
) const;
325 /** Get a lower bound on the values stored in the given value slot.
327 * If there are no values stored in the given value slot, this will
328 * return an empty string.
330 * @param slot The value slot to examine.
332 std::string
get_value_lower_bound(Xapian::valueno slot
) const;
334 /** Get an upper bound on the values stored in the given value slot.
336 * If there are no values stored in the given value slot, this will
337 * return an empty string.
339 * @param slot The value slot to examine.
341 std::string
get_value_upper_bound(Xapian::valueno slot
) const;
343 /** Get a lower bound on the length of a document in this DB.
345 * This bound does not include any zero-length documents.
347 Xapian::termcount
get_doclength_lower_bound() const;
349 /// Get an upper bound on the length of a document in this DB.
350 Xapian::termcount
get_doclength_upper_bound() const;
352 /// Get an upper bound on the wdf of term @a term.
353 Xapian::termcount
get_wdf_upper_bound(const std::string
& term
) const;
355 /// Return an iterator over the value in slot @a slot for each document.
356 ValueIterator
valuestream_begin(Xapian::valueno slot
) const;
358 /// Return end iterator corresponding to valuestream_begin().
359 ValueIterator
XAPIAN_NOTHROW(valuestream_end(Xapian::valueno
) const) {
360 return ValueIterator();
363 /// Get the length of a document.
364 Xapian::termcount
get_doclength(Xapian::docid did
) const;
366 /// Get the number of unique terms in a document.
367 Xapian::termcount
get_unique_terms(Xapian::docid did
) const;
369 /** Send a "keep-alive" to remote databases to stop them timing out.
371 * Has no effect on non-remote databases.
375 /** Get a document from the database, given its document id.
377 * This method returns a Xapian::Document object which provides the
378 * information about a document.
380 * @param did The document id of the document to retrieve.
382 * @return A Xapian::Document object containing the document data
384 * @exception Xapian::DocNotFoundError The document specified
385 * could not be found in the database.
387 * @exception Xapian::InvalidArgumentError did was 0, which is not
388 * a valid document id.
390 Xapian::Document
get_document(Xapian::docid did
) const;
392 /** Get a document from the database, given its document id.
394 * This method returns a Xapian::Document object which provides the
395 * information about a document.
397 * @param did The document id of the document to retrieve.
398 * @param flags Zero or more flags bitwise-or-ed together (currently
399 * only Xapian::DOC_ASSUME_VALID is supported).
401 * @return A Xapian::Document object containing the document data
403 * @exception Xapian::DocNotFoundError The document specified
404 * could not be found in the database.
406 * @exception Xapian::InvalidArgumentError did was 0, which is not
407 * a valid document id.
409 Xapian::Document
get_document(Xapian::docid did
, unsigned flags
) const;
411 /** Suggest a spelling correction.
413 * @param word The potentially misspelled word.
414 * @param max_edit_distance Only consider words which are at most
415 * @a max_edit_distance edits from @a word. An edit is a
416 * character insertion, deletion, or the transposition of two
417 * adjacent characters (default is 2).
419 std::string
get_spelling_suggestion(const std::string
&word
,
420 unsigned max_edit_distance
= 2) const;
422 /** An iterator which returns all the spelling correction targets.
424 * This returns all the words which are considered as targets for the
425 * spelling correction algorithm. The frequency of each word is
426 * available as the term frequency of each entry in the returned
429 Xapian::TermIterator
spellings_begin() const;
431 /// Corresponding end iterator to spellings_begin().
432 Xapian::TermIterator
XAPIAN_NOTHROW(spellings_end() const) {
433 return Xapian::TermIterator();
436 /** An iterator which returns all the synonyms for a given term.
438 * @param term The term to return synonyms for.
440 Xapian::TermIterator
synonyms_begin(const std::string
&term
) const;
442 /// Corresponding end iterator to synonyms_begin(term).
443 Xapian::TermIterator
XAPIAN_NOTHROW(synonyms_end(const std::string
&) const) {
444 return Xapian::TermIterator();
447 /** An iterator which returns all terms which have synonyms.
449 * @param prefix If non-empty, only terms with this prefix are
452 Xapian::TermIterator
synonym_keys_begin(const std::string
&prefix
= std::string()) const;
454 /// Corresponding end iterator to synonym_keys_begin(prefix).
455 Xapian::TermIterator
XAPIAN_NOTHROW(synonym_keys_end(const std::string
& = std::string()) const) {
456 return Xapian::TermIterator();
459 /** Get the user-specified metadata associated with a given key.
461 * User-specified metadata allows you to store arbitrary information
462 * in the form of (key, value) pairs. See @a
463 * WritableDatabase::set_metadata() for more information.
465 * When invoked on a Xapian::Database object representing multiple
466 * databases, currently only the metadata for the first is considered
467 * but this behaviour may change in the future.
469 * If there is no piece of metadata associated with the specified
470 * key, an empty string is returned (this applies even for backends
471 * which don't support metadata).
473 * Empty keys are not valid, and specifying one will cause an
476 * @param key The key of the metadata item to access.
478 * @return The retrieved metadata item's value.
480 * @exception Xapian::InvalidArgumentError will be thrown if the
481 * key supplied is empty.
483 std::string
get_metadata(const std::string
& key
) const;
485 /** An iterator which returns all user-specified metadata keys.
487 * When invoked on a Xapian::Database object representing multiple
488 * databases, currently only the metadata for the first is considered
489 * but this behaviour may change in the future.
491 * If the backend doesn't support metadata, then this method returns
492 * an iterator which compares equal to that returned by
493 * metadata_keys_end().
495 * @param prefix If non-empty, only keys with this prefix are
498 * @exception Xapian::UnimplementedError will be thrown if the
499 * backend implements user-specified metadata, but
500 * doesn't implement iterating its keys (currently
501 * this happens for the InMemory backend).
503 Xapian::TermIterator
metadata_keys_begin(const std::string
&prefix
= std::string()) const;
505 /// Corresponding end iterator to metadata_keys_begin().
506 Xapian::TermIterator
XAPIAN_NOTHROW(metadata_keys_end(const std::string
& = std::string()) const) {
507 return Xapian::TermIterator();
510 /** Get a UUID for the database.
512 * The UUID will persist for the lifetime of the database.
514 * Replicas (eg, made with the replication protocol, or by copying all
515 * the database files) will have the same UUID. However, copies (made
516 * with copydatabase, or xapian-compact) will have different UUIDs.
518 * If the backend does not support UUIDs or this database has no
519 * subdatabases, the UUID will be empty.
521 * If this database has multiple sub-databases, the UUID string will
522 * contain the UUIDs of all the sub-databases.
524 std::string
get_uuid() const;
526 /** Test if this database is currently locked for writing.
528 * If the underlying object is actually a WritableDatabase, always
531 * Otherwise tests if there's a writer holding the lock (or if
532 * we can't test for a lock without taking it on the current platform,
533 * throw Xapian::UnimplementedError). If there's an error while
534 * trying to test the lock, throws Xapian::DatabaseLockError.
536 * For multi-databases, this tests each sub-database and returns
537 * true if any of them are locked.
541 /** Get the revision of the database.
543 * The revision is an unsigned integer which increases with each
546 * The database must have exactly one sub-database, which must be of
547 * type chert or glass. Otherwise an exception will be thrown.
550 * https://xapian.org/docs/deprecation#experimental-features
552 Xapian::rev
get_revision() const;
554 /** Check the integrity of a database or database table.
556 * @param path Path to database or table
557 * @param opts Options to use for check
558 * @param out std::ostream to write output to (NULL for no output)
560 static size_t check(const std::string
& path
, int opts
= 0,
561 std::ostream
*out
= NULL
) {
562 return check_(&path
, 0, opts
, out
);
565 /** Check the integrity of a single file database.
567 * @param fd file descriptor for the database. The current file
568 * offset is used, allowing checking a single file
569 * database which is embedded within another file. Xapian
570 * takes ownership of the file descriptor and will close
571 * it before returning.
572 * @param opts Options to use for check
573 * @param out std::ostream to write output to (NULL for no output)
575 static size_t check(int fd
, int opts
= 0, std::ostream
*out
= NULL
) {
576 return check_(NULL
, fd
, opts
, out
);
579 /** Produce a compact version of this database.
581 * New in 1.3.4. Various methods of the Compactor class were deprecated
584 * @param output Path to write the compact version to.
585 * This can be the same as an input if that input is a
586 * stub database (in which case the database(s) listed
587 * in the stub will be compacted to a new database and
588 * then the stub will be atomically updated to point to
589 * this new database).
591 * @param flags Any of the following combined using bitwise-or (| in
593 * - Xapian::DBCOMPACT_NO_RENUMBER By default the document ids will
594 * be renumbered in the output - currently by applying the
595 * same offset to all the document ids in a particular
596 * source database. If this flag is specified, then this
597 * renumbering doesn't happen, but all the document ids
598 * must be unique over all source databases. Currently
599 * the ranges of document ids in each source must not
600 * overlap either, though this restriction may be removed
602 * - Xapian::DBCOMPACT_MULTIPASS
603 * If merging more than 3 databases, merge the postlists
604 * in multiple passes, which is generally faster but
605 * requires more disk space for temporary files.
606 * - Xapian::DBCOMPACT_SINGLE_FILE
607 * Produce a single-file database (only supported for
610 * - Xapian::Compactor::STANDARD - Don't split items unnecessarily.
611 * - Xapian::Compactor::FULL - Split items whenever it saves
612 * space (the default).
613 * - Xapian::Compactor::FULLER - Allow oversize items to save
614 * more space (not recommended if you ever plan to update the
615 * compacted database).
617 * @param block_size This specifies the block size (in bytes)
618 * to use for the output. For glass, the block size must
619 * be a power of 2 between 2048 and 65536 (inclusive), and
620 * the default (also used if an invalid value is passed)
623 void compact(const std::string
& output
,
625 int block_size
= 0) {
626 compact_(&output
, 0, flags
, block_size
, NULL
);
629 /** Produce a compact version of this database.
631 * New in 1.3.4. Various methods of the Compactor class were deprecated
634 * This variant writes a single-file database to the specified file
635 * descriptor. Only the glass backend supports such databases, so
636 * this form is only supported for this backend.
638 * @param fd File descriptor to write the compact version to. The
639 * descriptor needs to be readable and writable (open with
640 * O_RDWR) and seekable. The current file offset is used,
641 * allowing compacting to a single file database embedded
642 * within another file. Xapian takes ownership of the
643 * file descriptor and will close it before returning.
645 * @param flags Any of the following combined using bitwise-or (| in
647 * - Xapian::DBCOMPACT_NO_RENUMBER By default the document ids will
648 * be renumbered in the output - currently by applying the
649 * same offset to all the document ids in a particular
650 * source database. If this flag is specified, then this
651 * renumbering doesn't happen, but all the document ids
652 * must be unique over all source databases. Currently
653 * the ranges of document ids in each source must not
654 * overlap either, though this restriction may be removed
656 * - Xapian::DBCOMPACT_MULTIPASS
657 * If merging more than 3 databases, merge the postlists
658 * in multiple passes, which is generally faster but
659 * requires more disk space for temporary files.
660 * - Xapian::DBCOMPACT_SINGLE_FILE
661 * Produce a single-file database (only supported for
662 * glass currently) - this flag is implied in this form
663 * and need not be specified explicitly.
665 * @param block_size This specifies the block size (in bytes)
666 * to use for the output. For glass, the block size must
667 * be a power of 2 between 2048 and 65536 (inclusive), and
668 * the default (also used if an invalid value is passed)
673 int block_size
= 0) {
674 compact_(NULL
, fd
, flags
, block_size
, NULL
);
677 /** Produce a compact version of this database.
679 * New in 1.3.4. Various methods of the Compactor class were deprecated
682 * The @a compactor functor allows handling progress output and
683 * specifying how user metadata is merged.
685 * @param output Path to write the compact version to.
686 * This can be the same as an input if that input is a
687 * stub database (in which case the database(s) listed
688 * in the stub will be compacted to a new database and
689 * then the stub will be atomically updated to point to
690 * this new database).
692 * @param flags Any of the following combined using bitwise-or (| in
694 * - Xapian::DBCOMPACT_NO_RENUMBER By default the document ids will
695 * be renumbered in the output - currently by applying the
696 * same offset to all the document ids in a particular
697 * source database. If this flag is specified, then this
698 * renumbering doesn't happen, but all the document ids
699 * must be unique over all source databases. Currently
700 * the ranges of document ids in each source must not
701 * overlap either, though this restriction may be removed
703 * - Xapian::DBCOMPACT_MULTIPASS
704 * If merging more than 3 databases, merge the postlists
705 * in multiple passes, which is generally faster but
706 * requires more disk space for temporary files.
707 * - Xapian::DBCOMPACT_SINGLE_FILE
708 * Produce a single-file database (only supported for
711 * @param block_size This specifies the block size (in bytes)
712 * to use for the output. For glass, the block size must
713 * be a power of 2 between 2048 and 65536 (inclusive), and
714 * the default (also used if an invalid value is passed)
717 * @param compactor Functor
719 void compact(const std::string
& output
,
722 Xapian::Compactor
& compactor
)
724 compact_(&output
, 0, flags
, block_size
, &compactor
);
727 /** Produce a compact version of this database.
729 * New in 1.3.4. Various methods of the Compactor class were deprecated
732 * The @a compactor functor allows handling progress output and
733 * specifying how user metadata is merged.
735 * This variant writes a single-file database to the specified file
736 * descriptor. Only the glass backend supports such databases, so
737 * this form is only supported for this backend.
739 * @param fd File descriptor to write the compact version to. The
740 * descriptor needs to be readable and writable (open with
741 * O_RDWR) and seekable. The current file offset is used,
742 * allowing compacting to a single file database embedded
743 * within another file. Xapian takes ownership of the
744 * file descriptor and will close it before returning.
746 * @param flags Any of the following combined using bitwise-or (| in
748 * - Xapian::DBCOMPACT_NO_RENUMBER By default the document ids will
749 * be renumbered in the output - currently by applying the
750 * same offset to all the document ids in a particular
751 * source database. If this flag is specified, then this
752 * renumbering doesn't happen, but all the document ids
753 * must be unique over all source databases. Currently
754 * the ranges of document ids in each source must not
755 * overlap either, though this restriction may be removed
757 * - Xapian::DBCOMPACT_MULTIPASS
758 * If merging more than 3 databases, merge the postlists
759 * in multiple passes, which is generally faster but
760 * requires more disk space for temporary files.
761 * - Xapian::DBCOMPACT_SINGLE_FILE
762 * Produce a single-file database (only supported for
763 * glass currently) - this flag is implied in this form
764 * and need not be specified explicitly.
766 * @param block_size This specifies the block size (in bytes)
767 * to use for the output. For glass, the block size must
768 * be a power of 2 between 2048 and 65536 (inclusive), and
769 * the default (also used if an invalid value is passed)
772 * @param compactor Functor
777 Xapian::Compactor
& compactor
)
779 compact_(NULL
, fd
, flags
, block_size
, &compactor
);
783 /** This class provides read/write access to a database.
785 class XAPIAN_VISIBILITY_DEFAULT WritableDatabase
: public Database
{
787 /** Destroy this handle on the database.
789 * If no other handles to this database remain, the database will be
792 * If a transaction is active cancel_transaction() will be implicitly
793 * called; if no transaction is active commit() will be implicitly
794 * called, but any exception will be swallowed (because throwing
795 * exceptions in C++ destructors is problematic). If you aren't using
796 * transactions and want to know about any failure to commit changes,
797 * call commit() explicitly before the destructor gets called.
799 virtual ~WritableDatabase();
801 /** Create a WritableDatabase with no subdatabases.
803 * The created object isn't very useful in this state - it's intended
804 * as a placeholder value.
808 /** Open a database for update, automatically determining the database
811 * If the database is to be created, Xapian will try
812 * to create the directory indicated by path if it doesn't already
813 * exist (but only the leaf directory, not recursively).
815 * @param path directory that the database is stored in.
816 * @param flags one of:
817 * - Xapian::DB_CREATE_OR_OPEN open for read/write; create if no db
818 * exists (the default if flags isn't specified)
819 * - Xapian::DB_CREATE create new database; fail if db exists
820 * - Xapian::DB_CREATE_OR_OVERWRITE overwrite existing db; create if
822 * - Xapian::DB_OPEN open for read/write; fail if no db exists
824 * Additionally, the following flags can be combined with action
825 * using bitwise-or (| in C++):
827 * - Xapian::DB_NO_SYNC don't call fsync() or similar
828 * - Xapian::DB_FULL_SYNC try harder to ensure data is safe
829 * - Xapian::DB_DANGEROUS don't be crash-safe, no concurrent readers
830 * - Xapian::DB_NO_TERMLIST don't use a termlist table
831 * - Xapian::DB_RETRY_LOCK to wait to get a write lock
833 * @param block_size If a new database is created, this specifies
834 * the block size (in bytes) for backends which
835 * have such a concept. For chert and glass, the
836 * block size must be a power of 2 between 2048 and
837 * 65536 (inclusive), and the default (also used if
838 * an invalid value is passed) is 8192 bytes.
840 * @exception Xapian::DatabaseCorruptError will be thrown if the
841 * database is in a corrupt state.
843 * @exception Xapian::DatabaseLockError will be thrown if a lock
844 * couldn't be acquired on the database.
846 explicit WritableDatabase(const std::string
&path
,
850 /** @private @internal Create a WritableDatabase given its internals.
852 explicit WritableDatabase(Database::Internal
*internal
);
854 /** Copying is allowed. The internals are reference counted, so
857 * @param other The object to copy.
859 WritableDatabase(const WritableDatabase
&other
);
861 /** Assignment is allowed. The internals are reference counted,
862 * so assignment is cheap.
864 * Note that only a WritableDatabase may be assigned to a
865 * WritableDatabase: an attempt to assign a Database is caught
868 * @param other The object to copy.
870 void operator=(const WritableDatabase
&other
);
872 #ifdef XAPIAN_MOVE_SEMANTICS
873 /// Move constructor.
874 WritableDatabase(WritableDatabase
&& o
) : Database(std::move(o
)) {}
876 /// Move assignment operator.
877 WritableDatabase
& operator=(WritableDatabase
&& o
) {
878 Database::operator=(std::move(o
));
883 /** Add shards from another WritableDatabase.
885 * Any shards in @a other are added to the list of shards in this
886 * object. The shards are reference counted and also remain in
889 * @param other Another WritableDatabase object to add shards from
891 void add_database(const WritableDatabase
& other
) {
892 // This method is provided mainly so that adding a Database to a
893 // WritableDatabase is a compile-time error - prior to 1.4.19, it
894 // would essentially act as a "black-hole" shard which discarded
895 // any changes made to it.
896 Database::add_database(other
);
899 /** Commit any pending modifications made to the database.
901 * For efficiency reasons, when performing multiple updates to a
902 * database it is best (indeed, almost essential) to make as many
903 * modifications as memory will permit in a single pass through
904 * the database. To ensure this, Xapian batches up modifications.
906 * This method may be called at any time to commit any pending
907 * modifications to the database.
909 * If any of the modifications fail, an exception will be thrown and
910 * the database will be left in a state in which each separate
911 * addition, replacement or deletion operation has either been fully
912 * performed or not performed at all: it is then up to the
913 * application to work out which operations need to be repeated.
915 * It's not valid to call commit() within a transaction.
917 * Beware of calling commit() too frequently: this will make indexing
920 * Note that commit() need not be called explicitly: it will be called
921 * automatically when the database is closed, or when a sufficient
922 * number of modifications have been made. By default, this is every
923 * 10000 documents added, deleted, or modified. This value is rather
924 * conservative, and if you have a machine with plenty of memory,
925 * you can improve indexing throughput dramatically by setting
926 * XAPIAN_FLUSH_THRESHOLD in the environment to a larger value.
928 * This method was new in Xapian 1.1.0 - in earlier versions it was called flush().
931 * @exception Xapian::DatabaseError will be thrown if a problem occurs
932 * while modifying the database.
934 * @exception Xapian::DatabaseCorruptError will be thrown if the
935 * database is in a corrupt state.
939 /** Pre-1.1.0 name for commit().
941 * Use commit() instead.
// Inline compatibility alias: the body does nothing except call commit().
// XAPIAN_DEPRECATED() is a project macro not defined in this chunk -
// presumably it attaches a compiler deprecation marker to the declaration;
// confirm in <xapian/deprecated.h>.
943 XAPIAN_DEPRECATED(void flush()) { commit(); }
945 /** Begin a transaction.
947 * In Xapian a transaction is a group of modifications to the database
948 * which are linked such that either all will be applied
949 * simultaneously or none will be applied at all. Even in the case of
950 * a power failure, this characteristic should be preserved (as long
951 * as the filesystem isn't corrupted, etc).
953 * A transaction is started with begin_transaction() and can
954 * either be committed by calling commit_transaction() or aborted
955 * by calling cancel_transaction().
957 * By default, a transaction implicitly calls commit() before and
958 * after so that the modifications stand and fall without affecting
959 * modifications before or after.
961 * The downside of these implicit calls to commit() is that small
962 * transactions can harm indexing performance in the same way that
963 * explicitly calling commit() frequently can.
965 * If you're applying atomic groups of changes and only wish to
966 * ensure that each group is either applied or not applied, then
967 * you can prevent the automatic commit() before and after the
968 * transaction by starting the transaction with
969 * begin_transaction(false). However, if cancel_transaction is
970 * called (or if commit_transaction isn't called before the
971 * WritableDatabase object is destroyed) then any changes which
972 * were pending before the transaction began will also be discarded.
974 * Transactions aren't currently supported by the InMemory backend.
976 * @param flushed Is this a flushed transaction? By default
977 * transactions are "flushed", which means that
978 * committing a transaction will ensure those
979 * changes are permanently written to the
980 * database. By contrast, unflushed transactions
981 * only ensure that changes within the transaction
982 * are either all applied or all aren't.
984 * @exception Xapian::UnimplementedError will be thrown if transactions
985 * are not available for this database type.
987 * @exception Xapian::InvalidOperationError will be thrown if this is
988 * called at an invalid time, such as when a transaction
989 * is already in progress.
// Declared only (no inline body here). `flushed` defaults to true, so a
// plain begin_transaction() call requests a flushed transaction and
// begin_transaction(false) requests an unflushed one.
991 void begin_transaction(bool flushed
= true);
993 /** Complete the transaction currently in progress.
995 * If this method completes successfully and this is a flushed
996 * transaction, all the database modifications
997 * made during the transaction will have been committed to the database.
1000 * If an error occurs, an exception will be thrown, and none of
1001 * the modifications made to the database during the transaction
1002 * will have been applied to the database.
1004 * In all cases the transaction will no longer be in progress.
1006 * @exception Xapian::DatabaseError will be thrown if a problem occurs
1007 * while modifying the database.
1009 * @exception Xapian::DatabaseCorruptError will be thrown if the
1010 * database is in a corrupt state.
1012 * @exception Xapian::InvalidOperationError will be thrown if a
1013 * transaction is not currently in progress.
1015 * @exception Xapian::UnimplementedError will be thrown if transactions
1016 * are not available for this database type.
// Declared only (no inline body here). Takes no arguments and returns void,
// so the only way a caller learns of a failure is via a thrown exception.
1018 void commit_transaction();
1020 /** Abort the transaction currently in progress, discarding the
1021 * pending modifications made to the database.
1023 * If an error occurs in this method, an exception will be thrown,
1024 * but the transaction will be cancelled anyway.
1026 * @exception Xapian::DatabaseError will be thrown if a problem occurs
1027 * while modifying the database.
1029 * @exception Xapian::DatabaseCorruptError will be thrown if the
1030 * database is in a corrupt state.
1032 * @exception Xapian::InvalidOperationError will be thrown if a
1033 * transaction is not currently in progress.
1035 * @exception Xapian::UnimplementedError will be thrown if transactions
1036 * are not available for this database type.
// Declared only (no inline body here). Takes no arguments and returns void,
// so the only way a caller learns of a failure is via a thrown exception.
1038 void cancel_transaction();
1040 /** Add a new document to the database.
1042 * This method adds the specified document to the database,
1043 * returning a newly allocated document ID. Automatically allocated
1044 * document IDs come from a per-database monotonically increasing
1045 * counter, so IDs from deleted documents won't be reused.
1047 * If you want to specify the document ID to be used, you should
1048 * call replace_document() instead.
1050 * Note that changes to the database won't be immediately committed to
1051 * disk; see commit() for more details.
1053 * As with all database modification operations, the effect is
1054 * atomic: the document will either be fully added, or the document
1055 * fails to be added and an exception is thrown (possibly at a
1056 * later time when commit() is called or the database is closed).
1058 * @param document The new document to be added.
1060 * @return The document ID of the newly added document.
1062 * @exception Xapian::DatabaseError will be thrown if a problem occurs
1063 * while writing to the database.
1065 * @exception Xapian::DatabaseCorruptError will be thrown if the
1066 * database is in a corrupt state.
// Declared only (no inline body here). The document is taken by const
// reference and the resulting Xapian::docid is returned by value.
1068 Xapian::docid
add_document(const Xapian::Document
& document
);
1070 /** Delete a document from the database.
1072 * This method removes the document with the specified document ID
1073 * from the database.
1075 * Note that changes to the database won't be immediately committed to
1076 * disk; see commit() for more details.
1078 * As with all database modification operations, the effect is
1079 * atomic: the document will either be fully removed, or the document
1080 * fails to be removed and an exception is thrown (possibly at a
1081 * later time when commit() is called or the database is closed).
1083 * @param did The document ID of the document to be removed.
1085 * @exception Xapian::DatabaseError will be thrown if a problem occurs
1086 * while writing to the database.
1088 * @exception Xapian::DatabaseCorruptError will be thrown if the
1089 * database is in a corrupt state.
// Declared only (no inline body here). This overload identifies the document
// by its Xapian::docid (passed by value) and returns void.
1091 void delete_document(Xapian::docid did
);
1093 /** Delete any documents indexed by a term from the database.
1095 * This method removes any documents indexed by the specified term
1096 * from the database.
1098 * A major use is for convenience when UIDs from another system are
1099 * mapped to terms in Xapian, although this method has other uses
1100 * (for example, you could add a "deletion date" term to documents at
1101 * index time and use this method to delete all documents due for
1102 * deletion on a particular date).
1104 * @param unique_term The term to remove references to.
1106 * @exception Xapian::DatabaseError will be thrown if a problem occurs
1107 * while writing to the database.
1109 * @exception Xapian::DatabaseCorruptError will be thrown if the
1110 * database is in a corrupt state.
// Declared only (no inline body here). This overload selects documents by
// term (a std::string passed by const reference) rather than by document ID,
// and returns void.
1112 void delete_document(const std::string
& unique_term
);
1114 /** Replace a given document in the database.
1116 * This method replaces the document with the specified document ID.
1117 * If document ID @a did isn't currently used, the document will be
1118 * added with document ID @a did.
1120 * The monotonic counter used for automatically allocating document
1121 * IDs is increased so that the next automatically allocated document
1122 * ID will be did + 1. Be aware that if you use this method to
1123 * specify a high document ID for a new document, and also use
1124 * WritableDatabase::add_document(), Xapian may get to a state where
1125 * this counter wraps around and will be unable to automatically
1126 * allocate document IDs!
1128 * Note that changes to the database won't be immediately committed to
1129 * disk; see commit() for more details.
1131 * As with all database modification operations, the effect is
1132 * atomic: the document will either be fully replaced, or the document
1133 * fails to be replaced and an exception is thrown (possibly at a
1134 * later time when commit() is called or the database is closed).
1136 * @param did The document ID of the document to be replaced.
1137 * @param document The new document.
1139 * @exception Xapian::DatabaseError will be thrown if a problem occurs
1140 * while writing to the database.
1142 * @exception Xapian::DatabaseCorruptError will be thrown if the
1143 * database is in a corrupt state.
// Declared only (no inline body here). This overload is addressed by
// document ID; it returns void since the caller already supplies `did`.
1145 void replace_document(Xapian::docid did
,
1146 const Xapian::Document
& document
);
1148 /** Replace any documents matching a term.
1150 * This method replaces any documents indexed by the specified term
1151 * with the specified document. If any documents are indexed by the
1152 * term, the lowest document ID will be used for the document,
1153 * otherwise a new document ID will be generated as for add_document.
1155 * One common use is to allow UIDs from another system to easily be
1156 * mapped to terms in Xapian. Note that this method doesn't
1157 * automatically add unique_term as a term, so you'll need to call
1158 * document.add_term(unique_term) first when using replace_document() in this way.
1161 * Note that changes to the database won't be immediately committed to
1162 * disk; see commit() for more details.
1164 * As with all database modification operations, the effect is
1165 * atomic: the document(s) will either be fully replaced, or the
1166 * document(s) fail to be replaced and an exception is thrown (possibly at a
1168 * later time when commit() is called or the database is closed).
1170 * @param unique_term The "unique" term.
1171 * @param document The new document.
1173 * @return The document ID that document was given.
1175 * @exception Xapian::DatabaseError will be thrown if a problem occurs
1176 * while writing to the database.
1178 * @exception Xapian::DatabaseCorruptError will be thrown if the
1179 * database is in a corrupt state.
// Declared only (no inline body here). This overload is addressed by term
// (std::string by const reference) and, unlike the docid-addressed overload,
// returns a Xapian::docid by value.
1181 Xapian::docid
replace_document(const std::string
& unique_term
,
1182 const Xapian::Document
& document
);
1184 /** Add a word to the spelling dictionary.
1186 * If the word is already present, its frequency is increased.
1188 * @param word The word to add.
1189 * @param freqinc How much to increase its frequency by (default 1).
// Declared only (no inline body here). `freqinc` defaults to 1. Note the
// const qualifier: the method is callable on a const WritableDatabase.
1191 void add_spelling(const std::string
& word
,
1192 Xapian::termcount freqinc
= 1) const;
1194 /** Remove a word from the spelling dictionary.
1196 * The word's frequency is decreased, and if it would become zero or less
1197 * then the word is removed completely.
1199 * @param word The word to remove.
1200 * @param freqdec How much to decrease its frequency by (default 1).
// Declared only (no inline body here). `freqdec` defaults to 1. Note the
// const qualifier: the method is callable on a const WritableDatabase.
1202 void remove_spelling(const std::string
& word
,
1203 Xapian::termcount freqdec
= 1) const;
1205 /** Add a synonym for a term.
1207 * @param term The term to add a synonym for.
1208 * @param synonym The synonym to add. If this is already a
1209 * synonym for @a term, then no action is taken.
// Declared only (no inline body here). Both strings are taken by const
// reference; the method is const-qualified and returns void.
1211 void add_synonym(const std::string
& term
,
1212 const std::string
& synonym
) const;
1214 /** Remove a synonym for a term.
1216 * @param term The term to remove a synonym for.
1217 * @param synonym The synonym to remove. If this isn't currently
1218 * a synonym for @a term, then no action is taken.
// Declared only (no inline body here). Both strings are taken by const
// reference; the method is const-qualified and returns void.
1220 void remove_synonym(const std::string
& term
,
1221 const std::string
& synonym
) const;
1223 /** Remove all synonyms for a term.
1225 * @param term The term to remove all synonyms for. If the
1226 * term has no synonyms, no action is taken.
// Declared only (no inline body here). Takes the term by const reference;
// the method is const-qualified and returns void.
1228 void clear_synonyms(const std::string
& term
) const;
1230 /** Set the user-specified metadata associated with a given key.
1232 * This method sets the metadata value associated with a given key.
1233 * If there is already a metadata value stored in the database with
1234 * the same key, the old value is replaced. If you want to delete an
1235 * existing item of metadata, just set its value to the empty string.
1237 * User-specified metadata allows you to store arbitrary information
1238 * in the form of (key, value) pairs.
1240 * There's no hard limit on the number of metadata items, or the size
1241 * of the metadata values. Metadata keys have a limited length, which
1242 * depends on the backend. We recommend limiting them to 200 bytes.
1243 * Empty keys are not valid, and specifying one will cause an exception.
1246 * Metadata modifications are committed to disk in the same way as
1247 * modifications to the documents in the database are: i.e.,
1248 * modifications are atomic, and won't be committed to disk
1249 * immediately (see commit() for more details). This allows metadata
1250 * to be used to link databases with versioned external resources
1251 * by storing the appropriate version number in a metadata item.
1253 * You can also use the metadata to store arbitrary extra information
1254 * associated with terms, documents, or postings by encoding the
1255 * termname and/or document id into the metadata key.
1257 * @param key The key of the metadata item to set.
1259 * @param metadata The value of the metadata item to set.
1261 * @exception Xapian::DatabaseError will be thrown if a problem occurs
1262 * while writing to the database.
1264 * @exception Xapian::DatabaseCorruptError will be thrown if the
1265 * database is in a corrupt state.
1267 * @exception Xapian::InvalidArgumentError will be thrown if the
1268 * key supplied is empty.
1270 * @exception Xapian::UnimplementedError will be thrown if the
1271 * database backend in use doesn't support user-specified metadata.
// Declared only (no inline body here). Key and value are both std::string
// taken by const reference; the method is non-const and returns void.
1274 void set_metadata(const std::string
& key
, const std::string
& metadata
);
1276 /// Return a string describing this object.
// Declared only (no inline body here). Const-qualified, takes no arguments
// and returns the description as a std::string by value.
1277 std::string
get_description() const;
1282 #endif /* XAPIAN_INCLUDED_DATABASE_H */