Factor out function to decode 2 hex digits
[xapian.git] / xapian-core / include / xapian / compactor.h
blob3c9c9cd39b15d008db0c62b35dd7db9152d87483
1 /** @file
2 * @brief Compact a database, or merge and compact several.
3 */
4 /* Copyright (C) 2003,2004,2005,2006,2007,2008,2009,2010,2011,2013,2014,2015,2018 Olly Betts
5 * Copyright (C) 2008 Lemur Consulting Ltd
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20 * USA
23 #ifndef XAPIAN_INCLUDED_COMPACTOR_H
24 #define XAPIAN_INCLUDED_COMPACTOR_H
26 #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD
27 # error Never use <xapian/compactor.h> directly; include <xapian.h> instead.
28 #endif
30 #include <xapian/constants.h>
31 #include <xapian/deprecated.h>
32 #include <xapian/intrusive_ptr.h>
33 #include <xapian/visibility.h>
34 #include <string>
36 namespace Xapian {
38 class Database;
40 /** Compact a database, or merge and compact several.
42 class XAPIAN_VISIBILITY_DEFAULT Compactor {
43 public:
44 /// Class containing the implementation.
45 class Internal;
47 /** Compaction level, as accepted by set_compaction_level(). */
48 typedef enum {
49 /** Don't split items unnecessarily. */
50 STANDARD = 0,
51 /** Split items whenever it saves space (the default). */
52 FULL = 1,
53 /** Allow oversize items to save more space (not recommended if you
54 * ever plan to update the compacted database). */
55 FULLER = 2
56 } compaction_level;
58 private:
59 /// @internal Reference counted internals.
60 Xapian::Internal::intrusive_ptr<Internal> internal;
62 void set_flags_(unsigned flags, unsigned mask = 0);
64 public:
65 Compactor();
67 virtual ~Compactor();
69 /** Set the block size to use for tables in the output database.
71 * @param block_size The block size to use. Valid block sizes are
72 * currently powers of two between 2048 and 65536,
73 * with the default being 8192, but the valid
74 * sizes and default may change in the future.
76 XAPIAN_DEPRECATED(void set_block_size(size_t block_size));
78 /** Set whether to preserve existing document id values.
80 * @param renumber The default is true, which means that document ids will
81 * be renumbered - currently by applying the same offset
82 * to all the document ids in a particular source
83 * database.
85 * If false, then the document ids must be unique over all
86 * source databases. Currently the ranges of document ids
87 * in each source must not overlap either, though this
88 * restriction may be removed in the future.
90 XAPIAN_DEPRECATED(void set_renumber(bool renumber)) {
91 set_flags_(renumber ? 0 : DBCOMPACT_NO_RENUMBER,
92 ~unsigned(DBCOMPACT_NO_RENUMBER));
95 /** Set whether to merge postlists in multiple passes.
97 * @param multipass If true and merging more than 3 databases,
98 * merge the postlists in multiple passes, which is generally faster but
99 * requires more disk space for temporary files. By default we don't do
100 * this.
102 XAPIAN_DEPRECATED(void set_multipass(bool multipass)) {
103 set_flags_(multipass ? DBCOMPACT_MULTIPASS : 0,
104 ~unsigned(DBCOMPACT_MULTIPASS));
107 /** Set the compaction level.
109 * @param compaction Available values are:
110 * - Xapian::Compactor::STANDARD - Don't split items unnecessarily.
111 * - Xapian::Compactor::FULL - Split items whenever it saves space
112 * (the default).
113 * - Xapian::Compactor::FULLER - Allow oversize items to save more space
114 * (not recommended if you ever plan to update the compacted database).
116 XAPIAN_DEPRECATED(void set_compaction_level(compaction_level compaction)) {
117 set_flags_(compaction, ~unsigned(STANDARD|FULL|FULLER));
120 /** Set where to write the output.
122 * @deprecated Use Database::compact(destdir[, compactor]) instead.
124 * @param destdir Output path. This can be the same as an input if that
125 * input is a stub database (in which case the database(s)
126 * listed in the stub will be compacted to a new database
127 * and then the stub will be atomically updated to point
128 * to this new database).
130 XAPIAN_DEPRECATED(void set_destdir(const std::string & destdir));
132 /** Add a source database.
134 * @deprecated Use Database::compact(destdir[, compactor]) instead.
136 * @param srcdir The path to the source database to add.
138 XAPIAN_DEPRECATED(void add_source(const std::string & srcdir));
140 /** Perform the actual compaction/merging operation.
142 * @deprecated Use Database::compact(destdir[, compactor]) instead.
144 XAPIAN_DEPRECATED(void compact());
146 /** Update progress.
148 * Override this method if you want to get progress updates during
149 * compaction. This is called for each table first with empty status,
150 * and then one or more times with non-empty status.
152 * The default implementation does nothing.
154 * @param table The table currently being compacted.
155 * @param status A status message.
157 virtual void
158 set_status(const std::string & table, const std::string & status);
160 /** Resolve multiple user metadata entries with the same key.
162 * When merging, if the same user metadata key is set in more than one
163 * input, then this method is called to allow this to be resolved in
164 * an appropriate way.
166 * The default implementation just returns tags[0].
168 * For multipass this will currently get called multiple times for the
169 * same key if there are duplicates to resolve in each pass, but this
170 * may change in the future.
172 * Since 1.4.6, an implementation of this method can return an empty
173 * string to indicate that the appropriate result is to not set a value
174 * for this user metadata key in the output database. In older versions,
175 * you should not return an empty string.
177 * @param key The metadata key with duplicate entries.
178 * @param num_tags How many tags there are.
179 * @param tags An array of num_tags strings containing the tags to
180 * merge.
182 virtual std::string
183 resolve_duplicate_metadata(const std::string & key,
184 size_t num_tags, const std::string tags[]);
189 #endif /* XAPIAN_INCLUDED_COMPACTOR_H */