2 * @brief Perform a document-by-document copy of one or more Xapian databases.
4 /* Copyright (C) 2006-2022 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 #include <initializer_list>
29 #include <cmath> // For log10().
30 #include <cstdlib> // For exit().
31 #include <cstring> // For strcmp() and strrchr().
35 #define PROG_NAME "copydatabase"
36 #define PROG_DESC "Perform a document-by-document copy of one or more Xapian databases"
41 cout
<< "Usage: " PROG_NAME
" SOURCE_DATABASE... DESTINATION_DATABASE\n\n"
43 " --no-renumber Preserve the numbering of document ids (useful if you have\n"
44 " external references to them, or have set them to match\n"
45 " unique ids from an external source). If multiple source\n"
46 " databases are specified and the same docid occurs in more\n"
47 " one, the last occurrence will be the one which ends up in\n"
48 " the destination database.\n"
49 " --help display this help and exit\n"
50 " --version output version information and exit\n";
55 main(int argc
, char **argv
)
58 if (argc
> 1 && argv
[1][0] == '-') {
59 if (strcmp(argv
[1], "--help") == 0) {
60 cout
<< PROG_NAME
" - " PROG_DESC
"\n\n";
63 if (strcmp(argv
[1], "--version") == 0) {
64 cout
<< PROG_NAME
" - " PACKAGE_STRING
"\n";
67 if (strcmp(argv
[1], "--no-renumber") == 0) {
75 // We expect two or more arguments: at least one source database path
76 // followed by the destination database path.
77 if (argc
< 3) show_usage(1);
79 // Create the destination database, using DB_CREATE so that we don't
80 // try to overwrite or update an existing database in case the user
81 // got the command line argument order wrong.
82 const char *dest
= argv
[argc
- 1];
83 Xapian::WritableDatabase
db_out(dest
, Xapian::DB_CREATE
);
85 for (int i
= 1; i
< argc
- 1; ++i
) {
88 // Remove any trailing directory separator.
90 for (char dir_sep
: DIR_SEPS_LIST
) {
92 src
.resize(src
.size() - 1);
98 // Open the source database.
99 Xapian::Database
db_in(src
);
101 // Find the leaf-name of the database path for reporting progress.
// If we found a directory separator, + 1 advances to the next
// character; if we didn't, incrementing string::npos will give us 0,
// so we use the whole of src as the leaf-name.
106 const char * leaf
= src
.c_str() + (src
.find_last_of(DIR_SEPS
) + 1);
108 // Iterate over all the documents in db_in, copying each to db_out.
109 Xapian::doccount dbsize
= db_in
.get_doccount();
111 cout
<< leaf
<< ": empty!\n";
113 // Calculate how many decimal digits there are in dbsize.
114 int width
= static_cast<int>(log10(double(dbsize
))) + 1;
116 Xapian::doccount c
= 0;
117 Xapian::PostingIterator it
= db_in
.postlist_begin(string());
118 while (it
!= db_in
.postlist_end(string())) {
119 Xapian::docid did
= *it
;
121 db_out
.add_document(db_in
.get_document(did
));
123 db_out
.replace_document(did
, db_in
.get_document(did
));
// Update for the first 10, and then every 13th document
// counting back from the end (this means that all the
// digits "rotate" and the counter ends up on the exact
// total).
131 if (c
<= 10 || (dbsize
- c
) % 13 == 0) {
132 cout
<< '\r' << leaf
<< ": ";
133 cout
<< setw(width
) << c
<< '/' << dbsize
<< flush
;
142 cout
<< "Copying spelling data..." << flush
;
143 Xapian::TermIterator spellword
= db_in
.spellings_begin();
144 while (spellword
!= db_in
.spellings_end()) {
145 db_out
.add_spelling(*spellword
, spellword
.get_termfreq());
150 cout
<< "Copying synonym data..." << flush
;
151 Xapian::TermIterator synkey
= db_in
.synonym_keys_begin();
152 while (synkey
!= db_in
.synonym_keys_end()) {
153 string key
= *synkey
;
154 Xapian::TermIterator syn
= db_in
.synonyms_begin(key
);
155 while (syn
!= db_in
.synonyms_end(key
)) {
156 db_out
.add_synonym(key
, *syn
);
163 cout
<< "Copying user metadata..." << flush
;
164 Xapian::TermIterator metakey
= db_in
.metadata_keys_begin();
165 while (metakey
!= db_in
.metadata_keys_end()) {
166 string key
= *metakey
;
167 db_out
.set_metadata(key
, db_in
.get_metadata(key
));
173 cout
<< "Committing..." << flush
;
174 // Commit explicitly so that any error is reported.
177 } catch (const Xapian::Error
& e
) {
178 cerr
<< '\n' << argv
[0] << ": " << e
.get_description() << '\n';