2 * @brief Tests of Database::compact()
4 /* Copyright (C) 2009,2010,2011,2012,2013,2015,2016,2017,2018,2019 Olly Betts
5 * Copyright (C) 2010 Richard Boulton
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
25 #include "api_compact.h"
31 #include "filetests.h"
32 #include "msvcignoreinvalidparam.h"
34 #include "testsuite.h"
35 #include "testutils.h"
41 #include <sys/types.h>
42 #include "safesysstat.h"
43 #include "safefcntl.h"
44 #include "safeunistd.h"
// Populate `db` from the spec string `s`.
//
// As far as the visible code shows, the spec is made of entries which must be
// followed by a space or the end of the string.  Each entry is a decimal docid
// parsed with strtoul(), and a second number parsed from one character further
// on can replace `last` to give a range first..last.  An entry starting with
// '!' sets `del`.  For every docid in the range a document is stored with
// replace_document() under that docid, or delete_document() is called for it -
// presumably the latter is selected by `del` (TODO confirm against the full
// source).  A malformed spec fails the test via FAIL_TEST().
51 make_sparse_db(Xapian::WritableDatabase
&db
, const string
& s
)
53 // Need non-const pointer for strtoul(), but data isn't modified.
54 char * p
= const_cast<char *>(s
.c_str());
57 bool del
= (*p
== '!');
59 Xapian::docid first
= strtoul(p
, &p
, 10);
60 Xapian::docid last
= first
;
62 last
= strtoul(p
+ 1, &p
, 10);
64 if (*p
&& *p
!= ' ') {
// Log the offset within the spec at which parsing stopped.
65 tout
<< p
- s
.c_str() << endl
;
66 FAIL_TEST("Bad sparse db spec (expected space): " << s
);
69 FAIL_TEST("Bad sparse db spec (first > last): " << s
);
74 db
.delete_document(first
);
77 string id
= str(first
);
// Unique term "Q<docid>"; check_sparse_uid_terms() iterates terms with this
// prefix.
79 doc
.add_term("Q" + str(first
));
// Second term: the letter 'a' + (docid % 26), repeated (docid % 7 + 1) times.
80 doc
.add_term(string(first
% 7 + 1, char((first
% 26) + 'a')));
81 db
.replace_document(first
, doc
);
83 } while (first
++ < last
);
// Stop once the end of the spec string has been reached.
85 if (*p
== '\0') break;
// Open the database at `path` and walk every term starting with "Q".
//
// For each such term the text after the first character is converted to a
// docid with atoi() and the term's posting list is opened.
// NOTE(review): presumably the posting list is then compared against `did` -
// confirm against the full source.
93 check_sparse_uid_terms(const string
& path
)
95 Xapian::Database
db(path
);
96 Xapian::TermIterator t
;
97 for (t
= db
.allterms_begin("Q"); t
!= db
.allterms_end("Q"); ++t
) {
// Skip the one character prefix to get at the numeric part of the term.
98 Xapian::docid did
= atoi((*t
).c_str() + 1);
99 Xapian::PostingIterator p
= db
.postlist_begin(*t
);
104 // With multi the docids in the shards change the behaviour.
//
// Test compaction with Xapian::DBCOMPACT_NO_RENUMBER.
//
// Four sparse databases (a, b, c and d) are generated with make_sparse_db().
// Database a on its own, and the combinations a+c, d+a+c and c+a+d, are
// compacted to `out` and the result is passed to check_sparse_uid_terms().
// Every combination which contains both a and b (a+b, b+a, a+b+d, d+b+a and
// b+a+d) is expected to throw Xapian::InvalidOperationError.
105 DEFINE_TESTCASE(compactnorenumber1
, compact
&& generated
&& !multi
) {
106 string a
= get_database_path("compactnorenumber1a", make_sparse_db
,
107 "5-7 24 76 987 1023-1027 9999 !9999");
// Remember the UUID of input a so it can be compared with the output's UUID.
110 Xapian::Database
db(a
);
111 a_uuid
= db
.get_uuid();
113 string b
= get_database_path("compactnorenumber1b", make_sparse_db
,
115 string c
= get_database_path("compactnorenumber1c", make_sparse_db
,
117 string d
= get_database_path("compactnorenumber1d", make_sparse_db
,
118 "3000 999999 !999999");
120 string out
= get_compaction_output_path("compactnorenumber1out");
124 Xapian::Database
db(a
);
125 db
.compact(out
, Xapian::DBCOMPACT_NO_RENUMBER
);
128 check_sparse_uid_terms(out
);
131 TEST(!dir_exists(out
+ "/donor"));
132 Xapian::Database
db(out
);
133 // xapian-compact should change the UUID of the database, but didn't
134 // prior to 1.0.18/1.1.4.
135 string out_uuid
= db
.get_uuid();
136 TEST_NOT_EQUAL(a_uuid
, out_uuid
);
// The UUID is expected to be 36 characters long and not the all-zero UUID.
137 TEST_EQUAL(out_uuid
.size(), 36);
138 TEST_NOT_EQUAL(out_uuid
, "00000000-0000-0000-0000-000000000000");
140 // White box test - ensure that the donor database is removed.
141 TEST(!dir_exists(out
+ "/donor"));
147 db
.add_database(Xapian::Database(a
));
148 db
.add_database(Xapian::Database(c
));
149 db
.compact(out
, Xapian::DBCOMPACT_NO_RENUMBER
);
151 check_sparse_uid_terms(out
);
153 // Check that xapian-compact is producing a consistent database. Also,
154 // regression test - xapian 1.1.4 set lastdocid to 0 in the output
156 Xapian::Database
outdb(out
);
157 dbcheck(outdb
, 24, 9999);
163 db
.add_database(Xapian::Database(d
));
164 db
.add_database(Xapian::Database(a
));
165 db
.add_database(Xapian::Database(c
));
166 db
.compact(out
, Xapian::DBCOMPACT_NO_RENUMBER
);
168 check_sparse_uid_terms(out
);
173 db
.add_database(Xapian::Database(c
));
174 db
.add_database(Xapian::Database(a
));
175 db
.add_database(Xapian::Database(d
));
176 db
.compact(out
, Xapian::DBCOMPACT_NO_RENUMBER
);
178 check_sparse_uid_terms(out
);
// From here on each combination includes both a and b, and compact() is
// expected to throw.
184 db
.add_database(Xapian::Database(a
));
185 db
.add_database(Xapian::Database(b
));
186 TEST_EXCEPTION(Xapian::InvalidOperationError
,
187 db
.compact(out
, Xapian::DBCOMPACT_NO_RENUMBER
)
195 db
.add_database(Xapian::Database(b
));
196 db
.add_database(Xapian::Database(a
));
197 TEST_EXCEPTION(Xapian::InvalidOperationError
,
198 db
.compact(out
, Xapian::DBCOMPACT_NO_RENUMBER
)
206 db
.add_database(Xapian::Database(a
));
207 db
.add_database(Xapian::Database(b
));
208 db
.add_database(Xapian::Database(d
));
209 TEST_EXCEPTION(Xapian::InvalidOperationError
,
210 db
.compact(out
, Xapian::DBCOMPACT_NO_RENUMBER
)
218 db
.add_database(Xapian::Database(d
));
219 db
.add_database(Xapian::Database(b
));
220 db
.add_database(Xapian::Database(a
));
221 TEST_EXCEPTION(Xapian::InvalidOperationError
,
222 db
.compact(out
, Xapian::DBCOMPACT_NO_RENUMBER
)
230 db
.add_database(Xapian::Database(b
));
231 db
.add_database(Xapian::Database(a
));
232 db
.add_database(Xapian::Database(d
));
233 TEST_EXCEPTION(Xapian::InvalidOperationError
,
234 db
.compact(out
, Xapian::DBCOMPACT_NO_RENUMBER
)
239 // Test use of compact to merge two databases.
//
// The same input (apitest_simpledata) is added twice and compacted to
// `outdbpath`, either with Xapian::DBCOMPACT_SINGLE_FILE or with no flags.
// The output must hold twice as many documents as the input, and
// Xapian::Database::check() must report 0 for it.
240 DEFINE_TESTCASE(compactmerge1
, compact
) {
241 string indbpath
= get_database_path("apitest_simpledata");
242 string outdbpath
= get_compaction_output_path("compactmerge1out");
245 const string
& dbtype
= get_dbtype();
246 bool singlefile
= startswith(dbtype
, "singlefile_");
249 db
.add_database(Xapian::Database(indbpath
));
250 db
.add_database(Xapian::Database(indbpath
));
252 db
.compact(outdbpath
, Xapian::DBCOMPACT_SINGLE_FILE
);
254 db
.compact(outdbpath
);
258 Xapian::Database
indb(get_database("apitest_simpledata"));
259 Xapian::Database
outdb(outdbpath
);
261 TEST_EQUAL(indb
.get_doccount() * 2, outdb
.get_doccount());
262 dbcheck(outdb
, outdb
.get_doccount(), outdb
.get_doccount());
265 // Check we actually got a single file out.
266 TEST(file_exists(outdbpath
));
267 TEST_EQUAL(Xapian::Database::check(outdbpath
, 0, &tout
), 0);
268 } else if (startswith(dbtype
, "multi_")) {
269 // Can't check tables for a sharded DB.
270 TEST_EQUAL(Xapian::Database::check(outdbpath
, 0, &tout
), 0);
272 // Check we got a directory out, not a file.
273 TEST(dir_exists(outdbpath
));
// Path suffixes to try with Database::check(); the array ends with nullptr.
274 static const char* const suffixes
[] = {
275 "", "/postlist", "/termlist.", nullptr
277 for (auto s
: suffixes
) {
// chert gets the suffix "/record.DB"; the other visible assignment builds
// "/docdata." followed by the backend name.
282 if (get_dbtype() == "chert") {
283 suffix
= "/record.DB";
285 suffix
= "/docdata." + dbtype
;
289 tout
<< "Trying suffix '" << suffix
<< "'" << endl
;
290 string arg
= outdbpath
;
292 TEST_EQUAL(Xapian::Database::check(arg
, 0, &tout
), 0);
// Generator handed to get_database_path() by compactmultichunks1: fills `db`
// using add_document().  The string parameter is unnamed and so unused.
298 make_multichunk_db(Xapian::WritableDatabase
&db
, const string
&)
302 Xapian::Document doc
;
305 db
.add_document(doc
);
312 // Test use of compact on a database which has multiple chunks for a term.
313 // This is a regression test for ticket #427
//
// The input is generated by make_multichunk_db() and compacted to
// `outdbpath`; the output must hold the same number of documents as the
// input and pass dbcheck().
314 DEFINE_TESTCASE(compactmultichunks1
, compact
&& generated
) {
315 string indbpath
= get_database_path("compactmultichunks1in",
316 make_multichunk_db
, "");
317 string outdbpath
= get_compaction_output_path("compactmultichunks1out");
321 Xapian::Database
db(indbpath
);
322 db
.compact(outdbpath
);
325 Xapian::Database
indb(indbpath
);
326 Xapian::Database
outdb(outdbpath
);
328 TEST_EQUAL(indb
.get_doccount(), outdb
.get_doccount());
329 dbcheck(outdb
, outdb
.get_doccount(), outdb
.get_doccount());
332 // Test compacting from a stub database directory.
//
// A stub file named XAPIANDB is written inside the directory
// .stub/compactstub1, listing apitest_simpledata and apitest_simpledata2.
// The stub directory is opened as a database and compacted to `outdbpath`;
// the output must hold the same number of documents as the stub.
333 DEFINE_TESTCASE(compactstub1
, compact
) {
334 const char * stubpath
= ".stub/compactstub1";
335 const char * stubpathfile
= ".stub/compactstub1/XAPIANDB";
336 mkdir(".stub", 0755);
337 mkdir(stubpath
, 0755);
338 ofstream
stub(stubpathfile
);
339 TEST(stub
.is_open());
// The stub file is two directory levels down, hence the "../../" prefix.
340 stub
<< "auto ../../" << get_database_path("apitest_simpledata") << endl
;
341 stub
<< "auto ../../" << get_database_path("apitest_simpledata2") << endl
;
344 string outdbpath
= get_compaction_output_path("compactstub1out");
348 Xapian::Database
db(stubpath
);
349 db
.compact(outdbpath
);
352 Xapian::Database
indb(stubpath
);
353 Xapian::Database
outdb(outdbpath
);
355 TEST_EQUAL(indb
.get_doccount(), outdb
.get_doccount());
356 dbcheck(outdb
, outdb
.get_doccount(), outdb
.get_doccount());
359 // Test compacting from a stub database file.
//
// The stub file .stub/compactstub2 lists apitest_simpledata and
// apitest_simpledata2.  It is opened as a database and compacted to
// `outdbpath`; the output must hold the same number of documents as the stub.
360 DEFINE_TESTCASE(compactstub2
, compact
) {
361 const char * stubpath
= ".stub/compactstub2";
362 mkdir(".stub", 0755);
363 ofstream
stub(stubpath
);
364 TEST(stub
.is_open());
// The stub file is one directory level down, hence the "../" prefix.
365 stub
<< "auto ../" << get_database_path("apitest_simpledata") << endl
;
366 stub
<< "auto ../" << get_database_path("apitest_simpledata2") << endl
;
369 string outdbpath
= get_compaction_output_path("compactstub2out");
373 Xapian::Database
db(stubpath
);
374 db
.compact(outdbpath
);
377 Xapian::Database
indb(stubpath
);
378 Xapian::Database
outdb(outdbpath
);
380 TEST_EQUAL(indb
.get_doccount(), outdb
.get_doccount());
381 dbcheck(outdb
, outdb
.get_doccount(), outdb
.get_doccount());
384 // Test compacting a stub database file to itself.
//
// The stub file .stub/compactstub3 lists apitest_simpledata and
// apitest_simpledata2.  The document count is recorded before compacting,
// the database is compacted with the stub's own path as the destination, and
// the database then opened from that path must report the same count.
385 DEFINE_TESTCASE(compactstub3
, compact
) {
386 const char * stubpath
= ".stub/compactstub3";
387 mkdir(".stub", 0755);
388 ofstream
stub(stubpath
);
389 TEST(stub
.is_open());
390 stub
<< "auto ../" << get_database_path("apitest_simpledata") << endl
;
391 stub
<< "auto ../" << get_database_path("apitest_simpledata2") << endl
;
394 Xapian::doccount in_docs
;
396 Xapian::Database
indb(stubpath
);
// Record the count first, since the compaction below writes to the same path.
397 in_docs
= indb
.get_doccount();
398 indb
.compact(stubpath
);
401 Xapian::Database
outdb(stubpath
);
403 TEST_EQUAL(in_docs
, outdb
.get_doccount());
404 dbcheck(outdb
, outdb
.get_doccount(), outdb
.get_doccount());
407 // Test compacting a stub database directory to itself.
//
// A stub file named XAPIANDB is written inside the directory
// .stub/compactstub4, listing apitest_simpledata and apitest_simpledata2.
// The document count is recorded before compacting, the database is compacted
// with the stub directory's own path as the destination, and the database
// then opened from that path must report the same count.
408 DEFINE_TESTCASE(compactstub4
, compact
) {
409 const char * stubpath
= ".stub/compactstub4";
410 const char * stubpathfile
= ".stub/compactstub4/XAPIANDB";
411 mkdir(".stub", 0755);
412 mkdir(stubpath
, 0755);
413 ofstream
stub(stubpathfile
);
414 TEST(stub
.is_open());
415 stub
<< "auto ../../" << get_database_path("apitest_simpledata") << endl
;
416 stub
<< "auto ../../" << get_database_path("apitest_simpledata2") << endl
;
419 Xapian::doccount in_docs
;
421 Xapian::Database
indb(stubpath
);
// Record the count first, since the compaction below writes to the same path.
422 in_docs
= indb
.get_doccount();
423 indb
.compact(stubpath
);
426 Xapian::Database
outdb(stubpath
);
428 TEST_EQUAL(in_docs
, outdb
.get_doccount());
429 dbcheck(outdb
, outdb
.get_doccount(), outdb
.get_doccount());
// Database generator: adds a document, the spelling "foo" and synonyms for
// the keys "bar" and "foobar" to `db`.  The string parameter is unnamed and
// so unused.
433 make_all_tables(Xapian::WritableDatabase
&db
, const string
&)
435 Xapian::Document doc
;
437 db
.add_document(doc
);
438 db
.add_spelling("foo");
439 db
.add_synonym("bar", "pub");
440 db
.add_synonym("foobar", "foo");
// Database generator: adds a document to `db`; no spelling or synonym calls
// are visible here.  The string parameter is unnamed and so unused.
446 make_missing_tables(Xapian::WritableDatabase
&db
, const string
&)
448 Xapian::Document doc
;
450 db
.add_document(doc
);
// Test merging two generated databases, a and b, where b is built by
// make_missing_tables().  The database opened from `out` must have at least
// one spelling and at least one synonym key.
455 DEFINE_TESTCASE(compactmissingtables1
, compact
&& generated
) {
456 string a
= get_database_path("compactmissingtables1a",
458 string b
= get_database_path("compactmissingtables1b",
459 make_missing_tables
);
461 string out
= get_compaction_output_path("compactmissingtables1out");
466 db
.add_database(Xapian::Database(a
));
467 db
.add_database(Xapian::Database(b
));
472 Xapian::Database
db(out
);
// begin != end means the spelling/synonym data is non-empty in the output.
473 TEST_NOT_EQUAL(db
.spellings_begin(), db
.spellings_end());
474 TEST_NOT_EQUAL(db
.synonym_keys_begin(), db
.synonym_keys_end());
475 // FIXME: arrange for input b to not have a termlist table.
476 // TEST_EXCEPTION(Xapian::FeatureUnavailableError, db.termlist_begin(1));
// Database generator: adds a document, the spelling "bar" and synonyms for
// the keys "bar", "barfoo" and "foofoo" to `db`.  The string parameter is
// unnamed and so unused.
481 make_all_tables2(Xapian::WritableDatabase
&db
, const string
&)
483 Xapian::Document doc
;
485 db
.add_document(doc
);
486 db
.add_spelling("bar");
487 db
.add_synonym("bar", "baa");
488 db
.add_synonym("barfoo", "barbar");
489 db
.add_synonym("foofoo", "barfoo");
494 /// Adds coverage for merging synonym table.
///
/// Two generated databases, a and b, are merged and the database opened from
/// `out` is inspected.  The checks expect the spellings "bar" then "foo", and
/// the synonym keys "bar", "barfoo", "foobar" then "foofoo", with each
/// sequence finishing at its end iterator.
495 DEFINE_TESTCASE(compactmergesynonym1
, compact
&& generated
) {
496 string a
= get_database_path("compactmergesynonym1a",
498 string b
= get_database_path("compactmergesynonym1b",
501 string out
= get_compaction_output_path("compactmergesynonym1out");
506 db
.add_database(Xapian::Database(a
));
507 db
.add_database(Xapian::Database(b
));
512 Xapian::Database
db(out
);
// Spellings expected in the merged output.
514 Xapian::TermIterator i
= db
.spellings_begin();
515 TEST_NOT_EQUAL(i
, db
.spellings_end());
516 TEST_EQUAL(*i
, "bar");
518 TEST_NOT_EQUAL(i
, db
.spellings_end());
519 TEST_EQUAL(*i
, "foo");
521 TEST_EQUAL(i
, db
.spellings_end());
// Synonym keys expected in the merged output.
523 i
= db
.synonym_keys_begin();
524 TEST_NOT_EQUAL(i
, db
.synonym_keys_end());
525 TEST_EQUAL(*i
, "bar");
527 TEST_NOT_EQUAL(i
, db
.synonym_keys_end());
528 TEST_EQUAL(*i
, "barfoo");
530 TEST_NOT_EQUAL(i
, db
.synonym_keys_end());
531 TEST_EQUAL(*i
, "foobar");
533 TEST_NOT_EQUAL(i
, db
.synonym_keys_end());
534 TEST_EQUAL(*i
, "foofoo");
536 TEST_EQUAL(i
, db
.synonym_keys_end());
// Test compacting empty databases: first a single empty database, then two
// empty databases together.  In both cases the output must contain 0
// documents and pass dbcheck(outdb, 0, 0).
540 DEFINE_TESTCASE(compactempty1
, compact
) {
// The input path is obtained by passing an empty string as the database name.
541 string empty_dbpath
= get_database_path(string());
542 string outdbpath
= get_compaction_output_path("compactempty1out");
546 // Compacting an empty database tried to divide by zero in 1.3.0.
548 db
.add_database(Xapian::Database(empty_dbpath
));
549 db
.compact(outdbpath
);
551 Xapian::Database
outdb(outdbpath
);
552 TEST_EQUAL(outdb
.get_doccount(), 0);
553 dbcheck(outdb
, 0, 0);
557 // Check compacting two empty databases together.
559 db
.add_database(Xapian::Database(empty_dbpath
));
560 db
.add_database(Xapian::Database(empty_dbpath
));
561 db
.compact(outdbpath
);
563 Xapian::Database
outdb(outdbpath
);
564 TEST_EQUAL(outdb
.get_doccount(), 0);
565 dbcheck(outdb
, 0, 0);
// Test compaction with Xapian::DBCOMPACT_MULTIPASS.
//
// The four sparse databases are requested under the same names that
// compactnorenumber1 uses ("compactnorenumber1a" to "compactnorenumber1d"),
// merged in the order a, b, c, d, and the output is verified with
// dbcheck(outdb, 29, 1041).
569 DEFINE_TESTCASE(compactmultipass1
, compact
&& generated
) {
570 string outdbpath
= get_compaction_output_path("compactmultipass1");
573 string a
= get_database_path("compactnorenumber1a", make_sparse_db
,
574 "5-7 24 76 987 1023-1027 9999 !9999");
575 string b
= get_database_path("compactnorenumber1b", make_sparse_db
,
577 string c
= get_database_path("compactnorenumber1c", make_sparse_db
,
579 string d
= get_database_path("compactnorenumber1d", make_sparse_db
,
580 "3000 999999 !999999");
584 db
.add_database(Xapian::Database(a
));
585 db
.add_database(Xapian::Database(b
));
586 db
.add_database(Xapian::Database(c
));
587 db
.add_database(Xapian::Database(d
));
588 db
.compact(outdbpath
, Xapian::DBCOMPACT_MULTIPASS
);
591 Xapian::Database
outdb(outdbpath
);
592 dbcheck(outdb
, 29, 1041);
595 // Test compacting to an fd.
596 // Chert doesn't support single file databases.
//
// A file descriptor is opened on `outdbpath` with O_CREAT|O_RDWR|O_BINARY.
// After the compaction, close(fd) is expected to fail with EBADF, showing
// that the descriptor had already been closed.  The database opened from
// `outdbpath` must then hold the same number of documents as the input.
597 DEFINE_TESTCASE(compacttofd1
, compact
&& !chert
) {
598 Xapian::Database
indb(get_database("apitest_simpledata"));
599 string outdbpath
= get_compaction_output_path("compacttofd1out");
602 int fd
= open(outdbpath
.c_str(), O_CREAT
|O_RDWR
|O_BINARY
, 0666);
606 // Confirm that the fd was closed by Xapian. Set errno first to workaround
607 // a bug in Wine's msvcrt.dll which fails to set errno in this case:
608 // https://bugs.winehq.org/show_bug.cgi?id=43902
611 MSVCIgnoreInvalidParameter invalid_fd_in_close_is_expected
;
612 TEST(close(fd
) == -1);
613 TEST_EQUAL(errno
, EBADF
);
616 Xapian::Database
outdb(outdbpath
);
618 TEST_EQUAL(indb
.get_doccount(), outdb
.get_doccount());
619 dbcheck(outdb
, outdb
.get_doccount(), outdb
.get_doccount());
622 // Test compacting to an fd at an offset.
623 // Chert doesn't support single file databases.
//
// As compacttofd1, but the descriptor is first positioned at offset 8192.
// Afterwards the file is reopened read-only and its start is read into `buf`
// and scanned, then the descriptor is positioned at 8192 again and a
// Database is opened directly from it and compared with the input.
624 DEFINE_TESTCASE(compacttofd2
, compact
&& !chert
) {
625 Xapian::Database
indb(get_database("apitest_simpledata"));
626 string outdbpath
= get_compaction_output_path("compacttofd2out");
629 int fd
= open(outdbpath
.c_str(), O_CREAT
|O_RDWR
|O_BINARY
, 0666);
// Position the descriptor at offset 8192 before it is used as the output.
631 TEST(lseek(fd
, 8192, SEEK_SET
) == 8192);
634 // Confirm that the fd was closed by Xapian. Set errno first to workaround
635 // a bug in Wine's msvcrt.dll which fails to set errno in this case:
636 // https://bugs.winehq.org/show_bug.cgi?id=43902
639 MSVCIgnoreInvalidParameter invalid_fd_in_close_is_expected
;
640 TEST(close(fd
) == -1);
641 TEST_EQUAL(errno
, EBADF
);
644 fd
= open(outdbpath
.c_str(), O_RDONLY
|O_BINARY
, 0666);
647 // Test that the database wasn't just written to the start of the file.
649 size_t n
= sizeof(buf
);
651 ssize_t c
= read(fd
, buf
, n
);
// Walk over the `c` bytes which were actually read.
653 for (const char * p
= buf
; p
!= buf
+ c
; ++p
) {
// Seek to offset 8192 again so the database can be opened from the fd.
659 TEST(lseek(fd
, 8192, SEEK_SET
) == 8192);
660 Xapian::Database
outdb(fd
);
662 TEST_EQUAL(indb
.get_doccount(), outdb
.get_doccount());
663 dbcheck(outdb
, outdb
.get_doccount(), outdb
.get_doccount());
666 // Regression test for bug fixed in 1.3.5. If you compact a WritableDatabase
667 // with uncommitted changes, you get an inconsistent output.
669 // Chert doesn't support single file databases.
//
// Three documents are added to a writable database, which is then compacted
// to `output` with Xapian::DBCOMPACT_SINGLE_FILE.  One compaction attempt is
// expected to throw Xapian::InvalidOperationError and leave the file at
// `output` in place.  The final output must pass Database::check() and hold
// 3 documents.
670 DEFINE_TESTCASE(compactsingle1
, compact
&& writable
&& !chert
) {
671 Xapian::WritableDatabase db
= get_writable_database();
672 Xapian::Document doc
;
676 db
.add_document(doc
);
677 // Include a zero-length document as a regression test for a
678 // Database::check() bug fixed in 1.4.7 (and introduced in 1.4.6). Test it
679 // here so we also have test coverage for compaction for such a document.
680 Xapian::Document doc2
;
681 doc2
.add_boolean_term("Kfoo");
682 db
.add_document(doc2
);
683 // Also test a completely empty document.
684 db
.add_document(Xapian::Document());
686 string output
= get_compaction_output_path("compactsingle1-out");
687 // In 1.3.4, we would hang if the output file already existed, so check
691 TEST_EXCEPTION(Xapian::InvalidOperationError
,
692 db
.compact(output
, Xapian::DBCOMPACT_SINGLE_FILE
));
694 // Check the file wasn't removed by the failed attempt.
695 TEST(file_exists(output
));
698 db
.compact(output
, Xapian::DBCOMPACT_SINGLE_FILE
);
701 TEST_EQUAL(Xapian::Database::check(output
, 0, &tout
), 0);
703 TEST_EQUAL(Xapian::Database(output
).get_doccount(), 3);
706 // Regression test for bug fixed in 1.4.6. Same as above, except not with
707 // a single file database!
//
// Three documents are added to a writable database.  An operation in this
// test is expected to throw Xapian::InvalidOperationError, and the database
// at `output` must pass Database::check() and hold 3 documents.
708 DEFINE_TESTCASE(compact1
, compact
&& writable
) {
709 Xapian::WritableDatabase db
= get_writable_database();
710 Xapian::Document doc
;
714 db
.add_document(doc
);
715 // Include a zero-length document as a regression test for a
716 // Database::check() bug fixed in 1.4.7 (and introduced in 1.4.6). Test it
717 // here so we also have test coverage for compaction for such a document.
718 Xapian::Document doc2
;
719 doc2
.add_boolean_term("Kfoo");
720 db
.add_document(doc2
);
721 // Also test a completely empty document.
722 db
.add_document(Xapian::Document());
724 string output
= get_compaction_output_path("compact1-out");
727 TEST_EXCEPTION(Xapian::InvalidOperationError
,
734 TEST_EQUAL(Xapian::Database::check(output
, 0, &tout
), 0);
736 TEST_EQUAL(Xapian::Database(output
).get_doccount(), 3);