/** @file
 * @brief List URLs of documents indexed by omindex
 */
/* Copyright 2014 Olly Betts
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
 */
29 #include "gnu_getopt.h"
31 #include "common/stringutils.h"
// Program name; string-pasted into the usage line and the banner output.
35 #define PROG_NAME "omindex-list"
// One-line program description; shown after the name in the banner output.
36 #define PROG_DESC "List URLs of documents indexed by omindex"
// Write the command-line usage summary to cout: the synopsis line
// ("Usage: omindex-list [OPTIONS] DATABASE...") followed by the --help and
// --version option descriptions.  The text is built from adjacent string
// literals, so PROG_NAME is concatenated at compile time.
// NOTE(review): this view of the function appears to be missing lines (the
// closing brace is not visible) - confirm against the full file.
41 static void show_usage() {
42 cout
<< "Usage: " PROG_NAME
" [OPTIONS] DATABASE...\n\n"
44 " --help display this help and exit\n"
45 " --version output version information and exit" << endl
;
// Program entry point.  From the visible statements: parses the long options
// --help and --version, requires at least one DATABASE argument, adds every
// remaining argument to db as a Xapian database, then walks all terms with
// prefix "U" and derives a URL for each one.  Xapian::Error exceptions are
// reported on cerr.
// NOTE(review): this view of the function is incomplete - braces, the
// option dispatch, several declarations (c, db, url, start) and the
// statement that emits each URL are not visible here.  Comments below
// describe only the visible lines.
49 main(int argc
, char **argv
)
// Table of accepted long options; neither option takes an argument.
51 static const struct option long_opts
[] = {
52 {"help", no_argument
, 0, OPT_HELP
},
53 {"version", no_argument
, 0, OPT_VERSION
},
// Consume options until gnu_getopt_long() returns -1.  The short-option
// string is empty, so no short options are defined.
58 while ((c
= gnu_getopt_long(argc
, argv
, "", long_opts
, 0)) != -1) {
// Banner: program name followed by its description.
// NOTE(review): presumably the --help case; the case label is not visible.
61 cout
<< PROG_NAME
" - " PROG_DESC
"\n\n";
// Banner: program name followed by PACKAGE_STRING.
// NOTE(review): presumably the --version case; the case label is not visible.
65 cout
<< PROG_NAME
" - " PACKAGE_STRING
<< endl
;
// True when no non-option argument (i.e. no DATABASE) is left after option
// parsing.
73 if (argc
- optind
< 1) {
// Add each remaining argument to db as a database.  The loop ends at the
// null pointer that terminates argv.
79 while (argv
[optind
]) {
80 db
.add_database(Xapian::Database(argv
[optind
++]));
// Iterate over every term in db that starts with the prefix "U" (the
// messages below call these "unique terms").
83 for (Xapian::TermIterator t
= db
.allterms_begin("U");
84 t
!= db
.allterms_end("U");
86 const string
& term
= *t
;
// For a term shorter than MAX_SAFE_TERM_LENGTH the URL is taken straight
// from the term, dropping its first character (the "U" prefix).
// NOTE(review): presumably longer terms do not hold the complete URL, which
// is why the statements further down read it from the document data instead;
// the `else` joining the two paths is not visible here - confirm.
88 if (term
.size() < MAX_SAFE_TERM_LENGTH
) {
89 url
.assign(term
, 1, string::npos
);
// Start iterating the posting list for this term.
91 Xapian::PostingIterator p
= db
.postlist_begin(term
);
// An empty posting list means the term indexes no document: report it.
92 if (p
== db
.postlist_end(term
)) {
93 cerr
<< "Unique term '" << term
<< "' has no postings!" << endl
;
// Document id of the posting p currently refers to.
96 Xapian::docid did
= *p
;
// If p is not at the end of the posting list here, warn and include the
// term frequency in the message.
// NOTE(review): the iterator advance that would make this a "more than one
// posting" check is not visible here - confirm.
98 if (p
!= db
.postlist_end(term
)) {
99 cerr
<< "warning: Unique term '" << term
<< "' occurs "
100 << t
.get_termfreq() << " times!" << endl
;
// Fetch the stored document data for that document id.
102 const string
& data
= db
.get_document(did
).get_data();
// Locate where the url value starts: either the data begins with "url=" ...
104 if (startswith(data
, "url=")) {
105 start
= CONST_STRLEN("url=");
// ... or a "url=" field begins on a later line, found by searching for
// "\nurl=".
107 start
= data
.find("\nurl=");
// find() returned npos: the document data has no url field, so report it.
108 if (start
== string::npos
) {
109 cerr
<< "No 'url' field in document data for unique term '"
110 << term
<< "'" << endl
;
// Step past the "\nurl=" marker so start indexes the first character of the
// value.
113 start
+= CONST_STRLEN("\nurl=");
// Copy from start up to (not including) the next newline.  If no newline
// follows, find() returns npos and the resulting count is large enough that
// assign() copies through to the end of data.
115 url
.assign(data
, start
, data
.find('\n', start
) - start
);
// Report any Xapian error on cerr, prefixed with the invocation name
// (argv[0]).
119 } catch (const Xapian::Error
&error
) {
120 cerr
<< argv
[0] << ": " << error
.get_description() << endl
;