Fix tg_termpos1 for 64-bit termpos
[xapian.git] / xapian-applications / omega / omindex-list.cc
blob78fb4849c845d84f2d8d7f69ae406f0f4a8728cf
1 /** @file
2 * @brief List URLs of documents indexed by omindex
3 */
4 /* Copyright 2014 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #include <config.h>
23 #include <xapian.h>
25 #include <cstdlib>
26 #include <string>
27 #include <iostream>
29 #include "gnu_getopt.h"
30 #include "hashterm.h"
31 #include "common/stringutils.h"
33 using namespace std;
35 #define PROG_NAME "omindex-list"
36 #define PROG_DESC "List URLs of documents indexed by omindex"
38 #define OPT_HELP 1
39 #define OPT_VERSION 2
41 static void show_usage() {
42 cout << "Usage: " PROG_NAME " [OPTIONS] DATABASE...\n\n"
43 "Options:\n"
44 " --help display this help and exit\n"
45 " --version output version information and exit" << endl;
48 int
49 main(int argc, char **argv)
50 try {
51 static const struct option long_opts[] = {
52 {"help", no_argument, 0, OPT_HELP},
53 {"version", no_argument, 0, OPT_VERSION},
54 {NULL, 0, 0, 0}
57 int c;
58 while ((c = gnu_getopt_long(argc, argv, "", long_opts, 0)) != -1) {
59 switch (c) {
60 case OPT_HELP:
61 cout << PROG_NAME " - " PROG_DESC "\n\n";
62 show_usage();
63 exit(0);
64 case OPT_VERSION:
65 cout << PROG_NAME " - " PACKAGE_STRING << endl;
66 exit(0);
67 default:
68 show_usage();
69 exit(1);
73 if (argc - optind < 1) {
74 show_usage();
75 exit(1);
78 Xapian::Database db;
79 while (argv[optind]) {
80 db.add_database(Xapian::Database(argv[optind++]));
83 for (Xapian::TermIterator t = db.allterms_begin("U");
84 t != db.allterms_end("U");
85 ++t) {
86 const string & term = *t;
87 string url;
88 if (term.size() < MAX_SAFE_TERM_LENGTH) {
89 url.assign(term, 1, string::npos);
90 } else {
91 Xapian::PostingIterator p = db.postlist_begin(term);
92 if (p == db.postlist_end(term)) {
93 cerr << "Unique term '" << term << "' has no postings!" << endl;
94 continue;
96 Xapian::docid did = *p;
97 ++p;
98 if (p != db.postlist_end(term)) {
99 cerr << "warning: Unique term '" << term << "' occurs "
100 << t.get_termfreq() << " times!" << endl;
102 const string & data = db.get_document(did).get_data();
103 size_t start;
104 if (startswith(data, "url=")) {
105 start = CONST_STRLEN("url=");
106 } else {
107 start = data.find("\nurl=");
108 if (start == string::npos) {
109 cerr << "No 'url' field in document data for unique term '"
110 << term << "'" << endl;
111 continue;
113 start += CONST_STRLEN("\nurl=");
115 url.assign(data, start, data.find('\n', start) - start);
117 cout << url << endl;
119 } catch (const Xapian::Error &error) {
120 cerr << argv[0] << ": " << error.get_description() << endl;
121 exit(1);