2 * @brief Debug positional data
4 /* Copyright 2018 Olly Betts
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2 of the
9 * License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
25 #include <xapian/iterator.h>
34 #include "gnu_getopt.h"
35 #include "stringutils.h"
39 #define PROG_NAME "xapian-pos"
40 #define PROG_DESC "Debug positional data in a Xapian database"
48 cout
<< "Usage: " PROG_NAME
" [OPTIONS] DATABASE\n\n"
50 " -d, --doc=DOCID Show positions for document DOCID\n"
51 " -s, --start=POS Specifies the first position to show\n"
52 " -e, --end=POS Specifies the last position to show\n"
53 " --help display this help and exit\n"
54 " --version output version information and exit" << endl
;
60 Xapian::PositionIterator p
;
65 Pos(const string
& term_
, const Xapian::PositionIterator
& p_
)
66 : p(p_
), term(term_
) { pos
= *p
; }
68 Xapian::termpos
get_pos() const { return pos
; }
70 const string
& get_term() const { return term
; }
73 if (!Xapian::iterator_valid(++p
)) {
82 bool operator()(const Pos
* a
, const Pos
* b
) {
83 if (a
->get_pos() != b
->get_pos()) {
84 return a
->get_pos() > b
->get_pos();
86 return a
->get_term() > b
->get_term();
91 bool to_unsigned_int(const char* s
, T
& result
)
95 auto v
= strtoull(s
, &e
, 0);
98 // Junk after or empty input.
100 } else if (v
> numeric_limits
<T
>::max()) {
112 main(int argc
, char **argv
)
114 static const struct option long_opts
[] = {
115 {"doc", required_argument
, 0, 'd'},
116 {"start", required_argument
, 0, 's'},
117 {"end", required_argument
, 0, 'e'},
118 {"help", no_argument
, 0, OPT_HELP
},
119 {"version", no_argument
, 0, OPT_VERSION
},
123 Xapian::docid did
= 0;
124 Xapian::termpos startpos
= 0;
125 Xapian::termpos endpos
= numeric_limits
<Xapian::termpos
>::max();
127 while ((c
= gnu_getopt_long(argc
, argv
, "d:e:s:", long_opts
, 0)) != -1) {
130 if (!to_unsigned_int(optarg
, did
) || did
== 0) {
131 if (errno
== 0) errno
= ERANGE
;
132 cerr
<< "Bad docid value '" << optarg
<< "': "
133 << strerror(errno
) << endl
;
138 if (!to_unsigned_int(optarg
, startpos
)) {
139 cerr
<< "Bad start position '" << optarg
<< "': "
140 << strerror(errno
) << endl
;
145 if (!to_unsigned_int(optarg
, endpos
)) {
146 cerr
<< "Bad end position '" << optarg
<< "': "
147 << strerror(errno
) << endl
;
152 cout
<< PROG_NAME
" - " PROG_DESC
"\n\n";
156 cout
<< PROG_NAME
" - " PACKAGE_STRING
<< endl
;
164 // We expect one argument - a database path.
165 if (argc
- optind
!= 1) {
171 cerr
<< "--doc=DOCID option required." << endl
;
177 Xapian::Database
db(argv
[optind
]);
179 for (auto term_it
= db
.termlist_begin(did
);
180 term_it
!= db
.termlist_end(did
); ++term_it
) {
181 const string
& term
= *term_it
;
182 auto pos_it
= db
.positionlist_begin(did
, term
);
183 if (startpos
) pos_it
.skip_to(startpos
);
184 if (pos_it
!= db
.positionlist_end(did
, term
)) {
185 heap
.push_back(new Pos(term
, pos_it
));
189 make_heap(heap
.begin(), heap
.end(), PosCmp());
191 Xapian::termpos old_pos
= startpos
- 1;
192 while (!heap
.empty()) {
193 auto tip
= heap
.front();
194 Xapian::termpos pos
= tip
->get_pos();
195 if (pos
> endpos
) break;
197 switch (pos
- old_pos
) {
199 // Another term at the same position.
203 cout
<< '\n' << pos
<< '\t';
206 cout
<< "\nGap of " << (pos
- old_pos
- 1)
207 << " unused positions\n" << pos
<< '\t';
210 cout
<< tip
->get_term();
215 pop_heap(heap
.begin(), heap
.end(), PosCmp());
216 push_heap(heap
.begin(), heap
.end(), PosCmp());
218 pop_heap(heap
.begin(), heap
.end(), PosCmp());
219 heap
.resize(heap
.size() - 1);
224 } catch (const Xapian::Error
& e
) {
225 cerr
<< '\n' << argv
[0] << ": " << e
.get_description() << endl
;