omegatest: Use test_scriptindex more
[xapian.git] / xapian-applications / omega / scriptindex.cc
blob197ea25f84c29ad961864173c852fa397946c7a4
1 /** @file
2 * @brief index arbitrary data as described by an index script
3 */
4 /* Copyright 1999,2000,2001 BrightStation PLC
5 * Copyright 2001 Sam Liddicott
6 * Copyright 2001,2002 Ananova Ltd
7 * Copyright 2002-2022 Olly Betts
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License as
11 * published by the Free Software Foundation; either version 2 of the
12 * License, or (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
22 * USA
25 #include <config.h>
27 #include <xapian.h>
29 #include <algorithm>
30 #include <fstream>
31 #include <iostream>
32 #include <list>
33 #include <map>
34 #include <memory>
35 #include <string>
36 #include <unordered_set>
37 #include <vector>
38 #include <cstring>
40 #include <cerrno>
41 #include <cstdio>
42 #include <cstdlib>
43 #include <ctime>
45 #include "commonhelp.h"
46 #include "hashterm.h"
47 #include "loadfile.h"
48 #include "myhtmlparse.h"
49 #include "parseint.h"
50 #include "setenv.h"
51 #include "str.h"
52 #include "stringutils.h"
53 #include "timegm.h"
54 #include "utf8truncate.h"
55 #include "utils.h"
56 #include "values.h"
58 #ifndef HAVE_STRPTIME
59 #include "portability/strptime.h"
60 #endif
62 #include "gnu_getopt.h"
64 using namespace std;
66 #define PROG_NAME "scriptindex"
67 #define PROG_DESC "index arbitrary data as described by an index script"
// Program-wide state.  The counters are presumably tallies of documents
// added / replaced / deleted / skipped - their updates are outside this part
// of the file, so confirm against the callers.
static bool verbose = false;
static int addcount = 0;
static int repcount = 0;
static int delcount = 0;
static int skipcount = 0;

/** What to do if there's a UNIQUE action but a record doesn't use it.
 *
 *  The UNIQUE_WARN_* settings issue a warning, the *_SKIP settings cause the
 *  record to be skipped, and UNIQUE_ERROR reports an error and exits.
 */
static enum {
    UNIQUE_ERROR,
    UNIQUE_WARN_NEW,
    UNIQUE_NEW,
    UNIQUE_WARN_SKIP,
    UNIQUE_SKIP
} unique_missing = UNIQUE_WARN_NEW;

/// Track if UNIQUE action is unused in the current record.
static bool unique_unused = false;

/// Track if the current record is being skipped.
static bool skipping_record = false;
91 static inline bool
92 prefix_needs_colon(const string & prefix, unsigned ch)
94 if (!C_isupper(ch) && ch != ':') return false;
95 string::size_type len = prefix.length();
96 return (len > 1 && prefix[len - 1] != ':');
/** Names of the index actions, indexed by Action::type.
 *
 *  The entries must be in exactly the same order as the enumerators of
 *  Action::type below (this is checked at compile time after the class).
 */
const char * action_names[] = {
    // Actions used internally:
    "bad",
    "new",
    // Actual actions:
    "boolean",
    "date",
    "field",
    "gap",
    "hash",
    "hextobin",
    "index",
    "indexnopos",
    "load",
    "lower",
    "ltrim",
    "parsedate",
    "rtrim",
    "spell",
    "split",
    "squash",
    "trim",
    "truncate",
    "unhtml",
    "unique",
    "value",
    "valuenumeric",
    "valuepacked",
    "weight"
};

// For debugging:
#define DUMP_ACTION(A) std::cout << action_names[(A).get_action()] << "(" << (A).get_string_arg() << "," << (A).get_num_arg() << ")\n"

/** One step of an index script rule.
 *
 *  An Action records which operation to perform, an optional string
 *  argument, an optional integer argument, and the offset of the action
 *  within its index script line (used when reporting diagnostics).
 */
class Action {
  public:
    typedef enum {
        // Actions used internally:
        BAD,
        NEW,
        // Actual actions:
        BOOLEAN,
        DATE,
        FIELD,
        GAP,
        HASH,
        HEXTOBIN,
        INDEX,
        INDEXNOPOS,
        LOAD,
        LOWER,
        LTRIM,
        PARSEDATE,
        RTRIM,
        SPELL,
        SPLIT,
        SQUASH,
        TRIM,
        TRUNCATE,
        UNHTML,
        UNIQUE,
        VALUE,
        VALUENUMERIC,
        VALUEPACKED,
        WEIGHT
    } type;

    /// Values stored in num_arg for a SPLIT action.
    enum { SPLIT_NONE, SPLIT_DEDUP, SPLIT_SORT, SPLIT_PREFIXES };

  private:
    type action;
    int num_arg;
    std::string string_arg;
    // Offset into indexscript line.
    size_t pos;

  public:
    /// Action with no arguments (num_arg is 0, string_arg is empty).
    Action(type action_, size_t pos_)
        : action(action_), num_arg(0), pos(pos_) { }

    /** Action with a string argument.
     *
     *  num_arg is set to the result of atoi() on the argument, so it is 0
     *  when the argument doesn't start with a number.
     */
    Action(type action_, size_t pos_, const std::string & arg)
        : action(action_), num_arg(atoi(arg.c_str())), string_arg(arg),
          pos(pos_) { }

    /// Action with independent string and integer arguments.
    Action(type action_, size_t pos_, const std::string & arg, int num)
        : action(action_), num_arg(num), string_arg(arg), pos(pos_) { }

    type get_action() const { return action; }
    int get_num_arg() const { return num_arg; }
    void set_num_arg(int num) { num_arg = num; }
    const std::string & get_string_arg() const { return string_arg; }
    size_t get_pos() const { return pos; }
};

// action_names[] is indexed with Action::type values, so a missing or extra
// entry would silently give wrong names or an out-of-bounds read.
static_assert(sizeof(action_names) / sizeof(action_names[0]) ==
                  static_cast<size_t>(Action::WEIGHT) + 1,
              "action_names[] must have one entry per Action::type");
188 // These allow searching for an Action with a particular Action::type using
189 // std::find().
191 inline bool
192 operator==(const Action& a, Action::type t) { return a.get_action() == t; }
194 inline bool
195 operator==(Action::type t, const Action& a) { return a.get_action() == t; }
197 inline bool
198 operator!=(const Action& a, Action::type t) { return !(a == t); }
200 inline bool
201 operator!=(Action::type t, const Action& a) { return !(t == a); }
/** Remove leading characters from a string.
 *
 *  @param s      String to modify in place.
 *  @param chars  Set of characters to strip from the start of @a s.
 *
 *  If @a s consists only of characters from @a chars it becomes empty.
 */
static void
ltrim(std::string& s, const std::string& chars)
{
    // find_first_not_of() returns npos when every character is in chars, in
    // which case erase() removes the whole string; a result of 0 erases
    // nothing.
    s.erase(0, s.find_first_not_of(chars));
}
/** Remove trailing characters from a string.
 *
 *  @param s      String to modify in place.
 *  @param chars  Set of characters to strip from the end of @a s.
 *
 *  If @a s consists only of characters from @a chars it becomes empty.
 */
static void
rtrim(std::string& s, const std::string& chars)
{
    const auto last_kept = s.find_last_not_of(chars);
    // If nothing is kept then last_kept is npos and last_kept + 1 wraps
    // round to 0, so the whole string is erased.
    s.erase(last_kept + 1);
}
/** Collapse runs of the specified characters.
 *
 *  @param s      String to modify in place.
 *  @param chars  Set of characters to squash.
 *
 *  Leading and trailing runs of characters from @a chars are removed, and
 *  each run between other text is replaced by a single space.
 */
static void
squash(std::string& s, const std::string& chars)
{
    const auto npos = std::string::npos;
    std::string squashed;
    squashed.reserve(s.size());
    auto start = s.find_first_not_of(chars);
    while (start != npos) {
        // [start, stop) is a maximal run of characters to keep.
        const auto stop = s.find_first_of(chars, start);
        if (!squashed.empty()) squashed += ' ';
        squashed.append(s, start, stop - start);
        start = (stop == npos) ? npos : s.find_first_not_of(chars, stop);
    }
    s.swap(squashed);
}
/// Severity of a diagnostic message.
enum diag_type { DIAG_ERROR, DIAG_WARN, DIAG_NOTE };

/// Number of DIAG_ERROR diagnostics reported so far.
static unsigned error_count = 0;

/** Write the location prefix of a diagnostic to stderr.
 *
 *  Outputs `filename[:line[:column]]: <severity>: ` - the caller then writes
 *  the message itself.  Reporting a DIAG_ERROR also increments error_count.
 *
 *  @param type      Severity of the diagnostic.
 *  @param filename  File the diagnostic relates to.
 *  @param line      Line number, or 0 to omit both line and column.
 *  @param pos       Zero-based offset into the line, or string::npos to omit
 *                   the column.
 */
static void
report_location(enum diag_type type,
                const std::string& filename,
                size_t line = 0,
                size_t pos = std::string::npos)
{
    std::cerr << filename;
    const bool have_line = (line != 0);
    if (have_line) {
        std::cerr << ':' << line;
        if (pos != std::string::npos) {
            // The first column is numbered 1.
            std::cerr << ':' << pos + 1;
        }
    }

    const char* label = nullptr;
    switch (type) {
        case DIAG_ERROR:
            ++error_count;
            label = ": error: ";
            break;
        case DIAG_WARN:
            label = ": warning: ";
            break;
        case DIAG_NOTE:
            label = ": note: ";
            break;
    }
    if (label) std::cerr << label;
}
263 static void
264 report_useless_action(const string &file, size_t line, size_t pos,
265 const string &action)
267 report_location(DIAG_WARN, file, line, pos);
268 cerr << "Index action '" << action << "' has no effect\n";
270 static bool given_left_to_right_warning = false;
271 if (!given_left_to_right_warning) {
272 given_left_to_right_warning = true;
273 report_location(DIAG_NOTE, file, line, pos);
274 cerr << "Actions are executed from left to right\n";
278 static bool index_spec_uses_unique = false;
280 static map<string, vector<Action>> index_spec;
/** Like std::getline() but handle \r\n line endings too.
 *
 *  @param stream  Stream to read from.
 *  @param line    Receives the line with all trailing \r characters removed.
 *
 *  @return @a stream, so the result can be tested in a loop condition.
 */
static std::istream&
getline_portable(std::istream& stream, std::string& line)
{
    std::getline(stream, line);
    // Trim multiple \r characters, since that seems the best way to handle
    // that case.  If the line is nothing but \r then find_last_not_of()
    // returns npos and npos + 1 == 0, so the line ends up empty.
    const auto last_kept = line.find_last_not_of('\r');
    line.erase(last_kept + 1);
    return stream;
}
293 static void
294 parse_index_script(const string &filename)
296 ifstream script(filename.c_str());
297 if (!script.is_open()) {
298 report_location(DIAG_ERROR, filename);
299 cerr << strerror(errno) << '\n';
300 exit(1);
302 string line;
303 size_t line_no = 0;
304 // Line number where we saw a `unique` action, or 0 if we haven't.
305 int unique_line_no = 0;
306 // Offset into line unique_line_no where the `unique` action was.
307 size_t unique_pos = 0;
308 while (getline(script, line)) {
309 ++line_no;
310 vector<string> fields;
311 vector<Action> actions;
312 string::const_iterator i, j;
313 const string &s = line;
314 i = find_if(s.begin(), s.end(), [](char ch) { return !C_isspace(ch); });
315 if (i == s.end() || *i == '#') {
316 // Blank line or comment.
317 continue;
319 while (true) {
320 if (!C_isalnum(*i)) {
321 report_location(DIAG_ERROR, filename, line_no, i - s.begin());
322 cerr << "field name must start with alphanumeric\n";
324 j = find_if(i + 1, s.end(),
325 [](char ch) { return !C_isalnum(ch) && ch != '_'; });
326 fields.push_back(string(i, j));
327 i = find_if(j, s.end(), [](char ch) { return !C_isspace(ch); });
328 if (i == s.end()) break;
329 if (*i == ':') {
330 ++i;
331 i = find_if(i, s.end(), [](char ch) { return !C_isspace(ch); });
332 break;
334 if (i == j) {
335 report_location(DIAG_ERROR, filename, line_no, i - s.begin());
336 cerr << "bad character '" << *i << "' in field name\n";
337 ++i;
338 i = find_if(i, s.end(), [](char ch) { return !C_isspace(ch); });
339 if (i == s.end()) break;
342 Xapian::termcount weight = 1;
343 size_t useless_weight_pos = string::npos;
344 map<string, Action::type> boolmap;
345 j = i;
346 while (j != s.end()) {
347 size_t action_pos = j - s.begin();
348 i = find_if(j, s.end(), [](char ch) { return !C_isalnum(ch); });
349 string action(s, j - s.begin(), i - j);
350 Action::type code = Action::BAD;
351 unsigned min_args = 0, max_args = 0;
352 bool takes_integer_argument = false;
353 if (!action.empty()) {
354 switch (action[0]) {
355 case 'b':
356 if (action == "boolean") {
357 code = Action::BOOLEAN;
358 max_args = 1;
360 break;
361 case 'd':
362 if (action == "date") {
363 code = Action::DATE;
364 min_args = max_args = 1;
366 break;
367 case 'f':
368 if (action == "field") {
369 code = Action::FIELD;
370 max_args = 1;
372 break;
373 case 'g':
374 if (action == "gap") {
375 code = Action::GAP;
376 max_args = 1;
377 takes_integer_argument = true;
379 break;
380 case 'h':
381 if (action == "hash") {
382 code = Action::HASH;
383 max_args = 1;
384 takes_integer_argument = true;
385 } else if (action == "hextobin") {
386 code = Action::HEXTOBIN;
388 break;
389 case 'i':
390 if (action == "index") {
391 code = Action::INDEX;
392 max_args = 1;
393 } else if (action == "indexnopos") {
394 code = Action::INDEXNOPOS;
395 max_args = 1;
397 break;
398 case 'l':
399 if (action == "lower") {
400 code = Action::LOWER;
401 } else if (action == "load") {
402 code = Action::LOAD;
403 } else if (action == "ltrim") {
404 code = Action::LTRIM;
405 max_args = 1;
407 break;
408 case 'p':
409 if (action == "parsedate") {
410 code = Action::PARSEDATE;
411 min_args = max_args = 1;
413 break;
414 case 'r':
415 if (action == "rtrim") {
416 code = Action::RTRIM;
417 max_args = 1;
419 break;
420 case 's':
421 if (action == "spell") {
422 code = Action::SPELL;
423 } else if (action == "split") {
424 code = Action::SPLIT;
425 min_args = 1;
426 max_args = 2;
427 } else if (action == "squash") {
428 code = Action::SQUASH;
429 max_args = 1;
431 break;
432 case 't':
433 if (action == "truncate") {
434 code = Action::TRUNCATE;
435 min_args = max_args = 1;
436 takes_integer_argument = true;
437 } else if (action == "trim") {
438 code = Action::TRIM;
439 max_args = 1;
441 break;
442 case 'u':
443 if (action == "unhtml") {
444 code = Action::UNHTML;
445 } else if (action == "unique") {
446 code = Action::UNIQUE;
447 min_args = 1;
448 max_args = 2;
450 break;
451 case 'v':
452 if (action == "value") {
453 code = Action::VALUE;
454 min_args = max_args = 1;
455 takes_integer_argument = true;
456 } else if (action == "valuenumeric") {
457 code = Action::VALUENUMERIC;
458 min_args = max_args = 1;
459 takes_integer_argument = true;
460 } else if (action == "valuepacked") {
461 code = Action::VALUEPACKED;
462 min_args = max_args = 1;
463 takes_integer_argument = true;
465 break;
466 case 'w':
467 if (action == "weight") {
468 code = Action::WEIGHT;
469 min_args = max_args = 1;
470 // Don't set takes_integer_argument since we parse
471 // it with parse_unsigned() and issue an error there
472 // - setting takes_integer_argument would give a
473 // double error for arguments with a decimal point.
475 break;
478 if (code == Action::BAD) {
479 report_location(DIAG_ERROR, filename, line_no, action_pos);
480 cerr << "Unknown index action '" << action << "'\n";
482 auto i_after_action = i;
483 i = find_if(i, s.end(), [](char ch) { return !C_isspace(ch); });
485 if (i != s.end() && *i == '=') {
486 if (i != i_after_action) {
487 report_location(DIAG_WARN, filename, line_no,
488 i_after_action - s.begin());
489 cerr << "putting spaces between the action and '=' is "
490 "deprecated\n";
493 if (max_args == 0) {
494 report_location(DIAG_ERROR, filename, line_no,
495 i - s.begin());
496 cerr << "Index action '" << action
497 << "' doesn't take an argument\n";
500 ++i;
501 j = find_if(i, s.end(), [](char ch) { return !C_isspace(ch); });
502 if (i != j) {
503 report_location(DIAG_WARN, filename, line_no,
504 i - s.begin());
505 cerr << "putting spaces between '=' and the argument is "
506 "deprecated\n";
509 vector<string> vals;
510 while (true) {
511 if (j != s.end() && *j == '"') {
512 // Quoted argument.
513 ++j;
514 string arg;
515 while (true) {
516 i = find_if(j, s.end(),
517 [](char ch) {
518 return ch == '"' || ch == '\\';
520 if (i == s.end()) {
521 report_location(DIAG_ERROR, filename, line_no,
522 s.size());
523 cerr << "No closing quote\n";
524 break;
526 arg.append(j, i);
527 if (*i++ == '"')
528 break;
530 // Escape sequence.
531 if (i == s.end()) {
532 bad_escaping:
533 report_location(DIAG_ERROR, filename, line_no,
534 i - s.begin());
535 cerr << "Bad escaping in quoted action "
536 "argument\n";
537 break;
540 char ch = *i;
541 switch (ch) {
542 case '\\':
543 case '"':
544 break;
545 case '0':
546 ch = '\0';
547 break;
548 case 'n':
549 ch = '\n';
550 break;
551 case 'r':
552 ch = '\r';
553 break;
554 case 't':
555 ch = '\t';
556 break;
557 case 'x': {
558 if (++i == s.end())
559 goto bad_escaping;
560 char ch1 = *i;
561 if (!C_isxdigit(ch1)) {
562 bad_hex_digit:
563 report_location(DIAG_ERROR, filename,
564 line_no, i - s.begin());
565 cerr << "Bad hex digit in escaping\n";
566 --i;
567 break;
569 if (++i == s.end())
570 goto bad_escaping;
571 char ch2 = *i;
572 if (!C_isxdigit(ch2)) {
573 goto bad_hex_digit;
575 ch = hex_digit(ch1) << 4 |
576 hex_digit(ch2);
577 break;
579 default:
580 report_location(DIAG_ERROR, filename,
581 line_no, i - s.begin());
582 cerr << "Bad escape sequence '\\" << ch
583 << "'\n";
584 break;
586 arg += ch;
587 j = i + 1;
589 vals.emplace_back(std::move(arg));
590 if (i == s.end() || C_isspace(*i)) break;
591 if (*i == ',') {
592 ++i;
593 } else {
594 report_location(DIAG_ERROR, filename, line_no,
595 i - s.begin());
596 cerr << "Unexpected character '" << *i
597 << "' after closing quote\n";
598 do {
599 ++i;
600 } while (i != s.end() && *i != ',' && !C_isspace(*i));
601 if (*i != ',') break;
602 ++i;
604 } else if (max_args > 1) {
605 // Unquoted argument, split on comma.
606 i = find_if(j, s.end(),
607 [](char ch) {
608 return C_isspace(ch) || ch == ',';
610 vals.emplace_back(j, i);
611 if (*i != ',') break;
612 ++i;
613 } else {
614 // Unquoted argument, including any commas.
615 i = find_if(j, s.end(),
616 [](char ch) { return C_isspace(ch); });
617 vals.emplace_back(j, i);
618 break;
620 j = i;
622 if (vals.size() == max_args) {
623 report_location(DIAG_ERROR, filename, line_no,
624 i - s.begin());
625 cerr << "Index action '" << action << "' takes at most "
626 << max_args << " arguments\n";
630 if (vals.size() < min_args) {
631 report_location(DIAG_ERROR, filename, line_no,
632 i - s.begin());
633 if (min_args == max_args) {
634 cerr << "Index action '" << action << "' requires "
635 << min_args << " arguments\n";
636 } else {
637 cerr << "Index action '" << action << "' requires "
638 "at least " << min_args << " arguments\n";
640 // Allow action handling code to assume there are min_args
641 // arguments.
642 vals.resize(min_args);
645 string val;
646 if (!vals.empty()) {
647 val = vals.front();
650 if (takes_integer_argument) {
651 auto dot = val.find('.');
652 if (dot != string::npos) {
653 report_location(DIAG_ERROR, filename, line_no,
654 j - s.begin() + dot);
655 cerr << "Index action '" << action
656 << "' takes an integer argument\n";
659 switch (code) {
660 case Action::DATE:
661 if (val != "unix" &&
662 val != "unixutc" &&
663 val != "yyyymmdd") {
664 report_location(DIAG_ERROR, filename, line_no,
665 j - s.begin());
666 cerr << "Invalid parameter '" << val
667 << "' for action 'date'\n";
669 actions.emplace_back(code, action_pos, val);
670 break;
671 case Action::INDEX:
672 case Action::INDEXNOPOS:
673 actions.emplace_back(code, action_pos, val, weight);
674 useless_weight_pos = string::npos;
675 break;
676 case Action::WEIGHT:
677 // We don't push an Action for WEIGHT - instead we
678 // store it ready to use in the INDEX and INDEXNOPOS
679 // Actions.
680 if (!parse_unsigned(val.c_str(), weight)) {
681 report_location(DIAG_ERROR, filename, line_no,
682 j - s.begin());
683 cerr << "Index action 'weight' takes a "
684 "non-negative integer argument\n";
685 weight = 0;
687 if (useless_weight_pos != string::npos) {
688 report_useless_action(filename, line_no,
689 useless_weight_pos, action);
691 useless_weight_pos = action_pos;
692 break;
693 case Action::PARSEDATE: {
694 auto bad_code = val.find("%Z");
695 if (bad_code != val.npos) {
696 report_location(DIAG_ERROR, filename, line_no,
697 j - s.begin() + bad_code);
698 cerr << "Parsing timezone names with %Z is not "
699 "supported\n";
701 #ifndef HAVE_STRUCT_TM_TM_GMTOFF
702 bad_code = val.find("%z");
703 if (bad_code != val.npos) {
704 report_location(DIAG_ERROR, filename, line_no,
705 j - s.begin() + bad_code);
706 cerr << "Parsing timezone offsets with %z is not "
707 "supported on this platform\n";
709 #endif
710 actions.emplace_back(code, action_pos, val);
711 break;
713 case Action::SPLIT: {
714 if (val.empty()) {
715 report_location(DIAG_ERROR, filename, line_no,
716 j - s.begin());
717 cerr << "Split delimiter can't be empty\n";
719 int operation = Action::SPLIT_NONE;
720 if (vals.size() >= 2) {
721 if (vals[1] == "dedup") {
722 operation = Action::SPLIT_DEDUP;
723 } else if (vals[1] == "sort") {
724 operation = Action::SPLIT_SORT;
725 } else if (vals[1] == "none") {
726 operation = Action::SPLIT_NONE;
727 } else if (vals[1] == "prefixes") {
728 operation = Action::SPLIT_PREFIXES;
729 } else {
730 // FIXME: Column should be for where the `op`
731 // parameter starts, which this isn't if the
732 // value is quoted, contains escape sequences,
733 // etc.
734 report_location(DIAG_ERROR, filename, line_no,
735 i - s.begin() - vals[1].size());
736 cerr << "Bad split operation '" << vals[1]
737 << "'\n";
740 actions.emplace_back(code, action_pos, val, operation);
741 break;
743 case Action::TRUNCATE:
744 if (!actions.empty() &&
745 actions.back().get_action() == Action::LOAD) {
746 /* Turn "load truncate=n" into "load" with
747 * num_arg n, so that we don't needlessly
748 * allocate memory and read data we're just
749 * going to ignore.
751 actions.pop_back();
752 code = Action::LOAD;
754 actions.emplace_back(code, action_pos, val);
755 break;
756 case Action::UNIQUE:
757 if (unique_line_no) {
758 report_location(DIAG_ERROR, filename, line_no,
759 action_pos);
760 cerr << "Index action 'unique' used more than "
761 "once\n";
762 report_location(DIAG_NOTE, filename,
763 unique_line_no, unique_pos);
764 cerr << "Previously used here\n";
766 unique_line_no = line_no;
767 unique_pos = action_pos;
768 if (boolmap.find(val) == boolmap.end())
769 boolmap[val] = Action::UNIQUE;
770 if (vals.size() >= 2) {
771 if (vals[1] == "missing=error") {
772 unique_missing = UNIQUE_ERROR;
773 } else if (vals[1] == "missing=new") {
774 unique_missing = UNIQUE_NEW;
775 } else if (vals[1] == "missing=warn+new") {
776 unique_missing = UNIQUE_WARN_NEW;
777 } else if (vals[1] == "missing=skip") {
778 unique_missing = UNIQUE_SKIP;
779 } else if (vals[1] == "missing=warn+skip") {
780 unique_missing = UNIQUE_WARN_SKIP;
781 } else {
782 report_location(DIAG_ERROR, filename, line_no);
783 cerr << "Bad unique parameter '" << vals[1]
784 << "'\n";
787 actions.emplace_back(code, action_pos, val);
788 break;
789 case Action::GAP: {
790 actions.emplace_back(code, action_pos, val);
791 auto& obj = actions.back();
792 auto gap_size = obj.get_num_arg();
793 if (gap_size <= 0) {
794 report_location(DIAG_ERROR, filename, line_no,
795 obj.get_pos() + 3 + 1);
796 cerr << "Index action 'gap' takes a strictly "
797 "positive integer argument\n";
799 break;
801 case Action::HASH: {
802 actions.emplace_back(code, action_pos, val);
803 auto& obj = actions.back();
804 auto max_length = obj.get_num_arg();
805 if (max_length < 6) {
806 report_location(DIAG_ERROR, filename, line_no,
807 obj.get_pos() + 4 + 1);
808 cerr << "Index action 'hash' takes an integer "
809 "argument which must be at least 6\n";
811 break;
813 case Action::LTRIM:
814 case Action::RTRIM:
815 case Action::SQUASH:
816 case Action::TRIM:
817 for (unsigned char ch : val) {
818 if (ch >= 0x80) {
819 auto column = actions.back().get_pos() +
820 strlen(action_names[code]) + 1;
821 report_location(DIAG_ERROR, filename, line_no,
822 column);
823 cerr << "Index action '" << action_names[code]
824 << "' only support ASCII characters "
825 "currently\n";
828 actions.emplace_back(code, action_pos, val);
829 break;
830 case Action::BOOLEAN:
831 boolmap[val] = Action::BOOLEAN;
832 /* FALLTHRU */
833 default:
834 actions.emplace_back(code, action_pos, val);
836 i = find_if(i, s.end(), [](char ch) { return !C_isspace(ch); });
837 } else {
838 if (min_args > 0) {
839 report_location(DIAG_ERROR, filename, line_no,
840 i_after_action - s.begin());
841 if (min_args == max_args) {
842 cerr << "Index action '" << action << "' requires "
843 << min_args << " arguments\n";
844 } else {
845 cerr << "Index action '" << action << "' requires "
846 "at least " << min_args << " arguments\n";
849 switch (code) {
850 case Action::INDEX:
851 case Action::INDEXNOPOS:
852 useless_weight_pos = string::npos;
853 actions.emplace_back(code, action_pos, "", weight);
854 break;
855 case Action::GAP:
856 actions.emplace_back(code, action_pos, "", 100);
857 break;
858 case Action::HASH:
859 actions.emplace_back(code, action_pos, "",
860 MAX_SAFE_TERM_LENGTH - 1);
861 break;
862 case Action::LTRIM:
863 case Action::RTRIM:
864 case Action::SQUASH:
865 case Action::TRIM:
866 actions.emplace_back(code, action_pos, " \t\f\v\r\n");
867 break;
868 default:
869 actions.emplace_back(code, action_pos);
870 break;
873 j = i;
876 if (useless_weight_pos != string::npos) {
877 report_useless_action(filename, line_no, useless_weight_pos,
878 "weight");
881 while (!actions.empty()) {
882 bool done = true;
883 Action::type action = actions.back().get_action();
884 switch (action) {
885 case Action::HASH:
886 case Action::HEXTOBIN:
887 case Action::LOWER:
888 case Action::LTRIM:
889 case Action::PARSEDATE:
890 case Action::RTRIM:
891 case Action::SPELL:
892 case Action::SQUASH:
893 case Action::TRIM:
894 case Action::TRUNCATE:
895 case Action::UNHTML:
896 done = false;
897 report_useless_action(filename, line_no,
898 actions.back().get_pos(),
899 action_names[action]);
900 actions.pop_back();
901 break;
902 default:
903 break;
905 if (done) break;
908 map<string, Action::type>::const_iterator boolpfx;
909 for (boolpfx = boolmap.begin(); boolpfx != boolmap.end(); ++boolpfx) {
910 if (boolpfx->second == Action::UNIQUE) {
911 report_location(DIAG_WARN, filename, unique_line_no,
912 unique_pos);
913 cerr << "Index action 'unique=" << boolpfx->first
914 << "' without 'boolean=" << boolpfx->first << "'\n";
915 static bool given_doesnt_imply_boolean_warning = false;
916 if (!given_doesnt_imply_boolean_warning) {
917 given_doesnt_imply_boolean_warning = true;
918 report_location(DIAG_NOTE, filename, unique_line_no,
919 unique_pos);
920 cerr << "'unique' doesn't implicitly add a boolean term\n";
925 vector<string>::const_iterator field;
926 for (field = fields.begin(); field != fields.end(); ++field) {
927 vector<Action> &v = index_spec[*field];
928 if (v.empty()) {
929 if (fields.size() == 1) {
930 // Optimise common case where there's only one fieldname
931 // for a list of actions.
932 v = std::move(actions);
933 } else {
934 v = actions;
936 } else {
937 v.emplace_back(Action::NEW, string::npos);
938 v.insert(v.end(), actions.begin(), actions.end());
943 if (index_spec.empty()) {
944 report_location(DIAG_ERROR, filename, line_no);
945 cerr << "No rules found in index script\n";
948 if (error_count) {
949 exit(1);
952 index_spec_uses_unique = (unique_line_no > 0);
955 static bool
956 run_actions(vector<Action>::const_iterator action_it,
957 vector<Action>::const_iterator action_end,
958 Xapian::WritableDatabase& database,
959 Xapian::TermGenerator& indexer,
960 const string& old_value,
961 bool& this_field_is_content, Xapian::Document& doc,
962 map<string, list<string>>& fields,
963 string& field, const char* fname,
964 size_t line_no, Xapian::docid& docid)
966 string value = old_value;
967 while (action_it != action_end) {
968 auto& action = *action_it++;
969 switch (action.get_action()) {
970 case Action::BAD:
971 abort();
972 case Action::NEW:
973 value = old_value;
974 // We're processing the same field again - give it a reprieve.
975 this_field_is_content = true;
976 break;
977 case Action::FIELD:
978 if (!value.empty()) {
979 string f = action.get_string_arg();
980 if (f.empty()) f = field;
981 // replace newlines with spaces
982 string s = value;
983 string::size_type j = 0;
984 while ((j = s.find('\n', j)) != string::npos)
985 s[j] = ' ';
986 fields[f].push_back(s);
988 break;
989 case Action::INDEX:
990 indexer.index_text(value,
991 action.get_num_arg(),
992 action.get_string_arg());
993 break;
994 case Action::INDEXNOPOS:
995 // No positional information so phrase searching won't work.
996 // However, the database will use much less diskspace.
997 indexer.index_text_without_positions(value,
998 action.get_num_arg(),
999 action.get_string_arg());
1000 break;
1001 case Action::BOOLEAN: {
1002 // Do nothing if there's no text.
1003 if (value.empty()) break;
1005 string term = action.get_string_arg();
1006 if (prefix_needs_colon(term, value[0])) term += ':';
1007 term += value;
1009 doc.add_boolean_term(term);
1010 break;
1012 case Action::GAP:
1013 indexer.increase_termpos(action.get_num_arg());
1014 break;
1015 case Action::HASH: {
1016 unsigned int max_length = action.get_num_arg();
1017 if (value.length() > max_length)
1018 value = hash_long_term(value, max_length);
1019 break;
1021 case Action::HEXTOBIN: {
1022 size_t len = value.length();
1023 if (len & 1) {
1024 report_location(DIAG_ERROR, fname, line_no);
1025 cerr << "hextobin: input must have even length\n";
1026 exit(1);
1029 string output;
1030 output.reserve(len / 2);
1031 for (size_t j = 0; j < len; j += 2) {
1032 char a = value[j];
1033 char b = value[j + 1];
1034 if (!C_isxdigit(a) || !C_isxdigit(b)) {
1035 report_location(DIAG_ERROR, fname, line_no);
1036 cerr << "hextobin: input must be all hex digits\n";
1037 exit(1);
1039 char r = (hex_digit(a) << 4) | hex_digit(b);
1040 output.push_back(r);
1042 value = std::move(output);
1043 break;
1045 case Action::LOWER:
1046 value = Xapian::Unicode::tolower(value);
1047 break;
1048 case Action::LTRIM:
1049 ltrim(value, action.get_string_arg());
1050 break;
1051 case Action::RTRIM:
1052 rtrim(value, action.get_string_arg());
1053 break;
1054 case Action::TRIM:
1055 rtrim(value, action.get_string_arg());
1056 ltrim(value, action.get_string_arg());
1057 break;
1058 case Action::SQUASH:
1059 squash(value, action.get_string_arg());
1060 break;
1061 case Action::LOAD: {
1062 // If there's no input, just issue a warning.
1063 if (value.empty()) {
1064 report_location(DIAG_WARN, fname, line_no);
1065 cerr << "Empty filename in LOAD action\n";
1066 break;
1068 bool truncated = false;
1069 string filename = std::move(value);
1070 // FIXME: Use NOATIME if we own the file or are root.
1071 if (!load_file(filename, action.get_num_arg(), NOCACHE,
1072 value, truncated)) {
1073 report_location(DIAG_ERROR, fname, line_no);
1074 cerr << "Couldn't load file '" << filename << "': "
1075 << strerror(errno) << '\n';
1076 exit(1);
1078 if (!truncated) break;
1080 /* FALLTHRU */
1081 case Action::TRUNCATE:
1082 utf8_truncate(value, action.get_num_arg());
1083 break;
1084 case Action::SPELL:
1085 indexer.set_flags(indexer.FLAG_SPELLING);
1086 break;
1087 case Action::SPLIT: {
1088 // Find the end of the actions which split should execute.
1089 auto split_end = find(action_it, action_end, Action::NEW);
1091 int split_type = action.get_num_arg();
1092 if (value.empty()) {
1093 // Nothing to do.
1094 } else if (split_type != Action::SPLIT_SORT) {
1095 // Generate split as we consume it.
1096 const string& delimiter = action.get_string_arg();
1098 unique_ptr<unordered_set<string>> seen;
1099 if (split_type == Action::SPLIT_DEDUP) {
1100 seen.reset(new unordered_set<string>);
1103 if (delimiter.size() == 1) {
1104 // Special case for common single character delimiter.
1105 char ch = delimiter[0];
1106 string::size_type i = 0;
1107 while (true) {
1108 string::size_type j = value.find(ch, i);
1109 if (split_type == Action::SPLIT_PREFIXES) {
1110 if (j > 0) {
1111 string val(value, 0, j);
1112 run_actions(action_it, split_end,
1113 database, indexer,
1114 val,
1115 this_field_is_content, doc,
1116 fields,
1117 field, fname, line_no,
1118 docid);
1120 } else if (i != j) {
1121 string val(value, i, j - i);
1122 if (!seen.get() || seen->insert(val).second) {
1123 run_actions(action_it, split_end,
1124 database, indexer,
1125 val,
1126 this_field_is_content, doc,
1127 fields,
1128 field, fname, line_no,
1129 docid);
1132 if (j == string::npos) break;
1133 i = j + 1;
1135 } else {
1136 string::size_type i = 0;
1137 while (true) {
1138 string::size_type j = value.find(delimiter, i);
1139 if (split_type == Action::SPLIT_PREFIXES) {
1140 if (j > 0) {
1141 string val(value, 0, j);
1142 run_actions(action_it, split_end,
1143 database, indexer,
1144 val,
1145 this_field_is_content, doc,
1146 fields,
1147 field, fname, line_no,
1148 docid);
1150 } else if (i != j) {
1151 string val(value, i, j - i);
1152 if (!seen.get() || seen->insert(val).second) {
1153 run_actions(action_it, split_end,
1154 database, indexer,
1155 val,
1156 this_field_is_content, doc,
1157 fields,
1158 field, fname, line_no,
1159 docid);
1162 if (j == string::npos) break;
1163 i = j + delimiter.size();
1166 } else {
1167 vector<string> split_values;
1168 const string& delimiter = action.get_string_arg();
1169 if (delimiter.size() == 1) {
1170 // Special case for common single character delimiter.
1171 char ch = delimiter[0];
1172 string::size_type i = 0;
1173 while (true) {
1174 string::size_type j = value.find(ch, i);
1175 if (i != j) {
1176 split_values.emplace_back(value, i, j - i);
1178 if (j == string::npos) break;
1179 i = j + 1;
1181 } else {
1182 string::size_type i = 0;
1183 while (true) {
1184 string::size_type j = value.find(delimiter, i);
1185 if (i != j) {
1186 split_values.emplace_back(value, i, j - i);
1188 if (j == string::npos) break;
1189 i = j + delimiter.size();
1193 sort(split_values.begin(), split_values.end());
1195 for (auto&& val : split_values) {
1196 run_actions(action_it, split_end,
1197 database, indexer, val,
1198 this_field_is_content, doc, fields,
1199 field, fname, line_no,
1200 docid);
1204 action_it = split_end;
1205 break;
1207 case Action::UNHTML: {
1208 MyHtmlParser p;
1209 try {
1210 // Default HTML character set is latin 1, though
1211 // not specifying one is deprecated these days.
1212 p.parse_html(value, "iso-8859-1", false);
1213 } catch (const string & newcharset) {
1214 p.reset();
1215 p.parse_html(value, newcharset, true);
1217 if (p.indexing_allowed)
1218 value = p.dump;
1219 else
1220 value = "";
1221 break;
1223 case Action::UNIQUE: {
1224 unique_unused = false;
1226 if (value.empty()) {
1227 enum diag_type diag = DIAG_WARN;
1228 switch (unique_missing) {
1229 case UNIQUE_ERROR:
1230 diag = DIAG_ERROR;
1231 /* FALLTHRU */
1232 case UNIQUE_WARN_NEW:
1233 case UNIQUE_WARN_SKIP:
1234 report_location(diag, fname, line_no);
1235 cerr << "UNIQUE action on empty text\n";
1236 default:
1237 break;
1239 switch (unique_missing) {
1240 case UNIQUE_ERROR:
1241 exit(1);
1242 case UNIQUE_SKIP:
1243 case UNIQUE_WARN_SKIP:
1244 skipping_record = true;
1245 break;
1246 case UNIQUE_NEW:
1247 case UNIQUE_WARN_NEW:
1248 break;
1250 break;
1253 // Ensure that the value of this field is unique.
1254 // If a record already exists with the same value,
1255 // it will be replaced with the new record.
1257 // Unique fields aren't considered content - if
1258 // there are no other fields in the document, the
1259 // document is to be deleted.
1260 this_field_is_content = false;
1262 // Argument is the prefix to add to the field value
1263 // to get the unique term.
1264 string t = action.get_string_arg();
1265 if (prefix_needs_colon(t, value[0])) t += ':';
1266 t += value;
1267 Xapian::PostingIterator p = database.postlist_begin(t);
1268 if (p != database.postlist_end(t)) {
1269 docid = *p;
1271 break;
1273 case Action::VALUE:
1274 if (!value.empty())
1275 doc.add_value(action.get_num_arg(), value);
1276 break;
1277 case Action::VALUENUMERIC: {
1278 if (value.empty()) break;
1279 char * end;
1280 double dbl = strtod(value.c_str(), &end);
1281 if (*end) {
1282 report_location(DIAG_WARN, fname, line_no);
1283 cerr << "Trailing characters in VALUENUMERIC: '"
1284 << value << "'\n";
1286 doc.add_value(action.get_num_arg(),
1287 Xapian::sortable_serialise(dbl));
1288 break;
1290 case Action::VALUEPACKED: {
1291 uint32_t word = 0;
1292 if (value.empty() || !C_isdigit(value[0])) {
1293 // strtoul() accepts leading whitespace and negated
1294 // values, neither of which we want to allow.
1295 errno = EINVAL;
1296 } else {
1297 errno = 0;
1298 char* q;
1299 word = strtoul(value.c_str(), &q, 10);
1300 if (!errno && *q != '\0') {
1301 // Trailing characters after converted value.
1302 errno = EINVAL;
1305 if (errno) {
1306 report_location(DIAG_WARN, fname, line_no);
1307 cerr << "valuepacked \"" << value << "\" ";
1308 if (errno == ERANGE) {
1309 cerr << "out of range\n";
1310 } else {
1311 cerr << "not an unsigned integer\n";
1314 int valueslot = action.get_num_arg();
1315 doc.add_value(valueslot, int_to_binary_string(word));
1316 break;
1318 case Action::DATE: {
1319 // Do nothing for empty input.
1320 if (value.empty()) break;
1322 const string & type = action.get_string_arg();
1323 string yyyymmdd;
1324 if (type == "unix") {
1325 time_t t;
1326 if (!parse_signed(value.c_str(), t)) {
1327 report_location(DIAG_WARN, fname, line_no);
1328 cerr << "Date value (in secs) for action DATE "
1329 "must be an integer - ignoring\n";
1330 break;
1332 struct tm *tm = localtime(&t);
1333 int y = tm->tm_year + 1900;
1334 int m = tm->tm_mon + 1;
1335 yyyymmdd = date_to_string(y, m, tm->tm_mday);
1336 } else if (type == "unixutc") {
1337 time_t t;
1338 if (!parse_signed(value.c_str(), t)) {
1339 report_location(DIAG_WARN, fname, line_no);
1340 cerr << "Date value (in secs) for action DATE "
1341 "must be an integer - ignoring\n";
1342 break;
1344 struct tm *tm = gmtime(&t);
1345 int y = tm->tm_year + 1900;
1346 int m = tm->tm_mon + 1;
1347 yyyymmdd = date_to_string(y, m, tm->tm_mday);
1348 } else if (type == "yyyymmdd") {
1349 if (value.length() != 8) {
1350 report_location(DIAG_WARN, fname, line_no);
1351 cerr << "date=yyyymmdd expects an 8 character value "
1352 "- ignoring\n";
1353 break;
1355 yyyymmdd = value;
1358 // Date (YYYYMMDD)
1359 doc.add_boolean_term("D" + yyyymmdd);
1360 yyyymmdd.resize(6);
1361 // Month (YYYYMM)
1362 doc.add_boolean_term("M" + yyyymmdd);
1363 yyyymmdd.resize(4);
1364 // Year (YYYY)
1365 doc.add_boolean_term("Y" + yyyymmdd);
1366 break;
1368 case Action::PARSEDATE: {
1369 string dateformat = action.get_string_arg();
1370 struct tm tm;
1371 memset(&tm, 0, sizeof(tm));
1372 auto ret = strptime(value.c_str(), dateformat.c_str(), &tm);
1373 if (ret == NULL) {
1374 report_location(DIAG_WARN, fname, line_no);
1375 cerr << "\"" << value << "\" doesn't match format "
1376 "\"" << dateformat << '\"' << '\n';
1377 break;
1380 if (*ret != '\0') {
1381 report_location(DIAG_WARN, fname, line_no);
1382 cerr << "\"" << value << "\" not fully matched by "
1383 "format \"" << dateformat << "\" "
1384 "(\"" << ret << "\" left over) but "
1385 "indexing anyway\n";
1387 #ifdef HAVE_STRUCT_TM_TM_GMTOFF
1388 auto gmtoff = tm.tm_gmtoff;
1389 #endif
1390 auto secs_since_epoch = timegm(&tm);
1391 #ifdef HAVE_STRUCT_TM_TM_GMTOFF
1392 secs_since_epoch -= gmtoff;
1393 #endif
1394 value = str(secs_since_epoch);
1395 break;
1397 default:
1398 /* Empty default case to avoid "unhandled enum value"
1399 * warnings. */
1400 break;
1403 return true;
1406 static void
1407 index_file(const char *fname, istream &stream,
1408 Xapian::WritableDatabase &database, Xapian::TermGenerator &indexer)
1410 string line;
1411 size_t line_no = 0;
1412 while (!stream.eof() && getline_portable(stream, line)) {
1413 ++line_no;
1414 // Allow blank lines before the first record and multiple blank lines
1415 // between records.
1416 if (line.empty()) continue;
1418 Xapian::Document doc;
1419 indexer.set_document(doc);
1420 Xapian::docid docid = 0;
1421 map<string, list<string>> fields;
1422 bool seen_content = false;
1423 skipping_record = false;
1424 unique_unused = index_spec_uses_unique;
1425 while (!line.empty()) {
1426 string::size_type eq = line.find('=');
1427 if (eq == string::npos && !line.empty()) {
1428 report_location(DIAG_ERROR, fname, line_no);
1429 cerr << "Expected = somewhere in this line\n";
1430 exit(1);
1432 string field(line, 0, eq);
1433 string value(line, eq + 1, string::npos);
1434 line.clear();
1435 while (getline_portable(stream, line)) {
1436 ++line_no;
1437 if (line.empty() || line[0] != '=') break;
1438 // Replace the '=' with a '\n'.
1439 line[0] = '\n';
1440 value += line;
1443 if (skipping_record) continue;
1445 // Default to not indexing spellings.
1446 indexer.set_flags(Xapian::TermGenerator::flags(0));
1448 bool this_field_is_content = true;
1449 const vector<Action>& v = index_spec[field];
1450 run_actions(v.begin(), v.end(),
1451 database, indexer, value,
1452 this_field_is_content, doc, fields,
1453 field, fname, line_no,
1454 docid);
1455 if (this_field_is_content) seen_content = true;
1458 if (unique_unused) {
1459 enum diag_type diag = DIAG_WARN;
1460 switch (unique_missing) {
1461 case UNIQUE_ERROR:
1462 diag = DIAG_ERROR;
1463 /* FALLTHRU */
1464 case UNIQUE_WARN_NEW:
1465 case UNIQUE_WARN_SKIP:
1466 report_location(diag, fname, line_no);
1467 cerr << "UNIQUE action unused in this record\n";
1468 default:
1469 break;
1471 switch (unique_missing) {
1472 case UNIQUE_ERROR:
1473 exit(1);
1474 case UNIQUE_SKIP:
1475 case UNIQUE_WARN_SKIP:
1476 skipping_record = true;
1477 break;
1478 case UNIQUE_NEW:
1479 case UNIQUE_WARN_NEW:
1480 break;
1484 if (skipping_record) {
1485 ++skipcount;
1486 } else if (!seen_content) {
1487 // We haven't seen any fields (other than unique identifiers)
1488 // so the document is to be deleted.
1489 if (docid) {
1490 database.delete_document(docid);
1491 if (verbose) cout << "Del: " << docid << '\n';
1492 ++delcount;
1494 } else {
1495 string data;
1496 for (auto&& i : fields) {
1497 for (auto&& field_val : i.second) {
1498 data += i.first;
1499 data += '=';
1500 data += field_val;
1501 data += '\n';
1505 // Put the data in the document
1506 doc.set_data(data);
1508 // Add the document to the database
1509 if (docid) {
1510 database.replace_document(docid, doc);
1511 if (verbose) cout << "Replace: " << docid << '\n';
1512 ++repcount;
1513 } else {
1514 docid = database.add_document(doc);
1515 if (verbose) cout << "Add: " << docid << '\n';
1516 ++addcount;
1521 // Commit after each file to make sure all changes from that file make it
1522 // in.
1523 if (verbose) cout << "Committing\n";
1524 database.commit();
1527 static void
1528 show_help(int exit_code)
1530 cout << PROG_NAME " - " PROG_DESC "\n"
1531 "Usage: " PROG_NAME " [OPTIONS] DATABASE INDEXER_SCRIPT [INPUT_FILE]...\n"
1532 "\n"
1533 "Creates or updates a Xapian database with the data from the input files listed\n"
1534 "on the command line. If no files are specified, data is read from stdin.\n"
1535 "\n"
1536 "See https://xapian.org/docs/omega/scriptindex.html for documentation of the\n"
1537 "format for INDEXER_SCRIPT.\n"
1538 "\n"
1539 "Options:\n"
1540 " -v, --verbose display additional messages to aid debugging\n"
1541 " --overwrite create the database anew (the default is to update if\n"
1542 " the database already exists)\n";
1543 print_stemmer_help("");
1544 print_help_and_version_help("");
1545 exit(exit_code);
1549 main(int argc, char **argv)
1550 try {
1551 // If the database already exists, default to updating not overwriting.
1552 int database_mode = Xapian::DB_CREATE_OR_OPEN;
1553 verbose = false;
1554 Xapian::Stem stemmer("english");
1556 // Without this, strptime() seems to treat formats without a timezone as
1557 // being local time, including %s.
1558 setenv("TZ", "UTC", 1);
1560 constexpr auto NO_ARG = no_argument;
1561 constexpr auto REQ_ARG = required_argument;
1562 static const struct option longopts[] = {
1563 { "help", NO_ARG, NULL, 'h' },
1564 { "version", NO_ARG, NULL, 'V' },
1565 { "stemmer", REQ_ARG, NULL, 's' },
1566 { "overwrite", NO_ARG, NULL, 'o' },
1567 { "verbose", NO_ARG, NULL, 'v' },
1568 { 0, 0, NULL, 0 }
1571 int getopt_ret;
1572 while ((getopt_ret = gnu_getopt_long(argc, argv, "vs:hV",
1573 longopts, NULL)) != -1) {
1574 switch (getopt_ret) {
1575 default:
1576 show_help(1);
1577 break;
1578 case 'h': // --help
1579 show_help(0);
1580 break;
1581 case 'V': // --version
1582 print_package_info(PROG_NAME);
1583 return 0;
1584 case 'o': // --overwrite
1585 database_mode = Xapian::DB_CREATE_OR_OVERWRITE;
1586 break;
1587 case 'v':
1588 verbose = true;
1589 break;
1590 case 's':
1591 try {
1592 stemmer = Xapian::Stem(optarg);
1593 } catch (const Xapian::InvalidArgumentError &) {
1594 cerr << "Unknown stemming language '" << optarg << "'.\n";
1595 cerr << "Available language names are: "
1596 << Xapian::Stem::get_available_languages() << '\n';
1597 return 1;
1599 break;
1603 argv += optind;
1604 argc -= optind;
1605 if (argc < 2) {
1606 show_help(1);
1609 parse_index_script(argv[1]);
1611 // Open the database. If another process is currently updating the
1612 // database, wait for the lock to become available.
1613 auto flags = database_mode | Xapian::DB_RETRY_LOCK;
1614 Xapian::WritableDatabase database(argv[0], flags);
1616 Xapian::TermGenerator indexer;
1617 indexer.set_stemmer(stemmer);
1618 // Set the database for spellings to be added to by the "spell" action.
1619 indexer.set_database(database);
1621 addcount = 0;
1622 repcount = 0;
1623 delcount = 0;
1624 skipcount = 0;
1626 if (argc == 2) {
1627 // Read from stdin.
1628 index_file("<stdin>", cin, database, indexer);
1629 } else {
1630 // Read file(s) listed on the command line.
1631 for (int i = 2; i < argc; ++i) {
1632 ifstream stream(argv[i]);
1633 if (stream) {
1634 index_file(argv[i], stream, database, indexer);
1635 } else {
1636 cerr << "Can't open file " << argv[i] << '\n';
1641 cout << "records (added, replaced, deleted, skipped) = ("
1642 << addcount << ", "
1643 << repcount << ", "
1644 << delcount << ", "
1645 << skipcount << ")\n";
1646 } catch (const Xapian::Error &error) {
1647 cerr << "Exception: " << error.get_description() << '\n';
1648 exit(1);
1649 } catch (const std::bad_alloc &) {
1650 cerr << "Exception: std::bad_alloc\n";
1651 exit(1);
1652 } catch (...) {
1653 cerr << "Unknown Exception\n";
1654 exit(1);