2 # htdig2omega - dump an htdig database into a form suitable for indexing
3 # into a Xapian database using scriptindex.
5 # Copyright 2002,2003,2004 Olly Betts
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License as
9 # published by the Free Software Foundation; either version 2 of the
10 # License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
# Require exactly one command-line argument: the htdig directory.
@ARGV == 1 or die "Syntax: $0 HTDIGDIR\nTypical usage: $0 HTDIGDIR|scriptindex XAPIANDB htdig2omega.script\n";

my $dir = shift @ARGV;

# dump the document database (-w suppresses dumping the word database)
# The list form of system() bypasses the shell, so $dir reaches htdump
# verbatim.  Stop if htdump fails: otherwise a db.docs left behind by an
# earlier run (presumably the file htdump writes -- confirm) could be read
# below and silently yield out-of-date output.
system("htdump", "-w", $dir) == 0
    or die "htdump -w $dir failed: ", ($? == -1 ? $! : "wait status $?"), "\n";

# Three-argument open, so that characters in $dir (a leading '>' or a
# trailing '|', say) can never be taken as an open mode or a pipe.  The
# bareword handle DOCS is kept because the rest of the script reads from it.
open DOCS, '<', "$dir/db.docs" or die "Can't open $dir/db.docs: $!\n";
# NOTE(review): this is only part of the per-record processing.  The code
# that reads lines from DOCS, iterates over @x, fills %f and closes the
# blocks is not visible here -- confirm against the complete script.
#
# Split the current record ($_) on tabs: the first column goes to $id (not
# used in the visible code), the remaining columns to @x.
32 my ($id, @x) = split /\t/;
# A field is expected to begin with a single ASCII letter and a colon.  The
# substitution strips that prefix from $_ and captures the letter in $1;
# when it does not match, the field is reported as bad.
35 unless (s/^([a-zA-Z])://) {
# The complaint goes to STDERR, so it stays out of the record stream that
# is printed on standard output.
36 print STDERR
"Bad field: $_\n";
# Emit one record as name=value lines followed by a blank line, taking the
# values from %f under the one-letter keys u, t, m, s, H and h.
# NOTE(review): presumably %f is keyed by the letter captured above -- the
# assignment is not visible here, confirm.
41 print "url=$f{'u'}\ncaption=$f{'t'}\nlastmod=$f{'m'}\nsize=$f{'s'}\ntext=$f{'H'}\nmetadesc=$f{'h'}\n\n";