2 eval 'exec perl -wS $0 ${1+"$@"}'
4 #*************************************************************************
6 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 # Copyright 2008 by Sun Microsystems, Inc.
10 # OpenOffice.org - a multi-platform office productivity suite
12 # $RCSfile: th_gen_idx.pl,v $
16 # This file is part of OpenOffice.org.
18 # OpenOffice.org is free software: you can redistribute it and/or modify
19 # it under the terms of the GNU Lesser General Public License version 3
20 # only, as published by the Free Software Foundation.
22 # OpenOffice.org is distributed in the hope that it will be useful,
23 # but WITHOUT ANY WARRANTY; without even the implied warranty of
24 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 # GNU Lesser General Public License version 3 for more details
26 # (a copy is included in the LICENSE file that accompanied this code).
28 # You should have received a copy of the GNU Lesser General Public License
29 # version 3 along with OpenOffice.org. If not, see
30 # <http://www.openoffice.org/license.html>
31 # for a copy of the LGPLv3 License.
33 #*************************************************************************
36 my ($aent, $aoff) = split('\|',$a);
37 my ($bent, $boff) = split('\|',$b);
41 #FIXME: someone may want "infile" or even parameter parsing
45 if ( $next_is_file ) {
57 print "$0 -o outfile < input\n";
# Main script: builds a sorted index of thesaurus entries and writes it to
# the output file as three parts: the encoding line, the entry count, and
# one index line per entry.
# NOTE(review): this listing appears to omit some statements (the reads of
# the input and the loop headers are not visible here); the comments below
# describe only what the visible lines show.
63 my $ne = 0; # number of entries in index; written as the second line of the output file
64 my @tindex=(); # the index itself: one <entry>|<offset> string per processed word
65 my $foffset = 0; # file position offset into thesaurus (running total, advanced per record)
66 my $rec=""; # current string and related pieces; split below into entry and meaning count
67 my $rl=0; # misc string length; accumulates the lengths of the lines of the current record
68 my $entry=""; # current word being processed (first pipe-separated field of $rec)
69 my $nm=0; # number of meanings for the current word (second pipe-separated field of $rec)
70 my $meaning=""; # current meaning and synonyms
72 my $encoding; # encoding used by text file; echoed as the first line of the output file
# Resolve the output file name; get_outfile and usage are defined outside
# this block. An empty name triggers usage().
# NOTE(review): $outfile has no visible my declaration in this listing.
75 $outfile = get_outfile
();
76 usage
() if ( $outfile eq "" );
78 # top line of thesaurus provides encoding
# Advance the running offset by the length of the encoding line so that
# later offsets account for it.
80 $foffset = $foffset + length($encoding);
83 # read thesaurus line by line
84 # first line of every block is an entry and meaning count
# Split the record on the pipe character into the word and its meaning count.
88 ($entry, $nm) = split('\|',$rec);
# Add the length of the meaning line to the length of the current record.
92 $rl = $rl + length($meaning);
# Store the word together with the offset value held before this record's
# length is added (the addition happens in the next visible statement).
96 push(@tindex,"$entry|$foffset");
98 $foffset = $foffset + $rl;
101 # now we have all of the information
102 # so sort it and then output the encoding, count and index data
# by_entry is the comparator passed to sort; it is defined outside this block.
103 @tindex = sort by_entry
@tindex;
# Open the output file in write mode (the > prefix) and abort with an error
# message if that fails.
# NOTE(review): this is a two-argument open with the file name interpolated
# into the mode string; a three-argument open would keep special characters
# in the name from being read as part of the mode. The die message also
# omits the system error text.
106 open OUTFILE
, ">$outfile" or die "ERROR: Can't open $outfile for writing!";
# Output layout: encoding line, entry count, then one index entry per line.
107 print OUTFILE
"$encoding\n";
108 print OUTFILE
"$ne\n";
# NOTE(review): $one has no visible my declaration in this listing.
109 foreach $one (@tindex) {
110 print OUTFILE
"$one\n";