update dev300-m58
[ooovba.git] / dictionaries / util / th_gen_idx.pl
blob08a255c8b4993871bcba25a0151f168008fc3d70
2 eval 'exec perl -wS $0 ${1+"$@"}'
3 if 0;
4 #*************************************************************************
6 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7 #
8 # Copyright 2008 by Sun Microsystems, Inc.
10 # OpenOffice.org - a multi-platform office productivity suite
12 # $RCSfile: th_gen_idx.pl,v $
14 # $Revision: 1.5 $
16 # This file is part of OpenOffice.org.
18 # OpenOffice.org is free software: you can redistribute it and/or modify
19 # it under the terms of the GNU Lesser General Public License version 3
20 # only, as published by the Free Software Foundation.
22 # OpenOffice.org is distributed in the hope that it will be useful,
23 # but WITHOUT ANY WARRANTY; without even the implied warranty of
24 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 # GNU Lesser General Public License version 3 for more details
26 # (a copy is included in the LICENSE file that accompanied this code).
28 # You should have received a copy of the GNU Lesser General Public License
29 # version 3 along with OpenOffice.org. If not, see
30 # <http://www.openoffice.org/license.html>
31 # for a copy of the LGPLv3 License.
33 #*************************************************************************
# Sort comparator for index records of the form "word|offset".
# Only the text before the first '|' takes part in the comparison, and it
# is compared as a string (cmp); the offset part is ignored.
# Operands arrive in sort's package variables $a and $b.
sub by_entry {
    my ($left_word)  = split( /\|/, $a, 2 );
    my ($right_word) = split( /\|/, $b, 2 );
    return $left_word cmp $right_word;
}
41 #FIXME: someone may want "infile" or even parameter parsing
# Look up the output file name on the command line.
# Returns the argument that immediately follows the first "-o" in @ARGV,
# or the empty string when there is no "-o" or nothing comes after it.
# @ARGV itself is left unmodified.
sub get_outfile {
    # Stop one short of the end: a trailing "-o" has no value to return.
    for my $idx ( 0 .. $#ARGV - 1 ) {
        return $ARGV[ $idx + 1 ] if $ARGV[$idx] eq "-o";
    }
    return "";
}
# Print a short usage message to standard output and terminate the
# script with exit status 99. Never returns.
sub usage {
    my @help_lines = (
        "usage:\n",
        "$0 -o outfile < input\n",
    );
    print $_ for @help_lines;

    exit 99;
}
# main routine
#
# Reads a thesaurus from STDIN and writes an index for it to the file
# named by the -o option.
#
# Input layout, as consumed below:
#   line 1          - encoding name
#   then, repeated  - a header line "entry|count" followed by <count>
#                     meaning lines
#
# Output layout:
#   line 1          - the encoding name
#   line 2          - number of index records
#   remaining lines - "entry|offset" records sorted by entry (see by_entry),
#                     where offset is the position of the entry's header
#                     line in the input, counted with length() on the raw
#                     lines as read
use strict;
use warnings;

my $outfile = get_outfile();
usage() if ( $outfile eq "" );

my @tindex  = ();    # the index itself ("entry|offset" records)
my $foffset = 0;     # file position offset into thesaurus

# top line of thesaurus provides encoding
my $encoding = <STDIN>;
$encoding = "" unless defined $encoding;    # empty input: no encoding line
$foffset += length($encoding);
chomp($encoding);

# read thesaurus line by line
# first line of every block is an entry and meaning count
while ( defined( my $rec = <STDIN> ) ) {
    my $rl = length($rec);    # total length of the current block so far
    chomp($rec);

    my ( $entry, $nm ) = split( /\|/, $rec );
    $entry = "" unless defined $entry;    # header line without any text
    $nm    = 0  unless defined $nm;       # header line without a count

    # Skip over the meaning lines; only their length matters here.
    my $seen = 0;
    while ( $seen < $nm ) {
        my $meaning = <STDIN>;
        last unless defined $meaning;     # input ended inside a block
        $rl += length($meaning);
        $seen++;
    }

    push( @tindex, "$entry|$foffset" );
    $foffset += $rl;
}

# now we have all of the information
# so sort it and then output the encoding, count and index data
@tindex = sort by_entry @tindex;

print "$outfile\n";

# Three-argument open: the file name is taken literally, so leading
# whitespace or mode characters in it cannot change how it is opened.
open( my $out_fh, '>', $outfile )
    or die "ERROR: Can't open $outfile for writing: $!";

print {$out_fh} "$encoding\n"
    or die "ERROR: Can't write to $outfile: $!";
print {$out_fh} scalar(@tindex), "\n"
    or die "ERROR: Can't write to $outfile: $!";
foreach my $record (@tindex) {
    print {$out_fh} "$record\n"
        or die "ERROR: Can't write to $outfile: $!";
}

# Buffered write errors only surface at close, so check it.
close($out_fh)
    or die "ERROR: Can't close $outfile: $!";