1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * Version: MPL 1.1 / GPLv3+ / LGPLv3+
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License or as specified alternatively below. You may obtain a copy of
8 * the License at http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
15 * The Initial Developer of the Original Code is
16 * Steven Butler <sebutler@gmail.com>
17 * Portions created by the Initial Developer are Copyright (C) 2011 the
18 * Initial Developer. All Rights Reserved.
20 * For minor contributions see the git repository.
22 * Alternatively, the contents of this file may be used under the terms of
23 * either the GNU General Public License Version 3 or later (the "GPLv3+"), or
24 * the GNU Lesser General Public License Version 3 or later (the "LGPLv3+"),
25 * in which case the provisions of the GPLv3+ or the LGPLv3+ are applicable
26 * instead of those above.
// Size in bytes of the buffer main() uses for every getline() call on
// standard input (64 KiB, including the terminating NUL).
static const int MAXLINE = 64 * 1024;
39 int main(int argc
, char *argv
[])
41 if (argc
!= 3 || strcmp(argv
[1],"-o"))
43 cout
<< "Usage: idxdict -o outputfile < input\n";
46 // This call improves performance by approx 5x
47 cin
.sync_with_stdio(false);
49 const char * outputFile(argv
[2]);
50 char inputBuffer
[MAXLINE
];
51 multimap
<string
, size_t> entries
;
52 multimap
<string
,size_t>::iterator
ret(entries
.begin());
55 cin
.getline(inputBuffer
, MAXLINE
);
56 const string
encoding(inputBuffer
);
57 size_t currentOffset(encoding
.size()+1);
60 // Extract the next word, but not the entry count
61 cin
.getline(inputBuffer
, MAXLINE
, '|');
65 string
word(inputBuffer
);
66 ret
= entries
.insert(ret
, pair
<string
, size_t>(word
, currentOffset
));
67 currentOffset
+= word
.size() + 1;
68 // Next is the entry count
69 cin
.getline(inputBuffer
, MAXLINE
);
72 cerr
<< "Unable to read entry - insufficient buffer?.\n";
75 currentOffset
+= strlen(inputBuffer
)+1;
76 int entryCount(strtol(inputBuffer
, NULL
, 10));
77 for (int i(0); i
< entryCount
; ++i
)
79 cin
.getline(inputBuffer
, MAXLINE
);
80 currentOffset
+= strlen(inputBuffer
)+1;
85 // Use binary mode to prevent any translation of LF to CRLF on Windows
86 ofstream
outputStream(outputFile
, ios_base::binary
| ios_base::trunc
|ios_base::out
);
87 if (!outputStream
.is_open())
89 cerr
<< "Unable to open output file " << outputFile
<< endl
;
93 outputStream
<< encoding
<< '\n' << entries
.size() << '\n';
95 for (multimap
<string
, size_t>::const_iterator
ii(entries
.begin());
100 outputStream
<< ii
->first
<< '|' << ii
->second
<< '\n';
104 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */