1 // Copyright 2007 Google Inc.
3 // This program is free software; you can redistribute it and/or
4 // modify it under the terms of the GNU General Public License
5 // as published by the Free Software Foundation; either version 2
6 // of the License, or (at your option) any later version.
8 // This program is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 // GNU General Public License for more details.
13 // You should have received a copy of the GNU General Public License
14 // along with this program; if not, write to the Free Software
15 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 // This is the main program for the compressor.
19 #include "bwt_compress.h"
34 using dcsbwt::verbosity
;
// Reports a fatal command-line error and terminates the program.
//
// Writes `message` immediately followed by `argument` (no separator is
// inserted between them) and a newline to std::cerr, then prints the usage
// summary and exits with EXIT_FAILURE. This function never returns.
//
//   message   text describing the problem; usually ends with the option
//             prefix (e.g. "...: --memory=") so that `argument` completes it
//   argument  offending value, or "" when there is none
void Fail(const char* message, const char* argument) {
  std::cerr << message << argument << "\n";
  std::cerr << "Usage: dcsbwt [options] inputfile outputfile\n"
            << " --transform=args\n"
            << " --compression=args\n"
            << " --memory=size[k|K|m|M|g|G]\n"
            << " --blocksize=size[k|K|m|M|g|G]\n"
            << " --verbosity=level\n"
            << " --statistics=level\n";
  std::exit(EXIT_FAILURE);
}
49 void Fail(const char* message
, int64 argument
) {
51 snprintf(buffer
, 15, "%lld", argument
);
52 Fail(message
, buffer
);
// Holds the values of the command-line options (transform, compression,
// memory, blocksize, verbosity, statistics) and exposes them through the
// Get*() accessors after ParseOptions() has been called.
55 class CommandLineOptions
{
// NOTE(review): this appears to be one entry of the constructor's
// initializer list; the rest of the constructor is not visible in this
// excerpt. It sets the default memory budget to 900 * 2^20 bytes (900 MiB).
60 memory_(900LL * (1LL<<20)),
65 std::string
GetTransform() const { return transform_
; }
66 std::string
GetCompression() const { return compression_
; }
67 long long int GetMemory() const { return memory_
; }
68 long long int GetBlocksize() const { return blocksize_
; }
69 int GetVerbosity() const { return verbosity_
; }
70 int GetStatistics() const { return statistics_
; }
// Parses the command-line options with getopt_long(). main() uses the
// return value as the index of the first positional argument in argv.
// Invalid option arguments are reported through Fail(), which prints the
// usage text and exits.
// NOTE(review): several statements of this function (the option loop, the
// dispatch on the returned option, the guards around some Fail() calls and
// the final return) are not visible in this excerpt; the comments below
// describe only what is shown.
72 int ParseOptions(int argc
, char** argv
) {
// Long-option table. Per the getopt_long() interface the fields are
// {name, has_arg, flag, val}: has_arg = 1 means the option requires an
// argument, and with flag = 0 getopt_long() returns val, which here is the
// letter of the matching short option.
74 static struct option long_options
[] = {
75 {"transform", 1, 0, 't'},
76 {"compression", 1, 0, 'c'},
77 {"memory", 1, 0, 'm'},
78 {"blocksize", 1, 0, 'b'},
79 {"verbosity", 1, 0, 'v'},
80 {"statistics", 1, 0, 's'},
// Every letter in the short-option string is followed by ':' and therefore
// takes an argument as well.
85 int c
= getopt_long(argc
, argv
, "t:c:m:b:v:s:",
86 long_options
, &option_index
);
91 long long int size
= 0;
// The compression argument is stored verbatim here; main() validates it
// later through SetCompressionOptions().
98 compression_
= optarg
;
// --memory: the argument is converted by ParseSize().
// NOTE(review): the condition guarding the Fail() call below is not visible.
101 size
= ParseSize(optarg
);
103 Fail("invalid option argument: --memory=", optarg
);
// --blocksize: same handling as --memory.
107 size
= ParseSize(optarg
);
109 Fail("invalid option argument: --blocksize=", optarg
);
// --verbosity: ParseLevel() signals a bad argument with kInvalidLevel.
113 level
= ParseLevel(optarg
);
114 if (level
== kInvalidLevel
)
115 Fail("invalid option argument: --verbosity=", optarg
);
// --statistics: same handling as --verbosity.
119 level
= ParseLevel(optarg
);
120 if (level
== kInvalidLevel
)
121 Fail("invalid option argument: --statistics=", optarg
);
// Reports an unexpected getopt_long() return value and exits without
// printing the usage text (presumably the fallback branch of the dispatch).
127 std::cerr
<< "Unexpected error: getopt returned: " << c
<< '\n';
128 std::exit(EXIT_FAILURE
);
// Stored option values, returned by the corresponding Get*() accessors.
// Argument of --transform.
135 std::string transform_
;
// Argument of --compression.
136 std::string compression_
;
// Memory budget in bytes.
137 long long int memory_
;
// Block size; main() interprets 0 as "not given".
138 long long int blocksize_
;
// Converts the textual argument of a size option (--memory, --blocksize)
// to an integer.
// NOTE(review): the usage text advertises an optional k/K/m/M/g/G suffix;
// the code handling it, the declaration of `next` and the return statement
// are not visible in this excerpt.
142 long long int ParseSize(const char* argument
) {
// Base 0 lets strtoll() accept decimal, 0x-prefixed hexadecimal and
// 0-prefixed octal input; `next` is set to the first unparsed character.
144 long long int size
= strtoll(argument
, &next
, 0);
// Any character left over at this point makes the argument invalid, which
// is signalled by a size of -1.
162 if (*next
) size
= -1;
// Sentinel returned by ParseLevel() for an argument that is not a valid
// level. Because the sentinel is INT_MIN, a literal INT_MIN argument is
// also treated as invalid.
static const int kInvalidLevel = INT_MIN;

// Converts the textual argument of a level option (--verbosity,
// --statistics) to an int.
//
// The whole argument must be a single integer in the notation accepted by
// strtoll() with base 0 (decimal, 0x-prefixed hexadecimal or 0-prefixed
// octal) and must fit in an int.
//
// Returns the parsed level, or kInvalidLevel when the argument is null,
// empty, contains no digits, has trailing characters or is out of range.
int ParseLevel(const char* argument) {
  if (argument == NULL) return kInvalidLevel;

  char* next = NULL;
  const long long int level = strtoll(argument, &next, 0);

  // strtoll() leaves `next` equal to `argument` when it could not convert
  // anything. Without this check an empty argument would be accepted as
  // level 0, because `*next` is then the terminating NUL.
  if (next == argument) return kInvalidLevel;

  // Reject trailing garbage and values that do not fit in an int. Overflow
  // inside strtoll() saturates to LLONG_MIN/LLONG_MAX and is caught by the
  // range test as well.
  if (*next != '\0' || level < INT_MIN || level > INT_MAX)
    return kInvalidLevel;

  return static_cast<int>(level);
}
// Entry point of the compressor: parses the command line, configures a
// dcsbwt::BwtCompressor::Options object, opens the input and output files
// and runs dcsbwt::BwtCompressor::Compress() on them.
// NOTE(review): a number of original lines (conditions guarding some of the
// statements below, stream terminators, the end of the function) are not
// visible in this excerpt; the comments describe only what is shown.
179 int main(int argc
, char** argv
) {
181 // Memory for top level code, file buffers, stream buffers, etc.
182 // This is currently a rather arbitrary value.
// 1LL << 20 bytes = 1 MiB.
183 static const int64 kMemoryOverhead
= (1LL << 20);
// 3LL << 30 bytes = 3 GiB. No use of this constant is visible in the excerpt.
184 static const int64 kMaxMemory
= (3LL << 30);
186 // Process command line options
187 CommandLineOptions command_line_options
;
// The return value is used below as the index of the first positional
// argument (the input file name).
188 int first_argument
= command_line_options
.ParseOptions(argc
, argv
);
// Copy the requested levels into the dcsbwt verbosity / statistics
// variables.
189 verbosity
= command_line_options
.GetVerbosity();
190 dcsbwt::statistics
= command_line_options
.GetStatistics();
// Exactly two positional arguments are required: input file and output file.
192 if (argc
- first_argument
< 2) Fail("Too few arguments", "");
193 if (argc
- first_argument
> 2) Fail("Too many arguments", "");
// Hand the --transform and --compression strings to the compressor options;
// a false return value is reported as an invalid option argument.
195 dcsbwt::BwtCompressor::Options compressor_options
;
196 bool ok
= compressor_options
.SetTransformOptions(
197 command_line_options
.GetTransform());
198 if (!ok
) Fail("Invalid option argument: --transform=",
199 command_line_options
.GetTransform().c_str());
200 ok
= compressor_options
.SetCompressionOptions(
201 command_line_options
.GetCompression());
202 if (!ok
) Fail("Invalid option argument: --compression=",
203 command_line_options
.GetCompression().c_str());
// The compressor receives the user's budget minus the fixed overhead
// reserved for this top-level code.
205 int64 memory_budget
= command_line_options
.GetMemory();
206 int64 available_memory
= memory_budget
- kMemoryOverhead
;
207 ok
= compressor_options
.SetMemoryBudget(available_memory
);
// NOTE(review): the condition guarding this Fail() call is not visible;
// presumably it is `if (!ok)` as for the options above.
209 Fail("Invalid (possibly too small) memory budget: ", memory_budget
);
211 std::clog
<< "Compressing with memory budget of " << memory_budget
212 << " bytes (" << available_memory
<< " after general overhead)"
// Block size: 0 means the user gave none, in which case the message below
// announces that the suggested block size is used.
216 int64 blocksize
= command_line_options
.GetBlocksize();
217 if (0 == blocksize
) {
219 std::clog
<< "No block size given: using suggested block size"
220 << " (no larger than "
221 << compressor_options
.SuggestedBlockSize() << ")"
// An explicitly requested block size must not exceed MaxBlockSize().
// NOTE(review): Fail() prints message and argument back to back, so this
// message is emitted as "Too large blocksize<N>" without a separator.
224 if (blocksize
> compressor_options
.MaxBlockSize())
225 Fail("Too large blocksize", blocksize
);
227 std::clog
<< "Using block size " << blocksize
228 << " (max=" << compressor_options
.MaxBlockSize() << ")"
232 // Open files and set up streams
233 const char* infilename
= argv
[first_argument
];
// NOTE(review): this message goes to std::cout while every other progress
// message visible here uses std::clog -- confirm this is intended.
235 std::cout
<< "Opening input file: " << infilename
// NOTE(review): mode "r" opens the file as a text stream; on platforms that
// distinguish text from binary streams "rb" would be needed to read the
// data unmodified -- confirm.
237 std::FILE* infile
= std::fopen(infilename
, "r");
// Failure to open the input file is fatal (the guarding condition is not
// visible in this excerpt).
239 std::cerr
<< "Cannot open input file " << infilename
<< '\n';
240 std::exit(EXIT_FAILURE
);
// Determine the input size by seeking to the end of the file and reading
// the position.
// NOTE(review): the results of fseek()/ftell() are not checked in the
// visible code, and ftell() returns long, which limits the representable
// size where long is 32 bits wide.
243 std::fseek(infile
, 0, SEEK_END
);
244 int64 inputsize
= std::ftell(infile
);
247 std::clog
<< "Input: " << inputsize
<< " bytes from file "<< infilename
// Wrap the input FILE* in the stream type that Compress() reads from.
250 dcsbwt::InStreamFromFile
instream(infile
);
// The output file name is the second positional argument.
252 const char* outfilename
= argv
[first_argument
+ 1];
254 std::clog
<< "Opening output file: " << outfilename
<< std::endl
;
// NOTE(review): mode "w" is a text stream as well; see the note on the
// input file ("wb" may be required for binary output).
256 std::FILE* outfile
= std::fopen(outfilename
, "w");
// Failure to open the output file is fatal (guarding condition not visible).
258 std::cerr
<< "Cannot open output file " << outfilename
<< '\n';
260 std::exit(EXIT_FAILURE
);
262 dcsbwt::OutStreamToFile
outstream(outfile
);
// Run the compressor over the whole input with the configured options and
// block size.
265 dcsbwt::BwtCompressor::Compress(&instream
, inputsize
, &outstream
,
266 compressor_options
, blocksize
);
// NOTE(review): only the output file is closed in the visible code; no
// matching fclose(infile) appears in this excerpt.
268 std::fclose(outfile
);