modified: makefile
[GalaxyCodeBases.git] / tools / bwt / dcs-bwt / src / dcsbwt.cc
blobad273f8198ebef864f7e02601cb54c4decb7be45
1 // Copyright 2007 Google Inc.
2 //
3 // This program is free software; you can redistribute it and/or
4 // modify it under the terms of the GNU General Public License
5 // as published by the Free Software Foundation; either version 2
6 // of the License, or (at your option) any later version.
7 //
8 // This program is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 // GNU General Public License for more details.
13 // You should have received a copy of the GNU General Public License
14 // along with this program; if not, write to the Free Software
15 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 // This is the main program for the compressor.
#include "bwt_compress.h"
#include "stream.h"
#include "inttypes.h"

#include <cerrno>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <string>

#include <getopt.h>
// Program-wide reporting levels. Both start at zero and are assigned in
// main() from the --verbosity and --statistics command line options.
namespace dcsbwt {
int verbosity = 0;
int statistics = 0;
}  // namespace dcsbwt

// Lets this file refer to the verbosity level without qualification.
using dcsbwt::verbosity;
// Reports a command line error and terminates the program.
//
// When message is non-NULL, message immediately followed by argument is
// written to standard error on a line of its own. The usage summary is
// always written afterwards, and the process then exits with EXIT_FAILURE,
// so this function never returns to its caller.
void Fail(const char* message, const char* argument) {
  static const char kUsage[] =
      "Usage: dcsbwt [options] inputfile outputfile\n"
      " --transform=args\n"
      " --compression=args\n"
      " --memory=size[k|K|m|M|g|G]\n"
      " --blocksize=size[k|K|m|M|g|G]\n"
      " --verbosity=level\n"
      " --statistics=level\n";

  if (message != NULL) {
    std::cerr << message << argument << "\n";
  }
  std::cerr << kUsage;
  std::exit(EXIT_FAILURE);
}
49 void Fail(const char* message, int64 argument) {
50 char buffer[15];
51 snprintf(buffer, 15, "%lld", argument);
52 Fail(message, buffer);
55 class CommandLineOptions {
56 public:
57 CommandLineOptions()
58 : transform_("d"),
59 compression_("r"),
60 memory_(900LL * (1LL<<20)),
61 blocksize_(1LL<<20),
62 verbosity_(0),
63 statistics_(0){}
65 std::string GetTransform() const { return transform_; }
66 std::string GetCompression() const { return compression_; }
67 long long int GetMemory() const { return memory_; }
68 long long int GetBlocksize() const { return blocksize_; }
69 int GetVerbosity() const { return verbosity_; }
70 int GetStatistics() const { return statistics_; }
72 int ParseOptions(int argc, char** argv) {
73 while (1) {
74 static struct option long_options[] = {
75 {"transform", 1, 0, 't'},
76 {"compression", 1, 0, 'c'},
77 {"memory", 1, 0, 'm'},
78 {"blocksize", 1, 0, 'b'},
79 {"verbosity", 1, 0, 'v'},
80 {"statistics", 1, 0, 's'},
81 {0, 0, 0, 0}
84 int option_index = 0;
85 int c = getopt_long(argc, argv, "t:c:m:b:v:s:",
86 long_options, &option_index);
88 if (-1 == c) break;
90 char* next = NULL;
91 long long int size = 0;
92 int level = 0;
93 switch (c) {
94 case 't':
95 transform_ = optarg;
96 break;
97 case 'c':
98 compression_ = optarg;
99 break;
100 case 'm':
101 size = ParseSize(optarg);
102 if (size < 0)
103 Fail("invalid option argument: --memory=", optarg);
104 memory_ = size;
105 break;
106 case 'b':
107 size = ParseSize(optarg);
108 if (size < 0)
109 Fail("invalid option argument: --blocksize=", optarg);
110 blocksize_ = size;
111 break;
112 case 'v':
113 level = ParseLevel(optarg);
114 if (level == kInvalidLevel)
115 Fail("invalid option argument: --verbosity=", optarg);
116 verbosity_ = level;
117 break;
118 case 's':
119 level = ParseLevel(optarg);
120 if (level == kInvalidLevel)
121 Fail("invalid option argument: --statistics=", optarg);
122 statistics_ = level;
123 break;
124 case '?':
125 Fail(NULL, "");
126 default:
127 std::cerr << "Unexpected error: getopt returned: " << c << '\n';
128 std::exit(EXIT_FAILURE);
131 return optind;
134 private:
135 std::string transform_;
136 std::string compression_;
137 long long int memory_;
138 long long int blocksize_;
139 int verbosity_;
140 int statistics_;
142 long long int ParseSize(const char* argument) {
143 char* next;
144 long long int size = strtoll(argument, &next, 0);
145 if (*next) {
146 switch (*next++) {
147 case 'k':
148 case 'K':
149 size *= (1LL<<10);
150 break;
151 case 'm':
152 case 'M':
153 size *= (1LL<<20);
154 break;
155 case 'g':
156 case 'G':
157 size *= (1LL<<30);
158 break;
159 default:
160 size = -1;
162 if (*next) size = -1;
164 return size;
167 static const int kInvalidLevel = INT_MIN;
169 int ParseLevel(const char* argument) {
170 char* next;
171 long long int level = strtoll(argument, &next, 0);
172 if (*next || level < INT_MIN || level > INT_MAX)
173 level = kInvalidLevel;
174 return level;
179 int main(int argc, char** argv) {
181 // Memory for top level code, file buffers, stream buffers, etc.
182 // This is currently a rather arbitrary value.
183 static const int64 kMemoryOverhead = (1LL << 20);
184 static const int64 kMaxMemory = (3LL << 30);
186 // Process command line options
187 CommandLineOptions command_line_options;
188 int first_argument = command_line_options.ParseOptions(argc, argv);
189 verbosity = command_line_options.GetVerbosity();
190 dcsbwt::statistics = command_line_options.GetStatistics();
192 if (argc - first_argument < 2) Fail("Too few arguments", "");
193 if (argc - first_argument > 2) Fail("Too many arguments", "");
195 dcsbwt::BwtCompressor::Options compressor_options;
196 bool ok = compressor_options.SetTransformOptions(
197 command_line_options.GetTransform());
198 if (!ok) Fail("Invalid option argument: --transform=",
199 command_line_options.GetTransform().c_str());
200 ok = compressor_options.SetCompressionOptions(
201 command_line_options.GetCompression());
202 if (!ok) Fail("Invalid option argument: --compression=",
203 command_line_options.GetCompression().c_str());
205 int64 memory_budget = command_line_options.GetMemory();
206 int64 available_memory = memory_budget - kMemoryOverhead;
207 ok = compressor_options.SetMemoryBudget(available_memory);
208 if (!ok)
209 Fail("Invalid (possibly too small) memory budget: ", memory_budget);
210 if (verbosity > 0) {
211 std::clog << "Compressing with memory budget of " << memory_budget
212 << " bytes (" << available_memory << " after general overhead)"
213 << std::endl;;
216 int64 blocksize = command_line_options.GetBlocksize();
217 if (0 == blocksize) {
218 if (verbosity > 0)
219 std::clog << "No block size given: using suggested block size"
220 << " (no larger than "
221 << compressor_options.SuggestedBlockSize() << ")"
222 << std::endl;
223 } else {
224 if (blocksize > compressor_options.MaxBlockSize())
225 Fail("Too large blocksize", blocksize);
226 if (verbosity > 0)
227 std::clog << "Using block size " << blocksize
228 << " (max=" << compressor_options.MaxBlockSize() << ")"
229 << std::endl;
232 // Open files and set up streams
233 const char* infilename = argv[first_argument];
234 if (verbosity > 0)
235 std::cout << "Opening input file: " << infilename
236 << std::endl;
237 std::FILE* infile = std::fopen(infilename, "r");
238 if (!infile) {
239 std::cerr << "Cannot open input file " << infilename << '\n';
240 std::exit(EXIT_FAILURE);
243 std::fseek(infile, 0, SEEK_END);
244 int64 inputsize = std::ftell(infile);
245 std::rewind(infile);
246 if (verbosity > 0) {
247 std::clog << "Input: " << inputsize << " bytes from file "<< infilename
248 << std::endl;
250 dcsbwt::InStreamFromFile instream(infile);
252 const char* outfilename = argv[first_argument + 1];
253 if (verbosity > 0) {
254 std::clog << "Opening output file: " << outfilename << std::endl;
256 std::FILE* outfile = std::fopen(outfilename, "w");
257 if (!outfile) {
258 std::cerr << "Cannot open output file " << outfilename << '\n';
259 std::fclose(infile);
260 std::exit(EXIT_FAILURE);
262 dcsbwt::OutStreamToFile outstream(outfile);
264 // Compress
265 dcsbwt::BwtCompressor::Compress(&instream, inputsize, &outstream,
266 compressor_options, blocksize);
267 // Close files
268 std::fclose(outfile);
269 std::fclose(infile);
271 return 0;