modified: myjupyterlab.sh
[GalaxyCodeBases.git] / tools / bwt / dcs-bwt / src / dcsunbwt.cc
blob37330256ba520efdd26ec788c3ce3275dac437fa
// Copyright 2007 Google Inc.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

// This is the main program for the decompressor.
#include "bwt_compress.h"
#include "stream.h"
#include "inttypes.h"

#include <cerrno>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <string>

#include <getopt.h>
namespace dcsbwt {
// Global diagnostic levels. Both are assigned in main() from the parsed
// command line options before any other work is done.
int verbosity = 0;
int statistics = 0;
}  // namespace dcsbwt

// Lets the code below refer to the verbosity level without qualification.
using dcsbwt::verbosity;
// Reports a command line error and terminates the program.
//
// If `message` is non-null, writes `message` immediately followed by
// `argument` and a newline to std::cerr. The usage summary is always written
// afterwards. Never returns: the process exits with EXIT_FAILURE.
//
//   message   error text, or NULL to print only the usage summary
//   argument  text appended to `message`; a null pointer is treated as ""
void Fail(const char* message, const char* argument) {
  if (message) {
    // Streaming a null char pointer is undefined, so substitute an empty
    // string for a missing argument.
    std::cerr << message << (argument ? argument : "") << "\n";
  }
  std::cerr << "Usage: dcsunbwt [options] inputfile outputfile\n"
            << " --transform=args\n"
            << " --memory=size[k|K|m|M|g|G]\n"
            << " --verbosity=level\n";
  std::exit(EXIT_FAILURE);
}
46 void Fail(const char* message, int64 argument) {
47 char buffer[15];
48 snprintf(buffer, 15, "%lld", argument);
49 Fail(message, buffer);
52 class CommandLineOptions {
53 public:
54 CommandLineOptions()
55 : transform_("f"),
56 memory_(900LL * (1LL<<20)),
57 verbosity_(0) {}
59 std::string GetTransform() const { return transform_; }
60 long long int GetMemory() const { return memory_; }
61 int GetVerbosity() const { return verbosity_; }
62 int GetStatistics() const { return 0; }
64 int ParseOptions(int argc, char** argv) {
65 while (1) {
66 static struct option long_options[] = {
67 {"transform", 1, 0, 't'},
68 {"memory", 1, 0, 'm'},
69 {"verbosity", 1, 0, 'v'},
70 {0, 0, 0, 0}
73 int option_index = 0;
74 int c = getopt_long(argc, argv, "t:m:v:",
75 long_options, &option_index);
77 if (-1 == c) break;
79 char* next = NULL;
80 long long int size = 0;
81 int level;
82 switch (c) {
83 case 't':
84 transform_ = optarg;
85 break;
86 case 'm':
87 size = ParseSize(optarg);
88 if (size < 0)
89 Fail("invalid option argument: --memory=", optarg);
90 memory_ = size;
91 break;
92 case 'v':
93 level = ParseLevel(optarg);
94 if (level == kInvalidLevel)
95 Fail("invalid option argument: --verbosity=", optarg);
96 verbosity_ = level;
97 break;
98 case '?':
99 Fail(NULL, "");
100 default:
101 std::cerr << "Unexpected error: getopt returned: " << c << '\n';
102 std::exit(EXIT_FAILURE);
105 return optind;
108 private:
109 std::string transform_;
110 long long int memory_;
111 int verbosity_;
113 long long int ParseSize(const char* argument) {
114 char* next;
115 long long int size = strtoll(argument, &next, 0);
116 if (*next) {
117 switch (*next++) {
118 case 'k':
119 case 'K':
120 size *= (1LL<<10);
121 break;
122 case 'm':
123 case 'M':
124 size *= (1LL<<20);
125 break;
126 case 'g':
127 case 'G':
128 size *= (1LL<<30);
129 break;
130 default:
131 size = -1;
133 if (*next) size = -1;
135 return size;
138 static const int kInvalidLevel = INT_MIN;
140 int ParseLevel(const char* argument) {
141 char* next;
142 long long int level = strtoll(argument, &next, 0);
143 if (*next || level < INT_MIN || level > INT_MAX)
144 level = kInvalidLevel;
145 return level;
150 int main(int argc, char** argv) {
152 // Memory for top level code, file buffers, stream buffers, etc.
153 // This is currently a rather arbitrary value.
154 static const int64 kMemoryOverhead = (1LL << 20);
155 static const int64 kMaxMemory = (3LL << 30);
156 static const int kInputBufferSize = 1 << 16;
158 // Process command line options
159 CommandLineOptions command_line_options;
160 int first_argument = command_line_options.ParseOptions(argc, argv);
161 verbosity = command_line_options.GetVerbosity();
162 dcsbwt::statistics = command_line_options.GetStatistics();
164 if (argc - first_argument < 2) Fail("Too few arguments", "");
165 if (argc - first_argument > 2) Fail("Too many arguments", "");
167 dcsbwt::BwtDecompressor::Options decompressor_options;
168 bool ok = decompressor_options.SetTransformOptions(
169 command_line_options.GetTransform());
170 if (!ok) Fail("Invalid option argument: --transform=",
171 command_line_options.GetTransform().c_str());
173 int64 memory_budget = command_line_options.GetMemory();
174 int64 available_memory = memory_budget - kMemoryOverhead;
175 ok = decompressor_options.SetMemoryBudget(available_memory);
176 if (!ok)
177 Fail("Invalid (possibly too small) memory budget: ", memory_budget);
179 // Open files and setup streams
180 const char* infilename = argv[first_argument];
181 if (verbosity > 0)
182 std::cout << "Opening input file: " << infilename
183 << std::endl;
184 std::FILE* infile = std::fopen(infilename, "r");
185 if (!infile) {
186 std::cerr << "Cannot open input file " << infilename << '\n';
187 std::exit(EXIT_FAILURE);
189 dcsbwt::InStreamFromFile instream(infile);
190 dcsbwt::InStreamBuffer inbuffer(kInputBufferSize);
191 inbuffer.Connect(&instream);
193 const char* outfilename = argv[first_argument + 1];
194 if (verbosity > 0) {
195 std::clog << "Opening output file: " << outfilename << std::endl;
197 std::FILE* outfile = std::fopen(outfilename, "w");
198 if (!outfile) {
199 std::cerr << "Cannot open output file " << outfilename << '\n';
200 std::fclose(infile);
201 std::exit(EXIT_FAILURE);
203 dcsbwt::OutStreamToFile outstream(outfile);
205 // Decompress
206 int64 result = dcsbwt::BwtDecompressor::Decompress(
207 &inbuffer, &outstream, decompressor_options);
208 if (result < 0) {
209 std::cerr << "Corrupted or non-BWT-compressed file: " << infilename
210 << '\n';
211 std::fclose(outfile);
212 inbuffer.Disconnect();
213 std::fclose(infile);
214 std::exit(EXIT_FAILURE);
215 } else if (result > 0) {
216 std::cerr << "Cannot decompress a block of size " << result
217 << " under the memory budget of "
218 << memory_budget << '\n';
219 std::fclose(outfile);
220 inbuffer.Disconnect();
221 std::fclose(infile);
222 std::exit(EXIT_FAILURE);
225 // Close files and streams
226 std::fclose(outfile);
227 inbuffer.Disconnect();
228 std::fclose(infile);
230 return 0;