modified: makefile
[GalaxyCodeBases.git] / tools / bwt / dcs-bwt / src / stream_compressor.h
blob1cd90bc92dd0eed098cd7bc4819537327f72c1c7
1 // Copyright 2007 Google Inc.
2 //
3 // This program is free software; you can redistribute it and/or
4 // modify it under the terms of the GNU General Public License
5 // as published by the Free Software Foundation; either version 2
6 // of the License, or (at your option) any later version.
7 //
8 // This program is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 // GNU General Public License for more details.
13 // You should have received a copy of the GNU General Public License
14 // along with this program; if not, write to the Free Software
15 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 // Base classes for stream compressors and decompressors.
18 // A stream (de)compressor can handle an arbitrary stream
19 // of data in a nearly online fashion.
20 // The streams are based on the stream protocol described in stream.h.
22 // BASIC USAGE
24 // * Initialize compressor: * Initialize decompressor:
25 // StreamCompressor* compressor StreamDecompressor* decompressor
26 // = StreamCompressor::Connect(output); = StreamDecompressor::Connect(input);
28 // * Start compressing block: * Start decompressing block:
29 // compressor->WriteBegin(); decompressor->ReadBegin();
31 // * Compress block: * Decompress block:
32 // compressor->Write(...); decompressor->Read(...);
33 // compressor->Write(...); decompressor->Read(...);
34 // ... ...
36 // * Finish compressing block: * Finish decompressing block
37 // compressor->WriteEnd(); decompressor->ReadEnd();
39 // * Compress more blocks: * Decompress more blocks
40 // compressor->WriteBegin(); decompressor->ReadBegin();
41 // ... ...
42 // compressor->WriteEnd(); decompressor->ReadEnd();
43 // ... ...
45 // * End compression: * End decompression:
46 // compressor->Disconnect(); decompressor->Disconnect();
48 // SYNCHRONIZATION
50 // Synchronization between compressor and decompressor happens
51 // at block boundaries:
52 // * The sizes of individual writes and reads do not have to match.
53 // * The client must ensure that at the end of the block (i.e., just
54 // before calling WriteEnd/ReadEnd), the compressor and
55 // decompressor are synchronized on the uncompressed data, i.e.,
56 // that the compressor and the decompressor have processed
57 // the same amount of uncompressed data.
58 // * WriteEnd/ReadEnd then ensures that the compressor and decompressor
59 // are synchronized on the compressed data, too. Then it is possible
60 // to write some data (such as the size of the next block) to the
61 // output past the compressor as long as the same amount of data
62 // is read past the decompressor.
64 // OPTIONS
66 // Compression options can be provided like this:
68 // string option_string = ...;
69 // StreamCompressor::Options options;
70 // if ( ! options.Set(option_string)) { ... // error: invalid option_string }
71 // StreamCompressor* compressor = StreamCompressor::Connect(output, options);
72 // ...
74 // The compressor writes the options to the beginning of the compressed data
75 // and the decompressor reads them from there (both during Connect()).
77 // The first character of option_string specifies the compression algorithm;
78 // the rest provides options specific to the algorithm.
79 // See stream_compressor.cc for available algorithms and algorithm-specific
80 // files for the rest of the options.
82 // IMPLEMENTING (DE)COMPRESSORS
84 // New stream compression/decompression algorithms can be defined
85 // by deriving from StreamCompressor/StreamDecompressor.
86 // The simple TrivialCompressor/TrivialDecompressor
87 // in stream_compressor.cc is a useful example.
89 // The set of compression algorithms is hard-coded in the definition
90 // of the functions StreamCompressor::Options::Set() and
91 // StreamDecompressor::Connect() (in stream_compressor.cc).
92 // Those are the only places that need to be modified when adding a new
93 // compression algorithm. In particular, there is no need to modify
94 // this file stream_compressor.h.
96 // Changes to an already supported algorithm including changing
97 // the algorithm-specific options should need no modification of either
98 // stream_compressor.h or stream_compressor.cc.
100 #ifndef DCSBWT_STREAM_COMPRESSOR_H__
101 #define DCSBWT_STREAM_COMPRESSOR_H__
103 #include <string>
104 #include <cstddef> // for size_t
106 #include "stream.h"
108 namespace dcsbwt {
110 // StreamCompressor is the base class for stream compressors.
112 // A derived class should:
113 // 1. define the pure virtual functions Write, WriteBegin and WriteEndPrivate
114 // 2. have a nested class derived from StreamCompressor::OptionsBase
115 // 3. add an entry to StreamCompressor::Options::Set in stream_compressor.cc
116 // 4. write compressed output using Emit and EmitByte (which do buffering)
118 class StreamCompressor : public OutStream {
119 protected:
120 class OptionsBase;
122 public:
123 static const int kBufferSize;
125 // A class representing all options affecting compression and decompression.
126 class Options {
127 public:
128 Options() : algorithm_specific_options_(NULL) { Set("t"); }
130 // Set/Get serialization mechanism is the simplest way
131 // to implement copying.
132 Options(const Options& other) : algorithm_specific_options_(NULL)
133 { Set(other.Get()); }
134 const Options& operator=(const Options& other) {
135 Set(other.Get());
136 return *this;
138 ~Options() {
139 if (algorithm_specific_options_)
140 delete algorithm_specific_options_;
143 // Initialize options from a string.
144 // Returns false in case of an invalid options_string
145 bool Set(const std::string& options_string);
147 // Get() returns a string that:
148 // 1. as an argument to Set() sets any options object to this object's
149 // current state.
150 // 2. when written to the compressed stream and read by
151 // StreamDecompressor, sets the decompressor in the the equivalent
152 // state.
153 // 3. does not contain '\n' (which is used as a terminator in the stream)
154 std::string Get() const {
155 if (algorithm_specific_options_) {
156 return compression_algorithm_ + algorithm_specific_options_->Get();
157 } else {
158 return std::string();
161 int64 SizeInBytes() const {
162 return kBufferSize + algorithm_specific_options_->SizeInBytes();
164 private:
165 friend class StreamCompressor;
166 StreamCompressor* GetCompressor();
168 char compression_algorithm_;
169 OptionsBase* algorithm_specific_options_;
172 StreamCompressor() : output_(kBufferSize) {}
173 virtual ~StreamCompressor() {}
175 // Create a compressor with the compressed output going to output.
176 // The compression format specified by options is written to output.
177 static StreamCompressor* Connect(OutStream* output,
178 Options options);
179 // Disconnect deletes this
180 OutStream* Disconnect();
182 virtual void WriteBegin() =0;
184 void WriteEnd() { WriteEndPrivate(); output_.Flush(); }
186 // inherits from OutStream:
187 // virtual void Write() =0;
189 protected:
191 // A compressor class derived from StreamCompressor should
192 // have a nested class derived from OptionsBase.
193 class OptionsBase {
194 public:
195 OptionsBase() {}
196 virtual ~OptionsBase() {}
198 // Set/Get should behave as StreamCompressor::Options::Set/Get
199 // The algorithm identifying initial character is omitted from
200 // the input to Set and should be omitted from the output of Get.
201 virtual bool Set(const std::string& options_string) =0;
202 virtual std::string Get() const =0;
203 virtual int64 SizeInBytes() const =0;
205 // Returns a compressor object corresponding to the current options.
206 virtual StreamCompressor* GetCompressor() =0;
209 // All compressed output should be written using Emit and EmitByte.
210 inline void Emit(const char* bytes, size_t n) { output_.Write(bytes, n); }
211 inline void EmitByte(unsigned char byte) {
212 output_.WriteByte(byte);
215 private:
216 OutStreamBuffer output_;
218 virtual void WriteEndPrivate() =0;
219 virtual void ConnectPrivate(OutStreamBuffer* out) {};
220 virtual void DisconnectPrivate() {};
222 StreamCompressor(const StreamCompressor&);
223 StreamCompressor& operator=(const StreamCompressor&);
226 ////////////////////////////////////////////////////////////////////
227 // StreamDecompressor is the base class for stream decompressors
229 // A derived class should:
230 // 1. define the pure virtual functions Read, ReadBegin and ReadEnd
231 // 2. define a static factory member function Create
232 // 3. add a call to Create in StreamDecompressor::Connect
233 // 4. read compressed data using GetCompressed and GetCompressedByte
234 ////////////////////////////////////////////////////////////////////
235 class StreamDecompressor : public InStream {
236 public:
237 // Factory method for creating the decompressor.
238 // The decompression algorithm and its options are read from input.
239 // Returns NULL if (the beginning of) the input is invalid.
240 static StreamDecompressor* Connect(InStreamBuffer* input);
242 StreamDecompressor() { }
243 virtual ~StreamDecompressor() { }
245 // Disconnect deletes this.
246 InStreamBuffer* Disconnect();
248 // Returns the string read from input at creation.
249 std::string GetFormat() const { return format_; }
251 virtual void ReadBegin() =0;
252 virtual void ReadEnd() =0;
253 // inherits from InStream:
254 // virtual void Read(char* bytes, size_t n) =0;
256 virtual int64 SizeInBytes() const =0;
258 protected:
260 // All compressed data should be read using GetCompressed
261 // and GetCompressedByte.
262 inline void GetCompressed(char* bytes, size_t n) {
263 input_->ReadFast(bytes, n);
265 inline unsigned char GetCompressedByte() {
266 unsigned char byte = input_->ReadByte();
267 return byte;
270 private:
271 StreamMaster<InStreamBuffer> input_;
272 std::string format_;
274 virtual void ConnectPrivate(InStreamBuffer* out) {};
275 virtual void DisconnectPrivate() {};
277 StreamDecompressor(const StreamDecompressor&);
278 StreamDecompressor& operator=(const StreamDecompressor&);
281 } // namespace dcsbwt
283 #endif // DCSBWT_STREAM_COMPRESSOR_H__