1 // Copyright 2007 Google Inc.
3 // This program is free software; you can redistribute it and/or
4 // modify it under the terms of the GNU General Public License
5 // as published by the Free Software Foundation; either version 2
6 // of the License, or (at your option) any later version.
8 // This program is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 // GNU General Public License for more details.
13 // You should have received a copy of the GNU General Public License
14 // along with this program; if not, write to the Free Software
15 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 // Base classes for stream compressors and decompressors.
18 // A stream (de)compressor can handle an arbitrary stream
19 // of data in a nearly online fashion.
20 // The streams are based on the stream protocol described in stream.h.
24 // * Initialize compressor: * Initialize decompressor:
25 // StreamCompressor* compressor StreamDecompressor* decompressor
26 // = StreamCompressor::Connect(output); = StreamDecompressor::Connect(input);
28 // * Start compressing block: * Start decompressing block:
29 // compressor->WriteBegin(); decompressor->ReadBegin();
31 // * Compress block: * Decompress block:
32 // compressor->Write(...); decompressor->Read(...);
33 // compressor->Write(...); decompressor->Read(...);
36 // * Finish compressing block: * Finish decompressing block
37 // compressor->WriteEnd(); decompressor->ReadEnd();
39 // * Compress more blocks: * Decompress more blocks
40 // compressor->WriteBegin(); decompressor->ReadBegin();
42 // compressor->WriteEnd(); decompressor->ReadEnd();
45 // * End compression: * End decompression:
46 // compressor->Disconnect(); decompressor->Disconnect();
50 // Synchronization between compressor and decompressor happens
51 // at block boundaries:
52 // * The sizes of individual writes and reads do not have to match.
53 // * The client must ensure that at the end of the block (i.e., just
54 // before calling WriteEnd/ReadEnd), the compressor and
55 // decompressor are synchronized on the uncompressed data, i.e.,
56 // that the compressor and the decompressor have processed
57 // the same amount of uncompressed data.
58 // * WriteEnd/ReadEnd then ensures that the compressor and decompressor
59 // are synchronized on the compressed data, too. Then it is possible
60 // to write some data (such as the size of the next block) to the
61 // output past the compressor as long as the same amount of data
62 // is read past the decompressor.
66 // Compression options can be provided like this:
68 // string option_string = ...;
69 // StreamCompressor::Options options;
70 // if ( ! options.Set(option_string)) { ... // error: invalid option_string }
71 // StreamCompressor* compressor = StreamCompressor::Connect(output, options);
74 // The compressor writes the options to the beginning of the compressed data
75 // and the decompressor reads them from there (both during Connect()).
77 // The first character of option_string specifies the compression algorithm;
78 // the rest provides options specific to the algorithm.
79 // See stream_compressor.cc for available algorithms and algorithm-specific
80 // files for the rest of the options.
82 // IMPLEMENTING (DE)COMPRESSORS
84 // New stream compression/decompression algorithms can be defined
85 // by deriving from StreamCompressor/StreamDecompressor.
86 // The simple TrivialCompressor/TrivialDecompressor
87 // in stream_compressor.cc is a useful example.
89 // The set of compression algorithms is hard-coded in the definition
90 // of the functions StreamCompressor::Options::Set() and
91 // StreamDecompressor::Connect() (in stream_compressor.cc).
92 // Those are the only places that need to be modified when adding a new
93 // compression algorithm. In particular, there is no need to modify
94 // this file stream_compressor.h.
96 // Changes to an already supported algorithm including changing
97 // the algorithm-specific options should need no modification of either
98 // stream_compressor.h or stream_compressor.cc.
100 #ifndef DCSBWT_STREAM_COMPRESSOR_H__
101 #define DCSBWT_STREAM_COMPRESSOR_H__
104 #include <cstddef> // for size_t
110 // StreamCompressor is the base class for stream compressors.
112 // A derived class should:
113 // 1. define the pure virtual functions Write, WriteBegin and WriteEndPrivate
114 // 2. have a nested class derived from StreamCompressor::OptionsBase
115 // 3. add an entry to StreamCompressor::Options::Set in stream_compressor.cc
116 // 4. write compressed output using Emit and EmitByte (which do buffering)
118 class StreamCompressor
: public OutStream
{
// Class-wide integer constant, declared here without an initializer.
// It is passed to the constructor of output_ and is one of the two terms
// summed by Options::SizeInBytes().
123 static const int kBufferSize
;
125 // A class representing all options affecting compression and decompression.
// Default constructor: starts with a null algorithm-specific options pointer
// and then initializes itself from the option string "t". Per the file
// header, the first character of an option string selects the compression
// algorithm, so "t" names an algorithm and supplies no further options
// (presumably the trivial algorithm -- confirm in stream_compressor.cc).
// The bool returned by Set() is ignored here.
128 Options() : algorithm_specific_options_(NULL
) { Set("t"); }
130 // Set/Get serialization mechanism is the simplest way
131 // to implement copying.
// Copy constructor: begins with a null algorithm-specific options pointer
// and then rebuilds this object's state by parsing the serialized form of
// other, i.e. Set(other.Get()). The bool returned by Set() is not checked.
132 Options(const Options
& other
) : algorithm_specific_options_(NULL
)
133 { Set(other
.Get()); }
134 const Options
& operator=(const Options
& other
) {
139 if (algorithm_specific_options_
)
140 delete algorithm_specific_options_
;
143 // Initialize options from a string.
144 // Returns false in case of an invalid options_string
145 bool Set(const std::string
& options_string
);
147 // Get() returns a string that:
148 // 1. as an argument to Set() sets any options object to this object's
150 // 2. when written to the compressed stream and read by
151 // StreamDecompressor, sets the decompressor in the equivalent
153 // 3. does not contain '\n' (which is used as a terminator in the stream)
// Serializes the options into a string. When an algorithm-specific options
// object exists, the result is the compression_algorithm_ character followed
// by that object's own Get() string. Otherwise the code appears to fall
// through to returning an empty string.
// NOTE(review): the braces that close the if-branch and the function body
// are not visible in this extract.
154 std::string
Get() const {
155 if (algorithm_specific_options_
) {
156 return compression_algorithm_
+ algorithm_specific_options_
->Get();
158 return std::string();
// Returns kBufferSize plus the value reported by the algorithm-specific
// options object's SizeInBytes().
// NOTE(review): unlike Get(), this dereferences algorithm_specific_options_
// without a null check -- confirm the pointer is always non-null whenever
// this is called. The closing brace is not visible in this extract.
161 int64
SizeInBytes() const {
162 return kBufferSize
+ algorithm_specific_options_
->SizeInBytes();
// StreamCompressor is a friend, so it can reach the members declared below.
165 friend class StreamCompressor
;
// Declared here, defined elsewhere. Returns a StreamCompressor pointer;
// presumably forwards to the algorithm-specific options object -- confirm in
// stream_compressor.cc.
166 StreamCompressor
* GetCompressor();
// Single character that Get() places in front of the algorithm-specific
// option string.
168 char compression_algorithm_
;
// Pointer to the algorithm-specific options; the constructors start it at
// NULL and Get() checks it for null before use.
169 OptionsBase
* algorithm_specific_options_
;
// Default constructor: constructs output_ with kBufferSize as its argument
// (presumably the buffer capacity -- confirm against OutStreamBuffer).
172 StreamCompressor() : output_(kBufferSize
) {}
// Virtual destructor with an empty body.
173 virtual ~StreamCompressor() {}
175 // Create a compressor with the compressed output going to output.
176 // The compression format specified by options is written to output.
177 static StreamCompressor
* Connect(OutStream
* output
,
179 // Disconnect deletes this
180 OutStream
* Disconnect();
// Starts compressing a block (see the usage outline in the file header).
// Pure virtual: every derived compressor must implement it.
182 virtual void WriteBegin() =0;
// Finishes compressing the current block: first runs the derived-class hook
// WriteEndPrivate(), then flushes the buffered compressed output in output_.
184 void WriteEnd() { WriteEndPrivate(); output_
.Flush(); }
186 // inherits from OutStream:
187 // virtual void Write() =0;
191 // A compressor class derived from StreamCompressor should
192 // have a nested class derived from OptionsBase.
196 virtual ~OptionsBase() {}
198 // Set/Get should behave as StreamCompressor::Options::Set/Get
199 // The algorithm identifying initial character is omitted from
200 // the input to Set and should be omitted from the output of Get.
201 virtual bool Set(const std::string
& options_string
) =0;
202 virtual std::string
Get() const =0;
// Pure virtual size query. StreamCompressor::Options::SizeInBytes() adds the
// returned value to kBufferSize; what exactly is counted is defined by the
// derived options class.
203 virtual int64
SizeInBytes() const =0;
205 // Returns a compressor object corresponding to the current options.
206 virtual StreamCompressor
* GetCompressor() =0;
209 // All compressed output should be written using Emit and EmitByte.
// Appends the n bytes starting at bytes to the compressed output by
// forwarding them to output_.Write().
210 inline void Emit(const char* bytes
, size_t n
) { output_
.Write(bytes
, n
); }
// Appends a single byte to the compressed output via output_.WriteByte().
// NOTE(review): the closing brace of this function is not visible in this
// extract.
211 inline void EmitByte(unsigned char byte
) {
212 output_
.WriteByte(byte
);
// Output buffer that Emit/EmitByte write into. It is constructed with
// kBufferSize and flushed by WriteEnd().
216 OutStreamBuffer output_
;
// Derived-class hook that WriteEnd() calls before flushing output_.
// Pure virtual: every derived compressor must implement it.
218 virtual void WriteEndPrivate() =0;
// Optional hooks with empty default bodies that derived classes may
// override. Presumably invoked from Connect()/Disconnect() -- confirm in
// stream_compressor.cc.
// NOTE(review): the ';' following each empty body is redundant.
219 virtual void ConnectPrivate(OutStreamBuffer
* out
) {};
220 virtual void DisconnectPrivate() {};
// Copy constructor and copy assignment are declared without bodies here,
// presumably to forbid copying of compressors. The access specifier that
// applies to them is not visible in this extract.
222 StreamCompressor(const StreamCompressor
&);
223 StreamCompressor
& operator=(const StreamCompressor
&);
226 ////////////////////////////////////////////////////////////////////
227 // StreamDecompressor is the base class for stream decompressors
229 // A derived class should:
230 // 1. define the pure virtual functions Read, ReadBegin and ReadEnd
231 // 2. define a static factory member function Create
232 // 3. add a call to Create in StreamDecompressor::Connect
233 // 4. read compressed data using GetCompressed and GetCompressedByte
234 ////////////////////////////////////////////////////////////////////
235 class StreamDecompressor
: public InStream
{
237 // Factory method for creating the decompressor.
238 // The decompression algorithm and its options are read from input.
239 // Returns NULL if (the beginning of) the input is invalid.
240 static StreamDecompressor
* Connect(InStreamBuffer
* input
);
// Default constructor and virtual destructor; both have empty bodies.
242 StreamDecompressor() { }
243 virtual ~StreamDecompressor() { }
245 // Disconnect deletes this.
246 InStreamBuffer
* Disconnect();
248 // Returns the string read from input at creation.
// The value is the format_ member, returned by value (the caller gets a
// copy).
249 std::string
GetFormat() const { return format_
; }
// Start and finish decompressing a block (see the usage outline in the file
// header). Both are pure virtual and must be implemented by derived classes.
251 virtual void ReadBegin() =0;
252 virtual void ReadEnd() =0;
253 // inherits from InStream:
254 // virtual void Read(char* bytes, size_t n) =0;
// Pure virtual size query; what the returned value counts is defined by the
// derived decompressor.
256 virtual int64
SizeInBytes() const =0;
260 // All compressed data should be read using GetCompressed
261 // and GetCompressedByte.
// Reads n bytes of compressed data into bytes by calling input_->ReadFast().
// NOTE(review): the closing brace of this function is not visible in this
// extract.
262 inline void GetCompressed(char* bytes
, size_t n
) {
263 input_
->ReadFast(bytes
, n
);
// Reads one byte of compressed data with input_->ReadByte() and stores it in
// a local variable.
// NOTE(review): the return statement and closing brace are not visible in
// this extract.
265 inline unsigned char GetCompressedByte() {
266 unsigned char byte
= input_
->ReadByte();
// Handle to the compressed input stream. GetCompressed and GetCompressedByte
// access the underlying InStreamBuffer through it with operator->.
271 StreamMaster
<InStreamBuffer
> input_
;
// Optional hooks with empty default bodies that derived classes may
// override. Presumably invoked from Connect()/Disconnect() -- confirm in
// stream_compressor.cc.
// NOTE(review): the InStreamBuffer parameter is named "out", the same name
// used by the compressor-side hook; this looks like a copy-paste leftover.
// The ';' following each empty body is redundant.
274 virtual void ConnectPrivate(InStreamBuffer
* out
) {};
275 virtual void DisconnectPrivate() {};
// Copy constructor and copy assignment are declared without bodies here,
// presumably to forbid copying of decompressors. The access specifier that
// applies to them is not visible in this extract.
277 StreamDecompressor(const StreamDecompressor
&);
278 StreamDecompressor
& operator=(const StreamDecompressor
&);
281 } // namespace dcsbwt
283 #endif // DCSBWT_STREAM_COMPRESSOR_H__