modified: makefile
[GalaxyCodeBases.git] / tools / bwt / dcs-bwt / src / stream.h
blob98ddad445245ebcb1b7a555a94c0d493dbcd5a6a
// Copyright 2007 Google Inc.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
// An abstract interface for byte streams, defined by two
// base classes, OutStream and InStream, and
// useful for building byte stream processing pipelines
// with interchangeable components.
// There are also derived classes providing buffering and file access.
// TODO: Add more derived classes: string/array access etc.
// BASIC PROTOCOL
//
// When two parties want to pass a byte stream between them, the supplier
// obviously decides what the content of the stream is but either or
// both parties may want to have a say on other issues:
// - When to move data, how much to move at a time, and when to stop for good?
// - How to perform the actual transfer of data?
//
// In the protocol used here, one party is the master and the other party
// is the servant. The master decides on all the issues mentioned above:
// - The servant is a class derived from either OutStream or
//   InStream depending on the direction of the stream.
// - The master holds a pointer or a reference to the servant and
//   calls the virtual member function Write or Read to move bytes.
// - The data transfer is done using a memory area (a buffer) that
//   the master supplies (as an argument to the call), and the
//   servant performs the actual movement of data.
//
// The protocol provides no way for a servant to report errors such as
// an end of data, an end of capacity, or invalid stream content.
// There are also no methods for setting up or cleaning up.
// A derived class can and should offer such mechanisms when necessary
// but to a master that knows only the static type OutStream or InStream,
// there is only Read or Write that never fails.
// EXAMPLE
//
// A simple compressor pipeline that reads from infile and
// writes to outfile might be set up and run like this:
//
//   InStreamFromFile instream(infile);
//   OutStreamToFile outstream(outfile);
//   Compressor compressor;
//   compressor.Connect(&outstream);
//   char buffer[kBufferSize];
//   while (...) {
//     instream.Read(buffer, kBufferSize);
//     compressor.Write(buffer, kBufferSize);
//   }
//   ...
//   instream.Read(buffer, num_remaining_bytes);
//   compressor.Write(buffer, num_remaining_bytes);
//   compressor.Disconnect();
//
// Compressor might be defined like this:
//
//   class Compressor : public OutStream {
//    public:
//     void Connect(OutStream* out) { output_.Connect(out); }
//     void Disconnect() { output_.Disconnect(); }
//     virtual void Write(const char* bytes, size_t n) {
//       for (;n; --n) CompressByte(*bytes++);
//     }
//    private:
//     OutStreamBuffer output_;
//     void EmitByte(unsigned char byte) { output_.WriteByte(byte); }
//     void CompressByte(unsigned char byte);
//   };
83 #ifndef DCSBWT_STREAM_H__
84 #define DCSBWT_STREAM_H__
#include "inttypes.h"

#include <assert.h>
#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <vector>
92 namespace dcsbwt {
////////////////////////////////////////////////////////////////
// OutStream is the base class for servants on the receiving end
// of a byte stream.
//
// Typical usage:
//
//   class MyOutStream : public OutStream {
//     ...
//    public: virtual void Write(const char* bytes, size_t n) { ... }
//     ...
//   };
////////////////////////////////////////////////////////////////
class OutStream {
 public:
  OutStream() { }
  virtual ~OutStream() { }

  // Do something with the data in [bytes, bytes+n).
  // The call returns void: this interface gives the servant no channel
  // for reporting a failure back to the master.
  virtual void Write(const char* bytes, size_t n) =0;

 private:
  // Private and without a body in this header, so code outside the
  // class cannot copy or assign a stream object.
  OutStream(const OutStream&);
  OutStream& operator=(const OutStream&);
};
////////////////////////////////////////////////////////////////
// InStream is the base class for servants on the supplying end
// of a byte stream.
//
// Usage is analogous to OutStream.
////////////////////////////////////////////////////////////////
class InStream {
 public:
  InStream() { }
  virtual ~InStream() { }

  // Fill [begin, begin+n) with data.
  // The call returns void: this interface gives the servant no channel
  // for reporting that fewer than n bytes were available.
  virtual void Read(char* begin, size_t n) =0;

 private:
  // Private and without a body in this header, so code outside the
  // class cannot copy or assign a stream object.
  InStream(const InStream&);
  InStream& operator=(const InStream&);
};
////////////////////////////////////////////////////////////////
// StreamMaster helps the master end of a stream connection in setting up
// and maintaining the connection. In particular, it provides some
// protection against trying to use an uninitialized servant pointer.
//
// Being a template, StreamMaster can be used with InStream and OutStream
// as well as their derivatives. All public members of the servant
// can be accessed through operator->
//
// StreamMaster is typically used as a member of a class rather than
// a base class; see the buffer classes below for examples.
//
// StreamMaster only stores the servant pointer; it never deletes it.
// Every check is an assert(), so the protection disappears when the
// code is compiled with NDEBUG.
//
// Typical usage:
//
//   class C {
//    public:
//     void Connect(OutStream* servant) { master_.Connect(servant); }
//     void foo() { ... master_->Write(...); ... }
//     OutStream* Disconnect() { return master_.Disconnect(); }
//    private:
//     StreamMaster<OutStream> master_;
//   };
////////////////////////////////////////////////////////////////
template <typename Servant>
class StreamMaster {
 public:
  typedef Servant ServantType;

  // Starts out disconnected.
  StreamMaster() : servant_(NULL) {}
  // The master must have been disconnected before it is destroyed.
  ~StreamMaster() { assert(servant_ == NULL); }

  // Attach a servant. The master must currently be disconnected and
  // newservant must not be NULL.
  void Connect(Servant* newservant) {
    assert(servant_ == NULL);
    assert(newservant != NULL);
    servant_ = newservant;
  }
  // Detach the current servant and hand it back to the caller.
  // The master must currently be connected.
  Servant* Disconnect() {
    assert(servant_ != NULL);
    Servant* oldservant = servant_;
    servant_ = NULL;
    return oldservant;
  }
  bool IsConnected() const { return NULL != servant_; }

  // Access to the servant. These are const because they do not change
  // the master; the servant they return is not const.
  // All of them require the master to be connected.
  Servant* operator->() const { return GetServant(); }
  operator Servant* () const { return GetServant(); }
  Servant* GetServant() const { assert(servant_ != NULL); return servant_; }

 private:
  Servant* servant_;
  // Private and without a body in this header, so code outside the
  // class cannot copy or assign a master.
  StreamMaster(const StreamMaster&);
  StreamMaster& operator=(const StreamMaster&);
};
192 ////////////////////////////////////////////////////////////////
193 // OutStreamBuffer provides buffering for an outstream master.
194 // The main purpose is to make small writes faster by avoiding
195 // a (virtual) function call for every write.
196 // Writing a single byte is particularly simple and fast.
198 // Typical usage:
200 // class C {
201 // public:
202 // void Connect(OutStream* servant) { buffer_.Connect(servant); }
203 // void foo() { ... buffer_.Write(...); ... }
204 // void bar() { ... buffer_.WriteByte(...); ... }
205 // OutStream* Disconnect() { return buffer_.Disconnect(); }
206 // private:
207 // OutStreamBuffer buffer_;
208 // };
209 ////////////////////////////////////////////////////////////////
210 class OutStreamBuffer {
211 public:
212 static const int kDefaultBufferSize = (1 << 14);
213 explicit OutStreamBuffer(size_t buffer_size = kDefaultBufferSize)
214 : buffer_(buffer_size), next_free_slot_(buffer_.begin()) {}
215 ~OutStreamBuffer() { }
217 void Connect(OutStream* servant) { master_.Connect(servant); }
218 OutStream* Disconnect() { Flush(); return master_.Disconnect(); }
219 bool IsConnected() const { return master_.IsConnected(); }
221 inline void Write(const char* bytes, size_t n) {
222 assert(IsConnected());
223 if (n < FreeSpace()) WriteToBuffer(bytes, n);
224 else FlushAndWrite(bytes, n);
225 assert(FreeSpace() > 0);
227 inline void WriteByte(unsigned char byte) {
228 assert(IsConnected());
229 assert(FreeSpace() > 0);
230 *next_free_slot_++ = byte;
231 if (FreeSpace() == 0) Flush();
233 void Flush();
235 // Change the size of the buffer.
236 // Any data in the buffer is flushed.
237 // Can be used for releasing the space taken by the buffer
238 // by giving a small value as an argument.
239 void Reset(size_t size = kDefaultBufferSize);
241 private:
242 StreamMaster<OutStream> master_;
243 std::vector<char> buffer_;
244 std::vector<char>::iterator next_free_slot_;
246 inline size_t FreeSpace() const {
247 assert(buffer_.end() - next_free_slot_ >= 0);
248 return buffer_.end() - next_free_slot_;
250 inline void WriteToBuffer(const char* bytes, size_t n) {
251 next_free_slot_ = std::copy(bytes, bytes+n, next_free_slot_);
253 void FlushAndWrite(const char* bytes, size_t n);
255 OutStreamBuffer(const OutStreamBuffer&);
256 OutStreamBuffer& operator=(const OutStreamBuffer&);
259 ////////////////////////////////////////////////////////////////
260 // InStreamBuffer is the InStream counterpart to OutStreamBuffer (see above).
262 // Unlike OutStreamBuffer, InStreamBuffer does not support flushing.
263 // Flushing would push data from servant to master, which is against
264 // basic idea of the protocol. More appropriate would be to send
265 // the unused data back to where it came from, but it would be
266 // unreasonable to expect every InStream object to be able to
267 // move data backwards. For example, a decompressor cannot in general
268 // reverse the decompression (which is not the same as compression).
270 // Thus any unused data is kept in the buffer, and can be read even if
271 // the buffer is disconnected from a servant or connected to a new servant.
272 // Only explicit calls to Clear() or Reset() discard the data.
273 // The amount of unused data can be found with AvailableInBuffer().
275 // InStreamBuffer is a subclass of InStream, so that any master
276 // can access the data in the buffer.
277 // NOTE: Read(...) is the virtual function of InStream.
278 // ReadFast(...) is an inlined non-virtual function that does
279 // the same thing (but faster).
281 // If losing data in the buffer at the end is not a problem, InStreamBuffer
282 // can be used internally similarly to OutStreamBuffer:
284 // class C {
285 // public:
286 // void Connect(InStream* servant) { buffer_.Connect(servant); }
287 // void foo() { ... buffer_.ReadFast(...); ... }
288 // void bar() { ... buffer_.ReadByte(...); ... }
289 // InStream* Disconnect() { return buffer_.Disconnect(); }
290 // private:
291 // InStreamBuffer buffer_;
292 // };
294 // If the remaining data should not be lost, one can instead do this:
296 // class C {
297 // public:
298 // void Connect(InStreamBuffer* buffer) { buffer_.Connect(buffer); }
299 // void foo() { ... buffer_->ReadFast(...); ... }
300 // void bar() { ... buffer_->ReadByte(...); ... }
301 // InStreamBuffer* Disconnect() { return buffer_.Disconnect(); }
302 // private:
303 // StreamMaster<InStreamBuffer> buffer_;
304 // };
305 ////////////////////////////////////////////////////////////////
306 class InStreamBuffer : public InStream {
307 public:
308 static const int kDefaultBufferSize = (1 << 12);
309 explicit InStreamBuffer(size_t buffer_size = kDefaultBufferSize)
310 : buffer_(buffer_size), next_unused_byte_(buffer_.end()) {}
311 virtual ~InStreamBuffer() { }
313 void Connect(InStream* servant) { master_.Connect(servant); }
314 InStream* Disconnect() { return master_.Disconnect(); }
315 bool IsConnected() const { return master_.IsConnected(); }
317 virtual void Read(char* bytes, size_t n) { ReadFast(bytes, n); }
318 inline void ReadFast(char* bytes, size_t n) {
319 if (n <= AvailableInBuffer()) ReadFromBuffer(bytes, n);
320 else ReadAndRefill(bytes, n);
322 inline unsigned char ReadByte() {
323 if (AvailableInBuffer() == 0) Refill();
324 return *next_unused_byte_++;
327 void Clear() { next_unused_byte_ = buffer_.end(); }
329 // Change the size of the buffer.
330 // WARNING: Any data in the buffer is lost.
331 // Mainly useful for releasing the space taken by the buffer
332 // by giving a small argument.
333 void Reset(size_t size = kDefaultBufferSize);
335 inline size_t AvailableInBuffer() const {
336 assert(buffer_.end() - next_unused_byte_ >= 0);
337 return buffer_.end() - next_unused_byte_;
340 private:
341 StreamMaster<InStream> master_;
342 std::vector<char> buffer_;
343 std::vector<char>::iterator next_unused_byte_;
345 inline void ReadFromBuffer(char* bytes, size_t n) {
346 assert(n <= AvailableInBuffer());
347 std::copy(next_unused_byte_, next_unused_byte_ + n, bytes);
348 next_unused_byte_ += n;
350 void ReadAndRefill(char* bytes, size_t n);
351 void Refill();
353 InStreamBuffer(const InStreamBuffer&);
354 InStreamBuffer& operator=(const InStreamBuffer&);
357 ////////////////////////////////////////////////////////////////
358 // OutStreamToFile and InStreamFromFile are used for streaming
359 // to/from a file.
360 ////////////////////////////////////////////////////////////////
361 class OutStreamToFile : public OutStream {
362 public:
363 explicit OutStreamToFile(FILE* file) : file_(file), no_errors_(true) {}
364 virtual ~OutStreamToFile() {}
366 virtual void Write(const char* bytes, size_t n) {
367 if (fwrite(bytes, 1, n, file_) != n) no_errors_ = false;
369 for (; n; --n) {
370 unsigned char byte = *bytes++;
371 std::clog << "Wrote to file: " << int(byte);
376 bool NoErrors() const { return no_errors_; }
378 private:
379 FILE* file_;
380 bool no_errors_;
382 OutStreamToFile(const OutStreamToFile&);
383 OutStreamToFile& operator=(const OutStreamToFile&);
386 class InStreamFromFile : public InStream {
387 public:
388 explicit InStreamFromFile(FILE* file) : file_(file), bytes_read_(0) {}
389 virtual ~InStreamFromFile() {}
391 virtual void Read(char* bytes, size_t n) {
392 int64 size = fread(bytes, 1, n, file_);
394 for (; size; --size) {
395 unsigned char byte = *bytes++;
396 std::clog << "Read from file: " << int(byte);
399 bytes_read_ += size;
402 // Over-reading due to internal buffers is acceptable behaviour.
403 // Thus no error is reported even if reading failed.
404 // A client may use BytesReadFromFile() instead to check that the expected
405 // number of bytes was actually read from the file.
406 int64 BytesReadFromFile() const { return bytes_read_; }
408 private:
409 FILE* file_;
410 int64 bytes_read_;
412 InStreamFromFile(const InStreamFromFile&);
413 InStreamFromFile& operator=(const InStreamFromFile&);
416 } // namespace dcsbwt
418 #endif // DCSBWT_STREAM_H__