1 // Copyright 2007 Google Inc.
3 // This program is free software; you can redistribute it and/or
4 // modify it under the terms of the GNU General Public License
5 // as published by the Free Software Foundation; either version 2
6 // of the License, or (at your option) any later version.
8 // This program is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 // GNU General Public License for more details.
13 // You should have received a copy of the GNU General Public License
14 // along with this program; if not, write to the Free Software
15 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 // An abstract interface for byte streams defined by two
18 // base classes OutStream and InStream
19 // useful for building byte stream processing pipelines
20 // with interchangeable components.
21 // There are also derived classes providing buffering and file access.
22 // TODO: Add more derived classes: string/array access etc.
26 // When two parties want to pass a byte stream between them, the supplier
27 // obviously decides what the content of the stream is but either or
28 // both parties may want to have a say on other issues:
29 // - When to move data, how much to move at a time, and when to stop for good?
30 // - How to perform the actual transfer of data?
32 // In the protocol used here, one party is the master and the other party
33 // is the servant. The master decides on all the issues mentioned above:
34 // - The servant is a class derived from either OutStream or
35 // InStream depending on the direction of the stream.
36 // - The master holds a pointer or a reference to the servant and
37 // calls the virtual member function Write or Read to move bytes.
38 // - The data transfer is done using a memory area (a buffer) that
39 // the master supplies (as an argument to the call), and the
40 // servant performs the actual movement of data.
42 // The protocol provides no way for a servant to report errors such as
43 // an end of data, an end of capacity, or invalid stream content.
44 // There are also no methods for setting up or cleaning up.
45 // A derived class can and should offer such mechanisms when necessary
46 // but to a master that knows only the static type OutStream or InStream,
47 // there is only Read or Write that never fails.
51 // A simple compressor pipeline that reads from infile and
52 // writes to outfile might be set up and run like this:
54 // InStreamFromFile instream(infile);
55 // OutStreamToFile outstream(outfile);
56 // Compressor compressor;
57 // compressor.Connect(outstream);
58 // char buffer[kBufferSize];
60 // instream.Read(buffer, kBufferSize);
61 // compressor.Write(buffer, kBufferSize);
64 // instream.Read(buffer, num_remaining_bytes);
65 // compressor.Write(buffer, num_remaining_bytes);
66 // compressor.Disconnect();
68 // Compressor might be defined like this:
70 // class Compressor : public OutStream {
72 // void Connect(OutStream* out) { output_.Connect(out); }
73 // void Disconnect() { output_.Disconnect(); }
74 // virtual void Write(const char* bytes, size_t n) {
75 // for (;n; --n) CompressByte(*bytes++);
78 // OutStreamBuffer output_;
79 // void EmitByte(unsigned char byte) { output_.WriteByte(byte); }
80 // void CompressByte(unsigned char byte);
83 #ifndef DCSBWT_STREAM_H__
84 #define DCSBWT_STREAM_H__
94 ////////////////////////////////////////////////////////////////
95 // OutStream is the base class for servants on the receiving end
100 // class MyOutStream : public OutStream {
102 // public: virtual void Write(const char* bytes, size_t n) { ... }
105 ////////////////////////////////////////////////////////////////
// NOTE(review): this span looks extraction-damaged. Each line carries a stray
// leading number, and the enclosing class header, access specifiers and
// closing brace are not visible here. Restore them from the upstream file.
//
// Virtual destructor with an empty body.
109 virtual ~OutStream() { }
111 // Consume the n bytes in [bytes, bytes+n).
// Pure virtual: every concrete servant supplies its own implementation.
// The call returns nothing, so a failure cannot be reported through it.
112 virtual void Write(const char* bytes
, size_t n
) =0;
// Copy constructor and copy assignment are only declared here (no body).
// NOTE(review): presumably private and deliberately left undefined so that
// stream objects cannot be copied -- confirm against the upstream file.
115 OutStream(const OutStream
&);
116 OutStream
& operator=(const OutStream
&);
119 ////////////////////////////////////////////////////////////////
120 // InStream is the base class for servants on the supplying end
123 // Usage is analogous to OutStream.
124 ////////////////////////////////////////////////////////////////
// NOTE(review): this span looks extraction-damaged. Each line carries a stray
// leading number, and the enclosing class header, access specifiers and
// closing brace are not visible here. Restore them from the upstream file.
//
// Virtual destructor with an empty body.
128 virtual ~InStream() { }
130 // Fill the n bytes in [begin, begin+n) with data.
// Pure virtual: every concrete servant supplies its own implementation.
// The call returns nothing, so a shortage of data cannot be reported
// through it.
131 virtual void Read(char* begin
, size_t n
) =0;
// Copy constructor and copy assignment are only declared here (no body).
// NOTE(review): presumably private and deliberately left undefined so that
// stream objects cannot be copied -- confirm against the upstream file.
134 InStream(const InStream
&);
135 InStream
& operator=(const InStream
&);
138 ////////////////////////////////////////////////////////////////
139 // StreamMaster helps the master end of a stream connection in setting up
140 // and maintaining the connection. In particular, it provides some
141 // protection against trying to use an uninitialized servant pointer.
143 // Being a template, StreamMaster can be used with InStream and OutStream
144 // as well as their derivatives. All public members of the servant
145 // can be accessed through operator->
147 // StreamMaster is typically used as a member of a class rather than
148 // a base class; see the buffer classes below for examples.
154 // void Connect(OutStream* servant) { master_.Connect(servant); }
155 // void foo() { ... master_->Write(...); ... }
156 // OutStream* Disconnect() { return master_.Disconnect(); }
158 // StreamMaster<OutStream> master_;
160 ////////////////////////////////////////////////////////////////
// NOTE(review): this span looks extraction-damaged. Each line carries a stray
// leading number; the class header, the end of Connect() and Disconnect(),
// the declaration of servant_ and the closing brace are not visible here.
//
// Servant is the type of the object the held pointer refers to.
161 template <typename Servant
>
// Exposes the template argument to users of the class.
164 typedef Servant ServantType
;
// A new master starts out unconnected (null servant pointer).
166 StreamMaster() : servant_(NULL
) {}
// Destroying a master that is still connected trips the assertion.
167 ~StreamMaster() { assert(servant_
== NULL
); }
// Attach newservant. Asserts that no servant is currently attached and that
// newservant is not null, then stores the pointer.
169 void Connect(Servant
* newservant
) {
170 assert(servant_
== NULL
);
171 assert(newservant
!= NULL
);
172 servant_
= newservant
;
// Detach the current servant. Asserts that one is attached and remembers it
// in oldservant.
// NOTE(review): the remainder of this function is not visible; presumably it
// resets servant_ and returns oldservant -- confirm against the upstream file.
174 Servant
* Disconnect() {
175 assert(servant_
!= NULL
);
176 Servant
* oldservant
= servant_
;
// True when a servant is attached.
180 bool IsConnected() const { return NULL
!= servant_
; }
// Member access on the servant; goes through GetServant(), so it asserts
// when no servant is attached.
182 Servant
* operator->() { return GetServant(); }
// Implicit conversion to the raw servant pointer, also via GetServant().
183 operator Servant
* () { return GetServant(); }
// Returns the attached servant; asserts that it is not null.
184 Servant
* GetServant() { assert(servant_
!= NULL
); return servant_
; }
// Copy constructor and copy assignment are only declared here (no body).
// NOTE(review): presumably private and undefined to forbid copying -- confirm.
188 StreamMaster(const StreamMaster
&);
189 StreamMaster
& operator=(const StreamMaster
&);
192 ////////////////////////////////////////////////////////////////
193 // OutStreamBuffer provides buffering for an outstream master.
194 // The main purpose is to make small writes faster by avoiding
195 // a (virtual) function call for every write.
196 // Writing a single byte is particularly simple and fast.
202 // void Connect(OutStream* servant) { buffer_.Connect(servant); }
203 // void foo() { ... buffer_.Write(...); ... }
204 // void bar() { ... buffer_.WriteByte(...); ... }
205 // OutStream* Disconnect() { return buffer_.Disconnect(); }
207 // OutStreamBuffer buffer_;
209 ////////////////////////////////////////////////////////////////
// NOTE(review): this span looks extraction-damaged. Each line carries a stray
// leading number; access specifiers, closing braces and the declaration of
// Flush() (which is called below) are not visible here.
210 class OutStreamBuffer
{
// Default capacity of the buffer: 1 << 14 = 16384 chars.
212 static const int kDefaultBufferSize
= (1 << 14);
// Allocates a buffer of buffer_size chars and marks it empty by pointing
// next_free_slot_ at its first element. buffer_ is declared before
// next_free_slot_ below, so buffer_.begin() is taken from the already
// constructed vector.
213 explicit OutStreamBuffer(size_t buffer_size
= kDefaultBufferSize
)
214 : buffer_(buffer_size
), next_free_slot_(buffer_
.begin()) {}
// Empty destructor body: nothing is flushed here.
215 ~OutStreamBuffer() { }
// Attach the servant that receives the buffered data (forwarded to master_).
217 void Connect(OutStream
* servant
) { master_
.Connect(servant
); }
// Calls Flush() first and then detaches; returns whatever
// master_.Disconnect() returns.
218 OutStream
* Disconnect() { Flush(); return master_
.Disconnect(); }
// True when master_ reports an attached servant.
219 bool IsConnected() const { return master_
.IsConnected(); }
// Write the n bytes in [bytes, bytes+n).
// Requires a connected servant (asserted). The data is copied into the
// buffer only when n is strictly smaller than the free space; otherwise
// FlushAndWrite() takes over. The strict comparison means the buffered path
// always leaves at least one free slot, which the final assertion checks.
221 inline void Write(const char* bytes
, size_t n
) {
222 assert(IsConnected());
223 if (n
< FreeSpace()) WriteToBuffer(bytes
, n
);
224 else FlushAndWrite(bytes
, n
);
225 assert(FreeSpace() > 0);
// Write a single byte.
// Requires a connected servant and at least one free slot (both asserted).
// The byte is stored first; Flush() is called as soon as the buffer has no
// free slot left.
// NOTE(review): Flush() is not visible here; presumably it empties the buffer
// so that the "FreeSpace() > 0" precondition holds for the next call.
227 inline void WriteByte(unsigned char byte
) {
228 assert(IsConnected());
229 assert(FreeSpace() > 0);
230 *next_free_slot_
++ = byte
;
231 if (FreeSpace() == 0) Flush();
235 // Change the size of the buffer.
236 // Any data in the buffer is flushed.
237 // Can be used for releasing the space taken by the buffer
238 // by giving a small value as an argument.
// Only declared here; the definition is not part of this span.
239 void Reset(size_t size
= kDefaultBufferSize
);
// Connection to the servant that receives the data.
242 StreamMaster
<OutStream
> master_
;
// Storage for bytes that have not been passed on yet.
243 std::vector
<char> buffer_
;
// Position in buffer_ where the next byte is stored.
244 std::vector
<char>::iterator next_free_slot_
;
// Number of unused slots, i.e. the distance from next_free_slot_ to the end
// of buffer_. The assertion guards against the position having run past the
// end.
246 inline size_t FreeSpace() const {
247 assert(buffer_
.end() - next_free_slot_
>= 0);
248 return buffer_
.end() - next_free_slot_
;
// Copies [bytes, bytes+n) to next_free_slot_ and advances it.
// There is no bounds check here; the visible caller (Write) only takes this
// path when n < FreeSpace().
250 inline void WriteToBuffer(const char* bytes
, size_t n
) {
251 next_free_slot_
= std::copy(bytes
, bytes
+n
, next_free_slot_
);
// Slow path of Write(). Only declared here; the definition is not part of
// this span.
253 void FlushAndWrite(const char* bytes
, size_t n
);
// Copy constructor and copy assignment are only declared here (no body).
// NOTE(review): presumably private and undefined to forbid copying -- confirm.
255 OutStreamBuffer(const OutStreamBuffer
&);
256 OutStreamBuffer
& operator=(const OutStreamBuffer
&);
259 ////////////////////////////////////////////////////////////////
260 // InStreamBuffer is the InStream counterpart to OutStreamBuffer (see above).
262 // Unlike OutStreamBuffer, InStreamBuffer does not support flushing.
263 // Flushing would push data from servant to master, which is against
264 // the basic idea of the protocol. More appropriate would be to send
265 // the unused data back to where it came from, but it would be
266 // unreasonable to expect every InStream object to be able to
267 // move data backwards. For example, a decompressor cannot in general
268 // reverse the decompression (which is not the same as compression).
270 // Thus any unused data is kept in the buffer, and can be read even if
271 // the buffer is disconnected from a servant or connected to a new servant.
272 // Only explicit calls to Clear() or Reset() discard the data.
273 // The amount of unused data can be found with AvailableInBuffer().
275 // InStreamBuffer is a subclass of InStream, so that any master
276 // can access the data in the buffer.
277 // NOTE: Read(...) is the virtual function of InStream.
278 // ReadFast(...) is an inlined non-virtual function that does
279 // the same thing (but faster).
281 // If losing data in the buffer at the end is not a problem, InStreamBuffer
282 // can be used internally similarly to OutStreamBuffer:
286 // void Connect(InStream* servant) { buffer_.Connect(servant); }
287 // void foo() { ... buffer_.ReadFast(...); ... }
288 // void bar() { ... buffer_.ReadByte(...); ... }
289 // InStream* Disconnect() { return buffer_.Disconnect(); }
291 // InStreamBuffer buffer_;
294 // If the remaining data should not be lost, one can instead do this:
298 // void Connect(InStreamBuffer* buffer) { buffer_.Connect(buffer); }
299 // void foo() { ... buffer_->ReadFast(...); ... }
300 // void bar() { ... buffer_->ReadByte(...); ... }
301 // InStreamBuffer* Disconnect() { return buffer_.Disconnect(); }
303 // StreamMaster<InStreamBuffer> buffer_;
305 ////////////////////////////////////////////////////////////////
// NOTE(review): this span looks extraction-damaged. Each line carries a stray
// leading number; access specifiers, closing braces and the declaration of
// Refill() (which is called below) are not visible here.
306 class InStreamBuffer
: public InStream
{
// Default capacity of the buffer: 1 << 12 = 4096 chars.
308 static const int kDefaultBufferSize
= (1 << 12);
// Allocates a buffer of buffer_size chars and marks it as holding no unread
// data by pointing next_unused_byte_ at its end. buffer_ is declared before
// next_unused_byte_ below, so buffer_.end() is taken from the already
// constructed vector.
309 explicit InStreamBuffer(size_t buffer_size
= kDefaultBufferSize
)
310 : buffer_(buffer_size
), next_unused_byte_(buffer_
.end()) {}
311 virtual ~InStreamBuffer() { }
// Attach the servant that supplies the data (forwarded to master_).
313 void Connect(InStream
* servant
) { master_
.Connect(servant
); }
// Detach the servant. The buffer is not touched, so unread data stays
// available afterwards.
314 InStream
* Disconnect() { return master_
.Disconnect(); }
// True when master_ reports an attached servant.
315 bool IsConnected() const { return master_
.IsConnected(); }
// InStream interface: fill [bytes, bytes+n). Simply forwards to ReadFast().
317 virtual void Read(char* bytes
, size_t n
) { ReadFast(bytes
, n
); }
// Non-virtual read of n bytes into [bytes, bytes+n).
// Served straight from the buffer when it holds at least n unread bytes;
// otherwise ReadAndRefill() takes over.
318 inline void ReadFast(char* bytes
, size_t n
) {
319 if (n
<= AvailableInBuffer()) ReadFromBuffer(bytes
, n
);
320 else ReadAndRefill(bytes
, n
);
// Returns the next unread byte and advances past it. Refill() is called
// first when the buffer holds no unread data.
// NOTE(review): Refill() is not visible here; presumably it loads new data
// from the servant and repositions next_unused_byte_ -- confirm.
322 inline unsigned char ReadByte() {
323 if (AvailableInBuffer() == 0) Refill();
324 return *next_unused_byte_
++;
// Discards any unread data by moving the read position to the end of the
// buffer.
327 void Clear() { next_unused_byte_
= buffer_
.end(); }
329 // Change the size of the buffer.
330 // WARNING: Any data in the buffer is lost.
331 // Mainly useful for releasing the space taken by the buffer
332 // by giving a small argument.
// Only declared here; the definition is not part of this span.
333 void Reset(size_t size
= kDefaultBufferSize
);
// Number of unread bytes, i.e. the distance from next_unused_byte_ to the
// end of buffer_. The assertion guards against the position having run past
// the end.
335 inline size_t AvailableInBuffer() const {
336 assert(buffer_
.end() - next_unused_byte_
>= 0);
337 return buffer_
.end() - next_unused_byte_
;
// Connection to the servant that supplies the data.
341 StreamMaster
<InStream
> master_
;
// Storage for data that has not been handed out yet.
342 std::vector
<char> buffer_
;
// Position of the next unread byte in buffer_.
343 std::vector
<char>::iterator next_unused_byte_
;
// Copies n unread bytes to [bytes, bytes+n) and advances the read position.
// Asserts that the buffer really holds n unread bytes.
345 inline void ReadFromBuffer(char* bytes
, size_t n
) {
346 assert(n
<= AvailableInBuffer());
347 std::copy(next_unused_byte_
, next_unused_byte_
+ n
, bytes
);
348 next_unused_byte_
+= n
;
// Slow path of ReadFast(). Only declared here; the definition is not part of
// this span.
350 void ReadAndRefill(char* bytes
, size_t n
);
// Copy constructor and copy assignment are only declared here (no body).
// NOTE(review): presumably private and undefined to forbid copying -- confirm.
353 InStreamBuffer(const InStreamBuffer
&);
354 InStreamBuffer
& operator=(const InStreamBuffer
&);
357 ////////////////////////////////////////////////////////////////
358 // OutStreamToFile and InStreamFromFile are used for streaming to and from C stdio FILE* handles.
360 ////////////////////////////////////////////////////////////////
// NOTE(review): this span looks extraction-damaged. Each line carries a stray
// leading number; access specifiers, closing braces, the statements around
// the per-byte log output and the declarations of file_ and no_errors_ are
// not visible here.
//
// OutStream servant that writes the bytes it receives to a stdio FILE.
361 class OutStreamToFile
: public OutStream
{
// Stores the given FILE pointer and starts in the "no errors" state.
// The file is not opened here, and the empty destructor does not close it.
363 explicit OutStreamToFile(FILE* file
) : file_(file
), no_errors_(true) {}
364 virtual ~OutStreamToFile() {}
// Writes the n bytes in [bytes, bytes+n) with a single fwrite() call.
// A short write clears no_errors_; nothing visible here sets it back.
366 virtual void Write(const char* bytes
, size_t n
) {
367 if (fwrite(bytes
, 1, n
, file_
) != n
) no_errors_
= false;
// NOTE(review): the two statements below print a written byte as an integer
// to std::clog. The surrounding control flow is missing; this looks like an
// optional per-byte debug trace -- confirm against the upstream file.
370 unsigned char byte = *bytes++;
371 std::clog << "Wrote to file: " << int(byte);
// False once any Write() call has written fewer bytes than requested.
376 bool NoErrors() const { return no_errors_
; }
// Copy constructor and copy assignment are only declared here (no body).
// NOTE(review): presumably private and undefined to forbid copying -- confirm.
382 OutStreamToFile(const OutStreamToFile
&);
383 OutStreamToFile
& operator=(const OutStreamToFile
&);
// NOTE(review): this span looks extraction-damaged. Each line carries a stray
// leading number; access specifiers, closing braces, the end of the logging
// loop and the declarations of file_ and bytes_read_ are not visible here.
//
// InStream servant that fills read requests from a stdio FILE.
386 class InStreamFromFile
: public InStream
{
// Stores the given FILE pointer and starts the byte counter at zero.
// The file is not opened here, and the empty destructor does not close it.
388 explicit InStreamFromFile(FILE* file
) : file_(file
), bytes_read_(0) {}
389 virtual ~InStreamFromFile() {}
// Requests n bytes from the file with a single fread() call; size holds the
// number of bytes fread() actually delivered, which may be less than n.
// NOTE(review): the statement that adds size to bytes_read_ is not visible
// here (an original line is missing right after the fread call) -- confirm.
391 virtual void Read(char* bytes
, size_t n
) {
392 int64 size
= fread(bytes
, 1, n
, file_
);
// Walks over the bytes that were read and prints each one as an integer to
// std::clog.
// NOTE(review): any guard around this loop and the rest of its body are
// missing; this looks like an optional per-byte debug trace -- confirm.
394 for (; size; --size) {
395 unsigned char byte = *bytes++;
396 std::clog << "Read from file: " << int(byte);
402 // Over-reading due to internal buffers is acceptable behaviour.
403 // Thus no error is reported even if reading failed.
404 // A client may use BytesReadFromFile() instead to check that the expected
405 // number of bytes was actually read from the file.
406 int64
BytesReadFromFile() const { return bytes_read_
; }
// Copy constructor and copy assignment are only declared here (no body).
// NOTE(review): presumably private and undefined to forbid copying -- confirm.
412 InStreamFromFile(const InStreamFromFile
&);
413 InStreamFromFile
& operator=(const InStreamFromFile
&);
416 } // namespace dcsbwt
418 #endif // DCSBWT_STREAM_H__