// (C) 2006, Matt Mahoney, mmahoney (at) cs.fit.edu
// Copyright 2007 Google Inc.

// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

// This is an algorithm for entropy compression of the Burrows-Wheeler
// transform of a text. It was originally written by Matt Mahoney as
// bbb.cpp - big block BWT compressor version 1, Aug. 31, 2006.
// http://cs.fit.edu/~mmahoney/compression/bbb.cpp

// Only the entropy compression of bbb.cpp is included here.
// Other parts, including the computation of the transform, are not included.
// The entropy compressor has been fitted with an interface that
// ties it to the rest of the dcs-bwt-compressor.

// The class Predictor has seen the following modifications:
// - Static variables in the member function p() have been changed into
//   data members of Predictor.
// - The APM chain has been modified a bit.

// Here is the original documentation of the entropy coding with
// the changes to bbb.cpp added in square brackets.

// BWT data is best coded with an order 0 model. The transformed text tends
// to have long runs of identical bytes (e.g. "nnbaaa"). The BWT data is
// modeled with a modified PAQ with just one context (no mixing) followed
// by a 5 stage SSE (APM) and bitwise arithmetic coding. Modeling typically
// takes about as much time as sorting and unsorting in slow mode.
// The model uses about 5 MB memory.
// [ Now reduced to about 256KB of memory. ]

// The order 0 model consists of a mapping:

//            order 1, 2, 3 contexts ----------+
//                                             |
//                                             V
//   order 0 context -> bit history -> p -> APM chain -> arithmetic coder

// Bits are coded one at a time. The arithmetic coder maintains a range
// [lo, hi), initially [0, 1), and repeatedly subdivides the range in proportion
// to p(0), p(1), the next bit probabilities predicted by the model. The final
// output is the shortest base 256 number x such that lo <= x < hi. As the
// leading bytes of x become known, they are output. To decompress, the model
// predictions are repeated as during compression, then the actual bit is
// determined by which half of the subrange contains x.
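
// As a minimal illustration of one coding step (floating point for clarity;
// the actual Encoder works on integer subranges, and the helper name below
// is made up):
//
//   void encode_bit(double& lo, double& hi, double p1, int bit) {
//     // split [lo, hi) in proportion to p(0) = 1-p1 and p(1) = p1
//     double mid = lo + (hi - lo) * (1.0 - p1);
//     if (bit) lo = mid;   // bit 1 -> keep the upper part
//     else     hi = mid;   // bit 0 -> keep the lower part
//   }
//
// The decoder performs the same subdivision and recovers the bit by testing
// on which side of mid the number x falls.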

// The model inputs a bytewise order 0 context consisting of the last 0 to 7
// bits of the current byte, plus the number of bits. There are a total of
// 255 possible bitwise contexts. For each context, a table (t1) maintains
// an 8 bit state representing the history of 0 and 1 bits previously seen.
// This history is mapped by another table (a StateMap sm) to a probability,
// p, that the next bit will be 1. This table is adaptive: after each
// prediction, the mapping (state -> p) is adjusted to improve the last
// prediction.
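
// For illustration, the bitwise context (kept in the Predictor member c0
// below) can be maintained like this; the helper name is made up:
//
//   int update_bitwise_context(int c0, int bit) {
//     return (c0 << 1) | bit;   // c0 starts at 1; bits 1,0,1 give 0b1101
//   }
//
// The leading 1 bit implicitly records how many bits have been seen, which
// is why the 255 possible values of c0 cover "bits seen so far" and
// "number of bits" at the same time.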

// The output of the StateMap is passed through a series of 6 more adaptive
// tables (Adaptive Probability Maps, or APM), each of which maps a context
// and the input probability to an output probability. The input probability
// is interpolated between 33 bins on a nonlinear scale with smaller bins
// near 0 and 1. After each prediction, the corresponding table entries
// on both sides of p are adjusted to improve the last prediction.
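
// The interpolation can be sketched as follows (names are illustrative; the
// APM below additionally works in the stretch() domain, which is what makes
// the bins near 0 and 1 narrower):
//
//   int apm_interpolate(const uint16* bins /* 33 entries */, int p /* 0-4095 */) {
//     int i = p >> 7;    // which of the 32 gaps between bins
//     int w = p & 127;   // position within that gap
//     return (bins[i] * (128 - w) + bins[i + 1] * w) >> 7;
//   }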

// The APM chain is like this:

//       + A11 ->+            +--->---+ +--->---+
//       |       |            |       | |       |
//   p ->+       +-> A2 -> A3 +-> A4 -+-+-> A5 -+-> Encoder
//       |       |
//       + A12 ->+

// [ The APM chain has been modified into:
//
//                  +--->---+ +--->---+
//                  |       | |       |
//   p --> A2 -> A3 +-> A5 -+-+-> A4 -+-> Encoder
// ]

// A11 and A12 both take c0 (the preceding bits of the current byte) as
// additional context, but one is fast adapting and the other is slow
// adapting. Their outputs are averaged.

// A2 is an order 1 context (previous byte and current partial byte).
// [ A2 has been modified so that it uses only two bits of information
// from the previous byte: what the bit in the current bit position is,
// and whether the preceding bits are the same as or different from c0. ]

// A3 takes the previous (but not current) byte as context, plus 2 bits
// that depend on the current run length (0, 1, 2-3, or 4+), the number
// of times the last byte was repeated.
// [ A3 now takes only the two run length bits. ]
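
// Those two run length bits can be derived like this (an illustrative
// helper; the Predictor below keeps the result in its runcxt member):
//
//   int run_context(int run) {     // run = # of times the last byte repeated
//     if (run == 0) return 0;
//     if (run == 1) return 1;
//     return run <= 3 ? 2 : 3;     // 2-3 -> 2, 4 or more -> 3
//   }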

// A4 takes the current byte and the low 5 bits of the second byte back.
// The output is averaged with the A3 output, with 3/4 weight to A4 and
// 1/4 weight to A3.
// [ A4 has been moved after A5, it takes only the current byte (not the
// 5 additional bits), and the averaging weights are 1/2 and 1/2. ]

// A5 takes a 14 bit hash of an order 3 context (last 3 bytes plus
// current partial byte) and is averaged with 1/2 weight to the A4 output.
// [ A5 now takes an 11 bit hash of an order 4 context. ]

// The StateMap, state table, APM, Encoder, and associated code (Array,
// squash(), stretch()) are taken from PAQ8 with minor non-functional
// changes (e.g. removing global context).
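
// For reference, stretch() and squash() are the usual PAQ pair of inverse
// mappings between the 12 bit probability scale and the logistic domain,
// roughly:
//
//   stretch(p) ~ ln(p / (1 - p))     with p read as a fraction of 4096
//   squash(x)  ~ 4096 / (1 + e^-x)   clamped back into 0-4095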

#ifndef DCSBWT_BBB_COMPRESS_H__
#define DCSBWT_BBB_COMPRESS_H__

#include <algorithm>  // std::min
#include <cassert>
#include <cstddef>    // size_t
#include <cstdio>
#include <cstdlib>
#include <cstring>    // memcpy
#include <string>

#include "stream_compressor.h"
#include "inttypes.h"

namespace dcsbwt {
namespace bbb {

//////////////////////////////////////////////////////////////////
// Array<T, ALIGN> a(n); creates n elements of T initialized to 0 bits.
// Constructors for T are not called.
// Indexing is bounds checked if assertions are on.
// a.size() returns n.
// a.resize(n) changes size to n, padding with 0 bits or truncating.
// Copy and assignment are not supported.
// Memory is aligned on an ALIGN byte boundary (power of 2), default is none.
//////////////////////////////////////////////////////////////////
template <class T, int ALIGN=0> class Array {
 public:
  explicit Array(int i=0) { create(i); }
  ~Array();
  T& operator[](int i) {
    assert(i >= 0 && i < n);
    return data[i];
  }
  const T& operator[](int i) const {
    assert(i >= 0 && i < n);
    return data[i];
  }
  int size() const { return n; }
  void resize(int i);  // change size to i

 private:
  int n;         // user size
  int reserved;  // actual size
  char *ptr;     // allocated memory, zeroed
  T* data;       // start of n elements of aligned data
  void create(int i);  // create with size i

  Array(const Array&);  // no copy or assignment
  Array& operator=(const Array&);
};

template<class T, int ALIGN> void Array<T, ALIGN>::resize(int i) {
  if (i <= reserved) {
    n = i;
    return;
  }
  char* saveptr = ptr;
  T* savedata = data;
  int saven = n;
  create(i);
  if (savedata && saveptr) {
    memcpy(data, savedata, sizeof(T)*std::min(i, saven));
    free(saveptr);
  }
}

template<class T, int ALIGN> void Array<T, ALIGN>::create(int i) {
  n = reserved = i;
  if (i <= 0) {
    data = 0;
    ptr = 0;
    return;
  }
  const int sz = ALIGN + n*sizeof(T);
  ptr = (char*)calloc(sz, 1);
  if (!ptr) fprintf(stderr, "Out of memory\n"), exit(1);
  data = (ALIGN ? (T*)(ptr+ALIGN-(((long)ptr)&(ALIGN-1))) : (T*)ptr);
  assert((char*)data >= ptr);
  assert((char*)data <= ptr+ALIGN);
}
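
// Note on create(): when ALIGN is a nonzero power of two, the expression
// ptr+ALIGN-(ptr&(ALIGN-1)) advances ptr to the next ALIGN byte boundary
// (a full ALIGN bytes when ptr is already aligned), which is why the
// allocation reserves ALIGN extra bytes and why the asserts above hold.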

template<class T, int ALIGN> Array<T, ALIGN>::~Array() {
  free(ptr);
}

//////////////////////////////////////////////////////////////////
// A StateMap maps a nonstationary counter state to a probability.
// After each mapping, the mapping is adjusted to improve future
// predictions. Methods:
//
// sm.p(y, cx) converts state cx (0-255) to a probability (0-4095),
// and trains by updating the previous prediction with y (0-1).
//
// Counter state -> probability * 256
//////////////////////////////////////////////////////////////////
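
// The update in p() below is an exponential moving average toward the
// observed bit: with p16 = t[cxt], the previous context's probability
// scaled to 16 bits,
//
//   p16 <- p16 + ((y*65536 - p16 + 128) >> 8)
//
// i.e. the stored estimate moves roughly 1/256 of the way toward 0 or
// 65536 on every update, and the final ">> 4" rescales it to 0-4095.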

class StateMap {
 public:
  StateMap() : cxt(0), t(256) {
    for (int i = 0; i < 256; ++i) t[i] = 1 << 15;  // initial probability 1/2
  }
  int p(int y, int cx) {
    assert(cx >= 0 && cx < t.size());
    t[cxt] += ((y << 16) - t[cxt] + 128) >> 8;
    return t[cxt = cx] >> 4;
  }

 private:
  int cxt;          // last context
  Array<uint16> t;  // 256 states -> probability * 64K
};

//////////////////////////////////////////////////////////////////
// APM maps a probability and a context into a new probability
// that bit y will next be 1. After each guess it updates
// its state to improve future guesses. Methods:
//
// APM a(N) creates with N contexts, uses 66*N bytes memory.
// a.p(y, pr, cx, rate=8) returns the adjusted probability in context cx (0 to
// N-1). rate determines the learning rate (smaller = faster, default 8).
// Probabilities are scaled 12 bits (0-4095). Update on last bit y (0-1).
//////////////////////////////////////////////////////////////////
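
// In the PAQ8-style implementation the update is essentially
//
//   t[i] <- t[i] + ((y*65536 - t[i]) >> rate)
//
// applied to the two table entries that bracketed the previous prediction,
// so a smaller rate means a larger step and faster adaptation (hence
// "smaller = faster" above).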

class APM {
 public:
  APM(int n);
  int p(int y, int pr=2048, int cxt=0, int rate=8);

 private:
  int index;        // last p, context
  const int N;      // number of contexts
  Array<uint16> t;  // [N][33]: p, context -> p
};

//////////////////////////// Predictor //////////////////////////

class Predictor {
 public:
  Predictor(): pr(2048), c0(1), c4(0), bpos(0), t1(256), cp(&t1[0]),
               run(0), runcxt(0),
               // a11(256), a12(256), a2(65536), a3(1024), a4(8192), a5(16384)
               a2(1024), a3(4), a5(2048), a4(256) {}
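  // With these sizes the APM tables take 66*(1024+4+2048+256) = 219912
  // bytes, roughly 215KB, which together with t1 and the StateMap accounts
  // for most of the "about 256KB" quoted at the top of this file.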

  int p() const { return pr; }
  void update(int y);  // adjust the model after coding bit y

 private:
  int pr;           // next return value of p() (0-4095)
  int c0;           // bitwise context: last 0-7 bits with a leading 1 (1-255)
  uint32 c4;        // last 4 whole bytes, last is in low 8 bits
  int bpos;         // number of bits in c0 (0-7)
  Array<uint8> t1;  // context -> state
  StateMap sm;      // state -> pr
  uint8* cp;        // context pointer
  int run;          // count of consecutive identical bytes (0-65535)
  int runcxt;       // (0-3) if run is 0, 1, 2-3, 4+
  // APM a11, a12, a2, a3, a4, a5;
  APM a2, a3, a5, a4;
};

}  // namespace bbb

class BbbCompressor : public StreamCompressor {
 public:
  class Options : public StreamCompressor::OptionsBase {
   public:
    virtual bool Set(const std::string& options) {
      return 0 == options.length();
    }
    virtual std::string Get() const { return std::string(); }
    int64 SizeInBytes() const { return 256 << 10; }
    virtual StreamCompressor* GetCompressor() { return new BbbCompressor; }
  };

  virtual ~BbbCompressor() { }

  virtual void WriteBegin();
  virtual void WriteEndPrivate();
  virtual void Write(const char* bytes, size_t n) {
    for (; n; --n) { CompressByte(*bytes++); }
  }

 private:
  bbb::Predictor predictor_;

  void CompressByte(unsigned char byte) {
    for (int i = 7; i >= 0; --i) { CompressBit((byte >> i) & 1); }
  }
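  // Bits are emitted most significant first, matching the order in which
  // BbbDecompressor::GetUncompressedByte below reassembles them.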
  void CompressBit(int bit);
};

class BbbDecompressor : public StreamDecompressor {
 public:
  static StreamDecompressor* Create(const std::string&) {
    return new BbbDecompressor;
  }
  virtual ~BbbDecompressor() {}

  virtual void ReadBegin();
  virtual void ReadEnd() {}
  virtual void Read(char* bytes, size_t n) {
    for (; n; --n) *bytes++ = GetUncompressedByte();
  }
  virtual int64 SizeInBytes() const { return 6 << 20; }

 private:
  bbb::Predictor predictor_;

  unsigned char GetUncompressedByte() {
    int byte = GetUncompressedBit();
    for (int i = 7; i; --i) byte = (byte << 1) + GetUncompressedBit();
    return byte;
  }
  int GetUncompressedBit();
};

}  // namespace dcsbwt

#endif  // DCSBWT_BBB_COMPRESS_H__