liblzma: Improve documentation in index.h
[xz/debian.git] / src / liblzma / rangecoder / range_encoder.h
blobd794eabbccea21beb04a64cc515b936430dd782d
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file range_encoder.h
4 /// \brief Range Encoder
5 ///
6 // Authors: Igor Pavlov
7 // Lasse Collin
8 //
9 // This file has been put into the public domain.
10 // You can do whatever you want with this file.
12 ///////////////////////////////////////////////////////////////////////////////
14 #ifndef LZMA_RANGE_ENCODER_H
15 #define LZMA_RANGE_ENCODER_H
17 #include "range_common.h"
18 #include "price.h"
21 /// Maximum number of symbols that can be put pending into lzma_range_encoder
22 /// structure between calls to lzma_rc_encode(). For LZMA, 48+5 is enough
23 /// (match with big distance and length followed by range encoder flush).
24 #define RC_SYMBOLS_MAX 53
27 typedef struct {
28 uint64_t low;
29 uint64_t cache_size;
30 uint32_t range;
31 uint8_t cache;
33 /// Number of bytes written out by rc_encode() -> rc_shift_low()
34 uint64_t out_total;
36 /// Number of symbols in the tables
37 size_t count;
39 /// rc_encode()'s position in the tables
40 size_t pos;
42 /// Symbols to encode
43 enum {
44 RC_BIT_0,
45 RC_BIT_1,
46 RC_DIRECT_0,
47 RC_DIRECT_1,
48 RC_FLUSH,
49 } symbols[RC_SYMBOLS_MAX];
51 /// Probabilities associated with RC_BIT_0 or RC_BIT_1
52 probability *probs[RC_SYMBOLS_MAX];
54 } lzma_range_encoder;
57 static inline void
58 rc_reset(lzma_range_encoder *rc)
60 rc->low = 0;
61 rc->cache_size = 1;
62 rc->range = UINT32_MAX;
63 rc->cache = 0;
64 rc->out_total = 0;
65 rc->count = 0;
66 rc->pos = 0;
70 static inline void
71 rc_forget(lzma_range_encoder *rc)
73 // This must not be called when rc_encode() is partially done.
74 assert(rc->pos == 0);
75 rc->count = 0;
79 static inline void
80 rc_bit(lzma_range_encoder *rc, probability *prob, uint32_t bit)
82 rc->symbols[rc->count] = bit;
83 rc->probs[rc->count] = prob;
84 ++rc->count;
88 static inline void
89 rc_bittree(lzma_range_encoder *rc, probability *probs,
90 uint32_t bit_count, uint32_t symbol)
92 uint32_t model_index = 1;
94 do {
95 const uint32_t bit = (symbol >> --bit_count) & 1;
96 rc_bit(rc, &probs[model_index], bit);
97 model_index = (model_index << 1) + bit;
98 } while (bit_count != 0);
102 static inline void
103 rc_bittree_reverse(lzma_range_encoder *rc, probability *probs,
104 uint32_t bit_count, uint32_t symbol)
106 uint32_t model_index = 1;
108 do {
109 const uint32_t bit = symbol & 1;
110 symbol >>= 1;
111 rc_bit(rc, &probs[model_index], bit);
112 model_index = (model_index << 1) + bit;
113 } while (--bit_count != 0);
117 static inline void
118 rc_direct(lzma_range_encoder *rc,
119 uint32_t value, uint32_t bit_count)
121 do {
122 rc->symbols[rc->count++]
123 = RC_DIRECT_0 + ((value >> --bit_count) & 1);
124 } while (bit_count != 0);
128 static inline void
129 rc_flush(lzma_range_encoder *rc)
131 for (size_t i = 0; i < 5; ++i)
132 rc->symbols[rc->count++] = RC_FLUSH;
136 static inline bool
137 rc_shift_low(lzma_range_encoder *rc,
138 uint8_t *out, size_t *out_pos, size_t out_size)
140 if ((uint32_t)(rc->low) < (uint32_t)(0xFF000000)
141 || (uint32_t)(rc->low >> 32) != 0) {
142 do {
143 if (*out_pos == out_size)
144 return true;
146 out[*out_pos] = rc->cache + (uint8_t)(rc->low >> 32);
147 ++*out_pos;
148 ++rc->out_total;
149 rc->cache = 0xFF;
151 } while (--rc->cache_size != 0);
153 rc->cache = (rc->low >> 24) & 0xFF;
156 ++rc->cache_size;
157 rc->low = (rc->low & 0x00FFFFFF) << RC_SHIFT_BITS;
159 return false;
163 // NOTE: The last two arguments are uint64_t instead of size_t because in
164 // the dummy version these refer to the size of the whole range-encoded
165 // output stream, not just to the currently available output buffer space.
166 static inline bool
167 rc_shift_low_dummy(uint64_t *low, uint64_t *cache_size, uint8_t *cache,
168 uint64_t *out_pos, uint64_t out_size)
170 if ((uint32_t)(*low) < (uint32_t)(0xFF000000)
171 || (uint32_t)(*low >> 32) != 0) {
172 do {
173 if (*out_pos == out_size)
174 return true;
176 ++*out_pos;
177 *cache = 0xFF;
179 } while (--*cache_size != 0);
181 *cache = (*low >> 24) & 0xFF;
184 ++*cache_size;
185 *low = (*low & 0x00FFFFFF) << RC_SHIFT_BITS;
187 return false;
191 static inline bool
192 rc_encode(lzma_range_encoder *rc,
193 uint8_t *out, size_t *out_pos, size_t out_size)
195 assert(rc->count <= RC_SYMBOLS_MAX);
197 while (rc->pos < rc->count) {
198 // Normalize
199 if (rc->range < RC_TOP_VALUE) {
200 if (rc_shift_low(rc, out, out_pos, out_size))
201 return true;
203 rc->range <<= RC_SHIFT_BITS;
206 // Encode a bit
207 switch (rc->symbols[rc->pos]) {
208 case RC_BIT_0: {
209 probability prob = *rc->probs[rc->pos];
210 rc->range = (rc->range >> RC_BIT_MODEL_TOTAL_BITS)
211 * prob;
212 prob += (RC_BIT_MODEL_TOTAL - prob) >> RC_MOVE_BITS;
213 *rc->probs[rc->pos] = prob;
214 break;
217 case RC_BIT_1: {
218 probability prob = *rc->probs[rc->pos];
219 const uint32_t bound = prob * (rc->range
220 >> RC_BIT_MODEL_TOTAL_BITS);
221 rc->low += bound;
222 rc->range -= bound;
223 prob -= prob >> RC_MOVE_BITS;
224 *rc->probs[rc->pos] = prob;
225 break;
228 case RC_DIRECT_0:
229 rc->range >>= 1;
230 break;
232 case RC_DIRECT_1:
233 rc->range >>= 1;
234 rc->low += rc->range;
235 break;
237 case RC_FLUSH:
238 // Prevent further normalizations.
239 rc->range = UINT32_MAX;
241 // Flush the last five bytes (see rc_flush()).
242 do {
243 if (rc_shift_low(rc, out, out_pos, out_size))
244 return true;
245 } while (++rc->pos < rc->count);
247 // Reset the range encoder so we are ready to continue
248 // encoding if we weren't finishing the stream.
249 rc_reset(rc);
250 return false;
252 default:
253 assert(0);
254 break;
257 ++rc->pos;
260 rc->count = 0;
261 rc->pos = 0;
263 return false;
267 static inline bool
268 rc_encode_dummy(const lzma_range_encoder *rc, uint64_t out_limit)
270 assert(rc->count <= RC_SYMBOLS_MAX);
272 uint64_t low = rc->low;
273 uint64_t cache_size = rc->cache_size;
274 uint32_t range = rc->range;
275 uint8_t cache = rc->cache;
276 uint64_t out_pos = rc->out_total;
278 size_t pos = rc->pos;
280 while (true) {
281 // Normalize
282 if (range < RC_TOP_VALUE) {
283 if (rc_shift_low_dummy(&low, &cache_size, &cache,
284 &out_pos, out_limit))
285 return true;
287 range <<= RC_SHIFT_BITS;
290 // This check is here because the normalization above must
291 // be done before flushing the last bytes.
292 if (pos == rc->count)
293 break;
295 // Encode a bit
296 switch (rc->symbols[pos]) {
297 case RC_BIT_0: {
298 probability prob = *rc->probs[pos];
299 range = (range >> RC_BIT_MODEL_TOTAL_BITS)
300 * prob;
301 break;
304 case RC_BIT_1: {
305 probability prob = *rc->probs[pos];
306 const uint32_t bound = prob * (range
307 >> RC_BIT_MODEL_TOTAL_BITS);
308 low += bound;
309 range -= bound;
310 break;
313 case RC_DIRECT_0:
314 range >>= 1;
315 break;
317 case RC_DIRECT_1:
318 range >>= 1;
319 low += range;
320 break;
322 case RC_FLUSH:
323 default:
324 assert(0);
325 break;
328 ++pos;
331 // Flush the last bytes. This isn't in rc->symbols[] so we do
332 // it after the above loop to take into account the size of
333 // the flushing that will be done at the end of the stream.
334 for (pos = 0; pos < 5; ++pos) {
335 if (rc_shift_low_dummy(&low, &cache_size,
336 &cache, &out_pos, out_limit))
337 return true;
340 return false;
344 static inline uint64_t
345 rc_pending(const lzma_range_encoder *rc)
347 return rc->cache_size + 5 - 1;
350 #endif