1 ///////////////////////////////////////////////////////////////////////////////
3 /// \file range_encoder.h
4 /// \brief Range Encoder
6 // Authors: Igor Pavlov
9 // This file has been put into the public domain.
10 // You can do whatever you want with this file.
12 ///////////////////////////////////////////////////////////////////////////////
14 #ifndef LZMA_RANGE_ENCODER_H
15 #define LZMA_RANGE_ENCODER_H
17 #include "range_common.h"
21 /// Maximum number of symbols that can be put pending into lzma_range_encoder
22 /// structure between calls to lzma_rc_encode(). For LZMA, 48+5 is enough
23 /// (match with big distance and length followed by range encoder flush).
24 #define RC_SYMBOLS_MAX 53
33 /// Number of bytes written out by rc_encode() -> rc_shift_low()
36 /// Number of symbols in the tables
39 /// rc_encode()'s position in the tables
49 } symbols
[RC_SYMBOLS_MAX
];
51 /// Probabilities associated with RC_BIT_0 or RC_BIT_1
52 probability
*probs
[RC_SYMBOLS_MAX
];
58 rc_reset(lzma_range_encoder
*rc
)
62 rc
->range
= UINT32_MAX
;
71 rc_forget(lzma_range_encoder
*rc
)
73 // This must not be called when rc_encode() is partially done.
80 rc_bit(lzma_range_encoder
*rc
, probability
*prob
, uint32_t bit
)
82 rc
->symbols
[rc
->count
] = bit
;
83 rc
->probs
[rc
->count
] = prob
;
89 rc_bittree(lzma_range_encoder
*rc
, probability
*probs
,
90 uint32_t bit_count
, uint32_t symbol
)
92 uint32_t model_index
= 1;
95 const uint32_t bit
= (symbol
>> --bit_count
) & 1;
96 rc_bit(rc
, &probs
[model_index
], bit
);
97 model_index
= (model_index
<< 1) + bit
;
98 } while (bit_count
!= 0);
103 rc_bittree_reverse(lzma_range_encoder
*rc
, probability
*probs
,
104 uint32_t bit_count
, uint32_t symbol
)
106 uint32_t model_index
= 1;
109 const uint32_t bit
= symbol
& 1;
111 rc_bit(rc
, &probs
[model_index
], bit
);
112 model_index
= (model_index
<< 1) + bit
;
113 } while (--bit_count
!= 0);
118 rc_direct(lzma_range_encoder
*rc
,
119 uint32_t value
, uint32_t bit_count
)
122 rc
->symbols
[rc
->count
++]
123 = RC_DIRECT_0
+ ((value
>> --bit_count
) & 1);
124 } while (bit_count
!= 0);
129 rc_flush(lzma_range_encoder
*rc
)
131 for (size_t i
= 0; i
< 5; ++i
)
132 rc
->symbols
[rc
->count
++] = RC_FLUSH
;
137 rc_shift_low(lzma_range_encoder
*rc
,
138 uint8_t *out
, size_t *out_pos
, size_t out_size
)
140 if ((uint32_t)(rc
->low
) < (uint32_t)(0xFF000000)
141 || (uint32_t)(rc
->low
>> 32) != 0) {
143 if (*out_pos
== out_size
)
146 out
[*out_pos
] = rc
->cache
+ (uint8_t)(rc
->low
>> 32);
151 } while (--rc
->cache_size
!= 0);
153 rc
->cache
= (rc
->low
>> 24) & 0xFF;
157 rc
->low
= (rc
->low
& 0x00FFFFFF) << RC_SHIFT_BITS
;
163 // NOTE: The last two arguments are uint64_t instead of size_t because in
164 // the dummy version these refer to the size of the whole range-encoded
165 // output stream, not just to the currently available output buffer space.
167 rc_shift_low_dummy(uint64_t *low
, uint64_t *cache_size
, uint8_t *cache
,
168 uint64_t *out_pos
, uint64_t out_size
)
170 if ((uint32_t)(*low
) < (uint32_t)(0xFF000000)
171 || (uint32_t)(*low
>> 32) != 0) {
173 if (*out_pos
== out_size
)
179 } while (--*cache_size
!= 0);
181 *cache
= (*low
>> 24) & 0xFF;
185 *low
= (*low
& 0x00FFFFFF) << RC_SHIFT_BITS
;
192 rc_encode(lzma_range_encoder
*rc
,
193 uint8_t *out
, size_t *out_pos
, size_t out_size
)
195 assert(rc
->count
<= RC_SYMBOLS_MAX
);
197 while (rc
->pos
< rc
->count
) {
199 if (rc
->range
< RC_TOP_VALUE
) {
200 if (rc_shift_low(rc
, out
, out_pos
, out_size
))
203 rc
->range
<<= RC_SHIFT_BITS
;
207 switch (rc
->symbols
[rc
->pos
]) {
209 probability prob
= *rc
->probs
[rc
->pos
];
210 rc
->range
= (rc
->range
>> RC_BIT_MODEL_TOTAL_BITS
)
212 prob
+= (RC_BIT_MODEL_TOTAL
- prob
) >> RC_MOVE_BITS
;
213 *rc
->probs
[rc
->pos
] = prob
;
218 probability prob
= *rc
->probs
[rc
->pos
];
219 const uint32_t bound
= prob
* (rc
->range
220 >> RC_BIT_MODEL_TOTAL_BITS
);
223 prob
-= prob
>> RC_MOVE_BITS
;
224 *rc
->probs
[rc
->pos
] = prob
;
234 rc
->low
+= rc
->range
;
238 // Prevent further normalizations.
239 rc
->range
= UINT32_MAX
;
241 // Flush the last five bytes (see rc_flush()).
243 if (rc_shift_low(rc
, out
, out_pos
, out_size
))
245 } while (++rc
->pos
< rc
->count
);
247 // Reset the range encoder so we are ready to continue
248 // encoding if we weren't finishing the stream.
268 rc_encode_dummy(const lzma_range_encoder
*rc
, uint64_t out_limit
)
270 assert(rc
->count
<= RC_SYMBOLS_MAX
);
272 uint64_t low
= rc
->low
;
273 uint64_t cache_size
= rc
->cache_size
;
274 uint32_t range
= rc
->range
;
275 uint8_t cache
= rc
->cache
;
276 uint64_t out_pos
= rc
->out_total
;
278 size_t pos
= rc
->pos
;
282 if (range
< RC_TOP_VALUE
) {
283 if (rc_shift_low_dummy(&low
, &cache_size
, &cache
,
284 &out_pos
, out_limit
))
287 range
<<= RC_SHIFT_BITS
;
290 // This check is here because the normalization above must
291 // be done before flushing the last bytes.
292 if (pos
== rc
->count
)
296 switch (rc
->symbols
[pos
]) {
298 probability prob
= *rc
->probs
[pos
];
299 range
= (range
>> RC_BIT_MODEL_TOTAL_BITS
)
305 probability prob
= *rc
->probs
[pos
];
306 const uint32_t bound
= prob
* (range
307 >> RC_BIT_MODEL_TOTAL_BITS
);
331 // Flush the last bytes. This isn't in rc->symbols[] so we do
332 // it after the above loop to take into account the size of
333 // the flushing that will be done at the end of the stream.
334 for (pos
= 0; pos
< 5; ++pos
) {
335 if (rc_shift_low_dummy(&low
, &cache_size
,
336 &cache
, &out_pos
, out_limit
))
344 static inline uint64_t
345 rc_pending(const lzma_range_encoder
*rc
)
347 return rc
->cache_size
+ 5 - 1;