1 // SPDX-License-Identifier: 0BSD
3 ///////////////////////////////////////////////////////////////////////////////
5 /// \file range_encoder.h
6 /// \brief Range Encoder
8 // Authors: Igor Pavlov
11 ///////////////////////////////////////////////////////////////////////////////
13 #ifndef LZMA_RANGE_ENCODER_H
14 #define LZMA_RANGE_ENCODER_H
16 #include "range_common.h"
20 /// Maximum number of symbols that can be put pending into lzma_range_encoder
21 /// structure between calls to lzma_rc_encode(). For LZMA, 48+5 is enough
22 /// (match with big distance and length followed by range encoder flush).
23 #define RC_SYMBOLS_MAX 53
32 /// Number of bytes written out by rc_encode() -> rc_shift_low()
35 /// Number of symbols in the tables
38 /// rc_encode()'s position in the tables
48 } symbols
[RC_SYMBOLS_MAX
];
50 /// Probabilities associated with RC_BIT_0 or RC_BIT_1
51 probability
*probs
[RC_SYMBOLS_MAX
];
57 rc_reset(lzma_range_encoder
*rc
)
61 rc
->range
= UINT32_MAX
;
70 rc_forget(lzma_range_encoder
*rc
)
72 // This must not be called when rc_encode() is partially done.
79 rc_bit(lzma_range_encoder
*rc
, probability
*prob
, uint32_t bit
)
81 rc
->symbols
[rc
->count
] = bit
;
82 rc
->probs
[rc
->count
] = prob
;
88 rc_bittree(lzma_range_encoder
*rc
, probability
*probs
,
89 uint32_t bit_count
, uint32_t symbol
)
91 uint32_t model_index
= 1;
94 const uint32_t bit
= (symbol
>> --bit_count
) & 1;
95 rc_bit(rc
, &probs
[model_index
], bit
);
96 model_index
= (model_index
<< 1) + bit
;
97 } while (bit_count
!= 0);
102 rc_bittree_reverse(lzma_range_encoder
*rc
, probability
*probs
,
103 uint32_t bit_count
, uint32_t symbol
)
105 uint32_t model_index
= 1;
108 const uint32_t bit
= symbol
& 1;
110 rc_bit(rc
, &probs
[model_index
], bit
);
111 model_index
= (model_index
<< 1) + bit
;
112 } while (--bit_count
!= 0);
117 rc_direct(lzma_range_encoder
*rc
,
118 uint32_t value
, uint32_t bit_count
)
121 rc
->symbols
[rc
->count
++]
122 = RC_DIRECT_0
+ ((value
>> --bit_count
) & 1);
123 } while (bit_count
!= 0);
128 rc_flush(lzma_range_encoder
*rc
)
130 for (size_t i
= 0; i
< 5; ++i
)
131 rc
->symbols
[rc
->count
++] = RC_FLUSH
;
136 rc_shift_low(lzma_range_encoder
*rc
,
137 uint8_t *out
, size_t *out_pos
, size_t out_size
)
139 if ((uint32_t)(rc
->low
) < (uint32_t)(0xFF000000)
140 || (uint32_t)(rc
->low
>> 32) != 0) {
142 if (*out_pos
== out_size
)
145 out
[*out_pos
] = rc
->cache
+ (uint8_t)(rc
->low
>> 32);
150 } while (--rc
->cache_size
!= 0);
152 rc
->cache
= (rc
->low
>> 24) & 0xFF;
156 rc
->low
= (rc
->low
& 0x00FFFFFF) << RC_SHIFT_BITS
;
162 // NOTE: The last two arguments are uint64_t instead of size_t because in
163 // the dummy version these refer to the size of the whole range-encoded
164 // output stream, not just to the currently available output buffer space.
166 rc_shift_low_dummy(uint64_t *low
, uint64_t *cache_size
, uint8_t *cache
,
167 uint64_t *out_pos
, uint64_t out_size
)
169 if ((uint32_t)(*low
) < (uint32_t)(0xFF000000)
170 || (uint32_t)(*low
>> 32) != 0) {
172 if (*out_pos
== out_size
)
178 } while (--*cache_size
!= 0);
180 *cache
= (*low
>> 24) & 0xFF;
184 *low
= (*low
& 0x00FFFFFF) << RC_SHIFT_BITS
;
191 rc_encode(lzma_range_encoder
*rc
,
192 uint8_t *out
, size_t *out_pos
, size_t out_size
)
194 assert(rc
->count
<= RC_SYMBOLS_MAX
);
196 while (rc
->pos
< rc
->count
) {
198 if (rc
->range
< RC_TOP_VALUE
) {
199 if (rc_shift_low(rc
, out
, out_pos
, out_size
))
202 rc
->range
<<= RC_SHIFT_BITS
;
206 switch (rc
->symbols
[rc
->pos
]) {
208 probability prob
= *rc
->probs
[rc
->pos
];
209 rc
->range
= (rc
->range
>> RC_BIT_MODEL_TOTAL_BITS
)
211 prob
+= (RC_BIT_MODEL_TOTAL
- prob
) >> RC_MOVE_BITS
;
212 *rc
->probs
[rc
->pos
] = prob
;
217 probability prob
= *rc
->probs
[rc
->pos
];
218 const uint32_t bound
= prob
* (rc
->range
219 >> RC_BIT_MODEL_TOTAL_BITS
);
222 prob
-= prob
>> RC_MOVE_BITS
;
223 *rc
->probs
[rc
->pos
] = prob
;
233 rc
->low
+= rc
->range
;
237 // Prevent further normalizations.
238 rc
->range
= UINT32_MAX
;
240 // Flush the last five bytes (see rc_flush()).
242 if (rc_shift_low(rc
, out
, out_pos
, out_size
))
244 } while (++rc
->pos
< rc
->count
);
246 // Reset the range encoder so we are ready to continue
247 // encoding if we weren't finishing the stream.
267 rc_encode_dummy(const lzma_range_encoder
*rc
, uint64_t out_limit
)
269 assert(rc
->count
<= RC_SYMBOLS_MAX
);
271 uint64_t low
= rc
->low
;
272 uint64_t cache_size
= rc
->cache_size
;
273 uint32_t range
= rc
->range
;
274 uint8_t cache
= rc
->cache
;
275 uint64_t out_pos
= rc
->out_total
;
277 size_t pos
= rc
->pos
;
281 if (range
< RC_TOP_VALUE
) {
282 if (rc_shift_low_dummy(&low
, &cache_size
, &cache
,
283 &out_pos
, out_limit
))
286 range
<<= RC_SHIFT_BITS
;
289 // This check is here because the normalization above must
290 // be done before flushing the last bytes.
291 if (pos
== rc
->count
)
295 switch (rc
->symbols
[pos
]) {
297 probability prob
= *rc
->probs
[pos
];
298 range
= (range
>> RC_BIT_MODEL_TOTAL_BITS
)
304 probability prob
= *rc
->probs
[pos
];
305 const uint32_t bound
= prob
* (range
306 >> RC_BIT_MODEL_TOTAL_BITS
);
330 // Flush the last bytes. This isn't in rc->symbols[] so we do
331 // it after the above loop to take into account the size of
332 // the flushing that will be done at the end of the stream.
333 for (pos
= 0; pos
< 5; ++pos
) {
334 if (rc_shift_low_dummy(&low
, &cache_size
,
335 &cache
, &out_pos
, out_limit
))
343 static inline uint64_t
344 rc_pending(const lzma_range_encoder
*rc
)
346 return rc
->cache_size
+ 5 - 1;