src/liblzma/rangecoder/range_encoder.h

   1 ///////////////////////////////////////////////////////////////////////////////
   2 //
   3 /// \file       range_encoder.h
   4 /// \brief      Range Encoder
   5 ///
   6 //  Authors:    Igor Pavlov
   7 //              Lasse Collin
   8 //
   9 //  This file has been put into the public domain.
  10 //  You can do whatever you want with this file.
  11 //
  12 ///////////////////////////////////////////////////////////////////////////////
  13
  14 #ifndef LZMA_RANGE_ENCODER_H
  15 #define LZMA_RANGE_ENCODER_H
  16
  17 #include "range_common.h"
  18 #include "price.h"
  19
  20
  21 /// Maximum number of symbols that can be put pending into lzma_range_encoder
  22 /// structure between calls to lzma_rc_encode(). For LZMA, 48+5 is enough
  23 /// (match with big distance and length followed by range encoder flush).
  24 #define RC_SYMBOLS_MAX 53
  25
  26
  27 typedef struct {
  28         uint64_t low;
  29         uint64_t cache_size;
  30         uint32_t range;
  31         uint8_t cache;
  32
  33         /// Number of bytes written out by rc_encode() -> rc_shift_low()
  34         uint64_t out_total;
  35
  36         /// Number of symbols in the tables
  37         size_t count;
  38
  39         /// rc_encode()'s position in the tables
  40         size_t pos;
  41
  42         /// Symbols to encode
  43         enum {
  44                 RC_BIT_0,
  45                 RC_BIT_1,
  46                 RC_DIRECT_0,
  47                 RC_DIRECT_1,
  48                 RC_FLUSH,
  49         } symbols[RC_SYMBOLS_MAX];
  50
  51         /// Probabilities associated with RC_BIT_0 or RC_BIT_1
  52         probability *probs[RC_SYMBOLS_MAX];
  53
  54 } lzma_range_encoder;
  55
  56
  57 static inline void
  58 rc_reset(lzma_range_encoder *rc)
  59 {
  60         rc->low = 0;
  61         rc->cache_size = 1;
  62         rc->range = UINT32_MAX;
  63         rc->cache = 0;
  64         rc->out_total = 0;
  65         rc->count = 0;
  66         rc->pos = 0;
  67 }
  68
  69
  70 static inline void
  71 rc_forget(lzma_range_encoder *rc)
  72 {
  73         // This must not be called when rc_encode() is partially done.
  74         assert(rc->pos == 0);
  75         rc->count = 0;
  76 }
  77
  78
  79 static inline void
  80 rc_bit(lzma_range_encoder *rc, probability *prob, uint32_t bit)
  81 {
  82         rc->symbols[rc->count] = bit;
  83         rc->probs[rc->count] = prob;
  84         ++rc->count;
  85 }
  86
  87
  88 static inline void
  89 rc_bittree(lzma_range_encoder *rc, probability *probs,
  90                 uint32_t bit_count, uint32_t symbol)
  91 {
  92         uint32_t model_index = 1;
  93
  94         do {
  95                 const uint32_t bit = (symbol >> --bit_count) & 1;
  96                 rc_bit(rc, &probs[model_index], bit);
  97                 model_index = (model_index << 1) + bit;
  98         } while (bit_count != 0);
  99 }
 100
 101
 102 static inline void
 103 rc_bittree_reverse(lzma_range_encoder *rc, probability *probs,
 104                 uint32_t bit_count, uint32_t symbol)
 105 {
 106         uint32_t model_index = 1;
 107
 108         do {
 109                 const uint32_t bit = symbol & 1;
 110                 symbol >>= 1;
 111                 rc_bit(rc, &probs[model_index], bit);
 112                 model_index = (model_index << 1) + bit;
 113         } while (--bit_count != 0);
 114 }
 115
 116
 117 static inline void
 118 rc_direct(lzma_range_encoder *rc,
 119                 uint32_t value, uint32_t bit_count)
 120 {
 121         do {
 122                 rc->symbols[rc->count++]
 123                                 = RC_DIRECT_0 + ((value >> --bit_count) & 1);
 124         } while (bit_count != 0);
 125 }
 126
 127
 128 static inline void
 129 rc_flush(lzma_range_encoder *rc)
 130 {
 131         for (size_t i = 0; i < 5; ++i)
 132                 rc->symbols[rc->count++] = RC_FLUSH;
 133 }
 134
 135
 136 static inline bool
 137 rc_shift_low(lzma_range_encoder *rc,
 138                 uint8_t *out, size_t *out_pos, size_t out_size)
 139 {
 140         if ((uint32_t)(rc->low) < (uint32_t)(0xFF000000)
 141                         || (uint32_t)(rc->low >> 32) != 0) {
 142                 do {
 143                         if (*out_pos == out_size)
 144                                 return true;
 145
 146                         out[*out_pos] = rc->cache + (uint8_t)(rc->low >> 32);
 147                         ++*out_pos;
 148                         ++rc->out_total;
 149                         rc->cache = 0xFF;
 150
 151                 } while (--rc->cache_size != 0);
 152
 153                 rc->cache = (rc->low >> 24) & 0xFF;
 154         }
 155
 156         ++rc->cache_size;
 157         rc->low = (rc->low & 0x00FFFFFF) << RC_SHIFT_BITS;
 158
 159         return false;
 160 }
 161
 162
 163 // NOTE: The last two arguments are uint64_t instead of size_t because in
 164 // the dummy version these refer to the size of the whole range-encoded
 165 // output stream, not just to the currently available output buffer space.
 166 static inline bool
 167 rc_shift_low_dummy(uint64_t *low, uint64_t *cache_size, uint8_t *cache,
 168                 uint64_t *out_pos, uint64_t out_size)
 169 {
 170         if ((uint32_t)(*low) < (uint32_t)(0xFF000000)
 171                         || (uint32_t)(*low >> 32) != 0) {
 172                 do {
 173                         if (*out_pos == out_size)
 174                                 return true;
 175
 176                         ++*out_pos;
 177                         *cache = 0xFF;
 178
 179                 } while (--*cache_size != 0);
 180
 181                 *cache = (*low >> 24) & 0xFF;
 182         }
 183
 184         ++*cache_size;
 185         *low = (*low & 0x00FFFFFF) << RC_SHIFT_BITS;
 186
 187         return false;
 188 }
 189
 190
 191 static inline bool
 192 rc_encode(lzma_range_encoder *rc,
 193                 uint8_t *out, size_t *out_pos, size_t out_size)
 194 {
 195         assert(rc->count <= RC_SYMBOLS_MAX);
 196
 197         while (rc->pos < rc->count) {
 198                 // Normalize
 199                 if (rc->range < RC_TOP_VALUE) {
 200                         if (rc_shift_low(rc, out, out_pos, out_size))
 201                                 return true;
 202
 203                         rc->range <<= RC_SHIFT_BITS;
 204                 }
 205
 206                 // Encode a bit
 207                 switch (rc->symbols[rc->pos]) {
 208                 case RC_BIT_0: {
 209                         probability prob = *rc->probs[rc->pos];
 210                         rc->range = (rc->range >> RC_BIT_MODEL_TOTAL_BITS)
 211                                         * prob;
 212                         prob += (RC_BIT_MODEL_TOTAL - prob) >> RC_MOVE_BITS;
 213                         *rc->probs[rc->pos] = prob;
 214                         break;
 215                 }
 216
 217                 case RC_BIT_1: {
 218                         probability prob = *rc->probs[rc->pos];
 219                         const uint32_t bound = prob * (rc->range
 220                                         >> RC_BIT_MODEL_TOTAL_BITS);
 221                         rc->low += bound;
 222                         rc->range -= bound;
 223                         prob -= prob >> RC_MOVE_BITS;
 224                         *rc->probs[rc->pos] = prob;
 225                         break;
 226                 }
 227
 228                 case RC_DIRECT_0:
 229                         rc->range >>= 1;
 230                         break;
 231
 232                 case RC_DIRECT_1:
 233                         rc->range >>= 1;
 234                         rc->low += rc->range;
 235                         break;
 236
 237                 case RC_FLUSH:
 238                         // Prevent further normalizations.
 239                         rc->range = UINT32_MAX;
 240
 241                         // Flush the last five bytes (see rc_flush()).
 242                         do {
 243                                 if (rc_shift_low(rc, out, out_pos, out_size))
 244                                         return true;
 245                         } while (++rc->pos < rc->count);
 246
 247                         // Reset the range encoder so we are ready to continue
 248                         // encoding if we weren't finishing the stream.
 249                         rc_reset(rc);
 250                         return false;
 251
 252                 default:
 253                         assert(0);
 254                         break;
 255                 }
 256
 257                 ++rc->pos;
 258         }
 259
 260         rc->count = 0;
 261         rc->pos = 0;
 262
 263         return false;
 264 }
 265
 266
 267 static inline bool
 268 rc_encode_dummy(const lzma_range_encoder *rc, uint64_t out_limit)
 269 {
 270         assert(rc->count <= RC_SYMBOLS_MAX);
 271
 272         uint64_t low = rc->low;
 273         uint64_t cache_size = rc->cache_size;
 274         uint32_t range = rc->range;
 275         uint8_t cache = rc->cache;
 276         uint64_t out_pos = rc->out_total;
 277
 278         size_t pos = rc->pos;
 279
 280         while (true) {
 281                 // Normalize
 282                 if (range < RC_TOP_VALUE) {
 283                         if (rc_shift_low_dummy(&low, &cache_size, &cache,
 284                                         &out_pos, out_limit))
 285                                 return true;
 286
 287                         range <<= RC_SHIFT_BITS;
 288                 }
 289
 290                 // This check is here because the normalization above must
 291                 // be done before flushing the last bytes.
 292                 if (pos == rc->count)
 293                         break;
 294
 295                 // Encode a bit
 296                 switch (rc->symbols[pos]) {
 297                 case RC_BIT_0: {
 298                         probability prob = *rc->probs[pos];
 299                         range = (range >> RC_BIT_MODEL_TOTAL_BITS)
 300                                         * prob;
 301                         break;
 302                 }
 303
 304                 case RC_BIT_1: {
 305                         probability prob = *rc->probs[pos];
 306                         const uint32_t bound = prob * (range
 307                                         >> RC_BIT_MODEL_TOTAL_BITS);
 308                         low += bound;
 309                         range -= bound;
 310                         break;
 311                 }
 312
 313                 case RC_DIRECT_0:
 314                         range >>= 1;
 315                         break;
 316
 317                 case RC_DIRECT_1:
 318                         range >>= 1;
 319                         low += range;
 320                         break;
 321
 322                 case RC_FLUSH:
 323                 default:
 324                         assert(0);
 325                         break;
 326                 }
 327
 328                 ++pos;
 329         }
 330
 331         // Flush the last bytes. This isn't in rc->symbols[] so we do
 332         // it after the above loop to take into account the size of
 333         // the flushing that will be done at the end of the stream.
 334         for (pos = 0; pos < 5; ++pos) {
 335                 if (rc_shift_low_dummy(&low, &cache_size,
 336                                 &cache, &out_pos, out_limit))
 337                         return true;
 338         }
 339
 340         return false;
 341 }
 342
 343
 344 static inline uint64_t
 345 rc_pending(const lzma_range_encoder *rc)
 346 {
 347         return rc->cache_size + 5 - 1;
 348 }
 349
 350 #endif