src/liblzma/lz/lz_decoder.h

   1 // SPDX-License-Identifier: 0BSD
   2
   3 ///////////////////////////////////////////////////////////////////////////////
   4 //
   5 /// \file       lz_decoder.h
   6 /// \brief      LZ out window
   7 ///
   8 //  Authors:    Igor Pavlov
   9 //              Lasse Collin
  10 //
  11 ///////////////////////////////////////////////////////////////////////////////
  12
  13 #ifndef LZMA_LZ_DECODER_H
  14 #define LZMA_LZ_DECODER_H
  15
  16 #include "common.h"
  17
  18
  19 /// Maximum length of a match rounded up to a nice power of 2 which is
  20 /// a good size for aligned memcpy(). The allocated dictionary buffer will
  21 /// be 2 * LZ_DICT_REPEAT_MAX bytes larger than the actual dictionary size:
  22 ///
  23 /// (1) Every time the decoder reaches the end of the dictionary buffer,
  24 ///     the last LZ_DICT_REPEAT_MAX bytes will be copied to the beginning.
  25 ///     This way dict_repeat() will only need to copy from one place,
  26 ///     never from both the end and beginning of the buffer.
  27 ///
  28 /// (2) The other LZ_DICT_REPEAT_MAX bytes is kept as a buffer between
  29 ///     the oldest byte still in the dictionary and the current write
  30 ///     position. This way dict_repeat(dict, dict->size - 1, &len)
  31 ///     won't need memmove() as the copying cannot overlap.
  32 ///
  33 /// Note that memcpy() still cannot be used if distance < len.
  34 ///
  35 /// LZMA's longest match length is 273 so pick a multiple of 16 above that.
  36 #define LZ_DICT_REPEAT_MAX 288
  37
  38
  39 typedef struct {
  40         /// Pointer to the dictionary buffer.
  41         uint8_t *buf;
  42
  43         /// Write position in dictionary. The next byte will be written to
  44         /// buf[pos].
  45         size_t pos;
  46
  47         /// Indicates how full the dictionary is. This is used by
  48         /// dict_is_distance_valid() to detect corrupt files that would
  49         /// read beyond the beginning of the dictionary.
  50         size_t full;
  51
  52         /// Write limit
  53         size_t limit;
  54
  55         /// Allocated size of buf. This is 2 * LZ_DICT_REPEAT_MAX bytes
  56         /// larger than the actual dictionary size. This is enforced by
  57         /// how the value for "full" is set; it can be at most
  58         /// "size - 2 * LZ_DICT_REPEAT_MAX".
  59         size_t size;
  60
  61         /// True once the dictionary has become full and the writing position
  62         /// has been wrapped in decode_buffer() in lz_decoder.c.
  63         bool has_wrapped;
  64
  65         /// True when dictionary should be reset before decoding more data.
  66         bool need_reset;
  67
  68 } lzma_dict;
  69
  70
  71 typedef struct {
  72         size_t dict_size;
  73         const uint8_t *preset_dict;
  74         size_t preset_dict_size;
  75 } lzma_lz_options;
  76
  77
  78 typedef struct {
  79         /// Data specific to the LZ-based decoder
  80         void *coder;
  81
  82         /// Function to decode from in[] to *dict
  83         lzma_ret (*code)(void *coder,
  84                         lzma_dict *restrict dict, const uint8_t *restrict in,
  85                         size_t *restrict in_pos, size_t in_size);
  86
  87         void (*reset)(void *coder, const void *options);
  88
  89         /// Set the uncompressed size. If uncompressed_size == LZMA_VLI_UNKNOWN
  90         /// then allow_eopm will always be true.
  91         void (*set_uncompressed)(void *coder, lzma_vli uncompressed_size,
  92                         bool allow_eopm);
  93
  94         /// Free allocated resources
  95         void (*end)(void *coder, const lzma_allocator *allocator);
  96
  97 } lzma_lz_decoder;
  98
  99
 100 #define LZMA_LZ_DECODER_INIT \
 101         (lzma_lz_decoder){ \
 102                 .coder = NULL, \
 103                 .code = NULL, \
 104                 .reset = NULL, \
 105                 .set_uncompressed = NULL, \
 106                 .end = NULL, \
 107         }
 108
 109
 110 extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next,
 111                 const lzma_allocator *allocator,
 112                 const lzma_filter_info *filters,
 113                 lzma_ret (*lz_init)(lzma_lz_decoder *lz,
 114                         const lzma_allocator *allocator,
 115                         lzma_vli id, const void *options,
 116                         lzma_lz_options *lz_options));
 117
 118 extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size);
 119
 120
 121 //////////////////////
 122 // Inline functions //
 123 //////////////////////
 124
 125 /// Get a byte from the history buffer.
 126 static inline uint8_t
 127 dict_get(const lzma_dict *const dict, const uint32_t distance)
 128 {
 129         return dict->buf[dict->pos - distance - 1
 130                         + (distance < dict->pos
 131                                 ? 0 : dict->size - LZ_DICT_REPEAT_MAX)];
 132 }
 133
 134
 135 /// Optimized version of dict_get(dict, 0)
 136 static inline uint8_t
 137 dict_get0(const lzma_dict *const dict)
 138 {
 139         return dict->buf[dict->pos - 1];
 140 }
 141
 142
 143 /// Test if dictionary is empty.
 144 static inline bool
 145 dict_is_empty(const lzma_dict *const dict)
 146 {
 147         return dict->full == 0;
 148 }
 149
 150
 151 /// Validate the match distance
 152 static inline bool
 153 dict_is_distance_valid(const lzma_dict *const dict, const size_t distance)
 154 {
 155         return dict->full > distance;
 156 }
 157
 158
 159 /// Repeat *len bytes at distance.
 160 static inline bool
 161 dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len)
 162 {
 163         // Don't write past the end of the dictionary.
 164         const size_t dict_avail = dict->limit - dict->pos;
 165         uint32_t left = my_min(dict_avail, *len);
 166         *len -= left;
 167
 168         size_t back = dict->pos - distance - 1;
 169         if (distance >= dict->pos)
 170                 back += dict->size - LZ_DICT_REPEAT_MAX;
 171
 172         // Repeat a block of data from the history. Because memcpy() is faster
 173         // than copying byte by byte in a loop, the copying process gets split
 174         // into two cases.
 175         if (distance < left) {
 176                 // Source and target areas overlap, thus we can't use
 177                 // memcpy() nor even memmove() safely.
 178                 do {
 179                         dict->buf[dict->pos++] = dict->buf[back++];
 180                 } while (--left > 0);
 181         } else {
 182                 memcpy(dict->buf + dict->pos, dict->buf + back, left);
 183                 dict->pos += left;
 184         }
 185
 186         // Update how full the dictionary is.
 187         if (!dict->has_wrapped)
 188                 dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX;
 189
 190         return *len != 0;
 191 }
 192
 193
 194 static inline void
 195 dict_put(lzma_dict *dict, uint8_t byte)
 196 {
 197         dict->buf[dict->pos++] = byte;
 198
 199         if (!dict->has_wrapped)
 200                 dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX;
 201 }
 202
 203
 204 /// Puts one byte into the dictionary. Returns true if the dictionary was
 205 /// already full and the byte couldn't be added.
 206 static inline bool
 207 dict_put_safe(lzma_dict *dict, uint8_t byte)
 208 {
 209         if (unlikely(dict->pos == dict->limit))
 210                 return true;
 211
 212         dict_put(dict, byte);
 213         return false;
 214 }
 215
 216
 217 /// Copies arbitrary amount of data into the dictionary.
 218 static inline void
 219 dict_write(lzma_dict *restrict dict, const uint8_t *restrict in,
 220                 size_t *restrict in_pos, size_t in_size,
 221                 size_t *restrict left)
 222 {
 223         // NOTE: If we are being given more data than the size of the
 224         // dictionary, it could be possible to optimize the LZ decoder
 225         // so that not everything needs to go through the dictionary.
 226         // This shouldn't be very common thing in practice though, and
 227         // the slowdown of one extra memcpy() isn't bad compared to how
 228         // much time it would have taken if the data were compressed.
 229
 230         if (in_size - *in_pos > *left)
 231                 in_size = *in_pos + *left;
 232
 233         *left -= lzma_bufcpy(in, in_pos, in_size,
 234                         dict->buf, &dict->pos, dict->limit);
 235
 236         if (!dict->has_wrapped)
 237                 dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX;
 238
 239         return;
 240 }
 241
 242
 243 static inline void
 244 dict_reset(lzma_dict *dict)
 245 {
 246         dict->need_reset = true;
 247         return;
 248 }
 249
 250 #endif