1 // SPDX-License-Identifier: 0BSD
3 ///////////////////////////////////////////////////////////////////////////////
6 /// \brief LZ out window
8 // Authors: Igor Pavlov
11 ///////////////////////////////////////////////////////////////////////////////
13 #ifndef LZMA_LZ_DECODER_H
14 #define LZMA_LZ_DECODER_H
19 /// Maximum length of a match rounded up to a nice power of 2 which is
20 /// a good size for aligned memcpy(). The allocated dictionary buffer will
21 /// be 2 * LZ_DICT_REPEAT_MAX bytes larger than the actual dictionary size:
23 /// (1) Every time the decoder reaches the end of the dictionary buffer,
24 /// the last LZ_DICT_REPEAT_MAX bytes will be copied to the beginning.
25 /// This way dict_repeat() will only need to copy from one place,
26 /// never from both the end and beginning of the buffer.
28 /// (2) The other LZ_DICT_REPEAT_MAX bytes is kept as a buffer between
29 /// the oldest byte still in the dictionary and the current write
30 /// position. This way dict_repeat(dict, dict->size - 1, &len)
31 /// won't need memmove() as the copying cannot overlap.
33 /// Note that memcpy() still cannot be used if distance < len.
35 /// LZMA's longest match length is 273 so pick a multiple of 16 above that.
36 #define LZ_DICT_REPEAT_MAX 288
40 /// Pointer to the dictionary buffer.
43 /// Write position in dictionary. The next byte will be written to
47 /// Indicates how full the dictionary is. This is used by
48 /// dict_is_distance_valid() to detect corrupt files that would
49 /// read beyond the beginning of the dictionary.
55 /// Allocated size of buf. This is 2 * LZ_DICT_REPEAT_MAX bytes
56 /// larger than the actual dictionary size. This is enforced by
57 /// how the value for "full" is set; it can be at most
58 /// "size - 2 * LZ_DICT_REPEAT_MAX".
61 /// True once the dictionary has become full and the writing position
62 /// has been wrapped in decode_buffer() in lz_decoder.c.
65 /// True when dictionary should be reset before decoding more data.
73 const uint8_t *preset_dict
;
74 size_t preset_dict_size
;
79 /// Data specific to the LZ-based decoder
82 /// Function to decode from in[] to *dict
83 lzma_ret (*code
)(void *coder
,
84 lzma_dict
*restrict dict
, const uint8_t *restrict in
,
85 size_t *restrict in_pos
, size_t in_size
);
87 void (*reset
)(void *coder
, const void *options
);
89 /// Set the uncompressed size. If uncompressed_size == LZMA_VLI_UNKNOWN
90 /// then allow_eopm will always be true.
91 void (*set_uncompressed
)(void *coder
, lzma_vli uncompressed_size
,
94 /// Free allocated resources
95 void (*end
)(void *coder
, const lzma_allocator
*allocator
);
100 #define LZMA_LZ_DECODER_INIT \
105 .set_uncompressed = NULL, \
110 extern lzma_ret
lzma_lz_decoder_init(lzma_next_coder
*next
,
111 const lzma_allocator
*allocator
,
112 const lzma_filter_info
*filters
,
113 lzma_ret (*lz_init
)(lzma_lz_decoder
*lz
,
114 const lzma_allocator
*allocator
,
115 lzma_vli id
, const void *options
,
116 lzma_lz_options
*lz_options
));
118 extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size
);
121 //////////////////////
122 // Inline functions //
123 //////////////////////
125 /// Get a byte from the history buffer.
126 static inline uint8_t
127 dict_get(const lzma_dict
*const dict
, const uint32_t distance
)
129 return dict
->buf
[dict
->pos
- distance
- 1
130 + (distance
< dict
->pos
131 ? 0 : dict
->size
- LZ_DICT_REPEAT_MAX
)];
135 /// Optimized version of dict_get(dict, 0)
136 static inline uint8_t
137 dict_get0(const lzma_dict
*const dict
)
139 return dict
->buf
[dict
->pos
- 1];
143 /// Test if dictionary is empty.
145 dict_is_empty(const lzma_dict
*const dict
)
147 return dict
->full
== 0;
151 /// Validate the match distance
153 dict_is_distance_valid(const lzma_dict
*const dict
, const size_t distance
)
155 return dict
->full
> distance
;
159 /// Repeat *len bytes at distance.
161 dict_repeat(lzma_dict
*dict
, uint32_t distance
, uint32_t *len
)
163 // Don't write past the end of the dictionary.
164 const size_t dict_avail
= dict
->limit
- dict
->pos
;
165 uint32_t left
= my_min(dict_avail
, *len
);
168 size_t back
= dict
->pos
- distance
- 1;
169 if (distance
>= dict
->pos
)
170 back
+= dict
->size
- LZ_DICT_REPEAT_MAX
;
172 // Repeat a block of data from the history. Because memcpy() is faster
173 // than copying byte by byte in a loop, the copying process gets split
175 if (distance
< left
) {
176 // Source and target areas overlap, thus we can't use
177 // memcpy() nor even memmove() safely.
179 dict
->buf
[dict
->pos
++] = dict
->buf
[back
++];
180 } while (--left
> 0);
182 memcpy(dict
->buf
+ dict
->pos
, dict
->buf
+ back
, left
);
186 // Update how full the dictionary is.
187 if (!dict
->has_wrapped
)
188 dict
->full
= dict
->pos
- 2 * LZ_DICT_REPEAT_MAX
;
195 dict_put(lzma_dict
*dict
, uint8_t byte
)
197 dict
->buf
[dict
->pos
++] = byte
;
199 if (!dict
->has_wrapped
)
200 dict
->full
= dict
->pos
- 2 * LZ_DICT_REPEAT_MAX
;
204 /// Puts one byte into the dictionary. Returns true if the dictionary was
205 /// already full and the byte couldn't be added.
207 dict_put_safe(lzma_dict
*dict
, uint8_t byte
)
209 if (unlikely(dict
->pos
== dict
->limit
))
212 dict_put(dict
, byte
);
217 /// Copies arbitrary amount of data into the dictionary.
219 dict_write(lzma_dict
*restrict dict
, const uint8_t *restrict in
,
220 size_t *restrict in_pos
, size_t in_size
,
221 size_t *restrict left
)
223 // NOTE: If we are being given more data than the size of the
224 // dictionary, it could be possible to optimize the LZ decoder
225 // so that not everything needs to go through the dictionary.
226 // This shouldn't be very common thing in practice though, and
227 // the slowdown of one extra memcpy() isn't bad compared to how
228 // much time it would have taken if the data were compressed.
230 if (in_size
- *in_pos
> *left
)
231 in_size
= *in_pos
+ *left
;
233 *left
-= lzma_bufcpy(in
, in_pos
, in_size
,
234 dict
->buf
, &dict
->pos
, dict
->limit
);
236 if (!dict
->has_wrapped
)
237 dict
->full
= dict
->pos
- 2 * LZ_DICT_REPEAT_MAX
;
244 dict_reset(lzma_dict
*dict
)
246 dict
->need_reset
= true;