1 // SPDX-License-Identifier: 0BSD
3 ///////////////////////////////////////////////////////////////////////////////
6 /// \brief LZ out window
8 // Authors: Igor Pavlov
11 ///////////////////////////////////////////////////////////////////////////////
13 // liblzma supports multiple LZ77-based filters. The LZ part is shared
14 // between these filters. The LZ code takes care of dictionary handling
15 // and passing the data between filters in the chain. The filter-specific
16 // part decodes from the input buffer to the dictionary.
19 #include "lz_decoder.h"
23 /// Dictionary (history buffer)
26 /// The actual LZ-based decoder e.g. LZMA
29 /// Next filter in the chain, if any. Note that LZMA and LZMA2 are
30 /// only allowed as the last filter, but the long-range filter in
31 /// future can be in the middle of the chain.
34 /// True if the next filter in the chain has returned LZMA_STREAM_END.
37 /// True if the LZ decoder (e.g. LZMA) has detected end of payload
38 /// marker. This may become true before next_finished becomes true.
41 /// Temporary buffer needed when the LZ-based filter is not the last
42 /// filter in the chain. The output of the next filter is first
43 /// decoded into buffer[], which is then used as input for the actual
48 uint8_t buffer
[LZMA_BUFFER_SIZE
];
54 lz_decoder_reset(lzma_coder
*coder
)
56 coder
->dict
.pos
= 2 * LZ_DICT_REPEAT_MAX
;
58 coder
->dict
.buf
[2 * LZ_DICT_REPEAT_MAX
- 1] = '\0';
59 coder
->dict
.has_wrapped
= false;
60 coder
->dict
.need_reset
= false;
66 decode_buffer(lzma_coder
*coder
,
67 const uint8_t *restrict in
, size_t *restrict in_pos
,
68 size_t in_size
, uint8_t *restrict out
,
69 size_t *restrict out_pos
, size_t out_size
)
72 // Wrap the dictionary if needed.
73 if (coder
->dict
.pos
== coder
->dict
.size
) {
74 // See the comment of #define LZ_DICT_REPEAT_MAX.
75 coder
->dict
.pos
= LZ_DICT_REPEAT_MAX
;
76 coder
->dict
.has_wrapped
= true;
77 memcpy(coder
->dict
.buf
, coder
->dict
.buf
83 // Store the current dictionary position. It is needed to know
84 // where to start copying to the out[] buffer.
85 const size_t dict_start
= coder
->dict
.pos
;
87 // Calculate how much we allow coder->lz.code() to decode.
88 // It must not decode past the end of the dictionary
89 // buffer, and we don't want it to decode more than is
90 // actually needed to fill the out[] buffer.
91 coder
->dict
.limit
= coder
->dict
.pos
92 + my_min(out_size
- *out_pos
,
93 coder
->dict
.size
- coder
->dict
.pos
);
95 // Call the coder->lz.code() to do the actual decoding.
96 const lzma_ret ret
= coder
->lz
.code(
97 coder
->lz
.coder
, &coder
->dict
,
100 // Copy the decoded data from the dictionary to the out[]
101 // buffer. Do it conditionally because out can be NULL
102 // (in which case copy_size is always 0). Calling memcpy()
103 // with a null-pointer is undefined even if the third
105 const size_t copy_size
= coder
->dict
.pos
- dict_start
;
106 assert(copy_size
<= out_size
- *out_pos
);
109 memcpy(out
+ *out_pos
, coder
->dict
.buf
+ dict_start
,
112 *out_pos
+= copy_size
;
114 // Reset the dictionary if so requested by coder->lz.code().
115 if (coder
->dict
.need_reset
) {
116 lz_decoder_reset(coder
);
118 // Since we reset dictionary, we don't check if
119 // dictionary became full.
120 if (ret
!= LZMA_OK
|| *out_pos
== out_size
)
123 // Return if everything got decoded or an error
124 // occurred, or if there's no more data to decode.
126 // Note that detecting if there's something to decode
127 // is done by looking if dictionary become full
128 // instead of looking if *in_pos == in_size. This
129 // is because it is possible that all the input was
130 // consumed already but some data is pending to be
131 // written to the dictionary.
132 if (ret
!= LZMA_OK
|| *out_pos
== out_size
133 || coder
->dict
.pos
< coder
->dict
.size
)
141 lz_decode(void *coder_ptr
, const lzma_allocator
*allocator
,
142 const uint8_t *restrict in
, size_t *restrict in_pos
,
143 size_t in_size
, uint8_t *restrict out
,
144 size_t *restrict out_pos
, size_t out_size
,
147 lzma_coder
*coder
= coder_ptr
;
149 if (coder
->next
.code
== NULL
)
150 return decode_buffer(coder
, in
, in_pos
, in_size
,
151 out
, out_pos
, out_size
);
153 // We aren't the last coder in the chain, we need to decode
154 // our input to a temporary buffer.
155 while (*out_pos
< out_size
) {
156 // Fill the temporary buffer if it is empty.
157 if (!coder
->next_finished
158 && coder
->temp
.pos
== coder
->temp
.size
) {
160 coder
->temp
.size
= 0;
162 const lzma_ret ret
= coder
->next
.code(
164 allocator
, in
, in_pos
, in_size
,
165 coder
->temp
.buffer
, &coder
->temp
.size
,
166 LZMA_BUFFER_SIZE
, action
);
168 if (ret
== LZMA_STREAM_END
)
169 coder
->next_finished
= true;
170 else if (ret
!= LZMA_OK
|| coder
->temp
.size
== 0)
174 if (coder
->this_finished
) {
175 if (coder
->temp
.size
!= 0)
176 return LZMA_DATA_ERROR
;
178 if (coder
->next_finished
)
179 return LZMA_STREAM_END
;
184 const lzma_ret ret
= decode_buffer(coder
, coder
->temp
.buffer
,
185 &coder
->temp
.pos
, coder
->temp
.size
,
186 out
, out_pos
, out_size
);
188 if (ret
== LZMA_STREAM_END
)
189 coder
->this_finished
= true;
190 else if (ret
!= LZMA_OK
)
192 else if (coder
->next_finished
&& *out_pos
< out_size
)
193 return LZMA_DATA_ERROR
;
201 lz_decoder_end(void *coder_ptr
, const lzma_allocator
*allocator
)
203 lzma_coder
*coder
= coder_ptr
;
205 lzma_next_end(&coder
->next
, allocator
);
206 lzma_free(coder
->dict
.buf
, allocator
);
208 if (coder
->lz
.end
!= NULL
)
209 coder
->lz
.end(coder
->lz
.coder
, allocator
);
211 lzma_free(coder
->lz
.coder
, allocator
);
213 lzma_free(coder
, allocator
);
219 lzma_lz_decoder_init(lzma_next_coder
*next
, const lzma_allocator
*allocator
,
220 const lzma_filter_info
*filters
,
221 lzma_ret (*lz_init
)(lzma_lz_decoder
*lz
,
222 const lzma_allocator
*allocator
,
223 lzma_vli id
, const void *options
,
224 lzma_lz_options
*lz_options
))
226 // Allocate the base structure if it isn't already allocated.
227 lzma_coder
*coder
= next
->coder
;
229 coder
= lzma_alloc(sizeof(lzma_coder
), allocator
);
231 return LZMA_MEM_ERROR
;
234 next
->code
= &lz_decode
;
235 next
->end
= &lz_decoder_end
;
237 coder
->dict
.buf
= NULL
;
238 coder
->dict
.size
= 0;
239 coder
->lz
= LZMA_LZ_DECODER_INIT
;
240 coder
->next
= LZMA_NEXT_CODER_INIT
;
243 // Allocate and initialize the LZ-based decoder. It will also give
244 // us the dictionary size.
245 lzma_lz_options lz_options
;
246 return_if_error(lz_init(&coder
->lz
, allocator
,
247 filters
[0].id
, filters
[0].options
, &lz_options
));
249 // If the dictionary size is very small, increase it to 4096 bytes.
250 // This is to prevent constant wrapping of the dictionary, which
251 // would slow things down. The downside is that since we don't check
252 // separately for the real dictionary size, we may happily accept
254 if (lz_options
.dict_size
< 4096)
255 lz_options
.dict_size
= 4096;
257 // Make dictionary size a multiple of 16. Some LZ-based decoders like
258 // LZMA use the lowest bits lzma_dict.pos to know the alignment of the
259 // data. Aligned buffer is also good when memcpying from the
260 // dictionary to the output buffer, since applications are
261 // recommended to give aligned buffers to liblzma.
263 // Reserve 2 * LZ_DICT_REPEAT_MAX bytes of extra space which is
264 // needed for alloc_size.
266 // Avoid integer overflow.
267 if (lz_options
.dict_size
> SIZE_MAX
- 15 - 2 * LZ_DICT_REPEAT_MAX
)
268 return LZMA_MEM_ERROR
;
270 lz_options
.dict_size
= (lz_options
.dict_size
+ 15) & ~((size_t)(15));
272 // Reserve extra space as explained in the comment
273 // of #define LZ_DICT_REPEAT_MAX.
274 const size_t alloc_size
275 = lz_options
.dict_size
+ 2 * LZ_DICT_REPEAT_MAX
;
277 // Allocate and initialize the dictionary.
278 if (coder
->dict
.size
!= alloc_size
) {
279 lzma_free(coder
->dict
.buf
, allocator
);
280 coder
->dict
.buf
= lzma_alloc(alloc_size
, allocator
);
281 if (coder
->dict
.buf
== NULL
)
282 return LZMA_MEM_ERROR
;
284 // NOTE: Yes, alloc_size, not lz_options.dict_size. The way
285 // coder->dict.full is updated will take care that we will
286 // still reject distances larger than lz_options.dict_size.
287 coder
->dict
.size
= alloc_size
;
290 lz_decoder_reset(next
->coder
);
292 // Use the preset dictionary if it was given to us.
293 if (lz_options
.preset_dict
!= NULL
294 && lz_options
.preset_dict_size
> 0) {
295 // If the preset dictionary is bigger than the actual
296 // dictionary, copy only the tail.
297 const size_t copy_size
= my_min(lz_options
.preset_dict_size
,
298 lz_options
.dict_size
);
299 const size_t offset
= lz_options
.preset_dict_size
- copy_size
;
300 memcpy(coder
->dict
.buf
+ coder
->dict
.pos
,
301 lz_options
.preset_dict
+ offset
,
304 // dict.pos isn't zero after lz_decoder_reset().
305 coder
->dict
.pos
+= copy_size
;
306 coder
->dict
.full
= copy_size
;
309 // Miscellaneous initializations
310 coder
->next_finished
= false;
311 coder
->this_finished
= false;
313 coder
->temp
.size
= 0;
315 // Initialize the next filter in the chain, if any.
316 return lzma_next_filter_init(&coder
->next
, allocator
, filters
+ 1);
321 lzma_lz_decoder_memusage(size_t dictionary_size
)
323 return sizeof(lzma_coder
) + (uint64_t)(dictionary_size
);