1 // SPDX-License-Identifier: 0BSD
3 ///////////////////////////////////////////////////////////////////////////////
5 /// \file lzip_decoder.c
6 /// \brief Decodes .lz (lzip) files
8 // Author: Michał Górny
11 ///////////////////////////////////////////////////////////////////////////////
13 #include "lzip_decoder.h"
14 #include "lzma_decoder.h"
18 // .lz format version 0 lacks the 64-bit Member size field in the footer.
// As read by the decoder in this file, the footer starts with a 32-bit CRC32
// at offset 0 and a 64-bit Data size at offset 4 (12 bytes in total).
19 #define LZIP_V0_FOOTER_SIZE 12
// Version 1 adds a 64-bit Member size at offset 12 (20 bytes in total).
20 #define LZIP_V1_FOOTER_SIZE 20
// Largest footer of the supported versions; used as the size of the
// footer buffer in the coder structure.
21 #define LZIP_FOOTER_SIZE_MAX LZIP_V1_FOOTER_SIZE
23 // lc/lp/pb are hardcoded in the .lz format.
// NOTE(review): these are the members of the decoder state that the functions
// in this file access through a lzma_lzip_coder pointer. The struct header
// and several member declarations are not visible in this listing.
39 /// .lz member format version
42 /// CRC32 of the uncompressed data in the .lz member
45 /// Uncompressed size of the .lz member
46 uint64_t uncompressed_size
;
48 /// Compressed size of the .lz member
// (the decoder counts the header bytes, the LZMA stream bytes, and the
// footer bytes into this value)
51 /// Memory usage limit
54 /// Amount of memory actually needed
57 /// If true, LZMA_GET_CHECK is returned after decoding the header
58 /// fields. As all files use CRC32 this is redundant but it's
59 /// implemented anyway since the initialization function supports
60 /// all other flags in addition to LZMA_TELL_ANY_CHECK.
63 /// If true, we won't calculate or verify the CRC32 of
64 /// the uncompressed data.
67 /// If true, we will decode concatenated .lz members and stop if
68 /// non-.lz data is seen after at least one member has been
69 /// successfully decoded.
72 /// When decoding concatenated .lz members, this is true as long as
73 /// we are decoding the first .lz member. This is needed to avoid
74 /// incorrect LZMA_FORMAT_ERROR in case there is non-.lz data at
75 /// the end of the file.
78 /// Reading position in the header and footer fields
81 /// Buffer to hold the .lz footer fields
82 uint8_t buffer
[LZIP_FOOTER_SIZE_MAX
];
84 /// Options decoded from the .lz header that are needed to initialize
85 /// the LZMA1 decoder.
86 lzma_options_lzma options
;
// Nested coder that is initialized as an LZMA1 filter and whose code()
// function is called to decode the compressed data of each member.
89 lzma_next_coder lzma_decoder
;
// Decodes .lz data as a state machine driven by coder->sequence. The states
// visible here are SEQ_ID_STRING (magic bytes), SEQ_VERSION, SEQ_DICT_SIZE,
// SEQ_CODER_INIT, SEQ_LZMA_STREAM and SEQ_MEMBER_FOOTER.
//
// coder_ptr is the lzma_lzip_coder state. in/in_pos/in_size and
// out/out_pos/out_size are the input and output buffers with their current
// positions; the positions are advanced as data is consumed and produced.
// action is consulted when the input ends while reading the ID string.
//
// Return values visible in this listing: LZMA_OK, LZMA_STREAM_END,
// LZMA_GET_CHECK, LZMA_FORMAT_ERROR, LZMA_OPTIONS_ERROR, LZMA_DATA_ERROR,
// LZMA_MEMLIMIT_ERROR, LZMA_PROG_ERROR, plus whatever return_if_error()
// propagates from lzma_next_filter_init().
//
// NOTE(review): this listing appears to have lost some lines (for example
// the statements guarded by several "input exhausted" checks, a few case
// labels, closing braces, and the tails of some calls), so the comments
// below only describe what is visible.
95 lzip_decode(void *coder_ptr
, const lzma_allocator
*allocator
,
96 const uint8_t *restrict in
, size_t *restrict in_pos
,
97 size_t in_size
, uint8_t *restrict out
,
98 size_t *restrict out_pos
, size_t out_size
, lzma_action action
)
100 lzma_lzip_coder
*coder
= coder_ptr
;
103 switch (coder
->sequence
) {
104 case SEQ_ID_STRING
: {
105 // The "ID string" or magic bytes are "LZIP" in US-ASCII.
106 const uint8_t lzip_id_string
[4] = { 0x4C, 0x5A, 0x49, 0x50 };
// coder->pos counts how many ID string bytes have matched so far, so
// the comparison can resume across calls.
108 while (coder
->pos
< sizeof(lzip_id_string
)) {
109 if (*in_pos
>= in_size
) {
110 // If we are on the 2nd+ concatenated member
111 // and the input ends before we can read
112 // the magic bytes, we discard the bytes that
113 // were already read (up to 3) and finish.
114 // See the reasoning below.
// The conditional operator applies to the whole
// "!first_member && action == LZMA_FINISH" expression:
// LZMA_STREAM_END only when both hold, otherwise LZMA_OK.
115 return !coder
->first_member
116 && action
== LZMA_FINISH
117 ? LZMA_STREAM_END
: LZMA_OK
;
120 if (in
[*in_pos
] != lzip_id_string
[coder
->pos
]) {
121 // The .lz format allows putting non-.lz data
122 // at the end of the file. If we have seen
123 // at least one valid .lz member already,
124 // then we won't consume the byte at *in_pos
125 // and will return LZMA_STREAM_END. This way
126 // apps can easily locate and read the non-.lz
127 // data after the .lz member(s).
129 // NOTE: If the first 1-3 bytes of the non-.lz
130 // data match the .lz ID string then the first
131 // 1-3 bytes of the junk will get ignored by
132 // us. If apps want to properly locate the
133 // trailing data they must ensure that the
134 // first byte of their custom data isn't the
135 // same as the first byte of .lz ID string.
136 // With the liblzma API we cannot rewind the
137 // input position across calls to lzma_code().
138 return !coder
->first_member
139 ? LZMA_STREAM_END
: LZMA_FORMAT_ERROR
;
// Start the per-member counters for the new member. The member size
// starts at the length of the ID string that was just matched.
149 coder
->uncompressed_size
= 0;
150 coder
->member_size
= sizeof(lzip_id_string
);
152 coder
->sequence
= SEQ_VERSION
;
158 if (*in_pos
>= in_size
)
// The version field is a single byte.
161 coder
->version
= in
[(*in_pos
)++];
163 // We support version 0 and unextended version 1.
164 if (coder
->version
> 1)
165 return LZMA_OPTIONS_ERROR
;
167 ++coder
->member_size
;
168 coder
->sequence
= SEQ_DICT_SIZE
;
170 // .lz versions 0 and 1 use CRC32 as the integrity check
171 // so if the application wanted to know that
172 // (LZMA_TELL_ANY_CHECK) we can tell it now.
173 if (coder
->tell_any_check
)
174 return LZMA_GET_CHECK
;
178 case SEQ_DICT_SIZE
: {
179 if (*in_pos
>= in_size
)
// The encoded dictionary size is a single byte.
182 const uint32_t ds
= in
[(*in_pos
)++];
183 ++coder
->member_size
;
185 // The five lowest bits are for the base-2 logarithm of
186 // the dictionary size and the highest three bits are
187 // the fractional part (0/16 to 7/16) that will be
188 // subtracted to get the final value.
190 // For example, with 0xB5:
193 // dict_size = 2^21 - 2^21 * 5 / 16 = 1408 KiB
194 const uint32_t b2log
= ds
& 0x1F;
195 const uint32_t fracnum
= ds
>> 5;
197 // The format versions 0 and 1 allow dictionary size in the
198 // range [4 KiB, 512 MiB].
// b2log == 12 with a nonzero fraction would give a size below 4 KiB.
199 if (b2log
< 12 || b2log
> 29 || (b2log
== 12 && fracnum
> 0))
200 return LZMA_DATA_ERROR
;
202 // 2^[b2log] - 2^[b2log] * [fracnum] / 16
203 // = 2^[b2log] - [fracnum] * 2^([b2log] - 4)
// The range check above guarantees 12 <= b2log <= 29, so both shift
// counts are valid for a 32-bit value.
204 coder
->options
.dict_size
= (UINT32_C(1) << b2log
)
205 - (fracnum
<< (b2log
- 4));
207 assert(coder
->options
.dict_size
>= 4096);
208 assert(coder
->options
.dict_size
<= (UINT32_C(512) << 20));
// The remaining LZMA1 options are not read from the input; they are
// set from the LZIP_LC/LZIP_LP/LZIP_PB constants.
210 coder
->options
.preset_dict
= NULL
;
211 coder
->options
.lc
= LZIP_LC
;
212 coder
->options
.lp
= LZIP_LP
;
213 coder
->options
.pb
= LZIP_PB
;
215 // Calculate the memory usage.
216 coder
->memusage
= lzma_lzma_decoder_memusage(&coder
->options
)
217 + LZMA_MEMUSAGE_BASE
;
219 // Initialization is a separate step because if we return
220 // LZMA_MEMLIMIT_ERROR we need to be able to restart after
221 // the memlimit has been increased.
222 coder
->sequence
= SEQ_CODER_INIT
;
227 case SEQ_CODER_INIT
: {
228 if (coder
->memusage
> coder
->memlimit
)
229 return LZMA_MEMLIMIT_ERROR
;
// Set up the nested decoder as an LZMA1 filter that uses the options
// decoded from the header.
231 const lzma_filter_info filters
[2] = {
233 .id
= LZMA_FILTER_LZMA1
,
234 .init
= &lzma_lzma_decoder_init
,
235 .options
= &coder
->options
,
241 return_if_error(lzma_next_filter_init(&coder
->lzma_decoder
,
242 allocator
, filters
));
245 coder
->sequence
= SEQ_LZMA_STREAM
;
250 case SEQ_LZMA_STREAM
: {
// Remember the buffer positions so that the amounts consumed and
// produced by this call can be added to the per-member counters.
251 const size_t in_start
= *in_pos
;
252 const size_t out_start
= *out_pos
;
254 const lzma_ret ret
= coder
->lzma_decoder
.code(
255 coder
->lzma_decoder
.coder
, allocator
,
256 in
, in_pos
, in_size
, out
, out_pos
, out_size
,
259 const size_t out_used
= *out_pos
- out_start
;
261 coder
->member_size
+= *in_pos
- in_start
;
262 coder
->uncompressed_size
+= out_used
;
264 // Don't update the CRC32 if the integrity check will be
265 // ignored or if there was no new output. The latter is
266 // important in case out == NULL to avoid null pointer + 0
267 // which is undefined behavior.
268 if (!coder
->ignore_check
&& out_used
> 0)
269 coder
->crc32
= lzma_crc32(out
+ out_start
, out_used
,
// Move on to the footer only once the nested decoder reports
// LZMA_STREAM_END.
272 if (ret
!= LZMA_STREAM_END
)
275 coder
->sequence
= SEQ_MEMBER_FOOTER
;
280 case SEQ_MEMBER_FOOTER
: {
281 // The footer of .lz version 0 lacks the Member size field.
282 // This is the only difference between version 0 and
283 // unextended version 1 formats.
284 const size_t footer_size
= coder
->version
== 0
285 ? LZIP_V0_FOOTER_SIZE
286 : LZIP_V1_FOOTER_SIZE
;
288 // Copy the CRC32, Data size, and Member size fields to
289 // the internal buffer.
290 lzma_bufcpy(in
, in_pos
, in_size
, coder
->buffer
, &coder
->pos
,
293 // Return if we didn't get the whole footer yet.
294 if (coder
->pos
< footer_size
)
// The footer bytes are counted into the member size before it is
// compared with the stored Member size below.
298 coder
->member_size
+= footer_size
;
300 // Check that the footer fields match the observed data.
// Buffer layout used below: CRC32 at offset 0 (32 bits), Data size at
// offset 4 (64 bits), Member size at offset 12 (64 bits). The helper
// names suggest the fields are little-endian.
301 if (!coder
->ignore_check
302 && coder
->crc32
!= read32le(&coder
->buffer
[0]))
303 return LZMA_DATA_ERROR
;
305 if (coder
->uncompressed_size
!= read64le(&coder
->buffer
[4]))
306 return LZMA_DATA_ERROR
;
308 if (coder
->version
> 0) {
309 // .lz version 0 has no Member size field.
310 if (coder
->member_size
!= read64le(&coder
->buffer
[12]))
311 return LZMA_DATA_ERROR
;
314 // Decoding is finished if we weren't requested to decode
315 // more than one .lz member.
316 if (!coder
->concatenated
)
317 return LZMA_STREAM_END
;
// Otherwise go back to looking for the ID string of the next member.
// Clearing first_member changes how a missing or mismatching ID
// string is reported from now on (LZMA_STREAM_END instead of
// LZMA_FORMAT_ERROR).
319 coder
->first_member
= false;
320 coder
->sequence
= SEQ_ID_STRING
;
326 return LZMA_PROG_ERROR
;
// Tears down a lzip decoder: ends the nested LZMA decoder with
// lzma_next_end() and then releases the coder structure itself with
// lzma_free(), both using the given allocator.
334 lzip_decoder_end(void *coder_ptr
, const lzma_allocator
*allocator
)
336 lzma_lzip_coder
*coder
= coder_ptr
;
337 lzma_next_end(&coder
->lzma_decoder
, allocator
);
338 lzma_free(coder
, allocator
);
// Reports the integrity check type. The result is always LZMA_CHECK_CRC32
// and does not depend on the coder state, so coder_ptr is marked unused.
344 lzip_decoder_get_check(const void *coder_ptr
lzma_attribute((__unused__
)))
346 return LZMA_CHECK_CRC32
;
// Queries and optionally updates the memory usage limit.
//
// *memusage and *old_memlimit are always set from the current coder state.
// If new_memlimit is nonzero it replaces the stored limit, unless it is
// smaller than the current memory usage, in which case LZMA_MEMLIMIT_ERROR
// is returned and the limit is left unchanged.
//
// NOTE(review): the final return statement is not visible in this listing.
351 lzip_decoder_memconfig(void *coder_ptr
, uint64_t *memusage
,
352 uint64_t *old_memlimit
, uint64_t new_memlimit
)
354 lzma_lzip_coder
*coder
= coder_ptr
;
356 *memusage
= coder
->memusage
;
357 *old_memlimit
= coder
->memlimit
;
// Zero means "only query"; the limit is not modified in that case.
359 if (new_memlimit
!= 0) {
360 if (new_memlimit
< coder
->memusage
)
361 return LZMA_MEMLIMIT_ERROR
;
363 coder
->memlimit
= new_memlimit
;
// Initializes `next` as a .lz decoder.
//
// memlimit is the memory usage limit and flags selects optional behavior.
// Flags outside LZMA_SUPPORTED_FLAGS are rejected with LZMA_OPTIONS_ERROR.
// LZMA_MEM_ERROR is returned on the allocation path.
//
// NOTE(review): the conditions guarding the allocation and the
// LZMA_MEM_ERROR return, the store of the new coder into `next`, and the
// final return are not visible in this listing.
371 lzma_lzip_decoder_init(
372 lzma_next_coder
*next
, const lzma_allocator
*allocator
,
373 uint64_t memlimit
, uint32_t flags
)
375 lzma_next_coder_init(&lzma_lzip_decoder_init
, next
, allocator
);
377 if (flags
& ~LZMA_SUPPORTED_FLAGS
)
378 return LZMA_OPTIONS_ERROR
;
380 lzma_lzip_coder
*coder
= next
->coder
;
382 coder
= lzma_alloc(sizeof(lzma_lzip_coder
), allocator
);
384 return LZMA_MEM_ERROR
;
// Install this decoder's callbacks.
387 next
->code
= &lzip_decode
;
388 next
->end
= &lzip_decoder_end
;
389 next
->get_check
= &lzip_decoder_get_check
;
390 next
->memconfig
= &lzip_decoder_memconfig
;
// The nested LZMA decoder starts out uninitialized; it is set up later
// by lzip_decode() in SEQ_CODER_INIT.
392 coder
->lzma_decoder
= LZMA_NEXT_CODER_INIT
;
// Start from the beginning of a member.
395 coder
->sequence
= SEQ_ID_STRING
;
// A limit of zero is stored as 1 (assuming my_max() yields the larger
// of its arguments).
396 coder
->memlimit
= my_max(1, memlimit
);
// Until a dictionary size has been decoded, only the base amount is
// reported as the memory usage.
397 coder
->memusage
= LZMA_MEMUSAGE_BASE
;
// Cache the flags as booleans.
398 coder
->tell_any_check
= (flags
& LZMA_TELL_ANY_CHECK
) != 0;
399 coder
->ignore_check
= (flags
& LZMA_IGNORE_CHECK
) != 0;
400 coder
->concatenated
= (flags
& LZMA_CONCATENATED
) != 0;
401 coder
->first_member
= true;
// Public entry point: sets up strm as a .lz decoder by passing
// lzma_lzip_decoder_init with the given memlimit and flags to
// lzma_next_strm_init, then marks LZMA_RUN and LZMA_FINISH as the actions
// the stream accepts.
//
// NOTE(review): the final return statement is not visible in this listing.
408 extern LZMA_API(lzma_ret
)
409 lzma_lzip_decoder(lzma_stream
*strm
, uint64_t memlimit
, uint32_t flags
)
411 lzma_next_strm_init(lzma_lzip_decoder_init
, strm
, memlimit
, flags
);
413 strm
->internal
->supported_actions
[LZMA_RUN
] = true;
414 strm
->internal
->supported_actions
[LZMA_FINISH
] = true;