Update THANKS
[xz/debian.git] / tests / test_microlzma.c
blobc1d99d77fc3dc311a4cf3f2574f82b883bde6238
1 // SPDX-License-Identifier: 0BSD
3 ///////////////////////////////////////////////////////////////////////////////
4 //
5 /// \file test_microlzma.c
6 /// \brief Tests MicroLZMA encoding and decoding
7 //
8 // Author: Jia Tan
9 //
10 ///////////////////////////////////////////////////////////////////////////////
12 #include "tests.h"
14 #define BUFFER_SIZE 1024
17 #ifdef HAVE_ENCODER_LZMA1
// Size in bytes of the MicroLZMA-encoded form of "Hello\nWorld\n".
#define ENCODED_OUTPUT_SIZE 17

// The text "Hello\nWorld\n" as raw bytes. Shared input of the encoder tests.
static const uint8_t hello_world[] = {
	0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, // "Hello\n"
	0x57, 0x6F, 0x72, 0x6C, 0x64, 0x0A, // "World\n"
};

// Expected CRC32 of the MicroLZMA encoding of "Hello\nWorld\n" when the
// encoder settings are based on LZMA_PRESET_DEFAULT as of liblzma 5.6.0.
// The value trusts the MicroLZMA encoder of liblzma 5.6.0, which is safe
// because the encoded "Hello\nWorld\n" can be decoded successfully with
// that version. The purpose is to catch regressions that change the
// MicroLZMA output.
static const uint32_t hello_world_encoded_crc = 0x3CDE40A8;
35 // Function implementation borrowed from lzma_decoder.c. It is needed to
36 // ensure the first byte of a MicroLZMA stream is set correctly with the
37 // negation of the LZMA properties.
38 static bool
39 lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte)
41 if (byte > (4 * 5 + 4) * 9 + 8)
42 return true;
44 // See the file format specification to understand this.
45 options->pb = byte / (9 * 5);
46 byte -= options->pb * 9 * 5;
47 options->lp = byte / 9;
48 options->lc = byte - options->lp * 9;
50 return options->lc + options->lp > LZMA_LCLP_MAX;
54 ///////////////////
55 // Encoder tests //
56 ///////////////////
58 // This tests a few of the basic options. These options are not unique to
59 // MicroLZMA in any way, its mostly ensuring that the options are actually
60 // being checked before initializing the decoder internals.
61 static void
62 test_encode_options(void)
64 lzma_stream strm = LZMA_STREAM_INIT;
65 lzma_options_lzma opt_lzma;
67 // Initialize with default options.
68 assert_false(lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT));
70 // NULL stream
71 assert_lzma_ret(lzma_microlzma_encoder(NULL, &opt_lzma),
72 LZMA_PROG_ERROR);
74 // lc/lp/pb = 5/0/2 (lc invalid)
75 opt_lzma.lc = 5;
76 opt_lzma.lp = 0;
77 opt_lzma.pb = 2;
78 assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma),
79 LZMA_OPTIONS_ERROR);
81 // lc/lp/pb = 0/5/2 (lp invalid)
82 opt_lzma.lc = 0;
83 opt_lzma.lp = 5;
84 opt_lzma.pb = 2;
85 assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma),
86 LZMA_OPTIONS_ERROR);
88 // lc/lp/pb = 3/2/2 (lc + lp invalid)
89 opt_lzma.lc = 3;
90 opt_lzma.lp = 2;
91 opt_lzma.pb = 2;
92 assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma),
93 LZMA_OPTIONS_ERROR);
95 // lc/lp/pb = 3/0/5 (pb invalid)
96 opt_lzma.lc = 3;
97 opt_lzma.lp = 0;
98 opt_lzma.pb = 5;
99 assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma),
100 LZMA_OPTIONS_ERROR);
102 // Zero out lp, pb, lc options to not interfere with later tests.
103 opt_lzma.lp = 0;
104 opt_lzma.pb = 0;
105 opt_lzma.lc = 0;
107 // Set invalid dictionary size.
108 opt_lzma.dict_size = LZMA_DICT_SIZE_MIN - 1;
109 assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma),
110 LZMA_OPTIONS_ERROR);
112 // Maximum dictionary size for the encoder, as described in lzma12.h
113 // is 1.5 GiB.
114 opt_lzma.dict_size = (UINT32_C(1) << 30) + (UINT32_C(1) << 29) + 1;
115 assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma),
116 LZMA_OPTIONS_ERROR);
118 lzma_end(&strm);
122 static void
123 test_encode_basic(void)
125 lzma_stream strm = LZMA_STREAM_INIT;
126 lzma_options_lzma opt_lzma;
128 // The lzma_lzma_preset return value is inverse of what it perhaps
129 // should be, that is, it returns false on success.
130 assert_false(lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT));
132 // Initialize the encoder using the default options.
133 assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), LZMA_OK);
135 uint8_t output[BUFFER_SIZE];
137 strm.next_in = hello_world;
138 strm.avail_in = sizeof(hello_world);
139 strm.next_out = output;
140 strm.avail_out = sizeof(output);
142 // Everything must be encoded in one lzma_code() call.
143 assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_STREAM_END);
145 // Check that the entire input was consumed.
146 assert_uint_eq(strm.total_in, sizeof(hello_world));
148 // Check that the first byte in the output stream is not 0x00.
149 // In a regular raw LZMA stream the first byte is always 0x00.
150 // In MicroLZMA the first byte replaced by the bitwise-negation
151 // of the LZMA properties.
152 assert_uint(output[0], !=, 0x00);
154 const uint8_t props = ~output[0];
156 lzma_options_lzma test_options;
157 assert_false(lzma_lzma_lclppb_decode(&test_options, props));
159 assert_uint_eq(opt_lzma.lc, test_options.lc);
160 assert_uint_eq(opt_lzma.lp, test_options.lp);
161 assert_uint_eq(opt_lzma.pb, test_options.pb);
163 // Compute the check over the output data. This is compared to
164 // the expected check value.
165 const uint32_t check_val = lzma_crc32(output, strm.total_out, 0);
167 assert_uint_eq(check_val, hello_world_encoded_crc);
169 lzma_end(&strm);
173 // This tests the behavior when strm.avail_out is so small it cannot hold
174 // the header plus 1 encoded byte (< 6).
175 static void
176 test_encode_small_out(void)
178 lzma_stream strm = LZMA_STREAM_INIT;
179 lzma_options_lzma opt_lzma;
181 assert_false(lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT));
183 assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), LZMA_OK);
185 uint8_t output[BUFFER_SIZE];
187 strm.next_in = hello_world;
188 strm.avail_in = sizeof(hello_world);
189 strm.next_out = output;
190 strm.avail_out = 5;
192 // LZMA_PROG_ERROR is expected when strm.avail_out < 6
193 assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_PROG_ERROR);
195 // The encoder must be reset because coders cannot be used again
196 // after returning LZMA_PROG_ERROR.
197 assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), LZMA_OK);
199 // Reset strm.avail_out to be > 6, but not enough to hold all of the
200 // compressed data.
201 strm.avail_out = ENCODED_OUTPUT_SIZE - 1;
203 // Encoding should not return an error now.
204 assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_STREAM_END);
205 assert_uint(strm.total_in, <, sizeof(hello_world));
207 lzma_end(&strm);
211 // LZMA_FINISH is the only supported action. All others must
212 // return LZMA_PROG_ERROR.
213 static void
214 test_encode_actions(void)
216 lzma_stream strm = LZMA_STREAM_INIT;
217 lzma_options_lzma opt_lzma;
219 assert_false(lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT));
221 const lzma_action actions[] = {
222 LZMA_RUN,
223 LZMA_SYNC_FLUSH,
224 LZMA_FULL_FLUSH,
225 LZMA_FULL_BARRIER,
228 for (size_t i = 0; i < ARRAY_SIZE(actions); ++i) {
229 assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma),
230 LZMA_OK);
232 uint8_t output[BUFFER_SIZE];
234 strm.next_in = hello_world;
235 strm.avail_in = sizeof(hello_world);
236 strm.next_out = output;
237 strm.avail_out = sizeof(output);
239 assert_lzma_ret(lzma_code(&strm, actions[i]),
240 LZMA_PROG_ERROR);
243 lzma_end(&strm);
245 #endif // HAVE_ENCODER_LZMA1
248 ///////////////////
249 // Decoder tests //
250 ///////////////////
252 #if defined(HAVE_DECODER_LZMA1) && defined(HAVE_ENCODER_LZMA1)
// The text "Goodbye World!" as raw bytes. Shared input of the decoder tests.
static const uint8_t goodbye_world[] = {
	0x47, 0x6F, 0x6F, 0x64, 0x62, 0x79, 0x65, // "Goodbye"
	0x20,                                     // " "
	0x57, 0x6F, 0x72, 0x6C, 0x64, 0x21,       // "World!"
};

// MicroLZMA-encoded form of goodbye_world and its size in bytes. Both are
// set in main() before the decoder tests are run.
static uint8_t *goodbye_world_encoded = NULL;
static size_t goodbye_world_encoded_size = 0;
262 // Helper function to encode data and return the compressed size.
263 static size_t
264 basic_microlzma_encode(const uint8_t *input, size_t in_size,
265 uint8_t **compressed)
267 lzma_stream strm = LZMA_STREAM_INIT;
268 lzma_options_lzma opt_lzma;
270 // Lazy way to set the output size since the input should never
271 // inflate by much in these simple test cases. This is tested to
272 // be large enough after encoding to fit the entire input, so if
273 // this assumption does not hold then this will fail.
274 const size_t out_size = in_size << 1;
276 *compressed = tuktest_malloc(out_size);
278 // Always encode with the default options for simplicity.
279 if (lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT))
280 goto decoder_setup_error;
282 if (lzma_microlzma_encoder(&strm, &opt_lzma) != LZMA_OK)
283 goto decoder_setup_error;
285 strm.next_in = input;
286 strm.avail_in = in_size;
287 strm.next_out = *compressed;
288 strm.avail_out = out_size;
290 if (lzma_code(&strm, LZMA_FINISH) != LZMA_STREAM_END)
291 goto decoder_setup_error;
293 // Check that the entire input was consumed and that it fit into
294 // the output buffer.
295 if (strm.total_in != in_size)
296 goto decoder_setup_error;
298 lzma_end(&strm);
300 // lzma_end() doesn't touch other members of lzma_stream than
301 // lzma_stream.internal so using strm.total_out here is fine.
302 return strm.total_out;
304 decoder_setup_error:
305 tuktest_error("Failed to initialize decoder tests");
306 return 0;
310 static void
311 test_decode_options(void)
313 // NULL stream
314 assert_lzma_ret(lzma_microlzma_decoder(NULL, BUFFER_SIZE,
315 sizeof(hello_world), true,
316 LZMA_DICT_SIZE_DEFAULT), LZMA_PROG_ERROR);
318 // Uncompressed size larger than max
319 lzma_stream strm = LZMA_STREAM_INIT;
320 assert_lzma_ret(lzma_microlzma_decoder(&strm, BUFFER_SIZE,
321 LZMA_VLI_MAX + 1, true, LZMA_DICT_SIZE_DEFAULT),
322 LZMA_OPTIONS_ERROR);
326 // Test that decoding succeeds when uncomp_size is correct regardless of
327 // the value of uncomp_size_is_exact.
328 static void
329 test_decode_uncomp_size_is_exact(void)
331 lzma_stream strm = LZMA_STREAM_INIT;
333 assert_lzma_ret(lzma_microlzma_decoder(&strm,
334 goodbye_world_encoded_size,
335 sizeof(goodbye_world), true,
336 LZMA_DICT_SIZE_DEFAULT), LZMA_OK);
338 uint8_t output[BUFFER_SIZE];
340 strm.next_in = goodbye_world_encoded;
341 strm.avail_in = goodbye_world_encoded_size;
342 strm.next_out = output;
343 strm.avail_out = sizeof(output);
345 assert_lzma_ret(lzma_code(&strm, LZMA_RUN), LZMA_STREAM_END);
346 assert_uint_eq(strm.total_in, goodbye_world_encoded_size);
348 assert_uint_eq(strm.total_out, sizeof(goodbye_world));
349 assert_array_eq(goodbye_world, output, sizeof(goodbye_world));
351 // Reset decoder with uncomp_size_is_exact set to false and
352 // uncomp_size set to correct value. Also test using the
353 // uncompressed size as the dictionary size.
354 assert_lzma_ret(lzma_microlzma_decoder(&strm,
355 goodbye_world_encoded_size,
356 sizeof(goodbye_world), false,
357 sizeof(goodbye_world)), LZMA_OK);
359 strm.next_in = goodbye_world_encoded;
360 strm.avail_in = goodbye_world_encoded_size;
361 strm.next_out = output;
362 strm.avail_out = sizeof(output);
364 assert_lzma_ret(lzma_code(&strm, LZMA_RUN), LZMA_STREAM_END);
365 assert_uint_eq(strm.total_in, goodbye_world_encoded_size);
367 assert_uint_eq(strm.total_out, sizeof(goodbye_world));
368 assert_array_eq(goodbye_world, output, sizeof(goodbye_world));
370 lzma_end(&strm);
374 // This tests decoding when MicroLZMA decoder is called with
375 // an incorrect uncompressed size.
376 static void
377 test_decode_uncomp_size_wrong(void)
379 lzma_stream strm = LZMA_STREAM_INIT;
380 assert_lzma_ret(lzma_microlzma_decoder(&strm,
381 goodbye_world_encoded_size,
382 sizeof(goodbye_world) + 1, false,
383 LZMA_DICT_SIZE_DEFAULT), LZMA_OK);
385 uint8_t output[BUFFER_SIZE];
387 strm.next_in = goodbye_world_encoded;
388 strm.avail_in = goodbye_world_encoded_size;
389 strm.next_out = output;
390 strm.avail_out = sizeof(output);
392 // LZMA_OK should be returned because the input size given was
393 // larger than the actual encoded size. The decoder is expecting
394 // more input to possibly fill the uncompressed size that was set.
395 assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_OK);
397 assert_uint_eq(strm.total_out, sizeof(goodbye_world));
399 assert_array_eq(goodbye_world, output, sizeof(goodbye_world));
401 // Next, test with uncomp_size_is_exact set.
402 assert_lzma_ret(lzma_microlzma_decoder(&strm,
403 goodbye_world_encoded_size,
404 sizeof(goodbye_world) + 1, true,
405 LZMA_DICT_SIZE_DEFAULT), LZMA_OK);
407 strm.next_in = goodbye_world_encoded;
408 strm.avail_in = goodbye_world_encoded_size;
409 strm.next_out = output;
410 strm.avail_out = sizeof(output);
412 // No error detected, even though all input was consumed and there
413 // is more room in the output buffer.
415 // FIXME? LZMA_FINISH tells that no more input is coming and
416 // the MicroLZMA decoder knows the exact compressed size from
417 // the initialization as well. So should it return LZMA_DATA_ERROR
418 // on the first call instead of relying on the generic lzma_code()
419 // logic to eventually get LZMA_BUF_ERROR?
420 assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_OK);
421 assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_OK);
422 assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_BUF_ERROR);
424 assert_uint_eq(strm.total_out, sizeof(goodbye_world));
425 assert_array_eq(goodbye_world, output, sizeof(goodbye_world));
427 // Reset stream with uncomp_size smaller than the real
428 // uncompressed size.
429 assert_lzma_ret(lzma_microlzma_decoder(&strm,
430 goodbye_world_encoded_size,
431 ARRAY_SIZE(hello_world) - 1, true,
432 LZMA_DICT_SIZE_DEFAULT), LZMA_OK);
434 strm.next_in = goodbye_world_encoded;
435 strm.avail_in = goodbye_world_encoded_size;
436 strm.next_out = output;
437 strm.avail_out = sizeof(output);
439 // This case actually results in an error since it decodes the full
440 // uncompressed size but the range coder is not in the proper state
441 // for the stream to end.
442 assert_lzma_ret(lzma_code(&strm, LZMA_RUN), LZMA_DATA_ERROR);
444 lzma_end(&strm);
448 static void
449 test_decode_comp_size_wrong(void)
451 lzma_stream strm = LZMA_STREAM_INIT;
453 // goodbye_world_encoded_size + 1 is safe because extra space was
454 // allocated for goodbye_world_encoded. The extra space isn't
455 // initialized but it shouldn't be read either, thus Valgrind
456 // has to remain happy with this code.
457 assert_lzma_ret(lzma_microlzma_decoder(&strm,
458 goodbye_world_encoded_size + 1,
459 sizeof(goodbye_world), true,
460 LZMA_DICT_SIZE_DEFAULT), LZMA_OK);
462 uint8_t output[BUFFER_SIZE];
464 strm.next_in = goodbye_world_encoded;
465 strm.avail_in = goodbye_world_encoded_size;
466 strm.next_out = output;
467 strm.avail_out = sizeof(output);
469 // When uncomp_size_is_exact is set, the compressed size must be
470 // correct or else LZMA_DATA_ERROR is returned.
471 assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_DATA_ERROR);
473 assert_lzma_ret(lzma_microlzma_decoder(&strm,
474 goodbye_world_encoded_size + 1,
475 sizeof(goodbye_world), false,
476 LZMA_DICT_SIZE_DEFAULT), LZMA_OK);
478 strm.next_in = goodbye_world_encoded;
479 strm.avail_in = goodbye_world_encoded_size;
480 strm.next_out = output;
481 strm.avail_out = sizeof(output);
483 // When uncomp_size_is_exact is not set, the decoder does not
484 // detect when the compressed size is wrong as long as all of the
485 // expected output has been decoded. This is because the decoder
486 // assumes that the real uncompressed size might be bigger than
487 // the specified value and in that case more input might be needed
488 // as well.
489 assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_STREAM_END);
491 lzma_end(&strm);
495 static void
496 test_decode_bad_lzma_properties(void)
498 // Alter first byte to encode invalid LZMA properties.
499 uint8_t *compressed = tuktest_malloc(goodbye_world_encoded_size);
500 memcpy(compressed, goodbye_world_encoded, goodbye_world_encoded_size);
502 // lc=3, lp=2, pb=2
503 compressed[0] = (uint8_t)~0x6FU;
505 lzma_stream strm = LZMA_STREAM_INIT;
506 assert_lzma_ret(lzma_microlzma_decoder(&strm,
507 goodbye_world_encoded_size,
508 sizeof(goodbye_world), false,
509 LZMA_DICT_SIZE_DEFAULT), LZMA_OK);
511 uint8_t output[BUFFER_SIZE];
513 strm.next_in = compressed;
514 strm.avail_in = goodbye_world_encoded_size;
515 strm.next_out = output;
516 strm.avail_out = sizeof(output);
518 assert_lzma_ret(lzma_code(&strm, LZMA_RUN), LZMA_OPTIONS_ERROR);
520 // Use valid, but incorrect LZMA properties.
521 // lc=3, lp=1, pb=2
522 compressed[0] = (uint8_t)~0x66;
524 assert_lzma_ret(lzma_microlzma_decoder(&strm,
525 goodbye_world_encoded_size,
526 ARRAY_SIZE(goodbye_world), true,
527 LZMA_DICT_SIZE_DEFAULT), LZMA_OK);
529 strm.next_in = compressed;
530 strm.avail_in = goodbye_world_encoded_size;
531 strm.next_out = output;
532 strm.avail_out = sizeof(output);
534 assert_lzma_ret(lzma_code(&strm, LZMA_RUN), LZMA_DATA_ERROR);
536 lzma_end(&strm);
538 #endif
541 extern int
542 main(int argc, char **argv)
544 tuktest_start(argc, argv);
546 #ifndef HAVE_ENCODER_LZMA1
547 tuktest_early_skip("LZMA1 encoder disabled");
548 #else
549 tuktest_run(test_encode_options);
550 tuktest_run(test_encode_basic);
551 tuktest_run(test_encode_small_out);
552 tuktest_run(test_encode_actions);
554 // MicroLZMA decoder tests require the basic encoder functionality.
555 # ifdef HAVE_DECODER_LZMA1
556 goodbye_world_encoded_size = basic_microlzma_encode(goodbye_world,
557 sizeof(goodbye_world), &goodbye_world_encoded);
559 tuktest_run(test_decode_options);
560 tuktest_run(test_decode_uncomp_size_is_exact);
561 tuktest_run(test_decode_uncomp_size_wrong);
562 tuktest_run(test_decode_comp_size_wrong);
563 tuktest_run(test_decode_bad_lzma_properties);
564 # endif
566 return tuktest_end();
567 #endif