mfplat: Read queue subscriber within the critical section.
[wine/zf.git] / dlls / mspatcha / lzxd_dec.c
blobfe04723b3a7a66feffaf6a993ef988a9abda9a26
1 /*
2 * LZXD decoder
4 * Copyright 2019 Conor McCarthy
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
20 * TODO
21 * - Implememnt interleaved decoding
24 #include <stdarg.h>
25 #include <assert.h>
27 #include "windef.h"
28 #include "wine/heap.h"
29 #include "wine/debug.h"
31 #include "patchapi.h"
33 #include "lzxd_dec.h"
35 WINE_DEFAULT_DEBUG_CHANNEL(mspatcha);
38 #define ELEM_SIZE 2
39 #define MAX_CODE_LEN 16
40 #define MAX_ALIGN_CODE_LEN 7
41 #define PRE_LEN_BITS 4
42 #define MAX_PRE_CODE_LEN ((1 << PRE_LEN_BITS) - 1)
43 #define MAIN_TABLE_SIZE (1 << MAX_CODE_LEN)
44 #define ALIGN_TABLE_SIZE (1 << MAX_ALIGN_CODE_LEN)
45 #define HUFF_ERROR 0xFFFF
46 #define REP_COUNT 3
47 #define MAX_POS_SLOTS 290
48 #define ALIGN_CODE_COUNT 8
49 #define PRE_LEN_CODE_COUNT 20
50 #define MAIN_CODE_COUNT(slots) (256 + 8 * (slots))
51 #define MAX_MAIN_CODES MAIN_CODE_COUNT(MAX_POS_SLOTS)
52 #define LEN_CODE_COUNT 249
53 #define MAX_CHUNK_UNCOMPRESSED_SIZE 0x8000
54 #define E8_TRANSFORM_LIMIT_BITS 30
55 #define E8_TRANSFORM_DEAD_ZONE 10
57 struct LZXD_dec {
58 /* use byte pointers instead of uint16 for simplicity on uncompressed
59 * chunks, and the stream is not 16-bit aligned anyway */
60 const BYTE *stream_buf;
61 /* the next word to load into the bit cache */
62 const BYTE *src;
63 /* location of the next chunk size field */
64 const BYTE *chunk_end;
65 /* position in the output where the maximum allowed decompressed chunk size is reached */
66 size_t uncomp_chunk_end;
67 /* end of the input */
68 const BYTE *stream_end;
69 /* bit cache */
70 UINT32 bits;
71 /* number of unused bits in the cache starting from bit 0 */
72 unsigned bit_pos;
73 /* number of padding bits added trying to read at the chunk end */
74 unsigned tail_bits;
75 /* repeat matches */
76 size_t reps[REP_COUNT];
77 /* distance slot count is required for loading code lengths */
78 unsigned dist_slot_count;
79 /* huffman code lengths */
80 BYTE align_lengths[ALIGN_CODE_COUNT];
81 BYTE main_lengths[MAX_MAIN_CODES];
82 BYTE len_lengths[LEN_CODE_COUNT];
83 UINT16 align_table[ALIGN_TABLE_SIZE];
84 UINT16 main_table[MAIN_TABLE_SIZE];
85 UINT16 len_table[MAIN_TABLE_SIZE];
88 /* PA19 container format is unaligned, so the LZXD stream is not aligned either.
89 * None of this is super optimized but it's fast enough for installer work.
91 static inline UINT16 read_uint16(struct LZXD_dec *dec)
93 /* bounds check was done before calling */
94 UINT16 u = dec->src[0] | (dec->src[1] << 8);
95 dec->src += ELEM_SIZE;
96 return u;
99 /* load the next chunk size, reset bit_pos and set up limits
101 static int init_chunk(struct LZXD_dec *dec, size_t index, size_t buf_limit)
103 UINT32 chunk_size;
105 if (dec->src + ELEM_SIZE > dec->stream_end)
106 return -1;
108 /* error if tail padding bits were decoded as input */
109 if (dec->bit_pos < dec->tail_bits)
110 return -1;
112 chunk_size = read_uint16(dec);
114 dec->chunk_end = dec->src + chunk_size;
115 if (dec->chunk_end > dec->stream_end)
116 return -1;
118 dec->bit_pos = 0;
119 dec->tail_bits = 0;
121 dec->uncomp_chunk_end = min(buf_limit, index + MAX_CHUNK_UNCOMPRESSED_SIZE);
123 return 0;
126 /* ensure at least 17 bits are loaded but do not advance
128 static inline void prime_bits(struct LZXD_dec *dec)
130 while (dec->bit_pos < 17)
132 if (dec->src + ELEM_SIZE <= dec->chunk_end)
134 dec->bits = (dec->bits << 16) | read_uint16(dec);
136 else
138 /* Need to pad at the end of the chunk to decode the last codes.
139 In an error state, 0xFFFF sends the decoder down the right
140 side of the huffman tree to error out sooner. */
141 dec->bits = (dec->bits << 16) | 0xFFFF;
142 dec->tail_bits += 16;
144 dec->bit_pos += 16;
148 /* read and advance n bits
150 static inline UINT32 read_bits(struct LZXD_dec *dec, unsigned n)
152 UINT32 bits;
154 dec->bit_pos -= n;
155 bits = dec->bits >> dec->bit_pos;
156 bits &= ((1 << n) - 1);
158 while (dec->bit_pos < 17)
160 if (dec->src + ELEM_SIZE <= dec->chunk_end)
162 dec->bits = (dec->bits << 16) | read_uint16(dec);
164 else
166 /* tail padding */
167 dec->bits = (dec->bits << 16) | 0xFFFF;
168 dec->tail_bits += 16;
170 dec->bit_pos += 16;
173 return bits;
176 /* read n bits but do not advance
178 static inline UINT32 peek_bits(struct LZXD_dec *dec, unsigned n)
180 UINT32 bits = dec->bits >> (dec->bit_pos - n);
181 return bits & ((1 << n) - 1);
184 static inline void advance_bits(struct LZXD_dec *dec, unsigned length)
186 dec->bit_pos -= length;
187 prime_bits(dec);
190 /* read a 16-bit aligned UINT32
192 static UINT32 read_uint32(struct LZXD_dec *dec)
194 UINT32 u = 0;
195 unsigned n = 0;
197 assert((dec->bit_pos & 0xF) == 0);
199 while (dec->bit_pos)
201 dec->bit_pos -= 16;
202 u |= ((dec->bits >> dec->bit_pos) & 0xFFFF) << n;
203 n += 16;
205 while (n < 32 && dec->src + ELEM_SIZE <= dec->chunk_end)
207 u |= read_uint16(dec) << n;
208 n += 16;
211 return u;
214 static int make_huffman_codes(unsigned *codes, const BYTE *lengths, unsigned count)
216 unsigned len_count[MAX_CODE_LEN + 1];
217 unsigned next_code[MAX_CODE_LEN + 1];
218 unsigned i;
219 unsigned code = 0;
221 memset(len_count, 0, sizeof(len_count));
222 for (i = 0; i < count; ++i)
223 ++len_count[lengths[i]];
224 len_count[0] = 0;
226 for (i = 1; i <= MAX_CODE_LEN; ++i)
228 code = (code + len_count[i - 1]) << 1;
229 next_code[i] = code;
231 for (i = 0; i < count; i++)
233 unsigned len = lengths[i];
234 if (len)
236 /* test for bad code tree */
237 if (next_code[len] >= (1U << len))
238 return -1;
240 codes[i] = next_code[len];
241 ++next_code[len];
244 return 0;
247 void make_decode_table(UINT16 *table, const unsigned *codes,
248 const BYTE *lengths, unsigned max_len, unsigned count)
250 const size_t table_size = (size_t)1 << max_len;
251 size_t i;
253 for (i = 0; i < table_size; i++)
254 table[i] = HUFF_ERROR;
256 for (i = 0; i < count; i++) if (lengths[i])
258 BYTE diff = (BYTE)max_len - lengths[i];
259 size_t n = codes[i] << diff;
260 size_t end = n + ((size_t)1 << diff);
261 for (; n < end; ++n)
262 table[n] = (UINT16)i;
266 #define ret_if_failed(r_) do { int err_ = r_; if(err_) return err_; } while(0)
268 static int decode_lengths(struct LZXD_dec *dec,
269 BYTE *lengths, unsigned index, unsigned count)
271 unsigned codes[PRE_LEN_CODE_COUNT];
272 BYTE pre_lens[PRE_LEN_CODE_COUNT];
273 size_t i;
274 unsigned repeats = 1;
276 for (i = 0; i < PRE_LEN_CODE_COUNT; ++i)
277 pre_lens[i] = (BYTE)read_bits(dec, PRE_LEN_BITS);
279 ret_if_failed(make_huffman_codes(codes, pre_lens, PRE_LEN_CODE_COUNT));
280 make_decode_table(dec->main_table, codes, pre_lens, MAX_PRE_CODE_LEN, PRE_LEN_CODE_COUNT);
282 while (index < count)
284 UINT32 bits = peek_bits(dec, MAX_PRE_CODE_LEN);
285 UINT16 sym = dec->main_table[bits];
286 if (sym == HUFF_ERROR)
287 return -1;
288 advance_bits(dec, pre_lens[sym]);
290 if (sym < 17)
292 sym = (lengths[index] + 17 - sym) % 17;
295 lengths[index] = (BYTE)sym;
296 ++index;
297 --repeats;
298 } while (repeats && index < count);
300 repeats = 1;
302 else if (sym < 19)
304 unsigned zeros;
306 sym -= 13;
307 zeros = read_bits(dec, sym) + (1 << sym) - 12;
310 lengths[index] = 0;
311 ++index;
312 --zeros;
313 } while (zeros && index < count);
315 else
317 /* the repeat count applies to the next symbol */
318 repeats = 4 + read_bits(dec, 1);
321 return 0;
324 /* distance decoder for block_type == 1
326 static size_t decode_dist_verbatim(struct LZXD_dec *dec, unsigned dist_slot)
328 size_t dist;
329 unsigned footer_bits = 17;
331 if (dist_slot < 38)
333 footer_bits = (dist_slot >> 1) - 1;
334 dist = ((size_t)2 + (dist_slot & 1)) << footer_bits;
336 else
338 dist = ((size_t)1 << 19) + ((size_t)1 << 17) * (dist_slot - 38);
340 return dist + read_bits(dec, footer_bits);
343 /* distance decoder for block_type == 2
345 static size_t decode_dist_aligned(struct LZXD_dec *dec, unsigned dist_slot)
347 size_t dist;
348 unsigned footer_bits = 17;
350 if (dist_slot < 38)
352 footer_bits = (dist_slot >> 1) - 1;
353 dist = ((size_t)2 + (dist_slot & 1)) << footer_bits;
355 else
357 dist = ((size_t)1 << 19) + ((size_t)1 << 17) * (dist_slot - 38);
359 if (footer_bits >= 3)
361 UINT32 bits;
362 UINT16 sym;
364 footer_bits -= 3;
365 if (footer_bits)
366 dist += read_bits(dec, footer_bits) << 3;
368 bits = peek_bits(dec, MAX_ALIGN_CODE_LEN);
369 sym = dec->align_table[bits];
370 if (sym == HUFF_ERROR)
371 return ~(size_t)0;
372 advance_bits(dec, dec->align_lengths[sym]);
374 dist += sym;
376 else
378 dist += read_bits(dec, footer_bits);
380 return dist;
383 static inline void align_16_or_maybe_advance_anyway(struct LZXD_dec *dec)
385 dec->bit_pos &= 0x30;
386 /* The specification requires 16 bits of zero padding in some cases where the stream is already aligned, but
387 * the logic behind the choice to pad any particular block is undefined (it's a feature!). Fortunately it
388 * seems always to coincide with a bit_pos of 0x20, but 0x20 doesn't always mean padding, so we test for zero
389 * too. A remote chance of failure may still exist but I've never seen one occur. */
390 if (dec->bit_pos == 0x20 && (dec->bits >> 16) == 0)
391 dec->bit_pos = 0x10;
394 static int copy_uncompressed(struct LZXD_dec *dec, BYTE *base, size_t *index_ptr, size_t buf_limit, UINT32 block_size)
396 size_t index = *index_ptr;
397 size_t end = index + block_size;
398 size_t realign;
400 if (end > buf_limit)
401 return -1;
402 /* save the current alignment */
403 realign = (dec->src - dec->stream_buf) & 1;
405 while (dec->src < dec->stream_end)
407 /* now treat the input as an unaligned byte stream */
408 size_t to_copy = min(end - index, dec->uncomp_chunk_end - index);
409 to_copy = min(to_copy, (size_t)(dec->stream_end - dec->src));
411 memcpy(base + index, dec->src, to_copy);
412 index += to_copy;
413 dec->src += to_copy;
415 if (index == end)
417 /* realign at the end of the block */
418 dec->src += ((dec->src - dec->stream_buf) & 1) ^ realign;
419 /* fill the bit cache for block header decoding */
420 prime_bits(dec);
421 break;
423 /* chunk sizes are also unaligned */
424 ret_if_failed(init_chunk(dec, index, buf_limit));
426 *index_ptr = index;
427 return 0;
430 static int prime_next_chunk(struct LZXD_dec *dec, size_t index, size_t buf_limit)
432 if (dec->src < dec->chunk_end)
433 return -1;
434 ret_if_failed(init_chunk(dec, index, buf_limit));
435 prime_bits(dec);
436 return 0;
439 #define MAX_LONG_MATCH_CODE_LEN 3
440 #define LONG_MATCH_TABLE_SIZE (1 << MAX_LONG_MATCH_CODE_LEN)
442 struct long_match {
443 BYTE code_len;
444 unsigned extra_bits;
445 unsigned base;
448 static const struct long_match long_match_table[LONG_MATCH_TABLE_SIZE] = {
449 {1, 8, 0x101},
450 {1, 8, 0x101},
451 {1, 8, 0x101},
452 {1, 8, 0x101},
453 {2, 10, 0x201},
454 {2, 10, 0x201},
455 {3, 12, 0x601},
456 {3, 15, 0x101}
459 static int decode_lzxd_block(struct LZXD_dec *dec, BYTE *base, size_t predef_size, size_t *index_ptr, size_t buf_limit)
461 unsigned codes[MAX_MAIN_CODES];
462 unsigned main_code_count;
463 UINT32 block_type;
464 UINT32 block_size;
465 size_t i;
466 size_t block_limit;
467 size_t index = *index_ptr;
468 size_t (*dist_decoder)(struct LZXD_dec *dec, unsigned dist_slot);
470 if (index >= dec->uncomp_chunk_end && prime_next_chunk(dec, index, buf_limit))
471 return -1;
473 block_type = read_bits(dec, 3);
475 /* check for invalid block types */
476 if (block_type == 0 || block_type > 3)
477 return -1;
479 block_size = read_bits(dec, 8);
480 block_size = (block_size << 8) | read_bits(dec, 8);
481 block_size = (block_size << 8) | read_bits(dec, 8);
483 if (block_type == 3)
485 /* uncompressed block */
486 align_16_or_maybe_advance_anyway(dec);
487 /* must have run out of coffee at the office */
488 for (i = 0; i < REP_COUNT; ++i)
490 dec->reps[i] = read_uint32(dec);
491 if (dec->reps[i] == 0)
492 return -1;
494 /* copy the block to output */
495 return copy_uncompressed(dec, base, index_ptr, buf_limit, block_size);
497 else if (block_type == 2)
499 /* distance alignment decoder will be used */
500 for (i = 0; i < ALIGN_CODE_COUNT; ++i)
501 dec->align_lengths[i] = read_bits(dec, 3);
504 main_code_count = MAIN_CODE_COUNT(dec->dist_slot_count);
505 ret_if_failed(decode_lengths(dec, dec->main_lengths, 0, 256));
506 ret_if_failed(decode_lengths(dec, dec->main_lengths, 256, main_code_count));
507 ret_if_failed(decode_lengths(dec, dec->len_lengths, 0, LEN_CODE_COUNT));
509 dist_decoder = (block_type == 2) ? decode_dist_aligned : decode_dist_verbatim;
511 if (block_type == 2)
513 ret_if_failed(make_huffman_codes(codes, dec->align_lengths, ALIGN_CODE_COUNT));
514 make_decode_table(dec->align_table, codes, dec->align_lengths, MAX_ALIGN_CODE_LEN, ALIGN_CODE_COUNT);
517 ret_if_failed(make_huffman_codes(codes, dec->main_lengths, main_code_count));
518 make_decode_table(dec->main_table, codes, dec->main_lengths, MAX_CODE_LEN, main_code_count);
520 ret_if_failed(make_huffman_codes(codes, dec->len_lengths, LEN_CODE_COUNT));
521 make_decode_table(dec->len_table, codes, dec->len_lengths, MAX_CODE_LEN, LEN_CODE_COUNT);
523 block_limit = min(buf_limit, index + block_size);
525 while (index < block_limit)
527 UINT32 bits;
528 UINT16 sym;
530 if (index >= dec->uncomp_chunk_end && prime_next_chunk(dec, index, buf_limit))
531 return -1;
533 bits = peek_bits(dec, MAX_CODE_LEN);
534 sym = dec->main_table[bits];
535 if (sym == HUFF_ERROR)
536 return -1;
537 advance_bits(dec, dec->main_lengths[sym]);
539 if (sym < 256)
541 /* literal */
542 base[index] = (BYTE)sym;
543 ++index;
545 else {
546 size_t length;
547 size_t dist;
548 size_t end;
549 unsigned dist_slot;
551 sym -= 256;
552 length = (sym & 7) + 2;
553 dist_slot = sym >> 3;
555 if (length == 9)
557 /* extra length bits */
558 bits = peek_bits(dec, MAX_CODE_LEN);
559 sym = dec->len_table[bits];
560 if (sym == HUFF_ERROR)
561 return -1;
562 advance_bits(dec, dec->len_lengths[sym]);
564 length += sym;
566 dist = dist_slot;
567 if (dist_slot > 3)
569 /* extra distance bits */
570 dist = dist_decoder(dec, dist_slot);
571 if (dist == ~(size_t)0)
572 return -1;
574 if (length == 257)
576 /* extra-long match length */
577 bits = peek_bits(dec, MAX_LONG_MATCH_CODE_LEN);
578 advance_bits(dec, long_match_table[bits].code_len);
580 length = long_match_table[bits].base;
581 length += read_bits(dec, long_match_table[bits].extra_bits);
583 if (dist < 3)
585 /* repeat match */
586 size_t rep = dist;
587 dist = dec->reps[dist];
588 dec->reps[rep] = dec->reps[0];
590 else
592 dist -= REP_COUNT - 1;
593 dec->reps[2] = dec->reps[1];
594 dec->reps[1] = dec->reps[0];
596 dec->reps[0] = dist;
598 while (dist > index && length && index < block_limit)
600 /* undocumented: the encoder assumes an imaginary buffer
601 * of zeros exists before the start of the real buffer */
602 base[index] = 0;
603 ++index;
604 --length;
607 end = min(index + length, block_limit);
608 while (index < end)
610 base[index] = base[index - dist];
611 ++index;
615 /* error if tail padding bits were decoded as input */
616 if (dec->bit_pos < dec->tail_bits)
617 return -1;
619 *index_ptr = index;
620 return 0;
623 static void reverse_e8_transform(BYTE *decode_buf, ptrdiff_t len, ptrdiff_t e8_file_size)
625 ptrdiff_t limit = min((ptrdiff_t)1 << E8_TRANSFORM_LIMIT_BITS, len);
626 ptrdiff_t i;
628 for (i = 0; i < limit; )
630 ptrdiff_t end = min(i + MAX_CHUNK_UNCOMPRESSED_SIZE - E8_TRANSFORM_DEAD_ZONE,
631 limit - E8_TRANSFORM_DEAD_ZONE);
632 ptrdiff_t next = i + MAX_CHUNK_UNCOMPRESSED_SIZE;
634 for (; i < end; ++i)
636 if (decode_buf[i] == 0xE8)
638 ptrdiff_t delta;
639 ptrdiff_t value = (ptrdiff_t)decode_buf[i + 1] |
640 decode_buf[i + 2] << 8 |
641 decode_buf[i + 3] << 16 |
642 decode_buf[i + 4] << 24;
644 if (value >= -i && value < e8_file_size)
646 if (value >= 0)
647 delta = value - i;
648 else
649 delta = value + e8_file_size;
651 decode_buf[i + 1] = (BYTE)delta;
652 decode_buf[i + 2] = (BYTE)(delta >> 8);
653 decode_buf[i + 3] = (BYTE)(delta >> 16);
654 decode_buf[i + 4] = (BYTE)(delta >> 24);
656 i += 4;
659 i = next;
663 DWORD decode_lzxd_stream(const BYTE *src, const size_t input_size,
664 BYTE *dst, const size_t output_size,
665 const size_t predef_size,
666 DWORD large_window,
667 PPATCH_PROGRESS_CALLBACK progress_fn,
668 PVOID progress_ctx)
670 struct LZXD_dec *dec;
671 const BYTE *end = src + input_size;
672 size_t buf_size = predef_size + output_size;
673 UINT32 e8;
674 UINT32 e8_file_size = 0;
675 DWORD err = ERROR_SUCCESS;
677 TRACE("decoding stream of size %u to size %u, starting at %u\n",
678 (unsigned)input_size, (unsigned)output_size, (unsigned)predef_size);
680 if (input_size == 0)
681 return (output_size == 0) ? ERROR_SUCCESS : ERROR_PATCH_CORRUPT;
683 if (progress_fn != NULL && !progress_fn(progress_ctx, 0, (ULONG)output_size))
684 return ERROR_CANCELLED;
686 dec = heap_alloc(sizeof(*dec));
687 if (dec == NULL)
688 return ERROR_OUTOFMEMORY;
690 memset(dec->main_lengths, 0, sizeof(dec->main_lengths));
691 memset(dec->len_lengths, 0, sizeof(dec->len_lengths));
692 dec->reps[0] = 1;
693 dec->reps[1] = 1;
694 dec->reps[2] = 1;
696 /* apparently the window size is not recorded and must be deduced from the file sizes */
698 unsigned max_window = large_window ? MAX_LARGE_WINDOW : MAX_NORMAL_WINDOW;
699 size_t window = (size_t)1 << 17;
700 /* round up the old file size per the lzxd spec - correctness verified by fuzz tests */
701 size_t total = (predef_size + 0x7FFF) & ~0x7FFF;
702 unsigned delta;
704 total += output_size;
705 dec->dist_slot_count = 34;
706 while (window < total && window < ((size_t)1 << 19))
708 dec->dist_slot_count += 2;
709 window <<= 1;
711 delta = 4;
712 while (window < total && window < max_window)
714 dec->dist_slot_count += delta;
715 delta <<= 1;
716 window <<= 1;
718 TRACE("setting window to 0x%X\n", (unsigned)window);
721 dec->bit_pos = 0;
722 dec->tail_bits = 0;
723 dec->stream_buf = src;
724 dec->src = src;
725 dec->stream_end = end;
726 dec->chunk_end = dec->src;
728 /* load the first chunk size and set the end pointer */
729 if(init_chunk(dec, predef_size, buf_size))
731 err = ERROR_PATCH_DECODE_FAILURE;
732 goto free_dec;
735 /* fill the bit cache */
736 prime_bits(dec);
738 e8 = read_bits(dec, 1);
739 if (e8)
741 /* E8 transform was used */
742 e8_file_size = read_bits(dec, 16) << 16;
743 e8_file_size |= read_bits(dec, 16);
744 TRACE("E8 transform detected; file size %u\n", e8_file_size);
748 size_t index = predef_size;
749 while (dec->src < dec->stream_end && index < buf_size)
751 if (decode_lzxd_block(dec, dst, predef_size, &index, buf_size))
753 err = ERROR_PATCH_DECODE_FAILURE;
754 goto free_dec;
756 if (progress_fn != NULL && !progress_fn(progress_ctx, (ULONG)(index - predef_size), (ULONG)output_size))
758 err = ERROR_CANCELLED;
759 goto free_dec;
764 if (e8)
765 reverse_e8_transform(dst + predef_size, output_size, e8_file_size);
767 free_dec:
768 heap_free(dec);
770 return err;