4 * Copyright 2019 Conor McCarthy
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 * - Implememnt interleaved decoding
28 #include "wine/heap.h"
29 #include "wine/debug.h"
35 WINE_DEFAULT_DEBUG_CHANNEL(mspatcha
);
39 #define MAX_CODE_LEN 16
40 #define MAX_ALIGN_CODE_LEN 7
41 #define PRE_LEN_BITS 4
42 #define MAX_PRE_CODE_LEN ((1 << PRE_LEN_BITS) - 1)
43 #define MAIN_TABLE_SIZE (1 << MAX_CODE_LEN)
44 #define ALIGN_TABLE_SIZE (1 << MAX_ALIGN_CODE_LEN)
45 #define HUFF_ERROR 0xFFFF
47 #define MAX_POS_SLOTS 290
48 #define ALIGN_CODE_COUNT 8
49 #define PRE_LEN_CODE_COUNT 20
50 #define MAIN_CODE_COUNT(slots) (256 + 8 * (slots))
51 #define MAX_MAIN_CODES MAIN_CODE_COUNT(MAX_POS_SLOTS)
52 #define LEN_CODE_COUNT 249
53 #define MAX_CHUNK_UNCOMPRESSED_SIZE 0x8000
54 #define E8_TRANSFORM_LIMIT_BITS 30
55 #define E8_TRANSFORM_DEAD_ZONE 10
58 /* use byte pointers instead of uint16 for simplicity on uncompressed
59 * chunks, and the stream is not 16-bit aligned anyway */
60 const BYTE
*stream_buf
;
61 /* the next word to load into the bit cache */
63 /* location of the next chunk size field */
64 const BYTE
*chunk_end
;
65 /* position in the output where the maximum allowed decompressed chunk size is reached */
66 size_t uncomp_chunk_end
;
67 /* end of the input */
68 const BYTE
*stream_end
;
71 /* number of unused bits in the cache starting from bit 0 */
73 /* number of padding bits added trying to read at the chunk end */
76 size_t reps
[REP_COUNT
];
77 /* distance slot count is required for loading code lengths */
78 unsigned dist_slot_count
;
79 /* huffman code lengths */
80 BYTE align_lengths
[ALIGN_CODE_COUNT
];
81 BYTE main_lengths
[MAX_MAIN_CODES
];
82 BYTE len_lengths
[LEN_CODE_COUNT
];
83 UINT16 align_table
[ALIGN_TABLE_SIZE
];
84 UINT16 main_table
[MAIN_TABLE_SIZE
];
85 UINT16 len_table
[MAIN_TABLE_SIZE
];
88 /* PA19 container format is unaligned, so the LZXD stream is not aligned either.
89 * None of this is super optimized but it's fast enough for installer work.
91 static inline UINT16
read_uint16(struct LZXD_dec
*dec
)
93 /* bounds check was done before calling */
94 UINT16 u
= dec
->src
[0] | (dec
->src
[1] << 8);
95 dec
->src
+= ELEM_SIZE
;
99 /* load the next chunk size, reset bit_pos and set up limits
101 static int init_chunk(struct LZXD_dec
*dec
, size_t index
, size_t buf_limit
)
105 if (dec
->src
+ ELEM_SIZE
> dec
->stream_end
)
108 /* error if tail padding bits were decoded as input */
109 if (dec
->bit_pos
< dec
->tail_bits
)
112 chunk_size
= read_uint16(dec
);
114 dec
->chunk_end
= dec
->src
+ chunk_size
;
115 if (dec
->chunk_end
> dec
->stream_end
)
121 dec
->uncomp_chunk_end
= min(buf_limit
, index
+ MAX_CHUNK_UNCOMPRESSED_SIZE
);
126 /* ensure at least 17 bits are loaded but do not advance
128 static inline void prime_bits(struct LZXD_dec
*dec
)
130 while (dec
->bit_pos
< 17)
132 if (dec
->src
+ ELEM_SIZE
<= dec
->chunk_end
)
134 dec
->bits
= (dec
->bits
<< 16) | read_uint16(dec
);
138 /* Need to pad at the end of the chunk to decode the last codes.
139 In an error state, 0xFFFF sends the decoder down the right
140 side of the huffman tree to error out sooner. */
141 dec
->bits
= (dec
->bits
<< 16) | 0xFFFF;
142 dec
->tail_bits
+= 16;
148 /* read and advance n bits
150 static inline UINT32
read_bits(struct LZXD_dec
*dec
, unsigned n
)
155 bits
= dec
->bits
>> dec
->bit_pos
;
156 bits
&= ((1 << n
) - 1);
158 while (dec
->bit_pos
< 17)
160 if (dec
->src
+ ELEM_SIZE
<= dec
->chunk_end
)
162 dec
->bits
= (dec
->bits
<< 16) | read_uint16(dec
);
167 dec
->bits
= (dec
->bits
<< 16) | 0xFFFF;
168 dec
->tail_bits
+= 16;
176 /* read n bits but do not advance
178 static inline UINT32
peek_bits(struct LZXD_dec
*dec
, unsigned n
)
180 UINT32 bits
= dec
->bits
>> (dec
->bit_pos
- n
);
181 return bits
& ((1 << n
) - 1);
184 static inline void advance_bits(struct LZXD_dec
*dec
, unsigned length
)
186 dec
->bit_pos
-= length
;
190 /* read a 16-bit aligned UINT32
192 static UINT32
read_uint32(struct LZXD_dec
*dec
)
197 assert((dec
->bit_pos
& 0xF) == 0);
202 u
|= ((dec
->bits
>> dec
->bit_pos
) & 0xFFFF) << n
;
205 while (n
< 32 && dec
->src
+ ELEM_SIZE
<= dec
->chunk_end
)
207 u
|= read_uint16(dec
) << n
;
214 static int make_huffman_codes(unsigned *codes
, const BYTE
*lengths
, unsigned count
)
216 unsigned len_count
[MAX_CODE_LEN
+ 1];
217 unsigned next_code
[MAX_CODE_LEN
+ 1];
221 memset(len_count
, 0, sizeof(len_count
));
222 for (i
= 0; i
< count
; ++i
)
223 ++len_count
[lengths
[i
]];
226 for (i
= 1; i
<= MAX_CODE_LEN
; ++i
)
228 code
= (code
+ len_count
[i
- 1]) << 1;
231 for (i
= 0; i
< count
; i
++)
233 unsigned len
= lengths
[i
];
236 /* test for bad code tree */
237 if (next_code
[len
] >= (1U << len
))
240 codes
[i
] = next_code
[len
];
247 void make_decode_table(UINT16
*table
, const unsigned *codes
,
248 const BYTE
*lengths
, unsigned max_len
, unsigned count
)
250 const size_t table_size
= (size_t)1 << max_len
;
253 for (i
= 0; i
< table_size
; i
++)
254 table
[i
] = HUFF_ERROR
;
256 for (i
= 0; i
< count
; i
++) if (lengths
[i
])
258 BYTE diff
= (BYTE
)max_len
- lengths
[i
];
259 size_t n
= codes
[i
] << diff
;
260 size_t end
= n
+ ((size_t)1 << diff
);
262 table
[n
] = (UINT16
)i
;
266 #define ret_if_failed(r_) do { int err_ = r_; if(err_) return err_; } while(0)
268 static int decode_lengths(struct LZXD_dec
*dec
,
269 BYTE
*lengths
, unsigned index
, unsigned count
)
271 unsigned codes
[PRE_LEN_CODE_COUNT
];
272 BYTE pre_lens
[PRE_LEN_CODE_COUNT
];
274 unsigned repeats
= 1;
276 for (i
= 0; i
< PRE_LEN_CODE_COUNT
; ++i
)
277 pre_lens
[i
] = (BYTE
)read_bits(dec
, PRE_LEN_BITS
);
279 ret_if_failed(make_huffman_codes(codes
, pre_lens
, PRE_LEN_CODE_COUNT
));
280 make_decode_table(dec
->main_table
, codes
, pre_lens
, MAX_PRE_CODE_LEN
, PRE_LEN_CODE_COUNT
);
282 while (index
< count
)
284 UINT32 bits
= peek_bits(dec
, MAX_PRE_CODE_LEN
);
285 UINT16 sym
= dec
->main_table
[bits
];
286 if (sym
== HUFF_ERROR
)
288 advance_bits(dec
, pre_lens
[sym
]);
292 sym
= (lengths
[index
] + 17 - sym
) % 17;
295 lengths
[index
] = (BYTE
)sym
;
298 } while (repeats
&& index
< count
);
307 zeros
= read_bits(dec
, sym
) + (1 << sym
) - 12;
313 } while (zeros
&& index
< count
);
317 /* the repeat count applies to the next symbol */
318 repeats
= 4 + read_bits(dec
, 1);
324 /* distance decoder for block_type == 1
326 static size_t decode_dist_verbatim(struct LZXD_dec
*dec
, unsigned dist_slot
)
329 unsigned footer_bits
= 17;
333 footer_bits
= (dist_slot
>> 1) - 1;
334 dist
= ((size_t)2 + (dist_slot
& 1)) << footer_bits
;
338 dist
= ((size_t)1 << 19) + ((size_t)1 << 17) * (dist_slot
- 38);
340 return dist
+ read_bits(dec
, footer_bits
);
343 /* distance decoder for block_type == 2
345 static size_t decode_dist_aligned(struct LZXD_dec
*dec
, unsigned dist_slot
)
348 unsigned footer_bits
= 17;
352 footer_bits
= (dist_slot
>> 1) - 1;
353 dist
= ((size_t)2 + (dist_slot
& 1)) << footer_bits
;
357 dist
= ((size_t)1 << 19) + ((size_t)1 << 17) * (dist_slot
- 38);
359 if (footer_bits
>= 3)
366 dist
+= read_bits(dec
, footer_bits
) << 3;
368 bits
= peek_bits(dec
, MAX_ALIGN_CODE_LEN
);
369 sym
= dec
->align_table
[bits
];
370 if (sym
== HUFF_ERROR
)
372 advance_bits(dec
, dec
->align_lengths
[sym
]);
378 dist
+= read_bits(dec
, footer_bits
);
383 static inline void align_16_or_maybe_advance_anyway(struct LZXD_dec
*dec
)
385 dec
->bit_pos
&= 0x30;
386 /* The specification requires 16 bits of zero padding in some cases where the stream is already aligned, but
387 * the logic behind the choice to pad any particular block is undefined (it's a feature!). Fortunately it
388 * seems always to coincide with a bit_pos of 0x20, but 0x20 doesn't always mean padding, so we test for zero
389 * too. A remote chance of failure may still exist but I've never seen one occur. */
390 if (dec
->bit_pos
== 0x20 && (dec
->bits
>> 16) == 0)
394 static int copy_uncompressed(struct LZXD_dec
*dec
, BYTE
*base
, size_t *index_ptr
, size_t buf_limit
, UINT32 block_size
)
396 size_t index
= *index_ptr
;
397 size_t end
= index
+ block_size
;
402 /* save the current alignment */
403 realign
= (dec
->src
- dec
->stream_buf
) & 1;
405 while (dec
->src
< dec
->stream_end
)
407 /* now treat the input as an unaligned byte stream */
408 size_t to_copy
= min(end
- index
, dec
->uncomp_chunk_end
- index
);
409 to_copy
= min(to_copy
, (size_t)(dec
->stream_end
- dec
->src
));
411 memcpy(base
+ index
, dec
->src
, to_copy
);
417 /* realign at the end of the block */
418 dec
->src
+= ((dec
->src
- dec
->stream_buf
) & 1) ^ realign
;
419 /* fill the bit cache for block header decoding */
423 /* chunk sizes are also unaligned */
424 ret_if_failed(init_chunk(dec
, index
, buf_limit
));
430 static int prime_next_chunk(struct LZXD_dec
*dec
, size_t index
, size_t buf_limit
)
432 if (dec
->src
< dec
->chunk_end
)
434 ret_if_failed(init_chunk(dec
, index
, buf_limit
));
439 #define MAX_LONG_MATCH_CODE_LEN 3
440 #define LONG_MATCH_TABLE_SIZE (1 << MAX_LONG_MATCH_CODE_LEN)
448 static const struct long_match long_match_table
[LONG_MATCH_TABLE_SIZE
] = {
459 static int decode_lzxd_block(struct LZXD_dec
*dec
, BYTE
*base
, size_t predef_size
, size_t *index_ptr
, size_t buf_limit
)
461 unsigned codes
[MAX_MAIN_CODES
];
462 unsigned main_code_count
;
467 size_t index
= *index_ptr
;
468 size_t (*dist_decoder
)(struct LZXD_dec
*dec
, unsigned dist_slot
);
470 if (index
>= dec
->uncomp_chunk_end
&& prime_next_chunk(dec
, index
, buf_limit
))
473 block_type
= read_bits(dec
, 3);
475 /* check for invalid block types */
476 if (block_type
== 0 || block_type
> 3)
479 block_size
= read_bits(dec
, 8);
480 block_size
= (block_size
<< 8) | read_bits(dec
, 8);
481 block_size
= (block_size
<< 8) | read_bits(dec
, 8);
485 /* uncompressed block */
486 align_16_or_maybe_advance_anyway(dec
);
487 /* must have run out of coffee at the office */
488 for (i
= 0; i
< REP_COUNT
; ++i
)
490 dec
->reps
[i
] = read_uint32(dec
);
491 if (dec
->reps
[i
] == 0)
494 /* copy the block to output */
495 return copy_uncompressed(dec
, base
, index_ptr
, buf_limit
, block_size
);
497 else if (block_type
== 2)
499 /* distance alignment decoder will be used */
500 for (i
= 0; i
< ALIGN_CODE_COUNT
; ++i
)
501 dec
->align_lengths
[i
] = read_bits(dec
, 3);
504 main_code_count
= MAIN_CODE_COUNT(dec
->dist_slot_count
);
505 ret_if_failed(decode_lengths(dec
, dec
->main_lengths
, 0, 256));
506 ret_if_failed(decode_lengths(dec
, dec
->main_lengths
, 256, main_code_count
));
507 ret_if_failed(decode_lengths(dec
, dec
->len_lengths
, 0, LEN_CODE_COUNT
));
509 dist_decoder
= (block_type
== 2) ? decode_dist_aligned
: decode_dist_verbatim
;
513 ret_if_failed(make_huffman_codes(codes
, dec
->align_lengths
, ALIGN_CODE_COUNT
));
514 make_decode_table(dec
->align_table
, codes
, dec
->align_lengths
, MAX_ALIGN_CODE_LEN
, ALIGN_CODE_COUNT
);
517 ret_if_failed(make_huffman_codes(codes
, dec
->main_lengths
, main_code_count
));
518 make_decode_table(dec
->main_table
, codes
, dec
->main_lengths
, MAX_CODE_LEN
, main_code_count
);
520 ret_if_failed(make_huffman_codes(codes
, dec
->len_lengths
, LEN_CODE_COUNT
));
521 make_decode_table(dec
->len_table
, codes
, dec
->len_lengths
, MAX_CODE_LEN
, LEN_CODE_COUNT
);
523 block_limit
= min(buf_limit
, index
+ block_size
);
525 while (index
< block_limit
)
530 if (index
>= dec
->uncomp_chunk_end
&& prime_next_chunk(dec
, index
, buf_limit
))
533 bits
= peek_bits(dec
, MAX_CODE_LEN
);
534 sym
= dec
->main_table
[bits
];
535 if (sym
== HUFF_ERROR
)
537 advance_bits(dec
, dec
->main_lengths
[sym
]);
542 base
[index
] = (BYTE
)sym
;
552 length
= (sym
& 7) + 2;
553 dist_slot
= sym
>> 3;
557 /* extra length bits */
558 bits
= peek_bits(dec
, MAX_CODE_LEN
);
559 sym
= dec
->len_table
[bits
];
560 if (sym
== HUFF_ERROR
)
562 advance_bits(dec
, dec
->len_lengths
[sym
]);
569 /* extra distance bits */
570 dist
= dist_decoder(dec
, dist_slot
);
571 if (dist
== ~(size_t)0)
576 /* extra-long match length */
577 bits
= peek_bits(dec
, MAX_LONG_MATCH_CODE_LEN
);
578 advance_bits(dec
, long_match_table
[bits
].code_len
);
580 length
= long_match_table
[bits
].base
;
581 length
+= read_bits(dec
, long_match_table
[bits
].extra_bits
);
587 dist
= dec
->reps
[dist
];
588 dec
->reps
[rep
] = dec
->reps
[0];
592 dist
-= REP_COUNT
- 1;
593 dec
->reps
[2] = dec
->reps
[1];
594 dec
->reps
[1] = dec
->reps
[0];
598 while (dist
> index
&& length
&& index
< block_limit
)
600 /* undocumented: the encoder assumes an imaginary buffer
601 * of zeros exists before the start of the real buffer */
607 end
= min(index
+ length
, block_limit
);
610 base
[index
] = base
[index
- dist
];
615 /* error if tail padding bits were decoded as input */
616 if (dec
->bit_pos
< dec
->tail_bits
)
623 static void reverse_e8_transform(BYTE
*decode_buf
, ptrdiff_t len
, ptrdiff_t e8_file_size
)
625 ptrdiff_t limit
= min((ptrdiff_t)1 << E8_TRANSFORM_LIMIT_BITS
, len
);
628 for (i
= 0; i
< limit
; )
630 ptrdiff_t end
= min(i
+ MAX_CHUNK_UNCOMPRESSED_SIZE
- E8_TRANSFORM_DEAD_ZONE
,
631 limit
- E8_TRANSFORM_DEAD_ZONE
);
632 ptrdiff_t next
= i
+ MAX_CHUNK_UNCOMPRESSED_SIZE
;
636 if (decode_buf
[i
] == 0xE8)
639 ptrdiff_t value
= (ptrdiff_t)decode_buf
[i
+ 1] |
640 decode_buf
[i
+ 2] << 8 |
641 decode_buf
[i
+ 3] << 16 |
642 decode_buf
[i
+ 4] << 24;
644 if (value
>= -i
&& value
< e8_file_size
)
649 delta
= value
+ e8_file_size
;
651 decode_buf
[i
+ 1] = (BYTE
)delta
;
652 decode_buf
[i
+ 2] = (BYTE
)(delta
>> 8);
653 decode_buf
[i
+ 3] = (BYTE
)(delta
>> 16);
654 decode_buf
[i
+ 4] = (BYTE
)(delta
>> 24);
663 DWORD
decode_lzxd_stream(const BYTE
*src
, const size_t input_size
,
664 BYTE
*dst
, const size_t output_size
,
665 const size_t predef_size
,
667 PPATCH_PROGRESS_CALLBACK progress_fn
,
670 struct LZXD_dec
*dec
;
671 const BYTE
*end
= src
+ input_size
;
672 size_t buf_size
= predef_size
+ output_size
;
674 UINT32 e8_file_size
= 0;
675 DWORD err
= ERROR_SUCCESS
;
677 TRACE("decoding stream of size %u to size %u, starting at %u\n",
678 (unsigned)input_size
, (unsigned)output_size
, (unsigned)predef_size
);
681 return (output_size
== 0) ? ERROR_SUCCESS
: ERROR_PATCH_CORRUPT
;
683 if (progress_fn
!= NULL
&& !progress_fn(progress_ctx
, 0, (ULONG
)output_size
))
684 return ERROR_CANCELLED
;
686 dec
= heap_alloc(sizeof(*dec
));
688 return ERROR_OUTOFMEMORY
;
690 memset(dec
->main_lengths
, 0, sizeof(dec
->main_lengths
));
691 memset(dec
->len_lengths
, 0, sizeof(dec
->len_lengths
));
696 /* apparently the window size is not recorded and must be deduced from the file sizes */
698 unsigned max_window
= large_window
? MAX_LARGE_WINDOW
: MAX_NORMAL_WINDOW
;
699 size_t window
= (size_t)1 << 17;
700 /* round up the old file size per the lzxd spec - correctness verified by fuzz tests */
701 size_t total
= (predef_size
+ 0x7FFF) & ~0x7FFF;
704 total
+= output_size
;
705 dec
->dist_slot_count
= 34;
706 while (window
< total
&& window
< ((size_t)1 << 19))
708 dec
->dist_slot_count
+= 2;
712 while (window
< total
&& window
< max_window
)
714 dec
->dist_slot_count
+= delta
;
718 TRACE("setting window to 0x%X\n", (unsigned)window
);
723 dec
->stream_buf
= src
;
725 dec
->stream_end
= end
;
726 dec
->chunk_end
= dec
->src
;
728 /* load the first chunk size and set the end pointer */
729 if(init_chunk(dec
, predef_size
, buf_size
))
731 err
= ERROR_PATCH_DECODE_FAILURE
;
735 /* fill the bit cache */
738 e8
= read_bits(dec
, 1);
741 /* E8 transform was used */
742 e8_file_size
= read_bits(dec
, 16) << 16;
743 e8_file_size
|= read_bits(dec
, 16);
744 TRACE("E8 transform detected; file size %u\n", e8_file_size
);
748 size_t index
= predef_size
;
749 while (dec
->src
< dec
->stream_end
&& index
< buf_size
)
751 if (decode_lzxd_block(dec
, dst
, predef_size
, &index
, buf_size
))
753 err
= ERROR_PATCH_DECODE_FAILURE
;
756 if (progress_fn
!= NULL
&& !progress_fn(progress_ctx
, (ULONG
)(index
- predef_size
), (ULONG
)output_size
))
758 err
= ERROR_CANCELLED
;
765 reverse_e8_transform(dst
+ predef_size
, output_size
, e8_file_size
);