1 // See the file readme-lzhuf.txt for more information about this file.
2 // Modifications for Deark are Copyright (C) 2021 Jason Summers, and have the
3 // same terms of use as the main part of Deark.
4 // Alternatively, at your option, the modifications for Deark may be treated
7 // Intro from the original software:
9 /**************************************************************
11 written by Haruyasu Yoshizaki 11/20/1988
12 some minor changes 4/6/1989
13 comments translated by Haruhiko Okumura 4/7/1989
14 **************************************************************/
16 // Max of #literals + #special_codes + #match_lengths.
17 // Must be at least 256+59 (lh1:58, CRLZH:59)
18 #define LZHUF_MAX_N_CHAR (256 + 60)
20 #define LZHUF_MAX_T (LZHUF_MAX_N_CHAR * 2 - 1) /* size of table */
21 #define LZHUF_MAX_FREQ 0x8000 /* updates tree when the */
22 /* root frequency comes to this value. */
28 struct de_lh1_params lh1p
;
29 struct de_dfilter_ctx
*dfctx
;
30 struct de_dfilter_out_params
*dcmpro
; // same as dfctx->dcmpro
31 struct de_dfilter_results
*dres
; // same as dfctx->dres
35 UI lzhuf_N_CHAR
; /* kinds of characters (character code = 0..N_CHAR-1) */
37 UI lzhuf_R
; /* position of root */ /* (LZHUF_T - 1) */
39 UI dpparam_dcode_shift
;
44 i64 total_nbytes_processed
;
47 struct de_lz77buffer
*ringbuf
;
48 struct de_bitbuf_lowlevel bbll
;
50 u16 freq
[LZHUF_MAX_T
+ 1]; /* frequency table */
52 u16 prnt
[LZHUF_MAX_T
+ LZHUF_MAX_N_CHAR
]; /* pointers to parent nodes, except for the */
53 /* elements [T..T + N_CHAR - 1] which are used to get */
54 /* the positions of leaves corresponding to the codes. */
56 u16 son
[LZHUF_MAX_T
]; /* pointers to child nodes (son[], son[] + 1) */
59 // These getters/setters are ugly, but it's too difficult for me to follow the
60 // dancing variables in the lzhuf code, and convince myself that there are no
62 // Assuming the code is safe, these functions can be replaced by simple macros.
64 static u16
get_freq(struct lzahuf_ctx
*cctx
, UI idx
)
66 if(idx
< (UI
)DE_ARRAYCOUNT(cctx
->freq
)) return cctx
->freq
[idx
];
71 static void set_freq(struct lzahuf_ctx
*cctx
, UI idx
, u16 val
)
73 if(idx
< (UI
)DE_ARRAYCOUNT(cctx
->freq
)) cctx
->freq
[idx
] = val
;
74 else cctx
->errflag
= 1;
77 static u16
get_son(struct lzahuf_ctx
*cctx
, UI idx
)
79 if(idx
< (UI
)DE_ARRAYCOUNT(cctx
->son
)) return cctx
->son
[idx
];
84 static void set_son(struct lzahuf_ctx
*cctx
, UI idx
, u16 val
)
86 if(idx
< (UI
)DE_ARRAYCOUNT(cctx
->son
)) cctx
->son
[idx
] = val
;
87 else cctx
->errflag
= 1;
90 static u16
get_prnt(struct lzahuf_ctx
*cctx
, UI idx
)
92 if(idx
< (UI
)DE_ARRAYCOUNT(cctx
->prnt
)) return cctx
->prnt
[idx
];
97 static void set_prnt(struct lzahuf_ctx
*cctx
, UI idx
, u16 val
)
99 if(idx
< (UI
)DE_ARRAYCOUNT(cctx
->prnt
)) cctx
->prnt
[idx
] = val
;
100 else cctx
->errflag
= 1;
103 static UI
lzhuf_getbits(struct lzahuf_ctx
*cctx
, UI nbits
)
105 if(cctx
->bbll
.nbits_in_bitbuf
< nbits
) {
109 return (UI
)de_bitbuf_lowlevel_get_bits(&cctx
->bbll
, nbits
);
112 /* initialization of tree */
114 static void lzhuf_StartHuff(struct lzahuf_ctx
*cctx
)
118 for (i
= 0; i
< cctx
->lzhuf_N_CHAR
; i
++) {
119 set_freq(cctx
, i
, 1);
120 set_son(cctx
, i
, i
+ cctx
->lzhuf_T
);
121 set_prnt(cctx
, i
+ cctx
->lzhuf_T
, i
);
124 j
= cctx
->lzhuf_N_CHAR
;
125 while (j
<= cctx
->lzhuf_R
) {
126 set_freq(cctx
, j
, get_freq(cctx
, i
) + get_freq(cctx
, i
+ 1));
128 set_prnt(cctx
, i
, j
);
129 set_prnt(cctx
, i
+ 1, j
);
133 set_freq(cctx
, cctx
->lzhuf_T
, 0xffff);
134 set_prnt(cctx
, cctx
->lzhuf_R
, 0);
138 /* reconstruction of tree */
140 static void lzhuf_reconst(struct lzahuf_ctx
*cctx
)
145 /* collect leaf nodes in the first half of the table */
146 /* and replace the freq by (freq + 1) / 2. */
148 for (i
= 0; i
< cctx
->lzhuf_T
; i
++) {
149 if (get_son(cctx
, i
) >= cctx
->lzhuf_T
) {
150 set_freq(cctx
, j
, (get_freq(cctx
, i
) + 1) / 2);
151 set_son(cctx
, j
, get_son(cctx
, i
));
155 /* begin constructing tree by connecting sons */
156 for (i
= 0, j
= cctx
->lzhuf_N_CHAR
; j
< cctx
->lzhuf_T
; i
+= 2, j
++) {
158 set_freq(cctx
, j
, get_freq(cctx
, i
) + get_freq(cctx
, k
));
159 f
= get_freq(cctx
, j
);
162 while(f
< get_freq(cctx
, k
)) {
169 // son[] is smaller than freq[], so bounds check uses son[].
170 if(l
> (UI
)DE_ARRAYCOUNT(cctx
->son
) ||
171 k
+1+l
> (UI
)DE_ARRAYCOUNT(cctx
->son
))
177 de_memmove(&cctx
->freq
[k
+ 1], &cctx
->freq
[k
], l
*sizeof(cctx
->freq
[0]));
178 set_freq(cctx
, k
, f
);
179 de_memmove(&cctx
->son
[k
+ 1], &cctx
->son
[k
], l
*sizeof(cctx
->son
[0]));
183 for (i
= 0; i
< cctx
->lzhuf_T
; i
++) {
184 k
= get_son(cctx
, i
);
185 if (k
>= cctx
->lzhuf_T
) {
186 set_prnt(cctx
, k
, i
);
188 set_prnt(cctx
, k
, i
);
189 set_prnt(cctx
, k
+ 1, i
);
195 /* increment frequency of given code by one, and update tree */
197 static void lzhuf_update(struct lzahuf_ctx
*cctx
, UI c
)
204 r_freq
= get_freq(cctx
, cctx
->lzhuf_R
);
205 if(r_freq
> LZHUF_MAX_FREQ
) {
209 if (r_freq
== LZHUF_MAX_FREQ
) {
211 if(cctx
->errflag
) return;
213 c
= get_prnt(cctx
, c
+ cctx
->lzhuf_T
);
215 if(counter
> (UI
)DE_ARRAYCOUNT(cctx
->prnt
)) { // infinite loop?
221 set_freq(cctx
, c
, get_freq(cctx
, c
)+1);
222 k
= get_freq(cctx
, c
);
224 /* if the order is disturbed, exchange nodes */
226 if (k
> get_freq(cctx
, l
)) {
230 } while(k
> get_freq(cctx
, l
));
233 set_freq(cctx
, c
, get_freq(cctx
, l
));
234 set_freq(cctx
, l
, k
);
236 i
= get_son(cctx
, c
);
237 set_prnt(cctx
, i
, l
);
238 if (i
< cctx
->lzhuf_T
) set_prnt(cctx
, i
+ 1, l
);
240 j
= get_son(cctx
, l
);
243 set_prnt(cctx
, j
, c
);
244 if (j
< cctx
->lzhuf_T
) set_prnt(cctx
, j
+ 1, c
);
249 c
= get_prnt(cctx
, c
);
250 } while (c
!= 0); /* repeat up to root */
253 static UI
lzhuf_DecodeChar(struct lzahuf_ctx
*cctx
)
257 c
= get_son(cctx
, cctx
->lzhuf_R
);
259 /* travel from root to leaf, */
260 /* choosing the smaller child node (son[]) if the read bit is 0, */
261 /* the bigger (son[]+1) if 1 */
262 while (c
< cctx
->lzhuf_T
) {
263 c
+= lzhuf_getbits(cctx
, 1);
264 // There will never be more than 64 bits available, so a hypothetical
265 // infinite loop would be caught here.
266 if(cctx
->errflag
) return 0;
267 c
= get_son(cctx
, c
);
270 lzhuf_update(cctx
, c
);
274 static UI
lzhuf_DecodePosition(struct lzahuf_ctx
*cctx
)
279 /* recover upper bits from table */
280 i
= lzhuf_getbits(cctx
, 8);
281 if(cctx
->errflag
) return 0;
282 fmtutil_get_lzhuf_d_code_and_len(i
, &d_code
, &d_len
);
283 c
= d_code
<< cctx
->dpparam_dcode_shift
;
285 /* read lower bits verbatim */
286 j
= d_len
- cctx
->dpparam_dlen_bias
;
287 i
= (i
<<j
) | lzhuf_getbits(cctx
, j
);
288 if(cctx
->errflag
) return 0;
289 i
&= cctx
->dpparam_mask
;
293 static int lzah_have_enough_output(struct lzahuf_ctx
*cctx
)
295 if(cctx
->dcmpro
->len_known
) {
296 if(cctx
->nbytes_written
>= cctx
->dcmpro
->expected_len
) {
297 cctx
->dfctx
->finished_flag
= 1;
304 static void lzah_lz77buf_writebytecb(struct de_lz77buffer
*rb
, u8 n
)
306 struct lzahuf_ctx
*cctx
= (struct lzahuf_ctx
*)rb
->userdata
;
308 if(lzah_have_enough_output(cctx
)) {
311 dbuf_writebyte(cctx
->dcmpro
->f
, n
);
312 cctx
->nbytes_written
++;
315 static void lzhuf_decodesubtree_nodepair(struct lzahuf_ctx
*cctx
, UI p1
, u64 val
, UI val_nbits
,
316 char *b2buf
, size_t b2buf_len
);
318 static void lzhuf_decodesubtree_node(struct lzahuf_ctx
*cctx
, UI p1
, u64 val
, UI val_nbits
,
319 char *b2buf
, size_t b2buf_len
)
321 if(cctx
->son
[p1
] < cctx
->lzhuf_T
) { // [pointer node]
322 lzhuf_decodesubtree_nodepair(cctx
, cctx
->son
[p1
], val
, val_nbits
, b2buf
, b2buf_len
);
324 else { // [leaf node]
325 de_dbg(cctx
->c
, "code: \"%s\" = %d [%u]",
326 de_print_base2_fixed(b2buf
, b2buf_len
, val
, val_nbits
),
327 (int)cctx
->son
[p1
], (UI
)(cctx
->son
[p1
]-cctx
->lzhuf_T
));
331 // Interpret son[p1] and son[p1+1]
332 static void lzhuf_decodesubtree_nodepair(struct lzahuf_ctx
*cctx
, UI p1
, u64 val
, UI val_nbits
,
333 char *b2buf
, size_t b2buf_len
)
335 if(p1
>= cctx
->lzhuf_T
) return; // error
336 lzhuf_decodesubtree_node(cctx
, p1
, (val
<<1), val_nbits
+1, b2buf
, b2buf_len
);
337 lzhuf_decodesubtree_node(cctx
, p1
+1, (val
<<1)|1, val_nbits
+1, b2buf
, b2buf_len
);
340 static void lzhuf_dumptree(struct lzahuf_ctx
*cctx
)
345 de_dbg(cctx
->c
, "R: %u", (UI
)cctx
->lzhuf_R
);
346 de_dbg(cctx
->c
, "T: %u", (UI
)cctx
->lzhuf_T
);
347 lzhuf_decodesubtree_node(cctx
, cctx
->lzhuf_R
, 0, 0, b2buf
, sizeof(b2buf
));
348 for(i
=0; i
<DE_ARRAYCOUNT(cctx
->son
); i
++) {
349 de_dbg(cctx
->c
, "son[%u]: %u", i
, (UI
)cctx
->son
[i
]);
353 static void lzhuf_Decode_init(struct lzahuf_ctx
*cctx
)
355 UI rb_size
; // LZHUF_N
357 // Defaults, for standard LHarc -lh1-:
358 // codes 0-255 = literals 0x00-0xff
359 // codes 256-313 = match lengths 3-60
360 cctx
->num_special_codes
= 0;
361 cctx
->num_length_codes
= 58;
362 cctx
->match_length_bias
= 253;
363 cctx
->dpparam_dcode_shift
= 6;
364 cctx
->dpparam_dlen_bias
= 2;
365 cctx
->dpparam_mask
= 0x3f;
368 if(cctx
->lh1p
.is_crlzh11
|| cctx
->lh1p
.is_crlzh20
) {
369 // codes 0-255 = literals 0x00-0xff
371 // codes 257-314 = match lengths 3-60
372 cctx
->num_special_codes
= 1;
373 cctx
->num_length_codes
= 58;
374 cctx
->match_length_bias
= 254;
376 if(cctx
->lh1p
.is_crlzh20
) {
377 cctx
->dpparam_dcode_shift
= 5;
378 cctx
->dpparam_dlen_bias
= 3;
379 cctx
->dpparam_mask
= 0x1f;
382 else if(cctx
->lh1p
.is_arc_trimmed
) {
383 // codes 0-255 = literals 0x00-0xff
385 // codes 257-313 = match lengths 3-59
386 cctx
->num_special_codes
= 1;
387 cctx
->num_length_codes
= 57;
388 cctx
->match_length_bias
= 254;
390 else if(cctx
->lh1p
.is_dms_deep
) {
392 cctx
->dpparam_dcode_shift
= 8;
393 cctx
->dpparam_dlen_bias
= 0;
394 cctx
->dpparam_mask
= 0xff;
397 cctx
->lzhuf_N_CHAR
= 256 + cctx
->num_special_codes
+ cctx
->num_length_codes
;
398 if(cctx
->lzhuf_N_CHAR
> LZHUF_MAX_N_CHAR
) {
402 cctx
->lzhuf_T
= cctx
->lzhuf_N_CHAR
* 2 - 1;
403 cctx
->lzhuf_R
= cctx
->lzhuf_T
- 1;
405 cctx
->ringbuf
= de_lz77buffer_create(cctx
->c
, rb_size
);
406 cctx
->ringbuf
->userdata
= (void*)cctx
;
407 cctx
->ringbuf
->writebyte_cb
= lzah_lz77buf_writebytecb
;
408 if(cctx
->lh1p
.history_fill_val
!=0) {
409 de_lz77buffer_clear(cctx
->ringbuf
, cctx
->lh1p
.history_fill_val
);
412 cctx
->bbll
.is_lsb
= 0;
413 de_bitbuf_lowlevel_empty(&cctx
->bbll
);
415 lzhuf_StartHuff(cctx
);
418 lzhuf_dumptree(cctx
);
425 static void lzhuf_Decode_continue(struct lzahuf_ctx
*cctx
, const u8
*buf
, i64 buf_len
, int flush
)
432 if(cctx
->dfctx
->finished_flag
) goto done
;
435 if(cctx
->errflag
) goto done
;
436 if(lzah_have_enough_output(cctx
)) {
440 // We're relying on the assumption that a compression instruction can
441 // never be more than 57 bits in size.
442 // The Huffman-encoded "character" maxes out, I think, at around 15 or
443 // 16 bits -- this follows indirectly from MAX_FREQ, which reduces the
444 // tree depth perdiodically.
445 // For a "match" instruction, an 8-bit code is then read.
446 // Then, some additional bits are read -- the most possible for the
447 // supported formats is 8, for DMS-Deep.
448 // So, about 16+8+8 = 32 bits should be sufficient.
450 // Top off the bitbuf, if possible.
451 while(bufpos
<buf_len
&& cctx
->bbll
.nbits_in_bitbuf
<=(64-8)) {
452 de_bitbuf_lowlevel_add_byte(&cctx
->bbll
, buf
[bufpos
++]);
453 cctx
->total_nbytes_processed
++;
456 if(!flush
&& cctx
->bbll
.nbits_in_bitbuf
<=(64-8)) {
457 // Wait for more data before trying to continue
461 if(cctx
->c
->debug_level
>=4) {
462 de_bitbuf_describe_curpos(&cctx
->bbll
,
463 cctx
->dfctx
->input_file_offset
+cctx
->total_nbytes_processed
,
464 pos_descr
, sizeof(pos_descr
));
467 c
= lzhuf_DecodeChar(cctx
);
468 if(cctx
->errflag
) goto done
;
470 if(c
==256 && cctx
->num_special_codes
>=1) {
471 if(cctx
->c
->debug_level
>=4) {
472 de_dbg(cctx
->c
, "special @%s =%u", pos_descr
, c
);
474 cctx
->dfctx
->finished_flag
= 1;
478 if(cctx
->c
->debug_level
>=4) {
479 de_dbg(cctx
->c
, "lit @%s =%u", pos_descr
, c
);
481 de_lz77buffer_add_literal_byte(cctx
->ringbuf
, (u8
)c
);
484 // i is the distance back
485 i
= lzhuf_DecodePosition(cctx
);
486 if(cctx
->errflag
) goto done
;
488 // j is the match length
489 j
= c
- cctx
->match_length_bias
;
491 if(cctx
->c
->debug_level
>=4) {
492 de_dbg(cctx
->c
, "match @%s dist=%u len=%u", pos_descr
, i
, j
);
495 de_lz77buffer_copy_from_hist(cctx
->ringbuf
,
496 (UI
)(cctx
->ringbuf
->curpos
- (UI
)i
- 1), j
);
501 if(!cctx
->dfctx
->finished_flag
&& !cctx
->errflag
) {
503 // It shouldn't be possible to get here. If we do, it means some input
504 // bytes will be lost.
505 de_dfilter_set_generic_error(cctx
->dfctx
->c
, cctx
->dres
, cctx
->modname
);