1 // This file is part of Deark.
2 // Copyright (C) 2018 Jason Summers
3 // See the file COPYING for terms of use.
7 #define DE_NOT_IN_MODULE
8 #include "deark-config.h"
9 #include "deark-private.h"
10 #include "deark-fmtutil.h"
12 // Returns a message that is valid until the next operation on dres.
13 const char *de_dfilter_get_errmsg(deark
*c
, struct de_dfilter_results
*dres
)
15 if(dres
->errcode
==0) {
21 return "Unspecified error";
24 // Initialize or reset a dfilter results struct
25 void de_dfilter_results_clear(deark
*c
, struct de_dfilter_results
*dres
)
28 dres
->bytes_consumed_valid
= 0;
29 dres
->bytes_consumed
= 0;
30 dres
->errmsg
[0] = '\0';
33 // Note: It is also okay to init these objects by zeroing out their bytes.
34 void de_dfilter_init_objects(deark
*c
, struct de_dfilter_in_params
*dcmpri
,
35 struct de_dfilter_out_params
*dcmpro
, struct de_dfilter_results
*dres
)
38 de_zeromem(dcmpri
, sizeof(struct de_dfilter_in_params
));
40 de_zeromem(dcmpro
, sizeof(struct de_dfilter_out_params
));
42 de_dfilter_results_clear(c
, dres
);
45 void de_dfilter_set_errorf(deark
*c
, struct de_dfilter_results
*dres
, const char *modname
,
50 if(dres
->errcode
!= 0) return; // Only record the first error
57 de_vsnprintf(tmpbuf
, sizeof(tmpbuf
), fmt
, ap
);
58 de_snprintf(dres
->errmsg
, sizeof(dres
->errmsg
), "[%s] %s", modname
, tmpbuf
);
61 de_vsnprintf(dres
->errmsg
, sizeof(dres
->errmsg
), fmt
, ap
);
66 void de_dfilter_set_generic_error(deark
*c
, struct de_dfilter_results
*dres
, const char *modname
)
68 if(dres
->errcode
!= 0) return;
69 de_dfilter_set_errorf(c
, dres
, modname
, "Unspecified error");
72 // This is a decompression API that uses a "push" input model. The client
73 // sends data to the codec as the data becomes available.
74 // (The client must still be able to consume any amount of output data.)
76 // This model makes it easier to chain multiple codecs together, and to handle
77 // input data that is not contiguous.
78 // TODO: There's no reason this couldn't be extended to work with "type1" codecs.
80 struct de_dfilter_ctx
*de_dfilter_create(deark
*c
,
81 dfilter_codec_type codec_init_fn
, void *codec_private_params
,
82 struct de_dfilter_out_params
*dcmpro
, struct de_dfilter_results
*dres
)
84 struct de_dfilter_ctx
*dfctx
= NULL
;
86 dfctx
= de_malloc(c
, sizeof(struct de_dfilter_ctx
));
89 dfctx
->dcmpro
= dcmpro
;
92 codec_init_fn(dfctx
, codec_private_params
);
94 // TODO: How should we handle failure to initialize a codec?
99 void de_dfilter_addbuf(struct de_dfilter_ctx
*dfctx
,
100 const u8
*buf
, i64 buf_len
)
102 if(dfctx
->finished_flag
) return;
104 if(dfctx
->codec_addbuf_fn
&& (buf_len
>0)) {
105 dfctx
->codec_addbuf_fn(dfctx
, buf
, buf_len
);
107 if(dfctx
->dres
->errcode
) {
108 dfctx
->finished_flag
= 1;
113 // Commands: (Commands are not supported by all codecs)
114 // DE_DFILTER_COMMAND_SOFTRESET
115 // Reset the decompressor state. Exact function depends on the codec.
117 // DE_DFILTER_COMMAND_REINITIALIZE
118 // Reinitialize a codec, so you don't have to destroy and recreate it
119 // in order to use it again. Typically used after _finish().
120 // Before using this command, it is okay to change the internal parameters of
121 // the dcmpro and dres given to de_dfilter_create(). You should call
122 // de_dfilter_results_clear or the equivalent if you have already handled
124 void de_dfilter_command(struct de_dfilter_ctx
*dfctx
, int cmd
, UI flags
)
126 // Non-codec-specific things:
128 if(cmd
==DE_DFILTER_COMMAND_REINITIALIZE
) {
129 dfctx
->finished_flag
= 0;
130 dfctx
->dres
->bytes_consumed_valid
= 0;
133 // Codec-specific things:
135 if(dfctx
->codec_command_fn
) {
136 dfctx
->codec_command_fn(dfctx
, cmd
, flags
);
140 // Call this to inform the codec that there are no more compressed bytes.
141 // The codec's 'finish' function should flush any pending output,
142 // and update the decompression results in dfctx->dres.
143 // Some codecs can still be used after this, provided you then call
144 // de_dfilter_command(...,DE_DFILTER_COMMAND_REINITIALIZE).
145 void de_dfilter_finish(struct de_dfilter_ctx
*dfctx
)
147 if(dfctx
->codec_finish_fn
) {
148 dfctx
->codec_finish_fn(dfctx
);
152 void de_dfilter_destroy(struct de_dfilter_ctx
*dfctx
)
158 if(dfctx
->codec_destroy_fn
) {
159 dfctx
->codec_destroy_fn(dfctx
);
165 static int my_dfilter_addslice_buffered_read_cbfn(struct de_bufferedreadctx
*brctx
, const u8
*buf
,
168 struct de_dfilter_ctx
*dfctx
= (struct de_dfilter_ctx
*)brctx
->userdata
;
170 de_dfilter_addbuf(dfctx
, buf
, buf_len
);
171 if(dfctx
->finished_flag
) return 0;
175 void de_dfilter_addslice(struct de_dfilter_ctx
*dfctx
,
176 dbuf
*inf
, i64 pos
, i64 len
)
178 if(dfctx
->finished_flag
) return;
179 dbuf_buffered_read(inf
, pos
, len
,
180 my_dfilter_addslice_buffered_read_cbfn
, (void*)dfctx
);
183 // Use a "pushable" codec in a non-pushable way.
184 void de_dfilter_decompress_oneshot(deark
*c
,
185 dfilter_codec_type codec_init_fn
, void *codec_private_params
,
186 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
187 struct de_dfilter_results
*dres
)
189 struct de_dfilter_ctx
*dfctx
= NULL
;
191 dfctx
= de_dfilter_create(c
, codec_init_fn
, codec_private_params
,
193 dfctx
->input_file_offset
= dcmpri
->pos
;
194 de_dfilter_addslice(dfctx
, dcmpri
->f
, dcmpri
->pos
, dcmpri
->len
);
195 de_dfilter_finish(dfctx
);
196 de_dfilter_destroy(dfctx
);
199 // Trivial "decompression" of uncompressed data.
200 void fmtutil_decompress_uncompressed(deark
*c
, struct de_dfilter_in_params
*dcmpri
,
201 struct de_dfilter_out_params
*dcmpro
, struct de_dfilter_results
*dres
, UI flags
)
206 nbytes_avail
= de_min_int(dcmpri
->len
, dcmpri
->f
->len
- dcmpri
->pos
);
208 if(dcmpro
->len_known
) {
209 len
= dcmpro
->expected_len
;
215 if(len
>nbytes_avail
) len
= nbytes_avail
;
218 dbuf_copy(dcmpri
->f
, dcmpri
->pos
, len
, dcmpro
->f
);
219 dres
->bytes_consumed
= len
;
220 dres
->bytes_consumed_valid
= 1;
223 enum packbits_state_enum
{
224 PACKBITS_STATE_NEUTRAL
= 0,
225 PACKBITS_STATE_COPYING_LITERAL
,
226 PACKBITS_STATE_READING_UNIT_TO_REPEAT
230 size_t nbytes_per_unit
;
231 size_t nbytes_in_unitbuf
;
233 i64 total_nbytes_processed
;
235 enum packbits_state_enum state
;
236 i64 nliteral_bytes_remaining
;
240 static void my_packbits_codec_addbuf(struct de_dfilter_ctx
*dfctx
,
241 const u8
*buf
, i64 buf_len
)
245 struct packbitsctx
*rctx
= (struct packbitsctx
*)dfctx
->codec_private
;
249 for(i
=0; i
<buf_len
; i
++) {
250 if(dfctx
->dcmpro
->len_known
&&
251 (rctx
->nbytes_written
>= dfctx
->dcmpro
->expected_len
))
253 dfctx
->finished_flag
= 1;
258 rctx
->total_nbytes_processed
++;
260 switch(rctx
->state
) {
261 case PACKBITS_STATE_NEUTRAL
: // this is a code byte
262 if(b
>128) { // A compressed run
263 rctx
->repeat_count
= 257 - (i64
)b
;
264 rctx
->state
= PACKBITS_STATE_READING_UNIT_TO_REPEAT
;
266 else if(b
<128) { // An uncompressed run
267 rctx
->nliteral_bytes_remaining
= (1 + (i64
)b
) * (i64
)rctx
->nbytes_per_unit
;
268 rctx
->state
= PACKBITS_STATE_COPYING_LITERAL
;
270 // Else b==128. No-op.
271 // TODO: Some (but not most) ILBM specs say that code 128 is used to
272 // mark the end of compressed data, so maybe there should be options to
273 // tell us what to do when code 128 is encountered.
275 case PACKBITS_STATE_COPYING_LITERAL
: // This byte is uncompressed
276 dbuf_writebyte(dfctx
->dcmpro
->f
, b
);
277 rctx
->nbytes_written
++;
278 rctx
->nliteral_bytes_remaining
--;
279 if(rctx
->nliteral_bytes_remaining
<=0) {
280 rctx
->state
= PACKBITS_STATE_NEUTRAL
;
283 case PACKBITS_STATE_READING_UNIT_TO_REPEAT
:
284 if(rctx
->nbytes_per_unit
==1) { // Optimization for standard PackBits
285 dbuf_write_run(dfctx
->dcmpro
->f
, b
, rctx
->repeat_count
);
286 rctx
->nbytes_written
+= rctx
->repeat_count
;
287 rctx
->state
= PACKBITS_STATE_NEUTRAL
;
290 rctx
->unitbuf
[rctx
->nbytes_in_unitbuf
++] = b
;
291 if(rctx
->nbytes_in_unitbuf
>= rctx
->nbytes_per_unit
) {
294 for(k
=0; k
<rctx
->repeat_count
; k
++) {
295 dbuf_write(dfctx
->dcmpro
->f
, rctx
->unitbuf
, (i64
)rctx
->nbytes_per_unit
);
297 rctx
->nbytes_in_unitbuf
= 0;
298 rctx
->nbytes_written
+= rctx
->repeat_count
* (i64
)rctx
->nbytes_per_unit
;
299 rctx
->state
= PACKBITS_STATE_NEUTRAL
;
307 static void my_packbits_codec_command(struct de_dfilter_ctx
*dfctx
, int cmd
, UI flags
)
309 struct packbitsctx
*rctx
= (struct packbitsctx
*)dfctx
->codec_private
;
311 if(cmd
==DE_DFILTER_COMMAND_SOFTRESET
|| cmd
==DE_DFILTER_COMMAND_REINITIALIZE
) {
312 // "soft reset" - reset the low-level compression state, but don't update
313 // dres, or the total-bytes counters, etc.
314 rctx
->state
= PACKBITS_STATE_NEUTRAL
;
315 rctx
->nbytes_in_unitbuf
= 0;
316 rctx
->nliteral_bytes_remaining
= 0;
317 rctx
->repeat_count
= 0;
319 if(cmd
==DE_DFILTER_COMMAND_REINITIALIZE
) {
320 rctx
->total_nbytes_processed
= 0;
321 rctx
->nbytes_written
= 0;
325 static void my_packbits_codec_finish(struct de_dfilter_ctx
*dfctx
)
327 struct packbitsctx
*rctx
= (struct packbitsctx
*)dfctx
->codec_private
;
330 dfctx
->dres
->bytes_consumed
= rctx
->total_nbytes_processed
;
331 dfctx
->dres
->bytes_consumed_valid
= 1;
334 static void my_packbits_codec_destroy(struct de_dfilter_ctx
*dfctx
)
336 struct packbitsctx
*rctx
= (struct packbitsctx
*)dfctx
->codec_private
;
339 de_free(dfctx
->c
, rctx
);
341 dfctx
->codec_private
= NULL
;
344 // codec_private_params: de_packbits_params, or NULL for default params.
345 void dfilter_packbits_codec(struct de_dfilter_ctx
*dfctx
, void *codec_private_params
)
347 struct packbitsctx
*rctx
= NULL
;
348 struct de_packbits_params
*pbparams
= (struct de_packbits_params
*)codec_private_params
;
350 rctx
= de_malloc(dfctx
->c
, sizeof(struct packbitsctx
));
351 rctx
->nbytes_per_unit
= 1;
352 if(pbparams
&& pbparams
->is_packbits16
) {
353 rctx
->nbytes_per_unit
= 2;
355 dfctx
->codec_private
= (void*)rctx
;
356 dfctx
->codec_addbuf_fn
= my_packbits_codec_addbuf
;
357 dfctx
->codec_finish_fn
= my_packbits_codec_finish
;
358 dfctx
->codec_command_fn
= my_packbits_codec_command
;
359 dfctx
->codec_destroy_fn
= my_packbits_codec_destroy
;
362 void fmtutil_decompress_packbits_ex(deark
*c
, struct de_dfilter_in_params
*dcmpri
,
363 struct de_dfilter_out_params
*dcmpro
, struct de_dfilter_results
*dres
,
364 struct de_packbits_params
*pbparams
)
366 de_dfilter_decompress_oneshot(c
, dfilter_packbits_codec
, (void*)pbparams
,
367 dcmpri
, dcmpro
, dres
);
370 // Returns 0 on failure (currently impossible).
371 int fmtutil_decompress_packbits(dbuf
*f
, i64 pos1
, i64 len
,
372 dbuf
*unc_pixels
, i64
*cmpr_bytes_consumed
)
374 struct de_dfilter_results dres
;
375 struct de_dfilter_in_params dcmpri
;
376 struct de_dfilter_out_params dcmpro
;
378 if(cmpr_bytes_consumed
) *cmpr_bytes_consumed
= 0;
379 de_dfilter_init_objects(f
->c
, &dcmpri
, &dcmpro
, &dres
);
384 dcmpro
.f
= unc_pixels
;
385 if(unc_pixels
->has_len_limit
) {
386 dcmpro
.len_known
= 1;
387 dcmpro
.expected_len
= unc_pixels
->len_limit
- unc_pixels
->len
;
390 de_dfilter_decompress_oneshot(f
->c
, dfilter_packbits_codec
, NULL
,
391 &dcmpri
, &dcmpro
, &dres
);
393 if(cmpr_bytes_consumed
&& dres
.bytes_consumed_valid
) {
394 *cmpr_bytes_consumed
= dres
.bytes_consumed
;
396 if(dres
.errcode
!= 0) return 0;
400 void fmtutil_decompress_rle90_ex(deark
*c
, struct de_dfilter_in_params
*dcmpri
,
401 struct de_dfilter_out_params
*dcmpro
, struct de_dfilter_results
*dres
,
404 de_dfilter_decompress_oneshot(c
, dfilter_rle90_codec
, NULL
,
405 dcmpri
, dcmpro
, dres
);
409 i64 total_nbytes_processed
;
412 int countcode_pending
;
415 static void my_rle90_codec_addbuf(struct de_dfilter_ctx
*dfctx
,
416 const u8
*buf
, i64 buf_len
)
420 struct rle90ctx
*rctx
= (struct rle90ctx
*)dfctx
->codec_private
;
424 for(i
=0; i
<buf_len
; i
++) {
425 if(dfctx
->dcmpro
->len_known
&&
426 (rctx
->nbytes_written
>= dfctx
->dcmpro
->expected_len
))
428 dfctx
->finished_flag
= 1;
433 rctx
->total_nbytes_processed
++;
435 if(rctx
->countcode_pending
&& b
==0) {
436 // Not RLE, just an escaped 0x90 byte.
437 dbuf_writebyte(dfctx
->dcmpro
->f
, 0x90);
438 rctx
->nbytes_written
++;
439 rctx
->last_output_byte
= 0x90;
440 rctx
->countcode_pending
= 0;
442 else if(rctx
->countcode_pending
) {
445 // RLE. We already emitted one byte (because the byte to repeat
446 // comes before the repeat count), so write countcode-1 bytes.
448 if(dfctx
->dcmpro
->len_known
&&
449 (rctx
->nbytes_written
+count
> dfctx
->dcmpro
->expected_len
))
451 count
= dfctx
->dcmpro
->expected_len
- rctx
->nbytes_written
;
453 dbuf_write_run(dfctx
->dcmpro
->f
, rctx
->last_output_byte
, count
);
454 rctx
->nbytes_written
+= count
;
456 rctx
->countcode_pending
= 0;
459 rctx
->countcode_pending
= 1;
462 dbuf_writebyte(dfctx
->dcmpro
->f
, b
);
463 rctx
->nbytes_written
++;
464 rctx
->last_output_byte
= b
;
469 static void my_rle90_codec_finish(struct de_dfilter_ctx
*dfctx
)
471 struct rle90ctx
*rctx
= (struct rle90ctx
*)dfctx
->codec_private
;
474 dfctx
->dres
->bytes_consumed
= rctx
->total_nbytes_processed
;
475 dfctx
->dres
->bytes_consumed_valid
= 1;
478 static void my_rle90_codec_destroy(struct de_dfilter_ctx
*dfctx
)
480 struct rle90ctx
*rctx
= (struct rle90ctx
*)dfctx
->codec_private
;
483 de_free(dfctx
->c
, rctx
);
485 dfctx
->codec_private
= NULL
;
488 // RLE algorithm occasionally called "RLE90". Variants of this are used by
489 // BinHex, ARC, StuffIt, and others.
490 // codec_private_params: Unused, must be NULL.
491 void dfilter_rle90_codec(struct de_dfilter_ctx
*dfctx
, void *codec_private_params
)
493 struct rle90ctx
*rctx
= NULL
;
495 rctx
= de_malloc(dfctx
->c
, sizeof(struct rle90ctx
));
496 dfctx
->codec_private
= (void*)rctx
;
497 dfctx
->codec_addbuf_fn
= my_rle90_codec_addbuf
;
498 dfctx
->codec_finish_fn
= my_rle90_codec_finish
;
499 dfctx
->codec_destroy_fn
= my_rle90_codec_destroy
;
505 struct de_dfilter_out_params
*dcmpro
;
506 struct de_lz77buffer
*ringbuf
;
509 static void szdd_lz77buf_writebytecb(struct de_lz77buffer
*rb
, const u8 n
)
511 struct szdd_ctx
*sctx
= (struct szdd_ctx
*)rb
->userdata
;
513 if(sctx
->stop_flag
) return;
514 if(sctx
->dcmpro
->len_known
) {
515 if(sctx
->nbytes_written
>= sctx
->dcmpro
->expected_len
) {
521 dbuf_writebyte(sctx
->dcmpro
->f
, n
);
522 sctx
->nbytes_written
++;
525 static void szdd_init_window_default(struct de_lz77buffer
*ringbuf
)
527 de_lz77buffer_clear(ringbuf
, 0x20);
528 ringbuf
->curpos
= 4096 - 16;
531 static void szdd_init_window_lz5(struct de_lz77buffer
*ringbuf
)
536 de_zeromem(ringbuf
->buf
, 4096);
538 for(i
=1; i
<256; i
++) {
539 de_memset(&ringbuf
->buf
[wpos
], i
, 13);
542 for(i
=0; i
<256; i
++) {
543 ringbuf
->buf
[wpos
++] = i
;
545 for(i
=255; i
>=0; i
--) {
546 ringbuf
->buf
[wpos
++] = i
;
549 de_memset(&ringbuf
->buf
[wpos
], 0x20, 110);
551 ringbuf
->curpos
= (UI
)wpos
;
554 // Partially based on libmspack's format documentation at
555 // <https://www.cabextract.org.uk/libmspack/doc/szdd_kwaj_format.html>
557 // 0x1: LArc lz5 mode
558 void fmtutil_decompress_szdd(deark
*c
, struct de_dfilter_in_params
*dcmpri
,
559 struct de_dfilter_out_params
*dcmpro
, struct de_dfilter_results
*dres
, unsigned int flags
)
561 i64 pos
= dcmpri
->pos
;
562 i64 endpos
= dcmpri
->pos
+ dcmpri
->len
;
563 struct szdd_ctx
*sctx
= NULL
;
565 sctx
= de_malloc(c
, sizeof(struct szdd_ctx
));
566 sctx
->dcmpro
= dcmpro
;
567 sctx
->ringbuf
= de_lz77buffer_create(c
, 4096);
568 sctx
->ringbuf
->writebyte_cb
= szdd_lz77buf_writebytecb
;
569 sctx
->ringbuf
->userdata
= (void*)sctx
;
572 szdd_init_window_lz5(sctx
->ringbuf
);
575 szdd_init_window_default(sctx
->ringbuf
);
582 if(pos
+1 > endpos
) goto unc_done
; // Out of input data
583 control
= (UI
)dbuf_getbyte(dcmpri
->f
, pos
++);
585 for(cbit
=0x01; cbit
<=0x80; cbit
<<=1) {
586 if(control
& cbit
) { // literal
589 if(pos
+1 > endpos
) goto unc_done
;
590 b
= dbuf_getbyte(dcmpri
->f
, pos
++);
591 de_lz77buffer_add_literal_byte(sctx
->ringbuf
, b
);
592 if(sctx
->stop_flag
) goto unc_done
;
599 if(pos
+2 > endpos
) goto unc_done
;
600 x0
= (UI
)dbuf_getbyte_p(dcmpri
->f
, &pos
);
601 x1
= (UI
)dbuf_getbyte_p(dcmpri
->f
, &pos
);
602 matchpos
= ((x1
& 0xf0) << 4) | x0
;
603 matchlen
= (x1
& 0x0f) + 3;
604 de_lz77buffer_copy_from_hist(sctx
->ringbuf
, matchpos
, matchlen
);
605 if(sctx
->stop_flag
) goto unc_done
;
611 dres
->bytes_consumed_valid
= 1;
612 dres
->bytes_consumed
= pos
- dcmpri
->pos
;
614 de_lz77buffer_destroy(c
, sctx
->ringbuf
);
619 //======================= hlp_lz77 =======================
624 struct de_dfilter_out_params
*dcmpro
;
625 struct de_lz77buffer
*ringbuf
;
628 static void hlplz77_lz77buf_writebytecb(struct de_lz77buffer
*rb
, const u8 n
)
630 struct hlplz77ctx
*sctx
= (struct hlplz77ctx
*)rb
->userdata
;
632 if(sctx
->stop_flag
) return;
633 if(sctx
->dcmpro
->len_known
) {
634 if(sctx
->nbytes_written
>= sctx
->dcmpro
->expected_len
) {
640 dbuf_writebyte(sctx
->dcmpro
->f
, n
);
641 sctx
->nbytes_written
++;
644 // This is very similar to the mscompress SZDD algorithm, but
645 // gratuitously different.
646 void fmtutil_hlp_lz77_codectype1(deark
*c
, struct de_dfilter_in_params
*dcmpri
,
647 struct de_dfilter_out_params
*dcmpro
, struct de_dfilter_results
*dres
,
648 void *codec_private_params
)
650 i64 pos
= dcmpri
->pos
;
651 i64 endpos
= dcmpri
->pos
+ dcmpri
->len
;
652 struct hlplz77ctx
*sctx
= NULL
;
654 sctx
= de_malloc(c
, sizeof(struct hlplz77ctx
));
655 sctx
->dcmpro
= dcmpro
;
656 sctx
->ringbuf
= de_lz77buffer_create(c
, 4096);
657 sctx
->ringbuf
->writebyte_cb
= hlplz77_lz77buf_writebytecb
;
658 sctx
->ringbuf
->userdata
= (void*)sctx
;
659 de_lz77buffer_clear(sctx
->ringbuf
, 0x20);
665 if(pos
+1 > endpos
) goto unc_done
; // Out of input data
666 control
= (UI
)dbuf_getbyte(dcmpri
->f
, pos
++);
668 for(cbit
=0x01; cbit
<=0x80; cbit
<<=1) {
669 if((control
& cbit
)==0) { // literal
672 if(pos
+1 > endpos
) goto unc_done
;
673 b
= dbuf_getbyte(dcmpri
->f
, pos
++);
674 de_lz77buffer_add_literal_byte(sctx
->ringbuf
, b
);
675 if(sctx
->stop_flag
) goto unc_done
;
682 if(pos
+2 > endpos
) goto unc_done
;
683 x
= (UI
)dbuf_getu16le_p(dcmpri
->f
, &pos
);
684 matchlen
= (x
>>12) + 3;
685 matchpos
= sctx
->ringbuf
->curpos
- ((x
& 0x0fff)+1);
686 de_lz77buffer_copy_from_hist(sctx
->ringbuf
, matchpos
, matchlen
);
687 if(sctx
->stop_flag
) goto unc_done
;
693 dres
->bytes_consumed_valid
= 1;
694 dres
->bytes_consumed
= pos
- dcmpri
->pos
;
696 de_lz77buffer_destroy(c
, sctx
->ringbuf
);
701 //========================================================
703 struct my_2layer_userdata
{
704 struct de_dfilter_ctx
*dfctx_codec2
;
705 i64 intermediate_nbytes
;
708 static void my_2layer_write_cb(dbuf
*f
, void *userdata
,
709 const u8
*buf
, i64 size
)
711 struct my_2layer_userdata
*u
= (struct my_2layer_userdata
*)userdata
;
713 de_dfilter_addbuf(u
->dfctx_codec2
, buf
, size
);
714 u
->intermediate_nbytes
+= size
;
717 // If src indicates error and dst does not, copy the error from src to dst.
718 void de_dfilter_transfer_error(deark
*c
, struct de_dfilter_results
*src
,
719 struct de_dfilter_results
*dst
)
721 if(src
->errcode
&& !dst
->errcode
) {
722 dst
->errcode
= src
->errcode
;
723 de_strlcpy(dst
->errmsg
, src
->errmsg
, sizeof(dst
->errmsg
));
727 // Decompress an arbitrary two-layer compressed format.
728 // tlp->codec1* is the first one that will be used during decompression (i.e. the second
729 // method used during *compression*).
730 void de_dfilter_decompress_two_layer(deark
*c
, struct de_dcmpr_two_layer_params
*tlp
)
732 dbuf
*outf_codec1
= NULL
;
733 struct de_dfilter_out_params dcmpro_codec1
;
734 struct de_dfilter_results dres_codec2
;
735 struct my_2layer_userdata u
;
736 struct de_dfilter_ctx
*dfctx_codec2
= NULL
;
738 de_dfilter_init_objects(c
, NULL
, &dcmpro_codec1
, NULL
);
739 de_dfilter_init_objects(c
, NULL
, NULL
, &dres_codec2
);
740 de_zeromem(&u
, sizeof(struct my_2layer_userdata
));
742 // Make a custom dbuf. The output from the first decompressor will be written
743 // to it, and it will relay that output to the second decompressor.
744 outf_codec1
= dbuf_create_custom_dbuf(c
, 0, 0);
745 outf_codec1
->userdata_for_customwrite
= (void*)&u
;
746 outf_codec1
->customwrite_fn
= my_2layer_write_cb
;
748 dcmpro_codec1
.f
= outf_codec1
;
749 if(tlp
->intermed_len_known
) {
750 dcmpro_codec1
.len_known
= 1;
751 dcmpro_codec1
.expected_len
= tlp
->intermed_expected_len
;
754 dcmpro_codec1
.len_known
= 0;
755 dcmpro_codec1
.expected_len
= 0;
758 dfctx_codec2
= de_dfilter_create(c
, tlp
->codec2
, tlp
->codec2_private_params
, tlp
->dcmpro
, &dres_codec2
);
759 u
.dfctx_codec2
= dfctx_codec2
;
761 // The first codec in the chain does not need the advanced (de_dfilter_create) API.
762 if(tlp
->codec1_type1
) {
763 tlp
->codec1_type1(c
, tlp
->dcmpri
, &dcmpro_codec1
, tlp
->dres
, tlp
->codec1_private_params
);
766 de_dfilter_decompress_oneshot(c
, tlp
->codec1_pushable
, tlp
->codec1_private_params
,
767 tlp
->dcmpri
, &dcmpro_codec1
, tlp
->dres
);
769 de_dfilter_finish(dfctx_codec2
);
771 if(tlp
->dres
->errcode
) goto done
;
772 de_dbg2(c
, "size after intermediate decompression: %"I64_FMT
, u
.intermediate_nbytes
);
774 if(dres_codec2
.errcode
) {
775 // An error occurred in codec2, and not in codec1.
776 // Copy the error info to the dres that will be returned to the caller.
777 de_dfilter_transfer_error(c
, &dres_codec2
, tlp
->dres
);
782 de_dfilter_destroy(dfctx_codec2
);
783 dbuf_close(outf_codec1
);
786 struct de_lz77buffer
*de_lz77buffer_create(deark
*c
, UI bufsize
)
788 struct de_lz77buffer
*rb
;
790 rb
= de_malloc(c
, sizeof(struct de_lz77buffer
));
791 rb
->buf
= de_malloc(c
, (i64
)bufsize
);
792 rb
->bufsize
= bufsize
;
793 rb
->mask
= bufsize
- 1;
797 void de_lz77buffer_destroy(deark
*c
, struct de_lz77buffer
*rb
)
804 // Set all bytes to the same value, and reset the current position to 0.
805 void de_lz77buffer_clear(struct de_lz77buffer
*rb
, UI val
)
807 de_memset(rb
->buf
, val
, rb
->bufsize
);
811 void de_lz77buffer_set_curpos(struct de_lz77buffer
*rb
, UI newpos
)
813 rb
->curpos
= newpos
& rb
->mask
;
816 void de_lz77buffer_add_literal_byte(struct de_lz77buffer
*rb
, u8 b
)
818 rb
->writebyte_cb(rb
, b
);
819 rb
->buf
[rb
->curpos
] = b
;
820 rb
->curpos
= (rb
->curpos
+1) & rb
->mask
;
823 void de_lz77buffer_copy_from_hist(struct de_lz77buffer
*rb
,
824 UI startpos
, UI count
)
829 frompos
= startpos
& rb
->mask
;
830 for(i
=0; i
<count
; i
++) {
831 de_lz77buffer_add_literal_byte(rb
, rb
->buf
[frompos
]);
832 frompos
= (frompos
+1) & rb
->mask
;
836 ///////////////////////////////////
837 // "Squeeze"-style Huffman decoder
839 // The first node you add allows for 2 symbols, and each additional node adds 1.
840 // So in general, you need one less node than the number of symbols.
841 // The max number of symbols is 257: 256 byte values, plus a special "stop" code.
842 #define SQUEEZE_MAX_NODES 256
844 struct squeeze_data_item
{
848 struct squeeze_node
{
850 struct squeeze_data_item child
[2];
855 struct de_dfilter_in_params
*dcmpri
;
856 struct de_dfilter_out_params
*dcmpro
;
857 struct de_dfilter_results
*dres
;
861 struct fmtutil_huffman_decoder
*ht
;
862 struct de_bitreader bitrd
;
863 struct squeeze_node tmpnodes
[SQUEEZE_MAX_NODES
]; // Temporary use when decoding the node table
866 static void squeeze_interpret_node(struct squeeze_ctx
*sqctx
,
867 i64 nodenum
, u64 currcode
, UI currcode_nbits
);
869 static void squeeze_interpret_dval(struct squeeze_ctx
*sqctx
,
870 i16 dval
, u64 currcode
, UI currcode_nbits
)
874 if(dval
>=0) { // a pointer to a node
875 if((i64
)dval
< sqctx
->nodecount
) {
876 squeeze_interpret_node(sqctx
, (i64
)dval
, currcode
, currcode_nbits
);
879 else if(dval
>=(-257) && dval
<=(-1)) {
880 fmtutil_huffman_valtype adj_value
;
882 // -257 => 256 (stop code)
883 // -256 => 255 (byte value)
884 // -255 => 254 (byte value)
886 // -1 => 0 (byte value)
887 adj_value
= -(((fmtutil_huffman_valtype
)dval
)+1);
888 if(sqctx
->c
->debug_level
>=2) {
889 de_dbg3(sqctx
->c
, "code: \"%s\" = %d",
890 de_print_base2_fixed(b2buf
, sizeof(b2buf
), currcode
, currcode_nbits
),
893 fmtutil_huffman_add_code(sqctx
->c
, sqctx
->ht
->bk
, currcode
, currcode_nbits
, adj_value
);
895 // TODO: Report errors?
898 static void squeeze_interpret_node(struct squeeze_ctx
*sqctx
,
899 i64 nodenum
, u64 currcode
, UI currcode_nbits
)
901 // TODO: Report errors?
902 if(nodenum
<0 || nodenum
>=sqctx
->nodecount
) return;
903 if(sqctx
->tmpnodes
[nodenum
].in_use
) return; // Loops are bad
904 if(currcode_nbits
>=FMTUTIL_HUFFMAN_MAX_CODE_LENGTH
) return;
906 sqctx
->tmpnodes
[nodenum
].in_use
= 1;
907 squeeze_interpret_dval(sqctx
, sqctx
->tmpnodes
[nodenum
].child
[0].dval
, currcode
<<1, currcode_nbits
+1);
908 squeeze_interpret_dval(sqctx
, sqctx
->tmpnodes
[nodenum
].child
[1].dval
, ((currcode
<<1) | 1), currcode_nbits
+1);
909 sqctx
->tmpnodes
[nodenum
].in_use
= 0;
912 static int squeeze_process_nodetable(deark
*c
, struct squeeze_ctx
*sqctx
)
916 // It feels a little wrong to go to the trouble of decoding this node table into
917 // the form required by our Huffman library's API, when we know it's going to
918 // just convert it back into a table much like it was originally. Maybe there
919 // should be a better way to do this.
920 de_dbg3(c
, "interpreted huffman codebook:");
922 squeeze_interpret_node(sqctx
, 0, 0, 0);
923 de_dbg_indent(c
, -1);
925 if(c
->debug_level
>=4) {
926 fmtutil_huffman_dump(c
, sqctx
->ht
);
933 static int squeeze_read_nodetable(deark
*c
, struct squeeze_ctx
*sqctx
)
938 if(sqctx
->bitrd
.curpos
+2 > sqctx
->bitrd
.endpos
) goto done
;
939 sqctx
->nodecount
= dbuf_getu16le_p(sqctx
->dcmpri
->f
, &sqctx
->bitrd
.curpos
);
940 de_dbg(c
, "node count: %d", (int)sqctx
->nodecount
);
941 if(sqctx
->nodecount
> SQUEEZE_MAX_NODES
) {
942 de_dfilter_set_errorf(c
, sqctx
->dres
, sqctx
->modname
,
943 "Invalid node count");
947 de_dbg2(c
, "node table nodes at %"I64_FMT
, sqctx
->bitrd
.curpos
);
949 for(k
=0; k
<sqctx
->nodecount
; k
++) {
950 sqctx
->tmpnodes
[k
].child
[0].dval
= (i16
)dbuf_geti16le_p(sqctx
->dcmpri
->f
, &sqctx
->bitrd
.curpos
);
951 sqctx
->tmpnodes
[k
].child
[1].dval
= (i16
)dbuf_geti16le_p(sqctx
->dcmpri
->f
, &sqctx
->bitrd
.curpos
);
952 if(c
->debug_level
>= 2) {
953 de_dbg2(c
, "nodetable[%d]: %d %d", (int)k
, (int)sqctx
->tmpnodes
[k
].child
[0].dval
,
954 (int)sqctx
->tmpnodes
[k
].child
[1].dval
);
957 de_dbg_indent(c
, -1);
958 if(sqctx
->bitrd
.curpos
> sqctx
->bitrd
.endpos
) goto done
;
960 if(!squeeze_process_nodetable(c
, sqctx
)) goto done
;
967 static int squeeze_read_codes(deark
*c
, struct squeeze_ctx
*sqctx
)
971 de_dbg(c
, "huffman-compressed data at %"I64_FMT
, sqctx
->bitrd
.curpos
);
972 sqctx
->bitrd
.bbll
.is_lsb
= 1;
973 de_bitbuf_lowlevel_empty(&sqctx
->bitrd
.bbll
);
975 if(fmtutil_huffman_get_max_bits(sqctx
->ht
->bk
) < 1) {
976 // Empty tree? Assume this is an empty file.
983 fmtutil_huffman_valtype val
= 0;
985 ret
= fmtutil_huffman_read_next_value(sqctx
->ht
->bk
, &sqctx
->bitrd
, &val
, NULL
);
986 if(!ret
|| val
<0 || val
>256) {
987 if(sqctx
->bitrd
.eof_flag
) {
991 de_dfilter_set_errorf(c
, sqctx
->dres
, sqctx
->modname
, "Huffman decode error");
996 if(val
>=0 && val
<=255) {
997 dbuf_writebyte(sqctx
->dcmpro
->f
, (u8
)val
);
998 sqctx
->nbytes_written
++;
999 if(sqctx
->dcmpro
->len_known
&& (sqctx
->nbytes_written
>= sqctx
->dcmpro
->expected_len
)) {
1004 else if(val
==256) { // STOP code
1014 void fmtutil_huff_squeeze_codectype1(deark
*c
, struct de_dfilter_in_params
*dcmpri
,
1015 struct de_dfilter_out_params
*dcmpro
, struct de_dfilter_results
*dres
,
1016 void *codec_private_params
)
1018 struct squeeze_ctx
*sqctx
= NULL
;
1021 sqctx
= de_malloc(c
, sizeof(struct squeeze_ctx
));
1023 sqctx
->modname
= "unsqueeze";
1024 sqctx
->dcmpri
= dcmpri
;
1025 sqctx
->dcmpro
= dcmpro
;
1028 sqctx
->bitrd
.f
= dcmpri
->f
;
1029 sqctx
->bitrd
.curpos
= dcmpri
->pos
;
1030 sqctx
->bitrd
.endpos
= dcmpri
->pos
+ dcmpri
->len
;
1032 sqctx
->ht
= fmtutil_huffman_create_decoder(c
, 257, 257);
1034 if(!squeeze_read_nodetable(c
, sqctx
)) goto done
;
1035 if(!squeeze_read_codes(c
, sqctx
)) goto done
;
1037 dres
->bytes_consumed
= sqctx
->bitrd
.curpos
- dcmpri
->pos
;
1038 if(dres
->bytes_consumed
> dcmpri
->len
) {
1039 dres
->bytes_consumed
= dcmpri
->len
;
1041 dres
->bytes_consumed_valid
= 1;
1045 if(!ok
|| dres
->errcode
) {
1046 de_dfilter_set_errorf(c
, dres
, sqctx
->modname
, "Squeeze decompression failed");
1050 fmtutil_huffman_destroy_decoder(c
, sqctx
->ht
);