Cleaned up some Huffman-related debug messages
[deark.git] / src / fmtutil-cmpr.c
blob63cd7f2889a0cd4ee38ae2e2106f800ad07d8eaf
1 // This file is part of Deark.
2 // Copyright (C) 2018 Jason Summers
3 // See the file COPYING for terms of use.
5 // Decompression, etc.
7 #define DE_NOT_IN_MODULE
8 #include "deark-config.h"
9 #include "deark-private.h"
10 #include "deark-fmtutil.h"
12 // Returns a message that is valid until the next operation on dres.
13 const char *de_dfilter_get_errmsg(deark *c, struct de_dfilter_results *dres)
15 if(dres->errcode==0) {
16 return "No error";
18 if(dres->errmsg[0]) {
19 return dres->errmsg;
21 return "Unspecified error";
24 // Initialize or reset a dfilter results struct
25 void de_dfilter_results_clear(deark *c, struct de_dfilter_results *dres)
27 dres->errcode = 0;
28 dres->bytes_consumed_valid = 0;
29 dres->bytes_consumed = 0;
30 dres->errmsg[0] = '\0';
33 // Note: It is also okay to init these objects by zeroing out their bytes.
34 void de_dfilter_init_objects(deark *c, struct de_dfilter_in_params *dcmpri,
35 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres)
37 if(dcmpri)
38 de_zeromem(dcmpri, sizeof(struct de_dfilter_in_params));
39 if(dcmpro)
40 de_zeromem(dcmpro, sizeof(struct de_dfilter_out_params));
41 if(dres)
42 de_dfilter_results_clear(c, dres);
45 void de_dfilter_set_errorf(deark *c, struct de_dfilter_results *dres, const char *modname,
46 const char *fmt, ...)
48 va_list ap;
50 if(dres->errcode != 0) return; // Only record the first error
51 dres->errcode = 1;
53 va_start(ap, fmt);
54 if(modname) {
55 char tmpbuf[80];
57 de_vsnprintf(tmpbuf, sizeof(tmpbuf), fmt, ap);
58 de_snprintf(dres->errmsg, sizeof(dres->errmsg), "[%s] %s", modname, tmpbuf);
60 else {
61 de_vsnprintf(dres->errmsg, sizeof(dres->errmsg), fmt, ap);
63 va_end(ap);
66 void de_dfilter_set_generic_error(deark *c, struct de_dfilter_results *dres, const char *modname)
68 if(dres->errcode != 0) return;
69 de_dfilter_set_errorf(c, dres, modname, "Unspecified error");
72 // This is a decompression API that uses a "push" input model. The client
73 // sends data to the codec as the data becomes available.
74 // (The client must still be able to consume any amount of output data
75 // immediately.)
76 // This model makes it easier to chain multiple codecs together, and to handle
77 // input data that is not contiguous.
78 // TODO: There's no reason this couldn't be extended to work with "type1" codecs.
80 struct de_dfilter_ctx *de_dfilter_create(deark *c,
81 dfilter_codec_type codec_init_fn, void *codec_private_params,
82 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres)
84 struct de_dfilter_ctx *dfctx = NULL;
86 dfctx = de_malloc(c, sizeof(struct de_dfilter_ctx));
87 dfctx->c = c;
88 dfctx->dres = dres;
89 dfctx->dcmpro = dcmpro;
91 if(codec_init_fn) {
92 codec_init_fn(dfctx, codec_private_params);
94 // TODO: How should we handle failure to initialize a codec?
96 return dfctx;
99 void de_dfilter_addbuf(struct de_dfilter_ctx *dfctx,
100 const u8 *buf, i64 buf_len)
102 if(dfctx->codec_addbuf_fn && (buf_len>0)) {
103 dfctx->codec_addbuf_fn(dfctx, buf, buf_len);
107 // Commands: (Commands are not supported by all codecs)
108 // DE_DFILTER_COMMAND_SOFTRESET
109 // Reset the decompressor state. Exact function depends on the codec.
111 // DE_DFILTER_COMMAND_REINITIALIZE
112 // Reinitialize a codec, so you don't have to destroy and recreate it in
113 // in order to use it again. Typically used after _finish().
114 // Before using this command, it is okay to change the internal paramters of
115 // the dcmpro and dres given to de_dfilter_create(). You should call
116 // de_dfilter_results_clear or the equivalent if you have already handled
117 // previous errors.
118 void de_dfilter_command(struct de_dfilter_ctx *dfctx, int cmd, UI flags)
120 // Non-codec-specific things:
122 if(cmd==DE_DFILTER_COMMAND_REINITIALIZE) {
123 dfctx->finished_flag = 0;
124 dfctx->dres->bytes_consumed_valid = 0;
127 // Codec-specific things:
129 if(dfctx->codec_command_fn) {
130 dfctx->codec_command_fn(dfctx, cmd);
134 // Call this to inform the codec that there are no more compressed bytes.
135 // The codec's 'finish' function should flush any pending output,
136 // and update the decompression results in dfctx->dres.
137 // Some codecs can still be used after this, provided you then call
138 // de_dfilter_command(...,DE_DFILTER_COMMAND_REINITIALIZE).
139 void de_dfilter_finish(struct de_dfilter_ctx *dfctx)
141 if(dfctx->codec_finish_fn) {
142 dfctx->codec_finish_fn(dfctx);
146 void de_dfilter_destroy(struct de_dfilter_ctx *dfctx)
148 deark *c;
150 if(!dfctx) return;
151 c = dfctx->c;
152 if(dfctx->codec_destroy_fn) {
153 dfctx->codec_destroy_fn(dfctx);
156 de_free(c, dfctx);
159 static int my_dfilter_addslice_buffered_read_cbfn(struct de_bufferedreadctx *brctx, const u8 *buf,
160 i64 buf_len)
162 struct de_dfilter_ctx *dfctx = (struct de_dfilter_ctx *)brctx->userdata;
164 de_dfilter_addbuf(dfctx, buf, buf_len);
165 if(dfctx->finished_flag) return 0;
166 return 1;
169 void de_dfilter_addslice(struct de_dfilter_ctx *dfctx,
170 dbuf *inf, i64 pos, i64 len)
172 dbuf_buffered_read(inf, pos, len,
173 my_dfilter_addslice_buffered_read_cbfn, (void*)dfctx);
176 // Use a "pushable" codec in a non-pushable way.
177 void de_dfilter_decompress_oneshot(deark *c,
178 dfilter_codec_type codec_init_fn, void *codec_private_params,
179 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
180 struct de_dfilter_results *dres)
182 struct de_dfilter_ctx *dfctx = NULL;
184 dfctx = de_dfilter_create(c, codec_init_fn, codec_private_params,
185 dcmpro, dres);
186 de_dfilter_addslice(dfctx, dcmpri->f, dcmpri->pos, dcmpri->len);
187 de_dfilter_finish(dfctx);
188 de_dfilter_destroy(dfctx);
191 // Trivial "decompression" of uncompressed data.
192 void fmtutil_decompress_uncompressed(deark *c, struct de_dfilter_in_params *dcmpri,
193 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres, UI flags)
195 i64 len;
196 i64 nbytes_avail;
198 nbytes_avail = de_min_int(dcmpri->len, dcmpri->f->len - dcmpri->pos);
200 if(dcmpro->len_known) {
201 len = dcmpro->expected_len;
203 else {
204 len = dcmpri->len;
207 if(len>nbytes_avail) len = nbytes_avail;
208 if(len<0) len = 0;
210 dbuf_copy(dcmpri->f, dcmpri->pos, len, dcmpro->f);
211 dres->bytes_consumed = len;
212 dres->bytes_consumed_valid = 1;
215 enum packbits_state_enum {
216 PACKBITS_STATE_NEUTRAL = 0,
217 PACKBITS_STATE_COPYING_LITERAL,
218 PACKBITS_STATE_READING_UNIT_TO_REPEAT
221 struct packbitsctx {
222 size_t nbytes_per_unit;
223 size_t nbytes_in_unitbuf;
224 u8 unitbuf[2];
225 i64 total_nbytes_processed;
226 i64 nbytes_written;
227 enum packbits_state_enum state;
228 i64 nliteral_bytes_remaining;
229 i64 repeat_count;
232 static void my_packbits_codec_addbuf(struct de_dfilter_ctx *dfctx,
233 const u8 *buf, i64 buf_len)
235 int i;
236 u8 b;
237 struct packbitsctx *rctx = (struct packbitsctx*)dfctx->codec_private;
239 if(!rctx) return;
241 for(i=0; i<buf_len; i++) {
242 if(dfctx->dcmpro->len_known &&
243 (rctx->nbytes_written >= dfctx->dcmpro->expected_len))
245 dfctx->finished_flag = 1;
246 break;
249 b = buf[i];
250 rctx->total_nbytes_processed++;
252 switch(rctx->state) {
253 case PACKBITS_STATE_NEUTRAL: // this is a code byte
254 if(b>128) { // A compressed run
255 rctx->repeat_count = 257 - (i64)b;
256 rctx->state = PACKBITS_STATE_READING_UNIT_TO_REPEAT;
258 else if(b<128) { // An uncompressed run
259 rctx->nliteral_bytes_remaining = (1 + (i64)b) * (i64)rctx->nbytes_per_unit;
260 rctx->state = PACKBITS_STATE_COPYING_LITERAL;
262 // Else b==128. No-op.
263 // TODO: Some (but not most) ILBM specs say that code 128 is used to
264 // mark the end of compressed data, so maybe there should be options to
265 // tell us what to do when code 128 is encountered.
266 break;
267 case PACKBITS_STATE_COPYING_LITERAL: // This byte is uncompressed
268 dbuf_writebyte(dfctx->dcmpro->f, b);
269 rctx->nbytes_written++;
270 rctx->nliteral_bytes_remaining--;
271 if(rctx->nliteral_bytes_remaining<=0) {
272 rctx->state = PACKBITS_STATE_NEUTRAL;
274 break;
275 case PACKBITS_STATE_READING_UNIT_TO_REPEAT:
276 if(rctx->nbytes_per_unit==1) { // Optimization for standard PackBits
277 dbuf_write_run(dfctx->dcmpro->f, b, rctx->repeat_count);
278 rctx->nbytes_written += rctx->repeat_count;
279 rctx->state = PACKBITS_STATE_NEUTRAL;
281 else {
282 rctx->unitbuf[rctx->nbytes_in_unitbuf++] = b;
283 if(rctx->nbytes_in_unitbuf >= rctx->nbytes_per_unit) {
284 i64 k;
286 for(k=0; k<rctx->repeat_count; k++) {
287 dbuf_write(dfctx->dcmpro->f, rctx->unitbuf, (i64)rctx->nbytes_per_unit);
289 rctx->nbytes_in_unitbuf = 0;
290 rctx->nbytes_written += rctx->repeat_count * (i64)rctx->nbytes_per_unit;
291 rctx->state = PACKBITS_STATE_NEUTRAL;
294 break;
299 static void my_packbits_codec_command(struct de_dfilter_ctx *dfctx, int cmd)
301 struct packbitsctx *rctx = (struct packbitsctx*)dfctx->codec_private;
303 if(cmd==DE_DFILTER_COMMAND_SOFTRESET || cmd==DE_DFILTER_COMMAND_REINITIALIZE) {
304 // "soft reset" - reset the low-level compression state, but don't update
305 // dres, or the total-bytes counters, etc.
306 rctx->state = PACKBITS_STATE_NEUTRAL;
307 rctx->nbytes_in_unitbuf = 0;
308 rctx->nliteral_bytes_remaining = 0;
309 rctx->repeat_count = 0;
311 if(cmd==DE_DFILTER_COMMAND_REINITIALIZE) {
312 rctx->total_nbytes_processed = 0;
313 rctx->nbytes_written = 0;
317 static void my_packbits_codec_finish(struct de_dfilter_ctx *dfctx)
319 struct packbitsctx *rctx = (struct packbitsctx*)dfctx->codec_private;
321 if(!rctx) return;
322 dfctx->dres->bytes_consumed = rctx->total_nbytes_processed;
323 dfctx->dres->bytes_consumed_valid = 1;
326 static void my_packbits_codec_destroy(struct de_dfilter_ctx *dfctx)
328 struct packbitsctx *rctx = (struct packbitsctx*)dfctx->codec_private;
330 if(rctx) {
331 de_free(dfctx->c, rctx);
333 dfctx->codec_private = NULL;
336 // codec_private_params: de_packbits_params, or NULL for default params.
337 void dfilter_packbits_codec(struct de_dfilter_ctx *dfctx, void *codec_private_params)
339 struct packbitsctx *rctx = NULL;
340 struct de_packbits_params *pbparams = (struct de_packbits_params*)codec_private_params;
342 rctx = de_malloc(dfctx->c, sizeof(struct packbitsctx));
343 rctx->nbytes_per_unit = 1;
344 if(pbparams && pbparams->is_packbits16) {
345 rctx->nbytes_per_unit = 2;
347 dfctx->codec_private = (void*)rctx;
348 dfctx->codec_addbuf_fn = my_packbits_codec_addbuf;
349 dfctx->codec_finish_fn = my_packbits_codec_finish;
350 dfctx->codec_command_fn = my_packbits_codec_command;
351 dfctx->codec_destroy_fn = my_packbits_codec_destroy;
354 void fmtutil_decompress_packbits_ex(deark *c, struct de_dfilter_in_params *dcmpri,
355 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres,
356 struct de_packbits_params *pbparams)
358 de_dfilter_decompress_oneshot(c, dfilter_packbits_codec, (void*)pbparams,
359 dcmpri, dcmpro, dres);
362 // Returns 0 on failure (currently impossible).
363 int fmtutil_decompress_packbits(dbuf *f, i64 pos1, i64 len,
364 dbuf *unc_pixels, i64 *cmpr_bytes_consumed)
366 struct de_dfilter_results dres;
367 struct de_dfilter_in_params dcmpri;
368 struct de_dfilter_out_params dcmpro;
370 if(cmpr_bytes_consumed) *cmpr_bytes_consumed = 0;
371 de_dfilter_init_objects(f->c, &dcmpri, &dcmpro, &dres);
373 dcmpri.f = f;
374 dcmpri.pos = pos1;
375 dcmpri.len = len;
376 dcmpro.f = unc_pixels;
377 if(unc_pixels->has_len_limit) {
378 dcmpro.len_known = 1;
379 dcmpro.expected_len = unc_pixels->len_limit - unc_pixels->len;
382 de_dfilter_decompress_oneshot(f->c, dfilter_packbits_codec, NULL,
383 &dcmpri, &dcmpro, &dres);
385 if(cmpr_bytes_consumed && dres.bytes_consumed_valid) {
386 *cmpr_bytes_consumed = dres.bytes_consumed;
388 if(dres.errcode != 0) return 0;
389 return 1;
392 void fmtutil_decompress_rle90_ex(deark *c, struct de_dfilter_in_params *dcmpri,
393 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres,
394 unsigned int flags)
396 de_dfilter_decompress_oneshot(c, dfilter_rle90_codec, NULL,
397 dcmpri, dcmpro, dres);
400 struct rle90ctx {
401 i64 total_nbytes_processed;
402 i64 nbytes_written;
403 u8 last_output_byte;
404 int countcode_pending;
407 static void my_rle90_codec_addbuf(struct de_dfilter_ctx *dfctx,
408 const u8 *buf, i64 buf_len)
410 int i;
411 u8 b;
412 struct rle90ctx *rctx = (struct rle90ctx*)dfctx->codec_private;
414 if(!rctx) return;
416 for(i=0; i<buf_len; i++) {
417 if(dfctx->dcmpro->len_known &&
418 (rctx->nbytes_written >= dfctx->dcmpro->expected_len))
420 dfctx->finished_flag = 1;
421 break;
424 b = buf[i];
425 rctx->total_nbytes_processed++;
427 if(rctx->countcode_pending && b==0) {
428 // Not RLE, just an escaped 0x90 byte.
429 dbuf_writebyte(dfctx->dcmpro->f, 0x90);
430 rctx->nbytes_written++;
431 rctx->last_output_byte = 0x90;
432 rctx->countcode_pending = 0;
434 else if(rctx->countcode_pending) {
435 i64 count;
437 // RLE. We already emitted one byte (because the byte to repeat
438 // comes before the repeat count), so write countcode-1 bytes.
439 count = (i64)(b-1);
440 if(dfctx->dcmpro->len_known &&
441 (rctx->nbytes_written+count > dfctx->dcmpro->expected_len))
443 count = dfctx->dcmpro->expected_len - rctx->nbytes_written;
445 dbuf_write_run(dfctx->dcmpro->f, rctx->last_output_byte, count);
446 rctx->nbytes_written += count;
448 rctx->countcode_pending = 0;
450 else if(b==0x90) {
451 rctx->countcode_pending = 1;
453 else {
454 dbuf_writebyte(dfctx->dcmpro->f, b);
455 rctx->nbytes_written++;
456 rctx->last_output_byte = b;
461 static void my_rle90_codec_finish(struct de_dfilter_ctx *dfctx)
463 struct rle90ctx *rctx = (struct rle90ctx*)dfctx->codec_private;
465 if(!rctx) return;
466 dfctx->dres->bytes_consumed = rctx->total_nbytes_processed;
467 dfctx->dres->bytes_consumed_valid = 1;
470 static void my_rle90_codec_destroy(struct de_dfilter_ctx *dfctx)
472 struct rle90ctx *rctx = (struct rle90ctx*)dfctx->codec_private;
474 if(rctx) {
475 de_free(dfctx->c, rctx);
477 dfctx->codec_private = NULL;
480 // RLE algorithm occasionally called "RLE90". Variants of this are used by
481 // BinHex, ARC, StuffIt, and others.
482 // codec_private_params: Unused, must be NULL.
483 void dfilter_rle90_codec(struct de_dfilter_ctx *dfctx, void *codec_private_params)
485 struct rle90ctx *rctx = NULL;
487 rctx = de_malloc(dfctx->c, sizeof(struct rle90ctx));
488 dfctx->codec_private = (void*)rctx;
489 dfctx->codec_addbuf_fn = my_rle90_codec_addbuf;
490 dfctx->codec_finish_fn = my_rle90_codec_finish;
491 dfctx->codec_destroy_fn = my_rle90_codec_destroy;
494 struct szdd_ctx {
495 i64 nbytes_written;
496 int stop_flag;
497 struct de_dfilter_out_params *dcmpro;
498 struct de_lz77buffer *ringbuf;
501 static void szdd_lz77buf_writebytecb(struct de_lz77buffer *rb, const u8 n)
503 struct szdd_ctx *sctx = (struct szdd_ctx*)rb->userdata;
505 if(sctx->stop_flag) return;
506 if(sctx->dcmpro->len_known) {
507 if(sctx->nbytes_written >= sctx->dcmpro->expected_len) {
508 sctx->stop_flag = 1;
509 return;
513 dbuf_writebyte(sctx->dcmpro->f, n);
514 sctx->nbytes_written++;
517 static void szdd_init_window_default(struct de_lz77buffer *ringbuf)
519 de_lz77buffer_clear(ringbuf, 0x20);
520 ringbuf->curpos = 4096 - 16;
523 static void szdd_init_window_lz5(struct de_lz77buffer *ringbuf)
525 size_t wpos;
526 int i;
528 de_zeromem(ringbuf->buf, 4096);
529 wpos = 13;
530 for(i=1; i<256; i++) {
531 de_memset(&ringbuf->buf[wpos], i, 13);
532 wpos += 13;
534 for(i=0; i<256; i++) {
535 ringbuf->buf[wpos++] = i;
537 for(i=255; i>=0; i--) {
538 ringbuf->buf[wpos++] = i;
540 wpos += 128;
541 de_memset(&ringbuf->buf[wpos], 0x20, 110);
542 wpos += 110;
543 ringbuf->curpos = (UI)wpos;
546 // Partially based on the libmspack's format documentation at
547 // <https://www.cabextract.org.uk/libmspack/doc/szdd_kwaj_format.html>
548 // flags:
549 // 0x1: LArc lz5 mode
550 void fmtutil_decompress_szdd(deark *c, struct de_dfilter_in_params *dcmpri,
551 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres, unsigned int flags)
553 i64 pos = dcmpri->pos;
554 i64 endpos = dcmpri->pos + dcmpri->len;
555 struct szdd_ctx *sctx = NULL;
557 sctx = de_malloc(c, sizeof(struct szdd_ctx));
558 sctx->dcmpro = dcmpro;
559 sctx->ringbuf = de_lz77buffer_create(c, 4096);
560 sctx->ringbuf->writebyte_cb = szdd_lz77buf_writebytecb;
561 sctx->ringbuf->userdata = (void*)sctx;
563 if(flags & 0x1) {
564 szdd_init_window_lz5(sctx->ringbuf);
566 else {
567 szdd_init_window_default(sctx->ringbuf);
570 while(1) {
571 UI control;
572 UI cbit;
574 if(pos+1 > endpos) goto unc_done; // Out of input data
575 control = (UI)dbuf_getbyte(dcmpri->f, pos++);
577 for(cbit=0x01; cbit<=0x80; cbit<<=1) {
578 if(control & cbit) { // literal
579 u8 b;
581 if(pos+1 > endpos) goto unc_done;
582 b = dbuf_getbyte(dcmpri->f, pos++);
583 de_lz77buffer_add_literal_byte(sctx->ringbuf, b);
584 if(sctx->stop_flag) goto unc_done;
586 else { // match
587 UI x0, x1;
588 UI matchpos;
589 UI matchlen;
591 if(pos+2 > endpos) goto unc_done;
592 x0 = (UI)dbuf_getbyte_p(dcmpri->f, &pos);
593 x1 = (UI)dbuf_getbyte_p(dcmpri->f, &pos);
594 matchpos = ((x1 & 0xf0) << 4) | x0;
595 matchlen = (x1 & 0x0f) + 3;
596 de_lz77buffer_copy_from_hist(sctx->ringbuf, matchpos, matchlen);
597 if(sctx->stop_flag) goto unc_done;
602 unc_done:
603 dres->bytes_consumed_valid = 1;
604 dres->bytes_consumed = pos - dcmpri->pos;
605 if(sctx) {
606 de_lz77buffer_destroy(c, sctx->ringbuf);
607 de_free(c, sctx);
611 //======================= hlp_lz77 =======================
613 struct hlplz77ctx {
614 i64 nbytes_written;
615 int stop_flag;
616 struct de_dfilter_out_params *dcmpro;
617 struct de_lz77buffer *ringbuf;
620 static void hlplz77_lz77buf_writebytecb(struct de_lz77buffer *rb, const u8 n)
622 struct hlplz77ctx *sctx = (struct hlplz77ctx*)rb->userdata;
624 if(sctx->stop_flag) return;
625 if(sctx->dcmpro->len_known) {
626 if(sctx->nbytes_written >= sctx->dcmpro->expected_len) {
627 sctx->stop_flag = 1;
628 return;
632 dbuf_writebyte(sctx->dcmpro->f, n);
633 sctx->nbytes_written++;
636 // This is very similar to the mscompress SZDD algorithm, but
637 // gratuitously different.
638 void fmtutil_hlp_lz77_codectype1(deark *c, struct de_dfilter_in_params *dcmpri,
639 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres,
640 void *codec_private_params)
642 i64 pos = dcmpri->pos;
643 i64 endpos = dcmpri->pos + dcmpri->len;
644 struct hlplz77ctx *sctx = NULL;
646 sctx = de_malloc(c, sizeof(struct hlplz77ctx));
647 sctx->dcmpro = dcmpro;
648 sctx->ringbuf = de_lz77buffer_create(c, 4096);
649 sctx->ringbuf->writebyte_cb = hlplz77_lz77buf_writebytecb;
650 sctx->ringbuf->userdata = (void*)sctx;
651 de_lz77buffer_clear(sctx->ringbuf, 0x20);
653 while(1) {
654 UI control;
655 UI cbit;
657 if(pos+1 > endpos) goto unc_done; // Out of input data
658 control = (UI)dbuf_getbyte(dcmpri->f, pos++);
660 for(cbit=0x01; cbit<=0x80; cbit<<=1) {
661 if((control & cbit)==0) { // literal
662 u8 b;
664 if(pos+1 > endpos) goto unc_done;
665 b = dbuf_getbyte(dcmpri->f, pos++);
666 de_lz77buffer_add_literal_byte(sctx->ringbuf, b);
667 if(sctx->stop_flag) goto unc_done;
669 else { // match
670 UI x;
671 UI matchpos;
672 UI matchlen;
674 if(pos+2 > endpos) goto unc_done;
675 x = (UI)dbuf_getu16le_p(dcmpri->f, &pos);
676 matchlen = (x>>12) + 3;
677 matchpos = sctx->ringbuf->curpos - ((x & 0x0fff)+1);
678 de_lz77buffer_copy_from_hist(sctx->ringbuf, matchpos, matchlen);
679 if(sctx->stop_flag) goto unc_done;
684 unc_done:
685 dres->bytes_consumed_valid = 1;
686 dres->bytes_consumed = pos - dcmpri->pos;
687 if(sctx) {
688 de_lz77buffer_destroy(c, sctx->ringbuf);
689 de_free(c, sctx);
693 //========================================================
695 struct my_2layer_userdata {
696 struct de_dfilter_ctx *dfctx_codec2;
697 i64 intermediate_nbytes;
700 static void my_2layer_write_cb(dbuf *f, void *userdata,
701 const u8 *buf, i64 size)
703 struct my_2layer_userdata *u = (struct my_2layer_userdata*)userdata;
705 de_dfilter_addbuf(u->dfctx_codec2, buf, size);
706 u->intermediate_nbytes += size;
709 static void dres_transfer_error(deark *c, struct de_dfilter_results *src,
710 struct de_dfilter_results *dst)
712 if(src->errcode) {
713 dst->errcode = src->errcode;
714 de_strlcpy(dst->errmsg, src->errmsg, sizeof(dst->errmsg));
718 // Decompress an arbitrary two-layer compressed format.
719 // tlp->codec1* is the first one that will be used during decompression (i.e. the second
720 // method used when during *compression*).
721 void de_dfilter_decompress_two_layer(deark *c, struct de_dcmpr_two_layer_params *tlp)
723 dbuf *outf_codec1 = NULL;
724 struct de_dfilter_out_params dcmpro_codec1;
725 struct de_dfilter_results dres_codec2;
726 struct my_2layer_userdata u;
727 struct de_dfilter_ctx *dfctx_codec2 = NULL;
729 de_dfilter_init_objects(c, NULL, &dcmpro_codec1, NULL);
730 de_dfilter_init_objects(c, NULL, NULL, &dres_codec2);
731 de_zeromem(&u, sizeof(struct my_2layer_userdata));
733 // Make a custom dbuf. The output from the first decompressor will be written
734 // to it, and it will relay that output to the second decompressor.
735 outf_codec1 = dbuf_create_custom_dbuf(c, 0, 0);
736 outf_codec1->userdata_for_customwrite = (void*)&u;
737 outf_codec1->customwrite_fn = my_2layer_write_cb;
739 dcmpro_codec1.f = outf_codec1;
740 if(tlp->intermed_len_known) {
741 dcmpro_codec1.len_known = 1;
742 dcmpro_codec1.expected_len = tlp->intermed_expected_len;
744 else {
745 dcmpro_codec1.len_known = 0;
746 dcmpro_codec1.expected_len = 0;
749 dfctx_codec2 = de_dfilter_create(c, tlp->codec2, tlp->codec2_private_params, tlp->dcmpro, &dres_codec2);
750 u.dfctx_codec2 = dfctx_codec2;
752 // The first codec in the chain does not need the advanced (de_dfilter_create) API.
753 if(tlp->codec1_type1) {
754 tlp->codec1_type1(c, tlp->dcmpri, &dcmpro_codec1, tlp->dres, tlp->codec1_private_params);
756 else {
757 de_dfilter_decompress_oneshot(c, tlp->codec1_pushable, tlp->codec1_private_params,
758 tlp->dcmpri, &dcmpro_codec1, tlp->dres);
760 de_dfilter_finish(dfctx_codec2);
762 if(tlp->dres->errcode) goto done;
763 de_dbg2(c, "size after intermediate decompression: %"I64_FMT, u.intermediate_nbytes);
765 if(dres_codec2.errcode) {
766 // An error occurred in codec2, and not in codec1.
767 // Copy the error info to the dres that will be returned to the caller.
768 // TODO: Make a cleaner way to do this.
769 dres_transfer_error(c, &dres_codec2, tlp->dres);
770 goto done;
773 done:
774 de_dfilter_destroy(dfctx_codec2);
775 dbuf_close(outf_codec1);
778 // TODO: Retire this function.
779 void de_dfilter_decompress_two_layer_type2(deark *c,
780 dfilter_codec_type codec1, void *codec1_private_params,
781 dfilter_codec_type codec2, void *codec2_private_params,
782 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
783 struct de_dfilter_results *dres)
785 struct de_dcmpr_two_layer_params tlp;
787 de_zeromem(&tlp, sizeof(struct de_dcmpr_two_layer_params));
788 tlp.codec1_pushable = codec1;
789 tlp.codec1_private_params = codec1_private_params;
790 tlp.codec2 = codec2;
791 tlp.codec2_private_params = codec2_private_params;
792 tlp.dcmpri = dcmpri;
793 tlp.dcmpro = dcmpro;
794 tlp.dres = dres;
795 de_dfilter_decompress_two_layer(c, &tlp);
798 struct de_lz77buffer *de_lz77buffer_create(deark *c, UI bufsize)
800 struct de_lz77buffer *rb;
802 rb = de_malloc(c, sizeof(struct de_lz77buffer));
803 rb->buf = de_malloc(c, (i64)bufsize);
804 rb->bufsize = bufsize;
805 rb->mask = bufsize - 1;
806 return rb;
809 void de_lz77buffer_destroy(deark *c, struct de_lz77buffer *rb)
811 if(!rb) return;
812 de_free(c, rb->buf);
813 de_free(c, rb);
816 // Set all bytes to the same value, and reset the current position to 0.
817 void de_lz77buffer_clear(struct de_lz77buffer *rb, UI val)
819 de_memset(rb->buf, val, rb->bufsize);
820 rb->curpos = 0;
823 void de_lz77buffer_set_curpos(struct de_lz77buffer *rb, UI newpos)
825 rb->curpos = newpos & rb->mask;
828 void de_lz77buffer_add_literal_byte(struct de_lz77buffer *rb, u8 b)
830 rb->writebyte_cb(rb, b);
831 rb->buf[rb->curpos] = b;
832 rb->curpos = (rb->curpos+1) & rb->mask;
835 void de_lz77buffer_copy_from_hist(struct de_lz77buffer *rb,
836 UI startpos, UI count)
838 UI frompos;
839 UI i;
841 frompos = startpos & rb->mask;
842 for(i=0; i<count; i++) {
843 de_lz77buffer_add_literal_byte(rb, rb->buf[frompos]);
844 frompos = (frompos+1) & rb->mask;
848 ///////////////////////////////////
849 // "Squeeze"-style Huffman decoder
851 // The first node you add allows for 2 symbols, and each additional node adds 1.
852 // So in general, you need one less node than the number of symbols.
853 // The max number of symbols is 257: 256 byte values, plus a special "stop" code.
854 #define SQUEEZE_MAX_NODES 256
856 struct squeeze_data_item {
857 i16 dval;
860 struct squeeze_node {
861 u8 in_use;
862 struct squeeze_data_item child[2];
865 struct squeeze_ctx {
866 deark *c;
867 struct de_dfilter_in_params *dcmpri;
868 struct de_dfilter_out_params *dcmpro;
869 struct de_dfilter_results *dres;
870 const char *modname;
871 i64 nbytes_written;
872 i64 nodecount;
873 struct fmtutil_huffman_tree *ht;
874 struct de_bitreader bitrd;
875 struct squeeze_node tmpnodes[SQUEEZE_MAX_NODES]; // Temporary use when decoding the node table
878 static void squeeze_interpret_node(struct squeeze_ctx *sqctx,
879 i64 nodenum, u64 currcode, UI currcode_nbits);
881 static void squeeze_interpret_dval(struct squeeze_ctx *sqctx,
882 i16 dval, u64 currcode, UI currcode_nbits)
884 char b2buf[72];
886 if(dval>=0) { // a pointer to a node
887 if((i64)dval < sqctx->nodecount) {
888 squeeze_interpret_node(sqctx, (i64)dval, currcode, currcode_nbits);
891 else if(dval>=(-257) && dval<=(-1)) {
892 fmtutil_huffman_valtype adj_value;
894 // -257 => 256 (stop code)
895 // -256 => 255 (byte value)
896 // -255 => 254 (byte value)
897 // ...
898 // -1 => 0 (byte value)
899 adj_value = -(((fmtutil_huffman_valtype)dval)+1);
900 if(sqctx->c->debug_level>=2) {
901 de_dbg3(sqctx->c, "code: \"%s\" = %d",
902 de_print_base2_fixed(b2buf, sizeof(b2buf), currcode, currcode_nbits),
903 (int)adj_value);
905 fmtutil_huffman_add_code(sqctx->c, sqctx->ht, currcode, currcode_nbits, adj_value);
907 // TODO: Report errors?
910 static void squeeze_interpret_node(struct squeeze_ctx *sqctx,
911 i64 nodenum, u64 currcode, UI currcode_nbits)
913 // TODO: Report errors?
914 if(nodenum<0 || nodenum>=sqctx->nodecount) return;
915 if(sqctx->tmpnodes[nodenum].in_use) return; // Loops are bad
916 if(currcode_nbits>=FMTUTIL_HUFFMAN_MAX_CODE_LENGTH) return;
918 sqctx->tmpnodes[nodenum].in_use = 1;
919 squeeze_interpret_dval(sqctx, sqctx->tmpnodes[nodenum].child[0].dval, currcode<<1, currcode_nbits+1);
920 squeeze_interpret_dval(sqctx, sqctx->tmpnodes[nodenum].child[1].dval, ((currcode<<1) | 1), currcode_nbits+1);
921 sqctx->tmpnodes[nodenum].in_use = 0;
924 static int squeeze_process_nodetable(deark *c, struct squeeze_ctx *sqctx)
926 int retval = 0;
928 // It feels a little wrong to go to the trouble of decoding this node table into
929 // the form required by our Huffman library's API, when we know it's going to
930 // just convert it back into a table much like it was originally. Maybe there
931 // should be a better way to do this.
932 de_dbg3(c, "interpreted huffman codebook:");
933 de_dbg_indent(c, 1);
934 squeeze_interpret_node(sqctx, 0, 0, 0);
935 de_dbg_indent(c, -1);
937 if(c->debug_level>=4) {
938 fmtutil_huffman_dump(c, sqctx->ht);
941 retval = 1;
942 return retval;
945 static int squeeze_read_nodetable(deark *c, struct squeeze_ctx *sqctx)
947 i64 k;
948 int retval = 0;
950 if(sqctx->bitrd.curpos+2 > sqctx->bitrd.endpos) goto done;
951 sqctx->nodecount = dbuf_getu16le_p(sqctx->dcmpri->f, &sqctx->bitrd.curpos);
952 de_dbg(c, "node count: %d", (int)sqctx->nodecount);
953 if(sqctx->nodecount > SQUEEZE_MAX_NODES) {
954 de_dfilter_set_errorf(c, sqctx->dres, sqctx->modname,
955 "Invalid node count");
956 goto done;
959 de_dbg2(c, "node table nodes at %"I64_FMT, sqctx->bitrd.curpos);
960 de_dbg_indent(c, 1);
961 for(k=0; k<sqctx->nodecount; k++) {
962 sqctx->tmpnodes[k].child[0].dval = (i16)dbuf_geti16le_p(sqctx->dcmpri->f, &sqctx->bitrd.curpos);
963 sqctx->tmpnodes[k].child[1].dval = (i16)dbuf_geti16le_p(sqctx->dcmpri->f, &sqctx->bitrd.curpos);
964 if(c->debug_level >= 2) {
965 de_dbg2(c, "nodetable[%d]: %d %d", (int)k, (int)sqctx->tmpnodes[k].child[0].dval,
966 (int)sqctx->tmpnodes[k].child[1].dval);
969 de_dbg_indent(c, -1);
970 if(sqctx->bitrd.curpos > sqctx->bitrd.endpos) goto done;
972 if(!squeeze_process_nodetable(c, sqctx)) goto done;
974 retval = 1;
975 done:
976 return retval;
979 static int squeeze_read_codes(deark *c, struct squeeze_ctx *sqctx)
981 int retval = 0;
983 de_dbg(c, "huffman-compressed data at %"I64_FMT, sqctx->bitrd.curpos);
984 sqctx->bitrd.bbll.is_lsb = 1;
985 de_bitbuf_lowelevel_empty(&sqctx->bitrd.bbll);
987 if(fmtutil_huffman_get_max_bits(sqctx->ht) < 1) {
988 // Empty tree? Assume this is an empty file.
989 retval = 1;
990 goto done;
993 while(1) {
994 int ret;
995 fmtutil_huffman_valtype val = 0;
997 ret = fmtutil_huffman_read_next_value(sqctx->ht, &sqctx->bitrd, &val, NULL);
998 if(!ret || val<0 || val>256) {
999 if(sqctx->bitrd.eof_flag) {
1000 retval = 1;
1002 else {
1003 de_dfilter_set_errorf(c, sqctx->dres, sqctx->modname, "Huffman decode error");
1005 goto done;
1008 if(val>=0 && val<=255) {
1009 dbuf_writebyte(sqctx->dcmpro->f, (u8)val);
1010 sqctx->nbytes_written++;
1011 if(sqctx->dcmpro->len_known && (sqctx->nbytes_written >= sqctx->dcmpro->expected_len)) {
1012 retval = 1;
1013 goto done;
1016 else if(val==256) { // STOP code
1017 retval = 1;
1018 goto done;
1022 done:
1023 return retval;
1026 void fmtutil_huff_squeeze_codectype1(deark *c, struct de_dfilter_in_params *dcmpri,
1027 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres,
1028 void *codec_private_params)
1030 struct squeeze_ctx *sqctx = NULL;
1031 int ok = 0;
1033 sqctx = de_malloc(c, sizeof(struct squeeze_ctx));
1034 sqctx->c = c;
1035 sqctx->modname = "unsqueeze";
1036 sqctx->dcmpri = dcmpri;
1037 sqctx->dcmpro = dcmpro;
1038 sqctx->dres = dres;
1040 sqctx->bitrd.f = dcmpri->f;
1041 sqctx->bitrd.curpos = dcmpri->pos;
1042 sqctx->bitrd.endpos = dcmpri->pos + dcmpri->len;
1044 sqctx->ht = fmtutil_huffman_create_tree(c, 257, 257);
1046 if(!squeeze_read_nodetable(c, sqctx)) goto done;
1047 if(!squeeze_read_codes(c, sqctx)) goto done;
1049 dres->bytes_consumed = sqctx->bitrd.curpos - dcmpri->pos;
1050 if(dres->bytes_consumed > dcmpri->len) {
1051 dres->bytes_consumed = dcmpri->len;
1053 dres->bytes_consumed_valid = 1;
1054 ok = 1;
1056 done:
1057 if(!ok || dres->errcode) {
1058 de_dfilter_set_errorf(c, dres, sqctx->modname, "Squeeze decompression failed");
1061 if(sqctx) {
1062 fmtutil_huffman_destroy_tree(c, sqctx->ht);
1063 de_free(c, sqctx);