Edited some module descriptions
[deark.git] / src / fmtutil-cmpr.c
blob5d880c2a92d5e0175b62190da3fc650ef5106956
1 // This file is part of Deark.
2 // Copyright (C) 2018 Jason Summers
3 // See the file COPYING for terms of use.
5 // Decompression, etc.
7 #define DE_NOT_IN_MODULE
8 #include "deark-config.h"
9 #include "deark-private.h"
10 #include "deark-fmtutil.h"
12 // Returns a message that is valid until the next operation on dres.
13 const char *de_dfilter_get_errmsg(deark *c, struct de_dfilter_results *dres)
15 if(dres->errcode==0) {
16 return "No error";
18 if(dres->errmsg[0]) {
19 return dres->errmsg;
21 return "Unspecified error";
24 // Initialize or reset a dfilter results struct
25 void de_dfilter_results_clear(deark *c, struct de_dfilter_results *dres)
27 dres->errcode = 0;
28 dres->bytes_consumed_valid = 0;
29 dres->bytes_consumed = 0;
30 dres->errmsg[0] = '\0';
33 // Note: It is also okay to init these objects by zeroing out their bytes.
34 void de_dfilter_init_objects(deark *c, struct de_dfilter_in_params *dcmpri,
35 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres)
37 if(dcmpri)
38 de_zeromem(dcmpri, sizeof(struct de_dfilter_in_params));
39 if(dcmpro)
40 de_zeromem(dcmpro, sizeof(struct de_dfilter_out_params));
41 if(dres)
42 de_dfilter_results_clear(c, dres);
45 void de_dfilter_set_errorf(deark *c, struct de_dfilter_results *dres, const char *modname,
46 const char *fmt, ...)
48 va_list ap;
50 if(dres->errcode != 0) return; // Only record the first error
51 dres->errcode = 1;
53 va_start(ap, fmt);
54 if(modname) {
55 char tmpbuf[80];
57 de_vsnprintf(tmpbuf, sizeof(tmpbuf), fmt, ap);
58 de_snprintf(dres->errmsg, sizeof(dres->errmsg), "[%s] %s", modname, tmpbuf);
60 else {
61 de_vsnprintf(dres->errmsg, sizeof(dres->errmsg), fmt, ap);
63 va_end(ap);
66 void de_dfilter_set_generic_error(deark *c, struct de_dfilter_results *dres, const char *modname)
68 if(dres->errcode != 0) return;
69 de_dfilter_set_errorf(c, dres, modname, "Unspecified error");
72 // This is a decompression API that uses a "push" input model. The client
73 // sends data to the codec as the data becomes available.
74 // (The client must still be able to consume any amount of output data
75 // immediately.)
76 // This model makes it easier to chain multiple codecs together, and to handle
77 // input data that is not contiguous.
78 // TODO: There's no reason this couldn't be extended to work with "type1" codecs.
80 struct de_dfilter_ctx *de_dfilter_create(deark *c,
81 dfilter_codec_type codec_init_fn, void *codec_private_params,
82 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres)
84 struct de_dfilter_ctx *dfctx = NULL;
86 dfctx = de_malloc(c, sizeof(struct de_dfilter_ctx));
87 dfctx->c = c;
88 dfctx->dres = dres;
89 dfctx->dcmpro = dcmpro;
91 if(codec_init_fn) {
92 codec_init_fn(dfctx, codec_private_params);
94 // TODO: How should we handle failure to initialize a codec?
96 return dfctx;
99 void de_dfilter_addbuf(struct de_dfilter_ctx *dfctx,
100 const u8 *buf, i64 buf_len)
102 if(dfctx->codec_addbuf_fn && (buf_len>0)) {
103 dfctx->codec_addbuf_fn(dfctx, buf, buf_len);
107 // Commands: (Commands are not supported by all codecs)
108 // DE_DFILTER_COMMAND_SOFTRESET
109 // Reset the decompressor state. Exact function depends on the codec.
111 // DE_DFILTER_COMMAND_REINITIALIZE
112 // Reinitialize a codec, so you don't have to destroy and recreate it in
113 // in order to use it again. Typically used after _finish().
114 // Before using this command, it is okay to change the internal paramters of
115 // the dcmpro and dres given to de_dfilter_create(). You should call
116 // de_dfilter_results_clear or the equivalent if you have already handled
117 // previous errors.
118 void de_dfilter_command(struct de_dfilter_ctx *dfctx, int cmd, UI flags)
120 // Non-codec-specific things:
122 if(cmd==DE_DFILTER_COMMAND_REINITIALIZE) {
123 dfctx->finished_flag = 0;
124 dfctx->dres->bytes_consumed_valid = 0;
127 // Codec-specific things:
129 if(dfctx->codec_command_fn) {
130 dfctx->codec_command_fn(dfctx, cmd);
134 // Call this to inform the codec that there are no more compressed bytes.
135 // The codec's 'finish' function should flush any pending output,
136 // and update the decompression results in dfctx->dres.
137 // Some codecs can still be used after this, provided you then call
138 // de_dfilter_command(...,DE_DFILTER_COMMAND_REINITIALIZE).
139 void de_dfilter_finish(struct de_dfilter_ctx *dfctx)
141 if(dfctx->codec_finish_fn) {
142 dfctx->codec_finish_fn(dfctx);
146 void de_dfilter_destroy(struct de_dfilter_ctx *dfctx)
148 deark *c;
150 if(!dfctx) return;
151 c = dfctx->c;
152 if(dfctx->codec_destroy_fn) {
153 dfctx->codec_destroy_fn(dfctx);
156 de_free(c, dfctx);
159 static int my_dfilter_addslice_buffered_read_cbfn(struct de_bufferedreadctx *brctx, const u8 *buf,
160 i64 buf_len)
162 struct de_dfilter_ctx *dfctx = (struct de_dfilter_ctx *)brctx->userdata;
164 de_dfilter_addbuf(dfctx, buf, buf_len);
165 if(dfctx->finished_flag) return 0;
166 return 1;
169 void de_dfilter_addslice(struct de_dfilter_ctx *dfctx,
170 dbuf *inf, i64 pos, i64 len)
172 if(dfctx->finished_flag) return;
173 dbuf_buffered_read(inf, pos, len,
174 my_dfilter_addslice_buffered_read_cbfn, (void*)dfctx);
177 // Use a "pushable" codec in a non-pushable way.
178 void de_dfilter_decompress_oneshot(deark *c,
179 dfilter_codec_type codec_init_fn, void *codec_private_params,
180 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
181 struct de_dfilter_results *dres)
183 struct de_dfilter_ctx *dfctx = NULL;
185 dfctx = de_dfilter_create(c, codec_init_fn, codec_private_params,
186 dcmpro, dres);
187 de_dfilter_addslice(dfctx, dcmpri->f, dcmpri->pos, dcmpri->len);
188 de_dfilter_finish(dfctx);
189 de_dfilter_destroy(dfctx);
192 // Trivial "decompression" of uncompressed data.
193 void fmtutil_decompress_uncompressed(deark *c, struct de_dfilter_in_params *dcmpri,
194 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres, UI flags)
196 i64 len;
197 i64 nbytes_avail;
199 nbytes_avail = de_min_int(dcmpri->len, dcmpri->f->len - dcmpri->pos);
201 if(dcmpro->len_known) {
202 len = dcmpro->expected_len;
204 else {
205 len = dcmpri->len;
208 if(len>nbytes_avail) len = nbytes_avail;
209 if(len<0) len = 0;
211 dbuf_copy(dcmpri->f, dcmpri->pos, len, dcmpro->f);
212 dres->bytes_consumed = len;
213 dres->bytes_consumed_valid = 1;
// States of the PackBits decoder's state machine.
enum packbits_state_enum {
	// The next input byte is a code byte.
	PACKBITS_STATE_NEUTRAL = 0,
	// The next input byte is part of a literal (uncompressed) run.
	PACKBITS_STATE_COPYING_LITERAL,
	// The next input byte is part of the unit that will be repeated.
	PACKBITS_STATE_READING_UNIT_TO_REPEAT
};
222 struct packbitsctx {
223 size_t nbytes_per_unit;
224 size_t nbytes_in_unitbuf;
225 u8 unitbuf[2];
226 i64 total_nbytes_processed;
227 i64 nbytes_written;
228 enum packbits_state_enum state;
229 i64 nliteral_bytes_remaining;
230 i64 repeat_count;
233 static void my_packbits_codec_addbuf(struct de_dfilter_ctx *dfctx,
234 const u8 *buf, i64 buf_len)
236 int i;
237 u8 b;
238 struct packbitsctx *rctx = (struct packbitsctx*)dfctx->codec_private;
240 if(!rctx) return;
242 for(i=0; i<buf_len; i++) {
243 if(dfctx->dcmpro->len_known &&
244 (rctx->nbytes_written >= dfctx->dcmpro->expected_len))
246 dfctx->finished_flag = 1;
247 break;
250 b = buf[i];
251 rctx->total_nbytes_processed++;
253 switch(rctx->state) {
254 case PACKBITS_STATE_NEUTRAL: // this is a code byte
255 if(b>128) { // A compressed run
256 rctx->repeat_count = 257 - (i64)b;
257 rctx->state = PACKBITS_STATE_READING_UNIT_TO_REPEAT;
259 else if(b<128) { // An uncompressed run
260 rctx->nliteral_bytes_remaining = (1 + (i64)b) * (i64)rctx->nbytes_per_unit;
261 rctx->state = PACKBITS_STATE_COPYING_LITERAL;
263 // Else b==128. No-op.
264 // TODO: Some (but not most) ILBM specs say that code 128 is used to
265 // mark the end of compressed data, so maybe there should be options to
266 // tell us what to do when code 128 is encountered.
267 break;
268 case PACKBITS_STATE_COPYING_LITERAL: // This byte is uncompressed
269 dbuf_writebyte(dfctx->dcmpro->f, b);
270 rctx->nbytes_written++;
271 rctx->nliteral_bytes_remaining--;
272 if(rctx->nliteral_bytes_remaining<=0) {
273 rctx->state = PACKBITS_STATE_NEUTRAL;
275 break;
276 case PACKBITS_STATE_READING_UNIT_TO_REPEAT:
277 if(rctx->nbytes_per_unit==1) { // Optimization for standard PackBits
278 dbuf_write_run(dfctx->dcmpro->f, b, rctx->repeat_count);
279 rctx->nbytes_written += rctx->repeat_count;
280 rctx->state = PACKBITS_STATE_NEUTRAL;
282 else {
283 rctx->unitbuf[rctx->nbytes_in_unitbuf++] = b;
284 if(rctx->nbytes_in_unitbuf >= rctx->nbytes_per_unit) {
285 i64 k;
287 for(k=0; k<rctx->repeat_count; k++) {
288 dbuf_write(dfctx->dcmpro->f, rctx->unitbuf, (i64)rctx->nbytes_per_unit);
290 rctx->nbytes_in_unitbuf = 0;
291 rctx->nbytes_written += rctx->repeat_count * (i64)rctx->nbytes_per_unit;
292 rctx->state = PACKBITS_STATE_NEUTRAL;
295 break;
300 static void my_packbits_codec_command(struct de_dfilter_ctx *dfctx, int cmd)
302 struct packbitsctx *rctx = (struct packbitsctx*)dfctx->codec_private;
304 if(cmd==DE_DFILTER_COMMAND_SOFTRESET || cmd==DE_DFILTER_COMMAND_REINITIALIZE) {
305 // "soft reset" - reset the low-level compression state, but don't update
306 // dres, or the total-bytes counters, etc.
307 rctx->state = PACKBITS_STATE_NEUTRAL;
308 rctx->nbytes_in_unitbuf = 0;
309 rctx->nliteral_bytes_remaining = 0;
310 rctx->repeat_count = 0;
312 if(cmd==DE_DFILTER_COMMAND_REINITIALIZE) {
313 rctx->total_nbytes_processed = 0;
314 rctx->nbytes_written = 0;
318 static void my_packbits_codec_finish(struct de_dfilter_ctx *dfctx)
320 struct packbitsctx *rctx = (struct packbitsctx*)dfctx->codec_private;
322 if(!rctx) return;
323 dfctx->dres->bytes_consumed = rctx->total_nbytes_processed;
324 dfctx->dres->bytes_consumed_valid = 1;
327 static void my_packbits_codec_destroy(struct de_dfilter_ctx *dfctx)
329 struct packbitsctx *rctx = (struct packbitsctx*)dfctx->codec_private;
331 if(rctx) {
332 de_free(dfctx->c, rctx);
334 dfctx->codec_private = NULL;
337 // codec_private_params: de_packbits_params, or NULL for default params.
338 void dfilter_packbits_codec(struct de_dfilter_ctx *dfctx, void *codec_private_params)
340 struct packbitsctx *rctx = NULL;
341 struct de_packbits_params *pbparams = (struct de_packbits_params*)codec_private_params;
343 rctx = de_malloc(dfctx->c, sizeof(struct packbitsctx));
344 rctx->nbytes_per_unit = 1;
345 if(pbparams && pbparams->is_packbits16) {
346 rctx->nbytes_per_unit = 2;
348 dfctx->codec_private = (void*)rctx;
349 dfctx->codec_addbuf_fn = my_packbits_codec_addbuf;
350 dfctx->codec_finish_fn = my_packbits_codec_finish;
351 dfctx->codec_command_fn = my_packbits_codec_command;
352 dfctx->codec_destroy_fn = my_packbits_codec_destroy;
355 void fmtutil_decompress_packbits_ex(deark *c, struct de_dfilter_in_params *dcmpri,
356 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres,
357 struct de_packbits_params *pbparams)
359 de_dfilter_decompress_oneshot(c, dfilter_packbits_codec, (void*)pbparams,
360 dcmpri, dcmpro, dres);
363 // Returns 0 on failure (currently impossible).
364 int fmtutil_decompress_packbits(dbuf *f, i64 pos1, i64 len,
365 dbuf *unc_pixels, i64 *cmpr_bytes_consumed)
367 struct de_dfilter_results dres;
368 struct de_dfilter_in_params dcmpri;
369 struct de_dfilter_out_params dcmpro;
371 if(cmpr_bytes_consumed) *cmpr_bytes_consumed = 0;
372 de_dfilter_init_objects(f->c, &dcmpri, &dcmpro, &dres);
374 dcmpri.f = f;
375 dcmpri.pos = pos1;
376 dcmpri.len = len;
377 dcmpro.f = unc_pixels;
378 if(unc_pixels->has_len_limit) {
379 dcmpro.len_known = 1;
380 dcmpro.expected_len = unc_pixels->len_limit - unc_pixels->len;
383 de_dfilter_decompress_oneshot(f->c, dfilter_packbits_codec, NULL,
384 &dcmpri, &dcmpro, &dres);
386 if(cmpr_bytes_consumed && dres.bytes_consumed_valid) {
387 *cmpr_bytes_consumed = dres.bytes_consumed;
389 if(dres.errcode != 0) return 0;
390 return 1;
393 void fmtutil_decompress_rle90_ex(deark *c, struct de_dfilter_in_params *dcmpri,
394 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres,
395 unsigned int flags)
397 de_dfilter_decompress_oneshot(c, dfilter_rle90_codec, NULL,
398 dcmpri, dcmpro, dres);
401 struct rle90ctx {
402 i64 total_nbytes_processed;
403 i64 nbytes_written;
404 u8 last_output_byte;
405 int countcode_pending;
408 static void my_rle90_codec_addbuf(struct de_dfilter_ctx *dfctx,
409 const u8 *buf, i64 buf_len)
411 int i;
412 u8 b;
413 struct rle90ctx *rctx = (struct rle90ctx*)dfctx->codec_private;
415 if(!rctx) return;
417 for(i=0; i<buf_len; i++) {
418 if(dfctx->dcmpro->len_known &&
419 (rctx->nbytes_written >= dfctx->dcmpro->expected_len))
421 dfctx->finished_flag = 1;
422 break;
425 b = buf[i];
426 rctx->total_nbytes_processed++;
428 if(rctx->countcode_pending && b==0) {
429 // Not RLE, just an escaped 0x90 byte.
430 dbuf_writebyte(dfctx->dcmpro->f, 0x90);
431 rctx->nbytes_written++;
432 rctx->last_output_byte = 0x90;
433 rctx->countcode_pending = 0;
435 else if(rctx->countcode_pending) {
436 i64 count;
438 // RLE. We already emitted one byte (because the byte to repeat
439 // comes before the repeat count), so write countcode-1 bytes.
440 count = (i64)(b-1);
441 if(dfctx->dcmpro->len_known &&
442 (rctx->nbytes_written+count > dfctx->dcmpro->expected_len))
444 count = dfctx->dcmpro->expected_len - rctx->nbytes_written;
446 dbuf_write_run(dfctx->dcmpro->f, rctx->last_output_byte, count);
447 rctx->nbytes_written += count;
449 rctx->countcode_pending = 0;
451 else if(b==0x90) {
452 rctx->countcode_pending = 1;
454 else {
455 dbuf_writebyte(dfctx->dcmpro->f, b);
456 rctx->nbytes_written++;
457 rctx->last_output_byte = b;
462 static void my_rle90_codec_finish(struct de_dfilter_ctx *dfctx)
464 struct rle90ctx *rctx = (struct rle90ctx*)dfctx->codec_private;
466 if(!rctx) return;
467 dfctx->dres->bytes_consumed = rctx->total_nbytes_processed;
468 dfctx->dres->bytes_consumed_valid = 1;
471 static void my_rle90_codec_destroy(struct de_dfilter_ctx *dfctx)
473 struct rle90ctx *rctx = (struct rle90ctx*)dfctx->codec_private;
475 if(rctx) {
476 de_free(dfctx->c, rctx);
478 dfctx->codec_private = NULL;
481 // RLE algorithm occasionally called "RLE90". Variants of this are used by
482 // BinHex, ARC, StuffIt, and others.
483 // codec_private_params: Unused, must be NULL.
484 void dfilter_rle90_codec(struct de_dfilter_ctx *dfctx, void *codec_private_params)
486 struct rle90ctx *rctx = NULL;
488 rctx = de_malloc(dfctx->c, sizeof(struct rle90ctx));
489 dfctx->codec_private = (void*)rctx;
490 dfctx->codec_addbuf_fn = my_rle90_codec_addbuf;
491 dfctx->codec_finish_fn = my_rle90_codec_finish;
492 dfctx->codec_destroy_fn = my_rle90_codec_destroy;
495 struct szdd_ctx {
496 i64 nbytes_written;
497 int stop_flag;
498 struct de_dfilter_out_params *dcmpro;
499 struct de_lz77buffer *ringbuf;
502 static void szdd_lz77buf_writebytecb(struct de_lz77buffer *rb, const u8 n)
504 struct szdd_ctx *sctx = (struct szdd_ctx*)rb->userdata;
506 if(sctx->stop_flag) return;
507 if(sctx->dcmpro->len_known) {
508 if(sctx->nbytes_written >= sctx->dcmpro->expected_len) {
509 sctx->stop_flag = 1;
510 return;
514 dbuf_writebyte(sctx->dcmpro->f, n);
515 sctx->nbytes_written++;
518 static void szdd_init_window_default(struct de_lz77buffer *ringbuf)
520 de_lz77buffer_clear(ringbuf, 0x20);
521 ringbuf->curpos = 4096 - 16;
524 static void szdd_init_window_lz5(struct de_lz77buffer *ringbuf)
526 size_t wpos;
527 int i;
529 de_zeromem(ringbuf->buf, 4096);
530 wpos = 13;
531 for(i=1; i<256; i++) {
532 de_memset(&ringbuf->buf[wpos], i, 13);
533 wpos += 13;
535 for(i=0; i<256; i++) {
536 ringbuf->buf[wpos++] = i;
538 for(i=255; i>=0; i--) {
539 ringbuf->buf[wpos++] = i;
541 wpos += 128;
542 de_memset(&ringbuf->buf[wpos], 0x20, 110);
543 wpos += 110;
544 ringbuf->curpos = (UI)wpos;
547 // Partially based on the libmspack's format documentation at
548 // <https://www.cabextract.org.uk/libmspack/doc/szdd_kwaj_format.html>
549 // flags:
550 // 0x1: LArc lz5 mode
551 void fmtutil_decompress_szdd(deark *c, struct de_dfilter_in_params *dcmpri,
552 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres, unsigned int flags)
554 i64 pos = dcmpri->pos;
555 i64 endpos = dcmpri->pos + dcmpri->len;
556 struct szdd_ctx *sctx = NULL;
558 sctx = de_malloc(c, sizeof(struct szdd_ctx));
559 sctx->dcmpro = dcmpro;
560 sctx->ringbuf = de_lz77buffer_create(c, 4096);
561 sctx->ringbuf->writebyte_cb = szdd_lz77buf_writebytecb;
562 sctx->ringbuf->userdata = (void*)sctx;
564 if(flags & 0x1) {
565 szdd_init_window_lz5(sctx->ringbuf);
567 else {
568 szdd_init_window_default(sctx->ringbuf);
571 while(1) {
572 UI control;
573 UI cbit;
575 if(pos+1 > endpos) goto unc_done; // Out of input data
576 control = (UI)dbuf_getbyte(dcmpri->f, pos++);
578 for(cbit=0x01; cbit<=0x80; cbit<<=1) {
579 if(control & cbit) { // literal
580 u8 b;
582 if(pos+1 > endpos) goto unc_done;
583 b = dbuf_getbyte(dcmpri->f, pos++);
584 de_lz77buffer_add_literal_byte(sctx->ringbuf, b);
585 if(sctx->stop_flag) goto unc_done;
587 else { // match
588 UI x0, x1;
589 UI matchpos;
590 UI matchlen;
592 if(pos+2 > endpos) goto unc_done;
593 x0 = (UI)dbuf_getbyte_p(dcmpri->f, &pos);
594 x1 = (UI)dbuf_getbyte_p(dcmpri->f, &pos);
595 matchpos = ((x1 & 0xf0) << 4) | x0;
596 matchlen = (x1 & 0x0f) + 3;
597 de_lz77buffer_copy_from_hist(sctx->ringbuf, matchpos, matchlen);
598 if(sctx->stop_flag) goto unc_done;
603 unc_done:
604 dres->bytes_consumed_valid = 1;
605 dres->bytes_consumed = pos - dcmpri->pos;
606 if(sctx) {
607 de_lz77buffer_destroy(c, sctx->ringbuf);
608 de_free(c, sctx);
612 //======================= hlp_lz77 =======================
614 struct hlplz77ctx {
615 i64 nbytes_written;
616 int stop_flag;
617 struct de_dfilter_out_params *dcmpro;
618 struct de_lz77buffer *ringbuf;
621 static void hlplz77_lz77buf_writebytecb(struct de_lz77buffer *rb, const u8 n)
623 struct hlplz77ctx *sctx = (struct hlplz77ctx*)rb->userdata;
625 if(sctx->stop_flag) return;
626 if(sctx->dcmpro->len_known) {
627 if(sctx->nbytes_written >= sctx->dcmpro->expected_len) {
628 sctx->stop_flag = 1;
629 return;
633 dbuf_writebyte(sctx->dcmpro->f, n);
634 sctx->nbytes_written++;
637 // This is very similar to the mscompress SZDD algorithm, but
638 // gratuitously different.
639 void fmtutil_hlp_lz77_codectype1(deark *c, struct de_dfilter_in_params *dcmpri,
640 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres,
641 void *codec_private_params)
643 i64 pos = dcmpri->pos;
644 i64 endpos = dcmpri->pos + dcmpri->len;
645 struct hlplz77ctx *sctx = NULL;
647 sctx = de_malloc(c, sizeof(struct hlplz77ctx));
648 sctx->dcmpro = dcmpro;
649 sctx->ringbuf = de_lz77buffer_create(c, 4096);
650 sctx->ringbuf->writebyte_cb = hlplz77_lz77buf_writebytecb;
651 sctx->ringbuf->userdata = (void*)sctx;
652 de_lz77buffer_clear(sctx->ringbuf, 0x20);
654 while(1) {
655 UI control;
656 UI cbit;
658 if(pos+1 > endpos) goto unc_done; // Out of input data
659 control = (UI)dbuf_getbyte(dcmpri->f, pos++);
661 for(cbit=0x01; cbit<=0x80; cbit<<=1) {
662 if((control & cbit)==0) { // literal
663 u8 b;
665 if(pos+1 > endpos) goto unc_done;
666 b = dbuf_getbyte(dcmpri->f, pos++);
667 de_lz77buffer_add_literal_byte(sctx->ringbuf, b);
668 if(sctx->stop_flag) goto unc_done;
670 else { // match
671 UI x;
672 UI matchpos;
673 UI matchlen;
675 if(pos+2 > endpos) goto unc_done;
676 x = (UI)dbuf_getu16le_p(dcmpri->f, &pos);
677 matchlen = (x>>12) + 3;
678 matchpos = sctx->ringbuf->curpos - ((x & 0x0fff)+1);
679 de_lz77buffer_copy_from_hist(sctx->ringbuf, matchpos, matchlen);
680 if(sctx->stop_flag) goto unc_done;
685 unc_done:
686 dres->bytes_consumed_valid = 1;
687 dres->bytes_consumed = pos - dcmpri->pos;
688 if(sctx) {
689 de_lz77buffer_destroy(c, sctx->ringbuf);
690 de_free(c, sctx);
694 //========================================================
696 struct my_2layer_userdata {
697 struct de_dfilter_ctx *dfctx_codec2;
698 i64 intermediate_nbytes;
701 static void my_2layer_write_cb(dbuf *f, void *userdata,
702 const u8 *buf, i64 size)
704 struct my_2layer_userdata *u = (struct my_2layer_userdata*)userdata;
706 de_dfilter_addbuf(u->dfctx_codec2, buf, size);
707 u->intermediate_nbytes += size;
710 static void dres_transfer_error(deark *c, struct de_dfilter_results *src,
711 struct de_dfilter_results *dst)
713 if(src->errcode) {
714 dst->errcode = src->errcode;
715 de_strlcpy(dst->errmsg, src->errmsg, sizeof(dst->errmsg));
719 // Decompress an arbitrary two-layer compressed format.
720 // tlp->codec1* is the first one that will be used during decompression (i.e. the second
721 // method used when during *compression*).
722 void de_dfilter_decompress_two_layer(deark *c, struct de_dcmpr_two_layer_params *tlp)
724 dbuf *outf_codec1 = NULL;
725 struct de_dfilter_out_params dcmpro_codec1;
726 struct de_dfilter_results dres_codec2;
727 struct my_2layer_userdata u;
728 struct de_dfilter_ctx *dfctx_codec2 = NULL;
730 de_dfilter_init_objects(c, NULL, &dcmpro_codec1, NULL);
731 de_dfilter_init_objects(c, NULL, NULL, &dres_codec2);
732 de_zeromem(&u, sizeof(struct my_2layer_userdata));
734 // Make a custom dbuf. The output from the first decompressor will be written
735 // to it, and it will relay that output to the second decompressor.
736 outf_codec1 = dbuf_create_custom_dbuf(c, 0, 0);
737 outf_codec1->userdata_for_customwrite = (void*)&u;
738 outf_codec1->customwrite_fn = my_2layer_write_cb;
740 dcmpro_codec1.f = outf_codec1;
741 if(tlp->intermed_len_known) {
742 dcmpro_codec1.len_known = 1;
743 dcmpro_codec1.expected_len = tlp->intermed_expected_len;
745 else {
746 dcmpro_codec1.len_known = 0;
747 dcmpro_codec1.expected_len = 0;
750 dfctx_codec2 = de_dfilter_create(c, tlp->codec2, tlp->codec2_private_params, tlp->dcmpro, &dres_codec2);
751 u.dfctx_codec2 = dfctx_codec2;
753 // The first codec in the chain does not need the advanced (de_dfilter_create) API.
754 if(tlp->codec1_type1) {
755 tlp->codec1_type1(c, tlp->dcmpri, &dcmpro_codec1, tlp->dres, tlp->codec1_private_params);
757 else {
758 de_dfilter_decompress_oneshot(c, tlp->codec1_pushable, tlp->codec1_private_params,
759 tlp->dcmpri, &dcmpro_codec1, tlp->dres);
761 de_dfilter_finish(dfctx_codec2);
763 if(tlp->dres->errcode) goto done;
764 de_dbg2(c, "size after intermediate decompression: %"I64_FMT, u.intermediate_nbytes);
766 if(dres_codec2.errcode) {
767 // An error occurred in codec2, and not in codec1.
768 // Copy the error info to the dres that will be returned to the caller.
769 // TODO: Make a cleaner way to do this.
770 dres_transfer_error(c, &dres_codec2, tlp->dres);
771 goto done;
774 done:
775 de_dfilter_destroy(dfctx_codec2);
776 dbuf_close(outf_codec1);
779 // TODO: Retire this function.
780 void de_dfilter_decompress_two_layer_type2(deark *c,
781 dfilter_codec_type codec1, void *codec1_private_params,
782 dfilter_codec_type codec2, void *codec2_private_params,
783 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
784 struct de_dfilter_results *dres)
786 struct de_dcmpr_two_layer_params tlp;
788 de_zeromem(&tlp, sizeof(struct de_dcmpr_two_layer_params));
789 tlp.codec1_pushable = codec1;
790 tlp.codec1_private_params = codec1_private_params;
791 tlp.codec2 = codec2;
792 tlp.codec2_private_params = codec2_private_params;
793 tlp.dcmpri = dcmpri;
794 tlp.dcmpro = dcmpro;
795 tlp.dres = dres;
796 de_dfilter_decompress_two_layer(c, &tlp);
799 struct de_lz77buffer *de_lz77buffer_create(deark *c, UI bufsize)
801 struct de_lz77buffer *rb;
803 rb = de_malloc(c, sizeof(struct de_lz77buffer));
804 rb->buf = de_malloc(c, (i64)bufsize);
805 rb->bufsize = bufsize;
806 rb->mask = bufsize - 1;
807 return rb;
810 void de_lz77buffer_destroy(deark *c, struct de_lz77buffer *rb)
812 if(!rb) return;
813 de_free(c, rb->buf);
814 de_free(c, rb);
817 // Set all bytes to the same value, and reset the current position to 0.
818 void de_lz77buffer_clear(struct de_lz77buffer *rb, UI val)
820 de_memset(rb->buf, val, rb->bufsize);
821 rb->curpos = 0;
824 void de_lz77buffer_set_curpos(struct de_lz77buffer *rb, UI newpos)
826 rb->curpos = newpos & rb->mask;
829 void de_lz77buffer_add_literal_byte(struct de_lz77buffer *rb, u8 b)
831 rb->writebyte_cb(rb, b);
832 rb->buf[rb->curpos] = b;
833 rb->curpos = (rb->curpos+1) & rb->mask;
836 void de_lz77buffer_copy_from_hist(struct de_lz77buffer *rb,
837 UI startpos, UI count)
839 UI frompos;
840 UI i;
842 frompos = startpos & rb->mask;
843 for(i=0; i<count; i++) {
844 de_lz77buffer_add_literal_byte(rb, rb->buf[frompos]);
845 frompos = (frompos+1) & rb->mask;
849 ///////////////////////////////////
850 // "Squeeze"-style Huffman decoder
852 // The first node you add allows for 2 symbols, and each additional node adds 1.
853 // So in general, you need one less node than the number of symbols.
854 // The max number of symbols is 257: 256 byte values, plus a special "stop" code.
855 #define SQUEEZE_MAX_NODES 256
857 struct squeeze_data_item {
858 i16 dval;
861 struct squeeze_node {
862 u8 in_use;
863 struct squeeze_data_item child[2];
866 struct squeeze_ctx {
867 deark *c;
868 struct de_dfilter_in_params *dcmpri;
869 struct de_dfilter_out_params *dcmpro;
870 struct de_dfilter_results *dres;
871 const char *modname;
872 i64 nbytes_written;
873 i64 nodecount;
874 struct fmtutil_huffman_decoder *ht;
875 struct de_bitreader bitrd;
876 struct squeeze_node tmpnodes[SQUEEZE_MAX_NODES]; // Temporary use when decoding the node table
879 static void squeeze_interpret_node(struct squeeze_ctx *sqctx,
880 i64 nodenum, u64 currcode, UI currcode_nbits);
882 static void squeeze_interpret_dval(struct squeeze_ctx *sqctx,
883 i16 dval, u64 currcode, UI currcode_nbits)
885 char b2buf[72];
887 if(dval>=0) { // a pointer to a node
888 if((i64)dval < sqctx->nodecount) {
889 squeeze_interpret_node(sqctx, (i64)dval, currcode, currcode_nbits);
892 else if(dval>=(-257) && dval<=(-1)) {
893 fmtutil_huffman_valtype adj_value;
895 // -257 => 256 (stop code)
896 // -256 => 255 (byte value)
897 // -255 => 254 (byte value)
898 // ...
899 // -1 => 0 (byte value)
900 adj_value = -(((fmtutil_huffman_valtype)dval)+1);
901 if(sqctx->c->debug_level>=2) {
902 de_dbg3(sqctx->c, "code: \"%s\" = %d",
903 de_print_base2_fixed(b2buf, sizeof(b2buf), currcode, currcode_nbits),
904 (int)adj_value);
906 fmtutil_huffman_add_code(sqctx->c, sqctx->ht->bk, currcode, currcode_nbits, adj_value);
908 // TODO: Report errors?
911 static void squeeze_interpret_node(struct squeeze_ctx *sqctx,
912 i64 nodenum, u64 currcode, UI currcode_nbits)
914 // TODO: Report errors?
915 if(nodenum<0 || nodenum>=sqctx->nodecount) return;
916 if(sqctx->tmpnodes[nodenum].in_use) return; // Loops are bad
917 if(currcode_nbits>=FMTUTIL_HUFFMAN_MAX_CODE_LENGTH) return;
919 sqctx->tmpnodes[nodenum].in_use = 1;
920 squeeze_interpret_dval(sqctx, sqctx->tmpnodes[nodenum].child[0].dval, currcode<<1, currcode_nbits+1);
921 squeeze_interpret_dval(sqctx, sqctx->tmpnodes[nodenum].child[1].dval, ((currcode<<1) | 1), currcode_nbits+1);
922 sqctx->tmpnodes[nodenum].in_use = 0;
925 static int squeeze_process_nodetable(deark *c, struct squeeze_ctx *sqctx)
927 int retval = 0;
929 // It feels a little wrong to go to the trouble of decoding this node table into
930 // the form required by our Huffman library's API, when we know it's going to
931 // just convert it back into a table much like it was originally. Maybe there
932 // should be a better way to do this.
933 de_dbg3(c, "interpreted huffman codebook:");
934 de_dbg_indent(c, 1);
935 squeeze_interpret_node(sqctx, 0, 0, 0);
936 de_dbg_indent(c, -1);
938 if(c->debug_level>=4) {
939 fmtutil_huffman_dump(c, sqctx->ht);
942 retval = 1;
943 return retval;
946 static int squeeze_read_nodetable(deark *c, struct squeeze_ctx *sqctx)
948 i64 k;
949 int retval = 0;
951 if(sqctx->bitrd.curpos+2 > sqctx->bitrd.endpos) goto done;
952 sqctx->nodecount = dbuf_getu16le_p(sqctx->dcmpri->f, &sqctx->bitrd.curpos);
953 de_dbg(c, "node count: %d", (int)sqctx->nodecount);
954 if(sqctx->nodecount > SQUEEZE_MAX_NODES) {
955 de_dfilter_set_errorf(c, sqctx->dres, sqctx->modname,
956 "Invalid node count");
957 goto done;
960 de_dbg2(c, "node table nodes at %"I64_FMT, sqctx->bitrd.curpos);
961 de_dbg_indent(c, 1);
962 for(k=0; k<sqctx->nodecount; k++) {
963 sqctx->tmpnodes[k].child[0].dval = (i16)dbuf_geti16le_p(sqctx->dcmpri->f, &sqctx->bitrd.curpos);
964 sqctx->tmpnodes[k].child[1].dval = (i16)dbuf_geti16le_p(sqctx->dcmpri->f, &sqctx->bitrd.curpos);
965 if(c->debug_level >= 2) {
966 de_dbg2(c, "nodetable[%d]: %d %d", (int)k, (int)sqctx->tmpnodes[k].child[0].dval,
967 (int)sqctx->tmpnodes[k].child[1].dval);
970 de_dbg_indent(c, -1);
971 if(sqctx->bitrd.curpos > sqctx->bitrd.endpos) goto done;
973 if(!squeeze_process_nodetable(c, sqctx)) goto done;
975 retval = 1;
976 done:
977 return retval;
980 static int squeeze_read_codes(deark *c, struct squeeze_ctx *sqctx)
982 int retval = 0;
984 de_dbg(c, "huffman-compressed data at %"I64_FMT, sqctx->bitrd.curpos);
985 sqctx->bitrd.bbll.is_lsb = 1;
986 de_bitbuf_lowlevel_empty(&sqctx->bitrd.bbll);
988 if(fmtutil_huffman_get_max_bits(sqctx->ht->bk) < 1) {
989 // Empty tree? Assume this is an empty file.
990 retval = 1;
991 goto done;
994 while(1) {
995 int ret;
996 fmtutil_huffman_valtype val = 0;
998 ret = fmtutil_huffman_read_next_value(sqctx->ht->bk, &sqctx->bitrd, &val, NULL);
999 if(!ret || val<0 || val>256) {
1000 if(sqctx->bitrd.eof_flag) {
1001 retval = 1;
1003 else {
1004 de_dfilter_set_errorf(c, sqctx->dres, sqctx->modname, "Huffman decode error");
1006 goto done;
1009 if(val>=0 && val<=255) {
1010 dbuf_writebyte(sqctx->dcmpro->f, (u8)val);
1011 sqctx->nbytes_written++;
1012 if(sqctx->dcmpro->len_known && (sqctx->nbytes_written >= sqctx->dcmpro->expected_len)) {
1013 retval = 1;
1014 goto done;
1017 else if(val==256) { // STOP code
1018 retval = 1;
1019 goto done;
1023 done:
1024 return retval;
1027 void fmtutil_huff_squeeze_codectype1(deark *c, struct de_dfilter_in_params *dcmpri,
1028 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres,
1029 void *codec_private_params)
1031 struct squeeze_ctx *sqctx = NULL;
1032 int ok = 0;
1034 sqctx = de_malloc(c, sizeof(struct squeeze_ctx));
1035 sqctx->c = c;
1036 sqctx->modname = "unsqueeze";
1037 sqctx->dcmpri = dcmpri;
1038 sqctx->dcmpro = dcmpro;
1039 sqctx->dres = dres;
1041 sqctx->bitrd.f = dcmpri->f;
1042 sqctx->bitrd.curpos = dcmpri->pos;
1043 sqctx->bitrd.endpos = dcmpri->pos + dcmpri->len;
1045 sqctx->ht = fmtutil_huffman_create_decoder(c, 257, 257);
1047 if(!squeeze_read_nodetable(c, sqctx)) goto done;
1048 if(!squeeze_read_codes(c, sqctx)) goto done;
1050 dres->bytes_consumed = sqctx->bitrd.curpos - dcmpri->pos;
1051 if(dres->bytes_consumed > dcmpri->len) {
1052 dres->bytes_consumed = dcmpri->len;
1054 dres->bytes_consumed_valid = 1;
1055 ok = 1;
1057 done:
1058 if(!ok || dres->errcode) {
1059 de_dfilter_set_errorf(c, dres, sqctx->modname, "Squeeze decompression failed");
1062 if(sqctx) {
1063 fmtutil_huffman_destroy_decoder(c, sqctx->ht);
1064 de_free(c, sqctx);