Cleanup - Made the codec API more consistent
[deark.git] / src / fmtutil-cmpr.c
blob32c599496b7cbd467e9132f37a65f87de5d99711
1 // This file is part of Deark.
2 // Copyright (C) 2018 Jason Summers
3 // See the file COPYING for terms of use.
5 // Decompression, etc.
7 #define DE_NOT_IN_MODULE
8 #include "deark-config.h"
9 #include "deark-private.h"
10 #include "deark-fmtutil.h"
12 // Returns a message that is valid until the next operation on dres.
13 const char *de_dfilter_get_errmsg(deark *c, struct de_dfilter_results *dres)
15 if(dres->errcode==0) {
16 return "No error";
18 if(dres->errmsg[0]) {
19 return dres->errmsg;
21 return "Unspecified error";
24 // Initialize or reset a dfilter results struct
25 void de_dfilter_results_clear(deark *c, struct de_dfilter_results *dres)
27 dres->errcode = 0;
28 dres->bytes_consumed_valid = 0;
29 dres->bytes_consumed = 0;
30 dres->errmsg[0] = '\0';
33 // Note: It is also okay to init these objects by zeroing out their bytes.
34 void de_dfilter_init_objects(deark *c, struct de_dfilter_in_params *dcmpri,
35 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres)
37 if(dcmpri)
38 de_zeromem(dcmpri, sizeof(struct de_dfilter_in_params));
39 if(dcmpro)
40 de_zeromem(dcmpro, sizeof(struct de_dfilter_out_params));
41 if(dres)
42 de_dfilter_results_clear(c, dres);
45 void de_dfilter_set_errorf(deark *c, struct de_dfilter_results *dres, const char *modname,
46 const char *fmt, ...)
48 va_list ap;
50 if(dres->errcode != 0) return; // Only record the first error
51 dres->errcode = 1;
53 va_start(ap, fmt);
54 if(modname) {
55 char tmpbuf[80];
57 de_vsnprintf(tmpbuf, sizeof(tmpbuf), fmt, ap);
58 de_snprintf(dres->errmsg, sizeof(dres->errmsg), "[%s] %s", modname, tmpbuf);
60 else {
61 de_vsnprintf(dres->errmsg, sizeof(dres->errmsg), fmt, ap);
63 va_end(ap);
66 void de_dfilter_set_generic_error(deark *c, struct de_dfilter_results *dres, const char *modname)
68 if(dres->errcode != 0) return;
69 de_dfilter_set_errorf(c, dres, modname, "Unspecified error");
72 // This is a decompression API that uses a "push" input model. The client
73 // sends data to the codec as the data becomes available.
74 // (The client must still be able to consume any amount of output data
75 // immediately.)
76 // This model makes it easier to chain multiple codecs together, and to handle
77 // input data that is not contiguous.
78 // TODO: There's no reason this couldn't be extended to work with "type1" codecs.
80 struct de_dfilter_ctx *de_dfilter_create(deark *c,
81 dfilter_codec_type codec_init_fn, void *codec_private_params,
82 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres)
84 struct de_dfilter_ctx *dfctx = NULL;
86 dfctx = de_malloc(c, sizeof(struct de_dfilter_ctx));
87 dfctx->c = c;
88 dfctx->dres = dres;
89 dfctx->dcmpro = dcmpro;
91 if(codec_init_fn) {
92 codec_init_fn(dfctx, codec_private_params);
94 // TODO: How should we handle failure to initialize a codec?
96 return dfctx;
99 void de_dfilter_addbuf(struct de_dfilter_ctx *dfctx,
100 const u8 *buf, i64 buf_len)
102 if(dfctx->finished_flag) return;
104 if(dfctx->codec_addbuf_fn && (buf_len>0)) {
105 dfctx->codec_addbuf_fn(dfctx, buf, buf_len);
107 if(dfctx->dres->errcode) {
108 dfctx->finished_flag = 1;
113 // Commands: (Commands are not supported by all codecs)
114 // DE_DFILTER_COMMAND_SOFTRESET
115 // Reset the decompressor state. Exact function depends on the codec.
117 // DE_DFILTER_COMMAND_REINITIALIZE
118 // Reinitialize a codec, so you don't have to destroy and recreate it in
119 // in order to use it again. Typically used after _finish().
120 // Before using this command, it is okay to change the internal parameters of
121 // the dcmpro and dres given to de_dfilter_create(). You should call
122 // de_dfilter_results_clear or the equivalent if you have already handled
123 // previous errors.
124 void de_dfilter_command(struct de_dfilter_ctx *dfctx, int cmd, UI flags)
126 // Non-codec-specific things:
128 if(cmd==DE_DFILTER_COMMAND_REINITIALIZE) {
129 dfctx->finished_flag = 0;
130 dfctx->dres->bytes_consumed_valid = 0;
133 // Codec-specific things:
135 if(dfctx->codec_command_fn) {
136 dfctx->codec_command_fn(dfctx, cmd, flags);
140 // Call this to inform the codec that there are no more compressed bytes.
141 // The codec's 'finish' function should flush any pending output,
142 // and update the decompression results in dfctx->dres.
143 // Some codecs can still be used after this, provided you then call
144 // de_dfilter_command(...,DE_DFILTER_COMMAND_REINITIALIZE).
145 void de_dfilter_finish(struct de_dfilter_ctx *dfctx)
147 if(dfctx->codec_finish_fn) {
148 dfctx->codec_finish_fn(dfctx);
152 void de_dfilter_destroy(struct de_dfilter_ctx *dfctx)
154 deark *c;
156 if(!dfctx) return;
157 c = dfctx->c;
158 if(dfctx->codec_destroy_fn) {
159 dfctx->codec_destroy_fn(dfctx);
162 de_free(c, dfctx);
165 static int my_dfilter_addslice_buffered_read_cbfn(struct de_bufferedreadctx *brctx, const u8 *buf,
166 i64 buf_len)
168 struct de_dfilter_ctx *dfctx = (struct de_dfilter_ctx *)brctx->userdata;
170 de_dfilter_addbuf(dfctx, buf, buf_len);
171 if(dfctx->finished_flag) return 0;
172 return 1;
175 void de_dfilter_addslice(struct de_dfilter_ctx *dfctx,
176 dbuf *inf, i64 pos, i64 len)
178 if(dfctx->finished_flag) return;
179 dbuf_buffered_read(inf, pos, len,
180 my_dfilter_addslice_buffered_read_cbfn, (void*)dfctx);
183 // Use a "pushable" codec in a non-pushable way.
184 void de_dfilter_decompress_oneshot(deark *c,
185 dfilter_codec_type codec_init_fn, void *codec_private_params,
186 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
187 struct de_dfilter_results *dres)
189 struct de_dfilter_ctx *dfctx = NULL;
191 dfctx = de_dfilter_create(c, codec_init_fn, codec_private_params,
192 dcmpro, dres);
193 dfctx->input_file_offset = dcmpri->pos;
194 de_dfilter_addslice(dfctx, dcmpri->f, dcmpri->pos, dcmpri->len);
195 de_dfilter_finish(dfctx);
196 de_dfilter_destroy(dfctx);
199 // Trivial "decompression" of uncompressed data.
200 void fmtutil_decompress_uncompressed(deark *c, struct de_dfilter_in_params *dcmpri,
201 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres, UI flags)
203 i64 len;
204 i64 nbytes_avail;
206 nbytes_avail = de_min_int(dcmpri->len, dcmpri->f->len - dcmpri->pos);
208 if(dcmpro->len_known) {
209 len = dcmpro->expected_len;
211 else {
212 len = dcmpri->len;
215 if(len>nbytes_avail) len = nbytes_avail;
216 if(len<0) len = 0;
218 dbuf_copy(dcmpri->f, dcmpri->pos, len, dcmpro->f);
219 dres->bytes_consumed = len;
220 dres->bytes_consumed_valid = 1;
// States of the PackBits decoder's state machine.
enum packbits_state_enum {
	// Waiting for a code byte. This is the initial state.
	PACKBITS_STATE_NEUTRAL = 0,
	// Copying the bytes of an uncompressed (literal) run.
	PACKBITS_STATE_COPYING_LITERAL = 1,
	// Collecting the byte(s) of the unit that a compressed run repeats.
	PACKBITS_STATE_READING_UNIT_TO_REPEAT = 2
};
229 struct packbitsctx {
230 size_t nbytes_per_unit;
231 size_t nbytes_in_unitbuf;
232 u8 unitbuf[2];
233 i64 total_nbytes_processed;
234 i64 nbytes_written;
235 enum packbits_state_enum state;
236 i64 nliteral_bytes_remaining;
237 i64 repeat_count;
240 static void my_packbits_codec_addbuf(struct de_dfilter_ctx *dfctx,
241 const u8 *buf, i64 buf_len)
243 int i;
244 u8 b;
245 struct packbitsctx *rctx = (struct packbitsctx*)dfctx->codec_private;
247 if(!rctx) return;
249 for(i=0; i<buf_len; i++) {
250 if(dfctx->dcmpro->len_known &&
251 (rctx->nbytes_written >= dfctx->dcmpro->expected_len))
253 dfctx->finished_flag = 1;
254 break;
257 b = buf[i];
258 rctx->total_nbytes_processed++;
260 switch(rctx->state) {
261 case PACKBITS_STATE_NEUTRAL: // this is a code byte
262 if(b>128) { // A compressed run
263 rctx->repeat_count = 257 - (i64)b;
264 rctx->state = PACKBITS_STATE_READING_UNIT_TO_REPEAT;
266 else if(b<128) { // An uncompressed run
267 rctx->nliteral_bytes_remaining = (1 + (i64)b) * (i64)rctx->nbytes_per_unit;
268 rctx->state = PACKBITS_STATE_COPYING_LITERAL;
270 // Else b==128. No-op.
271 // TODO: Some (but not most) ILBM specs say that code 128 is used to
272 // mark the end of compressed data, so maybe there should be options to
273 // tell us what to do when code 128 is encountered.
274 break;
275 case PACKBITS_STATE_COPYING_LITERAL: // This byte is uncompressed
276 dbuf_writebyte(dfctx->dcmpro->f, b);
277 rctx->nbytes_written++;
278 rctx->nliteral_bytes_remaining--;
279 if(rctx->nliteral_bytes_remaining<=0) {
280 rctx->state = PACKBITS_STATE_NEUTRAL;
282 break;
283 case PACKBITS_STATE_READING_UNIT_TO_REPEAT:
284 if(rctx->nbytes_per_unit==1) { // Optimization for standard PackBits
285 dbuf_write_run(dfctx->dcmpro->f, b, rctx->repeat_count);
286 rctx->nbytes_written += rctx->repeat_count;
287 rctx->state = PACKBITS_STATE_NEUTRAL;
289 else {
290 rctx->unitbuf[rctx->nbytes_in_unitbuf++] = b;
291 if(rctx->nbytes_in_unitbuf >= rctx->nbytes_per_unit) {
292 i64 k;
294 for(k=0; k<rctx->repeat_count; k++) {
295 dbuf_write(dfctx->dcmpro->f, rctx->unitbuf, (i64)rctx->nbytes_per_unit);
297 rctx->nbytes_in_unitbuf = 0;
298 rctx->nbytes_written += rctx->repeat_count * (i64)rctx->nbytes_per_unit;
299 rctx->state = PACKBITS_STATE_NEUTRAL;
302 break;
307 static void my_packbits_codec_command(struct de_dfilter_ctx *dfctx, int cmd, UI flags)
309 struct packbitsctx *rctx = (struct packbitsctx*)dfctx->codec_private;
311 if(cmd==DE_DFILTER_COMMAND_SOFTRESET || cmd==DE_DFILTER_COMMAND_REINITIALIZE) {
312 // "soft reset" - reset the low-level compression state, but don't update
313 // dres, or the total-bytes counters, etc.
314 rctx->state = PACKBITS_STATE_NEUTRAL;
315 rctx->nbytes_in_unitbuf = 0;
316 rctx->nliteral_bytes_remaining = 0;
317 rctx->repeat_count = 0;
319 if(cmd==DE_DFILTER_COMMAND_REINITIALIZE) {
320 rctx->total_nbytes_processed = 0;
321 rctx->nbytes_written = 0;
325 static void my_packbits_codec_finish(struct de_dfilter_ctx *dfctx)
327 struct packbitsctx *rctx = (struct packbitsctx*)dfctx->codec_private;
329 if(!rctx) return;
330 dfctx->dres->bytes_consumed = rctx->total_nbytes_processed;
331 dfctx->dres->bytes_consumed_valid = 1;
334 static void my_packbits_codec_destroy(struct de_dfilter_ctx *dfctx)
336 struct packbitsctx *rctx = (struct packbitsctx*)dfctx->codec_private;
338 if(rctx) {
339 de_free(dfctx->c, rctx);
341 dfctx->codec_private = NULL;
344 // codec_private_params: de_packbits_params, or NULL for default params.
345 void dfilter_packbits_codec(struct de_dfilter_ctx *dfctx, void *codec_private_params)
347 struct packbitsctx *rctx = NULL;
348 struct de_packbits_params *pbparams = (struct de_packbits_params*)codec_private_params;
350 rctx = de_malloc(dfctx->c, sizeof(struct packbitsctx));
351 rctx->nbytes_per_unit = 1;
352 if(pbparams && pbparams->is_packbits16) {
353 rctx->nbytes_per_unit = 2;
355 dfctx->codec_private = (void*)rctx;
356 dfctx->codec_addbuf_fn = my_packbits_codec_addbuf;
357 dfctx->codec_finish_fn = my_packbits_codec_finish;
358 dfctx->codec_command_fn = my_packbits_codec_command;
359 dfctx->codec_destroy_fn = my_packbits_codec_destroy;
362 void fmtutil_decompress_packbits_ex(deark *c, struct de_dfilter_in_params *dcmpri,
363 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres,
364 struct de_packbits_params *pbparams)
366 de_dfilter_decompress_oneshot(c, dfilter_packbits_codec, (void*)pbparams,
367 dcmpri, dcmpro, dres);
370 // Returns 0 on failure (currently impossible).
371 int fmtutil_decompress_packbits(dbuf *f, i64 pos1, i64 len,
372 dbuf *unc_pixels, i64 *cmpr_bytes_consumed)
374 struct de_dfilter_results dres;
375 struct de_dfilter_in_params dcmpri;
376 struct de_dfilter_out_params dcmpro;
378 if(cmpr_bytes_consumed) *cmpr_bytes_consumed = 0;
379 de_dfilter_init_objects(f->c, &dcmpri, &dcmpro, &dres);
381 dcmpri.f = f;
382 dcmpri.pos = pos1;
383 dcmpri.len = len;
384 dcmpro.f = unc_pixels;
385 if(unc_pixels->has_len_limit) {
386 dcmpro.len_known = 1;
387 dcmpro.expected_len = unc_pixels->len_limit - unc_pixels->len;
390 de_dfilter_decompress_oneshot(f->c, dfilter_packbits_codec, NULL,
391 &dcmpri, &dcmpro, &dres);
393 if(cmpr_bytes_consumed && dres.bytes_consumed_valid) {
394 *cmpr_bytes_consumed = dres.bytes_consumed;
396 if(dres.errcode != 0) return 0;
397 return 1;
400 void fmtutil_decompress_rle90_ex(deark *c, struct de_dfilter_in_params *dcmpri,
401 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres,
402 unsigned int flags)
404 de_dfilter_decompress_oneshot(c, dfilter_rle90_codec, NULL,
405 dcmpri, dcmpro, dres);
408 struct rle90ctx {
409 i64 total_nbytes_processed;
410 i64 nbytes_written;
411 u8 last_output_byte;
412 int countcode_pending;
415 static void my_rle90_codec_addbuf(struct de_dfilter_ctx *dfctx,
416 const u8 *buf, i64 buf_len)
418 int i;
419 u8 b;
420 struct rle90ctx *rctx = (struct rle90ctx*)dfctx->codec_private;
422 if(!rctx) return;
424 for(i=0; i<buf_len; i++) {
425 if(dfctx->dcmpro->len_known &&
426 (rctx->nbytes_written >= dfctx->dcmpro->expected_len))
428 dfctx->finished_flag = 1;
429 break;
432 b = buf[i];
433 rctx->total_nbytes_processed++;
435 if(rctx->countcode_pending && b==0) {
436 // Not RLE, just an escaped 0x90 byte.
437 dbuf_writebyte(dfctx->dcmpro->f, 0x90);
438 rctx->nbytes_written++;
439 rctx->last_output_byte = 0x90;
440 rctx->countcode_pending = 0;
442 else if(rctx->countcode_pending) {
443 i64 count;
445 // RLE. We already emitted one byte (because the byte to repeat
446 // comes before the repeat count), so write countcode-1 bytes.
447 count = (i64)(b-1);
448 if(dfctx->dcmpro->len_known &&
449 (rctx->nbytes_written+count > dfctx->dcmpro->expected_len))
451 count = dfctx->dcmpro->expected_len - rctx->nbytes_written;
453 dbuf_write_run(dfctx->dcmpro->f, rctx->last_output_byte, count);
454 rctx->nbytes_written += count;
456 rctx->countcode_pending = 0;
458 else if(b==0x90) {
459 rctx->countcode_pending = 1;
461 else {
462 dbuf_writebyte(dfctx->dcmpro->f, b);
463 rctx->nbytes_written++;
464 rctx->last_output_byte = b;
469 static void my_rle90_codec_finish(struct de_dfilter_ctx *dfctx)
471 struct rle90ctx *rctx = (struct rle90ctx*)dfctx->codec_private;
473 if(!rctx) return;
474 dfctx->dres->bytes_consumed = rctx->total_nbytes_processed;
475 dfctx->dres->bytes_consumed_valid = 1;
478 static void my_rle90_codec_destroy(struct de_dfilter_ctx *dfctx)
480 struct rle90ctx *rctx = (struct rle90ctx*)dfctx->codec_private;
482 if(rctx) {
483 de_free(dfctx->c, rctx);
485 dfctx->codec_private = NULL;
488 // RLE algorithm occasionally called "RLE90". Variants of this are used by
489 // BinHex, ARC, StuffIt, and others.
490 // codec_private_params: Unused, must be NULL.
491 void dfilter_rle90_codec(struct de_dfilter_ctx *dfctx, void *codec_private_params)
493 struct rle90ctx *rctx = NULL;
495 rctx = de_malloc(dfctx->c, sizeof(struct rle90ctx));
496 dfctx->codec_private = (void*)rctx;
497 dfctx->codec_addbuf_fn = my_rle90_codec_addbuf;
498 dfctx->codec_finish_fn = my_rle90_codec_finish;
499 dfctx->codec_destroy_fn = my_rle90_codec_destroy;
502 struct szdd_ctx {
503 i64 nbytes_written;
504 int stop_flag;
505 struct de_dfilter_out_params *dcmpro;
506 struct de_lz77buffer *ringbuf;
509 static void szdd_lz77buf_writebytecb(struct de_lz77buffer *rb, const u8 n)
511 struct szdd_ctx *sctx = (struct szdd_ctx*)rb->userdata;
513 if(sctx->stop_flag) return;
514 if(sctx->dcmpro->len_known) {
515 if(sctx->nbytes_written >= sctx->dcmpro->expected_len) {
516 sctx->stop_flag = 1;
517 return;
521 dbuf_writebyte(sctx->dcmpro->f, n);
522 sctx->nbytes_written++;
525 static void szdd_init_window_default(struct de_lz77buffer *ringbuf)
527 de_lz77buffer_clear(ringbuf, 0x20);
528 ringbuf->curpos = 4096 - 16;
531 static void szdd_init_window_lz5(struct de_lz77buffer *ringbuf)
533 size_t wpos;
534 int i;
536 de_zeromem(ringbuf->buf, 4096);
537 wpos = 13;
538 for(i=1; i<256; i++) {
539 de_memset(&ringbuf->buf[wpos], i, 13);
540 wpos += 13;
542 for(i=0; i<256; i++) {
543 ringbuf->buf[wpos++] = i;
545 for(i=255; i>=0; i--) {
546 ringbuf->buf[wpos++] = i;
548 wpos += 128;
549 de_memset(&ringbuf->buf[wpos], 0x20, 110);
550 wpos += 110;
551 ringbuf->curpos = (UI)wpos;
554 // Partially based on the libmspack's format documentation at
555 // <https://www.cabextract.org.uk/libmspack/doc/szdd_kwaj_format.html>
556 // flags:
557 // 0x1: LArc lz5 mode
558 void fmtutil_decompress_szdd(deark *c, struct de_dfilter_in_params *dcmpri,
559 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres, unsigned int flags)
561 i64 pos = dcmpri->pos;
562 i64 endpos = dcmpri->pos + dcmpri->len;
563 struct szdd_ctx *sctx = NULL;
565 sctx = de_malloc(c, sizeof(struct szdd_ctx));
566 sctx->dcmpro = dcmpro;
567 sctx->ringbuf = de_lz77buffer_create(c, 4096);
568 sctx->ringbuf->writebyte_cb = szdd_lz77buf_writebytecb;
569 sctx->ringbuf->userdata = (void*)sctx;
571 if(flags & 0x1) {
572 szdd_init_window_lz5(sctx->ringbuf);
574 else {
575 szdd_init_window_default(sctx->ringbuf);
578 while(1) {
579 UI control;
580 UI cbit;
582 if(pos+1 > endpos) goto unc_done; // Out of input data
583 control = (UI)dbuf_getbyte(dcmpri->f, pos++);
585 for(cbit=0x01; cbit<=0x80; cbit<<=1) {
586 if(control & cbit) { // literal
587 u8 b;
589 if(pos+1 > endpos) goto unc_done;
590 b = dbuf_getbyte(dcmpri->f, pos++);
591 de_lz77buffer_add_literal_byte(sctx->ringbuf, b);
592 if(sctx->stop_flag) goto unc_done;
594 else { // match
595 UI x0, x1;
596 UI matchpos;
597 UI matchlen;
599 if(pos+2 > endpos) goto unc_done;
600 x0 = (UI)dbuf_getbyte_p(dcmpri->f, &pos);
601 x1 = (UI)dbuf_getbyte_p(dcmpri->f, &pos);
602 matchpos = ((x1 & 0xf0) << 4) | x0;
603 matchlen = (x1 & 0x0f) + 3;
604 de_lz77buffer_copy_from_hist(sctx->ringbuf, matchpos, matchlen);
605 if(sctx->stop_flag) goto unc_done;
610 unc_done:
611 dres->bytes_consumed_valid = 1;
612 dres->bytes_consumed = pos - dcmpri->pos;
613 if(sctx) {
614 de_lz77buffer_destroy(c, sctx->ringbuf);
615 de_free(c, sctx);
619 //======================= hlp_lz77 =======================
621 struct hlplz77ctx {
622 i64 nbytes_written;
623 int stop_flag;
624 struct de_dfilter_out_params *dcmpro;
625 struct de_lz77buffer *ringbuf;
628 static void hlplz77_lz77buf_writebytecb(struct de_lz77buffer *rb, const u8 n)
630 struct hlplz77ctx *sctx = (struct hlplz77ctx*)rb->userdata;
632 if(sctx->stop_flag) return;
633 if(sctx->dcmpro->len_known) {
634 if(sctx->nbytes_written >= sctx->dcmpro->expected_len) {
635 sctx->stop_flag = 1;
636 return;
640 dbuf_writebyte(sctx->dcmpro->f, n);
641 sctx->nbytes_written++;
644 // This is very similar to the mscompress SZDD algorithm, but
645 // gratuitously different.
646 void fmtutil_hlp_lz77_codectype1(deark *c, struct de_dfilter_in_params *dcmpri,
647 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres,
648 void *codec_private_params)
650 i64 pos = dcmpri->pos;
651 i64 endpos = dcmpri->pos + dcmpri->len;
652 struct hlplz77ctx *sctx = NULL;
654 sctx = de_malloc(c, sizeof(struct hlplz77ctx));
655 sctx->dcmpro = dcmpro;
656 sctx->ringbuf = de_lz77buffer_create(c, 4096);
657 sctx->ringbuf->writebyte_cb = hlplz77_lz77buf_writebytecb;
658 sctx->ringbuf->userdata = (void*)sctx;
659 de_lz77buffer_clear(sctx->ringbuf, 0x20);
661 while(1) {
662 UI control;
663 UI cbit;
665 if(pos+1 > endpos) goto unc_done; // Out of input data
666 control = (UI)dbuf_getbyte(dcmpri->f, pos++);
668 for(cbit=0x01; cbit<=0x80; cbit<<=1) {
669 if((control & cbit)==0) { // literal
670 u8 b;
672 if(pos+1 > endpos) goto unc_done;
673 b = dbuf_getbyte(dcmpri->f, pos++);
674 de_lz77buffer_add_literal_byte(sctx->ringbuf, b);
675 if(sctx->stop_flag) goto unc_done;
677 else { // match
678 UI x;
679 UI matchpos;
680 UI matchlen;
682 if(pos+2 > endpos) goto unc_done;
683 x = (UI)dbuf_getu16le_p(dcmpri->f, &pos);
684 matchlen = (x>>12) + 3;
685 matchpos = sctx->ringbuf->curpos - ((x & 0x0fff)+1);
686 de_lz77buffer_copy_from_hist(sctx->ringbuf, matchpos, matchlen);
687 if(sctx->stop_flag) goto unc_done;
692 unc_done:
693 dres->bytes_consumed_valid = 1;
694 dres->bytes_consumed = pos - dcmpri->pos;
695 if(sctx) {
696 de_lz77buffer_destroy(c, sctx->ringbuf);
697 de_free(c, sctx);
701 //========================================================
703 struct my_2layer_userdata {
704 struct de_dfilter_ctx *dfctx_codec2;
705 i64 intermediate_nbytes;
708 static void my_2layer_write_cb(dbuf *f, void *userdata,
709 const u8 *buf, i64 size)
711 struct my_2layer_userdata *u = (struct my_2layer_userdata*)userdata;
713 de_dfilter_addbuf(u->dfctx_codec2, buf, size);
714 u->intermediate_nbytes += size;
717 // If src indicates error and dst does not, copy the error from src to dst.
718 void de_dfilter_transfer_error(deark *c, struct de_dfilter_results *src,
719 struct de_dfilter_results *dst)
721 if(src->errcode && !dst->errcode) {
722 dst->errcode = src->errcode;
723 de_strlcpy(dst->errmsg, src->errmsg, sizeof(dst->errmsg));
727 // Decompress an arbitrary two-layer compressed format.
728 // tlp->codec1* is the first one that will be used during decompression (i.e. the second
729 // method used when during *compression*).
730 void de_dfilter_decompress_two_layer(deark *c, struct de_dcmpr_two_layer_params *tlp)
732 dbuf *outf_codec1 = NULL;
733 struct de_dfilter_out_params dcmpro_codec1;
734 struct de_dfilter_results dres_codec2;
735 struct my_2layer_userdata u;
736 struct de_dfilter_ctx *dfctx_codec2 = NULL;
738 de_dfilter_init_objects(c, NULL, &dcmpro_codec1, NULL);
739 de_dfilter_init_objects(c, NULL, NULL, &dres_codec2);
740 de_zeromem(&u, sizeof(struct my_2layer_userdata));
742 // Make a custom dbuf. The output from the first decompressor will be written
743 // to it, and it will relay that output to the second decompressor.
744 outf_codec1 = dbuf_create_custom_dbuf(c, 0, 0);
745 outf_codec1->userdata_for_customwrite = (void*)&u;
746 outf_codec1->customwrite_fn = my_2layer_write_cb;
748 dcmpro_codec1.f = outf_codec1;
749 if(tlp->intermed_len_known) {
750 dcmpro_codec1.len_known = 1;
751 dcmpro_codec1.expected_len = tlp->intermed_expected_len;
753 else {
754 dcmpro_codec1.len_known = 0;
755 dcmpro_codec1.expected_len = 0;
758 dfctx_codec2 = de_dfilter_create(c, tlp->codec2, tlp->codec2_private_params, tlp->dcmpro, &dres_codec2);
759 u.dfctx_codec2 = dfctx_codec2;
761 // The first codec in the chain does not need the advanced (de_dfilter_create) API.
762 if(tlp->codec1_type1) {
763 tlp->codec1_type1(c, tlp->dcmpri, &dcmpro_codec1, tlp->dres, tlp->codec1_private_params);
765 else {
766 de_dfilter_decompress_oneshot(c, tlp->codec1_pushable, tlp->codec1_private_params,
767 tlp->dcmpri, &dcmpro_codec1, tlp->dres);
769 de_dfilter_finish(dfctx_codec2);
771 if(tlp->dres->errcode) goto done;
772 de_dbg2(c, "size after intermediate decompression: %"I64_FMT, u.intermediate_nbytes);
774 if(dres_codec2.errcode) {
775 // An error occurred in codec2, and not in codec1.
776 // Copy the error info to the dres that will be returned to the caller.
777 de_dfilter_transfer_error(c, &dres_codec2, tlp->dres);
778 goto done;
781 done:
782 de_dfilter_destroy(dfctx_codec2);
783 dbuf_close(outf_codec1);
786 struct de_lz77buffer *de_lz77buffer_create(deark *c, UI bufsize)
788 struct de_lz77buffer *rb;
790 rb = de_malloc(c, sizeof(struct de_lz77buffer));
791 rb->buf = de_malloc(c, (i64)bufsize);
792 rb->bufsize = bufsize;
793 rb->mask = bufsize - 1;
794 return rb;
797 void de_lz77buffer_destroy(deark *c, struct de_lz77buffer *rb)
799 if(!rb) return;
800 de_free(c, rb->buf);
801 de_free(c, rb);
804 // Set all bytes to the same value, and reset the current position to 0.
805 void de_lz77buffer_clear(struct de_lz77buffer *rb, UI val)
807 de_memset(rb->buf, val, rb->bufsize);
808 rb->curpos = 0;
811 void de_lz77buffer_set_curpos(struct de_lz77buffer *rb, UI newpos)
813 rb->curpos = newpos & rb->mask;
816 void de_lz77buffer_add_literal_byte(struct de_lz77buffer *rb, u8 b)
818 rb->writebyte_cb(rb, b);
819 rb->buf[rb->curpos] = b;
820 rb->curpos = (rb->curpos+1) & rb->mask;
823 void de_lz77buffer_copy_from_hist(struct de_lz77buffer *rb,
824 UI startpos, UI count)
826 UI frompos;
827 UI i;
829 frompos = startpos & rb->mask;
830 for(i=0; i<count; i++) {
831 de_lz77buffer_add_literal_byte(rb, rb->buf[frompos]);
832 frompos = (frompos+1) & rb->mask;
836 ///////////////////////////////////
837 // "Squeeze"-style Huffman decoder
// Maximum number of nodes in a Squeeze node table.
// A table with one node can encode 2 symbols, and every additional node adds
// 1 more, so the number of nodes needed is one less than the number of
// symbols. There are at most 257 symbols (the 256 byte values, plus a special
// "stop" code), which means at most 256 nodes.
#define SQUEEZE_MAX_NODES 256
844 struct squeeze_data_item {
845 i16 dval;
848 struct squeeze_node {
849 u8 in_use;
850 struct squeeze_data_item child[2];
853 struct squeeze_ctx {
854 deark *c;
855 struct de_dfilter_in_params *dcmpri;
856 struct de_dfilter_out_params *dcmpro;
857 struct de_dfilter_results *dres;
858 const char *modname;
859 i64 nbytes_written;
860 i64 nodecount;
861 struct fmtutil_huffman_decoder *ht;
862 struct de_bitreader bitrd;
863 struct squeeze_node tmpnodes[SQUEEZE_MAX_NODES]; // Temporary use when decoding the node table
866 static void squeeze_interpret_node(struct squeeze_ctx *sqctx,
867 i64 nodenum, u64 currcode, UI currcode_nbits);
869 static void squeeze_interpret_dval(struct squeeze_ctx *sqctx,
870 i16 dval, u64 currcode, UI currcode_nbits)
872 char b2buf[72];
874 if(dval>=0) { // a pointer to a node
875 if((i64)dval < sqctx->nodecount) {
876 squeeze_interpret_node(sqctx, (i64)dval, currcode, currcode_nbits);
879 else if(dval>=(-257) && dval<=(-1)) {
880 fmtutil_huffman_valtype adj_value;
882 // -257 => 256 (stop code)
883 // -256 => 255 (byte value)
884 // -255 => 254 (byte value)
885 // ...
886 // -1 => 0 (byte value)
887 adj_value = -(((fmtutil_huffman_valtype)dval)+1);
888 if(sqctx->c->debug_level>=2) {
889 de_dbg3(sqctx->c, "code: \"%s\" = %d",
890 de_print_base2_fixed(b2buf, sizeof(b2buf), currcode, currcode_nbits),
891 (int)adj_value);
893 fmtutil_huffman_add_code(sqctx->c, sqctx->ht->bk, currcode, currcode_nbits, adj_value);
895 // TODO: Report errors?
898 static void squeeze_interpret_node(struct squeeze_ctx *sqctx,
899 i64 nodenum, u64 currcode, UI currcode_nbits)
901 // TODO: Report errors?
902 if(nodenum<0 || nodenum>=sqctx->nodecount) return;
903 if(sqctx->tmpnodes[nodenum].in_use) return; // Loops are bad
904 if(currcode_nbits>=FMTUTIL_HUFFMAN_MAX_CODE_LENGTH) return;
906 sqctx->tmpnodes[nodenum].in_use = 1;
907 squeeze_interpret_dval(sqctx, sqctx->tmpnodes[nodenum].child[0].dval, currcode<<1, currcode_nbits+1);
908 squeeze_interpret_dval(sqctx, sqctx->tmpnodes[nodenum].child[1].dval, ((currcode<<1) | 1), currcode_nbits+1);
909 sqctx->tmpnodes[nodenum].in_use = 0;
912 static int squeeze_process_nodetable(deark *c, struct squeeze_ctx *sqctx)
914 int retval = 0;
916 // It feels a little wrong to go to the trouble of decoding this node table into
917 // the form required by our Huffman library's API, when we know it's going to
918 // just convert it back into a table much like it was originally. Maybe there
919 // should be a better way to do this.
920 de_dbg3(c, "interpreted huffman codebook:");
921 de_dbg_indent(c, 1);
922 squeeze_interpret_node(sqctx, 0, 0, 0);
923 de_dbg_indent(c, -1);
925 if(c->debug_level>=4) {
926 fmtutil_huffman_dump(c, sqctx->ht);
929 retval = 1;
930 return retval;
933 static int squeeze_read_nodetable(deark *c, struct squeeze_ctx *sqctx)
935 i64 k;
936 int retval = 0;
938 if(sqctx->bitrd.curpos+2 > sqctx->bitrd.endpos) goto done;
939 sqctx->nodecount = dbuf_getu16le_p(sqctx->dcmpri->f, &sqctx->bitrd.curpos);
940 de_dbg(c, "node count: %d", (int)sqctx->nodecount);
941 if(sqctx->nodecount > SQUEEZE_MAX_NODES) {
942 de_dfilter_set_errorf(c, sqctx->dres, sqctx->modname,
943 "Invalid node count");
944 goto done;
947 de_dbg2(c, "node table nodes at %"I64_FMT, sqctx->bitrd.curpos);
948 de_dbg_indent(c, 1);
949 for(k=0; k<sqctx->nodecount; k++) {
950 sqctx->tmpnodes[k].child[0].dval = (i16)dbuf_geti16le_p(sqctx->dcmpri->f, &sqctx->bitrd.curpos);
951 sqctx->tmpnodes[k].child[1].dval = (i16)dbuf_geti16le_p(sqctx->dcmpri->f, &sqctx->bitrd.curpos);
952 if(c->debug_level >= 2) {
953 de_dbg2(c, "nodetable[%d]: %d %d", (int)k, (int)sqctx->tmpnodes[k].child[0].dval,
954 (int)sqctx->tmpnodes[k].child[1].dval);
957 de_dbg_indent(c, -1);
958 if(sqctx->bitrd.curpos > sqctx->bitrd.endpos) goto done;
960 if(!squeeze_process_nodetable(c, sqctx)) goto done;
962 retval = 1;
963 done:
964 return retval;
967 static int squeeze_read_codes(deark *c, struct squeeze_ctx *sqctx)
969 int retval = 0;
971 de_dbg(c, "huffman-compressed data at %"I64_FMT, sqctx->bitrd.curpos);
972 sqctx->bitrd.bbll.is_lsb = 1;
973 de_bitbuf_lowlevel_empty(&sqctx->bitrd.bbll);
975 if(fmtutil_huffman_get_max_bits(sqctx->ht->bk) < 1) {
976 // Empty tree? Assume this is an empty file.
977 retval = 1;
978 goto done;
981 while(1) {
982 int ret;
983 fmtutil_huffman_valtype val = 0;
985 ret = fmtutil_huffman_read_next_value(sqctx->ht->bk, &sqctx->bitrd, &val, NULL);
986 if(!ret || val<0 || val>256) {
987 if(sqctx->bitrd.eof_flag) {
988 retval = 1;
990 else {
991 de_dfilter_set_errorf(c, sqctx->dres, sqctx->modname, "Huffman decode error");
993 goto done;
996 if(val>=0 && val<=255) {
997 dbuf_writebyte(sqctx->dcmpro->f, (u8)val);
998 sqctx->nbytes_written++;
999 if(sqctx->dcmpro->len_known && (sqctx->nbytes_written >= sqctx->dcmpro->expected_len)) {
1000 retval = 1;
1001 goto done;
1004 else if(val==256) { // STOP code
1005 retval = 1;
1006 goto done;
1010 done:
1011 return retval;
1014 void fmtutil_huff_squeeze_codectype1(deark *c, struct de_dfilter_in_params *dcmpri,
1015 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres,
1016 void *codec_private_params)
1018 struct squeeze_ctx *sqctx = NULL;
1019 int ok = 0;
1021 sqctx = de_malloc(c, sizeof(struct squeeze_ctx));
1022 sqctx->c = c;
1023 sqctx->modname = "unsqueeze";
1024 sqctx->dcmpri = dcmpri;
1025 sqctx->dcmpro = dcmpro;
1026 sqctx->dres = dres;
1028 sqctx->bitrd.f = dcmpri->f;
1029 sqctx->bitrd.curpos = dcmpri->pos;
1030 sqctx->bitrd.endpos = dcmpri->pos + dcmpri->len;
1032 sqctx->ht = fmtutil_huffman_create_decoder(c, 257, 257);
1034 if(!squeeze_read_nodetable(c, sqctx)) goto done;
1035 if(!squeeze_read_codes(c, sqctx)) goto done;
1037 dres->bytes_consumed = sqctx->bitrd.curpos - dcmpri->pos;
1038 if(dres->bytes_consumed > dcmpri->len) {
1039 dres->bytes_consumed = dcmpri->len;
1041 dres->bytes_consumed_valid = 1;
1042 ok = 1;
1044 done:
1045 if(!ok || dres->errcode) {
1046 de_dfilter_set_errorf(c, dres, sqctx->modname, "Squeeze decompression failed");
1049 if(sqctx) {
1050 fmtutil_huffman_destroy_decoder(c, sqctx->ht);
1051 de_free(c, sqctx);