// Extracted from deark.git: modules/mscompress.c
// (blob d43263024c742feaa0ec7d0ba32be3006176f77f; repository page title:
// "iccprofile: Decode 'dict' data type")

// This file is part of Deark.
// Copyright (C) 2017 Jason Summers
// See the file COPYING for terms of use.

// MS-DOS installation compression (compress.exe, expand.exe, MSLZ, etc.)
7 #include <deark-private.h>
8 #include <deark-fmtutil.h>
9 DE_DECLARE_MODULE(de_module_mscompress);
11 #define FMT_SZDD 1
12 #define FMT_KWAJ 2
14 #define CMPR_NONE 0
15 #define CMPR_XOR 1
16 #define CMPR_SZDD 2
17 #define CMPR_LZHUFF 3
18 #define CMPR_MSZIP 4
20 typedef struct localctx_struct {
21 int fmt;
22 int input_encoding;
23 UI cmpr_meth;
24 i64 cmpr_data_pos;
25 i64 cmpr_data_len;
26 u8 uncmpr_len_known;
27 i64 uncmpr_len;
28 de_ucstring *filename;
29 } lctx;
31 static int cmpr_meth_is_supported(lctx *d, UI n)
33 switch(n) {
34 case CMPR_NONE:
35 case CMPR_XOR:
36 case CMPR_SZDD:
37 case CMPR_LZHUFF:
38 case CMPR_MSZIP:
39 return 1;
41 return 0;
44 static const char *get_cmpr_meth_name(UI n)
46 char *name = NULL;
48 switch(n) {
49 case CMPR_NONE: name="uncompressed"; break;
50 case CMPR_XOR: name="XOR"; break;
51 case CMPR_SZDD: name="SZDD"; break;
52 case CMPR_LZHUFF: name="LZ+Huffman"; break;
53 case CMPR_MSZIP: name="MSZIP"; break;
55 return name?name:"?";
58 static int do_header_SZDD(deark *c, lctx *d, i64 pos1)
60 u8 cmpr_mode;
61 u8 fnchar;
62 i64 pos = pos1;
63 char tmps[80];
64 int retval = 0;
66 de_dbg(c, "header at %d", (int)pos);
67 de_dbg_indent(c, 1);
69 d->cmpr_data_pos = 14;
70 d->cmpr_data_len = c->infile->len - d->cmpr_data_pos;
72 pos += 8; // signature
74 cmpr_mode = de_getbyte(pos++);
75 de_dbg(c, "compression mode: 0x%02x ('%c')", (unsigned int)cmpr_mode,
76 de_byte_to_printable_char(cmpr_mode));
77 if(cmpr_mode != 0x41) {
78 de_err(c, "Unsupported compression mode");
79 goto done;
81 d->cmpr_meth = CMPR_SZDD;
83 fnchar = de_getbyte(pos++);
84 if(fnchar>=32 && fnchar<=126) {
85 de_snprintf(tmps, sizeof(tmps), " ('%c')", fnchar);
87 else if(fnchar==0) {
88 de_snprintf(tmps, sizeof(tmps), " (unknown)");
90 else {
91 de_strlcpy(tmps, "", sizeof(tmps));
93 de_dbg(c, "missing filename char: 0x%02x%s", (unsigned int)fnchar, tmps);
95 d->uncmpr_len = de_getu32le(pos);
96 d->uncmpr_len_known = 1;
97 de_dbg(c, "uncompressed len: %"I64_FMT"", d->uncmpr_len);
98 //pos += 4;
100 retval = 1;
101 done:
102 de_dbg_indent(c, -1);
103 return retval;
106 static int do_header_KWAJ(deark *c, lctx *d, i64 pos1)
108 unsigned int flags;
109 i64 pos = pos1;
110 i64 n;
111 i64 foundpos;
112 int retval = 0;
113 int ret;
115 de_dbg(c, "header at %d", (int)pos);
116 de_dbg_indent(c, 1);
118 pos += 8; // signature
120 d->cmpr_meth = (UI)de_getu16le_p(&pos);
121 de_dbg(c, "compression method: %u (%s)", d->cmpr_meth, get_cmpr_meth_name(d->cmpr_meth));
123 d->cmpr_data_pos = de_getu16le_p(&pos);
124 de_dbg(c, "compressed data offset: %"I64_FMT, d->cmpr_data_pos);
125 d->cmpr_data_len = c->infile->len - d->cmpr_data_pos;
127 flags = (UI)de_getu16le_p(&pos);
128 de_dbg(c, "header extension flags: 0x%04x", flags);
130 if(flags & 0x0001) { // bit 0
131 d->uncmpr_len = de_getu32le_p(&pos);
132 d->uncmpr_len_known = 1;
133 de_dbg(c, "uncompressed len: %"I64_FMT"", d->uncmpr_len);
135 if(flags & 0x0002) { // bit 1
136 pos += 2;
138 if(flags & 0x0004) { // bit 2
139 n = de_getu16le_p(&pos);
140 pos += n;
142 if(flags & 0x0008) { // bit 3, base part of filename
143 foundpos = 0;
144 ret = dbuf_search_byte(c->infile, 0x00, pos, 9, &foundpos);
145 if(!ret) goto header_extensions_done;
146 d->filename = ucstring_create(c);
147 dbuf_read_to_ucstring(c->infile, pos, foundpos-pos, d->filename, 0, d->input_encoding);
148 pos = foundpos+1;
150 if(flags & 0x0010) { // bit 4, filename extension
151 foundpos = 0;
152 ret = dbuf_search_byte(c->infile, 0x00, pos, 4, &foundpos);
153 if(!ret) goto header_extensions_done;
154 if(d->filename && (foundpos-pos > 0)) {
155 ucstring_append_char(d->filename, '.');
156 dbuf_read_to_ucstring(c->infile, pos, foundpos-pos, d->filename, 0, d->input_encoding);
158 pos = foundpos+1;
160 if(flags & 0x0020) { // bit 5
161 // TODO (comment?)
164 header_extensions_done:
165 if(ucstring_isnonempty(d->filename)) {
166 de_dbg(c, "filename: \"%s\"", ucstring_getpsz_d(d->filename));
169 // If no compression, don't copy/convert more bytes than given by the uncmpr_len field.
170 if(d->uncmpr_len_known && (d->cmpr_meth==CMPR_NONE || d->cmpr_meth==CMPR_XOR) &&
171 d->uncmpr_len < d->cmpr_data_len)
173 d->cmpr_data_len = d->uncmpr_len;
176 retval = 1;
178 de_dbg_indent(c, -1);
179 return retval;
// Storage type for a Huffman code length. Assumed to be unsigned.
#define MSLZH_SYMLEN_TYPE  u8

// Type of a decoded symbol.
#define MSLZH_VALUE_TYPE   u8
186 struct mslzh_tree {
187 UI enctype;
188 UI num_symbols;
189 MSLZH_SYMLEN_TYPE *symlengths; // array[num_symbols]
190 struct fmtutil_huffman_decoder *fmtuht;
193 struct mslzh_context {
194 deark *c;
195 struct de_dfilter_out_params *dcmpro;
196 i64 nbytes_written;
197 int error_flag; // Bad data in the LZ77 part should not set this flag. Set eof_flag instead.
199 // bitrd.eof_flag: Always set if error_flag is set.
200 struct de_bitreader bitrd;
202 struct de_dfilter_results *dres;
203 const char *modname;
204 struct de_lz77buffer *ringbuf;
205 #define MSLZH_TREE_IDX_MATCHLEN 0
206 #define MSLZH_TREE_IDX_MATCHLEN2 1
207 #define MSLZH_TREE_IDX_LITLEN 2
208 #define MSLZH_TREE_IDX_OFFSET 3
209 #define MSLZH_TREE_IDX_LITERAL 4
210 #define MSLZH_NUM_TREES 5
211 struct mslzh_tree htree[MSLZH_NUM_TREES];
214 static void mslzh_set_errorflag(struct mslzh_context *lzhctx)
216 lzhctx->error_flag = 1;
217 lzhctx->bitrd.eof_flag = 1;
220 static UI mslzh_getbits(struct mslzh_context *lzhctx, UI nbits)
222 return (UI)de_bitreader_getbits(&lzhctx->bitrd, nbits);
225 static void mslzh_read_huffman_tree_enctype_0(struct mslzh_context *lzhctx, struct mslzh_tree *htr)
227 MSLZH_SYMLEN_TYPE n;
228 UI sym_idx;
230 n = (MSLZH_SYMLEN_TYPE)de_log2_rounded_up((i64)htr->num_symbols);
231 for(sym_idx=0; sym_idx<htr->num_symbols; sym_idx++) {
232 htr->symlengths[sym_idx] = n;
236 static void mslzh_read_huffman_tree_enctype_1(struct mslzh_context *lzhctx, struct mslzh_tree *htr)
238 MSLZH_SYMLEN_TYPE prev_sym_len;
239 UI sym_idx;
240 UI n;
242 htr->symlengths[0] = (MSLZH_SYMLEN_TYPE)mslzh_getbits(lzhctx, 4);
243 prev_sym_len = htr->symlengths[0];
245 for(sym_idx=1; sym_idx<htr->num_symbols; sym_idx++) {
246 if(lzhctx->bitrd.eof_flag) goto done;
248 n = mslzh_getbits(lzhctx, 1);
249 if(n==0) { // 0
250 htr->symlengths[sym_idx] = prev_sym_len;
252 else { // 1...
253 n = mslzh_getbits(lzhctx, 1);
254 if(n==0) { // 10
255 htr->symlengths[sym_idx] = prev_sym_len + 1;
257 else { // 11...
258 htr->symlengths[sym_idx] = (MSLZH_SYMLEN_TYPE)mslzh_getbits(lzhctx, 4);
262 prev_sym_len = htr->symlengths[sym_idx];
264 done:
268 static void mslzh_read_huffman_tree_enctype_2(struct mslzh_context *lzhctx, struct mslzh_tree *htr)
270 MSLZH_SYMLEN_TYPE prev_sym_len;
271 UI sym_idx;
272 UI n;
274 htr->symlengths[0] = (MSLZH_SYMLEN_TYPE)mslzh_getbits(lzhctx, 4);
275 prev_sym_len = htr->symlengths[0];
277 for(sym_idx=1; sym_idx<htr->num_symbols; sym_idx++) {
278 if(lzhctx->bitrd.eof_flag) goto done;
280 n = mslzh_getbits(lzhctx, 2);
281 if(n==3) {
282 htr->symlengths[sym_idx] = (MSLZH_SYMLEN_TYPE)mslzh_getbits(lzhctx, 4);
284 else {
285 htr->symlengths[sym_idx] = prev_sym_len + (MSLZH_SYMLEN_TYPE)n - 1;
288 prev_sym_len = htr->symlengths[sym_idx];
290 done:
294 static void mslzh_read_huffman_tree_enctype_3(struct mslzh_context *lzhctx, struct mslzh_tree *htr)
296 UI sym_idx;
298 for(sym_idx=0; sym_idx<htr->num_symbols; sym_idx++) {
299 if(lzhctx->bitrd.eof_flag) goto done;
300 htr->symlengths[sym_idx] = (MSLZH_SYMLEN_TYPE)mslzh_getbits(lzhctx, 4);
302 done:
306 // On error, sets lzhctx->eof_flag
307 static MSLZH_VALUE_TYPE mslzh_getnextcode(struct mslzh_context *lzhctx,
308 struct mslzh_tree *htr)
310 fmtutil_huffman_valtype val = 0;
311 int ret;
313 fmtutil_huffman_reset_cursor(htr->fmtuht->cursor); // Should be unnecessary
315 ret = fmtutil_huffman_read_next_value(htr->fmtuht->bk, &lzhctx->bitrd, &val, NULL);
316 if(!ret) return 0;
317 return (MSLZH_VALUE_TYPE)val;
320 static void mslzh_read_huffman_tree(struct mslzh_context *lzhctx, UI idx)
322 UI i;
323 int saved_indent_level;
324 deark *c = lzhctx->c;
325 struct mslzh_tree *htr = &lzhctx->htree[idx];
326 char tmps[32];
328 de_dbg_indent_save(c, &saved_indent_level);
329 de_dbg(lzhctx->c, "huffman tree #%u at %s, nsyms=%u, enctype=%u",
330 idx, de_bitreader_describe_curpos(&lzhctx->bitrd, tmps, sizeof(tmps)),
331 htr->num_symbols, htr->enctype);
332 de_dbg_indent(c, 1);
334 htr->symlengths = de_mallocarray(c, htr->num_symbols, sizeof(htr->symlengths[0]));
336 switch(htr->enctype) {
337 case 0:
338 mslzh_read_huffman_tree_enctype_0(lzhctx, htr);
339 break;
340 case 1:
341 mslzh_read_huffman_tree_enctype_1(lzhctx, htr);
342 break;
343 case 2:
344 mslzh_read_huffman_tree_enctype_2(lzhctx, htr);
345 break;
346 case 3:
347 mslzh_read_huffman_tree_enctype_3(lzhctx, htr);
348 break;
349 default:
350 mslzh_set_errorflag(lzhctx);
353 if(lzhctx->bitrd.eof_flag) {
354 mslzh_set_errorflag(lzhctx);
355 goto done;
358 for(i=0; i<htr->num_symbols; i++) {
359 de_dbg2(c, "length[%u] = %u", i, (UI)htr->symlengths[i]);
360 fmtutil_huffman_record_a_code_length(c, htr->fmtuht->builder, (fmtutil_huffman_valtype)i,
361 (UI)htr->symlengths[i]);
364 if(!fmtutil_huffman_make_canonical_code(c, htr->fmtuht->bk, htr->fmtuht->builder, 0)) {
365 de_dfilter_set_errorf(c, lzhctx->dres, lzhctx->modname, "Failed to construct Huffman tree");
366 mslzh_set_errorflag(lzhctx);
367 goto done;
370 if(c->debug_level>=4) {
371 fmtutil_huffman_dump(c, htr->fmtuht);
374 done:
375 de_free(c, htr->symlengths);
376 htr->symlengths = NULL;
377 de_dbg_indent_restore(c, saved_indent_level);
380 static int mslzh_have_enough_output(struct mslzh_context *lzhctx)
382 if(lzhctx->dcmpro->len_known &&
383 (lzhctx->nbytes_written>=lzhctx->dcmpro->expected_len))
385 return 1;
387 return 0;
390 static void mslzh_lz77buf_writebytecb(struct de_lz77buffer *rb, u8 n)
392 struct mslzh_context *lzhctx = (struct mslzh_context*)rb->userdata;
394 if(mslzh_have_enough_output(lzhctx)) return;
395 dbuf_writebyte(lzhctx->dcmpro->f, n);
396 lzhctx->nbytes_written++;
399 static void mslzh_decompress_main(struct mslzh_context *lzhctx)
401 MSLZH_VALUE_TYPE v;
402 struct mslzh_tree *curr_matchlen_table;
403 char tmps[32];
405 de_dbg(lzhctx->c, "LZ data at %s",
406 de_bitreader_describe_curpos(&lzhctx->bitrd, tmps, sizeof(tmps)));
408 curr_matchlen_table = &lzhctx->htree[MSLZH_TREE_IDX_MATCHLEN];
410 while(1) {
411 if(mslzh_have_enough_output(lzhctx)) goto unc_done;
412 if(lzhctx->bitrd.eof_flag) goto unc_done;
414 v = mslzh_getnextcode(lzhctx, curr_matchlen_table);
415 if(lzhctx->bitrd.eof_flag) goto unc_done;
417 if(v!=0) { // match
418 UI matchlen;
419 UI matchpos;
420 UI x, y;
422 matchlen = v + 2;
424 x = mslzh_getnextcode(lzhctx, &lzhctx->htree[MSLZH_TREE_IDX_OFFSET]);
425 y = mslzh_getbits(lzhctx, 6);
426 if(lzhctx->bitrd.eof_flag) goto unc_done;
428 // This may underflow -- that's ok.
429 matchpos = lzhctx->ringbuf->curpos - (x<<6 | y);
431 curr_matchlen_table = &lzhctx->htree[MSLZH_TREE_IDX_MATCHLEN];
433 de_lz77buffer_copy_from_hist(lzhctx->ringbuf, matchpos, matchlen);
435 else { // run of literals
436 UI x;
437 UI count;
438 UI i;
440 x = mslzh_getnextcode(lzhctx, &lzhctx->htree[MSLZH_TREE_IDX_LITLEN]);
441 if(lzhctx->bitrd.eof_flag) goto unc_done;
442 if(x != 31) {
443 curr_matchlen_table = &lzhctx->htree[MSLZH_TREE_IDX_MATCHLEN2];
445 // read & emit x+1 literals using LITERAL table
446 count = x+1;
447 for(i=0; i<count; i++) {
448 v = mslzh_getnextcode(lzhctx, &lzhctx->htree[MSLZH_TREE_IDX_LITERAL]);
449 if(lzhctx->bitrd.eof_flag) goto unc_done;
450 de_lz77buffer_add_literal_byte(lzhctx->ringbuf, (u8)v);
455 unc_done:
459 static void do_decompress_LZHUFF(deark *c, struct de_dfilter_in_params *dcmpri,
460 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres)
462 struct mslzh_context *lzhctx = NULL;
463 i64 k;
464 int saved_indent_level;
466 de_dbg_indent_save(c, &saved_indent_level);
467 lzhctx = de_malloc(c, sizeof(struct mslzh_context));
468 lzhctx->c = c;
469 lzhctx->modname = "lzhuff";
470 lzhctx->dcmpro = dcmpro;
471 lzhctx->dres = dres;
473 lzhctx->bitrd.f = dcmpri->f;
474 lzhctx->bitrd.curpos = dcmpri->pos;
475 lzhctx->bitrd.endpos = dcmpri->pos + dcmpri->len;
477 lzhctx->htree[MSLZH_TREE_IDX_MATCHLEN].num_symbols = 16;
478 lzhctx->htree[MSLZH_TREE_IDX_MATCHLEN2].num_symbols = 16;
479 lzhctx->htree[MSLZH_TREE_IDX_LITLEN].num_symbols = 32;
480 lzhctx->htree[MSLZH_TREE_IDX_OFFSET].num_symbols = 64;
481 lzhctx->htree[MSLZH_TREE_IDX_LITERAL].num_symbols = 256;
483 for(k=0; k<MSLZH_NUM_TREES; k++) {
484 lzhctx->htree[k].fmtuht = fmtutil_huffman_create_decoder(c,
485 lzhctx->htree[k].num_symbols, lzhctx->htree[k].num_symbols);
488 // 3-byte header
489 de_dbg(c, "LZH header at %"I64_FMT, lzhctx->bitrd.curpos);
490 de_dbg_indent(c, 1);
491 for(k=0; k<MSLZH_NUM_TREES; k++) {
492 lzhctx->htree[k].enctype = mslzh_getbits(lzhctx, 4);
493 de_dbg2(c, "huffman tree enctype[%d] = %u", (int)k, lzhctx->htree[k].enctype);
495 (void)mslzh_getbits(lzhctx, 4); // unused
496 if(lzhctx->bitrd.eof_flag) {
497 mslzh_set_errorflag(lzhctx);
498 goto done;
500 de_dbg_indent(c, -1);
502 for(k=0; k<MSLZH_NUM_TREES; k++) {
503 mslzh_read_huffman_tree(lzhctx, (UI)k);
504 if(lzhctx->bitrd.eof_flag) {
505 mslzh_set_errorflag(lzhctx);
506 goto done;
510 lzhctx->ringbuf = de_lz77buffer_create(c, 4096);
511 lzhctx->ringbuf->writebyte_cb = mslzh_lz77buf_writebytecb;
512 lzhctx->ringbuf->userdata = (void*)lzhctx;
513 de_lz77buffer_clear(lzhctx->ringbuf, 0x20);
515 mslzh_decompress_main(lzhctx);
517 done:
518 if(lzhctx) {
519 size_t tr;
521 if(lzhctx->error_flag) {
522 de_dfilter_set_generic_error(c, dres, lzhctx->modname);
525 de_lz77buffer_destroy(c, lzhctx->ringbuf);
527 for(tr=0; tr<MSLZH_NUM_TREES; tr++) {
528 fmtutil_huffman_destroy_decoder(c, lzhctx->htree[tr].fmtuht);
530 de_free(c, lzhctx);
532 de_dbg_indent_restore(c, saved_indent_level);
535 static int XOR_cbfn(struct de_bufferedreadctx *brctx, const u8 *buf,
536 i64 buf_len)
538 i64 k;
539 dbuf *f = (dbuf*)brctx->userdata;
541 for(k=0; k<buf_len; k++) {
542 dbuf_writebyte(f, buf[k] ^ (u8)0xff);
544 return 1;
547 static void do_decompress_XOR(deark *c, struct de_dfilter_in_params *dcmpri,
548 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres)
550 dbuf_buffered_read(dcmpri->f, dcmpri->pos, dcmpri->len, XOR_cbfn, (void*)dcmpro->f);
553 static void do_decompress_MSZIP(deark *c, struct de_dfilter_in_params *dcmpri1,
554 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres)
556 const char *modname = "mszip";
557 i64 pos = dcmpri1->pos;
558 int saved_indent_level;
559 struct de_dfilter_in_params dcmpri2;
560 struct de_lz77buffer *ringbuf = NULL;
562 de_dbg_indent_save(c, &saved_indent_level);
564 // The ring buffer has to persist between blocks. So create our own, and
565 // tell the deflate codec to use it.
566 ringbuf = de_lz77buffer_create(c, 32768);
568 dcmpri2.f = dcmpri1->f;
570 while(1) {
571 i64 blkpos;
572 i64 blklen_raw;
573 i64 blk_dlen;
574 i64 outlen_before;
575 i64 unc_bytes_this_block;
576 UI sig;
577 struct de_deflate_params inflparams;
579 if(pos > dcmpri1->pos + dcmpri1->len -4) {
580 goto done;
582 blkpos = pos;
583 de_dbg(c, "MSZIP block at %"I64_FMT, blkpos);
584 de_dbg_indent(c, 1);
585 blklen_raw = dbuf_getu16le_p(dcmpri1->f, &pos);
586 blk_dlen = blklen_raw - 2;
587 sig = (UI)dbuf_getu16be_p(dcmpri1->f, &pos);
588 if(sig != 0x434b) { // "CK"
589 de_dfilter_set_errorf(c, dres, modname, "Failed to find MSZIP block "
590 "at %"I64_FMT, blkpos);
591 goto done;
593 de_dbg(c, "block dpos: %"I64_FMT", dlen: %d", pos, (int)blk_dlen);
594 if(blk_dlen < 0) goto done;
595 dcmpri2.pos = pos;
596 dcmpri2.len = blk_dlen;
597 de_zeromem(&inflparams, sizeof(struct de_deflate_params));
598 inflparams.flags = 0;
599 inflparams.ringbuf_to_use = ringbuf;
600 outlen_before = dcmpro->f->len;
602 fmtutil_deflate_codectype1(c, &dcmpri2, dcmpro, dres, (void*)&inflparams);
603 if(dres->errcode) goto done;
605 pos += blk_dlen;
606 unc_bytes_this_block = dcmpro->f->len - outlen_before;
607 de_dbg(c, "decompressed to: %"I64_FMT, unc_bytes_this_block);
608 if(unc_bytes_this_block < 32768) break; // Presumably we're done.
610 de_dbg_indent(c, -1);
613 done:
614 dres->bytes_consumed_valid = 1;
615 dres->bytes_consumed = pos - dcmpri1->pos;
616 de_lz77buffer_destroy(c, ringbuf);
617 de_dbg_indent_restore(c, saved_indent_level);
620 static void do_decompress(deark *c, lctx *d, dbuf *outf)
622 struct de_dfilter_in_params dcmpri;
623 struct de_dfilter_out_params dcmpro;
624 struct de_dfilter_results dres;
626 de_dfilter_init_objects(c, &dcmpri, &dcmpro, &dres);
627 dcmpri.f = c->infile;
628 dcmpri.pos = d->cmpr_data_pos;
629 dcmpri.len = d->cmpr_data_len;
631 dcmpro.f = outf;
632 dcmpro.len_known = d->uncmpr_len_known;
633 dcmpro.expected_len = d->uncmpr_len;
635 switch(d->cmpr_meth) {
636 case CMPR_NONE:
637 fmtutil_decompress_uncompressed(c, &dcmpri, &dcmpro, &dres, 0);
638 break;
639 case CMPR_XOR:
640 do_decompress_XOR(c, &dcmpri, &dcmpro, &dres);
641 break;
642 case CMPR_SZDD:
643 fmtutil_decompress_szdd(c, &dcmpri, &dcmpro, &dres, 0);
644 break;
645 case CMPR_LZHUFF:
646 do_decompress_LZHUFF(c, &dcmpri, &dcmpro, &dres);
647 break;
648 case CMPR_MSZIP:
649 do_decompress_MSZIP(c, &dcmpri, &dcmpro, &dres);
650 break;
653 if(dres.errcode) {
654 de_err(c, "%s", de_dfilter_get_errmsg(c, &dres));
655 goto done;
658 if(dres.bytes_consumed_valid) {
659 de_dbg(c, "decompressed %"I64_FMT" to %"I64_FMT" bytes",
660 dres.bytes_consumed, outf->len);
663 if(d->uncmpr_len_known && (outf->len != d->uncmpr_len)) {
664 de_warn(c, "Expected %"I64_FMT" output bytes, got %"I64_FMT,
665 d->uncmpr_len, outf->len);
668 done:
672 static void do_extract_file(deark *c, lctx *d)
674 dbuf *outf = NULL;
675 de_finfo *fi = NULL;
677 de_dbg(c, "compressed data at %"I64_FMT, d->cmpr_data_pos);
678 if(!cmpr_meth_is_supported(d, d->cmpr_meth)) {
679 de_err(c, "Compression method %u (%s) is not supported", d->cmpr_meth,
680 get_cmpr_meth_name(d->cmpr_meth));
681 goto done;
683 if(d->cmpr_data_len<0) goto done;
685 de_dbg_indent(c, 1);
686 fi = de_finfo_create(c);
687 if(ucstring_isnonempty(d->filename)) {
688 de_finfo_set_name_from_ucstring(c, fi, d->filename, 0);
689 fi->original_filename_flag = 1;
691 else {
692 de_finfo_set_name_from_sz(c, fi, "bin", 0, DE_ENCODING_LATIN1);
694 outf = dbuf_create_output_file(c, NULL, fi, 0);
695 do_decompress(c, d, outf);
696 de_dbg_indent(c, -1);
698 done:
699 dbuf_close(outf);
700 de_finfo_destroy(c, fi);
703 static int detect_fmt_internal(deark *c)
705 u8 buf[8];
707 de_read(buf, 0, sizeof(buf));
708 if(!de_memcmp(buf, "\x53\x5a\x44\x44\x88\xf0\x27\x33", 8))
709 return FMT_SZDD;
711 if(!de_memcmp(buf, "\x4b\x57\x41\x4a\x88\xf0\x27\xd1", 8))
712 return FMT_KWAJ;
713 return 0;
716 static void de_run_mscompress(deark *c, de_module_params *mparams)
718 lctx *d = NULL;
720 d = de_malloc(c, sizeof(lctx));
721 d->input_encoding = de_get_input_encoding(c, NULL, DE_ENCODING_ASCII);
723 d->fmt = detect_fmt_internal(c);
724 if(d->fmt==FMT_SZDD) {
725 de_declare_fmt(c, "MS Installation Compression, SZDD variant");
727 else if(d->fmt==FMT_KWAJ) {
728 de_declare_fmt(c, "MS Installation Compression, KWAJ variant");
730 else {
731 de_err(c, "Unidentified format");
732 goto done;
735 if(d->fmt==FMT_KWAJ) {
736 if(!do_header_KWAJ(c, d, 0)) goto done;
738 else {
739 if(!do_header_SZDD(c, d, 0)) goto done;
742 do_extract_file(c, d);
744 done:
745 if(d) {
746 ucstring_destroy(d->filename);
747 de_free(c, d);
751 static int de_identify_mscompress(deark *c)
753 int fmt;
754 fmt = detect_fmt_internal(c);
755 if(fmt!=0) return 100;
756 return 0;
759 void de_module_mscompress(deark *c, struct deark_module_info *mi)
761 mi->id = "mscompress";
762 mi->desc = "MS-DOS Installation Compression";
763 mi->run_fn = de_run_mscompress;
764 mi->identify_fn = de_identify_mscompress;