fnt: Improved error handling, etc.
[deark.git] / modules / diet.c
blob78bcd9b7b37041058fce1ed5722432aca42e3be6
1 // This file is part of Deark.
2 // Copyright (C) 2023 Jason Summers
3 // See the file COPYING for terms of use.
5 // DIET compression format
7 #include <deark-private.h>
8 #include <deark-fmtutil.h>
9 DE_DECLARE_MODULE(de_module_diet);
// Size (4 MiB) requested for the decompression output buffer when the
// header does not report the original length.
#define MAX_DIET_DCMPR_LEN (4*1024*1024)
// Broad category of the compressed file.
enum ftype_enum {
	FTYPE_UNKNOWN = 0,
	FTYPE_DATA,
	FTYPE_COM,
	FTYPE_EXE
};
// Specific file variety: category (data/COM/EXE) combined with the DIET
// version range that produced it.
enum fmt_enum {
	FMT_UNKNOWN = 0,

	// Data files
	FMT_DATA_100, // v1.00, 1.00d
	FMT_DATA_102, // v1.02b, 1.10a, 1.20
	FMT_DATA_144, // v1.44, 1.45f

	// COM files
	FMT_COM_100,
	FMT_COM_102,
	FMT_COM_144,

	// EXE files
	FMT_EXE_100,
	FMT_EXE_102,
	FMT_EXE_144,
	FMT_EXE_145F
};
31 struct diet_identify_data {
32 // The change log from v1.45f suggests there should be at least 20-25
33 // versions of DIET, but just 7 are known to exist:
34 // 1.00, 1.00d, 1.02b, 1.10a, 1.20, 1.44, 1.45f
35 enum fmt_enum fmt;
36 enum ftype_enum ftype;
37 u8 dlz_pos_known;
38 u8 crc_pos_known;
39 u8 cmpr_pos_known;
40 u8 maybe_lglz;
41 i64 dlz_pos;
42 i64 crc_pos;
43 i64 cmpr_pos;
46 typedef struct localctx_struct_diet {
47 struct diet_identify_data idd;
48 u8 errflag;
49 u8 need_errmsg;
50 u8 cmpr_len_known;
51 u8 orig_len_known;
52 u8 raw_mode; // 0xff = not set
53 u8 hdr_flags1; // Valid if dlz_pos_known (otherwise 0)
54 u8 hdr_flags2; // Valid if dlz_pos_known (otherwise 0)
55 i64 cmpr_len;
56 i64 orig_len;
57 i64 cmpr_pos;
58 u32 crc_reported;
59 struct fmtutil_exe_info *ei;
60 dbuf *o_dcmpr_code;
61 i64 o_dcmpr_code_nbytes_written;
62 i64 dcmpr_cur_ipos;
63 struct de_bitbuf_lowlevel bbll;
64 } lctx;
66 // idmode==1: We're in the 'identify' phase -- Do just enough to
67 // detect COM & data formats.
68 static void identify_diet_fmt(deark *c, struct diet_identify_data *idd, u8 idmode)
70 static const u8 *sig_9d89 = (const u8*)"\x9d\x89";
71 static const u8 *sig_dlz = (const u8*)"dlz";
72 static const u8 *sig_int21 = (const u8*)"\xb4\x4c\xcd\x21";
73 static const u8 *sig_old = (const u8*)"\xfd\xf3\xa5\xfc\x8b\xf7\xbf\x00";
74 static const u8 *sig_8edb = (const u8*)"\x8e\xdb\x8e\xc0\x33\xf6\x33\xff\xb9";
75 u8 buf[9];
77 de_read(buf, 0, sizeof(buf));
79 if(buf[0]==0xbe) {
80 if(!dbuf_memcmp(c->infile, 35, sig_dlz, 3)) {
81 if(!dbuf_memcmp(c->infile, 17, sig_old, 8))
83 idd->ftype = FTYPE_COM;
84 idd->fmt = FMT_COM_102;
85 idd->dlz_pos_known = 1;
86 idd->dlz_pos = 35;
87 goto done;
92 if(buf[0]==0xbf) {
93 if(!dbuf_memcmp(c->infile, 17, sig_old, 8))
95 idd->ftype = FTYPE_COM;
96 idd->fmt = FMT_COM_100;
97 idd->crc_pos_known = 1;
98 idd->crc_pos = 35;
99 idd->cmpr_pos_known = 1;
100 idd->cmpr_pos = 37;
101 goto done;
105 if(buf[0]==0xf9) {
106 if(!dbuf_memcmp(c->infile, 65, sig_dlz, 3)) {
107 if(!dbuf_memcmp(c->infile, 10, sig_9d89, 2)) {
108 idd->ftype = FTYPE_COM;
109 idd->fmt = FMT_COM_144;
110 idd->dlz_pos_known = 1;
111 idd->dlz_pos = 65;
112 goto done;
117 if(buf[0]==0xb4) {
118 if(!de_memcmp(&buf[0], sig_int21, 4)) {
119 if(!de_memcmp(&buf[4], sig_9d89, 2)) {
120 idd->ftype = FTYPE_DATA;
121 if(!de_memcmp(&buf[6], sig_dlz, 3)) {
122 idd->fmt = FMT_DATA_144;
123 idd->dlz_pos_known = 1;
124 idd->dlz_pos = 6;
125 goto done;
127 idd->fmt = FMT_DATA_100;
128 idd->crc_pos_known = 1;
129 idd->crc_pos = 6;
130 idd->cmpr_pos_known = 1;
131 idd->cmpr_pos = 8;
132 goto done;
137 if(buf[0]==0x9d) {
138 if(!de_memcmp(&buf[0], sig_9d89, 2)) {
139 if(!de_memcmp(&buf[2], sig_dlz, 3)) {
140 idd->ftype = FTYPE_DATA;
141 idd->fmt = FMT_DATA_102;
142 idd->dlz_pos_known = 1;
143 idd->dlz_pos = 2;
144 goto done;
149 // Don't autodetect EXE -- It's handled by the "exe" module.
150 if(idmode) goto done;
152 if((buf[0]=='M' && buf[1]=='Z') || (buf[0]=='Z' && buf[1]=='M')) {
153 i64 codestart;
154 i64 sig_8edb_pos_rel = 0;
155 u8 x;
157 // TODO?: Probing for the 8e db 8e... byte pattern is good enough for
158 // all the DIET-EXE files I've encountered. But it probably ought to be
159 // improved, somehow.
160 // I've found some files in which the "dlz" signature has been modified,
161 // so checking for it wouldn't help much.
163 codestart = 16 * de_getu16le(8); // Expected to be 32
165 if(!dbuf_memcmp(c->infile, codestart-32+77, sig_8edb, 8)) {
166 sig_8edb_pos_rel = 77-32;
168 else if(!dbuf_memcmp(c->infile, codestart-32+72, sig_8edb, 8)) {
169 sig_8edb_pos_rel = 72-32;
171 else if(!dbuf_memcmp(c->infile, codestart-32+52, sig_8edb, 8)) {
172 sig_8edb_pos_rel = 52-32;
174 else if(!dbuf_memcmp(c->infile, codestart-32+55, sig_8edb, 8)) {
175 sig_8edb_pos_rel = 55-32;
178 if(sig_8edb_pos_rel==0) goto done;
180 x = de_getbyte(codestart+sig_8edb_pos_rel+26);
181 if(x==0x95) {
182 idd->maybe_lglz = 1;
185 if(sig_8edb_pos_rel == 77-32) {
186 idd->ftype = FTYPE_EXE;
187 idd->fmt = FMT_EXE_145F;
188 idd->dlz_pos_known = 1;
189 idd->dlz_pos = codestart-32+108;
190 goto done;
193 if(sig_8edb_pos_rel == 72-32) {
194 idd->ftype = FTYPE_EXE;
195 idd->fmt = FMT_EXE_144;
196 idd->dlz_pos_known = 1;
197 idd->dlz_pos = codestart-32+107;
198 goto done;
201 if(sig_8edb_pos_rel == 52-32) {
202 idd->ftype = FTYPE_EXE;
203 idd->fmt = FMT_EXE_102;
204 idd->dlz_pos_known = 1;
205 idd->dlz_pos = codestart-32+87;
206 goto done;
209 if(sig_8edb_pos_rel == 55-32) {
210 idd->ftype = FTYPE_EXE;
211 idd->fmt = FMT_EXE_100;
212 idd->crc_pos_known = 1;
213 idd->crc_pos = 18;
214 idd->cmpr_pos_known = 1;
215 idd->cmpr_pos = codestart-32+90;
216 goto done;
220 done:
221 if(idd->dlz_pos_known) {
222 idd->crc_pos_known = 1;
223 idd->cmpr_pos_known = 1;
224 idd->crc_pos = idd->dlz_pos + 6;
225 idd->cmpr_pos = idd->dlz_pos + 11;
229 static void fill_bitbuf(deark *c, lctx *d)
231 UI i;
233 if(d->errflag) return;
234 if(d->dcmpr_cur_ipos+2 > c->infile->len) {
235 d->errflag = 1;
236 d->need_errmsg = 1;
237 return;
240 for(i=0; i<2; i++) {
241 u8 b;
242 b = de_getbyte_p(&d->dcmpr_cur_ipos);
243 de_bitbuf_lowlevel_add_byte(&d->bbll, b);
247 static u8 diet_getbit(deark *c, lctx *d)
249 u8 v;
251 if(d->errflag) return 0;
253 if(d->bbll.nbits_in_bitbuf==0) {
254 fill_bitbuf(c, d);
257 v = (u8)de_bitbuf_lowlevel_get_bits(&d->bbll, 1);
259 if(d->bbll.nbits_in_bitbuf==0) {
260 fill_bitbuf(c, d);
263 return v;
266 static void my_lz77buf_writebytecb(struct de_lz77buffer *rb, u8 n)
268 lctx *d = (lctx*)rb->userdata;
270 dbuf_writebyte(d->o_dcmpr_code, n);
271 d->o_dcmpr_code_nbytes_written++;
274 static UI read_matchlen(deark *c, lctx *d)
276 UI matchlen;
277 u8 x, x1, x2, x3, x4, x5;
278 u8 v;
279 UI nbits_read = 0;
281 // Read up to 4 bits, stopping early if we get a 1.
282 while(1) {
283 x = diet_getbit(c, d);
284 nbits_read++;
285 if(x) {
286 matchlen = 2+nbits_read;
287 goto done;
289 if(nbits_read>=4) break;
291 // At this point we've read 4 bits, all 0.
293 x1 = diet_getbit(c, d);
294 x2 = diet_getbit(c, d);
296 if(x1==1) { // length 7-8
297 matchlen = 7+x2;
298 goto done;
301 if(x2==0) { // length 9-16
302 x3 = diet_getbit(c, d);
303 x4 = diet_getbit(c, d);
304 x5 = diet_getbit(c, d);
305 matchlen = 9 + 4*(UI)x3 + 2*(UI)x4 + (UI)x5;
306 goto done;
309 // length 17-272
310 v = de_getbyte_p(&d->dcmpr_cur_ipos);
311 matchlen = 17 + (UI)v;
313 done:
314 return matchlen;
317 static void do_decompress_code(deark *c, lctx *d)
319 struct de_lz77buffer *ringbuf = NULL;
320 u8 v;
321 u8 x1, x2;
322 u8 a1, a2, a3, a4, a5, a6, a7, a8;
324 if(d->cmpr_pos + d->cmpr_len > c->infile->len) {
325 d->errflag = 1;
326 d->need_errmsg = 1;
328 de_dbg(c, "decompressing cmpr code at %"I64_FMT, d->cmpr_pos);
329 de_dbg_indent(c, 1);
331 ringbuf = de_lz77buffer_create(c, 8192);
332 ringbuf->userdata = (void*)d;
333 ringbuf->writebyte_cb = my_lz77buf_writebytecb;
335 d->dcmpr_cur_ipos = d->cmpr_pos;
336 d->bbll.is_lsb = 1;
337 de_bitbuf_lowlevel_empty(&d->bbll);
339 while(1) {
340 UI matchpos = 0;
341 UI matchlen;
343 if(d->errflag) goto done;
345 x1 = diet_getbit(c, d);
346 if(x1) { // 1... -> literal byte
347 u8 b;
349 b = de_getbyte_p(&d->dcmpr_cur_ipos);
350 if(c->debug_level>=4) {
351 de_dbg(c, "lit 0x%02x", (UI)b);
353 de_lz77buffer_add_literal_byte(ringbuf, b);
354 continue;
357 x2 = diet_getbit(c, d);
358 v = de_getbyte_p(&d->dcmpr_cur_ipos);
360 if(x2==0) { // 00[XX]... -> 2-byte match or special code
361 a1 = diet_getbit(c, d); // Always need at least 1 more bit
362 if(a1) { // "long" two-byte match
363 matchlen = 2;
364 a2 = diet_getbit(c, d);
365 a3 = diet_getbit(c, d);
366 a4 = diet_getbit(c, d);
367 matchpos = 2303 - (1024*(UI)a2 + 512*(UI)a3 + 256*(UI)a4 + v);
368 goto ready_for_match;
370 else if(v!=0xff) { // "short" two-byte match
371 matchlen = 2;
372 matchpos = 0xff - (UI)v;
373 goto ready_for_match;
376 // special code
378 a2 = diet_getbit(c, d);
379 if(a2==0) { // 00[FF]00
380 de_dbg3(c, "stop code");
381 goto after_decompress;
384 // 00[FF]01
385 if(d->idd.ftype==FTYPE_EXE) {
386 de_dbg3(c, "segment refresh");
387 continue;
389 de_err(c, "Unsupported feature");
390 d->errflag = 1;
391 goto done;
394 // 01[v] -> 3 or more byte match
396 a1 = diet_getbit(c, d);
397 a2 = diet_getbit(c, d);
399 if(a2) { // 01[v]?1
400 matchpos = 511 - (256*(UI)a1 + (UI)v);
401 goto ready_for_len;
404 a3 = diet_getbit(c, d);
405 if(a3) { // 01[v]?01
406 matchpos = 1023 - (256*(UI)a1 + (UI)v);
407 goto ready_for_len;
410 // 01[v]?00
411 a4 = diet_getbit(c, d);
412 a5 = diet_getbit(c, d);
414 if(a5) { // 01[v]?00?1
415 matchpos = 2047 - (512*(UI)a1 + 256* (UI)a4 + (UI)v);
416 goto ready_for_len;
419 // 01[v]?00?0
420 a6 = diet_getbit(c, d);
421 a7 = diet_getbit(c, d);
423 if(a7) { // 01[v]?00?0?1
424 matchpos = 4095 - (1024*(UI)a1 + 512*(UI)a4 + 256*(UI)a6 + (UI)v);
425 goto ready_for_len;
428 // 01[v]?00?0?0
429 a8 = diet_getbit(c, d);
430 matchpos = 8191 - (2048*(UI)a1 + 1024*(UI)a4 + 512*(UI)a6 + 256*(UI)a8 + (UI)v);
433 ready_for_len:
434 matchlen = read_matchlen(c, d);
435 if(d->errflag) goto done;
437 ready_for_match:
438 if(c->debug_level>=3) {
439 de_dbg3(c, "match pos=%u len=%u", matchpos+1, matchlen);
441 if((i64)matchpos+1 > d->o_dcmpr_code_nbytes_written) {
442 // Match refers to data before the beginning of the file --
443 // DIET doesn't do this.
444 d->errflag = 1;
445 d->need_errmsg = 1;
446 goto done;
448 if(matchlen > (i64)matchpos+1) {
449 // Some matching data hasn't been decompressed yet.
450 // This is a legitimate feature of LZ77, but DIET apparently doesn't
451 // use it.
452 d->errflag = 1;
453 d->need_errmsg = 1;
454 goto done;
457 de_lz77buffer_copy_from_hist(ringbuf,
458 (UI)(ringbuf->curpos-1-matchpos), matchlen);
461 after_decompress:
462 de_dbg(c, "decompressed %"I64_FMT" bytes to %"I64_FMT, (d->dcmpr_cur_ipos-d->cmpr_pos),
463 d->o_dcmpr_code_nbytes_written);
465 done:
466 dbuf_flush(d->o_dcmpr_code);
467 de_lz77buffer_destroy(c, ringbuf);
468 de_dbg_indent(c, -1);
471 static void write_data_or_com_file(deark *c, lctx *d)
473 dbuf *outf = NULL;
474 const char *ext;
476 if(d->idd.ftype==FTYPE_COM) ext = "com";
477 else ext = "bin";
479 outf = dbuf_create_output_file(c, ext, NULL, 0);
480 dbuf_copy(d->o_dcmpr_code, 0, d->o_dcmpr_code->len, outf);
482 if(d->idd.ftype==FTYPE_COM) {
483 de_stdwarn_execomp(c);
486 dbuf_close(outf);
489 struct exe_dcmpr_ctx {
490 i64 mz_pos; // pos in d->o_dcmpr_code
491 i64 encoded_reloc_tbl_pos; // pos in d->o_dcmpr_code
492 i64 encoded_reloc_tbl_size; // size in d->o_dcmpr_code
493 i64 cdata1_size;
494 i64 cdata2_size; // Size in the original file; may be abbreviated in d->o_dcmpr_code
495 struct fmtutil_exe_info o_ei;
498 // For v1.00 format, there doesn't seem to be a good way to figure out the exact
499 // offset of the "MZ" header within the blob of bytes produced by the main
500 // decompression algorithm.
501 // I've never seen DIET fail to correctly decompress such a file, but I'm
502 // starting to suspect it might be possible to construct a pathological file
503 // for which it fails.
504 // We can narrow it down to 16 possibilities, and if there's exactly one that
505 // is potentially valid, we go with it. In practice, this should be good enough.
506 static void find_v100_mz_pos(deark *c, lctx *d, struct exe_dcmpr_ctx *ectx,
507 i64 mz_pos_approx)
509 i64 cmpr_endpos;
510 i64 params_pos = 0;
511 i64 reloc_tbl_rel = 0;
512 i64 nrelocs_r = 0;
513 i64 nbytes_to_search;
514 int found_count = 0;
515 u8 fclass = 0; // 0=unknown, 1=has params at params_pos, 2=no reloc table
516 i64 i;
517 i64 n;
518 i64 foundpos;
519 int ret;
520 int saved_indent_level;
522 de_dbg_indent_save(c, &saved_indent_level);
523 de_dbg(c, "[searching for MZ pos in intermed. data]");
524 de_dbg_indent(c, 1);
526 cmpr_endpos = d->cmpr_pos + d->cmpr_len;
527 de_dbg(c, "cmpr data end: %"I64_FMT, cmpr_endpos);
529 // Sanity check. This part of the decompressor always seems to start with
530 // these bytes.
531 n = de_getu32be(cmpr_endpos);
532 if(n != 0xd1edfecaU) {
533 d->errflag = 1;
534 d->need_errmsg = 1;
535 goto done;
538 // We're looking for some parameters in the part of the code that
539 // appears after the compressed data. (But if there are 0 relocations,
540 // then the params won't be present, and we have to detect that as
541 // a special case.)
542 // Probing at precise offsets doesn't seem to be robust enough, so we
543 // resort to doing a search for characteristic byte patterns.
545 nbytes_to_search = de_min_int(d->ei->end_of_dos_code - cmpr_endpos, 1000);
547 if(fclass==0) {
548 ret = dbuf_search(c->infile, (const u8*)"\x5d\x0e\x1f\xbe", 4, cmpr_endpos,
549 nbytes_to_search, &foundpos);
550 if(ret) {
551 fclass = 1;
552 params_pos = foundpos+4;
556 if(fclass==0) {
557 // The case where there are no relocations
558 ret = dbuf_search(c->infile, (const u8*)"\x5d\x07\x1f\x81", 4, cmpr_endpos,
559 nbytes_to_search, &foundpos);
560 if(ret) {
561 fclass = 2;
562 nrelocs_r = 0;
566 if(fclass==0) {
567 d->errflag = 1;
568 d->need_errmsg = 1;
569 goto done;
572 if(fclass==1) {
573 de_dbg(c, "params pos: %"I64_FMT" (b+%"I64_FMT"; e-%"I64_FMT")",
574 params_pos, params_pos - cmpr_endpos,
575 d->ei->end_of_dos_code - params_pos);
576 reloc_tbl_rel = de_getu16le(params_pos);
577 de_dbg(c, "reloc tbl intermed. pos: approx_MZ+%"I64_FMT, reloc_tbl_rel);
578 nrelocs_r = de_getu16le(params_pos+3);
581 de_dbg(c, "nrelocs (reported): %"I64_FMT, nrelocs_r);
583 // Search for the MZ header, hopefully avoiding false positives.
584 for(i=0; i<16; i++) {
585 int sig;
587 // Look for "MZ" or "ZM"
588 sig = (int)dbuf_getbyte(d->o_dcmpr_code, mz_pos_approx+i);
589 if(sig!='M' && sig!='Z') continue;
590 sig += (int)dbuf_getbyte(d->o_dcmpr_code, mz_pos_approx+i+1);
591 if(sig != 'M'+'Z') continue;
593 // Validate the reloc count
594 n = dbuf_getu16le(d->o_dcmpr_code, mz_pos_approx+i+6);
595 if(n!=nrelocs_r) continue;
597 // Validate the reloc pos if possible
598 if(fclass==1) {
599 n = dbuf_getu16le(d->o_dcmpr_code, mz_pos_approx+i+24);
600 if(i+n!=reloc_tbl_rel) continue;
603 found_count++;
604 if(found_count>1) {
605 d->errflag = 1;
606 d->need_errmsg = 1;
607 goto done;
609 ectx->mz_pos = mz_pos_approx+i;
610 break;
613 if(found_count!=1) {
614 d->errflag = 1;
615 d->need_errmsg = 1;
616 goto done;
619 de_dbg(c, "MZ header found at %"I64_FMT, ectx->mz_pos);
621 done:
622 de_dbg_indent_restore(c, saved_indent_level);
625 // Caller creates and passes empty o_orig_header to us.
626 static void find_exe_params(deark *c, lctx *d, struct exe_dcmpr_ctx *ectx,
627 dbuf *o_orig_header)
629 i64 iparam1;
630 i64 n;
631 i64 ioffset1;
632 i64 mz_pos_approx;
633 u8 byte3;
635 if(d->errflag) return;
637 switch(d->idd.fmt) {
638 case FMT_EXE_100:
639 case FMT_EXE_102:
640 ioffset1 = 53;
641 break;
642 case FMT_EXE_144:
643 ioffset1 = 73;
644 break;
645 case FMT_EXE_145F:
646 ioffset1 = 26;
647 break;
648 default:
649 d->errflag = 1;
650 d->need_errmsg = 1;
651 goto done;
654 iparam1 = de_getu16le(d->ei->entry_point + ioffset1);
655 mz_pos_approx = iparam1 * 16;
656 de_dbg(c, "approx MZ pos in intermed. data: %"I64_FMT, mz_pos_approx);
658 if(d->idd.fmt==FMT_EXE_100) {
659 find_v100_mz_pos(c, d, ectx, mz_pos_approx);
660 if(d->errflag) goto done;
662 else {
663 if(!d->orig_len_known || !(d->hdr_flags1 & 0x20)) {
664 d->errflag = 1;
665 d->need_errmsg = 1;
666 goto done;
669 ectx->mz_pos = mz_pos_approx + (d->orig_len % 16);
670 de_dbg(c, "expected MZ pos in intermed. data: %"I64_FMT, ectx->mz_pos);
671 // Verify that this seems to be the right place.
672 n = dbuf_getu16be(d->o_dcmpr_code, ectx->mz_pos);
673 if(n!=0x4d5a && n!=0x5a4d) {
674 d->errflag = 1;
675 d->need_errmsg = 1;
676 goto done;
680 // Note: DIET elides trailing 0-valued bytes from the intermediate format
681 // we store in o_dcmpr_code.
682 // In some cases, o_dcmpr_code ends even before the end of the 28-byte
683 // MZ header.
684 // So, this and later calls to dbuf_copy() may read beyond the end of
685 // o_dcmpr_code. That's by design -- we rely on dbuf_copy to replace
686 // missing bytes with 0-valued bytes.
687 dbuf_copy(d->o_dcmpr_code, ectx->mz_pos, 28, o_orig_header);
689 byte3 = dbuf_getbyte(o_orig_header, 3);
690 dbuf_writebyte_at(o_orig_header, 3, (byte3 & 0x01));
692 fmtutil_collect_exe_info(c, o_orig_header, &ectx->o_ei);
694 // collect_exe_info() will not have calculated the overlay len, because we
695 // didn't tell it the correct file size. So, patch it up here.
696 ectx->o_ei.overlay_len = ectx->o_ei.start_of_dos_code + ectx->mz_pos -
697 ectx->o_ei.end_of_dos_code;
698 if(ectx->o_ei.overlay_len<0) ectx->o_ei.overlay_len = 0;
700 ectx->encoded_reloc_tbl_pos = ectx->mz_pos + ectx->o_ei.reloc_table_pos;
702 if(ectx->o_ei.num_relocs==0) {
703 ectx->cdata1_size = ectx->o_ei.start_of_dos_code - 28;
704 ectx->cdata2_size = 0;
706 else {
707 ectx->cdata1_size = ectx->o_ei.reloc_table_pos - 28;
708 ectx->cdata2_size = ectx->o_ei.start_of_dos_code - (ectx->o_ei.reloc_table_pos +
709 4*ectx->o_ei.num_relocs);
711 if(ectx->cdata1_size<0 || ectx->cdata2_size<0) {
712 d->errflag = 1;
713 d->need_errmsg = 1;
714 goto done;
717 done:
718 if(d->errflag && d->need_errmsg) {
719 de_err(c, "Unsupported variety of DIET-EXE file");
720 d->need_errmsg = 0;
724 // Sets ectx->encoded_reloc_tbl_size
725 static void decode_reloc_tbl(deark *c, lctx *d, struct exe_dcmpr_ctx *ectx,
726 dbuf *inf, i64 ipos1, i64 nrelocs, dbuf *outf)
728 UI seg = 0;
729 UI offs = 0;
730 i64 i;
731 i64 ipos = ipos1;
733 for(i=0; i<nrelocs; i++) {
734 UI n;
736 n = (UI)dbuf_getu16le_p(inf, &ipos);
737 if(n & 0x8000) {
738 // Special code: segment stays the same, and offset is adjusted
739 // relative to the previous offset.
740 if(n >= 0xc000) {
741 offs += n;
743 else {
744 offs += (n-0x8000);
746 offs &= 0xffff;
748 else {
749 seg = n;
750 offs = (UI)dbuf_getu16le_p(inf, &ipos);
753 dbuf_writeu16le(outf, (i64)offs);
754 dbuf_writeu16le(outf, (i64)seg);
757 if(d->hdr_flags1 & 0x20) {
758 ectx->encoded_reloc_tbl_size = ipos - ipos1;
760 else {
761 ectx->encoded_reloc_tbl_size = nrelocs*4;
765 static void write_exe_file(deark *c, lctx *d)
767 struct exe_dcmpr_ctx *ectx = NULL;
768 dbuf *outf = NULL;
769 dbuf *o_orig_header = NULL;
770 dbuf *reloc_tbl = NULL;
771 int saved_indent_level;
773 de_dbg_indent_save(c, &saved_indent_level);
774 if(!d->o_dcmpr_code) goto done;
776 ectx = de_malloc(c, sizeof(struct exe_dcmpr_ctx));
778 de_dbg(c, "[writing EXE]");
779 de_dbg_indent(c, 1);
780 fmtutil_collect_exe_info(c, c->infile, d->ei);
782 o_orig_header = dbuf_create_membuf(c, 28, 0);
783 find_exe_params(c, d, ectx, o_orig_header);
784 if(d->errflag) goto done;
786 outf = dbuf_create_output_file(c, "exe", NULL, 0);
788 // 28-byte MZ header
789 dbuf_copy(o_orig_header, 0, 28, outf);
791 // Copy the custom data up to the relocation table.
792 // (If there's no relocation table, this will be everything up to the
793 // code image).
794 dbuf_copy(d->o_dcmpr_code, ectx->mz_pos+28, ectx->cdata1_size, outf);
796 if(ectx->o_ei.num_relocs!=0) {
797 // Relocation table
798 reloc_tbl = dbuf_create_membuf(c, 4*ectx->o_ei.num_relocs, 0);
799 decode_reloc_tbl(c, d, ectx, d->o_dcmpr_code, ectx->encoded_reloc_tbl_pos,
800 ectx->o_ei.num_relocs, reloc_tbl);
801 dbuf_copy(reloc_tbl, 0, 4*ectx->o_ei.num_relocs, outf);
803 // Custom data following the relocation table
804 dbuf_copy(d->o_dcmpr_code,
805 ectx->encoded_reloc_tbl_pos + ectx->encoded_reloc_tbl_size,
806 ectx->cdata2_size, outf);
809 // Code image and (internal, compressed) overlay
810 dbuf_copy(d->o_dcmpr_code, 0, ectx->mz_pos, outf);
812 // Copy external overlay. Pristine DIET-compressed files never have such a
813 // thing, but some other workflows (e.g. ARJ v2.00 SFX) create such files.
814 if(d->ei->overlay_len>0) {
815 if(ectx->o_ei.overlay_len>0) {
816 de_warn(c, "Ignoring overlay at %"I64_FMT" -- file already "
817 "has an overlay", d->ei->end_of_dos_code);
819 else {
820 de_dbg(c, "overlay data at %"I64_FMT", len=%"I64_FMT, d->ei->end_of_dos_code,
821 d->ei->overlay_len);
822 dbuf_copy(c->infile, d->ei->end_of_dos_code, d->ei->overlay_len, outf);
826 done:
827 dbuf_close(reloc_tbl);
828 if(outf) {
829 dbuf_close(outf);
830 de_stdwarn_execomp(c);
832 dbuf_close(o_orig_header);
833 if(ectx) {
834 de_free(c, ectx);
836 de_dbg_indent_restore(c, saved_indent_level);
839 static void read_header(deark *c, lctx *d)
841 i64 pos = 0;
842 i64 n;
843 u8 x;
844 de_ucstring *flags_str = NULL;
845 int saved_indent_level;
847 de_dbg_indent_save(c, &saved_indent_level);
849 flags_str = ucstring_create(c);
851 if(d->idd.dlz_pos_known) {
852 de_dbg(c, "header at %"I64_FMT, d->idd.dlz_pos);
853 de_dbg_indent(c, 1);
855 pos = d->idd.dlz_pos + 3;
856 x = de_getbyte_p(&pos);
857 d->hdr_flags1 = x & 0xf0;
858 if(d->hdr_flags1 & 0x80) ucstring_append_flags_item(flags_str, "has following block");
859 if(d->hdr_flags1 & 0x20) ucstring_append_flags_item(flags_str, "new EXE format");
860 if(d->hdr_flags1 & 0x10) ucstring_append_flags_item(flags_str, "has segment refresh data");
861 de_dbg(c, "flags: 0x%02x (%s)", d->hdr_flags1, ucstring_getpsz(flags_str));
862 d->cmpr_len = (i64)(x & 0x0f)<<16;
863 n = de_getu16le_p(&pos);
864 d->cmpr_len |= n;
865 de_dbg(c, "cmpr len: %"I64_FMT, d->cmpr_len);
866 d->cmpr_len_known = 1;
868 else if(d->idd.fmt==FMT_EXE_100) {
869 d->cmpr_len = de_getu32le(32);
870 d->cmpr_len &= 0xfffff; // Dunno if this is 24-bits, or maybe just 20 bits
871 de_dbg(c, "cmpr len: %"I64_FMT, d->cmpr_len);
872 d->cmpr_len_known = 1;
875 if(d->idd.crc_pos_known) {
876 d->crc_reported = (u32)de_getu16le(d->idd.crc_pos);
877 de_dbg(c, "crc (reported): 0x%04x", (UI)d->crc_reported);
880 if(d->idd.dlz_pos_known) {
881 pos = d->idd.dlz_pos + 8;
882 x = de_getbyte_p(&pos);
883 d->orig_len = (i64)(x & 0xfc)<<14;
884 d->hdr_flags2 = (i64)(x & 0x03); // probably unused
885 n = de_getu16le_p(&pos);
886 d->orig_len |= n;
887 de_dbg(c, "orig len: %"I64_FMT, d->orig_len);
888 d->orig_len_known = 1;
891 if(!d->cmpr_len_known && d->idd.fmt==FMT_DATA_100) {
892 d->cmpr_len = c->infile->len - d->cmpr_pos;
893 d->cmpr_len_known = 1;
896 ucstring_destroy(flags_str);
897 de_dbg_indent_restore(c, saved_indent_level);
900 static void check_diet_crc(deark *c, lctx *d)
902 u32 crc_calc;
903 struct de_crcobj *crco = NULL;
905 if(!d->cmpr_len_known) {
906 // TODO: For v1.00 COM format, we don't know how to figure out the
907 // compressed data size, and it doesn't end at the end of the file.
908 // (Testing the CRC *after* decompression, after we've found the "stop"
909 // code, isn't the right thing to do for this type of CRC.)
910 goto done;
912 if(d->cmpr_pos+d->cmpr_len > c->infile->len) goto done;
914 crco = de_crcobj_create(c, DE_CRCOBJ_CRC16_ARC);
915 de_crcobj_addslice(crco, c->infile, d->cmpr_pos, d->cmpr_len);
916 crc_calc = de_crcobj_getval(crco);
917 de_dbg(c, "crc (calculated): 0x%04x", (UI)crc_calc);
918 // Unfortunately, this is a CRC of the *compressed* data, so we can't use it
919 // to tell if we decompressed the data correctly.
920 if(crc_calc!=d->crc_reported) {
921 de_warn(c, "CRC check failed (expected 0x%04x, got 0x%04x). "
922 "File may be corrupted.", (UI)d->crc_reported,
923 (UI)crc_calc);
925 done:
926 de_crcobj_destroy(crco);
929 static void check_unsupp_features(deark *c, lctx *d)
931 if(d->hdr_flags1&0x80) {
932 d->errflag = 1;
933 d->need_errmsg = 1;
934 goto done;
937 done:
941 static void de_run_diet(deark *c, de_module_params *mparams)
943 lctx *d = NULL;
944 const char *fmtn = NULL;
946 d = de_malloc(c, sizeof(lctx));
947 d->ei = de_malloc(c, sizeof(struct fmtutil_exe_info));
948 d->raw_mode = (u8)de_get_ext_option_bool(c, "diet:raw", 0xff);
950 identify_diet_fmt(c, &d->idd, 0);
951 switch(d->idd.fmt) {
952 case FMT_DATA_100:
953 fmtn = "file (v1.00)";
954 break;
955 case FMT_DATA_102:
956 fmtn = "file (v1.02-1.20)";
957 break;
958 case FMT_DATA_144:
959 fmtn = "file (v1.44-1.45)";
960 break;
961 case FMT_COM_100:
962 fmtn = "COM (v1.00)";
963 break;
964 case FMT_COM_102:
965 fmtn = "COM (v1.02-1.20)";
966 break;
967 case FMT_COM_144:
968 fmtn = "COM (v1.44-1.45)";
969 break;
970 case FMT_EXE_100:
971 fmtn = "EXE (v1.00)";
972 break;
973 case FMT_EXE_102:
974 fmtn = "EXE (v1.02-1.20)";
975 break;
976 case FMT_EXE_144:
977 fmtn = "EXE (v1.44)";
978 break;
979 case FMT_EXE_145F:
980 fmtn = "EXE (v1.45)";
981 break;
982 default:
983 break;
986 if(fmtn) {
987 de_declare_fmtf(c, "DIET-compressed %s", fmtn);
990 if(d->idd.maybe_lglz) {
991 de_warn(c, "This file might be LGLZ-compressed, not DIET");
994 if(!fmtn || !d->idd.cmpr_pos_known) {
995 d->errflag = 1;
996 d->need_errmsg = 1;
997 goto done;
999 d->cmpr_pos = d->idd.cmpr_pos;
1001 read_header(c, d);
1002 if(d->errflag) goto done;
1004 check_diet_crc(c, d);
1006 check_unsupp_features(c, d);
1007 if(d->errflag) goto done;
1009 d->o_dcmpr_code = dbuf_create_membuf(c,
1010 (d->orig_len_known ? d->orig_len : MAX_DIET_DCMPR_LEN), 0x1);
1011 dbuf_enable_wbuffer(d->o_dcmpr_code);
1013 do_decompress_code(c, d);
1014 if(d->errflag) goto done;
1015 if(d->idd.ftype==FTYPE_DATA || d->idd.ftype==FTYPE_COM ||
1016 (d->idd.ftype==FTYPE_EXE && d->raw_mode==1))
1018 write_data_or_com_file(c, d);
1020 else if(d->idd.ftype==FTYPE_EXE && d->raw_mode!=1) {
1021 write_exe_file(c, d);
1024 done:
1025 if(d) {
1026 de_free(c, d->ei);
1027 if(d->need_errmsg) {
1028 de_err(c, "Bad or unsupported file");
1030 dbuf_close(d->o_dcmpr_code);
1031 de_free(c, d);
1035 static int de_identify_diet(deark *c)
1037 struct diet_identify_data idd;
1039 de_zeromem(&idd, sizeof(struct diet_identify_data));
1040 identify_diet_fmt(c, &idd, 1);
1041 if(idd.ftype!=FTYPE_UNKNOWN) return 90;
1042 return 0;
1045 static void de_help_diet(deark *c)
1047 de_msg(c, "-opt diet:raw : Instead of an EXE file, write raw decompressed data");
1050 void de_module_diet(deark *c, struct deark_module_info *mi)
1052 mi->id = "diet";
1053 mi->desc = "DIET compression";
1054 mi->run_fn = de_run_diet;
1055 mi->identify_fn = de_identify_diet;
1056 mi->help_fn = de_help_diet;