1 // This file is part of Deark.
2 // Copyright (C) 2023 Jason Summers
3 // See the file COPYING for terms of use.
5 // DIET compression format
7 #include <deark-private.h>
8 #include <deark-fmtutil.h>
// Declares the module registration function de_module_diet(), which is
// defined at the end of this file.
9 DE_DECLARE_MODULE(de_module_diet
);
// Size handed to dbuf_create_membuf() for the decompressed-data buffer when
// the original length is not known (4194304 bytes = 4 MiB).
11 #define MAX_DIET_DCMPR_LEN 4194304
// Enumerators for the detected file type (FTYPE_*), followed by enumerators
// for the DIET format variant (FMT_*).
// NOTE(review): the enclosing enum declarations and the remaining FMT_*
// enumerators (FMT_COM_*, FMT_EXE_*, used later in this file) are not
// visible in this excerpt.
14 FTYPE_UNKNOWN
=0, FTYPE_DATA
, FTYPE_COM
, FTYPE_EXE
19 FMT_DATA_100
, // v1.00, 1.00d
20 FMT_DATA_102
, // v1.02b, 1.10a, 1.20
21 FMT_DATA_144
, // v1.44, 1.45f
// Result of format identification; filled in by identify_diet_fmt().
// Besides ftype, the rest of this file uses the members fmt, maybe_lglz, and
// the positions dlz_pos, crc_pos, cmpr_pos together with their *_known flags.
// NOTE(review): those member declarations are not visible in this excerpt.
31 struct diet_identify_data
{
32 // The change log from v1.45f suggests there should be at least 20-25
33 // versions of DIET, but just 7 are known to exist:
34 // 1.00, 1.00d, 1.02b, 1.10a, 1.20, 1.44, 1.45f
// Kind of file detected: data, COM, or EXE. FTYPE_UNKNOWN (0) means the
// file was not identified.
36 enum ftype_enum ftype
;
// Per-run state of the DIET module (used as "lctx" throughout this file).
// NOTE(review): only some of the members are visible in this excerpt; others
// (e.g. errflag, need_errmsg, cmpr_pos, cmpr_len, orig_len, crc_reported,
// dcmpr_cur_ipos, o_dcmpr_code) are referenced by the functions below.
46 typedef struct localctx_struct_diet
{
// Identification results, filled in by identify_diet_fmt().
47 struct diet_identify_data idd
;
// Value of the "diet:raw" option as set in de_run_diet().
52 u8 raw_mode
; // 0xff = not set
53 u8 hdr_flags1
; // Valid if dlz_pos_known (otherwise 0)
54 u8 hdr_flags2
; // Valid if dlz_pos_known (otherwise 0)
// EXE information about the input file; allocated in de_run_diet() and
// filled in by fmtutil_collect_exe_info() in write_exe_file().
59 struct fmtutil_exe_info
*ei
;
// Number of bytes written to o_dcmpr_code by my_lz77buf_writebytecb().
61 i64 o_dcmpr_code_nbytes_written
;
// Bit buffer used by fill_bitbuf() and diet_getbit().
63 struct de_bitbuf_lowlevel bbll
;
// Identifies the DIET variant of the input file and records the result in
// *idd (file type, format variant, and the positions that are known for it).
//   c      - Deark context; the file examined is c->infile.
//   idd    - receives the identification results.
//   idmode - nonzero: skip the EXE probing (see the comment below).
// NOTE(review): many lines of this function (braces, else-branches, local
// declarations) are not visible in this excerpt, so the exact nesting of the
// tests below cannot be confirmed from here.
66 // idmode==1: We're in the 'identify' phase -- Do just enough to
67 // detect COM & data formats.
68 static void identify_diet_fmt(deark
*c
, struct diet_identify_data
*idd
, u8 idmode
)
// Byte patterns that the tests below look for at fixed offsets.
70 static const u8
*sig_9d89
= (const u8
*)"\x9d\x89";
71 static const u8
*sig_dlz
= (const u8
*)"dlz";
72 static const u8
*sig_int21
= (const u8
*)"\xb4\x4c\xcd\x21";
73 static const u8
*sig_old
= (const u8
*)"\xfd\xf3\xa5\xfc\x8b\xf7\xbf\x00";
// This literal has 9 bytes, but the comparisons below only check the first 8.
74 static const u8
*sig_8edb
= (const u8
*)"\x8e\xdb\x8e\xc0\x33\xf6\x33\xff\xb9";
// Load the bytes at offset 0 into buf, for the in-memory tests below.
77 de_read(buf
, 0, sizeof(buf
));
// COM candidates: "dlz" at offset 35 and/or sig_old at offset 17.
80 if(!dbuf_memcmp(c
->infile
, 35, sig_dlz
, 3)) {
81 if(!dbuf_memcmp(c
->infile
, 17, sig_old
, 8))
83 idd
->ftype
= FTYPE_COM
;
84 idd
->fmt
= FMT_COM_102
;
85 idd
->dlz_pos_known
= 1;
93 if(!dbuf_memcmp(c
->infile
, 17, sig_old
, 8))
95 idd
->ftype
= FTYPE_COM
;
96 idd
->fmt
= FMT_COM_100
;
97 idd
->crc_pos_known
= 1;
99 idd
->cmpr_pos_known
= 1;
// COM candidate: "dlz" at offset 65 together with 9d 89 at offset 10.
106 if(!dbuf_memcmp(c
->infile
, 65, sig_dlz
, 3)) {
107 if(!dbuf_memcmp(c
->infile
, 10, sig_9d89
, 2)) {
108 idd
->ftype
= FTYPE_COM
;
109 idd
->fmt
= FMT_COM_144
;
110 idd
->dlz_pos_known
= 1;
// Data-file candidates: file starts with b4 4c cd 21, then 9d 89.
118 if(!de_memcmp(&buf
[0], sig_int21
, 4)) {
119 if(!de_memcmp(&buf
[4], sig_9d89
, 2)) {
120 idd
->ftype
= FTYPE_DATA
;
121 if(!de_memcmp(&buf
[6], sig_dlz
, 3)) {
122 idd
->fmt
= FMT_DATA_144
;
123 idd
->dlz_pos_known
= 1;
127 idd
->fmt
= FMT_DATA_100
;
128 idd
->crc_pos_known
= 1;
130 idd
->cmpr_pos_known
= 1;
// Data-file candidate: file starts with 9d 89 followed by "dlz".
138 if(!de_memcmp(&buf
[0], sig_9d89
, 2)) {
139 if(!de_memcmp(&buf
[2], sig_dlz
, 3)) {
140 idd
->ftype
= FTYPE_DATA
;
141 idd
->fmt
= FMT_DATA_102
;
142 idd
->dlz_pos_known
= 1;
149 // Don't autodetect EXE -- It's handled by the "exe" module.
150 if(idmode
) goto done
;
// EXE candidates: file starts with "MZ" or "ZM".
152 if((buf
[0]=='M' && buf
[1]=='Z') || (buf
[0]=='Z' && buf
[1]=='M')) {
// Offset of the matched 8e db ... pattern relative to codestart; 0 = no match.
154 i64 sig_8edb_pos_rel
= 0;
157 // TODO?: Probing for the 8e db 8e... byte pattern is good enough for
158 // all the DIET-EXE files I've encountered. But it probably ought to be
159 // improved, somehow.
160 // I've found some files in which the "dlz" signature has been modified,
161 // so checking for it wouldn't help much.
// The 16-bit value at offset 8 is scaled by 16 to get a byte offset.
163 codestart
= 16 * de_getu16le(8); // Expected to be 32
// Try each known position of the pattern; the offsets are written as
// (file offset when codestart is 32) - 32.
165 if(!dbuf_memcmp(c
->infile
, codestart
-32+77, sig_8edb
, 8)) {
166 sig_8edb_pos_rel
= 77-32;
168 else if(!dbuf_memcmp(c
->infile
, codestart
-32+72, sig_8edb
, 8)) {
169 sig_8edb_pos_rel
= 72-32;
171 else if(!dbuf_memcmp(c
->infile
, codestart
-32+52, sig_8edb
, 8)) {
172 sig_8edb_pos_rel
= 52-32;
174 else if(!dbuf_memcmp(c
->infile
, codestart
-32+55, sig_8edb
, 8)) {
175 sig_8edb_pos_rel
= 55-32;
// None of the known positions matched.
178 if(sig_8edb_pos_rel
==0) goto done
;
// NOTE(review): the use of x is not visible in this excerpt.
180 x
= de_getbyte(codestart
+sig_8edb_pos_rel
+26);
// Map the matched position to a format variant and header position.
185 if(sig_8edb_pos_rel
== 77-32) {
186 idd
->ftype
= FTYPE_EXE
;
187 idd
->fmt
= FMT_EXE_145F
;
188 idd
->dlz_pos_known
= 1;
189 idd
->dlz_pos
= codestart
-32+108;
193 if(sig_8edb_pos_rel
== 72-32) {
194 idd
->ftype
= FTYPE_EXE
;
195 idd
->fmt
= FMT_EXE_144
;
196 idd
->dlz_pos_known
= 1;
197 idd
->dlz_pos
= codestart
-32+107;
201 if(sig_8edb_pos_rel
== 52-32) {
202 idd
->ftype
= FTYPE_EXE
;
203 idd
->fmt
= FMT_EXE_102
;
204 idd
->dlz_pos_known
= 1;
205 idd
->dlz_pos
= codestart
-32+87;
209 if(sig_8edb_pos_rel
== 55-32) {
210 idd
->ftype
= FTYPE_EXE
;
211 idd
->fmt
= FMT_EXE_100
;
212 idd
->crc_pos_known
= 1;
214 idd
->cmpr_pos_known
= 1;
215 idd
->cmpr_pos
= codestart
-32+90;
// When the "dlz" header position is known, the CRC field is at dlz_pos+6 and
// the compressed data starts at dlz_pos+11.
221 if(idd
->dlz_pos_known
) {
222 idd
->crc_pos_known
= 1;
223 idd
->cmpr_pos_known
= 1;
224 idd
->crc_pos
= idd
->dlz_pos
+ 6;
225 idd
->cmpr_pos
= idd
->dlz_pos
+ 11;
// Refills the bit buffer d->bbll with data read from the input file at
// d->dcmpr_cur_ipos (the read advances that position).
// Does nothing if d->errflag is already set.
// NOTE(review): the loop around the read and the body of the end-of-file
// branch are not visible in this excerpt.
229 static void fill_bitbuf(deark
*c
, lctx
*d
)
233 if(d
->errflag
) return;
// Need at least 2 more input bytes.
234 if(d
->dcmpr_cur_ipos
+2 > c
->infile
->len
) {
242 b
= de_getbyte_p(&d
->dcmpr_cur_ipos
);
243 de_bitbuf_lowlevel_add_byte(&d
->bbll
, b
);
// Returns the next bit taken from the bit buffer d->bbll.
// Returns 0 immediately if d->errflag is set.
// The buffer is tested for emptiness both before and after the bit is taken.
// NOTE(review): the bodies of those two tests are not visible here;
// presumably they call fill_bitbuf() -- confirm.
247 static u8
diet_getbit(deark
*c
, lctx
*d
)
251 if(d
->errflag
) return 0;
253 if(d
->bbll
.nbits_in_bitbuf
==0) {
257 v
= (u8
)de_bitbuf_lowlevel_get_bits(&d
->bbll
, 1);
259 if(d
->bbll
.nbits_in_bitbuf
==0) {
// Byte-output callback installed on the LZ77 buffer by do_decompress_code().
//   rb - the LZ77 buffer; rb->userdata holds the lctx pointer.
//   n  - the byte to emit.
// Appends n to d->o_dcmpr_code and counts it in o_dcmpr_code_nbytes_written.
266 static void my_lz77buf_writebytecb(struct de_lz77buffer
*rb
, u8 n
)
268 lctx
*d
= (lctx
*)rb
->userdata
;
270 dbuf_writebyte(d
->o_dcmpr_code
, n
);
271 d
->o_dcmpr_code_nbytes_written
++;
// Decodes a match length (for matches of 3 or more bytes; called from
// do_decompress_code()) from the bit stream, and in the longest case from one
// additional input byte.
// Encodings visible here: a short unary-style prefix of up to 4 bits, then
// further bits selecting lengths 7-8, 9-16, or 17 + a whole byte.
// NOTE(review): the loop, the branch bodies and the return statement are
// only partly visible in this excerpt.
274 static UI
read_matchlen(deark
*c
, lctx
*d
)
277 u8 x
, x1
, x2
, x3
, x4
, x5
;
281 // Read up to 4 bits, stopping early if we get a 1.
283 x
= diet_getbit(c
, d
);
286 matchlen
= 2+nbits_read
;
289 if(nbits_read
>=4) break;
291 // At this point we've read 4 bits, all 0.
293 x1
= diet_getbit(c
, d
);
294 x2
= diet_getbit(c
, d
);
296 if(x1
==1) { // length 7-8
301 if(x2
==0) { // length 9-16
// Three more bits form a 3-bit number (x3 is the most significant bit).
302 x3
= diet_getbit(c
, d
);
303 x4
= diet_getbit(c
, d
);
304 x5
= diet_getbit(c
, d
);
305 matchlen
= 9 + 4*(UI
)x3
+ 2*(UI
)x4
+ (UI
)x5
;
// Longest form: a full byte is read directly from the input (not through
// the bit buffer), giving lengths 17..272.
310 v
= de_getbyte_p(&d
->dcmpr_cur_ipos
);
311 matchlen
= 17 + (UI
)v
;
// Main decompressor: decodes the compressed data starting at d->cmpr_pos in
// c->infile and writes the result to d->o_dcmpr_code (via the LZ77 buffer's
// write callback).
// The stream interleaves control bits (read with diet_getbit()) with whole
// bytes read directly at d->dcmpr_cur_ipos. Items are literal bytes, 2-byte
// matches, longer matches, a stop code, and a "segment refresh" code.
// NOTE(review): the main loop, several else-branches, the error paths and
// the labels (ready_for_match, after_decompress, done) are not visible in
// this excerpt.
317 static void do_decompress_code(deark
*c
, lctx
*d
)
319 struct de_lz77buffer
*ringbuf
= NULL
;
322 u8 a1
, a2
, a3
, a4
, a5
, a6
, a7
, a8
;
// The compressed data must lie entirely within the input file.
324 if(d
->cmpr_pos
+ d
->cmpr_len
> c
->infile
->len
) {
328 de_dbg(c
, "decompressing cmpr code at %"I64_FMT
, d
->cmpr_pos
);
// History buffer created with size 8192; the largest match position decoded
// below is 8191. Output goes through my_lz77buf_writebytecb().
331 ringbuf
= de_lz77buffer_create(c
, 8192);
332 ringbuf
->userdata
= (void*)d
;
333 ringbuf
->writebyte_cb
= my_lz77buf_writebytecb
;
335 d
->dcmpr_cur_ipos
= d
->cmpr_pos
;
337 de_bitbuf_lowlevel_empty(&d
->bbll
);
343 if(d
->errflag
) goto done
;
345 x1
= diet_getbit(c
, d
);
346 if(x1
) { // 1... -> literal byte
349 b
= de_getbyte_p(&d
->dcmpr_cur_ipos
);
350 if(c
->debug_level
>=4) {
351 de_dbg(c
, "lit 0x%02x", (UI
)b
);
353 de_lz77buffer_add_literal_byte(ringbuf
, b
);
// Not a literal: one more control bit, then a byte v that supplies the low
// 8 bits of the match position.
357 x2
= diet_getbit(c
, d
);
358 v
= de_getbyte_p(&d
->dcmpr_cur_ipos
);
360 if(x2
==0) { // 00[XX]... -> 2-byte match or special code
361 a1
= diet_getbit(c
, d
); // Always need at least 1 more bit
362 if(a1
) { // "long" two-byte match
364 a2
= diet_getbit(c
, d
);
365 a3
= diet_getbit(c
, d
);
366 a4
= diet_getbit(c
, d
);
367 matchpos
= 2303 - (1024*(UI
)a2
+ 512*(UI
)a3
+ 256*(UI
)a4
+ v
);
368 goto ready_for_match
;
370 else if(v
!=0xff) { // "short" two-byte match
372 matchpos
= 0xff - (UI
)v
;
373 goto ready_for_match
;
// v==0xff with a1==0: special code, selected by one more bit.
378 a2
= diet_getbit(c
, d
);
379 if(a2
==0) { // 00[FF]00
380 de_dbg3(c
, "stop code");
381 goto after_decompress
;
385 if(d
->idd
.ftype
==FTYPE_EXE
) {
386 de_dbg3(c
, "segment refresh");
389 de_err(c
, "Unsupported feature");
394 // 01[v] -> 3 or more byte match
// The match position is built from v plus a variable number of extra bits;
// the trailing comments show the bit pattern consumed in each case.
396 a1
= diet_getbit(c
, d
);
397 a2
= diet_getbit(c
, d
);
400 matchpos
= 511 - (256*(UI
)a1
+ (UI
)v
);
404 a3
= diet_getbit(c
, d
);
406 matchpos
= 1023 - (256*(UI
)a1
+ (UI
)v
);
411 a4
= diet_getbit(c
, d
);
412 a5
= diet_getbit(c
, d
);
414 if(a5
) { // 01[v]?00?1
415 matchpos
= 2047 - (512*(UI
)a1
+ 256* (UI
)a4
+ (UI
)v
);
420 a6
= diet_getbit(c
, d
);
421 a7
= diet_getbit(c
, d
);
423 if(a7
) { // 01[v]?00?0?1
424 matchpos
= 4095 - (1024*(UI
)a1
+ 512*(UI
)a4
+ 256*(UI
)a6
+ (UI
)v
);
429 a8
= diet_getbit(c
, d
);
430 matchpos
= 8191 - (2048*(UI
)a1
+ 1024*(UI
)a4
+ 512*(UI
)a6
+ 256*(UI
)a8
+ (UI
)v
);
434 matchlen
= read_matchlen(c
, d
);
435 if(d
->errflag
) goto done
;
// matchpos is zero-based (0 = the most recently written byte); the debug
// message prints it one-based.
438 if(c
->debug_level
>=3) {
439 de_dbg3(c
, "match pos=%u len=%u", matchpos
+1, matchlen
);
441 if((i64
)matchpos
+1 > d
->o_dcmpr_code_nbytes_written
) {
442 // Match refers to data before the beginning of the file --
443 // DIET doesn't do this.
448 if(matchlen
> (i64
)matchpos
+1) {
449 // Some matching data hasn't been decompressed yet.
450 // This is a legitimate feature of LZ77, but DIET apparently doesn't
// Copy matchlen bytes starting matchpos+1 bytes behind the current position.
457 de_lz77buffer_copy_from_hist(ringbuf
,
458 (UI
)(ringbuf
->curpos
-1-matchpos
), matchlen
);
462 de_dbg(c
, "decompressed %"I64_FMT
" bytes to %"I64_FMT
, (d
->dcmpr_cur_ipos
-d
->cmpr_pos
),
463 d
->o_dcmpr_code_nbytes_written
);
// Cleanup: flush buffered output and release the history buffer.
466 dbuf_flush(d
->o_dcmpr_code
);
467 de_lz77buffer_destroy(c
, ringbuf
);
468 de_dbg_indent(c
, -1);
// Writes the decompressed data (all of d->o_dcmpr_code) to a new output
// file, unchanged. Called from de_run_diet() for data files, COM files, and
// EXE files in raw mode.
// For COM files the extension is "com" and de_stdwarn_execomp() is called.
// NOTE(review): the default value of ext and the closing of outf are not
// visible in this excerpt.
471 static void write_data_or_com_file(deark
*c
, lctx
*d
)
476 if(d
->idd
.ftype
==FTYPE_COM
) ext
= "com";
479 outf
= dbuf_create_output_file(c
, ext
, NULL
, 0);
480 dbuf_copy(d
->o_dcmpr_code
, 0, d
->o_dcmpr_code
->len
, outf
);
482 if(d
->idd
.ftype
==FTYPE_COM
) {
483 de_stdwarn_execomp(c
);
// Working data for reconstructing the original EXE file from the
// decompressed ("intermediate") data in d->o_dcmpr_code.
// Allocated in write_exe_file() and filled in by find_exe_params() and
// decode_reloc_tbl().
// NOTE(review): the cdata1_size member used by those functions is not
// visible in this excerpt.
489 struct exe_dcmpr_ctx
{
490 i64 mz_pos
; // pos in d->o_dcmpr_code
491 i64 encoded_reloc_tbl_pos
; // pos in d->o_dcmpr_code
492 i64 encoded_reloc_tbl_size
; // size in d->o_dcmpr_code
494 i64 cdata2_size
; // Size in the original file; may be abbreviated in d->o_dcmpr_code
// EXE info collected from the reconstructed 28-byte original header.
495 struct fmtutil_exe_info o_ei
;
// Locates the original "MZ" header inside d->o_dcmpr_code for v1.00 EXE
// files and stores its position in ectx->mz_pos.
// It reads parameters from the code that follows the compressed data in the
// input file, then tests the 16 candidate positions mz_pos_approx+0..15.
// NOTE(review): the last parameter of the signature (mz_pos_approx, per the
// call in find_exe_params()), several declarations, and the failure paths
// are not visible in this excerpt.
498 // For v1.00 format, there doesn't seem to be a good way to figure out the exact
499 // offset of the "MZ" header within the blob of bytes produced by the main
500 // decompression algorithm.
501 // I've never seen DIET fail to correctly decompress such a file, but I'm
502 // starting to suspect it might be possible to construct a pathological file
503 // for which it fails.
504 // We can narrow it down to 16 possibilities, and if there's exactly one that
505 // is potentially valid, we go with it. In practice, this should be good enough.
506 static void find_v100_mz_pos(deark
*c
, lctx
*d
, struct exe_dcmpr_ctx
*ectx
,
511 i64 reloc_tbl_rel
= 0;
513 i64 nbytes_to_search
;
515 u8 fclass
= 0; // 0=unknown, 1=has params at params_pos, 2=no reloc table
520 int saved_indent_level
;
522 de_dbg_indent_save(c
, &saved_indent_level
);
523 de_dbg(c
, "[searching for MZ pos in intermed. data]");
526 cmpr_endpos
= d
->cmpr_pos
+ d
->cmpr_len
;
527 de_dbg(c
, "cmpr data end: %"I64_FMT
, cmpr_endpos
);
529 // Sanity check. This part of the decompressor always seems to start with
// The four bytes at cmpr_endpos must be d1 ed fe ca.
531 n
= de_getu32be(cmpr_endpos
);
532 if(n
!= 0xd1edfecaU
) {
538 // We're looking for some parameters in the part of the code that
539 // appears after the compressed data. (But if there are 0 relocations,
540 // then the params won't be present, and we have to detect that as
542 // Probing at precise offsets doesn't seem to be robust enough, so we
543 // resort to doing a search for characteristic byte patterns.
// Search at most 1000 bytes, and not past the end of the DOS code.
545 nbytes_to_search
= de_min_int(d
->ei
->end_of_dos_code
- cmpr_endpos
, 1000);
548 ret
= dbuf_search(c
->infile
, (const u8
*)"\x5d\x0e\x1f\xbe", 4, cmpr_endpos
,
549 nbytes_to_search
, &foundpos
);
// The parameters start right after the 4-byte pattern.
552 params_pos
= foundpos
+4;
557 // The case where there are no relocations
558 ret
= dbuf_search(c
->infile
, (const u8
*)"\x5d\x07\x1f\x81", 4, cmpr_endpos
,
559 nbytes_to_search
, &foundpos
);
573 de_dbg(c
, "params pos: %"I64_FMT
" (b+%"I64_FMT
"; e-%"I64_FMT
")",
574 params_pos
, params_pos
- cmpr_endpos
,
575 d
->ei
->end_of_dos_code
- params_pos
);
// Parameters: 16-bit relocation table position at +0, 16-bit relocation
// count at +3.
576 reloc_tbl_rel
= de_getu16le(params_pos
);
577 de_dbg(c
, "reloc tbl intermed. pos: approx_MZ+%"I64_FMT
, reloc_tbl_rel
);
578 nrelocs_r
= de_getu16le(params_pos
+3);
581 de_dbg(c
, "nrelocs (reported): %"I64_FMT
, nrelocs_r
);
583 // Search for the MZ header, hopefully avoiding false positives.
584 for(i
=0; i
<16; i
++) {
587 // Look for "MZ" or "ZM"
// The two bytes are summed; 'M'+'Z' with a first byte of 'M' or 'Z' means
// the pair is "MZ" or "ZM".
588 sig
= (int)dbuf_getbyte(d
->o_dcmpr_code
, mz_pos_approx
+i
);
589 if(sig
!='M' && sig
!='Z') continue;
590 sig
+= (int)dbuf_getbyte(d
->o_dcmpr_code
, mz_pos_approx
+i
+1);
591 if(sig
!= 'M'+'Z') continue;
593 // Validate the reloc count
// Header offset 6 must equal the reported relocation count.
594 n
= dbuf_getu16le(d
->o_dcmpr_code
, mz_pos_approx
+i
+6);
595 if(n
!=nrelocs_r
) continue;
597 // Validate the reloc pos if possible
// Header offset 24 (relocation table position), plus i, must equal the
// reported relocation table position.
599 n
= dbuf_getu16le(d
->o_dcmpr_code
, mz_pos_approx
+i
+24);
600 if(i
+n
!=reloc_tbl_rel
) continue;
609 ectx
->mz_pos
= mz_pos_approx
+i
;
619 de_dbg(c
, "MZ header found at %"I64_FMT
, ectx
->mz_pos
);
622 de_dbg_indent_restore(c
, saved_indent_level
);
// Determines where the original EXE header and its parts are located in
// d->o_dcmpr_code, and fills in ectx (mz_pos, o_ei, encoded_reloc_tbl_pos,
// cdata1_size, cdata2_size). The reconstructed 28-byte header is written to
// o_orig_header.
// Does nothing if d->errflag is already set. On failure it reports
// "Unsupported variety of DIET-EXE file" when both errflag and need_errmsg
// are set.
// NOTE(review): the last parameter of the signature (o_orig_header, per the
// call in write_exe_file()), the computation of ioffset1, and the
// error-setting branches are not visible in this excerpt.
625 // Caller creates and passes empty o_orig_header to us.
626 static void find_exe_params(deark
*c
, lctx
*d
, struct exe_dcmpr_ctx
*ectx
,
635 if(d
->errflag
) return;
// A 16-bit parameter stored relative to the entry point gives the
// approximate MZ position, in units of 16 bytes.
654 iparam1
= de_getu16le(d
->ei
->entry_point
+ ioffset1
);
655 mz_pos_approx
= iparam1
* 16;
656 de_dbg(c
, "approx MZ pos in intermed. data: %"I64_FMT
, mz_pos_approx
);
// v1.00 needs the search-based approach.
658 if(d
->idd
.fmt
==FMT_EXE_100
) {
659 find_v100_mz_pos(c
, d
, ectx
, mz_pos_approx
);
660 if(d
->errflag
) goto done
;
663 if(!d
->orig_len_known
|| !(d
->hdr_flags1
& 0x20)) {
// The exact position is the approximate one plus (orig_len mod 16).
669 ectx
->mz_pos
= mz_pos_approx
+ (d
->orig_len
% 16);
670 de_dbg(c
, "expected MZ pos in intermed. data: %"I64_FMT
, ectx
->mz_pos
);
671 // Verify that this seems to be the right place.
672 n
= dbuf_getu16be(d
->o_dcmpr_code
, ectx
->mz_pos
);
673 if(n
!=0x4d5a && n
!=0x5a4d) {
680 // Note: DIET elides trailing 0-valued bytes from the intermediate format
681 // we store in o_dcmpr_code.
682 // In some cases, o_dcmpr_code ends even before the end of the 28-byte
684 // So, this and later calls to dbuf_copy() may read beyond the end of
685 // o_dcmpr_code. That's by design -- we rely on dbuf_copy to replace
686 // missing bytes with 0-valued bytes.
687 dbuf_copy(d
->o_dcmpr_code
, ectx
->mz_pos
, 28, o_orig_header
);
// Keep only the lowest bit of header byte 3.
689 byte3
= dbuf_getbyte(o_orig_header
, 3);
690 dbuf_writebyte_at(o_orig_header
, 3, (byte3
& 0x01));
692 fmtutil_collect_exe_info(c
, o_orig_header
, &ectx
->o_ei
);
694 // collect_exe_info() will not have calculated the overlay len, because we
695 // didn't tell it the correct file size. So, patch it up here.
696 ectx
->o_ei
.overlay_len
= ectx
->o_ei
.start_of_dos_code
+ ectx
->mz_pos
-
697 ectx
->o_ei
.end_of_dos_code
;
698 if(ectx
->o_ei
.overlay_len
<0) ectx
->o_ei
.overlay_len
= 0;
700 ectx
->encoded_reloc_tbl_pos
= ectx
->mz_pos
+ ectx
->o_ei
.reloc_table_pos
;
// cdata1 = bytes between the 28-byte header and the relocation table (or the
// start of code, if there are no relocations); cdata2 = bytes between the end
// of the relocation table and the start of code.
702 if(ectx
->o_ei
.num_relocs
==0) {
703 ectx
->cdata1_size
= ectx
->o_ei
.start_of_dos_code
- 28;
704 ectx
->cdata2_size
= 0;
707 ectx
->cdata1_size
= ectx
->o_ei
.reloc_table_pos
- 28;
708 ectx
->cdata2_size
= ectx
->o_ei
.start_of_dos_code
- (ectx
->o_ei
.reloc_table_pos
+
709 4*ectx
->o_ei
.num_relocs
);
711 if(ectx
->cdata1_size
<0 || ectx
->cdata2_size
<0) {
718 if(d
->errflag
&& d
->need_errmsg
) {
719 de_err(c
, "Unsupported variety of DIET-EXE file");
// Decodes nrelocs relocation entries from inf, starting at ipos1, and writes
// each one to outf as two 16-bit little-endian values (offset, then segment).
// Also records how many input bytes the encoded table occupies: the number
// of bytes actually consumed when header flag 0x20 is set, nrelocs*4 in the
// other visible case.
// NOTE(review): the handling of the "special code" and the derivation of seg
// are not visible in this excerpt.
724 // Sets ectx->encoded_reloc_tbl_size
725 static void decode_reloc_tbl(deark
*c
, lctx
*d
, struct exe_dcmpr_ctx
*ectx
,
726 dbuf
*inf
, i64 ipos1
, i64 nrelocs
, dbuf
*outf
)
733 for(i
=0; i
<nrelocs
; i
++) {
736 n
= (UI
)dbuf_getu16le_p(inf
, &ipos
);
738 // Special code: segment stays the same, and offset is adjusted
739 // relative to the previous offset.
750 offs
= (UI
)dbuf_getu16le_p(inf
, &ipos
);
753 dbuf_writeu16le(outf
, (i64
)offs
);
754 dbuf_writeu16le(outf
, (i64
)seg
);
757 if(d
->hdr_flags1
& 0x20) {
758 ectx
->encoded_reloc_tbl_size
= ipos
- ipos1
;
761 ectx
->encoded_reloc_tbl_size
= nrelocs
*4;
// Reconstructs the original EXE file from d->o_dcmpr_code and writes it to a
// new output file with extension "exe".
// Output order: 28-byte header, custom data before the relocation table,
// decoded relocation table, custom data after it, code image, and finally
// any external overlay copied from the input file.
// Does nothing if d->o_dcmpr_code is NULL; stops before creating the output
// file if find_exe_params() sets d->errflag.
// NOTE(review): the closing of outf, the freeing of ectx and parts of the
// overlay branch are not visible in this excerpt.
765 static void write_exe_file(deark
*c
, lctx
*d
)
767 struct exe_dcmpr_ctx
*ectx
= NULL
;
769 dbuf
*o_orig_header
= NULL
;
770 dbuf
*reloc_tbl
= NULL
;
771 int saved_indent_level
;
773 de_dbg_indent_save(c
, &saved_indent_level
);
774 if(!d
->o_dcmpr_code
) goto done
;
776 ectx
= de_malloc(c
, sizeof(struct exe_dcmpr_ctx
));
778 de_dbg(c
, "[writing EXE]");
// Collect EXE info about the compressed input file itself.
780 fmtutil_collect_exe_info(c
, c
->infile
, d
->ei
);
782 o_orig_header
= dbuf_create_membuf(c
, 28, 0);
783 find_exe_params(c
, d
, ectx
, o_orig_header
);
784 if(d
->errflag
) goto done
;
786 outf
= dbuf_create_output_file(c
, "exe", NULL
, 0);
// The reconstructed 28-byte header.
789 dbuf_copy(o_orig_header
, 0, 28, outf
);
791 // Copy the custom data up to the relocation table.
792 // (If there's no relocation table, this will be everything up to the
794 dbuf_copy(d
->o_dcmpr_code
, ectx
->mz_pos
+28, ectx
->cdata1_size
, outf
);
796 if(ectx
->o_ei
.num_relocs
!=0) {
// Decode the relocation table into a temporary buffer (4 bytes per entry).
798 reloc_tbl
= dbuf_create_membuf(c
, 4*ectx
->o_ei
.num_relocs
, 0);
799 decode_reloc_tbl(c
, d
, ectx
, d
->o_dcmpr_code
, ectx
->encoded_reloc_tbl_pos
,
800 ectx
->o_ei
.num_relocs
, reloc_tbl
);
801 dbuf_copy(reloc_tbl
, 0, 4*ectx
->o_ei
.num_relocs
, outf
);
803 // Custom data following the relocation table
804 dbuf_copy(d
->o_dcmpr_code
,
805 ectx
->encoded_reloc_tbl_pos
+ ectx
->encoded_reloc_tbl_size
,
806 ectx
->cdata2_size
, outf
);
809 // Code image and (internal, compressed) overlay
// These bytes are everything in o_dcmpr_code before the MZ header.
810 dbuf_copy(d
->o_dcmpr_code
, 0, ectx
->mz_pos
, outf
);
812 // Copy external overlay. Pristine DIET-compressed files never have such a
813 // thing, but some other workflows (e.g. ARJ v2.00 SFX) create such files.
814 if(d
->ei
->overlay_len
>0) {
815 if(ectx
->o_ei
.overlay_len
>0) {
816 de_warn(c
, "Ignoring overlay at %"I64_FMT
" -- file already "
817 "has an overlay", d
->ei
->end_of_dos_code
);
820 de_dbg(c
, "overlay data at %"I64_FMT
", len=%"I64_FMT
, d
->ei
->end_of_dos_code
,
822 dbuf_copy(c
->infile
, d
->ei
->end_of_dos_code
, d
->ei
->overlay_len
, outf
);
// Cleanup.
827 dbuf_close(reloc_tbl
);
830 de_stdwarn_execomp(c
);
832 dbuf_close(o_orig_header
);
836 de_dbg_indent_restore(c
, saved_indent_level
);
// Reads the DIET header fields and stores them in d: flags, compressed
// length, reported CRC, and original length. Which fields can be read
// depends on the positions found by identify_diet_fmt().
// Layout relative to dlz_pos, as read here:
//   +3    flags (high 4 bits) and part of the compressed length (low 4 bits)
//   +4    16-bit little-endian value
//   +8    part of the original length (high 6 bits) and flags2 (low 2 bits)
//   +9    16-bit little-endian value
// NOTE(review): the lines that combine the 16-bit values (n) into cmpr_len
// and orig_len are not visible in this excerpt.
839 static void read_header(deark
*c
, lctx
*d
)
844 de_ucstring
*flags_str
= NULL
;
845 int saved_indent_level
;
847 de_dbg_indent_save(c
, &saved_indent_level
);
849 flags_str
= ucstring_create(c
);
851 if(d
->idd
.dlz_pos_known
) {
852 de_dbg(c
, "header at %"I64_FMT
, d
->idd
.dlz_pos
);
855 pos
= d
->idd
.dlz_pos
+ 3;
856 x
= de_getbyte_p(&pos
);
857 d
->hdr_flags1
= x
& 0xf0;
858 if(d
->hdr_flags1
& 0x80) ucstring_append_flags_item(flags_str
, "has following block");
859 if(d
->hdr_flags1
& 0x20) ucstring_append_flags_item(flags_str
, "new EXE format");
860 if(d
->hdr_flags1
& 0x10) ucstring_append_flags_item(flags_str
, "has segment refresh data");
861 de_dbg(c
, "flags: 0x%02x (%s)", d
->hdr_flags1
, ucstring_getpsz(flags_str
));
// The low 4 bits of the flags byte become bits 16-19 of the compressed length.
862 d
->cmpr_len
= (i64
)(x
& 0x0f)<<16;
863 n
= de_getu16le_p(&pos
);
865 de_dbg(c
, "cmpr len: %"I64_FMT
, d
->cmpr_len
);
866 d
->cmpr_len_known
= 1;
// v1.00 EXE: the compressed length is read from file offset 32.
868 else if(d
->idd
.fmt
==FMT_EXE_100
) {
869 d
->cmpr_len
= de_getu32le(32);
870 d
->cmpr_len
&= 0xfffff; // Dunno if this is 24-bits, or maybe just 20 bits
871 de_dbg(c
, "cmpr len: %"I64_FMT
, d
->cmpr_len
);
872 d
->cmpr_len_known
= 1;
875 if(d
->idd
.crc_pos_known
) {
876 d
->crc_reported
= (u32
)de_getu16le(d
->idd
.crc_pos
);
877 de_dbg(c
, "crc (reported): 0x%04x", (UI
)d
->crc_reported
);
880 if(d
->idd
.dlz_pos_known
) {
881 pos
= d
->idd
.dlz_pos
+ 8;
882 x
= de_getbyte_p(&pos
);
// The high 6 bits of this byte become bits 16-21 of the original length.
883 d
->orig_len
= (i64
)(x
& 0xfc)<<14;
884 d
->hdr_flags2
= (i64
)(x
& 0x03); // probably unused
885 n
= de_getu16le_p(&pos
);
887 de_dbg(c
, "orig len: %"I64_FMT
, d
->orig_len
);
888 d
->orig_len_known
= 1;
// v1.00 data files: assume the compressed data runs to the end of the file.
891 if(!d
->cmpr_len_known
&& d
->idd
.fmt
==FMT_DATA_100
) {
892 d
->cmpr_len
= c
->infile
->len
- d
->cmpr_pos
;
893 d
->cmpr_len_known
= 1;
896 ucstring_destroy(flags_str
);
897 de_dbg_indent_restore(c
, saved_indent_level
);
// Calculates a CRC (DE_CRCOBJ_CRC16_ARC) over the compressed data in the
// input file and compares it with d->crc_reported, warning on a mismatch.
// Skipped when the compressed data would extend past the end of the file.
// NOTE(review): the body of the !cmpr_len_known branch and the last argument
// of the warning call are not visible in this excerpt.
900 static void check_diet_crc(deark
*c
, lctx
*d
)
903 struct de_crcobj
*crco
= NULL
;
905 if(!d
->cmpr_len_known
) {
906 // TODO: For v1.00 COM format, we don't know how to figure out the
907 // compressed data size, and it doesn't end at the end of the file.
908 // (Testing the CRC *after* decompression, after we've found the "stop"
909 // code, isn't the right thing to do for this type of CRC.)
912 if(d
->cmpr_pos
+d
->cmpr_len
> c
->infile
->len
) goto done
;
914 crco
= de_crcobj_create(c
, DE_CRCOBJ_CRC16_ARC
);
915 de_crcobj_addslice(crco
, c
->infile
, d
->cmpr_pos
, d
->cmpr_len
);
916 crc_calc
= de_crcobj_getval(crco
);
917 de_dbg(c
, "crc (calculated): 0x%04x", (UI
)crc_calc
);
918 // Unfortunately, this is a CRC of the *compressed* data, so we can't use it
919 // to tell if we decompressed the data correctly.
920 if(crc_calc
!=d
->crc_reported
) {
921 de_warn(c
, "CRC check failed (expected 0x%04x, got 0x%04x). "
922 "File may be corrupted.", (UI
)d
->crc_reported
,
926 de_crcobj_destroy(crco
);
// Checks the header flags for features this module cannot handle.
// Flag 0x80 is the one read_header() labels "has following block".
// NOTE(review): what is done when the flag is set is not visible in this
// excerpt; de_run_diet() tests d->errflag right after calling this.
929 static void check_unsupp_features(deark
*c
, lctx
*d
)
931 if(d
->hdr_flags1
&0x80) {
// Module "run" function: identifies the DIET variant, reads the header,
// checks the CRC, decompresses, and writes the output file.
// Data and COM files (and EXE files when the "diet:raw" option is 1) are
// written as-is by write_data_or_com_file(); other EXE files are rebuilt by
// write_exe_file().
// NOTE(review): the switch that selects fmtn, the call to read_header(), and
// the freeing of d and d->ei are not visible in this excerpt.
941 static void de_run_diet(deark
*c
, de_module_params
*mparams
)
// Human-readable format name; stays NULL if the format was not recognized.
944 const char *fmtn
= NULL
;
946 d
= de_malloc(c
, sizeof(lctx
));
947 d
->ei
= de_malloc(c
, sizeof(struct fmtutil_exe_info
));
// 0xff is passed as the default, i.e. "not set" (see lctx.raw_mode).
948 d
->raw_mode
= (u8
)de_get_ext_option_bool(c
, "diet:raw", 0xff);
// idmode=0: full identification, including EXE probing.
950 identify_diet_fmt(c
, &d
->idd
, 0);
953 fmtn
= "file (v1.00)";
956 fmtn
= "file (v1.02-1.20)";
959 fmtn
= "file (v1.44-1.45)";
962 fmtn
= "COM (v1.00)";
965 fmtn
= "COM (v1.02-1.20)";
968 fmtn
= "COM (v1.44-1.45)";
971 fmtn
= "EXE (v1.00)";
974 fmtn
= "EXE (v1.02-1.20)";
977 fmtn
= "EXE (v1.44)";
980 fmtn
= "EXE (v1.45)";
987 de_declare_fmtf(c
, "DIET-compressed %s", fmtn
);
990 if(d
->idd
.maybe_lglz
) {
991 de_warn(c
, "This file might be LGLZ-compressed, not DIET");
// Cannot continue without a recognized format and a known data position.
994 if(!fmtn
|| !d
->idd
.cmpr_pos_known
) {
999 d
->cmpr_pos
= d
->idd
.cmpr_pos
;
1002 if(d
->errflag
) goto done
;
1004 check_diet_crc(c
, d
);
1006 check_unsupp_features(c
, d
);
1007 if(d
->errflag
) goto done
;
// Buffer for the decompressed data, sized from the original length if known.
1009 d
->o_dcmpr_code
= dbuf_create_membuf(c
,
1010 (d
->orig_len_known
? d
->orig_len
: MAX_DIET_DCMPR_LEN
), 0x1);
1011 dbuf_enable_wbuffer(d
->o_dcmpr_code
);
1013 do_decompress_code(c
, d
);
1014 if(d
->errflag
) goto done
;
1015 if(d
->idd
.ftype
==FTYPE_DATA
|| d
->idd
.ftype
==FTYPE_COM
||
1016 (d
->idd
.ftype
==FTYPE_EXE
&& d
->raw_mode
==1))
1018 write_data_or_com_file(c
, d
);
1020 else if(d
->idd
.ftype
==FTYPE_EXE
&& d
->raw_mode
!=1) {
1021 write_exe_file(c
, d
);
// Generic error message, for failures that have not been reported yet.
1027 if(d
->need_errmsg
) {
1028 de_err(c
, "Bad or unsupported file");
1030 dbuf_close(d
->o_dcmpr_code
);
// Module "identify" function. Runs identify_diet_fmt() in identify mode
// (idmode=1, so EXE files are not probed) on a zeroed result struct.
// Returns 90 if a file type was detected.
// NOTE(review): the return value for the not-detected case is not visible in
// this excerpt.
1035 static int de_identify_diet(deark
*c
)
1037 struct diet_identify_data idd
;
1039 de_zeromem(&idd
, sizeof(struct diet_identify_data
));
1040 identify_diet_fmt(c
, &idd
, 1);
1041 if(idd
.ftype
!=FTYPE_UNKNOWN
) return 90;
// Module "help" function: prints the description of the module-specific
// option "diet:raw".
1045 static void de_help_diet(deark
*c
)
1047 de_msg(c
, "-opt diet:raw : Instead of an EXE file, write raw decompressed data");
// Module registration: fills in *mi with this module's description and its
// run, identify, and help functions.
// NOTE(review): any other fields set here (e.g. the module id) are not
// visible in this excerpt.
1050 void de_module_diet(deark
*c
, struct deark_module_info
*mi
)
1053 mi
->desc
= "DIET compression";
1054 mi
->run_fn
= de_run_diet
;
1055 mi
->identify_fn
= de_identify_diet
;
1056 mi
->help_fn
= de_help_diet
;