1 // This file is part of Deark.
2 // Copyright (C) 2021 Jason Summers
3 // See the file COPYING for terms of use.
5 // Decompress LZEXE executable compression
7 #include <deark-private.h>
8 #include <deark-fmtutil.h>
// Presumably declares this module's registration function, de_module_lzexe(),
// which is defined at the end of this file.
7 DE_DECLARE_MODULE(de_module_lzexe
);
11 // Don't change these values, unless they are also changed in fmtutil-exe.c.
// They are the LZEXE variant codes stored in the context's "ver" field, which
// de_run_lzexe() copies from the detector's detected_subfmt.
12 #define LZEXE_VER_090 1
13 #define LZEXE_VER_091 2
14 #define LZEXE_VER_091E 3
15 #define LZEXE_VER_LHARK_SFX 102
16 #define LZEXE_VER_PCX2EXE 202
// Per-run state for the LZEXE module. Only some members are annotated here.
30 typedef struct localctx_struct
{
31 int ver
; // LZEXE_VER_* code: 1=0.90, 2=0.91, 3=0.91e, 102=LHARK SFX, 202=PCX2EXE
35 u8 raw_mode
; // Value of the "lzexe:raw" option; 0xff = not set
36 u8 can_decompress_to_exe
; // Nonzero if this variant can be written out as a reconstructed EXE file
37 u8 can_decompress_to_raw
; // Nonzero if this variant's decompressed data can be written out raw
38 struct fmtutil_exe_info
*ei
; // Info about the input EXE; allocated and filled in by de_run_lzexe()
45 struct ohdr_struct ohdr
; // Fields of the LZEXE private header; populated by read_special_hdr()
51 struct de_bitbuf_lowlevel bbll
; // Bit buffer used by fill_bitbuf() and lzexe_getbit()
54 // Read what we need from the 28-byte DOS header
// Specifically: the u16le values at file offsets 10 and 12 are saved in
// d->ihdr_minmem and d->ihdr_maxmem (do_write_dcmpr() later derives the output
// header's "paragraphs required"/"paragraphs requested" fields from them).
// Also checks for a negative d->ei->regCS.
55 static void do_read_header(deark
*c
, lctx
*d
)
57 d
->ihdr_minmem
= (UI
)de_getu16le(10);
58 d
->ihdr_maxmem
= (UI
)de_getu16le(12);
60 if(d
->ei
->regCS
< 0) {
61 // CS is signed. If it's ever negative in an LZEXE'd file, I'm not sure
62 // how to handle that.
// Read the LZEXE private header into d->ohdr, logging each field as it is
// read. All fields are 16-bit little-endian and are read consecutively:
// IP, CS (signed), SP, SS (signed), compressed length in 16-byte paragraphs,
// field5, field6, and -- for v0.90 only -- field7 and field8.
//   pos1: file offset of the private header (reported in the debug message).
67 static void read_special_hdr(deark
*c
, lctx
*d
, i64 pos1
)
71 de_dbg(c
, "LZEXE private info at %"I64_FMT
, pos1
);
// Original entry point (IP:CS) and stack (SP:SS) of the uncompressed program;
// do_write_dcmpr() copies these into the generated EXE header.
74 d
->ohdr
.regIP
= de_getu16le_p(&pos
);
75 de_dbg(c
, "ip: %u", (UI
)d
->ohdr
.regIP
);
76 d
->ohdr
.regCS
= de_geti16le_p(&pos
);
77 de_dbg(c
, "cs: %d", (int)d
->ohdr
.regCS
);
78 d
->ohdr
.regSP
= de_getu16le_p(&pos
);
79 de_dbg(c
, "sp: %u", (UI
)d
->ohdr
.regSP
);
80 d
->ohdr
.regSS
= de_geti16le_p(&pos
);
81 de_dbg(c
, "ss: %d", (int)d
->ohdr
.regSS
);
// Compressed code length, in units of 16 bytes. do_decompress_code() uses it
// to locate the start of the compressed data.
82 d
->ohdr
.cmpr_len_para
= de_getu16le_p(&pos
);
83 de_dbg(c
, "cmpr len: %u ("DE_CHAR_TIMES
"16=%"I64_FMT
")", (int)d
->ohdr
.cmpr_len_para
,
84 (i64
)(d
->ohdr
.cmpr_len_para
*16));
86 // TODO: These fields could be named better
// How they are used elsewhere in this file: field6 is added to the private
// header position to find the end of the relocation table (read_reloc_tbl),
// and field5 and field6 both feed the minmem adjustment in do_write_dcmpr().
87 d
->ohdr
.field5
= de_getu16le_p(&pos
);
88 de_dbg(c
, "field5: %u", (UI
)d
->ohdr
.field5
);
89 d
->ohdr
.field6
= de_getu16le_p(&pos
);
90 de_dbg(c
, "field6: %u", (UI
)d
->ohdr
.field6
);
// Only v0.90 has these two extra fields.
91 if(d
->ver
==LZEXE_VER_090
) {
92 d
->ohdr
.field7
= de_getu16le_p(&pos
);
93 de_dbg(c
, "field7: %u", (UI
)d
->ohdr
.field7
);
94 d
->ohdr
.field8
= de_getu16le_p(&pos
);
95 de_dbg(c
, "field8: %u", (UI
)d
->ohdr
.field8
);
// Decode the relocation table of a v0.90 file into d->o_reloc_table.
// The table occupies the range from 413 bytes past the private header up to
// d->end_of_reloc_tbl, and is decoded by the shared EXEPACK reloc-table helper.
101 static void do_decode_reloc_tbl_v090(deark
*c
, lctx
*d
)
106 pos
= d
->special_hdr_pos
+ 413;
107 endpos
= d
->end_of_reloc_tbl
;
// A zero return from the helper is treated as failure.
108 if(!fmtutil_decompress_exepack_reloc_tbl(c
, pos
, endpos
, d
->o_reloc_table
)) {
// Decode the compressed relocation table used by the v0.91-family variants,
// appending standard (offset, segment) u16le pairs to d->o_reloc_table.
// The compressed table starts 344 bytes past the private header.
113 static void do_decode_reloc_tbl_v091(deark
*c
, lctx
*d
)
119 ipos
= d
->special_hdr_pos
+ 344;
120 de_dbg(c
, "compressed reloc table: pos=%"I64_FMT
, ipos
);
// Sanity check: input position past the end of the table, or an implausibly
// large number of relocations.
126 if(ipos
>=d
->end_of_reloc_tbl
|| reloc_count
>65535) {
// Each item starts with a one-byte code...
131 x
= (UI
)de_getbyte_p(&ipos
);
// ...which in some cases is followed by a 16-bit value.
135 x2
= (UI
)de_getu16le_p(&ipos
);
// "reloc" is a linear address; split it into a 4-bit offset and a segment
// (address/16) for the output table.
151 de_dbg2(c
, "reloc: %05x", reloc
);
152 dbuf_writeu16le(d
->o_reloc_table
, (i64
)(reloc
&0x0f));
153 dbuf_writeu16le(d
->o_reloc_table
, (i64
)(reloc
>>4));
156 de_dbg(c
, "reloc count: %d", (int)reloc_count
);
159 de_dbg_indent(c
, -1);
// Refill d->bbll with bytes read from the compressed stream at
// d->dcmpr_cur_ipos, advancing that position. Does nothing if d->errflag is
// already set.
162 static void fill_bitbuf(deark
*c
, lctx
*d
)
166 if(d
->errflag
) return;
// Guard: the next two bytes must lie before the private header, which marks
// the end of the compressed data.
167 if(d
->dcmpr_cur_ipos
+2 > d
->special_hdr_pos
)
// NOTE(review): the +2 in the guard above suggests two bytes are loaded per
// refill, but only one read is visible in this excerpt -- confirm.
175 b
= de_getbyte_p(&d
->dcmpr_cur_ipos
);
176 de_bitbuf_lowlevel_add_byte(&d
->bbll
, b
);
// Return the next bit of the compressed control stream from d->bbll.
// Returns 0 if d->errflag is set.
180 static u8
lzexe_getbit(deark
*c
, lctx
*d
)
184 if(d
->errflag
) return 0;
// Bit buffer empty before the read (presumably triggers a refill -- the body
// of this branch is not visible here).
186 if(d
->bbll
.nbits_in_bitbuf
==0) {
190 v
= (u8
)de_bitbuf_lowlevel_get_bits(&d
->bbll
, 1);
// Bit buffer became empty after taking the bit. NOTE(review): presumably the
// buffer is refilled eagerly here so that refills happen at the right point
// relative to the interleaved literal/offset bytes -- confirm.
192 if(d
->bbll
.nbits_in_bitbuf
==0) {
// LZ77-buffer write callback: appends each decompressed byte n to
// d->o_dcmpr_code. rb->userdata is the lctx pointer installed by
// do_decompress_code().
199 static void my_lz77buf_writebytecb(struct de_lz77buffer
*rb
, u8 n
)
201 lctx
*d
= (lctx
*)rb
->userdata
;
203 dbuf_writebyte(d
->o_dcmpr_code
, n
);
// Decompress the LZEXE-compressed code image into d->o_dcmpr_code.
//
// The stream interleaves control bits (read via lzexe_getbit()) with whole
// bytes (read directly at d->dcmpr_cur_ipos). Items visible in this function:
//  - literal: one byte copied to the output;
//  - short match: 2 length bits (length 2..5) plus a 1-byte distance;
//  - long match: a 2-byte code holding a 13-bit distance and a 3-bit length,
//    optionally followed by an extra length byte that can also signal an
//    end-of-data code or a code that is skipped.
// Matches are copied out of an 8192-byte history buffer.
206 static void do_decompress_code(deark
*c
, lctx
*d
)
209 struct de_lz77buffer
*ringbuf
= NULL
;
211 // (I'd expect ipos1 to always equal d->ei->start_of_dos_code, but anyway...)
// The compressed data ends where the private header begins; its length is
// given in 16-byte paragraphs.
212 ipos1
= d
->special_hdr_pos
- d
->ohdr
.cmpr_len_para
*16;
213 de_dbg(c
, "decompressing cmpr code at %"I64_FMT
, ipos1
);
// 8192-byte history window; every byte written to it is forwarded to
// d->o_dcmpr_code by my_lz77buf_writebytecb().
216 ringbuf
= de_lz77buffer_create(c
, 8192);
217 ringbuf
->userdata
= (void*)d
;
218 ringbuf
->writebyte_cb
= my_lz77buf_writebytecb
;
220 d
->dcmpr_cur_ipos
= ipos1
;
222 de_bitbuf_lowlevel_empty(&d
->bbll
);
230 if(d
->errflag
) goto done
;
// First control bit of an item.
232 x
= lzexe_getbit(c
, d
);
// Literal byte.
236 b
= de_getbyte_p(&d
->dcmpr_cur_ipos
);
237 de_lz77buffer_add_literal_byte(ringbuf
, (u8
)b
);
// Second control bit: selects between the two match encodings.
241 x
= lzexe_getbit(c
, d
);
// Short match: two bits give length 2..5; one byte gives the distance.
243 x2
= lzexe_getbit(c
, d
);
244 x3
= lzexe_getbit(c
, d
);
245 matchlen
= ((UI
)x2
<<1) + (UI
)x3
+ 2;
// matchpos is (distance - 1): 0 means the byte just before the current one.
246 matchpos
= 0xffU
-(UI
)de_getbyte_p(&d
->dcmpr_cur_ipos
);
// Long match: low byte, then high byte.
251 lb
= de_getbyte_p(&d
->dcmpr_cur_ipos
);
252 hb
= de_getbyte_p(&d
->dcmpr_cur_ipos
);
// 13-bit value: the top 5 bits of hb are its high bits, lb its low 8 bits.
254 matchpos
= 0x1fffU
- ((((UI
)(hb
& 0xf8))<<5) | (UI
)lb
);
// Extra byte: either a special code or an explicit length.
260 xb
= de_getbyte_p(&d
->dcmpr_cur_ipos
);
263 de_dbg3(c
, "eof code");
264 goto after_decompress
;
267 continue; // something about segments...
270 matchlen
= (UI
)xb
+ 1;
// Length taken from the low 3 bits of hb.
275 matchlen
= (UI
)(hb
& 0x07) + 2;
279 if(c
->debug_level
>=3) {
280 de_dbg3(c
, "match (%u) pos=%u len=%u", (UI
)matchtype
, matchpos
+1, matchlen
);
// Copy matchlen bytes starting (matchpos+1) bytes behind the current position.
282 de_lz77buffer_copy_from_hist(ringbuf
,
283 (UI
)(ringbuf
->curpos
-1-matchpos
), matchlen
);
287 dbuf_flush(d
->o_dcmpr_code
);
// Report bytes consumed from the input and bytes produced.
288 de_dbg(c
, "decompressed %"I64_FMT
" bytes to %"I64_FMT
, (d
->dcmpr_cur_ipos
-ipos1
),
289 d
->o_dcmpr_code
->len
);
292 de_lz77buffer_destroy(c
, ringbuf
);
293 de_dbg_indent(c
, -1);
// Raw-mode output: write the entire decompressed code buffer to an output
// file with the extension "bin". Does nothing if there is no such buffer.
296 static void do_write_data_only(deark
*c
, lctx
*d
)
298 if(!d
->o_dcmpr_code
) return;
299 dbuf_create_file_from_slice(d
->o_dcmpr_code
, 0, d
->o_dcmpr_code
->len
, "bin", NULL
, 0);
302 // Generate the decompressed file
// Output layout: a 28-byte DOS header, the decoded relocation table, padding
// up to a multiple of d->o_code_alignment, the decompressed code, and finally
// any overlay data copied unchanged from the input file.
303 static void do_write_dcmpr(deark
*c
, lctx
*d
)
310 de_dbg(c
, "generating output file");
313 outf
= dbuf_create_output_file(c
, "exe", NULL
, 0);
// The relocation table immediately follows the 28-byte header.
315 #define O_RELOC_POS 28
316 o_start_of_code
= de_pad_to_n(O_RELOC_POS
+ d
->o_reloc_table
->len
, (i64
)d
->o_code_alignment
);
318 // Generate 28-byte header
319 dbuf_writeu16le(outf
, 0x5a4d); // 0 signature
// The size recorded in the header covers header + reloc table + code, not the
// overlay.
321 o_file_size
= o_start_of_code
+ d
->o_dcmpr_code
->len
;
322 dbuf_writeu16le(outf
, o_file_size
%512); // 2 # of bytes in last page
323 dbuf_writeu16le(outf
, (o_file_size
+511)/512); // 4 # of pages
// Each relocation entry is 4 bytes (two u16 values).
325 dbuf_writeu16le(outf
, d
->o_reloc_table
->len
/4); // 6 # of reloc tbl entries
326 dbuf_writeu16le(outf
, o_start_of_code
/ 16); // 8 hdrsize/16
328 // This logic is from unlzexe v0.7+ (A. Modra).
// Start from the compressed file's min/max memory values, then (when maxmem is
// nonzero) reduce minmem by an amount computed from field5 and field6, and
// reduce maxmem by the same amount unless it is 0xffff.
329 minmem
= d
->ihdr_minmem
;
330 maxmem
= d
->ihdr_maxmem
;
331 if(d
->ihdr_maxmem
!=0) {
332 minmem
-= (UI
)d
->ohdr
.field5
+ (((UI
)d
->ohdr
.field6
+15)/16) + 9;
334 if(d
->ihdr_maxmem
!= 0xffff) {
335 maxmem
-= (d
->ihdr_minmem
-minmem
);
339 dbuf_writeu16le(outf
, (i64
)minmem
); // 10 # of paragraphs required
340 dbuf_writeu16le(outf
, (i64
)maxmem
); // 12 # of paragraphs requested
// Original register values, as recorded in the LZEXE private header.
342 dbuf_writei16le(outf
, d
->ohdr
.regSS
); // 14
343 dbuf_writeu16le(outf
, d
->ohdr
.regSP
); // 16
344 dbuf_writeu16le(outf
, 0); // 18 checksum
345 dbuf_writeu16le(outf
, d
->ohdr
.regIP
); // 20
346 dbuf_writei16le(outf
, d
->ohdr
.regCS
); // 22
347 dbuf_writeu16le(outf
, O_RELOC_POS
); // 24 reloc_tbl_pos
348 dbuf_writeu16le(outf
, 0); // 26 overlay indicator
350 // Write the relocation table
// NOTE(review): dbuf_truncate() is presumably used here to force the output
// length to exactly O_RELOC_POS before appending -- confirm its semantics.
351 dbuf_truncate(outf
, O_RELOC_POS
);
352 dbuf_copy(d
->o_reloc_table
, 0, d
->o_reloc_table
->len
, outf
);
354 // Write the decompressed code
// NOTE(review): likewise, presumably pads the output up to the aligned code
// start position -- confirm.
355 dbuf_truncate(outf
, o_start_of_code
);
356 dbuf_copy(d
->o_dcmpr_code
, 0, d
->o_dcmpr_code
->len
, outf
);
358 // Copy the overlay segment.
359 // Normal LZEXE files never have such a thing, but some third-party utilities
360 // construct such files.
361 if(d
->ei
->overlay_len
>0) {
362 de_dbg(c
, "overlay data at %"I64_FMT
", len=%"I64_FMT
, d
->ei
->end_of_dos_code
,
364 dbuf_copy(c
->infile
, d
->ei
->end_of_dos_code
, d
->ei
->overlay_len
, outf
);
368 de_dbg_indent(c
, -1);
370 de_stdwarn_execomp(c
);
// Locate the compressed relocation table and decode it with the decoder that
// matches the detected variant: v0.90 has its own format; everything else
// uses the v0.91 decoder.
374 static void read_reloc_tbl(deark
*c
, lctx
*d
)
// field6 of the private header gives the distance from the header to the end
// of the relocation table.
376 d
->end_of_reloc_tbl
= d
->special_hdr_pos
+ d
->ohdr
.field6
;
// Sanity check: the table must not extend past the end of the input file.
377 if(d
->end_of_reloc_tbl
> c
->infile
->len
) {
381 if(d
->ver
==LZEXE_VER_090
) {
382 do_decode_reloc_tbl_v090(c
, d
);
385 do_decode_reloc_tbl_v091(c
, d
);
391 // Refer to detect_execomp_lzexe() (in another file).
// Map an LZEXE_VER_* code to a printable variant name.
// Returns "?" if the code is not one of the known variants.
392 static const char *get_lzexe_subfmt_name(int n
)
394 const char *name
= NULL
;
397 case LZEXE_VER_090
: name
= "v0.90"; break;
398 case LZEXE_VER_091
: name
= "v0.91"; break;
399 case LZEXE_VER_091E
: name
= "v0.91e"; break;
400 case LZEXE_VER_LHARK_SFX
: name
= "v0.91-LHARK-SFX"; break;
401 case LZEXE_VER_PCX2EXE
: name
= "v0.91-PCX2EXE"; break;
// name is still NULL if no case matched.
403 return name
?name
:"?";
// Module entry point. Reads the options, confirms that the input is an
// LZEXE-compressed EXE and identifies the variant, reads the headers and
// relocation table, decompresses the code, and writes either a reconstructed
// EXE file or the raw decompressed data.
406 static void de_run_lzexe(deark
*c
, de_module_params
*mparams
)
410 struct fmtutil_specialexe_detection_data edd
;
412 d
= de_malloc(c
, sizeof(lctx
));
414 d
->ei
= de_malloc(c
, sizeof(struct fmtutil_exe_info
));
// 0xff is the "option not given" default, so raw_mode ends up as 0, 1, or 0xff.
416 d
->raw_mode
= (u8
)de_get_ext_option_bool(c
, "lzexe:raw", 0xff);
418 s
= de_get_ext_option(c
, "execomp:align");
420 d
->o_code_alignment
= de_atoi(s
);
// Only 512 is honored as a non-default alignment; any other value becomes 16.
422 if(d
->o_code_alignment
!= 512) {
423 d
->o_code_alignment
= 16;
426 fmtutil_collect_exe_info(c
, c
->infile
, d
->ei
);
// Run executable-compression detection, restricted to LZEXE.
428 de_zeromem(&edd
, sizeof(struct fmtutil_specialexe_detection_data
));
429 edd
.restrict_to_fmt
= DE_SPECIALEXEFMT_LZEXE
;
430 fmtutil_detect_execomp(c
, d
->ei
, &edd
);
431 if(edd
.detected_fmt
!=DE_SPECIALEXEFMT_LZEXE
) {
432 de_err(c
, "Not an LZEXE-compressed file");
435 de_declare_fmt(c
, "LZEXE-compressed EXE");
436 d
->ver
= (int)edd
.detected_subfmt
;
437 de_dbg(c
, "LZEXE variant: %s", get_lzexe_subfmt_name(d
->ver
));
// Fully supported variants: both output modes are available.
439 d
->can_decompress_to_exe
= 1;
440 d
->can_decompress_to_raw
= 1;
// For these variants, use the alternate CS/IP values reported by the detector,
// recompute the entry point from them, and allow raw output only.
442 else if(d
->ver
==LZEXE_VER_LHARK_SFX
|| d
->ver
==LZEXE_VER_PCX2EXE
) {
443 d
->ei
->regCS
= edd
.regCS_2
;
444 d
->ei
->regIP
= edd
.regIP_2
;
445 d
->ei
->entry_point
= d
->ei
->start_of_dos_code
+ d
->ei
->regCS
*16 + d
->ei
->regIP
;
446 d
->can_decompress_to_exe
= 0;
447 d
->can_decompress_to_raw
= 1;
// Fail if nothing can be produced, or if the user explicitly turned raw mode
// off (raw_mode==0) for a variant that cannot be written as an EXE.
450 if(!d
->can_decompress_to_raw
||
451 (d
->raw_mode
==0 && !d
->can_decompress_to_exe
))
453 de_err(c
, "Unsupported LZEXE variant");
// Raw mode was not specified and EXE output is impossible: tell the user how
// to request raw output.
457 if(d
->raw_mode
==0xff && !d
->can_decompress_to_exe
) {
458 de_err(c
, "This LZEXE variant is not fully supported");
459 de_info(c
, "Note: Try \"-opt lzexe:raw\" to decompress the raw data");
// In-memory buffers for the decoded relocation table and decompressed code.
463 d
->o_reloc_table
= dbuf_create_membuf(c
, 0, 0);
464 d
->o_dcmpr_code
= dbuf_create_membuf(c
, 0, 0);
465 dbuf_enable_wbuffer(d
->o_dcmpr_code
);
467 do_read_header(c
, d
);
468 if(d
->errflag
) goto done
;
// The LZEXE private header is located at offset 0 of the code segment (CS:0).
470 d
->special_hdr_pos
= d
->ei
->start_of_dos_code
+ d
->ei
->regCS
*16;
471 if(d
->special_hdr_pos
> c
->infile
->len
) {
475 read_special_hdr(c
, d
, d
->special_hdr_pos
);
476 if(d
->errflag
) goto done
;
478 // TODO? Should we do this even if raw_mode==1?
479 read_reloc_tbl(c
, d
);
480 if(d
->errflag
) goto done
;
482 do_decompress_code(c
, d
);
483 dbuf_flush(d
->o_dcmpr_code
);
484 if(d
->errflag
) goto done
;
// Write the output: raw data, or a reconstructed EXE (the selecting condition
// is not visible in this excerpt).
487 do_write_data_only(c
, d
);
490 do_write_dcmpr(c
, d
);
// Emit a generic error message only if a more specific one was not already
// reported.
496 if(d
->errflag
&& !d
->errmsg_handled
) {
497 de_err(c
, "LZEXE decompression failed");
500 dbuf_close(d
->o_reloc_table
);
501 dbuf_close(d
->o_dcmpr_code
);
// Print help text for the module-specific options ("lzexe:raw" and
// "execomp:align").
507 static void de_help_lzexe(deark
*c
)
509 de_msg(c
, "-opt lzexe:raw : Instead of an EXE file, write raw decompressed data");
510 de_msg(c
, "-opt execomp:align=<16|512> : Alignment of code segment "
// Module registration: fills in the module-info struct with this module's
// description, run function, and help function.
514 void de_module_lzexe(deark
*c
, struct deark_module_info
*mi
)
517 mi
->desc
= "LZEXE-compressed EXE";
518 mi
->run_fn
= de_run_lzexe
;
519 mi
->help_fn
= de_help_lzexe
;