fnt: Improved error handling, etc.
[deark.git] / modules / lzexe.c
bloba7015860e1c21b94eb4994827048f28772a405d9
1 // This file is part of Deark.
2 // Copyright (C) 2021 Jason Summers
3 // See the file COPYING for terms of use.
5 // Decompress LZEXE executable compression
7 #include <deark-private.h>
8 #include <deark-fmtutil.h>
9 DE_DECLARE_MODULE(de_module_lzexe);
// Identifiers for the LZEXE variants this module knows about.
// These numbers are shared with fmtutil-exe.c: do not change them here
// unless they are changed there as well.
#define LZEXE_VER_090        1
#define LZEXE_VER_091        2
#define LZEXE_VER_091E       3
#define LZEXE_VER_LHARK_SFX  102
#define LZEXE_VER_PCX2EXE    202
18 struct ohdr_struct {
19 i64 regSS;
20 i64 regSP;
21 i64 regCS;
22 i64 regIP;
23 i64 cmpr_len_para;
24 i64 field5;
25 i64 field6;
26 i64 field7;
27 i64 field8;
30 typedef struct localctx_struct {
31 int ver; // 1=0.90, 2=0.91, 3=0.91e
32 int errflag;
33 int errmsg_handled;
34 int o_code_alignment;
35 u8 raw_mode; // 0xff = not set
36 u8 can_decompress_to_exe;
37 u8 can_decompress_to_raw;
38 struct fmtutil_exe_info *ei;
40 UI ihdr_minmem;
41 UI ihdr_maxmem;
43 i64 special_hdr_pos;
44 i64 end_of_reloc_tbl;
45 struct ohdr_struct ohdr;
47 dbuf *o_reloc_table;
48 dbuf *o_dcmpr_code;
50 i64 dcmpr_cur_ipos;
51 struct de_bitbuf_lowlevel bbll;
52 } lctx;
54 // Read what we need from the 28-byte DOS header
55 static void do_read_header(deark *c, lctx *d)
57 d->ihdr_minmem = (UI)de_getu16le(10);
58 d->ihdr_maxmem = (UI)de_getu16le(12);
60 if(d->ei->regCS < 0) {
61 // CS is signed. If it's ever negative in an LZEXE'd file, I'm not sure
62 // how to handle that.
63 d->errflag = 1;
67 static void read_special_hdr(deark *c, lctx *d, i64 pos1)
69 i64 pos = pos1;
71 de_dbg(c, "LZEXE private info at %"I64_FMT, pos1);
72 de_dbg_indent(c, 1);
74 d->ohdr.regIP = de_getu16le_p(&pos);
75 de_dbg(c, "ip: %u", (UI)d->ohdr.regIP);
76 d->ohdr.regCS = de_geti16le_p(&pos);
77 de_dbg(c, "cs: %d", (int)d->ohdr.regCS);
78 d->ohdr.regSP = de_getu16le_p(&pos);
79 de_dbg(c, "sp: %u", (UI)d->ohdr.regSP);
80 d->ohdr.regSS = de_geti16le_p(&pos);
81 de_dbg(c, "ss: %d", (int)d->ohdr.regSS);
82 d->ohdr.cmpr_len_para = de_getu16le_p(&pos);
83 de_dbg(c, "cmpr len: %u ("DE_CHAR_TIMES"16=%"I64_FMT")", (int)d->ohdr.cmpr_len_para,
84 (i64)(d->ohdr.cmpr_len_para*16));
86 // TODO: These fields could be named better
87 d->ohdr.field5 = de_getu16le_p(&pos);
88 de_dbg(c, "field5: %u", (UI)d->ohdr.field5);
89 d->ohdr.field6 = de_getu16le_p(&pos);
90 de_dbg(c, "field6: %u", (UI)d->ohdr.field6);
91 if(d->ver==LZEXE_VER_090) {
92 d->ohdr.field7 = de_getu16le_p(&pos);
93 de_dbg(c, "field7: %u", (UI)d->ohdr.field7);
94 d->ohdr.field8 = de_getu16le_p(&pos);
95 de_dbg(c, "field8: %u", (UI)d->ohdr.field8);
98 de_dbg_indent(c, -1);
101 static void do_decode_reloc_tbl_v090(deark *c, lctx *d)
103 i64 pos;
104 i64 endpos;
106 pos = d->special_hdr_pos + 413;
107 endpos = d->end_of_reloc_tbl;
108 if(!fmtutil_decompress_exepack_reloc_tbl(c, pos, endpos, d->o_reloc_table)) {
109 d->errflag = 1;
113 static void do_decode_reloc_tbl_v091(deark *c, lctx *d)
115 i64 ipos;
116 int reloc_count = 0;
117 UI reloc = 0;
119 ipos = d->special_hdr_pos + 344;
120 de_dbg(c, "compressed reloc table: pos=%"I64_FMT, ipos);
121 de_dbg_indent(c, 1);
123 while(1) {
124 u8 x;
126 if(ipos>=d->end_of_reloc_tbl || reloc_count>65535) {
127 d->errflag = 1;
128 goto done;
131 x = (UI)de_getbyte_p(&ipos);
132 if(x==0) {
133 UI x2;
135 x2 = (UI)de_getu16le_p(&ipos);
136 if(x2==0) {
137 reloc += 0xfff0;
138 continue;
140 else if(x2==1) {
141 break;
143 else {
144 reloc += x2;
147 else {
148 reloc += (UI)x;
151 de_dbg2(c, "reloc: %05x", reloc);
152 dbuf_writeu16le(d->o_reloc_table, (i64)(reloc&0x0f));
153 dbuf_writeu16le(d->o_reloc_table, (i64)(reloc>>4));
154 reloc_count++;
156 de_dbg(c, "reloc count: %d", (int)reloc_count);
158 done:
159 de_dbg_indent(c, -1);
162 static void fill_bitbuf(deark *c, lctx *d)
164 UI i;
166 if(d->errflag) return;
167 if(d->dcmpr_cur_ipos+2 > d->special_hdr_pos)
169 d->errflag = 1;
170 return;
173 for(i=0; i<2; i++) {
174 u8 b;
175 b = de_getbyte_p(&d->dcmpr_cur_ipos);
176 de_bitbuf_lowlevel_add_byte(&d->bbll, b);
180 static u8 lzexe_getbit(deark *c, lctx *d)
182 u8 v;
184 if(d->errflag) return 0;
186 if(d->bbll.nbits_in_bitbuf==0) {
187 fill_bitbuf(c, d);
190 v = (u8)de_bitbuf_lowlevel_get_bits(&d->bbll, 1);
192 if(d->bbll.nbits_in_bitbuf==0) {
193 fill_bitbuf(c, d);
196 return v;
199 static void my_lz77buf_writebytecb(struct de_lz77buffer *rb, u8 n)
201 lctx *d = (lctx*)rb->userdata;
203 dbuf_writebyte(d->o_dcmpr_code, n);
206 static void do_decompress_code(deark *c, lctx *d)
208 i64 ipos1;
209 struct de_lz77buffer *ringbuf = NULL;
211 // (I'd expect ipos1 to always equal d->ei->start_of_dos_code, but anyway...)
212 ipos1 = d->special_hdr_pos - d->ohdr.cmpr_len_para*16;
213 de_dbg(c, "decompressing cmpr code at %"I64_FMT, ipos1);
214 de_dbg_indent(c, 1);
216 ringbuf = de_lz77buffer_create(c, 8192);
217 ringbuf->userdata = (void*)d;
218 ringbuf->writebyte_cb = my_lz77buf_writebytecb;
220 d->dcmpr_cur_ipos = ipos1;
221 d->bbll.is_lsb = 1;
222 de_bitbuf_lowlevel_empty(&d->bbll);
224 while(1) {
225 u8 x, x2, x3;
226 UI matchpos;
227 UI matchlen;
228 u8 matchtype = 1;
230 if(d->errflag) goto done;
232 x = lzexe_getbit(c, d);
233 if(x) { // 1...
234 u8 b;
236 b = de_getbyte_p(&d->dcmpr_cur_ipos);
237 de_lz77buffer_add_literal_byte(ringbuf, (u8)b);
238 continue;
241 x = lzexe_getbit(c, d);
242 if(x==0) { // 00...
243 x2 = lzexe_getbit(c, d);
244 x3 = lzexe_getbit(c, d);
245 matchlen = ((UI)x2<<1) + (UI)x3 + 2;
246 matchpos = 0xffU-(UI)de_getbyte_p(&d->dcmpr_cur_ipos);
248 else { // 01...
249 u8 lb, hb;
251 lb = de_getbyte_p(&d->dcmpr_cur_ipos);
252 hb = de_getbyte_p(&d->dcmpr_cur_ipos);
254 matchpos = 0x1fffU - ((((UI)(hb & 0xf8))<<5) | (UI)lb);
256 if((hb & 0x07)==0) {
257 u8 xb;
259 matchtype = 3;
260 xb = de_getbyte_p(&d->dcmpr_cur_ipos);
262 if(xb==0) {
263 de_dbg3(c, "eof code");
264 goto after_decompress;
266 else if(xb==1) {
267 continue; // something about segments...
269 else {
270 matchlen = (UI)xb + 1;
273 else {
274 matchtype = 2;
275 matchlen = (UI)(hb & 0x07) + 2;
279 if(c->debug_level>=3) {
280 de_dbg3(c, "match (%u) pos=%u len=%u", (UI)matchtype, matchpos+1, matchlen);
282 de_lz77buffer_copy_from_hist(ringbuf,
283 (UI)(ringbuf->curpos-1-matchpos), matchlen);
286 after_decompress:
287 dbuf_flush(d->o_dcmpr_code);
288 de_dbg(c, "decompressed %"I64_FMT" bytes to %"I64_FMT, (d->dcmpr_cur_ipos-ipos1),
289 d->o_dcmpr_code->len);
291 done:
292 de_lz77buffer_destroy(c, ringbuf);
293 de_dbg_indent(c, -1);
296 static void do_write_data_only(deark *c, lctx *d)
298 if(!d->o_dcmpr_code) return;
299 dbuf_create_file_from_slice(d->o_dcmpr_code, 0, d->o_dcmpr_code->len, "bin", NULL, 0);
302 // Generate the decompressed file
303 static void do_write_dcmpr(deark *c, lctx *d)
305 dbuf *outf = NULL;
306 i64 o_file_size;
307 i64 o_start_of_code;
308 UI minmem, maxmem;
310 de_dbg(c, "generating output file");
311 de_dbg_indent(c, 1);
313 outf = dbuf_create_output_file(c, "exe", NULL, 0);
315 #define O_RELOC_POS 28
316 o_start_of_code = de_pad_to_n(O_RELOC_POS + d->o_reloc_table->len, (i64)d->o_code_alignment);
318 // Generate 28-byte header
319 dbuf_writeu16le(outf, 0x5a4d); // 0 signature
321 o_file_size = o_start_of_code + d->o_dcmpr_code->len;
322 dbuf_writeu16le(outf, o_file_size%512); // 2 # of bytes in last page
323 dbuf_writeu16le(outf, (o_file_size+511)/512); // 4 # of pages
325 dbuf_writeu16le(outf, d->o_reloc_table->len/4); // 6 # of reloc tbl entries
326 dbuf_writeu16le(outf, o_start_of_code / 16); // 8 hdrsize/16
328 // This logic is from unlzexe v0.7+ (A. Modra).
329 minmem = d->ihdr_minmem;
330 maxmem = d->ihdr_maxmem;
331 if(d->ihdr_maxmem!=0) {
332 minmem -= (UI)d->ohdr.field5 + (((UI)d->ohdr.field6+15)/16) + 9;
333 minmem &= 0xffff;
334 if(d->ihdr_maxmem != 0xffff) {
335 maxmem -= (d->ihdr_minmem-minmem);
336 maxmem &= 0xffff;
339 dbuf_writeu16le(outf, (i64)minmem); // 10 # of paragraphs required
340 dbuf_writeu16le(outf, (i64)maxmem); // 12 # of paragraphs requested
342 dbuf_writei16le(outf, d->ohdr.regSS); // 14
343 dbuf_writeu16le(outf, d->ohdr.regSP); // 16
344 dbuf_writeu16le(outf, 0); // 18 checksum
345 dbuf_writeu16le(outf, d->ohdr.regIP); // 20
346 dbuf_writei16le(outf, d->ohdr.regCS); // 22
347 dbuf_writeu16le(outf, O_RELOC_POS); // 24 reloc_tbl_pos
348 dbuf_writeu16le(outf, 0); // 26 overlay indicator
350 // Write the relocation table
351 dbuf_truncate(outf, O_RELOC_POS);
352 dbuf_copy(d->o_reloc_table, 0, d->o_reloc_table->len, outf);
354 // Write the decompressed code
355 dbuf_truncate(outf, o_start_of_code);
356 dbuf_copy(d->o_dcmpr_code, 0, d->o_dcmpr_code->len, outf);
358 // Copy the overlay segment.
359 // Normal LZEXE files never have such a thing, but some third-party utilities
360 // construct such files.
361 if(d->ei->overlay_len>0) {
362 de_dbg(c, "overlay data at %"I64_FMT", len=%"I64_FMT, d->ei->end_of_dos_code,
363 d->ei->overlay_len);
364 dbuf_copy(c->infile, d->ei->end_of_dos_code, d->ei->overlay_len, outf);
367 dbuf_close(outf);
368 de_dbg_indent(c, -1);
369 if(!d->errflag) {
370 de_stdwarn_execomp(c);
374 static void read_reloc_tbl(deark *c, lctx *d)
376 d->end_of_reloc_tbl = d->special_hdr_pos + d->ohdr.field6;
377 if(d->end_of_reloc_tbl > c->infile->len) {
378 d->errflag = 1;
379 goto done;
381 if(d->ver==LZEXE_VER_090) {
382 do_decode_reloc_tbl_v090(c, d);
384 else {
385 do_decode_reloc_tbl_v091(c, d);
387 done:
391 // Refer to detect_execomp_lzexe() (in another file).
392 static const char *get_lzexe_subfmt_name(int n)
394 const char *name = NULL;
396 switch(n) {
397 case LZEXE_VER_090: name = "v0.90"; break;
398 case LZEXE_VER_091: name = "v0.91"; break;
399 case LZEXE_VER_091E: name = "v0.91e"; break;
400 case LZEXE_VER_LHARK_SFX: name = "v0.91-LHARK-SFX"; break;
401 case LZEXE_VER_PCX2EXE: name = "v0.91-PCX2EXE"; break;
403 return name?name:"?";
406 static void de_run_lzexe(deark *c, de_module_params *mparams)
408 lctx *d = NULL;
409 const char *s;
410 struct fmtutil_specialexe_detection_data edd;
412 d = de_malloc(c, sizeof(lctx));
414 d->ei = de_malloc(c, sizeof(struct fmtutil_exe_info));
416 d->raw_mode = (u8)de_get_ext_option_bool(c, "lzexe:raw", 0xff);
418 s = de_get_ext_option(c, "execomp:align");
419 if(s) {
420 d->o_code_alignment = de_atoi(s);
422 if(d->o_code_alignment != 512) {
423 d->o_code_alignment = 16;
426 fmtutil_collect_exe_info(c, c->infile, d->ei);
428 de_zeromem(&edd, sizeof(struct fmtutil_specialexe_detection_data));
429 edd.restrict_to_fmt = DE_SPECIALEXEFMT_LZEXE;
430 fmtutil_detect_execomp(c, d->ei, &edd);
431 if(edd.detected_fmt!=DE_SPECIALEXEFMT_LZEXE) {
432 de_err(c, "Not an LZEXE-compressed file");
433 goto done;
435 de_declare_fmt(c, "LZEXE-compressed EXE");
436 d->ver = (int)edd.detected_subfmt;
437 de_dbg(c, "LZEXE variant: %s", get_lzexe_subfmt_name(d->ver));
438 if(d->ver<=3) {
439 d->can_decompress_to_exe = 1;
440 d->can_decompress_to_raw = 1;
442 else if(d->ver==LZEXE_VER_LHARK_SFX || d->ver==LZEXE_VER_PCX2EXE) {
443 d->ei->regCS = edd.regCS_2;
444 d->ei->regIP = edd.regIP_2;
445 d->ei->entry_point = d->ei->start_of_dos_code + d->ei->regCS*16 + d->ei->regIP;
446 d->can_decompress_to_exe = 0;
447 d->can_decompress_to_raw = 1;
450 if(!d->can_decompress_to_raw ||
451 (d->raw_mode==0 && !d->can_decompress_to_exe))
453 de_err(c, "Unsupported LZEXE variant");
454 goto done;
457 if(d->raw_mode==0xff && !d->can_decompress_to_exe) {
458 de_err(c, "This LZEXE variant is not fully supported");
459 de_info(c, "Note: Try \"-opt lzexe:raw\" to decompress the raw data");
460 goto done;
463 d->o_reloc_table = dbuf_create_membuf(c, 0, 0);
464 d->o_dcmpr_code = dbuf_create_membuf(c, 0, 0);
465 dbuf_enable_wbuffer(d->o_dcmpr_code);
467 do_read_header(c, d);
468 if(d->errflag) goto done;
470 d->special_hdr_pos = d->ei->start_of_dos_code + d->ei->regCS*16;
471 if(d->special_hdr_pos > c->infile->len) {
472 d->errflag = 1;
473 return;
475 read_special_hdr(c, d, d->special_hdr_pos);
476 if(d->errflag) goto done;
478 // TODO? Should we do this even if raw_mode==1?
479 read_reloc_tbl(c, d);
480 if(d->errflag) goto done;
482 do_decompress_code(c, d);
483 dbuf_flush(d->o_dcmpr_code);
484 if(d->errflag) goto done;
486 if(d->raw_mode==1) {
487 do_write_data_only(c, d);
489 else {
490 do_write_dcmpr(c, d);
493 done:
495 if(d) {
496 if(d->errflag && !d->errmsg_handled) {
497 de_err(c, "LZEXE decompression failed");
500 dbuf_close(d->o_reloc_table);
501 dbuf_close(d->o_dcmpr_code);
502 de_free(c, d->ei);
503 de_free(c, d);
507 static void de_help_lzexe(deark *c)
509 de_msg(c, "-opt lzexe:raw : Instead of an EXE file, write raw decompressed data");
510 de_msg(c, "-opt execomp:align=<16|512> : Alignment of code segment "
511 "(in output file)");
514 void de_module_lzexe(deark *c, struct deark_module_info *mi)
516 mi->id = "lzexe";
517 mi->desc = "LZEXE-compressed EXE";
518 mi->run_fn = de_run_lzexe;
519 mi->help_fn = de_help_lzexe;