fnt: Improved error handling, etc.
[deark.git] / modules / exepack.c
blobc109336ceb75bafac4c23bbf116694ffad188d79
1 // This file is part of Deark.
2 // Copyright (C) 2022 Jason Summers
3 // See the file COPYING for terms of use.
5 // Decompress EXEPACK executable compression
7 #include <deark-private.h>
8 #include <deark-fmtutil.h>
9 DE_DECLARE_MODULE(de_module_exepack);
11 struct ohdr_struct {
12 i64 regSS;
13 i64 regSP;
14 i64 regCS;
15 i64 regIP;
16 i64 exepack_size;
17 i64 dest_len;
18 i64 skip_len;
21 typedef struct localctx_struct {
22 int errflag;
23 int errmsg_handled;
24 struct fmtutil_exe_info *ei; // For the original, compressed, file
26 u8 detected_subfmt;
27 i64 hdrpos; // Start of exepack header (i.e. the IP field)
28 i64 decoder_len;
30 dbuf *o_reloc_table;
31 dbuf *o_dcmpr_code;
33 // Fields from EXEPACK header. Some will be written to the reconstructed
34 // decompressed file.
35 struct ohdr_struct ohdr;
36 } lctx;
38 static void do_read_reloc_tbl(deark *c, lctx *d)
40 i64 reloc_pos;
41 i64 reloc_endpos;
43 reloc_pos = d->ei->entry_point + d->decoder_len;
44 reloc_endpos = d->hdrpos + d->ohdr.exepack_size;
46 if(!fmtutil_decompress_exepack_reloc_tbl(c, reloc_pos, reloc_endpos, d->o_reloc_table)) {
47 d->errflag = 1;
51 static void do_decompress_code(deark *c, lctx *d)
53 i64 compressed_len;
54 i64 uncompressed_len;
55 u8 *buf = NULL;
56 i64 buf_alloc;
57 i64 src, dst;
59 compressed_len = 16*(d->ei->regCS - d->ohdr.skip_len + 1);
60 uncompressed_len = 16*(d->ohdr.dest_len - d->ohdr.skip_len + 1);
61 de_dbg(c, "compressed data: pos=%"I64_FMT", len=%"I64_FMT", end=%"I64_FMT,
62 d->ei->start_of_dos_code, compressed_len,
63 d->ei->start_of_dos_code + compressed_len);
64 de_dbg(c, "uncompressed len: %"I64_FMT, uncompressed_len);
65 if(compressed_len<0 || uncompressed_len<0 ||
66 (d->ei->start_of_dos_code+compressed_len > c->infile->len))
68 d->errflag = 1; goto done;
71 // TODO: It would be safer to do all the work inside a membuf, but the nature
72 // of the EXEPACK algorithm could make that inefficient.
73 buf_alloc = de_max_int(compressed_len, uncompressed_len);
74 // Though the size is untrusted, it's impossible for it to be more than about 1MB.
75 buf = de_malloc(c, buf_alloc);
76 de_read(buf, d->ei->start_of_dos_code, compressed_len);
78 src = compressed_len;
79 dst = uncompressed_len;
81 while(1) {
82 u8 opcode;
84 if(src<1) { d->errflag = 1; goto done; };
85 opcode = buf[--src];
87 if(opcode>=0xb0 && opcode<=0xb3) { // opcodes followed by a count
88 UI i;
89 UI count;
91 if(src<2) { d->errflag = 1; goto done; };
92 count = (UI)buf[--src];
93 count = (count<<8) | buf[--src];
94 if(dst<(i64)count) { d->errflag = 1; goto done; };
96 if(opcode==0xb0 || opcode==0xb1) { // (run)
97 u8 n;
99 if(src<1) { d->errflag = 1; goto done; };
100 n = buf[--src];
101 for(i=0; i<count; i++) {
102 buf[--dst] = n;
105 else { // 0xb2 or 0xb3 (noncompressed bytes)
106 if(src<(i64)count) { d->errflag = 1; goto done; };
107 for(i=0; i<count; i++) {
108 buf[--dst] = buf[--src];
112 else if(opcode==0xff && dst==uncompressed_len) {
113 ; // Filler byte(s) at the end of compressed data
115 else {
116 d->errflag = 1;
117 goto done;
120 if(opcode==0xb1 || opcode==0xb3) {
121 break; // Normal completion
125 dbuf_write(d->o_dcmpr_code, buf, uncompressed_len);
126 done:
127 de_free(c, buf);
130 static void find_decoder_len(deark *c, lctx *d)
132 int ret;
133 int method = 0;
134 i64 foundpos = 0;
135 i64 haystack_pos;
136 i64 haystack_len;
138 haystack_pos = d->ei->entry_point+220;
139 haystack_len = 100;
141 // Look for the error message.
142 ret = dbuf_search(c->infile, (const u8*)"Packed file is corrupt", 22,
143 haystack_pos, haystack_len, &foundpos);
144 if(ret) {
145 d->decoder_len = foundpos+22 - d->ei->entry_point;
146 method = 1;
147 goto done;
150 // If that fails, look for the byte pattern that immediately precedes the
151 // error message.
152 ret = dbuf_search(c->infile, (const u8*)"\xcd\x21\xb8\xff\x4c\xcd\x21", 7,
153 haystack_pos, haystack_len, &foundpos);
154 if(ret) {
155 d->decoder_len = foundpos+7+22 - d->ei->entry_point;
156 method = 2;
157 goto done;
160 // Last resort: Guess the length.
161 switch(d->detected_subfmt) {
162 case 1: d->decoder_len = 258; break;
163 case 2: d->decoder_len = 279; break;
164 case 3: d->decoder_len = 277; break;
165 case 4: case 10: d->decoder_len = 283; break;
166 case 5: case 6: d->decoder_len = 290; break;
168 method = 3;
170 done:
171 if(d->decoder_len) {
172 de_dbg(c, "decoder len: %"I64_FMT" (found by method %d)", d->decoder_len, method);
174 else {
175 de_err(c, "Could not find relocation table");
176 d->errmsg_handled = 1;
177 d->errflag = 1;
181 static void do_read_header(deark *c, lctx *d)
183 i64 hdrsize;
184 i64 pos;
185 int saved_indent_level;
187 de_dbg_indent_save(c, &saved_indent_level);
188 d->hdrpos = d->ei->start_of_dos_code + d->ei->regCS * 16;
189 hdrsize = d->ei->regIP;
190 de_dbg(c, "exepack header at %"I64_FMT", len=%d", d->hdrpos, (int)hdrsize);
191 if(hdrsize!=16 && hdrsize!=18) {
192 d->errflag = 1;
193 goto done;
195 pos = d->hdrpos;
196 de_dbg_indent(c, 1);
197 d->ohdr.regIP = de_getu16le_p(&pos);
198 de_dbg(c, "ip: %u", (UI)d->ohdr.regIP);
199 d->ohdr.regCS = de_geti16le_p(&pos);
200 de_dbg(c, "cs: %d", (int)d->ohdr.regCS);
201 pos += 2; // "mem_start", just a placeholder
202 d->ohdr.exepack_size = de_getu16le_p(&pos);
203 de_dbg(c, "exepack size: %u", (UI)d->ohdr.exepack_size);
204 d->ohdr.regSP = de_getu16le_p(&pos);
205 de_dbg(c, "sp: %u", (UI)d->ohdr.regSP);
206 d->ohdr.regSS = de_geti16le_p(&pos);
207 de_dbg(c, "ss: %d", (int)d->ohdr.regSS);
208 d->ohdr.dest_len = de_getu16le_p(&pos);
209 de_dbg(c, "dest len: %u", (UI)d->ohdr.dest_len);
210 if(hdrsize>=18) {
211 d->ohdr.skip_len = de_getu16le_p(&pos);
212 de_dbg(c, "skip len: %u", (UI)d->ohdr.skip_len);
214 else {
215 d->ohdr.skip_len = 1;
218 done:
219 de_dbg_indent_restore(c, saved_indent_level);
222 static void do_write_dcmpr(deark *c, lctx *d)
224 dbuf *outf = NULL;
225 i64 ihdr_minmem, ihdr_maxmem;
226 i64 o_minmem;
227 i64 o_start_of_code; // o_ means output
228 i64 o_reloc_pos; // Where we actually write the reloc table
229 i64 o_reloc_pos_field; // What we write to the header
230 i64 o_file_size; // not including overlay
231 i64 cmprprog_mem_tot; // total memory consumed+reserved by the exepacked program
232 u8 have_reloc_pos;
233 i64 elided_bytes_pos = 28;
234 i64 elided_bytes_len = 0;
236 outf = dbuf_create_output_file(c, "exe", NULL, 0);
238 if(d->ei->reloc_table_pos<28 || d->ei->reloc_table_pos>d->ei->start_of_dos_code) {
239 have_reloc_pos = 0;
241 else if (d->o_reloc_table->len==0) {
242 // EXEPACK behaves differently if an empty reloc table has an offset <=28,
243 // versus >28.
244 // That's why this is ">28", when it seems like it should be ">=28".
245 // (TODO: Test different EXEPACK versions.)
246 have_reloc_pos = (d->ei->reloc_table_pos>28);
248 else {
249 have_reloc_pos = 1;
252 if(d->o_reloc_table->len==0 && !have_reloc_pos) {
253 // In this case, EXEPACK retains everything up to start_of_code,
254 // so we ought to keep it.
255 o_reloc_pos_field = d->ei->reloc_table_pos;
256 o_reloc_pos = d->ei->start_of_dos_code;
257 o_start_of_code = d->ei->start_of_dos_code;
259 else {
260 if(have_reloc_pos) {
261 o_reloc_pos = d->ei->reloc_table_pos;
263 else {
264 o_reloc_pos = 28;
266 o_reloc_pos_field = o_reloc_pos;
267 o_start_of_code = de_pad_to_n(o_reloc_pos + d->o_reloc_table->len, 16);
269 elided_bytes_pos = o_reloc_pos;
270 elided_bytes_len = d->ei->start_of_dos_code - elided_bytes_pos;
273 o_file_size = o_start_of_code + d->o_dcmpr_code->len;
275 ihdr_minmem = de_getu16le(10);
276 ihdr_maxmem = de_getu16le(12);
278 cmprprog_mem_tot = (d->ei->end_of_dos_code - d->ei->start_of_dos_code) + ihdr_minmem*16;
279 cmprprog_mem_tot = de_pad_to_n(cmprprog_mem_tot, 16);
281 // Try to set minmem so that the total memory is the same, or at least does
282 // not decrease.
283 if(cmprprog_mem_tot >= d->o_dcmpr_code->len) {
284 // This could be an overestimate, for small programs.
285 o_minmem = de_pad_to_n(cmprprog_mem_tot - d->o_dcmpr_code->len, 16)/16;
287 else {
288 o_minmem = 0;
291 // Generate 28-byte header
292 dbuf_writeu16le(outf, 0x5a4d); // 0 signature
293 dbuf_writeu16le(outf, o_file_size%512); // 2 # of bytes in last page
294 dbuf_writeu16le(outf, (o_file_size+511)/512); // 4 # of pages
295 dbuf_writeu16le(outf, d->o_reloc_table->len/4); // 6 # of reloc tbl entries
296 dbuf_writeu16le(outf, o_start_of_code / 16); // 8 hdrsize/16
297 dbuf_writeu16le(outf, o_minmem);
298 dbuf_writeu16le(outf, ihdr_maxmem);
299 dbuf_writei16le(outf, d->ohdr.regSS); // 14 ss
300 dbuf_writeu16le(outf, d->ohdr.regSP); // 16 sp
301 dbuf_writeu16le(outf, 0); // 18 checksum
302 dbuf_writeu16le(outf, d->ohdr.regIP); // 20 ip
303 dbuf_writei16le(outf, d->ohdr.regCS); // 22 cs
304 dbuf_writeu16le(outf, o_reloc_pos_field); // 24 reloc_tbl_pos
305 dbuf_writeu16le(outf, 0); // 26 overlay indicator
307 // Copy extra data between header and reloc table
308 dbuf_copy(c->infile, 28, o_reloc_pos-28, outf);
310 // Write the relocation table
311 dbuf_truncate(outf, o_reloc_pos);
312 dbuf_copy(d->o_reloc_table, 0, d->o_reloc_table->len, outf);
314 // Write the decompressed code
315 dbuf_truncate(outf, o_start_of_code);
316 dbuf_copy(d->o_dcmpr_code, 0, d->o_dcmpr_code->len, outf);
318 // Copy overlay data
319 if(d->ei->overlay_len>0) {
320 de_dbg(c, "overlay data at %"I64_FMT", len=%"I64_FMT, d->ei->end_of_dos_code,
321 d->ei->overlay_len);
322 dbuf_copy(c->infile, d->ei->end_of_dos_code, d->ei->overlay_len, outf);
325 dbuf_close(outf);
327 if(elided_bytes_len>0) {
328 // Microsoft has created (hacked?) files like this, that have data where
329 // they shouldn't.
330 if(!dbuf_is_all_zeroes(c->infile, elided_bytes_pos, elided_bytes_len)) {
331 de_warn(c, "Some unexpected data (%"I64_FMT" bytes at %"I64_FMT") was not "
332 "preserved. This file might not run properly.",
333 elided_bytes_len, elided_bytes_pos);
337 if(!d->errflag) {
338 de_stdwarn_execomp(c);
342 static const char *get_variant_name(u8 v)
344 const char *name = NULL;
346 switch(v) {
347 case 1: name="EXEPACK 3.00/4.00/etc."; break;
348 case 2: name="EXEPACK 4.03"; break;
349 case 3: name="LINK 3.60/etc."; break;
350 case 4: name="EXEPACK 4.05/4.06"; break;
351 case 5: name="LINK 5.60/etc. var. 1"; break;
352 case 6: name="LINK 5.60/etc. var. 2"; break;
353 case 7: name="WordPerfect variant"; break;
354 case 10: name="exepack_DF"; break;
355 case 11: name="EXPAKFIX-patched"; break;
356 case 12: name="EXEPATCK-patched"; break;
358 return name?name:"?";
361 static void de_run_exepack(deark *c, de_module_params *mparams)
363 lctx *d = NULL;
364 struct fmtutil_specialexe_detection_data edd;
366 d = de_malloc(c, sizeof(lctx));
367 d->o_reloc_table = dbuf_create_membuf(c, 0, 0);
368 d->o_dcmpr_code = dbuf_create_membuf(c, 0, 0);
369 d->ei = de_malloc(c, sizeof(struct fmtutil_exe_info));
370 fmtutil_collect_exe_info(c, c->infile, d->ei);
372 de_zeromem(&edd, sizeof(struct fmtutil_specialexe_detection_data));
373 edd.restrict_to_fmt = DE_SPECIALEXEFMT_EXEPACK;
374 fmtutil_detect_execomp(c, d->ei, &edd);
375 if(edd.detected_fmt != DE_SPECIALEXEFMT_EXEPACK) {
376 de_err(c, "Not an EXEPACK-compressed file");
377 goto done;
379 de_declare_fmt(c, "EXEPACK-compressed EXE");
381 d->detected_subfmt = edd.detected_subfmt;
382 de_dbg(c, "variant id: %u (%s)", (UI)d->detected_subfmt, get_variant_name(d->detected_subfmt));
384 do_read_header(c, d);
385 if(d->errflag) goto done;
386 find_decoder_len(c, d);
387 if(d->errflag) goto done;
389 do_read_reloc_tbl(c, d);
390 if(d->errflag) goto done;
392 do_decompress_code(c, d);
393 if(d->errflag) goto done;
395 do_write_dcmpr(c, d);
397 done:
398 if(d) {
399 if(d->errflag && !d->errmsg_handled) {
400 de_err(c, "EXEPACK decompression failed");
402 dbuf_close(d->o_reloc_table);
403 dbuf_close(d->o_dcmpr_code);
404 de_free(c, d->ei);
405 de_free(c, d);
409 void de_module_exepack(deark *c, struct deark_module_info *mi)
411 mi->id = "exepack";
412 mi->desc = "EXEPACK-compressed EXE";
413 mi->run_fn = de_run_exepack;