1 // This file is part of Deark.
2 // Copyright (C) 2022 Jason Summers
3 // See the file COPYING for terms of use.
5 // Decompress EXEPACK executable compression
7 #include <deark-private.h>
8 #include <deark-fmtutil.h>
9 DE_DECLARE_MODULE(de_module_exepack
);
// Per-run decoder state for the EXEPACK module.
// NOTE(review): this listing shows only some members and not the closing
// brace. Members used through d-> elsewhere in this file (errflag,
// errmsg_handled, detected_subfmt, decoder_len, o_reloc_table, o_dcmpr_code)
// are presumably declared on lines that are not visible here.
21 typedef struct localctx_struct
{
24 struct fmtutil_exe_info
*ei
; // For the original, compressed, file
// hdrpos is computed in do_read_header() as start_of_dos_code + regCS*16.
27 i64 hdrpos
; // Start of exepack header (i.e. the IP field)
33 // Fields from EXEPACK header. Some will be written to the reconstructed
// output header: do_write_dcmpr() writes regSS, regSP, regIP and regCS.
35 struct ohdr_struct ohdr
;
// Compute the byte range that holds the packed relocation table and pass it,
// together with d->o_reloc_table, to fmtutil_decompress_exepack_reloc_tbl().
// A zero return from that helper is treated as failure. (The body of the
// failure branch is not visible in this listing.)
//
// c: Deark context.
// d: module state; reads ei->entry_point, decoder_len, hdrpos and
//    ohdr.exepack_size.
38 static void do_read_reloc_tbl(deark
*c
, lctx
*d
)
// Start of the range: decoder_len bytes past the entry point.
43 reloc_pos
= d
->ei
->entry_point
+ d
->decoder_len
;
// End of the range: exepack_size bytes past the start of the EXEPACK header.
44 reloc_endpos
= d
->hdrpos
+ d
->ohdr
.exepack_size
;
46 if(!fmtutil_decompress_exepack_reloc_tbl(c
, reloc_pos
, reloc_endpos
, d
->o_reloc_table
)) {
// Decompress the EXEPACK-compressed code image and write the result to
// d->o_dcmpr_code.
//
// The compressed bytes are loaded at the start of a single buffer. They are
// then consumed from index src and written at index dst, both pre-decremented
// (buf[--src], buf[--dst]), so decoding runs from the end of the data toward
// the beginning within that one buffer.
//
// On any validation failure d->errflag is set and control jumps to the
// "done" label. (The label, the loop header and some short statements are
// not visible in this listing.)
//
// c: Deark context.
// d: module state; reads ei->regCS, ei->start_of_dos_code, ohdr.dest_len and
//    ohdr.skip_len.
51 static void do_decompress_code(deark
*c
, lctx
*d
)
// Both lengths are expressed in bytes: the header values are scaled by 16.
59 compressed_len
= 16*(d
->ei
->regCS
- d
->ohdr
.skip_len
+ 1);
60 uncompressed_len
= 16*(d
->ohdr
.dest_len
- d
->ohdr
.skip_len
+ 1);
61 de_dbg(c
, "compressed data: pos=%"I64_FMT
", len=%"I64_FMT
", end=%"I64_FMT
,
62 d
->ei
->start_of_dos_code
, compressed_len
,
63 d
->ei
->start_of_dos_code
+ compressed_len
);
64 de_dbg(c
, "uncompressed len: %"I64_FMT
, uncompressed_len
);
// Reject negative lengths, and compressed data that would extend past the
// end of the input file.
65 if(compressed_len
<0 || uncompressed_len
<0 ||
66 (d
->ei
->start_of_dos_code
+compressed_len
> c
->infile
->len
))
68 d
->errflag
= 1; goto done
;
71 // TODO: It would be safer to do all the work inside a membuf, but the nature
72 // of the EXEPACK algorithm could make that inefficient.
// The buffer must hold whichever of the two images is larger, since both
// share it.
73 buf_alloc
= de_max_int(compressed_len
, uncompressed_len
);
74 // Though the size is untrusted, it's impossible for it to be more than about 1MB.
75 buf
= de_malloc(c
, buf_alloc
);
// Load the compressed image at the start of buf.
76 de_read(buf
, d
->ei
->start_of_dos_code
, compressed_len
);
// Output cursor starts at the end of the uncompressed region.
// NOTE(review): the initialization of src is not visible in this listing.
79 dst
= uncompressed_len
;
// At least one source byte is needed for the next opcode.
84 if(src
<1) { d
->errflag
= 1; goto done
; };
87 if(opcode
>=0xb0 && opcode
<=0xb3) { // opcodes followed by a count
// Two more source bytes are needed for the 16-bit count.
91 if(src
<2) { d
->errflag
= 1; goto done
; };
// The first byte fetched (higher buffer index) becomes the high byte of
// count, the second one the low byte.
92 count
= (UI
)buf
[--src
];
93 count
= (count
<<8) | buf
[--src
];
// Writing count bytes must not move dst below the start of the buffer.
94 if(dst
<(i64
)count
) { d
->errflag
= 1; goto done
; };
96 if(opcode
==0xb0 || opcode
==0xb1) { // (run)
// One more source byte is required here.
// NOTE(review): the statement that reads it and the loop body are not
// visible; presumably that byte is the value repeated count times.
99 if(src
<1) { d
->errflag
= 1; goto done
; };
101 for(i
=0; i
<count
; i
++) {
105 else { // 0xb2 or 0xb3 (noncompressed bytes)
// count literal bytes must still be available in the source.
106 if(src
<(i64
)count
) { d
->errflag
= 1; goto done
; };
// Copy the literal bytes, moving both cursors toward the buffer start.
107 for(i
=0; i
<count
; i
++) {
108 buf
[--dst
] = buf
[--src
];
// 0xff is accepted only while no output has been produced yet, i.e. while
// dst still has its initial value.
112 else if(opcode
==0xff && dst
==uncompressed_len
) {
113 ; // Filler byte(s) at the end of compressed data
// The two odd opcodes (0xb1, 0xb3) end decompression.
120 if(opcode
==0xb1 || opcode
==0xb3) {
121 break; // Normal completion
// Emit the first uncompressed_len bytes of buf as the decompressed code.
125 dbuf_write(d
->o_dcmpr_code
, buf
, uncompressed_len
);
// Determine d->decoder_len, the distance from the entry point that
// do_read_reloc_tbl() uses as the start of the packed relocation table.
//
// Three strategies are visible, in this order:
//   1. search for the text "Packed file is corrupt";
//   2. search for a 7-byte pattern and add the 22-byte message length;
//   3. use a fixed length chosen by d->detected_subfmt.
// The tests on the search results and the assignments to "method" are not
// visible in this listing.
//
// c: Deark context.
// d: module state; reads ei->entry_point and detected_subfmt, and writes
//    decoder_len and errmsg_handled.
130 static void find_decoder_len(deark
*c
, lctx
*d
)
// Searching starts 220 bytes past the entry point.
// NOTE(review): the assignment of haystack_len is not visible here.
138 haystack_pos
= d
->ei
->entry_point
+220;
141 // Look for the error message.
142 ret
= dbuf_search(c
->infile
, (const u8
*)"Packed file is corrupt", 22,
143 haystack_pos
, haystack_len
, &foundpos
);
// The decoder is taken to end right after the 22-byte message.
145 d
->decoder_len
= foundpos
+22 - d
->ei
->entry_point
;
150 // If that fails, look for the byte pattern that immediately precedes the
// message. The length then also counts the 7 pattern bytes and the 22
// message bytes.
152 ret
= dbuf_search(c
->infile
, (const u8
*)"\xcd\x21\xb8\xff\x4c\xcd\x21", 7,
153 haystack_pos
, haystack_len
, &foundpos
);
155 d
->decoder_len
= foundpos
+7+22 - d
->ei
->entry_point
;
160 // Last resort: Guess the length.
// Fixed lengths per detected variant id (names are in get_variant_name()).
161 switch(d
->detected_subfmt
) {
162 case 1: d
->decoder_len
= 258; break;
163 case 2: d
->decoder_len
= 279; break;
164 case 3: d
->decoder_len
= 277; break;
165 case 4: case 10: d
->decoder_len
= 283; break;
166 case 5: case 6: d
->decoder_len
= 290; break;
172 de_dbg(c
, "decoder len: %"I64_FMT
" (found by method %d)", d
->decoder_len
, method
);
// Failure path: the error is reported here, and errmsg_handled suppresses
// the generic message in de_run_exepack().
175 de_err(c
, "Could not find relocation table");
176 d
->errmsg_handled
= 1;
// Read the EXEPACK header into d->ohdr, logging each field.
//
// The header location comes from the compressed file's own register values:
// it starts at start_of_dos_code + regCS*16, and its size is taken from regIP.
// Only sizes 16 and 18 pass the size test (the body of the rejecting branch
// is not visible in this listing).
//
// c: Deark context.
// d: module state; writes hdrpos and the ohdr fields.
181 static void do_read_header(deark
*c
, lctx
*d
)
185 int saved_indent_level
;
187 de_dbg_indent_save(c
, &saved_indent_level
);
188 d
->hdrpos
= d
->ei
->start_of_dos_code
+ d
->ei
->regCS
* 16;
189 hdrsize
= d
->ei
->regIP
;
190 de_dbg(c
, "exepack header at %"I64_FMT
", len=%d", d
->hdrpos
, (int)hdrsize
);
191 if(hdrsize
!=16 && hdrsize
!=18) {
// Fields are read in order through the pos cursor.
// NOTE(review): the initialization of pos is not visible here.
// ip is read as unsigned, cs as signed.
197 d
->ohdr
.regIP
= de_getu16le_p(&pos
);
198 de_dbg(c
, "ip: %u", (UI
)d
->ohdr
.regIP
);
199 d
->ohdr
.regCS
= de_geti16le_p(&pos
);
200 de_dbg(c
, "cs: %d", (int)d
->ohdr
.regCS
);
201 pos
+= 2; // "mem_start", just a placeholder
202 d
->ohdr
.exepack_size
= de_getu16le_p(&pos
);
203 de_dbg(c
, "exepack size: %u", (UI
)d
->ohdr
.exepack_size
);
// sp is read as unsigned, ss as signed.
204 d
->ohdr
.regSP
= de_getu16le_p(&pos
);
205 de_dbg(c
, "sp: %u", (UI
)d
->ohdr
.regSP
);
206 d
->ohdr
.regSS
= de_geti16le_p(&pos
);
207 de_dbg(c
, "ss: %d", (int)d
->ohdr
.regSS
);
208 d
->ohdr
.dest_len
= de_getu16le_p(&pos
);
209 de_dbg(c
, "dest len: %u", (UI
)d
->ohdr
.dest_len
);
// NOTE(review): the condition guarding the skip_len read is not visible;
// presumably the field is only present in the 18-byte header.
211 d
->ohdr
.skip_len
= de_getu16le_p(&pos
);
212 de_dbg(c
, "skip len: %u", (UI
)d
->ohdr
.skip_len
);
// Fallback value used when skip_len is not read from the file.
215 d
->ohdr
.skip_len
= 1;
219 de_dbg_indent_restore(c
, saved_indent_level
);
// Write the reconstructed, uncompressed EXE file.
//
// Output order: a 28-byte header, any original bytes between offset 28 and
// the relocation table, the relocation table (d->o_reloc_table), the
// decompressed code (d->o_dcmpr_code), then the original overlay if there is
// one. A warning is issued when non-zero bytes of the original file were left
// out of the output.
//
// c: Deark context.
// d: module state; reads ei, ohdr, o_reloc_table and o_dcmpr_code.
222 static void do_write_dcmpr(deark
*c
, lctx
*d
)
225 i64 ihdr_minmem
, ihdr_maxmem
;
227 i64 o_start_of_code
; // o_ means output
228 i64 o_reloc_pos
; // Where we actually write the reloc table
229 i64 o_reloc_pos_field
; // What we write to the header
230 i64 o_file_size
; // not including overlay
231 i64 cmprprog_mem_tot
; // total memory consumed+reserved by the exepacked program
// Region of the input that is not carried over to the output. It stays
// empty (len 0) unless set further down.
233 i64 elided_bytes_pos
= 28;
234 i64 elided_bytes_len
= 0;
236 outf
= dbuf_create_output_file(c
, "exe", NULL
, 0);
// Decide whether the input's reloc table position is usable.
// First case: the position lies outside the range 28..start_of_dos_code.
// (The body of this branch is not visible in this listing.)
238 if(d
->ei
->reloc_table_pos
<28 || d
->ei
->reloc_table_pos
>d
->ei
->start_of_dos_code
) {
241 else if (d
->o_reloc_table
->len
==0) {
242 // EXEPACK behaves differently if an empty reloc table has an offset <=28,
244 // That's why this is ">28", when it seems like it should be ">=28".
245 // (TODO: Test different EXEPACK versions.)
246 have_reloc_pos
= (d
->ei
->reloc_table_pos
>28);
252 if(d
->o_reloc_table
->len
==0 && !have_reloc_pos
) {
253 // In this case, EXEPACK retains everything up to start_of_code,
254 // so we ought to keep it.
255 o_reloc_pos_field
= d
->ei
->reloc_table_pos
;
256 o_reloc_pos
= d
->ei
->start_of_dos_code
;
257 o_start_of_code
= d
->ei
->start_of_dos_code
;
// Otherwise the table goes at a chosen position and the code follows it.
// NOTE(review): the branch structure around the next assignment is not
// visible in this listing.
261 o_reloc_pos
= d
->ei
->reloc_table_pos
;
266 o_reloc_pos_field
= o_reloc_pos
;
// Code starts after the reloc table, padded to a multiple of 16.
267 o_start_of_code
= de_pad_to_n(o_reloc_pos
+ d
->o_reloc_table
->len
, 16);
// Input bytes from o_reloc_pos up to the original code start are dropped.
269 elided_bytes_pos
= o_reloc_pos
;
270 elided_bytes_len
= d
->ei
->start_of_dos_code
- elided_bytes_pos
;
273 o_file_size
= o_start_of_code
+ d
->o_dcmpr_code
->len
;
// Values at offsets 10 and 12, which the header writer below labels
// minmem/maxmem.
// NOTE(review): presumably de_getu16le reads the input file; confirm.
275 ihdr_minmem
= de_getu16le(10);
276 ihdr_maxmem
= de_getu16le(12);
// Memory footprint of the compressed program: its code image plus minmem
// (scaled by 16), rounded up to a multiple of 16.
278 cmprprog_mem_tot
= (d
->ei
->end_of_dos_code
- d
->ei
->start_of_dos_code
) + ihdr_minmem
*16;
279 cmprprog_mem_tot
= de_pad_to_n(cmprprog_mem_tot
, 16);
281 // Try to set minmem so that the total memory is the same, or at least does
// NOTE(review): the rest of the comment above and the else branch are not
// visible in this listing.
283 if(cmprprog_mem_tot
>= d
->o_dcmpr_code
->len
) {
284 // This could be an overestimate, for small programs.
285 o_minmem
= de_pad_to_n(cmprprog_mem_tot
- d
->o_dcmpr_code
->len
, 16)/16;
291 // Generate 28-byte header
292 dbuf_writeu16le(outf
, 0x5a4d); // 0 signature
293 dbuf_writeu16le(outf
, o_file_size
%512); // 2 # of bytes in last page
294 dbuf_writeu16le(outf
, (o_file_size
+511)/512); // 4 # of pages
295 dbuf_writeu16le(outf
, d
->o_reloc_table
->len
/4); // 6 # of reloc tbl entries
296 dbuf_writeu16le(outf
, o_start_of_code
/ 16); // 8 hdrsize/16
// Offsets 10 and 12: minmem (recomputed above) and maxmem (copied from the
// input).
297 dbuf_writeu16le(outf
, o_minmem
);
298 dbuf_writeu16le(outf
, ihdr_maxmem
);
299 dbuf_writei16le(outf
, d
->ohdr
.regSS
); // 14 ss
300 dbuf_writeu16le(outf
, d
->ohdr
.regSP
); // 16 sp
301 dbuf_writeu16le(outf
, 0); // 18 checksum
302 dbuf_writeu16le(outf
, d
->ohdr
.regIP
); // 20 ip
303 dbuf_writei16le(outf
, d
->ohdr
.regCS
); // 22 cs
304 dbuf_writeu16le(outf
, o_reloc_pos_field
); // 24 reloc_tbl_pos
305 dbuf_writeu16le(outf
, 0); // 26 overlay indicator
307 // Copy extra data between header and reloc table
308 dbuf_copy(c
->infile
, 28, o_reloc_pos
-28, outf
);
310 // Write the relocation table
// NOTE(review): dbuf_truncate presumably sets the output length to exactly
// o_reloc_pos before the table is appended; confirm against the dbuf API.
311 dbuf_truncate(outf
, o_reloc_pos
);
312 dbuf_copy(d
->o_reloc_table
, 0, d
->o_reloc_table
->len
, outf
);
314 // Write the decompressed code
315 dbuf_truncate(outf
, o_start_of_code
);
316 dbuf_copy(d
->o_dcmpr_code
, 0, d
->o_dcmpr_code
->len
, outf
);
// Append the overlay, copied unchanged from end_of_dos_code in the input.
319 if(d
->ei
->overlay_len
>0) {
320 de_dbg(c
, "overlay data at %"I64_FMT
", len=%"I64_FMT
, d
->ei
->end_of_dos_code
,
322 dbuf_copy(c
->infile
, d
->ei
->end_of_dos_code
, d
->ei
->overlay_len
, outf
);
// Warn if the dropped region contained anything other than zero bytes.
327 if(elided_bytes_len
>0) {
328 // Microsoft has created (hacked?) files like this, that have data where
330 if(!dbuf_is_all_zeroes(c
->infile
, elided_bytes_pos
, elided_bytes_len
)) {
331 de_warn(c
, "Some unexpected data (%"I64_FMT
" bytes at %"I64_FMT
") was not "
332 "preserved. This file might not run properly.",
333 elided_bytes_len
, elided_bytes_pos
);
338 de_stdwarn_execomp(c
);
// Map a variant id to a human-readable name, used for debug output in
// de_run_exepack().
//
// v: variant id.
// Returns a string literal; "?" when name was left NULL, i.e. no listed case
// assigned it.
// NOTE(review): the switch header is not visible in this listing;
// presumably it switches on v.
342 static const char *get_variant_name(u8 v
)
344 const char *name
= NULL
;
347 case 1: name
="EXEPACK 3.00/4.00/etc."; break;
348 case 2: name
="EXEPACK 4.03"; break;
349 case 3: name
="LINK 3.60/etc."; break;
350 case 4: name
="EXEPACK 4.05/4.06"; break;
351 case 5: name
="LINK 5.60/etc. var. 1"; break;
352 case 6: name
="LINK 5.60/etc. var. 2"; break;
353 case 7: name
="WordPerfect variant"; break;
354 case 10: name
="exepack_DF"; break;
355 case 11: name
="EXPAKFIX-patched"; break;
356 case 12: name
="EXEPATCK-patched"; break;
358 return name
?name
:"?";
// Module entry point: detect EXEPACK, then run the pipeline
//   do_read_header -> find_decoder_len -> do_read_reloc_tbl ->
//   do_decompress_code -> do_write_dcmpr,
// jumping to "done" as soon as a step sets d->errflag.
//
// c: Deark context.
// mparams: module parameters; not referenced in the visible lines.
// NOTE(review): the "done" label, the early exit after the detection error,
// and any freeing of d and d->ei are not visible in this listing.
361 static void de_run_exepack(deark
*c
, de_module_params
*mparams
)
364 struct fmtutil_specialexe_detection_data edd
;
// Allocate the state and the two in-memory buffers that receive the
// relocation table and the decompressed code.
366 d
= de_malloc(c
, sizeof(lctx
));
367 d
->o_reloc_table
= dbuf_create_membuf(c
, 0, 0);
368 d
->o_dcmpr_code
= dbuf_create_membuf(c
, 0, 0);
369 d
->ei
= de_malloc(c
, sizeof(struct fmtutil_exe_info
));
370 fmtutil_collect_exe_info(c
, c
->infile
, d
->ei
);
// Run executable-compression detection restricted to EXEPACK.
372 de_zeromem(&edd
, sizeof(struct fmtutil_specialexe_detection_data
));
373 edd
.restrict_to_fmt
= DE_SPECIALEXEFMT_EXEPACK
;
374 fmtutil_detect_execomp(c
, d
->ei
, &edd
);
375 if(edd
.detected_fmt
!= DE_SPECIALEXEFMT_EXEPACK
) {
376 de_err(c
, "Not an EXEPACK-compressed file");
379 de_declare_fmt(c
, "EXEPACK-compressed EXE");
// The variant id is used by find_decoder_len() for its last-resort lengths.
381 d
->detected_subfmt
= edd
.detected_subfmt
;
382 de_dbg(c
, "variant id: %u (%s)", (UI
)d
->detected_subfmt
, get_variant_name(d
->detected_subfmt
));
384 do_read_header(c
, d
);
385 if(d
->errflag
) goto done
;
386 find_decoder_len(c
, d
);
387 if(d
->errflag
) goto done
;
389 do_read_reloc_tbl(c
, d
);
390 if(d
->errflag
) goto done
;
392 do_decompress_code(c
, d
);
393 if(d
->errflag
) goto done
;
395 do_write_dcmpr(c
, d
);
// Generic failure message, skipped when a step already reported its own
// error (errmsg_handled).
399 if(d
->errflag
&& !d
->errmsg_handled
) {
400 de_err(c
, "EXEPACK decompression failed");
// Release the two in-memory buffers.
402 dbuf_close(d
->o_reloc_table
);
403 dbuf_close(d
->o_dcmpr_code
);
// Module registration hook (declared near the top of the file with
// DE_DECLARE_MODULE): fills in the module's description and run function.
//
// c: Deark context; not referenced in the visible lines.
// mi: module info record to populate.
// NOTE(review): other assignments to mi (for example an id) may sit on lines
// that are not visible in this listing.
409 void de_module_exepack(deark
*c
, struct deark_module_info
*mi
)
412 mi
->desc
= "EXEPACK-compressed EXE";
413 mi
->run_fn
= de_run_exepack
;