fnt: Improved error handling, etc.
[deark.git] / modules / pklite.c
blob7ae916ad2b6694e78f71904aba08538d1bcd4b1b
1 // This file is part of Deark.
2 // Copyright (C) 2021 Jason Summers
3 // See the file COPYING for terms of use.
5 // Decompress PKLITE executable compression
7 // In a PKLITE-compressed EXE file, reliably determining the critical
8 // compression params is quite difficult. They are embedded or encoded in the
9 // machine code, and there are many different versions and variants of the
10 // format to deal with.
12 // While some params are encoded in the "version info" field at offset 28,
13 // this field is not trustworthy.
15 // This module painstakingly walks through the file, looking for known byte
16 // patterns, to identify known components, the parameters contained in them,
17 // and the location of following components.
19 // See also my "pkla" project, a script that does a better job of printing
20 // information about PKLITE-compressed files, and reporting problems.
22 // I thank Sergei Kolzun (private communication) for information about the
23 // v1.20 formats.
25 #include <deark-private.h>
26 #include <deark-fmtutil.h>
27 DE_DECLARE_MODULE(de_module_pklite);
29 // Things we need to figure out, to decompress the main compressed data.
30 struct decompr_params_struct {
31 i64 cmpr_data_pos;
32 u8 extra_cmpr; // 0=no, 1=yes, 2=special
33 u8 large_cmpr;
34 u8 v120_cmpr;
35 u8 offset_xor_key;
38 struct ver_info_struct {
39 UI ver_num; // e.g. 0x103 = 1.03
40 char pklver_str[40];
43 struct footer_struct {
44 i64 regSS;
45 i64 regSP;
46 i64 regCS;
47 i64 regIP;
50 typedef struct localctx_struct {
51 u8 errflag;
52 u8 errmsg_handled;
53 u8 dcmpr_ok;
54 u8 wrote_exe;
55 u8 raw_mode;
57 u8 is_com;
58 u8 data_before_decoder;
59 u8 load_high;
60 u8 has_psp_sig;
61 u8 psp_sig_type;
62 struct decompr_params_struct dparams;
64 struct fmtutil_exe_info *ei; // For the PKLITE file
65 struct fmtutil_exe_info *o_ei; // For the decompressed file
67 UI intro_class_fmtutil;
68 #define INTRO_CLASS_BETA 8
69 #define INTRO_CLASS_BETA_LH 9
70 #define INTRO_CLASS_100 10
71 #define INTRO_CLASS_112 12
72 #define INTRO_CLASS_114 14
73 #define INTRO_CLASS_150 50
74 #define INTRO_CLASS_UN2PACK 100
75 #define INTRO_CLASS_MEGALITE 101
76 #define INTRO_CLASS_COM_BETA 240
77 #define INTRO_CLASS_COM_100 241
78 #define INTRO_CLASS_COM_150 242
79 u8 intro_class;
81 UI initial_key;
82 i64 position2; // The next section after the intro [relative to entry point]
84 #define DESCRAMBLER_CLASS_114 14
85 #define DESCRAMBLER_CLASS_150 50
86 #define DESCRAMBLER_CLASS_150IBM 51
87 #define DESCRAMBLER_CLASS_120VAR1A 101
88 #define DESCRAMBLER_CLASS_120VAR1B 102
89 #define DESCRAMBLER_CLASS_120VAR2 103
90 #define DESCRAMBLER_CLASS_PKZIP204CLIKE 105
91 #define DESCRAMBLER_CLASS_PKLITE201LIKE 110
92 #define DESCRAMBLER_CLASS_CHK4LITE201LIKE 111
93 u8 descrambler_class;
94 u8 scrambled_decompressor;
95 #define SCRAMBLE_METHOD_XOR 1
96 #define SCRAMBLE_METHOD_ADD 2
97 u8 scramble_method;
98 i64 scrambled_word_count;
99 i64 pos_of_last_scrambled_word;
101 i64 copier_pos;
102 #define COPIER_CLASS_COMMON 1
103 #define COPIER_CLASS_150SCR 2
104 #define COPIER_CLASS_120VAR1SMALL 10
105 #define COPIER_CLASS_PKLITE201LIKE 20
106 #define COPIER_CLASS_UN2PACK 100
107 #define COPIER_CLASS_MEGALITE 101
108 #define COPIER_CLASS_OTHER 200
109 #define COPIER_CLASS_COM_BETA 240
110 #define COPIER_CLASS_COM_100 241
111 #define COPIER_CLASS_COM_115 242
112 u8 copier_class;
114 i64 decompr_pos;
115 i64 approx_end_of_decompressor;
116 #define DECOMPR_CLASS_COMMON 1
117 #define DECOMPR_CLASS_BETA 9
118 #define DECOMPR_CLASS_115 15
119 #define DECOMPR_CLASS_120SMALL_OLD 50
120 #define DECOMPR_CLASS_120SMALL 51
121 #define DECOMPR_CLASS_COM_BETA 240
122 #define DECOMPR_CLASS_COM_100 241
123 u8 decompr_class;
125 i64 cmpr_data_endpos; // = reloc_tbl_pos
126 i64 reloc_tbl_endpos;
127 i64 cmpr_data_area_endpos; // where the footer ends
128 i64 footer_pos; // 0 if unknown
129 struct footer_struct footer;
131 dbuf *o_orig_header; // copied or constructed header for the decompressed file
132 dbuf *o_reloc_table;
133 dbuf *o_dcmpr_code;
135 struct ver_info_struct ver_reported;
137 // A copy of the bytes at the EXE entry point, generally up to but not
138 // including the compressed data. The most we expect to need is about 800,
139 // e.g. for PKLITE Pro 2.01 w/ large + extra + checksum.
140 #define EPBYTES_LEN 1000
141 u8 epbytes[EPBYTES_LEN];
142 } lctx;
144 struct decompr_internal_state {
145 lctx *d;
146 dbuf *inf;
147 const struct decompr_params_struct *dparams;
148 u8 has_uncompressed_area;
149 i64 o_dcmpr_code_nbytes_written;
150 i64 dcmpr_cur_ipos;
151 struct de_bitbuf_lowlevel bbll;
152 struct fmtutil_huffman_decoder *lengths_tree;
153 struct fmtutil_huffman_decoder *offsets_tree;
// Module-local name for the library's fixed-position pattern matcher.
#define pkl_memmatch de_memmatch
158 // Search a region of a block of memory for the given pattern.
160 // search endpos is the end of the search region (the first byte beyond it).
161 // The entire pattern must fit into the region.
162 // pattern_len is the number of non-padding bytes. Must be at least 1.
163 // *pfoundpos is relative to the beginning of the 'mem'.
164 static int pkl_search_match(const u8 *mem, i64 mem_len,
165 i64 search_startpos, i64 search_endpos,
166 const u8 *pattern, i64 pattern_len,
167 u8 wildcard, UI flags, i64 *pfoundpos)
169 i64 foundpos2;
170 int ret;
172 *pfoundpos = 0;
174 if(pattern_len<1) return 0;
175 if(search_startpos<0) search_startpos = 0;
176 if(search_endpos>mem_len) search_endpos = mem_len;
177 if(pattern_len > search_endpos-search_startpos) return 0;
179 ret = de_memsearch_match(&mem[search_startpos], search_endpos-search_startpos,
180 pattern, pattern_len, wildcard, &foundpos2);
181 if(ret) {
182 *pfoundpos = search_startpos + foundpos2;
184 return ret;
187 static void info_bytes_to_version_struct(UI ver_info, struct ver_info_struct *v)
189 v->ver_num = ver_info & 0x0fff;
190 de_snprintf(v->pklver_str, sizeof(v->pklver_str), "%u.%02u%s%s%s",
191 (UI)(v->ver_num>>8), (UI)(v->ver_num&0xff),
192 ((ver_info&0x4000)?"/h":""),
193 ((ver_info&0x2000)?"/l":"/s"),
194 ((ver_info&0x1000)?"/e":""));
197 static void do_read_version_info(deark *c, lctx *d, i64 pos)
199 UI ver_info;
201 ver_info = (UI)de_getu16le(pos);
202 info_bytes_to_version_struct(ver_info, &d->ver_reported);
203 de_dbg(c, "reported PKLITE version: %s", d->ver_reported.pklver_str);
206 static i64 ip_to_eprel(lctx *d, i64 ip)
208 i64 n;
210 // TODO: This works, but might not be technically correct.
211 n = d->ei->start_of_dos_code + (ip - 0x0100) - d->ei->entry_point;
212 if(n<0) n=0;
213 return n;
216 static i64 read_and_follow_1byte_jump(lctx *d, i64 pos1)
218 i64 pos2;
220 if(pos1>=EPBYTES_LEN) return 0;
221 pos2 = pos1 + 1 + (i64)d->epbytes[pos1];
222 return pos2;
225 static void analyze_intro(deark *c, lctx *d)
227 int saved_indent_level;
229 de_dbg_indent_save(c, &saved_indent_level);
231 de_dbg(c, "intro at ep+0");
232 de_dbg_indent(c, 1);
234 // FIXME: We shouldn't print these opaque "class" identifiers in the
235 // debug info, but for now it's better than nothing.
236 de_dbg(c, "intro class/prelim: %u", d->intro_class_fmtutil);
238 // Initial DX register is sometimes used as a key.
239 if(pkl_memmatch(&d->epbytes[0], (const u8*)"\xb8??\xba", 4, '?', 0)) {
240 d->initial_key = (UI)de_getu16le_direct(&d->epbytes[4]);
242 else if(pkl_memmatch(&d->epbytes[0], (const u8*)"\x50\xb8??\xba", 5, '?', 0)) {
243 d->initial_key = (UI)de_getu16le_direct(&d->epbytes[5]);
246 if(d->intro_class_fmtutil==90) {
247 d->intro_class = INTRO_CLASS_BETA;
248 d->data_before_decoder = 1;
250 else if(d->intro_class_fmtutil==91) {
251 d->intro_class = INTRO_CLASS_BETA_LH;
252 d->data_before_decoder = 1;
253 d->load_high = 1;
255 else if(d->intro_class_fmtutil==100) {
256 d->intro_class = INTRO_CLASS_100;
257 d->position2 = 16;
259 else if(d->intro_class_fmtutil==112) {
260 if(d->epbytes[13]==0x73) {
261 d->intro_class = INTRO_CLASS_112;
262 d->position2 = 15;
264 else if(d->epbytes[13]==0x72) {
265 d->intro_class = INTRO_CLASS_114;
266 d->position2 = read_and_follow_1byte_jump(d, 14);
269 else if(d->intro_class_fmtutil==150) {
270 if(d->epbytes[14]==0x72) {
271 d->intro_class = INTRO_CLASS_150;
272 d->position2 = read_and_follow_1byte_jump(d, 15);
275 else if(d->intro_class_fmtutil==250) {
276 d->intro_class = INTRO_CLASS_UN2PACK;
277 d->position2 = 34;
279 else if(d->intro_class_fmtutil==251) {
280 if(d->epbytes[13]==0x72) {
281 d->intro_class = INTRO_CLASS_MEGALITE;
282 d->position2 = read_and_follow_1byte_jump(d, 14);
286 if(d->data_before_decoder) return;
288 if(!d->intro_class || !d->position2) {
289 d->errflag = 1;
291 if(!d->errflag) {
292 de_dbg(c, "intro class: %u", (UI)d->intro_class);
293 de_dbg(c, "after intro: ep+%"I64_FMT, d->position2);
296 de_dbg_indent_restore(c, saved_indent_level);
299 static void analyze_descrambler(deark *c, lctx *d)
301 i64 pos;
302 i64 pos_of_endpos_field = 0;
303 i64 pos_of_jmp_field = 0;
304 i64 pos_of_op = 0;
305 i64 pos_of_scrambled_word_count = 0;
306 i64 scrambled_endpos_raw;
307 int saved_indent_level;
309 de_dbg_indent_save(c, &saved_indent_level);
311 switch(d->intro_class) {
312 // Classes that might be scrambled:
313 case INTRO_CLASS_112:
314 case INTRO_CLASS_114:
315 case INTRO_CLASS_150:
316 break;
317 default:
318 goto done;
321 pos = d->position2;
322 if(pos + 200 > EPBYTES_LEN) goto done;
324 if(pkl_memmatch(&d->epbytes[pos],
325 (const u8*)"\x2d\x20\x00\x8e\xd0\x2d??\x50\x52\xb9??\xbe??\x8b\xfe"
326 "\xfd\x90\x49\x74?\xad\x92\x33\xc2\xab\xeb\xf6", 30, '?', 0))
328 d->descrambler_class = DESCRAMBLER_CLASS_114;
329 pos_of_scrambled_word_count = pos+11;
330 pos_of_endpos_field = pos+14;
331 pos_of_jmp_field = pos+22;
332 pos_of_op = pos+25;
334 else if(pkl_memmatch(&d->epbytes[pos],
335 (const u8*)"\x8b\xfc\x81\xef??\x57\x57\x52\xb9??\xbe??\x8b\xfe"
336 "\xfd\x49\x74?\xad\x92\x03\xc2\xab\xeb\xf6", 28, '?', 0))
338 d->descrambler_class = DESCRAMBLER_CLASS_120VAR1A;
339 pos_of_scrambled_word_count = pos+10;
340 pos_of_endpos_field = pos+13;
341 pos_of_jmp_field = pos+20;
342 pos_of_op = pos+23;
345 else if(pkl_memmatch(&d->epbytes[pos],
346 (const u8*)"\x8b\xfc\x81\xef??\x57\x57\x52\xb9??\xbe??\x8b\xfe"
347 "\xfd\x90\x49\x74?\xad\x92\x03\xc2\xab\xeb\xf6", 29, '?', 0))
349 d->descrambler_class = DESCRAMBLER_CLASS_120VAR1B;
350 pos_of_scrambled_word_count = pos+10;
351 pos_of_endpos_field = pos+13;
352 pos_of_jmp_field = pos+21;
353 pos_of_op = pos+24;
356 else if(pkl_memmatch(&d->epbytes[pos],
357 (const u8*)"\x59\x2d\x20\x00\x8e\xd0\x51??\x00\x50\x80\x3e"
358 "\x41\x01\xc3\x75\xe6\x52\xb8??\xbe??\x56\x56\x52\x50\x90"
359 "???????\x74", 38, '?', 0))
361 d->descrambler_class = DESCRAMBLER_CLASS_150;
362 pos_of_scrambled_word_count = pos+20;
363 pos_of_endpos_field = pos+23;
364 pos_of_jmp_field = pos+38;
365 pos_of_op = pos+45;
367 else if(pkl_memmatch(&d->epbytes[pos],
368 (const u8*)"\x2d\x20\x00????????????\xb9??\xbe????????\x74???\x03",
369 32, '?', 0))
371 d->descrambler_class = DESCRAMBLER_CLASS_120VAR2;
372 pos_of_scrambled_word_count = pos+16;
373 pos_of_endpos_field = pos+19;
374 pos_of_jmp_field = pos+28;
375 pos_of_op = pos+31;
377 else if(pkl_memmatch(&d->epbytes[pos],
378 (const u8*)"\x2d\x20\x00????????????\xb9??\xbe?????????\x74???\x03",
379 33, '?', 0))
381 d->descrambler_class = DESCRAMBLER_CLASS_PKZIP204CLIKE;
382 pos_of_scrambled_word_count = pos+16;
383 pos_of_endpos_field = pos+19;
384 pos_of_jmp_field = pos+29;
385 pos_of_op = pos+32;
388 else if(pkl_memmatch(&d->epbytes[pos],
389 (const u8*)"\x2d\x20\x00?????????????????\xb9??\xbe??????????\x74???\x03",
390 39, '?', 0))
392 d->descrambler_class = DESCRAMBLER_CLASS_PKLITE201LIKE;
393 pos_of_scrambled_word_count = pos+21;
394 pos_of_endpos_field = pos+24;
395 pos_of_jmp_field = pos+35;
396 pos_of_op = pos+38;
399 else if(pkl_memmatch(&d->epbytes[pos],
400 (const u8*)"\x8b\xfc\x81?????????????\xbb??\xbe??????\x74???\x03",
401 31, '?', 0))
403 d->descrambler_class = DESCRAMBLER_CLASS_CHK4LITE201LIKE;
404 pos_of_scrambled_word_count = pos+17;
405 pos_of_endpos_field = pos+20;
406 pos_of_jmp_field = pos+27;
407 pos_of_op = pos+30;
410 else if(pkl_memmatch(&d->epbytes[pos],
411 (const u8*)"\x59\x2d\x20\x00\x8e\xd0\x51\x2d??\x50\x52\xb9??\xbe??\x8b\xfe"
412 "\xfd\x90\x49\x74?\xad\x92\x33", 28, '?', 0))
414 d->descrambler_class = DESCRAMBLER_CLASS_150IBM;
415 pos_of_scrambled_word_count = pos+13;
416 pos_of_endpos_field = pos+16;
417 pos_of_jmp_field = pos+24;
418 pos_of_op = pos+27;
421 if(!d->descrambler_class) {
422 goto done;
425 d->scrambled_decompressor = 1;
427 if(d->epbytes[pos_of_op]==0x33) {
428 d->scramble_method = SCRAMBLE_METHOD_XOR;
430 else if(d->epbytes[pos_of_op]==0x03) {
431 d->scramble_method = SCRAMBLE_METHOD_ADD;
433 else {
434 d->errflag = 1;
435 goto done;
438 de_dbg(c, "descrambler at ep+%"I64_FMT, d->position2);
439 de_dbg_indent(c, 1);
440 de_dbg(c, "descrambler class: %u", (UI)d->descrambler_class);
442 de_dbg(c, "scramble method: %u", (UI)d->scramble_method);
443 d->scrambled_word_count = de_getu16le_direct(&d->epbytes[pos_of_scrambled_word_count]);
444 if(d->scrambled_word_count>0) d->scrambled_word_count--;
445 de_dbg(c, "scrambled word count: %u", (UI)d->scrambled_word_count);
446 scrambled_endpos_raw = de_getu16le_direct(&d->epbytes[pos_of_endpos_field]);
447 d->pos_of_last_scrambled_word = ip_to_eprel(d, scrambled_endpos_raw);
448 de_dbg(c, "pos of last scrambled word: %u", (UI)d->pos_of_last_scrambled_word);
449 d->copier_pos = read_and_follow_1byte_jump(d, pos_of_jmp_field);
451 done:
452 de_dbg_indent_restore(c, saved_indent_level);
453 if(!d->errflag) {
454 if(!d->scrambled_decompressor && !d->data_before_decoder) {
455 d->copier_pos = d->position2;
457 de_dbg(c, "is scrambled: %u", (UI)d->scrambled_decompressor);
461 static void descramble_decompressor(deark *c, lctx *d)
463 i64 startpos;
464 i64 pos;
465 UI this_word_scr;
466 UI next_word_scr;
467 UI this_word_dscr;
469 if(!d->scrambled_decompressor || d->scrambled_word_count<1) {
470 goto done;
472 de_dbg(c, "[descrambling]");
474 if(d->pos_of_last_scrambled_word+2 > EPBYTES_LEN) {
475 d->errflag = 1;
476 goto done;
479 startpos = d->pos_of_last_scrambled_word+2-d->scrambled_word_count*2;
480 if(startpos < 0) {
481 d->errflag = 1;
482 goto done;
485 this_word_scr = (UI)de_getu16le_direct(&d->epbytes[startpos]);
487 for(pos=startpos; pos<=d->pos_of_last_scrambled_word; pos+=2) {
488 if(pos==d->pos_of_last_scrambled_word) {
489 next_word_scr = d->initial_key;
491 else {
492 next_word_scr = (UI)de_getu16le_direct(&d->epbytes[pos+2]);
495 if(d->scramble_method==2) {
496 this_word_dscr = (this_word_scr + next_word_scr) & 0xffff;
498 else {
499 this_word_dscr = this_word_scr ^ next_word_scr;
502 de_writeu16le_direct(&d->epbytes[pos], (i64)this_word_dscr);
503 this_word_scr = next_word_scr;
506 done:
510 static void analyze_copier(deark *c, lctx *d)
512 i64 pos_of_decompr_pos_field = 0;
513 i64 foundpos = 0;
514 i64 pos = d->copier_pos;
515 int saved_indent_level;
517 de_dbg_indent_save(c, &saved_indent_level);
519 if(d->data_before_decoder) goto done;
520 if(!pos) {
521 d->errflag = 1;
522 goto done;
524 if(pos+200 > EPBYTES_LEN) {
525 d->errflag = 1;
526 goto done;
529 de_dbg(c, "copier at ep+%u", (UI)pos);
530 de_dbg_indent(c, 1);
532 if(pkl_search_match(d->epbytes, EPBYTES_LEN,
533 pos, pos+75,
534 (const u8*)"\xb9??\x33\xff\x57\xbe??\xfc\xf3\xa5", 12, '?', 0, &foundpos))
536 if(d->epbytes[foundpos+12]==0xcb) {
537 d->copier_class = COPIER_CLASS_COMMON;
539 else if(d->epbytes[foundpos+12]==0xca) {
540 d->copier_class = COPIER_CLASS_150SCR;
542 else {
543 d->copier_class = COPIER_CLASS_OTHER;
545 pos_of_decompr_pos_field = foundpos+7;
548 else if(pkl_search_match(d->epbytes, EPBYTES_LEN,
549 pos, pos+75,
550 (const u8*)"\xb9??\x33\xff\x57\xfc\xbe??\xf3\xa5\xcb", 13, '?', 0, &foundpos))
552 d->copier_class = COPIER_CLASS_PKLITE201LIKE;
553 pos_of_decompr_pos_field = foundpos+8;
556 else if(pkl_search_match(d->epbytes, EPBYTES_LEN,
557 pos, pos+75,
558 (const u8*)"\x57\xb9??\xbe??\xfc\xf3\xa5\xc3", 11, '?', 0, &foundpos))
560 d->copier_class = COPIER_CLASS_120VAR1SMALL;
561 pos_of_decompr_pos_field = foundpos+5;
563 else if(pkl_search_match(d->epbytes, EPBYTES_LEN,
564 pos, pos+75,
565 (const u8*)"\xb9??\x33\xff\x56\xbe??\xfc\xf2\xa5\xca", 13, '?', 0, &foundpos))
567 d->copier_class = COPIER_CLASS_MEGALITE;
568 pos_of_decompr_pos_field = foundpos+7;
570 else if(pkl_search_match(d->epbytes, EPBYTES_LEN,
571 pos, pos+75,
572 (const u8*)"\xb9??\x2b\xff\x57\xbe??\xfc\xf3\xa5\xcb", 13, '?', 0, &foundpos))
574 d->copier_class = COPIER_CLASS_UN2PACK;
575 pos_of_decompr_pos_field = foundpos+7;
578 if(!d->copier_class) {
579 d->errflag = 1;
580 goto done;
583 de_dbg(c, "copier class: %u", (UI)d->copier_class);
584 de_dbg(c, "copier subclass: %"I64_FMT, pos_of_decompr_pos_field-pos);
585 d->decompr_pos = ip_to_eprel(d, de_getu16le_direct(&d->epbytes[pos_of_decompr_pos_field]));
587 done:
588 de_dbg_indent_restore(c, saved_indent_level);
591 static void find_decompr_pos_beta(deark *c, lctx *d)
593 if(pkl_memmatch(&d->epbytes[0x59],
594 (const u8*)"\xf3\xa5\x2e\xa1????????\xcb\xfc", 14, '?', 0))
596 // small
597 d->decompr_pos = 0x66;
599 else if(pkl_memmatch(&d->epbytes[0x5b],
600 (const u8*)"\xf3\xa5\x85\xed????????????\xcb\xfc", 18, '?', 0))
602 // large
603 d->decompr_pos = 0x6c;
605 else if(pkl_memmatch(&d->epbytes[0],
606 (const u8*)"\x2e\x8c\x1e??\xfc\x8c\xc8\x2e\x2b\x06", 11, '?', 0))
608 // load-high
609 d->decompr_pos = 0x5;
613 static void analyze_decompressor(deark *c, lctx *d)
615 i64 pos;
616 i64 n;
617 int saved_indent_level;
619 de_dbg_indent_save(c, &saved_indent_level);
621 if(!d->decompr_pos && d->data_before_decoder) {
622 find_decompr_pos_beta(c, d);
625 pos = d->decompr_pos;
626 if(!pos) {
627 d->errflag = 1;
628 goto done;
630 if(pos+200 > EPBYTES_LEN) {
631 d->errflag = 1;
632 goto done;
635 de_dbg(c, "decompressor at ep+%u", (UI)pos);
636 de_dbg_indent(c, 1);
638 if(pkl_memmatch(&d->epbytes[pos],
639 (const u8*)"\xfd\x8c\xdb\x53\x83\xc3", 6, '?', 0))
641 d->decompr_class = DECOMPR_CLASS_COMMON;
642 n = (i64)d->epbytes[pos+6];
643 n *= 16;
644 d->dparams.cmpr_data_pos = d->ei->entry_point + ip_to_eprel(d, n);
646 else if(pkl_memmatch(&d->epbytes[pos],
647 (const u8*)"\xfd\x8c\xdb\x53\x81\xc3", 6, '?', 0))
649 d->decompr_class = DECOMPR_CLASS_115;
650 n = de_getu16le_direct(&d->epbytes[pos+6]);
651 n *= 16;
652 d->dparams.cmpr_data_pos = d->ei->entry_point + ip_to_eprel(d, n);
654 else if(pkl_memmatch(&d->epbytes[pos],
655 (const u8*)"\xfd\x5f\xc7\x85????\x4f\x4f\xbe??\x03\xf2"
656 "\x8b\xca\xd1\xe9\xf3", 20, '?', 0))
658 d->decompr_class = DECOMPR_CLASS_120SMALL;
659 n = de_getu16le_direct(&d->epbytes[pos+11]);
660 d->dparams.cmpr_data_pos = d->ei->entry_point + 2 + ip_to_eprel(d, n);
663 else if(pkl_memmatch(&d->epbytes[pos],
664 (const u8*)"\xfd\x5f\x4f\x4f\xbe??\x03\xf2\x8b\xca\xd1\xe9\xf3", 14, '?', 0))
666 d->decompr_class = DECOMPR_CLASS_120SMALL_OLD;
667 n = de_getu16le_direct(&d->epbytes[pos+5]);
668 d->dparams.cmpr_data_pos = d->ei->entry_point + 2 + ip_to_eprel(d, n);
672 else if(pkl_memmatch(&d->epbytes[pos],
673 (const u8*)"\xfc\x8c\xc8\x2e\x2b\x06??\x8e\xd8\xbf", 11, '?', 0))
675 d->decompr_class = DECOMPR_CLASS_BETA;
676 d->dparams.cmpr_data_pos = d->ei->start_of_dos_code;
679 if(!d->decompr_class) {
680 d->errflag = 1;
681 goto done;
684 de_dbg(c, "decompressor class: %u", (UI)d->decompr_class);
685 de_dbg(c, "cmpr data pos: %"I64_FMT, d->dparams.cmpr_data_pos);
687 done:
688 de_dbg_indent_restore(c, saved_indent_level);
691 static void analyze_detect_large_and_v120_cmpr(deark *c, lctx *d)
693 i64 foundpos = 0;
694 int ret;
696 if(d->decompr_class==DECOMPR_CLASS_120SMALL ||
697 d->decompr_class==DECOMPR_CLASS_120SMALL_OLD)
699 d->dparams.v120_cmpr = 1;
700 d->dparams.large_cmpr = 0;
701 goto done;
704 // TODO?: A better search function to use when there are no wildcards.
705 ret = pkl_search_match(d->epbytes, EPBYTES_LEN,
706 d->approx_end_of_decompressor-60, d->approx_end_of_decompressor,
707 (const u8*)"\x01\x02\x00\x00\x03\x04\x05\x06"
708 "\x00\x00\x00\x00\x00\x00\x00\x00\x07\x08\x09\x0a\x0b", 21, 0x3f,
709 0, &foundpos);
710 if(ret && foundpos>0) {
711 u8 prec_b;
713 prec_b = d->epbytes[foundpos-1];
714 if(prec_b==0x09) {
715 d->dparams.large_cmpr = 0;
717 else if(prec_b==0x18) {
718 d->dparams.large_cmpr = 1;
720 else {
721 d->errflag = 1;
723 goto done;
726 // The only thing left should be v1.20 w/ large cmpr, which always uses extra cmpr
727 if(!d->dparams.extra_cmpr) {
728 d->errflag = 1;
729 goto done;
732 // Files w/o the above pattern, but with the below pattern, are presumed
733 // to be v1.20.
734 ret = pkl_search_match(d->epbytes, EPBYTES_LEN,
735 d->approx_end_of_decompressor-50, d->approx_end_of_decompressor,
736 (const u8*)"\x33\xc0\x8b\xd8\x8b\xc8\x8b\xd0\x8b\xe8\x8b\xf0\x8b", 13, 0x3f,
737 0, &foundpos);
738 if(ret) {
739 d->dparams.v120_cmpr = 1;
740 d->dparams.large_cmpr = 1;
741 goto done;
744 d->errflag = 1;
746 done:
747 if(!d->errflag) {
748 de_dbg(c, "large cmpr: %u", (UI)d->dparams.large_cmpr);
749 de_dbg(c, "v1.20 cmpr: %u", (UI)d->dparams.v120_cmpr);
753 static void analyze_detect_obf_offsets(deark *c, lctx *d)
755 i64 foundpos = 0;
756 int ret;
757 u8 has_obf_offsets = 0;
759 if(!d->dparams.v120_cmpr) goto done;
761 ret = pkl_search_match(d->epbytes, EPBYTES_LEN,
762 d->decompr_pos+200, d->approx_end_of_decompressor,
763 (const u8*)"\xac\x34?\x8a", 4, 0x3f,
764 0, &foundpos);
765 if(ret) {
766 has_obf_offsets = 1;
767 d->dparams.offset_xor_key = d->epbytes[foundpos+2];
770 done:
771 if(d->dparams.v120_cmpr) {
772 de_dbg(c, "obfuscated offsets: %u", (UI)has_obf_offsets);
773 if(has_obf_offsets) {
774 de_dbg_indent(c, 1);
775 de_dbg(c, "offsets key: 0x%02x", (UI)d->dparams.offset_xor_key);
776 de_dbg_indent(c, -1);
781 static void analyze_detect_extra_cmpr(deark *c, lctx *d)
783 int ret;
784 i64 foundpos;
786 if(d->decompr_pos==0 || d->approx_end_of_decompressor==0) {
787 d->errflag = 1;
788 goto done;
791 ret = pkl_search_match(d->epbytes, EPBYTES_LEN,
792 d->decompr_pos, d->approx_end_of_decompressor,
793 (const u8*)"\xad\x95\xb2\x10\x72\x08\xa4\xd1\xed\x4a\x74", 11, 0x3f,
794 0, &foundpos);
795 if(ret) {
796 d->dparams.extra_cmpr = 0;
797 goto done;
800 ret = pkl_search_match(d->epbytes, EPBYTES_LEN,
801 d->decompr_pos, d->approx_end_of_decompressor,
802 (const u8*)"\xad\x95\xb2\x10\x72\x0b\xac??\xaa\xd1\xed\x4a\x74", 14, 0x3f,
803 0, &foundpos);
804 if(ret) {
805 if(d->epbytes[foundpos+7]==0x32 && d->epbytes[foundpos+8]==0xc2) {
806 d->dparams.extra_cmpr = 1;
807 goto done;
809 else if(d->epbytes[foundpos+7]==0xf6 && d->epbytes[foundpos+8]==0xd0) {
810 // Customized "v1.23" format seen in files from RemoteAccess v1.11
811 // BBS software by Andrew Milner / Continental Software.
812 // http://cd.textfiles.com/librisbritannia/
813 // https://archive.org/details/LibrisBritannia
814 // ... COMMUNIC/BULLETIN/3220A.ZIP
815 // ... COMMUNIC/BULLETIN/3220B.ZIP
816 d->dparams.extra_cmpr = 2;
817 goto done;
821 d->errflag = 1;
823 done:
824 if(!d->errflag) {
825 de_dbg(c, "extra cmpr: %u", (UI)d->dparams.extra_cmpr);
829 static void analyze_detect_psp_sig(deark *c, lctx *d)
831 int ret;
832 i64 foundpos;
833 const u8 *pattern;
835 if(d->decompr_pos==0 || d->approx_end_of_decompressor==0) {
836 goto done;
839 // It's kind of overkill to always do this search, and always look for both
840 // signatures. We could probably be much more discriminiating, e.g. by
841 // by using the apparent correspondence to scramble_method. But we would
842 // risk false negatives.
844 pattern = (const u8*)"\xc7\x06\x5c\x00\x70\x6b"; // "pk"
845 ret = pkl_search_match(d->epbytes, EPBYTES_LEN,
846 d->decompr_pos, d->approx_end_of_decompressor,
847 pattern, 6, 0x3f, 0, &foundpos);
848 if(ret) {
849 d->has_psp_sig = 1;
850 d->psp_sig_type = 2;
851 goto done;
854 pattern = (const u8*)"\xc7\x06\x5c\x00\x50\x4b"; // "PK"
855 ret = pkl_search_match(d->epbytes, EPBYTES_LEN,
856 d->decompr_pos, d->approx_end_of_decompressor,
857 pattern, 6, 0x3f, 0, &foundpos);
858 if(ret) {
859 d->has_psp_sig = 1;
860 d->psp_sig_type = 1;
863 done:
867 // Do whatever we need to do to figure out the compression params
868 // (mainly d->dparams).
869 static void do_analyze_pklite_exe(deark *c, lctx *d)
871 int saved_indent_level;
873 de_dbg_indent_save(c, &saved_indent_level);
875 de_dbg(c, "code start: %"I64_FMT, d->ei->start_of_dos_code);
876 de_dbg(c, "entry point: %"I64_FMT, d->ei->entry_point);
878 de_dbg(c, "[analyzing file]");
879 de_dbg_indent(c, 1);
881 analyze_intro(c, d);
882 if(d->errflag) goto done;
884 analyze_descrambler(c, d);
885 if(d->errflag) goto done;
887 if(d->scrambled_decompressor) {
888 descramble_decompressor(c, d);
889 if(d->errflag) goto done;
892 analyze_copier(c, d);
893 if(d->errflag) goto done;
895 analyze_decompressor(c, d);
896 if(d->errflag) goto done;
898 if(!d->dparams.cmpr_data_pos) {
899 d->errflag = 1;
900 goto done;
903 if(d->data_before_decoder) {
904 d->approx_end_of_decompressor = d->ei->end_of_dos_code - d->ei->entry_point;
905 d->cmpr_data_area_endpos = d->ei->entry_point;
907 else {
908 d->approx_end_of_decompressor = d->dparams.cmpr_data_pos - d->ei->entry_point;
909 d->cmpr_data_area_endpos = d->ei->end_of_dos_code;
911 de_dbg(c, "approx end of decompressor: ep+%"I64_FMT, d->approx_end_of_decompressor);
913 analyze_detect_extra_cmpr(c, d);
914 if(d->errflag) goto done;
915 analyze_detect_large_and_v120_cmpr(c, d);
916 if(d->errflag) goto done;
917 analyze_detect_obf_offsets(c, d);
918 if(d->errflag) goto done;
919 analyze_detect_psp_sig(c, d);
921 done:
922 de_dbg_indent_restore(c, saved_indent_level);
925 static void fill_bitbuf(deark *c, struct decompr_internal_state *dctx)
927 UI i;
929 if(dctx->d->errflag) return;
930 if(dctx->dcmpr_cur_ipos+2 > dctx->inf->len) {
931 de_err(c, "Unexpected end of file during decompression");
932 dctx->d->errflag = 1;
933 dctx->d->errmsg_handled = 1;
934 return;
937 for(i=0; i<2; i++) {
938 u8 b;
939 b = dbuf_getbyte_p(dctx->inf, &dctx->dcmpr_cur_ipos);
940 de_bitbuf_lowlevel_add_byte(&dctx->bbll, b);
944 static u8 pklite_getbit(deark *c, struct decompr_internal_state *dctx)
946 u8 v;
948 if(dctx->d->errflag) return 0;
949 v = (u8)de_bitbuf_lowlevel_get_bits(&dctx->bbll, 1);
951 if(dctx->bbll.nbits_in_bitbuf==0) {
952 fill_bitbuf(c, dctx);
955 return v;
958 static void my_lz77buf_writebytecb(struct de_lz77buffer *rb, u8 n)
960 struct decompr_internal_state *dctx = (struct decompr_internal_state *)rb->userdata;
962 dbuf_writebyte(dctx->d->o_dcmpr_code, n);
963 dctx->o_dcmpr_code_nbytes_written++;
966 // Allocates and populates a huffman_decoder.
967 // Caller supplies htp: A pointer to an initially-NULL pointer.
968 // Caller must eventually call fmtutil_huffman_destroy_decoder() on the returned
969 // pointer.
970 // lengths_and_codes: High 4 bits is the code length (0..12),
971 // low 12 bits is the code.
972 static void huffman_make_tree_from_u16array(deark *c,
973 struct fmtutil_huffman_decoder **htp,
974 const u16 *lengths_and_codes, UI ncodes,
975 const char *dbgtitle)
977 UI n;
978 char b2buf[72];
980 if(*htp) return;
981 *htp = fmtutil_huffman_create_decoder(c, ncodes, ncodes);
982 if(dbgtitle) {
983 de_dbg3(c, "[%s codebook]", dbgtitle);
985 de_dbg_indent(c, 1);
986 for(n=0; n<ncodes; n++) {
987 UI nbits;
988 u64 code;
990 nbits = ((UI)lengths_and_codes[n])>>12;
991 code = ((u64)lengths_and_codes[n]) & 0x0fff;
993 if(dbgtitle && c->debug_level>=3) {
994 de_dbg3(c, "code: \"%s\" = %d",
995 de_print_base2_fixed(b2buf, sizeof(b2buf), code, nbits), (int)n);
997 fmtutil_huffman_add_code(c, (*htp)->bk, code, nbits,
998 (fmtutil_huffman_valtype)n);
1000 de_dbg_indent(c, -1);
1003 static void make_matchlengths_tree(deark *c, struct decompr_internal_state *dctx)
1005 static const char *name = "match lengths";
1006 static const u16 matchlengthsdata_lg[24] = {
1007 0x2003,0x3000,0x4002,0x4003,0x4004,0x500a,0x500b,0x500c,
1008 0x601a,0x601b,0x703a,0x703b,0x703c,0x807a,0x807b,0x807c,
1009 0x90fa,0x90fb,0x90fc,0x90fd,0x90fe,0x90ff,0x601c,0x2002
1011 static const u16 matchlengthsdata_sm[9] = {
1012 0x2000,0x3004,0x3005,0x400c,0x400d,0x400e,0x400f,0x3003,
1013 0x3002
1015 static const u16 matchlengthsdata120_lg[21] = {
1016 0x2003,0x3000,0x4005,0x4006,0x5006,0x5007,0x6008,0x6009,
1017 0x7020,0x7021,0x7022,0x7023,0x8048,0x8049,0x804a,0x9096,
1018 0x9097,0x6013,0x2002,0x4007,0x5005
1020 static const u16 matchlengthsdata120_sm[11] = {
1021 0x2003,0x3000,0x4004,0x4005,0x500e,0x601e,0x601f,0x4006,
1022 0x2002,0x4003,0x4002
1025 if(dctx->dparams->large_cmpr) {
1026 if(dctx->dparams->v120_cmpr) {
1027 huffman_make_tree_from_u16array(c, &dctx->lengths_tree,
1028 matchlengthsdata120_lg, 21, name);
1030 else {
1031 huffman_make_tree_from_u16array(c, &dctx->lengths_tree,
1032 matchlengthsdata_lg, 24, name);
1035 else {
1036 if(dctx->dparams->v120_cmpr) {
1037 huffman_make_tree_from_u16array(c, &dctx->lengths_tree,
1038 matchlengthsdata120_sm, 11, name);
1040 else {
1041 huffman_make_tree_from_u16array(c, &dctx->lengths_tree,
1042 matchlengthsdata_sm, 9, name);
1047 static void make_offsets_tree(deark *c, struct decompr_internal_state *dctx)
1049 static const char *name = "offsets";
1050 static const u16 offsetsdata[32] = {
1051 0x1001,0x4000,0x4001,0x5004,0x5005,0x5006,0x5007,0x6010,
1052 0x6011,0x6012,0x6013,0x6014,0x6015,0x6016,0x702e,0x702f,
1053 0x7030,0x7031,0x7032,0x7033,0x7034,0x7035,0x7036,0x7037,
1054 0x7038,0x7039,0x703a,0x703b,0x703c,0x703d,0x703e,0x703f
1056 static const u16 offsetsdata120[32] = {
1057 0x1001,0x3000,0x5004,0x5005,0x5006,0x5007,0x6010,0x6011,
1058 0x6012,0x6013,0x6014,0x6015,0x702c,0x702d,0x702e,0x702f,
1059 0x7030,0x7031,0x7032,0x7033,0x7034,0x7035,0x7036,0x7037,
1060 0x7038,0x7039,0x703a,0x703b,0x703c,0x703d,0x703e,0x703f
1063 if(dctx->dparams->v120_cmpr) {
1064 huffman_make_tree_from_u16array(c, &dctx->offsets_tree,
1065 offsetsdata120, 32, name);
1067 else {
1068 huffman_make_tree_from_u16array(c, &dctx->offsets_tree,
1069 offsetsdata, 32, name);
1073 static UI read_pklite_code_using_tree(deark *c, struct decompr_internal_state *dctx,
1074 struct fmtutil_huffman_decoder *ht)
1076 int ret;
1077 fmtutil_huffman_valtype val = 0;
1079 while(1) {
1080 u8 b;
1082 b = pklite_getbit(c, dctx);
1083 if(dctx->d->errflag) goto done;
1085 ret = fmtutil_huffman_decode_bit(ht->bk, ht->cursor, b, &val);
1086 if(ret==1) goto done; // finished the code
1087 if(ret!=2) {
1088 dctx->d->errflag = 1;
1089 goto done;
1092 done:
1093 return val;
1096 static void do_uncompressed_area(deark *c, struct decompr_internal_state *dctx,
1097 struct de_lz77buffer *ringbuf)
1099 i64 len;
1100 i64 i;
1102 dctx->has_uncompressed_area = 1;
1103 len = (i64)dbuf_getbyte_p(dctx->inf, &dctx->dcmpr_cur_ipos);
1104 de_dbg3(c, "uncompressed area at %"I64_FMT", len=%"I64_FMT, dctx->dcmpr_cur_ipos, len);
1106 // TODO: The only files with this feature that I have are registered copies of
1107 // PKZIP.EXE. When decompressed with, e.g., UNP, 9 additional seemingly-random
1108 // bytes of data appear out of nowhere before the uncompressed area. I don't
1109 // know what these bytes are for. They appear in the original file, but not in a
1110 // place that makes any sense.
1111 // I don't know whether this also happens in files made by the consumer versions
1112 // of PKLITE.
1113 // For now, we'll just write 9 dummy bytes here.
1114 for(i=0; i<9; i++) {
1115 de_lz77buffer_add_literal_byte(ringbuf, 0x00);
1118 for(i=0; i<len; i++) {
1119 de_lz77buffer_add_literal_byte(ringbuf, dbuf_getbyte_p(dctx->inf, &dctx->dcmpr_cur_ipos));
1123 // Decompress the main part of the file.
1124 // Uses:
1125 // c->infile
1126 // d->dparams.*
1127 // Returns:
1128 // d->o_dcmpr_code
1129 // d->cmpr_data_endpos
1130 // d->errflag
1131 // d->errmsg_handled
1132 static void do_decompress(deark *c, lctx *d)
1134 struct decompr_internal_state *dctx = NULL;
1135 struct de_lz77buffer *ringbuf = NULL;
1136 u8 b;
1137 UI value_of_long_ml_code;
1138 UI value_of_ml2_0_code;
1139 UI value_of_ml2_1_code = 0xffff;
1140 UI value_of_lit0_code = 0xffff;
1141 UI long_matchlen_bias;
1143 de_dbg(c, "decompressing cmpr code at %"I64_FMT, d->dparams.cmpr_data_pos);
1144 de_dbg_indent(c, 1);
1146 dctx = de_malloc(c, sizeof(struct decompr_internal_state));
1147 dctx->d = d;
1148 dctx->inf = c->infile;
1149 dctx->dparams = &d->dparams;
1151 if(d->dparams.large_cmpr) {
1152 if(d->dparams.v120_cmpr) {
1153 // There are 17 normal codes, and 4 special
1154 value_of_long_ml_code = 17;
1155 value_of_ml2_0_code = value_of_long_ml_code+1;
1156 value_of_ml2_1_code = value_of_long_ml_code+2;
1157 value_of_lit0_code = value_of_long_ml_code+3;
1158 long_matchlen_bias = 20;
1160 else {
1161 // There are 22 normal codes, and 2 special
1162 value_of_long_ml_code = 22;
1163 value_of_ml2_0_code = value_of_long_ml_code+1;
1164 long_matchlen_bias = 25;
1167 else {
1168 if(d->dparams.v120_cmpr) {
1169 // There are 7 normal codes, and 4 special
1170 value_of_long_ml_code = 7;
1171 value_of_ml2_0_code = value_of_long_ml_code+1;
1172 value_of_ml2_1_code = value_of_long_ml_code+2;
1173 value_of_lit0_code = value_of_long_ml_code+3;
1174 long_matchlen_bias = 10;
1176 else {
1177 // There are 7 normal codes, and 2 special
1178 value_of_long_ml_code = 7;
1179 value_of_ml2_0_code = value_of_long_ml_code+1;
1180 long_matchlen_bias = 10;
1184 make_matchlengths_tree(c, dctx);
1185 make_offsets_tree(c, dctx);
1187 d->o_dcmpr_code = dbuf_create_membuf(c, 0, 0);
1188 dbuf_set_length_limit(d->o_dcmpr_code, 1048576);
1189 dbuf_enable_wbuffer(d->o_dcmpr_code);
1191 ringbuf = de_lz77buffer_create(c, 8192);
1192 ringbuf->userdata = (void*)dctx;
1193 ringbuf->writebyte_cb = my_lz77buf_writebytecb;
1195 dctx->dcmpr_cur_ipos = d->dparams.cmpr_data_pos;
1196 dctx->bbll.is_lsb = 1;
1197 de_bitbuf_lowlevel_empty(&dctx->bbll);
1199 fill_bitbuf(c, dctx);
1201 while(1) {
1202 u8 x;
1203 UI len_raw;
1204 UI matchlen;
1205 UI offs_hi_bits = 0;
1206 u8 offs_lo_byte;
1207 u8 offs_have_hi_bits = 0;
1208 UI matchpos;
1210 if(d->errflag) goto after_dcmpr;
1212 x = pklite_getbit(c, dctx);
1213 if(x==0) {
1214 b = dbuf_getbyte_p(dctx->inf, &dctx->dcmpr_cur_ipos);
1215 if(d->dparams.extra_cmpr==1) {
1216 b ^= (u8)(dctx->bbll.nbits_in_bitbuf);
1218 else if(d->dparams.extra_cmpr==2) {
1219 b ^= 0xff;
1221 if(c->debug_level>=3) {
1222 de_dbg3(c, "lit 0x%02x", (UI)b);
1224 de_lz77buffer_add_literal_byte(ringbuf, b);
1225 continue;
1228 len_raw = read_pklite_code_using_tree(c, dctx, dctx->lengths_tree);
1229 if(d->errflag) goto after_dcmpr;
1231 if(len_raw<value_of_long_ml_code) {
1232 matchlen = len_raw+3;
1234 else if(len_raw==value_of_ml2_0_code) {
1235 matchlen = 2;
1236 // Leave offs_hi_bits at 0.
1237 offs_have_hi_bits = 1;
1239 else if(len_raw==value_of_long_ml_code) {
1240 b = dbuf_getbyte_p(dctx->inf, &dctx->dcmpr_cur_ipos);
1242 if(b >= 0xfd) {
1243 if(b==0xfd && d->dparams.large_cmpr) {
1244 do_uncompressed_area(c, dctx, ringbuf);
1245 continue;
1247 if(b==0xfe && d->dparams.large_cmpr) {
1248 // (segment separator) Just a no-op?
1249 de_dbg3(c, "code 0xfe");
1250 continue;
1252 if(b==0xff) {
1253 de_dbg3(c, "stop code");
1254 goto after_dcmpr; // Normal completion
1256 de_err(c, "Unexpected code (0x%02x) or unsupported feature", (UI)b);
1257 d->errflag = 1;
1258 d->errmsg_handled = 1;
1259 goto after_dcmpr;
1261 matchlen = (UI)b+long_matchlen_bias;
1263 else if(len_raw==value_of_lit0_code) {
1264 if(c->debug_level>=3) {
1265 de_dbg3(c, "lit 0x00 (special)");
1267 de_lz77buffer_add_literal_byte(ringbuf, 0x00);
1268 continue;
1270 else if(len_raw==value_of_ml2_1_code) {
1271 matchlen = 2;
1272 offs_hi_bits = 1;
1273 offs_have_hi_bits = 1;
1275 else {
1276 d->errflag = 1;
1277 goto done;
1280 if(!offs_have_hi_bits) {
1281 offs_hi_bits = read_pklite_code_using_tree(c, dctx, dctx->offsets_tree);
1284 offs_lo_byte = dbuf_getbyte_p(dctx->inf, &dctx->dcmpr_cur_ipos);
1285 offs_lo_byte ^= d->dparams.offset_xor_key;
1286 if(d->errflag) goto after_dcmpr;
1288 matchpos = (offs_hi_bits<<8) | (UI)offs_lo_byte;
1290 if(c->debug_level>=3) {
1291 de_dbg3(c, "match pos=%u len=%u", matchpos, matchlen);
1294 // PKLITE confirmed to use distances 1 to 8191. Have not observed matchpos=0.
1295 // Have not observed it to use distances larger than the number of bytes
1296 // decompressed so far.
1297 if(matchpos==0 || (i64)matchpos>dctx->o_dcmpr_code_nbytes_written) {
1298 de_err(c, "Bad or unsupported compressed data (dist=%u, expected 1 to %"I64_FMT")",
1299 matchpos, dctx->o_dcmpr_code_nbytes_written);
1300 d->errflag = 1;
1301 d->errmsg_handled = 1;
1302 goto after_dcmpr;
1304 de_lz77buffer_copy_from_hist(ringbuf,
1305 (UI)(ringbuf->curpos-matchpos), matchlen);
1308 after_dcmpr:
1309 if(!d->o_dcmpr_code) goto done;
1310 dbuf_flush(d->o_dcmpr_code);
1312 if(!d->errflag) {
1313 d->cmpr_data_endpos = dctx->dcmpr_cur_ipos;
1314 de_dbg(c, "cmpr data end: %"I64_FMT, d->cmpr_data_endpos);
1315 de_dbg(c, "decompressed %"I64_FMT" bytes to %"I64_FMT,
1316 d->cmpr_data_endpos-d->dparams.cmpr_data_pos, d->o_dcmpr_code->len);
1318 if(dctx->has_uncompressed_area) {
1319 de_warn(c, "This file has an \"uncompressed area\", and might not be "
1320 "decompressed correctly.");
1324 done:
1325 if(dctx) {
1326 fmtutil_huffman_destroy_decoder(c, dctx->lengths_tree);
1327 fmtutil_huffman_destroy_decoder(c, dctx->offsets_tree);
1328 de_free(c, dctx);
1330 de_dbg_indent(c, -1);
1333 #define MAX_RELOCS 65535
1335 static void do_read_reloc_table_short(deark *c, lctx *d, i64 pos1, i64 len)
1337 i64 reloc_count = 0;
1338 i64 pos = pos1;
1339 i64 endpos = pos1+len;
1340 int saved_indent_level;
1342 de_dbg_indent_save(c, &saved_indent_level);
1343 de_dbg(c, "reading 'short' reloc table at %"I64_FMT, pos1);
1344 de_dbg_indent(c, 1);
1346 while(1) {
1347 UI i;
1348 UI count;
1349 i64 seg, offs;
1351 if(pos+1 > endpos) {
1352 d->errflag = 1;
1353 goto done;
1355 count = (UI)de_getbyte_p(&pos);
1356 if(count==0) {
1357 de_dbg2(c, "end-of-data");
1358 break; // normal completion
1360 de_dbg2(c, "count: %u", count);
1362 if(reloc_count+count > MAX_RELOCS) {
1363 d->errflag = 1;
1364 goto done;
1366 if(pos+2+(i64)count*2 > endpos) {
1367 d->errflag = 1;
1368 goto done;
1370 seg = de_getu16le_p(&pos);
1371 de_dbg2(c, "seg: 0x%04x", (UI)seg);
1372 de_dbg_indent(c, 1);
1373 for(i=0; i<count; i++) {
1374 if(reloc_count>=MAX_RELOCS) {
1375 d->errflag = 1;
1376 goto done;
1378 offs = de_getu16le_p(&pos);
1379 de_dbg2(c, "offs: 0x%04x", (UI)offs);
1380 dbuf_writeu16le(d->o_reloc_table, offs);
1381 dbuf_writeu16le(d->o_reloc_table, seg);
1382 reloc_count++;
1384 de_dbg_indent(c, -1);
1387 d->reloc_tbl_endpos = pos;
1388 de_dbg(c, "cmpr reloc table ends at %"I64_FMT", entries=%d", d->reloc_tbl_endpos,
1389 (int)reloc_count);
1391 done:
1392 de_dbg_indent_restore(c, saved_indent_level);
1395 static void do_read_reloc_table_long(deark *c, lctx *d, i64 pos1, i64 len)
1397 i64 reloc_count = 0;
1398 i64 pos = pos1;
1399 i64 seg = 0;
1400 i64 endpos = pos1+len;
1401 int saved_indent_level;
1403 de_dbg_indent_save(c, &saved_indent_level);
1404 de_dbg(c, "reading 'long%s' reloc table at %"I64_FMT,
1405 (d->scramble_method==2?"/reversed":""), pos1);
1406 de_dbg_indent(c, 1);
1407 while(1) {
1408 UI i;
1409 UI count;
1410 i64 offs;
1412 if(pos+2 > endpos) {
1413 d->errflag = 1;
1414 goto done;
1417 count = (UI)de_getu16le_p(&pos);
1418 if(count==0xffff) {
1419 de_dbg2(c, "end-of-data");
1420 break; // normal completion
1422 de_dbg2(c, "count: %u", count);
1424 if(seg > 0xffff) {
1425 d->errflag = 1;
1426 goto done;
1428 de_dbg2(c, "seg: 0x%04x", (UI)seg);
1430 if(reloc_count+count > MAX_RELOCS) {
1431 d->errflag = 1;
1432 goto done;
1434 if(pos+(i64)count*2 > endpos) {
1435 d->errflag = 1;
1436 goto done;
1439 de_dbg_indent(c, 1);
1440 for(i=0; i<count; i++) {
1441 if(d->scramble_method==2) {
1442 offs = de_getu16be_p(&pos);
1444 else {
1445 offs = de_getu16le_p(&pos);
1447 de_dbg2(c, "offs: 0x%04x", (UI)offs);
1448 dbuf_writeu16le(d->o_reloc_table, offs);
1449 dbuf_writeu16le(d->o_reloc_table, seg);
1450 reloc_count++;
1452 de_dbg_indent(c, -1);
1453 seg += 0x0fff;
1456 d->reloc_tbl_endpos = pos;
1457 de_dbg(c, "cmpr reloc table ends at %"I64_FMT", entries=%d", d->reloc_tbl_endpos,
1458 (int)reloc_count);
1460 done:
1461 de_dbg_indent_restore(c, saved_indent_level);
1464 static void do_read_reloc_table(deark *c, lctx *d)
1466 i64 reloc_tbl_len; // number of bytes available for encoded table
1468 d->o_reloc_table = dbuf_create_membuf(c, 0, 0);
1470 reloc_tbl_len = d->cmpr_data_area_endpos - 8 - d->cmpr_data_endpos;
1472 if(d->dparams.extra_cmpr) {
1473 do_read_reloc_table_long(c, d, d->cmpr_data_endpos, reloc_tbl_len);
1475 else {
1476 do_read_reloc_table_short(c, d, d->cmpr_data_endpos, reloc_tbl_len);
1479 if(d->errflag) {
1480 de_err(c, "Failed to decode relocation table");
1481 d->errmsg_handled = 1;
1485 static void find_min_mem_needed(deark *c, lctx *d, i64 *pminmem)
1487 i64 pos;
1488 i64 n;
1489 u8 b;
1491 if(d->data_before_decoder) {
1492 // File from a registered v1.00beta?
1493 return;
1496 pos = d->ei->entry_point;
1497 b = de_getbyte_p(&pos);
1498 if(b==0x50) {
1499 b = de_getbyte_p(&pos);
1501 if(b==0xb8) {
1502 // This is not always exactly right. Not sure that's possible.
1503 n = de_getu16le_p(&pos);
1504 n = (n<<4) + 0x100 - d->o_dcmpr_code->len;
1505 if(n>=0) {
1506 *pminmem = (n+0xf)>>4;
1511 static void do_write_data_only(deark *c, lctx *d)
1513 if(!d->o_dcmpr_code) return;
1514 dbuf_create_file_from_slice(d->o_dcmpr_code, 0, d->o_dcmpr_code->len, "bin", NULL, 0);
1517 // Generate the decompressed file
1518 static void do_write_dcmpr(deark *c, lctx *d)
1520 dbuf *outf = NULL;
1521 i64 amt_to_copy;
1523 if(d->errflag || !d->o_ei || !d->o_orig_header || !d->o_dcmpr_code || !d->o_reloc_table) return;
1524 de_dbg(c, "generating decompressed EXE file");
1525 de_dbg_indent(c, 1);
1527 outf = dbuf_create_output_file(c, "exe", NULL, 0);
1528 d->wrote_exe = 1;
1530 // Write the original header, up to the relocation table
1531 amt_to_copy = de_min_int(d->o_orig_header->len, d->o_ei->reloc_table_pos);
1532 dbuf_copy(d->o_orig_header, 0, amt_to_copy, outf);
1533 dbuf_truncate(outf, d->o_ei->reloc_table_pos);
1535 // Write the relocation table
1536 dbuf_copy(d->o_reloc_table, 0, d->o_reloc_table->len, outf);
1538 // Pad up to the start of DOS code.
1539 // (Note that PKLITE does not record data between the end of the relocation
1540 // table, and the start of DOS code, so we can't reconstruct that.)
1541 dbuf_truncate(outf, d->o_ei->start_of_dos_code);
1543 // Write the decompressed program code
1544 dbuf_copy(d->o_dcmpr_code, 0, d->o_dcmpr_code->len, outf);
1546 // "Overlay" segment
1547 if(d->ei->overlay_len>0) {
1548 dbuf_copy(c->infile, d->ei->end_of_dos_code, d->ei->overlay_len, outf);
1551 dbuf_close(outf);
1552 de_dbg_indent(c, -1);
1555 // Try to read the copy of the original EXE header, into d->o_orig_header.
1556 // Returns 0 if it doesn't exist, or if it seems bad.
1557 static int read_orig_header(deark *c, lctx *d)
1559 i64 orig_hdr_len;
1560 i64 orig_reloc_pos;
1561 i64 n1, n2;
1562 i64 dcmpr_bytes_expected;
1563 i64 orig_hdr_pos;
1564 const char *name;
1565 enum ohdisp_enum {
1566 OHDISP_MISSING_E, OHDISP_MISSING, OHDISP_PRESENT, OHDISP_BAD
1567 } ohdisp;
1569 if(d->dparams.extra_cmpr) {
1570 ohdisp = OHDISP_MISSING_E;
1571 goto done;
1573 orig_hdr_pos = d->ei->reloc_table_pos + 4*d->ei->num_relocs;
1575 orig_hdr_len = d->ei->start_of_dos_code - orig_hdr_pos; // tentative
1576 if(orig_hdr_len < 26) {
1577 ohdisp = OHDISP_MISSING;
1578 goto done;
1581 // Peek at the reloc table offs field to figure out how much to read
1582 orig_reloc_pos = de_getu16le(orig_hdr_pos + 22);
1583 if(orig_reloc_pos>=28 && orig_reloc_pos<2+orig_hdr_len) {
1584 orig_hdr_len = orig_reloc_pos-2;
1587 de_dbg(c, "orig. hdr: at %"I64_FMT", len=(2+)%"I64_FMT, orig_hdr_pos, orig_hdr_len);
1589 n1 = de_getu16le(orig_hdr_pos); // len of final block
1590 n2 = de_getu16le(orig_hdr_pos+2); // numBlocks
1591 if(n1>511 || n2==0) {
1592 ohdisp = OHDISP_BAD;
1593 goto done;
1596 dbuf_copy(c->infile, orig_hdr_pos, orig_hdr_len, d->o_orig_header);
1598 fmtutil_collect_exe_info(c, d->o_orig_header, d->o_ei);
1599 if(d->o_ei->reloc_table_pos<28) {
1600 d->o_ei->reloc_table_pos = 28;
1603 if((d->o_ei->regSS != d->footer.regSS) ||
1604 (d->o_ei->regSP != d->footer.regSP) ||
1605 (d->o_ei->regCS != d->footer.regCS) ||
1606 (d->o_ei->regIP != d->footer.regIP))
1608 ohdisp = OHDISP_BAD;
1609 goto done;
1612 if(d->o_ei->num_relocs != (d->o_reloc_table->len / 4)) {
1613 ohdisp = OHDISP_BAD;
1614 goto done;
1617 dcmpr_bytes_expected = d->o_ei->end_of_dos_code - d->o_ei->start_of_dos_code;
1619 if(d->o_dcmpr_code->len != dcmpr_bytes_expected) {
1620 de_warn(c, "Expected %"I64_FMT" decompressed bytes, got %"I64_FMT, dcmpr_bytes_expected,
1621 d->o_dcmpr_code->len);
1624 ohdisp = OHDISP_PRESENT;
1626 done:
1627 switch(ohdisp) {
1628 case OHDISP_PRESENT: name="present"; break;
1629 case OHDISP_MISSING_E: name="n/a"; break;
1630 case OHDISP_MISSING: name="missing"; break;
1631 default: name="bad"; break;
1633 de_dbg(c, "copy of orig hdr: %s", name);
1634 if(ohdisp==OHDISP_BAD) {
1635 de_warn(c, "Original header seems bad. Ignoring it.");
1637 return (ohdisp==OHDISP_PRESENT);
1640 static void reconstruct_header(deark *c, lctx *d)
1642 i64 num_relocs;
1643 const i64 reloc_table_start = 28;
1644 i64 start_of_dos_code;
1645 i64 end_of_dos_code;
1646 i64 minmem; // in 16-byte units
1647 i64 maxmem;
1649 // "MZ" should already be written
1650 if(d->o_orig_header->len!=2 || !d->footer_pos) {
1651 d->errflag = 1;
1652 return;
1655 // By default, keep the same values as the container. These are likely to
1656 // be higher than the original, but it's better to be too high than too low.
1657 minmem = de_getu16le(10);
1658 maxmem = de_getu16le(12);
1659 if(maxmem==0) {
1660 // Unlikely, but could possibly happen for beta files with the
1661 // load-high option
1662 maxmem = 65535;
1664 find_min_mem_needed(c, d, &minmem);
1665 // TODO: For maxmem, it may be possible to do better.
1666 if(maxmem<minmem) maxmem = minmem;
1668 num_relocs = d->o_reloc_table->len / 4;
1669 start_of_dos_code = de_pad_to_n(reloc_table_start + num_relocs*4, 16);
1670 end_of_dos_code = start_of_dos_code + d->o_dcmpr_code->len;
1671 dbuf_writeu16le(d->o_orig_header, end_of_dos_code%512);
1672 dbuf_writeu16le(d->o_orig_header, (end_of_dos_code+511)/512);
1673 dbuf_writeu16le(d->o_orig_header, num_relocs);
1674 dbuf_writeu16le(d->o_orig_header, start_of_dos_code/16);
1675 dbuf_writeu16le(d->o_orig_header, minmem);
1676 dbuf_writeu16le(d->o_orig_header, maxmem);
1677 dbuf_writei16le(d->o_orig_header, d->footer.regSS);
1678 dbuf_writeu16le(d->o_orig_header, d->footer.regSP);
1679 dbuf_writeu16le(d->o_orig_header, 0); // checksum
1680 dbuf_writeu16le(d->o_orig_header, d->footer.regIP);
1681 dbuf_writei16le(d->o_orig_header, d->footer.regCS);
1682 dbuf_writeu16le(d->o_orig_header, reloc_table_start);
1683 dbuf_writeu16le(d->o_orig_header, 0); // overlay indicator
1685 fmtutil_collect_exe_info(c, d->o_orig_header, d->o_ei);
1688 // Either copy the original header, or if we can't do that,
1689 // construct a new EXE header from other information.
1690 // Creates and populates d->o_orig_header, d->o_ei
1691 static void acquire_new_exe_header(deark *c, lctx *d)
1693 int ret;
1695 d->o_ei = de_malloc(c, sizeof(struct fmtutil_exe_info));
1696 d->o_orig_header = dbuf_create_membuf(c, 0, 0);
1697 dbuf_writeu16le(d->o_orig_header, 0x5a4d); // "MZ"
1699 ret = read_orig_header(c, d);
1700 if(ret) goto done; // If success, we're done. Otherwise try other method.
1702 dbuf_truncate(d->o_orig_header, 2);
1703 reconstruct_header(c, d);
1704 done:
1708 static void do_pklite_exe(deark *c, lctx *d)
1710 d->raw_mode = (u8)de_get_ext_option_bool(c, "pklite:raw", 0xff);
1712 fmtutil_collect_exe_info(c, c->infile, d->ei);
1714 de_read(d->epbytes, d->ei->entry_point, EPBYTES_LEN);
1715 d->intro_class_fmtutil = fmtutil_detect_pklite_by_exe_ep(c, d->epbytes, EPBYTES_LEN, 0xff);
1717 if(d->intro_class_fmtutil==0) {
1718 de_err(c, "Not a PKLITE-compressed file, or not a known type");
1719 d->errflag = 1;
1720 d->errmsg_handled = 1;
1721 goto done;
1724 de_declare_fmt(c, "PKLITE-compressed EXE");
1726 do_read_version_info(c, d, 28);
1728 do_analyze_pklite_exe(c, d);
1729 if(d->errflag) goto done;
1731 do_decompress(c, d);
1732 dbuf_flush(d->o_dcmpr_code);
1733 if(d->errflag) goto done;
1734 d->dcmpr_ok = 1;
1736 if(d->raw_mode==1) {
1737 do_write_data_only(c, d);
1738 goto done;
1741 do_read_reloc_table(c, d);
1742 if(d->errflag) goto done;
1744 d->footer_pos = d->reloc_tbl_endpos;
1745 if(d->footer_pos!=0) {
1746 i64 footer_capacity;
1748 footer_capacity = d->cmpr_data_area_endpos - d->footer_pos;
1749 de_dbg(c, "footer at %"I64_FMT", len=%"I64_FMT, d->footer_pos, footer_capacity);
1750 de_dbg_indent(c, 1);
1752 if(c->debug_level>=3) {
1753 de_dbg_hexdump(c, c->infile, d->footer_pos, footer_capacity, 32, "footer", 0);
1756 // Footer is usually 8 bytes, but there can be up to 15 extra bytes, to
1757 // accommodate the checksum feature.
1758 if(footer_capacity < 8) {
1759 d->footer_pos = 0; // Error
1761 else if(footer_capacity > 8+15) {
1762 de_warn(c, "Unexpected data at end of code segment (near %"I64_FMT")",
1763 d->footer_pos+8);
1766 if(d->footer_pos!=0) {
1767 d->footer.regSS = de_geti16le(d->footer_pos);
1768 d->footer.regSP = de_getu16le(d->footer_pos+2);
1769 d->footer.regCS = de_geti16le(d->footer_pos+4);
1770 d->footer.regIP = de_getu16le(d->footer_pos+6);
1773 de_dbg_indent(c, -1);
1776 if(d->footer_pos==0) {
1777 d->errflag = 1;
1778 goto done;
1781 acquire_new_exe_header(c, d);
1782 if(d->errflag) goto done;
1784 do_write_dcmpr(c, d);
1785 if(d->errflag) goto done;
1787 de_stdwarn_execomp(c);
1788 if(d->has_psp_sig) {
1789 de_warn(c, "This file has a tamper-detection feature (PSP signature \"%s\"). "
1790 "It might not run correctly when decompressed.",
1791 ((d->psp_sig_type==2) ? "pk" : "PK"));
1793 // TODO: It is possible to patch the decompressed file, so that it stands
1794 // a chance of passing this protection check. But it's not easy.
1797 done:
1801 static void analyze_intro_COM(deark *c, lctx *d)
1803 int saved_indent_level;
1805 de_dbg_indent_save(c, &saved_indent_level);
1807 de_dbg(c, "intro at 0");
1808 de_dbg_indent(c, 1);
1810 if(pkl_memmatch(&d->epbytes[0],
1811 (const u8*)"\xb8??\xba??\x3b\xc4\x73", 9, '?', 0))
1813 d->copier_class = INTRO_CLASS_COM_100;
1814 d->position2 = 10;
1816 else if(pkl_memmatch(&d->epbytes[0],
1817 (const u8*)"\x50\xb8??\xba??\x3b\xc4\x73", 10, '?', 0))
1819 d->copier_class = INTRO_CLASS_COM_150;
1820 d->position2 = 11;
1822 else if(pkl_memmatch(&d->epbytes[0],
1823 (const u8*)"\xba??\xa1\x02\x00\x2d??\x8c\xcb??????\x77", 18, '?', 0))
1825 d->copier_class = INTRO_CLASS_COM_BETA;
1826 d->position2 = read_and_follow_1byte_jump(d, 18);
1829 if(!d->position2) {
1830 d->errflag = 1;
1832 de_dbg_indent_restore(c, saved_indent_level);
1835 static void analyze_copier_COM(deark *c, lctx *d)
1837 int saved_indent_level;
1838 i64 pos = d->position2;
1839 i64 pos_of_decompr_pos_field = 0;
1841 de_dbg_indent_save(c, &saved_indent_level);
1842 if(pos<0 || pos+100>EPBYTES_LEN) goto done;
1844 de_dbg(c, "copier at %u", (UI)pos);
1845 de_dbg_indent(c, 1);
1847 if(pkl_memmatch(&d->epbytes[pos],
1848 (const u8*)"\x8b\xc4\x2d??\x25\xf0\xff\x8b\xf8\xb9??\xbe", 14, '?', 0))
1850 d->copier_class = COPIER_CLASS_COM_100;
1851 pos_of_decompr_pos_field = pos+14;
1853 else if(pkl_memmatch(&d->epbytes[pos],
1854 (const u8*)"\x8b\xc4\x2d??\x90\x25\xf0\xff\x8b\xf8\xb9??\x90\xbe", 16, '?', 0))
1856 d->copier_class = COPIER_CLASS_COM_115;
1857 pos_of_decompr_pos_field = pos+16;
1859 else if(pkl_memmatch(&d->epbytes[pos],
1860 (const u8*)"\xfa\xbc\x00\x02\x8e\xd0\xfb", 7, '?', 0))
1862 d->copier_class = COPIER_CLASS_COM_BETA;
1863 d->decompr_pos = pos+24;
1866 if(pos_of_decompr_pos_field) {
1867 d->decompr_pos = de_getu16le_direct(&d->epbytes[pos_of_decompr_pos_field]) - 0x100;
1870 done:
1871 if(!d->decompr_pos) {
1872 d->errflag = 1;
1874 de_dbg_indent_restore(c, saved_indent_level);
1877 static void analyze_decompressor_COM(deark *c, lctx *d)
1879 int saved_indent_level;
1880 i64 pos = d->decompr_pos;
1881 i64 keypos = 0;
1883 de_dbg_indent_save(c, &saved_indent_level);
1884 if(pos<0 || pos+100>EPBYTES_LEN) goto done;
1886 de_dbg(c, "decompressor at %u", (UI)pos);
1887 de_dbg_indent(c, 1);
1889 if(pkl_memmatch(&d->epbytes[pos],
1890 (const u8*)"\xfd\x8b\xf8\x4f\x4f\xbe", 6, '?', 0))
1892 d->decompr_class = DECOMPR_CLASS_COM_100;
1893 keypos = pos+6;
1895 else if(pkl_memmatch(&d->epbytes[pos],
1896 (const u8*)"\xfd\xbe??\x03\xf2\x8b\xfa\x4f\x4f", 10, '?', 0))
1898 d->decompr_class = DECOMPR_CLASS_COM_BETA;
1899 keypos = pos+2;
1902 if(keypos) {
1903 d->dparams.cmpr_data_pos = de_getu16le_direct(&d->epbytes[keypos]) + 2 - 0x100;
1906 done:
1907 if(d->dparams.cmpr_data_pos<1 || d->dparams.cmpr_data_pos>c->infile->len) {
1908 d->dparams.cmpr_data_pos = 0;
1909 d->errflag = 1;
1911 de_dbg_indent_restore(c, saved_indent_level);
1914 // This function's only real purpose is to set d->dparams.cmpr_data_pos.
1915 static void do_analyze_pklite_com(deark *c, lctx *d)
1917 d->dparams.large_cmpr = 0;
1918 d->dparams.extra_cmpr = 0;
1919 d->dparams.v120_cmpr = 0;
1921 analyze_intro_COM(c, d);
1922 if(d->errflag) goto done;
1924 analyze_copier_COM(c, d);
1925 if(d->errflag) goto done;
1927 analyze_decompressor_COM(c, d);
1929 done:
1933 static int pklite_com_has_copyright_string(dbuf *f, i64 verpos)
1935 u8 buf[4];
1937 if(verpos==38) {
1938 return !dbuf_memcmp(f, verpos+2, (const void*)"PK Copyr", 8);
1940 dbuf_read(f, buf, verpos+2, sizeof(buf));
1942 if((buf[0]=='P') && (buf[1]=='K' || buf[1]=='k') &&
1943 (buf[2]=='L' || buf[2]=='l') && (buf[3]=='I' || buf[3]=='i'))
1945 return 1;
1947 return 0;
1950 static int detect_pklite_com_quick(dbuf *f, i64 *pverpos)
1952 u8 b[10];
1954 dbuf_read(f, b, 0, sizeof(b));
1955 if(b[0]==0xb8 && b[3]==0xba && b[6]==0x3b && b[7]==0xc4) {
1956 if(b[9]==0x67) { // Probably v1.00-1.14
1957 *pverpos = 44;
1958 return 1;
1960 else if(b[9]==0x69) { // Probably v1.15 (usually mislabeled as 1.14)
1961 *pverpos = 46;
1962 return 1;
1965 else if(b[0]==0x50 && b[1]==0xb8 && b[4]==0xba && b[7]==0x3b) {
1966 *pverpos = 46; // v1.50-2.01
1967 return 1;
1969 else if(b[0]==0xba && b[3]==0xa1 && b[6]==0x2d && b[7]==0x20) {
1970 *pverpos = 36; // v1.00beta
1971 return 1;
1973 return 0;
1976 static void read_and_process_com_version_number(deark *c, lctx *d, i64 verpos)
1978 de_dbg(c, "version number pos: %"I64_FMT, verpos);
1979 do_read_version_info(c, d, verpos);
1982 static void report_detected_version_number_com(deark *c, lctx *d)
1984 const char *s = "?";
1986 if(d->dparams.cmpr_data_pos==500) {
1987 s = "1.00beta";
1989 else if(d->dparams.cmpr_data_pos==448) {
1990 switch(de_getbyte(260)) {
1991 case 0x1d: s = "1.00-1.03"; break;
1992 case 0x1c: s = "1.05-1.14"; break;
1993 default: s = "1.00-1.14"; break;
1996 else if(d->dparams.cmpr_data_pos==450) {
1997 s = "1.15";
1999 else if(d->dparams.cmpr_data_pos==464) {
2000 s = "1.50-2.01";
2003 de_dbg(c, "detected PKLITE version: %s", s);
2006 static void do_pklite_com(deark *c, lctx *d)
2008 i64 verpos = 0;
2010 if(!detect_pklite_com_quick(c->infile, &verpos)) {
2011 de_err(c, "Not a known/supported PKLITE format");
2012 goto done;
2015 d->is_com = 1;
2016 d->ei->f = c->infile;
2017 de_declare_fmt(c, "PKLITE-compressed COM");
2019 de_read(d->epbytes, 0, EPBYTES_LEN);
2021 read_and_process_com_version_number(c, d, verpos);
2023 if(c->module_disposition==DE_MODDISP_AUTODETECT) {
2024 // Check if the user requested that we not do executable decompression.
2025 // This feels like a hack. I'm not sure how it should work.
2026 if(de_get_ext_option_bool(c, "execomp", 1) == 0) {
2027 goto done;
2031 do_analyze_pklite_com(c, d);
2033 report_detected_version_number_com(c, d);
2035 if(d->errflag || d->dparams.cmpr_data_pos==0) {
2036 de_err(c, "Unsupported PKLITE format version");
2037 goto done;
2040 do_decompress(c, d);
2041 if(!d->o_dcmpr_code) goto done;
2042 dbuf_flush(d->o_dcmpr_code);
2043 if(d->errflag) goto done;
2044 d->dcmpr_ok = 1;
2046 dbuf_create_file_from_slice(d->o_dcmpr_code, 0, d->o_dcmpr_code->len, "com", NULL, 0);
2047 de_stdwarn_execomp(c);
2049 done:
2053 static void de_run_pklite(deark *c, de_module_params *mparams)
2055 lctx *d = NULL;
2056 u8 buf[2];
2058 d = de_malloc(c, sizeof(lctx));
2059 d->ei = de_malloc(c, sizeof(struct fmtutil_exe_info));
2061 de_read(buf, 0, 2);
2062 if((buf[0]=='M' && buf[1]=='Z') || (buf[0]=='Z' && buf[1]=='M')) {
2063 do_pklite_exe(c, d);
2065 else {
2066 do_pklite_com(c, d);
2069 if(d) {
2070 if(d->errflag && !d->errmsg_handled) {
2071 de_err(c, "PKLITE decompression failed");
2074 if(!d->is_com && d->raw_mode==0xff && d->dcmpr_ok && !d->wrote_exe) {
2075 de_info(c, "Note: Try \"-opt pklite:raw\" to decompress the raw data");
2078 dbuf_close(d->o_orig_header);
2079 dbuf_close(d->o_reloc_table);
2080 dbuf_close(d->o_dcmpr_code);
2081 de_free(c, d->o_ei);
2082 de_free(c, d->ei);
2083 de_free(c, d);
2087 // By design, only detects COM format.
2088 // EXE files are handled by the "exe" module by default.
2089 static int de_identify_pklite(deark *c)
2091 i64 verpos;
2093 if(c->infile->len>65280) return 0;
2094 if(detect_pklite_com_quick(c->infile, &verpos)) {
2095 if(pklite_com_has_copyright_string(c->infile, verpos)) {
2096 return 100;
2098 // TODO: False positives may be possible. Maybe we should be more
2099 // discriminating.
2100 return 15;
2102 return 0;
2105 static void de_help_pklite(deark *c)
2107 de_msg(c, "-opt pklite:raw : Instead of an EXE file, write raw decompressed data");
2110 void de_module_pklite(deark *c, struct deark_module_info *mi)
2112 mi->id = "pklite";
2113 mi->desc = "PKLITE-compressed EXE/COM";
2114 mi->run_fn = de_run_pklite;
2115 mi->identify_fn = de_identify_pklite;
2116 mi->help_fn = de_help_pklite;