show_gmr: Improved identification
[deark.git] / modules / exectext.c
blob27ef691b0a4a728c795db3bd928641c899a5d9bd
1 // This file is part of Deark.
2 // Copyright (C) 2024 Jason Summers
3 // See the file COPYING for terms of use.
5 // TXT2COM, etc.
7 #include <deark-private.h>
8 #include <deark-fmtutil.h>
// Declarations for the module setup functions (de_module_*) that appear
// later in this file. (Presumably DE_DECLARE_MODULE expands to a prototype;
// its definition is not in this file.)
9 DE_DECLARE_MODULE(de_module_txt2com);
10 DE_DECLARE_MODULE(de_module_show_gmr);
11 DE_DECLARE_MODULE(de_module_asc2com);
12 DE_DECLARE_MODULE(de_module_doc2com);
13 DE_DECLARE_MODULE(de_module_doc2com_dkn);
14 DE_DECLARE_MODULE(de_module_gtxt);
15 DE_DECLARE_MODULE(de_module_readmake);
17 typedef struct localctx_exectext {
18 de_encoding input_encoding;
19 UI fmtcode;
20 u8 opt_fmtconv;
21 u8 opt_encconv;
22 u8 errflag;
23 u8 need_errmsg;
24 u8 found_text;
25 u8 is_encrypted;
26 u8 allow_tlen_0;
27 i64 tpos;
28 i64 tlen;
29 u8 chartypes[32]; // 1=printable, 2=control
30 } lctx;
32 static void exectext_check_tpos(deark *c, lctx *d)
34 if(d->tpos<1 || d->tlen<0 || d->tpos+d->tlen>c->infile->len) {
35 d->errflag = 1;
36 d->need_errmsg = 1;
#if 0
// (Currently compiled out.)
// Validates the text segment, then copies it unchanged to a ".txt" output
// file.
static void exectext_extract_verbatim(deark *c, lctx *d)
{
	dbuf *outf = NULL;

	exectext_check_tpos(c, d);
	if(!d->errflag) {
		outf = dbuf_create_output_file(c, "txt", NULL, 0);
		dbuf_copy(c->infile, d->tpos, d->tlen, outf);
	}

	dbuf_close(outf);
}
#endif
56 // For byte values 0-31, leaves them or converts them, depending on the flags
57 // in d->chartypes[].
58 // Note for TXT2COM:
59 // - dbuf_copy_slice_convert_to_utf8() in HYBRID mode doesn't quite do what
60 // we want, mainly because it treats 0x00 and 0x09 as controls, while
61 // TXT2COM treats them as graphics.
62 // - Early versions of TXT2COM stop when they see 0x1a, but later versions don't.
63 // We behave like later versions.
64 // - We might not handle an unpaired LF or CR byte exactly like TXT2COM does.
65 static void exectext_convert_and_write(deark *c, lctx *d, dbuf *outf)
67 struct de_encconv_state es;
68 i64 endpos = d->tpos + d->tlen;
69 i64 pos;
71 de_encconv_init(&es, DE_EXTENC_MAKE(d->input_encoding, DE_ENCSUBTYPE_PRINTABLE));
72 if(c->write_bom) {
73 dbuf_write_uchar_as_utf8(outf, 0xfeff);
76 pos = d->tpos;
77 while(pos < endpos) {
78 u8 x;
80 x = de_getbyte_p(&pos);
81 if(x<32 && d->chartypes[x]!=0) {
82 dbuf_writebyte(outf, x);
84 else {
85 de_rune u;
87 u = de_char_to_unicode_ex((i32)x, &es);
88 dbuf_write_uchar_as_utf8(outf, u);
93 // Extract or convert in the typical way.
94 // - Validates the source position
95 // - Respects d->input_encoding if relevant
96 // - Respects d->opt_encconv and d->chartypes[]
97 static void exectext_extract_default(deark *c, lctx *d)
99 dbuf *outf = NULL;
101 if(d->errflag) goto done;
102 if(d->tpos<=0 || d->tlen<0 || d->tpos+d->tlen>c->infile->len ||
103 (d->tlen==0 && !d->allow_tlen_0))
105 d->errflag = 1;
106 d->need_errmsg = 1;
107 goto done;
110 outf = dbuf_create_output_file(c, "txt", NULL, 0);
111 if(d->opt_encconv) {
112 dbuf_enable_wbuffer(outf);
113 exectext_convert_and_write(c, d, outf);
115 else {
116 dbuf_copy(c->infile, d->tpos, d->tlen, outf);
119 done:
120 dbuf_close(outf);
123 static void txt2com_read_textpos(deark *c, lctx *d, i64 pos1)
125 i64 pos = pos1;
126 i64 pos_of_tlen;
128 de_dbg(c, "pos of tlen pointer: %"I64_FMT, pos1);
130 pos_of_tlen = de_getu16le_p(&pos) - 256;
131 de_dbg(c, "pos of tlen: %"I64_FMT, pos_of_tlen);
133 pos += 2;
135 d->tpos = de_getu16le_p(&pos) - 256;
136 de_dbg(c, "tpos: %"I64_FMT, d->tpos);
138 d->tlen = de_getu16le(pos_of_tlen);
139 de_dbg(c, "tlen: %"I64_FMT, d->tlen);
142 // For all TXT2COM versions, and TXT2RES v1.0.
143 static void txt2com_search1(deark *c, lctx *d)
145 #define TXT2COM_BUF_POS1 700
146 #define TXT2COM_BUF_LEN1 3000
147 u8 *mem = NULL;
148 i64 foundpos;
149 int ret;
151 mem = de_malloc(c, TXT2COM_BUF_LEN1);
152 de_read(mem, TXT2COM_BUF_POS1, TXT2COM_BUF_LEN1);
153 ret = de_memsearch_match(mem, TXT2COM_BUF_LEN1,
154 (const u8*)"\x8b\xd8\xb4\x40\x8b\x0e??\x8d\x16??\xcd\x21\xb4\x3e", 16,
155 '?', &foundpos);
156 if(!ret) goto done;
157 d->found_text = 1;
158 txt2com_read_textpos(c, d, TXT2COM_BUF_POS1+foundpos+6);
160 done:
161 de_free(c, mem);
164 // For:
165 // * TXT2RES v2.03 (= code variant 1)
166 // * TXT2RES v2.06 (= code variant 1)
167 // * TXT2RES v2.10 (= code variant 1)
168 // * TXT2PAS v2.03 (= code variant 2)
169 // * TXT2PAS v2.06 (= code variant 3)
170 // * TXT2PAS v2.10 (= code variant 3)
171 // The code variants have enough common bytes that we try to get away with
172 // only doing a single search.
173 static void txt2com_search2(deark *c, lctx *d)
175 #define TXT2COM_BUF_POS2 7500
176 #define TXT2COM_BUF_LEN2 4000
177 u8 *mem = NULL;
178 i64 foundpos;
179 int ret;
181 mem = de_malloc(c, TXT2COM_BUF_LEN2);
182 de_read(mem, TXT2COM_BUF_POS2, TXT2COM_BUF_LEN2);
183 ret = de_memsearch_match(mem, TXT2COM_BUF_LEN2,
184 (const u8*)"\xcd?\xa1??\xd1\xe0\x03\x06??\x8d???\x03", 16,
185 '?', &foundpos);
186 if(!ret) goto done;
187 d->found_text = 1;
188 txt2com_read_textpos(c, d, TXT2COM_BUF_POS2+foundpos+9);
190 done:
191 de_free(c, mem);
194 static void destroy_lctx(deark *c, lctx *d)
196 if(!d) return;
197 de_free(c, d);
200 static void de_run_txt2com(deark *c, de_module_params *mparams)
202 lctx *d = NULL;
204 d = de_malloc(c, sizeof(lctx));
205 d->input_encoding = de_get_input_encoding(c, NULL, DE_ENCODING_CP437);
206 d->opt_encconv = (u8)de_get_ext_option_bool(c, "text:encconv", 1);
207 if(d->input_encoding==DE_ENCODING_ASCII) {
208 d->opt_encconv = 0;
210 d->chartypes[10] = 1;
211 d->chartypes[13] = 1;
212 de_declare_fmt(c, "TXT2COM");
214 txt2com_search1(c, d);
215 if(!d->found_text) {
216 txt2com_search2(c, d);
218 if(!d->found_text) {
219 d->need_errmsg = 1;
220 goto done;
222 if(d->errflag) goto done;
224 exectext_extract_default(c, d);
226 done:
227 if(d) {
228 if(d->need_errmsg) {
229 de_err(c, "Not a TXT2COM file, or unsupported version");
231 destroy_lctx(c, d);
235 static int de_identify_txt2com(deark *c)
237 u8 b1;
238 u8 flag = 0;
239 u8 buf[28];
240 const char *ids[3] = {"TXT2COM C", "TXT2RES C", "TXT2PAS C"};
242 if(c->infile->len>65280) return 0;
243 b1 = de_getbyte(0);
244 if(b1!=0x8d && b1!=0xe8 && b1!=0xe9) return 0;
245 de_read(buf, 0, sizeof(buf));
246 if(b1==0x8d) {
247 if(!de_memcmp(&buf[14], ids[0], 9)) flag = 1;
249 else if(b1==0xe8) {
250 if(!de_memcmp(&buf[5], ids[0], 9)) flag = 1;
252 else if(b1==0xe9) {
253 if(!de_memcmp(&buf[3], ids[0], 9)) flag = 1;
254 else if(!de_memcmp(&buf[3], ids[1], 9)) flag = 1;
255 else if(!de_memcmp(&buf[3], ids[2], 9)) flag = 1;
257 return flag ? 92 : 0;
260 static void print_encconv_option(deark *c)
262 de_msg(c, "-opt text:encconv=0 : Don't convert to UTF-8");
265 static void de_help_txt2com(deark *c)
267 print_encconv_option(c);
270 void de_module_txt2com(deark *c, struct deark_module_info *mi)
272 mi->id = "txt2com";
273 mi->desc = "TXT2COM (K. P. Graham)";
274 mi->run_fn = de_run_txt2com;
275 mi->identify_fn = de_identify_txt2com;
276 mi->help_fn = de_help_txt2com;
279 ///////////////////////////////////////////////////
280 // SHOW (Gary M. Raymond, Simple Software)
282 // Finding the text in a precise way seems difficult.
283 // Instead, we search for the byte pattern that appears right before the start
284 // of the text.
285 // The text *length* does not seem to be present in the file at all. The text
286 // just ends at the 0x1a byte that should be at the end of the file.
287 static void showgmr_search(deark *c, lctx *d)
289 #define SHOW_BUF_POS1 1800
290 #define SHOW_BUF_LEN1 1200
291 u8 *mem = NULL;
292 i64 foundpos;
293 int ret;
295 mem = de_malloc(c, SHOW_BUF_LEN1);
296 de_read(mem, SHOW_BUF_POS1, SHOW_BUF_LEN1);
298 // v2.0, 2.0A, 2.1(?)
299 ret = de_memsearch_match(mem, SHOW_BUF_LEN1,
300 (const u8*)"\x06?\x03\x19\xa1\x6c\x00\x3b\x06?\x03\x72\xf7\x58\x1f\xc3", 16,
301 '?', &foundpos);
302 if(ret) {
303 d->found_text = 1;
304 d->tpos = SHOW_BUF_POS1+foundpos+16;
305 goto done;
308 // v1.0, 1.4, 1.5
309 ret = de_memsearch_match(mem, SHOW_BUF_LEN1,
310 (const u8*)"\x4e\x8a\x04\x3c\x0a\x75\xf9\x4d\x75\xf5\x46\x89\x36?\x02\xc3", 16,
311 '?', &foundpos);
312 if(ret) {
313 d->found_text = 1;
314 d->tpos = SHOW_BUF_POS1+foundpos+16;
315 goto done;
318 done:
319 de_free(c, mem);
322 static void de_run_show_gmr(deark *c, de_module_params *mparams)
324 lctx *d = NULL;
326 d = de_malloc(c, sizeof(lctx));
327 d->input_encoding = de_get_input_encoding(c, NULL, DE_ENCODING_CP437);
328 d->opt_encconv = (u8)de_get_ext_option_bool(c, "text:encconv", 1);
329 if(d->input_encoding==DE_ENCODING_ASCII) {
330 d->opt_encconv = 0;
332 de_declare_fmt(c, "SHOW (executable text)");
333 d->chartypes[10] = 1;
334 d->chartypes[13] = 1;
336 showgmr_search(c, d);
337 if(!d->found_text) {
338 d->need_errmsg = 1;
339 goto done;
341 de_dbg(c, "tpos: %"I64_FMT, d->tpos);
343 d->tlen = c->infile->len - d->tpos;
344 if(de_getbyte(c->infile->len-1) == 0x1a) {
345 d->tlen--;
348 exectext_extract_default(c, d);
350 done:
351 if(d) {
352 if(d->need_errmsg) {
353 de_err(c, "Not a SHOW file, or unsupported version");
355 destroy_lctx(c, d);
359 static int de_identify_show_gmr(deark *c)
361 if(c->infile->len>65280) return 0;
362 if(de_getbyte(0) != 0xe9) return 0;
363 // Testing the last byte of the file may screen out corrupt files, but
364 // more importantly screens out the SHOW.COM utility itself, which
365 // annoyingly has the same the start-of-file signature as the files it
366 // generates.
367 if(de_getbyte(c->infile->len-1) != 0x1a) return 0;
368 // Byte at offset 3 is 0x30 in pristine files, but it's used to save the
369 // color scheme (it's a self-modifying COM file).
370 if(dbuf_memcmp(c->infile, 4,
371 (const u8*)"\x00\x1f\xa0\x00\x00\x53\x48\x4f\x57", 9))
373 return 0;
375 return 100;
378 static void de_help_show_gmr(deark *c)
380 print_encconv_option(c);
383 void de_module_show_gmr(deark *c, struct deark_module_info *mi)
385 mi->id = "show_gmr";
386 mi->desc = "SHOW (G. M. Raymond)";
387 mi->run_fn = de_run_show_gmr;
388 mi->identify_fn = de_identify_show_gmr;
389 mi->help_fn = de_help_show_gmr;
392 ///////////////////////////////////////////////////
393 // Asc2Com (MorganSoft)
395 struct asc2com_detection_data {
396 u8 found;
397 u8 is_compressed;
398 UI fmtcode;
399 i64 tpos;
402 struct asc2com_idinfo {
403 const u8 sig1[3];
404 // flags&0x03: sig2 type 1=\x49\xe3..., 2="ASC2COM"
405 // flags&0x80: compressed
406 u8 flags;
407 u16 sig2pos;
408 u16 txtpos;
409 UI fmtcode;
412 // lister codes: 0=full/default, 1=page, 2=lite,
413 // 3=wide, 4=print, 5=compressed
414 static const struct asc2com_idinfo asc2com_idinfo_arr[] = {
415 { {0xe8,0xd2,0x00}, 0x01, 867, 1350, 0x11020000 }, // 1.10b
416 { {0xe8,0x25,0x01}, 0x01, 1283, 1819, 0x12510100 }, // 1.25 (?)
417 { {0xe8,0x25,0x01}, 0x01, 1288, 1840, 0x12510200 }, // 1.25 (?)
418 { {0xe8,0x1d,0x01}, 0x01, 1360, 1877, 0x13010000 }, // 1.30
419 { {0xe9,0x18,0x05}, 0x01, 2827, 3734, 0x16510100 }, // 1.65 full (?)
420 { {0xe9,0x18,0x05}, 0x01, 2834, 3750, 0x16610000 }, // 1.66 full
421 { {0xe9,0x1d,0x05}, 0x01, 2916, 4050, 0x17510000 }, // 1.75 full
422 { {0xe9,0x18,0x05}, 0x01, 2911, 4051, 0x17610000 }, // 1.76 full
423 { {0xe9,0x12,0x06}, 0x01, 3203, 4517, 0x20010000 }, // 2.00 full
424 { {0xe9,0x21,0x06}, 0x01, 3231, 4533, 0x20060000 }, // 2.00f-2.05 full
426 { {0xe8,0x06,0x01}, 0x01, 1337, 1854, 0x13010001 }, // 1.30 page
427 { {0xe9,0xc4,0x04}, 0x01, 2725, 3638, 0x16510101 }, // 1.65 page (?)
428 { {0xe9,0xc4,0x04}, 0x01, 2732, 3638, 0x16610001 }, // 1.66 page
429 { {0xe9,0xc9,0x04}, 0x01, 2814, 3955, 0x17510001 }, // 1.75-1.76 page
430 { {0xe9,0x12,0x06}, 0x01, 3185, 4485, 0x20010001 }, // 2.00 page
431 { {0xe9,0x21,0x06}, 0x01, 3213, 4517, 0x20060001 }, // 2.00f-2.05 page
433 { {0xe9,0x7e,0x01}, 0x01, 1523, 1555, 0x16510102 }, // 1.65 lite (?)
434 { {0xe9,0x81,0x01}, 0x01, 1526, 1558, 0x16610002 }, // 1.66 lite
435 { {0xe9,0x8f,0x01}, 0x01, 1722, 1799, 0x17510002 }, // 1.75-1.76 lite
436 { {0xe9,0xfc,0x01}, 0x01, 1868, 2005, 0x20010002 }, // 2.00-2.05 lite
438 { {0xe9,0x8c,0x01}, 0x01, 1747, 1816, 0x16610003 }, // 1.66 wide
439 { {0xe9,0xf5,0x01}, 0x01, 2045, 2161, 0x17510003 }, // 1.75-1.76 wide
440 { {0xe9,0x4d,0x02}, 0x01, 2165, 2341, 0x20010003 }, // 2.00-2.05 wide
442 { {0xbb,0x01,0x00}, 0x02, 240, 382, 0x13010004 }, // 1.30 print
443 { {0xeb,0x03,0x00}, 0x02, 245, 387, 0x16610004 }, // 1.66 print
444 { {0xeb,0x2b,0x00}, 0x02, 295, 437, 0x17510004 }, // 1.75-1.76 print
445 { {0xeb,0x40,0x00}, 0x02, 462, 613, 0x20010004 }, // 2.00-2.05 print
447 { {0xe9,0xaa,0x05}, 0x82, 1078, 10263, 0x20010005 }, // 2.00 compr
448 { {0xe9,0xab,0x05}, 0x82, 1078, 10263, 0x20060005 }, // 2.00f compr
449 { {0xe9,0xad,0x05}, 0x82, 1065, 10407, 0x20110005 }, // 2.01 compr
450 { {0xe9,0xa8,0x05}, 0x82, 1065, 10391, 0x20510005 } // 2.05 compr
453 static void asc2com_identify(deark *c, struct asc2com_detection_data *idd, UI idmode)
455 u8 buf[3];
456 size_t k;
457 const struct asc2com_idinfo *found_item = NULL;
459 dbuf_read(c->infile, buf, 0, 3);
460 if(buf[0]!=0xe8 && buf[0]!=0xe9 && buf[0]!=0xbb && buf[0]!=0xeb) return;
462 for(k=0; k<DE_ARRAYCOUNT(asc2com_idinfo_arr); k++) {
463 const struct asc2com_idinfo *t;
464 u8 sig_type;
466 t = &asc2com_idinfo_arr[k];
468 if(buf[0]==t->sig1[0] && buf[1]==t->sig1[1] &&
469 (t->sig1[0]==0xeb || (buf[2]==t->sig1[2])))
472 sig_type = t->flags & 0x03;
473 if(sig_type==1) {
474 if(!dbuf_memcmp(c->infile, (i64)t->sig2pos,
475 (const void*)"\x49\xe3\x0e\x33\xd2\x8a\x14\xfe\xc2\x03\xf2\x49", 12))
477 found_item = t;
480 else if(sig_type==2) {
481 if(!dbuf_memcmp(c->infile, (i64)t->sig2pos,
482 (const void*)"ASC2COM", 7))
484 found_item = t;
489 if(found_item) {
490 break;
493 if(!found_item) return;
494 idd->found = 1;
495 if(idmode) return;
497 idd->tpos = (i64)found_item->txtpos;
498 idd->fmtcode = found_item->fmtcode;
499 if(found_item->flags & 0x80) {
500 idd->is_compressed = 1;
504 static void asc2com_filter(deark *c, lctx *d, dbuf *tmpf,
505 i64 ipos1, i64 endpos, dbuf *outf)
507 i64 ipos;
508 u8 n;
510 ipos = ipos1;
511 while(ipos < endpos) {
512 n = dbuf_getbyte_p(tmpf, &ipos);
513 dbuf_copy(tmpf, ipos, (i64)n, outf);
514 dbuf_write(outf, (const u8*)"\x0d\x0a", 2);
515 ipos += (i64)n;
519 static void asc2com_extract_compressed(deark *c, lctx *d)
521 struct de_dfilter_in_params dcmpri;
522 struct de_dfilter_out_params dcmpro;
523 struct de_dfilter_results dres;
524 struct de_lzw_params delzwp;
525 dbuf *tmpf = NULL;
526 dbuf *outf = NULL;
528 tmpf = dbuf_create_membuf(c, 0, 0);
530 de_dfilter_init_objects(c, &dcmpri, &dcmpro, &dres);
531 dcmpri.f = c->infile;
532 dcmpri.pos = d->tpos;
533 dcmpri.len = d->tlen;
534 dcmpro.f = tmpf;
535 dcmpro.len_known = 0;
537 de_zeromem(&delzwp, sizeof(struct de_lzw_params));
538 delzwp.fmt = DE_LZWFMT_ASC2COM;
539 fmtutil_decompress_lzw(c, &dcmpri, &dcmpro, &dres, &delzwp);
540 dbuf_flush(tmpf);
542 if(tmpf->len>0) {
543 outf = dbuf_create_output_file(c, "txt", NULL, 0);
544 dbuf_enable_wbuffer(outf);
545 asc2com_filter(c, d, tmpf, 0, tmpf->len, outf);
548 if(dres.errcode) {
549 de_err(c, "%s", de_dfilter_get_errmsg(c, &dres));
550 goto done;
553 done:
554 dbuf_close(tmpf);
555 dbuf_close(outf);
558 static void asc2com_extract_uncompressed(deark *c, lctx *d)
560 dbuf *outf = NULL;
562 outf = dbuf_create_output_file(c, "txt", NULL, 0);
563 dbuf_enable_wbuffer(outf);
564 asc2com_filter(c, d, c->infile, d->tpos, d->tlen, outf);
565 dbuf_close(outf);
568 static void de_run_asc2com(deark *c, de_module_params *mparams)
570 lctx *d = NULL;
571 struct asc2com_detection_data idd;
573 de_zeromem(&idd, sizeof(struct asc2com_detection_data));
574 asc2com_identify(c, &idd, 0);
575 if(!idd.found) {
576 de_err(c, "Not a known Asc2Com format");
577 goto done;
579 de_dbg(c, "format code: 0x%08x", idd.fmtcode);
580 de_dbg(c, "compressed: %u", (UI)idd.is_compressed);
582 d = de_malloc(c, sizeof(lctx));
583 d->tpos = idd.tpos;
584 de_dbg(c, "tpos: %"I64_FMT, d->tpos);
585 d->tlen = c->infile->len - d->tlen;
586 // TODO: Can we read and use the original filename?
587 if(idd.is_compressed) {
588 asc2com_extract_compressed(c, d);
590 else {
591 asc2com_extract_uncompressed(c, d);
594 done:
595 destroy_lctx(c, d);
598 static int de_identify_asc2com(deark *c)
600 struct asc2com_detection_data idd;
602 if(c->infile->len>65280) return 0;
603 de_zeromem(&idd, sizeof(struct asc2com_detection_data));
604 asc2com_identify(c, &idd, 1);
605 if(idd.found) return 72;
606 return 0;
609 void de_module_asc2com(deark *c, struct deark_module_info *mi)
611 mi->id = "asc2com";
612 mi->desc = "Asc2Com executable text";
613 mi->run_fn = de_run_asc2com;
614 mi->identify_fn = de_identify_asc2com;
617 ///////////////////////////////////////////////////
618 // DOC2COM (Gerald DePyper)
620 struct doc2com_detection_data {
621 u8 found;
622 UI fmtcode;
625 static void doc2com_detect(deark *c, struct doc2com_detection_data *idd, UI idmode)
627 u8 buf[22];
629 dbuf_read(c->infile, buf, 0, sizeof(buf));
631 if(buf[0]==0xbe && buf[15]==0x72) {
632 if(!de_memcmp(&buf[3], (const void*)"\xb9\x18\x00\xe8\xb2\x01\xe2\xfb\x3b\x36", 10)) {
633 idd->fmtcode = 10; // old unversioned releases
634 idd->found = 1;
637 else if(buf[0]==0xfc && buf[1]==0xbe && buf[16]==0x72) {
638 if(!de_memcmp(&buf[4], (const void*)"\xb9\x18\x00\xe8\x2f\x02\xe2\xfb\x3b\x36", 10)) {
639 idd->fmtcode = 20; // v1.2
640 idd->found = 1;
643 else if(buf[0]==0xfc && buf[5]==0x49) {
644 // Expecting all v1.3+ files to start with:
645 // fc ?? ?? ?? ?? 49 8b 36 ?? ?? 8b fe ac 32 04 aa e2 fa ac 34 ff aa ...
646 // First 3 bytes:
647 // fc 8b 0e if encrypted
648 // fc eb 13 if not encrypted
649 if(!de_memcmp(&buf[10],
650 (const void*)"\x8b\xfe\xac\x32\x04\xaa\xe2\xfa\xac\x34\xff\xaa", 12))
652 idd->fmtcode = 30; // v1.3+
653 idd->found = 1;
658 static void doc2com_analyze(deark *c, lctx *d)
660 i64 pos_a, pos_b, pos_c, pos_d;
661 i64 pos_of_tpos;
662 i64 pos_of_tlen;
663 i64 pos_of_endpos;
664 i64 endpos;
666 if(d->fmtcode==30) {
667 if(de_getbyte(1) != 0xeb) {
668 d->is_encrypted = 1;
672 if(d->fmtcode==10) {
673 pos_of_tpos = 1;
675 else if(d->fmtcode==20) {
676 pos_of_tpos = 2;
678 else if(d->fmtcode==30) {
679 pos_d = de_getu16le(8);
680 pos_of_tpos = pos_d - 0x100;
682 else {
683 d->errflag = 1;
684 d->need_errmsg = 1;
685 goto done;
688 de_dbg(c, "pos of tpos: %"I64_FMT, pos_of_tpos);
689 pos_a = de_getu16le(pos_of_tpos);
690 d->tpos = pos_a - 0x100;
691 de_dbg(c, "tpos: %"I64_FMT, d->tpos);
693 if(d->fmtcode==10 || d->fmtcode==20) {
694 if(d->fmtcode==20) {
695 pos_b = de_getu16le(25);
697 else { // 10
698 pos_b = de_getu16le(24);
700 pos_of_endpos = pos_b - 0x100;
701 de_dbg(c, "pos of endpos: %"I64_FMT, pos_of_endpos);
702 pos_c = de_getu16le(pos_of_endpos);
703 endpos = pos_c - 0x100;
704 de_dbg(c, "endpos: %"I64_FMT, endpos);
705 d->tlen = endpos - d->tpos;
707 else { // 30
708 pos_b = de_getu16le(3);
709 pos_of_tlen = pos_b - 0x100;
710 de_dbg(c, "pos of tlen: %"I64_FMT, pos_of_tlen);
711 d->tlen = de_getu16le(pos_of_tlen);
714 de_dbg(c, "tlen: %"I64_FMT, d->tlen);
715 de_dbg(c, "encrypted: %u", (UI)d->is_encrypted);
716 done:
720 static void doc2com_output(deark *c, lctx *d)
722 dbuf *outf = NULL;
724 if(d->tlen<0 || d->tpos<0 || d->tpos+d->tlen>c->infile->len) {
725 d->errflag = 1;
726 d->need_errmsg = 1;
727 goto done;
730 outf = dbuf_create_output_file(c, "txt", NULL, 0);
731 if(d->is_encrypted) {
732 u8 this_byte = 0;
733 u8 next_byte = 0;
734 u8 init_flag = 0;
735 i64 i;
737 dbuf_enable_wbuffer(outf);
739 for(i=0; i<d->tlen; i++) {
740 u8 b;
742 if(init_flag) {
743 this_byte = next_byte;
745 else {
746 this_byte = de_getbyte(d->tpos+i);
747 init_flag = 1;
750 if(i+1 < d->tlen) {
751 next_byte = de_getbyte(d->tpos+i+1);
753 else {
754 next_byte = 0xff;
757 b = this_byte ^ next_byte;
758 dbuf_writebyte(outf, b);
761 else {
762 dbuf_copy(c->infile, d->tpos, d->tlen, outf);
765 done:
766 dbuf_close(outf);
769 static void de_run_doc2com(deark *c, de_module_params *mparams)
771 lctx *d = NULL;
772 struct doc2com_detection_data idd;
774 d = de_malloc(c, sizeof(lctx));
775 de_zeromem(&idd, sizeof(struct doc2com_detection_data));
776 doc2com_detect(c, &idd, 0);
777 if(!idd.found) {
778 d->need_errmsg = 1;
779 goto done;
781 d->fmtcode = idd.fmtcode;
782 de_dbg(c, "fmt code: %u", d->fmtcode);
783 doc2com_analyze(c, d);
784 if(d->errflag) goto done;
785 doc2com_output(c, d);
787 done:
788 if(d) {
789 if(d->need_errmsg) {
790 de_err(c, "Not a DOC2COM file, or unsupported version");
792 destroy_lctx(c, d);
796 static int de_identify_doc2com(deark *c)
798 struct doc2com_detection_data idd;
799 u8 b;
801 if(c->infile->len>65280) return 0;
802 b = de_getbyte(0);
803 if(b!=0xbe && b!=0xfc) return 0;
805 de_zeromem(&idd, sizeof(struct doc2com_detection_data));
806 doc2com_detect(c, &idd, 1);
807 if(idd.found) return 73;
808 return 0;
811 void de_module_doc2com(deark *c, struct deark_module_info *mi)
813 mi->id = "doc2com";
814 mi->desc = "DOC2COM executable text (G. DePyper)";
815 mi->run_fn = de_run_doc2com;
816 mi->identify_fn = de_identify_doc2com;
819 ///////////////////////////////////////////////////
820 // DOC2COM (Dan K. Nelson)
822 static void de_run_doc2com_dkn(deark *c, de_module_params *mparams)
824 lctx *d = NULL;
825 i64 pos_of_tpos = 0;
826 i64 pos_of_tlen = 0;
827 UI n;
828 UI b1, b2;
830 d = de_malloc(c, sizeof(lctx));
831 d->input_encoding = de_get_input_encoding(c, NULL, DE_ENCODING_CP437);
832 d->opt_encconv = (u8)de_get_ext_option_bool(c, "text:encconv", 1);
833 if(d->input_encoding==DE_ENCODING_ASCII) {
834 d->opt_encconv = 0;
836 d->chartypes[7] = 1;
837 d->chartypes[8] = 1;
838 d->chartypes[9] = 1;
839 d->chartypes[10] = 1;
840 d->chartypes[13] = 1;
841 d->chartypes[27] = 1;
843 n = (UI)de_getu16le(1);
844 if(n==0x0093) {
845 d->fmtcode = 1;
846 pos_of_tpos = 171;
847 pos_of_tlen = 178;
849 else if(n==0x0107) {
850 d->fmtcode = 2;
851 pos_of_tpos = 293;
852 pos_of_tlen = 300;
855 if(d->fmtcode==0) goto done;
856 if(de_getbyte(pos_of_tpos-1)!=0xbf) goto done;
857 de_dbg(c, "fmt code: %u", d->fmtcode);
858 d->tpos = de_getu16le(pos_of_tpos);
859 d->tpos -= 0x100;
860 if(d->fmtcode==2) d->tpos--;
861 de_dbg(c, "tpos: %"I64_FMT, d->tpos);
863 b1 = de_getbyte(pos_of_tlen);
864 b2 = de_getbyte(pos_of_tlen+2);
865 d->tlen = ((UI)b1<<8) | b2;
866 de_dbg(c, "tlen: %"I64_FMT, d->tlen);
868 exectext_extract_default(c, d);
870 done:
871 if(d) {
872 if(d->need_errmsg) {
873 de_err(c, "Not a DOC2COM file, or unsupported version");
875 destroy_lctx(c, d);
// Identification function for doc2com_dkn.
// Rejects files larger than 65280 bytes. The first four bytes (read as a
// big-endian integer) select one of two known variants, which determines the
// position at which a text string is expected. Returns 79 if the string is
// found there, otherwise 0.
879 static int de_identify_doc2com_dkn(deark *c)
881 UI n;
882 i64 pos;
884 if(c->infile->len>65280) return 0;
885 n = (UI)de_getu32be(0);
886 if(n==0xe9930000U) {
887 pos = 27;
889 else if(n==0xe9070100U) {
890 pos = 19;
892 else {
893 return 0;
// NOTE(review): the compare length (14) is larger than the literal as shown
// here. This listing appears to have collapsed runs of whitespace, so the
// literal probably contains more spaces in the real file -- verify that
// the literal is at least 14 bytes long.
896 if(dbuf_memcmp(c->infile, pos, (const void*)"Press Home P", 14)) {
897 return 0;
899 return 79;
902 static void de_help_doc2com_dkn(deark *c)
904 print_encconv_option(c);
907 void de_module_doc2com_dkn(deark *c, struct deark_module_info *mi)
909 mi->id = "doc2com_dkn";
910 mi->desc = "DOC2COM executable text (D. Nelson)";
911 mi->run_fn = de_run_doc2com_dkn;
912 mi->identify_fn = de_identify_doc2com_dkn;
913 mi->help_fn = de_help_doc2com_dkn;
916 ///////////////////////////////////////////////////
917 // GTXT / MakeScroll (Eric Gans)
919 // This format can reasonably be converted to plain text.
920 static void gtxt_convert_to_text(deark *c, lctx *d, dbuf *outf, u8 to_utf8)
922 struct de_encconv_state es;
923 u8 esc_mode = 0;
924 u8 esc_mask = 0;
925 i64 endpos = d->tpos + d->tlen;
926 i64 pos;
928 de_encconv_init(&es, DE_EXTENC_MAKE(d->input_encoding, DE_ENCSUBTYPE_PRINTABLE));
929 if(to_utf8 && c->write_bom) {
930 dbuf_write_uchar_as_utf8(outf, 0xfeff);
933 pos = d->tpos;
934 while(pos < endpos) {
935 u8 x_raw, x_mod;
937 x_raw = de_getbyte_p(&pos);
938 x_mod = x_raw;
940 if(esc_mode) {
941 x_mod = x_raw & esc_mask;
942 esc_mode = 0;
944 else if(x_raw=='%') {
945 esc_mode = 1;
946 esc_mask = 0xff;
947 continue;
949 else if(x_raw=='^') {
950 esc_mode = 1;
951 esc_mask = 0x3f;
952 continue;
954 else {
955 // GTXT ignores the high bit, except in '%' escapes.
956 x_mod &= 0x7f;
959 if(to_utf8) {
960 de_rune u;
962 if(x_raw=='~') {
963 u = 0x240c; // Page break -> SYMBOL FOR FORM FEED, I guess.
965 else if(x_mod<32 && d->chartypes[(UI)x_mod]!=0) {
966 u = (de_rune)x_mod;
968 else {
969 u = de_char_to_unicode_ex((i32)x_mod, &es);
971 dbuf_write_uchar_as_utf8(outf, u);
973 else {
974 u8 b;
976 if(x_raw=='~') {
977 b = 0x0c; // Page break -> form feed, I guess.
979 else {
980 b = x_mod;
982 dbuf_writebyte(outf, b);
987 static void de_run_gtxt(deark *c, de_module_params *mparams)
989 lctx *d = NULL;
990 i64 endpos;
991 UI b1, b2;
992 dbuf *outf = NULL;
994 d = de_malloc(c, sizeof(lctx));
995 d->input_encoding = de_get_input_encoding(c, NULL, DE_ENCODING_CP437);
996 d->opt_encconv = (u8)de_get_ext_option_bool(c, "text:encconv", 1);
997 if(d->input_encoding==DE_ENCODING_ASCII) {
998 d->opt_encconv = 0;
1000 d->opt_fmtconv = (u8)de_get_ext_option_bool(c, "text:fmtconv", 1);
1002 d->chartypes[7] = 1;
1003 d->chartypes[8] = 1;
1004 d->chartypes[9] = 1;
1005 d->chartypes[10] = 1;
1006 d->chartypes[13] = 1;
1007 d->chartypes[27] = 1;
1009 d->tpos = 188;
1010 de_dbg(c, "tpos: %"I64_FMT, d->tpos);
1012 endpos = c->infile->len;
1013 b1 = de_getbyte(endpos-2);
1014 b2 = de_getbyte(endpos-1);
1016 if(b1==0) {
1017 endpos -= 2;
1019 else if(b2==0) {
1020 endpos -= 1;
1023 d->tlen = endpos - d->tpos;
1024 de_dbg(c, "tlen: %"I64_FMT, d->tlen);
1025 exectext_check_tpos(c, d);
1026 if(d->errflag) goto done;
1028 outf = dbuf_create_output_file(c, "txt", NULL, 0);
1029 dbuf_enable_wbuffer(outf);
1031 if(d->opt_fmtconv==0) {
1032 dbuf_copy(c->infile, d->tpos, d->tlen, outf);
1034 else if(d->opt_fmtconv && d->opt_encconv==0) {
1035 gtxt_convert_to_text(c, d, outf, 0);
1037 else { // fmtconv==1 & encconv==1
1038 gtxt_convert_to_text(c, d, outf, 1);
1041 done:
1042 if(d) {
1043 if(d->need_errmsg) {
1044 de_err(c, "Not a GTXT file, or unsupported version");
1046 destroy_lctx(c, d);
1048 dbuf_close(outf);
1051 static int de_identify_gtxt(deark *c)
1053 if(c->infile->len>65280) return 0;
1054 if(de_getbyte(0) != 0xbb) return 0;
1056 if(dbuf_memcmp(c->infile, 1, (const void*)"\xbc\x01\xb4\x02\xb1\x00\x8a", 7)) {
1057 return 0;
1059 if(dbuf_memcmp(c->infile, 95, (const void*)"\x73\x01\xc3\x2c\x40\xc3\xcd\x20", 8)) {
1060 return 0;
1062 return 100;
1065 static void de_help_gtxt(deark *c)
1067 de_msg(c, "-opt text:fmtconv=0 : Extract source code");
1068 print_encconv_option(c);
1071 void de_module_gtxt(deark *c, struct deark_module_info *mi)
1073 mi->id = "gtxt";
1074 mi->desc = "GTXT (E. Gans)";
1075 mi->run_fn = de_run_gtxt;
1076 mi->identify_fn = de_identify_gtxt;
1077 mi->help_fn = de_help_gtxt;
1080 ///////////////////////////////////////////////////
1081 // READMAKE (by Bruce Guthrie and Wayne Software)
1083 struct readmake_ctx {
1084 const char *msgpfx;
1085 struct fmtutil_exe_info *ei;
1086 struct fmtutil_specialexe_detection_data edd;
1089 // On apparent success, sets d->tpos to nonzero.
1090 static void readmake_find_text(deark *c, struct readmake_ctx *rmctx, lctx *d)
1092 i64 pos;
1093 i64 n;
1095 pos = rmctx->ei->end_of_dos_code;
1096 n = de_getu32le(pos);
1097 pos += n;
1099 n = de_getu32le(pos);
1100 if(n==4) {
1101 // Later files have an extra field (expected value 4) or segment
1102 // (expected length 4). The segment after that is expected to
1103 // be larger. We use that to tell the difference.
1104 pos += 4;
1105 n = de_getu32le(pos);
1107 else if(n<14) {
1108 d->need_errmsg = 1;
1109 goto done;
1112 pos += n;
1113 d->tpos = pos;
1114 de_dbg(c, "tpos: %"I64_FMT, d->tpos);
1116 // TODO?: There might be a better way to figure out the length, but for
1117 // pristine files we can use the end of file.
1118 d->tlen = c->infile->len - d->tpos;
1120 done:
1124 static void de_run_readmake(deark *c, de_module_params *mparams)
1126 struct readmake_ctx *rmctx = NULL;
1127 lctx *d = NULL;
1129 d = de_malloc(c, sizeof(lctx));
1131 rmctx = de_malloc(c, sizeof(struct readmake_ctx));
1132 rmctx->msgpfx = "[READMAKE] ";
1133 rmctx->ei = de_malloc(c, sizeof(struct fmtutil_exe_info));
1135 d->input_encoding = de_get_input_encoding(c, NULL, DE_ENCODING_CP437);
1136 d->opt_encconv = (u8)de_get_ext_option_bool(c, "text:encconv", 1);
1137 if(d->input_encoding==DE_ENCODING_ASCII) {
1138 d->opt_encconv = 0;
1141 d->allow_tlen_0 = 1;
1142 d->chartypes[8] = 1;
1143 d->chartypes[9] = 1;
1144 d->chartypes[10] = 1;
1145 d->chartypes[13] = 1;
1147 fmtutil_collect_exe_info(c, c->infile, rmctx->ei);
1149 rmctx->edd.restrict_to_fmt = DE_SPECIALEXEFMT_READMAKE;
1150 fmtutil_detect_specialexe(c, rmctx->ei, &rmctx->edd);
1151 if(rmctx->edd.detected_fmt!=DE_SPECIALEXEFMT_READMAKE) {
1152 d->need_errmsg = 1;
1153 goto done;
1156 readmake_find_text(c, rmctx, d);
1157 if(d->tpos==0 || d->errflag) goto done;
1159 exectext_extract_default(c, d);
1161 done:
1162 if(d) {
1163 if(d->need_errmsg && rmctx) {
1164 de_err(c, "%sBad or unsupported READMAKE file", rmctx->msgpfx);
1166 de_free(c, d);
1167 d = NULL;
1169 if(rmctx) {
1170 de_free(c, rmctx->ei);
1171 de_free(c, rmctx);
1172 rmctx = NULL;
1176 static void de_help_readmake(deark *c)
1178 print_encconv_option(c);
1181 void de_module_readmake(deark *c, struct deark_module_info *mi)
1183 mi->id = "readmake";
1184 mi->desc = "READMAKE executable text";
1185 mi->run_fn = de_run_readmake;
1186 mi->help_fn = de_help_readmake;