1 // This file is part of Deark.
2 // Copyright (C) 2024 Jason Summers
3 // See the file COPYING for terms of use.
7 #include <deark-private.h>
8 #include <deark-fmtutil.h>
9 DE_DECLARE_MODULE(de_module_txt2com
);
10 DE_DECLARE_MODULE(de_module_show_gmr
);
11 DE_DECLARE_MODULE(de_module_asc2com
);
12 DE_DECLARE_MODULE(de_module_doc2com
);
13 DE_DECLARE_MODULE(de_module_doc2com_dkn
);
14 DE_DECLARE_MODULE(de_module_gtxt
);
15 DE_DECLARE_MODULE(de_module_readmake
);
17 typedef struct localctx_exectext
{
18 de_encoding input_encoding
;
29 u8 chartypes
[32]; // 1=printable, 2=control
32 static void exectext_check_tpos(deark
*c
, lctx
*d
)
34 if(d
->tpos
<1 || d
->tlen
<0 || d
->tpos
+d
->tlen
>c
->infile
->len
) {
41 static void exectext_extract_verbatim(deark
*c
, lctx
*d
)
45 exectext_check_tpos(c
, d
);
46 if(d
->errflag
) goto done
;
48 outf
= dbuf_create_output_file(c
, "txt", NULL
, 0);
49 dbuf_copy(c
->infile
, d
->tpos
, d
->tlen
, outf
);
56 // For byte values 0-31, leaves them or converts them, depending on the flags
59 // - dbuf_copy_slice_convert_to_utf8() in HYBRID mode doesn't quite do what
60 // we want, mainly because it treats 0x00 and 0x09 as controls, while
61 // TXT2COM treats them as graphics.
62 // - Early versions of TXT2COM stop when they see 0x1a, but later versions don't.
63 // We behave like later versions.
64 // - We might not handle an unpaired LF or CR byte exactly like TXT2COM does.
65 static void exectext_convert_and_write(deark
*c
, lctx
*d
, dbuf
*outf
)
67 struct de_encconv_state es
;
68 i64 endpos
= d
->tpos
+ d
->tlen
;
71 de_encconv_init(&es
, DE_EXTENC_MAKE(d
->input_encoding
, DE_ENCSUBTYPE_PRINTABLE
));
73 dbuf_write_uchar_as_utf8(outf
, 0xfeff);
80 x
= de_getbyte_p(&pos
);
81 if(x
<32 && d
->chartypes
[x
]!=0) {
82 dbuf_writebyte(outf
, x
);
87 u
= de_char_to_unicode_ex((i32
)x
, &es
);
88 dbuf_write_uchar_as_utf8(outf
, u
);
93 // Extract or convert in the typical way.
94 // - Validates the source position
95 // - Respects d->input_encoding if relevant
96 // - Respects d->opt_encconv and d->chartypes[]
97 static void exectext_extract_default(deark
*c
, lctx
*d
)
101 if(d
->errflag
) goto done
;
102 if(d
->tpos
<=0 || d
->tlen
<0 || d
->tpos
+d
->tlen
>c
->infile
->len
||
103 (d
->tlen
==0 && !d
->allow_tlen_0
))
110 outf
= dbuf_create_output_file(c
, "txt", NULL
, 0);
112 dbuf_enable_wbuffer(outf
);
113 exectext_convert_and_write(c
, d
, outf
);
116 dbuf_copy(c
->infile
, d
->tpos
, d
->tlen
, outf
);
123 static void txt2com_read_textpos(deark
*c
, lctx
*d
, i64 pos1
)
128 de_dbg(c
, "pos of tlen pointer: %"I64_FMT
, pos1
);
130 pos_of_tlen
= de_getu16le_p(&pos
) - 256;
131 de_dbg(c
, "pos of tlen: %"I64_FMT
, pos_of_tlen
);
135 d
->tpos
= de_getu16le_p(&pos
) - 256;
136 de_dbg(c
, "tpos: %"I64_FMT
, d
->tpos
);
138 d
->tlen
= de_getu16le(pos_of_tlen
);
139 de_dbg(c
, "tlen: %"I64_FMT
, d
->tlen
);
142 // For all TXT2COM versions, and TXT2RES v1.0.
143 static void txt2com_search1(deark
*c
, lctx
*d
)
145 #define TXT2COM_BUF_POS1 700
146 #define TXT2COM_BUF_LEN1 3000
151 mem
= de_malloc(c
, TXT2COM_BUF_LEN1
);
152 de_read(mem
, TXT2COM_BUF_POS1
, TXT2COM_BUF_LEN1
);
153 ret
= de_memsearch_match(mem
, TXT2COM_BUF_LEN1
,
154 (const u8
*)"\x8b\xd8\xb4\x40\x8b\x0e??\x8d\x16??\xcd\x21\xb4\x3e", 16,
158 txt2com_read_textpos(c
, d
, TXT2COM_BUF_POS1
+foundpos
+6);
165 // * TXT2RES v2.03 (= code variant 1)
166 // * TXT2RES v2.06 (= code variant 1)
167 // * TXT2RES v2.10 (= code variant 1)
168 // * TXT2PAS v2.03 (= code variant 2)
169 // * TXT2PAS v2.06 (= code variant 3)
170 // * TXT2PAS v2.10 (= code variant 3)
171 // The code variants have enough common bytes that we try to get away with
172 // only doing a single search.
173 static void txt2com_search2(deark
*c
, lctx
*d
)
175 #define TXT2COM_BUF_POS2 7500
176 #define TXT2COM_BUF_LEN2 4000
181 mem
= de_malloc(c
, TXT2COM_BUF_LEN2
);
182 de_read(mem
, TXT2COM_BUF_POS2
, TXT2COM_BUF_LEN2
);
183 ret
= de_memsearch_match(mem
, TXT2COM_BUF_LEN2
,
184 (const u8
*)"\xcd?\xa1??\xd1\xe0\x03\x06??\x8d???\x03", 16,
188 txt2com_read_textpos(c
, d
, TXT2COM_BUF_POS2
+foundpos
+9);
194 static void destroy_lctx(deark
*c
, lctx
*d
)
200 static void de_run_txt2com(deark
*c
, de_module_params
*mparams
)
204 d
= de_malloc(c
, sizeof(lctx
));
205 d
->input_encoding
= de_get_input_encoding(c
, NULL
, DE_ENCODING_CP437
);
206 d
->opt_encconv
= (u8
)de_get_ext_option_bool(c
, "text:encconv", 1);
207 if(d
->input_encoding
==DE_ENCODING_ASCII
) {
210 d
->chartypes
[10] = 1;
211 d
->chartypes
[13] = 1;
212 de_declare_fmt(c
, "TXT2COM");
214 txt2com_search1(c
, d
);
216 txt2com_search2(c
, d
);
222 if(d
->errflag
) goto done
;
224 exectext_extract_default(c
, d
);
229 de_err(c
, "Not a TXT2COM file, or unsupported version");
235 static int de_identify_txt2com(deark
*c
)
240 const char *ids
[3] = {"TXT2COM C", "TXT2RES C", "TXT2PAS C"};
242 if(c
->infile
->len
>65280) return 0;
244 if(b1
!=0x8d && b1
!=0xe8 && b1
!=0xe9) return 0;
245 de_read(buf
, 0, sizeof(buf
));
247 if(!de_memcmp(&buf
[14], ids
[0], 9)) flag
= 1;
250 if(!de_memcmp(&buf
[5], ids
[0], 9)) flag
= 1;
253 if(!de_memcmp(&buf
[3], ids
[0], 9)) flag
= 1;
254 else if(!de_memcmp(&buf
[3], ids
[1], 9)) flag
= 1;
255 else if(!de_memcmp(&buf
[3], ids
[2], 9)) flag
= 1;
257 return flag
? 92 : 0;
260 static void print_encconv_option(deark
*c
)
262 de_msg(c
, "-opt text:encconv=0 : Don't convert to UTF-8");
265 static void de_help_txt2com(deark
*c
)
267 print_encconv_option(c
);
270 void de_module_txt2com(deark
*c
, struct deark_module_info
*mi
)
273 mi
->desc
= "TXT2COM (K. P. Graham)";
274 mi
->run_fn
= de_run_txt2com
;
275 mi
->identify_fn
= de_identify_txt2com
;
276 mi
->help_fn
= de_help_txt2com
;
279 ///////////////////////////////////////////////////
280 // SHOW (Gary M. Raymond, Simple Software)
282 // Finding the text in a precise way seems difficult.
283 // Instead, we search for the byte pattern that appears right before the start
285 // The text *length* does not seem to be present in the file at all. The text
286 // just ends at the 0x1a byte that should be at the end of the file.
287 static void showgmr_search(deark
*c
, lctx
*d
)
289 #define SHOW_BUF_POS1 1800
290 #define SHOW_BUF_LEN1 1200
295 mem
= de_malloc(c
, SHOW_BUF_LEN1
);
296 de_read(mem
, SHOW_BUF_POS1
, SHOW_BUF_LEN1
);
298 // v2.0, 2.0A, 2.1(?)
299 ret
= de_memsearch_match(mem
, SHOW_BUF_LEN1
,
300 (const u8
*)"\x06?\x03\x19\xa1\x6c\x00\x3b\x06?\x03\x72\xf7\x58\x1f\xc3", 16,
304 d
->tpos
= SHOW_BUF_POS1
+foundpos
+16;
309 ret
= de_memsearch_match(mem
, SHOW_BUF_LEN1
,
310 (const u8
*)"\x4e\x8a\x04\x3c\x0a\x75\xf9\x4d\x75\xf5\x46\x89\x36?\x02\xc3", 16,
314 d
->tpos
= SHOW_BUF_POS1
+foundpos
+16;
322 static void de_run_show_gmr(deark
*c
, de_module_params
*mparams
)
326 d
= de_malloc(c
, sizeof(lctx
));
327 d
->input_encoding
= de_get_input_encoding(c
, NULL
, DE_ENCODING_CP437
);
328 d
->opt_encconv
= (u8
)de_get_ext_option_bool(c
, "text:encconv", 1);
329 if(d
->input_encoding
==DE_ENCODING_ASCII
) {
332 de_declare_fmt(c
, "SHOW (executable text)");
333 d
->chartypes
[10] = 1;
334 d
->chartypes
[13] = 1;
336 showgmr_search(c
, d
);
341 de_dbg(c
, "tpos: %"I64_FMT
, d
->tpos
);
343 d
->tlen
= c
->infile
->len
- d
->tpos
;
344 if(de_getbyte(c
->infile
->len
-1) == 0x1a) {
348 exectext_extract_default(c
, d
);
353 de_err(c
, "Not a SHOW file, or unsupported version");
359 static int de_identify_show_gmr(deark
*c
)
361 if(c
->infile
->len
>65280) return 0;
362 if(de_getbyte(0) != 0xe9) return 0;
363 // Testing the last byte of the file may screen out corrupt files, but
364 // more importantly screens out the SHOW.COM utility itself, which
// annoyingly has the same start-of-file signature as the files it
367 if(de_getbyte(c
->infile
->len
-1) != 0x1a) return 0;
368 // Byte at offset 3 is 0x30 in pristine files, but it's used to save the
369 // color scheme (it's a self-modifying COM file).
370 if(dbuf_memcmp(c
->infile
, 4,
371 (const u8
*)"\x00\x1f\xa0\x00\x00\x53\x48\x4f\x57", 9))
378 static void de_help_show_gmr(deark
*c
)
380 print_encconv_option(c
);
383 void de_module_show_gmr(deark
*c
, struct deark_module_info
*mi
)
386 mi
->desc
= "SHOW (G. M. Raymond)";
387 mi
->run_fn
= de_run_show_gmr
;
388 mi
->identify_fn
= de_identify_show_gmr
;
389 mi
->help_fn
= de_help_show_gmr
;
392 ///////////////////////////////////////////////////
393 // Asc2Com (MorganSoft)
395 struct asc2com_detection_data
{
402 struct asc2com_idinfo
{
404 // flags&0x03: sig2 type 1=\x49\xe3..., 2="ASC2COM"
405 // flags&0x80: compressed
412 // lister codes: 0=full/default, 1=page, 2=lite,
413 // 3=wide, 4=print, 5=compressed
414 static const struct asc2com_idinfo asc2com_idinfo_arr
[] = {
415 { {0xe8,0xd2,0x00}, 0x01, 867, 1350, 0x11020000 }, // 1.10b
416 { {0xe8,0x25,0x01}, 0x01, 1283, 1819, 0x12510100 }, // 1.25 (?)
417 { {0xe8,0x25,0x01}, 0x01, 1288, 1840, 0x12510200 }, // 1.25 (?)
418 { {0xe8,0x1d,0x01}, 0x01, 1360, 1877, 0x13010000 }, // 1.30
419 { {0xe9,0x18,0x05}, 0x01, 2827, 3734, 0x16510100 }, // 1.65 full (?)
420 { {0xe9,0x18,0x05}, 0x01, 2834, 3750, 0x16610000 }, // 1.66 full
421 { {0xe9,0x1d,0x05}, 0x01, 2916, 4050, 0x17510000 }, // 1.75 full
422 { {0xe9,0x18,0x05}, 0x01, 2911, 4051, 0x17610000 }, // 1.76 full
423 { {0xe9,0x12,0x06}, 0x01, 3203, 4517, 0x20010000 }, // 2.00 full
424 { {0xe9,0x21,0x06}, 0x01, 3231, 4533, 0x20060000 }, // 2.00f-2.05 full
426 { {0xe8,0x06,0x01}, 0x01, 1337, 1854, 0x13010001 }, // 1.30 page
427 { {0xe9,0xc4,0x04}, 0x01, 2725, 3638, 0x16510101 }, // 1.65 page (?)
428 { {0xe9,0xc4,0x04}, 0x01, 2732, 3638, 0x16610001 }, // 1.66 page
429 { {0xe9,0xc9,0x04}, 0x01, 2814, 3955, 0x17510001 }, // 1.75-1.76 page
430 { {0xe9,0x12,0x06}, 0x01, 3185, 4485, 0x20010001 }, // 2.00 page
431 { {0xe9,0x21,0x06}, 0x01, 3213, 4517, 0x20060001 }, // 2.00f-2.05 page
433 { {0xe9,0x7e,0x01}, 0x01, 1523, 1555, 0x16510102 }, // 1.65 lite (?)
434 { {0xe9,0x81,0x01}, 0x01, 1526, 1558, 0x16610002 }, // 1.66 lite
435 { {0xe9,0x8f,0x01}, 0x01, 1722, 1799, 0x17510002 }, // 1.75-1.76 lite
436 { {0xe9,0xfc,0x01}, 0x01, 1868, 2005, 0x20010002 }, // 2.00-2.05 lite
438 { {0xe9,0x8c,0x01}, 0x01, 1747, 1816, 0x16610003 }, // 1.66 wide
439 { {0xe9,0xf5,0x01}, 0x01, 2045, 2161, 0x17510003 }, // 1.75-1.76 wide
440 { {0xe9,0x4d,0x02}, 0x01, 2165, 2341, 0x20010003 }, // 2.00-2.05 wide
442 { {0xbb,0x01,0x00}, 0x02, 240, 382, 0x13010004 }, // 1.30 print
443 { {0xeb,0x03,0x00}, 0x02, 245, 387, 0x16610004 }, // 1.66 print
444 { {0xeb,0x2b,0x00}, 0x02, 295, 437, 0x17510004 }, // 1.75-1.76 print
445 { {0xeb,0x40,0x00}, 0x02, 462, 613, 0x20010004 }, // 2.00-2.05 print
447 { {0xe9,0xaa,0x05}, 0x82, 1078, 10263, 0x20010005 }, // 2.00 compr
448 { {0xe9,0xab,0x05}, 0x82, 1078, 10263, 0x20060005 }, // 2.00f compr
449 { {0xe9,0xad,0x05}, 0x82, 1065, 10407, 0x20110005 }, // 2.01 compr
450 { {0xe9,0xa8,0x05}, 0x82, 1065, 10391, 0x20510005 } // 2.05 compr
453 static void asc2com_identify(deark
*c
, struct asc2com_detection_data
*idd
, UI idmode
)
457 const struct asc2com_idinfo
*found_item
= NULL
;
459 dbuf_read(c
->infile
, buf
, 0, 3);
460 if(buf
[0]!=0xe8 && buf
[0]!=0xe9 && buf
[0]!=0xbb && buf
[0]!=0xeb) return;
462 for(k
=0; k
<DE_ARRAYCOUNT(asc2com_idinfo_arr
); k
++) {
463 const struct asc2com_idinfo
*t
;
466 t
= &asc2com_idinfo_arr
[k
];
468 if(buf
[0]==t
->sig1
[0] && buf
[1]==t
->sig1
[1] &&
469 (t
->sig1
[0]==0xeb || (buf
[2]==t
->sig1
[2])))
472 sig_type
= t
->flags
& 0x03;
474 if(!dbuf_memcmp(c
->infile
, (i64
)t
->sig2pos
,
475 (const void*)"\x49\xe3\x0e\x33\xd2\x8a\x14\xfe\xc2\x03\xf2\x49", 12))
480 else if(sig_type
==2) {
481 if(!dbuf_memcmp(c
->infile
, (i64
)t
->sig2pos
,
482 (const void*)"ASC2COM", 7))
493 if(!found_item
) return;
497 idd
->tpos
= (i64
)found_item
->txtpos
;
498 idd
->fmtcode
= found_item
->fmtcode
;
499 if(found_item
->flags
& 0x80) {
500 idd
->is_compressed
= 1;
504 static void asc2com_filter(deark
*c
, lctx
*d
, dbuf
*tmpf
,
505 i64 ipos1
, i64 endpos
, dbuf
*outf
)
511 while(ipos
< endpos
) {
512 n
= dbuf_getbyte_p(tmpf
, &ipos
);
513 dbuf_copy(tmpf
, ipos
, (i64
)n
, outf
);
514 dbuf_write(outf
, (const u8
*)"\x0d\x0a", 2);
519 static void asc2com_extract_compressed(deark
*c
, lctx
*d
)
521 struct de_dfilter_in_params dcmpri
;
522 struct de_dfilter_out_params dcmpro
;
523 struct de_dfilter_results dres
;
524 struct de_lzw_params delzwp
;
528 tmpf
= dbuf_create_membuf(c
, 0, 0);
530 de_dfilter_init_objects(c
, &dcmpri
, &dcmpro
, &dres
);
531 dcmpri
.f
= c
->infile
;
532 dcmpri
.pos
= d
->tpos
;
533 dcmpri
.len
= d
->tlen
;
535 dcmpro
.len_known
= 0;
537 de_zeromem(&delzwp
, sizeof(struct de_lzw_params
));
538 delzwp
.fmt
= DE_LZWFMT_ASC2COM
;
539 fmtutil_decompress_lzw(c
, &dcmpri
, &dcmpro
, &dres
, &delzwp
);
543 outf
= dbuf_create_output_file(c
, "txt", NULL
, 0);
544 dbuf_enable_wbuffer(outf
);
545 asc2com_filter(c
, d
, tmpf
, 0, tmpf
->len
, outf
);
549 de_err(c
, "%s", de_dfilter_get_errmsg(c
, &dres
));
558 static void asc2com_extract_uncompressed(deark
*c
, lctx
*d
)
562 outf
= dbuf_create_output_file(c
, "txt", NULL
, 0);
563 dbuf_enable_wbuffer(outf
);
564 asc2com_filter(c
, d
, c
->infile
, d
->tpos
, d
->tlen
, outf
);
568 static void de_run_asc2com(deark
*c
, de_module_params
*mparams
)
571 struct asc2com_detection_data idd
;
573 de_zeromem(&idd
, sizeof(struct asc2com_detection_data
));
574 asc2com_identify(c
, &idd
, 0);
576 de_err(c
, "Not a known Asc2Com format");
579 de_dbg(c
, "format code: 0x%08x", idd
.fmtcode
);
580 de_dbg(c
, "compressed: %u", (UI
)idd
.is_compressed
);
582 d
= de_malloc(c
, sizeof(lctx
));
584 de_dbg(c
, "tpos: %"I64_FMT
, d
->tpos
);
585 d
->tlen
= c
->infile
->len
- d
->tlen
;
586 // TODO: Can we read and use the original filename?
587 if(idd
.is_compressed
) {
588 asc2com_extract_compressed(c
, d
);
591 asc2com_extract_uncompressed(c
, d
);
598 static int de_identify_asc2com(deark
*c
)
600 struct asc2com_detection_data idd
;
602 if(c
->infile
->len
>65280) return 0;
603 de_zeromem(&idd
, sizeof(struct asc2com_detection_data
));
604 asc2com_identify(c
, &idd
, 1);
605 if(idd
.found
) return 72;
609 void de_module_asc2com(deark
*c
, struct deark_module_info
*mi
)
612 mi
->desc
= "Asc2Com executable text";
613 mi
->run_fn
= de_run_asc2com
;
614 mi
->identify_fn
= de_identify_asc2com
;
617 ///////////////////////////////////////////////////
618 // DOC2COM (Gerald DePyper)
620 struct doc2com_detection_data
{
625 static void doc2com_detect(deark
*c
, struct doc2com_detection_data
*idd
, UI idmode
)
629 dbuf_read(c
->infile
, buf
, 0, sizeof(buf
));
631 if(buf
[0]==0xbe && buf
[15]==0x72) {
632 if(!de_memcmp(&buf
[3], (const void*)"\xb9\x18\x00\xe8\xb2\x01\xe2\xfb\x3b\x36", 10)) {
633 idd
->fmtcode
= 10; // old unversioned releases
637 else if(buf
[0]==0xfc && buf
[1]==0xbe && buf
[16]==0x72) {
638 if(!de_memcmp(&buf
[4], (const void*)"\xb9\x18\x00\xe8\x2f\x02\xe2\xfb\x3b\x36", 10)) {
639 idd
->fmtcode
= 20; // v1.2
643 else if(buf
[0]==0xfc && buf
[5]==0x49) {
644 // Expecting all v1.3+ files to start with:
645 // fc ?? ?? ?? ?? 49 8b 36 ?? ?? 8b fe ac 32 04 aa e2 fa ac 34 ff aa ...
647 // fc 8b 0e if encrypted
648 // fc eb 13 if not encrypted
649 if(!de_memcmp(&buf
[10],
650 (const void*)"\x8b\xfe\xac\x32\x04\xaa\xe2\xfa\xac\x34\xff\xaa", 12))
652 idd
->fmtcode
= 30; // v1.3+
658 static void doc2com_analyze(deark
*c
, lctx
*d
)
660 i64 pos_a
, pos_b
, pos_c
, pos_d
;
667 if(de_getbyte(1) != 0xeb) {
675 else if(d
->fmtcode
==20) {
678 else if(d
->fmtcode
==30) {
679 pos_d
= de_getu16le(8);
680 pos_of_tpos
= pos_d
- 0x100;
688 de_dbg(c
, "pos of tpos: %"I64_FMT
, pos_of_tpos
);
689 pos_a
= de_getu16le(pos_of_tpos
);
690 d
->tpos
= pos_a
- 0x100;
691 de_dbg(c
, "tpos: %"I64_FMT
, d
->tpos
);
693 if(d
->fmtcode
==10 || d
->fmtcode
==20) {
695 pos_b
= de_getu16le(25);
698 pos_b
= de_getu16le(24);
700 pos_of_endpos
= pos_b
- 0x100;
701 de_dbg(c
, "pos of endpos: %"I64_FMT
, pos_of_endpos
);
702 pos_c
= de_getu16le(pos_of_endpos
);
703 endpos
= pos_c
- 0x100;
704 de_dbg(c
, "endpos: %"I64_FMT
, endpos
);
705 d
->tlen
= endpos
- d
->tpos
;
708 pos_b
= de_getu16le(3);
709 pos_of_tlen
= pos_b
- 0x100;
710 de_dbg(c
, "pos of tlen: %"I64_FMT
, pos_of_tlen
);
711 d
->tlen
= de_getu16le(pos_of_tlen
);
714 de_dbg(c
, "tlen: %"I64_FMT
, d
->tlen
);
715 de_dbg(c
, "encrypted: %u", (UI
)d
->is_encrypted
);
720 static void doc2com_output(deark
*c
, lctx
*d
)
724 if(d
->tlen
<0 || d
->tpos
<0 || d
->tpos
+d
->tlen
>c
->infile
->len
) {
730 outf
= dbuf_create_output_file(c
, "txt", NULL
, 0);
731 if(d
->is_encrypted
) {
737 dbuf_enable_wbuffer(outf
);
739 for(i
=0; i
<d
->tlen
; i
++) {
743 this_byte
= next_byte
;
746 this_byte
= de_getbyte(d
->tpos
+i
);
751 next_byte
= de_getbyte(d
->tpos
+i
+1);
757 b
= this_byte
^ next_byte
;
758 dbuf_writebyte(outf
, b
);
762 dbuf_copy(c
->infile
, d
->tpos
, d
->tlen
, outf
);
769 static void de_run_doc2com(deark
*c
, de_module_params
*mparams
)
772 struct doc2com_detection_data idd
;
774 d
= de_malloc(c
, sizeof(lctx
));
775 de_zeromem(&idd
, sizeof(struct doc2com_detection_data
));
776 doc2com_detect(c
, &idd
, 0);
781 d
->fmtcode
= idd
.fmtcode
;
782 de_dbg(c
, "fmt code: %u", d
->fmtcode
);
783 doc2com_analyze(c
, d
);
784 if(d
->errflag
) goto done
;
785 doc2com_output(c
, d
);
790 de_err(c
, "Not a DOC2COM file, or unsupported version");
796 static int de_identify_doc2com(deark
*c
)
798 struct doc2com_detection_data idd
;
801 if(c
->infile
->len
>65280) return 0;
803 if(b
!=0xbe && b
!=0xfc) return 0;
805 de_zeromem(&idd
, sizeof(struct doc2com_detection_data
));
806 doc2com_detect(c
, &idd
, 1);
807 if(idd
.found
) return 73;
811 void de_module_doc2com(deark
*c
, struct deark_module_info
*mi
)
814 mi
->desc
= "DOC2COM executable text (G. DePyper)";
815 mi
->run_fn
= de_run_doc2com
;
816 mi
->identify_fn
= de_identify_doc2com
;
819 ///////////////////////////////////////////////////
820 // DOC2COM (Dan K. Nelson)
822 static void de_run_doc2com_dkn(deark
*c
, de_module_params
*mparams
)
830 d
= de_malloc(c
, sizeof(lctx
));
831 d
->input_encoding
= de_get_input_encoding(c
, NULL
, DE_ENCODING_CP437
);
832 d
->opt_encconv
= (u8
)de_get_ext_option_bool(c
, "text:encconv", 1);
833 if(d
->input_encoding
==DE_ENCODING_ASCII
) {
839 d
->chartypes
[10] = 1;
840 d
->chartypes
[13] = 1;
841 d
->chartypes
[27] = 1;
843 n
= (UI
)de_getu16le(1);
855 if(d
->fmtcode
==0) goto done
;
856 if(de_getbyte(pos_of_tpos
-1)!=0xbf) goto done
;
857 de_dbg(c
, "fmt code: %u", d
->fmtcode
);
858 d
->tpos
= de_getu16le(pos_of_tpos
);
860 if(d
->fmtcode
==2) d
->tpos
--;
861 de_dbg(c
, "tpos: %"I64_FMT
, d
->tpos
);
863 b1
= de_getbyte(pos_of_tlen
);
864 b2
= de_getbyte(pos_of_tlen
+2);
865 d
->tlen
= ((UI
)b1
<<8) | b2
;
866 de_dbg(c
, "tlen: %"I64_FMT
, d
->tlen
);
868 exectext_extract_default(c
, d
);
873 de_err(c
, "Not a DOC2COM file, or unsupported version");
879 static int de_identify_doc2com_dkn(deark
*c
)
884 if(c
->infile
->len
>65280) return 0;
885 n
= (UI
)de_getu32be(0);
889 else if(n
==0xe9070100U
) {
896 if(dbuf_memcmp(c
->infile
, pos
, (const void*)"Press Home P", 14)) {
902 static void de_help_doc2com_dkn(deark
*c
)
904 print_encconv_option(c
);
907 void de_module_doc2com_dkn(deark
*c
, struct deark_module_info
*mi
)
909 mi
->id
= "doc2com_dkn";
910 mi
->desc
= "DOC2COM executable text (D. Nelson)";
911 mi
->run_fn
= de_run_doc2com_dkn
;
912 mi
->identify_fn
= de_identify_doc2com_dkn
;
913 mi
->help_fn
= de_help_doc2com_dkn
;
916 ///////////////////////////////////////////////////
917 // GTXT / MakeScroll (Eric Gans)
919 // This format can reasonably be converted to plain text.
920 static void gtxt_convert_to_text(deark
*c
, lctx
*d
, dbuf
*outf
, u8 to_utf8
)
922 struct de_encconv_state es
;
925 i64 endpos
= d
->tpos
+ d
->tlen
;
928 de_encconv_init(&es
, DE_EXTENC_MAKE(d
->input_encoding
, DE_ENCSUBTYPE_PRINTABLE
));
929 if(to_utf8
&& c
->write_bom
) {
930 dbuf_write_uchar_as_utf8(outf
, 0xfeff);
934 while(pos
< endpos
) {
937 x_raw
= de_getbyte_p(&pos
);
941 x_mod
= x_raw
& esc_mask
;
944 else if(x_raw
=='%') {
949 else if(x_raw
=='^') {
955 // GTXT ignores the high bit, except in '%' escapes.
963 u
= 0x240c; // Page break -> SYMBOL FOR FORM FEED, I guess.
965 else if(x_mod
<32 && d
->chartypes
[(UI
)x_mod
]!=0) {
969 u
= de_char_to_unicode_ex((i32
)x_mod
, &es
);
971 dbuf_write_uchar_as_utf8(outf
, u
);
977 b
= 0x0c; // Page break -> form feed, I guess.
982 dbuf_writebyte(outf
, b
);
987 static void de_run_gtxt(deark
*c
, de_module_params
*mparams
)
994 d
= de_malloc(c
, sizeof(lctx
));
995 d
->input_encoding
= de_get_input_encoding(c
, NULL
, DE_ENCODING_CP437
);
996 d
->opt_encconv
= (u8
)de_get_ext_option_bool(c
, "text:encconv", 1);
997 if(d
->input_encoding
==DE_ENCODING_ASCII
) {
1000 d
->opt_fmtconv
= (u8
)de_get_ext_option_bool(c
, "text:fmtconv", 1);
1002 d
->chartypes
[7] = 1;
1003 d
->chartypes
[8] = 1;
1004 d
->chartypes
[9] = 1;
1005 d
->chartypes
[10] = 1;
1006 d
->chartypes
[13] = 1;
1007 d
->chartypes
[27] = 1;
1010 de_dbg(c
, "tpos: %"I64_FMT
, d
->tpos
);
1012 endpos
= c
->infile
->len
;
1013 b1
= de_getbyte(endpos
-2);
1014 b2
= de_getbyte(endpos
-1);
1023 d
->tlen
= endpos
- d
->tpos
;
1024 de_dbg(c
, "tlen: %"I64_FMT
, d
->tlen
);
1025 exectext_check_tpos(c
, d
);
1026 if(d
->errflag
) goto done
;
1028 outf
= dbuf_create_output_file(c
, "txt", NULL
, 0);
1029 dbuf_enable_wbuffer(outf
);
1031 if(d
->opt_fmtconv
==0) {
1032 dbuf_copy(c
->infile
, d
->tpos
, d
->tlen
, outf
);
1034 else if(d
->opt_fmtconv
&& d
->opt_encconv
==0) {
1035 gtxt_convert_to_text(c
, d
, outf
, 0);
1037 else { // fmtconv==1 & encconv==1
1038 gtxt_convert_to_text(c
, d
, outf
, 1);
1043 if(d
->need_errmsg
) {
1044 de_err(c
, "Not a GTXT file, or unsupported version");
1051 static int de_identify_gtxt(deark
*c
)
1053 if(c
->infile
->len
>65280) return 0;
1054 if(de_getbyte(0) != 0xbb) return 0;
1056 if(dbuf_memcmp(c
->infile
, 1, (const void*)"\xbc\x01\xb4\x02\xb1\x00\x8a", 7)) {
1059 if(dbuf_memcmp(c
->infile
, 95, (const void*)"\x73\x01\xc3\x2c\x40\xc3\xcd\x20", 8)) {
1065 static void de_help_gtxt(deark
*c
)
1067 de_msg(c
, "-opt text:fmtconv=0 : Extract source code");
1068 print_encconv_option(c
);
1071 void de_module_gtxt(deark
*c
, struct deark_module_info
*mi
)
1074 mi
->desc
= "GTXT (E. Gans)";
1075 mi
->run_fn
= de_run_gtxt
;
1076 mi
->identify_fn
= de_identify_gtxt
;
1077 mi
->help_fn
= de_help_gtxt
;
1080 ///////////////////////////////////////////////////
1081 // READMAKE (by Bruce Guthrie and Wayne Software)
1083 struct readmake_ctx
{
1085 struct fmtutil_exe_info
*ei
;
1086 struct fmtutil_specialexe_detection_data edd
;
1089 // On apparent success, sets d->tpos to nonzero.
1090 static void readmake_find_text(deark
*c
, struct readmake_ctx
*rmctx
, lctx
*d
)
1095 pos
= rmctx
->ei
->end_of_dos_code
;
1096 n
= de_getu32le(pos
);
1099 n
= de_getu32le(pos
);
1101 // Later files have an extra field (expected value 4) or segment
1102 // (expected length 4). The segment after that is expected to
1103 // be larger. We use that to tell the difference.
1105 n
= de_getu32le(pos
);
1114 de_dbg(c
, "tpos: %"I64_FMT
, d
->tpos
);
1116 // TODO?: There might be a better way to figure out the length, but for
1117 // pristine files we can use the end of file.
1118 d
->tlen
= c
->infile
->len
- d
->tpos
;
1124 static void de_run_readmake(deark
*c
, de_module_params
*mparams
)
1126 struct readmake_ctx
*rmctx
= NULL
;
1129 d
= de_malloc(c
, sizeof(lctx
));
1131 rmctx
= de_malloc(c
, sizeof(struct readmake_ctx
));
1132 rmctx
->msgpfx
= "[READMAKE] ";
1133 rmctx
->ei
= de_malloc(c
, sizeof(struct fmtutil_exe_info
));
1135 d
->input_encoding
= de_get_input_encoding(c
, NULL
, DE_ENCODING_CP437
);
1136 d
->opt_encconv
= (u8
)de_get_ext_option_bool(c
, "text:encconv", 1);
1137 if(d
->input_encoding
==DE_ENCODING_ASCII
) {
1141 d
->allow_tlen_0
= 1;
1142 d
->chartypes
[8] = 1;
1143 d
->chartypes
[9] = 1;
1144 d
->chartypes
[10] = 1;
1145 d
->chartypes
[13] = 1;
1147 fmtutil_collect_exe_info(c
, c
->infile
, rmctx
->ei
);
1149 rmctx
->edd
.restrict_to_fmt
= DE_SPECIALEXEFMT_READMAKE
;
1150 fmtutil_detect_specialexe(c
, rmctx
->ei
, &rmctx
->edd
);
1151 if(rmctx
->edd
.detected_fmt
!=DE_SPECIALEXEFMT_READMAKE
) {
1156 readmake_find_text(c
, rmctx
, d
);
1157 if(d
->tpos
==0 || d
->errflag
) goto done
;
1159 exectext_extract_default(c
, d
);
1163 if(d
->need_errmsg
&& rmctx
) {
1164 de_err(c
, "%sBad or unsupported READMAKE file", rmctx
->msgpfx
);
1170 de_free(c
, rmctx
->ei
);
1176 static void de_help_readmake(deark
*c
)
1178 print_encconv_option(c
);
1181 void de_module_readmake(deark
*c
, struct deark_module_info
*mi
)
1183 mi
->id
= "readmake";
1184 mi
->desc
= "READMAKE executable text";
1185 mi
->run_fn
= de_run_readmake
;
1186 mi
->help_fn
= de_help_readmake
;