1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
5 // Microsoft EXE executable formats.
7 #include <deark-config.h>
8 #include <deark-private.h>
9 #include <deark-fmtutil.h>
// Module declaration for de_module_exe, the registration function defined at
// the end of this file.
10 DE_DECLARE_MODULE(de_module_exe
);
// Format codes stored in the 'fmt' field of lctx once the executable type is known.
14 #define EXE_FMT_PE32 3
15 #define EXE_FMT_PE32PLUS 4
// Sanity limit on the number of resources; the resource walkers report an
// error when it is exceeded.
19 #define MAX_RESOURCES 10000
// Numeric resource type IDs, used as the 'id' column of rsrc_type_info_arr.
21 #define DE_RT_CURSOR 1
22 #define DE_RT_BITMAP 2
24 #define DE_RT_FONTDIR 7
26 #define DE_RT_GROUP_CURSOR 12
27 #define DE_RT_GROUP_ICON 14
28 #define DE_RT_ANICURSOR 21
29 #define DE_RT_ANIICON 22
30 #define DE_RT_MANIFEST 24
// Forward declaration, so that lctx can hold pointers to type-info rows.
32 struct rsrc_type_info_struct
;
// Per-file decoder state ("lctx") passed to every routine in this module.
34 typedef struct localctx_struct
{
// File offset of the extended (PE/NE/LX/LE) header; do_fileheader() reads it
// from offset 60 of the file.
36 i64 ext_header_offset
;
// NE: absolute file offset of the resource table (the header-relative value
// plus the position of the NE header).
38 i64 ne_rsrc_tbl_offset
;
// NE: rscAlignShift; resource offsets and sizes are left-shifted by this amount.
39 unsigned int ne_align_shift
;
// NE: table row for the resource type currently being processed, or NULL.
42 const struct rsrc_type_info_struct
*ne_rsrc_type_info
;
// LX/LE: values filled in by do_lx_or_le_ext_header().
44 i64 lx_page_offset_shift
;
45 i64 lx_object_tbl_offset
;
46 i64 lx_object_tbl_entries
;
47 i64 lx_object_page_tbl_offset
;
48 i64 lx_rsrc_tbl_offset
;
49 i64 lx_rsrc_tbl_entries
;
50 i64 lx_data_pages_offset
;
// PE: file offset of the section table, and the section count from the COFF header.
53 i64 pe_sections_offset
;
54 i64 pe_number_of_sections
;
56 // File offset where the resources start. Some addresses are relative
// NOTE(review): the sentence above is cut off and the field it documents is not
// visible here; it presumably describes pe_cur_base_addr, which the resource
// walkers add to every relative position.
// PE: virtual address and file offset of the section whose header was read
// most recently; used to translate resource data addresses into file offsets.
60 i64 pe_cur_section_virt_addr
;
61 i64 pe_cur_section_data_offset
;
// PE: file offset of the current resource's name string.
63 i64 pe_cur_name_offset
; // 0 if no name
// PE: table row for the resource type currently being walked, or NULL.
66 const struct rsrc_type_info_struct
*cur_rsrc_type_info
;
// Repeated forward declaration of the type-info struct defined just below.
71 struct rsrc_type_info_struct
;
// Common signature of the per-type resource handlers (the do_extract_* functions):
// 'pos' and 'len' locate the resource data in the input file, and 'fi' carries
// the naming information for any file that gets written.
73 typedef void (*rsrc_decoder_fn
)(deark
*c
, lctx
*d
, i64 pos
, i64 len
, de_finfo
*fi
);
// One row of rsrc_type_info_arr. Only decoder_fn is visible in this listing;
// other code reads .id and .name, and the table initializers have four columns.
75 struct rsrc_type_info_struct
{
// Handler called by do_ne_pe_extract_resource(); NULL for types that are
// recognized by name but have no extractor.
79 rsrc_decoder_fn decoder_fn
;
// Reports the certificate blob at file position pos1 (len bytes), as located
// by the PE security data directory. Logs the length, revision and
// certificate-type fields of the 8-byte header and, when c->extract_level is
// 2 or more, writes the bytes that follow the header to an output file.
82 static void do_certificate(deark
*c
, lctx
*d
, i64 pos1
, i64 len
)
88 // This is a WIN_CERTIFICATE structure.
// Ignore a region that starts before offset 1, is no bigger than the header,
// or runs past the end of the input file.
89 if(pos1
<1 || len
<=8 || (pos1
+len
> c
->infile
->len
)) return;
91 de_dbg(c
, "certificate data at %d", (int)pos1
);
// Header layout: u32 length at +0, u16 revision at +4, u16 type at +6.
93 dlen
= de_getu32le(pos1
);
94 de_dbg(c
, "length: %d", (int)dlen
); // Includes the 8-byte header
95 revision
= de_getu16le(pos1
+4);
96 de_dbg(c
, "revision: 0x%04x", (unsigned int)revision
);
97 certtype
= de_getu16le(pos1
+6);
98 de_dbg(c
, "cert type: %d", (int)certtype
);
// The stored length must cover more than the header and fit inside the region.
99 if(dlen
<=8 || dlen
> len
) goto done
;
100 if(c
->extract_level
>=2) {
// Certificate type 2 is given the "p7b" extension (the default extension is
// assigned on a line not visible in this listing).
102 if(certtype
==2) ext
="p7b";
// Extract only the payload, skipping the 8-byte header.
104 dbuf_create_file_from_slice(c
->infile
, pos1
+8, dlen
-8, ext
, NULL
, 0);
107 de_dbg_indent(c
, -1);
// Reads the data-directory portion of the COFF/PE optional header starting at
// 'pos'. Logs the resource table entry (RVA at +16, size at +20) and the
// security entry (position at +32, size at +36); a nonzero security position
// is handed to do_certificate().
111 static void do_opt_coff_data_dirs(deark
*c
, lctx
*d
, i64 pos
)
116 i64 pe_security_size
;
118 de_dbg(c
, "COFF/PE optional header (data directories) at %d", (int)pos
);
120 rsrc_tbl_rva
= de_getu32le(pos
+16);
121 // I don't know if rsrc_tbl_rva will be needed for anything. It seems redundant.
122 rsrc_tbl_size
= de_getu32le(pos
+20);
123 de_dbg(c
, "resource table RVA=0x%08x, size=%d", (unsigned int)rsrc_tbl_rva
,
126 pe_security_pos
= de_getu32le(pos
+32);
127 pe_security_size
= de_getu32le(pos
+36);
128 de_dbg(c
, "security pos=0x%08x, size=%d", (unsigned int)pe_security_pos
,
129 (int)pe_security_size
);
// pe_security_pos is passed on as-is and do_certificate() uses it directly
// as a file position.
130 if(pe_security_pos
>0) {
132 do_certificate(c
, d
, pe_security_pos
, pe_security_size
);
133 de_dbg_indent(c
, -1);
136 de_dbg_indent(c
, -1);
// Returns a short parenthesized description of a PE subsystem code, formatted
// with a leading space so it can be appended after the number in a debug line.
// Only the cases for codes 2 and 3 are visible in this listing.
139 static const char *get_subsys_desc(i64 subsystem
)
142 case 2: return " (Windows GUI)";
143 case 3: return " (console)";
// Logs two fields of the PE32 Windows-specific optional header that starts at
// 'pos': the 32-bit image base at +0 and the 16-bit subsystem code at +40.
148 static void do_opt_coff_nt_header(deark
*c
, lctx
*d
, i64 pos
)
153 de_dbg(c
, "COFF/PE optional header (Windows NT) at %d", (int)pos
);
156 x
= de_getu32le(pos
);
157 de_dbg(c
, "image base offset: 0x%08x", (unsigned int)x
);
159 subsystem
= de_getu16le(pos
+40);
160 de_dbg(c
, "subsystem: %d%s", (int)subsystem
, get_subsys_desc(subsystem
));
162 de_dbg_indent(c
, -1);
// PE32+ counterpart of do_opt_coff_nt_header(): the image base is read as a
// 64-bit value at +0, and the subsystem code is at +44 instead of +40.
165 static void do_opt_coff_nt_header_64(deark
*c
, lctx
*d
, i64 pos
)
170 de_dbg(c
, "COFF/PE32+ optional header (Windows NT) at %d", (int)pos
);
173 base_offset
= de_geti64le(pos
);
174 de_dbg(c
, "image base offset: 0x%016" U64_FMTx
"", (u64
)base_offset
);
176 subsystem
= de_getu16le(pos
+44);
177 de_dbg(c
, "subsystem: %d%s", (int)subsystem
, get_subsys_desc(subsystem
));
179 de_dbg_indent(c
, -1);
// Decodes the COFF/PE optional header at 'pos' ('len' is its declared size).
// The 16-bit signature at 'pos' selects the variant: the format is declared,
// d->fmt is set for PE32 and PE32+, and the Windows-specific fields and data
// directories are decoded by the helpers above.
182 static void do_opt_coff_header(deark
*c
, lctx
*d
, i64 pos
, i64 len
)
185 i64 coff_opt_hdr_size
;
187 de_dbg(c
, "COFF/PE optional header at %d, size=%d", (int)pos
, (int)len
);
190 sig
= de_getu16le(pos
);
191 de_dbg(c
, "signature: 0x%04x", (int)sig
);
// Size of the standard fields that precede the Windows-specific fields: 28 or
// 24 bytes. NOTE(review): the condition choosing between them is not visible
// in this listing.
194 coff_opt_hdr_size
= 28;
196 coff_opt_hdr_size
= 24;
// PE32 branch (its condition is not visible here): the data directories
// start 68 bytes into the Windows-specific fields.
199 d
->fmt
= EXE_FMT_PE32
;
200 de_declare_fmt(c
, "PE32");
201 do_opt_coff_nt_header(c
, d
, pos
+coff_opt_hdr_size
);
202 do_opt_coff_data_dirs(c
, d
, pos
+coff_opt_hdr_size
+68);
// Signature 0x020b is PE32+: the data directories start 88 bytes in.
204 else if(sig
==0x020b) {
205 d
->fmt
= EXE_FMT_PE32PLUS
;
206 de_declare_fmt(c
, "PE32+");
207 do_opt_coff_nt_header_64(c
, d
, pos
+coff_opt_hdr_size
);
208 do_opt_coff_data_dirs(c
, d
, pos
+coff_opt_hdr_size
+88);
// Signature 0x0107 is a ROM image; it is only declared, not decoded further.
210 else if(sig
==0x0107) {
211 de_declare_fmt(c
, "PE ROM image");
214 de_declare_fmt(c
, "Unknown PE file type");
217 de_dbg_indent(c
, -1);
// Logs the COFF "characteristics" bit field 'v' as a hex value followed by
// the names of the recognized flag bits that are set.
220 static void do_pe_characteristics(deark
*c
, lctx
*d
, unsigned int v
)
222 de_ucstring
*s
= NULL
;
223 s
= ucstring_create(c
);
// One flag name is appended per recognized bit.
225 if(v
&0x0001) ucstring_append_flags_item(s
, "relocs_stripped");
226 if(v
&0x0002) ucstring_append_flags_item(s
, "valid_executable");
227 if(v
&0x0004) ucstring_append_flags_item(s
, "COFF_line_numbers_stripped");
228 if(v
&0x0008) ucstring_append_flags_item(s
, "COFF_local_stripped");
229 if(v
&0x0020) ucstring_append_flags_item(s
, "large_address_aware");
230 if(v
&0x0100) ucstring_append_flags_item(s
, "32-bit");
231 if(v
&0x0200) ucstring_append_flags_item(s
, "stripped");
232 if(v
&0x2000) ucstring_append_flags_item(s
, "DLL");
233 // TODO: There are more flags than this.
234 de_dbg(c
, "characteristics: 0x%04x (%s)", v
, ucstring_getpsz(s
));
// Maps a COFF machine-type code 'n' to a descriptive name by a linear search
// of a static table. The value returned when no row matches is on a line not
// visible in this listing.
238 static const char *get_machine_type_name(unsigned int n
)
// Table row: machine-type code and its display name.
241 struct mtn_struct
{ unsigned int id
; const char *name
; };
242 static const struct mtn_struct mtn_arr
[] = {
243 { 0x0000, "neutral" },
245 { 0x0166, "MIPS LE" },
246 { 0x0169, "MIPS LE WCE v2" },
247 { 0x01a2, "Hitachi SH3" },
248 { 0x01a3, "Hitachi SH3 DSP" },
249 { 0x01a6, "Hitachi SH4" },
250 { 0x01a8, "Hitachi SH5" },
251 { 0x01c0, "ARM LE" },
252 { 0x01c2, "ARM or Thumb" },
253 { 0x01c4, "ARMv7+ Thumb" },
254 { 0x01d3, "Matsushita AM33" },
255 { 0x01f0, "Power PC LE" },
256 { 0x01f1, "Power PC w/FP" },
257 { 0x0200, "Itanium" },
258 { 0x0266, "MIPS16" },
259 { 0x0366, "MIPS with FPU" },
260 { 0x0466, "MIPS16 with FPU" },
261 { 0x0ebc, "EFI byte code" },
263 { 0x9041, "Mitsubishi M32R LE" },
264 { 0xaa64, "ARMv8 64-bit" }
// Return the name of the first row whose id equals n.
267 for(i
=0; i
<DE_ARRAYCOUNT(mtn_arr
); i
++) {
268 if(mtn_arr
[i
].id
== n
) {
269 return mtn_arr
[i
].name
;
// Looks for a "Rich" segment in the area that ends at the extended header, and
// logs its entries if one is found. The segment is accepted only if the first
// 32-bit word at segment_start, XORed with the key stored right after the
// "Rich" signature, equals the start signature.
// NOTE(review): the assignment of segment_start is not visible in this listing.
275 static void do_Rich_segment(deark
*c
, lctx
*d
)
// The search area ends at the extended header, rounded down to a multiple of 8.
288 segment_end
= d
->ext_header_offset
;
289 if(segment_end
%8) segment_end
-= segment_end
%8;
290 if(segment_end
- segment_start
< 24) return; // No place for a Rich segment
292 // Try to find the "Rich" signature, which starts 8 bytes from the end of
// the segment (the reported segment length is sig_pos+8 - segment_start).
294 // Based on limited research, the Rich signature usually starts 16, 24, or 32
295 // bytes before the "PE" signature.
// Scan backward in 8-byte steps, leaving room for 16 bytes before the signature.
297 for(p
= segment_end
-8; p
>= segment_start
+16; p
-= 8 ) {
298 n
= (u32
)de_getu32le(p
);
299 if(n
==0x68636952U
) { // "Rich"
305 return; // Rich segment not found
308 // Likely "Rich" signature found at sig_pos
// The 32-bit key follows the signature.
310 key
= (u32
)de_getu32le(sig_pos
+4);
312 // Decode and verify the "start" signature
313 n
= (u32
)de_getu32le(segment_start
);
314 if((n
^ key
) != 0x536e6144U
) { // "DanS"
315 // False positive? Or maybe our detection logic isn't perfect?
319 de_dbg(c
, "\"Rich\" segment detected at %d, sig at %d, len=%d",
320 (int)segment_start
, (int)sig_pos
,
321 (int)(sig_pos
+8 - segment_start
));
// Entries are 8 bytes each, and occupy the space from 16 bytes after the start
// of the segment up to the "Rich" signature.
325 pos
= segment_start
+ 16;
326 num_entries
= (sig_pos
- pos
)/8;
327 for(k
=0; k
<num_entries
; k
++) {
// Each entry is two 32-bit words: a combined id/value word and a use count.
333 id_and_value
= (u32
)de_getu32le(pos
+8*k
);
334 use_count
= (u32
)de_getu32le(pos
+8*k
+4);
// High 16 bits are logged as "type", low 16 bits as "build".
337 id
= (id_and_value
&0xffff0000U
)>>16;
338 value
= id_and_value
&0x0000ffffU
;
339 // TODO: Provide additional information, based on the 'type' and 'build'?
340 de_dbg(c
, "entry[%d]: type=%d, build=%d, use_count=%u",
341 (int)k
, (int)id
, (int)value
, (unsigned int)use_count
);
344 de_dbg_indent(c
, -1);
347 // 'pos' is the start of the 4-byte PE signature.
348 // Following it is a 20-byte COFF header.
// Logs the machine type, section count, optional header size and
// characteristics, and stores the section count and optional header size in
// 'd'. If an optional header is present it is decoded, and the section table
// offset is set to the position just past it.
349 static void do_pe_coff_header(deark
*c
, lctx
*d
, i64 pos
)
354 de_dbg(c
, "PE header at %d", (int)d
->ext_header_offset
);
357 // a.k.a. "Machine". Decoded to a name by get_machine_type_name().
358 arch
= (unsigned int)de_getu16le(pos
+4+0);
359 de_dbg(c
, "target architecture: 0x%04x (%s)", arch
,
360 get_machine_type_name(arch
));
// COFF header fields, relative to the byte after the 4-byte signature:
// +2 section count, +16 optional header size, +18 characteristics.
362 d
->pe_number_of_sections
= de_getu16le(pos
+4+2);
363 de_dbg(c
, "number of sections: %d", (int)d
->pe_number_of_sections
);
365 d
->pe_opt_hdr_size
= de_getu16le(pos
+4+16);
366 de_dbg(c
, "optional header size: %d", (int)d
->pe_opt_hdr_size
);
368 n
= de_getu16le(pos
+4+18);
369 do_pe_characteristics(c
, d
, (unsigned int)n
);
// The optional header starts right after the signature (4) and COFF header (20);
// the section table follows the optional header.
371 if(d
->pe_opt_hdr_size
>0) {
372 do_opt_coff_header(c
, d
, pos
+4+20, d
->pe_opt_hdr_size
);
373 d
->pe_sections_offset
= pos
+4+20+d
->pe_opt_hdr_size
;
377 de_dbg_indent(c
, -1);
// Logs the NE "program flags" byte as a hex value followed by the names of the
// recognized settings. The switch that selects the dgroup_type item is only
// partly visible in this listing (its controlling expression is missing).
380 static void do_ne_program_flags(deark
*c
, lctx
*d
, u8 flags
)
382 de_ucstring
*s
= NULL
;
383 s
= ucstring_create(c
);
386 case 1: ucstring_append_flags_item(s
, "dgroup_type=single_shared"); break;
387 case 2: ucstring_append_flags_item(s
, "dgroup_type=multiple"); break;
388 case 3: ucstring_append_flags_item(s
, "dgroup_type=null"); break;
// The remaining bits (0x04 through 0x80) are independent flags.
391 if(flags
&0x4) ucstring_append_flags_item(s
, "global init");
392 if(flags
&0x8) ucstring_append_flags_item(s
, "protected mode");
393 if(flags
&0x10) ucstring_append_flags_item(s
, "8086");
394 if(flags
&0x20) ucstring_append_flags_item(s
, "80286");
395 if(flags
&0x40) ucstring_append_flags_item(s
, "80386");
396 if(flags
&0x80) ucstring_append_flags_item(s
, "80x87");
398 de_dbg(c
, "program flags: 0x%02x (%s)", (unsigned int)flags
,
// Logs the NE "application flags" byte as a hex value followed by the names of
// the recognized settings. The switch that selects the "type=" item is only
// partly visible in this listing (its controlling expression is missing).
404 static void do_ne_app_flags(deark
*c
, lctx
*d
, u8 flags
)
406 de_ucstring
*s
= NULL
;
407 s
= ucstring_create(c
);
410 case 0x1: ucstring_append_flags_item(s
, "type=non-windowed"); break;
411 case 0x2: ucstring_append_flags_item(s
, "type=windowed-compatible"); break;
412 case 0x3: ucstring_append_flags_item(s
, "type=windowed"); break;
// Independent flag bits.
415 if(flags
&0x08) ucstring_append_flags_item(s
, "OS/2");
416 if(flags
&0x80) ucstring_append_flags_item(s
, "DLL");
418 de_dbg(c
, "application flags: 0x%02x (%s)", (unsigned int)flags
,
// Decodes the NE extended header at 'pos': logs the linker version, program
// and application flags and target OS, and records the absolute offset of the
// resource table in d->ne_rsrc_tbl_offset.
424 static void do_ne_ext_header(deark
*c
, lctx
*d
, i64 pos
)
430 de_dbg(c
, "NE extended header at %d", (int)pos
);
// Bytes 2 and 3 are logged as the two parts of the linker version.
433 b1
= de_getbyte(pos
+2);
434 b2
= de_getbyte(pos
+3);
435 de_dbg(c
, "linker version: %d.%d", (int)b1
,(int)b2
);
437 // 4-5: Offset of entry table
438 // 6-7: length of entry table
439 // 8-11: file load CRC
// Byte 12: program flags. Byte 13: application flags.
441 do_ne_program_flags(c
, d
, de_getbyte(pos
+12));
443 do_ne_app_flags(c
, d
, de_getbyte(pos
+13));
// The 16-bit value at +36 is relative to the start of this header, so the
// header position is added to get a file offset.
445 d
->ne_rsrc_tbl_offset
= de_getu16le(pos
+36);
446 d
->ne_rsrc_tbl_offset
+= pos
;
447 de_dbg(c
, "offset of resource table: %d", (int)d
->ne_rsrc_tbl_offset
);
// Byte 54: target operating system code.
449 target_os
= de_getbyte(pos
+54);
451 case 1: desc
="OS/2"; break;
452 case 2: desc
="Windows"; break;
453 case 3: desc
="European MS-DOS 4.x"; break;
454 case 4: desc
="Windows 386"; break;
455 case 5: desc
="Borland Operating System Services"; break;
458 de_dbg(c
, "target OS: %d (%s)", (int)target_os
, desc
);
460 de_dbg_indent(c
, -1);
// Decodes the LX or LE header at 'pos' (d->fmt tells which) and stores the
// table locations needed later by the resource code in the lx_* fields of 'd'.
463 static void do_lx_or_le_ext_header(deark
*c
, lctx
*d
, i64 pos
)
467 de_dbg(c
, "%s header at %d", d
->fmt
==EXE_FMT_LE
?"LE":"LX", (int)pos
);
// Bytes 2 and 3: byte order and word order.
468 x1
= (u8
)de_getbyte(pos
+2);
469 x2
= (u8
)de_getbyte(pos
+3);
470 de_dbg(c
, "byte order, word order: %d, %d", (int)x1
, (int)x2
);
// NOTE(review): the test that leads to this error is not visible in this listing.
472 de_err(c
, "Unsupported byte order.");
// The field at +0x2c is logged as "bytes on last page" for LE; the same
// offset is also read as the page offset shift (presumably in the non-LE
// branch, whose 'else' line is not visible here).
476 if(d
->fmt
==EXE_FMT_LE
) {
477 x1
= de_getu32le(pos
+0x2c);
478 de_dbg(c
, "bytes on last page: %d", (int)x1
);
481 d
->lx_page_offset_shift
= de_getu32le(pos
+0x2c);
482 de_dbg(c
, "page offset shift: %d", (int)d
->lx_page_offset_shift
);
// The object table, object page table and resource table offsets are stored
// relative to the header, so 'pos' is added to each.
485 x1
= de_getu32le(pos
+0x40);
486 d
->lx_object_tbl_offset
= pos
+ x1
;
487 d
->lx_object_tbl_entries
= de_getu32le(pos
+0x44);
488 de_dbg(c
, "object table offset=%d, entries=%d", (int)d
->lx_object_tbl_offset
, (int)d
->lx_object_tbl_entries
);
490 x1
= de_getu32le(pos
+0x48);
491 d
->lx_object_page_tbl_offset
= pos
+ x1
;
492 de_dbg(c
, "object page table offset=%d", (int)d
->lx_object_page_tbl_offset
);
494 x1
= de_getu32le(pos
+0x50);
495 d
->lx_rsrc_tbl_offset
= pos
+ x1
;
496 d
->lx_rsrc_tbl_entries
= de_getu32le(pos
+0x54);
497 de_dbg(c
, "resource table offset=%d entries=%d", (int)d
->lx_rsrc_tbl_offset
, (int)d
->lx_rsrc_tbl_entries
);
// Unlike the offsets above, the data pages offset is stored without adding 'pos'.
499 d
->lx_data_pages_offset
= de_getu32le(pos
+0x80);
500 de_dbg(c
, "data pages offset=%d", (int)d
->lx_data_pages_offset
);
// Identifies the extended header at d->ext_header_offset by its signature
// bytes ("PE\0\0", "NE", "LX" or "LE"), declares the format, and calls the
// matching header decoder.
503 static void do_ext_header(deark
*c
, lctx
*d
)
507 if(d
->ext_header_offset
== 0 || d
->ext_header_offset
>= c
->infile
->len
) {
508 // Give up if ext_header_offset is obviously bad.
// Read the first 4 bytes of the extended header for signature matching.
512 de_read(buf
, d
->ext_header_offset
, 4);
513 if(!de_memcmp(buf
, "PE\0\0", 4)) {
514 do_Rich_segment(c
, d
);
515 do_pe_coff_header(c
, d
, d
->ext_header_offset
);
516 // If do_pe_coff_header didn't figure out the format...
517 de_declare_fmt(c
, "PE");
519 else if(!de_memcmp(buf
, "NE", 2)) {
520 // TODO: Do "Rich" segments ever exist in files that are not PE files?
521 de_declare_fmt(c
, "NE");
523 do_ne_ext_header(c
, d
, d
->ext_header_offset
);
// LX and LE share one header decoder.
525 else if(!de_memcmp(buf
, "LX", 2)) {
526 de_declare_fmt(c
, "LX Linear Executable");
528 do_lx_or_le_ext_header(c
, d
, d
->ext_header_offset
);
530 else if(!de_memcmp(buf
, "LE", 2)) {
531 de_declare_fmt(c
, "LE Linear Executable");
533 do_lx_or_le_ext_header(c
, d
, d
->ext_header_offset
);
537 // If we still don't know the format...
538 de_declare_fmt(c
, "Unknown EXE format (maybe MS-DOS)");
// Reads the fixed part of the file header: the 16-bit relocation table offset
// at file offset 24 and the 32-bit extended header offset at file offset 60.
541 static void do_fileheader(deark
*c
, lctx
*d
)
543 i64 reloc_tbl_offset
;
545 reloc_tbl_offset
= de_getu16le(24);
546 de_dbg(c
, "relocation table offset: %d", (int)reloc_tbl_offset
);
// A relocation table offset of 28 through 63 is taken to mean a plain MS-DOS EXE.
548 if(reloc_tbl_offset
>=28 && reloc_tbl_offset
<64) {
549 de_declare_fmt(c
, "MS-DOS EXE");
550 d
->fmt
= EXE_FMT_DOS
;
553 d
->ext_header_offset
= de_getu32le(60);
554 de_dbg(c
, "extended header offset: %d", (int)d
->ext_header_offset
);
// Hands the slice (pos1, len) of the input file to the "ddb" module, passing
// 'fi' along through a freshly allocated de_module_params.
559 static void do_decode_ddb(deark
*c
, lctx
*d
, i64 pos1
, i64 len
, de_finfo
*fi
)
561 de_module_params
*mparams
= NULL
;
563 de_dbg(c
, "BITMAP16 at %"I64_FMT
, pos1
);
565 mparams
= de_malloc(c
, sizeof(de_module_params
));
566 mparams
->in_params
.fi
= fi
;
567 de_run_module_by_id_on_slice(c
, "ddb", mparams
, c
->infile
, pos1
, len
);
568 de_dbg_indent(c
, -1);
572 // Extract a raw DIB, and write it to a file as a BMP.
// In an NE file, a resource whose first byte is 0x02 is routed to
// do_decode_ddb() instead; everything else goes to the "dib" module with the
// option string "X".
573 static void do_extract_BITMAP(deark
*c
, lctx
*d
, i64 pos
, i64 len
, de_finfo
*fi
)
577 if((d
->fmt
==EXE_FMT_NE
) && (de_getbyte(pos
)==0x02)) {
578 do_decode_ddb(c
, d
, pos
, len
, fi
);
583 de_run_module_by_id_on_slice2(c
, "dib", "X", c
->infile
, pos
, len
);
584 de_dbg_indent(c
, -1);
// Writes one icon (is_cur==0) or cursor (is_cur!=0) image as a standalone
// ".ico"/".cur" file: a manufactured 6-byte file header and 16-byte index
// entry, followed by the image data copied from the input file. Image data
// identified as PNG is written out unchanged as a ".png" file instead.
// hotspot_x/hotspot_y are stored in the index entry.
587 static void do_extract_ico_cur(deark
*c
, lctx
*d
, i64 pos
, i64 len
,
588 int is_cur
, i64 hotspot_x
, i64 hotspot_y
, de_finfo
*fi
)
593 struct de_bmpinfo bi
;
595 // I guess we have to manufacture an ICO/CUR header?
596 // There's usually a GROUP_ICON resource that seems to contain (most of) an
597 // ICO header, but I don't know exactly how it's connected to the icon image(s).
// Parse the bitmap header first; failure is reported as an error.
599 if(!fmtutil_get_bmpinfo(c
, c
->infile
, &bi
, pos
, len
, DE_BMPINFO_ICO_FORMAT
)) {
600 de_err(c
, "Invalid bitmap");
604 if(bi
.file_format
==DE_BMPINFO_FMT_PNG
) {
605 dbuf_create_file_from_slice(c
->infile
, pos
, len
, "png", fi
, 0);
609 f
= dbuf_create_output_file(c
, is_cur
?"cur":"ico", fi
, 0);
611 // Write the 6-byte file header.
612 dbuf_writeu16le(f
, 0); // Reserved
613 dbuf_writeu16le(f
, is_cur
?2:1); // Resource ID
614 dbuf_writeu16le(f
, 1); // Number of icons/cursors
// The color count field is a single byte, so counts above 255 are stored as 0.
620 ncolors
= bi
.num_colors
;
621 if(ncolors
>255) ncolors
= 0;
623 if(bi
.total_size
< len
) {
624 // Strip off useless padding at the end of the image.
628 // Write the 16-byte index entry for the one icon/cursor.
629 dbuf_writebyte(f
, (u8
)w
);
630 dbuf_writebyte(f
, (u8
)h
);
631 dbuf_writebyte(f
, (u8
)ncolors
);
633 dbuf_writebyte(f
, 0);
634 dbuf_writeu16le(f
, hotspot_x
);
635 dbuf_writeu16le(f
, hotspot_y
);
638 dbuf_write_zeroes(f
, 5);
640 dbuf_writeu32le(f
, len
); // Icon/cursor size
// The image data begins right after the file header (6) and index entry (16).
641 dbuf_writeu32le(f
, 6+16); // Icon/cursor file offset
643 // Write the non-manufactured part of the file.
644 dbuf_copy(c
->infile
, pos
, len
, f
);
// Extracts an RT_CURSOR resource. In NE files a Windows 1.x cursor is detected
// from its first and fourth words and written out unchanged as "win1.cur".
// Otherwise the first two words are the hotspot, and the remainder of the
// resource is passed to do_extract_ico_cur().
648 static void do_extract_CURSOR(deark
*c
, lctx
*d
, i64 pos
, i64 len
, de_finfo
*fi
)
650 unsigned int firstword
;
651 i64 hotspot_x
, hotspot_y
;
654 firstword
= (unsigned int)de_getu16le(pos
);
656 // For Win3 icons, the first word is the x hotspot.
657 // For Win1 icons, it is one of the type codes below.
658 if(d
->fmt
==EXE_FMT_NE
&& (firstword
==0x0003 || firstword
==0x0103 ||
661 unsigned int fourthword
;
662 // For Win3 icons, the 4th word is the high word of the
663 // bitmap-info-header-size (definitely 0).
664 // For Win1 icons, it is the width (definitely not 0).
665 fourthword
= (unsigned int)de_getu16le(pos
+6);
667 dbuf_create_file_from_slice(c
->infile
, pos
, len
, "win1.cur", fi
, 0);
// Hotspot x is the first word and hotspot y the second.
672 hotspot_x
= (i64
)firstword
;
673 hotspot_y
= de_getu16le(pos
+2);
674 de_dbg(c
, "hotspot: %d,%d", (int)hotspot_x
, (int)hotspot_y
);
// Skip the 4 hotspot bytes; the rest is the image.
675 do_extract_ico_cur(c
, d
, pos
+4, len
-4, 1, hotspot_x
, hotspot_y
, fi
);
// Extracts an RT_ICON resource. In NE files, a resource longer than 14 bytes
// whose first word is one of the Windows 1.x type codes is written out
// unchanged as "win1.ico"; otherwise the resource is passed to
// do_extract_ico_cur() as an icon with a zero hotspot.
678 static void do_extract_ICON(deark
*c
, lctx
*d
, i64 pos
, i64 len
, de_finfo
*fi
)
680 if(d
->fmt
==EXE_FMT_NE
&& len
>14) {
681 unsigned int firstword
;
683 firstword
= (unsigned int)de_getu16le(pos
);
684 // For Win3 icons, the first word is the low word of bitmap-info-header-size
685 // (usually 40, definitely not one of the Win1 type codes).
686 // For Win1 icons, it is one of the type codes below.
687 if(firstword
==0x0001 || firstword
==0x0101 || firstword
==0x0201) {
688 dbuf_create_file_from_slice(c
->infile
, pos
, len
, "win1.ico", fi
, 0);
693 do_extract_ico_cur(c
, d
, pos
, len
, 0, 0, 0, fi
);
696 // Try to get the face name and 'points' from a font resource. If successful,
697 // set the filename of the 'fi' object accordingly.
698 // This code is somewhat duplicated in fnt.c, but it's not worth consolidating.
// The resulting name has the form "<face name>-<points>".
699 static void get_font_facename(deark
*c
, lctx
*d
, i64 pos
, i64 len
, de_finfo
*fi
)
701 unsigned int fnt_version
;
702 unsigned int dfPoints
;
704 de_ucstring
*s
= NULL
;
// The last field read below (dfFace) ends at offset 109, so shorter
// resources are rejected.
707 if(len
<109) goto done
;
708 fnt_version
= (unsigned int)de_getu16le(pos
);
709 if(fnt_version
< 0x0200) goto done
;
710 dfPoints
= (unsigned int)de_getu16le(pos
+68);
// dfFace is the offset of the face name, relative to the start of the resource.
711 dfFace
= de_getu32le(pos
+105);
712 if(dfFace
>=len
) goto done
;
713 s
= ucstring_create(c
);
// Read the NUL-terminated ASCII face name, passing 64 and the number of bytes
// remaining in the resource as the two size arguments.
714 dbuf_read_to_ucstring_n(c
->infile
, pos
+dfFace
, 64, len
-dfFace
, s
,
715 DE_CONVFLAG_STOP_AT_NUL
, DE_ENCODING_ASCII
);
716 if(s
->len
<1) goto done
;
717 ucstring_printf(s
, DE_ENCODING_LATIN1
, "-%u", dfPoints
);
718 de_finfo_set_name_from_ucstring(c
, fi
, s
, 0);
// Extracts an RT_FONT resource as a ".fnt" file, named after the font's face
// name and point size when get_font_facename() can determine them.
724 static void do_extract_FONT(deark
*c
, lctx
*d
, i64 pos
, i64 len
, de_finfo
*fi
)
729 // The "file size" is stored at offset 2. Respect it if possible.
730 fntlen
= de_getu32le(pos
+2);
// A stored size below 6 or larger than the resource is not usable.
// NOTE(review): the handling inside this 'if' is not visible in this listing.
731 if(fntlen
<6 || fntlen
>len
) {
735 get_font_facename(c
, d
, pos
, fntlen
, fi
);
737 dbuf_create_file_from_slice(c
->infile
, pos
, fntlen
, "fnt", fi
, 0);
// Extracts an RT_MANIFEST resource as a ".manifest" file, flagged as an
// auxiliary file. Does nothing unless c->extract_level is 2 or more.
740 static void do_extract_MANIFEST(deark
*c
, lctx
*d
, i64 pos
, i64 len
, de_finfo
*fi
)
742 if(c
->extract_level
>=2) {
743 dbuf_create_file_from_slice(c
->infile
, pos
, len
, "manifest", fi
, DE_CREATEFLAG_IS_AUX
);
// Table of known NE/PE resource types, searched by get_rsrc_type_info().
// Columns: type id, an integer that is 0 in every row, display name, and the
// extraction handler (NULL when the type is recognized but not extracted).
747 static const struct rsrc_type_info_struct rsrc_type_info_arr
[] = {
748 { DE_RT_CURSOR
, 0, "RT_CURSOR", do_extract_CURSOR
},
749 { DE_RT_BITMAP
, 0, "RT_BITMAP", do_extract_BITMAP
},
750 { DE_RT_ICON
, 0, "RT_ICON", do_extract_ICON
},
751 { 4, 0, "RT_MENU", NULL
},
752 { 5, 0, "RT_DIALOG", NULL
},
753 { 6, 0, "RT_STRING", NULL
},
754 { DE_RT_FONTDIR
, 0, "RT_FONTDIR", NULL
},
755 { DE_RT_FONT
, 0, "RT_FONT", do_extract_FONT
},
756 { 9, 0, "RT_ACCELERATOR", NULL
},
757 { 10, 0, "RT_RCDATA", NULL
},
758 { 11, 0, "RT_MESSAGETABLE", NULL
},
759 { DE_RT_GROUP_CURSOR
, 0, "RT_GROUP_CURSOR", NULL
},
760 { DE_RT_GROUP_ICON
, 0, "RT_GROUP_ICON", NULL
},
761 { 16, 0, "RT_VERSION", NULL
},
762 { DE_RT_ANICURSOR
, 0, "RT_ANICURSOR", NULL
},
763 { DE_RT_ANIICON
, 0, "RT_ANIICON", NULL
},
764 { DE_RT_MANIFEST
, 0, "RT_MANIFEST", do_extract_MANIFEST
}
// Returns a pointer to the rsrc_type_info_arr row whose id equals 'id', found
// by linear search. Callers test the result for NULL, so a miss presumably
// returns NULL (that return statement is not visible in this listing).
767 static const struct rsrc_type_info_struct
*get_rsrc_type_info(u32 id
)
771 for(i
=0; i
<DE_ARRAYCOUNT(rsrc_type_info_arr
); i
++) {
772 if(id
== rsrc_type_info_arr
[i
].id
) {
773 return &rsrc_type_info_arr
[i
];
// Reports whether resources of type 'type_id' should be processed.
// do_pe_resource_node() treats a zero result as "not supported" and skips that
// part of the tree. NOTE(review): the body is not visible in this listing.
779 static int ne_pe_resource_type_is_supported(deark
*c
, lctx
*d
, u32 type_id
)
// Common extraction entry point for NE and PE resources. Resources with a
// length below 1 or above DE_MAX_SANE_OBJECT_SIZE are ignored; otherwise the
// handler from the type-info row 'rsrci' is called, if there is one.
792 static void do_ne_pe_extract_resource(deark
*c
, lctx
*d
,
793 u32 type_id
, const struct rsrc_type_info_struct
*rsrci
,
794 i64 pos
, i64 len
, de_finfo
*fi
)
796 if(len
<1 || len
>DE_MAX_SANE_OBJECT_SIZE
) return;
// rsrci may be NULL (unknown type), and known types may have no handler.
798 if(rsrci
&& rsrci
->decoder_fn
) {
799 rsrci
->decoder_fn(c
, d
, pos
, len
, fi
);
// Sets the name in 'fi' from a length-prefixed UTF-16LE string at 'pos'.
// Does nothing when c->filenames_from_file is off or the string is empty.
// NOTE(review): in the visible lines the string is read from c->infile and
// the 'f' parameter is not referenced.
804 static void de_finfo_set_name_from_pe_string(deark
*c
, de_finfo
*fi
, dbuf
*f
,
807 i64 nlen
; // in UTF-16 code units (2 bytes each)
808 de_ucstring
*fname
= NULL
;
810 if(!c
->filenames_from_file
) goto done
;
812 // The string length is stored in a two-byte prefix.
813 nlen
= de_getu16le(pos
);
814 if(nlen
<1) goto done
;
816 fname
= ucstring_create(c
);
// The string data follows the 2-byte prefix; nlen*2 converts code units to bytes.
817 dbuf_read_to_ucstring(c
->infile
, pos
+2, nlen
*2, fname
, 0, DE_ENCODING_UTF16LE
);
818 de_finfo_set_name_from_ucstring(c
, fi
, fname
, 0);
// fname is still NULL here when either early exit above was taken.
821 ucstring_destroy(fname
);
// Processes a PE resource data entry (a leaf of the resource tree) located at
// rel_pos, relative to d->pe_cur_base_addr. Reads the data's virtual address
// and size, converts the address to a file offset using the current section's
// virtual address and data offset, and passes the resource on for extraction
// with a de_finfo that carries the resource name, if there is one.
824 static void do_pe_resource_data_entry(deark
*c
, lctx
*d
, i64 rel_pos
)
828 i64 data_real_offset
;
831 const char *rsrcname
;
833 type_id
= d
->cur_rsrc_type
;
// Use the type's display name when the type is known (the fallback name is
// assigned on a line not visible in this listing).
834 if(d
->cur_rsrc_type_info
&& d
->cur_rsrc_type_info
->name
)
835 rsrcname
= d
->cur_rsrc_type_info
->name
;
839 de_dbg(c
, "resource data entry at %d(%d) rsrc_type=%d (%s)",
840 (int)(d
->pe_cur_base_addr
+rel_pos
), (int)rel_pos
, (int)type_id
, rsrcname
);
// Entry layout: u32 data virtual address at +0, u32 data size at +4.
843 data_virt_addr
= de_getu32le(d
->pe_cur_base_addr
+rel_pos
);
844 data_size
= de_getu32le(d
->pe_cur_base_addr
+rel_pos
+4);
845 de_dbg(c
, "resource data virt. addr=%d (0x%08x), size=%d",
846 (int)data_virt_addr
, (unsigned int)data_virt_addr
, (int)data_size
);
// file offset = virtual address - section virtual address + section file offset
848 data_real_offset
= data_virt_addr
- d
->pe_cur_section_virt_addr
+ d
->pe_cur_section_data_offset
;
849 de_dbg(c
, "data offset in file: %d",
850 (int)data_real_offset
);
852 fi
= de_finfo_create(c
);
// pe_cur_name_offset is 0 when the resource has no name.
854 if(d
->pe_cur_name_offset
) {
855 de_finfo_set_name_from_pe_string(c
, fi
, c
->infile
, d
->pe_cur_name_offset
);
858 do_ne_pe_extract_resource(c
, d
, type_id
, d
->cur_rsrc_type_info
, data_real_offset
, data_size
, fi
);
860 de_finfo_destroy(c
, fi
);
861 de_dbg_indent(c
, -1);
// Prototype needed because do_pe_resource_node() and
// do_pe_resource_dir_table() call each other.
864 static void do_pe_resource_dir_table(deark
*c
, lctx
*d
, i64 rel_pos
, int level
);
// Processes one 8-byte node entry of a PE resource directory table, located at
// rel_pos relative to d->pe_cur_base_addr. The first dword is a name offset or
// numeric id, and the second is the offset of either a deeper directory table
// or a data entry. Counts the node against MAX_RESOURCES, records the resource
// type and name offset in 'd', and then descends.
// NOTE(review): several conditions (the level tests and the has_name /
// is_branch_node assignments) are on lines not visible in this listing.
866 static void do_pe_resource_node(deark
*c
, lctx
*d
, i64 rel_pos
, int level
)
870 int has_name
, is_branch_node
;
// Remember the debug indentation so it can be restored on exit.
873 orig_indent
= c
->dbg_indent_amount
;
875 d
->rsrc_item_count
++;
876 if(d
->rsrc_item_count
>MAX_RESOURCES
) {
877 de_err(c
, "Too many resources.");
// First dword: the high bit is a flag and is cleared to leave the
// name offset or id.
884 name_or_id
= (u32
)de_getu32le(d
->pe_cur_base_addr
+rel_pos
);
885 if(name_or_id
& 0x80000000U
) {
887 name_or_id
-= 0x80000000U
;
// Second dword: the high bit is a flag and is cleared to leave the offset
// of the next table or data entry.
889 next_offset
= de_getu32le(d
->pe_cur_base_addr
+rel_pos
+4);
890 if(next_offset
& 0x80000000U
) {
892 next_offset
-= 0x80000000U
;
// Record the id as the current resource type and look up its table row.
896 d
->cur_rsrc_type
= name_or_id
;
897 d
->cur_rsrc_type_info
= get_rsrc_type_info((u32
)d
->cur_rsrc_type
);
900 de_dbg(c
, "level %d node at %d(%d) id=%d next-offset=%d is-named=%d is-branch=%d",
901 level
, (int)(d
->pe_cur_base_addr
+rel_pos
), (int)rel_pos
,
902 (int)name_or_id
, (int)next_offset
, has_name
, is_branch_node
);
905 if(!ne_pe_resource_type_is_supported(c
, d
, d
->cur_rsrc_type
)) {
906 const char *rsrcname
;
907 if(d
->cur_rsrc_type_info
&& d
->cur_rsrc_type_info
->name
)
908 rsrcname
= d
->cur_rsrc_type_info
->name
;
912 // We don't support this type of resource, so don't go down this path.
913 de_dbg(c
, "resource type %d (%s) not supported", (int)d
->cur_rsrc_type
, rsrcname
);
917 // If a resource has a name (at level 2), keep track of it so we can
918 // use it in the filename.
// The name offset is added to the current section's data offset to get a
// file position.
921 d
->pe_cur_name_offset
= d
->pe_cur_section_data_offset
+ name_or_id
;
922 de_dbg(c
, "resource name at %d", (int)d
->pe_cur_name_offset
);
925 d
->pe_cur_name_offset
= 0;
929 d
->pe_cur_name_offset
= 0;
932 // If high bit is 1, we need to go deeper.
934 do_pe_resource_dir_table(c
, d
, next_offset
, level
+1);
937 do_pe_resource_data_entry(c
, d
, next_offset
);
941 c
->dbg_indent_amount
= orig_indent
;
// Processes a PE resource directory table at rel_pos (relative to
// d->pe_cur_base_addr): reads the named and unnamed entry counts from its
// 16-byte header, then processes each 8-byte node entry that follows.
// 'level' is the depth in the resource tree; the top-level call passes 1.
944 static void do_pe_resource_dir_table(deark
*c
, lctx
*d
, i64 rel_pos
, int level
)
946 i64 named_node_count
;
947 i64 unnamed_node_count
;
951 // 16-byte "Resource node header" a.k.a "Resource directory table"
// NOTE(review): the depth test guarding this warning is not visible in this listing.
954 de_warn(c
, "Resource tree too deep");
958 de_dbg(c
, "resource directory table at %d(%d), level=%d",
959 (unsigned int)(d
->pe_cur_base_addr
+rel_pos
), (unsigned int)rel_pos
, level
);
// The two 16-bit entry counts are the last 4 bytes of the header.
961 named_node_count
= de_getu16le(d
->pe_cur_base_addr
+rel_pos
+12);
962 unnamed_node_count
= de_getu16le(d
->pe_cur_base_addr
+rel_pos
+14);
963 de_dbg(c
, "number of node entries: named=%d, unnamed=%d", (unsigned int)named_node_count
,
964 (unsigned int)unnamed_node_count
);
966 node_count
= named_node_count
+ unnamed_node_count
;
968 // An array of 8-byte "Resource node entries" follows the Resource node header.
969 for(i
=0; i
<node_count
; i
++) {
970 do_pe_resource_node(c
, d
, rel_pos
+16+8*i
, level
);
// Starts the walk of a PE resource section: records 'pos' as the base for all
// relative positions, resets the resource counter, and processes the root
// directory table at relative position 0, level 1. 'len' is not used in the
// visible lines.
974 static void do_pe_resource_section(deark
*c
, lctx
*d
, i64 pos
, i64 len
)
976 d
->pe_cur_base_addr
= pos
;
977 d
->rsrc_item_count
= 0;
978 do_pe_resource_dir_table(c
, d
, 0, 1);
// Decodes the PE section header at 'pos': logs the section name, virtual
// address, data offset and data size, stores the address and offset in 'd',
// and processes the section's resources if it is named ".rsrc".
981 static void do_pe_section_header(deark
*c
, lctx
*d
, i64 section_index
, i64 pos
)
983 i64 section_data_size
;
984 struct de_stringreaderdata
*srd
= NULL
;
986 de_dbg(c
, "section[%d] header at %d", (int)section_index
, (unsigned int)pos
);
989 // Section name: "An 8-byte, null-padded UTF-8 encoded string"
990 srd
= dbuf_read_string(c
->infile
, pos
, 8, 8, DE_CONVFLAG_STOP_AT_NUL
, DE_ENCODING_UTF8
);
991 de_dbg(c
, "section name: \"%s\"", ucstring_getpsz(srd
->str
));
// Header fields: +12 virtual address, +16 data size, +20 data file offset.
993 d
->pe_cur_section_virt_addr
= de_getu32le(pos
+12);
994 section_data_size
= de_getu32le(pos
+16);
995 d
->pe_cur_section_data_offset
= de_getu32le(pos
+20);
997 de_dbg(c
, "section virt. addr=%d (0x%08x)", (int)d
->pe_cur_section_virt_addr
, (unsigned int)d
->pe_cur_section_virt_addr
);
998 de_dbg(c
, "section data offset=%d, size=%d", (int)d
->pe_cur_section_data_offset
, (int)section_data_size
);
// Resources are located by section name, not via the data directory.
1000 if(!de_strcmp(srd
->sz
, ".rsrc")) {
1001 do_pe_resource_section(c
, d
, d
->pe_cur_section_data_offset
, section_data_size
);
1004 de_destroy_stringreaderdata(c
, srd
);
1005 de_dbg_indent(c
, -1);
// Walks the PE section table at d->pe_sections_offset, processing each of the
// d->pe_number_of_sections headers in turn.
1008 static void do_pe_section_table(deark
*c
, lctx
*d
)
1013 pos
= d
->pe_sections_offset
;
1014 de_dbg(c
, "section table at %d", (int)pos
);
1015 de_dbg_indent(c
, 1);
// Section headers are 40 bytes each.
1016 for(i
=0; i
<d
->pe_number_of_sections
; i
++) {
1017 do_pe_section_header(c
, d
, i
, pos
+ 40*i
);
1019 de_dbg_indent(c
, -1);
// Processes one NE resource table NAMEINFO record at 'npos': reads the
// resource's offset and size (both scaled by the alignment shift) and its id
// or name, then extracts the resource if the enclosing TYPEINFO had a numeric
// type (d->ne_have_type).
// NOTE(review): the test that tells a numeric id from a name offset is on
// lines not visible in this listing.
1022 static void do_ne_one_nameinfo(deark
*c
, lctx
*d
, i64 npos
)
1030 de_finfo
*fi
= NULL
;
1031 int saved_indent_level
;
1033 de_dbg_indent_save(c
, &saved_indent_level
);
// Offset (+0) and size (+2) are 16-bit values, left-shifted by ne_align_shift.
1034 rsrc_offset
= de_getu16le(npos
);
1035 if(d
->ne_align_shift
>0) rsrc_offset
<<= d
->ne_align_shift
;
1036 rsrc_size
= de_getu16le(npos
+2);
1037 if(d
->ne_align_shift
>0) rsrc_size
<<= d
->ne_align_shift
;
1039 de_dbg(c
, "NAMEINFO at %d, dpos=%d, dlen=%d", (int)npos
, (int)rsrc_offset
, (int)rsrc_size
);
1040 de_dbg_indent(c
, 1);
// The 16-bit field at +6 holds the resource id, or the offset of its name.
1044 x
= de_getu16le(npos
+6);
// Name offsets are relative to the start of the resource table.
1051 rnNameOffset
= d
->ne_rsrc_tbl_offset
+ x
;
1055 de_dbg(c
, "id name offset: %d", (int)rnNameOffset
);
1058 de_dbg(c
, "id number: %d", (int)rnID
);
// Skip extraction when the enclosing TYPEINFO had no numeric type.
1061 if(!d
->ne_have_type
) goto done
;
1063 fi
= de_finfo_create(c
);
1066 // Names are prefixed with a single-byte length.
1067 x
= (i64
)de_getbyte(rnNameOffset
);
1069 de_ucstring
*rname
= NULL
;
1071 rname
= ucstring_create(c
);
1072 dbuf_read_to_ucstring(c
->infile
, rnNameOffset
+1, x
, rname
, 0, DE_ENCODING_ASCII
);
1073 de_dbg(c
, "resource name: \"%s\"", ucstring_getpsz(rname
));
// The name is used for the output file only when filenames_from_file is set.
1074 if(c
->filenames_from_file
)
1075 de_finfo_set_name_from_ucstring(c
, fi
, rname
, 0);
1076 ucstring_destroy(rname
);
1081 const char *rsrcname
;
1083 if(d
->ne_rsrc_type_info
&& d
->ne_rsrc_type_info
->name
)
1084 rsrcname
= d
->ne_rsrc_type_info
->name
;
1088 de_dbg(c
, "resource at %"I64_FMT
", len=%"I64_FMT
", type_id=%d (%s)", rsrc_offset
,
1089 rsrc_size
, (int)d
->ne_rsrc_type_id
, rsrcname
);
1090 de_dbg_indent(c
, 1);
1091 do_ne_pe_extract_resource(c
, d
, d
->ne_rsrc_type_id
, d
->ne_rsrc_type_info
,
1092 rsrc_offset
, rsrc_size
, fi
);
1093 de_dbg_indent(c
, -1);
// Common exit: restore the debug indentation and release 'fi', which is
// still NULL when the ne_have_type early exit was taken.
1097 de_dbg_indent_restore(c
, saved_indent_level
);
1098 de_finfo_destroy(c
, fi
);
// Walks the NE resource table at d->ne_rsrc_tbl_offset: reads the alignment
// shift, then iterates over the TYPEINFO records, each followed by an array
// of NAMEINFO records, until a type id of 0 ends the array. Stops with an
// error if the running resource total exceeds MAX_RESOURCES.
// NOTE(review): the loop header, the 'pos' advance after the shift field and
// the test that tells numeric types from named ones are not visible in this listing.
1101 static void do_ne_rsrc_tbl(deark
*c
, lctx
*d
)
1109 i64 tot_resources
= 0;
1110 int saved_indent_level
;
1112 de_dbg_indent_save(c
, &saved_indent_level
);
1113 pos
= d
->ne_rsrc_tbl_offset
;
1115 de_dbg(c
, "resource table at %d", (int)pos
);
1116 de_dbg_indent(c
, 1);
// The table begins with the 16-bit alignment shift count.
1118 d
->ne_align_shift
= (unsigned int)de_getu16le(pos
);
1119 de_dbg(c
, "rscAlignShift: %u", d
->ne_align_shift
);
// Reject shift counts above 24 before they are applied to offsets and sizes.
1121 if(d
->ne_align_shift
>24) {
1122 de_err(c
, "Unreasonable rscAlignShift setting");
// First field of a TYPEINFO record: the type id.
1128 x
= de_getu16le(pos
);
1130 // A "type_id" of 0 marks the end of the array
1131 de_dbg(c
, "end of TYPEINFO array found at %d", (int)pos
);
1134 de_dbg(c
, "TYPEINFO #%d at %d", (int)i
, (int)pos
);
1135 de_dbg_indent(c
, 1);
// Numeric type: subtracting 0x8000 yields the id that is looked up in the table.
1138 d
->ne_rsrc_type_id
= (u32
)(x
-0x8000);
1139 d
->ne_rsrc_type_info
= get_rsrc_type_info(d
->ne_rsrc_type_id
);
1140 d
->ne_have_type
= 1;
1143 // x represents a relative offset to a name in rscResourceNames.
1144 // TODO: Could the name ever be a standard type (e.g. "ICON"), that
1145 // we ought to support?
1146 d
->ne_rsrc_type_id
= 0;
1147 d
->ne_rsrc_type_info
= NULL
;
1148 d
->ne_have_type
= 0;
1149 // name_offset = d->ne_rsrc_tbl_offset + x;
// Second field of the TYPEINFO record: number of NAMEINFO records that follow.
1152 rsrc_count
= de_getu16le(pos
+2);
1154 de_dbg(c
, "resource type=%d, count=%d", (int)d
->ne_rsrc_type_id
, (int)rsrc_count
);
1156 de_dbg(c
, "resource type=?, count=%d", (int)rsrc_count
);
1158 tot_resources
+= rsrc_count
;
1160 if(tot_resources
>MAX_RESOURCES
) {
1161 de_err(c
, "Too many resources, or invalid resource table.");
1165 // Read the array of NAMEINFO structures.
1166 // (NAMEINFO seems like a misnomer to me. It contains data, not names.)
// The TYPEINFO header is 8 bytes; each NAMEINFO record is 12 bytes.
1167 for(j
=0; j
<rsrc_count
; j
++) {
1168 npos
= pos
+8 + j
*12;
1169 do_ne_one_nameinfo(c
, d
, npos
);
1172 de_dbg_indent(c
, -1);
// Advance past this TYPEINFO and its NAMEINFO array.
1173 pos
+= 8 + 12*rsrc_count
;
1177 de_dbg_indent(c
, -1);
1180 de_dbg_indent_restore(c
, saved_indent_level
);
1183 // Sniff the resource data, and return a suitable filename extension.
1184 // Or NULL, if unidentified.
// Identification uses a 2-byte signature. For a "BA" (Bitmap Array)
// container, the signature of the contained item is read from offset 14 and
// the returned extension gets a "ba." prefix.
1185 static const char *identify_lx_rsrc(deark
*c
, lctx
*d
, i64 pos
, i64 len
)
// Resources shorter than 16 bytes are not identified.
1190 if(len
<16) return NULL
;
1191 de_read(buf
, pos
, 2);
1192 if(!de_memcmp(buf
, "BA", 2)) {
1193 // Bitmap Array container format. Read the real type.
1194 de_read(buf
, pos
+14, 2);
// Map the signature to an extension.
1198 if(!de_memcmp(buf
, "BM", 2)) {
1199 return is_ba
? "ba.bmp" : "bmp";
1201 if(!de_memcmp(buf
, "CI", 2) || !de_memcmp(buf
, "IC", 2)) {
1202 return is_ba
? "ba.os2.ico" : "os2.ico";
1204 if(!de_memcmp(buf
, "CP", 2) || !de_memcmp(buf
, "PT", 2)) {
1205 return is_ba
? "ba.ptr" : "ptr";
1210 // Extract a resource from an LX file, given the information from an Object Table
// Steps: validate the object number, read that object's 24-byte Object Table
// entry, read the first Object Page Table entry it refers to, compute the
// resource's file offset from the page data offset, and, for resource types 1
// and 2, write the resource to a file with an extension chosen by
// identify_lx_rsrc().
// NOTE(review): the comment line above is cut off in this listing.
1212 static void do_lx_rsrc(deark
*c
, lctx
*d
,
1213 i64 obj_num
, i64 rsrc_offset
, i64 rsrc_size
, i64 rsrc_type
)
1217 i64 reloc_base_addr
;
1219 i64 page_table_index
;
1220 i64 page_table_entries
;
1221 i64 rsrc_offset_real
;
1222 i64 pg_data_offset_raw
;
// Object numbers are 1-based.
1226 if(obj_num
<1 || obj_num
>d
->lx_object_tbl_entries
) {
1227 de_err(c
, "Invalid object number (%d).", (int)obj_num
);
1231 // Read the Object Table
1232 lpos
= d
->lx_object_tbl_offset
+ 24*(obj_num
-1);
1233 de_dbg(c
, "LX object table entry at %d", (int)lpos
);
1235 vsize
= de_getu32le(lpos
);
1236 reloc_base_addr
= de_getu32le(lpos
+4);
1237 flags
= de_getu32le(lpos
+8);
1238 page_table_index
= de_getu32le(lpos
+12);
1239 page_table_entries
= de_getu32le(lpos
+16);
1240 de_dbg(c
, "object #%d: vsize=%d raddr=%d flags=0x%x pti=%d pte=%d", (int)obj_num
,
1241 (int)vsize
, (int)reloc_base_addr
, (unsigned int)flags
, (int)page_table_index
,
1242 (int)page_table_entries
);
// Page table indices are 1-based; an index below 1 is ignored.
1244 if(page_table_index
<1) return;
1246 // Now read the Object Page table
1247 lpos
= d
->lx_object_page_tbl_offset
+ 8*(page_table_index
-1);
1248 de_dbg(c
, "LX page table entry at %d", (int)lpos
);
1250 pg_data_offset_raw
= de_getu32le(lpos
);
1251 //data_size = de_getu16le(lpos+4);
// file offset = (page data offset << page offset shift) + data pages offset
//               + offset of the resource within the object
// NOTE(review): lx_page_offset_shift is a 32-bit value read from the file and
// no range check is visible before it is used as a shift count.
1253 rsrc_offset_real
= pg_data_offset_raw
;
1254 if(d
->lx_page_offset_shift
> 0 ) {
1255 rsrc_offset_real
<<= (unsigned int)d
->lx_page_offset_shift
;
1257 rsrc_offset_real
+= d
->lx_data_pages_offset
;
1258 rsrc_offset_real
+= rsrc_offset
;
1259 de_dbg(c
, "resource offset: %d", (int)rsrc_offset_real
);
1262 // TODO: Support other types of resources.
1263 case 1: // Icon or cursor (?)
1264 case 2: // Bitmap (?)
1265 ext
= identify_lx_rsrc(c
, d
, rsrc_offset_real
, rsrc_size
);
1267 // TODO: This assumes the resource is stored contiguously in the file, but
1268 // for all I know that isn't always the case.
1270 // Unlike in NE and PE format, it seems that image resources in LX files
1271 // include the BITMAPFILEHEADER. That makes it easy.
1272 dbuf_create_file_from_slice(c
->infile
, rsrc_offset_real
, rsrc_size
, ext
, NULL
, 0);
// Walks the LX/LE resource table at d->lx_rsrc_tbl_offset, reading each
// 14-byte entry and passing it to do_lx_rsrc(). Reports an error if the table
// claims more than MAX_RESOURCES entries.
1277 static void do_lx_or_le_rsrc_tbl(deark
*c
, lctx
*d
)
1287 de_dbg(c
, "%s resource table at %d", d
->fmt
==EXE_FMT_LE
?"LE":"LX", (int)d
->lx_rsrc_tbl_offset
);
1288 if(d
->lx_rsrc_tbl_entries
>MAX_RESOURCES
) {
1289 de_err(c
, "Too many resources.");
1293 for(i
=0; i
<d
->lx_rsrc_tbl_entries
; i
++) {
1294 lpos
= d
->lx_rsrc_tbl_offset
+ 14*i
;
// Entry layout: u16 type, u16 name id, u32 size, u16 object number, u32 offset.
1296 type_id
= de_getu16le(lpos
);
1297 name_id
= de_getu16le(lpos
+2);
1298 rsrc_size
= de_getu32le(lpos
+4);
1299 rsrc_object
= de_getu16le(lpos
+8);
1300 rsrc_offset
= de_getu32le(lpos
+10);
1302 de_dbg(c
, "resource #%d: type=%d name=%d size=%d obj=%d offset=%d", (int)i
,
1303 (int)type_id
, (int)name_id
, (int)rsrc_size
, (int)rsrc_object
, (int)rsrc_offset
);
1305 de_dbg_indent(c
, 1);
1306 do_lx_rsrc(c
, d
, rsrc_object
, rsrc_offset
, rsrc_size
, type_id
);
1307 de_dbg_indent(c
, -1);
// Main entry point of the module. Allocates the per-file context, reads the
// file header, then processes the resources of the detected format (PE
// section table, NE resource table, or LX/LE resource table). Finally, prints
// a hint if a ZIP end-of-central-directory record is present in the file.
1311 static void de_run_exe(deark
*c
, de_module_params
*mparams
)
1316 d
= de_malloc(c
, sizeof(lctx
));
1318 do_fileheader(c
, d
);
// Each resource walker runs only if the matching header decoder recorded a
// table offset.
1320 if((d
->fmt
==EXE_FMT_PE32
|| d
->fmt
==EXE_FMT_PE32PLUS
) && d
->pe_sections_offset
>0) {
1321 do_pe_section_table(c
, d
);
1323 else if(d
->fmt
==EXE_FMT_NE
&& d
->ne_rsrc_tbl_offset
>0) {
1324 do_ne_rsrc_tbl(c
, d
);
1326 else if((d
->fmt
==EXE_FMT_LX
|| d
->fmt
==EXE_FMT_LE
) && d
->lx_rsrc_tbl_offset
>0) {
1327 do_lx_or_le_rsrc_tbl(c
, d
);
// Reuse the ZIP EOCD search result from format detection when one is
// available; otherwise search for the record here.
1330 if(c
->detection_data
&& c
->detection_data
->zip_eocd_looked_for
) {
1331 // Note: It isn't necessarily possible to get here - It depends on the details
1332 // of how other modules' identify() functions work.
1333 zip_eocd_found
= (int)c
->detection_data
->zip_eocd_found
;
1336 i64 zip_eocd_pos
= 0;
1337 zip_eocd_found
= fmtutil_find_zip_eocd(c
, c
->infile
, &zip_eocd_pos
);
1339 if(zip_eocd_found
) {
1340 de_info(c
, "Note: This might be a self-extracting ZIP file (try \"-m zip\").");
// Format detection: returns a confidence of 80 when the buffer starts with
// the "MZ" signature. The read that fills 'buf' and the fallback return are
// not visible in this listing.
1346 static int de_identify_exe(deark
*c
)
1351 if(buf
[0]=='M' && buf
[1]=='Z') return 80;
// Module registration: fills in the description and the run and identify
// callbacks for this module in 'mi'.
1355 void de_module_exe(deark
*c
, struct deark_module_info
*mi
)
1358 mi
->desc
= "Microsoft EXE executable (PE, NE, LX)";
1359 mi
->run_fn
= de_run_exe
;
1360 mi
->identify_fn
= de_identify_exe
;