1 # -*- coding: Latin-1 -*-
2 """pefile, Portable Executable reader module
5 All the PE file basic structures are available with their default names
6 as attributes of the instance returned.
8 Processed elements such as the import table are made available with lowercase
9 names, to differentiate them from the upper case basic structure names.
11 pefile has been tested against the limits of valid PE headers, that is, malware.
12 Lots of packed malware attempt to abuse the format way beyond its standard use.
13 To the best of my knowledge most of the abuses are handled gracefully.
15 Copyright (c) 2005, 2006, 2007, 2008 Ero Carrera <ero@dkbza.org>
19 For detailed copyright information see the file COPYING in
20 the root of the distribution archive.
# Module metadata: author, release version and contact address.
__author__ = "Ero Carrera"
__version__ = "1.2.9.1"
__contact__ = "ero@dkbza.org"
37 sha1
, sha256
, sha512
, md5
= None, None, None, None
42 sha256
= hashlib
.sha256
43 sha512
= hashlib
.sha512
# Signature values used to recognise the different header types.
IMAGE_DOS_SIGNATURE = 0x5A4D
IMAGE_OS2_SIGNATURE = 0x454E
IMAGE_OS2_SIGNATURE_LE = 0x454C
IMAGE_VXD_SIGNATURE = 0x454C
IMAGE_NT_SIGNATURE = 0x00004550

IMAGE_NUMBEROF_DIRECTORY_ENTRIES = 16

# Highest bit of a 32-bit and of a 64-bit value respectively.
# Written without the Python 2 'L' suffix: plain integer literals are
# promoted to long automatically on Python 2, and the suffix is a syntax
# error on Python 3.
IMAGE_ORDINAL_FLAG = 0x80000000
IMAGE_ORDINAL_FLAG64 = 0x8000000000000000

# Values of the optional header's Magic field for PE and PE+ images.
OPTIONAL_HEADER_MAGIC_PE = 0x10b
OPTIONAL_HEADER_MAGIC_PE_PLUS = 0x20b
# (name, index) pairs for the data directory entries, listed by index.
directory_entry_types = [
    ("IMAGE_DIRECTORY_ENTRY_EXPORT", 0),
    ("IMAGE_DIRECTORY_ENTRY_IMPORT", 1),
    ("IMAGE_DIRECTORY_ENTRY_RESOURCE", 2),
    ("IMAGE_DIRECTORY_ENTRY_EXCEPTION", 3),
    ("IMAGE_DIRECTORY_ENTRY_SECURITY", 4),
    ("IMAGE_DIRECTORY_ENTRY_BASERELOC", 5),
    ("IMAGE_DIRECTORY_ENTRY_DEBUG", 6),
    ("IMAGE_DIRECTORY_ENTRY_COPYRIGHT", 7),
    ("IMAGE_DIRECTORY_ENTRY_GLOBALPTR", 8),
    ("IMAGE_DIRECTORY_ENTRY_TLS", 9),
    ("IMAGE_DIRECTORY_ENTRY_LOAD_CONFIG", 10),
    ("IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT", 11),
    ("IMAGE_DIRECTORY_ENTRY_IAT", 12),
    ("IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT", 13),
    ("IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR", 14),
    ("IMAGE_DIRECTORY_ENTRY_RESERVED", 15),
]

# Two-way lookup table: index -> name as well as name -> index.
DIRECTORY_ENTRY = dict(
    [(index, name) for name, index in directory_entry_types]
    + directory_entry_types
)
# (name, bit mask) pairs for the flags of the file header's
# Characteristics field.
image_characteristics = [
    ("IMAGE_FILE_RELOCS_STRIPPED", 0x0001),
    ("IMAGE_FILE_EXECUTABLE_IMAGE", 0x0002),
    ("IMAGE_FILE_LINE_NUMS_STRIPPED", 0x0004),
    ("IMAGE_FILE_LOCAL_SYMS_STRIPPED", 0x0008),
    ("IMAGE_FILE_AGGRESIVE_WS_TRIM", 0x0010),
    ("IMAGE_FILE_LARGE_ADDRESS_AWARE", 0x0020),
    ("IMAGE_FILE_16BIT_MACHINE", 0x0040),
    ("IMAGE_FILE_BYTES_REVERSED_LO", 0x0080),
    ("IMAGE_FILE_32BIT_MACHINE", 0x0100),
    ("IMAGE_FILE_DEBUG_STRIPPED", 0x0200),
    ("IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP", 0x0400),
    ("IMAGE_FILE_NET_RUN_FROM_SWAP", 0x0800),
    ("IMAGE_FILE_SYSTEM", 0x1000),
    ("IMAGE_FILE_DLL", 0x2000),
    ("IMAGE_FILE_UP_SYSTEM_ONLY", 0x4000),
    ("IMAGE_FILE_BYTES_REVERSED_HI", 0x8000),
]

# Two-way lookup table: mask -> name as well as name -> mask.
IMAGE_CHARACTERISTICS = dict(
    [(mask, name) for name, mask in image_characteristics]
    + image_characteristics
)
# (name, value) pairs for the section header's Characteristics field.
# Note that two names share the value 0x00020000, and that the
# IMAGE_SCN_ALIGN_* entries are multi-bit values covered by
# IMAGE_SCN_ALIGN_MASK rather than independent single-bit flags.
section_characteristics = [
    ('IMAGE_SCN_CNT_CODE', 0x00000020),
    ('IMAGE_SCN_CNT_INITIALIZED_DATA', 0x00000040),
    ('IMAGE_SCN_CNT_UNINITIALIZED_DATA', 0x00000080),
    ('IMAGE_SCN_LNK_OTHER', 0x00000100),
    ('IMAGE_SCN_LNK_INFO', 0x00000200),
    ('IMAGE_SCN_LNK_REMOVE', 0x00000800),
    ('IMAGE_SCN_LNK_COMDAT', 0x00001000),
    ('IMAGE_SCN_MEM_FARDATA', 0x00008000),
    ('IMAGE_SCN_MEM_PURGEABLE', 0x00020000),
    ('IMAGE_SCN_MEM_16BIT', 0x00020000),
    ('IMAGE_SCN_MEM_LOCKED', 0x00040000),
    ('IMAGE_SCN_MEM_PRELOAD', 0x00080000),
    ('IMAGE_SCN_ALIGN_1BYTES', 0x00100000),
    ('IMAGE_SCN_ALIGN_2BYTES', 0x00200000),
    ('IMAGE_SCN_ALIGN_4BYTES', 0x00300000),
    ('IMAGE_SCN_ALIGN_8BYTES', 0x00400000),
    ('IMAGE_SCN_ALIGN_16BYTES', 0x00500000),
    ('IMAGE_SCN_ALIGN_32BYTES', 0x00600000),
    ('IMAGE_SCN_ALIGN_64BYTES', 0x00700000),
    ('IMAGE_SCN_ALIGN_128BYTES', 0x00800000),
    ('IMAGE_SCN_ALIGN_256BYTES', 0x00900000),
    ('IMAGE_SCN_ALIGN_512BYTES', 0x00A00000),
    ('IMAGE_SCN_ALIGN_1024BYTES', 0x00B00000),
    ('IMAGE_SCN_ALIGN_2048BYTES', 0x00C00000),
    ('IMAGE_SCN_ALIGN_4096BYTES', 0x00D00000),
    ('IMAGE_SCN_ALIGN_8192BYTES', 0x00E00000),
    ('IMAGE_SCN_ALIGN_MASK', 0x00F00000),
    ('IMAGE_SCN_LNK_NRELOC_OVFL', 0x01000000),
    ('IMAGE_SCN_MEM_DISCARDABLE', 0x02000000),
    ('IMAGE_SCN_MEM_NOT_CACHED', 0x04000000),
    ('IMAGE_SCN_MEM_NOT_PAGED', 0x08000000),
    ('IMAGE_SCN_MEM_SHARED', 0x10000000),
    ('IMAGE_SCN_MEM_EXECUTE', 0x20000000),
    ('IMAGE_SCN_MEM_READ', 0x40000000),
    # No 'L' suffix: it is redundant on Python 2 and invalid on Python 3.
    ('IMAGE_SCN_MEM_WRITE', 0x80000000),
]

# Two-way lookup table: value -> name as well as name -> value.  The
# reversed pairs come first and are applied in list order, so for a value
# shared by several names the last name listed is the one reported.
SECTION_CHARACTERISTICS = dict(
    [(e[1], e[0]) for e in section_characteristics]
    + section_characteristics
)
157 ('IMAGE_DEBUG_TYPE_UNKNOWN', 0),
158 ('IMAGE_DEBUG_TYPE_COFF', 1),
159 ('IMAGE_DEBUG_TYPE_CODEVIEW', 2),
160 ('IMAGE_DEBUG_TYPE_FPO', 3),
161 ('IMAGE_DEBUG_TYPE_MISC', 4),
162 ('IMAGE_DEBUG_TYPE_EXCEPTION', 5),
163 ('IMAGE_DEBUG_TYPE_FIXUP', 6),
164 ('IMAGE_DEBUG_TYPE_OMAP_TO_SRC', 7),
165 ('IMAGE_DEBUG_TYPE_OMAP_FROM_SRC', 8),
166 ('IMAGE_DEBUG_TYPE_BORLAND', 9),
167 ('IMAGE_DEBUG_TYPE_RESERVED10', 10) ]
# Two-way lookup table built from debug_types:
# value -> name as well as name -> value.
DEBUG_TYPE = dict(
    [(value, name) for name, value in debug_types]
    + debug_types
)
173 ('IMAGE_SUBSYSTEM_UNKNOWN', 0),
174 ('IMAGE_SUBSYSTEM_NATIVE', 1),
175 ('IMAGE_SUBSYSTEM_WINDOWS_GUI', 2),
176 ('IMAGE_SUBSYSTEM_WINDOWS_CUI', 3),
177 ('IMAGE_SUBSYSTEM_OS2_CUI', 5),
178 ('IMAGE_SUBSYSTEM_POSIX_CUI', 7),
179 ('IMAGE_SUBSYSTEM_WINDOWS_CE_GUI', 9),
180 ('IMAGE_SUBSYSTEM_EFI_APPLICATION', 10),
181 ('IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER', 11),
182 ('IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER', 12),
183 ('IMAGE_SUBSYSTEM_EFI_ROM', 13),
184 ('IMAGE_SUBSYSTEM_XBOX', 14)]
# Two-way lookup table built from subsystem_types:
# value -> name as well as name -> value.
SUBSYSTEM_TYPE = dict(
    [(value, name) for name, value in subsystem_types]
    + subsystem_types
)
190 ('IMAGE_FILE_MACHINE_UNKNOWN', 0),
191 ('IMAGE_FILE_MACHINE_AM33', 0x1d3),
192 ('IMAGE_FILE_MACHINE_AMD64', 0x8664),
193 ('IMAGE_FILE_MACHINE_ARM', 0x1c0),
194 ('IMAGE_FILE_MACHINE_EBC', 0xebc),
195 ('IMAGE_FILE_MACHINE_I386', 0x14c),
196 ('IMAGE_FILE_MACHINE_IA64', 0x200),
197 ('IMAGE_FILE_MACHINE_MR32', 0x9041),
198 ('IMAGE_FILE_MACHINE_MIPS16', 0x266),
199 ('IMAGE_FILE_MACHINE_MIPSFPU', 0x366),
200 ('IMAGE_FILE_MACHINE_MIPSFPU16',0x466),
201 ('IMAGE_FILE_MACHINE_POWERPC', 0x1f0),
202 ('IMAGE_FILE_MACHINE_POWERPCFP',0x1f1),
203 ('IMAGE_FILE_MACHINE_R4000', 0x166),
204 ('IMAGE_FILE_MACHINE_SH3', 0x1a2),
205 ('IMAGE_FILE_MACHINE_SH3DSP', 0x1a3),
206 ('IMAGE_FILE_MACHINE_SH4', 0x1a6),
207 ('IMAGE_FILE_MACHINE_SH5', 0x1a8),
208 ('IMAGE_FILE_MACHINE_THUMB', 0x1c2),
209 ('IMAGE_FILE_MACHINE_WCEMIPSV2',0x169),
# Two-way lookup table built from machine_types:
# value -> name as well as name -> value.
MACHINE_TYPE = dict(
    [(value, name) for name, value in machine_types]
    + machine_types
)
216 ('IMAGE_REL_BASED_ABSOLUTE', 0),
217 ('IMAGE_REL_BASED_HIGH', 1),
218 ('IMAGE_REL_BASED_LOW', 2),
219 ('IMAGE_REL_BASED_HIGHLOW', 3),
220 ('IMAGE_REL_BASED_HIGHADJ', 4),
221 ('IMAGE_REL_BASED_MIPS_JMPADDR', 5),
222 ('IMAGE_REL_BASED_SECTION', 6),
223 ('IMAGE_REL_BASED_REL', 7),
224 ('IMAGE_REL_BASED_MIPS_JMPADDR16', 9),
225 ('IMAGE_REL_BASED_IA64_IMM64', 9),
226 ('IMAGE_REL_BASED_DIR64', 10),
227 ('IMAGE_REL_BASED_HIGH3ADJ', 11) ]
# Two-way lookup table built from relocation_types:
# value -> name as well as name -> value.  Reversed pairs are applied in
# list order, so a value shared by two names reports the later name.
RELOCATION_TYPE = dict(
    [(value, name) for name, value in relocation_types]
    + relocation_types
)
# (name, bit mask) pairs for the optional header's DllCharacteristics field.
dll_characteristics = [
    ("IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0001", 0x0001),
    ("IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0002", 0x0002),
    ("IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0004", 0x0004),
    ("IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0008", 0x0008),
    ("IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE", 0x0040),
    ("IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY", 0x0080),
    ("IMAGE_DLL_CHARACTERISTICS_NX_COMPAT", 0x0100),
    ("IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION", 0x0200),
    ("IMAGE_DLL_CHARACTERISTICS_NO_SEH", 0x0400),
    ("IMAGE_DLL_CHARACTERISTICS_NO_BIND", 0x0800),
    ("IMAGE_DLL_CHARACTERISTICS_RESERVED_0x1000", 0x1000),
    ("IMAGE_DLL_CHARACTERISTICS_WDM_DRIVER", 0x2000),
    ("IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE", 0x8000),
]

# Two-way lookup table: mask -> name as well as name -> mask.
DLL_CHARACTERISTICS = dict(
    [(mask, name) for name, mask in dll_characteristics]
    + dll_characteristics
)
260 ('RT_ACCELERATOR', 9),
262 ('RT_MESSAGETABLE', 11),
263 ('RT_GROUP_CURSOR', 12),
264 ('RT_GROUP_ICON', 14),
266 ('RT_DLGINCLUDE', 17),
269 ('RT_ANICURSOR', 21),
272 ('RT_MANIFEST', 24) ]
# Two-way lookup table built from resource_type:
# value -> name as well as name -> value.
RESOURCE_TYPE = dict(
    [(value, name) for name, value in resource_type]
    + resource_type
)
277 # Language definitions
279 ('LANG_NEUTRAL', 0x00),
280 ('LANG_INVARIANT', 0x7f),
281 ('LANG_AFRIKAANS', 0x36),
282 ('LANG_ALBANIAN', 0x1c),
283 ('LANG_ARABIC', 0x01),
284 ('LANG_ARMENIAN', 0x2b),
285 ('LANG_ASSAMESE', 0x4d),
286 ('LANG_AZERI', 0x2c),
287 ('LANG_BASQUE', 0x2d),
288 ('LANG_BELARUSIAN', 0x23),
289 ('LANG_BENGALI', 0x45),
290 ('LANG_BULGARIAN', 0x02),
291 ('LANG_CATALAN', 0x03),
292 ('LANG_CHINESE', 0x04),
293 ('LANG_CROATIAN', 0x1a),
294 ('LANG_CZECH', 0x05),
295 ('LANG_DANISH', 0x06),
296 ('LANG_DIVEHI', 0x65),
297 ('LANG_DUTCH', 0x13),
298 ('LANG_ENGLISH', 0x09),
299 ('LANG_ESTONIAN', 0x25),
300 ('LANG_FAEROESE', 0x38),
301 ('LANG_FARSI', 0x29),
302 ('LANG_FINNISH', 0x0b),
303 ('LANG_FRENCH', 0x0c),
304 ('LANG_GALICIAN', 0x56),
305 ('LANG_GEORGIAN', 0x37),
306 ('LANG_GERMAN', 0x07),
307 ('LANG_GREEK', 0x08),
308 ('LANG_GUJARATI', 0x47),
309 ('LANG_HEBREW', 0x0d),
310 ('LANG_HINDI', 0x39),
311 ('LANG_HUNGARIAN', 0x0e),
312 ('LANG_ICELANDIC', 0x0f),
313 ('LANG_INDONESIAN', 0x21),
314 ('LANG_ITALIAN', 0x10),
315 ('LANG_JAPANESE', 0x11),
316 ('LANG_KANNADA', 0x4b),
317 ('LANG_KASHMIRI', 0x60),
318 ('LANG_KAZAK', 0x3f),
319 ('LANG_KONKANI', 0x57),
320 ('LANG_KOREAN', 0x12),
321 ('LANG_KYRGYZ', 0x40),
322 ('LANG_LATVIAN', 0x26),
323 ('LANG_LITHUANIAN', 0x27),
324 ('LANG_MACEDONIAN', 0x2f),
325 ('LANG_MALAY', 0x3e),
326 ('LANG_MALAYALAM', 0x4c),
327 ('LANG_MANIPURI', 0x58),
328 ('LANG_MARATHI', 0x4e),
329 ('LANG_MONGOLIAN', 0x50),
330 ('LANG_NEPALI', 0x61),
331 ('LANG_NORWEGIAN', 0x14),
332 ('LANG_ORIYA', 0x48),
333 ('LANG_POLISH', 0x15),
334 ('LANG_PORTUGUESE', 0x16),
335 ('LANG_PUNJABI', 0x46),
336 ('LANG_ROMANIAN', 0x18),
337 ('LANG_RUSSIAN', 0x19),
338 ('LANG_SANSKRIT', 0x4f),
339 ('LANG_SERBIAN', 0x1a),
340 ('LANG_SINDHI', 0x59),
341 ('LANG_SLOVAK', 0x1b),
342 ('LANG_SLOVENIAN', 0x24),
343 ('LANG_SPANISH', 0x0a),
344 ('LANG_SWAHILI', 0x41),
345 ('LANG_SWEDISH', 0x1d),
346 ('LANG_SYRIAC', 0x5a),
347 ('LANG_TAMIL', 0x49),
348 ('LANG_TATAR', 0x44),
349 ('LANG_TELUGU', 0x4a),
351 ('LANG_TURKISH', 0x1f),
352 ('LANG_UKRAINIAN', 0x22),
354 ('LANG_UZBEK', 0x43),
355 ('LANG_VIETNAMESE', 0x2a),
356 ('LANG_GAELIC', 0x3c),
357 ('LANG_MALTESE', 0x3a),
358 ('LANG_MAORI', 0x28),
359 ('LANG_RHAETO_ROMANCE',0x17),
360 ('LANG_SAAMI', 0x3b),
361 ('LANG_SORBIAN', 0x2e),
363 ('LANG_TSONGA', 0x31),
364 ('LANG_TSWANA', 0x32),
365 ('LANG_VENDA', 0x33),
366 ('LANG_XHOSA', 0x34),
368 ('LANG_ESPERANTO', 0x8f),
369 ('LANG_WALON', 0x90),
370 ('LANG_CORNISH', 0x91),
371 ('LANG_WELSH', 0x92),
372 ('LANG_BRETON', 0x93) ]
# Two-way lookup table built from lang: name -> value as well as
# value -> name.  The reversed pairs are applied in list order, so a value
# shared by two names reports the later name.
LANG = dict(
    lang
    + [(value, name) for name, value in lang]
)
377 # Sublanguage definitions
379 ('SUBLANG_NEUTRAL', 0x00),
380 ('SUBLANG_DEFAULT', 0x01),
381 ('SUBLANG_SYS_DEFAULT', 0x02),
382 ('SUBLANG_ARABIC_SAUDI_ARABIA', 0x01),
383 ('SUBLANG_ARABIC_IRAQ', 0x02),
384 ('SUBLANG_ARABIC_EGYPT', 0x03),
385 ('SUBLANG_ARABIC_LIBYA', 0x04),
386 ('SUBLANG_ARABIC_ALGERIA', 0x05),
387 ('SUBLANG_ARABIC_MOROCCO', 0x06),
388 ('SUBLANG_ARABIC_TUNISIA', 0x07),
389 ('SUBLANG_ARABIC_OMAN', 0x08),
390 ('SUBLANG_ARABIC_YEMEN', 0x09),
391 ('SUBLANG_ARABIC_SYRIA', 0x0a),
392 ('SUBLANG_ARABIC_JORDAN', 0x0b),
393 ('SUBLANG_ARABIC_LEBANON', 0x0c),
394 ('SUBLANG_ARABIC_KUWAIT', 0x0d),
395 ('SUBLANG_ARABIC_UAE', 0x0e),
396 ('SUBLANG_ARABIC_BAHRAIN', 0x0f),
397 ('SUBLANG_ARABIC_QATAR', 0x10),
398 ('SUBLANG_AZERI_LATIN', 0x01),
399 ('SUBLANG_AZERI_CYRILLIC', 0x02),
400 ('SUBLANG_CHINESE_TRADITIONAL', 0x01),
401 ('SUBLANG_CHINESE_SIMPLIFIED', 0x02),
402 ('SUBLANG_CHINESE_HONGKONG', 0x03),
403 ('SUBLANG_CHINESE_SINGAPORE', 0x04),
404 ('SUBLANG_CHINESE_MACAU', 0x05),
405 ('SUBLANG_DUTCH', 0x01),
406 ('SUBLANG_DUTCH_BELGIAN', 0x02),
407 ('SUBLANG_ENGLISH_US', 0x01),
408 ('SUBLANG_ENGLISH_UK', 0x02),
409 ('SUBLANG_ENGLISH_AUS', 0x03),
410 ('SUBLANG_ENGLISH_CAN', 0x04),
411 ('SUBLANG_ENGLISH_NZ', 0x05),
412 ('SUBLANG_ENGLISH_EIRE', 0x06),
413 ('SUBLANG_ENGLISH_SOUTH_AFRICA', 0x07),
414 ('SUBLANG_ENGLISH_JAMAICA', 0x08),
415 ('SUBLANG_ENGLISH_CARIBBEAN', 0x09),
416 ('SUBLANG_ENGLISH_BELIZE', 0x0a),
417 ('SUBLANG_ENGLISH_TRINIDAD', 0x0b),
418 ('SUBLANG_ENGLISH_ZIMBABWE', 0x0c),
419 ('SUBLANG_ENGLISH_PHILIPPINES', 0x0d),
420 ('SUBLANG_FRENCH', 0x01),
421 ('SUBLANG_FRENCH_BELGIAN', 0x02),
422 ('SUBLANG_FRENCH_CANADIAN', 0x03),
423 ('SUBLANG_FRENCH_SWISS', 0x04),
424 ('SUBLANG_FRENCH_LUXEMBOURG', 0x05),
425 ('SUBLANG_FRENCH_MONACO', 0x06),
426 ('SUBLANG_GERMAN', 0x01),
427 ('SUBLANG_GERMAN_SWISS', 0x02),
428 ('SUBLANG_GERMAN_AUSTRIAN', 0x03),
429 ('SUBLANG_GERMAN_LUXEMBOURG', 0x04),
430 ('SUBLANG_GERMAN_LIECHTENSTEIN', 0x05),
431 ('SUBLANG_ITALIAN', 0x01),
432 ('SUBLANG_ITALIAN_SWISS', 0x02),
433 ('SUBLANG_KASHMIRI_SASIA', 0x02),
434 ('SUBLANG_KASHMIRI_INDIA', 0x02),
435 ('SUBLANG_KOREAN', 0x01),
436 ('SUBLANG_LITHUANIAN', 0x01),
437 ('SUBLANG_MALAY_MALAYSIA', 0x01),
438 ('SUBLANG_MALAY_BRUNEI_DARUSSALAM', 0x02),
439 ('SUBLANG_NEPALI_INDIA', 0x02),
440 ('SUBLANG_NORWEGIAN_BOKMAL', 0x01),
441 ('SUBLANG_NORWEGIAN_NYNORSK', 0x02),
442 ('SUBLANG_PORTUGUESE', 0x02),
443 ('SUBLANG_PORTUGUESE_BRAZILIAN', 0x01),
444 ('SUBLANG_SERBIAN_LATIN', 0x02),
445 ('SUBLANG_SERBIAN_CYRILLIC', 0x03),
446 ('SUBLANG_SPANISH', 0x01),
447 ('SUBLANG_SPANISH_MEXICAN', 0x02),
448 ('SUBLANG_SPANISH_MODERN', 0x03),
449 ('SUBLANG_SPANISH_GUATEMALA', 0x04),
450 ('SUBLANG_SPANISH_COSTA_RICA', 0x05),
451 ('SUBLANG_SPANISH_PANAMA', 0x06),
452 ('SUBLANG_SPANISH_DOMINICAN_REPUBLIC', 0x07),
453 ('SUBLANG_SPANISH_VENEZUELA', 0x08),
454 ('SUBLANG_SPANISH_COLOMBIA', 0x09),
455 ('SUBLANG_SPANISH_PERU', 0x0a),
456 ('SUBLANG_SPANISH_ARGENTINA', 0x0b),
457 ('SUBLANG_SPANISH_ECUADOR', 0x0c),
458 ('SUBLANG_SPANISH_CHILE', 0x0d),
459 ('SUBLANG_SPANISH_URUGUAY', 0x0e),
460 ('SUBLANG_SPANISH_PARAGUAY', 0x0f),
461 ('SUBLANG_SPANISH_BOLIVIA', 0x10),
462 ('SUBLANG_SPANISH_EL_SALVADOR', 0x11),
463 ('SUBLANG_SPANISH_HONDURAS', 0x12),
464 ('SUBLANG_SPANISH_NICARAGUA', 0x13),
465 ('SUBLANG_SPANISH_PUERTO_RICO', 0x14),
466 ('SUBLANG_SWEDISH', 0x01),
467 ('SUBLANG_SWEDISH_FINLAND', 0x02),
468 ('SUBLANG_URDU_PAKISTAN', 0x01),
469 ('SUBLANG_URDU_INDIA', 0x02),
470 ('SUBLANG_UZBEK_LATIN', 0x01),
471 ('SUBLANG_UZBEK_CYRILLIC', 0x02),
472 ('SUBLANG_DUTCH_SURINAM', 0x03),
473 ('SUBLANG_ROMANIAN', 0x01),
474 ('SUBLANG_ROMANIAN_MOLDAVIA', 0x02),
475 ('SUBLANG_RUSSIAN', 0x01),
476 ('SUBLANG_RUSSIAN_MOLDAVIA', 0x02),
477 ('SUBLANG_CROATIAN', 0x01),
478 ('SUBLANG_LITHUANIAN_CLASSIC', 0x02),
479 ('SUBLANG_GAELIC', 0x01),
480 ('SUBLANG_GAELIC_SCOTTISH', 0x02),
481 ('SUBLANG_GAELIC_MANX', 0x03) ]
# Two-way lookup table built from sublang: name -> value as well as
# value -> name.  Many sublanguage names share a value; the reversed pairs
# are applied in list order, so each value reports the last name listed.
SUBLANG = dict(
    sublang
    + [(value, name) for name, value in sublang]
)
486 class UnicodeStringWrapperPostProcessor
:
487 """This class attempts to help the process of identifying strings
488 that might be plain Unicode or Pascal. A list of strings will be
489 wrapped on it with the hope the overlappings will help make the
490 decision about their type."""
492 def __init__(self
, pe
, rva_ptr
):
494 self
.rva_ptr
= rva_ptr
499 """Get the RVA of the string."""
505 """Return the escaped ASCII representation of the string."""
507 def convert_char(char
):
508 if char
in string
.printable
:
511 return r
'\x%02x' % ord(char
)
514 return ''.join([convert_char(c
) for c
in self
.string
])
519 def invalidate(self
):
520 """Make this instance None, to express it's no known string type."""
525 def render_pascal_16(self
):
527 self
.string
= self
.pe
.get_string_u_at_rva(
529 max_length
=self
.__get
_pascal
_16_length
())
532 def ask_pascal_16(self
, next_rva_ptr
):
533 """The next RVA is taken to be the one immediately following this one.
535 Such RVA could indicate the natural end of the string and will be checked
536 with the possible length contained in the first word.
539 length
= self
.__get
_pascal
_16_length
()
541 if length
== (next_rva_ptr
- (self
.rva_ptr
+2)) / 2:
548 def __get_pascal_16_length(self
):
550 return self
.__get
_word
_value
_at
_rva
(self
.rva_ptr
)
553 def __get_word_value_at_rva(self
, rva
):
556 data
= self
.pe
.get_data(self
.rva_ptr
, 2)
557 except PEFormatError
, e
:
563 return struct
.unpack('<H', data
)[0]
566 #def render_pascal_8(self):
570 def ask_unicode_16(self
, next_rva_ptr
):
571 """The next RVA is taken to be the one immediately following this one.
573 Such RVA could indicate the natural end of the string and will be checked
574 to see if there's a Unicode NULL character there.
577 if self
.__get
_word
_value
_at
_rva
(next_rva_ptr
-2) == 0:
578 self
.length
= next_rva_ptr
- self
.rva_ptr
584 def render_unicode_16(self
):
587 self
.string
= self
.pe
.get_string_u_at_rva(self
.rva_ptr
)
590 class PEFormatError(Exception):
591 """Generic PE format error exception."""
593 def __init__(self
, value
):
597 return repr(self
.value
)
601 """Convenience class for dumping the PE information."""
607 def add_lines(self
, txt
, indent
=0):
608 """Adds a list of lines.
610 The list can be indented with the optional argument 'indent'.
613 self
.add_line(line
, indent
)
616 def add_line(self
, txt
, indent
=0):
619 The line can be indented with the optional argument 'indent'.
622 self
.add(txt
+'\n', indent
)
625 def add(self
, txt
, indent
=0):
626 """Adds some text, no newline will be appended.
628 The text can be indented with the optional argument 'indent'.
631 if isinstance(txt
, unicode):
636 except UnicodeEncodeError, e
:
641 self
.text
+= ' '*indent
+txt
def add_header(self, txt):
    """Add `txt` as a header line.

    The text is framed by ten dashes on either side and followed by an
    extra newline before being handed to add_line().
    """
    rule = '-' * 10
    framed = rule + txt + rule + '\n'
    self.add_line(framed)
650 def add_newline(self
):
651 """Adds a newline."""
657 """Get the text in its current state."""
664 """Prepare structure object to extract members from data.
666 Format is a list containing definitions for the elements
671 def __init__(self
, format
, name
=None, file_offset
=None):
672 # Format is forced little endian, for big endian non Intel platforms
673 self
.__format
__ = '<'
676 self
.__format
_length
__ = 0
677 self
.__set
_format
__(format
[1])
678 self
._all
_zeroes
= False
679 self
.__unpacked
_data
_elms
__ = None
680 self
.__file
_offset
__ = file_offset
684 self
.name
= format
[0]
def __get_format__(self):
    """Return the struct format string accumulated for this structure."""
    format_string = self.__format__
    return format_string
def get_file_offset(self):
    """Return the file offset recorded for this structure."""
    recorded_offset = self.__file_offset__
    return recorded_offset
def set_file_offset(self, offset):
    """Record `offset` as the file offset of this structure."""
    setattr(self, '__file_offset__', offset)
def all_zeroes(self):
    """Return true if the unpacked data is all zeroes.

    Reports the `_all_zeroes` flag kept on the instance.
    """
    zero_flag = self._all_zeroes
    return zero_flag
703 def __set_format__(self
, format
):
707 elm_type
, elm_name
= elm
.split(',', 1)
708 self
.__format
__ += elm_type
710 elm_names
= elm_name
.split(',')
712 for elm_name
in elm_names
:
713 if elm_name
in self
.__keys
__:
714 search_list
= [x
[:len(elm_name
)] for x
in self
.__keys
__]
715 occ_count
= search_list
.count(elm_name
)
716 elm_name
= elm_name
+'_'+str(occ_count
)
717 names
.append(elm_name
)
718 # Some PE header structures have unions on them, so a certain
719 # value might have different names, so each key has a list of
720 # all the possible members referring to the data.
721 self
.__keys
__.append(names
)
723 self
.__format
_length
__ = struct
.calcsize(self
.__format
__)
727 """Return size of the structure."""
729 return self
.__format
_length
__
732 def __unpack__(self
, data
):
734 if len(data
)>self
.__format
_length
__:
735 data
= data
[:self
.__format
_length
__]
738 # Some malware have incorrect header lengths.
739 # Fail gracefully if this occurs
740 # Buggy malware: a29b0118af8b7408444df81701ad5a7f
742 elif len(data
)<self
.__format
_length
__:
743 raise PEFormatError('Data length less than expected header length.')
746 if data
.count(chr(0)) == len(data
):
747 self
._all
_zeroes
= True
749 self
.__unpacked
_data
_elms
__ = struct
.unpack(self
.__format
__, data
)
750 for i
in xrange(len(self
.__unpacked
_data
_elms
__)):
751 for key
in self
.__keys
__[i
]:
752 # self.values[key] = self.__unpacked_data_elms__[i]
753 setattr(self
, key
, self
.__unpacked
_data
_elms
__[i
])
760 for i
in xrange(len(self
.__unpacked
_data
_elms
__)):
762 for key
in self
.__keys
__[i
]:
763 new_val
= getattr(self
, key
)
764 old_val
= self
.__unpacked
_data
_elms
__[i
]
766 # In the case of Unions, when the first changed value
767 # is picked the loop is exited
768 if new_val
!= old_val
:
771 new_values
.append(new_val
)
773 return struct
.pack(self
.__format
__, *new_values
)
777 return '\n'.join( self
.dump() )
780 return '<Structure: %s>' % (' '.join( [' '.join(s
.split()) for s
in self
.dump()] ))
783 def dump(self
, indentation
=0):
784 """Returns a string representation of the structure."""
788 dump
.append('[%s]' % self
.name
)
790 # Refer to the __set_format__ method for an explanation
791 # of the following construct.
792 for keys
in self
.__keys
__:
795 val
= getattr(self
, key
)
796 if isinstance(val
, int) or isinstance(val
, long):
797 val_str
= '0x%-8X' % (val
)
798 if key
== 'TimeDateStamp' or key
== 'dwTimeStamp':
800 val_str
+= ' [%s UTC]' % time
.asctime(time
.gmtime(val
))
801 except exceptions
.ValueError, e
:
802 val_str
+= ' [INVALID TIME]'
804 val_str
= ''.join(filter(lambda c
:c
!= '\0', str(val
)))
806 dump
.append('%-30s %s' % (key
+':', val_str
))
812 class SectionStructure(Structure
):
813 """Convenience section handling class."""
815 def get_data(self
, start
, length
=None):
816 """Get data chunk from a section.
818 Allows to query data from the section by passing the
819 addresses where the PE file would be loaded by default.
820 It is then possible to retrieve code and data by its real
821 addresses as it would be if loaded.
824 offset
= start
- self
.VirtualAddress
831 return self
.data
[offset
:end
]
def get_rva_from_offset(self, offset):
    """Translate a file offset into an RVA relative to this section.

    The distance of `offset` from the section's PointerToRawData is added
    to the section's VirtualAddress.
    """
    distance_into_raw_data = offset - self.PointerToRawData
    return distance_into_raw_data + self.VirtualAddress
def get_offset_from_rva(self, rva):
    """Translate an RVA into a file offset relative to this section.

    The distance of `rva` from the section's VirtualAddress is added to
    the section's PointerToRawData.
    """
    distance_into_section = rva - self.VirtualAddress
    return distance_into_section + self.PointerToRawData
def contains_offset(self, offset):
    """Check whether the section contains the file offset provided.

    Returns True when `offset` lies within the section's raw data, i.e. in
    the half-open range starting at PointerToRawData and spanning
    SizeOfRawData bytes, and False otherwise.
    """
    if not self.PointerToRawData:
        # bss and other sections containing only uninitialized data must
        # have 0 here and do not take space in the file.
        return False

    # Both bounds must be file offsets.  Using VirtualAddress for the upper
    # bound would compare a file offset against a memory address and accept
    # offsets that lie well past the end of the section's raw data.
    raw_start = self.PointerToRawData
    raw_end = raw_start + self.SizeOfRawData
    return raw_start <= offset < raw_end
852 def contains_rva(self
, rva
):
853 """Check whether the section contains the address provided."""
855 # PECOFF documentation v8 says:
856 # The total size of the section when loaded into memory.
857 # If this value is greater than SizeOfRawData, the section is zero-padded.
858 # This field is valid only for executable images and should be set to zero
861 if len(self
.data
) < self
.SizeOfRawData
:
862 size
= self
.Misc_VirtualSize
864 size
= max(self
.SizeOfRawData
, self
.Misc_VirtualSize
)
866 return self
.VirtualAddress
<= rva
< self
.VirtualAddress
+ size
def contains(self, rva):
    """Deprecated alias of contains_rva(); prefer calling that directly."""
    check = self.contains_rva
    return check(rva)
873 def set_data(self
, data
):
874 """Set the data belonging to the section."""
def get_entropy(self):
    """Return the entropy of the section's data as computed by entropy_H()."""
    compute = self.entropy_H
    return compute(self.data)
885 def get_hash_sha1(self
):
886 """Get the SHA-1 hex-digest of the section's data."""
889 return sha1( self
.data
).hexdigest()
def get_hash_sha256(self):
    """Return the SHA-256 hex digest of the section's data.

    Yields None when no sha256 implementation is available.
    """
    if sha256 is None:
        return None
    hasher = sha256(self.data)
    return hasher.hexdigest()
def get_hash_sha512(self):
    """Return the SHA-512 hex digest of the section's data.

    Yields None when no sha512 implementation is available.
    """
    if sha512 is None:
        return None
    hasher = sha512(self.data)
    return hasher.hexdigest()
906 def get_hash_md5(self
):
907 """Get the MD5 hex-digest of the section's data."""
910 return md5( self
.data
).hexdigest()
913 def entropy_H(self
, data
):
914 """Calculate the entropy of a chunk of data."""
919 occurences
= array
.array('L', [0]*256)
922 occurences
[ord(x
)] += 1
927 p_x
= float(x
) / len(data
)
928 entropy
-= p_x
*math
.log(p_x
, 2)
935 """Generic data container."""
def __init__(self, **args):
    """Store every keyword argument as an attribute of the instance."""
    for attr_name in args:
        setattr(self, attr_name, args[attr_name])
943 class ImportDescData(DataContainer
):
944 """Holds import descriptor information.
946 dll: name of the imported DLL
947 imports: list of imported symbols (ImportData instances)
948 struct: IMAGE_IMPORT_DESCRIPTOR structure
951 class ImportData(DataContainer
):
952 """Holds imported symbol's information.
954 ordinal: Ordinal of the symbol
955 name: Name of the symbol
956 bound: If the symbol is bound, this contains
960 class ExportDirData(DataContainer
):
961 """Holds export directory information.
963 struct: IMAGE_EXPORT_DIRECTORY structure
964 symbols: list of exported symbols (ExportData instances)
967 class ExportData(DataContainer
):
968 """Holds exported symbols' information.
970 ordinal: ordinal of the symbol
971 address: address of the symbol
972 name: name of the symbol (None if the symbol is
973 exported by ordinal only)
974 forwarder: if the symbol is forwarded it will
975 contain the name of the target symbol,
980 class ResourceDirData(DataContainer
):
981 """Holds resource directory information.
983 struct: IMAGE_RESOURCE_DIRECTORY structure
984 entries: list of entries (ResourceDirEntryData instances)
987 class ResourceDirEntryData(DataContainer
):
988 """Holds resource directory entry data.
990 struct: IMAGE_RESOURCE_DIRECTORY_ENTRY structure
991 name: If the resource is identified by name this
992 attribute will contain the name string. None
993 otherwise. If identified by id, the id is
994 available at 'struct.Id'
995 id: the id, also in struct.Id
996 directory: If this entry has a lower level directory
997 this attribute will point to the
998 ResourceDirData instance representing it.
999 data: If this entry has no further lower directories
1000 and points to the actual resource data, this
1001 attribute will reference the corresponding
1002 ResourceDataEntryData instance.
1003 (Either of the 'directory' or 'data' attribute will exist,
1007 class ResourceDataEntryData(DataContainer
):
1008 """Holds resource data entry information.
1010 struct: IMAGE_RESOURCE_DATA_ENTRY structure
1011 lang: Primary language ID
1012 sublang: Sublanguage ID
1015 class DebugData(DataContainer
):
1016 """Holds debug information.
1018 struct: IMAGE_DEBUG_DIRECTORY structure
1021 class BaseRelocationData(DataContainer
):
1022 """Holds base relocation information.
1024 struct: IMAGE_BASE_RELOCATION structure
1025 entries: list of relocation data (RelocationData instances)
1028 class RelocationData(DataContainer
):
1029 """Holds relocation information.
1031 type: Type of relocation
1032 The type string can be obtained by
1033 RELOCATION_TYPE[type]
1034 rva: RVA of the relocation
1037 class TlsData(DataContainer
):
1038 """Holds TLS information.
1040 struct: IMAGE_TLS_DIRECTORY structure
1043 class BoundImportDescData(DataContainer
):
1044 """Holds bound import descriptor data.
1046 This directory entry will provide with information on the
1047 DLLs this PE file has been bound to (if bound at all).
1048 The structure will contain the name and timestamp of the
1049 DLL at the time of binding so that the loader can know
1050 whether it differs from the one currently present in the
1051 system and must, therefore, re-bind the PE's imports.
1053 struct: IMAGE_BOUND_IMPORT_DESCRIPTOR structure
1055 entries: list of entries (BoundImportRefData instances)
1056 the entries will exist if this DLL has forwarded
1057 symbols. If so, the destination DLL will have an
1061 class BoundImportRefData(DataContainer
):
1062 """Holds bound import forwarder reference data.
1064 Contains the same information as the bound descriptor but
1065 for forwarded DLLs, if any.
1067 struct: IMAGE_BOUND_FORWARDER_REF structure
1073 """A Portable Executable representation.
1075 This class provides access to most of the information in a PE file.
1077 It expects to be supplied the name of the file to load or PE data
1078 to process and an optional argument 'fast_load' (False by default)
1079 which controls whether to load all the directories information,
1080 which can be quite time consuming.
1082 pe = pefile.PE('module.dll')
1083 pe = pefile.PE(name='module.dll')
1085 would load 'module.dll' and process it. If the data would be already
1086 available in a buffer the same could be achieved with:
1088 pe = pefile.PE(data=module_dll_data)
1090 The "fast_load" can be set to a default by setting its value in the
1091 module itself by means, for instance, of a "pefile.fast_load = True".
1092 That will make all the subsequent instances not to load the
1093 whole PE structure. The "full_load" method can be used to parse
1094 the missing data at a later stage.
1096 Basic headers information will be available in the attributes:
1103 All of them will contain among their attributes the members of the
1104 corresponding structures as defined in WINNT.H
1106 The raw data corresponding to the header (from the beginning of the
1107 file up to the start of the first section) will be available in the
1108 instance's attribute 'header' as a string.
1110 The sections will be available as a list in the 'sections' attribute.
1111 Each entry will contain as attributes all the structure's members.
1113 Directory entries will be available as attributes (if they exist):
1114 (no other entries are processed at this point)
1116 DIRECTORY_ENTRY_IMPORT (list of ImportDescData instances)
1117 DIRECTORY_ENTRY_EXPORT (ExportDirData instance)
1118 DIRECTORY_ENTRY_RESOURCE (ResourceDirData instance)
1119 DIRECTORY_ENTRY_DEBUG (list of DebugData instances)
1120 DIRECTORY_ENTRY_BASERELOC (list of BaseRelocationData instances)
1122 DIRECTORY_ENTRY_BOUND_IMPORT (list of BoundImportData instances)
1124 The following dictionary attributes provide ways of mapping different
1125 constants. They will accept the numeric value and return the string
1126 representation and the opposite, feed in the string and get the
1130 IMAGE_CHARACTERISTICS
1131 SECTION_CHARACTERISTICS
1142 # Format specifications for PE structures.
1145 __IMAGE_DOS_HEADER_format__
= ('IMAGE_DOS_HEADER',
1146 ('H,e_magic', 'H,e_cblp', 'H,e_cp',
1147 'H,e_crlc', 'H,e_cparhdr', 'H,e_minalloc',
1148 'H,e_maxalloc', 'H,e_ss', 'H,e_sp', 'H,e_csum',
1149 'H,e_ip', 'H,e_cs', 'H,e_lfarlc', 'H,e_ovno', '8s,e_res',
1150 'H,e_oemid', 'H,e_oeminfo', '20s,e_res2',
# Each format is a (structure name, field specs) pair; every field spec is
# a struct type code followed by one or more comma-separated member names.
__IMAGE_FILE_HEADER_format__ = (
    "IMAGE_FILE_HEADER",
    (
        "H,Machine",
        "H,NumberOfSections",
        "L,TimeDateStamp",
        "L,PointerToSymbolTable",
        "L,NumberOfSymbols",
        "H,SizeOfOptionalHeader",
        "H,Characteristics",
    ),
)

__IMAGE_DATA_DIRECTORY_format__ = (
    "IMAGE_DATA_DIRECTORY",
    (
        "L,VirtualAddress",
        "L,Size",
    ),
)
# 32-bit optional header layout.
__IMAGE_OPTIONAL_HEADER_format__ = (
    "IMAGE_OPTIONAL_HEADER",
    (
        "H,Magic",
        "B,MajorLinkerVersion",
        "B,MinorLinkerVersion",
        "L,SizeOfCode",
        "L,SizeOfInitializedData",
        "L,SizeOfUninitializedData",
        "L,AddressOfEntryPoint",
        "L,BaseOfCode",
        "L,BaseOfData",
        "L,ImageBase",
        "L,SectionAlignment",
        "L,FileAlignment",
        "H,MajorOperatingSystemVersion",
        "H,MinorOperatingSystemVersion",
        "H,MajorImageVersion",
        "H,MinorImageVersion",
        "H,MajorSubsystemVersion",
        "H,MinorSubsystemVersion",
        "L,Reserved1",
        "L,SizeOfImage",
        "L,SizeOfHeaders",
        "L,CheckSum",
        "H,Subsystem",
        "H,DllCharacteristics",
        "L,SizeOfStackReserve",
        "L,SizeOfStackCommit",
        "L,SizeOfHeapReserve",
        "L,SizeOfHeapCommit",
        "L,LoaderFlags",
        "L,NumberOfRvaAndSizes",
    ),
)

# 64-bit optional header layout: there is no BaseOfData member, and
# ImageBase plus the stack/heap sizes are 8-byte ('Q') fields.
__IMAGE_OPTIONAL_HEADER64_format__ = (
    "IMAGE_OPTIONAL_HEADER64",
    (
        "H,Magic",
        "B,MajorLinkerVersion",
        "B,MinorLinkerVersion",
        "L,SizeOfCode",
        "L,SizeOfInitializedData",
        "L,SizeOfUninitializedData",
        "L,AddressOfEntryPoint",
        "L,BaseOfCode",
        "Q,ImageBase",
        "L,SectionAlignment",
        "L,FileAlignment",
        "H,MajorOperatingSystemVersion",
        "H,MinorOperatingSystemVersion",
        "H,MajorImageVersion",
        "H,MinorImageVersion",
        "H,MajorSubsystemVersion",
        "H,MinorSubsystemVersion",
        "L,Reserved1",
        "L,SizeOfImage",
        "L,SizeOfHeaders",
        "L,CheckSum",
        "H,Subsystem",
        "H,DllCharacteristics",
        "Q,SizeOfStackReserve",
        "Q,SizeOfStackCommit",
        "Q,SizeOfHeapReserve",
        "Q,SizeOfHeapCommit",
        "L,LoaderFlags",
        "L,NumberOfRvaAndSizes",
    ),
)
1195 __IMAGE_NT_HEADERS_format__
= ('IMAGE_NT_HEADERS', ('L,Signature',))
1197 __IMAGE_SECTION_HEADER_format__
= ('IMAGE_SECTION_HEADER',
1198 ('8s,Name', 'L,Misc,Misc_PhysicalAddress,Misc_VirtualSize',
1199 'L,VirtualAddress', 'L,SizeOfRawData', 'L,PointerToRawData',
1200 'L,PointerToRelocations', 'L,PointerToLinenumbers',
1201 'H,NumberOfRelocations', 'H,NumberOfLinenumbers',
1202 'L,Characteristics'))
# Import, export and resource directory layouts. Same convention as the other
# format tables: (structure name, tuple of '<struct code>,<field name>'
# strings).

__IMAGE_DELAY_IMPORT_DESCRIPTOR_format__ = ('IMAGE_DELAY_IMPORT_DESCRIPTOR',
    ('L,grAttrs', 'L,szName', 'L,phmod', 'L,pIAT', 'L,pINT',
     'L,pBoundIAT', 'L,pUnloadIAT', 'L,dwTimeStamp'))

__IMAGE_IMPORT_DESCRIPTOR_format__ = ('IMAGE_IMPORT_DESCRIPTOR',
    ('L,OriginalFirstThunk,Characteristics',
     'L,TimeDateStamp', 'L,ForwarderChain', 'L,Name', 'L,FirstThunk'))

__IMAGE_EXPORT_DIRECTORY_format__ = ('IMAGE_EXPORT_DIRECTORY',
    ('L,Characteristics',
     'L,TimeDateStamp', 'H,MajorVersion', 'H,MinorVersion', 'L,Name',
     'L,Base', 'L,NumberOfFunctions', 'L,NumberOfNames',
     'L,AddressOfFunctions', 'L,AddressOfNames', 'L,AddressOfNameOrdinals'))

__IMAGE_RESOURCE_DIRECTORY_format__ = ('IMAGE_RESOURCE_DIRECTORY',
    ('L,Characteristics',
     'L,TimeDateStamp', 'H,MajorVersion', 'H,MinorVersion',
     'H,NumberOfNamedEntries', 'H,NumberOfIdEntries'))

# Two raw 32-bit words; parse_resource_entry() splits them into
# NameOffset/Id and DataIsDirectory/OffsetToDirectory.
__IMAGE_RESOURCE_DIRECTORY_ENTRY_format__ = ('IMAGE_RESOURCE_DIRECTORY_ENTRY',
    ('L,Name', 'L,OffsetToData'))

__IMAGE_RESOURCE_DATA_ENTRY_format__ = ('IMAGE_RESOURCE_DATA_ENTRY',
    ('L,OffsetToData', 'L,Size', 'L,CodePage', 'L,Reserved'))
# Version-information resource layouts. VS_VERSIONINFO, StringFileInfo,
# StringTable, String and Var all share the same three-word header
# (Length, ValueLength, Type); only the structure name differs.

__VS_VERSIONINFO_format__ = ('VS_VERSIONINFO',
    ('H,Length', 'H,ValueLength', 'H,Type'))

__VS_FIXEDFILEINFO_format__ = ('VS_FIXEDFILEINFO',
    ('L,Signature', 'L,StrucVersion', 'L,FileVersionMS', 'L,FileVersionLS',
     'L,ProductVersionMS', 'L,ProductVersionLS', 'L,FileFlagsMask', 'L,FileFlags',
     'L,FileOS', 'L,FileType', 'L,FileSubtype', 'L,FileDateMS', 'L,FileDateLS'))

__StringFileInfo_format__ = ('StringFileInfo',
    ('H,Length', 'H,ValueLength', 'H,Type'))

__StringTable_format__ = ('StringTable',
    ('H,Length', 'H,ValueLength', 'H,Type'))

__String_format__ = ('String',
    ('H,Length', 'H,ValueLength', 'H,Type'))

__Var_format__ = ('Var', ('H,Length', 'H,ValueLength', 'H,Type'))
# Thunk, debug, relocation, TLS and bound-import layouts. The 64-bit variants
# keep the 32-bit structure name ('IMAGE_THUNK_DATA', 'IMAGE_TLS_DIRECTORY')
# and differ only in field width.

__IMAGE_THUNK_DATA_format__ = ('IMAGE_THUNK_DATA',
    ('L,ForwarderString,Function,Ordinal,AddressOfData',))

__IMAGE_THUNK_DATA64_format__ = ('IMAGE_THUNK_DATA',
    ('Q,ForwarderString,Function,Ordinal,AddressOfData',))

__IMAGE_DEBUG_DIRECTORY_format__ = ('IMAGE_DEBUG_DIRECTORY',
    ('L,Characteristics', 'L,TimeDateStamp', 'H,MajorVersion',
     'H,MinorVersion', 'L,Type', 'L,SizeOfData', 'L,AddressOfRawData',
     'L,PointerToRawData'))

__IMAGE_BASE_RELOCATION_format__ = ('IMAGE_BASE_RELOCATION',
    ('L,VirtualAddress', 'L,SizeOfBlock'))

__IMAGE_TLS_DIRECTORY_format__ = ('IMAGE_TLS_DIRECTORY',
    ('L,StartAddressOfRawData', 'L,EndAddressOfRawData',
     'L,AddressOfIndex', 'L,AddressOfCallBacks',
     'L,SizeOfZeroFill', 'L,Characteristics'))

__IMAGE_TLS_DIRECTORY64_format__ = ('IMAGE_TLS_DIRECTORY',
    ('Q,StartAddressOfRawData', 'Q,EndAddressOfRawData',
     'Q,AddressOfIndex', 'Q,AddressOfCallBacks',
     'L,SizeOfZeroFill', 'L,Characteristics'))

# IMAGE_BOUND_IMPORT_DESCRIPTOR and IMAGE_BOUND_FORWARDER_REF have the same
# size (8 bytes); parse_directory_bound_imports() relies on that.
__IMAGE_BOUND_IMPORT_DESCRIPTOR_format__ = ('IMAGE_BOUND_IMPORT_DESCRIPTOR',
    ('L,TimeDateStamp', 'H,OffsetModuleName', 'H,NumberOfModuleForwarderRefs'))

__IMAGE_BOUND_FORWARDER_REF_format__ = ('IMAGE_BOUND_FORWARDER_REF',
    ('L,TimeDateStamp', 'H,OffsetModuleName', 'H,Reserved'))
1280 def __init__(self
, name
=None, data
=None, fast_load
=None):
1284 self
.__warnings
= []
1288 if not name
and not data
:
1291 # This list will keep track of all the structures created.
1292 # That will allow for an easy iteration through the list
1293 # in order to save the modifications made
1294 self
.__structures
__ = []
1297 fast_load
= globals()['fast_load']
1298 self
.__parse
__(name
, data
, fast_load
)
1302 def __unpack_data__(self
, format
, data
, file_offset
):
1303 """Apply structure format to raw data.
1305 Returns and unpacked structure object if successful, None otherwise.
1308 structure
= Structure(format
, file_offset
=file_offset
)
1309 #if len(data) < structure.sizeof():
1313 structure
.__unpack
__(data
)
1314 except PEFormatError
, err
:
1315 self
.__warnings
.append(
1316 'Corrupt header "%s" at file offset %d. Exception: %s' % (
1317 format
[0], file_offset
, str(err
)) )
1320 self
.__structures
__.append(structure
)
1326 def __parse__(self
, fname
, data
, fast_load
):
1327 """Parse a Portable Executable file.
1329 Loads a PE file, parsing all its structures and making them available
1330 through the instance's attributes.
1334 fd
= file(fname
, 'rb')
1335 self
.__data
__ = fd
.read()
1338 self
.__data
__ = data
1341 self
.DOS_HEADER
= self
.__unpack
_data
__(
1342 self
.__IMAGE
_DOS
_HEADER
_format
__,
1343 self
.__data
__, file_offset
=0)
1345 if not self
.DOS_HEADER
or self
.DOS_HEADER
.e_magic
!= IMAGE_DOS_SIGNATURE
:
1346 raise PEFormatError('DOS Header magic not found.')
1349 # Check for sane value in e_lfanew
1351 if self
.DOS_HEADER
.e_lfanew
> len(self
.__data
__):
1352 raise PEFormatError('Invalid e_lfanew value, probably not a PE file')
1354 nt_headers_offset
= self
.DOS_HEADER
.e_lfanew
1356 self
.NT_HEADERS
= self
.__unpack
_data
__(
1357 self
.__IMAGE
_NT
_HEADERS
_format
__,
1358 self
.__data
__[nt_headers_offset
:],
1359 file_offset
= nt_headers_offset
)
1361 # We better check the signature right here, before the file screws
1362 # around with sections:
1364 # Some malware will cause the Signature value to not exist at all
1365 if not self
.NT_HEADERS
or not self
.NT_HEADERS
.Signature
:
1366 raise PEFormatError('NT Headers not found.')
1368 if self
.NT_HEADERS
.Signature
!= IMAGE_NT_SIGNATURE
:
1369 raise PEFormatError('Invalid NT Headers signature.')
1371 self
.FILE_HEADER
= self
.__unpack
_data
__(
1372 self
.__IMAGE
_FILE
_HEADER
_format
__,
1373 self
.__data
__[nt_headers_offset
+4:],
1374 file_offset
= nt_headers_offset
+4)
1375 image_flags
= self
.retrieve_flags(IMAGE_CHARACTERISTICS
, 'IMAGE_FILE_')
1377 if not self
.FILE_HEADER
:
1378 raise PEFormatError('File Header missing')
1380 # Set the image's flags according the the Characteristics member
1381 self
.set_flags(self
.FILE_HEADER
, self
.FILE_HEADER
.Characteristics
, image_flags
)
1383 optional_header_offset
= \
1384 nt_headers_offset
+4+self
.FILE_HEADER
.sizeof()
1386 # Note: location of sections can be controlled from PE header:
1387 sections_offset
= optional_header_offset
+ self
.FILE_HEADER
.SizeOfOptionalHeader
1389 self
.OPTIONAL_HEADER
= self
.__unpack
_data
__(
1390 self
.__IMAGE
_OPTIONAL
_HEADER
_format
__,
1391 self
.__data
__[optional_header_offset
:],
1392 file_offset
= optional_header_offset
)
1394 # According to solardesigner's findings for his
1395 # Tiny PE project, the optional header does not
1396 # need fields beyond "Subsystem" in order to be
1397 # loadable by the Windows loader (given that zeroes
1398 # are acceptable values and the header is loaded
1399 # in a zeroed memory page)
1400 # If trying to parse a full Optional Header fails
1401 # we try to parse it again with some 0 padding
1403 MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE
= 69
1405 if ( self
.OPTIONAL_HEADER
is None and
1406 len(self
.__data
__[optional_header_offset
:])
1407 >= MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE
):
1409 # Add enough zeroes to make up for the unused fields
1411 padding_length
= 128
1415 padded_data
= self
.__data
__[optional_header_offset
:] + (
1416 '\0' * padding_length
)
1418 self
.OPTIONAL_HEADER
= self
.__unpack
_data
__(
1419 self
.__IMAGE
_OPTIONAL
_HEADER
_format
__,
1421 file_offset
= optional_header_offset
)
1424 # Check the Magic in the OPTIONAL_HEADER and set the PE file
1427 if self
.OPTIONAL_HEADER
is not None:
1429 if self
.OPTIONAL_HEADER
.Magic
== OPTIONAL_HEADER_MAGIC_PE
:
1431 self
.PE_TYPE
= OPTIONAL_HEADER_MAGIC_PE
1433 elif self
.OPTIONAL_HEADER
.Magic
== OPTIONAL_HEADER_MAGIC_PE_PLUS
:
1435 self
.PE_TYPE
= OPTIONAL_HEADER_MAGIC_PE_PLUS
1437 self
.OPTIONAL_HEADER
= self
.__unpack
_data
__(
1438 self
.__IMAGE
_OPTIONAL
_HEADER
64_format
__,
1439 self
.__data
__[optional_header_offset
:],
1440 file_offset
= optional_header_offset
)
1442 # Again, as explained above, we try to parse
1443 # a reduced form of the Optional Header which
1444 # is still valid despite not including all
1447 MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE
= 69+4
1449 if ( self
.OPTIONAL_HEADER
is None and
1450 len(self
.__data
__[optional_header_offset
:])
1451 >= MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE
):
1453 padding_length
= 128
1454 padded_data
= self
.__data
__[optional_header_offset
:] + (
1455 '\0' * padding_length
)
1456 self
.OPTIONAL_HEADER
= self
.__unpack
_data
__(
1457 self
.__IMAGE
_OPTIONAL
_HEADER
64_format
__,
1459 file_offset
= optional_header_offset
)
1462 if not self
.FILE_HEADER
:
1463 raise PEFormatError('File Header missing')
1467 # Die gracefully if there is no OPTIONAL_HEADER field
1468 # 975440f5ad5e2e4a92c4d9a5f22f75c1
1469 if self
.PE_TYPE
is None or self
.OPTIONAL_HEADER
is None:
1470 raise PEFormatError("No Optional Header found, invalid PE32 or PE32+ file")
1472 dll_characteristics_flags
= self
.retrieve_flags(DLL_CHARACTERISTICS
, 'IMAGE_DLL_CHARACTERISTICS_')
1474 # Set the Dll Characteristics flags according the the DllCharacteristics member
1476 self
.OPTIONAL_HEADER
,
1477 self
.OPTIONAL_HEADER
.DllCharacteristics
,
1478 dll_characteristics_flags
)
1481 self
.OPTIONAL_HEADER
.DATA_DIRECTORY
= []
1482 #offset = (optional_header_offset + self.FILE_HEADER.SizeOfOptionalHeader)
1483 offset
= (optional_header_offset
+ self
.OPTIONAL_HEADER
.sizeof())
1486 self
.NT_HEADERS
.FILE_HEADER
= self
.FILE_HEADER
1487 self
.NT_HEADERS
.OPTIONAL_HEADER
= self
.OPTIONAL_HEADER
1490 # The NumberOfRvaAndSizes is sanitized to stay within
1491 # reasonable limits so can be casted to an int
1493 if self
.OPTIONAL_HEADER
.NumberOfRvaAndSizes
> 0x10:
1494 self
.__warnings
.append(
1495 'Suspicious NumberOfRvaAndSizes in the Optional Header. ' +
1496 'Normal values are never larger than 0x10, the value is: 0x%x' %
1497 self
.OPTIONAL_HEADER
.NumberOfRvaAndSizes
)
1499 for i
in xrange(int(0x7fffffffL
& self
.OPTIONAL_HEADER
.NumberOfRvaAndSizes
)):
1501 if len(self
.__data
__[offset
:]) == 0:
1504 if len(self
.__data
__[offset
:]) < 8:
1505 data
= self
.__data
__[offset
:]+'\0'*8
1507 data
= self
.__data
__[offset
:]
1509 dir_entry
= self
.__unpack
_data
__(
1510 self
.__IMAGE
_DATA
_DIRECTORY
_format
__,
1512 file_offset
= offset
)
1514 if dir_entry
is None:
1517 # Would fail if missing an entry
1518 # 1d4937b2fa4d84ad1bce0309857e70ca offending sample
1520 dir_entry
.name
= DIRECTORY_ENTRY
[i
]
1521 except (KeyError, AttributeError):
1524 offset
+= dir_entry
.sizeof()
1526 self
.OPTIONAL_HEADER
.DATA_DIRECTORY
.append(dir_entry
)
1528 # If the offset goes outside the optional header,
1529 # the loop is broken, regardless of how many directories
1530 # NumberOfRvaAndSizes says there are
1532 # We assume a normally sized optional header, hence that we do
1533 # a sizeof() instead of reading SizeOfOptionalHeader.
1534 # Then we add a default number of drectories times their size,
1535 # if we go beyond that, we assume the number of directories
1536 # is wrong and stop processing
1537 if offset
>= (optional_header_offset
+
1538 self
.OPTIONAL_HEADER
.sizeof() + 8*16) :
1543 offset
= self
.parse_sections(sections_offset
)
1546 # There could be a problem if there are no raw data sections
1548 # fc91013eb72529da005110a3403541b6 example
1549 # Should this throw an exception in the minimum header offset
1553 s
.PointerToRawData
for s
in self
.sections
if s
.PointerToRawData
>0]
1555 if len(rawDataPointers
) > 0:
1556 lowest_section_offset
= min(rawDataPointers
)
1558 lowest_section_offset
= None
1560 if not lowest_section_offset
or lowest_section_offset
<offset
:
1561 self
.header
= self
.__data
__[:offset
]
1563 self
.header
= self
.__data
__[:lowest_section_offset
]
1566 # Check whether the entry point lies within a section
1568 if self
.get_section_by_rva(self
.OPTIONAL_HEADER
.AddressOfEntryPoint
) is not None:
1570 # Check whether the entry point lies within the file
1572 ep_offset
= self
.get_offset_from_rva(self
.OPTIONAL_HEADER
.AddressOfEntryPoint
)
1573 if ep_offset
> len(self
.__data
__):
1575 self
.__warnings
.append(
1576 'Possibly corrupt file. AddressOfEntryPoint lies outside the file. ' +
1577 'AddressOfEntryPoint: 0x%x' %
1578 self
.OPTIONAL_HEADER
.AddressOfEntryPoint
)
1582 self
.__warnings
.append(
1583 'AddressOfEntryPoint lies outside the sections\' boundaries. ' +
1584 'AddressOfEntryPoint: 0x%x' %
1585 self
.OPTIONAL_HEADER
.AddressOfEntryPoint
)
1589 self
.parse_data_directories()
def get_warnings(self):
    """Return the list of warnings.

    Non-critical problems found when parsing the PE file are
    appended to a list of warnings. This method returns that
    list itself, not a copy.
    """

    return self.__warnings
1603 def show_warnings(self
):
1604 """Print the list of warnings.
1606 Non-critical problems found when parsing the PE file are
1607 appended to a list of warnings. This method prints the
1608 full list to standard output.
1611 for warning
in self
.__warnings
:
def full_load(self):
    """Process the data directories.

    This method will load the data directories which might not have
    been loaded if the "fast_load" option was used. It simply
    delegates to parse_data_directories() and returns nothing.
    """

    self.parse_data_directories()
1625 def write(self
, filename
=None):
1626 """Write the PE file.
1628 This function will process all headers and components
1629 of the PE file and include all changes made (by just
1630 assigning to attributes in the PE objects) and write
1631 the changes back to a file whose name is provided as
1632 an argument. The filename is optional.
1633 The data to be written to the file will be returned
1637 file_data
= list(self
.__data
__)
1638 for struct
in self
.__structures
__:
1640 struct_data
= list(struct
.__pack
__())
1641 offset
= struct
.get_file_offset()
1643 file_data
[offset
:offset
+len(struct_data
)] = struct_data
1645 if hasattr(self
, 'VS_VERSIONINFO'):
1646 if hasattr(self
, 'FileInfo'):
1647 for entry
in self
.FileInfo
:
1648 if hasattr(entry
, 'StringTable'):
1649 for st_entry
in entry
.StringTable
:
1650 for key
, entry
in st_entry
.entries
.items():
1652 offsets
= st_entry
.entries_offsets
[key
]
1653 lengths
= st_entry
.entries_lengths
[key
]
1655 if len( entry
) > lengths
[1]:
1658 list(entry
[:lengths
[1]]), ['\0'] * lengths
[1] )
1663 offsets
[1] : offsets
[1] + lengths
[1]*2 ] = l
1668 list(entry
), ['\0'] * len(entry
) )
1673 offsets
[1] : offsets
[1] + len(entry
)*2 ] = l
1675 remainder
= lengths
[1] - len(entry
)
1677 offsets
[1] + len(entry
)*2 :
1678 offsets
[1] + lengths
[1]*2 ] = [
1679 u
'\0' ] * remainder
*2
1681 new_file_data
= ''.join( [ chr(ord(c
)) for c
in file_data
] )
1684 f
= file(filename
, 'wb+')
1685 f
.write(new_file_data
)
1688 return new_file_data
1692 def parse_sections(self
, offset
):
1693 """Fetch the PE file sections.
1695 The sections will be readily available in the "sections" attribute.
1696 Its attributes will contain all the section information plus "data"
1697 a buffer containing the section's data.
1699 The "Characteristics" member will be processed and attributes
1700 representing the section characteristics (with the 'IMAGE_SCN_'
1701 string trimmed from the constant's names) will be added to the
1704 Refer to the SectionStructure class for additional info.
1709 for i
in xrange(self
.FILE_HEADER
.NumberOfSections
):
1710 section
= SectionStructure(self
.__IMAGE
_SECTION
_HEADER
_format
__)
1713 section_offset
= offset
+ section
.sizeof() * i
1714 section
.set_file_offset(section_offset
)
1715 section
.__unpack
__(self
.__data
__[section_offset
:])
1716 self
.__structures
__.append(section
)
1718 if section
.SizeOfRawData
> len(self
.__data
__):
1719 self
.__warnings
.append(
1720 ('Error parsing section %d. ' % i
) +
1721 'SizeOfRawData is larger than file.')
1723 if section
.PointerToRawData
> len(self
.__data
__):
1724 self
.__warnings
.append(
1725 ('Error parsing section %d. ' % i
) +
1726 'PointerToRawData points beyond the end of the file.')
1728 if section
.Misc_VirtualSize
> 0x10000000:
1729 self
.__warnings
.append(
1730 ('Suspicious value found parsing section %d. ' % i
) +
1731 'VirtualSize is extremely large > 256MiB.')
1733 if section
.VirtualAddress
> 0x10000000:
1734 self
.__warnings
.append(
1735 ('Suspicious value found parsing section %d. ' % i
) +
1736 'VirtualAddress is beyond 0x10000000.')
1739 # Some packer used a non-aligned PointerToRawData in the sections,
1740 # which causes several common tools not to load the section data
1741 # properly as they blindly read from the indicated offset.
1742 # It seems that Windows will round the offset down to the largest
1743 # offset multiple of FileAlignment which is smaller than
1744 # PointerToRawData. The following code will do the same.
1747 #alignment = self.OPTIONAL_HEADER.FileAlignment
1748 section_data_start
= section
.PointerToRawData
1750 if ( self
.OPTIONAL_HEADER
.FileAlignment
!= 0 and
1751 (section
.PointerToRawData
% self
.OPTIONAL_HEADER
.FileAlignment
) != 0):
1752 self
.__warnings
.append(
1753 ('Error parsing section %d. ' % i
) +
1754 'Suspicious value for FileAlignment in the Optional Header. ' +
1755 'Normally the PointerToRawData entry of the sections\' structures ' +
1756 'is a multiple of FileAlignment, this might imply the file ' +
1757 'is trying to confuse tools which parse this incorrectly')
1759 section_data_end
= section_data_start
+section
.SizeOfRawData
1760 section
.set_data(self
.__data
__[section_data_start
:section_data_end
])
1762 section_flags
= self
.retrieve_flags(SECTION_CHARACTERISTICS
, 'IMAGE_SCN_')
1764 # Set the section's flags according the the Characteristics member
1765 self
.set_flags(section
, section
.Characteristics
, section_flags
)
1767 if ( section
.__dict
__.get('IMAGE_SCN_MEM_WRITE', False) and
1768 section
.__dict
__.get('IMAGE_SCN_MEM_EXECUTE', False) ):
1770 self
.__warnings
.append(
1771 ('Suspicious flags set for section %d. ' % i
) +
1772 'Both IMAGE_SCN_MEM_WRITE and IMAGE_SCN_MEM_EXECUTE are set.' +
1773 'This might indicate a packed executable.')
1775 self
.sections
.append(section
)
1777 if self
.FILE_HEADER
.NumberOfSections
> 0 and self
.sections
:
1778 return offset
+ self
.sections
[0].sizeof()*self
.FILE_HEADER
.NumberOfSections
def retrieve_flags(self, flag_dict, flag_filter):
    """Read the flags from a dictionary and return them in a usable form.

    Will return a list of (flag, value) for all flags in "flag_dict"
    matching the filter "flag_filter". Only string keys are considered
    (a key matches when it starts with "flag_filter"); non-string keys
    are skipped.
    """

    return [(name, value) for name, value in flag_dict.items()
            if isinstance(name, str) and name.startswith(flag_filter)]
def set_flags(self, obj, flag_field, flags):
    """Will process the flags and set attributes in the object accordingly.

    The object "obj" will gain attributes named after the flags provided in
    "flags" and valued True/False, matching the results of applying each
    flag value from "flags" to flag_field.

    "flags" is a sequence of (name, mask) pairs, such as the list returned
    by retrieve_flags(); an attribute is True when its mask shares at least
    one set bit with "flag_field".
    """

    for flag in flags:
        setattr(obj, flag[0], bool(flag[1] & flag_field))
1810 def parse_data_directories(self
):
1811 """Parse and process the PE file's data directories."""
1813 directory_parsing
= (
1814 ('IMAGE_DIRECTORY_ENTRY_IMPORT', self
.parse_import_directory
),
1815 ('IMAGE_DIRECTORY_ENTRY_EXPORT', self
.parse_export_directory
),
1816 ('IMAGE_DIRECTORY_ENTRY_RESOURCE', self
.parse_resources_directory
),
1817 ('IMAGE_DIRECTORY_ENTRY_DEBUG', self
.parse_debug_directory
),
1818 ('IMAGE_DIRECTORY_ENTRY_BASERELOC', self
.parse_relocations_directory
),
1819 ('IMAGE_DIRECTORY_ENTRY_TLS', self
.parse_directory_tls
),
1820 ('IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT', self
.parse_delay_import_directory
),
1821 ('IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT', self
.parse_directory_bound_imports
) )
1823 for entry
in directory_parsing
:
1827 dir_entry
= self
.OPTIONAL_HEADER
.DATA_DIRECTORY
[
1828 DIRECTORY_ENTRY
[entry
[0]]]
1831 if dir_entry
.VirtualAddress
:
1832 value
= entry
[1](dir_entry
.VirtualAddress
, dir_entry
.Size
)
1834 setattr(self
, entry
[0][6:], value
)
1837 def parse_directory_bound_imports(self
, rva
, size
):
1840 bnd_descr
= Structure(self
.__IMAGE
_BOUND
_IMPORT
_DESCRIPTOR
_format
__)
1841 bnd_descr_size
= bnd_descr
.sizeof()
1847 bnd_descr
= self
.__unpack
_data
__(
1848 self
.__IMAGE
_BOUND
_IMPORT
_DESCRIPTOR
_format
__,
1849 self
.__data
__[rva
:rva
+bnd_descr_size
],
1851 if bnd_descr
is None:
1852 # If can't parse directory then silently return.
1853 # This directory does not necesarily have to be valid to
1854 # still have a valid PE file
1856 self
.__warnings
.append(
1857 'The Bound Imports directory exists but can\'t be parsed.')
1861 if bnd_descr
.all_zeroes():
1864 rva
+= bnd_descr
.sizeof()
1867 for idx
in xrange(bnd_descr
.NumberOfModuleForwarderRefs
):
1868 # Both structures IMAGE_BOUND_IMPORT_DESCRIPTOR and
1869 # IMAGE_BOUND_FORWARDER_REF have the same size.
1870 bnd_frwd_ref
= self
.__unpack
_data
__(
1871 self
.__IMAGE
_BOUND
_FORWARDER
_REF
_format
__,
1872 self
.__data
__[rva
:rva
+bnd_descr_size
],
1875 if not bnd_frwd_ref
:
1876 raise PEFormatError(
1877 "IMAGE_BOUND_FORWARDER_REF cannot be read")
1878 rva
+= bnd_frwd_ref
.sizeof()
1880 name_str
= self
.get_string_from_data(
1881 start
+bnd_frwd_ref
.OffsetModuleName
, self
.__data
__)
1885 forwarder_refs
.append(BoundImportRefData(
1886 struct
= bnd_frwd_ref
,
1889 name_str
= self
.get_string_from_data(
1890 start
+bnd_descr
.OffsetModuleName
, self
.__data
__)
1894 bound_imports
.append(
1895 BoundImportDescData(
1898 entries
= forwarder_refs
))
1900 return bound_imports
1903 def parse_directory_tls(self
, rva
, size
):
1906 if self
.PE_TYPE
== OPTIONAL_HEADER_MAGIC_PE
:
1907 format
= self
.__IMAGE
_TLS
_DIRECTORY
_format
__
1909 elif self
.PE_TYPE
== OPTIONAL_HEADER_MAGIC_PE_PLUS
:
1910 format
= self
.__IMAGE
_TLS
_DIRECTORY
64_format
__
1912 tls_struct
= self
.__unpack
_data
__(
1915 file_offset
= self
.get_offset_from_rva(rva
))
1920 return TlsData( struct
= tls_struct
)
1923 def parse_relocations_directory(self
, rva
, size
):
1926 rlc
= Structure(self
.__IMAGE
_BASE
_RELOCATION
_format
__)
1927 rlc_size
= rlc
.sizeof()
1934 # Malware that has bad rva entries will cause an error.
1935 # Just continue on after an exception
1938 rlc
= self
.__unpack
_data
__(
1939 self
.__IMAGE
_BASE
_RELOCATION
_format
__,
1940 self
.get_data(rva
, rlc_size
),
1941 file_offset
= self
.get_offset_from_rva(rva
) )
1942 except PEFormatError
:
1943 self
.__warnings
.append(
1944 'Invalid relocation information. Can\'t read ' +
1945 'data at RVA: 0x%x' % rva
)
1951 reloc_entries
= self
.parse_relocations(
1952 rva
+rlc_size
, rlc
.VirtualAddress
, rlc
.SizeOfBlock
-rlc_size
)
1957 entries
= reloc_entries
))
1959 if not rlc
.SizeOfBlock
:
1961 rva
+= rlc
.SizeOfBlock
1966 def parse_relocations(self
, data_rva
, rva
, size
):
1969 data
= self
.get_data(data_rva
, size
)
1972 for idx
in xrange(len(data
)/2):
1973 word
= struct
.unpack('<H', data
[idx
*2:(idx
+1)*2])[0]
1974 reloc_type
= (word
>>12)
1975 reloc_offset
= (word
&0x0fff)
1979 rva
= reloc_offset
+rva
))
1984 def parse_debug_directory(self
, rva
, size
):
1987 dbg
= Structure(self
.__IMAGE
_DEBUG
_DIRECTORY
_format
__)
1988 dbg_size
= dbg
.sizeof()
1991 for idx
in xrange(size
/dbg_size
):
1993 data
= self
.get_data(rva
+dbg_size
*idx
, dbg_size
)
1994 except PEFormatError
, e
:
1995 self
.__warnings
.append(
1996 'Invalid debug information. Can\'t read ' +
1997 'data at RVA: 0x%x' % rva
)
2000 dbg
= self
.__unpack
_data
__(
2001 self
.__IMAGE
_DEBUG
_DIRECTORY
_format
__,
2002 data
, file_offset
= self
.get_offset_from_rva(rva
+dbg_size
*idx
))
2014 def parse_resources_directory(self
, rva
, size
=0, base_rva
= None, level
= 0):
2015 """Parse the resources directory.
2017 Given the rva of the resources directory, it will process all
2020 The root will have the corresponding member of its structure,
2021 IMAGE_RESOURCE_DIRECTORY plus 'entries', a list of all the
2022 entries in the directory.
2024 Those entries will have, correspondingly, all the structure's
2025 members (IMAGE_RESOURCE_DIRECTORY_ENTRY) and an additional one,
2026 "directory", pointing to the IMAGE_RESOURCE_DIRECTORY structure
2027 representing upper layers of the tree. This one will also have
2028 an 'entries' attribute, pointing to the 3rd, and last, level.
2029 Another directory with more entries. Those last entries will
2030 have a new atribute (both 'leaf' or 'data_entry' can be used to
2031 access it). This structure finally points to the resource data.
2032 All the members of this structure, IMAGE_RESOURCE_DATA_ENTRY,
2033 are available as its attributes.
2039 if base_rva
is None:
2042 resources_section
= self
.get_section_by_rva(rva
)
2045 # If the RVA is invalid all would blow up. Some EXEs seem to be
2046 # specially nasty and have an invalid RVA.
2047 data
= self
.get_data(rva
)
2048 except PEFormatError
, e
:
2049 self
.__warnings
.append(
2050 'Invalid resources directory. Can\'t read ' +
2051 'directory data at RVA: 0x%x' % rva
)
2054 # Get the resource directory structure, that is, the header
2055 # of the table preceding the actual entries
2057 resource_dir
= self
.__unpack
_data
__(
2058 self
.__IMAGE
_RESOURCE
_DIRECTORY
_format
__, data
,
2059 file_offset
= self
.get_offset_from_rva(rva
) )
2060 if resource_dir
is None:
2061 # If can't parse resources directory then silently return.
2062 # This directory does not necesarily have to be valid to
2063 # still have a valid PE file
2064 self
.__warnings
.append(
2065 'Invalid resources directory. Can\'t parse ' +
2066 'directory data at RVA: 0x%x' % rva
)
2071 # Advance the rva to the positon immediately following the directory
2072 # table header and pointing to the first entry in the table
2074 rva
+= resource_dir
.sizeof()
2076 number_of_entries
= (
2077 resource_dir
.NumberOfNamedEntries
+
2078 resource_dir
.NumberOfIdEntries
)
2080 strings_to_postprocess
= list()
2082 for idx
in xrange(number_of_entries
):
2084 res
= self
.parse_resource_entry(rva
)
2086 self
.__warnings
.append(
2087 'Error parsing the resources directory, ' +
2088 'Entry %d is invalid, RVA = 0x%x. ' %
2096 # If all named entries have been processed, only Id ones
2099 if idx
>= resource_dir
.NumberOfNamedEntries
:
2102 ustr_offset
= base_rva
+res
.NameOffset
2104 #entry_name = self.get_string_u_at_rva(ustr_offset, max_length=16)
2105 entry_name
= UnicodeStringWrapperPostProcessor(self
, ustr_offset
)
2106 strings_to_postprocess
.append(entry_name
)
2108 except PEFormatError
, excp
:
2109 self
.__warnings
.append(
2110 'Error parsing the resources directory, ' +
2111 'attempting to read entry name. ' +
2112 'Can\'t read unicode string at offset 0x%x' %
2116 if res
.DataIsDirectory
:
2119 # One trick malware can do is to recursively reference
2120 # the next directory. This causes hilarity to ensue when
2121 # trying to parse everything correctly.
2122 # If the original RVA given to this function is equal to
2123 # the next one to parse, we assume that it's a trick.
2124 # Instead of raising a PEFormatError this would skip some
2125 # reasonable data so we just break.
2127 # 9ee4d0a0caf095314fd7041a3e4404dc is the offending sample
2128 if original_rva
== (base_rva
+ res
.OffsetToDirectory
):
2133 entry_directory
= self
.parse_resources_directory(
2134 base_rva
+res
.OffsetToDirectory
,
2135 base_rva
=base_rva
, level
= level
+1)
2137 if not entry_directory
:
2140 ResourceDirEntryData(
2144 directory
= entry_directory
))
2147 struct
= self
.parse_resource_data_entry(
2148 base_rva
+ res
.OffsetToDirectory
)
2151 entry_data
= ResourceDataEntryData(
2153 lang
= res
.Name
& 0xff,
2154 sublang
= (res
.Name
>>8) & 0xff)
2157 ResourceDirEntryData(
2168 # Check if this entry contains version information
2170 if level
== 0 and res
.Id
== RESOURCE_TYPE
['RT_VERSION']:
2171 if len(dir_entries
)>0:
2172 last_entry
= dir_entries
[-1]
2174 rt_version_struct
= None
2176 rt_version_struct
= last_entry
.directory
.entries
[0].directory
.entries
[0].data
.struct
2178 # Maybe a malformed directory structure...?
2182 if rt_version_struct
is not None:
2183 self
.parse_version_information(rt_version_struct
)
2188 string_rvas
= [s
.get_rva() for s
in strings_to_postprocess
]
2191 for idx
, s
in enumerate(strings_to_postprocess
):
2192 s
.render_pascal_16()
2195 resource_directory_data
= ResourceDirData(
2196 struct
= resource_dir
,
2197 entries
= dir_entries
)
2199 return resource_directory_data
def parse_resource_data_entry(self, rva):
    """Parse a data entry from the resources directory.

    Reads the data at "rva" and unpacks it as an
    IMAGE_RESOURCE_DATA_ENTRY. Returns whatever __unpack_data__()
    produces for it, or None (after recording a warning) when the RVA
    cannot be read.
    """

    try:
        # If the RVA is invalid all would blow up. Some EXEs seem to be
        # specially nasty and have an invalid RVA.
        data = self.get_data(rva)
    except PEFormatError:
        self.__warnings.append(
            'Error parsing a resource directory data entry, ' +
            'the RVA is invalid: 0x%x' % ( rva ) )
        return None

    data_entry = self.__unpack_data__(
        self.__IMAGE_RESOURCE_DATA_ENTRY_format__, data,
        file_offset = self.get_offset_from_rva(rva) )

    return data_entry
def parse_resource_entry(self, rva):
    """Parse a directory entry from the resources directory.

    Unpacks an IMAGE_RESOURCE_DIRECTORY_ENTRY from the data at "rva" and
    decorates it with fields decoded from its two raw words:

    NameOffset         low 31 bits of Name
    Id                 low 16 bits of Name
    DataIsDirectory    top bit of OffsetToData (1 or 0)
    OffsetToDirectory  low 31 bits of OffsetToData

    Returns the decorated structure, or None when the entry could not be
    unpacked.
    """

    resource = self.__unpack_data__(
        self.__IMAGE_RESOURCE_DIRECTORY_ENTRY_format__, self.get_data(rva),
        file_offset = self.get_offset_from_rva(rva) )

    if resource is None:
        return None

    # The top bit of Name is not decoded into an attribute here; only the
    # remaining 31 bits are kept as the name offset.
    resource.NameOffset = resource.Name & 0x7FFFFFFF

    resource.__pad = resource.Name & 0xFFFF0000
    resource.Id = resource.Name & 0x0000FFFF

    resource.DataIsDirectory = (resource.OffsetToData & 0x80000000) >> 31
    resource.OffsetToDirectory = resource.OffsetToData & 0x7FFFFFFF

    return resource
def parse_version_information(self, version_struct):
    """Parse a version information resource (VS_VERSIONINFO).

    On success the following attributes are set on the PE object:

    VS_VERSIONINFO    the leading structure, plus a 'Key' attribute holding
                      the unicode string that names it.
    VS_FIXEDFILEINFO  the fixed-size structure that follows it.
    FileInfo          list of all StringFileInfo and VarFileInfo structures.

    StringFileInfo structures get a list attribute 'StringTable'. Each table
    in it carries 'LangID' and the dictionaries 'entries' (key -> value),
    'entries_offsets' (key -> (key file offset, value file offset)) and
    'entries_lengths' (key -> (len(key), len(value))). Every key/value pair
    is also set as an attribute on the table.

    VarFileInfo structures get a list attribute 'Var'. A Var may carry a
    dictionary 'entry' mapping its name to a '0x%04x 0x%04x' string.

    Parsing stops at the first structure or string that cannot be read,
    usually after appending a warning; what was collected so far is kept.

    version_struct -- resource data entry whose 'OffsetToData' (an RVA) and
                      'Size' locate the version block.
    """

    def warn_unreadable(what, rva):
        # All string-read failures in this parser share one message layout.
        self.__warnings.append(
            'Error parsing the version information, ' +
            'attempting to read %s string. Can\'t ' % what +
            'read unicode string at offset 0x%x' % (rva))

    base_rva = version_struct.OffsetToData

    # Retrieve the data for the version info resource
    start_offset = self.get_offset_from_rva(base_rva)
    raw_data = self.__data__[start_offset:start_offset + version_struct.Size]

    # Map the main structure and the subsequent string
    versioninfo_struct = self.__unpack_data__(
        self.__VS_VERSIONINFO_format__, raw_data,
        file_offset=start_offset)

    if versioninfo_struct is None:
        return

    ustr_offset = base_rva + versioninfo_struct.sizeof()
    try:
        versioninfo_string = self.get_string_u_at_rva(ustr_offset)
    except PEFormatError:
        warn_unreadable('VS_VERSION_INFO', ustr_offset)
        versioninfo_string = None

    # If the structure does not contain the expected name, it's assumed
    # to be invalid (this also covers a string that could not be read).
    if versioninfo_string != u'VS_VERSION_INFO':
        self.__warnings.append('Invalid VS_VERSION_INFO block')
        return

    # Publish the structure and the unicode string identifying it
    self.VS_VERSIONINFO = versioninfo_struct
    self.VS_VERSIONINFO.Key = versioninfo_string

    # Process the fixed version information, get the offset and structure.
    # Strings are UTF-16, hence 2 bytes per character plus the terminator.
    fixedfileinfo_offset = self.dword_align(
        versioninfo_struct.sizeof() + 2 * (len(versioninfo_string) + 1),
        base_rva)
    fixedfileinfo_struct = self.__unpack_data__(
        self.__VS_FIXEDFILEINFO_format__,
        raw_data[fixedfileinfo_offset:],
        file_offset=start_offset + fixedfileinfo_offset)

    if not fixedfileinfo_struct:
        return

    self.VS_FIXEDFILEINFO = fixedfileinfo_struct

    # Start parsing all the StringFileInfo and VarFileInfo structures
    stringfileinfo_offset = self.dword_align(
        fixedfileinfo_offset + fixedfileinfo_struct.sizeof(),
        base_rva)

    # The PE object's attribute that will contain them all.
    self.FileInfo = list()

    while True:

        # Process the StringFileInfo/VarFileInfo struct
        stringfileinfo_struct = self.__unpack_data__(
            self.__StringFileInfo_format__,
            raw_data[stringfileinfo_offset:],
            file_offset=start_offset + stringfileinfo_offset)

        if stringfileinfo_struct is None:
            self.__warnings.append(
                'Error parsing StringFileInfo/VarFileInfo struct')
            return None

        # Get the subsequent string defining the structure.
        # NOTE(review): the header size used here comes from the
        # VS_VERSIONINFO structure, not from stringfileinfo_struct;
        # presumably both headers have the same size -- confirm.
        ustr_offset = (base_rva + stringfileinfo_offset +
                       versioninfo_struct.sizeof())
        try:
            stringfileinfo_string = self.get_string_u_at_rva(ustr_offset)
        except PEFormatError:
            warn_unreadable('StringFileInfo', ustr_offset)
            break

        # Set such string as the Key attribute and record the structure
        stringfileinfo_struct.Key = stringfileinfo_string
        self.FileInfo.append(stringfileinfo_struct)

        # Parse a StringFileInfo entry
        if stringfileinfo_string == u'StringFileInfo':

            if (stringfileinfo_struct.Type == 1 and
                    stringfileinfo_struct.ValueLength == 0):

                stringtable_offset = self.dword_align(
                    stringfileinfo_offset + stringfileinfo_struct.sizeof() +
                    2 * (len(stringfileinfo_string) + 1),
                    base_rva)

                stringfileinfo_struct.StringTable = list()

                # Process the String Table entries
                while True:

                    stringtable_struct = self.__unpack_data__(
                        self.__StringTable_format__,
                        raw_data[stringtable_offset:],
                        file_offset=start_offset + stringtable_offset)

                    if not stringtable_struct:
                        break

                    ustr_offset = (base_rva + stringtable_offset +
                                   stringtable_struct.sizeof())
                    try:
                        stringtable_string = self.get_string_u_at_rva(
                            ustr_offset)
                    except PEFormatError:
                        warn_unreadable('StringTable', ustr_offset)
                        break
                    if stringtable_string is None:
                        # Unmapped RVA: nothing to take a length of.
                        warn_unreadable('StringTable', ustr_offset)
                        break

                    stringtable_struct.LangID = stringtable_string
                    stringtable_struct.entries = dict()
                    stringtable_struct.entries_offsets = dict()
                    stringtable_struct.entries_lengths = dict()
                    stringfileinfo_struct.StringTable.append(
                        stringtable_struct)

                    entry_offset = self.dword_align(
                        stringtable_offset + stringtable_struct.sizeof() +
                        2 * (len(stringtable_string) + 1),
                        base_rva)

                    # Process all entries in the string table
                    table_end = stringtable_offset + stringtable_struct.Length
                    while entry_offset < table_end:

                        string_struct = self.__unpack_data__(
                            self.__String_format__, raw_data[entry_offset:],
                            file_offset=start_offset + entry_offset)

                        if not string_struct:
                            break

                        ustr_offset = (base_rva + entry_offset +
                                       string_struct.sizeof())
                        try:
                            key = self.get_string_u_at_rva(ustr_offset)
                            key_offset = self.get_offset_from_rva(ustr_offset)
                        except PEFormatError:
                            warn_unreadable('StringTable Key', ustr_offset)
                            break
                        if key is None:
                            warn_unreadable('StringTable Key', ustr_offset)
                            break

                        value_offset = self.dword_align(
                            2 * (len(key) + 1) + entry_offset +
                            string_struct.sizeof(),
                            base_rva)

                        ustr_offset = base_rva + value_offset
                        try:
                            value = self.get_string_u_at_rva(
                                ustr_offset,
                                max_length=string_struct.ValueLength)
                            value_offset = self.get_offset_from_rva(
                                ustr_offset)
                        except PEFormatError:
                            warn_unreadable('StringTable Value', ustr_offset)
                            break
                        if value is None:
                            warn_unreadable('StringTable Value', ustr_offset)
                            break

                        if string_struct.Length == 0:
                            # A zero-length entry cannot advance the cursor;
                            # jump to the end of the table instead.
                            entry_offset = table_end
                        else:
                            entry_offset = self.dword_align(
                                string_struct.Length + entry_offset,
                                base_rva)

                        # Escape non-ASCII characters so the key can be
                        # used as an attribute name.
                        key_as_char = []
                        for c in key:
                            if ord(c) > 127:
                                key_as_char.append('\\x%02x' % ord(c))
                            else:
                                key_as_char.append(c)
                        key_as_char = ''.join(key_as_char)

                        setattr(stringtable_struct, key_as_char, value)
                        stringtable_struct.entries[key] = value
                        stringtable_struct.entries_offsets[key] = (
                            key_offset, value_offset)
                        stringtable_struct.entries_lengths[key] = (
                            len(key), len(value))

                    stringtable_offset = self.dword_align(
                        stringtable_struct.Length + stringtable_offset,
                        base_rva)
                    if stringtable_offset >= stringfileinfo_struct.Length:
                        break

        # Parse a VarFileInfo entry
        elif stringfileinfo_string == u'VarFileInfo':

            varfileinfo_struct = stringfileinfo_struct
            varfileinfo_struct.name = 'VarFileInfo'

            if (varfileinfo_struct.Type == 1 and
                    varfileinfo_struct.ValueLength == 0):

                var_offset = self.dword_align(
                    stringfileinfo_offset + varfileinfo_struct.sizeof() +
                    2 * (len(stringfileinfo_string) + 1),
                    base_rva)

                varfileinfo_struct.Var = list()

                # Process all entries
                while True:
                    var_struct = self.__unpack_data__(
                        self.__Var_format__,
                        raw_data[var_offset:],
                        file_offset=start_offset + var_offset)

                    if not var_struct:
                        break

                    ustr_offset = (base_rva + var_offset +
                                   var_struct.sizeof())
                    try:
                        var_string = self.get_string_u_at_rva(ustr_offset)
                    except PEFormatError:
                        warn_unreadable('VarFileInfo Var', ustr_offset)
                        break
                    if var_string is None:
                        warn_unreadable('VarFileInfo Var', ustr_offset)
                        break

                    varfileinfo_struct.Var.append(var_struct)

                    varword_offset = self.dword_align(
                        2 * (len(var_string) + 1) + var_offset +
                        var_struct.sizeof(),
                        base_rva)
                    orig_varword_offset = varword_offset

                    # Each value is a pair of words; when several pairs are
                    # present the last one read is the one kept in 'entry'.
                    while (varword_offset <
                           orig_varword_offset + var_struct.ValueLength):
                        word1 = self.get_word_from_data(
                            raw_data[varword_offset:varword_offset + 2], 0)
                        word2 = self.get_word_from_data(
                            raw_data[varword_offset + 2:varword_offset + 4], 0)
                        varword_offset += 4

                        var_struct.entry = {
                            var_string: '0x%04x 0x%04x' % (word1, word2)}

                    var_offset = self.dword_align(
                        var_offset + var_struct.Length, base_rva)

                    # NOTE(review): var_offset was already advanced above, so
                    # this holds for every non-negative Length and only the
                    # first Var is ever processed -- confirm the intent.
                    if var_offset <= var_offset + var_struct.Length:
                        break

        # Increment and align the offset
        stringfileinfo_offset = self.dword_align(
            stringfileinfo_struct.Length + stringfileinfo_offset,
            base_rva)

        # Check if all the StringFileInfo and VarFileInfo items have been
        # processed
        if (stringfileinfo_struct.Length == 0 or
                stringfileinfo_offset >= versioninfo_struct.Length):
            break
def parse_export_directory(self, rva, size):
    """Parse the export directory.

    rva  -- RVA of the export directory.
    size -- size of the export directory; an export whose address falls in
            [rva, rva+size) is treated as a forwarder.

    Returns an ExportDirData whose 'struct' is the export directory
    structure and whose 'symbols' is a list of ExportData entries with:

        ordinal    export ordinal (directory Base + index)
        address    symbol address (relative, not absolute)
        name       symbol name, or None when exported by ordinal only
        forwarder  forwarded symbol string, or None

    Returns None when the directory or its tables cannot be read, or when
    an ordinal points outside the function table.
    """

    def forwarder_for(address):
        # If the function's RVA points within the export directory it
        # addresses a string naming the forwarded symbol instead of the
        # function start.
        if address is not None and rva <= address < rva + size:
            return self.get_string_at_rva(address)
        return None

    try:
        export_dir = self.__unpack_data__(
            self.__IMAGE_EXPORT_DIRECTORY_format__, self.get_data(rva),
            file_offset=self.get_offset_from_rva(rva))
    except PEFormatError:
        self.__warnings.append(
            'Error parsing export directory at RVA: 0x%x' % (rva))
        return

    if not export_dir:
        return

    try:
        address_of_names = self.get_data(
            export_dir.AddressOfNames, export_dir.NumberOfNames * 4)
        address_of_name_ordinals = self.get_data(
            export_dir.AddressOfNameOrdinals, export_dir.NumberOfNames * 4)
        address_of_functions = self.get_data(
            export_dir.AddressOfFunctions, export_dir.NumberOfFunctions * 4)
    except PEFormatError:
        self.__warnings.append(
            'Error parsing export directory at RVA: 0x%x' % (rva))
        return

    exports = []
    # Ordinals already emitted through the name table; a set keeps the
    # second pass linear even when NumberOfFunctions is absurdly large.
    named_ordinals = set()

    for i in xrange(export_dir.NumberOfNames):

        symbol_name = self.get_string_at_rva(
            self.get_dword_from_data(address_of_names, i))

        symbol_ordinal = self.get_word_from_data(
            address_of_name_ordinals, i)

        # Corrupt? a bad pointer (or a truncated ordinal table)... we
        # assume it's all useless, no exports
        if (symbol_ordinal is None or
                symbol_ordinal * 4 >= len(address_of_functions)):
            return None

        symbol_address = self.get_dword_from_data(
            address_of_functions, symbol_ordinal)

        ordinal = export_dir.Base + symbol_ordinal
        named_ordinals.add(ordinal)
        exports.append(
            ExportData(
                ordinal=ordinal,
                address=symbol_address,
                name=symbol_name,
                forwarder=forwarder_for(symbol_address)))

    # Functions not reachable through the name table are exported by
    # ordinal only.
    for idx in xrange(export_dir.NumberOfFunctions):

        ordinal = export_dir.Base + idx
        if ordinal in named_ordinals:
            continue

        symbol_address = self.get_dword_from_data(address_of_functions, idx)

        exports.append(
            ExportData(
                ordinal=ordinal,
                address=symbol_address,
                name=None,
                forwarder=forwarder_for(symbol_address)))

    return ExportDirData(
        struct=export_dir,
        symbols=exports)
def dword_align(self, offset, base):
    """Round 'offset' up so that base+offset lies on a 4-byte boundary.

    offset -- offset relative to 'base'.
    base   -- address the offset is relative to.

    Returns the aligned offset, again relative to 'base'.
    """
    padded = offset + base + 3
    return padded - (padded % 4) - base
def parse_delay_import_directory(self, rva, size):
    """Walk and parse the delay import directory.

    Descriptors are read one after another starting at 'rva' until an
    unreadable or all-zero descriptor is found. Returns a list of
    ImportDescData entries ('struct', 'imports', 'dll'); descriptors with
    no import data or no DLL name are left out. 'size' is not used.
    """
    descriptors = []

    while True:
        try:
            # If the RVA is invalid all would blow up. Some PEs seem to be
            # specially nasty and have an invalid RVA.
            raw = self.get_data(rva)
        except PEFormatError:
            self.__warnings.append(
                'Error parsing the Delay import directory at RVA: 0x%x' % (rva))
            break

        import_desc = self.__unpack_data__(
            self.__IMAGE_DELAY_IMPORT_DESCRIPTOR_format__,
            raw, file_offset=self.get_offset_from_rva(rva))

        # If the structure is all zeroes, we reached the end of the list
        if not import_desc or import_desc.all_zeroes():
            break

        rva += import_desc.sizeof()

        try:
            # NOTE(review): the argument lines of this call are missing from
            # the extracted source; pINT/pIAT are the presumed descriptor
            # field names -- confirm against the descriptor format.
            import_data = self.parse_imports(
                import_desc.pINT,
                import_desc.pIAT,
                None)
        except PEFormatError:
            self.__warnings.append(
                'Error parsing the Delay import directory. ' +
                'Invalid import data at RVA: 0x%x' % (rva))
            break

        if not import_data:
            continue

        dll = self.get_string_at_rva(import_desc.szName)
        if dll:
            descriptors.append(
                ImportDescData(
                    struct=import_desc,
                    imports=import_data,
                    dll=dll))

    return descriptors
def parse_import_directory(self, rva, size):
    """Walk and parse the import directory.

    Descriptors are read one after another starting at 'rva' until an
    unreadable or all-zero descriptor is found. Returns a list of
    ImportDescData entries ('struct', 'imports', 'dll'); descriptors with
    no import data or no DLL name are left out. 'size' is not used.
    """
    descriptors = []

    while True:
        try:
            # If the RVA is invalid all would blow up. Some EXEs seem to be
            # specially nasty and have an invalid RVA.
            raw = self.get_data(rva)
        except PEFormatError:
            self.__warnings.append(
                'Error parsing the Import directory at RVA: 0x%x' % (rva))
            break

        import_desc = self.__unpack_data__(
            self.__IMAGE_IMPORT_DESCRIPTOR_format__,
            raw, file_offset=self.get_offset_from_rva(rva))

        # If the structure is all zeroes, we reached the end of the list
        if not import_desc or import_desc.all_zeroes():
            break

        rva += import_desc.sizeof()

        try:
            import_data = self.parse_imports(
                import_desc.OriginalFirstThunk,
                import_desc.FirstThunk,
                import_desc.ForwarderChain)
        except PEFormatError:
            self.__warnings.append(
                'Error parsing the Import directory. ' +
                'Invalid Import data at RVA: 0x%x' % (rva))
            break

        if not import_data:
            continue

        dll = self.get_string_at_rva(import_desc.Name)
        if dll:
            descriptors.append(
                ImportDescData(
                    struct=import_desc,
                    imports=import_data,
                    dll=dll))

    return descriptors
def parse_imports(self, original_first_thunk, first_thunk, forwarder_chain):
    """Parse the symbols imported through one import descriptor.

    original_first_thunk -- RVA of the Import Lookup Table (ILT).
    first_thunk          -- RVA of the Import Address Table (IAT).
    forwarder_chain      -- accepted for interface compatibility; not used.

    Returns a list of ImportData entries ('import_by_ordinal', 'ordinal',
    'hint', 'name', 'bound', 'address', 'hint_name_table_rva'), or None
    when both tables are present but differ in length.

    Raises PEFormatError when 'first_thunk' lies in no section or when
    neither table can be read.
    """
    imported_symbols = []

    imports_section = self.get_section_by_rva(first_thunk)
    if not imports_section:
        raise PEFormatError('Invalid/corrupt imports.')

    # Import Lookup Table. Contains ordinals or pointers to strings.
    ilt = self.get_import_table(original_first_thunk)
    # Import Address Table. May have identical content to the ILT if the
    # PE file is not bound. It will contain the addresses of the imported
    # symbols once the binary is loaded, or if it is already bound.
    iat = self.get_import_table(first_thunk)

    # Would crash if iat or ilt had None type
    if not iat and not ilt:
        raise PEFormatError(
            'Invalid Import Table information. ' +
            'Both ILT and IAT appear to be broken.')

    if ilt and iat:
        # With both tables present they must describe the same entries.
        if len(ilt) != len(iat):
            return None
        table = ilt
    else:
        table = ilt or iat

    # Thunks are 8 bytes wide in PE32+ images and 4 bytes wide otherwise;
    # the width matters both for the ordinal flag and for the slot address.
    if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
        ordinal_flag = IMAGE_ORDINAL_FLAG64
        thunk_size = 8
    else:
        ordinal_flag = IMAGE_ORDINAL_FLAG
        thunk_size = 4

    for idx, thunk in enumerate(table):

        imp_ord = None
        imp_hint = None
        imp_name = None
        hint_name_table_rva = None
        import_by_ordinal = False

        if thunk.AddressOfData:
            if thunk.AddressOfData & ordinal_flag:
                # Imported by ordinal: only the ordinal number is known.
                import_by_ordinal = True
                imp_ord = thunk.AddressOfData & 0xffff
            else:
                try:
                    hint_name_table_rva = thunk.AddressOfData & 0x7fffffff
                    data = self.get_data(hint_name_table_rva, 2)
                    # The hint is the leading word; the name follows it.
                    imp_hint = self.get_word_from_data(data, 0)
                    imp_name = self.get_string_at_rva(hint_name_table_rva + 2)
                except PEFormatError:
                    # Unreadable hint/name: the entry is dropped below.
                    pass

        imp_address = (first_thunk + self.OPTIONAL_HEADER.ImageBase +
                       idx * thunk_size)

        # A differing IAT value means the import has been bound.
        if iat and ilt and ilt[idx].AddressOfData != iat[idx].AddressOfData:
            imp_bound = iat[idx].AddressOfData
        else:
            imp_bound = None

        if imp_name != '' and (imp_ord or imp_name):
            imported_symbols.append(
                ImportData(
                    import_by_ordinal=import_by_ordinal,
                    ordinal=imp_ord,
                    hint=imp_hint,
                    name=imp_name,
                    bound=imp_bound,
                    address=imp_address,
                    hint_name_table_rva=hint_name_table_rva))

    return imported_symbols
def get_import_table(self, rva):
    """Read the table of thunk structures starting at 'rva'.

    Thunks are unpacked one after another until one cannot be unpacked or
    is all zeroes. Returns the list of thunk structures (empty when 'rva'
    is zero), or None when the data at an RVA cannot be fetched.
    """
    thunks = []

    while rva:
        try:
            raw = self.get_data(rva)
        except PEFormatError:
            self.__warnings.append(
                'Error parsing the import table. ' +
                'Invalid data at RVA: 0x%x' % (rva))
            return None

        # The thunk layout depends on whether this is PE32 or PE32+.
        if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:
            thunk_format = self.__IMAGE_THUNK_DATA_format__
        elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
            thunk_format = self.__IMAGE_THUNK_DATA64_format__

        thunk_data = self.__unpack_data__(
            thunk_format, raw, file_offset=self.get_offset_from_rva(rva))

        if not thunk_data or thunk_data.all_zeroes():
            break

        rva += thunk_data.sizeof()
        thunks.append(thunk_data)

    return thunks
def get_memory_mapped_image(self, max_virtual_address=0x10000000, ImageBase=None):
    """Return the data laid out as the PE file would be mapped in memory.

    The result starts with the PE header; each section's data is then
    placed at the offset given by its VirtualAddress, padding with zero
    bytes where there is a gap and truncating what was built so far where
    a section starts earlier than the current end.

    max_virtual_address -- sections whose VirtualAddress is at or beyond
        this value are skipped. Such sections are normally only there to
        confuse tools; it is a common trick in packed executables.
    ImageBase -- NOTE(review): nothing in this body reads this argument,
        so no relocation is applied here -- confirm whether that is
        intended.
    """
    image = self.header
    file_size = len(self.__data__)

    for section in self.sections:

        # Miscellaneous integrity tests. Some packers set these fields to
        # bogus values to make tools go nuts.
        if section.Misc_VirtualSize == 0 or section.SizeOfRawData == 0:
            continue
        if section.SizeOfRawData > file_size:
            continue
        if section.PointerToRawData > file_size:
            continue
        if section.VirtualAddress >= max_virtual_address:
            continue

        gap = section.VirtualAddress - len(image)
        if gap > 0:
            image += '\0' * gap
        elif gap < 0:
            image = image[:gap]

        image += section.data

    return image
def get_data(self, rva, length=None):
    """Get data regardless of the section where it lies on.

    rva    -- address of the first byte wanted.
    length -- number of bytes wanted; when omitted (or zero) everything up
              to the end of the containing area is returned.

    The section containing 'rva' supplies the data. When no section
    contains it but 'rva' is smaller than the header length, the data is
    taken from the header. Otherwise PEFormatError is raised.
    """
    section = self.get_section_by_rva(rva)
    if section:
        return section.get_data(rva, length)

    if rva >= len(self.header):
        raise PEFormatError('data at RVA can\'t be fetched. Corrupt header?')

    if length:
        end = rva + length
    else:
        end = None
    return self.header[rva:end]
def get_rva_from_offset(self, offset):
    """Get the rva corresponding to this file offset.

    Raises PEFormatError when no section contains the offset.
    """
    section = self.get_section_by_offset(offset)
    if section:
        return section.get_rva_from_offset(offset)
    raise PEFormatError("specified offset (0x%x) doesn't belong to any section." % offset)
def get_offset_from_rva(self, rva):
    """Get the file offset corresponding to this rva.

    The section containing 'rva' translates it to an offset within the
    file. Raises PEFormatError when no section contains the rva.
    """
    section = self.get_section_by_rva(rva)
    if section:
        return section.get_offset_from_rva(rva)
    raise PEFormatError('data at RVA can\'t be fetched. Corrupt header?')
def get_string_at_rva(self, rva):
    """Get an ASCII string located at the given address.

    The string is read from the section containing 'rva', or from the
    header when no section contains it and 'rva' is smaller than the
    header length. Returns None when neither applies.
    """
    section = self.get_section_by_rva(rva)
    if section:
        return self.get_string_from_data(
            rva - section.VirtualAddress, section.data)

    if rva < len(self.header):
        return self.get_string_from_data(rva, self.header)

    return None
3027 def get_string_from_data(self
, offset
, data
):
3028 """Get an ASCII string from within the data."""
def get_string_u_at_rva(self, rva, max_length = 2**16):
    """Get an Unicode string located at the given address.

    Reads 16-bit little-endian characters starting at 'rva' until a null
    character, 'max_length' characters, or data too short to hold a
    character is reached. Returns the unicode string, or None when 'rva'
    itself cannot be read.
    """
    try:
        # If the RVA is invalid all would blow up. Some EXEs seem to be
        # specially nasty and have an invalid RVA. This read is only a
        # probe; its result is not used.
        self.get_data(rva, 2)
    except PEFormatError:
        return None

    chars = []
    for idx in xrange(max_length):
        try:
            code = struct.unpack('<H', self.get_data(rva + 2 * idx, 2))[0]
        except struct.error:
            break

        char = unichr(code)
        if char == u'\0':
            break
        chars.append(char)

    return u''.join(chars)
def get_section_by_offset(self, offset):
    """Get the section containing the given file offset.

    Returns the first section in self.sections whose contains_offset()
    accepts 'offset', or None when there is none.
    """
    for section in self.sections:
        if section.contains_offset(offset):
            return section
    return None
def get_section_by_rva(self, rva):
    """Get the section containing the given address.

    Returns the first section in self.sections whose contains_rva()
    accepts 'rva', or None when there is none.
    """
    for section in self.sections:
        if section.contains_rva(rva):
            return section
    return None
def __str__(self):
    """Return the text produced by dump_info()."""
    # NOTE(review): the 'def' line of this method is missing from the
    # extracted source; the name is presumed from context -- confirm.
    text = self.dump_info()
    return text
def print_info(self):
    """Print all the PE header information in a human readable form."""
    text = self.dump_info()
    print(text)
def dump_info(self, dump=None):
    """Dump all the PE header information into a human readable string.

    dump -- object the text is written into; a new Dump() is created when
            omitted.

    Sections are written in this order: parsing warnings, DOS/NT/FILE
    headers, optional header, PE sections, data directories, version
    information, exports, imports, bound imports, delay imports,
    resources, TLS, debug information and base relocations. Those that
    depend on an attribute of the PE object are skipped when it is absent.

    Returns dump.get_text().
    """

    def set_flag_names(owner, flag_defs):
        # Comma separated names of the flags that are set on 'owner'.
        return ', '.join(
            [flag[0] for flag in flag_defs if getattr(owner, flag[0])])

    def add_imported_symbols(header, modules):
        # Shared layout of the import and delay import listings.
        dump.add_header(header)
        for module in modules:
            dump.add_lines(module.struct.dump())
            dump.add_newline()
            for symbol in module.imports:
                if symbol.import_by_ordinal is True:
                    dump.add('%s Ordinal[%s] (Imported by Ordinal)' % (
                        module.dll, str(symbol.ordinal)))
                else:
                    dump.add('%s.%s Hint[%s]' % (
                        module.dll, symbol.name, str(symbol.hint)))

                if symbol.bound:
                    dump.add_line(' Bound: 0x%08X' % (symbol.bound))
                else:
                    dump.add_newline()
            dump.add_newline()

    if dump is None:
        dump = Dump()

    warnings = self.get_warnings()
    if warnings:
        dump.add_header('Parsing Warnings')
        for warning in warnings:
            dump.add_line(warning)
            dump.add_newline()

    dump.add_header('DOS_HEADER')
    dump.add_lines(self.DOS_HEADER.dump())
    dump.add_newline()

    dump.add_header('NT_HEADERS')
    dump.add_lines(self.NT_HEADERS.dump())
    dump.add_newline()

    dump.add_header('FILE_HEADER')
    dump.add_lines(self.FILE_HEADER.dump())

    image_flags = self.retrieve_flags(IMAGE_CHARACTERISTICS, 'IMAGE_FILE_')

    dump.add('Flags: ')
    dump.add_line(set_flag_names(self.FILE_HEADER, image_flags))
    dump.add_newline()

    if hasattr(self, 'OPTIONAL_HEADER') and self.OPTIONAL_HEADER is not None:
        dump.add_header('OPTIONAL_HEADER')
        dump.add_lines(self.OPTIONAL_HEADER.dump())

        # The flags live on the optional header, so they are only read
        # once the guard above has established that there is one.
        dll_characteristics_flags = self.retrieve_flags(
            DLL_CHARACTERISTICS, 'IMAGE_DLL_CHARACTERISTICS_')

        dump.add('DllCharacteristics: ')
        dump.add_line(
            set_flag_names(self.OPTIONAL_HEADER, dll_characteristics_flags))
        dump.add_newline()

    dump.add_header('PE Sections')

    section_flags = self.retrieve_flags(SECTION_CHARACTERISTICS, 'IMAGE_SCN_')

    for section in self.sections:
        dump.add_lines(section.dump())
        dump.add('Flags: ')
        dump.add_line(set_flag_names(section, section_flags))
        dump.add_line('Entropy: %f (Min=0.0, Max=8.0)' % section.get_entropy() )
        # Hash lines are only written for the algorithms that are available.
        if md5 is not None:
            dump.add_line('MD5 hash: %s' % section.get_hash_md5() )
        if sha1 is not None:
            dump.add_line('SHA-1 hash: %s' % section.get_hash_sha1() )
        if sha256 is not None:
            dump.add_line('SHA-256 hash: %s' % section.get_hash_sha256() )
        if sha512 is not None:
            dump.add_line('SHA-512 hash: %s' % section.get_hash_sha512() )
        dump.add_newline()

    if (hasattr(self, 'OPTIONAL_HEADER') and
            hasattr(self.OPTIONAL_HEADER, 'DATA_DIRECTORY')):

        dump.add_header('Directories')
        for directory in self.OPTIONAL_HEADER.DATA_DIRECTORY:
            dump.add_lines(directory.dump())
        dump.add_newline()

    if hasattr(self, 'VS_VERSIONINFO'):
        dump.add_header('Version Information')
        dump.add_lines(self.VS_VERSIONINFO.dump())
        dump.add_newline()

        if hasattr(self, 'VS_FIXEDFILEINFO'):
            dump.add_lines(self.VS_FIXEDFILEINFO.dump())
            dump.add_newline()

        if hasattr(self, 'FileInfo'):
            for entry in self.FileInfo:
                dump.add_lines(entry.dump())
                dump.add_newline()

                if hasattr(entry, 'StringTable'):
                    for st_entry in entry.StringTable:
                        for line in st_entry.dump():
                            dump.add_line(' '+line)
                        dump.add_line(' LangID: '+st_entry.LangID)
                        dump.add_newline()
                        for str_entry in st_entry.entries.items():
                            dump.add_line(' '+str_entry[0]+': '+str_entry[1])
                    dump.add_newline()

                elif hasattr(entry, 'Var'):
                    for var_entry in entry.Var:
                        if hasattr(var_entry, 'entry'):
                            for line in var_entry.dump():
                                dump.add_line(' '+line)
                            # Only the first name/value pair of the
                            # dictionary is printed.
                            var_name, var_value = list(
                                var_entry.entry.items())[0]
                            dump.add_line(
                                ' ' + var_name +
                                ': ' + var_value)

                    dump.add_newline()

    if hasattr(self, 'DIRECTORY_ENTRY_EXPORT'):
        dump.add_header('Exported symbols')
        dump.add_lines(self.DIRECTORY_ENTRY_EXPORT.struct.dump())
        dump.add_newline()
        dump.add_line('%-10s %-10s %s' % ('Ordinal', 'RVA', 'Name'))
        for export in self.DIRECTORY_ENTRY_EXPORT.symbols:
            dump.add('%-10d 0x%08Xh %s' % (
                export.ordinal, export.address, export.name))
            if export.forwarder:
                dump.add_line(' forwarder: %s' % export.forwarder)
            else:
                dump.add_newline()

        dump.add_newline()

    if hasattr(self, 'DIRECTORY_ENTRY_IMPORT'):
        add_imported_symbols('Imported symbols', self.DIRECTORY_ENTRY_IMPORT)

    if hasattr(self, 'DIRECTORY_ENTRY_BOUND_IMPORT'):
        dump.add_header('Bound imports')
        for bound_imp_desc in self.DIRECTORY_ENTRY_BOUND_IMPORT:

            dump.add_lines(bound_imp_desc.struct.dump())
            dump.add_line('DLL: %s' % bound_imp_desc.name)
            dump.add_newline()

            for bound_imp_ref in bound_imp_desc.entries:
                dump.add_lines(bound_imp_ref.struct.dump(), 4)
                dump.add_line('DLL: %s' % bound_imp_ref.name, 4)
                dump.add_newline()

    if hasattr(self, 'DIRECTORY_ENTRY_DELAY_IMPORT'):
        add_imported_symbols(
            'Delay Imported symbols', self.DIRECTORY_ENTRY_DELAY_IMPORT)

    if hasattr(self, 'DIRECTORY_ENTRY_RESOURCE'):
        dump.add_header('Resource directory')

        dump.add_lines(self.DIRECTORY_ENTRY_RESOURCE.struct.dump())

        # Three levels are walked (type, id, language); the trailing
        # number in each call is the indentation passed to the dump.
        for resource_type in self.DIRECTORY_ENTRY_RESOURCE.entries:

            if resource_type.name is not None:
                dump.add_line('Name: [%s]' % resource_type.name, 2)
            else:
                dump.add_line('Id: [0x%X] (%s)' % (
                    resource_type.struct.Id, RESOURCE_TYPE.get(
                        resource_type.struct.Id, '-')),
                    2)

            dump.add_lines(resource_type.struct.dump(), 2)

            if hasattr(resource_type, 'directory'):

                dump.add_lines(resource_type.directory.struct.dump(), 4)

                for resource_id in resource_type.directory.entries:

                    if resource_id.name is not None:
                        dump.add_line('Name: [%s]' % resource_id.name, 6)
                    else:
                        dump.add_line('Id: [0x%X]' % resource_id.struct.Id, 6)

                    dump.add_lines(resource_id.struct.dump(), 6)

                    if hasattr(resource_id, 'directory'):
                        dump.add_lines(resource_id.directory.struct.dump(), 8)

                        for resource_lang in resource_id.directory.entries:
                            dump.add_lines(resource_lang.struct.dump(), 10)
                            dump.add_lines(resource_lang.data.struct.dump(), 12)
            dump.add_newline()

        dump.add_newline()

    if (hasattr(self, 'DIRECTORY_ENTRY_TLS') and
            self.DIRECTORY_ENTRY_TLS and
            self.DIRECTORY_ENTRY_TLS.struct):

        dump.add_header('TLS')
        dump.add_lines(self.DIRECTORY_ENTRY_TLS.struct.dump())
        dump.add_newline()

    if hasattr(self, 'DIRECTORY_ENTRY_DEBUG'):
        dump.add_header('Debug information')
        for dbg in self.DIRECTORY_ENTRY_DEBUG:
            dump.add_lines(dbg.struct.dump())
            try:
                dump.add_line('Type: '+DEBUG_TYPE[dbg.struct.Type])
            except KeyError:
                dump.add_line('Type: 0x%x(Unknown)' % dbg.struct.Type)
            dump.add_newline()

    if hasattr(self, 'DIRECTORY_ENTRY_BASERELOC'):
        dump.add_header('Base relocations')
        for base_reloc in self.DIRECTORY_ENTRY_BASERELOC:
            dump.add_lines(base_reloc.struct.dump())
            for reloc in base_reloc.entries:
                try:
                    # [16:] drops a fixed-length prefix of the type name.
                    dump.add_line('%08Xh %s' % (
                        reloc.rva, RELOCATION_TYPE[reloc.type][16:]), 4)
                except KeyError:
                    dump.add_line('0x%08X 0x%x(Unknown)' % (
                        reloc.rva, reloc.type), 4)
            dump.add_newline()

    return dump.get_text()
def get_physical_by_rva(self, rva):
    """Gets the physical address in the PE file from an RVA value.

    Returns the file offset given by get_offset_from_rva(), or None when
    that lookup fails.
    """
    try:
        offset = self.get_offset_from_rva(rva)
    except Exception:
        return None
    return offset
3390 # Double-Word get/set
def get_data_from_dword(self, dword):
    """Return the four byte little endian string for a double word value."""
    packed = struct.pack('<L', dword)
    return packed
def get_dword_from_data(self, data, offset):
    """Convert four bytes of data to a double word (little endian).

    'offset' indexes into 'data' as if it were a dword array: setting it
    to N returns the dword found at byte offset N*4.

    Returns None if the data can't be turned into a double word.
    """
    start = offset * 4
    end = start + 4
    if end > len(data):
        return None
    return struct.unpack('<L', data[start:end])[0]
def get_dword_at_rva(self, rva):
    """Return the double word value at the given RVA.

    Returns None if the value can't be read, i.e. the RVA can't be mapped
    to data or fewer than four bytes are available there.
    """
    try:
        # Request just the four bytes needed instead of copying everything
        # up to the end of the containing area.
        return self.get_dword_from_data(self.get_data(rva, 4), 0)
    except PEFormatError:
        return None
def get_dword_from_offset(self, offset):
    """Return the double word value at the given file offset. (little endian)

    Returns None when fewer than four bytes are available at 'offset'.
    """
    end = offset + 4
    if end > len(self.__data__):
        return None
    chunk = self.__data__[offset:end]
    return self.get_dword_from_data(chunk, 0)
def set_dword_at_rva(self, rva, dword):
    """Set the double word value at the file offset corresponding to the given RVA.

    Returns whatever set_bytes_at_rva() returns.
    """
    packed = self.get_data_from_dword(dword)
    return self.set_bytes_at_rva(rva, packed)
def set_dword_at_offset(self, offset, dword):
    """Set the double word value at the given file offset.

    Returns whatever set_bytes_at_offset() returns.
    """
    packed = self.get_data_from_dword(dword)
    return self.set_bytes_at_offset(offset, packed)
def get_data_from_word(self, word):
    """Return the two byte little endian string for a word value."""
    packed = struct.pack('<H', word)
    return packed
def get_word_from_data(self, data, offset):
    """Convert two bytes of data to a word (little endian).

    'offset' indexes into 'data' as if it were a word array: setting it
    to N returns the word found at byte offset N*2.

    Returns None if the data can't be turned into a word.
    """
    start = offset * 2
    end = start + 2
    if end > len(data):
        return None
    return struct.unpack('<H', data[start:end])[0]
def get_word_at_rva(self, rva):
    """Return the word value at the given RVA.

    Returns None if the value can't be read, i.e. the RVA can't be mapped
    to data or fewer than two bytes are available there.
    """
    try:
        # Request just the two bytes needed instead of copying everything
        # up to the end of the containing area.
        return self.get_word_from_data(self.get_data(rva, 2), 0)
    except PEFormatError:
        return None
def get_word_from_offset(self, offset):
    """Return the word value at the given file offset. (little endian)

    Returns None when fewer than two bytes are available at 'offset'.
    """
    end = offset + 2
    if end > len(self.__data__):
        return None
    chunk = self.__data__[offset:end]
    return self.get_word_from_data(chunk, 0)
def set_word_at_rva(self, rva, word):
    """Set the word value at the file offset corresponding to the given RVA.

    Returns whatever set_bytes_at_rva() returns.
    """
    packed = self.get_data_from_word(word)
    return self.set_bytes_at_rva(rva, packed)
def set_word_at_offset(self, offset, word):
    """Set the word value at the given file offset.

    Returns whatever set_bytes_at_offset() returns.
    """
    packed = self.get_data_from_word(word)
    return self.set_bytes_at_offset(offset, packed)
def get_data_from_qword(self, word):
    """Return the eight byte little endian string for a quad-word value."""
    packed = struct.pack('<Q', word)
    return packed
def get_qword_from_data(self, data, offset):
    """Convert eight bytes of data to a quad-word (little endian).

    'offset' indexes into 'data' as if it were a quad-word array: setting
    it to N returns the quad-word found at byte offset N*8.

    Returns None if the data can't be turned into a quad word.
    """
    start = offset * 8
    end = start + 8
    if end > len(data):
        return None
    return struct.unpack('<Q', data[start:end])[0]
def get_qword_at_rva(self, rva):
    """Fetch the quad-word (little endian) stored at the given RVA.

    None is returned when the value can't be read, i.e. a
    PEFormatError is raised while fetching or converting the data.
    """

    try:
        raw_bytes = self.get_data(rva)[:8]
        value = self.get_qword_from_data(raw_bytes, 0)
    except PEFormatError:
        value = None

    return value
def get_qword_from_offset(self, offset):
    """Fetch the quad-word (little endian) stored at the given file offset.

    None is returned when the eight bytes would extend past the end
    of the file data.
    """

    end = offset + 8
    if end <= len(self.__data__):
        return self.get_qword_from_data(self.__data__[offset:end], 0)

    return None
def set_qword_at_rva(self, rva, qword):
    """Write 'qword' at the file offset that corresponds to the given RVA.

    The result of set_bytes_at_rva() is handed back unchanged.
    """
    packed_qword = self.get_data_from_qword(qword)
    return self.set_bytes_at_rva(rva, packed_qword)
def set_qword_at_offset(self, offset, qword):
    """Write 'qword' at the given file offset.

    The result of set_bytes_at_offset() is handed back unchanged.
    """
    packed_qword = self.get_data_from_qword(qword)
    return self.set_bytes_at_offset(offset, packed_qword)
def set_bytes_at_rva(self, rva, data):
    """Overwrite, with the given string, the bytes at the file offset corresponding to the given RVA.

    Return True if successful, False otherwise. It can fail if the
    RVA can't be mapped to a file offset or if the offset is outside
    the file's boundaries.
    """

    offset = self.get_physical_by_rva(rva)

    # NOTE(review): get_physical_by_rva() is defined elsewhere; it is
    # assumed to hand back None when the RVA can't be mapped -- confirm.
    # The test is an explicit "is None" so that file offset 0, a valid
    # location, is not mistaken for a failure.
    if offset is None:
        return False

    return self.set_bytes_at_offset(offset, data)
def set_bytes_at_offset(self, offset, data):
    """Replace the bytes found at the given file offset with the given string.

    True is returned on success. False is returned, and nothing is
    modified, when the offset lies outside the file's boundaries.
    A TypeError is raised if 'data' is not a str.
    """

    if not isinstance(data, str):
        raise TypeError('data should be of type: str')

    image = self.__data__
    if not 0 <= offset < len(image):
        return False

    patch_end = offset + len(data)
    self.__data__ = image[:offset] + data + image[patch_end:]

    # Every section keeps its own copy of its raw data, bring those
    # copies in line with the patched file contents
    #
    for sect in self.sections:
        raw_start = sect.PointerToRawData
        raw_end = raw_start + sect.SizeOfRawData
        sect.data = self.__data__[raw_start:raw_end]

    return True
def relocate_image(self, new_ImageBase):
    """Apply the relocation information to the image using the provided new image base.

    This method will apply the relocation information to the image. Given the new base,
    all the relocations will be processed and both the raw data and the section's data
    will be fixed accordingly.
    The resulting image can be retrieved as well through the method:

        get_memory_mapped_image()

    In order to get something that would more closely match what could be found in memory
    once the Windows loader finished its work.

    The fix-ups are written through the set_*_at_rva() methods; nothing
    is returned.
    """

    relocation_difference = new_ImageBase - self.OPTIONAL_HEADER.ImageBase

    for reloc in self.DIRECTORY_ENTRY_BASERELOC:

        # We iterate with an index because if the relocation is of type
        # IMAGE_REL_BASED_HIGHADJ we need to also process the next entry
        # at once and skip it for the next iteration
        #
        entry_idx = 0
        while entry_idx < len(reloc.entries):

            entry = reloc.entries[entry_idx]
            entry_idx += 1

            if entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_ABSOLUTE']:
                # Nothing to do for this type of relocation
                pass

            elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_HIGH']:
                # Fix the high 16bits of a relocation
                #
                # Add high 16bits of relocation_difference to the
                # 16bit value at RVA=entry.rva
                #
                # The shift is parenthesized on purpose: '+' binds
                # tighter than '>>', so without the parentheses the
                # stored word itself would be shifted away.
                self.set_word_at_rva(
                    entry.rva,
                    ( self.get_word_at_rva(entry.rva) +
                      (relocation_difference >> 16) ) & 0xffff )

            elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_LOW']:
                # Fix the low 16bits of a relocation
                #
                # Add low 16 bits of relocation_difference to the 16bit value
                # at RVA=entry.rva
                self.set_word_at_rva(
                    entry.rva,
                    ( self.get_word_at_rva(entry.rva) +
                      relocation_difference ) & 0xffff )

            elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_HIGHLOW']:
                # Handle all high and low parts of a 32bit relocation
                #
                # Add relocation_difference to the value at RVA=entry.rva.
                # The sum is wrapped to 32 bits so that a carry, or a
                # negative difference, still yields a value that fits
                # in the dword being written.
                self.set_dword_at_rva(
                    entry.rva,
                    ( self.get_dword_at_rva(entry.rva) +
                      relocation_difference ) & 0xffffffff )

            elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_HIGHADJ']:
                # Fix the high 16bits of a relocation and adjust
                #
                # Add high 16bits of relocation_difference to the 32bit value
                # composed from the (16bit value at RVA=entry.rva)<<16 plus
                # the 16bit value at the next relocation entry.
                #

                # If the next entry is beyond the array's limits,
                # abort... the table is corrupt
                #
                if entry_idx == len(reloc.entries):
                    break

                next_entry = reloc.entries[entry_idx]
                entry_idx += 1

                # NOTE(review): the low half is taken from next_entry.rva
                # as in the original code; verify that this attribute
                # carries the raw 16bit value of the following slot.
                self.set_word_at_rva(
                    entry.rva,
                    ( ( (self.get_word_at_rva(entry.rva) << 16) +
                        next_entry.rva +
                        relocation_difference ) & 0xffff0000 ) >> 16 )

            elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_DIR64']:
                # Apply the difference to the 64bit value at the offset
                # RVA=entry.rva, wrapping the sum to 64 bits.
                self.set_qword_at_rva(
                    entry.rva,
                    ( self.get_qword_at_rva(entry.rva) +
                      relocation_difference ) & 0xffffffffffffffff )
def verify_checksum(self):
    """Tell whether the CheckSum stored in the optional header equals the one generate_checksum() computes."""

    stored_checksum = self.OPTIONAL_HEADER.CheckSum
    computed_checksum = self.generate_checksum()
    return stored_checksum == computed_checksum
def generate_checksum(self):
    """Compute the checksum of the file data held in self.__data__.

    The data is added up as little endian dwords, folding any carry
    out of the low 32 bits back into the sum; the dword that holds
    the CheckSum field of the optional header is left out. The sum is
    then folded down to 16 bits and the length of the data is added.

    A trailing group of fewer than four bytes is padded with zero
    bytes so that it takes part in the sum as well.
    """

    # Get the offset to the CheckSum field in the OptionalHeader
    #
    checksum_offset = self.OPTIONAL_HEADER.__file_offset__ + 0x40 # 64

    # Explicit floor division: the index must stay an integer
    # regardless of how '/' behaves for the running interpreter.
    checksum_index = checksum_offset // 4

    data = self.__data__
    data_length = len(data)

    checksum = 0

    # Round up so that a trailing partial dword is visited too
    for i in range( (data_length + 3) // 4 ):

        # Skip the checksum field
        #
        if i == checksum_index:
            continue

        chunk = data[ i*4 : i*4+4 ]
        if len(chunk) < 4:
            # struct.pack('Nx') yields N zero bytes of the same string
            # type struct works with, so it can be appended directly.
            chunk = chunk + struct.pack('%dx' % (4 - len(chunk)))

        # '<L' is always four bytes, little endian; a bare 'L' uses the
        # platform's native size, which need not be four bytes.
        dword = struct.unpack('<L', chunk)[0]
        checksum = (checksum & 0xffffffff) + dword + (checksum>>32)
        if checksum > 2**32:
            checksum = (checksum & 0xffffffff) + (checksum >> 32)

    checksum = (checksum & 0xffff) + (checksum >> 16)
    checksum = (checksum) + (checksum >> 16)
    checksum = checksum & 0xffff

    # The length added is the one of the actual data, not the padded one
    #
    return checksum + data_length