tools/symsrc/pefile.py

   1 # -*- coding: Latin-1 -*-
   2 """pefile, Portable Executable reader module
   3
   4
   5 All the PE file basic structures are available with their default names
   6 as attributes of the instance returned.
   7
   8 Processed elements such as the import table are made available with lowercase
   9 names, to differentiate them from the upper case basic structure names.
  10
  11 pefile has been tested against the limits of valid PE headers, that is, malware.
  12 Lots of packed malware attempt to abuse the format way beyond its standard use.
  13 To the best of my knowledge most of the abuses are handled gracefully.
  14
  15 Copyright (c) 2005, 2006, 2007, 2008 Ero Carrera <ero@dkbza.org>
  16
  17 All rights reserved.
  18
  19 For detailed copyright information see the file COPYING in
  20 the root of the distribution archive.
  21 """
  22
  23 __author__ = 'Ero Carrera'
  24 __version__ = '1.2.9.1'
  25 __contact__ = 'ero@dkbza.org'
  26
  27
  28 import os
  29 import struct
  30 import time
  31 import math
  32 import re
  33 import exceptions
  34 import string
  35 import array
  36
  37 sha1, sha256, sha512, md5 = None, None, None, None
  38
  39 try:
  40     import hashlib
  41     sha1 = hashlib.sha1
  42     sha256 = hashlib.sha256
  43     sha512 = hashlib.sha512
  44     md5 = hashlib.md5
  45 except ImportError:
  46     try:
  47         import sha
  48         sha1 = sha.new
  49     except ImportError:
  50         pass
  51     try:
  52         import md5
  53         md5 = md5.new
  54     except ImportError:
  55         pass
  56
  57
  58 fast_load = False
  59
  60 IMAGE_DOS_SIGNATURE             = 0x5A4D
  61 IMAGE_OS2_SIGNATURE             = 0x454E
  62 IMAGE_OS2_SIGNATURE_LE          = 0x454C
  63 IMAGE_VXD_SIGNATURE             = 0x454C
  64 IMAGE_NT_SIGNATURE              = 0x00004550
  65 IMAGE_NUMBEROF_DIRECTORY_ENTRIES= 16
  66 IMAGE_ORDINAL_FLAG              = 0x80000000L
  67 IMAGE_ORDINAL_FLAG64            = 0x8000000000000000L
  68 OPTIONAL_HEADER_MAGIC_PE        = 0x10b
  69 OPTIONAL_HEADER_MAGIC_PE_PLUS   = 0x20b
  70
  71
  72 directory_entry_types = [
  73     ('IMAGE_DIRECTORY_ENTRY_EXPORT',        0),
  74     ('IMAGE_DIRECTORY_ENTRY_IMPORT',        1),
  75     ('IMAGE_DIRECTORY_ENTRY_RESOURCE',      2),
  76     ('IMAGE_DIRECTORY_ENTRY_EXCEPTION',     3),
  77     ('IMAGE_DIRECTORY_ENTRY_SECURITY',      4),
  78     ('IMAGE_DIRECTORY_ENTRY_BASERELOC',     5),
  79     ('IMAGE_DIRECTORY_ENTRY_DEBUG',         6),
  80     ('IMAGE_DIRECTORY_ENTRY_COPYRIGHT',     7),
  81     ('IMAGE_DIRECTORY_ENTRY_GLOBALPTR',     8),
  82     ('IMAGE_DIRECTORY_ENTRY_TLS',           9),
  83     ('IMAGE_DIRECTORY_ENTRY_LOAD_CONFIG',   10),
  84     ('IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT',  11),
  85     ('IMAGE_DIRECTORY_ENTRY_IAT',           12),
  86     ('IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT',  13),
  87     ('IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR',14),
  88     ('IMAGE_DIRECTORY_ENTRY_RESERVED',      15) ]
  89
  90 DIRECTORY_ENTRY = dict([(e[1], e[0]) for e in directory_entry_types]+directory_entry_types)
  91
  92
  93 image_characteristics = [
  94     ('IMAGE_FILE_RELOCS_STRIPPED',          0x0001),
  95     ('IMAGE_FILE_EXECUTABLE_IMAGE',         0x0002),
  96     ('IMAGE_FILE_LINE_NUMS_STRIPPED',       0x0004),
  97     ('IMAGE_FILE_LOCAL_SYMS_STRIPPED',      0x0008),
  98     ('IMAGE_FILE_AGGRESIVE_WS_TRIM',        0x0010),
  99     ('IMAGE_FILE_LARGE_ADDRESS_AWARE',      0x0020),
 100     ('IMAGE_FILE_16BIT_MACHINE',            0x0040),
 101     ('IMAGE_FILE_BYTES_REVERSED_LO',        0x0080),
 102     ('IMAGE_FILE_32BIT_MACHINE',            0x0100),
 103     ('IMAGE_FILE_DEBUG_STRIPPED',           0x0200),
 104     ('IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP',  0x0400),
 105     ('IMAGE_FILE_NET_RUN_FROM_SWAP',        0x0800),
 106     ('IMAGE_FILE_SYSTEM',                   0x1000),
 107     ('IMAGE_FILE_DLL',                      0x2000),
 108     ('IMAGE_FILE_UP_SYSTEM_ONLY',           0x4000),
 109     ('IMAGE_FILE_BYTES_REVERSED_HI',        0x8000) ]
 110
 111 IMAGE_CHARACTERISTICS = dict([(e[1], e[0]) for e in
 112     image_characteristics]+image_characteristics)
 113
 114
 115 section_characteristics = [
 116     ('IMAGE_SCN_CNT_CODE',                  0x00000020),
 117     ('IMAGE_SCN_CNT_INITIALIZED_DATA',      0x00000040),
 118     ('IMAGE_SCN_CNT_UNINITIALIZED_DATA',    0x00000080),
 119     ('IMAGE_SCN_LNK_OTHER',                 0x00000100),
 120     ('IMAGE_SCN_LNK_INFO',                  0x00000200),
 121     ('IMAGE_SCN_LNK_REMOVE',                0x00000800),
 122     ('IMAGE_SCN_LNK_COMDAT',                0x00001000),
 123     ('IMAGE_SCN_MEM_FARDATA',               0x00008000),
 124     ('IMAGE_SCN_MEM_PURGEABLE',             0x00020000),
 125     ('IMAGE_SCN_MEM_16BIT',                 0x00020000),
 126     ('IMAGE_SCN_MEM_LOCKED',                0x00040000),
 127     ('IMAGE_SCN_MEM_PRELOAD',               0x00080000),
 128     ('IMAGE_SCN_ALIGN_1BYTES',              0x00100000),
 129     ('IMAGE_SCN_ALIGN_2BYTES',              0x00200000),
 130     ('IMAGE_SCN_ALIGN_4BYTES',              0x00300000),
 131     ('IMAGE_SCN_ALIGN_8BYTES',              0x00400000),
 132     ('IMAGE_SCN_ALIGN_16BYTES',             0x00500000),
 133     ('IMAGE_SCN_ALIGN_32BYTES',             0x00600000),
 134     ('IMAGE_SCN_ALIGN_64BYTES',             0x00700000),
 135     ('IMAGE_SCN_ALIGN_128BYTES',            0x00800000),
 136     ('IMAGE_SCN_ALIGN_256BYTES',            0x00900000),
 137     ('IMAGE_SCN_ALIGN_512BYTES',            0x00A00000),
 138     ('IMAGE_SCN_ALIGN_1024BYTES',           0x00B00000),
 139     ('IMAGE_SCN_ALIGN_2048BYTES',           0x00C00000),
 140     ('IMAGE_SCN_ALIGN_4096BYTES',           0x00D00000),
 141     ('IMAGE_SCN_ALIGN_8192BYTES',           0x00E00000),
 142     ('IMAGE_SCN_ALIGN_MASK',                0x00F00000),
 143     ('IMAGE_SCN_LNK_NRELOC_OVFL',           0x01000000),
 144     ('IMAGE_SCN_MEM_DISCARDABLE',           0x02000000),
 145     ('IMAGE_SCN_MEM_NOT_CACHED',            0x04000000),
 146     ('IMAGE_SCN_MEM_NOT_PAGED',             0x08000000),
 147     ('IMAGE_SCN_MEM_SHARED',                0x10000000),
 148     ('IMAGE_SCN_MEM_EXECUTE',               0x20000000),
 149     ('IMAGE_SCN_MEM_READ',                  0x40000000),
 150     ('IMAGE_SCN_MEM_WRITE',                 0x80000000L) ]
 151
 152 SECTION_CHARACTERISTICS = dict([(e[1], e[0]) for e in
 153     section_characteristics]+section_characteristics)
 154
 155
 156 debug_types = [
 157     ('IMAGE_DEBUG_TYPE_UNKNOWN',        0),
 158     ('IMAGE_DEBUG_TYPE_COFF',           1),
 159     ('IMAGE_DEBUG_TYPE_CODEVIEW',       2),
 160     ('IMAGE_DEBUG_TYPE_FPO',            3),
 161     ('IMAGE_DEBUG_TYPE_MISC',           4),
 162     ('IMAGE_DEBUG_TYPE_EXCEPTION',      5),
 163     ('IMAGE_DEBUG_TYPE_FIXUP',          6),
 164     ('IMAGE_DEBUG_TYPE_OMAP_TO_SRC',    7),
 165     ('IMAGE_DEBUG_TYPE_OMAP_FROM_SRC',  8),
 166     ('IMAGE_DEBUG_TYPE_BORLAND',        9),
 167     ('IMAGE_DEBUG_TYPE_RESERVED10',     10) ]
 168
 169 DEBUG_TYPE = dict([(e[1], e[0]) for e in debug_types]+debug_types)
 170
 171
 172 subsystem_types = [
 173     ('IMAGE_SUBSYSTEM_UNKNOWN',     0),
 174     ('IMAGE_SUBSYSTEM_NATIVE',      1),
 175     ('IMAGE_SUBSYSTEM_WINDOWS_GUI', 2),
 176     ('IMAGE_SUBSYSTEM_WINDOWS_CUI', 3),
 177     ('IMAGE_SUBSYSTEM_OS2_CUI',     5),
 178     ('IMAGE_SUBSYSTEM_POSIX_CUI',   7),
 179     ('IMAGE_SUBSYSTEM_WINDOWS_CE_GUI',  9),
 180     ('IMAGE_SUBSYSTEM_EFI_APPLICATION', 10),
 181     ('IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER', 11),
 182     ('IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER',      12),
 183     ('IMAGE_SUBSYSTEM_EFI_ROM',     13),
 184     ('IMAGE_SUBSYSTEM_XBOX',        14)]
 185
 186 SUBSYSTEM_TYPE = dict([(e[1], e[0]) for e in subsystem_types]+subsystem_types)
 187
 188
 189 machine_types = [
 190     ('IMAGE_FILE_MACHINE_UNKNOWN',  0),
 191     ('IMAGE_FILE_MACHINE_AM33',     0x1d3),
 192     ('IMAGE_FILE_MACHINE_AMD64',    0x8664),
 193     ('IMAGE_FILE_MACHINE_ARM',      0x1c0),
 194     ('IMAGE_FILE_MACHINE_EBC',      0xebc),
 195     ('IMAGE_FILE_MACHINE_I386',     0x14c),
 196     ('IMAGE_FILE_MACHINE_IA64',     0x200),
 197     ('IMAGE_FILE_MACHINE_MR32',     0x9041),
 198     ('IMAGE_FILE_MACHINE_MIPS16',   0x266),
 199     ('IMAGE_FILE_MACHINE_MIPSFPU',  0x366),
 200     ('IMAGE_FILE_MACHINE_MIPSFPU16',0x466),
 201     ('IMAGE_FILE_MACHINE_POWERPC',  0x1f0),
 202     ('IMAGE_FILE_MACHINE_POWERPCFP',0x1f1),
 203     ('IMAGE_FILE_MACHINE_R4000',    0x166),
 204     ('IMAGE_FILE_MACHINE_SH3',      0x1a2),
 205     ('IMAGE_FILE_MACHINE_SH3DSP',   0x1a3),
 206     ('IMAGE_FILE_MACHINE_SH4',      0x1a6),
 207     ('IMAGE_FILE_MACHINE_SH5',      0x1a8),
 208     ('IMAGE_FILE_MACHINE_THUMB',    0x1c2),
 209     ('IMAGE_FILE_MACHINE_WCEMIPSV2',0x169),
 210  ]
 211
 212 MACHINE_TYPE = dict([(e[1], e[0]) for e in machine_types]+machine_types)
 213
 214
 215 relocation_types = [
 216     ('IMAGE_REL_BASED_ABSOLUTE',        0),
 217     ('IMAGE_REL_BASED_HIGH',            1),
 218     ('IMAGE_REL_BASED_LOW',             2),
 219     ('IMAGE_REL_BASED_HIGHLOW',         3),
 220     ('IMAGE_REL_BASED_HIGHADJ',         4),
 221     ('IMAGE_REL_BASED_MIPS_JMPADDR',    5),
 222     ('IMAGE_REL_BASED_SECTION',         6),
 223     ('IMAGE_REL_BASED_REL',             7),
 224     ('IMAGE_REL_BASED_MIPS_JMPADDR16',  9),
 225     ('IMAGE_REL_BASED_IA64_IMM64',      9),
 226     ('IMAGE_REL_BASED_DIR64',           10),
 227     ('IMAGE_REL_BASED_HIGH3ADJ',        11) ]
 228
 229 RELOCATION_TYPE = dict([(e[1], e[0]) for e in relocation_types]+relocation_types)
 230
 231
 232 dll_characteristics = [
 233     ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0001', 0x0001),
 234     ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0002', 0x0002),
 235     ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0004', 0x0004),
 236     ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0008', 0x0008),
 237     ('IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE',      0x0040),
 238     ('IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY',   0x0080),
 239     ('IMAGE_DLL_CHARACTERISTICS_NX_COMPAT',         0x0100),
 240     ('IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION',      0x0200),
 241     ('IMAGE_DLL_CHARACTERISTICS_NO_SEH',    0x0400),
 242     ('IMAGE_DLL_CHARACTERISTICS_NO_BIND',   0x0800),
 243     ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x1000', 0x1000),
 244     ('IMAGE_DLL_CHARACTERISTICS_WDM_DRIVER',    0x2000),
 245     ('IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE', 0x8000) ]
 246
 247 DLL_CHARACTERISTICS = dict([(e[1], e[0]) for e in dll_characteristics]+dll_characteristics)
 248
 249
 250 # Resource types
 251 resource_type = [
 252     ('RT_CURSOR',          1),
 253     ('RT_BITMAP',          2),
 254     ('RT_ICON',            3),
 255     ('RT_MENU',            4),
 256     ('RT_DIALOG',          5),
 257     ('RT_STRING',          6),
 258     ('RT_FONTDIR',         7),
 259     ('RT_FONT',            8),
 260     ('RT_ACCELERATOR',     9),
 261     ('RT_RCDATA',          10),
 262     ('RT_MESSAGETABLE',    11),
 263     ('RT_GROUP_CURSOR',    12),
 264     ('RT_GROUP_ICON',      14),
 265     ('RT_VERSION',         16),
 266     ('RT_DLGINCLUDE',      17),
 267     ('RT_PLUGPLAY',        19),
 268     ('RT_VXD',             20),
 269     ('RT_ANICURSOR',       21),
 270     ('RT_ANIICON',         22),
 271     ('RT_HTML',            23),
 272     ('RT_MANIFEST',        24) ]
 273
 274 RESOURCE_TYPE = dict([(e[1], e[0]) for e in resource_type]+resource_type)
 275
 276
 277 # Language definitions
 278 lang = [
 279  ('LANG_NEUTRAL',       0x00),
 280  ('LANG_INVARIANT',     0x7f),
 281  ('LANG_AFRIKAANS',     0x36),
 282  ('LANG_ALBANIAN',      0x1c),
 283  ('LANG_ARABIC',        0x01),
 284  ('LANG_ARMENIAN',      0x2b),
 285  ('LANG_ASSAMESE',      0x4d),
 286  ('LANG_AZERI',         0x2c),
 287  ('LANG_BASQUE',        0x2d),
 288  ('LANG_BELARUSIAN',    0x23),
 289  ('LANG_BENGALI',       0x45),
 290  ('LANG_BULGARIAN',     0x02),
 291  ('LANG_CATALAN',       0x03),
 292  ('LANG_CHINESE',       0x04),
 293  ('LANG_CROATIAN',      0x1a),
 294  ('LANG_CZECH',         0x05),
 295  ('LANG_DANISH',        0x06),
 296  ('LANG_DIVEHI',        0x65),
 297  ('LANG_DUTCH',         0x13),
 298  ('LANG_ENGLISH',       0x09),
 299  ('LANG_ESTONIAN',      0x25),
 300  ('LANG_FAEROESE',      0x38),
 301  ('LANG_FARSI',         0x29),
 302  ('LANG_FINNISH',       0x0b),
 303  ('LANG_FRENCH',        0x0c),
 304  ('LANG_GALICIAN',      0x56),
 305  ('LANG_GEORGIAN',      0x37),
 306  ('LANG_GERMAN',        0x07),
 307  ('LANG_GREEK',         0x08),
 308  ('LANG_GUJARATI',      0x47),
 309  ('LANG_HEBREW',        0x0d),
 310  ('LANG_HINDI',         0x39),
 311  ('LANG_HUNGARIAN',     0x0e),
 312  ('LANG_ICELANDIC',     0x0f),
 313  ('LANG_INDONESIAN',    0x21),
 314  ('LANG_ITALIAN',       0x10),
 315  ('LANG_JAPANESE',      0x11),
 316  ('LANG_KANNADA',       0x4b),
 317  ('LANG_KASHMIRI',      0x60),
 318  ('LANG_KAZAK',         0x3f),
 319  ('LANG_KONKANI',       0x57),
 320  ('LANG_KOREAN',        0x12),
 321  ('LANG_KYRGYZ',        0x40),
 322  ('LANG_LATVIAN',       0x26),
 323  ('LANG_LITHUANIAN',    0x27),
 324  ('LANG_MACEDONIAN',    0x2f),
 325  ('LANG_MALAY',         0x3e),
 326  ('LANG_MALAYALAM',     0x4c),
 327  ('LANG_MANIPURI',      0x58),
 328  ('LANG_MARATHI',       0x4e),
 329  ('LANG_MONGOLIAN',     0x50),
 330  ('LANG_NEPALI',        0x61),
 331  ('LANG_NORWEGIAN',     0x14),
 332  ('LANG_ORIYA',         0x48),
 333  ('LANG_POLISH',        0x15),
 334  ('LANG_PORTUGUESE',    0x16),
 335  ('LANG_PUNJABI',       0x46),
 336  ('LANG_ROMANIAN',      0x18),
 337  ('LANG_RUSSIAN',       0x19),
 338  ('LANG_SANSKRIT',      0x4f),
 339  ('LANG_SERBIAN',       0x1a),
 340  ('LANG_SINDHI',        0x59),
 341  ('LANG_SLOVAK',        0x1b),
 342  ('LANG_SLOVENIAN',     0x24),
 343  ('LANG_SPANISH',       0x0a),
 344  ('LANG_SWAHILI',       0x41),
 345  ('LANG_SWEDISH',       0x1d),
 346  ('LANG_SYRIAC',        0x5a),
 347  ('LANG_TAMIL',         0x49),
 348  ('LANG_TATAR',         0x44),
 349  ('LANG_TELUGU',        0x4a),
 350  ('LANG_THAI',          0x1e),
 351  ('LANG_TURKISH',       0x1f),
 352  ('LANG_UKRAINIAN',     0x22),
 353  ('LANG_URDU',          0x20),
 354  ('LANG_UZBEK',         0x43),
 355  ('LANG_VIETNAMESE',    0x2a),
 356  ('LANG_GAELIC',        0x3c),
 357  ('LANG_MALTESE',       0x3a),
 358  ('LANG_MAORI',         0x28),
 359  ('LANG_RHAETO_ROMANCE',0x17),
 360  ('LANG_SAAMI',         0x3b),
 361  ('LANG_SORBIAN',       0x2e),
 362  ('LANG_SUTU',          0x30),
 363  ('LANG_TSONGA',        0x31),
 364  ('LANG_TSWANA',        0x32),
 365  ('LANG_VENDA',         0x33),
 366  ('LANG_XHOSA',         0x34),
 367  ('LANG_ZULU',          0x35),
 368  ('LANG_ESPERANTO',     0x8f),
 369  ('LANG_WALON',         0x90),
 370  ('LANG_CORNISH',       0x91),
 371  ('LANG_WELSH',         0x92),
 372  ('LANG_BRETON',        0x93) ]
 373
 374 LANG = dict(lang+[(e[1], e[0]) for e in lang])
 375
 376
 377 # Sublanguage definitions
 378 sublang =  [
 379  ('SUBLANG_NEUTRAL',                        0x00),
 380  ('SUBLANG_DEFAULT',                        0x01),
 381  ('SUBLANG_SYS_DEFAULT',                    0x02),
 382  ('SUBLANG_ARABIC_SAUDI_ARABIA',            0x01),
 383  ('SUBLANG_ARABIC_IRAQ',                    0x02),
 384  ('SUBLANG_ARABIC_EGYPT',                   0x03),
 385  ('SUBLANG_ARABIC_LIBYA',                   0x04),
 386  ('SUBLANG_ARABIC_ALGERIA',                 0x05),
 387  ('SUBLANG_ARABIC_MOROCCO',                 0x06),
 388  ('SUBLANG_ARABIC_TUNISIA',                 0x07),
 389  ('SUBLANG_ARABIC_OMAN',                    0x08),
 390  ('SUBLANG_ARABIC_YEMEN',                   0x09),
 391  ('SUBLANG_ARABIC_SYRIA',                   0x0a),
 392  ('SUBLANG_ARABIC_JORDAN',                  0x0b),
 393  ('SUBLANG_ARABIC_LEBANON',                 0x0c),
 394  ('SUBLANG_ARABIC_KUWAIT',                  0x0d),
 395  ('SUBLANG_ARABIC_UAE',                     0x0e),
 396  ('SUBLANG_ARABIC_BAHRAIN',                 0x0f),
 397  ('SUBLANG_ARABIC_QATAR',                   0x10),
 398  ('SUBLANG_AZERI_LATIN',                    0x01),
 399  ('SUBLANG_AZERI_CYRILLIC',                 0x02),
 400  ('SUBLANG_CHINESE_TRADITIONAL',            0x01),
 401  ('SUBLANG_CHINESE_SIMPLIFIED',             0x02),
 402  ('SUBLANG_CHINESE_HONGKONG',               0x03),
 403  ('SUBLANG_CHINESE_SINGAPORE',              0x04),
 404  ('SUBLANG_CHINESE_MACAU',                  0x05),
 405  ('SUBLANG_DUTCH',                          0x01),
 406  ('SUBLANG_DUTCH_BELGIAN',                  0x02),
 407  ('SUBLANG_ENGLISH_US',                     0x01),
 408  ('SUBLANG_ENGLISH_UK',                     0x02),
 409  ('SUBLANG_ENGLISH_AUS',                    0x03),
 410  ('SUBLANG_ENGLISH_CAN',                    0x04),
 411  ('SUBLANG_ENGLISH_NZ',                     0x05),
 412  ('SUBLANG_ENGLISH_EIRE',                   0x06),
 413  ('SUBLANG_ENGLISH_SOUTH_AFRICA',           0x07),
 414  ('SUBLANG_ENGLISH_JAMAICA',                0x08),
 415  ('SUBLANG_ENGLISH_CARIBBEAN',              0x09),
 416  ('SUBLANG_ENGLISH_BELIZE',                 0x0a),
 417  ('SUBLANG_ENGLISH_TRINIDAD',               0x0b),
 418  ('SUBLANG_ENGLISH_ZIMBABWE',               0x0c),
 419  ('SUBLANG_ENGLISH_PHILIPPINES',            0x0d),
 420  ('SUBLANG_FRENCH',                         0x01),
 421  ('SUBLANG_FRENCH_BELGIAN',                 0x02),
 422  ('SUBLANG_FRENCH_CANADIAN',                0x03),
 423  ('SUBLANG_FRENCH_SWISS',                   0x04),
 424  ('SUBLANG_FRENCH_LUXEMBOURG',              0x05),
 425  ('SUBLANG_FRENCH_MONACO',                  0x06),
 426  ('SUBLANG_GERMAN',                         0x01),
 427  ('SUBLANG_GERMAN_SWISS',                   0x02),
 428  ('SUBLANG_GERMAN_AUSTRIAN',                0x03),
 429  ('SUBLANG_GERMAN_LUXEMBOURG',              0x04),
 430  ('SUBLANG_GERMAN_LIECHTENSTEIN',           0x05),
 431  ('SUBLANG_ITALIAN',                        0x01),
 432  ('SUBLANG_ITALIAN_SWISS',                  0x02),
 433  ('SUBLANG_KASHMIRI_SASIA',                 0x02),
 434  ('SUBLANG_KASHMIRI_INDIA',                 0x02),
 435  ('SUBLANG_KOREAN',                         0x01),
 436  ('SUBLANG_LITHUANIAN',                     0x01),
 437  ('SUBLANG_MALAY_MALAYSIA',                 0x01),
 438  ('SUBLANG_MALAY_BRUNEI_DARUSSALAM',        0x02),
 439  ('SUBLANG_NEPALI_INDIA',                   0x02),
 440  ('SUBLANG_NORWEGIAN_BOKMAL',               0x01),
 441  ('SUBLANG_NORWEGIAN_NYNORSK',              0x02),
 442  ('SUBLANG_PORTUGUESE',                     0x02),
 443  ('SUBLANG_PORTUGUESE_BRAZILIAN',           0x01),
 444  ('SUBLANG_SERBIAN_LATIN',                  0x02),
 445  ('SUBLANG_SERBIAN_CYRILLIC',               0x03),
 446  ('SUBLANG_SPANISH',                        0x01),
 447  ('SUBLANG_SPANISH_MEXICAN',                0x02),
 448  ('SUBLANG_SPANISH_MODERN',                 0x03),
 449  ('SUBLANG_SPANISH_GUATEMALA',              0x04),
 450  ('SUBLANG_SPANISH_COSTA_RICA',             0x05),
 451  ('SUBLANG_SPANISH_PANAMA',                 0x06),
 452  ('SUBLANG_SPANISH_DOMINICAN_REPUBLIC',     0x07),
 453  ('SUBLANG_SPANISH_VENEZUELA',              0x08),
 454  ('SUBLANG_SPANISH_COLOMBIA',               0x09),
 455  ('SUBLANG_SPANISH_PERU',                   0x0a),
 456  ('SUBLANG_SPANISH_ARGENTINA',              0x0b),
 457  ('SUBLANG_SPANISH_ECUADOR',                0x0c),
 458  ('SUBLANG_SPANISH_CHILE',                  0x0d),
 459  ('SUBLANG_SPANISH_URUGUAY',                0x0e),
 460  ('SUBLANG_SPANISH_PARAGUAY',               0x0f),
 461  ('SUBLANG_SPANISH_BOLIVIA',                0x10),
 462  ('SUBLANG_SPANISH_EL_SALVADOR',            0x11),
 463  ('SUBLANG_SPANISH_HONDURAS',               0x12),
 464  ('SUBLANG_SPANISH_NICARAGUA',              0x13),
 465  ('SUBLANG_SPANISH_PUERTO_RICO',            0x14),
 466  ('SUBLANG_SWEDISH',                        0x01),
 467  ('SUBLANG_SWEDISH_FINLAND',                0x02),
 468  ('SUBLANG_URDU_PAKISTAN',                  0x01),
 469  ('SUBLANG_URDU_INDIA',                     0x02),
 470  ('SUBLANG_UZBEK_LATIN',                    0x01),
 471  ('SUBLANG_UZBEK_CYRILLIC',                 0x02),
 472  ('SUBLANG_DUTCH_SURINAM',                  0x03),
 473  ('SUBLANG_ROMANIAN',                       0x01),
 474  ('SUBLANG_ROMANIAN_MOLDAVIA',              0x02),
 475  ('SUBLANG_RUSSIAN',                        0x01),
 476  ('SUBLANG_RUSSIAN_MOLDAVIA',               0x02),
 477  ('SUBLANG_CROATIAN',                       0x01),
 478  ('SUBLANG_LITHUANIAN_CLASSIC',             0x02),
 479  ('SUBLANG_GAELIC',                         0x01),
 480  ('SUBLANG_GAELIC_SCOTTISH',                0x02),
 481  ('SUBLANG_GAELIC_MANX',                    0x03) ]
 482
 483 SUBLANG = dict(sublang+[(e[1], e[0]) for e in sublang])
 484
 485
 486 class UnicodeStringWrapperPostProcessor:
 487     """This class attemps to help the process of identifying strings
 488     that might be plain Unicode or Pascal. A list of strings will be
 489     wrapped on it with the hope the overlappings will help make the
 490     decission about their type."""
 491
 492     def __init__(self, pe, rva_ptr):
 493         self.pe = pe
 494         self.rva_ptr = rva_ptr
 495         self.string = None
 496
 497
 498     def get_rva(self):
 499         """Get the RVA of the string."""
 500
 501         return self.rva_ptr
 502
 503
 504     def __str__(self):
 505         """Return the escaped ASCII representation of the string."""
 506
 507         def convert_char(char):
 508             if char in string.printable:
 509                 return char
 510             else:
 511                 return r'\x%02x' % ord(char)
 512
 513         if self.string:
 514             return ''.join([convert_char(c) for c in self.string])
 515
 516         return ''
 517
 518
 519     def invalidate(self):
 520         """Make this instance None, to express it's no known string type."""
 521
 522         self = None
 523
 524
 525     def render_pascal_16(self):
 526
 527         self.string = self.pe.get_string_u_at_rva(
 528             self.rva_ptr+2,
 529             max_length=self.__get_pascal_16_length())
 530
 531
 532     def ask_pascal_16(self, next_rva_ptr):
 533         """The next RVA is taken to be the one immediately following this one.
 534
 535         Such RVA could indicate the natural end of the string and will be checked
 536         with the possible length contained in the first word.
 537         """
 538
 539         length = self.__get_pascal_16_length()
 540
 541         if length == (next_rva_ptr - (self.rva_ptr+2)) / 2:
 542             self.length = length
 543             return True
 544
 545         return False
 546
 547
 548     def __get_pascal_16_length(self):
 549
 550         return self.__get_word_value_at_rva(self.rva_ptr)
 551
 552
 553     def __get_word_value_at_rva(self, rva):
 554
 555         try:
 556             data = self.pe.get_data(self.rva_ptr, 2)
 557         except PEFormatError, e:
 558             return False
 559
 560         if len(data)<2:
 561             return False
 562
 563         return struct.unpack('<H', data)[0]
 564
 565
 566     #def render_pascal_8(self):
 567     #    """"""
 568
 569
 570     def ask_unicode_16(self, next_rva_ptr):
 571         """The next RVA is taken to be the one immediately following this one.
 572
 573         Such RVA could indicate the natural end of the string and will be checked
 574         to see if there's a Unicode NULL character there.
 575         """
 576
 577         if self.__get_word_value_at_rva(next_rva_ptr-2) == 0:
 578             self.length = next_rva_ptr - self.rva_ptr
 579             return True
 580
 581         return False
 582
 583
 584     def render_unicode_16(self):
 585         """"""
 586
 587         self.string = self.pe.get_string_u_at_rva(self.rva_ptr)
 588
 589
 590 class PEFormatError(Exception):
 591     """Generic PE format error exception."""
 592
 593     def __init__(self, value):
 594         self.value = value
 595
 596     def __str__(self):
 597         return repr(self.value)
 598
 599
 600 class Dump:
 601     """Convenience class for dumping the PE information."""
 602
 603     def __init__(self):
 604         self.text = ''
 605
 606
 607     def add_lines(self, txt, indent=0):
 608         """Adds a list of lines.
 609
 610         The list can be indented with the optional argument 'indent'.
 611         """
 612         for line in txt:
 613             self.add_line(line, indent)
 614
 615
 616     def add_line(self, txt, indent=0):
 617         """Adds a line.
 618
 619         The line can be indented with the optional argument 'indent'.
 620         """
 621
 622         self.add(txt+'\n', indent)
 623
 624
 625     def add(self, txt, indent=0):
 626         """Adds some text, no newline will be appended.
 627
 628         The text can be indented with the optional argument 'indent'.
 629         """
 630
 631         if isinstance(txt, unicode):
 632             s = []
 633             for c in txt:
 634                 try:
 635                     s.append(str(c))
 636                 except UnicodeEncodeError, e:
 637                     s.append(repr(c))
 638
 639             txt = ''.join(s)
 640
 641         self.text += ' '*indent+txt
 642
 643
 644     def add_header(self, txt):
 645         """Adds a header element."""
 646
 647         self.add_line('-'*10+txt+'-'*10+'\n')
 648
 649
 650     def add_newline(self):
 651         """Adds a newline."""
 652
 653         self.text += '\n'
 654
 655
 656     def get_text(self):
 657         """Get the text in its current state."""
 658
 659         return self.text
 660
 661
 662
 663 class Structure:
 664     """Prepare structure object to extract members from data.
 665
 666     Format is a list containing definitions for the elements
 667     of the structure.
 668     """
 669
 670
 671     def __init__(self, format, name=None, file_offset=None):
 672         # Format is forced little endian, for big endian non Intel platforms
 673         self.__format__ = '<'
 674         self.__keys__ = []
 675 #        self.values = {}
 676         self.__format_length__ = 0
 677         self.__set_format__(format[1])
 678         self._all_zeroes = False
 679         self.__unpacked_data_elms__ = None
 680         self.__file_offset__ = file_offset
 681         if name:
 682             self.name = name
 683         else:
 684             self.name = format[0]
 685
 686
 687     def __get_format__(self):
 688         return self.__format__
 689
 690
 691     def get_file_offset(self):
 692         return self.__file_offset__
 693
 694     def set_file_offset(self, offset):
 695         self.__file_offset__ = offset
 696
 697     def all_zeroes(self):
 698         """Returns true is the unpacked data is all zeroes."""
 699
 700         return self._all_zeroes
 701
 702
 703     def __set_format__(self, format):
 704
 705         for elm in format:
 706             if ',' in elm:
 707                 elm_type, elm_name = elm.split(',', 1)
 708                 self.__format__ += elm_type
 709
 710                 elm_names = elm_name.split(',')
 711                 names = []
 712                 for elm_name in elm_names:
 713                     if elm_name in self.__keys__:
 714                         search_list = [x[:len(elm_name)] for x in self.__keys__]
 715                         occ_count = search_list.count(elm_name)
 716                         elm_name = elm_name+'_'+str(occ_count)
 717                     names.append(elm_name)
 718                 # Some PE header structures have unions on them, so a certain
 719                 # value might have different names, so each key has a list of
 720                 # all the possible members referring to the data.
 721                 self.__keys__.append(names)
 722
 723         self.__format_length__ = struct.calcsize(self.__format__)
 724
 725
 726     def sizeof(self):
 727         """Return size of the structure."""
 728
 729         return self.__format_length__
 730
 731
 732     def __unpack__(self, data):
 733
 734         if len(data)>self.__format_length__:
 735             data = data[:self.__format_length__]
 736
 737         # OC Patch:
 738         # Some malware have incorrect header lengths.
 739         # Fail gracefully if this occurs
 740         # Buggy malware: a29b0118af8b7408444df81701ad5a7f
 741         #
 742         elif len(data)<self.__format_length__:
 743             raise PEFormatError('Data length less than expected header length.')
 744
 745
 746         if data.count(chr(0)) == len(data):
 747             self._all_zeroes = True
 748
 749         self.__unpacked_data_elms__ = struct.unpack(self.__format__, data)
 750         for i in xrange(len(self.__unpacked_data_elms__)):
 751             for key in self.__keys__[i]:
 752 #                self.values[key] = self.__unpacked_data_elms__[i]
 753                 setattr(self, key, self.__unpacked_data_elms__[i])
 754
 755
 756     def __pack__(self):
 757
 758         new_values = []
 759
 760         for i in xrange(len(self.__unpacked_data_elms__)):
 761
 762             for key in self.__keys__[i]:
 763                 new_val = getattr(self, key)
 764                 old_val = self.__unpacked_data_elms__[i]
 765
 766                 # In the case of Unions, when the first changed value
 767                 # is picked the loop is exited
 768                 if new_val != old_val:
 769                     break
 770
 771             new_values.append(new_val)
 772
 773         return struct.pack(self.__format__, *new_values)
 774
 775
 776     def __str__(self):
 777         return '\n'.join( self.dump() )
 778
 779     def __repr__(self):
 780         return '<Structure: %s>' % (' '.join( [' '.join(s.split()) for s in self.dump()] ))
 781
 782
 783     def dump(self, indentation=0):
 784         """Returns a string representation of the structure."""
 785
 786         dump = []
 787
 788         dump.append('[%s]' % self.name)
 789
 790         # Refer to the __set_format__ method for an explanation
 791         # of the following construct.
 792         for keys in self.__keys__:
 793             for key in keys:
 794
 795                 val = getattr(self, key)
 796                 if isinstance(val, int) or isinstance(val, long):
 797                     val_str = '0x%-8X' % (val)
 798                     if key == 'TimeDateStamp' or key == 'dwTimeStamp':
 799                         try:
 800                             val_str += ' [%s UTC]' % time.asctime(time.gmtime(val))
 801                         except exceptions.ValueError, e:
 802                             val_str += ' [INVALID TIME]'
 803                 else:
 804                     val_str = ''.join(filter(lambda c:c != '\0', str(val)))
 805
 806                 dump.append('%-30s %s' % (key+':', val_str))
 807
 808         return dump
 809
 810
 811
 812 class SectionStructure(Structure):
 813     """Convenience section handling class."""
 814
 815     def get_data(self, start, length=None):
 816         """Get data chunk from a section.
 817
 818         Allows to query data from the section by passing the
 819         addresses where the PE file would be loaded by default.
 820         It is then possible to retrieve code and data by its real
 821         addresses as it would be if loaded.
 822         """
 823
 824         offset = start - self.VirtualAddress
 825
 826         if length:
 827             end = offset+length
 828         else:
 829             end = len(self.data)
 830
 831         return self.data[offset:end]
 832
 833
 834     def get_rva_from_offset(self, offset):
 835         return offset - self.PointerToRawData + self.VirtualAddress
 836
 837
 838     def get_offset_from_rva(self, rva):
 839         return (rva - self.VirtualAddress) + self.PointerToRawData
 840
 841
 842     def contains_offset(self, offset):
 843         """Check whether the section contains the file offset provided."""
 844
 845         if not self.PointerToRawData:
 846            # bss and other sections containing only uninitialized data must have 0
 847            # and do not take space in the file
 848            return False
 849         return self.PointerToRawData <= offset < self.VirtualAddress + self.SizeOfRawData
 850
 851
 852     def contains_rva(self, rva):
 853         """Check whether the section contains the address provided."""
 854
 855         # PECOFF documentation v8 says:
 856         # The total size of the section when loaded into memory.
 857         # If this value is greater than SizeOfRawData, the section is zero-padded.
 858         # This field is valid only for executable images and should be set to zero
 859         # for object files.
 860
 861         if len(self.data) < self.SizeOfRawData:
 862             size = self.Misc_VirtualSize
 863         else:
 864             size = max(self.SizeOfRawData, self.Misc_VirtualSize)
 865
 866         return self.VirtualAddress <= rva < self.VirtualAddress + size
 867
 868     def contains(self, rva):
 869         #print "DEPRECATION WARNING: you should use contains_rva() instead of contains()"
 870         return self.contains_rva(rva)
 871
 872
 873     def set_data(self, data):
 874         """Set the data belonging to the section."""
 875
 876         self.data = data
 877
 878
 879     def get_entropy(self):
 880         """Calculate and return the entropy for the section."""
 881
 882         return self.entropy_H( self.data )
 883
 884
 885     def get_hash_sha1(self):
 886         """Get the SHA-1 hex-digest of the section's data."""
 887
 888         if sha1 is not None:
 889             return sha1( self.data ).hexdigest()
 890
 891
 892     def get_hash_sha256(self):
 893         """Get the SHA-256 hex-digest of the section's data."""
 894
 895         if sha256 is not None:
 896             return sha256( self.data ).hexdigest()
 897
 898
 899     def get_hash_sha512(self):
 900         """Get the SHA-512 hex-digest of the section's data."""
 901
 902         if sha512 is not None:
 903             return sha512( self.data ).hexdigest()
 904
 905
 906     def get_hash_md5(self):
 907         """Get the MD5 hex-digest of the section's data."""
 908
 909         if md5 is not None:
 910             return md5( self.data ).hexdigest()
 911
 912
 913     def entropy_H(self, data):
 914         """Calculate the entropy of a chunk of data."""
 915
 916         if len(data) == 0:
 917             return 0.0
 918
 919         occurences = array.array('L', [0]*256)
 920
 921         for x in data:
 922             occurences[ord(x)] += 1
 923
 924         entropy = 0
 925         for x in occurences:
 926             if x:
 927                 p_x = float(x) / len(data)
 928                 entropy -= p_x*math.log(p_x, 2)
 929
 930         return entropy
 931
 932
 933
 934 class DataContainer:
 935     """Generic data container."""
 936
 937     def __init__(self, **args):
 938         for key, value in args.items():
 939             setattr(self, key, value)
 940
 941
 942
class ImportDescData(DataContainer):
    """Holds import descriptor information.

    dll:        name of the imported DLL
    imports:    list of imported symbols (ImportData instances)
    struct:     IMAGE_IMPORT_DESCRIPTOR structure
    """
 950
class ImportData(DataContainer):
    """Holds an imported symbol's information.

    ordinal:    ordinal of the symbol
    name:       name of the symbol
    bound:      if the symbol is bound, this contains
                the address
    """
 959
class ExportDirData(DataContainer):
    """Holds export directory information.

    struct:     IMAGE_EXPORT_DIRECTORY structure
    symbols:    list of exported symbols (ExportData instances)
    """
 966
class ExportData(DataContainer):
    """Holds an exported symbol's information.

    ordinal:    ordinal of the symbol
    address:    address of the symbol
    name:       name of the symbol (None if the symbol is
                exported by ordinal only)
    forwarder:  if the symbol is forwarded it will
                contain the name of the target symbol,
                None otherwise
    """
 978
 979
class ResourceDirData(DataContainer):
    """Holds resource directory information.

    struct:     IMAGE_RESOURCE_DIRECTORY structure
    entries:    list of the directory's entries
                (ResourceDirEntryData instances)
    """
 986
class ResourceDirEntryData(DataContainer):
    """Holds resource directory entry data.

    struct:     IMAGE_RESOURCE_DIRECTORY_ENTRY structure
    name:       If the resource is identified by name this
                attribute will contain the name string. None
                otherwise. If identified by id, the id is
                available at 'struct.Id'
    id:         the id, also in struct.Id
    directory:  If this entry has a lower level directory
                this attribute will point to the
                ResourceDirData instance representing it.
    data:       If this entry has no further lower directories
                and points to the actual resource data, this
                attribute will reference the corresponding
                ResourceDataEntryData instance.
    (Either of the 'directory' or 'data' attribute will exist,
    but not both.)
    """
1006
class ResourceDataEntryData(DataContainer):
    """Holds resource data entry information.

    struct:     IMAGE_RESOURCE_DATA_ENTRY structure
    lang:       primary language ID
    sublang:    sublanguage ID
    """
1014
class DebugData(DataContainer):
    """Holds debug directory information.

    struct:     IMAGE_DEBUG_DIRECTORY structure
    """
1020
class BaseRelocationData(DataContainer):
    """Holds base relocation information.

    struct:     IMAGE_BASE_RELOCATION structure
    entries:    list of the relocations it contains
                (RelocationData instances)
    """
1027
class RelocationData(DataContainer):
    """Holds relocation information.

    type:       Type of relocation
                The type string can be obtained from
                RELOCATION_TYPE[type]
    rva:        RVA of the relocation
    """
1036
class TlsData(DataContainer):
    """Holds TLS directory information.

    struct:     IMAGE_TLS_DIRECTORY structure
    """
1042
class BoundImportDescData(DataContainer):
    """Holds bound import descriptor data.

    This directory entry provides information on the DLLs
    this PE file has been bound to (if bound at all).
    The structure will contain the name and timestamp of the
    DLL at the time of binding so that the loader can know
    whether it differs from the one currently present in the
    system and must, therefore, re-bind the PE's imports.

    struct:     IMAGE_BOUND_IMPORT_DESCRIPTOR structure
    name:       DLL name
    entries:    list of entries (BoundImportRefData instances)
                the entries will exist if this DLL has forwarded
                symbols. If so, the destination DLL will have an
                entry in this list.
    """
1060
class BoundImportRefData(DataContainer):
    """Holds bound import forwarder reference data.

    Contains the same information as the bound descriptor but
    for forwarded DLLs, if any.

    struct:     IMAGE_BOUND_FORWARDER_REF structure
    name:       DLL name
    """
1070
1071
class PE:
    """A Portable Executable representation.

    This class provides access to most of the information in a PE file.

    It expects to be supplied the name of the file to load or PE data
    to process and an optional argument 'fast_load' (False by default)
    which controls whether to load all the directories information,
    which can be quite time consuming.

    pe = pefile.PE('module.dll')
    pe = pefile.PE(name='module.dll')

    would load 'module.dll' and process it. If the data would be already
    available in a buffer the same could be achieved with:

    pe = pefile.PE(data=module_dll_data)

    The "fast_load" can be set to a default by setting its value in the
    module itself by means, for instance, of a "pefile.fast_load = True".
    That will make all the subsequent instances not to load the
    whole PE structure. The "full_load" method can be used to parse
    the missing data at a later stage.

    Basic headers information will be available in the attributes:

    DOS_HEADER
    NT_HEADERS
    FILE_HEADER
    OPTIONAL_HEADER

    All of them will contain among their attributes the members of the
    corresponding structures as defined in WINNT.H

    The raw data corresponding to the header (from the beginning of the
    file up to the start of the first section) will be available in the
    instance's attribute 'header' as a string.

    The sections will be available as a list in the 'sections' attribute.
    Each entry will contain as attributes all the structure's members.

    Directory entries will be available as attributes (if they exist):
    (no other entries are processed at this point)

    DIRECTORY_ENTRY_IMPORT (list of ImportDescData instances)
    DIRECTORY_ENTRY_EXPORT (ExportDirData instance)
    DIRECTORY_ENTRY_RESOURCE (ResourceDirData instance)
    DIRECTORY_ENTRY_DEBUG (list of DebugData instances)
    DIRECTORY_ENTRY_BASERELOC (list of BaseRelocationData instances)
    DIRECTORY_ENTRY_TLS
    DIRECTORY_ENTRY_BOUND_IMPORT (list of BoundImportData instances)

    The following dictionary attributes provide ways of mapping different
    constants. They will accept the numeric value and return the string
    representation and the opposite, feed in the string and get the
    numeric constant:

    DIRECTORY_ENTRY
    IMAGE_CHARACTERISTICS
    SECTION_CHARACTERISTICS
    DEBUG_TYPE
    SUBSYSTEM_TYPE
    MACHINE_TYPE
    RELOCATION_TYPE
    RESOURCE_TYPE
    LANG
    SUBLANG
    """
1140
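    #
    # Format specifications for PE structures.
    #
    # Each specification is a (structure name, fields) tuple. Every field is
    # a string of comma-separated items: a type code followed by the name of
    # the member and, where a member is known by several names, the others.
    # NOTE(review): the type codes look like struct-module format characters;
    # see the __set_format__ method for how these strings are interpreted.
    #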
1144
1145     __IMAGE_DOS_HEADER_format__ = ('IMAGE_DOS_HEADER',
1146         ('H,e_magic', 'H,e_cblp', 'H,e_cp',
1147         'H,e_crlc', 'H,e_cparhdr', 'H,e_minalloc',
1148         'H,e_maxalloc', 'H,e_ss', 'H,e_sp', 'H,e_csum',
1149         'H,e_ip', 'H,e_cs', 'H,e_lfarlc', 'H,e_ovno', '8s,e_res',
1150         'H,e_oemid', 'H,e_oeminfo', '20s,e_res2',
1151         'L,e_lfanew'))
1152
1153     __IMAGE_FILE_HEADER_format__ = ('IMAGE_FILE_HEADER',
1154         ('H,Machine', 'H,NumberOfSections',
1155         'L,TimeDateStamp', 'L,PointerToSymbolTable',
1156         'L,NumberOfSymbols', 'H,SizeOfOptionalHeader',
1157         'H,Characteristics'))
1158
1159     __IMAGE_DATA_DIRECTORY_format__ = ('IMAGE_DATA_DIRECTORY',
1160         ('L,VirtualAddress', 'L,Size'))
1161
1162
1163     __IMAGE_OPTIONAL_HEADER_format__ = ('IMAGE_OPTIONAL_HEADER',
1164         ('H,Magic', 'B,MajorLinkerVersion',
1165         'B,MinorLinkerVersion', 'L,SizeOfCode',
1166         'L,SizeOfInitializedData', 'L,SizeOfUninitializedData',
1167         'L,AddressOfEntryPoint', 'L,BaseOfCode', 'L,BaseOfData',
1168         'L,ImageBase', 'L,SectionAlignment', 'L,FileAlignment',
1169         'H,MajorOperatingSystemVersion', 'H,MinorOperatingSystemVersion',
1170         'H,MajorImageVersion', 'H,MinorImageVersion',
1171         'H,MajorSubsystemVersion', 'H,MinorSubsystemVersion',
1172         'L,Reserved1', 'L,SizeOfImage', 'L,SizeOfHeaders',
1173         'L,CheckSum', 'H,Subsystem', 'H,DllCharacteristics',
1174         'L,SizeOfStackReserve', 'L,SizeOfStackCommit',
1175         'L,SizeOfHeapReserve', 'L,SizeOfHeapCommit',
1176         'L,LoaderFlags', 'L,NumberOfRvaAndSizes' ))
1177
1178
1179     __IMAGE_OPTIONAL_HEADER64_format__ = ('IMAGE_OPTIONAL_HEADER64',
1180         ('H,Magic', 'B,MajorLinkerVersion',
1181         'B,MinorLinkerVersion', 'L,SizeOfCode',
1182         'L,SizeOfInitializedData', 'L,SizeOfUninitializedData',
1183         'L,AddressOfEntryPoint', 'L,BaseOfCode',
1184         'Q,ImageBase', 'L,SectionAlignment', 'L,FileAlignment',
1185         'H,MajorOperatingSystemVersion', 'H,MinorOperatingSystemVersion',
1186         'H,MajorImageVersion', 'H,MinorImageVersion',
1187         'H,MajorSubsystemVersion', 'H,MinorSubsystemVersion',
1188         'L,Reserved1', 'L,SizeOfImage', 'L,SizeOfHeaders',
1189         'L,CheckSum', 'H,Subsystem', 'H,DllCharacteristics',
1190         'Q,SizeOfStackReserve', 'Q,SizeOfStackCommit',
1191         'Q,SizeOfHeapReserve', 'Q,SizeOfHeapCommit',
1192         'L,LoaderFlags', 'L,NumberOfRvaAndSizes' ))
1193
1194
1195     __IMAGE_NT_HEADERS_format__ = ('IMAGE_NT_HEADERS', ('L,Signature',))
1196
1197     __IMAGE_SECTION_HEADER_format__ = ('IMAGE_SECTION_HEADER',
1198         ('8s,Name', 'L,Misc,Misc_PhysicalAddress,Misc_VirtualSize',
1199         'L,VirtualAddress', 'L,SizeOfRawData', 'L,PointerToRawData',
1200         'L,PointerToRelocations', 'L,PointerToLinenumbers',
1201         'H,NumberOfRelocations', 'H,NumberOfLinenumbers',
1202         'L,Characteristics'))
1203
1204     __IMAGE_DELAY_IMPORT_DESCRIPTOR_format__ = ('IMAGE_DELAY_IMPORT_DESCRIPTOR',
1205         ('L,grAttrs', 'L,szName', 'L,phmod', 'L,pIAT', 'L,pINT',
1206         'L,pBoundIAT', 'L,pUnloadIAT', 'L,dwTimeStamp'))
1207
1208     __IMAGE_IMPORT_DESCRIPTOR_format__ =  ('IMAGE_IMPORT_DESCRIPTOR',
1209         ('L,OriginalFirstThunk,Characteristics',
1210         'L,TimeDateStamp', 'L,ForwarderChain', 'L,Name', 'L,FirstThunk'))
1211
1212     __IMAGE_EXPORT_DIRECTORY_format__ =  ('IMAGE_EXPORT_DIRECTORY',
1213         ('L,Characteristics',
1214         'L,TimeDateStamp', 'H,MajorVersion', 'H,MinorVersion', 'L,Name',
1215         'L,Base', 'L,NumberOfFunctions', 'L,NumberOfNames',
1216         'L,AddressOfFunctions', 'L,AddressOfNames', 'L,AddressOfNameOrdinals'))
1217
1218     __IMAGE_RESOURCE_DIRECTORY_format__ = ('IMAGE_RESOURCE_DIRECTORY',
1219         ('L,Characteristics',
1220         'L,TimeDateStamp', 'H,MajorVersion', 'H,MinorVersion',
1221         'H,NumberOfNamedEntries', 'H,NumberOfIdEntries'))
1222
1223     __IMAGE_RESOURCE_DIRECTORY_ENTRY_format__ = ('IMAGE_RESOURCE_DIRECTORY_ENTRY',
1224         ('L,Name',
1225         'L,OffsetToData'))
1226
1227     __IMAGE_RESOURCE_DATA_ENTRY_format__ = ('IMAGE_RESOURCE_DATA_ENTRY',
1228         ('L,OffsetToData', 'L,Size', 'L,CodePage', 'L,Reserved'))
1229
1230     __VS_VERSIONINFO_format__ = ( 'VS_VERSIONINFO',
1231         ('H,Length', 'H,ValueLength', 'H,Type' ))
1232
1233     __VS_FIXEDFILEINFO_format__ = ( 'VS_FIXEDFILEINFO',
1234         ('L,Signature', 'L,StrucVersion', 'L,FileVersionMS', 'L,FileVersionLS',
1235          'L,ProductVersionMS', 'L,ProductVersionLS', 'L,FileFlagsMask', 'L,FileFlags',
1236          'L,FileOS', 'L,FileType', 'L,FileSubtype', 'L,FileDateMS', 'L,FileDateLS'))
1237
1238     __StringFileInfo_format__ = ( 'StringFileInfo',
1239         ('H,Length', 'H,ValueLength', 'H,Type' ))
1240
1241     __StringTable_format__ = ( 'StringTable',
1242         ('H,Length', 'H,ValueLength', 'H,Type' ))
1243
1244     __String_format__ = ( 'String',
1245         ('H,Length', 'H,ValueLength', 'H,Type' ))
1246
1247     __Var_format__ = ( 'Var', ('H,Length', 'H,ValueLength', 'H,Type' ))
1248
1249     __IMAGE_THUNK_DATA_format__ = ('IMAGE_THUNK_DATA',
1250         ('L,ForwarderString,Function,Ordinal,AddressOfData',))
1251
1252     __IMAGE_THUNK_DATA64_format__ = ('IMAGE_THUNK_DATA',
1253         ('Q,ForwarderString,Function,Ordinal,AddressOfData',))
1254
1255     __IMAGE_DEBUG_DIRECTORY_format__ = ('IMAGE_DEBUG_DIRECTORY',
1256         ('L,Characteristics', 'L,TimeDateStamp', 'H,MajorVersion',
1257         'H,MinorVersion', 'L,Type', 'L,SizeOfData', 'L,AddressOfRawData',
1258         'L,PointerToRawData'))
1259
1260     __IMAGE_BASE_RELOCATION_format__ = ('IMAGE_BASE_RELOCATION',
1261         ('L,VirtualAddress', 'L,SizeOfBlock') )
1262
1263     __IMAGE_TLS_DIRECTORY_format__ = ('IMAGE_TLS_DIRECTORY',
1264         ('L,StartAddressOfRawData', 'L,EndAddressOfRawData',
1265         'L,AddressOfIndex', 'L,AddressOfCallBacks',
1266         'L,SizeOfZeroFill', 'L,Characteristics' ) )
1267
1268     __IMAGE_TLS_DIRECTORY64_format__ = ('IMAGE_TLS_DIRECTORY',
1269         ('Q,StartAddressOfRawData', 'Q,EndAddressOfRawData',
1270         'Q,AddressOfIndex', 'Q,AddressOfCallBacks',
1271         'L,SizeOfZeroFill', 'L,Characteristics' ) )
1272
1273     __IMAGE_BOUND_IMPORT_DESCRIPTOR_format__ = ('IMAGE_BOUND_IMPORT_DESCRIPTOR',
1274         ('L,TimeDateStamp', 'H,OffsetModuleName', 'H,NumberOfModuleForwarderRefs'))
1275
1276     __IMAGE_BOUND_FORWARDER_REF_format__ = ('IMAGE_BOUND_FORWARDER_REF',
1277         ('L,TimeDateStamp', 'H,OffsetModuleName', 'H,Reserved') )
1278
1279
1280     def __init__(self, name=None, data=None, fast_load=None):
1281
1282         self.sections = []
1283
1284         self.__warnings = []
1285
1286         self.PE_TYPE = None
1287
1288         if  not name and not data:
1289             return
1290
1291         # This list will keep track of all the structures created.
1292         # That will allow for an easy iteration through the list
1293         # in order to save the modifications made
1294         self.__structures__ = []
1295
1296         if not fast_load:
1297             fast_load = globals()['fast_load']
1298         self.__parse__(name, data, fast_load)
1299
1300
1301
1302     def __unpack_data__(self, format, data, file_offset):
1303         """Apply structure format to raw data.
1304
1305         Returns and unpacked structure object if successful, None otherwise.
1306         """
1307
1308         structure = Structure(format, file_offset=file_offset)
1309         #if len(data) < structure.sizeof():
1310         #    return None
1311
1312         try:
1313             structure.__unpack__(data)
1314         except PEFormatError, err:
1315             self.__warnings.append(
1316                 'Corrupt header "%s" at file offset %d. Exception: %s' % (
1317                     format[0], file_offset, str(err))  )
1318             return None
1319
1320         self.__structures__.append(structure)
1321
1322         return structure
1323
1324
1325
1326     def __parse__(self, fname, data, fast_load):
1327         """Parse a Portable Executable file.
1328
1329         Loads a PE file, parsing all its structures and making them available
1330         through the instance's attributes.
1331         """
1332
1333         if fname:
1334             fd = file(fname, 'rb')
1335             self.__data__ = fd.read()
1336             fd.close()
1337         elif data:
1338             self.__data__ = data
1339
1340
1341         self.DOS_HEADER = self.__unpack_data__(
1342             self.__IMAGE_DOS_HEADER_format__,
1343             self.__data__, file_offset=0)
1344
1345         if not self.DOS_HEADER or self.DOS_HEADER.e_magic != IMAGE_DOS_SIGNATURE:
1346             raise PEFormatError('DOS Header magic not found.')
1347
1348         # OC Patch:
1349         # Check for sane value in e_lfanew
1350         #
1351         if self.DOS_HEADER.e_lfanew > len(self.__data__):
1352             raise PEFormatError('Invalid e_lfanew value, probably not a PE file')
1353
1354         nt_headers_offset = self.DOS_HEADER.e_lfanew
1355
1356         self.NT_HEADERS = self.__unpack_data__(
1357             self.__IMAGE_NT_HEADERS_format__,
1358             self.__data__[nt_headers_offset:],
1359             file_offset = nt_headers_offset)
1360
1361         # We better check the signature right here, before the file screws
1362         # around with sections:
1363         # OC Patch:
1364         # Some malware will cause the Signature value to not exist at all
1365         if not self.NT_HEADERS or not self.NT_HEADERS.Signature:
1366             raise PEFormatError('NT Headers not found.')
1367
1368         if self.NT_HEADERS.Signature != IMAGE_NT_SIGNATURE:
1369             raise PEFormatError('Invalid NT Headers signature.')
1370
1371         self.FILE_HEADER = self.__unpack_data__(
1372             self.__IMAGE_FILE_HEADER_format__,
1373             self.__data__[nt_headers_offset+4:],
1374             file_offset = nt_headers_offset+4)
1375         image_flags = self.retrieve_flags(IMAGE_CHARACTERISTICS, 'IMAGE_FILE_')
1376
1377         if not self.FILE_HEADER:
1378             raise PEFormatError('File Header missing')
1379
1380         # Set the image's flags according the the Characteristics member
1381         self.set_flags(self.FILE_HEADER, self.FILE_HEADER.Characteristics, image_flags)
1382
1383         optional_header_offset =    \
1384             nt_headers_offset+4+self.FILE_HEADER.sizeof()
1385
1386         # Note: location of sections can be controlled from PE header:
1387         sections_offset = optional_header_offset + self.FILE_HEADER.SizeOfOptionalHeader
1388
1389         self.OPTIONAL_HEADER = self.__unpack_data__(
1390             self.__IMAGE_OPTIONAL_HEADER_format__,
1391             self.__data__[optional_header_offset:],
1392             file_offset = optional_header_offset)
1393
1394         # According to solardesigner's findings for his
1395         # Tiny PE project, the optional header does not
1396         # need fields beyond "Subsystem" in order to be
1397         # loadable by the Windows loader (given that zeroes
1398         # are acceptable values and the header is loaded
1399         # in a zeroed memory page)
1400         # If trying to parse a full Optional Header fails
1401         # we try to parse it again with some 0 padding
1402         #
1403         MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE = 69
1404
1405         if ( self.OPTIONAL_HEADER is None and
1406             len(self.__data__[optional_header_offset:])
1407                 >= MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE ):
1408
1409             # Add enough zeroes to make up for the unused fields
1410             #
1411             padding_length = 128
1412
1413             # Create padding
1414             #
1415             padded_data = self.__data__[optional_header_offset:] + (
1416                 '\0' * padding_length)
1417
1418             self.OPTIONAL_HEADER = self.__unpack_data__(
1419                 self.__IMAGE_OPTIONAL_HEADER_format__,
1420                 padded_data,
1421                 file_offset = optional_header_offset)
1422
1423
1424         # Check the Magic in the OPTIONAL_HEADER and set the PE file
1425         # type accordingly
1426         #
1427         if self.OPTIONAL_HEADER is not None:
1428
1429             if self.OPTIONAL_HEADER.Magic == OPTIONAL_HEADER_MAGIC_PE:
1430
1431                 self.PE_TYPE = OPTIONAL_HEADER_MAGIC_PE
1432
1433             elif self.OPTIONAL_HEADER.Magic == OPTIONAL_HEADER_MAGIC_PE_PLUS:
1434
1435                 self.PE_TYPE = OPTIONAL_HEADER_MAGIC_PE_PLUS
1436
1437                 self.OPTIONAL_HEADER = self.__unpack_data__(
1438                     self.__IMAGE_OPTIONAL_HEADER64_format__,
1439                     self.__data__[optional_header_offset:],
1440                     file_offset = optional_header_offset)
1441
1442                 # Again, as explained above, we try to parse
1443                 # a reduced form of the Optional Header which
1444                 # is still valid despite not including all
1445                 # structure members
1446                 #
1447                 MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE = 69+4
1448
1449                 if ( self.OPTIONAL_HEADER is None and
1450                     len(self.__data__[optional_header_offset:])
1451                         >= MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE ):
1452
1453                     padding_length = 128
1454                     padded_data = self.__data__[optional_header_offset:] + (
1455                         '\0' * padding_length)
1456                     self.OPTIONAL_HEADER = self.__unpack_data__(
1457                         self.__IMAGE_OPTIONAL_HEADER64_format__,
1458                         padded_data,
1459                         file_offset = optional_header_offset)
1460
1461
1462         if not self.FILE_HEADER:
1463             raise PEFormatError('File Header missing')
1464
1465
1466         # OC Patch:
1467         # Die gracefully if there is no OPTIONAL_HEADER field
1468         # 975440f5ad5e2e4a92c4d9a5f22f75c1
1469         if self.PE_TYPE is None or self.OPTIONAL_HEADER is None:
1470             raise PEFormatError("No Optional Header found, invalid PE32 or PE32+ file")
1471
1472         dll_characteristics_flags = self.retrieve_flags(DLL_CHARACTERISTICS, 'IMAGE_DLL_CHARACTERISTICS_')
1473
1474         # Set the Dll Characteristics flags according the the DllCharacteristics member
1475         self.set_flags(
1476             self.OPTIONAL_HEADER,
1477             self.OPTIONAL_HEADER.DllCharacteristics,
1478             dll_characteristics_flags)
1479
1480
1481         self.OPTIONAL_HEADER.DATA_DIRECTORY = []
1482         #offset = (optional_header_offset + self.FILE_HEADER.SizeOfOptionalHeader)
1483         offset = (optional_header_offset + self.OPTIONAL_HEADER.sizeof())
1484
1485
1486         self.NT_HEADERS.FILE_HEADER = self.FILE_HEADER
1487         self.NT_HEADERS.OPTIONAL_HEADER = self.OPTIONAL_HEADER
1488
1489
1490         # The NumberOfRvaAndSizes is sanitized to stay within
1491         # reasonable limits so can be casted to an int
1492         #
1493         if self.OPTIONAL_HEADER.NumberOfRvaAndSizes > 0x10:
1494             self.__warnings.append(
1495                 'Suspicious NumberOfRvaAndSizes in the Optional Header. ' +
1496                 'Normal values are never larger than 0x10, the value is: 0x%x' %
1497                 self.OPTIONAL_HEADER.NumberOfRvaAndSizes )
1498
1499         for i in xrange(int(0x7fffffffL & self.OPTIONAL_HEADER.NumberOfRvaAndSizes)):
1500
1501             if len(self.__data__[offset:]) == 0:
1502                 break
1503
1504             if len(self.__data__[offset:]) < 8:
1505                 data = self.__data__[offset:]+'\0'*8
1506             else:
1507                 data = self.__data__[offset:]
1508
1509             dir_entry = self.__unpack_data__(
1510                 self.__IMAGE_DATA_DIRECTORY_format__,
1511                 data,
1512                 file_offset = offset)
1513
1514             if dir_entry is None:
1515                 break
1516
1517             # Would fail if missing an entry
1518             # 1d4937b2fa4d84ad1bce0309857e70ca offending sample
1519             try:
1520                 dir_entry.name = DIRECTORY_ENTRY[i]
1521             except (KeyError, AttributeError):
1522                 break
1523
1524             offset += dir_entry.sizeof()
1525
1526             self.OPTIONAL_HEADER.DATA_DIRECTORY.append(dir_entry)
1527
1528             # If the offset goes outside the optional header,
1529             # the loop is broken, regardless of how many directories
1530             # NumberOfRvaAndSizes says there are
1531             #
1532             # We assume a normally sized optional header, hence that we do
1533             # a sizeof() instead of reading SizeOfOptionalHeader.
1534             # Then we add a default number of drectories times their size,
1535             # if we go beyond that, we assume the number of directories
1536             # is wrong and stop processing
1537             if offset >= (optional_header_offset +
1538                 self.OPTIONAL_HEADER.sizeof() + 8*16) :
1539
1540                 break
1541
1542
1543         offset = self.parse_sections(sections_offset)
1544
1545         # OC Patch:
1546         # There could be a problem if there are no raw data sections
1547         # greater than 0
1548         # fc91013eb72529da005110a3403541b6 example
1549         # Should this throw an exception in the minimum header offset
1550         # can't be found?
1551         #
1552         rawDataPointers = [
1553             s.PointerToRawData for s in self.sections if s.PointerToRawData>0]
1554
1555         if len(rawDataPointers) > 0:
1556             lowest_section_offset = min(rawDataPointers)
1557         else:
1558             lowest_section_offset = None
1559
1560         if not lowest_section_offset or lowest_section_offset<offset:
1561             self.header = self.__data__[:offset]
1562         else:
1563             self.header = self.__data__[:lowest_section_offset]
1564
1565
1566         # Check whether the entry point lies within a section
1567         #
1568         if self.get_section_by_rva(self.OPTIONAL_HEADER.AddressOfEntryPoint) is not None:
1569
1570             # Check whether the entry point lies within the file
1571             #
1572             ep_offset = self.get_offset_from_rva(self.OPTIONAL_HEADER.AddressOfEntryPoint)
1573             if ep_offset > len(self.__data__):
1574
1575                 self.__warnings.append(
1576                     'Possibly corrupt file. AddressOfEntryPoint lies outside the file. ' +
1577                     'AddressOfEntryPoint: 0x%x' %
1578                     self.OPTIONAL_HEADER.AddressOfEntryPoint )
1579
1580         else:
1581
1582             self.__warnings.append(
1583                 'AddressOfEntryPoint lies outside the sections\' boundaries. ' +
1584                 'AddressOfEntryPoint: 0x%x' %
1585                 self.OPTIONAL_HEADER.AddressOfEntryPoint )
1586
1587
1588         if not fast_load:
1589             self.parse_data_directories()
1590
1591
1592     def get_warnings(self):
1593         """Return the list of warnings.
1594
1595         Non-critical problems found when parsing the PE file are
1596         appended to a list of warnings. This method returns the
1597         full list.
1598         """
1599
1600         return self.__warnings
1601
1602
1603     def show_warnings(self):
1604         """Print the list of warnings.
1605
1606         Non-critical problems found when parsing the PE file are
1607         appended to a list of warnings. This method prints the
1608         full list to standard output.
1609         """
1610
1611         for warning in self.__warnings:
1612             print '>', warning
1613
1614
1615     def full_load(self):
1616         """Process the data directories.
1617
1618         This mathod will load the data directories which might not have
1619         been loaded if the "fast_load" option was used.
1620         """
1621
1622         self.parse_data_directories()
1623
1624
1625     def write(self, filename=None):
1626         """Write the PE file.
1627
1628         This function will process all headers and components
1629         of the PE file and include all changes made (by just
1630         assigning to attributes in the PE objects) and write
1631         the changes back to a file whose name is provided as
1632         an argument. The filename is optional.
1633         The data to be written to the file will be returned
1634         as a 'str' object.
1635         """
1636
1637         file_data = list(self.__data__)
1638         for struct in self.__structures__:
1639
1640             struct_data = list(struct.__pack__())
1641             offset = struct.get_file_offset()
1642
1643             file_data[offset:offset+len(struct_data)] = struct_data
1644
1645         if hasattr(self, 'VS_VERSIONINFO'):
1646             if hasattr(self, 'FileInfo'):
1647                 for entry in self.FileInfo:
1648                     if hasattr(entry, 'StringTable'):
1649                         for st_entry in entry.StringTable:
1650                             for key, entry in st_entry.entries.items():
1651
1652                                 offsets = st_entry.entries_offsets[key]
1653                                 lengths = st_entry.entries_lengths[key]
1654
1655                                 if len( entry ) > lengths[1]:
1656
1657                                     uc = zip(
1658                                             list(entry[:lengths[1]]), ['\0'] * lengths[1] )
1659                                     l = list()
1660                                     map(l.extend, uc)
1661
1662                                     file_data[
1663                                         offsets[1] : offsets[1] + lengths[1]*2 ] = l
1664
1665                                 else:
1666
1667                                     uc = zip(
1668                                             list(entry), ['\0'] * len(entry) )
1669                                     l = list()
1670                                     map(l.extend, uc)
1671
1672                                     file_data[
1673                                         offsets[1] : offsets[1] + len(entry)*2 ] = l
1674
1675                                     remainder = lengths[1] - len(entry)
1676                                     file_data[
1677                                         offsets[1] + len(entry)*2 :
1678                                         offsets[1] + lengths[1]*2 ] = [
1679                                             u'\0' ] * remainder*2
1680
1681         new_file_data = ''.join( [ chr(ord(c)) for c in file_data ] )
1682
1683         if filename:
1684             f = file(filename, 'wb+')
1685             f.write(new_file_data)
1686             f.close()
1687
1688         return new_file_data
1689
1690
1691
1692     def parse_sections(self, offset):
1693         """Fetch the PE file sections.
1694
1695         The sections will be readily available in the "sections" attribute.
1696         Its attributes will contain all the section information plus "data"
1697         a buffer containing the section's data.
1698
1699         The "Characteristics" member will be processed and attributes
1700         representing the section characteristics (with the 'IMAGE_SCN_'
1701         string trimmed from the constant's names) will be added to the
1702         section instance.
1703
1704         Refer to the SectionStructure class for additional info.
1705         """
1706
1707         self.sections = []
1708
1709         for i in xrange(self.FILE_HEADER.NumberOfSections):
1710             section = SectionStructure(self.__IMAGE_SECTION_HEADER_format__)
1711             if not section:
1712                 break
1713             section_offset = offset + section.sizeof() * i
1714             section.set_file_offset(section_offset)
1715             section.__unpack__(self.__data__[section_offset:])
1716             self.__structures__.append(section)
1717
1718             if section.SizeOfRawData > len(self.__data__):
1719                 self.__warnings.append(
1720                     ('Error parsing section %d. ' % i) +
1721                     'SizeOfRawData is larger than file.')
1722
1723             if section.PointerToRawData > len(self.__data__):
1724                 self.__warnings.append(
1725                     ('Error parsing section %d. ' % i) +
1726                     'PointerToRawData points beyond the end of the file.')
1727
1728             if section.Misc_VirtualSize > 0x10000000:
1729                 self.__warnings.append(
1730                     ('Suspicious value found parsing section %d. ' % i) +
1731                     'VirtualSize is extremely large > 256MiB.')
1732
1733             if section.VirtualAddress > 0x10000000:
1734                 self.__warnings.append(
1735                     ('Suspicious value found parsing section %d. ' % i) +
1736                     'VirtualAddress is beyond 0x10000000.')
1737
1738             #
1739             # Some packer used a non-aligned PointerToRawData in the sections,
1740             # which causes several common tools not to load the section data
1741             # properly as they blindly read from the indicated offset.
1742             # It seems that Windows will round the offset down to the largest
1743             # offset multiple of FileAlignment which is smaller than
1744             # PointerToRawData. The following code will do the same.
1745             #
1746
1747             #alignment = self.OPTIONAL_HEADER.FileAlignment
1748             section_data_start = section.PointerToRawData
1749
1750             if ( self.OPTIONAL_HEADER.FileAlignment != 0 and
1751                 (section.PointerToRawData % self.OPTIONAL_HEADER.FileAlignment) != 0):
1752                 self.__warnings.append(
1753                     ('Error parsing section %d. ' % i) +
1754                     'Suspicious value for FileAlignment in the Optional Header. ' +
1755                     'Normally the PointerToRawData entry of the sections\' structures ' +
1756                     'is a multiple of FileAlignment, this might imply the file ' +
1757                     'is trying to confuse tools which parse this incorrectly')
1758
1759             section_data_end = section_data_start+section.SizeOfRawData
1760             section.set_data(self.__data__[section_data_start:section_data_end])
1761
1762             section_flags = self.retrieve_flags(SECTION_CHARACTERISTICS, 'IMAGE_SCN_')
1763
1764             # Set the section's flags according the the Characteristics member
1765             self.set_flags(section, section.Characteristics, section_flags)
1766
1767             if ( section.__dict__.get('IMAGE_SCN_MEM_WRITE', False)  and
1768                 section.__dict__.get('IMAGE_SCN_MEM_EXECUTE', False) ):
1769
1770                 self.__warnings.append(
1771                     ('Suspicious flags set for section %d. ' % i) +
1772                     'Both IMAGE_SCN_MEM_WRITE and IMAGE_SCN_MEM_EXECUTE are set.' +
1773                     'This might indicate a packed executable.')
1774
1775             self.sections.append(section)
1776
1777         if self.FILE_HEADER.NumberOfSections > 0 and self.sections:
1778             return offset + self.sections[0].sizeof()*self.FILE_HEADER.NumberOfSections
1779         else:
1780             return offset
1781
1782
1783     def retrieve_flags(self, flag_dict, flag_filter):
1784         """Read the flags from a dictionary and return them in a usable form.
1785
1786         Will return a list of (flag, value) for all flags in "flag_dict"
1787         matching the filter "flag_filter".
1788         """
1789
1790         return [(f[0], f[1]) for f in flag_dict.items() if
1791                 isinstance(f[0], str) and f[0].startswith(flag_filter)]
1792
1793
1794     def set_flags(self, obj, flag_field, flags):
1795         """Will process the flags and set attributes in the object accordingly.
1796
1797         The object "obj" will gain attritutes named after the flags provided in
1798         "flags" and valued True/False, matching the results of applyin each
1799         flag value from "flags" to flag_field.
1800         """
1801
1802         for flag in flags:
1803             if flag[1] & flag_field:
1804                 setattr(obj, flag[0], True)
1805             else:
1806                 setattr(obj, flag[0], False)
1807
1808
1809
1810     def parse_data_directories(self):
1811         """Parse and process the PE file's data directories."""
1812
1813         directory_parsing = (
1814             ('IMAGE_DIRECTORY_ENTRY_IMPORT', self.parse_import_directory),
1815             ('IMAGE_DIRECTORY_ENTRY_EXPORT', self.parse_export_directory),
1816             ('IMAGE_DIRECTORY_ENTRY_RESOURCE', self.parse_resources_directory),
1817             ('IMAGE_DIRECTORY_ENTRY_DEBUG', self.parse_debug_directory),
1818             ('IMAGE_DIRECTORY_ENTRY_BASERELOC', self.parse_relocations_directory),
1819             ('IMAGE_DIRECTORY_ENTRY_TLS', self.parse_directory_tls),
1820             ('IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT', self.parse_delay_import_directory),
1821             ('IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT', self.parse_directory_bound_imports) )
1822
1823         for entry in directory_parsing:
1824             # OC Patch:
1825             #
1826             try:
1827                 dir_entry = self.OPTIONAL_HEADER.DATA_DIRECTORY[
1828                     DIRECTORY_ENTRY[entry[0]]]
1829             except IndexError:
1830                 break
1831             if dir_entry.VirtualAddress:
1832                 value = entry[1](dir_entry.VirtualAddress, dir_entry.Size)
1833                 if value:
1834                     setattr(self, entry[0][6:], value)
1835
1836
1837     def parse_directory_bound_imports(self, rva, size):
1838         """"""
1839
1840         bnd_descr = Structure(self.__IMAGE_BOUND_IMPORT_DESCRIPTOR_format__)
1841         bnd_descr_size = bnd_descr.sizeof()
1842         start = rva
1843
1844         bound_imports = []
1845         while True:
1846
1847             bnd_descr = self.__unpack_data__(
1848                 self.__IMAGE_BOUND_IMPORT_DESCRIPTOR_format__,
1849                    self.__data__[rva:rva+bnd_descr_size],
1850                    file_offset = rva)
1851             if bnd_descr is None:
1852                 # If can't parse directory then silently return.
1853                 # This directory does not necesarily have to be valid to
1854                 # still have a valid PE file
1855
1856                 self.__warnings.append(
1857                     'The Bound Imports directory exists but can\'t be parsed.')
1858
1859                 return
1860
1861             if bnd_descr.all_zeroes():
1862                 break
1863
1864             rva += bnd_descr.sizeof()
1865
1866             forwarder_refs = []
1867             for idx in xrange(bnd_descr.NumberOfModuleForwarderRefs):
1868                 # Both structures IMAGE_BOUND_IMPORT_DESCRIPTOR and
1869                 # IMAGE_BOUND_FORWARDER_REF have the same size.
1870                 bnd_frwd_ref = self.__unpack_data__(
1871                     self.__IMAGE_BOUND_FORWARDER_REF_format__,
1872                     self.__data__[rva:rva+bnd_descr_size],
1873                     file_offset = rva)
1874                 # OC Patch:
1875                 if not bnd_frwd_ref:
1876                     raise PEFormatError(
1877                         "IMAGE_BOUND_FORWARDER_REF cannot be read")
1878                 rva += bnd_frwd_ref.sizeof()
1879
1880                 name_str =  self.get_string_from_data(
1881                     start+bnd_frwd_ref.OffsetModuleName, self.__data__)
1882
1883                 if not name_str:
1884                     break
1885                 forwarder_refs.append(BoundImportRefData(
1886                     struct = bnd_frwd_ref,
1887                     name = name_str))
1888
1889             name_str = self.get_string_from_data(
1890                 start+bnd_descr.OffsetModuleName, self.__data__)
1891
1892             if not name_str:
1893                 break
1894             bound_imports.append(
1895                 BoundImportDescData(
1896                     struct = bnd_descr,
1897                     name = name_str,
1898                     entries = forwarder_refs))
1899
1900         return bound_imports
1901
1902
1903     def parse_directory_tls(self, rva, size):
1904         """"""
1905
1906         if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:
1907             format = self.__IMAGE_TLS_DIRECTORY_format__
1908
1909         elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
1910             format = self.__IMAGE_TLS_DIRECTORY64_format__
1911
1912         tls_struct = self.__unpack_data__(
1913             format,
1914             self.get_data(rva),
1915             file_offset = self.get_offset_from_rva(rva))
1916
1917         if not tls_struct:
1918             return None
1919
1920         return TlsData( struct = tls_struct )
1921
1922
1923     def parse_relocations_directory(self, rva, size):
1924         """"""
1925
1926         rlc = Structure(self.__IMAGE_BASE_RELOCATION_format__)
1927         rlc_size = rlc.sizeof()
1928         end = rva+size
1929
1930         relocations = []
1931         while rva<end:
1932
1933             # OC Patch:
1934             # Malware that has bad rva entries will cause an error.
1935             # Just continue on after an exception
1936             #
1937             try:
1938                 rlc = self.__unpack_data__(
1939                     self.__IMAGE_BASE_RELOCATION_format__,
1940                     self.get_data(rva, rlc_size),
1941                     file_offset = self.get_offset_from_rva(rva) )
1942             except PEFormatError:
1943                 self.__warnings.append(
1944                     'Invalid relocation information. Can\'t read ' +
1945                     'data at RVA: 0x%x' % rva)
1946                 rlc = None
1947
1948             if not rlc:
1949                 break
1950
1951             reloc_entries = self.parse_relocations(
1952                 rva+rlc_size, rlc.VirtualAddress, rlc.SizeOfBlock-rlc_size)
1953
1954             relocations.append(
1955                 BaseRelocationData(
1956                     struct = rlc,
1957                     entries = reloc_entries))
1958
1959             if not rlc.SizeOfBlock:
1960                 break
1961             rva += rlc.SizeOfBlock
1962
1963         return relocations
1964
1965
1966     def parse_relocations(self, data_rva, rva, size):
1967         """"""
1968
1969         data = self.get_data(data_rva, size)
1970
1971         entries = []
1972         for idx in xrange(len(data)/2):
1973             word = struct.unpack('<H', data[idx*2:(idx+1)*2])[0]
1974             reloc_type = (word>>12)
1975             reloc_offset = (word&0x0fff)
1976             entries.append(
1977                 RelocationData(
1978                     type = reloc_type,
1979                     rva = reloc_offset+rva))
1980
1981         return entries
1982
1983
1984     def parse_debug_directory(self, rva, size):
1985         """"""
1986
1987         dbg = Structure(self.__IMAGE_DEBUG_DIRECTORY_format__)
1988         dbg_size = dbg.sizeof()
1989
1990         debug = []
1991         for idx in xrange(size/dbg_size):
1992             try:
1993                 data = self.get_data(rva+dbg_size*idx, dbg_size)
1994             except PEFormatError, e:
1995                 self.__warnings.append(
1996                     'Invalid debug information. Can\'t read ' +
1997                     'data at RVA: 0x%x' % rva)
1998                 return None
1999
2000             dbg = self.__unpack_data__(
2001                 self.__IMAGE_DEBUG_DIRECTORY_format__,
2002                 data, file_offset = self.get_offset_from_rva(rva+dbg_size*idx))
2003
2004             if not dbg:
2005                 return None
2006
2007             debug.append(
2008                 DebugData(
2009                     struct = dbg))
2010
2011         return debug
2012
2013
2014     def parse_resources_directory(self, rva, size=0, base_rva = None, level = 0):
2015         """Parse the resources directory.
2016
2017         Given the rva of the resources directory, it will process all
2018         its entries.
2019
2020         The root will have the corresponding member of its structure,
2021         IMAGE_RESOURCE_DIRECTORY plus 'entries', a list of all the
2022         entries in the directory.
2023
2024         Those entries will have, correspondingly, all the structure's
2025         members (IMAGE_RESOURCE_DIRECTORY_ENTRY) and an additional one,
2026         "directory", pointing to the IMAGE_RESOURCE_DIRECTORY structure
2027         representing upper layers of the tree. This one will also have
2028         an 'entries' attribute, pointing to the 3rd, and last, level.
2029         Another directory with more entries. Those last entries will
2030         have a new atribute (both 'leaf' or 'data_entry' can be used to
2031         access it). This structure finally points to the resource data.
2032         All the members of this structure, IMAGE_RESOURCE_DATA_ENTRY,
2033         are available as its attributes.
2034         """
2035
2036         # OC Patch:
2037         original_rva = rva
2038
2039         if base_rva is None:
2040             base_rva = rva
2041
2042         resources_section = self.get_section_by_rva(rva)
2043
2044         try:
2045             # If the RVA is invalid all would blow up. Some EXEs seem to be
2046             # specially nasty and have an invalid RVA.
2047             data = self.get_data(rva)
2048         except PEFormatError, e:
2049             self.__warnings.append(
2050                 'Invalid resources directory. Can\'t read ' +
2051                 'directory data at RVA: 0x%x' % rva)
2052             return None
2053
2054         # Get the resource directory structure, that is, the header
2055         # of the table preceding the actual entries
2056         #
2057         resource_dir = self.__unpack_data__(
2058             self.__IMAGE_RESOURCE_DIRECTORY_format__, data,
2059             file_offset = self.get_offset_from_rva(rva) )
2060         if resource_dir is None:
2061             # If can't parse resources directory then silently return.
2062             # This directory does not necesarily have to be valid to
2063             # still have a valid PE file
2064             self.__warnings.append(
2065                 'Invalid resources directory. Can\'t parse ' +
2066                 'directory data at RVA: 0x%x' % rva)
2067             return None
2068
2069         dir_entries = []
2070
2071         # Advance the rva to the positon immediately following the directory
2072         # table header and pointing to the first entry in the table
2073         #
2074         rva += resource_dir.sizeof()
2075
2076         number_of_entries = (
2077             resource_dir.NumberOfNamedEntries +
2078             resource_dir.NumberOfIdEntries )
2079
2080         strings_to_postprocess = list()
2081
2082         for idx in xrange(number_of_entries):
2083
2084             res = self.parse_resource_entry(rva)
2085             if res is None:
2086                 self.__warnings.append(
2087                     'Error parsing the resources directory, ' +
2088                     'Entry %d is invalid, RVA = 0x%x. ' %
2089                     (idx, rva) )
2090                 break
2091
2092
2093             entry_name = None
2094             entry_id = None
2095
2096             # If all named entries have been processed, only Id ones
2097             # remain
2098
2099             if idx >= resource_dir.NumberOfNamedEntries:
2100                 entry_id = res.Name
2101             else:
2102                 ustr_offset = base_rva+res.NameOffset
2103                 try:
2104                     #entry_name = self.get_string_u_at_rva(ustr_offset, max_length=16)
2105                     entry_name = UnicodeStringWrapperPostProcessor(self, ustr_offset)
2106                     strings_to_postprocess.append(entry_name)
2107
2108                 except PEFormatError, excp:
2109                     self.__warnings.append(
2110                         'Error parsing the resources directory, ' +
2111                         'attempting to read entry name. ' +
2112                         'Can\'t read unicode string at offset 0x%x' %
2113                         (ustr_offset) )
2114
2115
2116             if res.DataIsDirectory:
2117                 # OC Patch:
2118                 #
2119                 # One trick malware can do is to recursively reference
2120                 # the next directory. This causes hilarity to ensue when
2121                 # trying to parse everything correctly.
2122                 # If the original RVA given to this function is equal to
2123                 # the next one to parse, we assume that it's a trick.
2124                 # Instead of raising a PEFormatError this would skip some
2125                 # reasonable data so we just break.
2126                 #
2127                 # 9ee4d0a0caf095314fd7041a3e4404dc is the offending sample
2128                 if original_rva == (base_rva + res.OffsetToDirectory):
2129
2130                     break
2131
2132                 else:
2133                     entry_directory = self.parse_resources_directory(
2134                         base_rva+res.OffsetToDirectory,
2135                         base_rva=base_rva, level = level+1)
2136
2137                 if not entry_directory:
2138                     break
2139                 dir_entries.append(
2140                     ResourceDirEntryData(
2141                         struct = res,
2142                         name = entry_name,
2143                         id = entry_id,
2144                         directory = entry_directory))
2145
2146             else:
2147                 struct = self.parse_resource_data_entry(
2148                     base_rva + res.OffsetToDirectory)
2149
2150                 if struct:
2151                     entry_data = ResourceDataEntryData(
2152                         struct = struct,
2153                         lang = res.Name & 0xff,
2154                         sublang = (res.Name>>8) & 0xff)
2155
2156                     dir_entries.append(
2157                         ResourceDirEntryData(
2158                             struct = res,
2159                             name = entry_name,
2160                             id = entry_id,
2161                             data = entry_data))
2162
2163                 else:
2164                     break
2165
2166
2167
2168             # Check if this entry contains version information
2169             #
2170             if level == 0 and res.Id == RESOURCE_TYPE['RT_VERSION']:
2171                 if len(dir_entries)>0:
2172                     last_entry = dir_entries[-1]
2173
2174                 rt_version_struct = None
2175                 try:
2176                     rt_version_struct = last_entry.directory.entries[0].directory.entries[0].data.struct
2177                 except:
2178                     # Maybe a malformed directory structure...?
2179                     # Lets ignore it
2180                     pass
2181
2182                 if rt_version_struct is not None:
2183                     self.parse_version_information(rt_version_struct)
2184
2185             rva += res.sizeof()
2186
2187
2188         string_rvas = [s.get_rva() for s in strings_to_postprocess]
2189         string_rvas.sort()
2190
2191         for idx, s in enumerate(strings_to_postprocess):
2192             s.render_pascal_16()
2193
2194
2195         resource_directory_data = ResourceDirData(
2196             struct = resource_dir,
2197             entries = dir_entries)
2198
2199         return resource_directory_data
2200
2201
2202     def parse_resource_data_entry(self, rva):
2203         """Parse a data entry from the resources directory."""
2204
2205         try:
2206             # If the RVA is invalid all would blow up. Some EXEs seem to be
2207             # specially nasty and have an invalid RVA.
2208             data = self.get_data(rva)
2209         except PEFormatError, excp:
2210             self.__warnings.append(
2211                 'Error parsing a resource directory data entry, ' +
2212                 'the RVA is invalid: 0x%x' % ( rva ) )
2213             return None
2214
2215         data_entry = self.__unpack_data__(
2216             self.__IMAGE_RESOURCE_DATA_ENTRY_format__, data,
2217             file_offset = self.get_offset_from_rva(rva) )
2218
2219         return data_entry
2220
2221
2222     def parse_resource_entry(self, rva):
2223         """Parse a directory entry from the resources directory."""
2224
2225         resource = self.__unpack_data__(
2226             self.__IMAGE_RESOURCE_DIRECTORY_ENTRY_format__, self.get_data(rva),
2227             file_offset = self.get_offset_from_rva(rva) )
2228
2229         if resource is None:
2230             return None
2231
2232         #resource.NameIsString = (resource.Name & 0x80000000L) >> 31
2233         resource.NameOffset = resource.Name & 0x7FFFFFFFL
2234
2235         resource.__pad = resource.Name & 0xFFFF0000L
2236         resource.Id = resource.Name & 0x0000FFFFL
2237
2238         resource.DataIsDirectory = (resource.OffsetToData & 0x80000000L) >> 31
2239         resource.OffsetToDirectory = resource.OffsetToData & 0x7FFFFFFFL
2240
2241         return resource
2242
2243
2244     def parse_version_information(self, version_struct):
2245         """Parse version information structure.
2246
2247         The date will be made available in three attributes of the PE object.
2248
2249         VS_VERSIONINFO     will contain the first three fields of the main structure:
2250             'Length', 'ValueLength', and 'Type'
2251
2252         VS_FIXEDFILEINFO    will hold the rest of the fields, accessible as sub-attributes:
2253             'Signature', 'StrucVersion', 'FileVersionMS', 'FileVersionLS',
2254             'ProductVersionMS', 'ProductVersionLS', 'FileFlagsMask', 'FileFlags',
2255             'FileOS', 'FileType', 'FileSubtype', 'FileDateMS', 'FileDateLS'
2256
2257         FileInfo    is a list of all StringFileInfo and VarFileInfo structures.
2258
2259         StringFileInfo structures will have a list as an attribute named 'StringTable'
2260         containing all the StringTable structures. Each of those structures contains a
2261         dictionary 'entries' with all the key/value version information string pairs.
2262
2263         VarFileInfo structures will have a list as an attribute named 'Var' containing
2264         all Var structures. Each Var structure will have a dictionary as an attribute
2265         named 'entry' which will contain the name and value of the Var.
2266         """
2267
2268
2269         # Retrieve the data for the version info resource
2270         #
2271         start_offset = self.get_offset_from_rva( version_struct.OffsetToData )
2272         raw_data = self.__data__[ start_offset : start_offset+version_struct.Size ]
2273
2274
2275         # Map the main structure and the subsequent string
2276         #
2277         versioninfo_struct = self.__unpack_data__(
2278             self.__VS_VERSIONINFO_format__, raw_data,
2279             file_offset = start_offset )
2280
2281         if versioninfo_struct is None:
2282             return
2283
2284         ustr_offset = version_struct.OffsetToData + versioninfo_struct.sizeof()
2285         try:
2286             versioninfo_string = self.get_string_u_at_rva( ustr_offset )
2287         except PEFormatError, excp:
2288             self.__warnings.append(
2289                 'Error parsing the version information, ' +
2290                 'attempting to read VS_VERSION_INFO string. Can\'t ' +
2291                 'read unicode string at offset 0x%x' % (
2292                 ustr_offset ) )
2293
2294             versioninfo_string = None
2295
2296         # If the structure does not contain the expected name, it's assumed to be invalid
2297         #
2298         if versioninfo_string != u'VS_VERSION_INFO':
2299
2300             self.__warnings.append('Invalid VS_VERSION_INFO block')
2301             return
2302
2303
2304         # Set the PE object's VS_VERSIONINFO to this one
2305         #
2306         self.VS_VERSIONINFO = versioninfo_struct
2307
2308         # The the Key attribute to point to the unicode string identifying the structure
2309         #
2310         self.VS_VERSIONINFO.Key = versioninfo_string
2311
2312
2313         # Process the fixed version information, get the offset and structure
2314         #
2315         fixedfileinfo_offset = self.dword_align(
2316             versioninfo_struct.sizeof() + 2 * (len(versioninfo_string) + 1),
2317             version_struct.OffsetToData)
2318         fixedfileinfo_struct = self.__unpack_data__(
2319             self.__VS_FIXEDFILEINFO_format__,
2320             raw_data[fixedfileinfo_offset:],
2321             file_offset = start_offset+fixedfileinfo_offset )
2322
2323         if not fixedfileinfo_struct:
2324             return
2325
2326
2327         # Set the PE object's VS_FIXEDFILEINFO to this one
2328         #
2329         self.VS_FIXEDFILEINFO = fixedfileinfo_struct
2330
2331
2332         # Start parsing all the StringFileInfo and VarFileInfo structures
2333         #
2334
2335         # Get the first one
2336         #
2337         stringfileinfo_offset = self.dword_align(
2338             fixedfileinfo_offset + fixedfileinfo_struct.sizeof(),
2339             version_struct.OffsetToData)
2340         original_stringfileinfo_offset = stringfileinfo_offset
2341
2342
2343         # Set the PE object's attribute that will contain them all.
2344         #
2345         self.FileInfo = list()
2346
2347
2348         while True:
2349
2350             # Process the StringFileInfo/VarFileInfo struct
2351             #
2352             stringfileinfo_struct = self.__unpack_data__(
2353                 self.__StringFileInfo_format__,
2354                 raw_data[stringfileinfo_offset:],
2355                 file_offset = start_offset+stringfileinfo_offset )
2356
2357             if stringfileinfo_struct is None:
2358                 self.__warnings.append(
2359                     'Error parsing StringFileInfo/VarFileInfo struct' )
2360                 return None
2361
2362             # Get the subsequent string defining the structure.
2363             #
2364             ustr_offset = ( version_struct.OffsetToData +
2365                 stringfileinfo_offset + versioninfo_struct.sizeof() )
2366             try:
2367                 stringfileinfo_string = self.get_string_u_at_rva( ustr_offset )
2368             except PEFormatError, excp:
2369                 self.__warnings.append(
2370                     'Error parsing the version information, ' +
2371                     'attempting to read StringFileInfo string. Can\'t ' +
2372                     'read unicode string at offset 0x%x' %  ( ustr_offset ) )
2373                 break
2374
2375             # Set such string as the Key attribute
2376             #
2377             stringfileinfo_struct.Key = stringfileinfo_string
2378
2379
2380             # Append the structure to the PE object's list
2381             #
2382             self.FileInfo.append(stringfileinfo_struct)
2383
2384
2385             # Parse a StringFileInfo entry
2386             #
2387             if stringfileinfo_string == u'StringFileInfo':
2388
2389                 if stringfileinfo_struct.Type == 1 and stringfileinfo_struct.ValueLength == 0:
2390
2391                     stringtable_offset = self.dword_align(
2392                         stringfileinfo_offset + stringfileinfo_struct.sizeof() +
2393                             2*(len(stringfileinfo_string)+1),
2394                         version_struct.OffsetToData)
2395
2396                     stringfileinfo_struct.StringTable = list()
2397
2398                     # Process the String Table entries
2399                     #
2400                     while True:
2401                         stringtable_struct = self.__unpack_data__(
2402                             self.__StringTable_format__,
2403                             raw_data[stringtable_offset:],
2404                             file_offset = start_offset+stringtable_offset )
2405
2406                         if not stringtable_struct:
2407                             break
2408
2409                         ustr_offset = ( version_struct.OffsetToData + stringtable_offset +
2410                             stringtable_struct.sizeof() )
2411                         try:
2412                             stringtable_string = self.get_string_u_at_rva( ustr_offset )
2413                         except PEFormatError, excp:
2414                             self.__warnings.append(
2415                                 'Error parsing the version information, ' +
2416                                 'attempting to read StringTable string. Can\'t ' +
2417                                 'read unicode string at offset 0x%x' % ( ustr_offset ) )
2418                             break
2419
2420                         stringtable_struct.LangID = stringtable_string
2421                         stringtable_struct.entries = dict()
2422                         stringtable_struct.entries_offsets = dict()
2423                         stringtable_struct.entries_lengths = dict()
2424                         stringfileinfo_struct.StringTable.append(stringtable_struct)
2425
2426                         entry_offset = self.dword_align(
2427                             stringtable_offset + stringtable_struct.sizeof() +
2428                                 2*(len(stringtable_string)+1),
2429                             version_struct.OffsetToData)
2430
2431                         # Process all entries in the string table
2432                         #
2433
2434                         while entry_offset < stringtable_offset + stringtable_struct.Length:
2435
2436                             string_struct = self.__unpack_data__(
2437                                 self.__String_format__, raw_data[entry_offset:],
2438                                 file_offset = start_offset+entry_offset )
2439
2440                             if not string_struct:
2441                                 break
2442
2443                             ustr_offset = ( version_struct.OffsetToData + entry_offset +
2444                                 string_struct.sizeof() )
2445                             try:
2446                                 key = self.get_string_u_at_rva( ustr_offset )
2447                                 key_offset = self.get_offset_from_rva( ustr_offset )
2448                             except PEFormatError, excp:
2449                                 self.__warnings.append(
2450                                     'Error parsing the version information, ' +
2451                                     'attempting to read StringTable Key string. Can\'t ' +
2452                                     'read unicode string at offset 0x%x' % ( ustr_offset ) )
2453                                 break
2454
2455                             value_offset = self.dword_align(
2456                                 2*(len(key)+1) + entry_offset + string_struct.sizeof(),
2457                                 version_struct.OffsetToData)
2458
2459                             ustr_offset = version_struct.OffsetToData + value_offset
2460                             try:
2461                                 value = self.get_string_u_at_rva( ustr_offset,
2462                                     max_length = string_struct.ValueLength )
2463                                 value_offset = self.get_offset_from_rva( ustr_offset )
2464                             except PEFormatError, excp:
2465                                 self.__warnings.append(
2466                                     'Error parsing the version information, ' +
2467                                     'attempting to read StringTable Value string. ' +
2468                                     'Can\'t read unicode string at offset 0x%x' % (
2469                                     ustr_offset ) )
2470                                 break
2471
2472                             if string_struct.Length == 0:
2473                                 entry_offset = stringtable_offset + stringtable_struct.Length
2474                             else:
2475                                 entry_offset = self.dword_align(
2476                                     string_struct.Length+entry_offset, version_struct.OffsetToData)
2477
2478                             key_as_char = []
2479                             for c in key:
2480                                 if ord(c)>128:
2481                                     key_as_char.append('\\x%02x' %ord(c))
2482                                 else:
2483                                     key_as_char.append(c)
2484
2485                             key_as_char = ''.join(key_as_char)
2486
2487                             setattr(stringtable_struct, key_as_char, value)
2488                             stringtable_struct.entries[key] = value
2489                             stringtable_struct.entries_offsets[key] = (key_offset, value_offset)
2490                             stringtable_struct.entries_lengths[key] = (len(key), len(value))
2491
2492
2493                         stringtable_offset = self.dword_align(
2494                             stringtable_struct.Length + stringtable_offset,
2495                             version_struct.OffsetToData)
2496                         if stringtable_offset >= stringfileinfo_struct.Length:
2497                             break
2498
2499             # Parse a VarFileInfo entry
2500             #
2501             elif stringfileinfo_string == u'VarFileInfo':
2502
2503                 varfileinfo_struct = stringfileinfo_struct
2504                 varfileinfo_struct.name = 'VarFileInfo'
2505
2506                 if varfileinfo_struct.Type == 1 and varfileinfo_struct.ValueLength == 0:
2507
2508                     var_offset = self.dword_align(
2509                         stringfileinfo_offset + varfileinfo_struct.sizeof() +
2510                             2*(len(stringfileinfo_string)+1),
2511                         version_struct.OffsetToData)
2512
2513                     varfileinfo_struct.Var = list()
2514
2515                     # Process all entries
2516                     #
2517
2518                     while True:
2519                         var_struct = self.__unpack_data__(
2520                             self.__Var_format__,
2521                             raw_data[var_offset:],
2522                             file_offset = start_offset+var_offset )
2523
2524                         if not var_struct:
2525                             break
2526
2527                         ustr_offset = ( version_struct.OffsetToData + var_offset +
2528                             var_struct.sizeof() )
2529                         try:
2530                             var_string = self.get_string_u_at_rva( ustr_offset )
2531                         except PEFormatError, excp:
2532                             self.__warnings.append(
2533                                 'Error parsing the version information, ' +
2534                                 'attempting to read VarFileInfo Var string. ' +
2535                                 'Can\'t read unicode string at offset 0x%x' % (ustr_offset))
2536                             break
2537
2538
2539                         varfileinfo_struct.Var.append(var_struct)
2540
2541                         varword_offset = self.dword_align(
2542                             2*(len(var_string)+1) + var_offset + var_struct.sizeof(),
2543                             version_struct.OffsetToData)
2544                         orig_varword_offset = varword_offset
2545
2546                         while varword_offset < orig_varword_offset + var_struct.ValueLength:
2547                             word1 = self.get_word_from_data(
2548                                 raw_data[varword_offset:varword_offset+2], 0)
2549                             word2 = self.get_word_from_data(
2550                                 raw_data[varword_offset+2:varword_offset+4], 0)
2551                             varword_offset += 4
2552
2553                             var_struct.entry = {var_string: '0x%04x 0x%04x' % (word1, word2)}
2554
2555                         var_offset = self.dword_align(
2556                             var_offset+var_struct.Length, version_struct.OffsetToData)
2557
2558                         if var_offset <= var_offset+var_struct.Length:
2559                             break
2560
2561
2562
2563             # Increment and align the offset
2564             #
2565             stringfileinfo_offset = self.dword_align(
2566                 stringfileinfo_struct.Length+stringfileinfo_offset,
2567                 version_struct.OffsetToData)
2568
2569             # Check if all the StringFileInfo and VarFileInfo items have been processed
2570             #
2571             if stringfileinfo_struct.Length == 0 or stringfileinfo_offset >= versioninfo_struct.Length:
2572                 break
2573
2574
2575
2576     def parse_export_directory(self, rva, size):
2577         """Parse the export directory.
2578
2579         Given the rva of the export directory, it will process all
2580         its entries.
2581
2582         The exports will be made available through a list "exports"
2583         containing a tuple with the following elements:
2584
2585             (ordinal, symbol_address, symbol_name)
2586
2587         And also through a dicionary "exports_by_ordinal" whose keys
2588         will be the ordinals and the values tuples of the from:
2589
2590             (symbol_address, symbol_name)
2591
2592         The symbol addresses are relative, not absolute.
2593         """
2594
2595         try:
2596             export_dir =  self.__unpack_data__(
2597                 self.__IMAGE_EXPORT_DIRECTORY_format__, self.get_data(rva),
2598                 file_offset = self.get_offset_from_rva(rva) )
2599         except PEFormatError:
2600             self.__warnings.append(
2601                 'Error parsing export directory at RVA: 0x%x' % ( rva ) )
2602             return
2603
2604         if not export_dir:
2605             return
2606
2607         try:
2608             address_of_names = self.get_data(
2609                 export_dir.AddressOfNames, export_dir.NumberOfNames*4)
2610             address_of_name_ordinals = self.get_data(
2611                 export_dir.AddressOfNameOrdinals, export_dir.NumberOfNames*4)
2612             address_of_functions = self.get_data(
2613                 export_dir.AddressOfFunctions, export_dir.NumberOfFunctions*4)
2614         except PEFormatError:
2615             self.__warnings.append(
2616                 'Error parsing export directory at RVA: 0x%x' % ( rva ) )
2617             return
2618
2619         exports = []
2620
2621         for i in xrange(export_dir.NumberOfNames):
2622
2623
2624             symbol_name = self.get_string_at_rva(
2625                 self.get_dword_from_data(address_of_names, i))
2626
2627             symbol_ordinal = self.get_word_from_data(
2628                 address_of_name_ordinals, i)
2629
2630
2631             if symbol_ordinal*4<len(address_of_functions):
2632                 symbol_address = self.get_dword_from_data(
2633                     address_of_functions, symbol_ordinal)
2634             else:
2635                 # Corrupt? a bad pointer... we assume it's all
2636                 # useless, no exports
2637                 return None
2638
2639             # If the funcion's rva points within the export directory
2640             # it will point to a string with the forwarded symbol's string
2641             # instead of pointing the the function start address.
2642
2643             if symbol_address>=rva and symbol_address<rva+size:
2644                 forwarder_str = self.get_string_at_rva(symbol_address)
2645             else:
2646                 forwarder_str = None
2647
2648
2649             exports.append(
2650                 ExportData(
2651                     ordinal = export_dir.Base+symbol_ordinal,
2652                     address = symbol_address,
2653                     name = symbol_name,
2654                     forwarder = forwarder_str))
2655
2656         ordinals = [exp.ordinal for exp in exports]
2657
2658         for idx in xrange(export_dir.NumberOfFunctions):
2659
2660             if not idx+export_dir.Base in ordinals:
2661                 symbol_address = self.get_dword_from_data(
2662                     address_of_functions,
2663                     idx)
2664
2665                 #
2666                 # Checking for forwarder again.
2667                 #
2668                 if symbol_address>=rva and symbol_address<rva+size:
2669                     forwarder_str = self.get_string_at_rva(symbol_address)
2670                 else:
2671                     forwarder_str = None
2672
2673                 exports.append(
2674                     ExportData(
2675                         ordinal = export_dir.Base+idx,
2676                         address = symbol_address,
2677                         name = None,
2678                         forwarder = forwarder_str))
2679
2680         return ExportDirData(
2681                 struct = export_dir,
2682                 symbols = exports)
2683
2684
2685     def dword_align(self, offset, base):
2686         offset += base
2687         return (offset+3) - ((offset+3)%4) - base
2688
2689
2690
2691     def parse_delay_import_directory(self, rva, size):
2692         """Walk and parse the delay import directory."""
2693
2694         import_descs =  []
2695         while True:
2696             try:
2697                 # If the RVA is invalid all would blow up. Some PEs seem to be
2698                 # specially nasty and have an invalid RVA.
2699                 data = self.get_data(rva)
2700             except PEFormatError, e:
2701                 self.__warnings.append(
2702                     'Error parsing the Delay import directory at RVA: 0x%x' % ( rva ) )
2703                 break
2704
2705             import_desc =  self.__unpack_data__(
2706                 self.__IMAGE_DELAY_IMPORT_DESCRIPTOR_format__,
2707                 data, file_offset = self.get_offset_from_rva(rva) )
2708
2709
2710             # If the structure is all zeores, we reached the end of the list
2711             if not import_desc or import_desc.all_zeroes():
2712                 break
2713
2714
2715             rva += import_desc.sizeof()
2716
2717             try:
2718                 import_data =  self.parse_imports(
2719                     import_desc.pINT,
2720                     import_desc.pIAT,
2721                     None)
2722             except PEFormatError, e:
2723                 self.__warnings.append(
2724                     'Error parsing the Delay import directory. ' +
2725                     'Invalid import data at RVA: 0x%x' % ( rva ) )
2726                 break
2727
2728             if not import_data:
2729                 continue
2730
2731
2732             dll = self.get_string_at_rva(import_desc.szName)
2733             if dll:
2734                 import_descs.append(
2735                     ImportDescData(
2736                         struct = import_desc,
2737                         imports = import_data,
2738                         dll = dll))
2739
2740         return import_descs
2741
2742
2743
2744     def parse_import_directory(self, rva, size):
2745         """Walk and parse the import directory."""
2746
2747         import_descs =  []
2748         while True:
2749             try:
2750                 # If the RVA is invalid all would blow up. Some EXEs seem to be
2751                 # specially nasty and have an invalid RVA.
2752                 data = self.get_data(rva)
2753             except PEFormatError, e:
2754                 self.__warnings.append(
2755                     'Error parsing the Import directory at RVA: 0x%x' % ( rva ) )
2756                 break
2757
2758             import_desc =  self.__unpack_data__(
2759                 self.__IMAGE_IMPORT_DESCRIPTOR_format__,
2760                 data, file_offset = self.get_offset_from_rva(rva) )
2761
2762             # If the structure is all zeores, we reached the end of the list
2763             if not import_desc or import_desc.all_zeroes():
2764                 break
2765
2766             rva += import_desc.sizeof()
2767
2768             try:
2769                 import_data =  self.parse_imports(
2770                     import_desc.OriginalFirstThunk,
2771                     import_desc.FirstThunk,
2772                     import_desc.ForwarderChain)
2773             except PEFormatError, excp:
2774                 self.__warnings.append(
2775                     'Error parsing the Import directory. ' +
2776                     'Invalid Import data at RVA: 0x%x' % ( rva ) )
2777                 break
2778                 #raise excp
2779
2780             if not import_data:
2781                 continue
2782
2783             dll = self.get_string_at_rva(import_desc.Name)
2784             if dll:
2785                 import_descs.append(
2786                     ImportDescData(
2787                         struct = import_desc,
2788                         imports = import_data,
2789                         dll = dll))
2790
2791         return import_descs
2792
2793
2794
2795     def parse_imports(self, original_first_thunk, first_thunk, forwarder_chain):
2796         """Parse the imported symbols.
2797
2798         It will fill a list, which will be avalable as the dictionary
2799         attribute "imports". Its keys will be the DLL names and the values
2800         all the symbols imported from that object.
2801         """
2802
2803         imported_symbols = []
2804         imports_section = self.get_section_by_rva(first_thunk)
2805         if not imports_section:
2806             raise PEFormatError, 'Invalid/corrupt imports.'
2807
2808
2809         # Import Lookup Table. Contains ordinals or pointers to strings.
2810         ilt = self.get_import_table(original_first_thunk)
2811         # Import Address Table. May have identical content to ILT if
2812         # PE file is not bounded, Will contain the address of the
2813         # imported symbols once the binary is loaded or if it is already
2814         # bound.
2815         iat = self.get_import_table(first_thunk)
2816
2817         # OC Patch:
2818         # Would crash if iat or ilt had None type
2819         if not iat and not ilt:
2820             raise PEFormatError(
2821                 'Invalid Import Table information. ' +
2822                 'Both ILT and IAT appear to be broken.')
2823
2824         if not iat and ilt:
2825             table = ilt
2826         elif iat and not ilt:
2827             table = iat
2828         elif ilt and ((len(ilt) and len(iat)==0) or (len(ilt) == len(iat))):
2829             table = ilt
2830         elif (ilt and len(ilt))==0 and (iat and len(iat)):
2831             table = iat
2832         else:
2833             return None
2834
2835         for idx in xrange(len(table)):
2836
2837             imp_ord = None
2838             imp_hint = None
2839             imp_name = None
2840             hint_name_table_rva = None
2841
2842             if table[idx].AddressOfData:
2843
2844                 if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:
2845                     ordinal_flag = IMAGE_ORDINAL_FLAG
2846                 elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
2847                     ordinal_flag = IMAGE_ORDINAL_FLAG64
2848
2849                 # If imported by ordinal, we will append the ordinal number
2850                 #
2851                 if table[idx].AddressOfData & ordinal_flag:
2852                     import_by_ordinal = True
2853                     imp_ord = table[idx].AddressOfData & 0xffff
2854                     imp_name = None
2855                 else:
2856                     import_by_ordinal = False
2857                     try:
2858                         hint_name_table_rva = table[idx].AddressOfData & 0x7fffffff
2859                         data = self.get_data(hint_name_table_rva, 2)
2860                         # Get the Hint
2861                         imp_hint = self.get_word_from_data(data, 0)
2862                         imp_name = self.get_string_at_rva(table[idx].AddressOfData+2)
2863                     except PEFormatError, e:
2864                         pass
2865
2866             imp_address = first_thunk+self.OPTIONAL_HEADER.ImageBase+idx*4
2867
2868             if iat and ilt and ilt[idx].AddressOfData != iat[idx].AddressOfData:
2869                 imp_bound = iat[idx].AddressOfData
2870             else:
2871                 imp_bound = None
2872
2873             if imp_name != '' and (imp_ord or imp_name):
2874                 imported_symbols.append(
2875                     ImportData(
2876                         import_by_ordinal = import_by_ordinal,
2877                         ordinal = imp_ord,
2878                         hint = imp_hint,
2879                         name = imp_name,
2880                         bound = imp_bound,
2881                         address = imp_address,
2882                         hint_name_table_rva = hint_name_table_rva))
2883
2884         return imported_symbols
2885
2886
2887
2888     def get_import_table(self, rva):
2889
2890         table = []
2891
2892         while True and rva:
2893             try:
2894                 data = self.get_data(rva)
2895             except PEFormatError, e:
2896                 self.__warnings.append(
2897                     'Error parsing the import table. ' +
2898                     'Invalid data at RVA: 0x%x' % ( rva ) )
2899                 return None
2900
2901             if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:
2902                 format = self.__IMAGE_THUNK_DATA_format__
2903             elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
2904                 format = self.__IMAGE_THUNK_DATA64_format__
2905
2906             thunk_data = self.__unpack_data__(
2907                 format, data, file_offset=self.get_offset_from_rva(rva) )
2908
2909             if not thunk_data or thunk_data.all_zeroes():
2910                 break
2911
2912             rva += thunk_data.sizeof()
2913
2914             table.append(thunk_data)
2915
2916         return table
2917
2918
2919     def get_memory_mapped_image(self, max_virtual_address=0x10000000, ImageBase=None):
2920         """Returns the data corresponding to the memory layout of the PE file.
2921
2922         The data includes the PE header and the sections loaded at offsets
2923         corresponding to their relative virtual addresses. (the VirtualAddress
2924         section header member).
2925         Any offset in this data corresponds to the absolute memory address
2926         ImageBase+offset.
2927
2928         The optional argument 'max_virtual_address' provides with means of limiting
2929         which section are processed.
2930         Any section with their VirtualAddress beyond this value will be skipped.
2931         Normally, sections with values beyond this range are just there to confuse
2932         tools. It's a common trick to see in packed executables.
2933
2934         If the 'ImageBase' optional argument is supplied, the file's relocations
2935         will be applied to the image by calling the 'relocate_image()' method.
2936         """
2937
2938         # Collect all sections in one code block
2939         data = self.header
2940         for section in self.sections:
2941
2942             # Miscellanous integrity tests.
2943             # Some packer will set these to bogus values to
2944             # make tools go nuts.
2945             #
2946             if section.Misc_VirtualSize == 0 or section.SizeOfRawData == 0:
2947                 continue
2948
2949             if section.SizeOfRawData > len(self.__data__):
2950                 continue
2951
2952             if section.PointerToRawData > len(self.__data__):
2953                 continue
2954
2955             if section.VirtualAddress >= max_virtual_address:
2956                 continue
2957
2958             padding_length = section.VirtualAddress - len(data)
2959
2960             if padding_length>0:
2961                 data += '\0'*padding_length
2962             elif padding_length<0:
2963                 data = data[:padding_length]
2964
2965             data += section.data
2966
2967         return data
2968
2969
2970     def get_data(self, rva, length=None):
2971         """Get data regardless of the section where it lies on.
2972
2973         Given a rva and the size of the chunk to retrieve, this method
2974         will find the section where the data lies and return the data.
2975         """
2976
2977         s = self.get_section_by_rva(rva)
2978
2979         if not s:
2980             if rva<len(self.header):
2981                 if length:
2982                     end = rva+length
2983                 else:
2984                     end = None
2985                 return self.header[rva:end]
2986
2987             raise PEFormatError, 'data at RVA can\'t be fetched. Corrupt header?'
2988
2989         return s.get_data(rva, length)
2990
2991
2992     def get_rva_from_offset(self, offset):
2993         """Get the rva corresponding to this file offset. """
2994
2995         s = self.get_section_by_offset(offset)
2996         if not s:
2997             raise PEFormatError("specified offset (0x%x) doesn't belong to any section." % offset)
2998         return s.get_rva_from_offset(offset)
2999
3000     def get_offset_from_rva(self, rva):
3001         """Get the file offset corresponding to this rva.
3002
3003         Given a rva , this method will find the section where the
3004         data lies and return the offset within the file.
3005         """
3006
3007         s = self.get_section_by_rva(rva)
3008         if not s:
3009
3010             raise PEFormatError, 'data at RVA can\'t be fetched. Corrupt header?'
3011
3012         return s.get_offset_from_rva(rva)
3013
3014
3015     def get_string_at_rva(self, rva):
3016         """Get an ASCII string located at the given address."""
3017
3018         s = self.get_section_by_rva(rva)
3019         if not s:
3020             if rva<len(self.header):
3021                 return self.get_string_from_data(rva, self.header)
3022             return None
3023
3024         return self.get_string_from_data(rva-s.VirtualAddress, s.data)
3025
3026
3027     def get_string_from_data(self, offset, data):
3028         """Get an ASCII string from within the data."""
3029
3030         # OC Patch
3031         b = None
3032
3033         try:
3034             b = data[offset]
3035         except IndexError:
3036             return ''
3037
3038         s = ''
3039         while ord(b):
3040             s += b
3041             offset += 1
3042             try:
3043                 b = data[offset]
3044             except IndexError:
3045                 break
3046
3047         return s
3048
3049
3050     def get_string_u_at_rva(self, rva, max_length = 2**16):
3051         """Get an Unicode string located at the given address."""
3052
3053         try:
3054             # If the RVA is invalid all would blow up. Some EXEs seem to be
3055             # specially nasty and have an invalid RVA.
3056             data = self.get_data(rva, 2)
3057         except PEFormatError, e:
3058             return None
3059
3060         #length = struct.unpack('<H', data)[0]
3061
3062         s = u''
3063         for idx in xrange(max_length):
3064             try:
3065                 uchr = struct.unpack('<H', self.get_data(rva+2*idx, 2))[0]
3066             except struct.error:
3067                 break
3068
3069             if unichr(uchr) == u'\0':
3070                 break
3071             s += unichr(uchr)
3072
3073         return s
3074
3075
3076     def get_section_by_offset(self, offset):
3077         """Get the section containing the given file offset."""
3078
3079         sections = [s for s in self.sections if s.contains_offset(offset)]
3080
3081         if sections:
3082             return sections[0]
3083
3084         return None
3085
3086
3087     def get_section_by_rva(self, rva):
3088         """Get the section containing the given address."""
3089
3090         sections = [s for s in self.sections if s.contains_rva(rva)]
3091
3092         if sections:
3093             return sections[0]
3094
3095         return None
3096
3097     def __str__(self):
3098         return self.dump_info()
3099
3100
3101     def print_info(self):
3102         """Print all the PE header information in a human readable from."""
3103         print self.dump_info()
3104
3105
3106     def dump_info(self, dump=None):
3107         """Dump all the PE header information into human readable string."""
3108
3109
3110         if dump is None:
3111             dump = Dump()
3112
3113         warnings = self.get_warnings()
3114         if warnings:
3115             dump.add_header('Parsing Warnings')
3116             for warning in warnings:
3117                 dump.add_line(warning)
3118                 dump.add_newline()
3119
3120
3121         dump.add_header('DOS_HEADER')
3122         dump.add_lines(self.DOS_HEADER.dump())
3123         dump.add_newline()
3124
3125         dump.add_header('NT_HEADERS')
3126         dump.add_lines(self.NT_HEADERS.dump())
3127         dump.add_newline()
3128
3129         dump.add_header('FILE_HEADER')
3130         dump.add_lines(self.FILE_HEADER.dump())
3131
3132         image_flags = self.retrieve_flags(IMAGE_CHARACTERISTICS, 'IMAGE_FILE_')
3133
3134         dump.add('Flags: ')
3135         flags = []
3136         for flag in image_flags:
3137             if getattr(self.FILE_HEADER, flag[0]):
3138                 flags.append(flag[0])
3139         dump.add_line(', '.join(flags))
3140         dump.add_newline()
3141
3142         if hasattr(self, 'OPTIONAL_HEADER') and self.OPTIONAL_HEADER is not None:
3143             dump.add_header('OPTIONAL_HEADER')
3144             dump.add_lines(self.OPTIONAL_HEADER.dump())
3145
3146         dll_characteristics_flags = self.retrieve_flags(DLL_CHARACTERISTICS, 'IMAGE_DLL_CHARACTERISTICS_')
3147
3148         dump.add('DllCharacteristics: ')
3149         flags = []
3150         for flag in dll_characteristics_flags:
3151             if getattr(self.OPTIONAL_HEADER, flag[0]):
3152                 flags.append(flag[0])
3153         dump.add_line(', '.join(flags))
3154         dump.add_newline()
3155
3156
3157         dump.add_header('PE Sections')
3158
3159         section_flags = self.retrieve_flags(SECTION_CHARACTERISTICS, 'IMAGE_SCN_')
3160
3161         for section in self.sections:
3162             dump.add_lines(section.dump())
3163             dump.add('Flags: ')
3164             flags = []
3165             for flag in section_flags:
3166                 if getattr(section, flag[0]):
3167                     flags.append(flag[0])
3168             dump.add_line(', '.join(flags))
3169             dump.add_line('Entropy: %f (Min=0.0, Max=8.0)' % section.get_entropy() )
3170             if md5 is not None:
3171                 dump.add_line('MD5     hash: %s' % section.get_hash_md5() )
3172             if sha1 is not None:
3173                 dump.add_line('SHA-1   hash: %s' % section.get_hash_sha1() )
3174             if sha256 is not None:
3175                 dump.add_line('SHA-256 hash: %s' % section.get_hash_sha256() )
3176             if sha512 is not None:
3177                 dump.add_line('SHA-512 hash: %s' % section.get_hash_sha512() )
3178             dump.add_newline()
3179
3180
3181
3182         if (hasattr(self, 'OPTIONAL_HEADER') and
3183             hasattr(self.OPTIONAL_HEADER, 'DATA_DIRECTORY') ):
3184
3185             dump.add_header('Directories')
3186             for idx in xrange(len(self.OPTIONAL_HEADER.DATA_DIRECTORY)):
3187                 directory = self.OPTIONAL_HEADER.DATA_DIRECTORY[idx]
3188                 dump.add_lines(directory.dump())
3189             dump.add_newline()
3190
3191
3192         if hasattr(self, 'VS_VERSIONINFO'):
3193             dump.add_header('Version Information')
3194             dump.add_lines(self.VS_VERSIONINFO.dump())
3195             dump.add_newline()
3196
3197             if hasattr(self, 'VS_FIXEDFILEINFO'):
3198                 dump.add_lines(self.VS_FIXEDFILEINFO.dump())
3199                 dump.add_newline()
3200
3201             if hasattr(self, 'FileInfo'):
3202                 for entry in self.FileInfo:
3203                     dump.add_lines(entry.dump())
3204                     dump.add_newline()
3205
3206                     if hasattr(entry, 'StringTable'):
3207                         for st_entry in entry.StringTable:
3208                             [dump.add_line('  '+line) for line in st_entry.dump()]
3209                             dump.add_line('  LangID: '+st_entry.LangID)
3210                             dump.add_newline()
3211                             for str_entry in st_entry.entries.items():
3212                                 dump.add_line('    '+str_entry[0]+': '+str_entry[1])
3213                         dump.add_newline()
3214
3215                     elif hasattr(entry, 'Var'):
3216                         for var_entry in entry.Var:
3217                             if hasattr(var_entry, 'entry'):
3218                                 [dump.add_line('  '+line) for line in var_entry.dump()]
3219                                 dump.add_line(
3220                                     '    ' + var_entry.entry.keys()[0] +
3221                                     ': ' + var_entry.entry.values()[0])
3222
3223                         dump.add_newline()
3224
3225
3226
3227         if hasattr(self, 'DIRECTORY_ENTRY_EXPORT'):
3228             dump.add_header('Exported symbols')
3229             dump.add_lines(self.DIRECTORY_ENTRY_EXPORT.struct.dump())
3230             dump.add_newline()
3231             dump.add_line('%-10s   %-10s  %s' % ('Ordinal', 'RVA', 'Name'))
3232             for export in self.DIRECTORY_ENTRY_EXPORT.symbols:
3233                 dump.add('%-10d 0x%08Xh    %s' % (
3234                     export.ordinal, export.address, export.name))
3235                 if export.forwarder:
3236                     dump.add_line(' forwarder: %s' % export.forwarder)
3237                 else:
3238                     dump.add_newline()
3239
3240             dump.add_newline()
3241
3242         if hasattr(self, 'DIRECTORY_ENTRY_IMPORT'):
3243             dump.add_header('Imported symbols')
3244             for module in self.DIRECTORY_ENTRY_IMPORT:
3245                 dump.add_lines(module.struct.dump())
3246                 dump.add_newline()
3247                 for symbol in module.imports:
3248
3249                     if symbol.import_by_ordinal is True:
3250                         dump.add('%s Ordinal[%s] (Imported by Ordinal)' % (
3251                             module.dll, str(symbol.ordinal)))
3252                     else:
3253                         dump.add('%s.%s Hint[%s]' % (
3254                             module.dll, symbol.name, str(symbol.hint)))
3255
3256                     if symbol.bound:
3257                         dump.add_line(' Bound: 0x%08X' % (symbol.bound))
3258                     else:
3259                         dump.add_newline()
3260                 dump.add_newline()
3261
3262
3263         if hasattr(self, 'DIRECTORY_ENTRY_BOUND_IMPORT'):
3264             dump.add_header('Bound imports')
3265             for bound_imp_desc in self.DIRECTORY_ENTRY_BOUND_IMPORT:
3266
3267                 dump.add_lines(bound_imp_desc.struct.dump())
3268                 dump.add_line('DLL: %s' % bound_imp_desc.name)
3269                 dump.add_newline()
3270
3271                 for bound_imp_ref in bound_imp_desc.entries:
3272                     dump.add_lines(bound_imp_ref.struct.dump(), 4)
3273                     dump.add_line('DLL: %s' % bound_imp_ref.name, 4)
3274                     dump.add_newline()
3275
3276
3277         if hasattr(self, 'DIRECTORY_ENTRY_DELAY_IMPORT'):
3278             dump.add_header('Delay Imported symbols')
3279             for module in self.DIRECTORY_ENTRY_DELAY_IMPORT:
3280
3281                 dump.add_lines(module.struct.dump())
3282                 dump.add_newline()
3283
3284                 for symbol in module.imports:
3285                     if symbol.import_by_ordinal is True:
3286                         dump.add('%s Ordinal[%s] (Imported by Ordinal)' % (
3287                             module.dll, str(symbol.ordinal)))
3288                     else:
3289                         dump.add('%s.%s Hint[%s]' % (
3290                             module.dll, symbol.name, str(symbol.hint)))
3291
3292                     if symbol.bound:
3293                         dump.add_line(' Bound: 0x%08X' % (symbol.bound))
3294                     else:
3295                         dump.add_newline()
3296                 dump.add_newline()
3297
3298
3299         if hasattr(self, 'DIRECTORY_ENTRY_RESOURCE'):
3300             dump.add_header('Resource directory')
3301
3302             dump.add_lines(self.DIRECTORY_ENTRY_RESOURCE.struct.dump())
3303
3304             for resource_type in self.DIRECTORY_ENTRY_RESOURCE.entries:
3305
3306                 if resource_type.name is not None:
3307                     dump.add_line('Name: [%s]' % resource_type.name, 2)
3308                 else:
3309                     dump.add_line('Id: [0x%X] (%s)' % (
3310                         resource_type.struct.Id, RESOURCE_TYPE.get(
3311                             resource_type.struct.Id, '-')),
3312                         2)
3313
3314                 dump.add_lines(resource_type.struct.dump(), 2)
3315
3316                 if hasattr(resource_type, 'directory'):
3317
3318                     dump.add_lines(resource_type.directory.struct.dump(), 4)
3319
3320                     for resource_id in resource_type.directory.entries:
3321
3322                         if resource_id.name is not None:
3323                             dump.add_line('Name: [%s]' % resource_id.name, 6)
3324                         else:
3325                             dump.add_line('Id: [0x%X]' % resource_id.struct.Id, 6)
3326
3327                         dump.add_lines(resource_id.struct.dump(), 6)
3328
3329                         if hasattr(resource_id, 'directory'):
3330                             dump.add_lines(resource_id.directory.struct.dump(), 8)
3331
3332                             for resource_lang in resource_id.directory.entries:
3333                             #    dump.add_line('\\--- LANG [%d,%d][%s]' % (
3334                             #        resource_lang.data.lang,
3335                             #        resource_lang.data.sublang,
3336                             #        LANG[resource_lang.data.lang]), 8)
3337                                 dump.add_lines(resource_lang.struct.dump(), 10)
3338                                 dump.add_lines(resource_lang.data.struct.dump(), 12)
3339                 dump.add_newline()
3340
3341             dump.add_newline()
3342
3343
3344         if ( hasattr(self, 'DIRECTORY_ENTRY_TLS') and
3345              self.DIRECTORY_ENTRY_TLS and
3346              self.DIRECTORY_ENTRY_TLS.struct ):
3347
3348             dump.add_header('TLS')
3349             dump.add_lines(self.DIRECTORY_ENTRY_TLS.struct.dump())
3350             dump.add_newline()
3351
3352
3353         if hasattr(self, 'DIRECTORY_ENTRY_DEBUG'):
3354             dump.add_header('Debug information')
3355             for dbg in self.DIRECTORY_ENTRY_DEBUG:
3356                 dump.add_lines(dbg.struct.dump())
3357                 try:
3358                     dump.add_line('Type: '+DEBUG_TYPE[dbg.struct.Type])
3359                 except KeyError:
3360                     dump.add_line('Type: 0x%x(Unknown)' % dbg.struct.Type)
3361                 dump.add_newline()
3362
3363
3364         if hasattr(self, 'DIRECTORY_ENTRY_BASERELOC'):
3365             dump.add_header('Base relocations')
3366             for base_reloc in self.DIRECTORY_ENTRY_BASERELOC:
3367                 dump.add_lines(base_reloc.struct.dump())
3368                 for reloc in base_reloc.entries:
3369                     try:
3370                         dump.add_line('%08Xh %s' % (
3371                             reloc.rva, RELOCATION_TYPE[reloc.type][16:]), 4)
3372                     except KeyError:
3373                         dump.add_line('0x%08X 0x%x(Unknown)' % (
3374                             reloc.rva, reloc.type), 4)
3375                 dump.add_newline()
3376
3377
3378         return dump.get_text()
3379
3380     # OC Patch
3381     def get_physical_by_rva(self, rva):
3382         """Gets the physical address in the PE file from an RVA value."""
3383         try:
3384             return self.get_offset_from_rva(rva)
3385         except Exception:
3386             return None
3387
3388
3389     ##
3390     # Double-Word get/set
3391     ##
3392
3393     def get_data_from_dword(self, dword):
3394         """Return a four byte string representing the double word value. (little endian)."""
3395         return struct.pack('<L', dword)
3396
3397
3398     def get_dword_from_data(self, data, offset):
3399         """Convert four bytes of data to a double word (little endian)
3400
3401         'offset' is assumed to index into a dword array. So setting it to
3402         N will return a dword out of the data sarting at offset N*4.
3403
3404         Returns None if the data can't be turned into a double word.
3405         """
3406
3407         if (offset+1)*4 > len(data):
3408             return None
3409
3410         return struct.unpack('<L', data[offset*4:(offset+1)*4])[0]
3411
3412
3413     def get_dword_at_rva(self, rva):
3414         """Return the double word value at the given RVA.
3415
3416         Returns None if the value can't be read, i.e. the RVA can't be mapped
3417         to a file offset.
3418         """
3419
3420         try:
3421             return self.get_dword_from_data(self.get_data(rva)[:4], 0)
3422         except PEFormatError:
3423             return None
3424
3425
3426     def get_dword_from_offset(self, offset):
3427         """Return the double word value at the given file offset. (little endian)"""
3428
3429         if offset+4 > len(self.__data__):
3430             return None
3431
3432         return self.get_dword_from_data(self.__data__[offset:offset+4], 0)
3433
3434
3435     def set_dword_at_rva(self, rva, dword):
3436         """Set the double word value at the file offset corresponding to the given RVA."""
3437         return self.set_bytes_at_rva(rva, self.get_data_from_dword(dword))
3438
3439
3440     def set_dword_at_offset(self, offset, dword):
3441         """Set the double word value at the given file offset."""
3442         return self.set_bytes_at_offset(offset, self.get_data_from_dword(dword))
3443
3444
3445
3446     ##
3447     # Word get/set
3448     ##
3449
3450     def get_data_from_word(self, word):
3451         """Return a two byte string representing the word value. (little endian)."""
3452         return struct.pack('<H', word)
3453
3454
3455     def get_word_from_data(self, data, offset):
3456         """Convert two bytes of data to a word (little endian)
3457
3458         'offset' is assumed to index into a word array. So setting it to
3459         N will return a dword out of the data sarting at offset N*2.
3460
3461         Returns None if the data can't be turned into a word.
3462         """
3463
3464         if (offset+1)*2 > len(data):
3465             return None
3466
3467         return struct.unpack('<H', data[offset*2:(offset+1)*2])[0]
3468
3469
3470     def get_word_at_rva(self, rva):
3471         """Return the word value at the given RVA.
3472
3473         Returns None if the value can't be read, i.e. the RVA can't be mapped
3474         to a file offset.
3475         """
3476
3477         try:
3478             return self.get_word_from_data(self.get_data(rva)[:2], 0)
3479         except PEFormatError:
3480             return None
3481
3482
3483     def get_word_from_offset(self, offset):
3484         """Return the word value at the given file offset. (little endian)"""
3485
3486         if offset+2 > len(self.__data__):
3487             return None
3488
3489         return self.get_word_from_data(self.__data__[offset:offset+2], 0)
3490
3491
3492     def set_word_at_rva(self, rva, word):
3493         """Set the word value at the file offset corresponding to the given RVA."""
3494         return self.set_bytes_at_rva(rva, self.get_data_from_word(word))
3495
3496
3497     def set_word_at_offset(self, offset, word):
3498         """Set the word value at the given file offset."""
3499         return self.set_bytes_at_offset(offset, self.get_data_from_word(word))
3500
3501
3502     ##
3503     # Quad-Word get/set
3504     ##
3505
3506     def get_data_from_qword(self, word):
3507         """Return a eight byte string representing the quad-word value. (little endian)."""
3508         return struct.pack('<Q', word)
3509
3510
3511     def get_qword_from_data(self, data, offset):
3512         """Convert eight bytes of data to a word (little endian)
3513
3514         'offset' is assumed to index into a word array. So setting it to
3515         N will return a dword out of the data sarting at offset N*8.
3516
3517         Returns None if the data can't be turned into a quad word.
3518         """
3519
3520         if (offset+1)*8 > len(data):
3521             return None
3522
3523         return struct.unpack('<Q', data[offset*8:(offset+1)*8])[0]
3524
3525
3526     def get_qword_at_rva(self, rva):
3527         """Return the quad-word value at the given RVA.
3528
3529         Returns None if the value can't be read, i.e. the RVA can't be mapped
3530         to a file offset.
3531         """
3532
3533         try:
3534             return self.get_qword_from_data(self.get_data(rva)[:8], 0)
3535         except PEFormatError:
3536             return None
3537
3538
3539     def get_qword_from_offset(self, offset):
3540         """Return the quad-word value at the given file offset. (little endian)"""
3541
3542         if offset+8 > len(self.__data__):
3543             return None
3544
3545         return self.get_qword_from_data(self.__data__[offset:offset+8], 0)
3546
3547
3548     def set_qword_at_rva(self, rva, qword):
3549         """Set the quad-word value at the file offset corresponding to the given RVA."""
3550         return self.set_bytes_at_rva(rva, self.get_data_from_qword(qword))
3551
3552
3553     def set_qword_at_offset(self, offset, qword):
3554         """Set the quad-word value at the given file offset."""
3555         return self.set_bytes_at_offset(offset, self.get_data_from_qword(qword))
3556
3557
3558
3559     ##
3560     # Set bytes
3561     ##
3562
3563
3564     def set_bytes_at_rva(self, rva, data):
3565         """Overwrite, with the given string, the bytes at the file offset corresponding to the given RVA.
3566
3567         Return True if successful, False otherwise. It can fail if the
3568         offset is outside the file's boundaries.
3569         """
3570
3571         offset = self.get_physical_by_rva(rva)
3572         if not offset:
3573             raise False
3574
3575         return self.set_bytes_at_offset(offset, data)
3576
3577
3578     def set_bytes_at_offset(self, offset, data):
3579         """Overwrite the bytes at the given file offset with the given string.
3580
3581         Return True if successful, False otherwise. It can fail if the
3582         offset is outside the file's boundaries.
3583         """
3584
3585         if not isinstance(data, str):
3586             raise TypeError('data should be of type: str')
3587
3588         if offset >= 0 and offset < len(self.__data__):
3589             self.__data__ = ( self.__data__[:offset] +
3590                 data +
3591                 self.__data__[offset+len(data):] )
3592         else:
3593             return False
3594
3595         # Refresh the section's data with the modified information
3596         #
3597         for section in self.sections:
3598             section_data_start = section.PointerToRawData
3599             section_data_end = section_data_start+section.SizeOfRawData
3600             section.data = self.__data__[section_data_start:section_data_end]
3601
3602         return True
3603
3604
3605
3606     def relocate_image(self, new_ImageBase):
3607         """Apply the relocation information to the image using the provided new image base.
3608
3609         This method will apply the relocation information to the image. Given the new base,
3610         all the relocations will be processed and both the raw data and the section's data
3611         will be fixed accordingly.
3612         The resulting image can be retrieved as well through the method:
3613
3614             get_memory_mapped_image()
3615
3616         In order to get something that would more closely match what could be found in memory
3617         once the Windows loader finished its work.
3618         """
3619
3620         relocation_difference = new_ImageBase - self.OPTIONAL_HEADER.ImageBase
3621
3622
3623         for reloc in self.DIRECTORY_ENTRY_BASERELOC:
3624
3625             virtual_address = reloc.struct.VirtualAddress
3626             size_of_block = reloc.struct.SizeOfBlock
3627
3628             # We iterate with an index because if the relocation is of type
3629             # IMAGE_REL_BASED_HIGHADJ we need to also process the next entry
3630             # at once and skip it for the next interation
3631             #
3632             entry_idx = 0
3633             while entry_idx<len(reloc.entries):
3634
3635                 entry = reloc.entries[entry_idx]
3636                 entry_idx += 1
3637
3638                 if entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_ABSOLUTE']:
3639                     # Nothing to do for this type of relocation
3640                     pass
3641
3642                 elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_HIGH']:
3643                     # Fix the high 16bits of a relocation
3644                     #
3645                     # Add high 16bits of relocation_difference to the
3646                     # 16bit value at RVA=entry.rva
3647
3648                     self.set_word_at_rva(
3649                         entry.rva,
3650                         ( self.get_word_at_rva(entry.rva) + relocation_difference>>16)&0xffff )
3651
3652                 elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_LOW']:
3653                     # Fix the low 16bits of a relocation
3654                     #
3655                     # Add low 16 bits of relocation_difference to the 16bit value
3656                     # at RVA=entry.rva
3657
3658                     self.set_word_at_rva(
3659                         entry.rva,
3660                         ( self.get_word_at_rva(entry.rva) + relocation_difference)&0xffff)
3661
3662                 elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_HIGHLOW']:
3663                     # Handle all high and low parts of a 32bit relocation
3664                     #
3665                     # Add relocation_difference to the value at RVA=entry.rva
3666
3667                     self.set_dword_at_rva(
3668                         entry.rva,
3669                         self.get_dword_at_rva(entry.rva)+relocation_difference)
3670
3671                 elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_HIGHADJ']:
3672                     # Fix the high 16bits of a relocation and adjust
3673                     #
3674                     # Add high 16bits of relocation_difference to the 32bit value
3675                     # composed from the (16bit value at RVA=entry.rva)<<16 plus
3676                     # the 16bit value at the next relocation entry.
3677                     #
3678
3679                     # If the next entry is beyond the array's limits,
3680                     # abort... the table is corrupt
3681                     #
3682                     if entry_idx == len(reloc.entries):
3683                         break
3684
3685                     next_entry = reloc.entries[entry_idx]
3686                     entry_idx += 1
3687                     self.set_word_at_rva( entry.rva,
3688                         ((self.get_word_at_rva(entry.rva)<<16) + next_entry.rva +
3689                         relocation_difference & 0xffff0000) >> 16 )
3690
3691                 elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_DIR64']:
3692                     # Apply the difference to the 64bit value at the offset
3693                     # RVA=entry.rva
3694
3695                     self.set_qword_at_rva(
3696                         entry.rva,
3697                         self.get_qword_at_rva(entry.rva) + relocation_difference)
3698
3699
3700     def verify_checksum(self):
3701
3702         return self.OPTIONAL_HEADER.CheckSum == self.generate_checksum()
3703
3704
3705     def generate_checksum(self):
3706
3707         # Get the offset to the CheckSum field in the OptionalHeader
3708         #
3709         checksum_offset = self.OPTIONAL_HEADER.__file_offset__ + 0x40 # 64
3710
3711         checksum = 0
3712
3713         for i in range( len(self.__data__) / 4 ):
3714
3715             # Skip the checksum field
3716             #
3717             if i == checksum_offset / 4:
3718                 continue
3719
3720             dword = struct.unpack('L', self.__data__[ i*4 : i*4+4 ])[0]
3721             checksum = (checksum & 0xffffffff) + dword + (checksum>>32)
3722             if checksum > 2**32:
3723                 checksum = (checksum & 0xffffffff) + (checksum >> 32)
3724
3725         checksum = (checksum & 0xffff) + (checksum >> 16)
3726         checksum = (checksum) + (checksum >> 16)
3727         checksum = checksum & 0xffff
3728
3729         return checksum + len(self.__data__)