1 #===- object.py - Python Object Bindings --------------------*- python -*--===#
3 # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 # See https://llvm.org/LICENSE.txt for license information.
5 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 #===------------------------------------------------------------------------===#
13 This module provides an interface for reading information from object files
14 (e.g. binary executables and libraries).
16 Using this module, you can obtain information about an object file's sections,
17 symbols, and relocations. These are represented by the classes ObjectFile,
18 Section, Symbol, and Relocation, respectively.
The only way to use this module is to start by creating an ObjectFile. You can
create an ObjectFile by loading a file (specified by its path) or by creating an
llvm.core.MemoryBuffer and loading that.
27 Once you have an object file, you can inspect its sections and symbols directly
28 by calling get_sections() and get_symbols() respectively. To inspect
29 relocations, call get_relocations() on a Section instance.
The LLVM bindings expose iteration over sections, symbols, and relocations in a
way that only allows one instance to be operated on at a time. This is
slightly annoying from a Python perspective, as it isn't very Pythonic to have
objects that "expire" while the program still holds references to them.
39 To aid working around this limitation, each Section, Symbol, and Relocation
40 instance caches its properties after first access. So, if the underlying
41 iterator is advanced, the properties can still be obtained provided they have
42 already been retrieved.
44 In addition, we also provide a "cache" method on each class to cache all
45 available data. You can call this on each obtained instance. Or, you can pass
46 cache=True to the appropriate get_XXX() method to have this done for you.
Here are some examples of how to perform iteration:
50 obj = ObjectFile(filename='/bin/ls')
    # This is OK. Each Section is only accessed inside its own iteration slot.
    section_names = []
    for section in obj.get_sections():
        section_names.append(section.name)
57 # This is NOT OK. You perform a lookup after the object has expired.
58 symbols = list(obj.get_symbols())
59 for symbol in symbols:
60 print symbol.name # This raises because the object has expired.
    # In this example, we mix a working and failing scenario.
    symbols = []
    for symbol in obj.get_symbols():
        symbols.append(symbol)
        print symbol.name

    for symbol in symbols:
        print symbol.name # OK
        print symbol.address # NOT OK. We didn't look up this property before.
72 # Cache everything up front.
73 symbols = list(obj.get_symbols(cache=True))
74 for symbol in symbols:
75 print symbol.name # OK
79 from ctypes
import c_char_p
80 from ctypes
import c_char
81 from ctypes
import POINTER
82 from ctypes
import c_uint64
83 from ctypes
import string_at
85 from .common
import CachedProperty
86 from .common
import LLVMObject
87 from .common
import c_object_p
88 from .common
import get_library
89 from .core
import MemoryBuffer
# NOTE(review): the listing this class was recovered from had dropped several
# lines (conditionals, loop scaffolding, yields). They are restored here from
# the surviving statements and docstrings -- confirm against the canonical file.
class ObjectFile(LLVMObject):
    """Represents an object/binary file."""

    def __init__(self, filename=None, contents=None):
        """Construct an instance from a filename or an in-memory buffer.

        filename, when given, is passed to MemoryBuffer(filename=...) and the
        resulting buffer is used; it takes precedence over contents.
        contents, when given, must be an llvm.core.MemoryBuffer instance.

        Raises an Exception if neither argument supplies any input.
        """
        if contents is not None:
            assert isinstance(contents, MemoryBuffer)

        if filename is not None:
            contents = MemoryBuffer(filename=filename)

        if contents is None:
            raise Exception('No input found.')

        ptr = lib.LLVMCreateObjectFile(contents)
        LLVMObject.__init__(self, ptr, disposer=lib.LLVMDisposeObjectFile)
        # NOTE(review): presumably ties the buffer's lifetime to this object;
        # see LLVMObject.take_ownership to confirm.
        self.take_ownership(contents)

    def get_sections(self, cache=False):
        """Obtain the sections in this object file.

        This is a generator for llvm.object.Section instances. If cache is
        true, cache() is called on each Section before it is yielded.

        Sections are exposed as limited-use objects. See the module's
        documentation on iterators for more.
        """
        sections = lib.LLVMGetSections(self)
        last = None
        try:
            while not lib.LLVMIsSectionIteratorAtEnd(self, sections):
                last = Section(sections)
                if cache:
                    last.cache()

                yield last

                # Advancing the shared iterator invalidates the Section that
                # was just handed out, so mark it expired.
                lib.LLVMMoveToNextSection(sections)
                last.expire()
        finally:
            # Runs on normal exhaustion, on an exception, and when the
            # consumer abandons the generator early, so the native iterator
            # is always released.
            if last is not None:
                last.expire()

            lib.LLVMDisposeSectionIterator(sections)

    def get_symbols(self, cache=False):
        """Obtain the symbols in this object file.

        This is a generator for llvm.object.Symbol instances. If cache is
        true, cache() is called on each Symbol before it is yielded.

        Each Symbol instance is a limited-use object. See this module's
        documentation on iterators for more.
        """
        symbols = lib.LLVMGetSymbols(self)
        last = None
        try:
            while not lib.LLVMIsSymbolIteratorAtEnd(self, symbols):
                last = Symbol(symbols, self)
                if cache:
                    last.cache()

                yield last

                # Advancing the shared iterator invalidates the Symbol that
                # was just handed out, so mark it expired.
                lib.LLVMMoveToNextSymbol(symbols)
                last.expire()
        finally:
            # Always release the native iterator, even if iteration stops
            # early or raises.
            if last is not None:
                last.expire()

            lib.LLVMDisposeSymbolIterator(symbols)
# NOTE(review): the listing this class was recovered from had dropped the
# property headers/decorators, the expiry guards and the generator scaffolding.
# They are restored here from the surviving statements (the raise messages,
# the getattr() names in cache(), the module docstring) -- confirm against the
# canonical file.
class Section(LLVMObject):
    """Represents a section in an object file."""

    def __init__(self, ptr):
        """Construct a new section instance.

        Section instances can currently only be created from an ObjectFile
        instance. Therefore, this constructor should not be used outside of
        this module.
        """
        LLVMObject.__init__(self, ptr)

        self.expired = False

    @CachedProperty
    def name(self):
        """Obtain the string name of the section.

        This is typically something like '.dynsym' or '.rodata'.
        """
        if self.expired:
            raise Exception('Section instance has expired.')

        return lib.LLVMGetSectionName(self)

    @CachedProperty
    def size(self):
        """The size of the section, in bytes."""
        if self.expired:
            raise Exception('Section instance has expired.')

        return lib.LLVMGetSectionSize(self)

    @CachedProperty
    def contents(self):
        """The raw contents of the section, or None if LLVM returns NULL."""
        if self.expired:
            raise Exception('Section instance has expired.')

        siz = self.size

        r = lib.LLVMGetSectionContents(self)
        if r:
            # The data is not NUL-terminated, so copy exactly `siz` bytes.
            return string_at(r, siz)
        return None

    @CachedProperty
    def address(self):
        """The address of this section."""
        if self.expired:
            raise Exception('Section instance has expired.')

        return lib.LLVMGetSectionAddress(self)

    def has_symbol(self, symbol):
        """Returns whether a Symbol instance is present in this Section."""
        if self.expired:
            raise Exception('Section instance has expired.')

        assert isinstance(symbol, Symbol)
        return lib.LLVMGetSectionContainsSymbol(self, symbol)

    def get_relocations(self, cache=False):
        """Obtain the relocations in this Section.

        This is a generator for llvm.object.Relocation instances. If cache is
        true, cache() is called on each Relocation before it is yielded.

        Each instance is a limited-use object. See this module's documentation
        on iterators for more.
        """
        if self.expired:
            raise Exception('Section instance has expired.')

        relocations = lib.LLVMGetRelocations(self)
        last = None
        try:
            while not lib.LLVMIsRelocationIteratorAtEnd(self, relocations):
                last = Relocation(relocations)
                if cache:
                    last.cache()

                yield last

                # Advancing the shared iterator invalidates the Relocation
                # that was just handed out, so mark it expired.
                lib.LLVMMoveToNextRelocation(relocations)
                last.expire()
        finally:
            # Always release the native iterator, even if iteration stops
            # early or raises.
            if last is not None:
                last.expire()

            lib.LLVMDisposeRelocationIterator(relocations)

    def cache(self):
        """Cache properties of this Section.

        This can be called as a workaround to the single active Section
        limitation. When called, the properties of the Section are fetched so
        they are still available after the Section has been marked inactive.
        """
        getattr(self, 'name')
        getattr(self, 'size')
        getattr(self, 'contents')
        getattr(self, 'address')

    def expire(self):
        """Expire the section.

        This is called internally by the section iterator.
        """
        self.expired = True
# NOTE(review): the listing this class was recovered from had dropped the
# property headers/decorators and the expiry guards. They are restored here
# from the surviving statements (the raise messages, the getattr() names in
# cache(), the docstrings) -- confirm against the canonical file.
class Symbol(LLVMObject):
    """Represents a symbol in an object file."""

    def __init__(self, ptr, object_file):
        """Wrap a symbol pointer belonging to the given ObjectFile.

        ptr must be a c_object_p and object_file an ObjectFile; the object
        file is kept so that the containing Section can be looked up later.
        """
        assert isinstance(ptr, c_object_p)
        assert isinstance(object_file, ObjectFile)

        LLVMObject.__init__(self, ptr)

        self.expired = False
        self._object_file = object_file

    @CachedProperty
    def name(self):
        """The str name of the symbol.

        This is often a function or variable name. Keep in mind that name
        mangling could be in effect.
        """
        if self.expired:
            raise Exception('Symbol instance has expired.')

        return lib.LLVMGetSymbolName(self)

    @CachedProperty
    def address(self):
        """The address of this symbol."""
        if self.expired:
            raise Exception('Symbol instance has expired.')

        return lib.LLVMGetSymbolAddress(self)

    @CachedProperty
    def size(self):
        """The size of the symbol, in bytes."""
        if self.expired:
            raise Exception('Symbol instance has expired.')

        return lib.LLVMGetSymbolSize(self)

    @CachedProperty
    def section(self):
        """The Section to which this Symbol belongs.

        The returned Section instance does not expire, unlike Sections that are
        commonly obtained through iteration.

        Because this obtains a new section iterator each time it is accessed,
        calling this on a number of Symbol instances could be expensive.
        """
        # Same guard as the other accessors: once expired, this object no
        # longer designates the symbol it was created for.
        if self.expired:
            raise Exception('Symbol instance has expired.')

        sections = lib.LLVMGetSections(self._object_file)
        lib.LLVMMoveToContainingSection(sections, self)

        # The iterator is intentionally not disposed here: the returned
        # Section wraps it.
        return Section(sections)

    def cache(self):
        """Cache all cacheable properties."""
        getattr(self, 'name')
        getattr(self, 'address')
        getattr(self, 'size')

    def expire(self):
        """Mark the object as expired to prevent future API accesses.

        This is called internally by this module and it is unlikely that
        external callers have a legitimate reason for using it.
        """
        self.expired = True
# NOTE(review): the listing this class was recovered from had dropped the
# property headers/decorators, the expiry guards and the return statement of
# `symbol`. They are restored here from the surviving statements and
# docstrings -- confirm against the canonical file.
class Relocation(LLVMObject):
    """Represents a relocation definition."""

    def __init__(self, ptr, object_file=None):
        """Create a new relocation instance.

        Relocations are created from objects derived from Section instances.
        Therefore, this constructor should not be called outside of this
        module. See Section.get_relocations() for the proper method to obtain
        a Relocation instance.

        object_file is optional; it is only needed to resolve the `symbol`
        property, because Symbol requires the ObjectFile it belongs to.
        """
        assert isinstance(ptr, c_object_p)

        LLVMObject.__init__(self, ptr)

        self.expired = False
        self._object_file = object_file

    @CachedProperty
    def offset(self):
        """The offset of this relocation."""
        if self.expired:
            raise Exception('Relocation instance has expired.')

        return lib.LLVMGetRelocationOffset(self)

    @CachedProperty
    def symbol(self):
        """The Symbol corresponding to this Relocation.

        Raises an Exception if this Relocation was created without an
        ObjectFile, since a Symbol cannot be constructed without one.
        """
        if self.expired:
            raise Exception('Relocation instance has expired.')

        if self._object_file is None:
            raise Exception('Relocation has no associated ObjectFile; '
                            'cannot construct its Symbol.')

        ptr = lib.LLVMGetRelocationSymbol(self)
        return Symbol(ptr, self._object_file)

    @CachedProperty
    def type_number(self):
        """The relocation type, as an integer."""
        if self.expired:
            raise Exception('Relocation instance has expired.')

        return lib.LLVMGetRelocationType(self)

    @CachedProperty
    def type_name(self):
        """The relocation type's name, as a str."""
        if self.expired:
            raise Exception('Relocation instance has expired.')

        return lib.LLVMGetRelocationTypeName(self)

    @CachedProperty
    def value_string(self):
        """The value reported by LLVMGetRelocationValueString."""
        if self.expired:
            raise Exception('Relocation instance has expired.')

        return lib.LLVMGetRelocationValueString(self)

    def expire(self):
        """Expire this instance, making future API accesses fail."""
        self.expired = True

    def cache(self):
        """Cache all cacheable properties on this instance.

        Only attributes this class actually defines are fetched. `symbol` is
        included only when an ObjectFile is available, since it cannot be
        resolved otherwise.
        """
        getattr(self, 'offset')
        getattr(self, 'type_number')
        getattr(self, 'type_name')
        getattr(self, 'value_string')
        if self._object_file is not None:
            getattr(self, 'symbol')
def register_library(library):
    """Register function prototypes with LLVM library instance.

    For each Object.h entry point used by this module, the `argtypes`
    attribute of the matching function on `library` is set, and `restype` is
    set for those functions that have one listed below.
    """
    # Marks functions whose restype is deliberately left untouched.
    unset = object()

    prototypes = (
        # (function name, argtypes, restype)
        ('LLVMCreateObjectFile', [MemoryBuffer], c_object_p),
        ('LLVMDisposeObjectFile', [ObjectFile], unset),

        ('LLVMGetSections', [ObjectFile], c_object_p),
        ('LLVMDisposeSectionIterator', [c_object_p], unset),
        ('LLVMIsSectionIteratorAtEnd', [ObjectFile, c_object_p], bool),
        ('LLVMMoveToNextSection', [c_object_p], unset),
        ('LLVMMoveToContainingSection', [c_object_p, c_object_p], unset),

        ('LLVMGetSymbols', [ObjectFile], c_object_p),
        ('LLVMDisposeSymbolIterator', [c_object_p], unset),
        ('LLVMIsSymbolIteratorAtEnd', [ObjectFile, c_object_p], bool),
        ('LLVMMoveToNextSymbol', [c_object_p], unset),

        ('LLVMGetSectionName', [c_object_p], c_char_p),
        ('LLVMGetSectionSize', [c_object_p], c_uint64),
        # Can't use c_char_p here as it isn't a NUL-terminated string.
        ('LLVMGetSectionContents', [c_object_p], POINTER(c_char)),
        ('LLVMGetSectionAddress', [c_object_p], c_uint64),
        ('LLVMGetSectionContainsSymbol', [c_object_p, c_object_p], bool),

        ('LLVMGetRelocations', [c_object_p], c_object_p),
        ('LLVMDisposeRelocationIterator', [c_object_p], unset),
        ('LLVMIsRelocationIteratorAtEnd', [c_object_p, c_object_p], bool),
        ('LLVMMoveToNextRelocation', [c_object_p], unset),

        ('LLVMGetSymbolName', [Symbol], c_char_p),
        ('LLVMGetSymbolAddress', [Symbol], c_uint64),
        ('LLVMGetSymbolSize', [Symbol], c_uint64),

        ('LLVMGetRelocationOffset', [c_object_p], c_uint64),
        ('LLVMGetRelocationSymbol', [c_object_p], c_object_p),
        ('LLVMGetRelocationType', [c_object_p], c_uint64),
        ('LLVMGetRelocationTypeName', [c_object_p], c_char_p),
        ('LLVMGetRelocationValueString', [c_object_p], c_char_p),
    )

    for func_name, arg_types, result_type in prototypes:
        func = getattr(library, func_name)
        func.argtypes = arg_types
        if result_type is not unset:
            func.restype = result_type
# Module-wide library handle used by every wrapper above (the `lib.LLVM...`
# calls). It is obtained from get_library() and its prototypes are registered
# at import time so the wrappers can be called immediately.
lib = get_library()
register_library(lib)