llvm/utils/extract-section.py

   1 #!/usr/bin/env python
   2 from __future__ import print_function
   3 '''
   4 Helper script to print out the raw content of an ELF section.
   5 Example usages:
   6 ```
   7 # print out as bits by default
   8 extract-section.py .text --input-file=foo.o
   9 ```
  10 ```
  11 # read from stdin and print out in hex
  12 cat foo.o | extract-section.py -h .text
  13 ```
  14 This is merely a wrapper around `llvm-readobj` that focuses on the binary
  15 content as well as providing more formatting options.
  16 '''
  17
  18 # Unfortunately reading binary from stdin is not so trivial in Python...
  19 def read_raw_stdin():
  20     import sys
  21     if sys.version_info >= (3, 0):
  22         reading_source = sys.stdin.buffer
  23     else:
  24         # Windows will always read as string so we need some
  25         # special handling
  26         if sys.platform == 'win32':
  27             import os, msvcrt
  28             msvcrt.setformat(sys.stdin.fileno(), os.O_BINARY)
  29         reading_source = sys.stdin
  30     return reading_source.read()
  31
  32 def get_raw_section_dump(readobj_path, section_name, input_file):
  33     import subprocess
  34     cmd = [readobj_path, '--elf-output-style=GNU', '--hex-dump={}'.format(section_name),
  35             input_file]
  36     proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  37
  38     if input_file == '-':
  39         # From stdin
  40         out,_ = proc.communicate(input=read_raw_stdin())
  41     else:
  42         out,_ = proc.communicate()
  43
  44     return out.decode('utf-8') if type(out) is not str else out
  45
  46 if __name__ == '__main__':
  47     import argparse
  48     # The default '-h' (--help) will conflict with our '-h' (hex) format
  49     arg_parser = argparse.ArgumentParser(add_help=False)
  50     arg_parser.add_argument('--readobj-path', metavar='<executable path>', type=str,
  51             help='Path to llvm-readobj')
  52     arg_parser.add_argument('--input-file', metavar='<file>', type=str,
  53             help='Input object file, or \'-\' to read from stdin')
  54     arg_parser.add_argument('section', metavar='<name>', type=str,
  55             help='Name of the section to extract')
  56     # Output format
  57     format_group = arg_parser.add_mutually_exclusive_group()
  58     format_group.add_argument('-b', dest='format', action='store_const', const='bits',
  59             help='Print out in bits')
  60     arg_parser.add_argument('--byte-indicator', action='store_true',
  61             help='Whether to print a \'.\' every 8 bits in bits printing mode')
  62     arg_parser.add_argument('--bits-endian', metavar='<little/big>', type=str,
  63             choices=['little', 'big'],
  64             help='Print out bits in specified endianness (little or big); defaults to big')
  65     format_group.add_argument('-h', dest='format', action='store_const', const='hex',
  66             help='Print out in hexadecimal')
  67     arg_parser.add_argument('--hex-width', metavar='<# of bytes>', type=int,
  68             help='The width (in byte) of every element in hex printing mode')
  69
  70     arg_parser.add_argument('--help', action='help')
  71     arg_parser.set_defaults(format='bits', tool_path='llvm-readobj', input_file='-',
  72             byte_indicator=False, hex_width=4, bits_endian='big')
  73     args = arg_parser.parse_args()
  74
  75     raw_section = get_raw_section_dump(args.tool_path, args.section, args.input_file)
  76
  77     results = []
  78     for line in raw_section.splitlines(False):
  79         if line.startswith('Hex dump'):
  80             continue
  81         parts = line.strip().split(' ')[1:]
  82         for part in parts[:4]:
  83             # exclude any non-hex dump string
  84             try:
  85                 val = int(part, 16)
  86                 if args.format == 'bits':
  87                     # divided into bytes first
  88                     offsets = (24, 16, 8, 0)
  89                     if args.bits_endian == 'little':
  90                         offsets = (0, 8, 16, 24)
  91                     for byte in [(val >> off) & 0xFF for off in offsets]:
  92                         for bit in [(byte >> off) & 1 for off in range(7, -1, -1)]:
  93                             results.append(str(bit))
  94                         if args.byte_indicator:
  95                             results.append('.')
  96                 elif args.format == 'hex':
  97                     assert args.hex_width <= 4 and args.hex_width > 0
  98                     width_bits = args.hex_width * 8
  99                     offsets = [off for off in range(32 - width_bits, -1, -width_bits)]
 100                     mask = (1 << width_bits) - 1
 101                     format_str = "{:0" + str(args.hex_width * 2) + "x}"
 102                     for word in [(val >> i) & mask for i in offsets]:
 103                         results.append(format_str.format(word))
 104             except:
 105                 break
 106     print(' '.join(results), end='')