bolt/test/link_fdata.py

   1 #!/usr/bin/env python3
   2
   3 """
This script reads the input file, extracts all lines starting with
"# FDATA: " (or a given prefix instead of "FDATA"), parses the directives,
replaces symbol names ("#name#") with either symbol values or with offsets from
respective anchor symbols, and writes the result to the output file.
   8 """
   9
  10 import argparse
  11 import subprocess
  12 import sys
  13 import re
  14
  15 parser = argparse.ArgumentParser()
  16 parser.add_argument("input")
  17 parser.add_argument("objfile", help="Object file to extract symbol values from")
  18 parser.add_argument("output")
  19 parser.add_argument("prefix", nargs="?", default="FDATA", help="Custom FDATA prefix")
  20 parser.add_argument("--nmtool", default="nm", help="Path to nm tool")
  21 parser.add_argument("--no-lbr", action="store_true")
  22
  23 args = parser.parse_args()
  24
  25 # Regexes to extract FDATA lines from input and parse FDATA and pre-aggregated
  26 # profile data
  27 prefix_pat = re.compile(f"^# {args.prefix}: (.*)")
  28
  29 # FDATA records:
  30 # <is symbol?> <closest elf symbol or DSO name> <relative FROM address>
  31 # <is symbol?> <closest elf symbol or DSO name> <relative TO address>
  32 # <number of mispredictions> <number of branches>
  33 fdata_pat = re.compile(r"([01].*) (?P<exec>\d+) (?P<mispred>\d+)")
  34
  35 # Pre-aggregated profile:
  36 # {B|F|f} [<start_id>:]<start_offset> [<end_id>:]<end_offset> <count>
  37 # [<mispred_count>]
  38 preagg_pat = re.compile(r"(?P<type>[BFf]) (?P<offsets_count>.*)")
  39
  40 # No-LBR profile:
  41 # <is symbol?> <closest elf symbol or DSO name> <relative address> <count>
  42 nolbr_pat = re.compile(r"([01].*) (?P<count>\d+)")
  43
  44 # Replacement symbol: #symname#
  45 replace_pat = re.compile(r"#(?P<symname>[^#]+)#")
  46
  47 # Read input and construct the representation of fdata expressions
  48 # as (src_tuple, dst_tuple, mispred_count, exec_count) tuples, where src and dst
  49 # are represented as (is_sym, anchor, offset) tuples
  50 exprs = []
  51 with open(args.input, "r") as f:
  52     for line in f.readlines():
  53         prefix_match = prefix_pat.match(line)
  54         if not prefix_match:
  55             continue
  56         profile_line = prefix_match.group(1)
  57         fdata_match = fdata_pat.match(profile_line)
  58         preagg_match = preagg_pat.match(profile_line)
  59         nolbr_match = nolbr_pat.match(profile_line)
  60         if fdata_match:
  61             src_dst, execnt, mispred = fdata_match.groups()
  62             # Split by whitespaces not preceded by a backslash (negative lookbehind)
  63             chunks = re.split(r"(?<!\\) +", src_dst)
  64             # Check if the number of records separated by non-escaped whitespace
  65             # exactly matches the format.
  66             assert (
  67                 len(chunks) == 6
  68             ), f"ERROR: wrong format/whitespaces must be escaped:\n{line}"
  69             exprs.append(("FDATA", (*chunks, execnt, mispred)))
  70         elif nolbr_match:
  71             loc, count = nolbr_match.groups()
  72             # Split by whitespaces not preceded by a backslash (negative lookbehind)
  73             chunks = re.split(r"(?<!\\) +", loc)
  74             # Check if the number of records separated by non-escaped whitespace
  75             # exactly matches the format.
  76             assert (
  77                 len(chunks) == 3
  78             ), f"ERROR: wrong format/whitespaces must be escaped:\n{line}"
  79             exprs.append(("NOLBR", (*chunks, count)))
  80         elif preagg_match:
  81             exprs.append(("PREAGG", preagg_match.groups()))
  82         else:
  83             exit("ERROR: unexpected input:\n%s" % line)
  84
  85 # Read nm output: <symbol value> <symbol type> <symbol name>
  86 nm_output = subprocess.run(
  87     [args.nmtool, "--defined-only", args.objfile], text=True, capture_output=True
  88 ).stdout
  89 # Populate symbol map
  90 symbols = {}
  91 for symline in nm_output.splitlines():
  92     symval, _, symname = symline.split(maxsplit=2)
  93     symbols[symname] = symval
  94
  95
  96 def evaluate_symbol(issym, anchor, offsym):
  97     sym_match = replace_pat.match(offsym)
  98     if not sym_match:
  99         # No need to evaluate symbol value, return as is
 100         return f"{issym} {anchor} {offsym}"
 101     symname = sym_match.group("symname")
 102     assert symname in symbols, f"ERROR: symbol {symname} is not defined in binary"
 103     # Evaluate to an absolute offset if issym is false
 104     if issym == "0":
 105         return f"{issym} {anchor} {symbols[symname]}"
 106     # Evaluate symbol against its anchor if issym is true
 107     assert anchor in symbols, f"ERROR: symbol {anchor} is not defined in binary"
 108     anchor_value = int(symbols[anchor], 16)
 109     symbol_value = int(symbols[symname], 16)
 110     sym_offset = symbol_value - anchor_value
 111     return f'{issym} {anchor} {format(sym_offset, "x")}'
 112
 113
 114 def replace_symbol(matchobj):
 115     """
 116     Expects matchobj to only capture one group which contains the symbol name.
 117     """
 118     symname = matchobj.group("symname")
 119     assert symname in symbols, f"ERROR: symbol {symname} is not defined in binary"
 120     return symbols[symname]
 121
 122
 123 with open(args.output, "w", newline="\n") as f:
 124     if args.no_lbr:
 125         print("no_lbr", file=f)
 126     for etype, expr in exprs:
 127         if etype == "FDATA":
 128             issym1, anchor1, offsym1, issym2, anchor2, offsym2, execnt, mispred = expr
 129             print(
 130                 evaluate_symbol(issym1, anchor1, offsym1),
 131                 evaluate_symbol(issym2, anchor2, offsym2),
 132                 execnt,
 133                 mispred,
 134                 file=f,
 135             )
 136         elif etype == "NOLBR":
 137             issym, anchor, offsym, count = expr
 138             print(evaluate_symbol(issym, anchor, offsym), count, file=f)
 139         elif etype == "PREAGG":
 140             # Replace all symbols enclosed in ##
 141             print(expr[0], re.sub(replace_pat, replace_symbol, expr[1]), file=f)
 142         else:
 143             exit("ERROR: unhandled expression type:\n%s" % etype)