bolt/test/link_fdata.py

   1 #!/usr/bin/env python3
   2
   3 """
This script reads the given input file, extracts all lines starting with
"# FDATA: " (or a given prefix instead of "FDATA"), parses the directives,
replaces symbol names ("#name#") with either symbol values or with offsets from
respective anchor symbols, and writes the resulting profile to the output file.
   8 """
   9
  10 import argparse
  11 import subprocess
  12 import sys
  13 import re
  14
  15 parser = argparse.ArgumentParser()
  16 parser.add_argument("input")
  17 parser.add_argument("objfile", help="Object file to extract symbol values from")
  18 parser.add_argument("output")
  19 parser.add_argument("prefix", nargs="?", default="FDATA", help="Custom FDATA prefix")
  20 parser.add_argument("--nmtool", default="nm", help="Path to nm tool")
  21 parser.add_argument("--no-lbr", action="store_true")
  22 parser.add_argument("--no-redefine", action="store_true")
  23
  24 args = parser.parse_args()
  25
  26 # Regexes to extract FDATA lines from input and parse FDATA and pre-aggregated
  27 # profile data
  28 prefix_pat = re.compile(f"^# {args.prefix}: (.*)")
  29
  30 # FDATA records:
  31 # <is symbol?> <closest elf symbol or DSO name> <relative FROM address>
  32 # <is symbol?> <closest elf symbol or DSO name> <relative TO address>
  33 # <number of mispredictions> <number of branches>
  34 fdata_pat = re.compile(r"([01].*) (?P<exec>\d+) (?P<mispred>\d+)")
  35
  36 # Pre-aggregated profile:
  37 # {B|F|f} [<start_id>:]<start_offset> [<end_id>:]<end_offset> <count>
  38 # [<mispred_count>]
  39 preagg_pat = re.compile(r"(?P<type>[BFf]) (?P<offsets_count>.*)")
  40
  41 # No-LBR profile:
  42 # <is symbol?> <closest elf symbol or DSO name> <relative address> <count>
  43 nolbr_pat = re.compile(r"([01].*) (?P<count>\d+)")
  44
  45 # Replacement symbol: #symname#
  46 replace_pat = re.compile(r"#(?P<symname>[^#]+)#")
  47
  48 # Read input and construct the representation of fdata expressions
  49 # as (src_tuple, dst_tuple, mispred_count, exec_count) tuples, where src and dst
  50 # are represented as (is_sym, anchor, offset) tuples
  51 exprs = []
  52 with open(args.input, "r") as f:
  53     for line in f.readlines():
  54         prefix_match = prefix_pat.match(line)
  55         if not prefix_match:
  56             continue
  57         profile_line = prefix_match.group(1)
  58         fdata_match = fdata_pat.match(profile_line)
  59         preagg_match = preagg_pat.match(profile_line)
  60         nolbr_match = nolbr_pat.match(profile_line)
  61         if fdata_match:
  62             src_dst, execnt, mispred = fdata_match.groups()
  63             # Split by whitespaces not preceded by a backslash (negative lookbehind)
  64             chunks = re.split(r"(?<!\\) +", src_dst)
  65             # Check if the number of records separated by non-escaped whitespace
  66             # exactly matches the format.
  67             assert (
  68                 len(chunks) == 6
  69             ), f"ERROR: wrong format/whitespaces must be escaped:\n{line}"
  70             exprs.append(("FDATA", (*chunks, execnt, mispred)))
  71         elif nolbr_match:
  72             loc, count = nolbr_match.groups()
  73             # Split by whitespaces not preceded by a backslash (negative lookbehind)
  74             chunks = re.split(r"(?<!\\) +", loc)
  75             # Check if the number of records separated by non-escaped whitespace
  76             # exactly matches the format.
  77             assert (
  78                 len(chunks) == 3
  79             ), f"ERROR: wrong format/whitespaces must be escaped:\n{line}"
  80             exprs.append(("NOLBR", (*chunks, count)))
  81         elif preagg_match:
  82             exprs.append(("PREAGG", preagg_match.groups()))
  83         else:
  84             exit("ERROR: unexpected input:\n%s" % line)
  85
  86 # Read nm output: <symbol value> <symbol type> <symbol name>
  87 nm_output = subprocess.run(
  88     [args.nmtool, "--defined-only", args.objfile], text=True, capture_output=True
  89 ).stdout
  90 # Populate symbol map
  91 symbols = {}
  92 for symline in nm_output.splitlines():
  93     symval, _, symname = symline.split(maxsplit=2)
  94     if symname in symbols and args.no_redefine:
  95         continue
  96     symbols[symname] = symval
  97
  98
  99 def evaluate_symbol(issym, anchor, offsym):
 100     sym_match = replace_pat.match(offsym)
 101     if not sym_match:
 102         # No need to evaluate symbol value, return as is
 103         return f"{issym} {anchor} {offsym}"
 104     symname = sym_match.group("symname")
 105     assert symname in symbols, f"ERROR: symbol {symname} is not defined in binary"
 106     # Evaluate to an absolute offset if issym is false
 107     if issym == "0":
 108         return f"{issym} {anchor} {symbols[symname]}"
 109     # Evaluate symbol against its anchor if issym is true
 110     assert anchor in symbols, f"ERROR: symbol {anchor} is not defined in binary"
 111     anchor_value = int(symbols[anchor], 16)
 112     symbol_value = int(symbols[symname], 16)
 113     sym_offset = symbol_value - anchor_value
 114     return f'{issym} {anchor} {format(sym_offset, "x")}'
 115
 116
 117 def replace_symbol(matchobj):
 118     """
 119     Expects matchobj to only capture one group which contains the symbol name.
 120     """
 121     symname = matchobj.group("symname")
 122     assert symname in symbols, f"ERROR: symbol {symname} is not defined in binary"
 123     return symbols[symname]
 124
 125
 126 with open(args.output, "w", newline="\n") as f:
 127     if args.no_lbr:
 128         print("no_lbr", file=f)
 129     for etype, expr in exprs:
 130         if etype == "FDATA":
 131             issym1, anchor1, offsym1, issym2, anchor2, offsym2, execnt, mispred = expr
 132             print(
 133                 evaluate_symbol(issym1, anchor1, offsym1),
 134                 evaluate_symbol(issym2, anchor2, offsym2),
 135                 execnt,
 136                 mispred,
 137                 file=f,
 138             )
 139         elif etype == "NOLBR":
 140             issym, anchor, offsym, count = expr
 141             print(evaluate_symbol(issym, anchor, offsym), count, file=f)
 142         elif etype == "PREAGG":
 143             # Replace all symbols enclosed in ##
 144             print(expr[0], re.sub(replace_pat, replace_symbol, expr[1]), file=f)
 145         else:
 146             exit("ERROR: unhandled expression type:\n%s" % etype)