Bump version to 19.1.0 (final)
[llvm-project.git] / bolt / test / link_fdata.py
blob3837e394ccc87bb6b0c79d9613f0cec8bf8aff9c
1 #!/usr/bin/env python3
3 """
This script reads the input file, extracts all lines starting with
"# FDATA: " (or a given prefix instead of "FDATA"), parses the directives,
replaces symbol names ("#name#") with either symbol values or with offsets from
respective anchor symbols, and writes the resulting profile to the output file.
8 """
10 import argparse
11 import subprocess
12 import sys
13 import re
# Command-line interface. Positional arguments, in order: input, objfile,
# output and an optional prefix.
parser = argparse.ArgumentParser()
parser.add_argument(
    "input", help='Source file to scan for "# <prefix>: " directive lines'
)
parser.add_argument("objfile", help="Object file to extract symbol values from")
parser.add_argument("output", help="File to write the resolved profile to")
parser.add_argument("prefix", nargs="?", default="FDATA", help="Custom FDATA prefix")
parser.add_argument("--nmtool", default="nm", help="Path to nm tool")
parser.add_argument(
    "--no-lbr",
    action="store_true",
    help='Emit a "no_lbr" header line at the top of the output',
)
parser.add_argument(
    "--no-redefine",
    action="store_true",
    help="Keep the first value seen for a symbol that nm lists more than once",
)

args = parser.parse_args()
# Regexes to extract FDATA lines from input and parse FDATA and pre-aggregated
# profile data. The prefix is user-supplied text, so it is escaped to keep any
# regex metacharacters in it literal.
prefix_pat = re.compile(f"^# {re.escape(args.prefix)}: (.*)")

# FDATA records:
# <is symbol?> <closest elf symbol or DSO name> <relative FROM address>
# <is symbol?> <closest elf symbol or DSO name> <relative TO address>
# <number of mispredictions> <number of branches>
# Group names follow the record order above: mispredictions first, then the
# branch (execution) count.
fdata_pat = re.compile(r"([01].*) (?P<mispred>\d+) (?P<exec>\d+)")

# Pre-aggregated profile:
# {B|F|f} [<start_id>:]<start_offset> [<end_id>:]<end_offset> <count>
# [<mispred_count>]
preagg_pat = re.compile(r"(?P<type>[BFf]) (?P<offsets_count>.*)")

# No-LBR profile:
# <is symbol?> <closest elf symbol or DSO name> <relative address> <count>
nolbr_pat = re.compile(r"([01].*) (?P<count>\d+)")

# Replacement symbol: #symname#
replace_pat = re.compile(r"#(?P<symname>[^#]+)#")
# Read input and construct the representation of the profile as a list of
# (kind, fields) tuples, where kind selects the layout of fields:
#   "FDATA":  (issym, anchor, offset, issym, anchor, offset, mispred, exec)
#   "NOLBR":  (issym, anchor, offset, count)
#   "PREAGG": (type, remainder of the line)

# Field separator: a run of spaces not preceded by a backslash (negative
# lookbehind), so that escaped spaces stay inside a field.
field_sep_pat = re.compile(r"(?<!\\) +")

exprs = []
with open(args.input, "r") as f:
    for line in f:
        prefix_match = prefix_pat.match(line)
        if not prefix_match:
            continue
        profile_line = prefix_match.group(1)
        fdata_match = fdata_pat.match(profile_line)
        preagg_match = preagg_pat.match(profile_line)
        nolbr_match = nolbr_pat.match(profile_line)
        # Any line that matches fdata_pat also matches nolbr_pat, so FDATA has
        # to be tried first.
        # NOTE: a no-LBR record whose offset is a plain decimal number also
        # matches fdata_pat and is then rejected by the field-count check.
        if fdata_match:
            src_dst, mispred, execnt = fdata_match.groups()
            chunks = field_sep_pat.split(src_dst)
            # The number of records separated by non-escaped whitespace must
            # exactly match the format. Checked explicitly rather than with
            # assert, which is stripped under "python -O".
            if len(chunks) != 6:
                sys.exit(f"ERROR: wrong format/whitespaces must be escaped:\n{line}")
            exprs.append(("FDATA", (*chunks, mispred, execnt)))
        elif nolbr_match:
            loc, count = nolbr_match.groups()
            chunks = field_sep_pat.split(loc)
            # Same field-count validation as for FDATA records.
            if len(chunks) != 3:
                sys.exit(f"ERROR: wrong format/whitespaces must be escaped:\n{line}")
            exprs.append(("NOLBR", (*chunks, count)))
        elif preagg_match:
            exprs.append(("PREAGG", preagg_match.groups()))
        else:
            sys.exit("ERROR: unexpected input:\n%s" % line)
# Read nm output: <symbol value> <symbol type> <symbol name>
nm_result = subprocess.run(
    [args.nmtool, "--defined-only", args.objfile], text=True, capture_output=True
)
# A failing nm leaves the symbol map empty, and every later lookup would then
# report a misleading "symbol is not defined" error. Surface the real cause,
# but keep going so inputs that need no symbols still work.
if nm_result.returncode != 0:
    print(
        f"WARNING: {args.nmtool} exited with code {nm_result.returncode}:\n"
        f"{nm_result.stderr}",
        file=sys.stderr,
    )
nm_output = nm_result.stdout
# Populate symbol map (symbol name -> symbol value string as printed by nm)
symbols = {}
for symline in nm_output.splitlines():
    fields = symline.split(maxsplit=2)
    # Skip lines that are not three-field symbol records (e.g. blank lines);
    # unpacking them would raise ValueError.
    if len(fields) != 3:
        continue
    symval, _, symname = fields
    # With --no-redefine the first value seen for a name wins; otherwise the
    # last one does.
    if symname in symbols and args.no_redefine:
        continue
    symbols[symname] = symval
def evaluate_symbol(issym, anchor, offsym):
    """
    Render one "<is symbol?> <anchor> <offset>" location as text.

    If offsym does not start with a "#name#" placeholder, the three fields are
    returned unchanged. Otherwise the placeholder is resolved through the
    symbol map: when issym is "0" it becomes the value of name as stored in
    the map; for any other issym it becomes the hexadecimal distance from the
    anchor symbol to name. Asserts that every symbol it looks up is defined.
    """
    placeholder = replace_pat.match(offsym)
    if placeholder is None:
        # Literal offset: nothing to resolve.
        return f"{issym} {anchor} {offsym}"

    symname = placeholder.group("symname")
    assert symname in symbols, f"ERROR: symbol {symname} is not defined in binary"

    if issym != "0":
        # Symbol-relative location: express the target relative to its anchor.
        assert anchor in symbols, f"ERROR: symbol {anchor} is not defined in binary"
        base = int(symbols[anchor], 16)
        target = int(symbols[symname], 16)
        return f"{issym} {anchor} {target - base:x}"

    # issym is false: substitute the symbol value itself.
    return f"{issym} {anchor} {symbols[symname]}"
def replace_symbol(matchobj):
    """
    Substitution callback that maps a matched placeholder to its symbol value.

    Expects matchobj to carry a "symname" group holding the symbol name, and
    asserts that the name is present in the symbol map.
    """
    symname = matchobj["symname"]
    assert symname in symbols, f"ERROR: symbol {symname} is not defined in binary"
    return symbols[symname]
# Write the profile with every symbol reference resolved. Line endings are
# forced to "\n" on every platform.
with open(args.output, "w", newline="\n") as f:
    if args.no_lbr:
        print("no_lbr", file=f)
    for etype, expr in exprs:
        if etype == "FDATA":
            # The two trailing counters are emitted in the order they were
            # read: mispredictions, then branch (execution) count.
            issym1, anchor1, offsym1, issym2, anchor2, offsym2, mispred, execnt = expr
            print(
                evaluate_symbol(issym1, anchor1, offsym1),
                evaluate_symbol(issym2, anchor2, offsym2),
                mispred,
                execnt,
                file=f,
            )
        elif etype == "NOLBR":
            issym, anchor, offsym, count = expr
            print(evaluate_symbol(issym, anchor, offsym), count, file=f)
        elif etype == "PREAGG":
            # Replace all symbols enclosed in ##
            print(expr[0], replace_pat.sub(replace_symbol, expr[1]), file=f)
        else:
            # sys.exit rather than the site-provided exit(), which is meant
            # for interactive use and is absent when site is not loaded.
            sys.exit("ERROR: unhandled expression type:\n%s" % etype)