"""
This script reads the input file, extracts all lines starting with
"# FDATA: " (or a given prefix instead of "FDATA"), parses the directives,
replaces symbol names ("#name#") with either symbol values or with offsets from
respective anchor symbols, and writes the result to the output file.
"""
# Command-line interface. Each entry is (positional names/flags, keyword
# options) and is registered in order, so the positional arguments are
# input, objfile, output and the optional prefix.
parser = argparse.ArgumentParser()
for _names, _options in (
    (("input",), {}),
    (("objfile",), {"help": "Object file to extract symbol values from"}),
    (("output",), {}),
    (
        ("prefix",),
        {"nargs": "?", "default": "FDATA", "help": "Custom FDATA prefix"},
    ),
    (("--nmtool",), {"default": "nm", "help": "Path to nm tool"}),
    (("--no-lbr",), {"action": "store_true"}),
    (("--no-redefine",), {"action": "store_true"}),
):
    parser.add_argument(*_names, **_options)

args = parser.parse_args()
# Regexes to extract FDATA lines from input and parse FDATA and pre-aggregated
# profile data.
# The prefix is matched literally: re.escape() prevents a custom prefix that
# contains regex metacharacters (".", "+", "(", ...) from being interpreted
# as a pattern.
prefix_pat = re.compile(f"^# {re.escape(args.prefix)}: (.*)")

# FDATA records:
# <is symbol?> <closest elf symbol or DSO name> <relative FROM address>
# <is symbol?> <closest elf symbol or DSO name> <relative TO address>
# <number of mispredictions> <number of branches>
# NOTE(review): the group names below look swapped relative to the field order
# documented above (the first number is captured as "exec"). The two numbers
# are only passed through positionally, so the output is unaffected.
fdata_pat = re.compile(r"([01].*) (?P<exec>\d+) (?P<mispred>\d+)")

# Pre-aggregated profile:
# {B|F|f} [<start_id>:]<start_offset> [<end_id>:]<end_offset> <count>
# Everything after the record type is captured as a single group.
preagg_pat = re.compile(r"(?P<type>[BFf]) (?P<offsets_count>.*)")

# No-LBR records:
# <is symbol?> <closest elf symbol or DSO name> <relative address> <count>
nolbr_pat = re.compile(r"([01].*) (?P<count>\d+)")

# Replacement symbol: #symname#
replace_pat = re.compile(r"#(?P<symname>[^#]+)#")
# Read input and construct the representation of profile expressions as
# (kind, fields) tuples collected in exprs:
#   ("FDATA", (issym, anchor, offset, issym, anchor, offset, number, number))
#   ("NOLBR", (issym, anchor, offset, count))
#   ("PREAGG", (type, offsets_count))
# All fields are kept as strings.
exprs = []


def _split_unescaped(text):
    """Split text on runs of spaces not preceded by a backslash."""
    # Negative lookbehind keeps "\ " (an escaped space) inside a field.
    return re.split(r"(?<!\\) +", text)


def _parse_record(profile_line, line):
    """Turn one extracted profile line into a (kind, fields) tuple.

    profile_line is the text captured after the "# <prefix>: " marker and
    line is the full input line, used only for error messages.

    Raises AssertionError when a [01]-prefixed record has neither the FDATA
    (6 fields + 2 numbers) nor the no-LBR (3 fields + 1 number) shape, and
    SystemExit when the line matches none of the known record patterns.
    """
    fdata_match = fdata_pat.match(profile_line)
    nolbr_match = nolbr_pat.match(profile_line)
    if fdata_match or nolbr_match:
        # fdata_pat also matches any no-LBR record whose offset is written in
        # decimal digits (e.g. "1 main 0 5"), so the record kind is decided by
        # the number of non-escaped, space-separated fields rather than by
        # which pattern happens to match first.
        if fdata_match:
            src_dst, execnt, mispred = fdata_match.groups()
            chunks = _split_unescaped(src_dst)
            if len(chunks) == 6:
                return "FDATA", (*chunks, execnt, mispred)
        if nolbr_match:
            loc, count = nolbr_match.groups()
            chunks = _split_unescaped(loc)
            if len(chunks) == 3:
                return "NOLBR", (*chunks, count)
        # Raised explicitly (not via `assert`) so the check survives python -O.
        raise AssertionError(
            f"ERROR: wrong format/whitespaces must be escaped:\n{line}"
        )
    preagg_match = preagg_pat.match(profile_line)
    if preagg_match:
        return "PREAGG", preagg_match.groups()
    # Same effect as exit(msg) without depending on the site module.
    raise SystemExit("ERROR: unexpected input:\n%s" % line)


with open(args.input, "r") as f:
    for line in f:
        prefix_match = prefix_pat.match(line)
        if not prefix_match:
            # Not a profile directive line; ignore it.
            continue
        exprs.append(_parse_record(prefix_match.group(1), line))
# Read nm output: <symbol value> <symbol type> <symbol name>
nm_output = subprocess.run(
    [args.nmtool, "--defined-only", args.objfile], text=True, capture_output=True
).stdout
# Populate symbol map: symbol name -> symbol value, kept as the string printed
# by nm (it is parsed as hexadecimal where arithmetic is needed).
symbols = {}
for symline in nm_output.splitlines():
    # maxsplit=2 keeps any whitespace inside the symbol name intact.
    fields = symline.split(maxsplit=2)
    if len(fields) != 3:
        # Not a "<value> <type> <name>" record (for example a blank line);
        # skip it instead of failing on the unpack below.
        continue
    symval, _, symname = fields
    if symname in symbols and args.no_redefine:
        # With --no-redefine the first definition of a symbol wins.
        continue
    symbols[symname] = symval
def evaluate_symbol(issym, anchor, offsym):
    """Return the "<issym> <anchor> <offset>" text for one profile location.

    issym, anchor and offsym are the three string fields of a location.
    If offsym does not start with a "#name#" reference, the fields are
    returned unchanged. Otherwise the reference is resolved through the
    module-level symbols map:

    * issym == "0": the offset becomes the symbol value as stored in symbols;
    * otherwise: the offset becomes the hexadecimal difference between the
      symbol value and the value of anchor (negative if the symbol precedes
      the anchor).

    Raises AssertionError if the referenced symbol, or the anchor when an
    anchor-relative offset is needed, is not present in symbols.
    """
    sym_match = replace_pat.match(offsym)
    if not sym_match:
        # No need to evaluate symbol value, return as is
        return f"{issym} {anchor} {offsym}"
    symname = sym_match.group("symname")
    # Raised explicitly (not via `assert`) so the check survives python -O.
    if symname not in symbols:
        raise AssertionError(f"ERROR: symbol {symname} is not defined in binary")
    # Evaluate to an absolute offset if issym is false
    if issym == "0":
        return f"{issym} {anchor} {symbols[symname]}"
    # Evaluate symbol against its anchor if issym is true
    if anchor not in symbols:
        raise AssertionError(f"ERROR: symbol {anchor} is not defined in binary")
    anchor_value = int(symbols[anchor], 16)
    symbol_value = int(symbols[symname], 16)
    sym_offset = symbol_value - anchor_value
    return f"{issym} {anchor} {sym_offset:x}"
def replace_symbol(matchobj):
    """
    Return the value stored in symbols for the symbol named by matchobj.

    Expects matchobj to only capture one group which contains the symbol name.
    Intended as the replacement callback for re.sub with replace_pat.

    Raises AssertionError if the symbol is not present in symbols.
    """
    symname = matchobj.group("symname")
    # Raised explicitly (not via `assert`) so the check survives python -O.
    if symname not in symbols:
        raise AssertionError(f"ERROR: symbol {symname} is not defined in binary")
    return symbols[symname]
# Write the resolved profile. newline="\n" forces LF line endings regardless
# of the platform default.
with open(args.output, "w", newline="\n") as f:
    if args.no_lbr:
        # Header line emitted when --no-lbr is given.
        print("no_lbr", file=f)
    for etype, expr in exprs:
        if etype == "FDATA":
            issym1, anchor1, offsym1, issym2, anchor2, offsym2, execnt, mispred = expr
            print(
                evaluate_symbol(issym1, anchor1, offsym1),
                evaluate_symbol(issym2, anchor2, offsym2),
                execnt,
                mispred,
                file=f,
            )
        elif etype == "NOLBR":
            issym, anchor, offsym, count = expr
            print(evaluate_symbol(issym, anchor, offsym), count, file=f)
        elif etype == "PREAGG":
            # Replace all symbols enclosed in ##
            print(expr[0], re.sub(replace_pat, replace_symbol, expr[1]), file=f)
        else:
            # Same effect as exit(msg) without depending on the site module.
            raise SystemExit("ERROR: unhandled expression type:\n%s" % etype)