"""
This script reads the input file, extracts all lines starting with
"# FDATA: " (or a given prefix instead of "FDATA"), parses the directives,
replaces symbol names ("#name#") with either symbol values or with offsets from
respective anchor symbols, and writes the resulting profile to the output file.
"""
# Command-line interface: three required paths, an optional directive prefix
# and two options.
parser = argparse.ArgumentParser(
    description=(
        "Extract profile directives from a file and resolve #symbol# "
        "references using nm output for an object file."
    )
)
parser.add_argument("input", help="File containing '# <prefix>: ' directive lines")
parser.add_argument("objfile", help="Object file to extract symbol values from")
parser.add_argument("output", help="File to write the resolved profile to")
parser.add_argument("prefix", nargs="?", default="FDATA", help="Custom FDATA prefix")
parser.add_argument("--nmtool", default="nm", help="Path to nm tool")
parser.add_argument(
    "--no-lbr",
    action="store_true",
    help='Write a "no_lbr" header line to the output',
)

args = parser.parse_args()
# Regexes to extract FDATA lines from input and parse FDATA and pre-aggregated
# profile data.
# The prefix is user-supplied text: escape it so that regex metacharacters in
# it are matched literally rather than interpreted as a pattern.
prefix_pat = re.compile(f"^# {re.escape(args.prefix)}: (.*)")

# FDATA records:
# <is symbol?> <closest elf symbol or DSO name> <relative FROM address>
# <is symbol?> <closest elf symbol or DSO name> <relative TO address>
# <number of mispredictions> <number of branches>
# NOTE(review): going by the field order above, the "exec" and "mispred" group
# names look swapped. The pattern is left unchanged; confirm nothing reads the
# groups by name before renaming them.
fdata_pat = re.compile(r"([01].*) (?P<exec>\d+) (?P<mispred>\d+)")

# Pre-aggregated profile:
# {B|F|f} [<start_id>:]<start_offset> [<end_id>:]<end_offset> <count>
preagg_pat = re.compile(r"(?P<type>[BFf]) (?P<offsets_count>.*)")

# No-LBR profile:
# <is symbol?> <closest elf symbol or DSO name> <relative address> <count>
nolbr_pat = re.compile(r"([01].*) (?P<count>\d+)")

# Replacement symbol: #symname#
replace_pat = re.compile(r"#(?P<symname>[^#]+)#")
# Read input and construct the representation of fdata expressions
# as (src_tuple, dst_tuple, mispred_count, exec_count) tuples, where src and dst
# are represented as (is_sym, anchor, offset) tuples.
# Each entry of exprs is a (kind, fields) pair with kind one of
# "FDATA", "NOLBR" or "PREAGG".
exprs = []
with open(args.input, "r") as f:
    for line in f:
        prefix_match = prefix_pat.match(line)
        if not prefix_match:
            # Not a directive line
            continue
        profile_line = prefix_match.group(1)
        fdata_match = fdata_pat.match(profile_line)
        preagg_match = preagg_pat.match(profile_line)
        nolbr_match = nolbr_pat.match(profile_line)
        # fdata_pat is tried first: an FDATA record also satisfies nolbr_pat.
        if fdata_match:
            src_dst, execnt, mispred = fdata_match.groups()
            # Split by whitespaces not preceded by a backslash (negative lookbehind)
            chunks = re.split(r"(?<!\\) +", src_dst)
            # Check if the number of records separated by non-escaped whitespace
            # exactly matches the format: 2 x (is_sym, anchor, offset).
            # Raised explicitly so the check is not stripped under `python -O`.
            if len(chunks) != 6:
                raise AssertionError(
                    f"ERROR: wrong format/whitespaces must be escaped:\n{line}"
                )
            exprs.append(("FDATA", (*chunks, execnt, mispred)))
        elif nolbr_match:
            loc, count = nolbr_match.groups()
            # Split by whitespaces not preceded by a backslash (negative lookbehind)
            chunks = re.split(r"(?<!\\) +", loc)
            # Check if the number of records separated by non-escaped whitespace
            # exactly matches the format: 1 x (is_sym, anchor, offset).
            if len(chunks) != 3:
                raise AssertionError(
                    f"ERROR: wrong format/whitespaces must be escaped:\n{line}"
                )
            exprs.append(("NOLBR", (*chunks, count)))
        elif preagg_match:
            exprs.append(("PREAGG", preagg_match.groups()))
        else:
            exit("ERROR: unexpected input:\n%s" % line)
# Read nm output: <symbol value> <symbol type> <symbol name>
nm_output = subprocess.run(
    [args.nmtool, "--defined-only", args.objfile], text=True, capture_output=True
).stdout
# Populate symbol map: symbol name -> symbol value, kept as the string printed
# by nm (it is parsed as hexadecimal only where an offset is computed).
symbols = {}
for symline in nm_output.splitlines():
    fields = symline.split(maxsplit=2)
    if len(fields) != 3:
        # Not a "<value> <type> <name>" record (e.g. a blank line): skip it
        # instead of failing on the unpack below.
        continue
    symval, _, symname = fields
    symbols[symname] = symval
def evaluate_symbol(issym, anchor, offsym):
    """
    Render one "<is symbol?> <anchor> <offset>" location as a string.

    issym is the "0"/"1" flag string, anchor the anchor symbol (or DSO) name
    and offsym either a literal offset or a "#symname#" placeholder.

    A literal offset is passed through unchanged. A placeholder is resolved
    through the module-level symbols map: to the symbol's value as stored in
    the map when issym is "0", otherwise to the hexadecimal distance between
    the symbol and its anchor.

    Raises AssertionError if the placeholder symbol, or the anchor when one is
    needed, is not present in symbols.
    """
    sym_match = replace_pat.match(offsym)
    if not sym_match:
        # No need to evaluate symbol value, return as is
        return f"{issym} {anchor} {offsym}"
    symname = sym_match.group("symname")
    # Raised explicitly (rather than via `assert`) so the check also holds
    # under `python -O`.
    if symname not in symbols:
        raise AssertionError(f"ERROR: symbol {symname} is not defined in binary")
    # Evaluate to an absolute offset if issym is false
    if issym == "0":
        return f"{issym} {anchor} {symbols[symname]}"
    # Evaluate symbol against its anchor if issym is true
    if anchor not in symbols:
        raise AssertionError(f"ERROR: symbol {anchor} is not defined in binary")
    anchor_value = int(symbols[anchor], 16)
    symbol_value = int(symbols[symname], 16)
    sym_offset = symbol_value - anchor_value
    return f"{issym} {anchor} {sym_offset:x}"
def replace_symbol(matchobj):
    """
    Return the value recorded in symbols for the symbol named by matchobj.

    Expects matchobj to only capture one group which contains the symbol name.
    Written to be used as the replacement callback of re.sub.

    Raises AssertionError if the symbol is not present in symbols.
    """
    symname = matchobj.group("symname")
    # Raised explicitly (rather than via `assert`) so the check also holds
    # under `python -O`.
    if symname not in symbols:
        raise AssertionError(f"ERROR: symbol {symname} is not defined in binary")
    return symbols[symname]
# Write the resolved profile, one record per line, always with "\n" line
# endings regardless of platform.
with open(args.output, "w", newline="\n") as f:
    # NOTE(review): guard inferred from the --no-lbr flag — confirm.
    if args.no_lbr:
        print("no_lbr", file=f)
    for etype, expr in exprs:
        if etype == "FDATA":
            issym1, anchor1, offsym1, issym2, anchor2, offsym2, execnt, mispred = expr
            # Both locations are resolved; the two trailing counters are
            # passed through in their original order.
            print(
                evaluate_symbol(issym1, anchor1, offsym1),
                evaluate_symbol(issym2, anchor2, offsym2),
                execnt,
                mispred,
                file=f,
            )
        elif etype == "NOLBR":
            issym, anchor, offsym, count = expr
            print(evaluate_symbol(issym, anchor, offsym), count, file=f)
        elif etype == "PREAGG":
            # Replace all symbols enclosed in ##
            print(expr[0], re.sub(replace_pat, replace_symbol, expr[1]), file=f)
        else:
            exit("ERROR: unhandled expression type:\n%s" % etype)