12 from types
import SimpleNamespace
13 from textwrap
import dedent
16 # 0. Prepare two BOLT build versions: base and compare.
17 # 1. Create the config by invoking this script with required options.
18 # Save the config as `llvm-bolt-wrapper.ini` next to the script or
19 # in the testing directory.
20 # In the base BOLT build directory:
21 # 2. Rename `llvm-bolt` to `llvm-bolt.real`
22 # 3. Create a symlink from this script to `llvm-bolt`
23 # 4. Create `llvm-bolt-wrapper.ini` and fill it using the example below.
25 # This script will compare binaries produced by base and compare BOLT, and
26 # report elapsed processing time and max RSS.
28 # read options from config file llvm-bolt-wrapper.ini in script CWD
32 # base_bolt = /full/path/to/llvm-bolt.real
33 # cmp_bolt = /full/path/to/other/llvm-bolt
34 # # optional, default to False
41 # # optional, defaults to timing.log in CWD
42 # timing_file = timing1.log
45 src_dir
= os
.path
.dirname(os
.path
.abspath(__file__
))
46 cfg
= configparser
.ConfigParser(allow_no_value
= True)
47 cfgs
= cfg
.read("llvm-bolt-wrapper.ini")
49 cfgs
= cfg
.read(os
.path
.join(src_dir
, "llvm-bolt-wrapper.ini"))
50 assert cfgs
, f
"llvm-bolt-wrapper.ini is not found in {os.getcwd()}"
53 # if key is not present in config, assume False
54 if key
not in cfg
['config']:
56 # if key is present, but has no value, assume True
57 if not cfg
['config'][key
]:
59 # if key has associated value, interpret the value
60 return cfg
['config'].getboolean(key
)
63 # BOLT binary locations
64 'BASE_BOLT': cfg
['config']['base_bolt'],
65 'CMP_BOLT': cfg
['config']['cmp_bolt'],
67 'VERBOSE': get_cfg('verbose'),
68 'KEEP_TMP': get_cfg('keep_tmp'),
69 'NO_MINIMIZE': get_cfg('no_minimize'),
70 'RUN_SEQUENTIALLY': get_cfg('run_sequentially'),
71 'COMPARE_OUTPUT': get_cfg('compare_output'),
72 'SKIP_BINARY_CMP': get_cfg('skip_binary_cmp'),
73 'TIMING_FILE': cfg
['config'].get('timing_file', 'timing.log'),
76 print(f
"Using config {os.path.abspath(cfgs[0])}")
77 return SimpleNamespace(**d
)
# perf2bolt-mode options; run_bolt appends these to the command line
PERF2BOLT_MODE = ['-aggregate-only', '-ignore-build-id']

# boltdiff-mode options; the output path is /dev/null
BOLTDIFF_MODE = ['-diff-only', '-o', '/dev/null']

# options to suppress binary differences as much as possible
MINIMIZE_DIFFS = ['-bolt-info=0']
88 # bolt output options that need to be intercepted
90 '-o': 'BOLT output binary',
91 '-w': 'BOLT recorded profile',
94 # regex patterns to exclude the line from log comparison
96 'BOLT-INFO: BOLT version',
99 r
'BOLT-INFO:.*data.*output data',
100 'WARNING: reading perf data directly',
103 def run_cmd(cmd
, out_f
, cfg
):
106 return subprocess
.Popen(cmd
, stdout
=out_f
, stderr
=subprocess
.STDOUT
)
108 def run_bolt(bolt_path
, bolt_args
, out_f
, cfg
):
109 p2b
= os
.path
.basename(sys
.argv
[0]) == 'perf2bolt' # perf2bolt mode
110 bd
= os
.path
.basename(sys
.argv
[0]) == 'llvm-boltdiff' # boltdiff mode
111 hm
= sys
.argv
[1] == 'heatmap' # heatmap mode
112 cmd
= ['/usr/bin/time', '-f', '%e %M', bolt_path
] + bolt_args
114 # -ignore-build-id can occur at most once, hence remove it from cmd
115 if '-ignore-build-id' in cmd
:
116 cmd
.remove('-ignore-build-id')
117 cmd
+= PERF2BOLT_MODE
120 elif not cfg
.NO_MINIMIZE
and not hm
:
121 cmd
+= MINIMIZE_DIFFS
122 return run_cmd(cmd
, out_f
, cfg
)
def prepend_dash(args: Mapping[AnyStr, AnyStr]) -> Sequence[AnyStr]:
    """
    Accepts parsed arguments and returns flat list with dash prepended to
    each option name.
    Example: Namespace(o='test.tmp') -> ['-o', 'test.tmp']

    The mapping's iteration order is preserved: every key contributes two
    consecutive entries, '-<key>' followed by its value.
    """
    flattened = []
    # extend() keeps this linear; summing tuples re-copies the accumulated
    # result for every option.
    for key, value in args.items():
        flattened.extend(('-' + key, value))
    return flattened
def replace_cmp_path(tmp: AnyStr, args: Mapping[AnyStr, AnyStr]) -> Sequence[AnyStr]:
    """
    Keeps file names, but replaces the path to a temp folder.
    Example: {'o': 'abc/test.tmp'} -> ['-o', '/tmp/tmpf9un/test.tmp']
    Except preserve /dev/null.

    The rewritten mapping is passed through prepend_dash, so the result is
    the flat, dash-prefixed argument list rather than a mapping.
    """
    def replace_path(path):
        # /dev/null is kept as is instead of being moved under tmp
        if path == '/dev/null':
            return '/dev/null'
        return os.path.join(tmp, os.path.basename(path))

    new_args = {key: replace_path(value) for key, value in args.items()}
    return prepend_dash(new_args)
def preprocess_args(args: argparse.Namespace) -> Mapping[AnyStr, AnyStr]:
    """
    Drop options that weren't parsed (e.g. -w), convert to a dict

    An option is kept only when its parsed value is truthy, so attributes
    left as None (and empty strings) are omitted from the result.
    """
    parsed = vars(args)
    return {name: parsed[name] for name in parsed if parsed[name]}
150 def write_to(txt
, filename
, mode
='w'):
151 with
open(filename
, mode
) as f
:
154 def wait(proc
, fdesc
):
157 return open(fdesc
.name
)
159 def compare_logs(main
, cmp, skip_begin
=0, skip_end
=0, str_input
=True):
161 Compares logs but allows for certain lines to be excluded from comparison.
162 If str_input is True (default), the input is assumed to be a string,
163 which is split into lines. Otherwise the input is assumed to be a file.
164 Returns None on success, mismatch otherwise.
166 main_inp
= main
.splitlines() if str_input
else main
.readlines()
167 cmp_inp
= cmp.splitlines() if str_input
else cmp.readlines()
168 # rewind logs after consumption
172 for lhs
, rhs
in list(zip(main_inp
, cmp_inp
))[skip_begin
:-skip_end
or None]:
174 # check skip patterns
175 for skip
in SKIP_MATCH
:
176 # both lines must contain the pattern
177 if re
.search(skip
, lhs
) and re
.search(skip
, rhs
):
179 # otherwise return mismatching lines
184 def fmt_cmp(cmp_tuple
):
187 return f
'main:\n{cmp_tuple[0]}\ncmp:\n{cmp_tuple[1]}\n'
189 def compare_with(lhs
, rhs
, cmd
, skip_begin
=0, skip_end
=0):
191 Runs cmd on both lhs and rhs and compares stdout.
192 Returns tuple (mismatch, lhs_stdout):
193 - if stdout matches between two files, mismatch is None,
194 - otherwise mismatch is a tuple of mismatching lines.
196 run
= lambda binary
: subprocess
.run(cmd
.split() + [binary
],
197 text
=True, check
=True,
198 capture_output
=True).stdout
201 cmp = compare_logs(run_lhs
, run_rhs
, skip_begin
, skip_end
)
def parse_cmp_offset(cmp_out):
    """
    Extracts byte number from cmp output:
    file1 file2 differ: byte X, line Y

    Returns X as an int. Raises ValueError when cmp_out contains no
    'byte <digits>,' fragment, so the failure names the unexpected text.
    """
    match = re.search(r'byte (\d+),', cmp_out)
    if match is None:
        raise ValueError(f"cannot find byte offset in cmp output: {cmp_out!r}")
    return int(match.group(1))
211 def report_real_time(binary
, main_err
, cmp_err
, cfg
):
213 Extracts real time from stderr and appends it to TIMING_FILE as csv:
214 "output binary; base bolt; cmp bolt"
216 def get_real_from_stderr(logline
):
217 return '; '.join(logline
.split())
218 for line
in main_err
:
220 main
= get_real_from_stderr(line
)
223 cmp = get_real_from_stderr(line
)
224 write_to(f
"{binary}; {main}; {cmp}\n", cfg
.TIMING_FILE
, 'a')
225 # rewind logs after consumption
229 def clean_exit(tmp
, out
, exitcode
, cfg
):
230 # temp files are only cleaned on success
234 # report stdout and stderr from the main process
235 shutil
.copyfileobj(out
, sys
.stdout
)
238 def find_section(offset
, readelf_hdr
):
239 hdr
= readelf_hdr
.split('\n')
241 # extract sections table (parse objdump -hw output)
242 for line
in hdr
[5:-1]:
243 cols
= line
.strip().split()
244 # extract section offset
245 file_offset
= int(cols
[5], 16)
247 size
= int(cols
[2], 16)
248 if offset
>= file_offset
and offset
<= file_offset
+ size
:
249 if sys
.stdout
.isatty(): # terminal supports colors
250 print(f
"\033[1m{line}\033[0m")
258 def main_config_generator():
259 parser
= argparse
.ArgumentParser()
260 parser
.add_argument('base_bolt', help='Full path to base llvm-bolt binary')
261 parser
.add_argument('cmp_bolt', help='Full path to cmp llvm-bolt binary')
262 parser
.add_argument('--verbose', action
='store_true',
263 help='Print subprocess invocation cmdline (default False)')
264 parser
.add_argument('--keep_tmp', action
='store_true',
265 help = 'Preserve tmp folder on a clean exit '
266 '(tmp directory is preserved on crash by default)')
267 parser
.add_argument('--no_minimize', action
='store_true',
268 help=f
'Do not add `{MINIMIZE_DIFFS}` that is used '
269 'by default to reduce binary differences')
270 parser
.add_argument('--run_sequentially', action
='store_true',
271 help='Run both binaries sequentially (default '
272 'in parallel). Use for timing comparison')
273 parser
.add_argument('--compare_output', action
='store_true',
274 help = 'Compare bolt stdout/stderr (disabled by default)')
275 parser
.add_argument('--skip_binary_cmp', action
='store_true',
276 help = 'Disable output comparison')
277 parser
.add_argument('--timing_file', help = 'Override path to timing log '
278 'file (default `timing.log` in CWD)')
279 args
= parser
.parse_args()
284 base_bolt = {args.base_bolt}
285 cmp_bolt = {args.cmp_bolt}'''))
291 for key
, value
in d
.items():
297 # intercept output arguments
298 parser
= argparse
.ArgumentParser(add_help
=False)
299 for option
, help in BOLT_OUTPUT_OPTS
.items():
300 parser
.add_argument(option
, help=help)
301 args
, unknownargs
= parser
.parse_known_args()
302 args
= preprocess_args(args
)
303 cmp_args
= copy
.deepcopy(args
)
304 tmp
= tempfile
.mkdtemp()
305 cmp_args
= replace_cmp_path(tmp
, cmp_args
)
307 # reconstruct output arguments: prepend dash
308 args
= prepend_dash(args
)
310 # run both BOLT binaries
311 main_f
= open(os
.path
.join(tmp
, 'main_bolt.stdout'), 'w')
312 cmp_f
= open(os
.path
.join(tmp
, 'cmp_bolt.stdout'), 'w')
313 main_bolt
= run_bolt(cfg
.BASE_BOLT
, unknownargs
+ args
, main_f
, cfg
)
314 if cfg
.RUN_SEQUENTIALLY
:
315 main_out
= wait(main_bolt
, main_f
)
316 cmp_bolt
= run_bolt(cfg
.CMP_BOLT
, unknownargs
+ cmp_args
, cmp_f
, cfg
)
318 cmp_bolt
= run_bolt(cfg
.CMP_BOLT
, unknownargs
+ cmp_args
, cmp_f
, cfg
)
319 main_out
= wait(main_bolt
, main_f
)
320 cmp_out
= wait(cmp_bolt
, cmp_f
)
323 if main_bolt
.returncode
!= cmp_bolt
.returncode
:
325 exit("exitcode mismatch")
327 # compare logs, skip_end=1 skips the line with time
328 out
= compare_logs(main_out
, cmp_out
, skip_end
=1, str_input
=False) if cfg
.COMPARE_OUTPUT
else None
332 write_to(fmt_cmp(out
), os
.path
.join(tmp
, 'summary.txt'))
333 exit("logs mismatch")
335 if os
.path
.basename(sys
.argv
[0]) == 'llvm-boltdiff': # boltdiff mode
336 # no output binary to compare, so just exit
337 clean_exit(tmp
, main_out
, main_bolt
.returncode
, cfg
)
339 # compare binaries (using cmp)
340 main_binary
= args
[args
.index('-o')+1]
341 cmp_binary
= cmp_args
[cmp_args
.index('-o')+1]
342 if main_binary
== '/dev/null':
343 assert cmp_binary
== '/dev/null'
344 cfg
.SKIP_BINARY_CMP
= True
346 # report binary timing as csv: output binary; base bolt real; cmp bolt real
347 report_real_time(main_binary
, main_out
, cmp_out
, cfg
)
349 # check if files exist
350 main_exists
= os
.path
.exists(main_binary
)
351 cmp_exists
= os
.path
.exists(cmp_binary
)
352 if main_exists
and cmp_exists
:
353 # proceed to comparison
355 elif not main_exists
and not cmp_exists
:
356 # both don't exist, assume it's intended, skip comparison
357 clean_exit(tmp
, main_out
, main_bolt
.returncode
, cfg
)
359 assert not cmp_exists
360 exit(f
"{cmp_binary} doesn't exist")
362 assert not main_exists
363 exit(f
"{main_binary} doesn't exist")
365 if not cfg
.SKIP_BINARY_CMP
:
366 cmp_proc
= subprocess
.run(['cmp', '-b', main_binary
, cmp_binary
],
367 capture_output
=True, text
=True)
368 if cmp_proc
.returncode
:
369 # check if output is an ELF file (magic bytes)
370 with
open(main_binary
, 'rb') as f
:
372 if magic
!= b
'\x7fELF':
373 exit("output mismatch")
374 # check if ELF headers match
375 mismatch
, _
= compare_with(main_binary
, cmp_binary
, 'readelf -We')
377 print(fmt_cmp(mismatch
))
378 write_to(fmt_cmp(mismatch
), os
.path
.join(tmp
, 'headers.txt'))
379 exit("headers mismatch")
380 # if headers match, compare sections (skip line with filename)
381 mismatch
, hdr
= compare_with(main_binary
, cmp_binary
, 'objdump -hw',
384 # check which section has the first mismatch
385 mismatch_offset
= parse_cmp_offset(cmp_proc
.stdout
)
386 section
= find_section(mismatch_offset
, hdr
)
387 exit(f
"binary mismatch @{hex(mismatch_offset)} ({section})")
389 clean_exit(tmp
, main_out
, main_bolt
.returncode
, cfg
)
391 if __name__
== "__main__":
392 # config generator mode if the script is launched as is
393 if os
.path
.basename(__file__
) == "llvm-bolt-wrapper.py":
394 main_config_generator()
396 # llvm-bolt interceptor mode otherwise