12 from types
import SimpleNamespace
13 from textwrap
import dedent
16 # 0. Prepare two BOLT build versions: base and compare.
17 # 1. Create the config by invoking this script with required options.
18 # Save the config as `llvm-bolt-wrapper.ini` next to the script or
19 # in the testing directory.
20 # In the base BOLT build directory:
21 # 2. Rename `llvm-bolt` to `llvm-bolt.real`
22 # 3. Create a symlink from this script to `llvm-bolt`
23 # 4. Create `llvm-bolt-wrapper.ini` and fill it using the example below.
25 # This script will compare binaries produced by base and compare BOLT, and
26 # report elapsed processing time and max RSS.
28 # read options from config file llvm-bolt-wrapper.ini in script CWD
32 # base_bolt = /full/path/to/llvm-bolt.real
33 # cmp_bolt = /full/path/to/other/llvm-bolt
34 # # optional, defaults to False
41 # # optional, defaults to timing.log in CWD
42 # timing_file = timing1.log
46 src_dir
= os
.path
.dirname(os
.path
.abspath(__file__
))
47 cfg
= configparser
.ConfigParser(allow_no_value
=True)
48 cfgs
= cfg
.read("llvm-bolt-wrapper.ini")
50 cfgs
= cfg
.read(os
.path
.join(src_dir
, "llvm-bolt-wrapper.ini"))
51 assert cfgs
, f
"llvm-bolt-wrapper.ini is not found in {os.getcwd()}"
54 # if key is not present in config, assume False
55 if key
not in cfg
["config"]:
57 # if key is present, but has no value, assume True
58 if not cfg
["config"][key
]:
60 # if key has associated value, interpret the value
61 return cfg
["config"].getboolean(key
)
64 # BOLT binary locations
65 "BASE_BOLT": cfg
["config"]["base_bolt"],
66 "CMP_BOLT": cfg
["config"]["cmp_bolt"],
68 "VERBOSE": get_cfg("verbose"),
69 "KEEP_TMP": get_cfg("keep_tmp"),
70 "NO_MINIMIZE": get_cfg("no_minimize"),
71 "RUN_SEQUENTIALLY": get_cfg("run_sequentially"),
72 "COMPARE_OUTPUT": get_cfg("compare_output"),
73 "SKIP_BINARY_CMP": get_cfg("skip_binary_cmp"),
74 "TIMING_FILE": cfg
["config"].get("timing_file", "timing.log"),
77 print(f
"Using config {os.path.abspath(cfgs[0])}")
78 return SimpleNamespace(**d
)
# extra options appended to the command line when running as perf2bolt
PERF2BOLT_MODE = ["-aggregate-only", "-ignore-build-id"]

# extra options appended to the command line when running as llvm-boltdiff
BOLTDIFF_MODE = ["-diff-only", "-o", "/dev/null"]

# options to suppress binary differences as much as possible
MINIMIZE_DIFFS = ["-bolt-info=0"]
90 # bolt output options that need to be intercepted
92 "-o": "BOLT output binary",
93 "-w": "BOLT recorded profile",
96 # regex patterns to exclude the line from log comparison
98 "BOLT-INFO: BOLT version",
101 r
"BOLT-INFO:.*data.*output data",
102 "WARNING: reading perf data directly",
106 def run_cmd(cmd
, out_f
, cfg
):
109 return subprocess
.Popen(cmd
, stdout
=out_f
, stderr
=subprocess
.STDOUT
)
112 def run_bolt(bolt_path
, bolt_args
, out_f
, cfg
):
113 p2b
= os
.path
.basename(sys
.argv
[0]) == "perf2bolt" # perf2bolt mode
114 bd
= os
.path
.basename(sys
.argv
[0]) == "llvm-boltdiff" # boltdiff mode
115 hm
= sys
.argv
[1] == "heatmap" # heatmap mode
116 cmd
= ["/usr/bin/time", "-f", "%e %M", bolt_path
] + bolt_args
118 # -ignore-build-id can occur at most once, hence remove it from cmd
119 if "-ignore-build-id" in cmd
:
120 cmd
.remove("-ignore-build-id")
121 cmd
+= PERF2BOLT_MODE
124 elif not cfg
.NO_MINIMIZE
and not hm
:
125 cmd
+= MINIMIZE_DIFFS
126 return run_cmd(cmd
, out_f
, cfg
)
def prepend_dash(args: Mapping[AnyStr, AnyStr]) -> Sequence[AnyStr]:
    """
    Accepts parsed arguments and returns a flat list with a dash prepended to
    each option name, every name immediately followed by its value.
    Example: Namespace(o='test.tmp') -> ['-o', 'test.tmp']
    """
    # Flatten in a single pass; summing tuples would re-copy the accumulated
    # tuple once per option.
    return [item for key, value in args.items() for item in ("-" + key, value)]
def replace_cmp_path(tmp: AnyStr, args: Mapping[AnyStr, AnyStr]) -> Sequence[AnyStr]:
    """
    Keeps file names, but replaces the path to a temp folder.
    Example: Namespace(o='abc/test.tmp') -> Namespace(o='/tmp/tmpf9un/test.tmp')
    Except preserve /dev/null.

    Returns the relocated arguments as a flat, dash-prefixed list
    (see prepend_dash).
    """

    def replace_path(path):
        # /dev/null is passed through untouched: the caller compares the
        # output path against "/dev/null" to decide whether to skip the
        # binary comparison.
        if path == "/dev/null":
            return path
        return os.path.join(tmp, os.path.basename(path))

    new_args = {key: replace_path(value) for key, value in args.items()}
    return prepend_dash(new_args)
def preprocess_args(args: argparse.Namespace) -> Mapping[AnyStr, AnyStr]:
    """
    Drop options that weren't parsed (e.g. -w), convert to a dict.

    Any option whose parsed value is falsy (for example None or an empty
    string) is omitted from the returned mapping.
    """
    return {key: value for key, value in vars(args).items() if value}
def write_to(txt, filename, mode="w"):
    """
    Write txt to filename, opening the file with the given mode
    ("w" truncates, "a" appends). No newline is added to txt.
    """
    with open(filename, mode) as f:
        f.write(txt)
167 def wait(proc
, fdesc
):
170 return open(fdesc
.name
)
173 def compare_logs(main
, cmp, skip_begin
=0, skip_end
=0, str_input
=True):
175 Compares logs but allows for certain lines to be excluded from comparison.
176 If str_input is True (default), the input is assumed to be a string,
177 which is split into lines. Otherwise the input is assumed to be a file.
178 Returns None on success, mismatch otherwise.
180 main_inp
= main
.splitlines() if str_input
else main
.readlines()
181 cmp_inp
= cmp.splitlines() if str_input
else cmp.readlines()
182 # rewind logs after consumption
186 for lhs
, rhs
in list(zip(main_inp
, cmp_inp
))[skip_begin
: -skip_end
or None]:
188 # check skip patterns
189 for skip
in SKIP_MATCH
:
190 # both lines must contain the pattern
191 if re
.search(skip
, lhs
) and re
.search(skip
, rhs
):
193 # otherwise return mismatching lines
199 def fmt_cmp(cmp_tuple
):
202 return f
"main:\n{cmp_tuple[0]}\ncmp:\n{cmp_tuple[1]}\n"
205 def compare_with(lhs
, rhs
, cmd
, skip_begin
=0, skip_end
=0):
207 Runs cmd on both lhs and rhs and compares stdout.
208 Returns tuple (mismatch, lhs_stdout):
209 - if stdout matches between two files, mismatch is None,
210 - otherwise mismatch is a tuple of mismatching lines.
212 run
= lambda binary
: subprocess
.run(
213 cmd
.split() + [binary
], text
=True, check
=True, capture_output
=True
217 cmp = compare_logs(run_lhs
, run_rhs
, skip_begin
, skip_end
)
def parse_cmp_offset(cmp_out):
    """
    Extracts byte number from cmp output:
    file1 file2 differ: byte X, line Y

    Returns the zero-based offset of the reported byte.
    Raises ValueError if cmp_out contains no "byte N," field.
    """
    match = re.search(r"byte (\d+),", cmp_out)
    if match is None:
        # Fail with the offending text rather than an AttributeError on None.
        raise ValueError(f"unexpected cmp output: {cmp_out!r}")
    # NOTE: cmp counts bytes starting from 1!
    return int(match.group(1)) - 1
230 def report_real_time(binary
, main_err
, cmp_err
, cfg
):
232 Extracts real time from stderr and appends it to TIMING_FILE as csv:
233 "output binary; base bolt; cmp bolt"
236 def get_real_from_stderr(logline
):
237 return "; ".join(logline
.split())
239 for line
in main_err
:
241 main
= get_real_from_stderr(line
)
244 cmp = get_real_from_stderr(line
)
245 write_to(f
"{binary}; {main}; {cmp}\n", cfg
.TIMING_FILE
, "a")
246 # rewind logs after consumption
251 def clean_exit(tmp
, out
, exitcode
, cfg
):
252 # temp files are only cleaned on success
256 # report stdout and stderr from the main process
257 shutil
.copyfileobj(out
, sys
.stdout
)
261 def find_section(offset
, readelf_hdr
):
262 hdr
= readelf_hdr
.split("\n")
264 # extract sections table (parse objdump -hw output)
265 for line
in hdr
[5:-1]:
266 cols
= line
.strip().split()
267 # extract section offset
268 file_offset
= int(cols
[5], 16)
270 size
= int(cols
[2], 16)
271 if offset
>= file_offset
and offset
< file_offset
+ size
:
272 if sys
.stdout
.isatty(): # terminal supports colors
273 print(f
"\033[1m{line}\033[0m")
282 def main_config_generator():
283 parser
= argparse
.ArgumentParser()
284 parser
.add_argument("base_bolt", help="Full path to base llvm-bolt binary")
285 parser
.add_argument("cmp_bolt", help="Full path to cmp llvm-bolt binary")
289 help="Print subprocess invocation cmdline (default False)",
294 help="Preserve tmp folder on a clean exit "
295 "(tmp directory is preserved on crash by default)",
300 help=f
"Do not add `{MINIMIZE_DIFFS}` that is used "
301 "by default to reduce binary differences",
304 "--run_sequentially",
306 help="Run both binaries sequentially (default "
307 "in parallel). Use for timing comparison",
312 help="Compare bolt stdout/stderr (disabled by default)",
315 "--skip_binary_cmp", action
="store_true", help="Disable output comparison"
319 help="Override path to timing log " "file (default `timing.log` in CWD)",
321 args
= parser
.parse_args()
328 base_bolt = {args.base_bolt}
329 cmp_bolt = {args.cmp_bolt}"""
337 for key
, value
in d
.items():
344 # intercept output arguments
345 parser
= argparse
.ArgumentParser(add_help
=False)
346 for option
, help in BOLT_OUTPUT_OPTS
.items():
347 parser
.add_argument(option
, help=help)
348 args
, unknownargs
= parser
.parse_known_args()
349 args
= preprocess_args(args
)
350 cmp_args
= copy
.deepcopy(args
)
351 tmp
= tempfile
.mkdtemp()
352 cmp_args
= replace_cmp_path(tmp
, cmp_args
)
354 # reconstruct output arguments: prepend dash
355 args
= prepend_dash(args
)
357 # run both BOLT binaries
358 main_f
= open(os
.path
.join(tmp
, "main_bolt.stdout"), "w")
359 cmp_f
= open(os
.path
.join(tmp
, "cmp_bolt.stdout"), "w")
360 main_bolt
= run_bolt(cfg
.BASE_BOLT
, unknownargs
+ args
, main_f
, cfg
)
361 if cfg
.RUN_SEQUENTIALLY
:
362 main_out
= wait(main_bolt
, main_f
)
363 cmp_bolt
= run_bolt(cfg
.CMP_BOLT
, unknownargs
+ cmp_args
, cmp_f
, cfg
)
365 cmp_bolt
= run_bolt(cfg
.CMP_BOLT
, unknownargs
+ cmp_args
, cmp_f
, cfg
)
366 main_out
= wait(main_bolt
, main_f
)
367 cmp_out
= wait(cmp_bolt
, cmp_f
)
370 if main_bolt
.returncode
!= cmp_bolt
.returncode
:
372 exit("exitcode mismatch")
374 # don't compare output upon unsuccessful exit
375 if main_bolt
.returncode
!= 0:
376 cfg
.SKIP_BINARY_CMP
= True
378 # compare logs, skip_end=1 skips the line with time
380 compare_logs(main_out
, cmp_out
, skip_end
=1, str_input
=False)
381 if cfg
.COMPARE_OUTPUT
387 write_to(fmt_cmp(out
), os
.path
.join(tmp
, "summary.txt"))
388 exit("logs mismatch")
390 if os
.path
.basename(sys
.argv
[0]) == "llvm-boltdiff": # boltdiff mode
391 # no output binary to compare, so just exit
392 clean_exit(tmp
, main_out
, main_bolt
.returncode
, cfg
)
394 # compare binaries (using cmp)
395 main_binary
= args
[args
.index("-o") + 1]
396 cmp_binary
= cmp_args
[cmp_args
.index("-o") + 1]
397 if main_binary
== "/dev/null":
398 assert cmp_binary
== "/dev/null"
399 cfg
.SKIP_BINARY_CMP
= True
401 # report binary timing as csv: output binary; base bolt real; cmp bolt real
402 report_real_time(main_binary
, main_out
, cmp_out
, cfg
)
404 if not cfg
.SKIP_BINARY_CMP
:
405 # check if files exist
406 main_exists
= os
.path
.exists(main_binary
)
407 cmp_exists
= os
.path
.exists(cmp_binary
)
408 if main_exists
and cmp_exists
:
409 # proceed to comparison
411 elif not main_exists
and not cmp_exists
:
412 # neither exists, assume it's intended, skip comparison
413 clean_exit(tmp
, main_out
, main_bolt
.returncode
, cfg
)
415 assert not cmp_exists
416 exit(f
"{cmp_binary} doesn't exist")
418 assert not main_exists
419 exit(f
"{main_binary} doesn't exist")
421 cmp_proc
= subprocess
.run(
422 ["cmp", "-b", main_binary
, cmp_binary
], capture_output
=True, text
=True
424 if cmp_proc
.returncode
:
425 # check if output is an ELF file (magic bytes)
426 with
open(main_binary
, "rb") as f
:
428 if magic
!= b
"\x7fELF":
429 exit("output mismatch")
430 # check if ELF headers match
431 mismatch
, _
= compare_with(main_binary
, cmp_binary
, "readelf -We")
433 print(fmt_cmp(mismatch
))
434 write_to(fmt_cmp(mismatch
), os
.path
.join(tmp
, "headers.txt"))
435 exit("headers mismatch")
436 # if headers match, compare sections (skip line with filename)
437 mismatch
, hdr
= compare_with(
438 main_binary
, cmp_binary
, "objdump -hw", skip_begin
=2
441 # check which section has the first mismatch
442 mismatch_offset
= parse_cmp_offset(cmp_proc
.stdout
)
443 section
= find_section(mismatch_offset
, hdr
)
444 exit(f
"binary mismatch @{hex(mismatch_offset)} ({section})")
446 clean_exit(tmp
, main_out
, main_bolt
.returncode
, cfg
)
449 if __name__
== "__main__":
450 # config generator mode if the script is launched as is
451 if os
.path
.basename(__file__
) == "llvm-bolt-wrapper.py":
452 main_config_generator()
454 # llvm-bolt interceptor mode otherwise