12 from types
import SimpleNamespace
13 from textwrap
import dedent
# 0. Prepare two BOLT build versions: base and compare.
# 1. Create the config by invoking this script with required options.
#    Save the config as `llvm-bolt-wrapper.ini` next to the script or
#    in the testing directory.
# In the base BOLT build directory:
# 2. Rename `llvm-bolt` to `llvm-bolt.real`
# 3. Create a symlink from this script to `llvm-bolt`
# 4. Create `llvm-bolt-wrapper.ini` and fill it using the example below.
# This script will compare binaries produced by base and compare BOLT, and
# report elapsed processing time and max RSS.
# read options from config file llvm-bolt-wrapper.ini in script CWD
# base_bolt = /full/path/to/llvm-bolt.real
# cmp_bolt = /full/path/to/other/llvm-bolt
# # optional, defaults to False
# # optional, defaults to timing.log in CWD
# timing_file = timing1.log
46 src_dir
= os
.path
.dirname(os
.path
.abspath(__file__
))
47 cfg
= configparser
.ConfigParser(allow_no_value
=True)
48 cfgs
= cfg
.read("llvm-bolt-wrapper.ini")
50 cfgs
= cfg
.read(os
.path
.join(src_dir
, "llvm-bolt-wrapper.ini"))
51 assert cfgs
, f
"llvm-bolt-wrapper.ini is not found in {os.getcwd()}"
54 # if key is not present in config, assume False
55 if key
not in cfg
["config"]:
57 # if key is present, but has no value, assume True
58 if not cfg
["config"][key
]:
60 # if key has associated value, interpret the value
61 return cfg
["config"].getboolean(key
)
64 # BOLT binary locations
65 "BASE_BOLT": cfg
["config"]["base_bolt"],
66 "CMP_BOLT": cfg
["config"]["cmp_bolt"],
68 "VERBOSE": get_cfg("verbose"),
69 "KEEP_TMP": get_cfg("keep_tmp"),
70 "NO_MINIMIZE": get_cfg("no_minimize"),
71 "RUN_SEQUENTIALLY": get_cfg("run_sequentially"),
72 "COMPARE_OUTPUT": get_cfg("compare_output"),
73 "SKIP_BINARY_CMP": get_cfg("skip_binary_cmp"),
74 "TIMING_FILE": cfg
["config"].get("timing_file", "timing.log"),
77 print(f
"Using config {os.path.abspath(cfgs[0])}")
78 return SimpleNamespace(**d
)
# perf2bolt mode
PERF2BOLT_MODE = ["-aggregate-only", "-ignore-build-id"]

# boltdiff mode
BOLTDIFF_MODE = ["-diff-only", "-o", "/dev/null"]

# options to suppress binary differences as much as possible
MINIMIZE_DIFFS = ["-bolt-info=0"]
90 # bolt output options that need to be intercepted
92 "-o": "BOLT output binary",
93 "-w": "BOLT recorded profile",
96 # regex patterns to exclude the line from log comparison
98 "BOLT-INFO: BOLT version",
101 r
"BOLT-INFO:.*data.*output data",
102 "WARNING: reading perf data directly",
def run_cmd(cmd, out_f, cfg):
    """
    Launch cmd as a child process without waiting for it to finish.

    cmd is the argument list to execute, out_f is an open file that receives
    both stdout and stderr of the child, cfg is the wrapper config namespace.
    Returns the subprocess.Popen handle of the started process.
    """
    # honor the `verbose` option: print subprocess invocation cmdline
    if cfg.VERBOSE:
        print(" ".join(cmd))
    # stderr is merged into stdout so that a single log file holds everything
    return subprocess.Popen(cmd, stdout=out_f, stderr=subprocess.STDOUT)
def run_bolt(bolt_path, bolt_args, out_f, cfg):
    """
    Start one BOLT binary under /usr/bin/time and return its process handle.

    bolt_path is the BOLT executable to run, bolt_args the arguments to pass
    through, out_f the open log file for the child's output, and cfg the
    wrapper config namespace (NO_MINIMIZE is read here).
    The extra options appended depend on the name this script was invoked as.
    """
    invoked_as = os.path.basename(sys.argv[0])
    p2b = invoked_as == "perf2bolt"  # perf2bolt mode
    bd = invoked_as == "llvm-boltdiff"  # boltdiff mode
    # "%e %M" makes time emit elapsed seconds and max RSS as the last log line
    cmd = ["/usr/bin/time", "-f", "%e %M", bolt_path] + bolt_args
    if p2b:
        # -ignore-build-id can occur at most once, hence remove it from cmd
        if "-ignore-build-id" in cmd:
            cmd.remove("-ignore-build-id")
        cmd += PERF2BOLT_MODE
    elif bd:
        cmd += BOLTDIFF_MODE
    elif not cfg.NO_MINIMIZE:
        cmd += MINIMIZE_DIFFS
    return run_cmd(cmd, out_f, cfg)
def prepend_dash(args: Mapping[AnyStr, AnyStr]) -> Sequence[AnyStr]:
    """
    Accepts parsed arguments and returns flat list with dash prepended to
    each option name.
    Example: Namespace(o='test.tmp') -> ['-o', 'test.tmp']
    """
    # emit "-key", value pairs in mapping order as one flat list
    return [token for key, value in args.items() for token in ("-" + key, value)]
def replace_cmp_path(tmp: AnyStr, args: Mapping[AnyStr, AnyStr]) -> Sequence[AnyStr]:
    """
    Keeps file names, but replaces the path to a temp folder.
    Example: Namespace(o='abc/test.tmp') -> Namespace(o='/tmp/tmpf9un/test.tmp')
    Except preserve /dev/null.

    Returns the rewritten options as a flat, dash-prefixed argument list.
    """

    def replace_path(path):
        # /dev/null must stay as is; relocating it would create a real file
        if path == "/dev/null":
            return path
        return os.path.join(tmp, os.path.basename(path))

    new_args = {key: replace_path(value) for key, value in args.items()}
    return prepend_dash(new_args)
def preprocess_args(args: argparse.Namespace) -> Mapping[AnyStr, AnyStr]:
    """
    Drop options that weren't parsed (e.g. -w), convert to a dict
    """
    # options absent from the command line carry a falsy value in the namespace
    return {key: value for key, value in vars(args).items() if value}
def write_to(txt, filename, mode="w"):
    """
    Write txt to filename.

    mode is passed to open(): the default "w" truncates the file first,
    "a" appends to it.
    """
    with open(filename, mode) as f:
        f.write(txt)
def wait(proc, fdesc):
    """
    Block until proc exits, then reopen its log for reading.

    proc is the process handle to wait on (its returncode is populated once
    this returns) and fdesc is the open file the process was writing to.
    Returns a new read-mode file object for the same path.
    """
    proc.wait()
    # release the write handle before the log is reopened for reading
    fdesc.close()
    return open(fdesc.name)
def compare_logs(main, cmp, skip_begin=0, skip_end=0, str_input=True):
    """
    Compares logs but allows for certain lines to be excluded from comparison.
    If str_input is True (default), the input is assumed to be a string,
    which is split into lines. Otherwise the input is assumed to be a file.
    skip_begin and skip_end drop that many line pairs from the start and the
    end of the comparison.
    Returns None on success, mismatch otherwise.
    """
    main_inp = main.splitlines() if str_input else main.readlines()
    cmp_inp = cmp.splitlines() if str_input else cmp.readlines()
    # rewind logs after consumption
    if not str_input:
        main.seek(0)
        cmp.seek(0)
    # zip stops at the shorter log, so extra trailing lines are not compared;
    # `-skip_end or None` keeps the full tail when skip_end is 0
    for lhs, rhs in list(zip(main_inp, cmp_inp))[skip_begin : -skip_end or None]:
        if lhs == rhs:
            continue
        # check skip patterns: both lines must contain the pattern
        if any(re.search(skip, lhs) and re.search(skip, rhs) for skip in SKIP_MATCH):
            continue
        # otherwise return mismatching lines
        return (lhs, rhs)
    return None
def fmt_cmp(cmp_tuple):
    """
    Format a pair of mismatching lines for printing or saving.

    cmp_tuple holds the base line first and the compared line second.
    A falsy value (no mismatch) yields an empty string.
    """
    if not cmp_tuple:
        return ""
    return f"main:\n{cmp_tuple[0]}\ncmp:\n{cmp_tuple[1]}\n"
def compare_with(lhs, rhs, cmd, skip_begin=0, skip_end=0):
    """
    Runs cmd on both lhs and rhs and compares stdout.
    Returns tuple (mismatch, lhs_stdout):
        - if stdout matches between two files, mismatch is None,
        - otherwise mismatch is a tuple of mismatching lines.
    Raises subprocess.CalledProcessError if cmd exits with a non-zero status.
    """

    def run(binary):
        # cmd is a whitespace-separated command line; the file goes last
        proc = subprocess.run(
            cmd.split() + [binary], text=True, check=True, capture_output=True
        )
        return proc.stdout

    run_lhs = run(lhs)
    run_rhs = run(rhs)
    mismatch = compare_logs(run_lhs, run_rhs, skip_begin, skip_end)
    return mismatch, run_lhs
def parse_cmp_offset(cmp_out):
    """
    Extracts byte number from cmp output:
    file1 file2 differ: byte X, line Y

    Returns the zero-based offset of the first differing byte.
    Raises ValueError if cmp_out has no `byte X,` field.
    """
    found = re.search(r"byte (\d+),", cmp_out)
    if found is None:
        raise ValueError(f"unexpected cmp output: {cmp_out!r}")
    # NOTE: cmp counts bytes starting from 1!
    return int(found.group(1)) - 1
229 def report_real_time(binary
, main_err
, cmp_err
, cfg
):
231 Extracts real time from stderr and appends it to TIMING FILE it as csv:
232 "output binary; base bolt; cmp bolt"
235 def get_real_from_stderr(logline
):
236 return "; ".join(logline
.split())
238 for line
in main_err
:
240 main
= get_real_from_stderr(line
)
243 cmp = get_real_from_stderr(line
)
244 write_to(f
"{binary}; {main}; {cmp}\n", cfg
.TIMING_FILE
, "a")
245 # rewind logs after consumption
250 def clean_exit(tmp
, out
, exitcode
, cfg
):
251 # temp files are only cleaned on success
255 # report stdout and stderr from the main process
256 shutil
.copyfileobj(out
, sys
.stdout
)
260 def find_section(offset
, readelf_hdr
):
261 hdr
= readelf_hdr
.split("\n")
263 # extract sections table (parse objdump -hw output)
264 for line
in hdr
[5:-1]:
265 cols
= line
.strip().split()
266 # extract section offset
267 file_offset
= int(cols
[5], 16)
269 size
= int(cols
[2], 16)
270 if offset
>= file_offset
and offset
< file_offset
+ size
:
271 if sys
.stdout
.isatty(): # terminal supports colors
272 print(f
"\033[1m{line}\033[0m")
281 def main_config_generator():
282 parser
= argparse
.ArgumentParser()
283 parser
.add_argument("base_bolt", help="Full path to base llvm-bolt binary")
284 parser
.add_argument("cmp_bolt", help="Full path to cmp llvm-bolt binary")
288 help="Print subprocess invocation cmdline (default False)",
293 help="Preserve tmp folder on a clean exit "
294 "(tmp directory is preserved on crash by default)",
299 help=f
"Do not add `{MINIMIZE_DIFFS}` that is used "
300 "by default to reduce binary differences",
303 "--run_sequentially",
305 help="Run both binaries sequentially (default "
306 "in parallel). Use for timing comparison",
311 help="Compare bolt stdout/stderr (disabled by default)",
314 "--skip_binary_cmp", action
="store_true", help="Disable output comparison"
318 help="Override path to timing log " "file (default `timing.log` in CWD)",
320 args
= parser
.parse_args()
327 base_bolt = {args.base_bolt}
328 cmp_bolt = {args.cmp_bolt}"""
336 for key
, value
in d
.items():
343 # intercept output arguments
344 parser
= argparse
.ArgumentParser(add_help
=False)
345 for option
, help in BOLT_OUTPUT_OPTS
.items():
346 parser
.add_argument(option
, help=help)
347 args
, unknownargs
= parser
.parse_known_args()
348 args
= preprocess_args(args
)
349 cmp_args
= copy
.deepcopy(args
)
350 tmp
= tempfile
.mkdtemp()
351 cmp_args
= replace_cmp_path(tmp
, cmp_args
)
353 # reconstruct output arguments: prepend dash
354 args
= prepend_dash(args
)
356 # run both BOLT binaries
357 main_f
= open(os
.path
.join(tmp
, "main_bolt.stdout"), "w")
358 cmp_f
= open(os
.path
.join(tmp
, "cmp_bolt.stdout"), "w")
359 main_bolt
= run_bolt(cfg
.BASE_BOLT
, unknownargs
+ args
, main_f
, cfg
)
360 if cfg
.RUN_SEQUENTIALLY
:
361 main_out
= wait(main_bolt
, main_f
)
362 cmp_bolt
= run_bolt(cfg
.CMP_BOLT
, unknownargs
+ cmp_args
, cmp_f
, cfg
)
364 cmp_bolt
= run_bolt(cfg
.CMP_BOLT
, unknownargs
+ cmp_args
, cmp_f
, cfg
)
365 main_out
= wait(main_bolt
, main_f
)
366 cmp_out
= wait(cmp_bolt
, cmp_f
)
369 if main_bolt
.returncode
!= cmp_bolt
.returncode
:
371 exit("exitcode mismatch")
373 # don't compare output upon unsuccessful exit
374 if main_bolt
.returncode
!= 0:
375 cfg
.SKIP_BINARY_CMP
= True
377 # compare logs, skip_end=1 skips the line with time
379 compare_logs(main_out
, cmp_out
, skip_end
=1, str_input
=False)
380 if cfg
.COMPARE_OUTPUT
386 write_to(fmt_cmp(out
), os
.path
.join(tmp
, "summary.txt"))
387 exit("logs mismatch")
389 if os
.path
.basename(sys
.argv
[0]) == "llvm-boltdiff": # boltdiff mode
390 # no output binary to compare, so just exit
391 clean_exit(tmp
, main_out
, main_bolt
.returncode
, cfg
)
393 # compare binaries (using cmp)
394 main_binary
= args
[args
.index("-o") + 1]
395 cmp_binary
= cmp_args
[cmp_args
.index("-o") + 1]
396 if main_binary
== "/dev/null":
397 assert cmp_binary
== "/dev/null"
398 cfg
.SKIP_BINARY_CMP
= True
400 # report binary timing as csv: output binary; base bolt real; cmp bolt real
401 report_real_time(main_binary
, main_out
, cmp_out
, cfg
)
403 if not cfg
.SKIP_BINARY_CMP
:
404 # check if files exist
405 main_exists
= os
.path
.exists(main_binary
)
406 cmp_exists
= os
.path
.exists(cmp_binary
)
407 if main_exists
and cmp_exists
:
408 # proceed to comparison
410 elif not main_exists
and not cmp_exists
:
411 # both don't exist, assume it's intended, skip comparison
412 clean_exit(tmp
, main_out
, main_bolt
.returncode
, cfg
)
414 assert not cmp_exists
415 exit(f
"{cmp_binary} doesn't exist")
417 assert not main_exists
418 exit(f
"{main_binary} doesn't exist")
420 cmp_proc
= subprocess
.run(
421 ["cmp", "-b", main_binary
, cmp_binary
], capture_output
=True, text
=True
423 if cmp_proc
.returncode
:
424 # check if output is an ELF file (magic bytes)
425 with
open(main_binary
, "rb") as f
:
427 if magic
!= b
"\x7fELF":
428 exit("output mismatch")
429 # check if ELF headers match
430 mismatch
, _
= compare_with(main_binary
, cmp_binary
, "readelf -We")
432 print(fmt_cmp(mismatch
))
433 write_to(fmt_cmp(mismatch
), os
.path
.join(tmp
, "headers.txt"))
434 exit("headers mismatch")
435 # if headers match, compare sections (skip line with filename)
436 mismatch
, hdr
= compare_with(
437 main_binary
, cmp_binary
, "objdump -hw", skip_begin
=2
440 # check which section has the first mismatch
441 mismatch_offset
= parse_cmp_offset(cmp_proc
.stdout
)
442 section
= find_section(mismatch_offset
, hdr
)
443 exit(f
"binary mismatch @{hex(mismatch_offset)} ({section})")
445 clean_exit(tmp
, main_out
, main_bolt
.returncode
, cfg
)
448 if __name__
== "__main__":
449 # config generator mode if the script is launched as is
450 if os
.path
.basename(__file__
) == "llvm-bolt-wrapper.py":
451 main_config_generator()
453 # llvm-bolt interceptor mode otherwise