llvm/utils/abtest.py

   1 #!/usr/bin/env python
   2 #
   3 # Given a previous good compile narrow down miscompiles.
   4 # Expects two directories named "before" and "after" each containing a set of
   5 # assembly or object files where the "after" version is assumed to be broken.
   6 # You also have to provide a script called "link_test". It is called with a
   7 # list of files which should be linked together and result tested. "link_test"
   8 # should returns with exitcode 0 if the linking and testing succeeded.
   9 #
  10 # If a response file is provided, only the object files that are listed in the
  11 # file are inspected. In addition, the "link_test" is called with a temporary
  12 # response file representing one iteration of bisection.
  13 #
  14 # abtest.py operates by taking all files from the "before" directory and
  15 # in each step replacing one of them with a file from the "bad" directory.
  16 #
  17 # Additionally you can perform the same steps with a single .s file. In this
  18 # mode functions are identified by " -- Begin function FunctionName" and
  19 # " -- End function" markers. The abtest.py then takes all
  20 # function from the file in the "before" directory and replaces one function
  21 # with the corresponding function from the "bad" file in each step.
  22 #
  23 # Example usage to identify miscompiled files:
  24 #    1. Create a link_test script, make it executable. Simple Example:
  25 #          clang "$@" -o /tmp/test && /tmp/test || echo "PROBLEM"
  26 #    2. Run the script to figure out which files are miscompiled:
  27 #       > ./abtest.py
  28 #       somefile.s: ok
  29 #       someotherfile.s: skipped: same content
  30 #       anotherfile.s: failed: './link_test' exitcode != 0
  31 #       ...
  32 # Example usage to identify miscompiled functions inside a file:
  33 #    3. Run the tests on a single file (assuming before/file.s and
  34 #       after/file.s exist)
  35 #       > ./abtest.py file.s
  36 #       funcname1 [0/XX]: ok
  37 #       funcname2 [1/XX]: ok
  38 #       funcname3 [2/XX]: skipped: same content
  39 #       funcname4 [3/XX]: failed: './link_test' exitcode != 0
  40 #       ...
  41 from fnmatch import filter
  42 from sys import stderr
  43 import argparse
  44 import filecmp
  45 import os
  46 import subprocess
  47 import sys
  48 import tempfile
  49
  50 # Specify LINKTEST via `--test`. Default value is './link_test'.
  51 LINKTEST = ""
  52 ESCAPE = "\033[%sm"
  53 BOLD = ESCAPE % "1"
  54 RED = ESCAPE % "31"
  55 NORMAL = ESCAPE % "0"
  56 FAILED = RED + "failed" + NORMAL
  57
  58
  59 def find(dir, file_filter=None):
  60     files = [walkdir[0] + "/" + file for walkdir in os.walk(dir) for file in walkdir[2]]
  61     if file_filter is not None:
  62         files = filter(files, file_filter)
  63     return sorted(files)
  64
  65
  66 def error(message):
  67     stderr.write("Error: %s\n" % (message,))
  68
  69
  70 def warn(message):
  71     stderr.write("Warning: %s\n" % (message,))
  72
  73
  74 def info(message):
  75     stderr.write("Info: %s\n" % (message,))
  76
  77
  78 def announce_test(name):
  79     stderr.write("%s%s%s: " % (BOLD, name, NORMAL))
  80     stderr.flush()
  81
  82
  83 def announce_result(result):
  84     stderr.write(result)
  85     stderr.write("\n")
  86     stderr.flush()
  87
  88
  89 def format_namelist(l):
  90     result = ", ".join(l[0:3])
  91     if len(l) > 3:
  92         result += "... (%d total)" % len(l)
  93     return result
  94
  95
  96 def check_sanity(choices, perform_test):
  97     announce_test("sanity check A")
  98     all_a = {name: a_b[0] for name, a_b in choices}
  99     res_a = perform_test(all_a)
 100     if res_a is not True:
 101         error("Picking all choices from A failed to pass the test")
 102         sys.exit(1)
 103
 104     announce_test("sanity check B (expecting failure)")
 105     all_b = {name: a_b[1] for name, a_b in choices}
 106     res_b = perform_test(all_b)
 107     if res_b is not False:
 108         error("Picking all choices from B did unexpectedly pass the test")
 109         sys.exit(1)
 110
 111
 112 def check_sequentially(choices, perform_test):
 113     known_good = set()
 114     all_a = {name: a_b[0] for name, a_b in choices}
 115     n = 1
 116     for name, a_b in sorted(choices):
 117         picks = dict(all_a)
 118         picks[name] = a_b[1]
 119         announce_test("checking %s [%d/%d]" % (name, n, len(choices)))
 120         n += 1
 121         res = perform_test(picks)
 122         if res is True:
 123             known_good.add(name)
 124     return known_good
 125
 126
 127 def check_bisect(choices, perform_test):
 128     known_good = set()
 129     if len(choices) == 0:
 130         return known_good
 131
 132     choice_map = dict(choices)
 133     all_a = {name: a_b[0] for name, a_b in choices}
 134
 135     def test_partition(partition, upcoming_partition):
 136         # Compute the maximum number of checks we have to do in the worst case.
 137         max_remaining_steps = len(partition) * 2 - 1
 138         if upcoming_partition is not None:
 139             max_remaining_steps += len(upcoming_partition) * 2 - 1
 140         for x in partitions_to_split:
 141             max_remaining_steps += (len(x) - 1) * 2
 142
 143         picks = dict(all_a)
 144         for x in partition:
 145             picks[x] = choice_map[x][1]
 146         announce_test(
 147             "checking %s [<=%d remaining]"
 148             % (format_namelist(partition), max_remaining_steps)
 149         )
 150         res = perform_test(picks)
 151         if res is True:
 152             known_good.update(partition)
 153         elif len(partition) > 1:
 154             partitions_to_split.insert(0, partition)
 155
 156     # TODO:
 157     # - We could optimize based on the knowledge that when splitting a failed
 158     #   partition into two and one side checks out okay then we can deduce that
 159     #   the other partition must be a failure.
 160     all_choice_names = [name for name, _ in choices]
 161     partitions_to_split = [all_choice_names]
 162     while len(partitions_to_split) > 0:
 163         partition = partitions_to_split.pop()
 164
 165         middle = len(partition) // 2
 166         left = partition[0:middle]
 167         right = partition[middle:]
 168
 169         if len(left) > 0:
 170             test_partition(left, right)
 171         assert len(right) > 0
 172         test_partition(right, None)
 173
 174     return known_good
 175
 176
 177 def extract_functions(file):
 178     functions = []
 179     in_function = None
 180     for line in open(file):
 181         marker = line.find(" -- Begin function ")
 182         if marker != -1:
 183             if in_function is not None:
 184                 warn("Missing end of function %s" % (in_function,))
 185             funcname = line[marker + 19 : -1]
 186             in_function = funcname
 187             text = line
 188             continue
 189
 190         marker = line.find(" -- End function")
 191         if marker != -1:
 192             text += line
 193             functions.append((in_function, text))
 194             in_function = None
 195             continue
 196
 197         if in_function is not None:
 198             text += line
 199     return functions
 200
 201
 202 def replace_functions(source, dest, replacements):
 203     out = open(dest, "w")
 204     skip = False
 205     in_function = None
 206     for line in open(source):
 207         marker = line.find(" -- Begin function ")
 208         if marker != -1:
 209             if in_function is not None:
 210                 warn("Missing end of function %s" % (in_function,))
 211             funcname = line[marker + 19 : -1]
 212             in_function = funcname
 213             replacement = replacements.get(in_function)
 214             if replacement is not None:
 215                 out.write(replacement)
 216                 skip = True
 217         else:
 218             marker = line.find(" -- End function")
 219             if marker != -1:
 220                 in_function = None
 221                 if skip:
 222                     skip = False
 223                     continue
 224
 225         if not skip:
 226             out.write(line)
 227
 228
 229 def testrun(files):
 230     linkline = "%s %s" % (
 231         LINKTEST,
 232         " ".join(files),
 233     )
 234     res = subprocess.call(linkline, shell=True)
 235     if res != 0:
 236         announce_result(FAILED + ": '%s' exitcode != 0" % LINKTEST)
 237         return False
 238     else:
 239         announce_result("ok")
 240         return True
 241
 242
 243 def prepare_files(gooddir, baddir, rspfile):
 244     files_a = []
 245     files_b = []
 246
 247     if rspfile is not None:
 248
 249         def get_basename(name):
 250             # remove prefix
 251             if name.startswith(gooddir):
 252                 return name[len(gooddir) :]
 253             if name.startswith(baddir):
 254                 return name[len(baddir) :]
 255             assert False, ""
 256
 257         with open(rspfile, "r") as rf:
 258             for line in rf.read().splitlines():
 259                 for obj in line.split():
 260                     assert not os.path.isabs(obj), "TODO: support abs path"
 261                     files_a.append(gooddir + "/" + obj)
 262                     files_b.append(baddir + "/" + obj)
 263     else:
 264         get_basename = lambda name: os.path.basename(name)
 265         files_a = find(gooddir, "*")
 266         files_b = find(baddir, "*")
 267
 268     basenames_a = set(map(get_basename, files_a))
 269     basenames_b = set(map(get_basename, files_b))
 270
 271     for name in files_b:
 272         basename = get_basename(name)
 273         if basename not in basenames_a:
 274             warn("There is no corresponding file to '%s' in %s" % (name, gooddir))
 275     choices = []
 276     skipped = []
 277     for name in files_a:
 278         basename = get_basename(name)
 279         if basename not in basenames_b:
 280             warn("There is no corresponding file to '%s' in %s" % (name, baddir))
 281
 282         file_a = gooddir + "/" + basename
 283         file_b = baddir + "/" + basename
 284         if filecmp.cmp(file_a, file_b):
 285             skipped.append(basename)
 286             continue
 287
 288         choice = (basename, (file_a, file_b))
 289         choices.append(choice)
 290
 291     if len(skipped) > 0:
 292         info("Skipped (same content): %s" % format_namelist(skipped))
 293
 294     def perform_test(picks):
 295         files = []
 296         # Note that we iterate over files_a so we don't change the order
 297         # (cannot use `picks` as it is a dictionary without order)
 298         for x in files_a:
 299             basename = get_basename(x)
 300             picked = picks.get(basename)
 301             if picked is None:
 302                 assert basename in skipped
 303                 files.append(x)
 304             else:
 305                 files.append(picked)
 306
 307         # If response file is used, create a temporary response file for the
 308         # picked files.
 309         if rspfile is not None:
 310             with tempfile.NamedTemporaryFile("w", suffix=".rsp", delete=False) as tf:
 311                 tf.write(" ".join(files))
 312                 tf.flush()
 313             ret = testrun([tf.name])
 314             os.remove(tf.name)
 315             return ret
 316
 317         return testrun(files)
 318
 319     return perform_test, choices
 320
 321
 322 def prepare_functions(to_check, gooddir, goodfile, badfile):
 323     files_good = find(gooddir, "*")
 324
 325     functions_a = extract_functions(goodfile)
 326     functions_a_map = dict(functions_a)
 327     functions_b_map = dict(extract_functions(badfile))
 328
 329     for name in functions_b_map.keys():
 330         if name not in functions_a_map:
 331             warn("Function '%s' missing from good file" % name)
 332     choices = []
 333     skipped = []
 334     for name, candidate_a in functions_a:
 335         candidate_b = functions_b_map.get(name)
 336         if candidate_b is None:
 337             warn("Function '%s' missing from bad file" % name)
 338             continue
 339         if candidate_a == candidate_b:
 340             skipped.append(name)
 341             continue
 342         choice = name, (candidate_a, candidate_b)
 343         choices.append(choice)
 344
 345     if len(skipped) > 0:
 346         info("Skipped (same content): %s" % format_namelist(skipped))
 347
 348     combined_file = "/tmp/combined2.s"
 349     files = []
 350     found_good_file = False
 351     for c in files_good:
 352         if os.path.basename(c) == to_check:
 353             found_good_file = True
 354             files.append(combined_file)
 355             continue
 356         files.append(c)
 357     assert found_good_file
 358
 359     def perform_test(picks):
 360         for name, x in picks.items():
 361             assert x == functions_a_map[name] or x == functions_b_map[name]
 362         replace_functions(goodfile, combined_file, picks)
 363         return testrun(files)
 364
 365     return perform_test, choices
 366
 367
 368 def main():
 369     parser = argparse.ArgumentParser()
 370     parser.add_argument("--a", dest="dir_a", default="before")
 371     parser.add_argument("--b", dest="dir_b", default="after")
 372     parser.add_argument("--rsp", default=None)
 373     parser.add_argument("--test", default="./link_test")
 374     parser.add_argument("--insane", help="Skip sanity check", action="store_true")
 375     parser.add_argument(
 376         "--seq", help="Check sequentially instead of bisection", action="store_true"
 377     )
 378     parser.add_argument("file", metavar="file", nargs="?")
 379     config = parser.parse_args()
 380
 381     gooddir = config.dir_a
 382     baddir = config.dir_b
 383     rspfile = config.rsp
 384     global LINKTEST
 385     LINKTEST = config.test
 386
 387     # Preparation phase: Creates a dictionary mapping names to a list of two
 388     # choices each. The bisection algorithm will pick one choice for each name
 389     # and then run the perform_test function on it.
 390     if config.file is not None:
 391         goodfile = gooddir + "/" + config.file
 392         badfile = baddir + "/" + config.file
 393         perform_test, choices = prepare_functions(
 394             config.file, gooddir, goodfile, badfile
 395         )
 396     else:
 397         perform_test, choices = prepare_files(gooddir, baddir, rspfile)
 398
 399     info("%d bisection choices" % len(choices))
 400
 401     # "Checking whether build environment is sane ..."
 402     if not config.insane:
 403         if not os.access(LINKTEST, os.X_OK):
 404             error("Expect '%s' to be present and executable" % (LINKTEST,))
 405             exit(1)
 406
 407         check_sanity(choices, perform_test)
 408
 409     if config.seq:
 410         known_good = check_sequentially(choices, perform_test)
 411     else:
 412         known_good = check_bisect(choices, perform_test)
 413
 414     stderr.write("")
 415     if len(known_good) != len(choices):
 416         stderr.write("== Failing ==\n")
 417         for name, _ in choices:
 418             if name not in known_good:
 419                 stderr.write("%s\n" % name)
 420     else:
 421         # This shouldn't happen when the sanity check works...
 422         # Maybe link_test isn't deterministic?
 423         stderr.write("Could not identify failing parts?!?")
 424
 425
 426 if __name__ == "__main__":
 427     main()