llvm/utils/abtest.py

   1 #!/usr/bin/env python
   2 #
   3 # Given a previous good compile narrow down miscompiles.
   4 # Expects two directories named "before" and "after" each containing a set of
   5 # assembly or object files where the "after" version is assumed to be broken.
   6 # You also have to provide a script called "link_test". It is called with a
   7 # list of files which should be linked together and result tested. "link_test"
   8 # should returns with exitcode 0 if the linking and testing succeeded.
   9 #
  10 # If a response file is provided, only the object files that are listed in the
  11 # file are inspected. In addition, the "link_test" is called with a temporary
  12 # response file representing one iteration of bisection.
  13 #
  14 # abtest.py operates by taking all files from the "before" directory and
  15 # in each step replacing one of them with a file from the "bad" directory.
  16 #
  17 # Additionally you can perform the same steps with a single .s file. In this
  18 # mode functions are identified by " -- Begin function FunctionName" and
  19 # " -- End function" markers. The abtest.py then takes all
  20 # function from the file in the "before" directory and replaces one function
  21 # with the corresponding function from the "bad" file in each step.
  22 #
  23 # Example usage to identify miscompiled files:
  24 #    1. Create a link_test script, make it executable. Simple Example:
  25 #          clang "$@" -o /tmp/test && /tmp/test || echo "PROBLEM"
  26 #    2. Run the script to figure out which files are miscompiled:
  27 #       > ./abtest.py
  28 #       somefile.s: ok
  29 #       someotherfile.s: skipped: same content
  30 #       anotherfile.s: failed: './link_test' exitcode != 0
  31 #       ...
  32 # Example usage to identify miscompiled functions inside a file:
  33 #    3. Run the tests on a single file (assuming before/file.s and
  34 #       after/file.s exist)
  35 #       > ./abtest.py file.s
  36 #       funcname1 [0/XX]: ok
  37 #       funcname2 [1/XX]: ok
  38 #       funcname3 [2/XX]: skipped: same content
  39 #       funcname4 [3/XX]: failed: './link_test' exitcode != 0
  40 #       ...
  41 from fnmatch import filter
  42 from sys import stderr
  43 import argparse
  44 import filecmp
  45 import os
  46 import subprocess
  47 import sys
  48 import tempfile
  49
  50 # Specify LINKTEST via `--test`. Default value is './link_test'.
  51 LINKTEST = ""
  52 ESCAPE = "\033[%sm"
  53 BOLD = ESCAPE % "1"
  54 RED = ESCAPE % "31"
  55 NORMAL = ESCAPE % "0"
  56 FAILED = RED + "failed" + NORMAL
  57
  58
  59 def find(dir, file_filter=None):
  60     files = [
  61         walkdir[0]+"/"+file
  62         for walkdir in os.walk(dir)
  63         for file in walkdir[2]
  64     ]
  65     if file_filter is not None:
  66         files = filter(files, file_filter)
  67     return sorted(files)
  68
  69
  70 def error(message):
  71     stderr.write("Error: %s\n" % (message,))
  72
  73
  74 def warn(message):
  75     stderr.write("Warning: %s\n" % (message,))
  76
  77
  78 def info(message):
  79     stderr.write("Info: %s\n" % (message,))
  80
  81
  82 def announce_test(name):
  83     stderr.write("%s%s%s: " % (BOLD, name, NORMAL))
  84     stderr.flush()
  85
  86
  87 def announce_result(result):
  88     stderr.write(result)
  89     stderr.write("\n")
  90     stderr.flush()
  91
  92
  93 def format_namelist(l):
  94     result = ", ".join(l[0:3])
  95     if len(l) > 3:
  96         result += "... (%d total)" % len(l)
  97     return result
  98
  99
 100 def check_sanity(choices, perform_test):
 101     announce_test("sanity check A")
 102     all_a = {name: a_b[0] for name, a_b in choices}
 103     res_a = perform_test(all_a)
 104     if res_a is not True:
 105         error("Picking all choices from A failed to pass the test")
 106         sys.exit(1)
 107
 108     announce_test("sanity check B (expecting failure)")
 109     all_b = {name: a_b[1] for name, a_b in choices}
 110     res_b = perform_test(all_b)
 111     if res_b is not False:
 112         error("Picking all choices from B did unexpectedly pass the test")
 113         sys.exit(1)
 114
 115
 116 def check_sequentially(choices, perform_test):
 117     known_good = set()
 118     all_a = {name: a_b[0] for name, a_b in choices}
 119     n = 1
 120     for name, a_b in sorted(choices):
 121         picks = dict(all_a)
 122         picks[name] = a_b[1]
 123         announce_test("checking %s [%d/%d]" % (name, n, len(choices)))
 124         n += 1
 125         res = perform_test(picks)
 126         if res is True:
 127             known_good.add(name)
 128     return known_good
 129
 130
 131 def check_bisect(choices, perform_test):
 132     known_good = set()
 133     if len(choices) == 0:
 134         return known_good
 135
 136     choice_map = dict(choices)
 137     all_a = {name: a_b[0] for name, a_b in choices}
 138
 139     def test_partition(partition, upcoming_partition):
 140         # Compute the maximum number of checks we have to do in the worst case.
 141         max_remaining_steps = len(partition) * 2 - 1
 142         if upcoming_partition is not None:
 143             max_remaining_steps += len(upcoming_partition) * 2 - 1
 144         for x in partitions_to_split:
 145             max_remaining_steps += (len(x) - 1) * 2
 146
 147         picks = dict(all_a)
 148         for x in partition:
 149             picks[x] = choice_map[x][1]
 150         announce_test("checking %s [<=%d remaining]" %
 151                       (format_namelist(partition), max_remaining_steps))
 152         res = perform_test(picks)
 153         if res is True:
 154             known_good.update(partition)
 155         elif len(partition) > 1:
 156             partitions_to_split.insert(0, partition)
 157
 158     # TODO:
 159     # - We could optimize based on the knowledge that when splitting a failed
 160     #   partition into two and one side checks out okay then we can deduce that
 161     #   the other partition must be a failure.
 162     all_choice_names = [name for name, _ in choices]
 163     partitions_to_split = [all_choice_names]
 164     while len(partitions_to_split) > 0:
 165         partition = partitions_to_split.pop()
 166
 167         middle = len(partition) // 2
 168         left = partition[0:middle]
 169         right = partition[middle:]
 170
 171         if len(left) > 0:
 172             test_partition(left, right)
 173         assert len(right) > 0
 174         test_partition(right, None)
 175
 176     return known_good
 177
 178
 179 def extract_functions(file):
 180     functions = []
 181     in_function = None
 182     for line in open(file):
 183         marker = line.find(" -- Begin function ")
 184         if marker != -1:
 185             if in_function is not None:
 186                 warn("Missing end of function %s" % (in_function,))
 187             funcname = line[marker + 19:-1]
 188             in_function = funcname
 189             text = line
 190             continue
 191
 192         marker = line.find(" -- End function")
 193         if marker != -1:
 194             text += line
 195             functions.append((in_function, text))
 196             in_function = None
 197             continue
 198
 199         if in_function is not None:
 200             text += line
 201     return functions
 202
 203
 204 def replace_functions(source, dest, replacements):
 205     out = open(dest, "w")
 206     skip = False
 207     in_function = None
 208     for line in open(source):
 209         marker = line.find(" -- Begin function ")
 210         if marker != -1:
 211             if in_function is not None:
 212                 warn("Missing end of function %s" % (in_function,))
 213             funcname = line[marker + 19:-1]
 214             in_function = funcname
 215             replacement = replacements.get(in_function)
 216             if replacement is not None:
 217                 out.write(replacement)
 218                 skip = True
 219         else:
 220             marker = line.find(" -- End function")
 221             if marker != -1:
 222                 in_function = None
 223                 if skip:
 224                     skip = False
 225                     continue
 226
 227         if not skip:
 228             out.write(line)
 229
 230
 231 def testrun(files):
 232     linkline = "%s %s" % (LINKTEST, " ".join(files),)
 233     res = subprocess.call(linkline, shell=True)
 234     if res != 0:
 235         announce_result(FAILED + ": '%s' exitcode != 0" % LINKTEST)
 236         return False
 237     else:
 238         announce_result("ok")
 239         return True
 240
 241
 242 def prepare_files(gooddir, baddir, rspfile):
 243     files_a = []
 244     files_b = []
 245
 246     if rspfile is not None:
 247         def get_basename(name):
 248             # remove prefix
 249             if name.startswith(gooddir):
 250                 return name[len(gooddir):]
 251             if name.startswith(baddir):
 252                 return name[len(baddir):]
 253             assert False, ""
 254
 255         with open(rspfile, "r") as rf:
 256             for line in rf.read().splitlines():
 257                 for obj in line.split():
 258                     assert not os.path.isabs(obj), "TODO: support abs path"
 259                     files_a.append(gooddir + "/" + obj)
 260                     files_b.append(baddir + "/" + obj)
 261     else:
 262         get_basename = lambda name: os.path.basename(name)
 263         files_a = find(gooddir, "*")
 264         files_b = find(baddir, "*")
 265
 266     basenames_a = set(map(get_basename, files_a))
 267     basenames_b = set(map(get_basename, files_b))
 268
 269     for name in files_b:
 270         basename = get_basename(name)
 271         if basename not in basenames_a:
 272             warn("There is no corresponding file to '%s' in %s" %
 273                  (name, gooddir))
 274     choices = []
 275     skipped = []
 276     for name in files_a:
 277         basename = get_basename(name)
 278         if basename not in basenames_b:
 279             warn("There is no corresponding file to '%s' in %s" %
 280                  (name, baddir))
 281
 282         file_a = gooddir + "/" + basename
 283         file_b = baddir + "/" + basename
 284         if filecmp.cmp(file_a, file_b):
 285             skipped.append(basename)
 286             continue
 287
 288         choice = (basename, (file_a, file_b))
 289         choices.append(choice)
 290
 291     if len(skipped) > 0:
 292         info("Skipped (same content): %s" % format_namelist(skipped))
 293
 294     def perform_test(picks):
 295         files = []
 296         # Note that we iterate over files_a so we don't change the order
 297         # (cannot use `picks` as it is a dictionary without order)
 298         for x in files_a:
 299             basename = get_basename(x)
 300             picked = picks.get(basename)
 301             if picked is None:
 302                 assert basename in skipped
 303                 files.append(x)
 304             else:
 305                 files.append(picked)
 306
 307         # If response file is used, create a temporary response file for the
 308         # picked files.
 309         if rspfile is not None:
 310             with tempfile.NamedTemporaryFile('w', suffix='.rsp',
 311                                              delete=False) as tf:
 312                 tf.write(" ".join(files))
 313                 tf.flush()
 314             ret = testrun([tf.name])
 315             os.remove(tf.name)
 316             return ret
 317
 318         return testrun(files)
 319
 320     return perform_test, choices
 321
 322
 323 def prepare_functions(to_check, gooddir, goodfile, badfile):
 324     files_good = find(gooddir, "*")
 325
 326     functions_a = extract_functions(goodfile)
 327     functions_a_map = dict(functions_a)
 328     functions_b_map = dict(extract_functions(badfile))
 329
 330     for name in functions_b_map.keys():
 331         if name not in functions_a_map:
 332             warn("Function '%s' missing from good file" % name)
 333     choices = []
 334     skipped = []
 335     for name, candidate_a in functions_a:
 336         candidate_b = functions_b_map.get(name)
 337         if candidate_b is None:
 338             warn("Function '%s' missing from bad file" % name)
 339             continue
 340         if candidate_a == candidate_b:
 341             skipped.append(name)
 342             continue
 343         choice = name, (candidate_a, candidate_b)
 344         choices.append(choice)
 345
 346     if len(skipped) > 0:
 347         info("Skipped (same content): %s" % format_namelist(skipped))
 348
 349     combined_file = '/tmp/combined2.s'
 350     files = []
 351     found_good_file = False
 352     for c in files_good:
 353         if os.path.basename(c) == to_check:
 354             found_good_file = True
 355             files.append(combined_file)
 356             continue
 357         files.append(c)
 358     assert found_good_file
 359
 360     def perform_test(picks):
 361         for name, x in picks.items():
 362             assert x == functions_a_map[name] or x == functions_b_map[name]
 363         replace_functions(goodfile, combined_file, picks)
 364         return testrun(files)
 365     return perform_test, choices
 366
 367
 368 def main():
 369     parser = argparse.ArgumentParser()
 370     parser.add_argument('--a', dest='dir_a', default='before')
 371     parser.add_argument('--b', dest='dir_b', default='after')
 372     parser.add_argument('--rsp', default=None)
 373     parser.add_argument('--test', default='./link_test')
 374     parser.add_argument('--insane', help='Skip sanity check',
 375                         action='store_true')
 376     parser.add_argument('--seq',
 377                         help='Check sequentially instead of bisection',
 378                         action='store_true')
 379     parser.add_argument('file', metavar='file', nargs='?')
 380     config = parser.parse_args()
 381
 382     gooddir = config.dir_a
 383     baddir = config.dir_b
 384     rspfile = config.rsp
 385     global LINKTEST
 386     LINKTEST = config.test
 387
 388     # Preparation phase: Creates a dictionary mapping names to a list of two
 389     # choices each. The bisection algorithm will pick one choice for each name
 390     # and then run the perform_test function on it.
 391     if config.file is not None:
 392         goodfile = gooddir + "/" + config.file
 393         badfile = baddir + "/" + config.file
 394         perform_test, choices = prepare_functions(config.file, gooddir,
 395                                                   goodfile, badfile)
 396     else:
 397         perform_test, choices = prepare_files(gooddir, baddir, rspfile)
 398
 399     info("%d bisection choices" % len(choices))
 400
 401     # "Checking whether build environment is sane ..."
 402     if not config.insane:
 403         if not os.access(LINKTEST, os.X_OK):
 404             error("Expect '%s' to be present and executable" % (LINKTEST,))
 405             exit(1)
 406
 407         check_sanity(choices, perform_test)
 408
 409     if config.seq:
 410         known_good = check_sequentially(choices, perform_test)
 411     else:
 412         known_good = check_bisect(choices, perform_test)
 413
 414     stderr.write("")
 415     if len(known_good) != len(choices):
 416         stderr.write("== Failing ==\n")
 417         for name, _ in choices:
 418             if name not in known_good:
 419                 stderr.write("%s\n" % name)
 420     else:
 421         # This shouldn't happen when the sanity check works...
 422         # Maybe link_test isn't deterministic?
 423         stderr.write("Could not identify failing parts?!?")
 424
 425
 426 if __name__ == '__main__':
 427     main()