utils/update_analyze_test_checks.py

   1 #!/usr/bin/env python
   2
   3 """A script to generate FileCheck statements for 'opt' analysis tests.
   4
   5 This script is a utility to update LLVM opt analysis test cases with new
   6 FileCheck patterns. It can either update all of the tests in the file or
   7 a single test function.
   8
   9 Example usage:
  10 $ update_analyze_test_checks.py --opt-binary=../bin/opt test/foo.ll
  11
  12 Workflow:
  13 1. Make a compiler patch that requires updating some number of FileCheck lines
  14    in regression test files.
  15 2. Save the patch and revert it from your local work area.
  16 3. Update the RUN-lines in the affected regression tests to look canonical.
  17    Example: "; RUN: opt < %s -analyze -cost-model -S | FileCheck %s"
  18 4. Refresh the FileCheck lines for either the entire file or select functions by
  19    running this script.
  20 5. Commit the fresh baseline of checks.
  21 6. Apply your patch from step 1 and rebuild your local binaries.
  22 7. Re-run this script on affected regression tests.
  23 8. Check the diffs to ensure the script has done something reasonable.
  24 9. Submit a patch including the regression test diffs for review.
  25
  26 A common pattern is to have the script insert complete checking of every
  27 instruction. Then, edit it down to only check the relevant instructions.
  28 The script is designed to make adding checks to a test case fast, it is *not*
  29 designed to be authoritative about what constitutes a good test!
  30 """
  31
  32 from __future__ import print_function
  33
  34 import argparse
  35 import glob
  36 import itertools
  37 import os         # Used to advertise this file's name ("autogenerated_note").
  38 import string
  39 import subprocess
  40 import sys
  41 import tempfile
  42 import re
  43
  44 from UpdateTestChecks import common
  45
  46 ADVERT = '; NOTE: Assertions have been autogenerated by '
  47
  48 # RegEx: this is where the magic happens.
  49
  50 IR_FUNCTION_RE = re.compile('^\s*define\s+(?:internal\s+)?[^@]*@([\w-]+)\s*\(')
  51
  52 def main():
  53   from argparse import RawTextHelpFormatter
  54   parser = argparse.ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
  55   parser.add_argument('-v', '--verbose', action='store_true',
  56                       help='Show verbose output')
  57   parser.add_argument('--opt-binary', default='opt',
  58                       help='The opt binary used to generate the test case')
  59   parser.add_argument(
  60       '--function', help='The function in the test file to update')
  61   parser.add_argument('-u', '--update-only', action='store_true',
  62                       help='Only update test if it was already autogened')
  63   parser.add_argument('tests', nargs='+')
  64   args = parser.parse_args()
  65
  66   script_name = os.path.basename(__file__)
  67   autogenerated_note = (ADVERT + 'utils/' + script_name)
  68
  69   opt_basename = os.path.basename(args.opt_binary)
  70   if (opt_basename != "opt"):
  71     common.error('Unexpected opt name: ' + opt_basename)
  72     sys.exit(1)
  73
  74   test_paths = [test for pattern in args.tests for test in glob.glob(pattern)]
  75   for test in test_paths:
  76     if args.verbose:
  77       print('Scanning for RUN lines in test file: %s' % (test,), file=sys.stderr)
  78     with open(test) as f:
  79       input_lines = [l.rstrip() for l in f]
  80
  81     first_line = input_lines[0] if input_lines else ""
  82     if 'autogenerated' in first_line and script_name not in first_line:
  83       common.warn("Skipping test which wasn't autogenerated by " + script_name + ": " + test)
  84       continue
  85
  86     if args.update_only:
  87       if not first_line or 'autogenerated' not in first_line:
  88         common.warn("Skipping test which isn't autogenerated: " + test)
  89         continue
  90
  91     raw_lines = [m.group(1)
  92                  for m in [common.RUN_LINE_RE.match(l) for l in input_lines] if m]
  93     run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
  94     for l in raw_lines[1:]:
  95       if run_lines[-1].endswith("\\"):
  96         run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l
  97       else:
  98         run_lines.append(l)
  99
 100     if args.verbose:
 101       print('Found %d RUN lines:' % (len(run_lines),), file=sys.stderr)
 102       for l in run_lines:
 103         print('  RUN: ' + l, file=sys.stderr)
 104
 105     prefix_list = []
 106     for l in run_lines:
 107       if '|' not in l:
 108         common.warn('Skipping unparseable RUN line: ' + l)
 109         continue
 110
 111       (tool_cmd, filecheck_cmd) = tuple([cmd.strip() for cmd in l.split('|', 1)])
 112       common.verify_filecheck_prefixes(filecheck_cmd)
 113
 114       if not tool_cmd.startswith(opt_basename + ' '):
 115         common.warn('WSkipping non-%s RUN line: %s' % (opt_basename, l))
 116         continue
 117
 118       if not filecheck_cmd.startswith('FileCheck '):
 119         common.warn('Skipping non-FileChecked RUN line: ' + l)
 120         continue
 121
 122       tool_cmd_args = tool_cmd[len(opt_basename):].strip()
 123       tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()
 124
 125       check_prefixes = [item for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd)
 126                                for item in m.group(1).split(',')]
 127       if not check_prefixes:
 128         check_prefixes = ['CHECK']
 129
 130       # FIXME: We should use multiple check prefixes to common check lines. For
 131       # now, we just ignore all but the last.
 132       prefix_list.append((check_prefixes, tool_cmd_args))
 133
 134     func_dict = {}
 135     for prefixes, _ in prefix_list:
 136       for prefix in prefixes:
 137         func_dict.update({prefix: dict()})
 138     for prefixes, opt_args in prefix_list:
 139       if args.verbose:
 140         print('Extracted opt cmd: ' + opt_basename + ' ' + opt_args, file=sys.stderr)
 141         print('Extracted FileCheck prefixes: ' + str(prefixes), file=sys.stderr)
 142
 143       raw_tool_outputs = common.invoke_tool(args.opt_binary, opt_args, test)
 144
 145       # Split analysis outputs by "Printing analysis " declarations.
 146       for raw_tool_output in re.split(r'Printing analysis ', raw_tool_outputs):
 147         common.build_function_body_dictionary(
 148           common.ANALYZE_FUNCTION_RE, common.scrub_body, [],
 149           raw_tool_output, prefixes, func_dict, args.verbose)
 150
 151     is_in_function = False
 152     is_in_function_start = False
 153     prefix_set = set([prefix for prefixes, _ in prefix_list for prefix in prefixes])
 154     if args.verbose:
 155       print('Rewriting FileCheck prefixes: %s' % (prefix_set,), file=sys.stderr)
 156     output_lines = []
 157     output_lines.append(autogenerated_note)
 158
 159     for input_line in input_lines:
 160       if is_in_function_start:
 161         if input_line == '':
 162           continue
 163         if input_line.lstrip().startswith(';'):
 164           m = common.CHECK_RE.match(input_line)
 165           if not m or m.group(1) not in prefix_set:
 166             output_lines.append(input_line)
 167             continue
 168
 169         # Print out the various check lines here.
 170         common.add_analyze_checks(output_lines, ';', prefix_list, func_dict, func_name)
 171         is_in_function_start = False
 172
 173       if is_in_function:
 174         if common.should_add_line_to_output(input_line, prefix_set):
 175           # This input line of the function body will go as-is into the output.
 176           # Except make leading whitespace uniform: 2 spaces.
 177           input_line = common.SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)
 178           output_lines.append(input_line)
 179         else:
 180           continue
 181         if input_line.strip() == '}':
 182           is_in_function = False
 183         continue
 184
 185       # Discard any previous script advertising.
 186       if input_line.startswith(ADVERT):
 187         continue
 188
 189       # If it's outside a function, it just gets copied to the output.
 190       output_lines.append(input_line)
 191
 192       m = IR_FUNCTION_RE.match(input_line)
 193       if not m:
 194         continue
 195       func_name = m.group(1)
 196       if args.function is not None and func_name != args.function:
 197         # When filtering on a specific function, skip all others.
 198         continue
 199       is_in_function = is_in_function_start = True
 200
 201     if args.verbose:
 202       print('Writing %d lines to %s...' % (len(output_lines), test), file=sys.stderr)
 203
 204     with open(test, 'wb') as f:
 205       f.writelines(['{}\n'.format(l).encode('utf-8') for l in output_lines])
 206
 207
# Run the updater only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
  main()