utils/update_mir_test_checks.py

   1 #!/usr/bin/env python
   2
   3 """Updates FileCheck checks in MIR tests.
   4
   5 This script is a utility to update MIR based tests with new FileCheck
   6 patterns.
   7
   8 The checks added by this script will cover the entire body of each
   9 function it handles. Virtual registers used are given names via
  10 FileCheck patterns, so if you do want to check a subset of the body it
  11 should be straightforward to trim out the irrelevant parts. None of
  12 the YAML metadata will be checked, other than function names.
  13
  14 If there are multiple llc commands in a test, the full set of checks
  15 will be repeated for each different check pattern. Checks for patterns
  16 that are common between different commands will be left as-is by
  17 default, or removed if the --remove-common-prefixes flag is provided.
  18 """
  19
  20 from __future__ import print_function
  21
  22 import argparse
  23 import collections
  24 import glob
  25 import os
  26 import re
  27 import subprocess
  28 import sys
  29
  30 from UpdateTestChecks import common
  31
  32 MIR_FUNC_NAME_RE = re.compile(r' *name: *(?P<func>[A-Za-z0-9_.-]+)')
  33 MIR_BODY_BEGIN_RE = re.compile(r' *body: *\|')
  34 MIR_BASIC_BLOCK_RE = re.compile(r' *bb\.[0-9]+.*:$')
  35 VREG_RE = re.compile(r'(%[0-9]+)(?::[a-z0-9_]+)?(?:\([<>a-z0-9 ]+\))?')
  36 VREG_DEF_RE = re.compile(
  37     r'^ *(?P<vregs>{0}(?:, {0})*) '
  38     r'= (?P<opcode>[A-Zt][A-Za-z0-9_]+)'.format(VREG_RE.pattern))
  39 MIR_PREFIX_DATA_RE = re.compile(r'^ *(;|bb.[0-9].*: *$|[a-z]+:( |$)|$)')
  40
  41 IR_FUNC_NAME_RE = re.compile(
  42     r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[A-Za-z0-9_.]+)\s*\(')
  43 IR_PREFIX_DATA_RE = re.compile(r'^ *(;|$)')
  44
  45 MIR_FUNC_RE = re.compile(
  46     r'^---$'
  47     r'\n'
  48     r'^ *name: *(?P<func>[A-Za-z0-9_.-]+)$'
  49     r'.*?'
  50     r'^ *body: *\|\n'
  51     r'(?P<body>.*?)\n'
  52     r'^\.\.\.$',
  53     flags=(re.M | re.S))
  54
  55
  56 class LLC:
  57     def __init__(self, bin):
  58         self.bin = bin
  59
  60     def __call__(self, args, ir):
  61         if ir.endswith('.mir'):
  62             args = '{} -x mir'.format(args)
  63         with open(ir) as ir_file:
  64             stdout = subprocess.check_output('{} {}'.format(self.bin, args),
  65                                              shell=True, stdin=ir_file)
  66             if sys.version_info[0] > 2:
  67               stdout = stdout.decode()
  68             # Fix line endings to unix CR style.
  69             stdout = stdout.replace('\r\n', '\n')
  70         return stdout
  71
  72
  73 class Run:
  74     def __init__(self, prefixes, cmd_args, triple):
  75         self.prefixes = prefixes
  76         self.cmd_args = cmd_args
  77         self.triple = triple
  78
  79     def __getitem__(self, index):
  80         return [self.prefixes, self.cmd_args, self.triple][index]
  81
  82
  83 def log(msg, verbose=True):
  84     if verbose:
  85         print(msg, file=sys.stderr)
  86
  87
  88 def find_triple_in_ir(lines, verbose=False):
  89     for l in lines:
  90         m = common.TRIPLE_IR_RE.match(l)
  91         if m:
  92             return m.group(1)
  93     return None
  94
  95
  96 def find_run_lines(test, lines, verbose=False):
  97     raw_lines = [m.group(1)
  98                  for m in [common.RUN_LINE_RE.match(l) for l in lines] if m]
  99     run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
 100     for l in raw_lines[1:]:
 101         if run_lines[-1].endswith("\\"):
 102             run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l
 103         else:
 104             run_lines.append(l)
 105     if verbose:
 106         log('Found {} RUN lines:'.format(len(run_lines)))
 107         for l in run_lines:
 108             log('  RUN: {}'.format(l))
 109     return run_lines
 110
 111
 112 def build_run_list(test, run_lines, verbose=False):
 113     run_list = []
 114     all_prefixes = []
 115     for l in run_lines:
 116         if '|' not in l:
 117             common.warn('Skipping unparseable RUN line: ' + l)
 118             continue
 119
 120         commands = [cmd.strip() for cmd in l.split('|', 1)]
 121         llc_cmd = commands[0]
 122         filecheck_cmd = commands[1] if len(commands) > 1 else ''
 123         common.verify_filecheck_prefixes(filecheck_cmd)
 124
 125         if not llc_cmd.startswith('llc '):
 126             common.warn('Skipping non-llc RUN line: {}'.format(l), test_file=test)
 127             continue
 128         if not filecheck_cmd.startswith('FileCheck '):
 129             common.warn('Skipping non-FileChecked RUN line: {}'.format(l),
 130                  test_file=test)
 131             continue
 132
 133         triple = None
 134         m = common.TRIPLE_ARG_RE.search(llc_cmd)
 135         if m:
 136             triple = m.group(1)
 137         # If we find -march but not -mtriple, use that.
 138         m = common.MARCH_ARG_RE.search(llc_cmd)
 139         if m and not triple:
 140             triple = '{}--'.format(m.group(1))
 141
 142         cmd_args = llc_cmd[len('llc'):].strip()
 143         cmd_args = cmd_args.replace('< %s', '').replace('%s', '').strip()
 144
 145         check_prefixes = [
 146             item
 147             for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd)
 148             for item in m.group(1).split(',')]
 149         if not check_prefixes:
 150             check_prefixes = ['CHECK']
 151         all_prefixes += check_prefixes
 152
 153         run_list.append(Run(check_prefixes, cmd_args, triple))
 154
 155     # Remove any common prefixes. We'll just leave those entirely alone.
 156     common_prefixes = set([prefix for prefix in all_prefixes
 157                            if all_prefixes.count(prefix) > 1])
 158     for run in run_list:
 159         run.prefixes = [p for p in run.prefixes if p not in common_prefixes]
 160
 161     return run_list, common_prefixes
 162
 163
 164 def find_functions_with_one_bb(lines, verbose=False):
 165     result = []
 166     cur_func = None
 167     bbs = 0
 168     for line in lines:
 169         m = MIR_FUNC_NAME_RE.match(line)
 170         if m:
 171             if bbs == 1:
 172                 result.append(cur_func)
 173             cur_func = m.group('func')
 174             bbs = 0
 175         m = MIR_BASIC_BLOCK_RE.match(line)
 176         if m:
 177             bbs += 1
 178     if bbs == 1:
 179         result.append(cur_func)
 180     return result
 181
 182
 183 def build_function_body_dictionary(test, raw_tool_output, triple, prefixes,
 184                                    func_dict, verbose):
 185     for m in MIR_FUNC_RE.finditer(raw_tool_output):
 186         func = m.group('func')
 187         body = m.group('body')
 188         if verbose:
 189             log('Processing function: {}'.format(func))
 190             for l in body.splitlines():
 191                 log('  {}'.format(l))
 192         for prefix in prefixes:
 193             if func in func_dict[prefix] and func_dict[prefix][func] != body:
 194                 common.warn('Found conflicting asm for prefix: {}'.format(prefix),
 195                      test_file=test)
 196             func_dict[prefix][func] = body
 197
 198
 199 def add_checks_for_function(test, output_lines, run_list, func_dict, func_name,
 200                             single_bb, verbose=False):
 201     printed_prefixes = set()
 202     for run in run_list:
 203         for prefix in run.prefixes:
 204             if prefix in printed_prefixes:
 205                 continue
 206             if not func_dict[prefix][func_name]:
 207                 continue
 208             # if printed_prefixes:
 209             #     # Add some space between different check prefixes.
 210             #     output_lines.append('')
 211             printed_prefixes.add(prefix)
 212             log('Adding {} lines for {}'.format(prefix, func_name), verbose)
 213             add_check_lines(test, output_lines, prefix, func_name, single_bb,
 214                             func_dict[prefix][func_name].splitlines())
 215             break
 216     return output_lines
 217
 218
 219 def add_check_lines(test, output_lines, prefix, func_name, single_bb,
 220                     func_body):
 221     if single_bb:
 222         # Don't bother checking the basic block label for a single BB
 223         func_body.pop(0)
 224
 225     if not func_body:
 226         common.warn('Function has no instructions to check: {}'.format(func_name),
 227              test_file=test)
 228         return
 229
 230     first_line = func_body[0]
 231     indent = len(first_line) - len(first_line.lstrip(' '))
 232     # A check comment, indented the appropriate amount
 233     check = '{:>{}}; {}'.format('', indent, prefix)
 234
 235     output_lines.append('{}-LABEL: name: {}'.format(check, func_name))
 236
 237     vreg_map = {}
 238     for func_line in func_body:
 239         if not func_line.strip():
 240             continue
 241         m = VREG_DEF_RE.match(func_line)
 242         if m:
 243             for vreg in VREG_RE.finditer(m.group('vregs')):
 244                 name = mangle_vreg(m.group('opcode'), vreg_map.values())
 245                 vreg_map[vreg.group(1)] = name
 246                 func_line = func_line.replace(
 247                     vreg.group(1), '[[{}:%[0-9]+]]'.format(name), 1)
 248         for number, name in vreg_map.items():
 249             func_line = re.sub(r'{}\b'.format(number), '[[{}]]'.format(name),
 250                                func_line)
 251         check_line = '{}: {}'.format(check, func_line[indent:]).rstrip()
 252         output_lines.append(check_line)
 253
 254
 255 def mangle_vreg(opcode, current_names):
 256     base = opcode
 257     # Simplify some common prefixes and suffixes
 258     if opcode.startswith('G_'):
 259         base = base[len('G_'):]
 260     if opcode.endswith('_PSEUDO'):
 261         base = base[:len('_PSEUDO')]
 262     # Shorten some common opcodes with long-ish names
 263     base = dict(IMPLICIT_DEF='DEF',
 264                 GLOBAL_VALUE='GV',
 265                 CONSTANT='C',
 266                 FCONSTANT='C',
 267                 MERGE_VALUES='MV',
 268                 UNMERGE_VALUES='UV',
 269                 INTRINSIC='INT',
 270                 INTRINSIC_W_SIDE_EFFECTS='INT',
 271                 INSERT_VECTOR_ELT='IVEC',
 272                 EXTRACT_VECTOR_ELT='EVEC',
 273                 SHUFFLE_VECTOR='SHUF').get(base, base)
 274     # Avoid ambiguity when opcodes end in numbers
 275     if len(base.rstrip('0123456789')) < len(base):
 276         base += '_'
 277
 278     i = 0
 279     for name in current_names:
 280         if name.rstrip('0123456789') == base:
 281             i += 1
 282     if i:
 283         return '{}{}'.format(base, i)
 284     return base
 285
 286
 287 def should_add_line_to_output(input_line, prefix_set):
 288     # Skip any check lines that we're handling.
 289     m = common.CHECK_RE.match(input_line)
 290     if m and m.group(1) in prefix_set:
 291         return False
 292     return True
 293
 294
def update_test_file(args, test):
    """Regenerate the FileCheck lines of one test file and rewrite it in place.

    args is the parsed command line (verbose, update_only,
    remove_common_prefixes and the llc callable are read from it); test is
    the path of the test file.

    The file is left untouched when its first line says it was autogenerated
    by a different script, when --update-only is given and the first line
    does not mention 'autogenerated', or when a RUN line has no triple on its
    command line and none is found in the file either.
    """
    log('Scanning for RUN lines in test file: {}'.format(test), args.verbose)
    with open(test) as fd:
        input_lines = [l.rstrip() for l in fd]

    script_name = os.path.basename(__file__)
    first_line = input_lines[0] if input_lines else ""
    # Don't clobber tests whose checks are maintained by another script.
    if 'autogenerated' in first_line and script_name not in first_line:
        common.warn("Skipping test which wasn't autogenerated by " +
                    script_name + ": " + test)
        return

    if args.update_only:
      if not first_line or 'autogenerated' not in first_line:
        common.warn("Skipping test which isn't autogenerated: " + test)
        return

    triple_in_ir = find_triple_in_ir(input_lines, args.verbose)
    run_lines = find_run_lines(test, input_lines, args.verbose)
    run_list, common_prefixes = build_run_list(test, run_lines, args.verbose)

    simple_functions = find_functions_with_one_bb(input_lines, args.verbose)

    # func_dict maps each check prefix to a dict of function name -> body
    # text, filled in from the output of each llc run below.
    func_dict = {}
    for run in run_list:
        for prefix in run.prefixes:
            func_dict.update({prefix: dict()})
    for prefixes, llc_args, triple_in_cmd in run_list:
        log('Extracted LLC cmd: llc {}'.format(llc_args), args.verbose)
        log('Extracted FileCheck prefixes: {}'.format(prefixes), args.verbose)

        raw_tool_output = args.llc(llc_args, test)
        if not triple_in_cmd and not triple_in_ir:
            common.warn('No triple found: skipping file', test_file=test)
            return

        build_function_body_dictionary(test, raw_tool_output,
                                       triple_in_cmd or triple_in_ir,
                                       prefixes, func_dict, args.verbose)

    state = 'toplevel'
    func_name = None
    # Existing check lines using these prefixes are dropped from the output
    # and replaced by freshly generated ones.
    prefix_set = set([prefix for run in run_list for prefix in run.prefixes])
    log('Rewriting FileCheck prefixes: {}'.format(prefix_set), args.verbose)

    if args.remove_common_prefixes:
        prefix_set.update(common_prefixes)
    elif common_prefixes:
        common.warn('Ignoring common prefixes: {}'.format(common_prefixes),
             test_file=test)

    comment_char = '#' if test.endswith('.mir') else ';'
    autogenerated_note = ('{} NOTE: Assertions have been autogenerated by '
                          'utils/{}'.format(comment_char, script_name))
    output_lines = []
    output_lines.append(autogenerated_note)

    # Walk the input with a small state machine. `state` tracks whether the
    # current line is outside any function, inside a MIR document, or inside
    # an IR or MIR function, so the checks can be inserted at the right spot.
    for input_line in input_lines:
        # The note was already emitted at the top; don't duplicate it.
        if input_line == autogenerated_note:
            continue

        if state == 'toplevel':
            # Top-level lines are always copied through unfiltered.
            m = IR_FUNC_NAME_RE.match(input_line)
            if m:
                state = 'ir function prefix'
                func_name = m.group('func')
            if input_line.rstrip('| \r\n') == '---':
                state = 'document'
            output_lines.append(input_line)
        elif state == 'document':
            m = MIR_FUNC_NAME_RE.match(input_line)
            if m:
                state = 'mir function metadata'
                func_name = m.group('func')
            # A '...' line ends the document; this takes precedence over a
            # name match on the same line.
            if input_line.strip() == '...':
                state = 'toplevel'
                func_name = None
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'mir function metadata':
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
            m = MIR_BODY_BEGIN_RE.match(input_line)
            if m:
                if func_name in simple_functions:
                    # If there's only one block, put the checks inside it
                    state = 'mir function prefix'
                    continue
                # Otherwise the checks go right after the 'body: |' line.
                state = 'mir function body'
                add_checks_for_function(test, output_lines, run_list,
                                        func_dict, func_name, single_bb=False,
                                        verbose=args.verbose)
        elif state == 'mir function prefix':
            # Copy lines matching MIR_PREFIX_DATA_RE through, then insert the
            # checks just before the first line that does not match.
            m = MIR_PREFIX_DATA_RE.match(input_line)
            if not m:
                state = 'mir function body'
                add_checks_for_function(test, output_lines, run_list,
                                        func_dict, func_name, single_bb=True,
                                        verbose=args.verbose)

            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'mir function body':
            if input_line.strip() == '...':
                state = 'toplevel'
                func_name = None
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'ir function prefix':
            # Copy lines matching IR_PREFIX_DATA_RE through, then insert the
            # checks just before the first line that does not match.
            m = IR_PREFIX_DATA_RE.match(input_line)
            if not m:
                state = 'ir function body'
                add_checks_for_function(test, output_lines, run_list,
                                        func_dict, func_name, single_bb=False,
                                        verbose=args.verbose)

            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'ir function body':
            # A line consisting of just '}' ends the IR function.
            if input_line.strip() == '}':
                state = 'toplevel'
                func_name = None
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)


    log('Writing {} lines to {}...'.format(len(output_lines), test), args.verbose)

    # Write bytes with explicit '\n' terminators and UTF-8 encoding.
    with open(test, 'wb') as fd:
        fd.writelines(['{}\n'.format(l).encode('utf-8') for l in output_lines])
 425
 426
 427 def main():
 428     parser = argparse.ArgumentParser(
 429         description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
 430     parser.add_argument('-v', '--verbose', action='store_true',
 431                         help='Show verbose output')
 432     parser.add_argument('--llc-binary', dest='llc', default='llc', type=LLC,
 433                         help='The "llc" binary to generate the test case with')
 434     parser.add_argument('--remove-common-prefixes', action='store_true',
 435                         help='Remove existing check lines whose prefixes are '
 436                              'shared between multiple commands')
 437     parser.add_argument('-u', '--update-only', action='store_true',
 438                         help='Only update test if it was already autogened')
 439     parser.add_argument('tests', nargs='+')
 440     args = parser.parse_args()
 441
 442     test_paths = [test for pattern in args.tests for test in glob.glob(pattern)]
 443     for test in test_paths:
 444         try:
 445             update_test_file(args, test)
 446         except Exception:
 447             common.warn('Error processing file', test_file=test)
 448             raise
 449
 450
 451 if __name__ == '__main__':
 452   main()