utils/update_mir_test_checks.py

   1 #!/usr/bin/env python
   2
   3 """Updates FileCheck checks in MIR tests.
   4
   5 This script is a utility to update MIR based tests with new FileCheck
   6 patterns.
   7
   8 The checks added by this script will cover the entire body of each
   9 function it handles. Virtual registers used are given names via
  10 FileCheck patterns, so if you do want to check a subset of the body it
  11 should be straightforward to trim out the irrelevant parts. None of
  12 the YAML metadata will be checked, other than function names.
  13
  14 If there are multiple llc commands in a test, the full set of checks
  15 will be repeated for each different check pattern. Checks for patterns
  16 that are common between different commands will be left as-is by
  17 default, or removed if the --remove-common-prefixes flag is provided.
  18 """
  19
  20 from __future__ import print_function
  21
  22 import argparse
  23 import collections
  24 import glob
  25 import os
  26 import re
  27 import subprocess
  28 import sys
  29
  30 from UpdateTestChecks import common
  31
# Matches the `name:` field of a MIR function and captures the function name.
MIR_FUNC_NAME_RE = re.compile(r' *name: *(?P<func>[A-Za-z0-9_.-]+)')
# Matches the `body: |` line that opens a MIR function body.
MIR_BODY_BEGIN_RE = re.compile(r' *body: *\|')
# Matches a basic block label line such as `bb.0:` or `bb.1.entry:`.
MIR_BASIC_BLOCK_RE = re.compile(r' *bb\.[0-9]+.*:$')
# Matches a virtual register reference `%N`, optionally followed by a
# `:class` and/or `(type)` suffix. Group 1 is the bare `%N`.
VREG_RE = re.compile(r'(%[0-9]+)(?::[a-z0-9_]+)?(?:\([<>a-z0-9 ]+\))?')
# Matches the start of a line that defines one or more virtual registers,
# capturing the comma separated register list and the opcode after `=`.
VREG_DEF_RE = re.compile(
    r'^ *(?P<vregs>{0}(?:, {0})*) '
    r'= (?P<opcode>[A-Zt][A-Za-z0-9_]+)'.format(VREG_RE.pattern))
# Matches lines that may sit in front of the first instruction of a body:
# `;` comments, a basic block label, lowercase `key:` lines, or blank lines.
# NOTE(review): the `.` in `bb.` is unescaped here, so it matches any
# character - confirm whether `bb\.` was intended.
MIR_PREFIX_DATA_RE = re.compile(r'^ *(;|bb.[0-9].*: *$|[a-z]+:( |$)|$)')

# Matches an LLVM IR `define` line and captures the function name.
IR_FUNC_NAME_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[A-Za-z0-9_.]+)\s*\(')
# Matches comment or blank lines that may precede an IR function's body.
IR_PREFIX_DATA_RE = re.compile(r'^ *(;|$)')

# Matches one whole MIR function document, from the `---` line to the
# closing `...` line, capturing the function name and the text of its body.
# re.S lets `.` span newlines; re.M anchors `^`/`$` at each line.
MIR_FUNC_RE = re.compile(
    r'^---$'
    r'\n'
    r'^ *name: *(?P<func>[A-Za-z0-9_.-]+)$'
    r'.*?'
    r'^ *body: *\|\n'
    r'(?P<body>.*?)\n'
    r'^\.\.\.$',
    flags=(re.M | re.S))
  54
  55
  56 class LLC:
  57     def __init__(self, bin):
  58         self.bin = bin
  59
  60     def __call__(self, args, ir):
  61         if ir.endswith('.mir'):
  62             args = '{} -x mir'.format(args)
  63         with open(ir) as ir_file:
  64             stdout = subprocess.check_output('{} {}'.format(self.bin, args),
  65                                              shell=True, stdin=ir_file)
  66             if sys.version_info[0] > 2:
  67               stdout = stdout.decode()
  68             # Fix line endings to unix CR style.
  69             stdout = stdout.replace('\r\n', '\n')
  70         return stdout
  71
  72
  73 class Run:
  74     def __init__(self, prefixes, cmd_args, triple):
  75         self.prefixes = prefixes
  76         self.cmd_args = cmd_args
  77         self.triple = triple
  78
  79     def __getitem__(self, index):
  80         return [self.prefixes, self.cmd_args, self.triple][index]
  81
  82
  83 def log(msg, verbose=True):
  84     if verbose:
  85         print(msg, file=sys.stderr)
  86
  87
  88 def warn(msg, test_file=None):
  89     if test_file:
  90         msg = '{}: {}'.format(test_file, msg)
  91     print('WARNING: {}'.format(msg), file=sys.stderr)
  92
  93
  94 def find_triple_in_ir(lines, verbose=False):
  95     for l in lines:
  96         m = common.TRIPLE_IR_RE.match(l)
  97         if m:
  98             return m.group(1)
  99     return None
 100
 101
 102 def find_run_lines(test, lines, verbose=False):
 103     raw_lines = [m.group(1)
 104                  for m in [common.RUN_LINE_RE.match(l) for l in lines] if m]
 105     run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
 106     for l in raw_lines[1:]:
 107         if run_lines[-1].endswith("\\"):
 108             run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l
 109         else:
 110             run_lines.append(l)
 111     if verbose:
 112         log('Found {} RUN lines:'.format(len(run_lines)))
 113         for l in run_lines:
 114             log('  RUN: {}'.format(l))
 115     return run_lines
 116
 117
 118 def build_run_list(test, run_lines, verbose=False):
 119     run_list = []
 120     all_prefixes = []
 121     for l in run_lines:
 122         commands = [cmd.strip() for cmd in l.split('|', 1)]
 123         llc_cmd = commands[0]
 124         filecheck_cmd = commands[1] if len(commands) > 1 else ''
 125
 126         if not llc_cmd.startswith('llc '):
 127             warn('Skipping non-llc RUN line: {}'.format(l), test_file=test)
 128             continue
 129         if not filecheck_cmd.startswith('FileCheck '):
 130             warn('Skipping non-FileChecked RUN line: {}'.format(l),
 131                  test_file=test)
 132             continue
 133
 134         triple = None
 135         m = common.TRIPLE_ARG_RE.search(llc_cmd)
 136         if m:
 137             triple = m.group(1)
 138         # If we find -march but not -mtriple, use that.
 139         m = common.MARCH_ARG_RE.search(llc_cmd)
 140         if m and not triple:
 141             triple = '{}--'.format(m.group(1))
 142
 143         cmd_args = llc_cmd[len('llc'):].strip()
 144         cmd_args = cmd_args.replace('< %s', '').replace('%s', '').strip()
 145
 146         check_prefixes = [
 147             item
 148             for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd)
 149             for item in m.group(1).split(',')]
 150         if not check_prefixes:
 151             check_prefixes = ['CHECK']
 152         all_prefixes += check_prefixes
 153
 154         run_list.append(Run(check_prefixes, cmd_args, triple))
 155
 156     # Remove any common prefixes. We'll just leave those entirely alone.
 157     common_prefixes = set([prefix for prefix in all_prefixes
 158                            if all_prefixes.count(prefix) > 1])
 159     for run in run_list:
 160         run.prefixes = [p for p in run.prefixes if p not in common_prefixes]
 161
 162     return run_list, common_prefixes
 163
 164
 165 def find_functions_with_one_bb(lines, verbose=False):
 166     result = []
 167     cur_func = None
 168     bbs = 0
 169     for line in lines:
 170         m = MIR_FUNC_NAME_RE.match(line)
 171         if m:
 172             if bbs == 1:
 173                 result.append(cur_func)
 174             cur_func = m.group('func')
 175             bbs = 0
 176         m = MIR_BASIC_BLOCK_RE.match(line)
 177         if m:
 178             bbs += 1
 179     if bbs == 1:
 180         result.append(cur_func)
 181     return result
 182
 183
 184 def build_function_body_dictionary(test, raw_tool_output, triple, prefixes,
 185                                    func_dict, verbose):
 186     for m in MIR_FUNC_RE.finditer(raw_tool_output):
 187         func = m.group('func')
 188         body = m.group('body')
 189         if verbose:
 190             log('Processing function: {}'.format(func))
 191             for l in body.splitlines():
 192                 log('  {}'.format(l))
 193         for prefix in prefixes:
 194             if func in func_dict[prefix] and func_dict[prefix][func] != body:
 195                 warn('Found conflicting asm for prefix: {}'.format(prefix),
 196                      test_file=test)
 197             func_dict[prefix][func] = body
 198
 199
 200 def add_checks_for_function(test, output_lines, run_list, func_dict, func_name,
 201                             single_bb, verbose=False):
 202     printed_prefixes = set()
 203     for run in run_list:
 204         for prefix in run.prefixes:
 205             if prefix in printed_prefixes:
 206                 continue
 207             if not func_dict[prefix][func_name]:
 208                 continue
 209             # if printed_prefixes:
 210             #     # Add some space between different check prefixes.
 211             #     output_lines.append('')
 212             printed_prefixes.add(prefix)
 213             log('Adding {} lines for {}'.format(prefix, func_name), verbose)
 214             add_check_lines(test, output_lines, prefix, func_name, single_bb,
 215                             func_dict[prefix][func_name].splitlines())
 216             break
 217     return output_lines
 218
 219
 220 def add_check_lines(test, output_lines, prefix, func_name, single_bb,
 221                     func_body):
 222     if single_bb:
 223         # Don't bother checking the basic block label for a single BB
 224         func_body.pop(0)
 225
 226     if not func_body:
 227         warn('Function has no instructions to check: {}'.format(func_name),
 228              test_file=test)
 229         return
 230
 231     first_line = func_body[0]
 232     indent = len(first_line) - len(first_line.lstrip(' '))
 233     # A check comment, indented the appropriate amount
 234     check = '{:>{}}; {}'.format('', indent, prefix)
 235
 236     output_lines.append('{}-LABEL: name: {}'.format(check, func_name))
 237
 238     vreg_map = {}
 239     for func_line in func_body:
 240         if not func_line.strip():
 241             continue
 242         m = VREG_DEF_RE.match(func_line)
 243         if m:
 244             for vreg in VREG_RE.finditer(m.group('vregs')):
 245                 name = mangle_vreg(m.group('opcode'), vreg_map.values())
 246                 vreg_map[vreg.group(1)] = name
 247                 func_line = func_line.replace(
 248                     vreg.group(1), '[[{}:%[0-9]+]]'.format(name), 1)
 249         for number, name in vreg_map.items():
 250             func_line = re.sub(r'{}\b'.format(number), '[[{}]]'.format(name),
 251                                func_line)
 252         check_line = '{}: {}'.format(check, func_line[indent:]).rstrip()
 253         output_lines.append(check_line)
 254
 255
 256 def mangle_vreg(opcode, current_names):
 257     base = opcode
 258     # Simplify some common prefixes and suffixes
 259     if opcode.startswith('G_'):
 260         base = base[len('G_'):]
 261     if opcode.endswith('_PSEUDO'):
 262         base = base[:len('_PSEUDO')]
 263     # Shorten some common opcodes with long-ish names
 264     base = dict(IMPLICIT_DEF='DEF',
 265                 GLOBAL_VALUE='GV',
 266                 CONSTANT='C',
 267                 FCONSTANT='C',
 268                 MERGE_VALUES='MV',
 269                 UNMERGE_VALUES='UV',
 270                 INTRINSIC='INT',
 271                 INTRINSIC_W_SIDE_EFFECTS='INT',
 272                 INSERT_VECTOR_ELT='IVEC',
 273                 EXTRACT_VECTOR_ELT='EVEC',
 274                 SHUFFLE_VECTOR='SHUF').get(base, base)
 275     # Avoid ambiguity when opcodes end in numbers
 276     if len(base.rstrip('0123456789')) < len(base):
 277         base += '_'
 278
 279     i = 0
 280     for name in current_names:
 281         if name.rstrip('0123456789') == base:
 282             i += 1
 283     if i:
 284         return '{}{}'.format(base, i)
 285     return base
 286
 287
 288 def should_add_line_to_output(input_line, prefix_set):
 289     # Skip any check lines that we're handling.
 290     m = common.CHECK_RE.match(input_line)
 291     if m and m.group(1) in prefix_set:
 292         return False
 293     return True
 294
 295
def update_test_file(llc, test, remove_common_prefixes=False, verbose=False):
    """Rewrite `test` in place with regenerated FileCheck lines.

    Reads the test file, collects its RUN lines, calls `llc` once per
    usable RUN line and records the resulting function bodies per check
    prefix. It then walks the input through a small state machine that
    drops existing check lines for the handled prefixes and inserts fresh
    ones for each function.

    Args:
        llc: Callable invoked as `llc(llc_args, test)` that returns the
            tool output as text.
        test: Path of the test file to update.
        remove_common_prefixes: When true, existing check lines whose
            prefix is shared by several RUN lines are removed as well;
            otherwise they are left alone and a warning is printed.
        verbose: Enables progress logging.

    Returns:
        None. If a RUN line has no triple and the IR declares none either,
        a warning is printed and the file is left unmodified.
    """
    log('Scanning for RUN lines in test file: {}'.format(test), verbose)
    with open(test) as fd:
        input_lines = [l.rstrip() for l in fd]

    triple_in_ir = find_triple_in_ir(input_lines, verbose)
    run_lines = find_run_lines(test, input_lines, verbose)
    run_list, common_prefixes = build_run_list(test, run_lines, verbose)

    simple_functions = find_functions_with_one_bb(input_lines, verbose)

    # func_dict maps check prefix -> {function name -> body text}.
    func_dict = {}
    for run in run_list:
        for prefix in run.prefixes:
            func_dict.update({prefix: dict()})
    # Unpacking a Run relies on its __getitem__ implementation.
    for prefixes, llc_args, triple_in_cmd in run_list:
        log('Extracted LLC cmd: llc {}'.format(llc_args), verbose)
        log('Extracted FileCheck prefixes: {}'.format(prefixes), verbose)

        raw_tool_output = llc(llc_args, test)
        if not triple_in_cmd and not triple_in_ir:
            warn('No triple found: skipping file', test_file=test)
            return

        build_function_body_dictionary(test, raw_tool_output,
                                       triple_in_cmd or triple_in_ir,
                                       prefixes, func_dict, verbose)

    state = 'toplevel'
    func_name = None
    # Existing check lines with these prefixes are dropped from the output.
    prefix_set = set([prefix for run in run_list for prefix in run.prefixes])
    log('Rewriting FileCheck prefixes: {}'.format(prefix_set), verbose)

    if remove_common_prefixes:
        prefix_set.update(common_prefixes)
    elif common_prefixes:
        warn('Ignoring common prefixes: {}'.format(common_prefixes),
             test_file=test)

    comment_char = '#' if test.endswith('.mir') else ';'
    autogenerated_note = ('{} NOTE: Assertions have been autogenerated by '
                          'utils/{}'.format(comment_char,
                                            os.path.basename(__file__)))
    output_lines = []
    output_lines.append(autogenerated_note)

    for input_line in input_lines:
        # The note was already written above; don't duplicate an old copy.
        if input_line == autogenerated_note:
            continue

        if state == 'toplevel':
            # Outside any function: look for an IR `define` line or the
            # `---` line that opens a YAML document.
            m = IR_FUNC_NAME_RE.match(input_line)
            if m:
                state = 'ir function prefix'
                func_name = m.group('func')
            if input_line.rstrip('| \r\n') == '---':
                state = 'document'
            output_lines.append(input_line)
        elif state == 'document':
            # Inside a YAML document, before a `name:` line has been seen.
            m = MIR_FUNC_NAME_RE.match(input_line)
            if m:
                state = 'mir function metadata'
                func_name = m.group('func')
            if input_line.strip() == '...':
                state = 'toplevel'
                func_name = None
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'mir function metadata':
            # Between the `name:` line and the `body: |` line.
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
            m = MIR_BODY_BEGIN_RE.match(input_line)
            if m:
                if func_name in simple_functions:
                    # If there's only one block, put the checks inside it
                    state = 'mir function prefix'
                    continue
                state = 'mir function body'
                add_checks_for_function(test, output_lines, run_list,
                                        func_dict, func_name, single_bb=False,
                                        verbose=verbose)
        elif state == 'mir function prefix':
            # Single-block function: the checks go in front of the first
            # line that does not match MIR_PREFIX_DATA_RE.
            m = MIR_PREFIX_DATA_RE.match(input_line)
            if not m:
                state = 'mir function body'
                add_checks_for_function(test, output_lines, run_list,
                                        func_dict, func_name, single_bb=True,
                                        verbose=verbose)

            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'mir function body':
            # Copy the body through until the `...` document terminator.
            if input_line.strip() == '...':
                state = 'toplevel'
                func_name = None
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'ir function prefix':
            # After a `define` line: the checks go in front of the first
            # line that is neither a comment nor blank.
            m = IR_PREFIX_DATA_RE.match(input_line)
            if not m:
                state = 'ir function body'
                add_checks_for_function(test, output_lines, run_list,
                                        func_dict, func_name, single_bb=False,
                                        verbose=verbose)

            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'ir function body':
            # Copy the IR body through until its closing brace.
            if input_line.strip() == '}':
                state = 'toplevel'
                func_name = None
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)


    log('Writing {} lines to {}...'.format(len(output_lines), test), verbose)

    # Written as UTF-8 bytes, with a `\n` appended to every line.
    with open(test, 'wb') as fd:
        fd.writelines(['{}\n'.format(l).encode('utf-8') for l in output_lines])
 415
 416
 417 def main():
 418     parser = argparse.ArgumentParser(
 419         description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
 420     parser.add_argument('-v', '--verbose', action='store_true',
 421                         help='Show verbose output')
 422     parser.add_argument('--llc-binary', dest='llc', default='llc', type=LLC,
 423                         help='The "llc" binary to generate the test case with')
 424     parser.add_argument('--remove-common-prefixes', action='store_true',
 425                         help='Remove existing check lines whose prefixes are '
 426                              'shared between multiple commands')
 427     parser.add_argument('tests', nargs='+')
 428     args = parser.parse_args()
 429
 430     test_paths = [test for pattern in args.tests for test in glob.glob(pattern)]
 431     for test in test_paths:
 432         try:
 433             update_test_file(args.llc, test, args.remove_common_prefixes,
 434                              verbose=args.verbose)
 435         except Exception:
 436             warn('Error processing file', test_file=test)
 437             raise
 438
 439
# Run the updater only when this file is executed as a script.
if __name__ == '__main__':
  main()