utils/update_mir_test_checks.py

   1 #!/usr/bin/env python
   2
   3 """Updates FileCheck checks in MIR tests.
   4
   5 This script is a utility to update MIR based tests with new FileCheck
   6 patterns.
   7
   8 The checks added by this script will cover the entire body of each
   9 function it handles. Virtual registers used are given names via
  10 FileCheck patterns, so if you do want to check a subset of the body it
  11 should be straightforward to trim out the irrelevant parts. None of
  12 the YAML metadata will be checked, other than function names.
  13
  14 If there are multiple llc commands in a test, the full set of checks
  15 will be repeated for each different check pattern. Checks for patterns
  16 that are common between different commands will be left as-is by
  17 default, or removed if the --remove-common-prefixes flag is provided.
  18 """
  19
  20 from __future__ import print_function
  21
  22 import argparse
  23 import collections
  24 import glob
  25 import os
  26 import re
  27 import subprocess
  28 import sys
  29
  30 from UpdateTestChecks import common
  31
# Matches a `name: <func>` line and captures the function name as `func`.
MIR_FUNC_NAME_RE = re.compile(r' *name: *(?P<func>[A-Za-z0-9_.-]+)')
# Matches the `body: |` line that introduces a function's MIR body.
MIR_BODY_BEGIN_RE = re.compile(r' *body: *\|')
# Matches a basic block label line, i.e. `bb.<N>` followed by anything and
# ending in a colon.
MIR_BASIC_BLOCK_RE = re.compile(r' *bb\.[0-9]+.*:$')
# Matches a virtual register reference. Group 1 is the bare `%<N>`; an
# optional `:suffix` and an optional parenthesised annotation are consumed
# but not captured.
VREG_RE = re.compile(r'(%[0-9]+)(?::[a-z0-9_]+)?(?:\([<>a-z0-9 ]+\))?')
# Matches an instruction line that defines one or more comma-separated
# virtual registers (`vregs`) and captures the defining `opcode`. The opcode
# must begin with an uppercase letter or a lowercase `t`.
VREG_DEF_RE = re.compile(
    r'^ *(?P<vregs>{0}(?:, {0})*) '
    r'= (?P<opcode>[A-Zt][A-Za-z0-9_]+)'.format(VREG_RE.pattern))
# Matches lines that may sit between `body: |` and the first instruction of
# a single-block function: `;` comments, a block label, a lowercase `key:`
# line, or an empty line.
# NOTE(review): the `.` after `bb` is unescaped here (unlike in
# MIR_BASIC_BLOCK_RE) and only one digit is required -- confirm intent.
MIR_PREFIX_DATA_RE = re.compile(r'^ *(;|bb.[0-9].*: *$|[a-z]+:( |$)|$)')

# Matches an LLVM IR `define` line and captures the function name as `func`.
IR_FUNC_NAME_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[A-Za-z0-9_.]+)\s*\(')
# Matches `;` comment lines and empty lines.
IR_PREFIX_DATA_RE = re.compile(r'^ *(;|$)')

# Matches one whole function document, from a `---` line through the closing
# `...` line, capturing the function name (`func`) and the text that follows
# `body: |` (`body`). re.M anchors `^`/`$` at line boundaries; re.S lets `.`
# span newlines so the lazy `.*?` parts can skip over intervening lines.
MIR_FUNC_RE = re.compile(
    r'^---$'
    r'\n'
    r'^ *name: *(?P<func>[A-Za-z0-9_.-]+)$'
    r'.*?'
    r'^ *body: *\|\n'
    r'(?P<body>.*?)\n'
    r'^\.\.\.$',
    flags=(re.M | re.S))
  54
  55
  56 class LLC:
  57     def __init__(self, bin):
  58         self.bin = bin
  59
  60     def __call__(self, args, ir):
  61         if ir.endswith('.mir'):
  62             args = '{} -x mir'.format(args)
  63         with open(ir) as ir_file:
  64             stdout = subprocess.check_output('{} {}'.format(self.bin, args),
  65                                              shell=True, stdin=ir_file)
  66             if sys.version_info[0] > 2:
  67               stdout = stdout.decode()
  68             # Fix line endings to unix CR style.
  69             stdout = stdout.replace('\r\n', '\n')
  70         return stdout
  71
  72
  73 class Run:
  74     def __init__(self, prefixes, cmd_args, triple):
  75         self.prefixes = prefixes
  76         self.cmd_args = cmd_args
  77         self.triple = triple
  78
  79     def __getitem__(self, index):
  80         return [self.prefixes, self.cmd_args, self.triple][index]
  81
  82
  83 def log(msg, verbose=True):
  84     if verbose:
  85         print(msg, file=sys.stderr)
  86
  87
  88 def warn(msg, test_file=None):
  89     if test_file:
  90         msg = '{}: {}'.format(test_file, msg)
  91     print('WARNING: {}'.format(msg), file=sys.stderr)
  92
  93
  94 def find_triple_in_ir(lines, verbose=False):
  95     for l in lines:
  96         m = common.TRIPLE_IR_RE.match(l)
  97         if m:
  98             return m.group(1)
  99     return None
 100
 101
 102 def find_run_lines(test, lines, verbose=False):
 103     raw_lines = [m.group(1)
 104                  for m in [common.RUN_LINE_RE.match(l) for l in lines] if m]
 105     run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
 106     for l in raw_lines[1:]:
 107         if run_lines[-1].endswith("\\"):
 108             run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l
 109         else:
 110             run_lines.append(l)
 111     if verbose:
 112         log('Found {} RUN lines:'.format(len(run_lines)))
 113         for l in run_lines:
 114             log('  RUN: {}'.format(l))
 115     return run_lines
 116
 117
 118 def build_run_list(test, run_lines, verbose=False):
 119     run_list = []
 120     all_prefixes = []
 121     for l in run_lines:
 122         commands = [cmd.strip() for cmd in l.split('|', 1)]
 123         llc_cmd = commands[0]
 124         filecheck_cmd = commands[1] if len(commands) > 1 else ''
 125         common.verify_filecheck_prefixes(filecheck_cmd)
 126
 127         if not llc_cmd.startswith('llc '):
 128             warn('Skipping non-llc RUN line: {}'.format(l), test_file=test)
 129             continue
 130         if not filecheck_cmd.startswith('FileCheck '):
 131             warn('Skipping non-FileChecked RUN line: {}'.format(l),
 132                  test_file=test)
 133             continue
 134
 135         triple = None
 136         m = common.TRIPLE_ARG_RE.search(llc_cmd)
 137         if m:
 138             triple = m.group(1)
 139         # If we find -march but not -mtriple, use that.
 140         m = common.MARCH_ARG_RE.search(llc_cmd)
 141         if m and not triple:
 142             triple = '{}--'.format(m.group(1))
 143
 144         cmd_args = llc_cmd[len('llc'):].strip()
 145         cmd_args = cmd_args.replace('< %s', '').replace('%s', '').strip()
 146
 147         check_prefixes = [
 148             item
 149             for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd)
 150             for item in m.group(1).split(',')]
 151         if not check_prefixes:
 152             check_prefixes = ['CHECK']
 153         all_prefixes += check_prefixes
 154
 155         run_list.append(Run(check_prefixes, cmd_args, triple))
 156
 157     # Remove any common prefixes. We'll just leave those entirely alone.
 158     common_prefixes = set([prefix for prefix in all_prefixes
 159                            if all_prefixes.count(prefix) > 1])
 160     for run in run_list:
 161         run.prefixes = [p for p in run.prefixes if p not in common_prefixes]
 162
 163     return run_list, common_prefixes
 164
 165
 166 def find_functions_with_one_bb(lines, verbose=False):
 167     result = []
 168     cur_func = None
 169     bbs = 0
 170     for line in lines:
 171         m = MIR_FUNC_NAME_RE.match(line)
 172         if m:
 173             if bbs == 1:
 174                 result.append(cur_func)
 175             cur_func = m.group('func')
 176             bbs = 0
 177         m = MIR_BASIC_BLOCK_RE.match(line)
 178         if m:
 179             bbs += 1
 180     if bbs == 1:
 181         result.append(cur_func)
 182     return result
 183
 184
 185 def build_function_body_dictionary(test, raw_tool_output, triple, prefixes,
 186                                    func_dict, verbose):
 187     for m in MIR_FUNC_RE.finditer(raw_tool_output):
 188         func = m.group('func')
 189         body = m.group('body')
 190         if verbose:
 191             log('Processing function: {}'.format(func))
 192             for l in body.splitlines():
 193                 log('  {}'.format(l))
 194         for prefix in prefixes:
 195             if func in func_dict[prefix] and func_dict[prefix][func] != body:
 196                 warn('Found conflicting asm for prefix: {}'.format(prefix),
 197                      test_file=test)
 198             func_dict[prefix][func] = body
 199
 200
 201 def add_checks_for_function(test, output_lines, run_list, func_dict, func_name,
 202                             single_bb, verbose=False):
 203     printed_prefixes = set()
 204     for run in run_list:
 205         for prefix in run.prefixes:
 206             if prefix in printed_prefixes:
 207                 continue
 208             if not func_dict[prefix][func_name]:
 209                 continue
 210             # if printed_prefixes:
 211             #     # Add some space between different check prefixes.
 212             #     output_lines.append('')
 213             printed_prefixes.add(prefix)
 214             log('Adding {} lines for {}'.format(prefix, func_name), verbose)
 215             add_check_lines(test, output_lines, prefix, func_name, single_bb,
 216                             func_dict[prefix][func_name].splitlines())
 217             break
 218     return output_lines
 219
 220
 221 def add_check_lines(test, output_lines, prefix, func_name, single_bb,
 222                     func_body):
 223     if single_bb:
 224         # Don't bother checking the basic block label for a single BB
 225         func_body.pop(0)
 226
 227     if not func_body:
 228         warn('Function has no instructions to check: {}'.format(func_name),
 229              test_file=test)
 230         return
 231
 232     first_line = func_body[0]
 233     indent = len(first_line) - len(first_line.lstrip(' '))
 234     # A check comment, indented the appropriate amount
 235     check = '{:>{}}; {}'.format('', indent, prefix)
 236
 237     output_lines.append('{}-LABEL: name: {}'.format(check, func_name))
 238
 239     vreg_map = {}
 240     for func_line in func_body:
 241         if not func_line.strip():
 242             continue
 243         m = VREG_DEF_RE.match(func_line)
 244         if m:
 245             for vreg in VREG_RE.finditer(m.group('vregs')):
 246                 name = mangle_vreg(m.group('opcode'), vreg_map.values())
 247                 vreg_map[vreg.group(1)] = name
 248                 func_line = func_line.replace(
 249                     vreg.group(1), '[[{}:%[0-9]+]]'.format(name), 1)
 250         for number, name in vreg_map.items():
 251             func_line = re.sub(r'{}\b'.format(number), '[[{}]]'.format(name),
 252                                func_line)
 253         check_line = '{}: {}'.format(check, func_line[indent:]).rstrip()
 254         output_lines.append(check_line)
 255
 256
 257 def mangle_vreg(opcode, current_names):
 258     base = opcode
 259     # Simplify some common prefixes and suffixes
 260     if opcode.startswith('G_'):
 261         base = base[len('G_'):]
 262     if opcode.endswith('_PSEUDO'):
 263         base = base[:len('_PSEUDO')]
 264     # Shorten some common opcodes with long-ish names
 265     base = dict(IMPLICIT_DEF='DEF',
 266                 GLOBAL_VALUE='GV',
 267                 CONSTANT='C',
 268                 FCONSTANT='C',
 269                 MERGE_VALUES='MV',
 270                 UNMERGE_VALUES='UV',
 271                 INTRINSIC='INT',
 272                 INTRINSIC_W_SIDE_EFFECTS='INT',
 273                 INSERT_VECTOR_ELT='IVEC',
 274                 EXTRACT_VECTOR_ELT='EVEC',
 275                 SHUFFLE_VECTOR='SHUF').get(base, base)
 276     # Avoid ambiguity when opcodes end in numbers
 277     if len(base.rstrip('0123456789')) < len(base):
 278         base += '_'
 279
 280     i = 0
 281     for name in current_names:
 282         if name.rstrip('0123456789') == base:
 283             i += 1
 284     if i:
 285         return '{}{}'.format(base, i)
 286     return base
 287
 288
 289 def should_add_line_to_output(input_line, prefix_set):
 290     # Skip any check lines that we're handling.
 291     m = common.CHECK_RE.match(input_line)
 292     if m and m.group(1) in prefix_set:
 293         return False
 294     return True
 295
 296
def update_test_file(llc, test, remove_common_prefixes=False, verbose=False):
    """Regenerate the FileCheck lines of one test file, in place.

    Args:
        llc: callable invoked as `llc(llc_args, test)` that returns the tool
            output as text.
        test: path of the test file; it is read, then overwritten.
        remove_common_prefixes: when true, existing check lines that use a
            prefix shared by several RUN lines are removed too; otherwise
            they are kept and a warning is printed.
        verbose: enables progress logging.

    Returns early, without rewriting the file, if a RUN line has no triple
    and no triple was found in the file either.
    """
    log('Scanning for RUN lines in test file: {}'.format(test), verbose)
    with open(test) as fd:
        # Trailing whitespace (including the newline) is stripped from every
        # line; newlines are added back when writing.
        input_lines = [l.rstrip() for l in fd]

    triple_in_ir = find_triple_in_ir(input_lines, verbose)
    run_lines = find_run_lines(test, input_lines, verbose)
    run_list, common_prefixes = build_run_list(test, run_lines, verbose)

    simple_functions = find_functions_with_one_bb(input_lines, verbose)

    # func_dict maps prefix -> {function name -> body text from llc}.
    func_dict = {}
    for run in run_list:
        for prefix in run.prefixes:
            func_dict.update({prefix: dict()})
    # Run objects are indexable, so each one unpacks into its three fields.
    for prefixes, llc_args, triple_in_cmd in run_list:
        log('Extracted LLC cmd: llc {}'.format(llc_args), verbose)
        log('Extracted FileCheck prefixes: {}'.format(prefixes), verbose)

        # NOTE(review): llc is invoked before the triple check below, so the
        # tool still runs once even when the file ends up being skipped.
        raw_tool_output = llc(llc_args, test)
        if not triple_in_cmd and not triple_in_ir:
            warn('No triple found: skipping file', test_file=test)
            return

        build_function_body_dictionary(test, raw_tool_output,
                                       triple_in_cmd or triple_in_ir,
                                       prefixes, func_dict, verbose)

    # Line-by-line state machine over the input. States:
    #   'toplevel'              outside any YAML document / IR function
    #   'document'              inside a `---` document, before `name:`
    #   'mir function metadata' after `name:`, before `body: |`
    #   'mir function prefix'   single-block function, before its first
    #                           non-prefix line
    #   'mir function body'     inside the MIR body, until `...`
    #   'ir function prefix'    after an IR `define`, before the first
    #                           non-comment, non-blank line
    #   'ir function body'      inside the IR function, until `}`
    state = 'toplevel'
    func_name = None
    prefix_set = set([prefix for run in run_list for prefix in run.prefixes])
    log('Rewriting FileCheck prefixes: {}'.format(prefix_set), verbose)

    if remove_common_prefixes:
        prefix_set.update(common_prefixes)
    elif common_prefixes:
        warn('Ignoring common prefixes: {}'.format(common_prefixes),
             test_file=test)

    # The note uses '#' as the comment character in .mir files, ';' otherwise.
    comment_char = '#' if test.endswith('.mir') else ';'
    autogenerated_note = ('{} NOTE: Assertions have been autogenerated by '
                          'utils/{}'.format(comment_char,
                                            os.path.basename(__file__)))
    output_lines = []
    output_lines.append(autogenerated_note)

    for input_line in input_lines:
        # The note was already emitted as the first line; drop any old copy.
        if input_line == autogenerated_note:
            continue

        if state == 'toplevel':
            m = IR_FUNC_NAME_RE.match(input_line)
            if m:
                state = 'ir function prefix'
                func_name = m.group('func')
            if input_line.rstrip('| \r\n') == '---':
                state = 'document'
            # Top-level lines are copied unconditionally, without the
            # check-line filter used in the other states.
            output_lines.append(input_line)
        elif state == 'document':
            m = MIR_FUNC_NAME_RE.match(input_line)
            if m:
                state = 'mir function metadata'
                func_name = m.group('func')
            if input_line.strip() == '...':
                state = 'toplevel'
                func_name = None
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'mir function metadata':
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
            m = MIR_BODY_BEGIN_RE.match(input_line)
            if m:
                if func_name in simple_functions:
                    # If there's only one block, put the checks inside it
                    state = 'mir function prefix'
                    continue
                # Otherwise the checks go directly after the `body: |` line.
                state = 'mir function body'
                add_checks_for_function(test, output_lines, run_list,
                                        func_dict, func_name, single_bb=False,
                                        verbose=verbose)
        elif state == 'mir function prefix':
            m = MIR_PREFIX_DATA_RE.match(input_line)
            if not m:
                # First line that is not prefix data: emit the checks just
                # before it.
                state = 'mir function body'
                add_checks_for_function(test, output_lines, run_list,
                                        func_dict, func_name, single_bb=True,
                                        verbose=verbose)

            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'mir function body':
            if input_line.strip() == '...':
                state = 'toplevel'
                func_name = None
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'ir function prefix':
            m = IR_PREFIX_DATA_RE.match(input_line)
            if not m:
                # First line that is neither a comment nor blank: emit the
                # checks just before it.
                state = 'ir function body'
                add_checks_for_function(test, output_lines, run_list,
                                        func_dict, func_name, single_bb=False,
                                        verbose=verbose)

            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'ir function body':
            if input_line.strip() == '}':
                state = 'toplevel'
                func_name = None
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)


    log('Writing {} lines to {}...'.format(len(output_lines), test), verbose)

    # Written in binary mode as UTF-8 with '\n' appended to every line.
    with open(test, 'wb') as fd:
        fd.writelines(['{}\n'.format(l).encode('utf-8') for l in output_lines])
 416
 417
 418 def main():
 419     parser = argparse.ArgumentParser(
 420         description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
 421     parser.add_argument('-v', '--verbose', action='store_true',
 422                         help='Show verbose output')
 423     parser.add_argument('--llc-binary', dest='llc', default='llc', type=LLC,
 424                         help='The "llc" binary to generate the test case with')
 425     parser.add_argument('--remove-common-prefixes', action='store_true',
 426                         help='Remove existing check lines whose prefixes are '
 427                              'shared between multiple commands')
 428     parser.add_argument('tests', nargs='+')
 429     args = parser.parse_args()
 430
 431     test_paths = [test for pattern in args.tests for test in glob.glob(pattern)]
 432     for test in test_paths:
 433         try:
 434             update_test_file(args.llc, test, args.remove_common_prefixes,
 435                              verbose=args.verbose)
 436         except Exception:
 437             warn('Error processing file', test_file=test)
 438             raise
 439
 440
# Run the updater only when this file is executed as a script.
if __name__ == '__main__':
  main()