llvm/utils/update_mir_test_checks.py

   1 #!/usr/bin/env python3
   2
   3 """Updates FileCheck checks in MIR tests.
   4
   5 This script is a utility to update MIR based tests with new FileCheck
   6 patterns.
   7
   8 The checks added by this script will cover the entire body of each
   9 function it handles. Virtual registers used are given names via
  10 FileCheck patterns, so if you do want to check a subset of the body it
  11 should be straightforward to trim out the irrelevant parts. None of
  12 the YAML metadata will be checked, other than function names.
  13
  14 If there are multiple llc commands in a test, the full set of checks
  15 will be repeated for each different check pattern. Checks for patterns
  16 that are common between different commands will be left as-is by
  17 default, or removed if the --remove-common-prefixes flag is provided.
  18 """
  19
  20 from __future__ import print_function
  21
  22 import argparse
  23 import collections
  24 import glob
  25 import os
  26 import re
  27 import subprocess
  28 import sys
  29
  30 from UpdateTestChecks import common
  31
  32 MIR_FUNC_NAME_RE = re.compile(r' *name: *(?P<func>[A-Za-z0-9_.-]+)')
  33 MIR_BODY_BEGIN_RE = re.compile(r' *body: *\|')
  34 MIR_BASIC_BLOCK_RE = re.compile(r' *bb\.[0-9]+.*:$')
  35 VREG_RE = re.compile(r'(%[0-9]+)(?::[a-z0-9_]+)?(?:\([<>a-z0-9 ]+\))?')
  36 MI_FLAGS_STR= (
  37     r'(frame-setup |frame-destroy |nnan |ninf |nsz |arcp |contract |afn '
  38     r'|reassoc |nuw |nsw |exact |fpexcept )*')
  39 VREG_DEF_RE = re.compile(
  40     r'^ *(?P<vregs>{0}(?:, {0})*) = '
  41     r'{1}(?P<opcode>[A-Zt][A-Za-z0-9_]+)'.format(VREG_RE.pattern, MI_FLAGS_STR))
  42 MIR_PREFIX_DATA_RE = re.compile(r'^ *(;|bb.[0-9].*: *$|[a-z]+:( |$)|$)')
  43
  44 IR_FUNC_NAME_RE = re.compile(
  45     r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[A-Za-z0-9_.]+)\s*\(')
  46 IR_PREFIX_DATA_RE = re.compile(r'^ *(;|$)')
  47
  48 MIR_FUNC_RE = re.compile(
  49     r'^---$'
  50     r'\n'
  51     r'^ *name: *(?P<func>[A-Za-z0-9_.-]+)$'
  52     r'.*?'
  53     r'^ *body: *\|\n'
  54     r'(?P<body>.*?)\n'
  55     r'^\.\.\.$',
  56     flags=(re.M | re.S))
  57
  58
  59 class LLC:
  60     def __init__(self, bin):
  61         self.bin = bin
  62
  63     def __call__(self, args, ir):
  64         if ir.endswith('.mir'):
  65             args = '{} -x mir'.format(args)
  66         with open(ir) as ir_file:
  67             stdout = subprocess.check_output('{} {}'.format(self.bin, args),
  68                                              shell=True, stdin=ir_file)
  69             if sys.version_info[0] > 2:
  70               stdout = stdout.decode()
  71             # Fix line endings to unix CR style.
  72             stdout = stdout.replace('\r\n', '\n')
  73         return stdout
  74
  75
  76 class Run:
  77     def __init__(self, prefixes, cmd_args, triple):
  78         self.prefixes = prefixes
  79         self.cmd_args = cmd_args
  80         self.triple = triple
  81
  82     def __getitem__(self, index):
  83         return [self.prefixes, self.cmd_args, self.triple][index]
  84
  85
  86 def log(msg, verbose=True):
  87     if verbose:
  88         print(msg, file=sys.stderr)
  89
  90
  91 def find_triple_in_ir(lines, verbose=False):
  92     for l in lines:
  93         m = common.TRIPLE_IR_RE.match(l)
  94         if m:
  95             return m.group(1)
  96     return None
  97
  98
  99 def build_run_list(test, run_lines, verbose=False):
 100     run_list = []
 101     all_prefixes = []
 102     for l in run_lines:
 103         if '|' not in l:
 104             common.warn('Skipping unparseable RUN line: ' + l)
 105             continue
 106
 107         commands = [cmd.strip() for cmd in l.split('|', 1)]
 108         llc_cmd = commands[0]
 109         filecheck_cmd = commands[1] if len(commands) > 1 else ''
 110         common.verify_filecheck_prefixes(filecheck_cmd)
 111
 112         if not llc_cmd.startswith('llc '):
 113             common.warn('Skipping non-llc RUN line: {}'.format(l), test_file=test)
 114             continue
 115         if not filecheck_cmd.startswith('FileCheck '):
 116             common.warn('Skipping non-FileChecked RUN line: {}'.format(l),
 117                  test_file=test)
 118             continue
 119
 120         triple = None
 121         m = common.TRIPLE_ARG_RE.search(llc_cmd)
 122         if m:
 123             triple = m.group(1)
 124         # If we find -march but not -mtriple, use that.
 125         m = common.MARCH_ARG_RE.search(llc_cmd)
 126         if m and not triple:
 127             triple = '{}--'.format(m.group(1))
 128
 129         cmd_args = llc_cmd[len('llc'):].strip()
 130         cmd_args = cmd_args.replace('< %s', '').replace('%s', '').strip()
 131
 132         check_prefixes = [
 133             item
 134             for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd)
 135             for item in m.group(1).split(',')]
 136         if not check_prefixes:
 137             check_prefixes = ['CHECK']
 138         all_prefixes += check_prefixes
 139
 140         run_list.append(Run(check_prefixes, cmd_args, triple))
 141
 142     # Sort prefixes that are shared between run lines before unshared prefixes.
 143     # This causes us to prefer printing shared prefixes.
 144     for run in run_list:
 145         run.prefixes.sort(key=lambda prefix: -all_prefixes.count(prefix))
 146
 147     return run_list
 148
 149
 150 def find_functions_with_one_bb(lines, verbose=False):
 151     result = []
 152     cur_func = None
 153     bbs = 0
 154     for line in lines:
 155         m = MIR_FUNC_NAME_RE.match(line)
 156         if m:
 157             if bbs == 1:
 158                 result.append(cur_func)
 159             cur_func = m.group('func')
 160             bbs = 0
 161         m = MIR_BASIC_BLOCK_RE.match(line)
 162         if m:
 163             bbs += 1
 164     if bbs == 1:
 165         result.append(cur_func)
 166     return result
 167
 168
 169 def build_function_body_dictionary(test, raw_tool_output, triple, prefixes,
 170                                    func_dict, verbose):
 171     for m in MIR_FUNC_RE.finditer(raw_tool_output):
 172         func = m.group('func')
 173         body = m.group('body')
 174         if verbose:
 175             log('Processing function: {}'.format(func))
 176             for l in body.splitlines():
 177                 log('  {}'.format(l))
 178
 179         # Vreg mangling
 180         mangled = []
 181         vreg_map = {}
 182         for func_line in body.splitlines(keepends=True):
 183             m = VREG_DEF_RE.match(func_line)
 184             if m:
 185                 for vreg in VREG_RE.finditer(m.group('vregs')):
 186                     name = mangle_vreg(m.group('opcode'), vreg_map.values())
 187                     vreg_map[vreg.group(1)] = name
 188                     func_line = func_line.replace(
 189                         vreg.group(1), '[[{}:%[0-9]+]]'.format(name), 1)
 190             for number, name in vreg_map.items():
 191                 func_line = re.sub(r'{}\b'.format(number), '[[{}]]'.format(name),
 192                                 func_line)
 193             mangled.append(func_line)
 194         body = ''.join(mangled)
 195
 196         for prefix in prefixes:
 197             if func in func_dict[prefix]:
 198                 if func_dict[prefix][func] != body:
 199                     func_dict[prefix][func] = None
 200             else:
 201                 func_dict[prefix][func] = body
 202
 203
 204 def add_checks_for_function(test, output_lines, run_list, func_dict, func_name,
 205                             single_bb, verbose=False):
 206     printed_prefixes = set()
 207     for run in run_list:
 208         for prefix in run.prefixes:
 209             if prefix in printed_prefixes:
 210                 break
 211             if not func_dict[prefix][func_name]:
 212                 continue
 213             # if printed_prefixes:
 214             #     # Add some space between different check prefixes.
 215             #     output_lines.append('')
 216             printed_prefixes.add(prefix)
 217             log('Adding {} lines for {}'.format(prefix, func_name), verbose)
 218             add_check_lines(test, output_lines, prefix, func_name, single_bb,
 219                             func_dict[prefix][func_name].splitlines())
 220             break
 221         else:
 222             common.warn(
 223                 'Found conflicting asm for function: {}'.format(func_name),
 224                 test_file=test)
 225     return output_lines
 226
 227
 228 def add_check_lines(test, output_lines, prefix, func_name, single_bb,
 229                     func_body):
 230     if single_bb:
 231         # Don't bother checking the basic block label for a single BB
 232         func_body.pop(0)
 233
 234     if not func_body:
 235         common.warn('Function has no instructions to check: {}'.format(func_name),
 236              test_file=test)
 237         return
 238
 239     first_line = func_body[0]
 240     indent = len(first_line) - len(first_line.lstrip(' '))
 241     # A check comment, indented the appropriate amount
 242     check = '{:>{}}; {}'.format('', indent, prefix)
 243
 244     output_lines.append('{}-LABEL: name: {}'.format(check, func_name))
 245     first_check = True
 246
 247     for func_line in func_body:
 248         if not func_line.strip():
 249             # The mir printer prints leading whitespace so we can't use CHECK-EMPTY:
 250             output_lines.append(check + '-NEXT: {{' + func_line + '$}}')
 251             continue
 252         filecheck_directive = check if first_check else check + '-NEXT'
 253         first_check = False
 254         check_line = '{}: {}'.format(filecheck_directive, func_line[indent:]).rstrip()
 255         output_lines.append(check_line)
 256
 257
 258 def mangle_vreg(opcode, current_names):
 259     base = opcode
 260     # Simplify some common prefixes and suffixes
 261     if opcode.startswith('G_'):
 262         base = base[len('G_'):]
 263     if opcode.endswith('_PSEUDO'):
 264         base = base[:len('_PSEUDO')]
 265     # Shorten some common opcodes with long-ish names
 266     base = dict(IMPLICIT_DEF='DEF',
 267                 GLOBAL_VALUE='GV',
 268                 CONSTANT='C',
 269                 FCONSTANT='C',
 270                 MERGE_VALUES='MV',
 271                 UNMERGE_VALUES='UV',
 272                 INTRINSIC='INT',
 273                 INTRINSIC_W_SIDE_EFFECTS='INT',
 274                 INSERT_VECTOR_ELT='IVEC',
 275                 EXTRACT_VECTOR_ELT='EVEC',
 276                 SHUFFLE_VECTOR='SHUF').get(base, base)
 277     # Avoid ambiguity when opcodes end in numbers
 278     if len(base.rstrip('0123456789')) < len(base):
 279         base += '_'
 280
 281     i = 0
 282     for name in current_names:
 283         if name.rstrip('0123456789') == base:
 284             i += 1
 285     if i:
 286         return '{}{}'.format(base, i)
 287     return base
 288
 289
 290 def should_add_line_to_output(input_line, prefix_set):
 291     # Skip any check lines that we're handling.
 292     m = common.CHECK_RE.match(input_line)
 293     if m and m.group(1) in prefix_set:
 294         return False
 295     return True
 296
 297
def update_test_file(args, test):
    """Regenerate the FileCheck lines of one test file in place.

    Reads `test`, runs llc once for every usable RUN line, and rewrites the
    file: the autogenerated note comes first, existing check lines for the
    handled prefixes are dropped, and fresh check lines are inserted for
    each function.

    Args:
        args: Parsed command-line options; this function reads
            `update_only`, `verbose` and the callable `llc`.
        test: Path of the test file to update.
    """
    with open(test) as fd:
        input_lines = [l.rstrip() for l in fd]

    script_name = os.path.basename(__file__)
    first_line = input_lines[0] if input_lines else ""
    # Leave alone tests whose first line says they were autogenerated but
    # does not mention this script.
    if 'autogenerated' in first_line and script_name not in first_line:
        common.warn("Skipping test which wasn't autogenerated by " +
                    script_name + ": " + test)
        return

    # With update_only, tests without an autogenerated note are skipped.
    if args.update_only:
      if not first_line or 'autogenerated' not in first_line:
        common.warn("Skipping test which isn't autogenerated: " + test)
        return

    triple_in_ir = find_triple_in_ir(input_lines, args.verbose)
    run_lines = common.find_run_lines(test, input_lines)
    run_list = build_run_list(test, run_lines, args.verbose)

    simple_functions = find_functions_with_one_bb(input_lines, args.verbose)

    # func_dict maps check prefix -> {function name -> check body}.
    func_dict = {}
    for run in run_list:
        for prefix in run.prefixes:
            func_dict.update({prefix: dict()})
    # Unpacking a Run works through Run.__getitem__.
    for prefixes, llc_args, triple_in_cmd in run_list:
        log('Extracted LLC cmd: llc {}'.format(llc_args), args.verbose)
        log('Extracted FileCheck prefixes: {}'.format(prefixes), args.verbose)

        raw_tool_output = args.llc(llc_args, test)
        # A triple must come from the RUN line or from the test itself;
        # otherwise the whole file is skipped without being rewritten.
        if not triple_in_cmd and not triple_in_ir:
            common.warn('No triple found: skipping file', test_file=test)
            return

        build_function_body_dictionary(test, raw_tool_output,
                                       triple_in_cmd or triple_in_ir,
                                       prefixes, func_dict, args.verbose)

    # The rewrite below is a line-by-line state machine. States:
    #   'toplevel'              outside any function or YAML document
    #   'document'              inside a YAML document, before a `name:` key
    #   'mir function metadata' after `name:`, before `body: |`
    #   'mir function prefix'   single-block function, before its first
    #                           line that is not prefix data
    #   'mir function body'     inside a MIR body, until `...`
    #   'ir function prefix'    after an IR `define`, before the first line
    #                           that is neither a comment nor blank
    #   'ir function body'      inside an IR function, until `}`
    state = 'toplevel'
    func_name = None
    prefix_set = set([prefix for run in run_list for prefix in run.prefixes])
    log('Rewriting FileCheck prefixes: {}'.format(prefix_set), args.verbose)

    # The note uses '#' as the comment character for .mir files and ';'
    # for everything else.
    comment_char = '#' if test.endswith('.mir') else ';'
    autogenerated_note = ('{} NOTE: Assertions have been autogenerated by '
                          'utils/{}'.format(comment_char, script_name))
    output_lines = []
    output_lines.append(autogenerated_note)

    for input_line in input_lines:
        # The note was already emitted above; drop the old copy.
        if input_line == autogenerated_note:
            continue

        if state == 'toplevel':
            m = IR_FUNC_NAME_RE.match(input_line)
            if m:
                state = 'ir function prefix'
                func_name = m.group('func')
            # Both `---` and `--- |` open a YAML document.
            if input_line.rstrip('| \r\n') == '---':
                state = 'document'
            # Top-level lines are copied unconditionally, without the
            # check-line filter used in the other states.
            output_lines.append(input_line)
        elif state == 'document':
            m = MIR_FUNC_NAME_RE.match(input_line)
            if m:
                state = 'mir function metadata'
                func_name = m.group('func')
            if input_line.strip() == '...':
                state = 'toplevel'
                func_name = None
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'mir function metadata':
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
            m = MIR_BODY_BEGIN_RE.match(input_line)
            if m:
                if func_name in simple_functions:
                    # If there's only one block, put the checks inside it
                    state = 'mir function prefix'
                    continue
                # Otherwise the checks go directly after the `body: |` line.
                state = 'mir function body'
                add_checks_for_function(test, output_lines, run_list,
                                        func_dict, func_name, single_bb=False,
                                        verbose=args.verbose)
        elif state == 'mir function prefix':
            m = MIR_PREFIX_DATA_RE.match(input_line)
            if not m:
                # First line that is not prefix data: the checks are
                # inserted before it.
                state = 'mir function body'
                add_checks_for_function(test, output_lines, run_list,
                                        func_dict, func_name, single_bb=True,
                                        verbose=args.verbose)

            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'mir function body':
            if input_line.strip() == '...':
                state = 'toplevel'
                func_name = None
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'ir function prefix':
            m = IR_PREFIX_DATA_RE.match(input_line)
            if not m:
                # First line that is neither a comment nor blank: the
                # checks are inserted before it.
                state = 'ir function body'
                add_checks_for_function(test, output_lines, run_list,
                                        func_dict, func_name, single_bb=False,
                                        verbose=args.verbose)

            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'ir function body':
            if input_line.strip() == '}':
                state = 'toplevel'
                func_name = None
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)


    log('Writing {} lines to {}...'.format(len(output_lines), test), args.verbose)

    # Written in binary mode as UTF-8 with explicit '\n' line endings.
    with open(test, 'wb') as fd:
        fd.writelines(['{}\n'.format(l).encode('utf-8') for l in output_lines])
 421
 422
 423 def main():
 424     parser = argparse.ArgumentParser(
 425         description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
 426     parser.add_argument('--llc-binary', dest='llc', default='llc', type=LLC,
 427                         help='The "llc" binary to generate the test case with')
 428     parser.add_argument('tests', nargs='+')
 429     args = common.parse_commandline_args(parser)
 430
 431     test_paths = [test for pattern in args.tests for test in glob.glob(pattern)]
 432     for test in test_paths:
 433         try:
 434             update_test_file(args, test)
 435         except Exception:
 436             common.warn('Error processing file', test_file=test)
 437             raise
 438
 439
 440 if __name__ == '__main__':
 441   main()