llvm/utils/UpdateTestChecks/common.py

   1 from __future__ import print_function
   2 import re
   3 import string
   4 import subprocess
   5 import sys
   6 import copy
   7
# On Python 3, shadow the `string` module with a tiny class whose `expandtabs`
# attribute is `str.expandtabs`, so that calls of the form
# `string.expandtabs(text, tabsize)` (see scrub_body) keep working. On
# Python 2 the real `string` module is used.
if sys.version_info[0] > 2:
  class string:
    expandtabs = str.expandtabs
else:
  import string
  13
  14 ##### Common utilities for update_*test_checks.py
  15
  16
# Module-wide verbosity flag: parse_commandline_args() sets it from the
# --verbose option and debug() checks it before printing anything.
_verbose = False
  18
  19 def parse_commandline_args(parser):
  20   parser.add_argument('-v', '--verbose', action='store_true',
  21                       help='Show verbose output')
  22   parser.add_argument('-u', '--update-only', action='store_true',
  23                       help='Only update test if it was already autogened')
  24   args = parser.parse_args()
  25   global _verbose
  26   _verbose = args.verbose
  27   return args
  28
  29 def should_add_line_to_output(input_line, prefix_set):
  30   # Skip any blank comment lines in the IR.
  31   if input_line.strip() == ';':
  32     return False
  33   # Skip any blank lines in the IR.
  34   #if input_line.strip() == '':
  35   #  return False
  36   # And skip any CHECK lines. We're building our own.
  37   m = CHECK_RE.match(input_line)
  38   if m and m.group(1) in prefix_set:
  39     return False
  40
  41   return True
  42
  43 # Invoke the tool that is being tested.
  44 def invoke_tool(exe, cmd_args, ir):
  45   with open(ir) as ir_file:
  46     # TODO Remove the str form which is used by update_test_checks.py and
  47     # update_llc_test_checks.py
  48     # The safer list form is used by update_cc_test_checks.py
  49     if isinstance(cmd_args, list):
  50       stdout = subprocess.check_output([exe] + cmd_args, stdin=ir_file)
  51     else:
  52       stdout = subprocess.check_output(exe + ' ' + cmd_args,
  53                                        shell=True, stdin=ir_file)
  54     if sys.version_info[0] > 2:
  55       stdout = stdout.decode()
  56   # Fix line endings to unix CR style.
  57   return stdout.replace('\r\n', '\n')
  58
  59 ##### LLVM IR parser
  60
# A RUN line: a `//`, `;` or `#` comment containing `RUN:`; group 1 is the
# command text that follows.
RUN_LINE_RE = re.compile(r'^\s*(?://|[;#])\s*RUN:\s*(.*)$')
# A `-check-prefix` / `--check-prefix` / `...-prefixes` option followed by '='
# or a space; group 1 is the (non-whitespace) option value.
CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?[= ](\S+)')
# A valid prefix: only ASCII letters, digits, underscores and hyphens.
PREFIX_RE = re.compile('^[a-zA-Z0-9_-]+$')
# A check line inside a `//`, `;` or `#` comment; group 1 is the prefix with
# any -NEXT/-NOT/-DAG/-LABEL/-SAME/-EMPTY suffix left out.
CHECK_RE = re.compile(r'^\s*(?://|[;#])\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME|-EMPTY)?:')

# A whole `define ... { ... }` function in tool output. Named groups: `func`
# (the name after '@'), `args_and_sig` (the parenthesized argument list plus
# everything up to the opening brace) and `body` (the text up to a line that
# consists of just '}').
OPT_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*'
    r'(?P<args_and_sig>\((\)|(.*?[\w\.\-]+?)\))[^{]*)\{\n(?P<body>.*?)^\}$',
    flags=(re.M | re.S))

# An analysis report of the form `'<analysis>' for function '<func>':` followed
# by its text. Named groups: `analysis`, `func` and `body`. Compiled without
# re.M, so '^' and '$' anchor to the start and end of the searched string.
ANALYZE_FUNCTION_RE = re.compile(
    r'^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w-]+?)\':'
    r'\s*\n(?P<body>.*)$',
    flags=(re.X | re.S))

# The start of an IR function definition; group 1 is the function name.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@(\w+)\s*\(')
# A `target triple = "..."` line; group 1 is the quoted triple.
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
# `-mtriple` / `-march` command-line options; group 1 is the option value.
TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)')

# Whitespace at the very start of the string; group 1 is that whitespace.
SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
# Runs of spaces/tabs, except where the negative lookahead rejects the position
# (at the start of a line; see scrub_body, which relies on this to keep leading
# whitespace in place).
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
# Spaces/tabs at the end of any line.
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
# Alias of the pattern above; this is the name scrub_body uses.
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
# Like the trailing-whitespace pattern, but also consumes trailing `#<digits>`
# tokens at the end of a line.
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(r'([ \t]|(#[0-9]+))+$', flags=re.M)
# A `# kill:` comment line, including its newline. Compiled without re.M, so
# '^' only matches at the start of the string being searched.
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
# The `# =>This Inner Loop Header:` and `# in Loop:` comments, through the end
# of the line.
SCRUB_LOOP_COMMENT_RE = re.compile(
    r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M)
  89
  90
  91 def error(msg, test_file=None):
  92   if test_file:
  93     msg = '{}: {}'.format(msg, test_file)
  94   print('ERROR: {}'.format(msg), file=sys.stderr)
  95
  96 def warn(msg, test_file=None):
  97   if test_file:
  98     msg = '{}: {}'.format(msg, test_file)
  99   print('WARNING: {}'.format(msg), file=sys.stderr)
 100
 101 def debug(*args, **kwargs):
 102   # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
 103   if 'file' not in kwargs:
 104     kwargs['file'] = sys.stderr
 105   if _verbose:
 106     print(*args, **kwargs)
 107
 108 def find_run_lines(test, lines):
 109   debug('Scanning for RUN lines in test file:', test)
 110   raw_lines = [m.group(1)
 111                for m in [RUN_LINE_RE.match(l) for l in lines] if m]
 112   run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
 113   for l in raw_lines[1:]:
 114     if run_lines[-1].endswith('\\'):
 115       run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + l
 116     else:
 117       run_lines.append(l)
 118   debug('Found {} RUN lines in {}:'.format(len(run_lines), test))
 119   for l in run_lines:
 120     debug('  RUN: {}'.format(l))
 121   return run_lines
 122
 123 def scrub_body(body):
 124   # Scrub runs of whitespace out of the assembly, but leave the leading
 125   # whitespace in place.
 126   body = SCRUB_WHITESPACE_RE.sub(r' ', body)
 127   # Expand the tabs used for indentation.
 128   body = string.expandtabs(body, 2)
 129   # Strip trailing whitespace.
 130   body = SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r'', body)
 131   return body
 132
 133 def do_scrub(body, scrubber, scrubber_args, extra):
 134   if scrubber_args:
 135     local_args = copy.deepcopy(scrubber_args)
 136     local_args[0].extra_scrub = extra
 137     return scrubber(body, *local_args)
 138   return scrubber(body, *scrubber_args)
 139
 140 # Build up a dictionary of all the function bodies.
 141 class function_body(object):
 142   def __init__(self, string, extra, args_and_sig):
 143     self.scrub = string
 144     self.extrascrub = extra
 145     self.args_and_sig = args_and_sig
 146   def is_same_except_arg_names(self, extrascrub, args_and_sig):
 147     arg_names = set()
 148     def drop_arg_names(match):
 149         arg_names.add(match.group(2))
 150         return match.group(1) + match.group(3)
 151     def repl_arg_names(match):
 152         if match.group(2) in arg_names:
 153             return match.group(1) + match.group(3)
 154         return match.group(1) + match.group(2) + match.group(3)
 155     ans0 = IR_VALUE_RE.sub(drop_arg_names, self.args_and_sig)
 156     ans1 = IR_VALUE_RE.sub(drop_arg_names, args_and_sig)
 157     if ans0 != ans1:
 158         return False
 159     es0 = IR_VALUE_RE.sub(repl_arg_names, self.extrascrub)
 160     es1 = IR_VALUE_RE.sub(repl_arg_names, extrascrub)
 161     es0 = SCRUB_IR_COMMENT_RE.sub(r'', es0)
 162     es1 = SCRUB_IR_COMMENT_RE.sub(r'', es1)
 163     return es0 == es1
 164
 165   def __str__(self):
 166     return self.scrub
 167
def build_function_body_dictionary(function_re, scrubber, scrubber_args, raw_tool_output, prefixes, func_dict, verbose, record_args):
  """Record the scrubbed body of every function found in `raw_tool_output`.

  Args:
    function_re: compiled regex with named groups `func` and `body`, and
      optionally `args_and_sig` and `analysis`.
    scrubber: callable invoked (through do_scrub) as scrubber(body, *args).
    scrubber_args: arguments forwarded to `scrubber`.
    raw_tool_output: text searched with `function_re`.
    prefixes: check prefixes the output is recorded under.
    func_dict: mapping prefix -> {function name -> function_body or None};
      updated in place.
    verbose: when true, the scrubbed body of each function is printed to
      stderr.
    record_args: when true, the scrubbed argument list/signature is recorded
      instead of a bare '('.
  """
  for m in function_re.finditer(raw_tool_output):
    if not m:
      continue
    func = m.group('func')
    body = m.group('body')
    # Determine if we print arguments, the opening brace, or nothing after the function name
    if record_args and 'args_and_sig' in m.groupdict():
        args_and_sig = scrub_body(m.group('args_and_sig').strip())
    elif 'args_and_sig' in m.groupdict():
        args_and_sig = '('
    else:
        args_and_sig = ''
    # Scrub twice: once normally and once with the extra-scrub option, so the
    # latter can be used when outputs only differ in argument names.
    scrubbed_body = do_scrub(body, scrubber, scrubber_args, extra = False)
    scrubbed_extra = do_scrub(body, scrubber, scrubber_args, extra = True)
    if 'analysis' in m.groupdict():
      analysis = m.group('analysis')
      if analysis.lower() != 'cost model analysis':
        warn('Unsupported analysis mode: %r!' % (analysis,))
    if func.startswith('stress'):
      # We only use the last line of the function body for stress tests.
      scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
    if verbose:
      print('Processing function: ' + func, file=sys.stderr)
      for l in scrubbed_body.splitlines():
        print('  ' + l, file=sys.stderr)
    for prefix in prefixes:
      # An entry already exists for this prefix and it differs from what we
      # just produced (different body, or different signature).
      if func in func_dict[prefix] and (str(func_dict[prefix][func]) != scrubbed_body or (func_dict[prefix][func] and func_dict[prefix][func].args_and_sig != args_and_sig)):
        if func_dict[prefix][func] and func_dict[prefix][func].is_same_except_arg_names(scrubbed_extra, args_and_sig):
          # Only argument names differ: keep the existing entry but switch it
          # to the extra-scrubbed body and the new signature.
          func_dict[prefix][func].scrub = scrubbed_extra
          func_dict[prefix][func].args_and_sig = args_and_sig
          continue
        else:
          if prefix == prefixes[-1]:
            # Conflict under the last prefix: warn, then fall through so the
            # new output replaces the old entry below.
            warn('Found conflicting asm under the same prefix: %r!' % (prefix,))
          else:
            # Conflict under an earlier prefix: mark the entry as unusable.
            func_dict[prefix][func] = None
            continue

      func_dict[prefix][func] = function_body(scrubbed_body, scrubbed_extra, args_and_sig)
 208
 209 ##### Generator of LLVM IR CHECK lines
 210
# An IR comment: optional leading whitespace, a ';', and the rest of the line.
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

# Match things that look like identifiers (a '%name' preceded by whitespace),
# but only if they are followed by whitespace, a comma, a paren, or the end of
# the string. Group 1 is the leading whitespace, group 2 the name without the
# '%', and group 3 the trailing delimiter.
IR_VALUE_RE = re.compile(r'(\s+)%([\w\.\-]+?)([,\s\(\)]|\Z)')
 216
 217 # Create a FileCheck variable name based on an IR name.
 218 def get_value_name(var):
 219   if var.isdigit():
 220     var = 'TMP' + var
 221   var = var.replace('.', '_')
 222   var = var.replace('-', '_')
 223   return var.upper()
 224
 225
 226 # Create a FileCheck variable from regex.
 227 def get_value_definition(var):
 228   return '[[' + get_value_name(var) + ':%.*]]'
 229
 230
 231 # Use a FileCheck variable.
 232 def get_value_use(var):
 233   return '[[' + get_value_name(var) + ']]'
 234
 235 # Replace IR value defs and uses with FileCheck variables.
 236 def genericize_check_lines(lines, is_analyze, vars_seen):
 237   # This gets called for each match that occurs in
 238   # a line. We transform variables we haven't seen
 239   # into defs, and variables we have seen into uses.
 240   def transform_line_vars(match):
 241     var = match.group(2)
 242     if var in vars_seen:
 243       rv = get_value_use(var)
 244     else:
 245       vars_seen.add(var)
 246       rv = get_value_definition(var)
 247     # re.sub replaces the entire regex match
 248     # with whatever you return, so we have
 249     # to make sure to hand it back everything
 250     # including the commas and spaces.
 251     return match.group(1) + rv + match.group(3)
 252
 253   lines_with_def = []
 254
 255   for i, line in enumerate(lines):
 256     # An IR variable named '%.' matches the FileCheck regex string.
 257     line = line.replace('%.', '%dot')
 258     # Ignore any comments, since the check lines will too.
 259     scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line)
 260     if is_analyze:
 261       lines[i] = scrubbed_line
 262     else:
 263       lines[i] = IR_VALUE_RE.sub(transform_line_vars, scrubbed_line)
 264   return lines
 265
 266
def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, is_asm, is_analyze):
  """Append the check lines for `func_name` to `output_lines`.

  Args:
    output_lines: list the generated lines are appended to.
    comment_marker: comment leader put in front of every generated line.
    prefix_list: sequence of entries whose first element is the list of
      check prefixes of one run line.
    func_dict: mapping prefix -> {function name -> recorded output or None};
      an empty entry may be filled in from a blacklisted prefix's output.
    func_name: function to emit checks for.
    check_label_format: '%'-format string taking (prefix, function name,
      args/signature) that produces the label line.
    is_asm: when true, body lines are emitted verbatim (no FileCheck
      variables in the body) and blank lines become -EMPTY checks.
    is_analyze: forwarded to genericize_check_lines(); when true, IR values
      are not replaced by FileCheck variables.
  """
  # prefix_blacklist are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well.
  prefix_blacklist = set()
  printed_prefixes = []
  for p in prefix_list:
    checkprefixes = p[0]
    # If not all checkprefixes of this run line produced the function we cannot check for it as it does not
    # exist for this run line. A subset of the check prefixes might know about the function but only because
    # other run lines created it.
    if any(map(lambda checkprefix: func_name not in func_dict[checkprefix], checkprefixes)):
        prefix_blacklist |= set(checkprefixes)
        continue

  # prefix_blacklist is constructed, we can now emit the output
  for p in prefix_list:
    checkprefixes = p[0]
    saved_output = None
    for checkprefix in checkprefixes:
      # Stop handling this run line as soon as one of its prefixes has
      # already been emitted.
      if checkprefix in printed_prefixes:
        break

      # prefix is blacklisted. We remember the output as we might need it later but we will not emit anything for the prefix.
      if checkprefix in prefix_blacklist:
          if not saved_output and func_name in func_dict[checkprefix]:
              saved_output = func_dict[checkprefix][func_name]
          continue

      # If we do not have output for this prefix but there is one saved, we go ahead with this prefix and the saved output.
      if not func_dict[checkprefix][func_name]:
        if not saved_output:
            continue
        func_dict[checkprefix][func_name] = saved_output

      # Add some space between different check prefixes, but not after the last
      # check line (before the test code).
      if is_asm:
        if len(printed_prefixes) != 0:
          output_lines.append(comment_marker)

      vars_seen = set()
      printed_prefixes.append(checkprefix)
      args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
      args_and_sig = genericize_check_lines([args_and_sig], is_analyze, vars_seen)[0]
      # If the signature now contains FileCheck variables, emit it on its own
      # -SAME line after the label; otherwise put it on the label line itself.
      if '[[' in args_and_sig:
        output_lines.append(check_label_format % (checkprefix, func_name, ''))
        output_lines.append('%s %s-SAME: %s' % (comment_marker, checkprefix, args_and_sig))
      else:
        output_lines.append(check_label_format % (checkprefix, func_name, args_and_sig))
      func_body = str(func_dict[checkprefix][func_name]).splitlines()

      # For ASM output, just emit the check lines.
      if is_asm:
        # The first body line gets a plain check; the rest are -NEXT (or
        # -EMPTY for blank lines).
        output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
        for func_line in func_body[1:]:
          if func_line.strip() == '':
            output_lines.append('%s %s-EMPTY:' % (comment_marker, checkprefix))
          else:
            output_lines.append('%s %s-NEXT:  %s' % (comment_marker, checkprefix, func_line))
        break

      # For IR output, change all defs to FileCheck variables, so we're immune
      # to variable naming fashions.
      func_body = genericize_check_lines(func_body, is_analyze, vars_seen)

      # This could be selectively enabled with an optional invocation argument.
      # Disabled for now: better to check everything. Be safe rather than sorry.

      # Handle the first line of the function body as a special case because
      # it's often just noise (a useless asm comment or entry label).
      #if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
      #  is_blank_line = True
      #else:
      #  output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
      #  is_blank_line = False

      is_blank_line = False

      for func_line in func_body:
        if func_line.strip() == '':
          is_blank_line = True
          continue
        # Do not waste time checking IR comments.
        func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)

        # Skip blank lines instead of checking them.
        # The line following a blank line gets a plain check instead of -NEXT.
        if is_blank_line:
          output_lines.append('{} {}:       {}'.format(
              comment_marker, checkprefix, func_line))
        else:
          output_lines.append('{} {}-NEXT:  {}'.format(
              comment_marker, checkprefix, func_line))
        is_blank_line = False

      # Add space between different check prefixes and also before the first
      # line of code in the test function.
      output_lines.append(comment_marker)
      break
 364
 365 def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict,
 366                   func_name, preserve_names, function_sig):
 367   # Label format is based on IR string.
 368   function_def_regex = 'define {{[^@]+}}' if function_sig else ''
 369   check_label_format = '{} %s-LABEL: {}@%s%s'.format(comment_marker, function_def_regex)
 370   add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
 371              check_label_format, False, preserve_names)
 372
 373 def add_analyze_checks(output_lines, comment_marker, prefix_list, func_dict, func_name):
 374   check_label_format = '{} %s-LABEL: \'%s%s\''.format(comment_marker)
 375   add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, False, True)
 376
 377
 378 def check_prefix(prefix):
 379   if not PREFIX_RE.match(prefix):
 380         hint = ""
 381         if ',' in prefix:
 382           hint = " Did you mean '--check-prefixes=" + prefix + "'?"
 383         warn(("Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores." + hint) %
 384              (prefix))
 385
 386
 387 def verify_filecheck_prefixes(fc_cmd):
 388   fc_cmd_parts = fc_cmd.split()
 389   for part in fc_cmd_parts:
 390     if "check-prefix=" in part:
 391       prefix = part.split('=', 1)[1]
 392       check_prefix(prefix)
 393     elif "check-prefixes=" in part:
 394       prefixes = part.split('=', 1)[1].split(',')
 395       for prefix in prefixes:
 396         check_prefix(prefix)
 397         if prefixes.count(prefix) > 1:
 398           warn("Supplied prefix '%s' is not unique in the prefix list." % (prefix,))