utils/UpdateTestChecks/common.py

   1 from __future__ import print_function
   2 import re
   3 import string
   4 import subprocess
   5 import sys
   6 import copy
   7
# Python 3 no longer provides the module-level string.expandtabs() helper, so
# on Python 3 the name `string` is rebound to a minimal stand-in class that
# exposes str.expandtabs under the same spelling. Python 2 keeps the real
# module. Either way callers can write string.expandtabs(text, tabsize).
if sys.version_info[0] > 2:
  class string:
    expandtabs = str.expandtabs
else:
  import string
  13
  14 ##### Common utilities for update_*test_checks.py
  15
  16 def should_add_line_to_output(input_line, prefix_set):
  17   # Skip any blank comment lines in the IR.
  18   if input_line.strip() == ';':
  19     return False
  20   # Skip any blank lines in the IR.
  21   #if input_line.strip() == '':
  22   #  return False
  23   # And skip any CHECK lines. We're building our own.
  24   m = CHECK_RE.match(input_line)
  25   if m and m.group(1) in prefix_set:
  26     return False
  27
  28   return True
  29
  30 # Invoke the tool that is being tested.
  31 def invoke_tool(exe, cmd_args, ir):
  32   with open(ir) as ir_file:
  33     # TODO Remove the str form which is used by update_test_checks.py and
  34     # update_llc_test_checks.py
  35     # The safer list form is used by update_cc_test_checks.py
  36     if isinstance(cmd_args, list):
  37       stdout = subprocess.check_output([exe] + cmd_args, stdin=ir_file)
  38     else:
  39       stdout = subprocess.check_output(exe + ' ' + cmd_args,
  40                                        shell=True, stdin=ir_file)
  41     if sys.version_info[0] > 2:
  42       stdout = stdout.decode()
  43   # Fix line endings to unix CR style.
  44   return stdout.replace('\r\n', '\n')
  45
  46 ##### LLVM IR parser
  47
  48 RUN_LINE_RE = re.compile('^\s*[;#]\s*RUN:\s*(.*)$')
  49 CHECK_PREFIX_RE = re.compile('--?check-prefix(?:es)?[= ](\S+)')
  50 PREFIX_RE = re.compile('^[a-zA-Z0-9_-]+$')
  51 CHECK_RE = re.compile(r'^\s*[;#]\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
  52
  53 OPT_FUNCTION_RE = re.compile(
  54     r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*\('
  55     r'(\s+)?[^)]*[^{]*\{\n(?P<body>.*?)^\}$',
  56     flags=(re.M | re.S))
  57
  58 ANALYZE_FUNCTION_RE = re.compile(
  59     r'^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w-]+?)\':'
  60     r'\s*\n(?P<body>.*)$',
  61     flags=(re.X | re.S))
  62
  63 IR_FUNCTION_RE = re.compile('^\s*define\s+(?:internal\s+)?[^@]*@(\w+)\s*\(')
  64 TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
  65 TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
  66 MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)')
  67
  68 SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
  69 SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
  70 SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
  71 SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
  72 SCRUB_LOOP_COMMENT_RE = re.compile(
  73     r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M)
  74
  75 def scrub_body(body):
  76   # Scrub runs of whitespace out of the assembly, but leave the leading
  77   # whitespace in place.
  78   body = SCRUB_WHITESPACE_RE.sub(r' ', body)
  79   # Expand the tabs used for indentation.
  80   body = string.expandtabs(body, 2)
  81   # Strip trailing whitespace.
  82   body = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body)
  83   return body
  84
  85 def do_scrub(body, scrubber, scrubber_args, extra):
  86   if scrubber_args:
  87     local_args = copy.deepcopy(scrubber_args)
  88     local_args[0].extra_scrub = extra
  89     return scrubber(body, *local_args)
  90   return scrubber(body, *scrubber_args)
  91
  92 # Build up a dictionary of all the function bodies.
  93 class function_body(object):
  94   def __init__(self, string, extra):
  95     self.scrub = string
  96     self.extrascrub = extra
  97   def __str__(self):
  98     return self.scrub
  99
 100 def build_function_body_dictionary(function_re, scrubber, scrubber_args, raw_tool_output, prefixes, func_dict, verbose):
 101   for m in function_re.finditer(raw_tool_output):
 102     if not m:
 103       continue
 104     func = m.group('func')
 105     body = m.group('body')
 106     scrubbed_body = do_scrub(body, scrubber, scrubber_args, extra = False)
 107     scrubbed_extra = do_scrub(body, scrubber, scrubber_args, extra = True)
 108     if 'analysis' in m.groupdict():
 109       analysis = m.group('analysis')
 110       if analysis.lower() != 'cost model analysis':
 111         print('WARNING: Unsupported analysis mode: %r!' % (analysis,), file=sys.stderr)
 112     if func.startswith('stress'):
 113       # We only use the last line of the function body for stress tests.
 114       scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
 115     if verbose:
 116       print('Processing function: ' + func, file=sys.stderr)
 117       for l in scrubbed_body.splitlines():
 118         print('  ' + l, file=sys.stderr)
 119     for prefix in prefixes:
 120       if func in func_dict[prefix] and str(func_dict[prefix][func]) != scrubbed_body:
 121         if func_dict[prefix][func] and func_dict[prefix][func].extrascrub == scrubbed_extra:
 122           func_dict[prefix][func].scrub = scrubbed_extra
 123           continue
 124         else:
 125           if prefix == prefixes[-1]:
 126             print('WARNING: Found conflicting asm under the '
 127                                  'same prefix: %r!' % (prefix,), file=sys.stderr)
 128           else:
 129             func_dict[prefix][func] = None
 130             continue
 131
 132       func_dict[prefix][func] = function_body(scrubbed_body, scrubbed_extra)
 133
 134 ##### Generator of LLVM IR CHECK lines
 135
# Matches an IR comment: any whitespace before a ';' and everything after it
# up to the end of the line.
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

# Match things that look like identifiers (a '%' name preceded by whitespace),
# but only if they are followed by spaces, commas, parens, or end of the
# string. The groups are: leading whitespace, the name, the trailing delimiter.
IR_VALUE_RE = re.compile(r'(\s+)%([\w\.\-]+?)([,\s\(\)]|\Z)')
 141
 142 # Create a FileCheck variable name based on an IR name.
 143 def get_value_name(var):
 144   if var.isdigit():
 145     var = 'TMP' + var
 146   var = var.replace('.', '_')
 147   var = var.replace('-', '_')
 148   return var.upper()
 149
 150
 151 # Create a FileCheck variable from regex.
 152 def get_value_definition(var):
 153   return '[[' + get_value_name(var) + ':%.*]]'
 154
 155
 156 # Use a FileCheck variable.
 157 def get_value_use(var):
 158   return '[[' + get_value_name(var) + ']]'
 159
 160 # Replace IR value defs and uses with FileCheck variables.
 161 def genericize_check_lines(lines, is_analyze):
 162   # This gets called for each match that occurs in
 163   # a line. We transform variables we haven't seen
 164   # into defs, and variables we have seen into uses.
 165   def transform_line_vars(match):
 166     var = match.group(2)
 167     if var in vars_seen:
 168       rv = get_value_use(var)
 169     else:
 170       vars_seen.add(var)
 171       rv = get_value_definition(var)
 172     # re.sub replaces the entire regex match
 173     # with whatever you return, so we have
 174     # to make sure to hand it back everything
 175     # including the commas and spaces.
 176     return match.group(1) + rv + match.group(3)
 177
 178   vars_seen = set()
 179   lines_with_def = []
 180
 181   for i, line in enumerate(lines):
 182     # An IR variable named '%.' matches the FileCheck regex string.
 183     line = line.replace('%.', '%dot')
 184     # Ignore any comments, since the check lines will too.
 185     scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line)
 186     if is_analyze:
 187       lines[i] = scrubbed_line
 188     else:
 189       lines[i] = IR_VALUE_RE.sub(transform_line_vars, scrubbed_line)
 190   return lines
 191
 192
 193 def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, is_asm, is_analyze):
 194   printed_prefixes = []
 195   for p in prefix_list:
 196     checkprefixes = p[0]
 197     for checkprefix in checkprefixes:
 198       if checkprefix in printed_prefixes:
 199         break
 200       # TODO func_dict[checkprefix] may be None, '' or not exist.
 201       # Fix the call sites.
 202       if func_name not in func_dict[checkprefix] or not func_dict[checkprefix][func_name]:
 203         continue
 204
 205       # Add some space between different check prefixes, but not after the last
 206       # check line (before the test code).
 207       if is_asm:
 208         if len(printed_prefixes) != 0:
 209           output_lines.append(comment_marker)
 210
 211       printed_prefixes.append(checkprefix)
 212       output_lines.append(check_label_format % (checkprefix, func_name))
 213       func_body = str(func_dict[checkprefix][func_name]).splitlines()
 214
 215       # For ASM output, just emit the check lines.
 216       if is_asm:
 217         output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
 218         for func_line in func_body[1:]:
 219           output_lines.append('%s %s-NEXT:  %s' % (comment_marker, checkprefix, func_line))
 220         break
 221
 222       # For IR output, change all defs to FileCheck variables, so we're immune
 223       # to variable naming fashions.
 224       func_body = genericize_check_lines(func_body, is_analyze)
 225
 226       # This could be selectively enabled with an optional invocation argument.
 227       # Disabled for now: better to check everything. Be safe rather than sorry.
 228
 229       # Handle the first line of the function body as a special case because
 230       # it's often just noise (a useless asm comment or entry label).
 231       #if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
 232       #  is_blank_line = True
 233       #else:
 234       #  output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
 235       #  is_blank_line = False
 236
 237       is_blank_line = False
 238
 239       for func_line in func_body:
 240         if func_line.strip() == '':
 241           is_blank_line = True
 242           continue
 243         # Do not waste time checking IR comments.
 244         func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)
 245
 246         # Skip blank lines instead of checking them.
 247         if is_blank_line:
 248           output_lines.append('{} {}:       {}'.format(
 249               comment_marker, checkprefix, func_line))
 250         else:
 251           output_lines.append('{} {}-NEXT:  {}'.format(
 252               comment_marker, checkprefix, func_line))
 253         is_blank_line = False
 254
 255       # Add space between different check prefixes and also before the first
 256       # line of code in the test function.
 257       output_lines.append(comment_marker)
 258       break
 259
 260 def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict, func_name):
 261   # Label format is based on IR string.
 262   check_label_format = '{} %s-LABEL: @%s('.format(comment_marker)
 263   add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, False, False)
 264
 265 def add_analyze_checks(output_lines, comment_marker, prefix_list, func_dict, func_name):
 266   check_label_format = '{} %s-LABEL: \'%s\''.format(comment_marker)
 267   add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, False, True)
 268
 269
 270 def check_prefix(prefix):
 271   if not PREFIX_RE.match(prefix):
 272         hint = ""
 273         if ',' in prefix:
 274           hint = " Did you mean '--check-prefixes=" + prefix + "'?"
 275         print(("WARNING: Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores." + hint) %
 276               (prefix), file=sys.stderr)
 277
 278
 279 def verify_filecheck_prefixes(fc_cmd):
 280   fc_cmd_parts = fc_cmd.split()
 281   for part in fc_cmd_parts:
 282     if "check-prefix=" in part:
 283       prefix = part.split('=', 1)[1]
 284       check_prefix(prefix)
 285     elif "check-prefixes=" in part:
 286       prefixes = part.split('=', 1)[1].split(',')
 287       for prefix in prefixes:
 288         check_prefix(prefix)
 289         if prefixes.count(prefix) > 1:
 290           print("WARNING: Supplied prefix '%s' is not unique in the prefix list." %
 291                 (prefix,), file=sys.stderr)