llvm/utils/UpdateTestChecks/common.py

   1 from __future__ import print_function
   2
   3 import copy
   4 import glob
   5 import os
   6 import re
   7 import subprocess
   8 import sys
   9
# Compatibility shim: code below calls string.expandtabs(text, tabsize).
# On Python 3 we provide a minimal stand-in class named `string` whose
# expandtabs attribute is str.expandtabs, so the same call shape keeps working;
# on Python 2 the real `string` module is imported instead.
if sys.version_info[0] > 2:
  class string:
    expandtabs = str.expandtabs
else:
  import string
  15
  16 ##### Common utilities for update_*test_checks.py
  17
  18
  19 _verbose = False
  20 _prefix_filecheck_ir_name = ''
  21
  22 def parse_commandline_args(parser):
  23   parser.add_argument('--include-generated-funcs', action='store_true',
  24                       help='Output checks for functions not in source')
  25   parser.add_argument('-v', '--verbose', action='store_true',
  26                       help='Show verbose output')
  27   parser.add_argument('-u', '--update-only', action='store_true',
  28                       help='Only update test if it was already autogened')
  29   parser.add_argument('--force-update', action='store_true',
  30                       help='Update test even if it was autogened by a different script')
  31   parser.add_argument('--enable', action='store_true', dest='enabled', default=True,
  32                        help='Activate CHECK line generation from this point forward')
  33   parser.add_argument('--disable', action='store_false', dest='enabled',
  34                       help='Deactivate CHECK line generation from this point forward')
  35   parser.add_argument('--replace-value-regex', nargs='+', default=[],
  36                       help='List of regular expressions to replace matching value names')
  37   parser.add_argument('--prefix-filecheck-ir-name', default='',
  38                       help='Add a prefix to FileCheck IR value names to avoid conflicts with scripted names')
  39   parser.add_argument('--global-value-regex', nargs='+', default=[],
  40                       help='List of regular expressions that a global value declaration must match to generate a check (has no effect if checking globals is not enabled)')
  41   parser.add_argument('--global-hex-value-regex', nargs='+', default=[],
  42                       help='List of regular expressions such that, for matching global value declarations, literal integer values should be encoded in hex in the associated FileCheck directives')
  43   args = parser.parse_args()
  44   global _verbose, _global_value_regex, _global_hex_value_regex
  45   _verbose = args.verbose
  46   _global_value_regex = args.global_value_regex
  47   _global_hex_value_regex = args.global_hex_value_regex
  48   return args
  49
  50
  51 class InputLineInfo(object):
  52   def __init__(self, line, line_number, args, argv):
  53     self.line = line
  54     self.line_number = line_number
  55     self.args = args
  56     self.argv = argv
  57
  58
  59 class TestInfo(object):
  60   def __init__(self, test, parser, script_name, input_lines, args, argv,
  61                comment_prefix, argparse_callback):
  62     self.parser = parser
  63     self.argparse_callback = argparse_callback
  64     self.path = test
  65     self.args = args
  66     if args.prefix_filecheck_ir_name:
  67       global _prefix_filecheck_ir_name
  68       _prefix_filecheck_ir_name = args.prefix_filecheck_ir_name
  69     self.argv = argv
  70     self.input_lines = input_lines
  71     self.run_lines = find_run_lines(test, self.input_lines)
  72     self.comment_prefix = comment_prefix
  73     if self.comment_prefix is None:
  74       if self.path.endswith('.mir'):
  75         self.comment_prefix = '#'
  76       else:
  77         self.comment_prefix = ';'
  78     self.autogenerated_note_prefix = self.comment_prefix + ' ' + UTC_ADVERT
  79     self.test_autogenerated_note = self.autogenerated_note_prefix + script_name
  80     self.test_autogenerated_note += get_autogennote_suffix(parser, self.args)
  81
  82   def ro_iterlines(self):
  83     for line_num, input_line in enumerate(self.input_lines):
  84       args, argv = check_for_command(input_line, self.parser,
  85                                      self.args, self.argv, self.argparse_callback)
  86       yield InputLineInfo(input_line, line_num, args, argv)
  87
  88   def iterlines(self, output_lines):
  89     output_lines.append(self.test_autogenerated_note)
  90     for line_info in self.ro_iterlines():
  91       input_line = line_info.line
  92       # Discard any previous script advertising.
  93       if input_line.startswith(self.autogenerated_note_prefix):
  94         continue
  95       self.args = line_info.args
  96       self.argv = line_info.argv
  97       if not self.args.enabled:
  98         output_lines.append(input_line)
  99         continue
 100       yield line_info
 101
 102 def itertests(test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None):
 103   for pattern in test_patterns:
 104     # On Windows we must expand the patterns ourselves.
 105     tests_list = glob.glob(pattern)
 106     if not tests_list:
 107       warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
 108       continue
 109     for test in tests_list:
 110       with open(test) as f:
 111         input_lines = [l.rstrip() for l in f]
 112       args = parser.parse_args()
 113       if argparse_callback is not None:
 114         argparse_callback(args)
 115       argv = sys.argv[:]
 116       first_line = input_lines[0] if input_lines else ""
 117       if UTC_ADVERT in first_line:
 118         if script_name not in first_line and not args.force_update:
 119           warn("Skipping test which wasn't autogenerated by " + script_name, test)
 120           continue
 121         args, argv = check_for_command(first_line, parser, args, argv, argparse_callback)
 122       elif args.update_only:
 123         assert UTC_ADVERT not in first_line
 124         warn("Skipping test which isn't autogenerated: " + test)
 125         continue
 126       yield TestInfo(test, parser, script_name, input_lines, args, argv,
 127                      comment_prefix, argparse_callback)
 128
 129
 130 def should_add_line_to_output(input_line, prefix_set, skip_global_checks = False, comment_marker = ';'):
 131   # Skip any blank comment lines in the IR.
 132   if not skip_global_checks and input_line.strip() == comment_marker:
 133     return False
 134   # Skip a special double comment line we use as a separator.
 135   if input_line.strip() == comment_marker + SEPARATOR:
 136     return False
 137   # Skip any blank lines in the IR.
 138   #if input_line.strip() == '':
 139   #  return False
 140   # And skip any CHECK lines. We're building our own.
 141   m = CHECK_RE.match(input_line)
 142   if m and m.group(1) in prefix_set:
 143     if skip_global_checks:
 144       global_ir_value_re = re.compile('\[\[', flags=(re.M))
 145       return not global_ir_value_re.search(input_line)
 146     return False
 147
 148   return True
 149
 150 # Invoke the tool that is being tested.
 151 def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False):
 152   with open(ir) as ir_file:
 153     # TODO Remove the str form which is used by update_test_checks.py and
 154     # update_llc_test_checks.py
 155     # The safer list form is used by update_cc_test_checks.py
 156     if preprocess_cmd:
 157       # Allow pre-processing the IR file (e.g. using sed):
 158       assert isinstance(preprocess_cmd, str)  # TODO: use a list instead of using shell
 159       preprocess_cmd = preprocess_cmd.replace('%s', ir).strip()
 160       if verbose:
 161         print('Pre-processing input file: ', ir, " with command '",
 162               preprocess_cmd, "'", sep="", file=sys.stderr)
 163       # Python 2.7 doesn't have subprocess.DEVNULL:
 164       with open(os.devnull, 'w') as devnull:
 165         pp = subprocess.Popen(preprocess_cmd, shell=True, stdin=devnull,
 166                               stdout=subprocess.PIPE)
 167         ir_file = pp.stdout
 168     if isinstance(cmd_args, list):
 169       stdout = subprocess.check_output([exe] + cmd_args, stdin=ir_file)
 170     else:
 171       stdout = subprocess.check_output(exe + ' ' + cmd_args,
 172                                        shell=True, stdin=ir_file)
 173     if sys.version_info[0] > 2:
 174       stdout = stdout.decode()
 175   # Fix line endings to unix CR style.
 176   return stdout.replace('\r\n', '\n')
 177
 178 ##### LLVM IR parser
 179 RUN_LINE_RE = re.compile(r'^\s*(?://|[;#])\s*RUN:\s*(.*)$')
 180 CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?[= ](\S+)')
 181 PREFIX_RE = re.compile('^[a-zA-Z0-9_-]+$')
 182 CHECK_RE = re.compile(r'^\s*(?://|[;#])\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME|-EMPTY)?:')
 183
 184 UTC_ARGS_KEY = 'UTC_ARGS:'
 185 UTC_ARGS_CMD = re.compile(r'.*' + UTC_ARGS_KEY + '\s*(?P<cmd>.*)\s*$')
 186 UTC_ADVERT = 'NOTE: Assertions have been autogenerated by '
 187
 188 OPT_FUNCTION_RE = re.compile(
 189     r'^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s]+?))?\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w.$-]+?)\s*'
 190     r'(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P<body>.*?)^\}$',
 191     flags=(re.M | re.S))
 192
 193 ANALYZE_FUNCTION_RE = re.compile(
 194     r'^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.$-]+?)\':'
 195     r'\s*\n(?P<body>.*)$',
 196     flags=(re.X | re.S))
 197
 198 IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(')
 199 TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
 200 TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
 201 MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)')
 202
 203 SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
 204 SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
 205 SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
 206 SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
 207 SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(r'([ \t]|(#[0-9]+))+$', flags=re.M)
 208 SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
 209 SCRUB_LOOP_COMMENT_RE = re.compile(
 210     r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M)
 211 SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r'(?<=\S)+[ \t]*#$', flags=re.M)
 212
 213 SEPARATOR = '.'
 214
 215 def error(msg, test_file=None):
 216   if test_file:
 217     msg = '{}: {}'.format(msg, test_file)
 218   print('ERROR: {}'.format(msg), file=sys.stderr)
 219
 220 def warn(msg, test_file=None):
 221   if test_file:
 222     msg = '{}: {}'.format(msg, test_file)
 223   print('WARNING: {}'.format(msg), file=sys.stderr)
 224
 225 def debug(*args, **kwargs):
 226   # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
 227   if 'file' not in kwargs:
 228     kwargs['file'] = sys.stderr
 229   if _verbose:
 230     print(*args, **kwargs)
 231
 232 def find_run_lines(test, lines):
 233   debug('Scanning for RUN lines in test file:', test)
 234   raw_lines = [m.group(1)
 235                for m in [RUN_LINE_RE.match(l) for l in lines] if m]
 236   run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
 237   for l in raw_lines[1:]:
 238     if run_lines[-1].endswith('\\'):
 239       run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + l
 240     else:
 241       run_lines.append(l)
 242   debug('Found {} RUN lines in {}:'.format(len(run_lines), test))
 243   for l in run_lines:
 244     debug('  RUN: {}'.format(l))
 245   return run_lines
 246
 247 def scrub_body(body):
 248   # Scrub runs of whitespace out of the assembly, but leave the leading
 249   # whitespace in place.
 250   body = SCRUB_WHITESPACE_RE.sub(r' ', body)
 251   # Expand the tabs used for indentation.
 252   body = string.expandtabs(body, 2)
 253   # Strip trailing whitespace.
 254   body = SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r'', body)
 255   return body
 256
 257 def do_scrub(body, scrubber, scrubber_args, extra):
 258   if scrubber_args:
 259     local_args = copy.deepcopy(scrubber_args)
 260     local_args[0].extra_scrub = extra
 261     return scrubber(body, *local_args)
 262   return scrubber(body, *scrubber_args)
 263
 264 # Build up a dictionary of all the function bodies.
 265 class function_body(object):
 266   def __init__(self, string, extra, args_and_sig, attrs):
 267     self.scrub = string
 268     self.extrascrub = extra
 269     self.args_and_sig = args_and_sig
 270     self.attrs = attrs
 271   def is_same_except_arg_names(self, extrascrub, args_and_sig, attrs, is_asm):
 272     arg_names = set()
 273     def drop_arg_names(match):
 274         arg_names.add(match.group(variable_group_in_ir_value_match))
 275         if match.group(attribute_group_in_ir_value_match):
 276             attr = match.group(attribute_group_in_ir_value_match)
 277         else:
 278             attr = ''
 279         return match.group(1) + attr + match.group(match.lastindex)
 280     def repl_arg_names(match):
 281         if match.group(variable_group_in_ir_value_match) is not None and match.group(variable_group_in_ir_value_match) in arg_names:
 282             return match.group(1) + match.group(match.lastindex)
 283         return match.group(1) + match.group(2) + match.group(match.lastindex)
 284     if self.attrs != attrs:
 285       return False
 286     ans0 = IR_VALUE_RE.sub(drop_arg_names, self.args_and_sig)
 287     ans1 = IR_VALUE_RE.sub(drop_arg_names, args_and_sig)
 288     if ans0 != ans1:
 289         return False
 290     if is_asm:
 291         # Check without replacements, the replacements are not applied to the
 292         # body for asm checks.
 293         return self.extrascrub == extrascrub
 294
 295     es0 = IR_VALUE_RE.sub(repl_arg_names, self.extrascrub)
 296     es1 = IR_VALUE_RE.sub(repl_arg_names, extrascrub)
 297     es0 = SCRUB_IR_COMMENT_RE.sub(r'', es0)
 298     es1 = SCRUB_IR_COMMENT_RE.sub(r'', es1)
 299     return es0 == es1
 300
 301   def __str__(self):
 302     return self.scrub
 303
 304 class FunctionTestBuilder:
 305   def __init__(self, run_list, flags, scrubber_args, path):
 306     self._verbose = flags.verbose
 307     self._record_args = flags.function_signature
 308     self._check_attributes = flags.check_attributes
 309     self._scrubber_args = scrubber_args
 310     self._path = path
 311     # Strip double-quotes if input was read by UTC_ARGS
 312     self._replace_value_regex = list(map(lambda x: x.strip('"'), flags.replace_value_regex))
 313     self._func_dict = {}
 314     self._func_order = {}
 315     self._global_var_dict = {}
 316     for tuple in run_list:
 317       for prefix in tuple[0]:
 318         self._func_dict.update({prefix:dict()})
 319         self._func_order.update({prefix: []})
 320         self._global_var_dict.update({prefix:dict()})
 321
 322   def finish_and_get_func_dict(self):
 323     for prefix in self._get_failed_prefixes():
 324       warn('Prefix %s had conflicting output from different RUN lines for all functions in test %s' % (prefix,self._path,))
 325     return self._func_dict
 326
 327   def func_order(self):
 328     return self._func_order
 329
 330   def global_var_dict(self):
 331     return self._global_var_dict
 332
 333   def process_run_line(self, function_re, scrubber, raw_tool_output, prefixes, is_asm):
 334     build_global_values_dictionary(self._global_var_dict, raw_tool_output, prefixes)
 335     for m in function_re.finditer(raw_tool_output):
 336       if not m:
 337         continue
 338       func = m.group('func')
 339       body = m.group('body')
 340       attrs = m.group('attrs') if self._check_attributes else ''
 341       # Determine if we print arguments, the opening brace, or nothing after the
 342       # function name
 343       if self._record_args and 'args_and_sig' in m.groupdict():
 344           args_and_sig = scrub_body(m.group('args_and_sig').strip())
 345       elif 'args_and_sig' in m.groupdict():
 346           args_and_sig = '('
 347       else:
 348           args_and_sig = ''
 349       scrubbed_body = do_scrub(body, scrubber, self._scrubber_args,
 350                                extra=False)
 351       scrubbed_extra = do_scrub(body, scrubber, self._scrubber_args,
 352                                 extra=True)
 353       if 'analysis' in m.groupdict():
 354         analysis = m.group('analysis')
 355         if analysis.lower() != 'cost model analysis':
 356           warn('Unsupported analysis mode: %r!' % (analysis,))
 357       if func.startswith('stress'):
 358         # We only use the last line of the function body for stress tests.
 359         scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
 360       if self._verbose:
 361         print('Processing function: ' + func, file=sys.stderr)
 362         for l in scrubbed_body.splitlines():
 363           print('  ' + l, file=sys.stderr)
 364       for prefix in prefixes:
 365         # Replace function names matching the regex.
 366         for regex in self._replace_value_regex:
 367           # Pattern that matches capture groups in the regex in leftmost order.
 368           group_regex = re.compile('\(.*?\)')
 369           # Replace function name with regex.
 370           match = re.match(regex, func)
 371           if match:
 372             func_repl = regex
 373             # Replace any capture groups with their matched strings.
 374             for g in match.groups():
 375               func_repl = group_regex.sub(re.escape(g), func_repl, count=1)
 376             func = re.sub(func_repl, '{{' + func_repl + '}}', func)
 377
 378           # Replace all calls to regex matching functions.
 379           matches = re.finditer(regex, scrubbed_body)
 380           for match in matches:
 381             func_repl = regex
 382             # Replace any capture groups with their matched strings.
 383             for g in match.groups():
 384                 func_repl = group_regex.sub(re.escape(g), func_repl, count=1)
 385             # Substitute function call names that match the regex with the same
 386             # capture groups set.
 387             scrubbed_body = re.sub(func_repl, '{{' + func_repl + '}}',
 388                                    scrubbed_body)
 389
 390         if func in self._func_dict[prefix]:
 391           if (self._func_dict[prefix][func] is None or
 392               str(self._func_dict[prefix][func]) != scrubbed_body or
 393               self._func_dict[prefix][func].args_and_sig != args_and_sig or
 394                   self._func_dict[prefix][func].attrs != attrs):
 395             if (self._func_dict[prefix][func] is not None and
 396                 self._func_dict[prefix][func].is_same_except_arg_names(
 397                 scrubbed_extra,
 398                 args_and_sig,
 399                 attrs,
 400                 is_asm)):
 401               self._func_dict[prefix][func].scrub = scrubbed_extra
 402               self._func_dict[prefix][func].args_and_sig = args_and_sig
 403               continue
 404             else:
 405               # This means a previous RUN line produced a body for this function
 406               # that is different from the one produced by this current RUN line,
 407               # so the body can't be common accross RUN lines. We use None to
 408               # indicate that.
 409               self._func_dict[prefix][func] = None
 410               continue
 411
 412         self._func_dict[prefix][func] = function_body(
 413             scrubbed_body, scrubbed_extra, args_and_sig, attrs)
 414         self._func_order[prefix].append(func)
 415
 416   def _get_failed_prefixes(self):
 417     # This returns the list of those prefixes that failed to match any function,
 418     # because there were conflicting bodies produced by different RUN lines, in
 419     # all instances of the prefix. Effectively, this prefix is unused and should
 420     # be removed.
 421     for prefix in self._func_dict:
 422       if (self._func_dict[prefix] and
 423           (not [fct for fct in self._func_dict[prefix]
 424                 if self._func_dict[prefix][fct] is not None])):
 425         yield prefix
 426
 427
 428 ##### Generator of LLVM IR CHECK lines
 429
# Matches optional whitespace followed by ';' and the rest of the line, i.e.
# a trailing IR comment together with the whitespace in front of it.
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')
 431
 432 # TODO: We should also derive check lines for global, debug, loop declarations, etc..
 433
 434 class NamelessValue:
 435     def __init__(self, check_prefix, check_key, ir_prefix, global_ir_prefix, global_ir_prefix_regexp,
 436                  ir_regexp, global_ir_rhs_regexp, is_before_functions):
 437         self.check_prefix = check_prefix
 438         self.check_key = check_key
 439         self.ir_prefix = ir_prefix
 440         self.global_ir_prefix = global_ir_prefix
 441         self.global_ir_prefix_regexp = global_ir_prefix_regexp
 442         self.ir_regexp = ir_regexp
 443         self.global_ir_rhs_regexp = global_ir_rhs_regexp
 444         self.is_before_functions = is_before_functions
 445
 446 # Description of the different "unnamed" values we match in the IR, e.g.,
 447 # (local) ssa values, (debug) metadata, etc.
# Columns, in NamelessValue constructor order:
#   check_prefix, check_key, ir_prefix, global_ir_prefix,
#   global_ir_prefix_regexp, ir_regexp, global_ir_rhs_regexp,
#   is_before_functions
# Every row sets either (ir_prefix, ir_regexp) or
# (global_ir_prefix, global_ir_prefix_regexp), never both. The position of a
# row in this list matters: it determines the capture group of IR_VALUE_RE
# that identifies the kind.
nameless_values = [
    NamelessValue(r'TMP'  , '%' , r'%'           , None            , None                   , r'[\w$.-]+?' , None                 , False) ,
    NamelessValue(r'ATTR' , '#' , r'#'           , None            , None                   , r'[0-9]+'    , None                 , False) ,
    NamelessValue(r'ATTR' , '#' , None           , r'attributes #' , r'[0-9]+'              , None         , r'{[^}]*}'           , False) ,
    NamelessValue(r'GLOB' , '@' , r'@'           , None            , None                   , r'[0-9]+'    , None                 , False) ,
    NamelessValue(r'GLOB' , '@' , None           , r'@'            , r'[a-zA-Z0-9_$"\\.-]+' , None         , r'.+'                , True)  ,
    NamelessValue(r'DBG'  , '!' , r'!dbg '       , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'PROF' , '!' , r'!prof '      , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'TBAA' , '!' , r'!tbaa '      , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'RNG'  , '!' , r'!range '     , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'LOOP' , '!' , r'!llvm.loop ' , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'META' , '!' , r'metadata '   , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'META' , '!' , None           , r''             , r'![0-9]+'             , None         , r'(?:distinct |)!.*' , False) ,
]
 462
 463 def createOrRegexp(old, new):
 464     if not old:
 465         return new
 466     if not new:
 467         return old
 468     return old + '|' + new
 469
 470 def createPrefixMatch(prefix_str, prefix_re):
 471     if prefix_str is None or prefix_re is None:
 472         return ''
 473     return '(?:' + prefix_str + '(' + prefix_re + '))'
 474
 475 # Build the regexp that matches an "IR value". This can be a local variable,
 476 # argument, global, or metadata, anything that is "named". It is important that
 477 # the PREFIX and SUFFIX below only contain a single group, if that changes
 478 # other locations will need adjustment as well.
# Group 1: the whitespace in front of the value.
IR_VALUE_REGEXP_PREFIX = r'(\s*)'
# Alternation with one capture group per nameless_values entry, in list order.
IR_VALUE_REGEXP_STRING = r''
for nameless_value in nameless_values:
    lcl_match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
    glb_match = createPrefixMatch(nameless_value.global_ir_prefix, nameless_value.global_ir_prefix_regexp)
    # Exactly one of the two forms must be described by each entry.
    assert((lcl_match or glb_match) and not (lcl_match and glb_match))
    if lcl_match:
        IR_VALUE_REGEXP_STRING = createOrRegexp(IR_VALUE_REGEXP_STRING, lcl_match)
    elif glb_match:
        # The global form is anchored with '^'.
        IR_VALUE_REGEXP_STRING = createOrRegexp(IR_VALUE_REGEXP_STRING, '^' + glb_match)
# Last group: the character following the value (comma, whitespace or a
# parenthesis), or the end of the string.
IR_VALUE_REGEXP_SUFFIX = r'([,\s\(\)]|\Z)'
IR_VALUE_RE = re.compile(IR_VALUE_REGEXP_PREFIX + r'(' + IR_VALUE_REGEXP_STRING + r')' + IR_VALUE_REGEXP_SUFFIX)
 491
 492 # The entire match is group 0, the prefix has one group (=1), the entire
 493 # IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start.
# Index of the first per-kind capture group in an IR_VALUE_RE match; group
# (this + i) belongs to nameless_values[i].
first_nameless_group_in_ir_value_match = 3

# Group ids of two specific kinds: the first entry of nameless_values (the
# '%' values) and the second entry (the '#' attribute uses).
variable_group_in_ir_value_match = 3
attribute_group_in_ir_value_match = 4
 499
 500 # Check a match for IR_VALUE_RE and inspect it to determine if it was a local
 501 # value, %..., global @..., debug number !dbg !..., etc. See the PREFIXES above.
 502 def get_idx_from_ir_value_match(match):
 503     for i in range(first_nameless_group_in_ir_value_match, match.lastindex):
 504         if match.group(i) is not None:
 505             return i - first_nameless_group_in_ir_value_match
 506     error("Unable to identify the kind of IR value from the match!")
 507     return 0
 508
 509 # See get_idx_from_ir_value_match
 510 def get_name_from_ir_value_match(match):
 511     return match.group(get_idx_from_ir_value_match(match) + first_nameless_group_in_ir_value_match)
 512
# Return the nameless check prefix we use for this kind of IR value, see also
# get_idx_from_ir_value_match
 515 def get_nameless_check_prefix_from_ir_value_match(match):
 516     return nameless_values[get_idx_from_ir_value_match(match)].check_prefix
 517
# Return the IR prefix and check prefix we use for this kind of IR value, e.g., (%, TMP) for locals,
# see also get_idx_from_ir_value_match
 520 def get_ir_prefix_from_ir_value_match(match):
 521     idx = get_idx_from_ir_value_match(match)
 522     if nameless_values[idx].ir_prefix and match.group(0).strip().startswith(nameless_values[idx].ir_prefix):
 523         return nameless_values[idx].ir_prefix, nameless_values[idx].check_prefix
 524     return nameless_values[idx].global_ir_prefix, nameless_values[idx].check_prefix
 525
 526 def get_check_key_from_ir_value_match(match):
 527     idx = get_idx_from_ir_value_match(match)
 528     return nameless_values[idx].check_key
 529
# Return the IR regexp we use for this kind of IR value, e.g., [\w.-]+? for locals,
# see also get_idx_from_ir_value_match
 532 def get_ir_prefix_from_ir_value_re_match(match):
 533     # for backwards compatibility we check locals with '.*'
 534     if is_local_def_ir_value_match(match):
 535         return '.*'
 536     idx = get_idx_from_ir_value_match(match)
 537     if nameless_values[idx].ir_prefix and match.group(0).strip().startswith(nameless_values[idx].ir_prefix):
 538         return nameless_values[idx].ir_regexp
 539     return nameless_values[idx].global_ir_prefix_regexp
 540
 541 # Return true if this kind of IR value is "local", basically if it matches '%{{.*}}'.
 542 def is_local_def_ir_value_match(match):
 543     return nameless_values[get_idx_from_ir_value_match(match)].ir_prefix == '%'
 544
# Return true if this kind of IR value is matched in its global (start-of-line)
# form, i.e. its nameless_values entry defines a global_ir_prefix.
 546 def is_global_scope_ir_value_match(match):
 547     return nameless_values[get_idx_from_ir_value_match(match)].global_ir_prefix is not None
 548
 549 # Return true if var clashes with the scripted FileCheck check_prefix.
 550 def may_clash_with_default_check_prefix_name(check_prefix, var):
 551   return check_prefix and re.match(r'^' + check_prefix + r'[0-9]+?$', var, re.IGNORECASE)
 552
 553 # Create a FileCheck variable name based on an IR name.
 554 def get_value_name(var, check_prefix):
 555   var = var.replace('!', '')
 556   # This is a nameless value, prepend check_prefix.
 557   if var.isdigit():
 558     var = check_prefix + var
 559   else:
 560     # This is a named value that clashes with the check_prefix, prepend with _prefix_filecheck_ir_name,
 561     # if it has been defined.
 562     if may_clash_with_default_check_prefix_name(check_prefix, var) and _prefix_filecheck_ir_name:
 563       var = _prefix_filecheck_ir_name + var
 564   var = var.replace('.', '_')
 565   var = var.replace('-', '_')
 566   return var.upper()
 567
 568 # Create a FileCheck variable from regex.
 569 def get_value_definition(var, match):
 570   # for backwards compatibility we check locals with '.*'
 571   if is_local_def_ir_value_match(match):
 572     return '[[' + get_value_name(var, get_nameless_check_prefix_from_ir_value_match(match)) + ':' + \
 573             get_ir_prefix_from_ir_value_match(match)[0] + get_ir_prefix_from_ir_value_re_match(match) + ']]'
 574   prefix = get_ir_prefix_from_ir_value_match(match)[0]
 575   return prefix + '[[' + get_value_name(var, get_nameless_check_prefix_from_ir_value_match(match)) + ':' + get_ir_prefix_from_ir_value_re_match(match) + ']]'
 576
 577 # Use a FileCheck variable.
 578 def get_value_use(var, match, check_prefix):
 579   if is_local_def_ir_value_match(match):
 580     return '[[' + get_value_name(var, check_prefix) + ']]'
 581   prefix = get_ir_prefix_from_ir_value_match(match)[0]
 582   return prefix + '[[' + get_value_name(var, check_prefix) + ']]'
 583
 584 # Replace IR value defs and uses with FileCheck variables.
 585 def generalize_check_lines(lines, is_analyze, vars_seen, global_vars_seen):
 586   # This gets called for each match that occurs in
 587   # a line. We transform variables we haven't seen
 588   # into defs, and variables we have seen into uses.
 589   def transform_line_vars(match):
 590     pre, check = get_ir_prefix_from_ir_value_match(match)
 591     var = get_name_from_ir_value_match(match)
 592     for nameless_value in nameless_values:
 593         if may_clash_with_default_check_prefix_name(nameless_value.check_prefix, var):
 594           warn("Change IR value name '%s' or use -prefix-ir-filecheck-name to prevent possible conflict"
 595             " with scripted FileCheck name." % (var,))
 596     key = (var, get_check_key_from_ir_value_match(match))
 597     is_local_def = is_local_def_ir_value_match(match)
 598     if is_local_def and key in vars_seen:
 599       rv = get_value_use(var, match, get_nameless_check_prefix_from_ir_value_match(match))
 600     elif not is_local_def and key in global_vars_seen:
 601       rv = get_value_use(var, match, global_vars_seen[key])
 602     else:
 603       if is_local_def:
 604          vars_seen.add(key)
 605       else:
 606          global_vars_seen[key] = get_nameless_check_prefix_from_ir_value_match(match)
 607       rv = get_value_definition(var, match)
 608     # re.sub replaces the entire regex match
 609     # with whatever you return, so we have
 610     # to make sure to hand it back everything
 611     # including the commas and spaces.
 612     return match.group(1) + rv + match.group(match.lastindex)
 613
 614   lines_with_def = []
 615
 616   for i, line in enumerate(lines):
 617     # An IR variable named '%.' matches the FileCheck regex string.
 618     line = line.replace('%.', '%dot')
 619     for regex in _global_hex_value_regex:
 620       if re.match('^@' + regex + ' = ', line):
 621         line = re.sub(r'\bi([0-9]+) ([0-9]+)',
 622             lambda m : 'i' + m.group(1) + ' [[#' + hex(int(m.group(2))) + ']]',
 623             line)
 624         break
 625     # Ignore any comments, since the check lines will too.
 626     scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line)
 627     lines[i] = scrubbed_line
 628     if not is_analyze:
 629       # It can happen that two matches are back-to-back and for some reason sub
 630       # will not replace both of them. For now we work around this by
 631       # substituting until there is no more match.
 632       changed = True
 633       while changed:
 634           (lines[i], changed) = IR_VALUE_RE.subn(transform_line_vars, lines[i], count=1)
 635   return lines
 636
 637
 638 def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, is_asm, is_analyze, global_vars_seen_dict):
 639   # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well.
 640   prefix_exclusions = set()
 641   printed_prefixes = []
 642   for p in prefix_list:
 643     checkprefixes = p[0]
 644     # If not all checkprefixes of this run line produced the function we cannot check for it as it does not
 645     # exist for this run line. A subset of the check prefixes might know about the function but only because
 646     # other run lines created it.
 647     if any(map(lambda checkprefix: func_name not in func_dict[checkprefix], checkprefixes)):
 648         prefix_exclusions |= set(checkprefixes)
 649         continue
 650
 651   # prefix_exclusions is constructed, we can now emit the output
 652   for p in prefix_list:
 653     global_vars_seen = {}
 654     checkprefixes = p[0]
 655     for checkprefix in checkprefixes:
 656       if checkprefix in global_vars_seen_dict:
 657         global_vars_seen.update(global_vars_seen_dict[checkprefix])
 658       else:
 659         global_vars_seen_dict[checkprefix] = {}
 660       if checkprefix in printed_prefixes:
 661         break
 662
 663       # Check if the prefix is excluded.
 664       if checkprefix in prefix_exclusions:
 665         continue
 666
 667       # If we do not have output for this prefix we skip it.
 668       if not func_dict[checkprefix][func_name]:
 669         continue
 670
 671       # Add some space between different check prefixes, but not after the last
 672       # check line (before the test code).
 673       if is_asm:
 674         if len(printed_prefixes) != 0:
 675           output_lines.append(comment_marker)
 676
 677       if checkprefix not in global_vars_seen_dict:
 678           global_vars_seen_dict[checkprefix] = {}
 679
 680       global_vars_seen_before = [key for key in global_vars_seen.keys()]
 681
 682       vars_seen = set()
 683       printed_prefixes.append(checkprefix)
 684       attrs = str(func_dict[checkprefix][func_name].attrs)
 685       attrs = '' if attrs == 'None' else attrs
 686       if attrs:
 687         output_lines.append('%s %s: Function Attrs: %s' % (comment_marker, checkprefix, attrs))
 688       args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
 689       args_and_sig = generalize_check_lines([args_and_sig], is_analyze, vars_seen, global_vars_seen)[0]
 690       if '[[' in args_and_sig:
 691         output_lines.append(check_label_format % (checkprefix, func_name, ''))
 692         output_lines.append('%s %s-SAME: %s' % (comment_marker, checkprefix, args_and_sig))
 693       else:
 694         output_lines.append(check_label_format % (checkprefix, func_name, args_and_sig))
 695       func_body = str(func_dict[checkprefix][func_name]).splitlines()
 696
 697       # For ASM output, just emit the check lines.
 698       if is_asm:
 699         output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
 700         for func_line in func_body[1:]:
 701           if func_line.strip() == '':
 702             output_lines.append('%s %s-EMPTY:' % (comment_marker, checkprefix))
 703           else:
 704             output_lines.append('%s %s-NEXT:  %s' % (comment_marker, checkprefix, func_line))
 705         break
 706
 707       # For IR output, change all defs to FileCheck variables, so we're immune
 708       # to variable naming fashions.
 709       func_body = generalize_check_lines(func_body, is_analyze, vars_seen, global_vars_seen)
 710
 711       # This could be selectively enabled with an optional invocation argument.
 712       # Disabled for now: better to check everything. Be safe rather than sorry.
 713
 714       # Handle the first line of the function body as a special case because
 715       # it's often just noise (a useless asm comment or entry label).
 716       #if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
 717       #  is_blank_line = True
 718       #else:
 719       #  output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
 720       #  is_blank_line = False
 721
 722       is_blank_line = False
 723
 724       for func_line in func_body:
 725         if func_line.strip() == '':
 726           is_blank_line = True
 727           continue
 728         # Do not waste time checking IR comments.
 729         func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)
 730
 731         # Skip blank lines instead of checking them.
 732         if is_blank_line:
 733           output_lines.append('{} {}:       {}'.format(
 734               comment_marker, checkprefix, func_line))
 735         else:
 736           output_lines.append('{} {}-NEXT:  {}'.format(
 737               comment_marker, checkprefix, func_line))
 738         is_blank_line = False
 739
 740       # Add space between different check prefixes and also before the first
 741       # line of code in the test function.
 742       output_lines.append(comment_marker)
 743
 744       # Remembe new global variables we have not seen before
 745       for key in global_vars_seen:
 746           if key not in global_vars_seen_before:
 747               global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
 748       break
 749
 750 def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict,
 751                   func_name, preserve_names, function_sig, global_vars_seen_dict):
 752   # Label format is based on IR string.
 753   function_def_regex = 'define {{[^@]+}}' if function_sig else ''
 754   check_label_format = '{} %s-LABEL: {}@%s%s'.format(comment_marker, function_def_regex)
 755   add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
 756              check_label_format, False, preserve_names, global_vars_seen_dict)
 757
 758 def add_analyze_checks(output_lines, comment_marker, prefix_list, func_dict, func_name):
 759   check_label_format = '{} %s-LABEL: \'%s%s\''.format(comment_marker)
 760   global_vars_seen_dict = {}
 761   add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
 762              check_label_format, False, True, global_vars_seen_dict)
 763
 764 def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes):
 765   for nameless_value in nameless_values:
 766     if nameless_value.global_ir_prefix is None:
 767       continue
 768
 769     lhs_re_str = nameless_value.global_ir_prefix + nameless_value.global_ir_prefix_regexp
 770     rhs_re_str = nameless_value.global_ir_rhs_regexp
 771
 772     global_ir_value_re_str = r'^' + lhs_re_str + r'\s=\s' + rhs_re_str + r'$'
 773     global_ir_value_re = re.compile(global_ir_value_re_str, flags=(re.M))
 774     lines = []
 775     for m in global_ir_value_re.finditer(raw_tool_output):
 776         lines.append(m.group(0))
 777
 778     for prefix in prefixes:
 779       if glob_val_dict[prefix] is None:
 780         continue
 781       if nameless_value.check_prefix in glob_val_dict[prefix]:
 782         if lines == glob_val_dict[prefix][nameless_value.check_prefix]:
 783           continue
 784         if prefix == prefixes[-1]:
 785           warn('Found conflicting asm under the same prefix: %r!' % (prefix,))
 786         else:
 787           glob_val_dict[prefix][nameless_value.check_prefix] = None
 788           continue
 789       glob_val_dict[prefix][nameless_value.check_prefix] = lines
 790
 791 def add_global_checks(glob_val_dict, comment_marker, prefix_list, output_lines, global_vars_seen_dict, is_analyze, is_before_functions):
 792   printed_prefixes = set()
 793   for nameless_value in nameless_values:
 794     if nameless_value.global_ir_prefix is None:
 795         continue
 796     if nameless_value.is_before_functions != is_before_functions:
 797         continue
 798     for p in prefix_list:
 799       global_vars_seen = {}
 800       checkprefixes = p[0]
 801       if checkprefixes is None:
 802         continue
 803       for checkprefix in checkprefixes:
 804         if checkprefix in global_vars_seen_dict:
 805             global_vars_seen.update(global_vars_seen_dict[checkprefix])
 806         else:
 807             global_vars_seen_dict[checkprefix] = {}
 808         if (checkprefix, nameless_value.check_prefix) in printed_prefixes:
 809           break
 810         if not glob_val_dict[checkprefix]:
 811           continue
 812         if nameless_value.check_prefix not in glob_val_dict[checkprefix]:
 813           continue
 814         if not glob_val_dict[checkprefix][nameless_value.check_prefix]:
 815           continue
 816
 817         check_lines = []
 818         global_vars_seen_before = [key for key in global_vars_seen.keys()]
 819         for line in glob_val_dict[checkprefix][nameless_value.check_prefix]:
 820           if _global_value_regex:
 821             matched = False
 822             for regex in _global_value_regex:
 823               if re.match('^@' + regex + ' = ', line):
 824                 matched = True
 825                 break
 826             if not matched:
 827               continue
 828           tmp = generalize_check_lines([line], is_analyze, set(), global_vars_seen)
 829           check_line = '%s %s: %s' % (comment_marker, checkprefix, tmp[0])
 830           check_lines.append(check_line)
 831         if not check_lines:
 832           continue
 833
 834         output_lines.append(comment_marker + SEPARATOR)
 835         for check_line in check_lines:
 836           output_lines.append(check_line)
 837
 838         printed_prefixes.add((checkprefix, nameless_value.check_prefix))
 839
 840         # Remembe new global variables we have not seen before
 841         for key in global_vars_seen:
 842             if key not in global_vars_seen_before:
 843                 global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
 844         break
 845
 846   if printed_prefixes:
 847       output_lines.append(comment_marker + SEPARATOR)
 848
 849
 850 def check_prefix(prefix):
 851   if not PREFIX_RE.match(prefix):
 852         hint = ""
 853         if ',' in prefix:
 854           hint = " Did you mean '--check-prefixes=" + prefix + "'?"
 855         warn(("Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores." + hint) %
 856              (prefix))
 857
 858
 859 def verify_filecheck_prefixes(fc_cmd):
 860   fc_cmd_parts = fc_cmd.split()
 861   for part in fc_cmd_parts:
 862     if "check-prefix=" in part:
 863       prefix = part.split('=', 1)[1]
 864       check_prefix(prefix)
 865     elif "check-prefixes=" in part:
 866       prefixes = part.split('=', 1)[1].split(',')
 867       for prefix in prefixes:
 868         check_prefix(prefix)
 869         if prefixes.count(prefix) > 1:
 870           warn("Supplied prefix '%s' is not unique in the prefix list." % (prefix,))
 871
 872
 873 def get_autogennote_suffix(parser, args):
 874   autogenerated_note_args = ''
 875   for action in parser._actions:
 876     if not hasattr(args, action.dest):
 877       continue  # Ignore options such as --help that aren't included in args
 878     # Ignore parameters such as paths to the binary or the list of tests
 879     if action.dest in ('tests', 'update_only', 'opt_binary', 'llc_binary',
 880                        'clang', 'opt', 'llvm_bin', 'verbose'):
 881       continue
 882     value = getattr(args, action.dest)
 883     if action.const is not None:  # action stores a constant (usually True/False)
 884       # Skip actions with different constant values (this happens with boolean
 885       # --foo/--no-foo options)
 886       if value != action.const:
 887         continue
 888     if parser.get_default(action.dest) == value:
 889       continue  # Don't add default values
 890     autogenerated_note_args += action.option_strings[0] + ' '
 891     if action.const is None:  # action takes a parameter
 892       if action.nargs == '+':
 893         value = ' '.join(map(lambda v: '"' + v.strip('"') + '"', value))
 894       autogenerated_note_args += '%s ' % value
 895   if autogenerated_note_args:
 896     autogenerated_note_args = ' %s %s' % (UTC_ARGS_KEY, autogenerated_note_args[:-1])
 897   return autogenerated_note_args
 898
 899
 900 def check_for_command(line, parser, args, argv, argparse_callback):
 901     cmd_m = UTC_ARGS_CMD.match(line)
 902     if cmd_m:
 903         for option in cmd_m.group('cmd').strip().split(' '):
 904             if option:
 905                 argv.append(option)
 906         args = parser.parse_args(filter(lambda arg: arg not in args.tests, argv))
 907         if argparse_callback is not None:
 908           argparse_callback(args)
 909     return args, argv
 910
 911 def find_arg_in_test(test_info, get_arg_to_check, arg_string, is_global):
 912   result = get_arg_to_check(test_info.args)
 913   if not result and is_global:
 914     # See if this has been specified via UTC_ARGS.  This is a "global" option
 915     # that affects the entire generation of test checks.  If it exists anywhere
 916     # in the test, apply it to everything.
 917     saw_line = False
 918     for line_info in test_info.ro_iterlines():
 919       line = line_info.line
 920       if not line.startswith(';') and line.strip() != '':
 921         saw_line = True
 922       result = get_arg_to_check(line_info.args)
 923       if result:
 924         if warn and saw_line:
 925           # We saw the option after already reading some test input lines.
 926           # Warn about it.
 927           print('WARNING: Found {} in line following test start: '.format(arg_string)
 928                 + line, file=sys.stderr)
 929           print('WARNING: Consider moving {} to top of file'.format(arg_string),
 930                 file=sys.stderr)
 931         break
 932   return result
 933
 934 def dump_input_lines(output_lines, test_info, prefix_set, comment_string):
 935   for input_line_info in test_info.iterlines(output_lines):
 936     line = input_line_info.line
 937     args = input_line_info.args
 938     if line.strip() == comment_string:
 939       continue
 940     if line.strip() == comment_string + SEPARATOR:
 941       continue
 942     if line.lstrip().startswith(comment_string):
 943       m = CHECK_RE.match(line)
 944       if m and m.group(1) in prefix_set:
 945         continue
 946     output_lines.append(line.rstrip('\n'))
 947
def add_checks_at_end(output_lines, prefix_list, func_order,
                      comment_string, check_generator):
  """Append checks for the functions in `func_order`, grouped by prefix.

  output_lines: list that the generated lines are appended to.
  prefix_list: iterable of run-line entries; element [0] is the list of check
    prefixes and element [1] the tool command args string.
  func_order: maps a check prefix to the functions to emit for it, in order.
  comment_string: line appended as a separator before every function's checks
    except the very first one.
  check_generator: callable invoked as
    check_generator(output_lines, [([prefix], tool_args)], func).
  """
  # Functions handled so far; only its emptiness is tested in the code below.
  added = set()
  for prefix in prefix_list:
    prefixes = prefix[0]
    tool_args = prefix[1]
    # NOTE: the inner loop variable reuses (shadows) the name `prefix`.
    for prefix in prefixes:
      for func in func_order[prefix]:
        if added:
          output_lines.append(comment_string)
        added.add(func)

        # The add_*_checks routines expect a run list whose items are
        # tuples that have a list of prefixes as their first element and
        # tool command args string as their second element.  They output
        # checks for each prefix in the list of prefixes.  By doing so, it
        # implicitly assumes that for each function every run line will
        # generate something for that function.  That is not the case for
        # generated functions as some run lines might not generate them
        # (e.g. -fopenmp vs. no -fopenmp).
        #
        # Therefore, pass just the prefix we're interested in.  This has
        # the effect of generating all of the checks for functions of a
        # single prefix before moving on to the next prefix.  So checks
        # are ordered by prefix instead of by function as in "normal"
        # mode.
        check_generator(output_lines,
                        [([prefix], tool_args)],
                        func)