llvm/utils/UpdateTestChecks/common.py

   1 from __future__ import print_function
   2
   3 import argparse
   4 import copy
   5 import glob
   6 import itertools
   7 import os
   8 import re
   9 import subprocess
  10 import sys
  11 import shlex
  12
  13 from typing import List
  14
##### Common utilities for update_*test_checks.py


# Module-wide verbosity switch. parse_commandline_args() overwrites it with the
# value of --verbose, and debug() only prints when it is true.
_verbose = False
# Prefix prepended to named IR values whose FileCheck variable name may clash
# with a generated one. TestInfo sets it from --prefix-filecheck-ir-name.
_prefix_filecheck_ir_name = ''
  20
  21 class Regex(object):
  22   """Wrap a compiled regular expression object to allow deep copy of a regexp.
  23   This is required for the deep copy done in do_scrub.
  24
  25   """
  26   def __init__(self, regex):
  27     self.regex = regex
  28
  29   def __deepcopy__(self, memo):
  30     result = copy.copy(self)
  31     result.regex = self.regex
  32     return result
  33
  34   def search(self, line):
  35     return self.regex.search(line)
  36
  37   def sub(self, repl, line):
  38     return self.regex.sub(repl, line)
  39
  40   def pattern(self):
  41     return self.regex.pattern
  42
  43   def flags(self):
  44     return self.regex.flags
  45
  46 class Filter(Regex):
  47   """Augment a Regex object with a flag indicating whether a match should be
  48     added (!is_filter_out) or removed (is_filter_out) from the generated checks.
  49
  50   """
  51   def __init__(self, regex, is_filter_out):
  52     super(Filter, self).__init__(regex)
  53     self.is_filter_out = is_filter_out
  54
  55   def __deepcopy__(self, memo):
  56     result = copy.deepcopy(super(Filter, self), memo)
  57     result.is_filter_out = copy.deepcopy(self.is_filter_out, memo)
  58     return result
  59
  60 def parse_commandline_args(parser):
  61   class RegexAction(argparse.Action):
  62     """Add a regular expression option value to a list of regular expressions.
  63     This compiles the expression, wraps it in a Regex and adds it to the option
  64     value list."""
  65     def __init__(self, option_strings, dest, nargs=None, **kwargs):
  66       if nargs is not None:
  67         raise ValueError('nargs not allowed')
  68       super(RegexAction, self).__init__(option_strings, dest, **kwargs)
  69
  70     def do_call(self, namespace, values, flags):
  71       value_list = getattr(namespace, self.dest)
  72       if value_list is None:
  73         value_list = []
  74
  75       try:
  76         value_list.append(Regex(re.compile(values, flags)))
  77       except re.error as error:
  78         raise ValueError('{}: Invalid regular expression \'{}\' ({})'.format(
  79           option_string, error.pattern, error.msg))
  80
  81       setattr(namespace, self.dest, value_list)
  82
  83     def __call__(self, parser, namespace, values, option_string=None):
  84       self.do_call(namespace, values, 0)
  85
  86   class FilterAction(RegexAction):
  87     """Add a filter to a list of filter option values."""
  88     def __init__(self, option_strings, dest, nargs=None, **kwargs):
  89       super(FilterAction, self).__init__(option_strings, dest, nargs, **kwargs)
  90
  91     def __call__(self, parser, namespace, values, option_string=None):
  92       super(FilterAction, self).__call__(parser, namespace, values, option_string)
  93
  94       value_list = getattr(namespace, self.dest)
  95
  96       is_filter_out = ( option_string == '--filter-out' )
  97
  98       value_list[-1] = Filter(value_list[-1].regex, is_filter_out)
  99
 100       setattr(namespace, self.dest, value_list)
 101
 102   filter_group = parser.add_argument_group(
 103     'filtering',
 104     """Filters are applied to each output line according to the order given. The
 105     first matching filter terminates filter processing for that current line.""")
 106
 107   filter_group.add_argument('--filter', action=FilterAction, dest='filters',
 108                             metavar='REGEX',
 109                             help='Only include lines matching REGEX (may be specified multiple times)')
 110   filter_group.add_argument('--filter-out', action=FilterAction, dest='filters',
 111                             metavar='REGEX',
 112                             help='Exclude lines matching REGEX')
 113
 114   parser.add_argument('--include-generated-funcs', action='store_true',
 115                       help='Output checks for functions not in source')
 116   parser.add_argument('-v', '--verbose', action='store_true',
 117                       help='Show verbose output')
 118   parser.add_argument('-u', '--update-only', action='store_true',
 119                       help='Only update test if it was already autogened')
 120   parser.add_argument('--force-update', action='store_true',
 121                       help='Update test even if it was autogened by a different script')
 122   parser.add_argument('--enable', action='store_true', dest='enabled', default=True,
 123                        help='Activate CHECK line generation from this point forward')
 124   parser.add_argument('--disable', action='store_false', dest='enabled',
 125                       help='Deactivate CHECK line generation from this point forward')
 126   parser.add_argument('--replace-value-regex', nargs='+', default=[],
 127                       help='List of regular expressions to replace matching value names')
 128   parser.add_argument('--prefix-filecheck-ir-name', default='',
 129                       help='Add a prefix to FileCheck IR value names to avoid conflicts with scripted names')
 130   parser.add_argument('--global-value-regex', nargs='+', default=[],
 131                       help='List of regular expressions that a global value declaration must match to generate a check (has no effect if checking globals is not enabled)')
 132   parser.add_argument('--global-hex-value-regex', nargs='+', default=[],
 133                       help='List of regular expressions such that, for matching global value declarations, literal integer values should be encoded in hex in the associated FileCheck directives')
 134   # FIXME: in 3.9, we can use argparse.BooleanOptionalAction. At that point,
 135   # we need to rename the flag to just -generate-body-for-unused-prefixes.
 136   parser.add_argument('--no-generate-body-for-unused-prefixes',
 137                       action='store_false',
 138                       dest='gen_unused_prefix_body',
 139                       default=True,
 140                       help='Generate a function body that always matches for unused prefixes. This is useful when unused prefixes are desired, and it avoids needing to annotate each FileCheck as allowing them.')
 141   args = parser.parse_args()
 142   global _verbose, _global_value_regex, _global_hex_value_regex
 143   _verbose = args.verbose
 144   _global_value_regex = args.global_value_regex
 145   _global_hex_value_regex = args.global_hex_value_regex
 146   return args
 147
 148
 149 class InputLineInfo(object):
 150   def __init__(self, line, line_number, args, argv):
 151     self.line = line
 152     self.line_number = line_number
 153     self.args = args
 154     self.argv = argv
 155
 156
 157 class TestInfo(object):
 158   def __init__(self, test, parser, script_name, input_lines, args, argv,
 159                comment_prefix, argparse_callback):
 160     self.parser = parser
 161     self.argparse_callback = argparse_callback
 162     self.path = test
 163     self.args = args
 164     if args.prefix_filecheck_ir_name:
 165       global _prefix_filecheck_ir_name
 166       _prefix_filecheck_ir_name = args.prefix_filecheck_ir_name
 167     self.argv = argv
 168     self.input_lines = input_lines
 169     self.run_lines = find_run_lines(test, self.input_lines)
 170     self.comment_prefix = comment_prefix
 171     if self.comment_prefix is None:
 172       if self.path.endswith('.mir'):
 173         self.comment_prefix = '#'
 174       else:
 175         self.comment_prefix = ';'
 176     self.autogenerated_note_prefix = self.comment_prefix + ' ' + UTC_ADVERT
 177     self.test_autogenerated_note = self.autogenerated_note_prefix + script_name
 178     self.test_autogenerated_note += get_autogennote_suffix(parser, self.args)
 179     self.test_unused_note = self.comment_prefix + self.comment_prefix + ' ' + UNUSED_NOTE
 180
 181   def ro_iterlines(self):
 182     for line_num, input_line in enumerate(self.input_lines):
 183       args, argv = check_for_command(input_line, self.parser,
 184                                      self.args, self.argv, self.argparse_callback)
 185       yield InputLineInfo(input_line, line_num, args, argv)
 186
 187   def iterlines(self, output_lines):
 188     output_lines.append(self.test_autogenerated_note)
 189     for line_info in self.ro_iterlines():
 190       input_line = line_info.line
 191       # Discard any previous script advertising.
 192       if input_line.startswith(self.autogenerated_note_prefix):
 193         continue
 194       self.args = line_info.args
 195       self.argv = line_info.argv
 196       if not self.args.enabled:
 197         output_lines.append(input_line)
 198         continue
 199       yield line_info
 200
 201   def get_checks_for_unused_prefixes(self, run_list, used_prefixes: List[str]) -> List[str]:
 202     unused_prefixes = set(
 203         [prefix for sublist in run_list for prefix in sublist[0]]).difference(set(used_prefixes))
 204
 205     ret = []
 206     if not unused_prefixes:
 207       return ret
 208     ret.append(self.test_unused_note)
 209     for unused in sorted(unused_prefixes):
 210       ret.append('{comment} {prefix}: {match_everything}'.format(
 211         comment=self.comment_prefix,
 212         prefix=unused,
 213         match_everything=r"""{{.*}}"""
 214       ))
 215     return ret
 216
 217 def itertests(test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None):
 218   for pattern in test_patterns:
 219     # On Windows we must expand the patterns ourselves.
 220     tests_list = glob.glob(pattern)
 221     if not tests_list:
 222       warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
 223       continue
 224     for test in tests_list:
 225       with open(test) as f:
 226         input_lines = [l.rstrip() for l in f]
 227       args = parser.parse_args()
 228       if argparse_callback is not None:
 229         argparse_callback(args)
 230       argv = sys.argv[:]
 231       first_line = input_lines[0] if input_lines else ""
 232       if UTC_ADVERT in first_line:
 233         if script_name not in first_line and not args.force_update:
 234           warn("Skipping test which wasn't autogenerated by " + script_name, test)
 235           continue
 236         args, argv = check_for_command(first_line, parser, args, argv, argparse_callback)
 237       elif args.update_only:
 238         assert UTC_ADVERT not in first_line
 239         warn("Skipping test which isn't autogenerated: " + test)
 240         continue
 241       final_input_lines = []
 242       for l in input_lines:
 243         if UNUSED_NOTE in l:
 244           break
 245         final_input_lines.append(l)
 246       yield TestInfo(test, parser, script_name, final_input_lines, args, argv,
 247                      comment_prefix, argparse_callback)
 248
 249
 250 def should_add_line_to_output(input_line, prefix_set, skip_global_checks = False, comment_marker = ';'):
 251   # Skip any blank comment lines in the IR.
 252   if not skip_global_checks and input_line.strip() == comment_marker:
 253     return False
 254   # Skip a special double comment line we use as a separator.
 255   if input_line.strip() == comment_marker + SEPARATOR:
 256     return False
 257   # Skip any blank lines in the IR.
 258   #if input_line.strip() == '':
 259   #  return False
 260   # And skip any CHECK lines. We're building our own.
 261   m = CHECK_RE.match(input_line)
 262   if m and m.group(1) in prefix_set:
 263     if skip_global_checks:
 264       global_ir_value_re = re.compile(r'\[\[', flags=(re.M))
 265       return not global_ir_value_re.search(input_line)
 266     return False
 267
 268   return True
 269
 270 # Perform lit-like substitutions
 271 def getSubstitutions(sourcepath):
 272   sourcedir = os.path.dirname(sourcepath)
 273   return [('%s', sourcepath),
 274           ('%S', sourcedir),
 275           ('%p', sourcedir),
 276           ('%{pathsep}', os.pathsep)]
 277
 278 def applySubstitutions(s, substitutions):
 279   for a,b in substitutions:
 280     s = s.replace(a, b)
 281   return s
 282
 283 # Invoke the tool that is being tested.
 284 def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False):
 285   with open(ir) as ir_file:
 286     substitutions = getSubstitutions(ir)
 287
 288     # TODO Remove the str form which is used by update_test_checks.py and
 289     # update_llc_test_checks.py
 290     # The safer list form is used by update_cc_test_checks.py
 291     if preprocess_cmd:
 292       # Allow pre-processing the IR file (e.g. using sed):
 293       assert isinstance(preprocess_cmd, str)  # TODO: use a list instead of using shell
 294       preprocess_cmd = applySubstitutions(preprocess_cmd, substitutions).strip()
 295       if verbose:
 296         print('Pre-processing input file: ', ir, " with command '",
 297               preprocess_cmd, "'", sep="", file=sys.stderr)
 298       # Python 2.7 doesn't have subprocess.DEVNULL:
 299       with open(os.devnull, 'w') as devnull:
 300         pp = subprocess.Popen(preprocess_cmd, shell=True, stdin=devnull,
 301                               stdout=subprocess.PIPE)
 302         ir_file = pp.stdout
 303
 304     if isinstance(cmd_args, list):
 305       args = [applySubstitutions(a, substitutions) for a in cmd_args]
 306       stdout = subprocess.check_output([exe] + args, stdin=ir_file)
 307     else:
 308       stdout = subprocess.check_output(exe + ' ' + applySubstitutions(cmd_args, substitutions),
 309                                        shell=True, stdin=ir_file)
 310     if sys.version_info[0] > 2:
 311       # FYI, if you crashed here with a decode error, your run line probably
 312       # results in bitcode or other binary format being written to the pipe.
 313       # For an opt test, you probably want to add -S or -disable-output.
 314       stdout = stdout.decode()
 315   # Fix line endings to unix CR style.
 316   return stdout.replace('\r\n', '\n')
 317
 318 ##### LLVM IR parser
 319 RUN_LINE_RE = re.compile(r'^\s*(?://|[;#])\s*RUN:\s*(.*)$')
 320 CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?[= ](\S+)')
 321 PREFIX_RE = re.compile('^[a-zA-Z0-9_-]+$')
 322 CHECK_RE = re.compile(r'^\s*(?://|[;#])\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME|-EMPTY)?:')
 323
 324 UTC_ARGS_KEY = 'UTC_ARGS:'
 325 UTC_ARGS_CMD = re.compile(r'.*' + UTC_ARGS_KEY + '\s*(?P<cmd>.*)\s*$')
 326 UTC_ADVERT = 'NOTE: Assertions have been autogenerated by '
 327 UNUSED_NOTE = 'NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:'
 328
# An IR function definition, optionally preceded by a '; Function Attrs:'
# comment. Named groups: attrs, func, args_and_sig, body.
OPT_FUNCTION_RE = re.compile(
    r'^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s]+?))?\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w.$-]+?)\s*'
    r'(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P<body>.*?)^\}$',
    flags=(re.M | re.S))

# Output of the form "'<analysis>' for function '<func>':" followed by a body.
# Named groups: analysis, func, body.
ANALYZE_FUNCTION_RE = re.compile(
    r'^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.$-]+?)\':'
    r'\s*\n(?P<body>.*)$',
    flags=(re.X | re.S))

# A quoted function name on the first line, followed by a body.
# Named groups: func, body.
LV_DEBUG_RE = re.compile(
    r'^\s*\'(?P<func>[\w.$-]+?)\'[^\n]*'
    r'\s*\n(?P<body>.*)$',
    flags=(re.X | re.S))

# Start of an IR 'define' line; group 1 is the function name, with any
# surrounding double quotes excluded.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(')
# A 'target triple = "..."' line; group 1 is the triple.
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
# Command-line options whose value is captured in group 1.
TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)')
DEBUG_ONLY_ARG_RE = re.compile(r'-debug-only[= ]([^ ]+)')

# Whitespace at the very start of the string.
SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
# Runs of spaces/tabs; the lookahead excludes positions at the start of a line
# and the two-space indent in front of a word character.
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
# Spaces/tabs at the end of any line.
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
# Alias used by scrub_body; same pattern object as above.
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
# Spaces/tabs and '#<digits>' attribute references at the end of any line.
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(r'([ \t]|(#[0-9]+))+$', flags=re.M)
# A leading '# kill:' comment line, including its newline.
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
# The '# =>This Inner Loop Header:' and '# in Loop:' comments.
SCRUB_LOOP_COMMENT_RE = re.compile(
    r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M)
# A bare '#' (with optional spaces/tabs before it) ending a non-empty line.
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r'(?<=\S)+[ \t]*#$', flags=re.M)

# Appended to the comment marker to form the separator line that
# should_add_line_to_output drops.
SEPARATOR = '.'
 361
 362 def error(msg, test_file=None):
 363   if test_file:
 364     msg = '{}: {}'.format(msg, test_file)
 365   print('ERROR: {}'.format(msg), file=sys.stderr)
 366
 367 def warn(msg, test_file=None):
 368   if test_file:
 369     msg = '{}: {}'.format(msg, test_file)
 370   print('WARNING: {}'.format(msg), file=sys.stderr)
 371
 372 def debug(*args, **kwargs):
 373   # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
 374   if 'file' not in kwargs:
 375     kwargs['file'] = sys.stderr
 376   if _verbose:
 377     print(*args, **kwargs)
 378
 379 def find_run_lines(test, lines):
 380   debug('Scanning for RUN lines in test file:', test)
 381   raw_lines = [m.group(1)
 382                for m in [RUN_LINE_RE.match(l) for l in lines] if m]
 383   run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
 384   for l in raw_lines[1:]:
 385     if run_lines[-1].endswith('\\'):
 386       run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + l
 387     else:
 388       run_lines.append(l)
 389   debug('Found {} RUN lines in {}:'.format(len(run_lines), test))
 390   for l in run_lines:
 391     debug('  RUN: {}'.format(l))
 392   return run_lines
 393
 394 def get_triple_from_march(march):
 395   triples = {
 396       'amdgcn': 'amdgcn',
 397       'r600': 'r600',
 398       'mips': 'mips',
 399       'sparc': 'sparc',
 400       'hexagon': 'hexagon',
 401       've': 've',
 402   }
 403   for prefix, triple in triples.items():
 404     if march.startswith(prefix):
 405       return triple
 406   print("Cannot find a triple. Assume 'x86'", file=sys.stderr)
 407   return 'x86'
 408
 409 def apply_filters(line, filters):
 410   has_filter = False
 411   for f in filters:
 412     if not f.is_filter_out:
 413       has_filter = True
 414     if f.search(line):
 415       return False if f.is_filter_out else True
 416   # If we only used filter-out, keep the line, otherwise discard it since no
 417   # filter matched.
 418   return False if has_filter else True
 419
 420 def do_filter(body, filters):
 421   return body if not filters else '\n'.join(filter(
 422     lambda line: apply_filters(line, filters), body.splitlines()))
 423
 424 def scrub_body(body):
 425   # Scrub runs of whitespace out of the assembly, but leave the leading
 426   # whitespace in place.
 427   body = SCRUB_WHITESPACE_RE.sub(r' ', body)
 428   # Expand the tabs used for indentation.
 429   body = str.expandtabs(body, 2)
 430   # Strip trailing whitespace.
 431   body = SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r'', body)
 432   return body
 433
 434 def do_scrub(body, scrubber, scrubber_args, extra):
 435   if scrubber_args:
 436     local_args = copy.deepcopy(scrubber_args)
 437     local_args[0].extra_scrub = extra
 438     return scrubber(body, *local_args)
 439   return scrubber(body, *scrubber_args)
 440
 441 # Build up a dictionary of all the function bodies.
 442 class function_body(object):
 443   def __init__(self, string, extra, args_and_sig, attrs, func_name_separator):
 444     self.scrub = string
 445     self.extrascrub = extra
 446     self.args_and_sig = args_and_sig
 447     self.attrs = attrs
 448     self.func_name_separator = func_name_separator
 449   def is_same_except_arg_names(self, extrascrub, args_and_sig, attrs, is_backend):
 450     arg_names = set()
 451     def drop_arg_names(match):
 452       arg_names.add(match.group(variable_group_in_ir_value_match))
 453       if match.group(attribute_group_in_ir_value_match):
 454         attr = match.group(attribute_group_in_ir_value_match)
 455       else:
 456         attr = ''
 457       return match.group(1) + attr + match.group(match.lastindex)
 458     def repl_arg_names(match):
 459       if match.group(variable_group_in_ir_value_match) is not None and match.group(variable_group_in_ir_value_match) in arg_names:
 460         return match.group(1) + match.group(match.lastindex)
 461       return match.group(1) + match.group(2) + match.group(match.lastindex)
 462     if self.attrs != attrs:
 463       return False
 464     ans0 = IR_VALUE_RE.sub(drop_arg_names, self.args_and_sig)
 465     ans1 = IR_VALUE_RE.sub(drop_arg_names, args_and_sig)
 466     if ans0 != ans1:
 467       return False
 468     if is_backend:
 469       # Check without replacements, the replacements are not applied to the
 470       # body for backend checks.
 471       return self.extrascrub == extrascrub
 472
 473     es0 = IR_VALUE_RE.sub(repl_arg_names, self.extrascrub)
 474     es1 = IR_VALUE_RE.sub(repl_arg_names, extrascrub)
 475     es0 = SCRUB_IR_COMMENT_RE.sub(r'', es0)
 476     es1 = SCRUB_IR_COMMENT_RE.sub(r'', es1)
 477     return es0 == es1
 478
 479   def __str__(self):
 480     return self.scrub
 481
class FunctionTestBuilder:
  """Accumulates, per check prefix, the function bodies seen across RUN lines.

  For every prefix it keeps a dict mapping function name to a function_body
  (or to None when the RUN lines sharing that prefix disagree), the order in
  which functions were first recorded, and a dict for global values.
  """
  def __init__(self, run_list, flags, scrubber_args, path):
    # run_list: each entry holds its list of check prefixes at index 0.
    # flags: parsed options; verbose, function_signature, check_attributes,
    #   filters and replace_value_regex are read here.
    # path: test file path, only used in the warning message.
    self._verbose = flags.verbose
    self._record_args = flags.function_signature
    self._check_attributes = flags.check_attributes
    # Strip double-quotes if input was read by UTC_ARGS
    self._filters = list(map(lambda f: Filter(re.compile(f.pattern().strip('"'),
                                                         f.flags()),
                                              f.is_filter_out),
                             flags.filters)) if flags.filters else []
    self._scrubber_args = scrubber_args
    self._path = path
    # Strip double-quotes if input was read by UTC_ARGS
    self._replace_value_regex = list(map(lambda x: x.strip('"'), flags.replace_value_regex))
    self._func_dict = {}
    self._func_order = {}
    self._global_var_dict = {}
    self._processed_prefixes = set()
    # Give every prefix its own empty containers up front.
    for tuple in run_list:
      for prefix in tuple[0]:
        self._func_dict.update({prefix:dict()})
        self._func_order.update({prefix: []})
        self._global_var_dict.update({prefix:dict()})

  def finish_and_get_func_dict(self):
    """Warn about every failed prefix, then return the per-prefix function dict."""
    for prefix in self.get_failed_prefixes():
      warn('Prefix %s had conflicting output from different RUN lines for all functions in test %s' % (prefix,self._path,))
    return self._func_dict

  def func_order(self):
    """Return the per-prefix lists of function names in recording order."""
    return self._func_order

  def global_var_dict(self):
    """Return the per-prefix dicts of global values."""
    return self._global_var_dict

  def is_filtered(self):
    """Return True if any filter is active."""
    return bool(self._filters)

  def process_run_line(self, function_re, scrubber, raw_tool_output, prefixes, is_backend):
    """Record the functions found in one RUN line's output under `prefixes`.

    function_re: regex with 'func' and 'body' groups; 'attrs' is read when
      attribute checking is on, and 'args_and_sig', 'analysis' and
      'func_name_separator' are used when the regex defines them.
    scrubber: callable applied to each filtered body through do_scrub.
    raw_tool_output: the complete output text of the tool.
    prefixes: the check prefixes this RUN line contributes to.
    is_backend: True for backend (assembly) output.
    """
    build_global_values_dictionary(self._global_var_dict, raw_tool_output, prefixes)
    for m in function_re.finditer(raw_tool_output):
      if not m:
        continue
      func = m.group('func')
      body = m.group('body')
      # func_name_separator is the string that is placed right after function name at the
      # beginning of assembly function definition. In most assemblies, that is just a
      # colon: `foo:`. But, for example, in nvptx it is a brace: `foo(`. If is_backend is
      # False, just assume that separator is an empty string.
      if is_backend:
        # Use ':' as default separator.
        func_name_separator = m.group('func_name_separator') if 'func_name_separator' in m.groupdict() else ':'
      else:
        func_name_separator = ''
      attrs = m.group('attrs') if self._check_attributes else ''
      # Determine if we print arguments, the opening brace, or nothing after the
      # function name
      if self._record_args and 'args_and_sig' in m.groupdict():
        args_and_sig = scrub_body(m.group('args_and_sig').strip())
      elif 'args_and_sig' in m.groupdict():
        args_and_sig = '('
      else:
        args_and_sig = ''
      # The body is filtered once and scrubbed twice: without and with the
      # extra_scrub flag set.
      filtered_body = do_filter(body, self._filters)
      scrubbed_body = do_scrub(filtered_body, scrubber, self._scrubber_args,
                               extra=False)
      scrubbed_extra = do_scrub(filtered_body, scrubber, self._scrubber_args,
                                extra=True)
      if 'analysis' in m.groupdict():
        analysis = m.group('analysis')
        if analysis.lower() != 'cost model analysis':
          warn('Unsupported analysis mode: %r!' % (analysis,))
      if func.startswith('stress'):
        # We only use the last line of the function body for stress tests.
        scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
      if self._verbose:
        print('Processing function: ' + func, file=sys.stderr)
        for l in scrubbed_body.splitlines():
          print('  ' + l, file=sys.stderr)
      # NOTE: func and scrubbed_body are rewritten in place inside this loop,
      # so the substitutions made for one prefix are visible to the next one.
      for prefix in prefixes:
        # Replace function names matching the regex.
        for regex in self._replace_value_regex:
          # Pattern that matches capture groups in the regex in leftmost order.
          group_regex = re.compile(r'\(.*?\)')
          # Replace function name with regex.
          match = re.match(regex, func)
          if match:
            func_repl = regex
            # Replace any capture groups with their matched strings.
            for g in match.groups():
              func_repl = group_regex.sub(re.escape(g), func_repl, count=1)
            func = re.sub(func_repl, '{{' + func_repl + '}}', func)

          # Replace all calls to regex matching functions.
          matches = re.finditer(regex, scrubbed_body)
          for match in matches:
            func_repl = regex
            # Replace any capture groups with their matched strings.
            for g in match.groups():
              func_repl = group_regex.sub(re.escape(g), func_repl, count=1)
            # Substitute function call names that match the regex with the same
            # capture groups set.
            scrubbed_body = re.sub(func_repl, '{{' + func_repl + '}}',
                                   scrubbed_body)

        if func in self._func_dict[prefix]:
          # Already recorded for this prefix: only act when the stored entry
          # is still valid and differs in body, signature or attributes.
          if (self._func_dict[prefix][func] is not None and
              (str(self._func_dict[prefix][func]) != scrubbed_body or
               self._func_dict[prefix][func].args_and_sig != args_and_sig or
               self._func_dict[prefix][func].attrs != attrs)):
            if self._func_dict[prefix][func].is_same_except_arg_names(
                scrubbed_extra,
                args_and_sig,
                attrs,
                is_backend):
              # Only argument names differ: switch the stored entry to the
              # extra-scrubbed body and the current signature.
              self._func_dict[prefix][func].scrub = scrubbed_extra
              self._func_dict[prefix][func].args_and_sig = args_and_sig
            else:
              # This means a previous RUN line produced a body for this function
              # that is different from the one produced by this current RUN line,
              # so the body can't be common across RUN lines. We use None to
              # indicate that.
              self._func_dict[prefix][func] = None
        else:
          if prefix not in self._processed_prefixes:
            self._func_dict[prefix][func] = function_body(
                scrubbed_body, scrubbed_extra, args_and_sig, attrs,
                func_name_separator)
            self._func_order[prefix].append(func)
          else:
            # An earlier RUN line used this check prefixes but didn't produce
            # a body for this function. This happens in Clang tests that use
            # preprocessor directives to exclude individual functions from some
            # RUN lines.
            self._func_dict[prefix][func] = None

  def processed_prefixes(self, prefixes):
    """
    Mark a set of prefixes as having had at least one applicable RUN line fully
    processed. This is used to filter out function bodies that don't have
    outputs for all RUN lines.
    """
    self._processed_prefixes.update(prefixes)

  def get_failed_prefixes(self):
    """Yield each prefix that has recorded functions, all of them set to None."""
    # This returns the list of those prefixes that failed to match any function,
    # because there were conflicting bodies produced by different RUN lines, in
    # all instances of the prefix.
    for prefix in self._func_dict:
      if (self._func_dict[prefix] and
          (not [fct for fct in self._func_dict[prefix]
                if self._func_dict[prefix][fct] is not None])):
        yield prefix
 635
 636
##### Generator of LLVM IR CHECK lines

# An IR comment: optional whitespace, a ';', and the rest of the line.
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

# TODO: We should also derive check lines for global, debug, loop declarations, etc.
 642
 643 class NamelessValue:
 644   def __init__(self, check_prefix, check_key, ir_prefix, global_ir_prefix, global_ir_prefix_regexp,
 645                ir_regexp, global_ir_rhs_regexp, is_before_functions, *,
 646                is_number=False, replace_number_with_counter=False):
 647     self.check_prefix = check_prefix
 648     self.check_key = check_key
 649     self.ir_prefix = ir_prefix
 650     self.global_ir_prefix = global_ir_prefix
 651     self.global_ir_prefix_regexp = global_ir_prefix_regexp
 652     self.ir_regexp = ir_regexp
 653     self.global_ir_rhs_regexp = global_ir_rhs_regexp
 654     self.is_before_functions = is_before_functions
 655     self.is_number = is_number
 656     # Some variable numbers (e.g. MCINST1234) will change based on unrelated
 657     # modifications to LLVM, replace those with an incrementing counter.
 658     self.replace_number_with_counter = replace_number_with_counter
 659     self.variable_mapping = {}
 660
 661   # Return true if this kind of IR value is "local", basically if it matches '%{{.*}}'.
 662   def is_local_def_ir_value_match(self, match):
 663     return self.ir_prefix == '%'
 664
 665   # Return true if this kind of IR value is "global", basically if it matches '#{{.*}}'.
 666   def is_global_scope_ir_value_match(self, match):
 667     return self.global_ir_prefix is not None
 668
 669   # Return the IR prefix and check prefix we use for this kind or IR value,
 670   # e.g., (%, TMP) for locals.
 671   def get_ir_prefix_from_ir_value_match(self, match):
 672     if self.ir_prefix and match.group(0).strip().startswith(self.ir_prefix):
 673       return self.ir_prefix, self.check_prefix
 674     return self.global_ir_prefix, self.check_prefix
 675
 676   # Return the IR regexp we use for this kind or IR value, e.g., [\w.-]+? for locals
 677   def get_ir_regex_from_ir_value_re_match(self, match):
 678     # for backwards compatibility we check locals with '.*'
 679     if self.is_local_def_ir_value_match(match):
 680       return '.*'
 681     if self.ir_prefix and match.group(0).strip().startswith(self.ir_prefix):
 682       return self.ir_regexp
 683     return self.global_ir_prefix_regexp
 684
 685   # Create a FileCheck variable name based on an IR name.
 686   def get_value_name(self, var: str, check_prefix: str):
 687     var = var.replace('!', '')
 688     if self.replace_number_with_counter:
 689       assert var.isdigit(), var
 690       replacement = self.variable_mapping.get(var, None)
 691       if replacement is None:
 692         # Replace variable with an incrementing counter
 693         replacement = str(len(self.variable_mapping) + 1)
 694         self.variable_mapping[var] = replacement
 695       var = replacement
 696     # This is a nameless value, prepend check_prefix.
 697     if var.isdigit():
 698       var = check_prefix + var
 699     else:
 700       # This is a named value that clashes with the check_prefix, prepend with
 701       # _prefix_filecheck_ir_name, if it has been defined.
 702       if may_clash_with_default_check_prefix_name(check_prefix, var) and _prefix_filecheck_ir_name:
 703         var = _prefix_filecheck_ir_name + var
 704     var = var.replace('.', '_')
 705     var = var.replace('-', '_')
 706     return var.upper()
 707
 708   # Create a FileCheck variable from regex.
 709   def get_value_definition(self, var, match):
 710     # for backwards compatibility we check locals with '.*'
 711     varname = self.get_value_name(var, self.check_prefix)
 712     prefix = self.get_ir_prefix_from_ir_value_match(match)[0]
 713     if self.is_number:
 714       regex = ''  # always capture a number in the default format
 715       capture_start = '[[#'
 716     else:
 717       regex = self.get_ir_regex_from_ir_value_re_match(match)
 718       capture_start = '[['
 719     if self.is_local_def_ir_value_match(match):
 720       return capture_start + varname + ':' + prefix + regex + ']]'
 721     return prefix + capture_start + varname + ':' + regex + ']]'
 722
 723   # Use a FileCheck variable.
 724   def get_value_use(self, var, match, var_prefix=None):
 725     if var_prefix is None:
 726       var_prefix = self.check_prefix
 727     capture_start = '[[#' if self.is_number else '[['
 728     if self.is_local_def_ir_value_match(match):
 729       return capture_start + self.get_value_name(var, var_prefix) + ']]'
 730     prefix = self.get_ir_prefix_from_ir_value_match(match)[0]
 731     return prefix + capture_start + self.get_value_name(var, var_prefix) + ']]'
 732
 733 # Description of the different "unnamed" values we match in the IR, e.g.,
 734 # (local) ssa values, (debug) metadata, etc.
ir_nameless_values = [
    # NOTE(review): the arguments are positional; the column order presumably
    # mirrors NamelessValue.__init__ (not visible in this part of the file) --
    # confirm there before adding or reordering columns.
    #
    # The order of the rows matters: IR_VALUE_RE is assembled by iterating this
    # list, each row contributes one capture group, and
    # get_idx_from_ir_value_match maps the matching group back to an index into
    # this list.  variable_group_in_ir_value_match and
    # attribute_group_in_ir_value_match further assume the first two rows.
    NamelessValue(r'TMP'     , '%' , r'%'                   , None            , None                   , r'[\w$.-]+?' , None                 , False) ,
    NamelessValue(r'ATTR'    , '#' , r'#'                   , None            , None                   , r'[0-9]+'    , None                 , False) ,
    NamelessValue(r'ATTR'    , '#' , None                   , r'attributes #' , r'[0-9]+'              , None         , r'{[^}]*}'           , False) ,
    NamelessValue(r'GLOB'    , '@' , r'@'                   , None            , None                   , r'[0-9]+'    , None                 , False) ,
    NamelessValue(r'GLOB'    , '@' , None                   , r'@'            , r'[a-zA-Z0-9_$"\\.-]+' , None         , r'.+'                , True)  ,
    NamelessValue(r'DBG'     , '!' , r'!dbg '               , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'PROF'    , '!' , r'!prof '              , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'TBAA'    , '!' , r'!tbaa '              , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'RNG'     , '!' , r'!range '             , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'LOOP'    , '!' , r'!llvm.loop '         , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'META'    , '!' , r'metadata '           , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'META'    , '!' , None                   , r''             , r'![0-9]+'             , None         , r'(?:distinct |)!.*' , False) ,
    NamelessValue(r'ACC_GRP' , '!' , r'!llvm.access.group ' , None            , None                   , r'![0-9]+'   , None                 , False) ,
]
 750
# Nameless values recognised in assembly output.  ASM_VALUE_RE is built from
# this list and generalize_asm_check_lines passes it along with that regexp, so
# the row order determines the capture-group order.  Both rows set
# is_number=True (captures are emitted in the '[[#NAME...]]' form) and
# replace_number_with_counter=True (get_value_name renumbers them 1, 2, 3, ...).
asm_nameless_values = [
 NamelessValue(r'MCINST', 'Inst#', None, '<MCInst #', r'\d+', None, r'.+',
               False, is_number=True, replace_number_with_counter=True),
 NamelessValue(r'MCREG',  'Reg:', None, '<MCOperand Reg:', r'\d+', None, r'.+',
               False, is_number=True, replace_number_with_counter=True),
]
 757
 758 def createOrRegexp(old, new):
 759   if not old:
 760     return new
 761   if not new:
 762     return old
 763   return old + '|' + new
 764
 765 def createPrefixMatch(prefix_str, prefix_re):
 766   if prefix_str is None or prefix_re is None:
 767     return ''
 768   return '(?:' + prefix_str + '(' + prefix_re + '))'
 769
 770 # Build the regexp that matches an "IR value". This can be a local variable,
 771 # argument, global, or metadata, anything that is "named". It is important that
 772 # the PREFIX and SUFFIX below only contain a single group, if that changes
 773 # other locations will need adjustment as well.
# Group 1: optional leading whitespace in front of the value.
IR_VALUE_REGEXP_PREFIX = r'(\s*)'
# Alternation with one capture group per entry of ir_nameless_values, in list
# order; built up by the loop below.
IR_VALUE_REGEXP_STRING = r''
for nameless_value in ir_nameless_values:
  lcl_match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
  glb_match = createPrefixMatch(nameless_value.global_ir_prefix, nameless_value.global_ir_prefix_regexp)
  # Every entry must describe exactly one of the two forms.
  assert((lcl_match or glb_match) and not (lcl_match and glb_match))
  if lcl_match:
    IR_VALUE_REGEXP_STRING = createOrRegexp(IR_VALUE_REGEXP_STRING, lcl_match)
  elif glb_match:
    # The global (definition) form is anchored with '^'; IR_VALUE_RE is
    # compiled without re.M, so it only matches at the start of the string.
    IR_VALUE_REGEXP_STRING = createOrRegexp(IR_VALUE_REGEXP_STRING, '^' + glb_match)
# Last group: the delimiter following the value (',', whitespace or a
# parenthesis) or the end of the string.
IR_VALUE_REGEXP_SUFFIX = r'([,\s\(\)]|\Z)'
# Overall layout: group 1 = prefix, group 2 = the whole alternation, groups 3..
# = one per nameless value, final group = suffix.
IR_VALUE_RE = re.compile(IR_VALUE_REGEXP_PREFIX + r'(' + IR_VALUE_REGEXP_STRING + r')' + IR_VALUE_REGEXP_SUFFIX)
 786
 787 # Build the regexp that matches an "ASM value" (currently only for --asm-show-inst comments).
# Alternation with one capture group per entry of asm_nameless_values, in list
# order.
ASM_VALUE_REGEXP_STRING = ''
for nameless_value in asm_nameless_values:
  glb_match = createPrefixMatch(nameless_value.global_ir_prefix, nameless_value.global_ir_prefix_regexp)
  # ASM values only use the global_ir_prefix form; the ir_prefix/ir_regexp pair
  # must be unset.
  assert not nameless_value.ir_prefix and not nameless_value.ir_regexp
  ASM_VALUE_REGEXP_STRING = createOrRegexp(ASM_VALUE_REGEXP_STRING, glb_match)
# Last group: a closing '>' or whitespace after the value, or end of string.
ASM_VALUE_REGEXP_SUFFIX = r'([>\s]|\Z)'
# Same group layout as IR_VALUE_RE; group 1 here is a comment leader ('#' or
# '//') plus any whitespace following it.
ASM_VALUE_RE = re.compile(r'((?:#|//)\s*)' + '(' + ASM_VALUE_REGEXP_STRING + ')' + ASM_VALUE_REGEXP_SUFFIX)
 795
 796 # The entire match is group 0, the prefix has one group (=1), the entire
 797 # IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start.
# Index of the first per-nameless-value capture group in IR_VALUE_RE /
# ASM_VALUE_RE matches (groups 1 and 2 are the prefix and the alternation).
first_nameless_group_in_ir_value_match = 3

# constants for the group id of special matches
# Group 3 belongs to the first entry of ir_nameless_values (TMP, prefix '%')
# and group 4 to the second (ATTR, prefix '#'); keep these in sync with the
# order of that list.
variable_group_in_ir_value_match = 3
attribute_group_in_ir_value_match = 4
 803
 804 # Check a match for IR_VALUE_RE and inspect it to determine if it was a local
 805 # value, %..., global @..., debug number !dbg !..., etc. See the PREFIXES above.
 806 def get_idx_from_ir_value_match(match):
 807   for i in range(first_nameless_group_in_ir_value_match, match.lastindex):
 808     if match.group(i) is not None:
 809       return i - first_nameless_group_in_ir_value_match
 810   error("Unable to identify the kind of IR value from the match!")
 811   return 0
 812
 813 # See get_idx_from_ir_value_match
 814 def get_name_from_ir_value_match(match):
 815   return match.group(get_idx_from_ir_value_match(match) + first_nameless_group_in_ir_value_match)
 816
 817 def get_nameless_value_from_match(match, nameless_values) -> NamelessValue:
 818   return nameless_values[get_idx_from_ir_value_match(match)]
 819
 820 # Return true if var clashes with the scripted FileCheck check_prefix.
 821 def may_clash_with_default_check_prefix_name(check_prefix, var):
 822   return check_prefix and re.match(r'^' + check_prefix + r'[0-9]+?$', var, re.IGNORECASE)
 823
def generalize_check_lines_common(lines, is_analyze, vars_seen,
                                  global_vars_seen, nameless_values,
                                  nameless_value_regex, is_asm):
  """Rewrite value names in `lines` into FileCheck variable defs and uses.

  `lines` is modified in place and also returned.

  lines: list of output lines to generalize.
  is_analyze: when true (and not is_asm) the lines are only scrubbed; no
    variable substitution is performed.
  vars_seen: set of (name, check_key) pairs for local values already defined;
    new definitions are added to it.
  global_vars_seen: dict mapping (name, check_key) to the check prefix under
    which a non-local value was first defined; new definitions are added.
  nameless_values: list of value kinds matching the groups of
    nameless_value_regex.
  nameless_value_regex: compiled regexp used to find values in a line.
  is_asm: when true the IR-specific scrubbing steps are skipped.
  """
  # This gets called for each match that occurs in
  # a line. We transform variables we haven't seen
  # into defs, and variables we have seen into uses.
  def transform_line_vars(match):
    var = get_name_from_ir_value_match(match)
    nameless_value = get_nameless_value_from_match(match, nameless_values)
    if may_clash_with_default_check_prefix_name(nameless_value.check_prefix, var):
      warn("Change IR value name '%s' or use --prefix-filecheck-ir-name to prevent possible conflict"
           " with scripted FileCheck name." % (var,))
    # Values are tracked per (name, check_key) so that equal names of different
    # kinds do not collide.
    key = (var, nameless_value.check_key)
    is_local_def = nameless_value.is_local_def_ir_value_match(match)
    if is_local_def and key in vars_seen:
      rv = nameless_value.get_value_use(var, match)
    elif not is_local_def and key in global_vars_seen:
      # We could have seen a different prefix for the global variables first,
      # ensure we use that one instead of the prefix for the current match.
      rv = nameless_value.get_value_use(var, match, global_vars_seen[key])
    else:
      # First occurrence: record it and emit a definition.
      if is_local_def:
        vars_seen.add(key)
      else:
        global_vars_seen[key] = nameless_value.check_prefix
      rv = nameless_value.get_value_definition(var, match)
    # re.sub replaces the entire regex match
    # with whatever you return, so we have
    # to make sure to hand it back everything
    # including the commas and spaces.
    return match.group(1) + rv + match.group(match.lastindex)

  lines_with_def = []

  for i, line in enumerate(lines):
    if not is_asm:
      # An IR variable named '%.' matches the FileCheck regex string.
      line = line.replace('%.', '%dot')
      # For globals selected by _global_hex_value_regex, turn every
      # 'i<width> <decimal>' constant into a '[[#0x...]]' numeric expression.
      for regex in _global_hex_value_regex:
        if re.match('^@' + regex + ' = ', line):
          line = re.sub(r'\bi([0-9]+) ([0-9]+)',
              lambda m : 'i' + m.group(1) + ' [[#' + hex(int(m.group(2))) + ']]',
              line)
          break
      # Ignore any comments, since the check lines will too.
      scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line)
      lines[i] = scrubbed_line
    if is_asm or not is_analyze:
      # It can happen that two matches are back-to-back and for some reason sub
      # will not replace both of them. For now we work around this by
      # substituting until there is no more match.
      changed = True
      while changed:
        # subn returns (new_string, number_of_substitutions); with count=1 the
        # loop ends once a pass makes no substitution.
        (lines[i], changed) = nameless_value_regex.subn(transform_line_vars,
                                                        lines[i], count=1)
  return lines
 880
 881 # Replace IR value defs and uses with FileCheck variables.
 882 def generalize_check_lines(lines, is_analyze, vars_seen, global_vars_seen):
 883   return generalize_check_lines_common(lines, is_analyze, vars_seen,
 884                                        global_vars_seen, ir_nameless_values,
 885                                        IR_VALUE_RE, False)
 886
 887 def generalize_asm_check_lines(lines, vars_seen, global_vars_seen):
 888   return generalize_check_lines_common(lines, False, vars_seen,
 889                                        global_vars_seen, asm_nameless_values,
 890                                        ASM_VALUE_RE, True)
 891
def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, is_backend, is_analyze, global_vars_seen_dict, is_filtered):
  """Append the FileCheck lines for `func_name` to `output_lines`.

  output_lines: list of strings that receives the generated check lines.
  comment_marker: comment leader put in front of every check line.
  prefix_list: run-line entries; element 0 of each entry is the list of check
    prefixes of that run line.
  func_dict: maps check prefix -> function name -> function info.  The info
    object provides `attrs`, `args_and_sig` and `func_name_separator`, and its
    str() is the function body.
  check_label_format: '%'-format string for the -LABEL line; receives
    (checkprefix, func_name, args_and_sig, func_name_separator).
  is_backend: emit assembly-style checks (body passed through
    generalize_asm_check_lines) instead of IR-style checks.
  is_analyze: forwarded to generalize_check_lines.
  global_vars_seen_dict: maps check prefix -> global values already defined
    under that prefix; updated with the values newly defined here.
  is_filtered: when true, body lines are emitted without the '-NEXT' suffix.

  Returns the list of check prefixes for which checks were emitted.
  """
  # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well.
  prefix_exclusions = set()
  printed_prefixes = []
  for p in prefix_list:
    checkprefixes = p[0]
    # If not all checkprefixes of this run line produced the function we cannot check for it as it does not
    # exist for this run line. A subset of the check prefixes might know about the function but only because
    # other run lines created it.
    if any(map(lambda checkprefix: func_name not in func_dict[checkprefix], checkprefixes)):
      prefix_exclusions |= set(checkprefixes)
      continue

  # prefix_exclusions is constructed, we can now emit the output
  for p in prefix_list:
    # Globals known so far for this run line; accumulated from all of its
    # prefixes that are visited before one of them is printed.
    global_vars_seen = {}
    checkprefixes = p[0]
    for checkprefix in checkprefixes:
      if checkprefix in global_vars_seen_dict:
        global_vars_seen.update(global_vars_seen_dict[checkprefix])
      else:
        global_vars_seen_dict[checkprefix] = {}
      # This prefix was already printed (for another run line): this run line
      # is covered, stop looking at its remaining prefixes.
      if checkprefix in printed_prefixes:
        break

      # Check if the prefix is excluded.
      if checkprefix in prefix_exclusions:
        continue

      # If we do not have output for this prefix we skip it.
      if not func_dict[checkprefix][func_name]:
        continue

      # Add some space between different check prefixes, but not after the last
      # check line (before the test code).
      if is_backend:
        if len(printed_prefixes) != 0:
          output_lines.append(comment_marker)

      if checkprefix not in global_vars_seen_dict:
        global_vars_seen_dict[checkprefix] = {}

      # Snapshot of the globals known before generating this function, used
      # below to find the ones that were newly defined.
      global_vars_seen_before = [key for key in global_vars_seen.keys()]

      vars_seen = set()
      printed_prefixes.append(checkprefix)
      attrs = str(func_dict[checkprefix][func_name].attrs)
      attrs = '' if attrs == 'None' else attrs
      if attrs:
        output_lines.append('%s %s: Function Attrs: %s' % (comment_marker, checkprefix, attrs))
      args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
      if args_and_sig:
        args_and_sig = generalize_check_lines([args_and_sig], is_analyze, vars_seen, global_vars_seen)[0]
      func_name_separator = func_dict[checkprefix][func_name].func_name_separator
      # If the signature contains FileCheck variables it is emitted on a
      # separate -SAME line instead of inside the -LABEL line.
      if '[[' in args_and_sig:
        output_lines.append(check_label_format % (checkprefix, func_name, '', func_name_separator))
        output_lines.append('%s %s-SAME: %s' % (comment_marker, checkprefix, args_and_sig))
      else:
        output_lines.append(check_label_format % (checkprefix, func_name, args_and_sig, func_name_separator))
      func_body = str(func_dict[checkprefix][func_name]).splitlines()
      if not func_body:
        # We have filtered everything.
        continue

      # For ASM output, just emit the check lines.
      if is_backend:
        body_start = 1
        if is_filtered:
          # For filtered output we don't add "-NEXT" so don't add extra spaces
          # before the first line.
          body_start = 0
        else:
          output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
        func_lines = generalize_asm_check_lines(func_body[body_start:],
                                                vars_seen, global_vars_seen)
        for func_line in func_lines:
          if func_line.strip() == '':
            output_lines.append('%s %s-EMPTY:' % (comment_marker, checkprefix))
          else:
            check_suffix = '-NEXT' if not is_filtered else ''
            output_lines.append('%s %s%s:  %s' % (comment_marker, checkprefix,
                                                  check_suffix, func_line))
        # Remember new global variables we have not seen before
        for key in global_vars_seen:
          if key not in global_vars_seen_before:
            global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
        # Only one prefix per run line gets the function printed.
        break

      # For IR output, change all defs to FileCheck variables, so we're immune
      # to variable naming fashions.
      func_body = generalize_check_lines(func_body, is_analyze, vars_seen, global_vars_seen)

      # This could be selectively enabled with an optional invocation argument.
      # Disabled for now: better to check everything. Be safe rather than sorry.

      # Handle the first line of the function body as a special case because
      # it's often just noise (a useless asm comment or entry label).
      #if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
      #  is_blank_line = True
      #else:
      #  output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
      #  is_blank_line = False

      is_blank_line = False

      for func_line in func_body:
        if func_line.strip() == '':
          is_blank_line = True
          continue
        # Do not waste time checking IR comments.
        func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)

        # Skip blank lines instead of checking them.
        # The first line after a blank one is emitted as a plain check (no
        # '-NEXT'), since it does not directly follow the previous checked line.
        if is_blank_line:
          output_lines.append('{} {}:       {}'.format(
              comment_marker, checkprefix, func_line))
        else:
          check_suffix = '-NEXT' if not is_filtered else ''
          output_lines.append('{} {}{}:  {}'.format(
              comment_marker, checkprefix, check_suffix, func_line))
        is_blank_line = False

      # Add space between different check prefixes and also before the first
      # line of code in the test function.
      output_lines.append(comment_marker)

      # Remember new global variables we have not seen before
      for key in global_vars_seen:
        if key not in global_vars_seen_before:
          global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
      # Only one prefix per run line gets the function printed.
      break
  return printed_prefixes
1024
def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict,
                  func_name, preserve_names, function_sig,
                  global_vars_seen_dict, is_filtered):
  """Emit IR-style checks for `func_name` via add_checks.

  The -LABEL line is built from the IR spelling '@<name>'; when `function_sig`
  is set it additionally starts with a 'define {{[^@]+}}' matcher.
  `preserve_names` is passed to add_checks as its `is_analyze` argument.
  Returns whatever add_checks returns.
  """
  function_def_regex = ''
  if function_sig:
    function_def_regex = 'define {{[^@]+}}'
  # The '%s' placeholders are filled in later by add_checks.
  check_label_format = '{0} %s-LABEL: {1}@%s%s%s'.format(comment_marker, function_def_regex)
  return add_checks(output_lines, comment_marker, prefix_list, func_dict,
                    func_name, check_label_format,
                    is_backend=False,
                    is_analyze=preserve_names,
                    global_vars_seen_dict=global_vars_seen_dict,
                    is_filtered=is_filtered)
1034
def add_analyze_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, is_filtered):
  """Emit analysis-style checks for `func_name` via add_checks.

  The -LABEL line quotes the function name in single quotes.  A fresh, empty
  global-variable record is used for every call, and add_checks is invoked
  with `is_analyze` set.  Returns whatever add_checks returns.
  """
  # The '%s' placeholders are filled in later by add_checks.
  check_label_format = "{0} %s-LABEL: '%s%s%s'".format(comment_marker)
  return add_checks(output_lines, comment_marker, prefix_list, func_dict,
                    func_name, check_label_format,
                    is_backend=False,
                    is_analyze=True,
                    global_vars_seen_dict={},
                    is_filtered=is_filtered)
1041
def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes):
  """Record the global value definitions found in `raw_tool_output`.

  For every value kind that has a definition form (`global_ir_prefix`), all
  lines of the shape '<prefix><name> = <rhs>' are collected and stored in
  `glob_val_dict[prefix][<kind's check_prefix>]` for each of `prefixes`.
  Prefixes whose entry in `glob_val_dict` is None are skipped.  If a prefix
  already holds different lines for a kind, the entry is reset to None, except
  for the last prefix, where a warning is issued and the lines are overwritten.
  """
  for nameless_value in itertools.chain(ir_nameless_values, asm_nameless_values):
    if nameless_value.global_ir_prefix is None:
      continue

    definition_re = re.compile(
        r'^' + (nameless_value.global_ir_prefix + nameless_value.global_ir_prefix_regexp)
        + r'\s=\s' + nameless_value.global_ir_rhs_regexp + r'$',
        flags=(re.M))
    lines = [m.group(0) for m in definition_re.finditer(raw_tool_output)]

    kind = nameless_value.check_prefix
    for prefix in prefixes:
      recorded = glob_val_dict[prefix]
      if recorded is None:
        continue
      if kind in recorded:
        if lines == recorded[kind]:
          continue
        if prefix != prefixes[-1]:
          # Conflicting output under a shared prefix: it cannot be used.
          recorded[kind] = None
          continue
        warn('Found conflicting asm under the same prefix: %r!' % (prefix,))
      recorded[kind] = lines
1068
def add_global_checks(glob_val_dict, comment_marker, prefix_list, output_lines, global_vars_seen_dict, is_analyze, is_before_functions):
  """Append check lines for global value definitions to `output_lines`.

  Only IR value kinds that have a definition form and whose
  `is_before_functions` flag equals the argument are handled.  For each such
  kind and each run line, the first prefix that has recorded definitions (and
  yields at least one check line) gets a separator line followed by its check
  lines.  Globals newly defined while generalizing are stored in
  `global_vars_seen_dict` under that prefix.  A closing separator is appended
  if anything was printed.
  """
  printed_prefixes = set()
  for nameless_value in ir_nameless_values:
    if (nameless_value.global_ir_prefix is None or
        nameless_value.is_before_functions != is_before_functions):
      continue
    kind = nameless_value.check_prefix
    for p in prefix_list:
      checkprefixes = p[0]
      if checkprefixes is None:
        continue
      global_vars_seen = {}
      for checkprefix in checkprefixes:
        if checkprefix in global_vars_seen_dict:
          global_vars_seen.update(global_vars_seen_dict[checkprefix])
        else:
          global_vars_seen_dict[checkprefix] = {}
        # Already printed for this kind: the run line is covered.
        if (checkprefix, kind) in printed_prefixes:
          break
        recorded = glob_val_dict[checkprefix]
        if not recorded or kind not in recorded or not recorded[kind]:
          continue

        known_before = set(global_vars_seen)
        check_lines = []
        for line in recorded[kind]:
          # When a global-value filter is active, keep only matching globals.
          if _global_value_regex and not any(
              re.match('^@' + regex + ' = ', line)
              for regex in _global_value_regex):
            continue
          generalized = generalize_check_lines([line], is_analyze, set(), global_vars_seen)
          check_lines.append('%s %s: %s' % (comment_marker, checkprefix, generalized[0]))
        if not check_lines:
          continue

        output_lines.append(comment_marker + SEPARATOR)
        output_lines.extend(check_lines)

        printed_prefixes.add((checkprefix, kind))

        # Remember the global variables that were first defined just now.
        for key, seen_prefix in global_vars_seen.items():
          if key not in known_before:
            global_vars_seen_dict[checkprefix][key] = seen_prefix
        break

  if printed_prefixes:
    output_lines.append(comment_marker + SEPARATOR)
1126
1127
def check_prefix(prefix):
  """Warn if `prefix` is not a valid FileCheck check prefix.

  A prefix is valid when it matches PREFIX_RE.  If the invalid prefix contains
  a comma, the warning additionally suggests '--check-prefixes='.  Nothing is
  returned.
  """
  if PREFIX_RE.match(prefix):
    return
  hint = ""
  if ',' in prefix:
    hint = " Did you mean '--check-prefixes=" + prefix + "'?"
  # Format the fixed message first and append the hint afterwards: the hint
  # embeds the user-supplied prefix, and a '%' in it (e.g. 'A,%t') must not be
  # interpreted as a conversion specifier.
  message = ("Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores." %
             (prefix,))
  warn(message + hint)
1135
1136
def verify_filecheck_prefixes(fc_cmd):
  """Validate the check prefixes given on a FileCheck command line.

  Every whitespace-separated part containing 'check-prefix=' or
  'check-prefixes=' has its value(s) passed to check_prefix.  For the plural
  form a warning is also issued for each prefix that occurs more than once in
  the comma-separated list.
  """
  for part in fc_cmd.split():
    if "check-prefix=" in part:
      check_prefix(part.split('=', 1)[1])
      continue
    if "check-prefixes=" not in part:
      continue
    prefixes = part.split('=', 1)[1].split(',')
    for prefix in prefixes:
      check_prefix(prefix)
      if prefixes.count(prefix) > 1:
        warn("Supplied prefix '%s' is not unique in the prefix list." % (prefix,))
1149
1150
def get_autogennote_suffix(parser, args):
  """Build the UTC_ARGS suffix describing the non-default options in `args`.

  Walks the parser's actions and records every option whose parsed value
  differs from its default, skipping options that name tests, binaries or
  verbosity.  Filters are written out one '--filter'/'--filter-out' option per
  element.  Returns '' when nothing needs recording, otherwise
  ' <UTC_ARGS_KEY> <options>'.
  """
  ignored_dests = ('tests', 'update_only', 'opt_binary', 'llc_binary',
                   'clang', 'opt', 'llvm_bin', 'verbose')
  note = ''
  for action in parser._actions:
    # Options such as --help never show up in the parsed namespace.
    if not hasattr(args, action.dest):
      continue
    # Paths to binaries and the list of tests are not worth recording.
    if action.dest in ignored_dests:
      continue
    value = getattr(args, action.dest)
    # An action with a const stores that constant (usually True/False).
    stores_const = action.const is not None
    # For --foo/--no-foo pairs only the action whose constant was actually
    # stored is relevant.
    if stores_const and value != action.const:
      continue
    if parser.get_default(action.dest) == value:
      continue  # Defaults are not recorded.
    if action.dest == 'filters':
      # The value is a list of Filter objects; emit one option per element.
      for elem in value:
        opt_name = 'filter-out' if elem.is_filter_out else 'filter'
        new_arg = '--%s "%s" ' % (opt_name, elem.pattern().strip('"'))
        if new_arg not in note:
          note += new_arg
      continue
    note += action.option_strings[0] + ' '
    if not stores_const:  # The action takes a parameter.
      if action.nargs == '+':
        value = ' '.join('"' + v.strip('"') + '"' for v in value)
      note += '%s ' % value
  if note:
    # Drop the trailing space left by the last option.
    note = ' %s %s' % (UTC_ARGS_KEY, note[:-1])
  return note
1186
1187
def check_for_command(line, parser, args, argv, argparse_callback):
  """Apply a UTC_ARGS command found in `line`, if there is one.

  When `line` matches UTC_ARGS_CMD, its options are split shell-style and
  appended to `argv` (in place), the arguments are re-parsed with the test
  names from the current `args` left out, and `argparse_callback`, if given,
  is called with the new arguments.  Returns the (possibly new) args together
  with `argv`.
  """
  cmd_m = UTC_ARGS_CMD.match(line)
  if not cmd_m:
    return args, argv
  argv.extend(option for option in shlex.split(cmd_m.group('cmd').strip())
              if option)
  args = parser.parse_args([arg for arg in argv if arg not in args.tests])
  if argparse_callback is not None:
    argparse_callback(args)
  return args, argv
1198
def find_arg_in_test(test_info, get_arg_to_check, arg_string, is_global):
  """Look up an option for a test, optionally searching its UTC_ARGS lines.

  test_info: object with an `args` attribute and a `ro_iterlines()` method
    yielding per-line objects that have `line` and `args` attributes.
  get_arg_to_check: callable extracting the option's value from an args object.
  arg_string: option name, used only in the warning text.
  is_global: whether the option applies to the whole test, in which case the
    per-line args are searched when the top-level args do not set it.

  Returns the first truthy value found; otherwise the value obtained from the
  top-level args or, if lines were searched, from the last line.
  """
  result = get_arg_to_check(test_info.args)
  if not result and is_global:
    # See if this has been specified via UTC_ARGS.  This is a "global" option
    # that affects the entire generation of test checks.  If it exists anywhere
    # in the test, apply it to everything.
    saw_line = False
    for line_info in test_info.ro_iterlines():
      line = line_info.line
      # Anything that is neither a ';' comment nor blank counts as test input.
      if not line.startswith(';') and line.strip() != '':
        saw_line = True
      result = get_arg_to_check(line_info.args)
      if result:
        # The condition used to read `warn and saw_line`; `warn` is the
        # module's warning function and therefore always true, so only
        # `saw_line` ever mattered.
        if saw_line:
          # We saw the option after already reading some test input lines.
          # Warn about it.
          print('WARNING: Found {} in line following test start: '.format(arg_string)
                + line, file=sys.stderr)
          print('WARNING: Consider moving {} to top of file'.format(arg_string),
                file=sys.stderr)
        break
  return result
1221
def dump_input_lines(output_lines, test_info, prefix_set, comment_string):
  """Copy the test's input lines to `output_lines`, dropping stale checks.

  Lines that consist only of the comment string, only of the comment string
  plus SEPARATOR, or that are check lines (per CHECK_RE) for a prefix in
  `prefix_set` are left out.  All other lines are appended without their
  trailing newline.
  """
  for input_line_info in test_info.iterlines(output_lines):
    line = input_line_info.line
    stripped = line.strip()
    # Bare comment lines and separator lines are regenerated, not copied.
    if stripped == comment_string or stripped == comment_string + SEPARATOR:
      continue
    is_stale_check = False
    if line.lstrip().startswith(comment_string):
      check_m = CHECK_RE.match(line)
      is_stale_check = bool(check_m) and check_m.group(1) in prefix_set
    if not is_stale_check:
      output_lines.append(line.rstrip('\n'))
1235
def add_checks_at_end(output_lines, prefix_list, func_order,
                      comment_string, check_generator):
  """Emit checks for all functions grouped by prefix, e.g. at the end of a file.

  prefix_list: run-line entries of the form (list of prefixes, tool args).
  func_order: maps each prefix to the ordered list of its function names.
  check_generator: callable(output_lines, run_list, func) that emits the
    checks and returns the prefixes it generated checks for.

  A `comment_string` line is inserted between consecutive functions.  Returns
  the set of all prefixes for which checks were generated.
  """
  emitted = set()
  generated_prefixes = set()
  for run_entry in prefix_list:
    run_prefixes, tool_args = run_entry[0], run_entry[1]
    for single_prefix in run_prefixes:
      for func in func_order[single_prefix]:
        # The function order may list a function more than once; skip the
        # ones already handled for this prefix.
        if (func, single_prefix) in emitted:
          continue
        if emitted:
          output_lines.append(comment_string)

        # The add_*_checks routines take a run list of (prefixes, tool args)
        # tuples and assume every run line produces every function.  That
        # does not hold for generated functions (e.g. -fopenmp vs. no
        # -fopenmp), so hand over a run list holding only the prefix at hand.
        # As a consequence checks end up ordered by prefix rather than by
        # function as in "normal" mode.
        single_prefix_run = [([single_prefix], tool_args)]
        for generated_prefix in check_generator(output_lines,
                                                single_prefix_run, func):
          emitted.add((func, generated_prefix))
          generated_prefixes.add(generated_prefix)
  return generated_prefixes