llvm/utils/UpdateTestChecks/common.py

   1 from __future__ import print_function
   2
   3 import argparse
   4 import copy
   5 import glob
   6 import itertools
   7 import os
   8 import re
   9 import subprocess
  10 import sys
  11 import shlex
  12
  13 from typing import List
  14
  15 ##### Common utilities for update_*test_checks.py
  16
  17
# Module-wide verbosity switch; assigned from the parsed --verbose option by
# parse_commandline_args/parse_args and consulted by debug().
_verbose = False
# Prefix for FileCheck IR value names; overwritten by TestInfo.__init__ when
# --prefix-filecheck-ir-name is given.
_prefix_filecheck_ir_name = ""

"""
Version changelog:

1: Initial version, used by tests that don't specify --version explicitly.
2: --function-signature is now enabled by default and also checks return
   type/attributes.
3: Opening parenthesis of function args is kept on the first LABEL line
   in case arguments are split to a separate SAME line.
"""
# Output format version passed as --version when generating a new test
# (see itertests); regenerated tests fall back to the --version default.
DEFAULT_VERSION = 3
  31
  32
  33 class Regex(object):
  34     """Wrap a compiled regular expression object to allow deep copy of a regexp.
  35     This is required for the deep copy done in do_scrub.
  36
  37     """
  38
  39     def __init__(self, regex):
  40         self.regex = regex
  41
  42     def __deepcopy__(self, memo):
  43         result = copy.copy(self)
  44         result.regex = self.regex
  45         return result
  46
  47     def search(self, line):
  48         return self.regex.search(line)
  49
  50     def sub(self, repl, line):
  51         return self.regex.sub(repl, line)
  52
  53     def pattern(self):
  54         return self.regex.pattern
  55
  56     def flags(self):
  57         return self.regex.flags
  58
  59
  60 class Filter(Regex):
  61     """Augment a Regex object with a flag indicating whether a match should be
  62     added (!is_filter_out) or removed (is_filter_out) from the generated checks.
  63
  64     """
  65
  66     def __init__(self, regex, is_filter_out):
  67         super(Filter, self).__init__(regex)
  68         self.is_filter_out = is_filter_out
  69
  70     def __deepcopy__(self, memo):
  71         result = copy.deepcopy(super(Filter, self), memo)
  72         result.is_filter_out = copy.deepcopy(self.is_filter_out, memo)
  73         return result
  74
  75
  76 def parse_commandline_args(parser):
  77     class RegexAction(argparse.Action):
  78         """Add a regular expression option value to a list of regular expressions.
  79         This compiles the expression, wraps it in a Regex and adds it to the option
  80         value list."""
  81
  82         def __init__(self, option_strings, dest, nargs=None, **kwargs):
  83             if nargs is not None:
  84                 raise ValueError("nargs not allowed")
  85             super(RegexAction, self).__init__(option_strings, dest, **kwargs)
  86
  87         def do_call(self, namespace, values, flags):
  88             value_list = getattr(namespace, self.dest)
  89             if value_list is None:
  90                 value_list = []
  91
  92             try:
  93                 value_list.append(Regex(re.compile(values, flags)))
  94             except re.error as error:
  95                 raise ValueError(
  96                     "{}: Invalid regular expression '{}' ({})".format(
  97                         option_string, error.pattern, error.msg
  98                     )
  99                 )
 100
 101             setattr(namespace, self.dest, value_list)
 102
 103         def __call__(self, parser, namespace, values, option_string=None):
 104             self.do_call(namespace, values, 0)
 105
 106     class FilterAction(RegexAction):
 107         """Add a filter to a list of filter option values."""
 108
 109         def __init__(self, option_strings, dest, nargs=None, **kwargs):
 110             super(FilterAction, self).__init__(option_strings, dest, nargs, **kwargs)
 111
 112         def __call__(self, parser, namespace, values, option_string=None):
 113             super(FilterAction, self).__call__(parser, namespace, values, option_string)
 114
 115             value_list = getattr(namespace, self.dest)
 116
 117             is_filter_out = option_string == "--filter-out"
 118
 119             value_list[-1] = Filter(value_list[-1].regex, is_filter_out)
 120
 121             setattr(namespace, self.dest, value_list)
 122
 123     filter_group = parser.add_argument_group(
 124         "filtering",
 125         """Filters are applied to each output line according to the order given. The
 126     first matching filter terminates filter processing for that current line.""",
 127     )
 128
 129     filter_group.add_argument(
 130         "--filter",
 131         action=FilterAction,
 132         dest="filters",
 133         metavar="REGEX",
 134         help="Only include lines matching REGEX (may be specified multiple times)",
 135     )
 136     filter_group.add_argument(
 137         "--filter-out",
 138         action=FilterAction,
 139         dest="filters",
 140         metavar="REGEX",
 141         help="Exclude lines matching REGEX",
 142     )
 143
 144     parser.add_argument(
 145         "--include-generated-funcs",
 146         action="store_true",
 147         help="Output checks for functions not in source",
 148     )
 149     parser.add_argument(
 150         "-v", "--verbose", action="store_true", help="Show verbose output"
 151     )
 152     parser.add_argument(
 153         "-u",
 154         "--update-only",
 155         action="store_true",
 156         help="Only update test if it was already autogened",
 157     )
 158     parser.add_argument(
 159         "--force-update",
 160         action="store_true",
 161         help="Update test even if it was autogened by a different script",
 162     )
 163     parser.add_argument(
 164         "--enable",
 165         action="store_true",
 166         dest="enabled",
 167         default=True,
 168         help="Activate CHECK line generation from this point forward",
 169     )
 170     parser.add_argument(
 171         "--disable",
 172         action="store_false",
 173         dest="enabled",
 174         help="Deactivate CHECK line generation from this point forward",
 175     )
 176     parser.add_argument(
 177         "--replace-value-regex",
 178         nargs="+",
 179         default=[],
 180         help="List of regular expressions to replace matching value names",
 181     )
 182     parser.add_argument(
 183         "--prefix-filecheck-ir-name",
 184         default="",
 185         help="Add a prefix to FileCheck IR value names to avoid conflicts with scripted names",
 186     )
 187     parser.add_argument(
 188         "--global-value-regex",
 189         nargs="+",
 190         default=[],
 191         help="List of regular expressions that a global value declaration must match to generate a check (has no effect if checking globals is not enabled)",
 192     )
 193     parser.add_argument(
 194         "--global-hex-value-regex",
 195         nargs="+",
 196         default=[],
 197         help="List of regular expressions such that, for matching global value declarations, literal integer values should be encoded in hex in the associated FileCheck directives",
 198     )
 199     # FIXME: in 3.9, we can use argparse.BooleanOptionalAction. At that point,
 200     # we need to rename the flag to just -generate-body-for-unused-prefixes.
 201     parser.add_argument(
 202         "--no-generate-body-for-unused-prefixes",
 203         action="store_false",
 204         dest="gen_unused_prefix_body",
 205         default=True,
 206         help="Generate a function body that always matches for unused prefixes. This is useful when unused prefixes are desired, and it avoids needing to annotate each FileCheck as allowing them.",
 207     )
 208     # This is the default when regenerating existing tests. The default when
 209     # generating new tests is determined by DEFAULT_VERSION.
 210     parser.add_argument(
 211         "--version", type=int, default=1, help="The version of output format"
 212     )
 213     args = parser.parse_args()
 214     # TODO: This should not be handled differently from the other options
 215     global _verbose, _global_value_regex, _global_hex_value_regex
 216     _verbose = args.verbose
 217     _global_value_regex = args.global_value_regex
 218     _global_hex_value_regex = args.global_hex_value_regex
 219     return args
 220
 221
 222 def parse_args(parser, argv):
 223     args = parser.parse_args(argv)
 224     if args.version >= 2:
 225         args.function_signature = True
 226     # TODO: This should not be handled differently from the other options
 227     global _verbose, _global_value_regex, _global_hex_value_regex
 228     _verbose = args.verbose
 229     _global_value_regex = args.global_value_regex
 230     _global_hex_value_regex = args.global_hex_value_regex
 231     return args
 232
 233
 234 class InputLineInfo(object):
 235     def __init__(self, line, line_number, args, argv):
 236         self.line = line
 237         self.line_number = line_number
 238         self.args = args
 239         self.argv = argv
 240
 241
 242 class TestInfo(object):
 243     def __init__(
 244         self,
 245         test,
 246         parser,
 247         script_name,
 248         input_lines,
 249         args,
 250         argv,
 251         comment_prefix,
 252         argparse_callback,
 253     ):
 254         self.parser = parser
 255         self.argparse_callback = argparse_callback
 256         self.path = test
 257         self.args = args
 258         if args.prefix_filecheck_ir_name:
 259             global _prefix_filecheck_ir_name
 260             _prefix_filecheck_ir_name = args.prefix_filecheck_ir_name
 261         self.argv = argv
 262         self.input_lines = input_lines
 263         self.run_lines = find_run_lines(test, self.input_lines)
 264         self.comment_prefix = comment_prefix
 265         if self.comment_prefix is None:
 266             if self.path.endswith(".mir"):
 267                 self.comment_prefix = "#"
 268             else:
 269                 self.comment_prefix = ";"
 270         self.autogenerated_note_prefix = self.comment_prefix + " " + UTC_ADVERT
 271         self.test_autogenerated_note = self.autogenerated_note_prefix + script_name
 272         self.test_autogenerated_note += get_autogennote_suffix(parser, self.args)
 273         self.test_unused_note = (
 274             self.comment_prefix + self.comment_prefix + " " + UNUSED_NOTE
 275         )
 276
 277     def ro_iterlines(self):
 278         for line_num, input_line in enumerate(self.input_lines):
 279             args, argv = check_for_command(
 280                 input_line, self.parser, self.args, self.argv, self.argparse_callback
 281             )
 282             yield InputLineInfo(input_line, line_num, args, argv)
 283
 284     def iterlines(self, output_lines):
 285         output_lines.append(self.test_autogenerated_note)
 286         for line_info in self.ro_iterlines():
 287             input_line = line_info.line
 288             # Discard any previous script advertising.
 289             if input_line.startswith(self.autogenerated_note_prefix):
 290                 continue
 291             self.args = line_info.args
 292             self.argv = line_info.argv
 293             if not self.args.enabled:
 294                 output_lines.append(input_line)
 295                 continue
 296             yield line_info
 297
 298     def get_checks_for_unused_prefixes(
 299         self, run_list, used_prefixes: List[str]
 300     ) -> List[str]:
 301         run_list = [element for element in run_list if element[0] is not None]
 302         unused_prefixes = set(
 303             [prefix for sublist in run_list for prefix in sublist[0]]
 304         ).difference(set(used_prefixes))
 305
 306         ret = []
 307         if not unused_prefixes:
 308             return ret
 309         ret.append(self.test_unused_note)
 310         for unused in sorted(unused_prefixes):
 311             ret.append(
 312                 "{comment} {prefix}: {match_everything}".format(
 313                     comment=self.comment_prefix,
 314                     prefix=unused,
 315                     match_everything=r"""{{.*}}""",
 316                 )
 317             )
 318         return ret
 319
 320
 321 def itertests(
 322     test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None
 323 ):
 324     for pattern in test_patterns:
 325         # On Windows we must expand the patterns ourselves.
 326         tests_list = glob.glob(pattern)
 327         if not tests_list:
 328             warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
 329             continue
 330         for test in tests_list:
 331             with open(test) as f:
 332                 input_lines = [l.rstrip() for l in f]
 333             first_line = input_lines[0] if input_lines else ""
 334             if UTC_AVOID in first_line:
 335               warn("Skipping test that must not be autogenerated: " + test)
 336               continue
 337             is_regenerate = UTC_ADVERT in first_line
 338
 339             # If we're generating a new test, set the default version to the latest.
 340             argv = sys.argv[:]
 341             if not is_regenerate:
 342                 argv.insert(1, "--version=" + str(DEFAULT_VERSION))
 343
 344             args = parse_args(parser, argv[1:])
 345             if argparse_callback is not None:
 346                 argparse_callback(args)
 347             if is_regenerate:
 348                 if script_name not in first_line and not args.force_update:
 349                     warn(
 350                         "Skipping test which wasn't autogenerated by " + script_name,
 351                         test,
 352                     )
 353                     continue
 354                 args, argv = check_for_command(
 355                     first_line, parser, args, argv, argparse_callback
 356                 )
 357             elif args.update_only:
 358                 assert UTC_ADVERT not in first_line
 359                 warn("Skipping test which isn't autogenerated: " + test)
 360                 continue
 361             final_input_lines = []
 362             for l in input_lines:
 363                 if UNUSED_NOTE in l:
 364                     break
 365                 final_input_lines.append(l)
 366             yield TestInfo(
 367                 test,
 368                 parser,
 369                 script_name,
 370                 final_input_lines,
 371                 args,
 372                 argv,
 373                 comment_prefix,
 374                 argparse_callback,
 375             )
 376
 377
 378 def should_add_line_to_output(
 379     input_line, prefix_set, skip_global_checks=False, comment_marker=";"
 380 ):
 381     # Skip any blank comment lines in the IR.
 382     if not skip_global_checks and input_line.strip() == comment_marker:
 383         return False
 384     # Skip a special double comment line we use as a separator.
 385     if input_line.strip() == comment_marker + SEPARATOR:
 386         return False
 387     # Skip any blank lines in the IR.
 388     # if input_line.strip() == '':
 389     #  return False
 390     # And skip any CHECK lines. We're building our own.
 391     m = CHECK_RE.match(input_line)
 392     if m and m.group(1) in prefix_set:
 393         if skip_global_checks:
 394             global_ir_value_re = re.compile(r"(\[\[|@)", flags=(re.M))
 395             return not global_ir_value_re.search(input_line)
 396         return False
 397
 398     return True
 399
 400
 401 # Perform lit-like substitutions
 402 def getSubstitutions(sourcepath):
 403     sourcedir = os.path.dirname(sourcepath)
 404     return [
 405         ("%s", sourcepath),
 406         ("%S", sourcedir),
 407         ("%p", sourcedir),
 408         ("%{pathsep}", os.pathsep),
 409     ]
 410
 411
 412 def applySubstitutions(s, substitutions):
 413     for a, b in substitutions:
 414         s = s.replace(a, b)
 415     return s
 416
 417
 418 # Invoke the tool that is being tested.
 419 def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False):
 420     with open(ir) as ir_file:
 421         substitutions = getSubstitutions(ir)
 422
 423         # TODO Remove the str form which is used by update_test_checks.py and
 424         # update_llc_test_checks.py
 425         # The safer list form is used by update_cc_test_checks.py
 426         if preprocess_cmd:
 427             # Allow pre-processing the IR file (e.g. using sed):
 428             assert isinstance(
 429                 preprocess_cmd, str
 430             )  # TODO: use a list instead of using shell
 431             preprocess_cmd = applySubstitutions(preprocess_cmd, substitutions).strip()
 432             if verbose:
 433                 print(
 434                     "Pre-processing input file: ",
 435                     ir,
 436                     " with command '",
 437                     preprocess_cmd,
 438                     "'",
 439                     sep="",
 440                     file=sys.stderr,
 441                 )
 442             # Python 2.7 doesn't have subprocess.DEVNULL:
 443             with open(os.devnull, "w") as devnull:
 444                 pp = subprocess.Popen(
 445                     preprocess_cmd, shell=True, stdin=devnull, stdout=subprocess.PIPE
 446                 )
 447                 ir_file = pp.stdout
 448
 449         if isinstance(cmd_args, list):
 450             args = [applySubstitutions(a, substitutions) for a in cmd_args]
 451             stdout = subprocess.check_output([exe] + args, stdin=ir_file)
 452         else:
 453             stdout = subprocess.check_output(
 454                 exe + " " + applySubstitutions(cmd_args, substitutions),
 455                 shell=True,
 456                 stdin=ir_file,
 457             )
 458         if sys.version_info[0] > 2:
 459             # FYI, if you crashed here with a decode error, your run line probably
 460             # results in bitcode or other binary format being written to the pipe.
 461             # For an opt test, you probably want to add -S or -disable-output.
 462             stdout = stdout.decode()
 463     # Fix line endings to unix CR style.
 464     return stdout.replace("\r\n", "\n")
 465
 466
 467 ##### LLVM IR parser
# A RUN line: a "//", ";" or "#" comment whose text starts with "RUN:";
# group 1 is the rest of the line.
RUN_LINE_RE = re.compile(r"^\s*(?://|[;#])\s*RUN:\s*(.*)$")
# A -check-prefix / --check-prefixes option; group 1 is its value.
CHECK_PREFIX_RE = re.compile(r"--?check-prefix(?:es)?[= ](\S+)")
# A whole string made only of letters, digits, "_" and "-".
PREFIX_RE = re.compile("^[a-zA-Z0-9_-]+$")
# A commented "<prefix>:" line, with an optional -NEXT/-NOT/-DAG/-LABEL/
# -SAME/-EMPTY suffix; group 1 is the prefix without that suffix.
CHECK_RE = re.compile(
    r"^\s*(?://|[;#])\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME|-EMPTY)?:"
)
 474
 475 UTC_ARGS_KEY = "UTC_ARGS:"
 476 UTC_ARGS_CMD = re.compile(r".*" + UTC_ARGS_KEY + "\s*(?P<cmd>.*)\s*$")
 477 UTC_ADVERT = "NOTE: Assertions have been autogenerated by "
 478 UTC_AVOID = "NOTE: Do not autogenerate"
 479 UNUSED_NOTE = "NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:"
 480
# A function definition: an optional "; Function Attrs:" line (group "attrs"),
# then "define", the text before "@" ("funcdef_attrs_and_ret"), the function
# name ("func"), the parenthesized arguments up to the opening brace
# ("args_and_sig") and everything up to the closing "}" line ("body").
OPT_FUNCTION_RE = re.compile(
    r"^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s():,]+?))?\s*define\s+(?P<funcdef_attrs_and_ret>[^@]*)@(?P<func>[\w.$-]+?)\s*"
    r"(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P<body>.*?)^\}$",
    flags=(re.M | re.S),
)

# "'<analysis>' for function '<func>':" followed by the body.
ANALYZE_FUNCTION_RE = re.compile(
    r"^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.$-]+?)\':"
    r"\s*\n(?P<body>.*)$",
    flags=(re.X | re.S),
)

# A quoted function name ("func"), the rest of that line, then the body.
LV_DEBUG_RE = re.compile(
    r"^\s*\'(?P<func>[\w.$-]+?)\'[^\n]*" r"\s*\n(?P<body>.*)$", flags=(re.X | re.S)
)

# Start of an IR "define" line; group 1 is the (optionally quoted) name.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(')
# 'target triple = "..."' line; group 1 is the triple.
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
# -mtriple / -march / -debug-only options; group 1 is the option value.
TRIPLE_ARG_RE = re.compile(r"-mtriple[= ]([^ ]+)")
MARCH_ARG_RE = re.compile(r"-march[= ]([^ ]+)")
DEBUG_ONLY_ARG_RE = re.compile(r"-debug-only[= ]([^ ]+)")

# Whitespace-scrubbing patterns.
SCRUB_LEADING_WHITESPACE_RE = re.compile(r"^(\s+)")
SCRUB_WHITESPACE_RE = re.compile(r"(?!^(|  \w))[ \t]+", flags=re.M)
# Used by scrub_body: group 2 is the non-space character preceding a run of
# spaces/tabs that is not at the very start of the text.
SCRUB_PRESERVE_LEADING_WHITESPACE_RE = re.compile(r"((?!^)[ \t]*(\S))[ \t]+")
# Spaces/tabs at the end of any line.
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r"[ \t]+$", flags=re.M)
# Alias of the pattern above; this is the name scrub_body uses.
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
# Trailing spaces/tabs and "#<number>" attribute references at end of line.
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(
    r"([ \t]|(#[0-9]+))+$", flags=re.M
)
# A "# kill:" comment line, including its newline.
SCRUB_KILL_COMMENT_RE = re.compile(r"^ *#+ +kill:.*\n")
# "# =>This Inner Loop Header:" and "# in Loop:" comments.
SCRUB_LOOP_COMMENT_RE = re.compile(
    r"# =>This Inner Loop Header:.*|# in Loop:.*", flags=re.M
)
# A lone "#" (with optional preceding spaces/tabs) at the end of a line.
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r"(?<=\S)+[ \t]*#$", flags=re.M)

# Appended to the comment marker to form the separator line that
# should_add_line_to_output drops.
SEPARATOR = "."
 518
 519
 520 def error(msg, test_file=None):
 521     if test_file:
 522         msg = "{}: {}".format(msg, test_file)
 523     print("ERROR: {}".format(msg), file=sys.stderr)
 524
 525
 526 def warn(msg, test_file=None):
 527     if test_file:
 528         msg = "{}: {}".format(msg, test_file)
 529     print("WARNING: {}".format(msg), file=sys.stderr)
 530
 531
 532 def debug(*args, **kwargs):
 533     # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
 534     if "file" not in kwargs:
 535         kwargs["file"] = sys.stderr
 536     if _verbose:
 537         print(*args, **kwargs)
 538
 539
 540 def find_run_lines(test, lines):
 541     debug("Scanning for RUN lines in test file:", test)
 542     raw_lines = [m.group(1) for m in [RUN_LINE_RE.match(l) for l in lines] if m]
 543     run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
 544     for l in raw_lines[1:]:
 545         if run_lines[-1].endswith("\\"):
 546             run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l
 547         else:
 548             run_lines.append(l)
 549     debug("Found {} RUN lines in {}:".format(len(run_lines), test))
 550     for l in run_lines:
 551         debug("  RUN: {}".format(l))
 552     return run_lines
 553
 554
 555 def get_triple_from_march(march):
 556     triples = {
 557         "amdgcn": "amdgcn",
 558         "r600": "r600",
 559         "mips": "mips",
 560         "sparc": "sparc",
 561         "hexagon": "hexagon",
 562         "ve": "ve",
 563     }
 564     for prefix, triple in triples.items():
 565         if march.startswith(prefix):
 566             return triple
 567     print("Cannot find a triple. Assume 'x86'", file=sys.stderr)
 568     return "x86"
 569
 570
 571 def apply_filters(line, filters):
 572     has_filter = False
 573     for f in filters:
 574         if not f.is_filter_out:
 575             has_filter = True
 576         if f.search(line):
 577             return False if f.is_filter_out else True
 578     # If we only used filter-out, keep the line, otherwise discard it since no
 579     # filter matched.
 580     return False if has_filter else True
 581
 582
 583 def do_filter(body, filters):
 584     return (
 585         body
 586         if not filters
 587         else "\n".join(
 588             filter(lambda line: apply_filters(line, filters), body.splitlines())
 589         )
 590     )
 591
 592
 593 def scrub_body(body):
 594     # Scrub runs of whitespace out of the assembly, but leave the leading
 595     # whitespace in place.
 596     body = SCRUB_PRESERVE_LEADING_WHITESPACE_RE.sub(lambda m: m.group(2) + " ", body)
 597
 598     # Expand the tabs used for indentation.
 599     body = str.expandtabs(body, 2)
 600     # Strip trailing whitespace.
 601     body = SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r"", body)
 602     return body
 603
 604
 605 def do_scrub(body, scrubber, scrubber_args, extra):
 606     if scrubber_args:
 607         local_args = copy.deepcopy(scrubber_args)
 608         local_args[0].extra_scrub = extra
 609         return scrubber(body, *local_args)
 610     return scrubber(body, *scrubber_args)
 611
 612
 613 # Build up a dictionary of all the function bodies.
 614 class function_body(object):
 615     def __init__(
 616         self,
 617         string,
 618         extra,
 619         funcdef_attrs_and_ret,
 620         args_and_sig,
 621         attrs,
 622         func_name_separator,
 623     ):
 624         self.scrub = string
 625         self.extrascrub = extra
 626         self.funcdef_attrs_and_ret = funcdef_attrs_and_ret
 627         self.args_and_sig = args_and_sig
 628         self.attrs = attrs
 629         self.func_name_separator = func_name_separator
 630
 631     def is_same_except_arg_names(
 632         self, extrascrub, funcdef_attrs_and_ret, args_and_sig, attrs, is_backend
 633     ):
 634         arg_names = set()
 635
 636         def drop_arg_names(match):
 637             arg_names.add(match.group(variable_group_in_ir_value_match))
 638             if match.group(attribute_group_in_ir_value_match):
 639                 attr = match.group(attribute_group_in_ir_value_match)
 640             else:
 641                 attr = ""
 642             return match.group(1) + attr + match.group(match.lastindex)
 643
 644         def repl_arg_names(match):
 645             if (
 646                 match.group(variable_group_in_ir_value_match) is not None
 647                 and match.group(variable_group_in_ir_value_match) in arg_names
 648             ):
 649                 return match.group(1) + match.group(match.lastindex)
 650             return match.group(1) + match.group(2) + match.group(match.lastindex)
 651
 652         if self.funcdef_attrs_and_ret != funcdef_attrs_and_ret:
 653             return False
 654         if self.attrs != attrs:
 655             return False
 656         ans0 = IR_VALUE_RE.sub(drop_arg_names, self.args_and_sig)
 657         ans1 = IR_VALUE_RE.sub(drop_arg_names, args_and_sig)
 658         if ans0 != ans1:
 659             return False
 660         if is_backend:
 661             # Check without replacements, the replacements are not applied to the
 662             # body for backend checks.
 663             return self.extrascrub == extrascrub
 664
 665         es0 = IR_VALUE_RE.sub(repl_arg_names, self.extrascrub)
 666         es1 = IR_VALUE_RE.sub(repl_arg_names, extrascrub)
 667         es0 = SCRUB_IR_COMMENT_RE.sub(r"", es0)
 668         es1 = SCRUB_IR_COMMENT_RE.sub(r"", es1)
 669         return es0 == es1
 670
 671     def __str__(self):
 672         return self.scrub
 673
 674
 675 class FunctionTestBuilder:
 676     def __init__(self, run_list, flags, scrubber_args, path):
 677         self._verbose = flags.verbose
 678         self._record_args = flags.function_signature
 679         self._check_attributes = flags.check_attributes
 680         # Strip double-quotes if input was read by UTC_ARGS
 681         self._filters = (
 682             list(
 683                 map(
 684                     lambda f: Filter(
 685                         re.compile(f.pattern().strip('"'), f.flags()), f.is_filter_out
 686                     ),
 687                     flags.filters,
 688                 )
 689             )
 690             if flags.filters
 691             else []
 692         )
 693         self._scrubber_args = scrubber_args
 694         self._path = path
 695         # Strip double-quotes if input was read by UTC_ARGS
 696         self._replace_value_regex = list(
 697             map(lambda x: x.strip('"'), flags.replace_value_regex)
 698         )
 699         self._func_dict = {}
 700         self._func_order = {}
 701         self._global_var_dict = {}
 702         self._processed_prefixes = set()
 703         for tuple in run_list:
 704             for prefix in tuple[0]:
 705                 self._func_dict.update({prefix: dict()})
 706                 self._func_order.update({prefix: []})
 707                 self._global_var_dict.update({prefix: dict()})
 708
 709     def finish_and_get_func_dict(self):
 710         for prefix in self.get_failed_prefixes():
 711             warn(
 712                 "Prefix %s had conflicting output from different RUN lines for all functions in test %s"
 713                 % (
 714                     prefix,
 715                     self._path,
 716                 )
 717             )
 718         return self._func_dict
 719
    def func_order(self):
        """Return the dictionary mapping each prefix to its function list."""
        return self._func_order
 722
    def global_var_dict(self):
        """Return the dictionary mapping each prefix to its global-value table."""
        return self._global_var_dict
 725
    def is_filtered(self):
        """Return True when at least one filter is configured."""
        return bool(self._filters)
 728
 729     def process_run_line(
 730         self, function_re, scrubber, raw_tool_output, prefixes, is_backend
 731     ):
 732         build_global_values_dictionary(self._global_var_dict, raw_tool_output, prefixes)
 733         for m in function_re.finditer(raw_tool_output):
 734             if not m:
 735                 continue
 736             func = m.group("func")
 737             body = m.group("body")
 738             # func_name_separator is the string that is placed right after function name at the
 739             # beginning of assembly function definition. In most assemblies, that is just a
 740             # colon: `foo:`. But, for example, in nvptx it is a brace: `foo(`. If is_backend is
 741             # False, just assume that separator is an empty string.
 742             if is_backend:
 743                 # Use ':' as default separator.
 744                 func_name_separator = (
 745                     m.group("func_name_separator")
 746                     if "func_name_separator" in m.groupdict()
 747                     else ":"
 748                 )
 749             else:
 750                 func_name_separator = ""
 751             attrs = m.group("attrs") if self._check_attributes else ""
 752             funcdef_attrs_and_ret = (
 753                 m.group("funcdef_attrs_and_ret") if self._record_args else ""
 754             )
 755             # Determine if we print arguments, the opening brace, or nothing after the
 756             # function name
 757             if self._record_args and "args_and_sig" in m.groupdict():
 758                 args_and_sig = scrub_body(m.group("args_and_sig").strip())
 759             elif "args_and_sig" in m.groupdict():
 760                 args_and_sig = "("
 761             else:
 762                 args_and_sig = ""
 763             filtered_body = do_filter(body, self._filters)
 764             scrubbed_body = do_scrub(
 765                 filtered_body, scrubber, self._scrubber_args, extra=False
 766             )
 767             scrubbed_extra = do_scrub(
 768                 filtered_body, scrubber, self._scrubber_args, extra=True
 769             )
 770             if "analysis" in m.groupdict():
 771                 analysis = m.group("analysis")
 772                 supported_analyses = {
 773                     "cost model analysis",
 774                     "scalar evolution analysis",
 775                     "loop access analysis",
 776                 }
 777                 if analysis.lower() not in supported_analyses:
 778                     warn("Unsupported analysis mode: %r!" % (analysis,))
 779             if func.startswith("stress"):
 780                 # We only use the last line of the function body for stress tests.
 781                 scrubbed_body = "\n".join(scrubbed_body.splitlines()[-1:])
 782             if self._verbose:
 783                 print("Processing function: " + func, file=sys.stderr)
 784                 for l in scrubbed_body.splitlines():
 785                     print("  " + l, file=sys.stderr)
 786             for prefix in prefixes:
 787                 # Replace function names matching the regex.
 788                 for regex in self._replace_value_regex:
 789                     # Pattern that matches capture groups in the regex in leftmost order.
 790                     group_regex = re.compile(r"\(.*?\)")
 791                     # Replace function name with regex.
 792                     match = re.match(regex, func)
 793                     if match:
 794                         func_repl = regex
 795                         # Replace any capture groups with their matched strings.
 796                         for g in match.groups():
 797                             func_repl = group_regex.sub(
 798                                 re.escape(g), func_repl, count=1
 799                             )
 800                         func = re.sub(func_repl, "{{" + func_repl + "}}", func)
 801
 802                     # Replace all calls to regex matching functions.
 803                     matches = re.finditer(regex, scrubbed_body)
 804                     for match in matches:
 805                         func_repl = regex
 806                         # Replace any capture groups with their matched strings.
 807                         for g in match.groups():
 808                             func_repl = group_regex.sub(
 809                                 re.escape(g), func_repl, count=1
 810                             )
 811                         # Substitute function call names that match the regex with the same
 812                         # capture groups set.
 813                         scrubbed_body = re.sub(
 814                             func_repl, "{{" + func_repl + "}}", scrubbed_body
 815                         )
 816
 817                 if func in self._func_dict[prefix]:
 818                     if self._func_dict[prefix][func] is not None and (
 819                         str(self._func_dict[prefix][func]) != scrubbed_body
 820                         or self._func_dict[prefix][func].args_and_sig != args_and_sig
 821                         or self._func_dict[prefix][func].attrs != attrs
 822                         or self._func_dict[prefix][func].funcdef_attrs_and_ret
 823                         != funcdef_attrs_and_ret
 824                     ):
 825                         if self._func_dict[prefix][func].is_same_except_arg_names(
 826                             scrubbed_extra,
 827                             funcdef_attrs_and_ret,
 828                             args_and_sig,
 829                             attrs,
 830                             is_backend,
 831                         ):
 832                             self._func_dict[prefix][func].scrub = scrubbed_extra
 833                             self._func_dict[prefix][func].args_and_sig = args_and_sig
 834                         else:
 835                             # This means a previous RUN line produced a body for this function
 836                             # that is different from the one produced by this current RUN line,
 837                             # so the body can't be common across RUN lines. We use None to
 838                             # indicate that.
 839                             self._func_dict[prefix][func] = None
 840                 else:
 841                     if prefix not in self._processed_prefixes:
 842                         self._func_dict[prefix][func] = function_body(
 843                             scrubbed_body,
 844                             scrubbed_extra,
 845                             funcdef_attrs_and_ret,
 846                             args_and_sig,
 847                             attrs,
 848                             func_name_separator,
 849                         )
 850                         self._func_order[prefix].append(func)
 851                     else:
 852                         # An earlier RUN line used this check prefixes but didn't produce
 853                         # a body for this function. This happens in Clang tests that use
 854                         # preprocesser directives to exclude individual functions from some
 855                         # RUN lines.
 856                         self._func_dict[prefix][func] = None
 857
 858     def processed_prefixes(self, prefixes):
 859         """
 860         Mark a set of prefixes as having had at least one applicable RUN line fully
 861         processed. This is used to filter out function bodies that don't have
 862         outputs for all RUN lines.
 863         """
 864         self._processed_prefixes.update(prefixes)
 865
 866     def get_failed_prefixes(self):
 867         # This returns the list of those prefixes that failed to match any function,
 868         # because there were conflicting bodies produced by different RUN lines, in
 869         # all instances of the prefix.
 870         for prefix in self._func_dict:
 871             if self._func_dict[prefix] and (
 872                 not [
 873                     fct
 874                     for fct in self._func_dict[prefix]
 875                     if self._func_dict[prefix][fct] is not None
 876                 ]
 877             ):
 878                 yield prefix
 879
 880
 881 ##### Generator of LLVM IR CHECK lines
 882
# Matches an IR comment: optional leading whitespace, a ';' and everything
# after it up to the end of the line.
SCRUB_IR_COMMENT_RE = re.compile(r"\s*;.*")
 884
# TODO: We should also derive check lines for global, debug, loop declarations, etc.
 886
 887
 888 class NamelessValue:
 889     def __init__(
 890         self,
 891         check_prefix,
 892         check_key,
 893         ir_prefix,
 894         ir_regexp,
 895         global_ir_rhs_regexp,
 896         *,
 897         is_before_functions=False,
 898         is_number=False,
 899         replace_number_with_counter=False
 900     ):
 901         self.check_prefix = check_prefix
 902         self.check_key = check_key
 903         self.ir_prefix = ir_prefix
 904         self.ir_regexp = ir_regexp
 905         self.global_ir_rhs_regexp = global_ir_rhs_regexp
 906         self.is_before_functions = is_before_functions
 907         self.is_number = is_number
 908         # Some variable numbers (e.g. MCINST1234) will change based on unrelated
 909         # modifications to LLVM, replace those with an incrementing counter.
 910         self.replace_number_with_counter = replace_number_with_counter
 911         self.variable_mapping = {}
 912
 913     # Return true if this kind of IR value is "local", basically if it matches '%{{.*}}'.
 914     def is_local_def_ir_value_match(self, match):
 915         return self.ir_prefix == "%"
 916
 917     # Return true if this kind of IR value is "global", basically if it matches '#{{.*}}'.
 918     def is_global_scope_ir_value_match(self, match):
 919         return self.global_ir_rhs_regexp is not None
 920
 921     # Return the IR prefix and check prefix we use for this kind or IR value,
 922     # e.g., (%, TMP) for locals.
 923     def get_ir_prefix_from_ir_value_match(self, match):
 924         return self.ir_prefix, self.check_prefix
 925
 926     # Return the IR regexp we use for this kind or IR value, e.g., [\w.-]+? for locals
 927     def get_ir_regex_from_ir_value_re_match(self, match):
 928         # for backwards compatibility we check locals with '.*'
 929         if self.is_local_def_ir_value_match(match):
 930             return ".*"
 931         return self.ir_regexp
 932
 933     # Create a FileCheck variable name based on an IR name.
 934     def get_value_name(self, var: str, check_prefix: str):
 935         var = var.replace("!", "")
 936         if self.replace_number_with_counter:
 937             assert var
 938             replacement = self.variable_mapping.get(var, None)
 939             if replacement is None:
 940                 # Replace variable with an incrementing counter
 941                 replacement = str(len(self.variable_mapping) + 1)
 942                 self.variable_mapping[var] = replacement
 943             var = replacement
 944         # This is a nameless value, prepend check_prefix.
 945         if var.isdigit():
 946             var = check_prefix + var
 947         else:
 948             # This is a named value that clashes with the check_prefix, prepend with
 949             # _prefix_filecheck_ir_name, if it has been defined.
 950             if (
 951                 may_clash_with_default_check_prefix_name(check_prefix, var)
 952                 and _prefix_filecheck_ir_name
 953             ):
 954                 var = _prefix_filecheck_ir_name + var
 955         var = var.replace(".", "_")
 956         var = var.replace("-", "_")
 957         return var.upper()
 958
 959     # Create a FileCheck variable from regex.
 960     def get_value_definition(self, var, match):
 961         # for backwards compatibility we check locals with '.*'
 962         varname = self.get_value_name(var, self.check_prefix)
 963         prefix = self.get_ir_prefix_from_ir_value_match(match)[0]
 964         if self.is_number:
 965             regex = ""  # always capture a number in the default format
 966             capture_start = "[[#"
 967         else:
 968             regex = self.get_ir_regex_from_ir_value_re_match(match)
 969             capture_start = "[["
 970         if self.is_local_def_ir_value_match(match):
 971             return capture_start + varname + ":" + prefix + regex + "]]"
 972         return prefix + capture_start + varname + ":" + regex + "]]"
 973
 974     # Use a FileCheck variable.
 975     def get_value_use(self, var, match, var_prefix=None):
 976         if var_prefix is None:
 977             var_prefix = self.check_prefix
 978         capture_start = "[[#" if self.is_number else "[["
 979         if self.is_local_def_ir_value_match(match):
 980             return capture_start + self.get_value_name(var, var_prefix) + "]]"
 981         prefix = self.get_ir_prefix_from_ir_value_match(match)[0]
 982         return prefix + capture_start + self.get_value_name(var, var_prefix) + "]]"
 983
 984
# Description of the different "unnamed" values we match in the IR, e.g.,
# (local) ssa values, (debug) metadata, etc.
# The order of the entries matters: the per-entry regexps are OR-ed together in
# list order when the value regexps are built, and the position of the regexp
# group that matched is used as the index back into this list.
ir_nameless_values = [
    #            check_prefix   check_key  ir_prefix           ir_regexp                global_ir_rhs_regexp
    NamelessValue(r"TMP", "%", r"%", r"[\w$.-]+?", None),
    NamelessValue(r"ATTR", "#", r"#", r"[0-9]+", None),
    NamelessValue(r"ATTR", "#", r"attributes #", r"[0-9]+", r"{[^}]*}"),
    NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", None),
    NamelessValue(
        r"GLOB", "@", r"@", r'[a-zA-Z0-9_$"\\.-]+', r".+", is_before_functions=True
    ),
    NamelessValue(r"DBG", "!", r"!dbg ", r"![0-9]+", None),
    NamelessValue(r"DIASSIGNID", "!", r"!DIAssignID ", r"![0-9]+", None),
    NamelessValue(r"PROF", "!", r"!prof ", r"![0-9]+", None),
    NamelessValue(r"TBAA", "!", r"!tbaa ", r"![0-9]+", None),
    NamelessValue(r"TBAA_STRUCT", "!", r"!tbaa.struct ", r"![0-9]+", None),
    NamelessValue(r"RNG", "!", r"!range ", r"![0-9]+", None),
    NamelessValue(r"LOOP", "!", r"!llvm.loop ", r"![0-9]+", None),
    NamelessValue(r"META", "!", r"metadata ", r"![0-9]+", None),
    NamelessValue(r"META", "!", r"", r"![0-9]+", r"(?:distinct |)!.*"),
    NamelessValue(r"ACC_GRP", "!", r"!llvm.access.group ", r"![0-9]+", None),
]

# Numbered values matched in asm output. Both kinds are captured as FileCheck
# numeric variables (is_number) and their numbers are replaced with an
# incrementing counter (replace_number_with_counter).
asm_nameless_values = [
    NamelessValue(
        r"MCINST",
        "Inst#",
        "<MCInst #",
        r"\d+",
        r".+",
        is_number=True,
        replace_number_with_counter=True,
    ),
    NamelessValue(
        r"MCREG",
        "Reg:",
        "<MCOperand Reg:",
        r"\d+",
        r".+",
        is_number=True,
        replace_number_with_counter=True,
    ),
]

# Values matched in analysis output: lowercase hexadecimal literals (0x...)
# with no IR prefix, replaced with an incrementing counter.
analyze_nameless_values = [
    NamelessValue(
        r"GRP",
        "#",
        r"",
        r"0x[0-9a-f]+",
        None,
        replace_number_with_counter=True,
    ),
]
1039
def createOrRegexp(old, new):
    """Join two regexp strings into an alternation.

    When either side is empty (falsy) the other side is returned unchanged, so
    no dangling '|' is produced.
    """
    if old and new:
        return old + "|" + new
    return old if old else new
1046
1047
def createPrefixMatch(prefix_str, prefix_re):
    """Return a regexp that matches prefix_str followed by prefix_re.

    Only the prefix_re part is a capturing group; the whole expression is
    wrapped in a non-capturing group.
    """
    captured = "(" + prefix_re + ")"
    return "(?:" + prefix_str + captured + ")"
1050
1051
# Build the regexp that matches an "IR value". This can be a local variable,
# argument, global, or metadata, anything that is "named". It is important that
# the PREFIX and SUFFIX below only contain a single group, if that changes
# other locations will need adjustment as well.
IR_VALUE_REGEXP_PREFIX = r"(\s*)"
IR_VALUE_REGEXP_STRING = r""
for nameless_value in ir_nameless_values:
    # One alternative (with exactly one capturing group) per nameless value,
    # appended in list order.
    match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
    if nameless_value.global_ir_rhs_regexp is not None:
        # Kinds that have a right-hand-side regexp are anchored with '^'.
        match = "^" + match
    IR_VALUE_REGEXP_STRING = createOrRegexp(IR_VALUE_REGEXP_STRING, match)
# A value must be followed by ',', whitespace, '(' or ')', or the end of the
# string.
IR_VALUE_REGEXP_SUFFIX = r"([,\s\(\)]|\Z)"
IR_VALUE_RE = re.compile(
    IR_VALUE_REGEXP_PREFIX
    + r"("
    + IR_VALUE_REGEXP_STRING
    + r")"
    + IR_VALUE_REGEXP_SUFFIX
)

# Build the regexp that matches an "ASM value" (currently only for --asm-show-inst comments).
# The prefix group requires a '#' or '//' followed by optional whitespace.
ASM_VALUE_REGEXP_STRING = ""
for nameless_value in asm_nameless_values:
    match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
    ASM_VALUE_REGEXP_STRING = createOrRegexp(ASM_VALUE_REGEXP_STRING, match)
ASM_VALUE_REGEXP_SUFFIX = r"([>\s]|\Z)"
ASM_VALUE_RE = re.compile(
    r"((?:#|//)\s*)" + "(" + ASM_VALUE_REGEXP_STRING + ")" + ASM_VALUE_REGEXP_SUFFIX
)

# Build the regexp that matches a value from analyze_nameless_values. The
# suffix group requires an optional ')' followed by ':'.
ANALYZE_VALUE_REGEXP_PREFIX = r"(\s*)"
ANALYZE_VALUE_REGEXP_STRING = r""
for nameless_value in analyze_nameless_values:
    match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
    ANALYZE_VALUE_REGEXP_STRING = createOrRegexp(ANALYZE_VALUE_REGEXP_STRING, match)
ANALYZE_VALUE_REGEXP_SUFFIX = r"(\)?:)"
ANALYZE_VALUE_RE = re.compile(
    ANALYZE_VALUE_REGEXP_PREFIX
    + r"("
    + ANALYZE_VALUE_REGEXP_STRING
    + r")"
    + ANALYZE_VALUE_REGEXP_SUFFIX
)

# The entire match is group 0, the prefix has one group (=1), the entire
# IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start.
first_nameless_group_in_ir_value_match = 3

# constants for the group id of special matches
# Group 3 belongs to the first entry of ir_nameless_values (TMP, '%') and
# group 4 to the second entry (ATTR, '#').
variable_group_in_ir_value_match = 3
attribute_group_in_ir_value_match = 4
1103
# Inspect a match of one of the value regexps and return the zero-based position
# of the nameless-value alternative that matched (local %..., global @...,
# debug number !dbg !..., etc.). Reports an error and returns 0 if none did.
def get_idx_from_ir_value_match(match):
    first_group = first_nameless_group_in_ir_value_match
    # Groups from first_group up to, but not including, match.lastindex are
    # examined.
    for position, group_id in enumerate(range(first_group, match.lastindex)):
        if match.group(group_id) is not None:
            return position
    error("Unable to identify the kind of IR value from the match!")
    return 0
1112
1113
# Return the text captured by the nameless-value group that matched; see
# get_idx_from_ir_value_match.
def get_name_from_ir_value_match(match):
    group_id = (
        get_idx_from_ir_value_match(match) + first_nameless_group_in_ir_value_match
    )
    return match.group(group_id)
1119
1120
def get_nameless_value_from_match(match, nameless_values) -> NamelessValue:
    # Pick the entry of nameless_values that corresponds to the alternative
    # which produced this match.
    kind_index = get_idx_from_ir_value_match(match)
    return nameless_values[kind_index]
1123
1124
# Return a truthy value if var clashes with the scripted FileCheck check_prefix,
# i.e. var is check_prefix followed only by digits (compared ignoring case).
def may_clash_with_default_check_prefix_name(check_prefix, var):
    if not check_prefix:
        # Without a prefix nothing can clash; the falsy prefix is the result.
        return check_prefix
    clash_pattern = r"^" + check_prefix + r"[0-9]+?$"
    return re.match(clash_pattern, var, re.IGNORECASE)
1130
1131
def generalize_check_lines_common(
    lines,
    is_analyze,
    vars_seen,
    global_vars_seen,
    nameless_values,
    nameless_value_regex,
    is_asm,
    preserve_names,
):
    """Rewrite value names in `lines` into FileCheck variable defs and uses.

    `lines` is modified in place and also returned. For IR (neither asm nor
    analyze) every line is first normalized: '%.' names are renamed, integer
    literals of selected globals become hex numeric captures, and comments are
    removed. Unless preserve_names is set, every match of nameless_value_regex
    is then replaced by a definition (first sighting) or a use (later
    sightings); vars_seen and global_vars_seen are updated accordingly.
    """

    # Called for each regexp match in a line: values not seen before become
    # definitions, values already seen become uses.
    def replace_value(found):
        name = get_name_from_ir_value_match(found)
        kind = get_nameless_value_from_match(found, nameless_values)
        if may_clash_with_default_check_prefix_name(kind.check_prefix, name):
            warn(
                "Change IR value name '%s' or use --prefix-filecheck-ir-name to prevent possible conflict"
                " with scripted FileCheck name." % (name,)
            )
        seen_key = (name, kind.check_key)
        if kind.is_local_def_ir_value_match(found):
            if seen_key in vars_seen:
                text = kind.get_value_use(name, found)
            else:
                vars_seen.add(seen_key)
                text = kind.get_value_definition(name, found)
        elif seen_key in global_vars_seen:
            # A different prefix may have been recorded for this global value
            # first; use that one instead of the prefix of the current match.
            text = kind.get_value_use(name, found, global_vars_seen[seen_key])
        else:
            global_vars_seen[seen_key] = kind.check_prefix
            text = kind.get_value_definition(name, found)
        # The substitution replaces the entire match, so the leading group and
        # the trailing group (commas, spaces, ...) have to be handed back too.
        return found.group(1) + text + found.group(found.lastindex)

    # Turn "i<bits> <decimal>" into "i<bits> [[#<hex>]]".
    def hexify_int_literal(found):
        return "i" + found.group(1) + " [[#" + hex(int(found.group(2))) + "]]"

    normalize_ir = not (is_asm or is_analyze)

    for idx, text in enumerate(lines):
        if normalize_ir:
            # An IR variable named '%.' matches the FileCheck regex string.
            text = text.replace("%.", "%dot")
            if any(
                re.match("^@" + hex_regex + " = ", text)
                for hex_regex in _global_hex_value_regex
            ):
                text = re.sub(r"\bi([0-9]+) ([0-9]+)", hexify_int_literal, text)
            # Ignore any comments, since the check lines will too.
            lines[idx] = SCRUB_IR_COMMENT_RE.sub(r"", text)
        if preserve_names:
            continue
        # Back-to-back matches are not always all replaced by a single pass, so
        # substitute one match at a time until nothing matches any more.
        while True:
            lines[idx], replaced = nameless_value_regex.subn(
                replace_value, lines[idx], count=1
            )
            if not replaced:
                break
    return lines
1204
1205
# Replace IR value defs and uses with FileCheck variables, using the IR
# nameless-value table and IR_VALUE_RE.
def generalize_check_lines(
    lines, is_analyze, vars_seen, global_vars_seen, preserve_names
):
    return generalize_check_lines_common(
        lines=lines,
        is_analyze=is_analyze,
        vars_seen=vars_seen,
        global_vars_seen=global_vars_seen,
        nameless_values=ir_nameless_values,
        nameless_value_regex=IR_VALUE_RE,
        is_asm=False,
        preserve_names=preserve_names,
    )
1220
1221
def generalize_asm_check_lines(lines, vars_seen, global_vars_seen):
    # Replace asm values (see asm_nameless_values) with FileCheck variables.
    # Names are never preserved for asm output.
    return generalize_check_lines_common(
        lines=lines,
        is_analyze=False,
        vars_seen=vars_seen,
        global_vars_seen=global_vars_seen,
        nameless_values=asm_nameless_values,
        nameless_value_regex=ASM_VALUE_RE,
        is_asm=True,
        preserve_names=False,
    )
1233
1234
def generalize_analyze_check_lines(lines, vars_seen, global_vars_seen):
    # Replace analysis-output values (see analyze_nameless_values) with
    # FileCheck variables. Names are never preserved for analysis output.
    return generalize_check_lines_common(
        lines=lines,
        is_analyze=True,
        vars_seen=vars_seen,
        global_vars_seen=global_vars_seen,
        nameless_values=analyze_nameless_values,
        nameless_value_regex=ANALYZE_VALUE_RE,
        is_asm=False,
        preserve_names=False,
    )
1246
def add_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    check_label_format,
    is_backend,
    is_analyze,
    version,
    global_vars_seen_dict,
    is_filtered,
    preserve_names=False,
):
    """Append the FileCheck lines for one function to output_lines.

    For every entry of prefix_list, the first usable check prefix of that
    entry (not excluded and with recorded output for func_name) is used to
    emit a LABEL line followed by the checks for the function body. An entry
    is abandoned as soon as one of its prefixes has already been printed.

    Args:
      output_lines: list the generated lines are appended to.
      comment_marker: text placed at the start of every generated line.
      prefix_list: sequence of entries whose element [0] is the list of check
        prefixes of one RUN line.
      func_dict: mapping check prefix -> function name -> recorded function
        info; a falsy entry means there is no output to check.
      func_name: name of the function to emit checks for.
      check_label_format: %-format string for the LABEL line; it receives
        (checkprefix, funcdef_attrs_and_ret, func_name, args_and_sig,
        func_name_separator).
      is_backend: emit checks in the form used for asm output.
      is_analyze: emit checks in the form used for analysis output.
      version: output format version; above 1 the function definition
        attributes/return type are included, from 3 on the opening "(" of the
        arguments stays on the LABEL line when arguments move to a SAME line.
      global_vars_seen_dict: mapping check prefix -> global values already
        seen; updated in place with the values first seen here.
      is_filtered: when true, body lines are emitted without the "-NEXT"
        suffix.
      preserve_names: forwarded to generalize_check_lines.

    Returns:
      The list of check prefixes for which checks were emitted.
    """
    # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well.
    prefix_exclusions = set()
    printed_prefixes = []
    for p in prefix_list:
        checkprefixes = p[0]
        # If not all checkprefixes of this run line produced the function we cannot check for it as it does not
        # exist for this run line. A subset of the check prefixes might know about the function but only because
        # other run lines created it.
        if any(
            map(
                lambda checkprefix: func_name not in func_dict[checkprefix],
                checkprefixes,
            )
        ):
            prefix_exclusions |= set(checkprefixes)
            continue

    # prefix_exclusions is constructed, we can now emit the output
    for p in prefix_list:
        # Global values known so far, accumulated over the prefixes of this
        # entry that are visited before one of them is printed.
        global_vars_seen = {}
        checkprefixes = p[0]
        for checkprefix in checkprefixes:
            if checkprefix in global_vars_seen_dict:
                global_vars_seen.update(global_vars_seen_dict[checkprefix])
            else:
                global_vars_seen_dict[checkprefix] = {}
            # Checks for this prefix were already emitted; stop looking at the
            # remaining prefixes of this entry.
            if checkprefix in printed_prefixes:
                break

            # Check if the prefix is excluded.
            if checkprefix in prefix_exclusions:
                continue

            # If we do not have output for this prefix we skip it.
            if not func_dict[checkprefix][func_name]:
                continue

            # Add some space between different check prefixes, but not after the last
            # check line (before the test code).
            if is_backend:
                if len(printed_prefixes) != 0:
                    output_lines.append(comment_marker)

            if checkprefix not in global_vars_seen_dict:
                global_vars_seen_dict[checkprefix] = {}

            # Snapshot of the keys known before generalizing, used below to
            # find the global values that are new for this prefix.
            global_vars_seen_before = [key for key in global_vars_seen.keys()]

            vars_seen = set()
            printed_prefixes.append(checkprefix)
            attrs = str(func_dict[checkprefix][func_name].attrs)
            attrs = "" if attrs == "None" else attrs
            if version > 1:
                funcdef_attrs_and_ret = func_dict[checkprefix][
                    func_name
                ].funcdef_attrs_and_ret
            else:
                funcdef_attrs_and_ret = ""

            if attrs:
                output_lines.append(
                    "%s %s: Function Attrs: %s" % (comment_marker, checkprefix, attrs)
                )
            args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
            if args_and_sig:
                args_and_sig = generalize_check_lines(
                    [args_and_sig],
                    is_analyze,
                    vars_seen,
                    global_vars_seen,
                    preserve_names,
                )[0]
            func_name_separator = func_dict[checkprefix][func_name].func_name_separator
            if "[[" in args_and_sig:
                # Captures in label lines are not supported, thus split into a -LABEL
                # and a separate -SAME line that contains the arguments with captures.
                args_and_sig_prefix = ""
                if version >= 3 and args_and_sig.startswith("("):
                    # Ensure the "(" separating function name and arguments is in the
                    # label line. This is required in case of function names that are
                    # prefixes of each other. Otherwise, the label line for "foo" might
                    # incorrectly match on "foo.specialized".
                    args_and_sig_prefix = args_and_sig[0]
                    args_and_sig = args_and_sig[1:]

                # Removing args_and_sig from the label match line requires
                # func_name_separator to be empty. Otherwise, the match will not work.
                assert func_name_separator == ""
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig_prefix,
                        func_name_separator,
                    )
                )
                output_lines.append(
                    "%s %s-SAME: %s" % (comment_marker, checkprefix, args_and_sig)
                )
            else:
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig,
                        func_name_separator,
                    )
                )
            func_body = str(func_dict[checkprefix][func_name]).splitlines()
            if not func_body:
                # We have filtered everything.
                continue

            # For ASM output, just emit the check lines.
            if is_backend:
                body_start = 1
                if is_filtered:
                    # For filtered output we don't add "-NEXT" so don't add extra spaces
                    # before the first line.
                    body_start = 0
                else:
                    # The first body line is emitted as a plain check, without
                    # being generalized.
                    output_lines.append(
                        "%s %s:       %s" % (comment_marker, checkprefix, func_body[0])
                    )
                func_lines = generalize_asm_check_lines(
                    func_body[body_start:], vars_seen, global_vars_seen
                )
                for func_line in func_lines:
                    if func_line.strip() == "":
                        output_lines.append(
                            "%s %s-EMPTY:" % (comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "%s %s%s:  %s"
                            % (comment_marker, checkprefix, check_suffix, func_line)
                        )
                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
            # For analyze output, generalize the output, and emit CHECK-EMPTY lines as well.
            elif is_analyze:
                func_body = generalize_analyze_check_lines(
                    func_body, vars_seen, global_vars_seen
                )
                for func_line in func_body:
                    if func_line.strip() == "":
                        output_lines.append(
                            "{} {}-EMPTY:".format(comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "{} {}{}:  {}".format(
                                comment_marker, checkprefix, check_suffix, func_line
                            )
                        )

                # Add space between different check prefixes and also before the first
                # line of code in the test function.
                output_lines.append(comment_marker)

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
            # For IR output, change all defs to FileCheck variables, so we're immune
            # to variable naming fashions.
            else:
                func_body = generalize_check_lines(
                    func_body, False, vars_seen, global_vars_seen, preserve_names
                )

                # This could be selectively enabled with an optional invocation argument.
                # Disabled for now: better to check everything. Be safe rather than sorry.

                # Handle the first line of the function body as a special case because
                # it's often just noise (a useless asm comment or entry label).
                # if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
                #  is_blank_line = True
                # else:
                #  output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
                #  is_blank_line = False

                # True while the previously visited line(s) were blank.
                is_blank_line = False

                for func_line in func_body:
                    if func_line.strip() == "":
                        is_blank_line = True
                        continue
                    # Do not waste time checking IR comments.
                    func_line = SCRUB_IR_COMMENT_RE.sub(r"", func_line)

                    # Skip blank lines instead of checking them.
                    # The first line after a blank one is emitted as a plain
                    # check instead of a "-NEXT" check.
                    if is_blank_line:
                        output_lines.append(
                            "{} {}:       {}".format(
                                comment_marker, checkprefix, func_line
                            )
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "{} {}{}:  {}".format(
                                comment_marker, checkprefix, check_suffix, func_line
                            )
                        )
                    is_blank_line = False

                # Add space between different check prefixes and also before the first
                # line of code in the test function.
                output_lines.append(comment_marker)

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
    return printed_prefixes
1488
1489
def add_ir_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    preserve_names,
    function_sig,
    version,
    global_vars_seen_dict,
    is_filtered,
):
    """Emit the checks for one function of IR output.

    Builds the LABEL format from the IR function definition syntax and hands
    everything to add_checks (neither backend nor analyze mode). Returns what
    add_checks returns.
    """
    # Label format is based on IR string.
    if not function_sig:
        definition_part = "%s"
    elif version > 1:
        definition_part = "define %s"
    else:
        definition_part = "define {{[^@]+}}%s"
    label_format = "{} %s-LABEL: {}@%s%s%s".format(comment_marker, definition_part)
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        check_label_format=label_format,
        is_backend=False,
        is_analyze=False,
        version=version,
        global_vars_seen_dict=global_vars_seen_dict,
        is_filtered=is_filtered,
        preserve_names=preserve_names,
    )
1526
1527
def add_analyze_checks(
    output_lines, comment_marker, prefix_list, func_dict, func_name, is_filtered
):
    """Generate analysis check lines for func_name by delegating to add_checks.

    The -LABEL template wraps its placeholders in single quotes.  The call
    always passes version 1 and a fresh, empty global-variable dictionary.

    Returns whatever add_checks returns.
    """
    label_template = "{} %s-LABEL: '%s%s%s%s'".format(comment_marker)
    # Nothing is carried over between calls: each call starts with an empty
    # "globals seen" mapping.
    fresh_globals_seen = {}
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        label_template,
        False,
        True,
        1,
        fresh_globals_seen,
        is_filtered,
    )
1546
1547
def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes):
    """Record global value definition lines from raw_tool_output per prefix.

    For each nameless-value kind (IR and asm) that defines a
    global_ir_rhs_regexp, every line of raw_tool_output of the form
    "<lhs> = <rhs>" is collected and stored in
    glob_val_dict[prefix][kind.check_prefix] for each prefix in prefixes.

    Prefixes whose glob_val_dict entry is None are left alone.  When a prefix
    already holds *different* lines for a kind, the entry is reset to None;
    the exception is the last prefix in prefixes, for which a warning is
    emitted and the new lines replace the old ones.

    glob_val_dict is modified in place; nothing is returned.
    """
    for value_kind in itertools.chain(ir_nameless_values, asm_nameless_values):
        rhs_pattern = value_kind.global_ir_rhs_regexp
        if rhs_pattern is None:
            continue

        lhs_pattern = value_kind.ir_prefix + value_kind.ir_regexp
        definition_re = re.compile(
            r"^" + lhs_pattern + r"\s=\s" + rhs_pattern + r"$", flags=(re.M)
        )
        found_lines = [
            match.group(0) for match in definition_re.finditer(raw_tool_output)
        ]

        kind_key = value_kind.check_prefix
        for prefix in prefixes:
            per_prefix = glob_val_dict[prefix]
            if per_prefix is None:
                continue
            if kind_key in per_prefix:
                if found_lines == per_prefix[kind_key]:
                    # Same output as before: nothing to update.
                    continue
                if prefix != prefixes[-1]:
                    # Conflicting output under a shared prefix: invalidate it.
                    per_prefix[kind_key] = None
                    continue
                warn("Found conflicting asm under the same prefix: %r!" % (prefix,))
            per_prefix[kind_key] = found_lines
1574
1575
def add_global_checks(
    glob_val_dict,
    comment_marker,
    prefix_list,
    output_lines,
    global_vars_seen_dict,
    preserve_names,
    is_before_functions,
):
    """Append check lines for global values to output_lines.

    Only IR nameless-value kinds that define a global_ir_rhs_regexp and whose
    is_before_functions flag equals the is_before_functions argument are
    considered.  For each such kind and each entry of prefix_list, the check
    prefixes of the entry (its first element) are tried in order; the first
    one that yields at least one check line is printed, preceded by a
    separator line, and the remaining prefixes of that entry are skipped.

    If the module-level _global_value_regex is non-empty, only lines that
    start with "@<regex> = " for one of its patterns are emitted.

    global_vars_seen_dict is updated in place with the variables that were
    newly recorded while generalizing the printed lines.

    Returns the set of (check prefix, kind check prefix) pairs that were
    printed.  A closing separator line is appended if that set is non-empty.
    """
    printed_prefixes = set()
    for value_kind in ir_nameless_values:
        if value_kind.global_ir_rhs_regexp is None:
            continue
        if value_kind.is_before_functions != is_before_functions:
            continue
        kind_key = value_kind.check_prefix

        for run_entry in prefix_list:
            # Accumulates the variables known to every prefix visited so far
            # within this entry.
            global_vars_seen = {}
            run_prefixes = run_entry[0]
            if run_prefixes is None:
                continue

            for checkprefix in run_prefixes:
                if checkprefix in global_vars_seen_dict:
                    global_vars_seen.update(global_vars_seen_dict[checkprefix])
                else:
                    global_vars_seen_dict[checkprefix] = {}

                if (checkprefix, kind_key) in printed_prefixes:
                    break

                prefix_globals = glob_val_dict[checkprefix]
                if (
                    not prefix_globals
                    or kind_key not in prefix_globals
                    or not prefix_globals[kind_key]
                ):
                    continue

                # Snapshot of the known keys, used afterwards to tell which
                # variables were added while generalizing the lines below.
                previously_seen = set(global_vars_seen)
                check_lines = []
                for line in prefix_globals[kind_key]:
                    if _global_value_regex and not any(
                        re.match("^@" + regex + " = ", line)
                        for regex in _global_value_regex
                    ):
                        continue
                    generalized = generalize_check_lines(
                        [line], False, set(), global_vars_seen, preserve_names
                    )
                    check_lines.append(
                        "%s %s: %s" % (comment_marker, checkprefix, generalized[0])
                    )
                if not check_lines:
                    continue

                output_lines.append(comment_marker + SEPARATOR)
                output_lines.extend(check_lines)
                printed_prefixes.add((checkprefix, kind_key))

                # Remember new global variables we have not seen before.
                for key, value in global_vars_seen.items():
                    if key not in previously_seen:
                        global_vars_seen_dict[checkprefix][key] = value
                break

    if printed_prefixes:
        output_lines.append(comment_marker + SEPARATOR)
    return printed_prefixes
1644
1645
def check_prefix(prefix):
    """Warn when prefix does not match PREFIX_RE.

    If the rejected prefix contains a comma, the warning additionally
    suggests the --check-prefixes spelling.

    The message is interpolated *before* the hint is appended.  The hint
    embeds the user-supplied prefix verbatim, and an invalid prefix may well
    contain "%"; feeding it through the % operator as part of the format
    string would raise or mangle the text instead of producing the warning.
    """
    if PREFIX_RE.match(prefix):
        return

    message = (
        "Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores."
        % (prefix,)
    )
    if "," in prefix:
        message += " Did you mean '--check-prefixes=" + prefix + "'?"
    warn(message)
1658
1659
def get_check_prefixes(filecheck_cmd):
    """Return the check prefixes named in filecheck_cmd.

    Group 1 of every CHECK_PREFIX_RE match is split on commas and the pieces
    are collected in order of appearance.  When there is no match at all the
    result is ["CHECK"].
    """
    found = []
    for match in CHECK_PREFIX_RE.finditer(filecheck_cmd):
        found.extend(match.group(1).split(","))
    return found or ["CHECK"]
1669
1670
def verify_filecheck_prefixes(fc_cmd):
    """Validate the prefixes given in a FileCheck command line.

    fc_cmd is split on whitespace.  For a word containing "check-prefix=",
    the text after the first "=" is passed to check_prefix.  For a word
    containing "check-prefixes=", that text is split on commas, each element
    is passed to check_prefix, and a warning is issued for every element
    that occurs more than once in the list.
    """
    for word in fc_cmd.split():
        if "check-prefix=" in word:
            check_prefix(word.split("=", 1)[1])
            continue
        if "check-prefixes=" not in word:
            continue

        candidates = word.split("=", 1)[1].split(",")
        for candidate in candidates:
            check_prefix(candidate)
            if candidates.count(candidate) > 1:
                warn(
                    "Supplied prefix '%s' is not unique in the prefix list."
                    % (candidate,)
                )
1686
1687
def get_autogennote_suffix(parser, args):
    """Build the UTC_ARGS suffix describing the non-default options in args.

    Walks the actions registered on parser and, for each one whose value in
    args differs from the parser default, appends its first option string
    (and its value, if the action takes one) to the result.  Options naming
    tests, tool binaries, verbosity and similar are never recorded, and
    function_signature is omitted when args.version >= 2.  Each element of
    the "filters" option is written as its own --filter / --filter-out
    argument, skipping ones whose text is already present in the result.

    Returns "" when nothing needs recording, otherwise
    " <UTC_ARGS_KEY> <options>".
    """
    # Parameters such as paths to the binary or the list of tests.
    ignored_dests = (
        "tests",
        "update_only",
        "tool_binary",
        "opt_binary",
        "llc_binary",
        "clang",
        "opt",
        "llvm_bin",
        "verbose",
        "force_update",
    )

    note_args = ""
    for action in parser._actions:
        dest = action.dest
        # Options such as --help never show up in args.
        if not hasattr(args, dest):
            continue
        if dest in ignored_dests:
            continue

        value = getattr(args, dest)
        # The action stores a constant (usually True/False).  With paired
        # --foo/--no-foo options only the action whose constant equals the
        # current value is relevant.
        if action.const is not None and value != action.const:
            continue
        if parser.get_default(dest) == value:
            continue  # Default values are not recorded.
        if dest == "function_signature" and args.version >= 2:
            continue  # Enabled by default in version 2.

        if dest == "filters":
            # The value is a list of Filter objects; emit one option each.
            for elem in value:
                opt_name = "filter-out" if elem.is_filter_out else "filter"
                opt_value = elem.pattern()
                new_arg = '--%s "%s" ' % (opt_name, opt_value.strip('"'))
                if new_arg not in note_args:
                    note_args += new_arg
            continue

        note_args += action.option_strings[0] + " "
        if action.const is None:  # The action takes a parameter.
            if action.nargs == "+":
                value = " ".join('"' + v.strip('"') + '"' for v in value)
            note_args += "%s " % value

    if not note_args:
        return note_args
    # Drop the trailing space left behind by the last appended piece.
    return " %s %s" % (UTC_ARGS_KEY, note_args[:-1])
1738
1739
def check_for_command(line, parser, args, argv, argparse_callback):
    """Apply the options of a UTC_ARGS line, if line is one.

    When line matches UTC_ARGS_CMD, the shell-split words of its "cmd" group
    are appended to argv (in place, empty words dropped) and the arguments
    are re-parsed from argv with the entries listed in args.tests left out.
    argparse_callback, if not None, is then called with the new arguments.

    Returns the (possibly re-parsed) args together with argv.
    """
    cmd_match = UTC_ARGS_CMD.match(line)
    if not cmd_match:
        return args, argv

    argv.extend(
        option for option in shlex.split(cmd_match.group("cmd").strip()) if option
    )
    # The generator is consumed inside parse_args, i.e. before args is
    # rebound, so args.tests still refers to the previous parse result.
    args = parse_args(parser, (arg for arg in argv if arg not in args.tests))
    if argparse_callback is not None:
        argparse_callback(args)
    return args, argv
1750
1751
def find_arg_in_test(test_info, get_arg_to_check, arg_string, is_global):
    """Look up an option in the test-wide args and, optionally, per line.

    get_arg_to_check is applied to test_info.args first.  If that yields a
    falsy value and is_global is true, the lines from
    test_info.ro_iterlines() are scanned and the first truthy
    get_arg_to_check(line_info.args) wins.  If the winning line comes at or
    after the first line that is neither blank nor starts with ";", two
    warnings mentioning arg_string are printed to stderr.

    Returns the value found, or the last falsy value obtained.
    """
    result = get_arg_to_check(test_info.args)
    if result or not is_global:
        return result

    # See if this has been specified via UTC_ARGS.  This is a "global" option
    # that affects the entire generation of test checks.  If it exists anywhere
    # in the test, apply it to everything.
    saw_line = False
    for line_info in test_info.ro_iterlines():
        line = line_info.line
        if not line.startswith(";") and line.strip() != "":
            saw_line = True
        result = get_arg_to_check(line_info.args)
        if not result:
            continue
        # The previous "warn and saw_line" test only checked the truthiness
        # of the module-level warn function, which is always true.
        if saw_line:
            # We saw the option after already reading some test input lines.
            # Warn about it.
            print(
                "WARNING: Found {} in line following test start: ".format(arg_string)
                + line,
                file=sys.stderr,
            )
            print(
                "WARNING: Consider moving {} to top of file".format(arg_string),
                file=sys.stderr,
            )
        break
    return result
1781
1782
def dump_input_lines(output_lines, test_info, prefix_set, comment_string):
    """Copy the test's input lines to output_lines, dropping stale checks.

    Iterates test_info.iterlines(output_lines) and appends each line, with
    trailing newlines removed, to output_lines unless the line is

    * just the comment marker (ignoring surrounding whitespace),
    * the comment marker followed by SEPARATOR, or
    * a comment line matched by CHECK_RE whose group 1 is in prefix_set.

    output_lines is modified in place; nothing is returned.
    """
    for input_line_info in test_info.iterlines(output_lines):
        line = input_line_info.line
        stripped = line.strip()
        if stripped == comment_string:
            continue
        if stripped == comment_string + SEPARATOR:
            continue
        if line.lstrip().startswith(comment_string):
            m = CHECK_RE.match(line)
            if m and m.group(1) in prefix_set:
                continue
        output_lines.append(line.rstrip("\n"))
1796
1797
def add_checks_at_end(
    output_lines, prefix_list, func_order, comment_string, check_generator
):
    """Append checks for all functions in func_order, grouped by prefix.

    Each item of prefix_list has a list of check prefixes as its first
    element and the tool command args as its second.  For every prefix and
    every function listed in func_order[prefix], check_generator is called
    as check_generator(output_lines, [([prefix], tool_args)], func) and must
    return an iterable of the prefixes it generated checks for.  A bare
    comment_string line is appended before each call once at least one
    (function, prefix) pair has been recorded.

    Returns the set of all prefixes reported by check_generator.
    """
    emitted = set()
    generated_prefixes = set()
    for run_entry in prefix_list:
        run_prefixes, tool_args = run_entry[0], run_entry[1]
        for single_prefix in run_prefixes:
            for func in func_order[single_prefix]:
                # The func order can contain the same functions multiple
                # times.  Skip any that already have checks for this prefix.
                if (func, single_prefix) in emitted:
                    continue
                if emitted:
                    output_lines.append(comment_string)

                # The add_*_checks routines expect a run list whose items are
                # tuples that have a list of prefixes as their first element
                # and tool command args string as their second element.  They
                # output checks for each prefix in the list of prefixes.  By
                # doing so, they implicitly assume that for each function
                # every run line will generate something for that function.
                # That is not the case for generated functions as some run
                # lines might not generate them (e.g. -fopenmp vs. no
                # -fopenmp).
                #
                # Therefore, pass just the prefix we're interested in.  This
                # has the effect of generating all of the checks for functions
                # of a single prefix before moving on to the next prefix.  So
                # checks are ordered by prefix instead of by function as in
                # "normal" mode.
                single_run_list = [([single_prefix], tool_args)]
                for generated_prefix in check_generator(
                    output_lines, single_run_list, func
                ):
                    emitted.add((func, generated_prefix))
                    generated_prefixes.add(generated_prefix)
    return generated_prefixes