[Reland][Runtimes] Merge 'compile_commands.json' files from runtimes build (#116303)
[llvm-project.git] / llvm / utils / UpdateTestChecks / common.py
blobb108a21dbc52b840db345560c4d18df450ceff70
1 import argparse
2 import bisect
3 import collections
4 import copy
5 import glob
6 import os
7 import re
8 import subprocess
9 import sys
10 import shlex
12 from typing import List, Mapping, Set
14 ##### Common utilities for update_*test_checks.py
17 _verbose = False
18 _prefix_filecheck_ir_name = ""
20 """
21 Version changelog:
23 1: Initial version, used by tests that don't specify --version explicitly.
24 2: --function-signature is now enabled by default and also checks return
25 type/attributes.
26 3: Opening parenthesis of function args is kept on the first LABEL line
27 in case arguments are split to a separate SAME line.
28 4: --check-globals now has a third option ('smart'). The others are now called
29 'none' and 'all'. 'smart' is the default.
30 5: Basic block labels are matched by FileCheck expressions
31 """
32 DEFAULT_VERSION = 5
# Analysis printer outputs that update_analyze_test_checks.py knows how to
# parse; any other analysis name triggers an "Unsupported analysis mode"
# warning in FunctionTestBuilder.process_run_line.
# (The closing brace was truncated in this view of the file; restored here.)
SUPPORTED_ANALYSES = {
    "Branch Probability Analysis",
    "Cost Model Analysis",
    "Loop Access Analysis",
    "Scalar Evolution Analysis",
}
class Regex(object):
    """Wrapper around a compiled regular expression.

    Plain compiled pattern objects cannot be deep-copied; this wrapper makes
    deep copy work (the immutable pattern object is simply shared), which is
    required for the deepcopy performed in do_scrub.
    """

    def __init__(self, regex):
        self.regex = regex

    def __deepcopy__(self, memo):
        # Shallow-copy the wrapper; compiled patterns are immutable, so the
        # copy can safely share the underlying regex object.
        duplicate = copy.copy(self)
        duplicate.regex = self.regex
        return duplicate

    def search(self, line):
        return self.regex.search(line)

    def sub(self, repl, line):
        return self.regex.sub(repl, line)

    def pattern(self):
        return self.regex.pattern

    def flags(self):
        return self.regex.flags
class Filter(Regex):
    """A Regex plus a polarity flag.

    When is_filter_out is False a match keeps the line in the generated
    checks; when True a match removes it.
    """

    def __init__(self, regex, is_filter_out):
        super(Filter, self).__init__(regex)
        self.is_filter_out = is_filter_out

    def __deepcopy__(self, memo):
        # Copy the Regex part via the base-class deepcopy, then the flag.
        duplicate = copy.deepcopy(super(Filter, self), memo)
        duplicate.is_filter_out = copy.deepcopy(self.is_filter_out, memo)
        return duplicate
86 def parse_commandline_args(parser):
87 class RegexAction(argparse.Action):
88 """Add a regular expression option value to a list of regular expressions.
89 This compiles the expression, wraps it in a Regex and adds it to the option
90 value list."""
92 def __init__(self, option_strings, dest, nargs=None, **kwargs):
93 if nargs is not None:
94 raise ValueError("nargs not allowed")
95 super(RegexAction, self).__init__(option_strings, dest, **kwargs)
97 def do_call(self, namespace, values, flags):
98 value_list = getattr(namespace, self.dest)
99 if value_list is None:
100 value_list = []
102 try:
103 value_list.append(Regex(re.compile(values, flags)))
104 except re.error as error:
105 raise ValueError(
106 "{}: Invalid regular expression '{}' ({})".format(
107 option_string, error.pattern, error.msg
111 setattr(namespace, self.dest, value_list)
113 def __call__(self, parser, namespace, values, option_string=None):
114 self.do_call(namespace, values, 0)
116 class FilterAction(RegexAction):
117 """Add a filter to a list of filter option values."""
119 def __init__(self, option_strings, dest, nargs=None, **kwargs):
120 super(FilterAction, self).__init__(option_strings, dest, nargs, **kwargs)
122 def __call__(self, parser, namespace, values, option_string=None):
123 super(FilterAction, self).__call__(parser, namespace, values, option_string)
125 value_list = getattr(namespace, self.dest)
127 is_filter_out = option_string == "--filter-out"
129 value_list[-1] = Filter(value_list[-1].regex, is_filter_out)
131 setattr(namespace, self.dest, value_list)
133 filter_group = parser.add_argument_group(
134 "filtering",
135 """Filters are applied to each output line according to the order given. The
136 first matching filter terminates filter processing for that current line.""",
139 filter_group.add_argument(
140 "--filter",
141 action=FilterAction,
142 dest="filters",
143 metavar="REGEX",
144 help="Only include lines matching REGEX (may be specified multiple times)",
146 filter_group.add_argument(
147 "--filter-out",
148 action=FilterAction,
149 dest="filters",
150 metavar="REGEX",
151 help="Exclude lines matching REGEX",
154 parser.add_argument(
155 "--include-generated-funcs",
156 action="store_true",
157 help="Output checks for functions not in source",
159 parser.add_argument(
160 "-v", "--verbose", action="store_true", help="Show verbose output"
162 parser.add_argument(
163 "-u",
164 "--update-only",
165 action="store_true",
166 help="Only update test if it was already autogened",
168 parser.add_argument(
169 "--force-update",
170 action="store_true",
171 help="Update test even if it was autogened by a different script",
173 parser.add_argument(
174 "--enable",
175 action="store_true",
176 dest="enabled",
177 default=True,
178 help="Activate CHECK line generation from this point forward",
180 parser.add_argument(
181 "--disable",
182 action="store_false",
183 dest="enabled",
184 help="Deactivate CHECK line generation from this point forward",
186 parser.add_argument(
187 "--replace-value-regex",
188 nargs="+",
189 default=[],
190 help="List of regular expressions to replace matching value names",
192 parser.add_argument(
193 "--prefix-filecheck-ir-name",
194 default="",
195 help="Add a prefix to FileCheck IR value names to avoid conflicts with scripted names",
197 parser.add_argument(
198 "--global-value-regex",
199 nargs="+",
200 default=[],
201 help="List of regular expressions that a global value declaration must match to generate a check (has no effect if checking globals is not enabled)",
203 parser.add_argument(
204 "--global-hex-value-regex",
205 nargs="+",
206 default=[],
207 help="List of regular expressions such that, for matching global value declarations, literal integer values should be encoded in hex in the associated FileCheck directives",
209 # FIXME: in 3.9, we can use argparse.BooleanOptionalAction. At that point,
210 # we need to rename the flag to just -generate-body-for-unused-prefixes.
211 parser.add_argument(
212 "--no-generate-body-for-unused-prefixes",
213 action="store_false",
214 dest="gen_unused_prefix_body",
215 default=True,
216 help="Generate a function body that always matches for unused prefixes. This is useful when unused prefixes are desired, and it avoids needing to annotate each FileCheck as allowing them.",
218 # This is the default when regenerating existing tests. The default when
219 # generating new tests is determined by DEFAULT_VERSION.
220 parser.add_argument(
221 "--version", type=int, default=1, help="The version of output format"
223 args = parser.parse_args()
224 # TODO: This should not be handled differently from the other options
225 global _verbose, _global_value_regex, _global_hex_value_regex
226 _verbose = args.verbose
227 _global_value_regex = args.global_value_regex
228 _global_hex_value_regex = args.global_hex_value_regex
229 return args
def parse_args(parser, argv):
    """Parse `argv` with `parser` and normalize version-dependent options."""
    args = parser.parse_args(argv)
    # Output-format version 2 made --function-signature implied.
    if args.version >= 2:
        args.function_signature = True
    # TODO: This should not be handled differently from the other options
    global _verbose, _global_value_regex, _global_hex_value_regex
    _verbose = args.verbose
    _global_value_regex = args.global_value_regex
    _global_hex_value_regex = args.global_hex_value_regex
    # Resolve the 'default' placeholder for --check-globals: 'smart' only
    # exists from output-format version 4 onward.
    if "check_globals" in args and args.check_globals == "default":
        args.check_globals = "smart" if args.version >= 4 else "none"
    return args
class InputLineInfo(object):
    """One input line bundled with its line number and the option state
    (args namespace and raw argv) in effect at that point of the file."""

    def __init__(self, line, line_number, args, argv):
        self.line, self.line_number = line, line_number
        self.args, self.argv = args, argv
class TestInfo(object):
    """State for one test file being updated: its parsed options, RUN lines,
    input lines, and the comment/advert strings used when rewriting it."""

    def __init__(
        self,
        test,
        parser,
        script_name,
        input_lines,
        args,
        argv,
        comment_prefix,
        argparse_callback,
    ):
        self.parser = parser
        self.argparse_callback = argparse_callback
        self.path = test
        self.args = args
        if args.prefix_filecheck_ir_name:
            # Mirror the option into the module global read by get_value_name.
            global _prefix_filecheck_ir_name
            _prefix_filecheck_ir_name = args.prefix_filecheck_ir_name
        self.argv = argv
        self.input_lines = input_lines
        self.run_lines = find_run_lines(test, self.input_lines)
        self.comment_prefix = comment_prefix
        if self.comment_prefix is None:
            # Infer the comment leader from the file extension.
            if self.path.endswith(".mir") or self.path.endswith(".txt"):
                self.comment_prefix = "#"
            elif self.path.endswith(".s"):
                self.comment_prefix = "//"
            else:
                self.comment_prefix = ";"
        # First-line advert marking a test as autogenerated by some script.
        self.autogenerated_note_prefix = self.comment_prefix + " " + UTC_ADVERT
        self.test_autogenerated_note = self.autogenerated_note_prefix + script_name
        self.test_autogenerated_note += get_autogennote_suffix(parser, self.args)
        # Doubled comment marker introducing the unused-prefix section.
        self.test_unused_note = (
            self.comment_prefix + self.comment_prefix + " " + UNUSED_NOTE
        )

    def ro_iterlines(self):
        """Yield an InputLineInfo per input line; args/argv for each line are
        recomputed via check_for_command, without mutating self."""
        for line_num, input_line in enumerate(self.input_lines):
            args, argv = check_for_command(
                input_line, self.parser, self.args, self.argv, self.argparse_callback
            )
            yield InputLineInfo(input_line, line_num, args, argv)

    def iterlines(self, output_lines):
        """Yield lines eligible for CHECK generation, appending pass-through
        lines (and the autogenerated note) to output_lines as a side effect.

        Note: mutates self.args/self.argv to the per-line state as it goes.
        """
        output_lines.append(self.test_autogenerated_note)
        for line_info in self.ro_iterlines():
            input_line = line_info.line
            # Discard any previous script advertising.
            if input_line.startswith(self.autogenerated_note_prefix):
                continue
            self.args = line_info.args
            self.argv = line_info.argv
            if not self.args.enabled:
                # CHECK generation disabled here (--disable): copy through.
                output_lines.append(input_line)
                continue
            yield line_info

    def get_checks_for_unused_prefixes(
        self, run_list, used_prefixes: List[str]
    ) -> List[str]:
        """Return filler check lines — one match-anything '{{.*}}' per prefix —
        for prefixes declared in run_list but absent from used_prefixes."""
        run_list = [element for element in run_list if element[0] is not None]
        unused_prefixes = set(
            [prefix for sublist in run_list for prefix in sublist[0]]
        ).difference(set(used_prefixes))

        ret = []
        if not unused_prefixes:
            return ret
        ret.append(self.test_unused_note)
        for unused in sorted(unused_prefixes):
            ret.append(
                "{comment} {prefix}: {match_everything}".format(
                    comment=self.comment_prefix,
                    prefix=unused,
                    match_everything=r"""{{.*}}""",
                )
            )
        return ret
def itertests(
    test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None
):
    """Yield a TestInfo for every test file matching the glob patterns.

    Skips files marked UTC_AVOID, files autogenerated by a different script
    (unless --force-update), and — under --update-only — files that were not
    autogenerated at all. Lines at/after the UNUSED_NOTE marker are dropped
    from the yielded input.
    """
    for pattern in test_patterns:
        # On Windows we must expand the patterns ourselves.
        tests_list = glob.glob(pattern)
        if not tests_list:
            warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
            continue
        for test in tests_list:
            with open(test) as f:
                input_lines = [l.rstrip() for l in f]
            first_line = input_lines[0] if input_lines else ""
            if UTC_AVOID in first_line:
                warn("Skipping test that must not be autogenerated: " + test)
                continue
            is_regenerate = UTC_ADVERT in first_line

            # If we're generating a new test, set the default version to the latest.
            argv = sys.argv[:]
            if not is_regenerate:
                argv.insert(1, "--version=" + str(DEFAULT_VERSION))

            args = parse_args(parser, argv[1:])
            if argparse_callback is not None:
                argparse_callback(args)
            if is_regenerate:
                if script_name not in first_line and not args.force_update:
                    warn(
                        "Skipping test which wasn't autogenerated by " + script_name,
                        test,
                    )
                    continue
                # Re-apply any UTC_ARGS recorded on the advert line.
                args, argv = check_for_command(
                    first_line, parser, args, argv, argparse_callback
                )
            elif args.update_only:
                assert UTC_ADVERT not in first_line
                warn("Skipping test which isn't autogenerated: " + test)
                continue
            final_input_lines = []
            for l in input_lines:
                # Everything from the unused-prefix note on is autogenerated.
                if UNUSED_NOTE in l:
                    break
                final_input_lines.append(l)
            yield TestInfo(
                test,
                parser,
                script_name,
                final_input_lines,
                args,
                argv,
                comment_prefix,
                argparse_callback,
            )
def should_add_line_to_output(
    input_line,
    prefix_set,
    skip_global_checks=False,
    skip_same_checks=False,
    comment_marker=";",
):
    """Decide whether a line of the original test body is copied to the
    updated test, or dropped because new CHECK lines will replace it."""
    stripped = input_line.strip()
    # Drop blank comment lines in the IR, except while scanning globals.
    if stripped == comment_marker and not skip_global_checks:
        return False
    # Drop the special double-comment separator line.
    if stripped == comment_marker + SEPARATOR:
        return False
    # Keep anything that is not an existing CHECK line for one of the
    # prefixes being regenerated — we're building our own.
    m = CHECK_RE.match(input_line)
    if not (m and m.group(1) in prefix_set):
        return True
    if skip_same_checks and CHECK_SAME_RE.match(input_line):
        # The previous CHECK line was removed, so don't leave this dangling
        return False
    if skip_global_checks:
        # Only drop checks that reference global value definitions.
        return re.search(r"(\[\[|@)", input_line) is None
    return False
def collect_original_check_lines(ti: TestInfo, prefix_set: set):
    """
    Collect pre-existing check lines into a dictionary `result` which is
    returned.

    result[func_name][prefix] is filled with a list of right-hand-sides of check
    lines.
    """
    result = collections.defaultdict(lambda: {})

    # State machine: a CHECK-LABEL opens a (prefix, function) run; subsequent
    # CHECK lines with the same prefix are appended to it.
    current_prefix = None
    current_function = None
    for input_line_info in ti.ro_iterlines():
        input_line = input_line_info.line
        if input_line.lstrip().startswith(";"):
            m = CHECK_RE.match(input_line)
            if m is not None:
                prefix = m.group(1)
                check_kind = m.group(2)
                line = input_line[m.end() :].strip()

                # A prefix change ends the run we were collecting.
                if prefix != current_prefix:
                    current_function = None
                    current_prefix = None

                if check_kind not in ["LABEL", "SAME"]:
                    if current_function is not None:
                        current_function.append(line)
                    continue

                # CHECK-SAME continues the previous line; nothing to record.
                if check_kind == "SAME":
                    continue

                if check_kind == "LABEL":
                    m = IR_FUNCTION_LABEL_RE.match(line)
                    if m is not None:
                        func_name = m.group(1)
                        if (
                            ti.args.function is not None
                            and func_name != ti.args.function
                        ):
                            # When filtering on a specific function, skip all others.
                            continue

                        current_prefix = prefix
                        current_function = result[func_name][prefix] = []
                        continue

        # Any non-CHECK line (or unmatched LABEL) terminates the run.
        current_function = None

    return result
# Perform lit-like substitutions
def getSubstitutions(sourcepath):
    """Return lit-style (pattern, replacement) pairs for a test path:
    %s -> the file, %S/%p -> its directory, %{pathsep} -> os.pathsep."""
    directory = os.path.dirname(sourcepath)
    return [
        ("%s", sourcepath),
        ("%S", directory),
        ("%p", directory),
        ("%{pathsep}", os.pathsep),
    ]
def applySubstitutions(s, substitutions):
    """Apply each (pattern, replacement) pair to `s`, in order."""
    result = s
    for pattern, replacement in substitutions:
        result = result.replace(pattern, replacement)
    return result
# Invoke the tool that is being tested.
def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False):
    """Run `exe` on test input `ir` and return its stdout as text.

    cmd_args may be a list (safe, no shell) or a string (run via the shell).
    preprocess_cmd, when given, is a shell command whose stdout is piped into
    the tool instead of the raw file. Lit-like %-substitutions (see
    getSubstitutions) are applied to both commands. Line endings in the
    output are normalized to '\\n'.
    """
    with open(ir) as ir_file:
        substitutions = getSubstitutions(ir)

        # TODO Remove the str form which is used by update_test_checks.py and
        # update_llc_test_checks.py
        # The safer list form is used by update_cc_test_checks.py
        if preprocess_cmd:
            # Allow pre-processing the IR file (e.g. using sed):
            assert isinstance(
                preprocess_cmd, str
            )  # TODO: use a list instead of using shell
            preprocess_cmd = applySubstitutions(preprocess_cmd, substitutions).strip()
            if verbose:
                print(
                    "Pre-processing input file: ",
                    ir,
                    " with command '",
                    preprocess_cmd,
                    "'",
                    sep="",
                    file=sys.stderr,
                )
            pp = subprocess.Popen(
                preprocess_cmd,
                shell=True,
                stdin=subprocess.DEVNULL,
                stdout=subprocess.PIPE,
            )
            # Feed the tool from the preprocessor's stdout instead of the file.
            ir_file = pp.stdout

        if isinstance(cmd_args, list):
            args = [applySubstitutions(a, substitutions) for a in cmd_args]
            stdout = subprocess.check_output([exe] + args, stdin=ir_file)
        else:
            stdout = subprocess.check_output(
                exe + " " + applySubstitutions(cmd_args, substitutions),
                shell=True,
                stdin=ir_file,
            )
    if sys.version_info[0] > 2:
        # FYI, if you crashed here with a decode error, your run line probably
        # results in bitcode or other binary format being written to the pipe.
        # For an opt test, you probably want to add -S or -disable-output.
        stdout = stdout.decode()
    # Fix line endings to unix CR style.
    return stdout.replace("\r\n", "\n")
##### LLVM IR parser
# Shared regular expressions for parsing RUN lines, existing CHECK lines and
# tool output. (Several re.compile(...) calls were truncated in this view of
# the file; their closing parentheses are restored here.)
RUN_LINE_RE = re.compile(r"^\s*(?://|[;#])\s*RUN:\s*(.*)$")
CHECK_PREFIX_RE = re.compile(r"--?check-prefix(?:es)?[= ](\S+)")
PREFIX_RE = re.compile("^[a-zA-Z0-9_-]+$")
CHECK_RE = re.compile(
    r"^\s*(?://|[;#])\s*([^:]+?)(?:-(NEXT|NOT|DAG|LABEL|SAME|EMPTY))?:"
)
CHECK_SAME_RE = re.compile(r"^\s*(?://|[;#])\s*([^:]+?)(?:-SAME)?:")

UTC_ARGS_KEY = "UTC_ARGS:"
UTC_ARGS_CMD = re.compile(r".*" + UTC_ARGS_KEY + r"\s*(?P<cmd>.*)\s*$")
UTC_ADVERT = "NOTE: Assertions have been autogenerated by "
UTC_AVOID = "NOTE: Do not autogenerate"
UNUSED_NOTE = "NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:"

OPT_FUNCTION_RE = re.compile(
    r"^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s():,]+?))?\s*define\s+(?P<funcdef_attrs_and_ret>[^@]*)@(?P<func>[\w.$-]+?)\s*"
    r"(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P<body>.*?)^\}$",
    flags=(re.M | re.S),
)

ANALYZE_FUNCTION_RE = re.compile(
    r"^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.$-]+?)\':"
    r"\s*\n(?P<body>.*)$",
    flags=(re.X | re.S),
)

LOOP_PASS_DEBUG_RE = re.compile(
    r"^\s*\'(?P<func>[\w.$-]+?)\'[^\n]*" r"\s*\n(?P<body>.*)$", flags=(re.X | re.S)
)

IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(')
IR_FUNCTION_LABEL_RE = re.compile(
    r'^\s*(?:define\s+(?:internal\s+)?[^@]*)?@"?([\w.$-]+)"?\s*\('
)
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
TRIPLE_ARG_RE = re.compile(r"-m?triple[= ]([^ ]+)")
MARCH_ARG_RE = re.compile(r"-march[= ]([^ ]+)")
DEBUG_ONLY_ARG_RE = re.compile(r"-debug-only[= ]([^ ]+)")

SCRUB_LEADING_WHITESPACE_RE = re.compile(r"^(\s+)")
SCRUB_WHITESPACE_RE = re.compile(r"(?!^(| \w))[ \t]+", flags=re.M)
SCRUB_PRESERVE_LEADING_WHITESPACE_RE = re.compile(r"((?!^)[ \t]*(\S))[ \t]+")
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r"[ \t]+$", flags=re.M)
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(
    r"([ \t]|(#[0-9]+))+$", flags=re.M
)
SCRUB_KILL_COMMENT_RE = re.compile(r"^ *#+ +kill:.*\n")
SCRUB_LOOP_COMMENT_RE = re.compile(
    r"# =>This Inner Loop Header:.*|# in Loop:.*", flags=re.M
)
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r"(?<=\S)+[ \t]*#$", flags=re.M)

SEPARATOR = "."
def error(msg, test_file=None):
    """Emit an ERROR-level diagnostic to stderr, optionally tagged with a file."""
    text = "{}: {}".format(msg, test_file) if test_file else msg
    print("ERROR: {}".format(text), file=sys.stderr)
def warn(msg, test_file=None):
    """Emit a WARNING-level diagnostic to stderr, optionally tagged with a file."""
    text = "{}: {}".format(msg, test_file) if test_file else msg
    print("WARNING: {}".format(text), file=sys.stderr)
def debug(*args, **kwargs):
    """Print only when module-level verbose mode is on; output defaults to stderr."""
    # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
    kwargs.setdefault("file", sys.stderr)
    if _verbose:
        print(*args, **kwargs)
def find_run_lines(test, lines):
    """Extract the RUN: commands from `lines`, joining backslash-continued
    RUN lines into a single command string."""
    debug("Scanning for RUN lines in test file:", test)
    raw_lines = [m.group(1) for m in (RUN_LINE_RE.match(l) for l in lines) if m]
    run_lines = []
    for raw in raw_lines:
        if run_lines and run_lines[-1].endswith("\\"):
            # Continuation: fold into the previous RUN command.
            run_lines[-1] = run_lines[-1].rstrip("\\") + " " + raw
        else:
            run_lines.append(raw)
    debug("Found {} RUN lines in {}:".format(len(run_lines), test))
    for cmd in run_lines:
        debug("  RUN: {}".format(cmd))
    return run_lines
def get_triple_from_march(march):
    """Map a -march value to a target triple by prefix; fall back to 'x86'
    with a warning on stderr."""
    triples = {
        "amdgcn": "amdgcn",
        "r600": "r600",
        "mips": "mips",
        "nvptx64": "nvptx64",
        "sparc": "sparc",
        "hexagon": "hexagon",
        "ve": "ve",
    }
    found = next(
        (triple for prefix, triple in triples.items() if march.startswith(prefix)),
        None,
    )
    if found is not None:
        return found
    print("Cannot find a triple. Assume 'x86'", file=sys.stderr)
    return "x86"
def apply_filters(line, filters):
    """Return True if `line` survives `filters`.

    The first filter whose regex matches decides the outcome: keep for a
    plain --filter, drop for a --filter-out. If nothing matched, the line is
    kept only when every filter was a filter-out.
    """
    saw_keep_filter = False
    for flt in filters:
        if not flt.is_filter_out:
            saw_keep_filter = True
        if flt.search(line):
            return not flt.is_filter_out
    # No filter matched: keep only if we never saw a keep-style filter.
    return not saw_keep_filter
def do_filter(body, filters):
    """Apply `filters` to `body` line by line; with no filters, return the
    body unchanged."""
    if not filters:
        return body
    surviving = [line for line in body.splitlines() if apply_filters(line, filters)]
    return "\n".join(surviving)
def scrub_body(body):
    """Normalize whitespace in a function body: collapse internal runs of
    spaces/tabs (keeping leading indentation), expand tabs to two spaces,
    and strip trailing whitespace."""
    # Scrub runs of whitespace out of the assembly, but leave the leading
    # whitespace in place.
    body = SCRUB_PRESERVE_LEADING_WHITESPACE_RE.sub(lambda m: m.group(2) + " ", body)
    # Expand the tabs used for indentation.
    body = body.expandtabs(2)
    # Strip trailing whitespace.
    return SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r"", body)
def do_scrub(body, scrubber, scrubber_args, extra):
    """Run `scrubber` on `body` with `scrubber_args`.

    When args are present, a deep copy is taken and `extra` is stored on the
    first arg's `extra_scrub` attribute so the caller's args stay untouched.
    """
    if not scrubber_args:
        return scrubber(body, *scrubber_args)
    local_args = copy.deepcopy(scrubber_args)
    local_args[0].extra_scrub = extra
    return scrubber(body, *local_args)
# Build up a dictionary of all the function bodies.
class function_body(object):
    """Scrubbed body of one function as produced by a RUN line, plus the
    pieces (signature, attributes, separator) needed to emit check lines."""

    def __init__(
        self,
        string,
        extra,
        funcdef_attrs_and_ret,
        args_and_sig,
        attrs,
        func_name_separator,
        ginfo,
    ):
        # Fully scrubbed body used for the generated checks.
        self.scrub = string
        # Body after the extra scrub pass; used for cross-RUN-line comparison.
        self.extrascrub = extra
        self.funcdef_attrs_and_ret = funcdef_attrs_and_ret
        self.args_and_sig = args_and_sig
        self.attrs = attrs
        self.func_name_separator = func_name_separator
        self._ginfo = ginfo

    def is_same_except_arg_names(
        self, extrascrub, funcdef_attrs_and_ret, args_and_sig, attrs
    ):
        """Return True if this body and the given one differ only in the names
        of local ('%') argument values, so they can share check lines."""
        arg_names = set()

        def drop_arg_names(match):
            # Remove local ('%') value names from the signature, recording
            # them; other value kinds are left in place.
            nameless_value = self._ginfo.get_nameless_value_from_match(match)
            if nameless_value.check_key == "%":
                arg_names.add(self._ginfo.get_name_from_match(match))
                substitute = ""
            else:
                substitute = match.group(2)
            return match.group(1) + substitute + match.group(match.lastindex)

        def repl_arg_names(match):
            # Remove uses of the recorded argument names from body text.
            nameless_value = self._ginfo.get_nameless_value_from_match(match)
            if (
                nameless_value.check_key == "%"
                and self._ginfo.get_name_from_match(match) in arg_names
            ):
                return match.group(1) + match.group(match.lastindex)
            return match.group(1) + match.group(2) + match.group(match.lastindex)

        if self.funcdef_attrs_and_ret != funcdef_attrs_and_ret:
            return False
        if self.attrs != attrs:
            return False

        regexp = self._ginfo.get_regexp()
        ans0 = regexp.sub(drop_arg_names, self.args_and_sig)
        ans1 = regexp.sub(drop_arg_names, args_and_sig)
        if ans0 != ans1:
            return False
        if self._ginfo.is_asm():
            # Check without replacements, the replacements are not applied to the
            # body for backend checks.
            return self.extrascrub == extrascrub

        # IR mode: compare bodies with argument names stripped and IR
        # end-of-line comments removed.
        es0 = regexp.sub(repl_arg_names, self.extrascrub)
        es1 = regexp.sub(repl_arg_names, extrascrub)
        es0 = SCRUB_IR_COMMENT_RE.sub(r"", es0)
        es1 = SCRUB_IR_COMMENT_RE.sub(r"", es1)
        return es0 == es1

    def __str__(self):
        return self.scrub
class FunctionTestBuilder:
    """Accumulates scrubbed function bodies per check prefix across all RUN
    lines of a test, detecting bodies that conflict between RUN lines."""

    def __init__(self, run_list, flags, scrubber_args, path, ginfo):
        self._verbose = flags.verbose
        self._record_args = flags.function_signature
        self._check_attributes = flags.check_attributes
        # Strip double-quotes if input was read by UTC_ARGS
        self._filters = (
            list(
                map(
                    lambda f: Filter(
                        re.compile(f.pattern().strip('"'), f.flags()), f.is_filter_out
                    ),
                    flags.filters,
                )
            )
            if flags.filters
            else []
        )
        self._scrubber_args = scrubber_args
        self._path = path
        self._ginfo = ginfo
        # Strip double-quotes if input was read by UTC_ARGS
        self._replace_value_regex = list(
            map(lambda x: x.strip('"'), flags.replace_value_regex)
        )
        # _func_dict[prefix][func] -> function_body, or None on conflict.
        self._func_dict = {}
        # _func_order[prefix] -> function names in order of first appearance.
        self._func_order = {}
        self._global_var_dict = {}
        self._processed_prefixes = set()
        for tuple in run_list:
            for prefix in tuple[0]:
                self._func_dict.update({prefix: dict()})
                self._func_order.update({prefix: []})
                self._global_var_dict.update({prefix: dict()})

    def finish_and_get_func_dict(self):
        # Warn once per prefix whose bodies conflicted on every function.
        for prefix in self.get_failed_prefixes():
            warn(
                "Prefix %s had conflicting output from different RUN lines for all functions in test %s"
                % (
                    prefix,
                    self._path,
                )
            )
        return self._func_dict

    def func_order(self):
        return self._func_order

    def global_var_dict(self):
        return self._global_var_dict

    def is_filtered(self):
        return bool(self._filters)

    def process_run_line(self, function_re, scrubber, raw_tool_output, prefixes):
        """Parse one RUN line's tool output with `function_re` and record each
        function body under every prefix of that RUN line."""
        build_global_values_dictionary(
            self._global_var_dict, raw_tool_output, prefixes, self._ginfo
        )
        for m in function_re.finditer(raw_tool_output):
            if not m:
                continue
            func = m.group("func")
            body = m.group("body")
            # func_name_separator is the string that is placed right after function name at the
            # beginning of assembly function definition. In most assemblies, that is just a
            # colon: `foo:`. But, for example, in nvptx it is a brace: `foo(`. If is_backend is
            # False, just assume that separator is an empty string.
            if self._ginfo.is_asm():
                # Use ':' as default separator.
                func_name_separator = (
                    m.group("func_name_separator")
                    if "func_name_separator" in m.groupdict()
                    else ":"
                )
            else:
                func_name_separator = ""
            attrs = m.group("attrs") if self._check_attributes else ""
            funcdef_attrs_and_ret = (
                m.group("funcdef_attrs_and_ret") if self._record_args else ""
            )
            # Determine if we print arguments, the opening brace, or nothing after the
            # function name
            if self._record_args and "args_and_sig" in m.groupdict():
                args_and_sig = scrub_body(m.group("args_and_sig").strip())
            elif "args_and_sig" in m.groupdict():
                args_and_sig = "("
            else:
                args_and_sig = ""
            filtered_body = do_filter(body, self._filters)
            scrubbed_body = do_scrub(
                filtered_body, scrubber, self._scrubber_args, extra=False
            )
            scrubbed_extra = do_scrub(
                filtered_body, scrubber, self._scrubber_args, extra=True
            )
            if "analysis" in m.groupdict():
                analysis = m.group("analysis")
                if analysis not in SUPPORTED_ANALYSES:
                    warn("Unsupported analysis mode: %r!" % (analysis,))
            if func.startswith("stress"):
                # We only use the last line of the function body for stress tests.
                scrubbed_body = "\n".join(scrubbed_body.splitlines()[-1:])
            if self._verbose:
                print("Processing function: " + func, file=sys.stderr)
                for l in scrubbed_body.splitlines():
                    print("  " + l, file=sys.stderr)
            for prefix in prefixes:
                # Replace function names matching the regex.
                for regex in self._replace_value_regex:
                    # Pattern that matches capture groups in the regex in leftmost order.
                    group_regex = re.compile(r"\(.*?\)")
                    # Replace function name with regex.
                    match = re.match(regex, func)
                    if match:
                        func_repl = regex
                        # Replace any capture groups with their matched strings.
                        for g in match.groups():
                            func_repl = group_regex.sub(
                                re.escape(g), func_repl, count=1
                            )
                        func = re.sub(func_repl, "{{" + func_repl + "}}", func)

                    # Replace all calls to regex matching functions.
                    matches = re.finditer(regex, scrubbed_body)
                    for match in matches:
                        func_repl = regex
                        # Replace any capture groups with their matched strings.
                        for g in match.groups():
                            func_repl = group_regex.sub(
                                re.escape(g), func_repl, count=1
                            )
                        # Substitute function call names that match the regex with the same
                        # capture groups set.
                        scrubbed_body = re.sub(
                            func_repl, "{{" + func_repl + "}}", scrubbed_body
                        )

                if func in self._func_dict[prefix]:
                    if self._func_dict[prefix][func] is not None and (
                        str(self._func_dict[prefix][func]) != scrubbed_body
                        or self._func_dict[prefix][func].args_and_sig != args_and_sig
                        or self._func_dict[prefix][func].attrs != attrs
                        or self._func_dict[prefix][func].funcdef_attrs_and_ret
                        != funcdef_attrs_and_ret
                    ):
                        if self._func_dict[prefix][func].is_same_except_arg_names(
                            scrubbed_extra,
                            funcdef_attrs_and_ret,
                            args_and_sig,
                            attrs,
                        ):
                            self._func_dict[prefix][func].scrub = scrubbed_extra
                            self._func_dict[prefix][func].args_and_sig = args_and_sig
                        else:
                            # This means a previous RUN line produced a body for this function
                            # that is different from the one produced by this current RUN line,
                            # so the body can't be common across RUN lines. We use None to
                            # indicate that.
                            self._func_dict[prefix][func] = None
                else:
                    if prefix not in self._processed_prefixes:
                        self._func_dict[prefix][func] = function_body(
                            scrubbed_body,
                            scrubbed_extra,
                            funcdef_attrs_and_ret,
                            args_and_sig,
                            attrs,
                            func_name_separator,
                            self._ginfo,
                        )
                        self._func_order[prefix].append(func)
                    else:
                        # An earlier RUN line used this check prefixes but didn't produce
                        # a body for this function. This happens in Clang tests that use
                        # preprocesser directives to exclude individual functions from some
                        # RUN lines.
                        self._func_dict[prefix][func] = None

    def processed_prefixes(self, prefixes):
        """
        Mark a set of prefixes as having had at least one applicable RUN line fully
        processed. This is used to filter out function bodies that don't have
        outputs for all RUN lines.
        """
        self._processed_prefixes.update(prefixes)

    def get_failed_prefixes(self):
        # This returns the list of those prefixes that failed to match any function,
        # because there were conflicting bodies produced by different RUN lines, in
        # all instances of the prefix.
        for prefix in self._func_dict:
            if self._func_dict[prefix] and (
                not [
                    fct
                    for fct in self._func_dict[prefix]
                    if self._func_dict[prefix][fct] is not None
                ]
            ):
                yield prefix
##### Generator of LLVM IR CHECK lines

# Matches an IR end-of-line comment: optional blanks, ';', rest of the line.
SCRUB_IR_COMMENT_RE = re.compile(r"\s*;.*")

# TODO: We should also derive check lines for global, debug, loop declarations, etc..
class NamelessValue:
    """
    A NamelessValue object represents a type of value in the IR whose "name" we
    generalize in the generated check lines; where the "name" could be an actual
    name (as in e.g. `@some_global` or `%x`) or just a number (as in e.g. `%12`
    or `!4`).
    """

    def __init__(
        self,
        check_prefix,
        check_key,
        ir_prefix,
        ir_regexp,
        global_ir_rhs_regexp,
        *,
        is_before_functions=False,
        is_number=False,
        replace_number_with_counter=False,
        match_literally=False,
        interlaced_with_previous=False,
        ir_suffix=r"",
    ):
        # Prefix used when inventing FileCheck variable names for this kind.
        self.check_prefix = check_prefix
        # Sigil keying the value kind; '%' marks function-local IR values
        # (see is_local_def_ir_value below).
        self.check_key = check_key
        # Regexes for the literal text surrounding / forming the value name.
        self.ir_prefix = ir_prefix
        self.ir_regexp = ir_regexp
        self.ir_suffix = ir_suffix
        # RHS pattern for global declarations; None presumably means this
        # kind never appears as a global definition — confirm against users.
        self.global_ir_rhs_regexp = global_ir_rhs_regexp
        self.is_before_functions = is_before_functions
        self.is_number = is_number
        # Some variable numbers (e.g. MCINST1234) will change based on unrelated
        # modifications to LLVM, replace those with an incrementing counter.
        self.replace_number_with_counter = replace_number_with_counter
        self.match_literally = match_literally
        self.interlaced_with_previous = interlaced_with_previous
        # Per-instance map from original number/name to its counter value.
        self.variable_mapping = {}

    # Return true if this kind of IR value is defined "locally" to functions,
    # which we assume is only the case precisely for LLVM IR local values.
    def is_local_def_ir_value(self):
        return self.check_key == "%"

    # Return the IR regexp we use for this kind or IR value, e.g., [\w.-]+? for locals
    def get_ir_regex(self):
        # for backwards compatibility we check locals with '.*'
        if self.is_local_def_ir_value():
            return ".*"
        return self.ir_regexp

    # Create a FileCheck variable name based on an IR name.
    def get_value_name(self, var: str, check_prefix: str):
        var = var.replace("!", "")
        if self.replace_number_with_counter:
            assert var
            replacement = self.variable_mapping.get(var, None)
            if replacement is None:
                # Replace variable with an incrementing counter
                replacement = str(len(self.variable_mapping) + 1)
                self.variable_mapping[var] = replacement
            var = replacement
        # This is a nameless value, prepend check_prefix.
        if var.isdigit():
            var = check_prefix + var
        else:
            # This is a named value that clashes with the check_prefix, prepend with
            # _prefix_filecheck_ir_name, if it has been defined.
            if (
                may_clash_with_default_check_prefix_name(check_prefix, var)
                and _prefix_filecheck_ir_name
            ):
                var = _prefix_filecheck_ir_name + var
            # '.'/'-' are not valid in FileCheck variable names; use '_'.
            var = var.replace(".", "_")
            var = var.replace("-", "_")
        return var.upper()

    def get_affixes_from_match(self, match):
        # Group 2 holds the full value text; peel off the literal prefix and
        # suffix so the middle can become a FileCheck substitution.
        prefix = re.match(self.ir_prefix, match.group(2)).group(0)
        suffix = re.search(self.ir_suffix + "$", match.group(2)).group(0)
        return prefix, suffix
class GeneralizerInfo:
    """
    A GeneralizerInfo object holds information about how check lines should be generalized
    (e.g., variable names replaced by FileCheck meta variables) as well as per-test-file
    state (e.g. information about IR global variables).
    """

    MODE_IR = 0
    MODE_ASM = 1
    MODE_ANALYZE = 2

    def __init__(
        self,
        version,
        mode,
        nameless_values: List[NamelessValue],
        regexp_prefix,
        regexp_suffix,
    ):
        self._version = version
        self._mode = mode
        self._nameless_values = nameless_values

        self._regexp_prefix = regexp_prefix
        self._regexp_suffix = regexp_suffix

        # Regexp matching any of the nameless values, and a second regexp
        # restricted to global values whose numbering is unstable across runs.
        self._regexp, _ = self._build_regexp(False, False)
        (
            self._unstable_globals_regexp,
            self._unstable_globals_values,
        ) = self._build_regexp(True, True)

    def _build_regexp(self, globals_only, unstable_only):
        """Build one alternation regexp over the selected nameless values.

        Returns (compiled_regexp, values), where values[i] corresponds to the
        i-th alternative, i.e. to capture group
        i + FIRST_NAMELESS_GROUP_IN_MATCH of the compiled regexp.
        """
        matches = []
        values = []
        for nameless_value in self._nameless_values:
            is_global = nameless_value.global_ir_rhs_regexp is not None
            if globals_only and not is_global:
                continue
            if unstable_only and nameless_value.match_literally:
                continue

            match = f"(?:{nameless_value.ir_prefix}({nameless_value.ir_regexp}){nameless_value.ir_suffix})"
            if self.is_ir() and not globals_only and is_global:
                # Global definitions in IR only match at the start of a line.
                match = "^" + match
            matches.append(match)
            values.append(nameless_value)

        regexp_string = r"|".join(matches)

        return (
            re.compile(
                self._regexp_prefix + r"(" + regexp_string + r")" + self._regexp_suffix
            ),
            values,
        )

    def get_version(self):
        return self._version

    def is_ir(self):
        return self._mode == GeneralizerInfo.MODE_IR

    def is_asm(self):
        return self._mode == GeneralizerInfo.MODE_ASM

    def is_analyze(self):
        return self._mode == GeneralizerInfo.MODE_ANALYZE

    def get_nameless_values(self):
        return self._nameless_values

    def get_regexp(self):
        return self._regexp

    def get_unstable_globals_regexp(self):
        return self._unstable_globals_regexp

    # The entire match is group 0, the prefix has one group (=1), the entire
    # IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start.
    FIRST_NAMELESS_GROUP_IN_MATCH = 3

    def get_match_info(self, match):
        """
        Returns (name, nameless_value) for the given match object
        """
        if match.re == self._regexp:
            values = self._nameless_values
        else:
            # BUGFIX: this used to be a bare comparison with no effect; it is
            # meant as a sanity check that the match was produced by the
            # unstable-globals regexp.
            assert match.re == self._unstable_globals_regexp
            values = self._unstable_globals_values
        for i, value in enumerate(values):
            g = match.group(i + GeneralizerInfo.FIRST_NAMELESS_GROUP_IN_MATCH)
            if g is not None:
                return g, value
        error("Unable to identify the kind of IR value from the match!")
        return None, None

    # See get_idx_from_match
    def get_name_from_match(self, match):
        return self.get_match_info(match)[0]

    def get_nameless_value_from_match(self, match) -> NamelessValue:
        return self.get_match_info(match)[1]
def make_ir_generalizer(version):
    """Build the GeneralizerInfo used to generalize LLVM IR test output."""
    #                 check_prefix  check_key  ir_prefix      ir_regexp    global_ir_rhs_regexp
    base_values = [
        NamelessValue(r"TMP", "%", r"%", r"[\w$.-]+?", None),
        NamelessValue(r"ATTR", "#", r"#", r"[0-9]+", None),
        NamelessValue(r"ATTR", "#", r"attributes #", r"[0-9]+", r"{[^}]*}"),
        NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", None),
        NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", r".+", is_before_functions=True),
        NamelessValue(
            r"GLOBNAMED",
            "@",
            r"@",
            r"[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*",
            r".+",
            is_before_functions=True,
            match_literally=True,
            interlaced_with_previous=True,
        ),
        NamelessValue(r"DBG", "!", r"!dbg ", r"![0-9]+", None),
        NamelessValue(r"DIASSIGNID", "!", r"!DIAssignID ", r"![0-9]+", None),
        NamelessValue(r"PROF", "!", r"!prof ", r"![0-9]+", None),
        NamelessValue(r"TBAA", "!", r"!tbaa ", r"![0-9]+", None),
        NamelessValue(r"TBAA_STRUCT", "!", r"!tbaa.struct ", r"![0-9]+", None),
        NamelessValue(r"RNG", "!", r"!range ", r"![0-9]+", None),
        NamelessValue(r"LOOP", "!", r"!llvm.loop ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"", r"![0-9]+", r"(?:distinct |)!.*"),
        NamelessValue(r"ACC_GRP", "!", r"!llvm.access.group ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"![a-z.]+ ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"[, (]", r"![0-9]+", None),
    ]

    values = []
    # Basic block labels are only generalized from check-line version 5 on.
    if version >= 5:
        values.append(NamelessValue(r"BB", "%", r"label %", r"[\w$.-]+?", None))
        values.append(
            NamelessValue(r"BB", "%", r"^", r"[\w$.-]+?", None, ir_suffix=r":")
        )
    values.extend(base_values)

    return GeneralizerInfo(
        version, GeneralizerInfo.MODE_IR, values, r"(\s*)", r"([,\s\(\)\}]|\Z)"
    )
def make_asm_generalizer(version):
    """Build the GeneralizerInfo used to generalize assembly/MC test output."""
    # MCInst opcode numbers and register ids shift with unrelated LLVM
    # changes, so both are replaced by incrementing counters instead of
    # being matched verbatim.
    mcinst = NamelessValue(
        r"MCINST",
        "Inst#",
        "<MCInst #",
        r"\d+",
        r".+",
        is_number=True,
        replace_number_with_counter=True,
    )
    mcreg = NamelessValue(
        r"MCREG",
        "Reg:",
        "<MCOperand Reg:",
        r"\d+",
        r".+",
        is_number=True,
        replace_number_with_counter=True,
    )

    return GeneralizerInfo(
        version,
        GeneralizerInfo.MODE_ASM,
        [mcinst, mcreg],
        r"((?:#|//)\s*)",
        r"([>\s]|\Z)",
    )
def make_analyze_generalizer(version):
    """Build the GeneralizerInfo used to generalize analysis-pass output."""
    # Hexadecimal group ids are address-like and unstable across runs;
    # replace them with an incrementing counter.
    group_id = NamelessValue(
        r"GRP",
        "#",
        r"",
        r"0x[0-9a-f]+",
        None,
        replace_number_with_counter=True,
    )

    return GeneralizerInfo(
        version,
        GeneralizerInfo.MODE_ANALYZE,
        [group_id],
        r"(\s*)",
        r"(\)?:)",
    )
# Return true if var clashes with the scripted FileCheck check_prefix.
def may_clash_with_default_check_prefix_name(check_prefix, var):
    """A clash is a var of the form `<check_prefix><digits>` (case-insensitive)."""
    if not check_prefix:
        # Preserve the original short-circuit: a falsy prefix is returned as-is.
        return check_prefix
    clash_pattern = r"^" + check_prefix + r"[0-9]+?$"
    return re.match(clash_pattern, var, re.IGNORECASE)
def find_diff_matching(lhs: List[str], rhs: List[str]) -> List[tuple]:
    """
    Find a large ordered matching between strings in lhs and rhs.

    Think of this as finding the *unchanged* lines in a diff, where the entries
    of lhs and rhs are lines of the files being diffed.

    Returns a list of matched (lhs_idx, rhs_idx) pairs.
    """

    if not lhs or not rhs:
        return []

    # Collect matches in reverse order.
    matches = []

    # First, collect a set of candidate matching edges. We limit this to a
    # constant multiple of the input size to avoid quadratic runtime.
    patterns = collections.defaultdict(lambda: ([], []))

    for idx in range(len(lhs)):
        patterns[lhs[idx]][0].append(idx)
    for idx in range(len(rhs)):
        patterns[rhs[idx]][1].append(idx)

    multiple_patterns = []

    candidates = []
    for pattern in patterns.values():
        if not pattern[0] or not pattern[1]:
            continue

        if len(pattern[0]) == len(pattern[1]) == 1:
            # A line that is unique on both sides is always a candidate edge.
            candidates.append((pattern[0][0], pattern[1][0]))
        else:
            multiple_patterns.append(pattern)

    # Consider ambiguous lines cheapest-first so the edge budget below is
    # spent on the least ambiguous ones.
    multiple_patterns.sort(key=lambda pattern: len(pattern[0]) * len(pattern[1]))

    for pattern in multiple_patterns:
        if len(candidates) + len(pattern[0]) * len(pattern[1]) > 2 * (
            len(lhs) + len(rhs)
        ):
            break
        for lhs_idx in pattern[0]:
            for rhs_idx in pattern[1]:
                candidates.append((lhs_idx, rhs_idx))

    if not candidates:
        # The LHS and RHS either share nothing in common, or lines are just too
        # identical. In that case, let's give up and not match anything.
        return []

    # Compute a maximal crossing-free matching via an algorithm that is
    # inspired by a mixture of dynamic programming and line-sweeping in
    # discrete geometry.
    #
    # I would be surprised if this algorithm didn't exist somewhere in the
    # literature, but I found it without consciously recalling any
    # references, so you'll have to make do with the explanation below.
    # Sorry.
    #
    # The underlying graph is bipartite:
    # - nodes on the LHS represent lines in the original check
    # - nodes on the RHS represent lines in the new (updated) check
    #
    # Nodes are implicitly sorted by the corresponding line number.
    # Edges (unique_matches) are sorted by the line number on the LHS.
    #
    # Here's the geometric intuition for the algorithm.
    #
    # * Plot the edges as points in the plane, with the original line
    #   number on the X axis and the updated line number on the Y axis.
    # * The goal is to find a longest "chain" of points where each point
    #   is strictly above and to the right of the previous point.
    # * The algorithm proceeds by sweeping a vertical line from left to
    #   right.
    # * The algorithm maintains a table where `table[N]` answers the
    #   question "What is currently the 'best' way to build a chain of N+1
    #   points to the left of the vertical line". Here, 'best' means
    #   that the last point of the chain is a as low as possible (minimal
    #   Y coordinate).
    # * `table[N]` is `(y, point_idx)` where `point_idx` is the index of
    #   the last point in the chain and `y` is its Y coordinate
    # * A key invariant is that the Y values in the table are
    #   monotonically increasing
    # * Thanks to these properties, the table can be used to answer the
    #   question "What is the longest chain that can be built to the left
    #   of the vertical line using only points below a certain Y value",
    #   using a binary search over the table.
    # * The algorithm also builds a backlink structure in which every point
    #   links back to the previous point on a best (longest) chain ending
    #   at that point
    #
    # The core loop of the algorithm sweeps the line and updates the table
    # and backlink structure for every point that we cross during the sweep.
    # Therefore, the algorithm is trivially O(M log M) in the number of
    # points.
    candidates.sort(key=lambda candidate: (candidate[0], -candidate[1]))

    backlinks = []
    table_rhs_idx = []
    table_candidate_idx = []
    for _, rhs_idx in candidates:
        candidate_idx = len(backlinks)
        ti = bisect.bisect_left(table_rhs_idx, rhs_idx)

        # Update the table to record a best chain ending in the current point.
        # There always is one, and if any of the previously visited points had
        # a higher Y coordinate, then there is always a previously recorded best
        # chain that can be improved upon by using the current point.
        #
        # There is only one case where there is some ambiguity. If the
        # pre-existing entry table[ti] has the same Y coordinate / rhs_idx as
        # the current point (this can only happen if the same line appeared
        # multiple times on the LHS), then we could choose to keep the
        # previously recorded best chain instead. That would bias the algorithm
        # differently but should have no systematic impact on the quality of the
        # result.
        if ti < len(table_rhs_idx):
            table_rhs_idx[ti] = rhs_idx
            table_candidate_idx[ti] = candidate_idx
        else:
            table_rhs_idx.append(rhs_idx)
            table_candidate_idx.append(candidate_idx)
        if ti > 0:
            backlinks.append(table_candidate_idx[ti - 1])
        else:
            backlinks.append(None)

    # Commit to names in the matching by walking the backlinks. Recursively
    # attempt to fill in more matches in-between.
    match_idx = table_candidate_idx[-1]
    while match_idx is not None:
        current = candidates[match_idx]
        matches.append(current)
        match_idx = backlinks[match_idx]

    matches.reverse()
    return matches
# Placeholder inserted where a FileCheck variable occurs while check lines are
# diffed and variable names are remapped.
VARIABLE_TAG = "[[@@]]"
# Matches a FileCheck metavariable use/def, e.g. [[NAME]] or [[NAME:regex]].
METAVAR_RE = re.compile(r"\[\[([A-Z0-9_]+)(?::[^]]+)?\]\]")
# Matches the (possibly empty) run of trailing digits of a variable name.
NUMERIC_SUFFIX_RE = re.compile(r"[0-9]*$")
class TestVar:
    """One FileCheck variable, plus the prefix/suffix text that has surrounded
    every occurrence of it seen so far (folded into its definition if stable)."""

    def __init__(self, nameless_value: NamelessValue, prefix: str, suffix: str):
        self._nameless_value = nameless_value
        self._prefix = prefix
        self._suffix = suffix

    def seen(self, nameless_value: NamelessValue, prefix: str, suffix: str):
        # As soon as one occurrence disagrees on the surrounding text, stop
        # folding that text into the variable definition.
        if self._prefix != prefix:
            self._prefix = ""
        if self._suffix != suffix:
            self._suffix = ""

    def get_variable_name(self, text):
        value = self._nameless_value
        return value.get_value_name(text, value.check_prefix)

    def _consume_folded_affixes(self, prefix, suffix):
        # Affix text that is folded into the variable definition must not be
        # emitted again around the capture; assert consistency and drop it.
        if self._prefix:
            assert self._prefix == prefix
            prefix = ""
        if self._suffix:
            assert self._suffix == suffix
            suffix = ""
        return prefix, suffix

    def get_def(self, name, prefix, suffix):
        # Numeric values use FileCheck's numeric-substitution syntax.
        if self._nameless_value.is_number:
            return f"{prefix}[[#{name}:]]{suffix}"
        prefix, suffix = self._consume_folded_affixes(prefix, suffix)
        return f"{prefix}[[{name}:{self._prefix}{self._nameless_value.get_ir_regex()}{self._suffix}]]{suffix}"

    def get_use(self, name, prefix, suffix):
        if self._nameless_value.is_number:
            return f"{prefix}[[#{name}]]{suffix}"
        prefix, suffix = self._consume_folded_affixes(prefix, suffix)
        return f"{prefix}[[{name}]]{suffix}"
class CheckValueInfo:
    """Describes one FileCheck variable occurrence within a check line."""

    def __init__(self, key, text, name: str, prefix: str, suffix: str):
        # Kind key of the value, e.g. '%' for locals.
        self.key = key
        # Raw IR text the FileCheck variable stands for (without affixes).
        self.text = text
        # The FileCheck variable's name.
        self.name = name
        # Affix text captured by the NamelessValue regular expression.
        self.prefix = prefix
        self.suffix = suffix
# Represent a check line in a way that allows us to compare check lines while
# ignoring some or all of the FileCheck variable names.
class CheckLineInfo:
    def __init__(self, line, values):
        # Line with all FileCheck variable name occurrences replaced by VARIABLE_TAG
        self.line: str = line
        # Information on each FileCheck variable name occurrences in the line,
        # in order of appearance (one entry per VARIABLE_TAG in `line`).
        self.values: List[CheckValueInfo] = values

    def __repr__(self):
        return f"CheckLineInfo(line={self.line}, self.values={self.values})"
def remap_metavar_names(
    old_line_infos: List[CheckLineInfo],
    new_line_infos: List[CheckLineInfo],
    committed_names: Set[str],
) -> Mapping[str, str]:
    """
    Map all FileCheck variable names that appear in new_line_infos to new
    FileCheck variable names in an attempt to reduce the diff from old_line_infos
    to new_line_infos.

    This is done by:
    * Matching old check lines and new check lines using a diffing algorithm
      applied after replacing names with wildcards.
    * Committing to variable names such that the matched lines become equal
      (without wildcards) if possible
    * This is done recursively to handle cases where many lines are equal
      after wildcard replacement

    committed_names is updated in place with every name this function commits.
    """
    # Initialize uncommitted identity mappings
    new_mapping = {}
    for line in new_line_infos:
        for value in line.values:
            new_mapping[value.name] = value.name

    # Recursively commit to the identity mapping or find a better one
    def recurse(old_begin, old_end, new_begin, new_end):
        if old_begin == old_end or new_begin == new_end:
            return

        # Find a matching of lines where uncommitted names are replaced
        # with a placeholder.
        def diffify_line(line, mapper):
            values = []
            for value in line.values:
                mapped = mapper(value.name)
                values.append(mapped if mapped in committed_names else "?")
            return line.line.strip() + " @@@ " + " @ ".join(values)

        lhs_lines = [
            diffify_line(line, lambda x: x)
            for line in old_line_infos[old_begin:old_end]
        ]
        rhs_lines = [
            diffify_line(line, lambda x: new_mapping[x])
            for line in new_line_infos[new_begin:new_end]
        ]

        candidate_matches = find_diff_matching(lhs_lines, rhs_lines)

        # Translate slice-relative indices back into absolute line indices.
        candidate_matches = [
            (old_begin + lhs_idx, new_begin + rhs_idx)
            for lhs_idx, rhs_idx in candidate_matches
        ]

        # Candidate matches may conflict if they require conflicting mappings of
        # names. We want to determine a large set of compatible candidates,
        # because that leads to a small diff.
        #
        # We think of the candidates as vertices in a conflict graph. The
        # conflict graph has edges between incompatible candidates. We want to
        # find a large independent set in this graph.
        #
        # Greedily selecting candidates and removing incompatible ones has the
        # disadvantage that making few bad decisions early on can have huge
        # consequences.
        #
        # Instead, we implicitly compute multiple independent sets by greedily
        # assigning a *coloring* to the conflict graph. Then, we select the
        # largest color class (which is the largest independent set we found),
        # commit to all candidates in it, and recurse.
        #
        # Note that we don't actually materialize the conflict graph. Instead,
        # each color class tracks the information needed to decide implicitly
        # whether a vertex conflicts (has an edge to) any of the vertices added
        # to the color class so far.
        class Color:
            def __init__(self):
                # (lhs_idx, rhs_idx) of matches in this color
                self.matches = []

                # rhs_name -> lhs_name mappings required by this color
                self.mapping = {}

                # lhs_names committed for this color
                self.committed = set()

        colors = []

        for lhs_idx, rhs_idx in candidate_matches:
            lhs_line = old_line_infos[lhs_idx]
            rhs_line = new_line_infos[rhs_idx]

            # We scan through the uncommitted names in the candidate line and
            # filter out the color classes to which the candidate could be
            # assigned.
            #
            # Simultaneously, we prepare a new color class in case the candidate
            # conflicts with all colors that have been established so far.
            compatible_colors = colors[:]
            new_color = Color()
            new_color.matches.append((lhs_idx, rhs_idx))

            for lhs_value, rhs_value in zip(lhs_line.values, rhs_line.values):
                if new_mapping[rhs_value.name] in committed_names:
                    # The new value has already been committed. If it was mapped
                    # to the same name as the original value, we can consider
                    # committing other values from this line. Otherwise, we
                    # should ignore this line.
                    if new_mapping[rhs_value.name] == lhs_value.name:
                        continue
                    else:
                        break

                if rhs_value.name in new_color.mapping:
                    # Same, but for a possible commit happening on the same line
                    if new_color.mapping[rhs_value.name] == lhs_value.name:
                        continue
                    else:
                        break

                if (
                    lhs_value.name in committed_names
                    or lhs_value.name in new_color.committed
                ):
                    # We can't map this value because the name we would map it
                    # to has already been committed for something else. Give up
                    # on this line.
                    break

                new_color.mapping[rhs_value.name] = lhs_value.name
                new_color.committed.add(lhs_value.name)

                color_idx = 0
                while color_idx < len(compatible_colors):
                    color = compatible_colors[color_idx]
                    compatible = True
                    if rhs_value.name in color.mapping:
                        compatible = color.mapping[rhs_value.name] == lhs_value.name
                    else:
                        compatible = lhs_value.name not in color.committed
                    if compatible:
                        color_idx += 1
                    else:
                        del compatible_colors[color_idx]
            else:
                # We never broke out of the loop, which means that at a minimum,
                # this line is viable standalone
                if compatible_colors:
                    color = max(compatible_colors, key=lambda color: len(color.matches))
                    color.mapping.update(new_color.mapping)
                    color.committed.update(new_color.committed)
                    color.matches.append((lhs_idx, rhs_idx))
                else:
                    colors.append(new_color)

        if colors:
            # Pick the largest color class. This gives us a large independent
            # (non-conflicting) set of candidate matches. Assign all names
            # required by the independent set and recurse.
            max_color = max(colors, key=lambda color: len(color.matches))

            for rhs_var, lhs_var in max_color.mapping.items():
                new_mapping[rhs_var] = lhs_var
                committed_names.add(lhs_var)

                if (
                    lhs_var != rhs_var
                    and lhs_var in new_mapping
                    and new_mapping[lhs_var] == lhs_var
                ):
                    new_mapping[lhs_var] = "conflict_" + lhs_var

            matches = (
                [(old_begin - 1, new_begin - 1)]
                + max_color.matches
                + [(old_end, new_end)]
            )

            for (lhs_prev, rhs_prev), (lhs_next, rhs_next) in zip(matches, matches[1:]):
                recurse(lhs_prev + 1, lhs_next, rhs_prev + 1, rhs_next)

    recurse(0, len(old_line_infos), 0, len(new_line_infos))

    # Commit to remaining names and resolve conflicts
    for new_name, mapped_name in new_mapping.items():
        if mapped_name in committed_names:
            continue
        if not mapped_name.startswith("conflict_"):
            assert mapped_name == new_name
            committed_names.add(mapped_name)

    for new_name, mapped_name in new_mapping.items():
        if mapped_name in committed_names:
            continue
        assert mapped_name.startswith("conflict_")

        # Resolve the conflict by appending the smallest unused numeric suffix
        # to the variable's base name.
        m = NUMERIC_SUFFIX_RE.search(new_name)
        base_name = new_name[: m.start()]
        suffix = int(new_name[m.start() :]) if m.start() != m.end() else 1
        while True:
            candidate = f"{base_name}{suffix}"
            if candidate not in committed_names:
                new_mapping[new_name] = candidate
                committed_names.add(candidate)
                break
            suffix += 1

    return new_mapping
def generalize_check_lines(
    lines,
    ginfo: GeneralizerInfo,
    vars_seen,
    global_vars_seen,
    preserve_names=False,
    original_check_lines=None,
    *,
    unstable_globals_only=False,
):
    """Replace value names/numbers in `lines` by FileCheck variables.

    Mutates and returns `lines`; also updates `vars_seen` (local values) and
    `global_vars_seen` (global values), both keyed by (name, check_key).
    When `original_check_lines` is given, variable names are chosen to
    minimize the diff against those pre-existing check lines.
    """
    if unstable_globals_only:
        regexp = ginfo.get_unstable_globals_regexp()
    else:
        regexp = ginfo.get_regexp()

    multiple_braces_re = re.compile(r"({{+)|(}}+)")

    def escape_braces(match_obj):
        return "{{" + re.escape(match_obj.group(0)) + "}}"

    if ginfo.is_ir():
        for i, line in enumerate(lines):
            # An IR variable named '%.' matches the FileCheck regex string.
            line = line.replace("%.", "%dot")
            # Rewrite requested global initializers to hexadecimal numeric checks.
            for regex in _global_hex_value_regex:
                if re.match("^@" + regex + " = ", line):
                    line = re.sub(
                        r"\bi([0-9]+) ([0-9]+)",
                        lambda m: "i"
                        + m.group(1)
                        + " [[#"
                        + hex(int(m.group(2)))
                        + "]]",
                        line,
                    )
                    break
            # Ignore any comments, since the check lines will too.
            scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r"", line)
            lines[i] = scrubbed_line

    if not preserve_names:
        # Names already assigned for previously seen local values.
        committed_names = set(
            test_var.get_variable_name(name)
            for (name, _), test_var in vars_seen.items()
        )
        # Keys whose first occurrence (the FileCheck def) is in these lines.
        defs = set()

        # Collect information about new check lines, and generalize global reference
        new_line_infos = []
        for line in lines:
            filtered_line = ""
            values = []
            while True:
                m = regexp.search(line)
                if m is None:
                    filtered_line += line
                    break

                name = ginfo.get_name_from_match(m)
                nameless_value = ginfo.get_nameless_value_from_match(m)
                prefix, suffix = nameless_value.get_affixes_from_match(m)
                if may_clash_with_default_check_prefix_name(
                    nameless_value.check_prefix, name
                ):
                    warn(
                        "Change IR value name '%s' or use --prefix-filecheck-ir-name to prevent possible conflict"
                        " with scripted FileCheck name." % (name,)
                    )

                # Record the variable as seen and (for locals) accumulate
                # prefixes/suffixes
                is_local_def = nameless_value.is_local_def_ir_value()
                if is_local_def:
                    vars_dict = vars_seen
                else:
                    vars_dict = global_vars_seen

                key = (name, nameless_value.check_key)

                if is_local_def:
                    test_prefix = prefix
                    test_suffix = suffix
                else:
                    test_prefix = ""
                    test_suffix = ""

                if key in vars_dict:
                    vars_dict[key].seen(nameless_value, test_prefix, test_suffix)
                else:
                    vars_dict[key] = TestVar(nameless_value, test_prefix, test_suffix)
                    defs.add(key)

                var = vars_dict[key].get_variable_name(name)

                # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
                filtered_line += (
                    line[: m.start()] + m.group(1) + VARIABLE_TAG + m.group(m.lastindex)
                )
                line = line[m.end() :]

                values.append(
                    CheckValueInfo(
                        key=nameless_value.check_key,
                        text=name,
                        name=var,
                        prefix=prefix,
                        suffix=suffix,
                    )
                )

            new_line_infos.append(CheckLineInfo(filtered_line, values))

        committed_names.update(
            test_var.get_variable_name(name)
            for (name, _), test_var in global_vars_seen.items()
        )

        # Collect information about original check lines, if any.
        orig_line_infos = []
        for line in original_check_lines or []:
            filtered_line = ""
            values = []
            while True:
                m = METAVAR_RE.search(line)
                if m is None:
                    filtered_line += line
                    break

                # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
                filtered_line += line[: m.start()] + VARIABLE_TAG
                line = line[m.end() :]
                values.append(
                    CheckValueInfo(
                        key=None,
                        text=None,
                        name=m.group(1),
                        prefix="",
                        suffix="",
                    )
                )

            orig_line_infos.append(CheckLineInfo(filtered_line, values))

        # Compute the variable name mapping
        mapping = remap_metavar_names(orig_line_infos, new_line_infos, committed_names)

        # Apply the variable name mapping: substitute each VARIABLE_TAG with
        # the corresponding FileCheck def (first occurrence) or use.
        for i, line_info in enumerate(new_line_infos):
            line_template = line_info.line
            line = ""

            for value in line_info.values:
                idx = line_template.find(VARIABLE_TAG)
                line += line_template[:idx]
                line_template = line_template[idx + len(VARIABLE_TAG) :]

                key = (value.text, value.key)
                if value.key == "%":
                    vars_dict = vars_seen
                else:
                    vars_dict = global_vars_seen

                if key in defs:
                    line += vars_dict[key].get_def(
                        mapping[value.name], value.prefix, value.suffix
                    )
                    defs.remove(key)
                else:
                    line += vars_dict[key].get_use(
                        mapping[value.name], value.prefix, value.suffix
                    )

            line += line_template

            lines[i] = line

    if ginfo.is_analyze():
        for i, _ in enumerate(lines):
            # Escape multiple {{ or }} as {{}} denotes a FileCheck regex.
            scrubbed_line = multiple_braces_re.sub(escape_braces, lines[i])
            lines[i] = scrubbed_line

    return lines
1887 def add_checks(
1888 output_lines,
1889 comment_marker,
1890 prefix_list,
1891 func_dict,
1892 func_name,
1893 check_label_format,
1894 ginfo,
1895 global_vars_seen_dict,
1896 is_filtered,
1897 preserve_names=False,
1898 original_check_lines: Mapping[str, List[str]] = {},
1900 # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well.
1901 prefix_exclusions = set()
1902 printed_prefixes = []
1903 for p in prefix_list:
1904 checkprefixes = p[0]
1905 # If not all checkprefixes of this run line produced the function we cannot check for it as it does not
1906 # exist for this run line. A subset of the check prefixes might know about the function but only because
1907 # other run lines created it.
1908 if any(
1909 map(
1910 lambda checkprefix: func_name not in func_dict[checkprefix],
1911 checkprefixes,
1914 prefix_exclusions |= set(checkprefixes)
1915 continue
1917 # prefix_exclusions is constructed, we can now emit the output
1918 for p in prefix_list:
1919 global_vars_seen = {}
1920 checkprefixes = p[0]
1921 for checkprefix in checkprefixes:
1922 if checkprefix in global_vars_seen_dict:
1923 global_vars_seen.update(global_vars_seen_dict[checkprefix])
1924 else:
1925 global_vars_seen_dict[checkprefix] = {}
1926 if checkprefix in printed_prefixes:
1927 break
1929 # Check if the prefix is excluded.
1930 if checkprefix in prefix_exclusions:
1931 continue
1933 # If we do not have output for this prefix we skip it.
1934 if not func_dict[checkprefix][func_name]:
1935 continue
1937 # Add some space between different check prefixes, but not after the last
1938 # check line (before the test code).
1939 if ginfo.is_asm():
1940 if len(printed_prefixes) != 0:
1941 output_lines.append(comment_marker)
1943 if checkprefix not in global_vars_seen_dict:
1944 global_vars_seen_dict[checkprefix] = {}
1946 global_vars_seen_before = [key for key in global_vars_seen.keys()]
1948 vars_seen = {}
1949 printed_prefixes.append(checkprefix)
1950 attrs = str(func_dict[checkprefix][func_name].attrs)
1951 attrs = "" if attrs == "None" else attrs
1952 if ginfo.get_version() > 1:
1953 funcdef_attrs_and_ret = func_dict[checkprefix][
1954 func_name
1955 ].funcdef_attrs_and_ret
1956 else:
1957 funcdef_attrs_and_ret = ""
1959 if attrs:
1960 output_lines.append(
1961 "%s %s: Function Attrs: %s" % (comment_marker, checkprefix, attrs)
1963 args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
1964 if args_and_sig:
1965 args_and_sig = generalize_check_lines(
1966 [args_and_sig],
1967 ginfo,
1968 vars_seen,
1969 global_vars_seen,
1970 preserve_names,
1971 original_check_lines=[],
1972 )[0]
1973 func_name_separator = func_dict[checkprefix][func_name].func_name_separator
1974 if "[[" in args_and_sig:
1975 # Captures in label lines are not supported, thus split into a -LABEL
1976 # and a separate -SAME line that contains the arguments with captures.
1977 args_and_sig_prefix = ""
1978 if ginfo.get_version() >= 3 and args_and_sig.startswith("("):
1979 # Ensure the "(" separating function name and arguments is in the
1980 # label line. This is required in case of function names that are
1981 # prefixes of each other. Otherwise, the label line for "foo" might
1982 # incorrectly match on "foo.specialized".
1983 args_and_sig_prefix = args_and_sig[0]
1984 args_and_sig = args_and_sig[1:]
1986 # Removing args_and_sig from the label match line requires
1987 # func_name_separator to be empty. Otherwise, the match will not work.
1988 assert func_name_separator == ""
1989 output_lines.append(
1990 check_label_format
1992 checkprefix,
1993 funcdef_attrs_and_ret,
1994 func_name,
1995 args_and_sig_prefix,
1996 func_name_separator,
1999 output_lines.append(
2000 "%s %s-SAME: %s" % (comment_marker, checkprefix, args_and_sig)
2002 else:
2003 output_lines.append(
2004 check_label_format
2006 checkprefix,
2007 funcdef_attrs_and_ret,
2008 func_name,
2009 args_and_sig,
2010 func_name_separator,
2013 func_body = str(func_dict[checkprefix][func_name]).splitlines()
2014 if not func_body:
2015 # We have filtered everything.
2016 continue
2018 # For ASM output, just emit the check lines.
2019 if ginfo.is_asm():
2020 body_start = 1
2021 if is_filtered:
2022 # For filtered output we don't add "-NEXT" so don't add extra spaces
2023 # before the first line.
2024 body_start = 0
2025 else:
2026 output_lines.append(
2027 "%s %s: %s" % (comment_marker, checkprefix, func_body[0])
2029 func_lines = generalize_check_lines(
2030 func_body[body_start:], ginfo, vars_seen, global_vars_seen
2032 for func_line in func_lines:
2033 if func_line.strip() == "":
2034 output_lines.append(
2035 "%s %s-EMPTY:" % (comment_marker, checkprefix)
2037 else:
2038 check_suffix = "-NEXT" if not is_filtered else ""
2039 output_lines.append(
2040 "%s %s%s: %s"
2041 % (comment_marker, checkprefix, check_suffix, func_line)
2043 # Remember new global variables we have not seen before
2044 for key in global_vars_seen:
2045 if key not in global_vars_seen_before:
2046 global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
2047 break
2048 # For analyze output, generalize the output, and emit CHECK-EMPTY lines as well.
2049 elif ginfo.is_analyze():
2050 func_body = generalize_check_lines(
2051 func_body, ginfo, vars_seen, global_vars_seen
2053 for func_line in func_body:
2054 if func_line.strip() == "":
2055 output_lines.append(
2056 "{} {}-EMPTY:".format(comment_marker, checkprefix)
2058 else:
2059 check_suffix = "-NEXT" if not is_filtered else ""
2060 output_lines.append(
2061 "{} {}{}: {}".format(
2062 comment_marker, checkprefix, check_suffix, func_line
2066 # Add space between different check prefixes and also before the first
2067 # line of code in the test function.
2068 output_lines.append(comment_marker)
2070 # Remember new global variables we have not seen before
2071 for key in global_vars_seen:
2072 if key not in global_vars_seen_before:
2073 global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
2074 break
2075 # For IR output, change all defs to FileCheck variables, so we're immune
2076 # to variable naming fashions.
2077 else:
2078 func_body = generalize_check_lines(
2079 func_body,
2080 ginfo,
2081 vars_seen,
2082 global_vars_seen,
2083 preserve_names,
2084 original_check_lines=original_check_lines.get(checkprefix),
2087 # This could be selectively enabled with an optional invocation argument.
2088 # Disabled for now: better to check everything. Be safe rather than sorry.
2090 # Handle the first line of the function body as a special case because
2091 # it's often just noise (a useless asm comment or entry label).
2092 # if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
2093 # is_blank_line = True
2094 # else:
2095 # output_lines.append('%s %s: %s' % (comment_marker, checkprefix, func_body[0]))
2096 # is_blank_line = False
2098 is_blank_line = False
2100 for func_line in func_body:
2101 if func_line.strip() == "":
2102 is_blank_line = True
2103 continue
2104 # Do not waste time checking IR comments.
2105 func_line = SCRUB_IR_COMMENT_RE.sub(r"", func_line)
2107 # Skip blank lines instead of checking them.
2108 if is_blank_line:
2109 output_lines.append(
2110 "{} {}: {}".format(
2111 comment_marker, checkprefix, func_line
2114 else:
2115 check_suffix = "-NEXT" if not is_filtered else ""
2116 output_lines.append(
2117 "{} {}{}: {}".format(
2118 comment_marker, checkprefix, check_suffix, func_line
2121 is_blank_line = False
2123 # Add space between different check prefixes and also before the first
2124 # line of code in the test function.
2125 output_lines.append(comment_marker)
2127 # Remember new global variables we have not seen before
2128 for key in global_vars_seen:
2129 if key not in global_vars_seen_before:
2130 global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
2131 break
2132 return printed_prefixes
def add_ir_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    preserve_names,
    function_sig,
    ginfo: GeneralizerInfo,
    global_vars_seen_dict,
    is_filtered,
    original_check_lines=None,
):
    """Append FileCheck lines for the IR output of *func_name* to output_lines.

    Builds the -LABEL format string appropriate for the UTC version in use and
    delegates the actual check emission to add_checks.

    original_check_lines previously used a mutable default ({}); it now uses
    None as the sentinel to avoid the shared-mutable-default pitfall.  Callers
    passing a dict are unaffected.
    """
    assert ginfo.is_ir()
    if original_check_lines is None:
        original_check_lines = {}
    # Label format is based on IR string.
    if function_sig and ginfo.get_version() > 1:
        # Version >= 2 checks the full signature (return type/attributes).
        function_def_regex = "define %s"
    elif function_sig:
        # Version 1 skips everything between 'define' and the function name.
        function_def_regex = "define {{[^@]+}}%s"
    else:
        function_def_regex = "%s"
    check_label_format = "{} %s-LABEL: {}@%s%s%s".format(
        comment_marker, function_def_regex
    )
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        check_label_format,
        ginfo,
        global_vars_seen_dict,
        is_filtered,
        preserve_names,
        original_check_lines=original_check_lines,
    )
def add_analyze_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    ginfo: GeneralizerInfo,
    is_filtered,
):
    """Append FileCheck lines for analyze-mode output of *func_name*."""
    assert ginfo.is_analyze()
    # Analysis output is quoted in the label line; no IR signature is matched.
    label_format = "{} %s-LABEL: '%s%s%s%s'".format(comment_marker)
    # Analysis output carries no global variables, so start from an empty map.
    seen_globals = {}
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        label_format,
        ginfo,
        seen_globals,
        is_filtered,
    )
def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes, ginfo):
    """Scrape global value definitions from *raw_tool_output* into
    glob_val_dict[prefix][check_prefix] as (start_offset, line) tuples.

    A prefix whose previously recorded lines conflict with the newly scraped
    ones is reset to None (or, for the last prefix, warned about) so that no
    checks are emitted for it.
    """
    for nv in ginfo.get_nameless_values():
        if nv.global_ir_rhs_regexp is None:
            continue

        # Match e.g. '@name = <rhs>' anchored to a whole line.
        matcher = re.compile(
            r"^"
            + nv.ir_prefix
            + nv.ir_regexp
            + r"\s=\s"
            + nv.global_ir_rhs_regexp
            + r"$",
            flags=(re.M),
        )
        # Attach each match's start offset so that CHECK lines can be sorted
        # properly even if matched by different nameless values.  This is
        # relevant for GLOB and GLOBNAMED since they may appear interlaced.
        found = [(m.start(), m.group(0)) for m in matcher.finditer(raw_tool_output)]

        for prefix in prefixes:
            if glob_val_dict[prefix] is None:
                continue
            existing = glob_val_dict[prefix]
            if nv.check_prefix in existing:
                if existing[nv.check_prefix] == found:
                    continue
                if prefix == prefixes[-1]:
                    warn("Found conflicting asm under the same prefix: %r!" % (prefix,))
                else:
                    existing[nv.check_prefix] = None
                continue
            existing[nv.check_prefix] = found
def filter_globals_according_to_preference(
    global_val_lines_w_index, global_vars_seen, nameless_value, global_check_setting
):
    """Filter (index, line) global definitions per --check-globals setting.

    'none' drops everything, 'all' keeps everything, and 'smart' keeps only
    globals transitively referenced by variables already seen in the checks.
    """
    if global_check_setting == "none":
        return []
    if global_check_setting == "all":
        return global_val_lines_w_index
    assert global_check_setting == "smart"

    if nameless_value.check_key == "#":
        # Attribute sets are usually better checked by --check-attributes.
        return []

    def parse(line):
        # Split '<prefix><name> = <rhs>' and collect names referenced in rhs.
        pattern = (
            "^"
            + nameless_value.ir_prefix
            + "("
            + nameless_value.ir_regexp
            + ") = ("
            + nameless_value.global_ir_rhs_regexp
            + ")"
        )
        m = re.match(pattern, line)
        return (m.group(1), re.findall(nameless_value.ir_regexp, m.group(2)))

    # Map each defined global to the globals its definition references.
    refs_of = {}
    for _, text in global_val_lines_w_index:
        name, refs = parse(text)
        refs_of[name] = refs

    # Compute the transitive closure of globals reachable from already-seen
    # variables of the matching check key, using an explicit worklist.
    visible = set()
    worklist = [
        var for var, key in global_vars_seen if key == nameless_value.check_key
    ]
    while worklist:
        var = worklist.pop()
        if var in visible:
            continue
        visible.add(var)
        worklist.extend(refs_of.get(var, ()))

    return [
        (idx, text)
        for idx, text in global_val_lines_w_index
        if parse(text)[0] in visible
    ]
# Regex filters applied to metadata lines to scrub values that vary between
# builds (compiler version strings, source directories) before emitting
# check lines.  Each entry is a (pattern, replacement) pair.
METADATA_FILTERS = [
    # Compiler version strings, e.g. '... clang version 15.0.0 (repo hash)'.
    # Prefaced with a glob as well, to capture an optional CLANG_VENDOR.
    (
        r"(?<=\")(.+ )?(\w+ version )[\d.]+(?:[^\" ]*)(?: \([^)]+\))?",
        r"{{.*}}\2{{.*}}",
    ),
    # !DIFile directories depend on the build machine; scrub them.
    (r'(!DIFile\(filename: ".+", directory: )".+"', r"\1{{.*}}"),
]
METADATA_FILTERS_RE = [(re.compile(f), r) for (f, r) in METADATA_FILTERS]
def filter_unstable_metadata(line):
    """Scrub build-dependent metadata values (version strings, source paths)
    from *line* by applying every METADATA_FILTERS_RE substitution in order."""
    for pattern, replacement in METADATA_FILTERS_RE:
        line = pattern.sub(replacement, line)
    return line
def flush_current_checks(output_lines, new_lines_w_index, comment_marker):
    """Append pending (offset, check-line) entries, sorted by source offset,
    after a separator line.  The pending list is emptied; no-op when empty."""
    if not new_lines_w_index:
        return
    output_lines.append(comment_marker + SEPARATOR)
    for _, check in sorted(new_lines_w_index):
        output_lines.append(check)
    new_lines_w_index.clear()
def add_global_checks(
    glob_val_dict,
    comment_marker,
    prefix_list,
    output_lines,
    ginfo: GeneralizerInfo,
    global_vars_seen_dict,
    preserve_names,
    is_before_functions,
    global_check_setting,
):
    """Emit CHECK lines for global values (globals, metadata, attributes).

    Returns the set of (check prefix, value check prefix) pairs for which
    checks were emitted.  global_vars_seen_dict is updated with variables
    first seen while generating these checks.
    """
    printed_prefixes = set()
    # Pending check lines per prefix; allows GLOB and GLOBNAMED, which may be
    # interlaced in the output, to be sorted correctly before flushing.
    output_lines_loc = {}
    for nv in ginfo.get_nameless_values():
        if nv.global_ir_rhs_regexp is None:
            continue
        if nv.is_before_functions != is_before_functions:
            continue
        for run_entry in prefix_list:
            global_vars_seen = {}
            checkprefixes = run_entry[0]
            if checkprefixes is None:
                continue
            for checkprefix in checkprefixes:
                if checkprefix in global_vars_seen_dict:
                    global_vars_seen.update(global_vars_seen_dict[checkprefix])
                else:
                    global_vars_seen_dict[checkprefix] = {}
                if (checkprefix, nv.check_prefix) in printed_prefixes:
                    break
                if not glob_val_dict[checkprefix]:
                    continue
                if nv.check_prefix not in glob_val_dict[checkprefix]:
                    continue
                if not glob_val_dict[checkprefix][nv.check_prefix]:
                    continue

                check_lines = []
                global_vars_seen_before = list(global_vars_seen.keys())
                lines_w_index = filter_globals_according_to_preference(
                    glob_val_dict[checkprefix][nv.check_prefix],
                    global_vars_seen_before,
                    nv,
                    global_check_setting,
                )
                for idx, line in lines_w_index:
                    if _global_value_regex:
                        # Honor the user-supplied --global-value-regex filter.
                        if not any(
                            re.match("^@" + regex + " = ", line)
                            or re.match("^!" + regex + " = ", line)
                            for regex in _global_value_regex
                        ):
                            continue
                    [new_line] = generalize_check_lines(
                        [line],
                        ginfo,
                        {},
                        global_vars_seen,
                        preserve_names,
                        unstable_globals_only=True,
                    )
                    new_line = filter_unstable_metadata(new_line)
                    check_lines.append(
                        (idx, "%s %s: %s" % (comment_marker, checkprefix, new_line))
                    )
                if not check_lines:
                    continue

                pending = output_lines_loc.setdefault(checkprefix, [])
                if not nv.interlaced_with_previous:
                    flush_current_checks(output_lines, pending, comment_marker)
                pending.extend(check_lines)

                printed_prefixes.add((checkprefix, nv.check_prefix))

                # Remember new global variables we have not seen before.
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break

    if printed_prefixes:
        # Flush any remaining pending checks: one prefix per run line.
        for run_entry in prefix_list:
            if run_entry[0] is None:
                continue
            for checkprefix in run_entry[0]:
                if checkprefix not in output_lines_loc:
                    continue
                flush_current_checks(
                    output_lines, output_lines_loc[checkprefix], comment_marker
                )
                break
        output_lines.append(comment_marker + SEPARATOR)
    return printed_prefixes
def check_prefix(prefix):
    """Warn when *prefix* is not a valid FileCheck prefix string."""
    if PREFIX_RE.match(prefix):
        return
    hint = ""
    if "," in prefix:
        # A comma strongly suggests the user meant a prefix *list*.
        hint = " Did you mean '--check-prefixes=" + prefix + "'?"
    warn(
        (
            "Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores."
            + hint
        )
        % (prefix)
    )
def get_check_prefixes(filecheck_cmd):
    """Extract all check prefixes from a FileCheck command line, defaulting
    to ["CHECK"] when the command specifies none."""
    prefixes = []
    for m in CHECK_PREFIX_RE.finditer(filecheck_cmd):
        prefixes.extend(m.group(1).split(","))
    return prefixes or ["CHECK"]
def verify_filecheck_prefixes(fc_cmd):
    """Validate every -check-prefix / -check-prefixes value in a FileCheck
    command, warning on invalid or duplicated prefixes."""
    for token in fc_cmd.split():
        if "check-prefix=" in token:
            check_prefix(token.split("=", 1)[1])
        elif "check-prefixes=" in token:
            prefix_values = token.split("=", 1)[1].split(",")
            for value in prefix_values:
                check_prefix(value)
                if prefix_values.count(value) > 1:
                    warn(
                        "Supplied prefix '%s' is not unique in the prefix list."
                        % (value,)
                    )
def get_autogennote_suffix(parser, args):
    """Build the ' UTC_ARGS: ...' suffix recording the non-default options in
    *args*, so reruns of the script can reproduce the same configuration."""
    note_args = ""
    for action in parser._actions:
        if not hasattr(args, action.dest):
            continue  # Ignore options such as --help that aren't included in args.
        # Ignore parameters such as paths to the binary or the list of tests.
        if action.dest in (
            "tests",
            "update_only",
            "tool_binary",
            "opt_binary",
            "llc_binary",
            "clang",
            "opt",
            "llvm_bin",
            "verbose",
            "force_update",
            "reset_variable_names",
            "llvm_mc_binary",
        ):
            continue
        value = getattr(args, action.dest)
        if action.dest == "check_globals":
            # --check-globals gained a mode argument in version 4; earlier
            # versions treated it as a flag whose default was 'none'.
            default_value = "none" if args.version < 4 else "smart"
            if value == default_value:
                continue
            note_args += action.option_strings[0] + " "
            if args.version < 4 and value == "all":
                continue
            note_args += "%s " % value
            continue
        if action.const is not None:
            # The action stores a constant (usually True/False); skip actions
            # with a different constant value (this happens with boolean
            # --foo/--no-foo option pairs).
            if value != action.const:
                continue
        if parser.get_default(action.dest) == value:
            continue  # Don't add default values.
        if action.dest == "function_signature" and args.version >= 2:
            continue  # Enabled by default in version 2.
        if action.dest == "filters":
            # Create a separate option for each filter element. The value is
            # a list of Filter objects.
            for elem in value:
                opt_name = "filter-out" if elem.is_filter_out else "filter"
                opt_value = elem.pattern()
                new_arg = '--%s "%s" ' % (opt_name, opt_value.strip('"'))
                if new_arg not in note_args:
                    note_args += new_arg
        else:
            note_args += action.option_strings[0] + " "
            if action.const is None:  # The action takes a parameter.
                if action.nargs == "+":
                    value = " ".join('"' + v.strip('"') + '"' for v in value)
                note_args += "%s " % value
    if note_args:
        note_args = " %s %s" % (UTC_ARGS_KEY, note_args[:-1])
    return note_args
def check_for_command(line, parser, args, argv, argparse_callback):
    """If *line* carries a UTC_ARGS command, fold its options into argv and
    re-parse.  Returns the (possibly updated) args and argv."""
    cmd_match = UTC_ARGS_CMD.match(line)
    if cmd_match:
        options = shlex.split(cmd_match.group("cmd").strip())
        argv.extend(opt for opt in options if opt)
        # Re-parse everything except the test paths themselves.
        args = parse_args(parser, [a for a in argv if a not in args.tests])
        if argparse_callback is not None:
            argparse_callback(args)
    return args, argv
def find_arg_in_test(test_info, get_arg_to_check, arg_string, is_global):
    """Return the value of an option for this test.

    get_arg_to_check extracts the option from a parsed args namespace.  If the
    option is absent from the test's own args and is_global is set, scan the
    whole test for a UTC_ARGS line setting it: such "global" options affect
    the entire generation of test checks, so if one exists anywhere in the
    test it is applied to everything.  A warning is printed when the option
    appears only after test input lines have already started.
    """
    result = get_arg_to_check(test_info.args)
    if not result and is_global:
        saw_line = False  # Becomes True once past a non-comment input line.
        for line_info in test_info.ro_iterlines():
            line = line_info.line
            if not line.startswith(";") and line.strip() != "":
                saw_line = True
            result = get_arg_to_check(line_info.args)
            if result:
                # Bug fix: this previously read `if warn and saw_line:` where
                # `warn` names the module-level warning function and is thus
                # always truthy -- the condition reduces to `saw_line`.
                if saw_line:
                    # We saw the option after already reading some test input
                    # lines. Warn about it.
                    print(
                        "WARNING: Found {} in line following test start: ".format(
                            arg_string
                        )
                        + line,
                        file=sys.stderr,
                    )
                    print(
                        "WARNING: Consider moving {} to top of file".format(arg_string),
                        file=sys.stderr,
                    )
                break
    return result
def dump_input_lines(output_lines, test_info, prefix_set, comment_string):
    """Copy the test's input lines into output_lines, dropping bare comment
    markers, separator lines, and stale check lines for known prefixes."""
    for info in test_info.iterlines(output_lines):
        text = info.line
        stripped = text.strip()
        if stripped == comment_string:
            continue
        if stripped == comment_string + SEPARATOR:
            continue
        if text.lstrip().startswith(comment_string):
            m = CHECK_RE.match(text)
            # Drop old auto-generated checks for prefixes we will regenerate.
            if m and m.group(1) in prefix_set:
                continue
        output_lines.append(text.rstrip("\n"))
def add_checks_at_end(
    output_lines, prefix_list, func_order, comment_string, check_generator
):
    """Emit checks for all functions at the end of the test, grouped by prefix.

    Returns the set of prefixes for which checks were actually generated.
    """
    added = set()
    generated_prefixes = set()
    for run_entry in prefix_list:
        run_prefixes, tool_args = run_entry[0], run_entry[1]
        for run_prefix in run_prefixes:
            for func in func_order[run_prefix]:
                # The func order can contain the same function several times;
                # emit checks only the first time we encounter it.
                if (func, run_prefix) in added:
                    continue
                if added:
                    output_lines.append(comment_string)

                # The add_*_checks routines expect a run list whose items are
                # tuples of (prefix list, tool args).  They implicitly assume
                # every run line generates output for every function, which
                # is not true for generated functions (e.g. -fopenmp vs. no
                # -fopenmp).  Therefore pass just the prefix we're interested
                # in, which groups all checks of one prefix together before
                # moving on to the next prefix -- ordered by prefix instead
                # of by function as in "normal" mode.
                for produced in check_generator(
                    output_lines, [([run_prefix], tool_args)], func
                ):
                    added.add((func, produced))
                    generated_prefixes.add(produced)
    return generated_prefixes