llvm/utils/UpdateTestChecks/common.py

   1 from __future__ import print_function
   2
   3 import argparse
   4 import copy
   5 import glob
   6 import itertools
   7 import os
   8 import re
   9 import subprocess
  10 import sys
  11 import shlex
  12
  13 from typing import List
  14
  15 ##### Common utilities for update_*test_checks.py
  16
  17
# Module-wide verbosity switch. parse_commandline_args() and parse_args() set
# it from the parsed --verbose flag; debug() only prints when it is true.
_verbose = False
# Overwritten by TestInfo.__init__ when --prefix-filecheck-ir-name is given a
# non-empty value.
_prefix_filecheck_ir_name = ""

"""
Version changelog:

1: Initial version, used by tests that don't specify --version explicitly.
2: --function-signature is now enabled by default and also checks return
   type/attributes.
3: Opening parenthesis of function args is kept on the first LABEL line
   in case arguments are split to a separate SAME line.
"""
# Output format version that itertests() injects as --version=<N> for tests
# whose first line does not carry the autogeneration note yet.
DEFAULT_VERSION = 3
  31
  32
  33 class Regex(object):
  34     """Wrap a compiled regular expression object to allow deep copy of a regexp.
  35     This is required for the deep copy done in do_scrub.
  36
  37     """
  38
  39     def __init__(self, regex):
  40         self.regex = regex
  41
  42     def __deepcopy__(self, memo):
  43         result = copy.copy(self)
  44         result.regex = self.regex
  45         return result
  46
  47     def search(self, line):
  48         return self.regex.search(line)
  49
  50     def sub(self, repl, line):
  51         return self.regex.sub(repl, line)
  52
  53     def pattern(self):
  54         return self.regex.pattern
  55
  56     def flags(self):
  57         return self.regex.flags
  58
  59
  60 class Filter(Regex):
  61     """Augment a Regex object with a flag indicating whether a match should be
  62     added (!is_filter_out) or removed (is_filter_out) from the generated checks.
  63
  64     """
  65
  66     def __init__(self, regex, is_filter_out):
  67         super(Filter, self).__init__(regex)
  68         self.is_filter_out = is_filter_out
  69
  70     def __deepcopy__(self, memo):
  71         result = copy.deepcopy(super(Filter, self), memo)
  72         result.is_filter_out = copy.deepcopy(self.is_filter_out, memo)
  73         return result
  74
  75
  76 def parse_commandline_args(parser):
  77     class RegexAction(argparse.Action):
  78         """Add a regular expression option value to a list of regular expressions.
  79         This compiles the expression, wraps it in a Regex and adds it to the option
  80         value list."""
  81
  82         def __init__(self, option_strings, dest, nargs=None, **kwargs):
  83             if nargs is not None:
  84                 raise ValueError("nargs not allowed")
  85             super(RegexAction, self).__init__(option_strings, dest, **kwargs)
  86
  87         def do_call(self, namespace, values, flags):
  88             value_list = getattr(namespace, self.dest)
  89             if value_list is None:
  90                 value_list = []
  91
  92             try:
  93                 value_list.append(Regex(re.compile(values, flags)))
  94             except re.error as error:
  95                 raise ValueError(
  96                     "{}: Invalid regular expression '{}' ({})".format(
  97                         option_string, error.pattern, error.msg
  98                     )
  99                 )
 100
 101             setattr(namespace, self.dest, value_list)
 102
 103         def __call__(self, parser, namespace, values, option_string=None):
 104             self.do_call(namespace, values, 0)
 105
 106     class FilterAction(RegexAction):
 107         """Add a filter to a list of filter option values."""
 108
 109         def __init__(self, option_strings, dest, nargs=None, **kwargs):
 110             super(FilterAction, self).__init__(option_strings, dest, nargs, **kwargs)
 111
 112         def __call__(self, parser, namespace, values, option_string=None):
 113             super(FilterAction, self).__call__(parser, namespace, values, option_string)
 114
 115             value_list = getattr(namespace, self.dest)
 116
 117             is_filter_out = option_string == "--filter-out"
 118
 119             value_list[-1] = Filter(value_list[-1].regex, is_filter_out)
 120
 121             setattr(namespace, self.dest, value_list)
 122
 123     filter_group = parser.add_argument_group(
 124         "filtering",
 125         """Filters are applied to each output line according to the order given. The
 126     first matching filter terminates filter processing for that current line.""",
 127     )
 128
 129     filter_group.add_argument(
 130         "--filter",
 131         action=FilterAction,
 132         dest="filters",
 133         metavar="REGEX",
 134         help="Only include lines matching REGEX (may be specified multiple times)",
 135     )
 136     filter_group.add_argument(
 137         "--filter-out",
 138         action=FilterAction,
 139         dest="filters",
 140         metavar="REGEX",
 141         help="Exclude lines matching REGEX",
 142     )
 143
 144     parser.add_argument(
 145         "--include-generated-funcs",
 146         action="store_true",
 147         help="Output checks for functions not in source",
 148     )
 149     parser.add_argument(
 150         "-v", "--verbose", action="store_true", help="Show verbose output"
 151     )
 152     parser.add_argument(
 153         "-u",
 154         "--update-only",
 155         action="store_true",
 156         help="Only update test if it was already autogened",
 157     )
 158     parser.add_argument(
 159         "--force-update",
 160         action="store_true",
 161         help="Update test even if it was autogened by a different script",
 162     )
 163     parser.add_argument(
 164         "--enable",
 165         action="store_true",
 166         dest="enabled",
 167         default=True,
 168         help="Activate CHECK line generation from this point forward",
 169     )
 170     parser.add_argument(
 171         "--disable",
 172         action="store_false",
 173         dest="enabled",
 174         help="Deactivate CHECK line generation from this point forward",
 175     )
 176     parser.add_argument(
 177         "--replace-value-regex",
 178         nargs="+",
 179         default=[],
 180         help="List of regular expressions to replace matching value names",
 181     )
 182     parser.add_argument(
 183         "--prefix-filecheck-ir-name",
 184         default="",
 185         help="Add a prefix to FileCheck IR value names to avoid conflicts with scripted names",
 186     )
 187     parser.add_argument(
 188         "--global-value-regex",
 189         nargs="+",
 190         default=[],
 191         help="List of regular expressions that a global value declaration must match to generate a check (has no effect if checking globals is not enabled)",
 192     )
 193     parser.add_argument(
 194         "--global-hex-value-regex",
 195         nargs="+",
 196         default=[],
 197         help="List of regular expressions such that, for matching global value declarations, literal integer values should be encoded in hex in the associated FileCheck directives",
 198     )
 199     # FIXME: in 3.9, we can use argparse.BooleanOptionalAction. At that point,
 200     # we need to rename the flag to just -generate-body-for-unused-prefixes.
 201     parser.add_argument(
 202         "--no-generate-body-for-unused-prefixes",
 203         action="store_false",
 204         dest="gen_unused_prefix_body",
 205         default=True,
 206         help="Generate a function body that always matches for unused prefixes. This is useful when unused prefixes are desired, and it avoids needing to annotate each FileCheck as allowing them.",
 207     )
 208     # This is the default when regenerating existing tests. The default when
 209     # generating new tests is determined by DEFAULT_VERSION.
 210     parser.add_argument(
 211         "--version", type=int, default=1, help="The version of output format"
 212     )
 213     args = parser.parse_args()
 214     # TODO: This should not be handled differently from the other options
 215     global _verbose, _global_value_regex, _global_hex_value_regex
 216     _verbose = args.verbose
 217     _global_value_regex = args.global_value_regex
 218     _global_hex_value_regex = args.global_hex_value_regex
 219     return args
 220
 221
 222 def parse_args(parser, argv):
 223     args = parser.parse_args(argv)
 224     if args.version >= 2:
 225         args.function_signature = True
 226     # TODO: This should not be handled differently from the other options
 227     global _verbose, _global_value_regex, _global_hex_value_regex
 228     _verbose = args.verbose
 229     _global_value_regex = args.global_value_regex
 230     _global_hex_value_regex = args.global_hex_value_regex
 231     return args
 232
 233
 234 class InputLineInfo(object):
 235     def __init__(self, line, line_number, args, argv):
 236         self.line = line
 237         self.line_number = line_number
 238         self.args = args
 239         self.argv = argv
 240
 241
 242 class TestInfo(object):
 243     def __init__(
 244         self,
 245         test,
 246         parser,
 247         script_name,
 248         input_lines,
 249         args,
 250         argv,
 251         comment_prefix,
 252         argparse_callback,
 253     ):
 254         self.parser = parser
 255         self.argparse_callback = argparse_callback
 256         self.path = test
 257         self.args = args
 258         if args.prefix_filecheck_ir_name:
 259             global _prefix_filecheck_ir_name
 260             _prefix_filecheck_ir_name = args.prefix_filecheck_ir_name
 261         self.argv = argv
 262         self.input_lines = input_lines
 263         self.run_lines = find_run_lines(test, self.input_lines)
 264         self.comment_prefix = comment_prefix
 265         if self.comment_prefix is None:
 266             if self.path.endswith(".mir"):
 267                 self.comment_prefix = "#"
 268             else:
 269                 self.comment_prefix = ";"
 270         self.autogenerated_note_prefix = self.comment_prefix + " " + UTC_ADVERT
 271         self.test_autogenerated_note = self.autogenerated_note_prefix + script_name
 272         self.test_autogenerated_note += get_autogennote_suffix(parser, self.args)
 273         self.test_unused_note = (
 274             self.comment_prefix + self.comment_prefix + " " + UNUSED_NOTE
 275         )
 276
 277     def ro_iterlines(self):
 278         for line_num, input_line in enumerate(self.input_lines):
 279             args, argv = check_for_command(
 280                 input_line, self.parser, self.args, self.argv, self.argparse_callback
 281             )
 282             yield InputLineInfo(input_line, line_num, args, argv)
 283
 284     def iterlines(self, output_lines):
 285         output_lines.append(self.test_autogenerated_note)
 286         for line_info in self.ro_iterlines():
 287             input_line = line_info.line
 288             # Discard any previous script advertising.
 289             if input_line.startswith(self.autogenerated_note_prefix):
 290                 continue
 291             self.args = line_info.args
 292             self.argv = line_info.argv
 293             if not self.args.enabled:
 294                 output_lines.append(input_line)
 295                 continue
 296             yield line_info
 297
 298     def get_checks_for_unused_prefixes(
 299         self, run_list, used_prefixes: List[str]
 300     ) -> List[str]:
 301         run_list = [element for element in run_list if element[0] is not None]
 302         unused_prefixes = set(
 303             [prefix for sublist in run_list for prefix in sublist[0]]
 304         ).difference(set(used_prefixes))
 305
 306         ret = []
 307         if not unused_prefixes:
 308             return ret
 309         ret.append(self.test_unused_note)
 310         for unused in sorted(unused_prefixes):
 311             ret.append(
 312                 "{comment} {prefix}: {match_everything}".format(
 313                     comment=self.comment_prefix,
 314                     prefix=unused,
 315                     match_everything=r"""{{.*}}""",
 316                 )
 317             )
 318         return ret
 319
 320
 321 def itertests(
 322     test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None
 323 ):
 324     for pattern in test_patterns:
 325         # On Windows we must expand the patterns ourselves.
 326         tests_list = glob.glob(pattern)
 327         if not tests_list:
 328             warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
 329             continue
 330         for test in tests_list:
 331             with open(test) as f:
 332                 input_lines = [l.rstrip() for l in f]
 333             first_line = input_lines[0] if input_lines else ""
 334             if UTC_AVOID in first_line:
 335               warn("Skipping test that must not be autogenerated: " + test)
 336               continue
 337             is_regenerate = UTC_ADVERT in first_line
 338
 339             # If we're generating a new test, set the default version to the latest.
 340             argv = sys.argv[:]
 341             if not is_regenerate:
 342                 argv.insert(1, "--version=" + str(DEFAULT_VERSION))
 343
 344             args = parse_args(parser, argv[1:])
 345             if argparse_callback is not None:
 346                 argparse_callback(args)
 347             if is_regenerate:
 348                 if script_name not in first_line and not args.force_update:
 349                     warn(
 350                         "Skipping test which wasn't autogenerated by " + script_name,
 351                         test,
 352                     )
 353                     continue
 354                 args, argv = check_for_command(
 355                     first_line, parser, args, argv, argparse_callback
 356                 )
 357             elif args.update_only:
 358                 assert UTC_ADVERT not in first_line
 359                 warn("Skipping test which isn't autogenerated: " + test)
 360                 continue
 361             final_input_lines = []
 362             for l in input_lines:
 363                 if UNUSED_NOTE in l:
 364                     break
 365                 final_input_lines.append(l)
 366             yield TestInfo(
 367                 test,
 368                 parser,
 369                 script_name,
 370                 final_input_lines,
 371                 args,
 372                 argv,
 373                 comment_prefix,
 374                 argparse_callback,
 375             )
 376
 377
 378 def should_add_line_to_output(
 379     input_line, prefix_set, skip_global_checks=False, comment_marker=";"
 380 ):
 381     # Skip any blank comment lines in the IR.
 382     if not skip_global_checks and input_line.strip() == comment_marker:
 383         return False
 384     # Skip a special double comment line we use as a separator.
 385     if input_line.strip() == comment_marker + SEPARATOR:
 386         return False
 387     # Skip any blank lines in the IR.
 388     # if input_line.strip() == '':
 389     #  return False
 390     # And skip any CHECK lines. We're building our own.
 391     m = CHECK_RE.match(input_line)
 392     if m and m.group(1) in prefix_set:
 393         if skip_global_checks:
 394             global_ir_value_re = re.compile(r"\[\[", flags=(re.M))
 395             return not global_ir_value_re.search(input_line)
 396         return False
 397
 398     return True
 399
 400
 401 # Perform lit-like substitutions
 402 def getSubstitutions(sourcepath):
 403     sourcedir = os.path.dirname(sourcepath)
 404     return [
 405         ("%s", sourcepath),
 406         ("%S", sourcedir),
 407         ("%p", sourcedir),
 408         ("%{pathsep}", os.pathsep),
 409     ]
 410
 411
 412 def applySubstitutions(s, substitutions):
 413     for a, b in substitutions:
 414         s = s.replace(a, b)
 415     return s
 416
 417
 418 # Invoke the tool that is being tested.
 419 def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False):
 420     with open(ir) as ir_file:
 421         substitutions = getSubstitutions(ir)
 422
 423         # TODO Remove the str form which is used by update_test_checks.py and
 424         # update_llc_test_checks.py
 425         # The safer list form is used by update_cc_test_checks.py
 426         if preprocess_cmd:
 427             # Allow pre-processing the IR file (e.g. using sed):
 428             assert isinstance(
 429                 preprocess_cmd, str
 430             )  # TODO: use a list instead of using shell
 431             preprocess_cmd = applySubstitutions(preprocess_cmd, substitutions).strip()
 432             if verbose:
 433                 print(
 434                     "Pre-processing input file: ",
 435                     ir,
 436                     " with command '",
 437                     preprocess_cmd,
 438                     "'",
 439                     sep="",
 440                     file=sys.stderr,
 441                 )
 442             # Python 2.7 doesn't have subprocess.DEVNULL:
 443             with open(os.devnull, "w") as devnull:
 444                 pp = subprocess.Popen(
 445                     preprocess_cmd, shell=True, stdin=devnull, stdout=subprocess.PIPE
 446                 )
 447                 ir_file = pp.stdout
 448
 449         if isinstance(cmd_args, list):
 450             args = [applySubstitutions(a, substitutions) for a in cmd_args]
 451             stdout = subprocess.check_output([exe] + args, stdin=ir_file)
 452         else:
 453             stdout = subprocess.check_output(
 454                 exe + " " + applySubstitutions(cmd_args, substitutions),
 455                 shell=True,
 456                 stdin=ir_file,
 457             )
 458         if sys.version_info[0] > 2:
 459             # FYI, if you crashed here with a decode error, your run line probably
 460             # results in bitcode or other binary format being written to the pipe.
 461             # For an opt test, you probably want to add -S or -disable-output.
 462             stdout = stdout.decode()
 463     # Fix line endings to unix CR style.
 464     return stdout.replace("\r\n", "\n")
 465
 466
 467 ##### LLVM IR parser
# A RUN line: optional indentation, a comment leader (`//`, `;` or `#`), then
# "RUN:"; group 1 is the rest of the line.
RUN_LINE_RE = re.compile(r"^\s*(?://|[;#])\s*RUN:\s*(.*)$")
# A -check-prefix/--check-prefix(es) option followed by '=' or a space;
# group 1 is the run of non-whitespace characters that follows.
CHECK_PREFIX_RE = re.compile(r"--?check-prefix(?:es)?[= ](\S+)")
# A whole string made only of letters, digits, '_' and '-'.
PREFIX_RE = re.compile("^[a-zA-Z0-9_-]+$")
# A check line: comment leader, then a prefix (group 1) with an optional
# -NEXT/-NOT/-DAG/-LABEL/-SAME/-EMPTY suffix, then ':'.
CHECK_RE = re.compile(
    r"^\s*(?://|[;#])\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME|-EMPTY)?:"
)
 474
 475 UTC_ARGS_KEY = "UTC_ARGS:"
 476 UTC_ARGS_CMD = re.compile(r".*" + UTC_ARGS_KEY + "\s*(?P<cmd>.*)\s*$")
 477 UTC_ADVERT = "NOTE: Assertions have been autogenerated by "
 478 UTC_AVOID = "NOTE: Do not autogenerate"
 479 UNUSED_NOTE = "NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:"
 480
# An IR function definition. Named groups: "attrs" (from an optional
# "; Function Attrs:" comment), "funcdef_attrs_and_ret" (text between `define`
# and '@'), "func" (the name), "args_and_sig" (argument list up to and
# including the opening '{') and "body" (everything up to a line that is
# just '}').
OPT_FUNCTION_RE = re.compile(
    r"^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s():,]+?))?\s*define\s+(?P<funcdef_attrs_and_ret>[^@]*)@(?P<func>[\w.$-]+?)\s*"
    r"(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P<body>.*?)^\}$",
    flags=(re.M | re.S),
)

# Text of the form `'<analysis>' for function '<func>':` followed by a body;
# named groups "analysis", "func" and "body".
ANALYZE_FUNCTION_RE = re.compile(
    r"^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.$-]+?)\':"
    r"\s*\n(?P<body>.*)$",
    flags=(re.X | re.S),
)

# A quoted function name ("func"), the rest of that line, then a "body".
LV_DEBUG_RE = re.compile(
    r"^\s*\'(?P<func>[\w.$-]+?)\'[^\n]*" r"\s*\n(?P<body>.*)$", flags=(re.X | re.S)
)

# Start of a `define` line; group 1 is the (optionally double-quoted) name.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(')
# A `target triple = "..."` line; group 1 is the quoted triple.
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
# -mtriple, -march and -debug-only options followed by '=' or a space;
# group 1 is the value up to the next space.
TRIPLE_ARG_RE = re.compile(r"-mtriple[= ]([^ ]+)")
MARCH_ARG_RE = re.compile(r"-march[= ]([^ ]+)")
DEBUG_ONLY_ARG_RE = re.compile(r"-debug-only[= ]([^ ]+)")
 502
# Whitespace at the very start of the string (no re.M here).
SCRUB_LEADING_WHITESPACE_RE = re.compile(r"^(\s+)")
# A run of spaces/tabs; the negative lookahead stops a match from starting at
# the beginning of a line.
SCRUB_WHITESPACE_RE = re.compile(r"(?!^(|  \w))[ \t]+", flags=re.M)
# Spaces/tabs at the end of any line.
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r"[ \t]+$", flags=re.M)
# Alias of SCRUB_TRAILING_WHITESPACE_RE; this is the name scrub_body() uses.
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
# Spaces/tabs and `#<digits>` tokens at the end of any line.
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(
    r"([ \t]|(#[0-9]+))+$", flags=re.M
)
# A `# kill:` comment line; without re.M the `^` only anchors at the start of
# the string.
SCRUB_KILL_COMMENT_RE = re.compile(r"^ *#+ +kill:.*\n")
# "# =>This Inner Loop Header:" and "# in Loop:" comments, to end of line.
SCRUB_LOOP_COMMENT_RE = re.compile(
    r"# =>This Inner Loop Header:.*|# in Loop:.*", flags=re.M
)
# A '#' at the end of a line, with optional spaces/tabs before it, that
# follows a non-whitespace character.
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r"(?<=\S)+[ \t]*#$", flags=re.M)

# Appended to the comment marker to form the separator line that
# should_add_line_to_output() drops.
SEPARATOR = "."
 517
 518
 519 def error(msg, test_file=None):
 520     if test_file:
 521         msg = "{}: {}".format(msg, test_file)
 522     print("ERROR: {}".format(msg), file=sys.stderr)
 523
 524
 525 def warn(msg, test_file=None):
 526     if test_file:
 527         msg = "{}: {}".format(msg, test_file)
 528     print("WARNING: {}".format(msg), file=sys.stderr)
 529
 530
 531 def debug(*args, **kwargs):
 532     # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
 533     if "file" not in kwargs:
 534         kwargs["file"] = sys.stderr
 535     if _verbose:
 536         print(*args, **kwargs)
 537
 538
 539 def find_run_lines(test, lines):
 540     debug("Scanning for RUN lines in test file:", test)
 541     raw_lines = [m.group(1) for m in [RUN_LINE_RE.match(l) for l in lines] if m]
 542     run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
 543     for l in raw_lines[1:]:
 544         if run_lines[-1].endswith("\\"):
 545             run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l
 546         else:
 547             run_lines.append(l)
 548     debug("Found {} RUN lines in {}:".format(len(run_lines), test))
 549     for l in run_lines:
 550         debug("  RUN: {}".format(l))
 551     return run_lines
 552
 553
 554 def get_triple_from_march(march):
 555     triples = {
 556         "amdgcn": "amdgcn",
 557         "r600": "r600",
 558         "mips": "mips",
 559         "sparc": "sparc",
 560         "hexagon": "hexagon",
 561         "ve": "ve",
 562     }
 563     for prefix, triple in triples.items():
 564         if march.startswith(prefix):
 565             return triple
 566     print("Cannot find a triple. Assume 'x86'", file=sys.stderr)
 567     return "x86"
 568
 569
 570 def apply_filters(line, filters):
 571     has_filter = False
 572     for f in filters:
 573         if not f.is_filter_out:
 574             has_filter = True
 575         if f.search(line):
 576             return False if f.is_filter_out else True
 577     # If we only used filter-out, keep the line, otherwise discard it since no
 578     # filter matched.
 579     return False if has_filter else True
 580
 581
 582 def do_filter(body, filters):
 583     return (
 584         body
 585         if not filters
 586         else "\n".join(
 587             filter(lambda line: apply_filters(line, filters), body.splitlines())
 588         )
 589     )
 590
 591
 592 def scrub_body(body):
 593     # Scrub runs of whitespace out of the assembly, but leave the leading
 594     # whitespace in place.
 595     body = SCRUB_WHITESPACE_RE.sub(r" ", body)
 596     # Expand the tabs used for indentation.
 597     body = str.expandtabs(body, 2)
 598     # Strip trailing whitespace.
 599     body = SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r"", body)
 600     return body
 601
 602
 603 def do_scrub(body, scrubber, scrubber_args, extra):
 604     if scrubber_args:
 605         local_args = copy.deepcopy(scrubber_args)
 606         local_args[0].extra_scrub = extra
 607         return scrubber(body, *local_args)
 608     return scrubber(body, *scrubber_args)
 609
 610
 611 # Build up a dictionary of all the function bodies.
 612 class function_body(object):
 613     def __init__(
 614         self,
 615         string,
 616         extra,
 617         funcdef_attrs_and_ret,
 618         args_and_sig,
 619         attrs,
 620         func_name_separator,
 621     ):
 622         self.scrub = string
 623         self.extrascrub = extra
 624         self.funcdef_attrs_and_ret = funcdef_attrs_and_ret
 625         self.args_and_sig = args_and_sig
 626         self.attrs = attrs
 627         self.func_name_separator = func_name_separator
 628
 629     def is_same_except_arg_names(
 630         self, extrascrub, funcdef_attrs_and_ret, args_and_sig, attrs, is_backend
 631     ):
 632         arg_names = set()
 633
 634         def drop_arg_names(match):
 635             arg_names.add(match.group(variable_group_in_ir_value_match))
 636             if match.group(attribute_group_in_ir_value_match):
 637                 attr = match.group(attribute_group_in_ir_value_match)
 638             else:
 639                 attr = ""
 640             return match.group(1) + attr + match.group(match.lastindex)
 641
 642         def repl_arg_names(match):
 643             if (
 644                 match.group(variable_group_in_ir_value_match) is not None
 645                 and match.group(variable_group_in_ir_value_match) in arg_names
 646             ):
 647                 return match.group(1) + match.group(match.lastindex)
 648             return match.group(1) + match.group(2) + match.group(match.lastindex)
 649
 650         if self.funcdef_attrs_and_ret != funcdef_attrs_and_ret:
 651             return False
 652         if self.attrs != attrs:
 653             return False
 654         ans0 = IR_VALUE_RE.sub(drop_arg_names, self.args_and_sig)
 655         ans1 = IR_VALUE_RE.sub(drop_arg_names, args_and_sig)
 656         if ans0 != ans1:
 657             return False
 658         if is_backend:
 659             # Check without replacements, the replacements are not applied to the
 660             # body for backend checks.
 661             return self.extrascrub == extrascrub
 662
 663         es0 = IR_VALUE_RE.sub(repl_arg_names, self.extrascrub)
 664         es1 = IR_VALUE_RE.sub(repl_arg_names, extrascrub)
 665         es0 = SCRUB_IR_COMMENT_RE.sub(r"", es0)
 666         es1 = SCRUB_IR_COMMENT_RE.sub(r"", es1)
 667         return es0 == es1
 668
 669     def __str__(self):
 670         return self.scrub
 671
 672
 673 class FunctionTestBuilder:
 674     def __init__(self, run_list, flags, scrubber_args, path):
 675         self._verbose = flags.verbose
 676         self._record_args = flags.function_signature
 677         self._check_attributes = flags.check_attributes
 678         # Strip double-quotes if input was read by UTC_ARGS
 679         self._filters = (
 680             list(
 681                 map(
 682                     lambda f: Filter(
 683                         re.compile(f.pattern().strip('"'), f.flags()), f.is_filter_out
 684                     ),
 685                     flags.filters,
 686                 )
 687             )
 688             if flags.filters
 689             else []
 690         )
 691         self._scrubber_args = scrubber_args
 692         self._path = path
 693         # Strip double-quotes if input was read by UTC_ARGS
 694         self._replace_value_regex = list(
 695             map(lambda x: x.strip('"'), flags.replace_value_regex)
 696         )
 697         self._func_dict = {}
 698         self._func_order = {}
 699         self._global_var_dict = {}
 700         self._processed_prefixes = set()
 701         for tuple in run_list:
 702             for prefix in tuple[0]:
 703                 self._func_dict.update({prefix: dict()})
 704                 self._func_order.update({prefix: []})
 705                 self._global_var_dict.update({prefix: dict()})
 706
 707     def finish_and_get_func_dict(self):
 708         for prefix in self.get_failed_prefixes():
 709             warn(
 710                 "Prefix %s had conflicting output from different RUN lines for all functions in test %s"
 711                 % (
 712                     prefix,
 713                     self._path,
 714                 )
 715             )
 716         return self._func_dict
 717
 718     def func_order(self):
 719         return self._func_order
 720
 721     def global_var_dict(self):
 722         return self._global_var_dict
 723
 724     def is_filtered(self):
 725         return bool(self._filters)
 726
 727     def process_run_line(
 728         self, function_re, scrubber, raw_tool_output, prefixes, is_backend
 729     ):
 730         build_global_values_dictionary(self._global_var_dict, raw_tool_output, prefixes)
 731         for m in function_re.finditer(raw_tool_output):
 732             if not m:
 733                 continue
 734             func = m.group("func")
 735             body = m.group("body")
 736             # func_name_separator is the string that is placed right after function name at the
 737             # beginning of assembly function definition. In most assemblies, that is just a
 738             # colon: `foo:`. But, for example, in nvptx it is a brace: `foo(`. If is_backend is
 739             # False, just assume that separator is an empty string.
 740             if is_backend:
 741                 # Use ':' as default separator.
 742                 func_name_separator = (
 743                     m.group("func_name_separator")
 744                     if "func_name_separator" in m.groupdict()
 745                     else ":"
 746                 )
 747             else:
 748                 func_name_separator = ""
 749             attrs = m.group("attrs") if self._check_attributes else ""
 750             funcdef_attrs_and_ret = (
 751                 m.group("funcdef_attrs_and_ret") if self._record_args else ""
 752             )
 753             # Determine if we print arguments, the opening brace, or nothing after the
 754             # function name
 755             if self._record_args and "args_and_sig" in m.groupdict():
 756                 args_and_sig = scrub_body(m.group("args_and_sig").strip())
 757             elif "args_and_sig" in m.groupdict():
 758                 args_and_sig = "("
 759             else:
 760                 args_and_sig = ""
 761             filtered_body = do_filter(body, self._filters)
 762             scrubbed_body = do_scrub(
 763                 filtered_body, scrubber, self._scrubber_args, extra=False
 764             )
 765             scrubbed_extra = do_scrub(
 766                 filtered_body, scrubber, self._scrubber_args, extra=True
 767             )
 768             if "analysis" in m.groupdict():
 769                 analysis = m.group("analysis")
 770                 supported_analyses = {
 771                     "cost model analysis",
 772                     "scalar evolution analysis",
 773                 }
 774                 if analysis.lower() not in supported_analyses:
 775                     warn("Unsupported analysis mode: %r!" % (analysis,))
 776             if func.startswith("stress"):
 777                 # We only use the last line of the function body for stress tests.
 778                 scrubbed_body = "\n".join(scrubbed_body.splitlines()[-1:])
 779             if self._verbose:
 780                 print("Processing function: " + func, file=sys.stderr)
 781                 for l in scrubbed_body.splitlines():
 782                     print("  " + l, file=sys.stderr)
 783             for prefix in prefixes:
 784                 # Replace function names matching the regex.
 785                 for regex in self._replace_value_regex:
 786                     # Pattern that matches capture groups in the regex in leftmost order.
 787                     group_regex = re.compile(r"\(.*?\)")
 788                     # Replace function name with regex.
 789                     match = re.match(regex, func)
 790                     if match:
 791                         func_repl = regex
 792                         # Replace any capture groups with their matched strings.
 793                         for g in match.groups():
 794                             func_repl = group_regex.sub(
 795                                 re.escape(g), func_repl, count=1
 796                             )
 797                         func = re.sub(func_repl, "{{" + func_repl + "}}", func)
 798
 799                     # Replace all calls to regex matching functions.
 800                     matches = re.finditer(regex, scrubbed_body)
 801                     for match in matches:
 802                         func_repl = regex
 803                         # Replace any capture groups with their matched strings.
 804                         for g in match.groups():
 805                             func_repl = group_regex.sub(
 806                                 re.escape(g), func_repl, count=1
 807                             )
 808                         # Substitute function call names that match the regex with the same
 809                         # capture groups set.
 810                         scrubbed_body = re.sub(
 811                             func_repl, "{{" + func_repl + "}}", scrubbed_body
 812                         )
 813
 814                 if func in self._func_dict[prefix]:
 815                     if self._func_dict[prefix][func] is not None and (
 816                         str(self._func_dict[prefix][func]) != scrubbed_body
 817                         or self._func_dict[prefix][func].args_and_sig != args_and_sig
 818                         or self._func_dict[prefix][func].attrs != attrs
 819                         or self._func_dict[prefix][func].funcdef_attrs_and_ret
 820                         != funcdef_attrs_and_ret
 821                     ):
 822                         if self._func_dict[prefix][func].is_same_except_arg_names(
 823                             scrubbed_extra,
 824                             funcdef_attrs_and_ret,
 825                             args_and_sig,
 826                             attrs,
 827                             is_backend,
 828                         ):
 829                             self._func_dict[prefix][func].scrub = scrubbed_extra
 830                             self._func_dict[prefix][func].args_and_sig = args_and_sig
 831                         else:
 832                             # This means a previous RUN line produced a body for this function
 833                             # that is different from the one produced by this current RUN line,
 834                             # so the body can't be common across RUN lines. We use None to
 835                             # indicate that.
 836                             self._func_dict[prefix][func] = None
 837                 else:
 838                     if prefix not in self._processed_prefixes:
 839                         self._func_dict[prefix][func] = function_body(
 840                             scrubbed_body,
 841                             scrubbed_extra,
 842                             funcdef_attrs_and_ret,
 843                             args_and_sig,
 844                             attrs,
 845                             func_name_separator,
 846                         )
 847                         self._func_order[prefix].append(func)
 848                     else:
 849                         # An earlier RUN line used this check prefixes but didn't produce
 850                         # a body for this function. This happens in Clang tests that use
 851                         # preprocesser directives to exclude individual functions from some
 852                         # RUN lines.
 853                         self._func_dict[prefix][func] = None
 854
    def processed_prefixes(self, prefixes):
        """
        Mark a set of prefixes as having had at least one applicable RUN line fully
        processed. This is used to filter out function bodies that don't have
        outputs for all RUN lines.

        `prefixes` is an iterable of check prefixes; it is merged into
        self._processed_prefixes through that container's update() method.
        """
        self._processed_prefixes.update(prefixes)
 862
 863     def get_failed_prefixes(self):
 864         # This returns the list of those prefixes that failed to match any function,
 865         # because there were conflicting bodies produced by different RUN lines, in
 866         # all instances of the prefix.
 867         for prefix in self._func_dict:
 868             if self._func_dict[prefix] and (
 869                 not [
 870                     fct
 871                     for fct in self._func_dict[prefix]
 872                     if self._func_dict[prefix][fct] is not None
 873                 ]
 874             ):
 875                 yield prefix
 876
 877
 878 ##### Generator of LLVM IR CHECK lines
 879
# Matches an IR comment: optional leading whitespace, a ';' and everything that
# follows it up to the end of the line.
SCRUB_IR_COMMENT_RE = re.compile(r"\s*;.*")
 881
# TODO: We should also derive check lines for global, debug, and loop declarations, etc.
 883
 884
 885 class NamelessValue:
 886     def __init__(
 887         self,
 888         check_prefix,
 889         check_key,
 890         ir_prefix,
 891         ir_regexp,
 892         global_ir_rhs_regexp,
 893         *,
 894         is_before_functions=False,
 895         is_number=False,
 896         replace_number_with_counter=False
 897     ):
 898         self.check_prefix = check_prefix
 899         self.check_key = check_key
 900         self.ir_prefix = ir_prefix
 901         self.ir_regexp = ir_regexp
 902         self.global_ir_rhs_regexp = global_ir_rhs_regexp
 903         self.is_before_functions = is_before_functions
 904         self.is_number = is_number
 905         # Some variable numbers (e.g. MCINST1234) will change based on unrelated
 906         # modifications to LLVM, replace those with an incrementing counter.
 907         self.replace_number_with_counter = replace_number_with_counter
 908         self.variable_mapping = {}
 909
 910     # Return true if this kind of IR value is "local", basically if it matches '%{{.*}}'.
 911     def is_local_def_ir_value_match(self, match):
 912         return self.ir_prefix == "%"
 913
 914     # Return true if this kind of IR value is "global", basically if it matches '#{{.*}}'.
 915     def is_global_scope_ir_value_match(self, match):
 916         return self.global_ir_rhs_regexp is not None
 917
 918     # Return the IR prefix and check prefix we use for this kind or IR value,
 919     # e.g., (%, TMP) for locals.
 920     def get_ir_prefix_from_ir_value_match(self, match):
 921         return self.ir_prefix, self.check_prefix
 922
 923     # Return the IR regexp we use for this kind or IR value, e.g., [\w.-]+? for locals
 924     def get_ir_regex_from_ir_value_re_match(self, match):
 925         # for backwards compatibility we check locals with '.*'
 926         if self.is_local_def_ir_value_match(match):
 927             return ".*"
 928         return self.ir_regexp
 929
 930     # Create a FileCheck variable name based on an IR name.
 931     def get_value_name(self, var: str, check_prefix: str):
 932         var = var.replace("!", "")
 933         if self.replace_number_with_counter:
 934             assert var.isdigit(), var
 935             replacement = self.variable_mapping.get(var, None)
 936             if replacement is None:
 937                 # Replace variable with an incrementing counter
 938                 replacement = str(len(self.variable_mapping) + 1)
 939                 self.variable_mapping[var] = replacement
 940             var = replacement
 941         # This is a nameless value, prepend check_prefix.
 942         if var.isdigit():
 943             var = check_prefix + var
 944         else:
 945             # This is a named value that clashes with the check_prefix, prepend with
 946             # _prefix_filecheck_ir_name, if it has been defined.
 947             if (
 948                 may_clash_with_default_check_prefix_name(check_prefix, var)
 949                 and _prefix_filecheck_ir_name
 950             ):
 951                 var = _prefix_filecheck_ir_name + var
 952         var = var.replace(".", "_")
 953         var = var.replace("-", "_")
 954         return var.upper()
 955
 956     # Create a FileCheck variable from regex.
 957     def get_value_definition(self, var, match):
 958         # for backwards compatibility we check locals with '.*'
 959         varname = self.get_value_name(var, self.check_prefix)
 960         prefix = self.get_ir_prefix_from_ir_value_match(match)[0]
 961         if self.is_number:
 962             regex = ""  # always capture a number in the default format
 963             capture_start = "[[#"
 964         else:
 965             regex = self.get_ir_regex_from_ir_value_re_match(match)
 966             capture_start = "[["
 967         if self.is_local_def_ir_value_match(match):
 968             return capture_start + varname + ":" + prefix + regex + "]]"
 969         return prefix + capture_start + varname + ":" + regex + "]]"
 970
 971     # Use a FileCheck variable.
 972     def get_value_use(self, var, match, var_prefix=None):
 973         if var_prefix is None:
 974             var_prefix = self.check_prefix
 975         capture_start = "[[#" if self.is_number else "[["
 976         if self.is_local_def_ir_value_match(match):
 977             return capture_start + self.get_value_name(var, var_prefix) + "]]"
 978         prefix = self.get_ir_prefix_from_ir_value_match(match)[0]
 979         return prefix + capture_start + self.get_value_name(var, var_prefix) + "]]"
 980
 981
# Description of the different "unnamed" values we match in the IR, e.g.,
# (local) ssa values, (debug) metadata, etc.
#
# The order of the entries matters: IR_VALUE_RE creates one alternative per
# entry, in list order, and get_idx_from_ir_value_match() turns the capture
# group that matched back into an index into this list.
ir_nameless_values = [
    # Positional arguments, in order:
    #   check_prefix, check_key, ir_prefix, ir_regexp, global_ir_rhs_regexp
    NamelessValue(r"TMP", "%", r"%", r"[\w$.-]+?", None),
    NamelessValue(r"ATTR", "#", r"#", r"[0-9]+", None),
    NamelessValue(r"ATTR", "#", r"attributes #", r"[0-9]+", r"{[^}]*}"),
    NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", None),
    NamelessValue(
        r"GLOB", "@", r"@", r'[a-zA-Z0-9_$"\\.-]+', r".+", is_before_functions=True
    ),
    NamelessValue(r"DBG", "!", r"!dbg ", r"![0-9]+", None),
    NamelessValue(r"DIASSIGNID", "!", r"!DIAssignID ", r"![0-9]+", None),
    NamelessValue(r"PROF", "!", r"!prof ", r"![0-9]+", None),
    NamelessValue(r"TBAA", "!", r"!tbaa ", r"![0-9]+", None),
    NamelessValue(r"TBAA_STRUCT", "!", r"!tbaa.struct ", r"![0-9]+", None),
    NamelessValue(r"RNG", "!", r"!range ", r"![0-9]+", None),
    NamelessValue(r"LOOP", "!", r"!llvm.loop ", r"![0-9]+", None),
    NamelessValue(r"META", "!", r"metadata ", r"![0-9]+", None),
    NamelessValue(r"META", "!", r"", r"![0-9]+", r"(?:distinct |)!.*"),
    NamelessValue(r"ACC_GRP", "!", r"!llvm.access.group ", r"![0-9]+", None),
]
1004
# Nameless values matched in assembly output. Both kinds are numeric
# (is_number=True, so they are captured with '[[#') and have their original
# numbers replaced by an incrementing counter (replace_number_with_counter=True).
# As with ir_nameless_values, the list order determines the capture group order
# in ASM_VALUE_RE.
asm_nameless_values = [
    NamelessValue(
        r"MCINST",
        "Inst#",
        "<MCInst #",
        r"\d+",
        r".+",
        is_number=True,
        replace_number_with_counter=True,
    ),
    NamelessValue(
        r"MCREG",
        "Reg:",
        "<MCOperand Reg:",
        r"\d+",
        r".+",
        is_number=True,
        replace_number_with_counter=True,
    ),
]
1025
1026
def createOrRegexp(old, new):
    """Join two regexp fragments into an alternation 'old|new'.

    When either fragment is empty (falsy) the other one is returned unchanged,
    so no stray '|' is produced.
    """
    if old and new:
        return old + "|" + new
    return old if old else new
1033
1034
def createPrefixMatch(prefix_str, prefix_re):
    """Build a regexp fragment matching prefix_str followed by prefix_re.

    The whole fragment is a non-capturing group; only the prefix_re part is
    wrapped in a capturing group.
    """
    return "".join(("(?:", prefix_str, "(", prefix_re, "))"))
1037
1038
# Build the regexp that matches an "IR value". This can be a local variable,
# argument, global, or metadata, anything that is "named". It is important that
# the PREFIX and SUFFIX below only contain a single group, if that changes
# other locations will need adjustment as well.
#
# Resulting group layout: group 1 is the leading whitespace, group 2 the whole
# alternation, then one group per entry of ir_nameless_values (added by
# createPrefixMatch, in list order), and the last group is the suffix.
IR_VALUE_REGEXP_PREFIX = r"(\s*)"
IR_VALUE_REGEXP_STRING = r""
for nameless_value in ir_nameless_values:
    match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
    if nameless_value.global_ir_rhs_regexp is not None:
        # Kinds that have a global right-hand side are anchored with '^'.
        match = "^" + match
    IR_VALUE_REGEXP_STRING = createOrRegexp(IR_VALUE_REGEXP_STRING, match)
# A value must be followed by a comma, whitespace, a parenthesis or the end of
# the string.
IR_VALUE_REGEXP_SUFFIX = r"([,\s\(\)]|\Z)"
IR_VALUE_RE = re.compile(
    IR_VALUE_REGEXP_PREFIX
    + r"("
    + IR_VALUE_REGEXP_STRING
    + r")"
    + IR_VALUE_REGEXP_SUFFIX
)
1058
# Build the regexp that matches an "ASM value" (currently only for --asm-show-inst comments).
# The group layout mirrors IR_VALUE_RE: group 1 is the comment leader ('#' or
# '//') plus following whitespace, group 2 the whole alternation, then one group
# per entry of asm_nameless_values, and the last group is the suffix.
ASM_VALUE_REGEXP_STRING = ""
for nameless_value in asm_nameless_values:
    match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
    ASM_VALUE_REGEXP_STRING = createOrRegexp(ASM_VALUE_REGEXP_STRING, match)
# A value must be followed by '>', whitespace or the end of the string.
ASM_VALUE_REGEXP_SUFFIX = r"([>\s]|\Z)"
ASM_VALUE_RE = re.compile(
    r"((?:#|//)\s*)" + "(" + ASM_VALUE_REGEXP_STRING + ")" + ASM_VALUE_REGEXP_SUFFIX
)
1068
# The entire match is group 0, the prefix has one group (=1), the entire
# IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start.
first_nameless_group_in_ir_value_match = 3

# Constants for the group id of special matches.
# Group 3 is the capture of the first entry of ir_nameless_values (TMP, "%"),
# group 4 the capture of the second entry (ATTR, "#").
variable_group_in_ir_value_match = 3
attribute_group_in_ir_value_match = 4
1076
def get_idx_from_ir_value_match(match):
    """Return the index of the nameless-value kind that produced match.

    Inspects a match of IR_VALUE_RE to determine if it was a local value %...,
    global @..., debug number !dbg !..., etc. The per-kind capture groups start
    at first_nameless_group_in_ir_value_match and end just before the last
    group that took part in the match; the first one that is set decides the
    kind. Reports an error and returns 0 when none of them is set.
    """
    group_id = first_nameless_group_in_ir_value_match
    while group_id < match.lastindex:
        if match.group(group_id) is not None:
            return group_id - first_nameless_group_in_ir_value_match
        group_id += 1
    error("Unable to identify the kind of IR value from the match!")
    return 0
1085
1086
def get_name_from_ir_value_match(match):
    """Return the text captured for the nameless value in match.

    See get_idx_from_ir_value_match for how the capture group is located.
    """
    kind_index = get_idx_from_ir_value_match(match)
    group_id = kind_index + first_nameless_group_in_ir_value_match
    return match.group(group_id)
1092
1093
def get_nameless_value_from_match(match, nameless_values) -> NamelessValue:
    """Return the entry of nameless_values whose capture group matched in match."""
    kind_index = get_idx_from_ir_value_match(match)
    return nameless_values[kind_index]
1096
1097
def may_clash_with_default_check_prefix_name(check_prefix, var):
    """Return a truthy value if var clashes with the scripted FileCheck check_prefix.

    A clash means var is check_prefix followed by one or more digits, compared
    case-insensitively. The result is the re match object (or None); an empty
    check_prefix is returned as is.
    """
    if not check_prefix:
        return check_prefix
    clash_pattern = r"^" + check_prefix + r"[0-9]+?$"
    return re.match(clash_pattern, var, re.IGNORECASE)
1103
1104
def generalize_check_lines_common(
    lines,
    is_analyze,
    vars_seen,
    global_vars_seen,
    nameless_values,
    nameless_value_regex,
    is_asm,
):
    """Replace nameless values in lines with FileCheck variable defs and uses.

    lines is modified in place and also returned. For non-asm input every line
    first has '%.' renamed, selected global integer initializers rewritten as
    hex captures and IR comments removed. Unless is_analyze is set for
    non-asm input, every match of nameless_value_regex is then turned into a
    FileCheck definition the first time the value is seen and into a use
    afterwards.

    vars_seen is a set of (name, check_key) pairs for local values;
    global_vars_seen maps (name, check_key) to the check prefix the value was
    first defined with. Both are updated as new definitions are emitted.
    nameless_values must be the list nameless_value_regex was built from.
    """

    # This gets called for each match that occurs in
    # a line. We transform variables we haven't seen
    # into defs, and variables we have seen into uses.
    def transform_line_vars(match):
        var = get_name_from_ir_value_match(match)
        nameless_value = get_nameless_value_from_match(match, nameless_values)
        if may_clash_with_default_check_prefix_name(nameless_value.check_prefix, var):
            warn(
                "Change IR value name '%s' or use --prefix-filecheck-ir-name to prevent possible conflict"
                " with scripted FileCheck name." % (var,)
            )
        key = (var, nameless_value.check_key)
        is_local_def = nameless_value.is_local_def_ir_value_match(match)
        if is_local_def and key in vars_seen:
            rv = nameless_value.get_value_use(var, match)
        elif not is_local_def and key in global_vars_seen:
            # We could have seen a different prefix for the global variables first,
            # ensure we use that one instead of the prefix for the current match.
            rv = nameless_value.get_value_use(var, match, global_vars_seen[key])
        else:
            if is_local_def:
                vars_seen.add(key)
            else:
                global_vars_seen[key] = nameless_value.check_prefix
            rv = nameless_value.get_value_definition(var, match)
        # re.sub replaces the entire regex match
        # with whatever you return, so we have
        # to make sure to hand it back everything
        # including the commas and spaces.
        return match.group(1) + rv + match.group(match.lastindex)

    for i, line in enumerate(lines):
        if not is_asm:
            # An IR variable named '%.' matches the FileCheck regex string.
            line = line.replace("%.", "%dot")
            for regex in _global_hex_value_regex:
                if re.match("^@" + regex + " = ", line):
                    # Rewrite 'iN <decimal>' as 'iN [[#0x...]]' for the first
                    # requested global that this line defines.
                    line = re.sub(
                        r"\bi([0-9]+) ([0-9]+)",
                        lambda m: "i"
                        + m.group(1)
                        + " [[#"
                        + hex(int(m.group(2)))
                        + "]]",
                        line,
                    )
                    break
            # Ignore any comments, since the check lines will too.
            scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r"", line)
            lines[i] = scrubbed_line
        if is_asm or not is_analyze:
            # It can happen that two matches are back-to-back and for some reason sub
            # will not replace both of them. For now we work around this by
            # substituting until there is no more match.
            changed = True
            while changed:
                (lines[i], changed) = nameless_value_regex.subn(
                    transform_line_vars, lines[i], count=1
                )
    return lines
1176
1177
def generalize_check_lines(lines, is_analyze, vars_seen, global_vars_seen):
    """Replace IR value defs and uses in lines with FileCheck variables.

    Thin wrapper around generalize_check_lines_common using the IR nameless
    value table and IR_VALUE_RE.
    """
    return generalize_check_lines_common(
        lines=lines,
        is_analyze=is_analyze,
        vars_seen=vars_seen,
        global_vars_seen=global_vars_seen,
        nameless_values=ir_nameless_values,
        nameless_value_regex=IR_VALUE_RE,
        is_asm=False,
    )
1189
1190
def generalize_asm_check_lines(lines, vars_seen, global_vars_seen):
    """Replace ASM value defs and uses in lines with FileCheck variables.

    Thin wrapper around generalize_check_lines_common using the ASM nameless
    value table and ASM_VALUE_RE; is_analyze is always False.
    """
    return generalize_check_lines_common(
        lines=lines,
        is_analyze=False,
        vars_seen=vars_seen,
        global_vars_seen=global_vars_seen,
        nameless_values=asm_nameless_values,
        nameless_value_regex=ASM_VALUE_RE,
        is_asm=True,
    )
1201
1202
def add_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    check_label_format,
    is_backend,
    is_analyze,
    version,
    global_vars_seen_dict,
    is_filtered,
):
    """Append the check lines for func_name to output_lines.

    For every entry of prefix_list (whose element 0 is the list of check
    prefixes of one RUN line) the first usable prefix is picked and a label
    line, an optional -SAME line for the arguments and the body checks are
    emitted for it.

    func_dict maps check prefix -> function name -> function body object (or
    None when RUN lines disagreed). check_label_format is a %-format string
    taking (check prefix, funcdef attrs and return type, function name,
    arguments/signature, function name separator). is_backend selects the
    ASM-style body emission, is_analyze is forwarded to
    generalize_check_lines, and is_filtered drops the "-NEXT" suffix.
    global_vars_seen_dict maps check prefix -> global FileCheck variables
    already defined for it and is updated with the newly defined ones.

    Returns the list of check prefixes for which checks were printed.
    """
    # prefix_exclusions are prefixes we cannot use to print the function because
    # it doesn't exist in run lines that use these prefixes as well.
    prefix_exclusions = set()
    printed_prefixes = []
    for p in prefix_list:
        checkprefixes = p[0]
        # If not all checkprefixes of this run line produced the function we cannot check for it as it does not
        # exist for this run line. A subset of the check prefixes might know about the function but only because
        # other run lines created it.
        if any(
            map(
                lambda checkprefix: func_name not in func_dict[checkprefix],
                checkprefixes,
            )
        ):
            prefix_exclusions |= set(checkprefixes)
            continue

    # prefix_exclusions is constructed, we can now emit the output
    for p in prefix_list:
        # Globals already defined under any prefix of this run line that was
        # visited so far.
        global_vars_seen = {}
        checkprefixes = p[0]
        for checkprefix in checkprefixes:
            if checkprefix in global_vars_seen_dict:
                global_vars_seen.update(global_vars_seen_dict[checkprefix])
            else:
                global_vars_seen_dict[checkprefix] = {}
            # Already printed for an earlier run line: this run line is covered.
            if checkprefix in printed_prefixes:
                break

            # Check if the prefix is excluded.
            if checkprefix in prefix_exclusions:
                continue

            # If we do not have output for this prefix we skip it.
            if not func_dict[checkprefix][func_name]:
                continue

            # Add some space between different check prefixes, but not after the last
            # check line (before the test code).
            if is_backend:
                if len(printed_prefixes) != 0:
                    output_lines.append(comment_marker)

            if checkprefix not in global_vars_seen_dict:
                global_vars_seen_dict[checkprefix] = {}

            # Snapshot of the keys, used below to find the globals that are
            # newly defined while emitting this function.
            global_vars_seen_before = [key for key in global_vars_seen.keys()]

            vars_seen = set()
            printed_prefixes.append(checkprefix)
            attrs = str(func_dict[checkprefix][func_name].attrs)
            attrs = "" if attrs == "None" else attrs
            # The function definition attributes and return type are only
            # checked from version 2 on.
            if version > 1:
                funcdef_attrs_and_ret = func_dict[checkprefix][
                    func_name
                ].funcdef_attrs_and_ret
            else:
                funcdef_attrs_and_ret = ""

            if attrs:
                output_lines.append(
                    "%s %s: Function Attrs: %s" % (comment_marker, checkprefix, attrs)
                )
            args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
            if args_and_sig:
                args_and_sig = generalize_check_lines(
                    [args_and_sig], is_analyze, vars_seen, global_vars_seen
                )[0]
            func_name_separator = func_dict[checkprefix][func_name].func_name_separator
            if "[[" in args_and_sig:
                # Captures in label lines are not supported, thus split into a -LABEL
                # and a separate -SAME line that contains the arguments with captures.
                args_and_sig_prefix = ""
                if version >= 3 and args_and_sig.startswith("("):
                    # Ensure the "(" separating function name and arguments is in the
                    # label line. This is required in case of function names that are
                    # prefixes of each other. Otherwise, the label line for "foo" might
                    # incorrectly match on "foo.specialized".
                    args_and_sig_prefix = args_and_sig[0]
                    args_and_sig = args_and_sig[1:]

                # Removing args_and_sig from the label match line requires
                # func_name_separator to be empty. Otherwise, the match will not work.
                assert func_name_separator == ""
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig_prefix,
                        func_name_separator,
                    )
                )
                output_lines.append(
                    "%s %s-SAME: %s" % (comment_marker, checkprefix, args_and_sig)
                )
            else:
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig,
                        func_name_separator,
                    )
                )
            func_body = str(func_dict[checkprefix][func_name]).splitlines()
            if not func_body:
                # We have filtered everything.
                continue

            # For ASM output, just emit the check lines.
            if is_backend:
                body_start = 1
                if is_filtered:
                    # For filtered output we don't add "-NEXT" so don't add extra spaces
                    # before the first line.
                    body_start = 0
                else:
                    # The first body line is emitted as a plain check, without
                    # "-NEXT" and without generalization.
                    output_lines.append(
                        "%s %s:       %s" % (comment_marker, checkprefix, func_body[0])
                    )
                func_lines = generalize_asm_check_lines(
                    func_body[body_start:], vars_seen, global_vars_seen
                )
                for func_line in func_lines:
                    if func_line.strip() == "":
                        output_lines.append(
                            "%s %s-EMPTY:" % (comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "%s %s%s:  %s"
                            % (comment_marker, checkprefix, check_suffix, func_line)
                        )
                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                # One prefix per run line is enough.
                break

            # For IR output, change all defs to FileCheck variables, so we're immune
            # to variable naming fashions.
            func_body = generalize_check_lines(
                func_body, is_analyze, vars_seen, global_vars_seen
            )

            # This could be selectively enabled with an optional invocation argument.
            # Disabled for now: better to check everything. Be safe rather than sorry.

            # Handle the first line of the function body as a special case because
            # it's often just noise (a useless asm comment or entry label).
            # if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
            #  is_blank_line = True
            # else:
            #  output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
            #  is_blank_line = False

            is_blank_line = False

            for func_line in func_body:
                if func_line.strip() == "":
                    is_blank_line = True
                    continue
                # Do not waste time checking IR comments.
                func_line = SCRUB_IR_COMMENT_RE.sub(r"", func_line)

                # Skip blank lines instead of checking them: the line following a
                # blank line is emitted as a plain check, without "-NEXT".
                if is_blank_line:
                    output_lines.append(
                        "{} {}:       {}".format(comment_marker, checkprefix, func_line)
                    )
                else:
                    check_suffix = "-NEXT" if not is_filtered else ""
                    output_lines.append(
                        "{} {}{}:  {}".format(
                            comment_marker, checkprefix, check_suffix, func_line
                        )
                    )
                is_blank_line = False

            # Add space between different check prefixes and also before the first
            # line of code in the test function.
            output_lines.append(comment_marker)

            # Remember new global variables we have not seen before
            for key in global_vars_seen:
                if key not in global_vars_seen_before:
                    global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
            # One prefix per run line is enough.
            break
    return printed_prefixes
1410
1411
def add_ir_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    preserve_names,
    function_sig,
    version,
    global_vars_seen_dict,
    is_filtered,
):
    """Append IR check lines for func_name to output_lines via add_checks.

    The label line format depends on function_sig and version: without
    function_sig only '@name' is matched, with it the line starts with
    'define', followed by a catch-all regex in version 1 or by the recorded
    attributes and return type in later versions. preserve_names is passed on
    as add_checks' is_analyze argument.

    Returns whatever add_checks returns.
    """
    # Label format is based on IR string.
    if not function_sig:
        function_def_regex = "%s"
    elif version > 1:
        function_def_regex = "define %s"
    else:
        function_def_regex = "define {{[^@]+}}%s"
    check_label_format = "{} %s-LABEL: {}@%s%s%s".format(
        comment_marker, function_def_regex
    )
    return add_checks(
        output_lines=output_lines,
        comment_marker=comment_marker,
        prefix_list=prefix_list,
        func_dict=func_dict,
        func_name=func_name,
        check_label_format=check_label_format,
        is_backend=False,
        is_analyze=preserve_names,
        version=version,
        global_vars_seen_dict=global_vars_seen_dict,
        is_filtered=is_filtered,
    )
1447
1448
def add_analyze_checks(
    output_lines, comment_marker, prefix_list, func_dict, func_name, is_filtered
):
    """Append analysis-output check lines for func_name to output_lines.

    Delegates to add_checks with a quoted label format, is_backend=False,
    is_analyze=True, version 1 and a fresh, empty record of seen globals.
    Returns whatever add_checks returns.
    """
    label_format = "{} %s-LABEL: '%s%s%s%s'".format(comment_marker)
    return add_checks(
        output_lines=output_lines,
        comment_marker=comment_marker,
        prefix_list=prefix_list,
        func_dict=func_dict,
        func_name=func_name,
        check_label_format=label_format,
        is_backend=False,
        is_analyze=True,
        version=1,
        global_vars_seen_dict={},
        is_filtered=is_filtered,
    )
1467
1468
def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes):
    """Record the global definition lines of raw_tool_output per check prefix.

    For each kind of nameless value that has a global right-hand-side regexp,
    all lines of the form '<prefix><value> = <rhs>' are collected and stored in
    glob_val_dict[prefix][check_prefix] for every prefix in prefixes whose
    entry is not None. If a prefix already holds different lines, its entry for
    that kind is reset to None, except for the last prefix of the list, where
    a warning is issued and the new lines replace the old ones.
    """
    for nameless_value in itertools.chain(ir_nameless_values, asm_nameless_values):
        rhs_re_str = nameless_value.global_ir_rhs_regexp
        if rhs_re_str is None:
            continue

        definition_re = re.compile(
            r"^"
            + nameless_value.ir_prefix
            + nameless_value.ir_regexp
            + r"\s=\s"
            + rhs_re_str
            + r"$",
            flags=re.M,
        )
        lines = [found.group(0) for found in definition_re.finditer(raw_tool_output)]

        kind = nameless_value.check_prefix
        for prefix in prefixes:
            recorded = glob_val_dict[prefix]
            if recorded is None:
                continue
            if kind in recorded:
                if lines == recorded[kind]:
                    continue
                if prefix != prefixes[-1]:
                    # Conflicting output under a shared prefix: give up on it.
                    recorded[kind] = None
                    continue
                warn("Found conflicting asm under the same prefix: %r!" % (prefix,))
            recorded[kind] = lines
1495
1496
def add_global_checks(
    glob_val_dict,
    comment_marker,
    prefix_list,
    output_lines,
    global_vars_seen_dict,
    is_analyze,
    is_before_functions,
):
    """Append check lines for recorded global values to output_lines.

    glob_val_dict is indexed as glob_val_dict[checkprefix][kind] and yields
    the lines to check, where kind is a nameless value's check_prefix.
    prefix_list is a sequence whose items carry a list of check prefixes (or
    None) as their first element.  Only nameless values that have a
    global_ir_rhs_regexp and whose is_before_functions flag equals
    is_before_functions are considered.

    global_vars_seen_dict maps a check prefix to a dict of already seen
    global variables; it is updated in place with newly seen entries.

    Returns the set of (checkprefix, kind) pairs for which checks were
    emitted.
    """
    printed_prefixes = set()
    for nameless_value in ir_nameless_values:
        if nameless_value.global_ir_rhs_regexp is None:
            continue
        if nameless_value.is_before_functions != is_before_functions:
            continue
        for p in prefix_list:
            # Accumulates the seen-variables of every check prefix visited for
            # this entry of prefix_list.
            global_vars_seen = {}
            checkprefixes = p[0]
            if checkprefixes is None:
                continue
            for checkprefix in checkprefixes:
                if checkprefix in global_vars_seen_dict:
                    global_vars_seen.update(global_vars_seen_dict[checkprefix])
                else:
                    global_vars_seen_dict[checkprefix] = {}
                # Checks for this (prefix, kind) pair were already emitted;
                # stop processing this entry of prefix_list.
                if (checkprefix, nameless_value.check_prefix) in printed_prefixes:
                    break
                # Skip prefixes with no (or empty/invalidated) recorded lines.
                if not glob_val_dict[checkprefix]:
                    continue
                if nameless_value.check_prefix not in glob_val_dict[checkprefix]:
                    continue
                if not glob_val_dict[checkprefix][nameless_value.check_prefix]:
                    continue

                check_lines = []
                # Snapshot of the keys known before generalizing, used below to
                # detect which entries are new.
                global_vars_seen_before = [key for key in global_vars_seen.keys()]
                for line in glob_val_dict[checkprefix][nameless_value.check_prefix]:
                    # When _global_value_regex is set, keep only lines of the
                    # form "@<regex> = ..." for at least one of its regexes.
                    if _global_value_regex:
                        matched = False
                        for regex in _global_value_regex:
                            if re.match("^@" + regex + " = ", line):
                                matched = True
                                break
                        if not matched:
                            continue
                    tmp = generalize_check_lines(
                        [line], is_analyze, set(), global_vars_seen
                    )
                    check_line = "%s %s: %s" % (comment_marker, checkprefix, tmp[0])
                    check_lines.append(check_line)
                if not check_lines:
                    continue

                # Each emitted group is preceded by a separator line.
                output_lines.append(comment_marker + SEPARATOR)
                for check_line in check_lines:
                    output_lines.append(check_line)

                printed_prefixes.add((checkprefix, nameless_value.check_prefix))

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                # Only the first check prefix that produced output is used for
                # this entry of prefix_list.
                break

    # Close the emitted groups with one trailing separator line.
    if printed_prefixes:
        output_lines.append(comment_marker + SEPARATOR)
    return printed_prefixes
1565
1566
def check_prefix(prefix):
    """Warn if prefix does not match PREFIX_RE.

    When the rejected prefix contains a comma, the warning also suggests
    using --check-prefixes instead.
    """
    if PREFIX_RE.match(prefix):
        return
    # Format the message before appending the hint: the hint embeds the raw
    # prefix, which must not be interpreted as part of the format string
    # (a prefix such as "A,%s" would otherwise make the formatting raise).
    message = (
        "Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores."
        % (prefix,)
    )
    if "," in prefix:
        message += " Did you mean '--check-prefixes=" + prefix + "'?"
    warn(message)
1579
1580
def get_check_prefixes(filecheck_cmd):
    """Return the check prefixes named in filecheck_cmd.

    Every CHECK_PREFIX_RE match contributes the comma-separated items of its
    first group, in order of appearance.  When nothing matches, the result is
    ["CHECK"].
    """
    collected = []
    for found in CHECK_PREFIX_RE.finditer(filecheck_cmd):
        collected.extend(found.group(1).split(","))
    return collected or ["CHECK"]
1590
1591
def verify_filecheck_prefixes(fc_cmd):
    """Validate the prefixes given in the command line fc_cmd.

    Each whitespace-separated word containing "check-prefix=" or
    "check-prefixes=" has the text after its first "=" passed to
    check_prefix (split on commas for the plural form).  In the plural form a
    warning is also issued for every occurrence of a prefix that appears more
    than once in the list.
    """
    for word in fc_cmd.split():
        if "check-prefix=" in word:
            check_prefix(word.split("=", 1)[1])
            continue
        if "check-prefixes=" not in word:
            continue
        names = word.split("=", 1)[1].split(",")
        for name in names:
            check_prefix(name)
            if names.count(name) > 1:
                warn(
                    "Supplied prefix '%s' is not unique in the prefix list."
                    % (name,)
                )
1607
1608
def get_autogennote_suffix(parser, args):
    """Build the UTC_ARGS suffix describing the non-default options in args.

    Walks the actions registered on parser and, for each one whose value in
    args differs from the parser default, appends its first option string
    (plus its value when the action takes a parameter).  The "filters"
    destination is expanded into one --filter / --filter-out option per
    element.  Returns an empty string when nothing needs recording, otherwise
    " <UTC_ARGS_KEY> <options>".
    """
    # Parameters such as paths to the binary or the list of tests are never
    # recorded in the note.
    unrecorded_dests = (
        "tests",
        "update_only",
        "tool_binary",
        "opt_binary",
        "llc_binary",
        "clang",
        "opt",
        "llvm_bin",
        "verbose",
        "force_update",
    )
    note = ""
    for action in parser._actions:
        dest = action.dest
        # Options such as --help are not included in args at all.
        if not hasattr(args, dest) or dest in unrecorded_dests:
            continue
        value = getattr(args, dest)
        stores_const = action.const is not None
        # Boolean --foo/--no-foo pairs share a destination; only the action
        # whose constant matches the current value is relevant.
        if stores_const and value != action.const:
            continue
        if parser.get_default(dest) == value:
            continue  # Default values are not recorded.
        if dest == "function_signature" and args.version >= 2:
            continue  # Enabled by default in version 2
        if dest != "filters":
            note += action.option_strings[0] + " "
            if not stores_const:  # The action takes a parameter.
                if action.nargs == "+":
                    value = " ".join('"' + v.strip('"') + '"' for v in value)
                note += "%s " % value
            continue
        # The value is a list of Filter objects; emit one option per element,
        # skipping text that is already part of the note.
        for elem in value:
            opt_name = "filter-out" if elem.is_filter_out else "filter"
            new_arg = '--%s "%s" ' % (opt_name, elem.pattern().strip('"'))
            if new_arg not in note:
                note += new_arg
    if not note:
        return note
    # Drop the trailing space left by the last appended option.
    return " %s %s" % (UTC_ARGS_KEY, note[:-1])
1659
1660
def check_for_command(line, parser, args, argv, argparse_callback):
    """Apply the options embedded in line, if it matches UTC_ARGS_CMD.

    On a match, the non-empty shell-style tokens of the "cmd" group are
    appended to argv, the arguments are re-parsed (leaving out entries that
    are in args.tests) and argparse_callback, when given, is invoked with the
    new args.  Returns the (possibly updated) args together with argv.
    """
    found = UTC_ARGS_CMD.match(line)
    if not found:
        return args, argv
    argv.extend(tok for tok in shlex.split(found.group("cmd").strip()) if tok)
    args = parse_args(
        parser, filter(lambda candidate: candidate not in args.tests, argv)
    )
    if argparse_callback is not None:
        argparse_callback(args)
    return args, argv
1671
1672
def find_arg_in_test(test_info, get_arg_to_check, arg_string, is_global):
    """Look up an option for a test, optionally searching its lines.

    get_arg_to_check is applied to test_info.args first.  If that yields a
    falsy result and is_global is set, it is applied to the args of each line
    from test_info.ro_iterlines() until one yields a truthy result.  A
    "global" option affects the entire generation of test checks, so finding
    it anywhere in the test applies it to everything.

    If the option is found on or after a line that is non-blank and does not
    start with ";", a warning naming arg_string is printed to stderr.

    Returns the last value produced by get_arg_to_check.
    """
    result = get_arg_to_check(test_info.args)
    if result or not is_global:
        return result

    # See if this has been specified via UTC_ARGS.
    saw_line = False
    for line_info in test_info.ro_iterlines():
        line = line_info.line
        if not line.startswith(";") and line.strip() != "":
            saw_line = True
        result = get_arg_to_check(line_info.args)
        if not result:
            continue
        if saw_line:
            # We saw the option after already reading some test input lines.
            # Warn about it.
            print(
                "WARNING: Found {} in line following test start: ".format(arg_string)
                + line,
                file=sys.stderr,
            )
            print(
                "WARNING: Consider moving {} to top of file".format(arg_string),
                file=sys.stderr,
            )
        break
    return result
1702
1703
def dump_input_lines(output_lines, test_info, prefix_set, comment_string):
    """Copy the input lines of test_info to output_lines, dropping old checks.

    Lines obtained from test_info.iterlines(output_lines) are appended to
    output_lines with trailing newlines removed, except for:
      * lines consisting only of comment_string,
      * lines consisting only of comment_string followed by SEPARATOR,
      * comment lines matched by CHECK_RE whose first group is in prefix_set.
    """
    for input_line_info in test_info.iterlines(output_lines):
        line = input_line_info.line
        stripped = line.strip()
        # Bare comment markers and separator lines are not carried over.
        if stripped == comment_string or stripped == comment_string + SEPARATOR:
            continue
        if line.lstrip().startswith(comment_string):
            m = CHECK_RE.match(line)
            # Drop existing check lines for the prefixes in prefix_set.
            if m and m.group(1) in prefix_set:
                continue
        output_lines.append(line.rstrip("\n"))
1717
1718
def add_checks_at_end(
    output_lines, prefix_list, func_order, comment_string, check_generator
):
    """Emit checks for every function in func_order, grouped by prefix.

    prefix_list holds run entries whose first element is a list of check
    prefixes and whose second element is the tool command args string.
    func_order maps each prefix to the functions to emit for it.
    check_generator(output_lines, run_list, func) writes the checks and
    returns the prefixes it generated checks for.

    A comment_string line is appended before each check_generator call once
    at least one check has been recorded.  Returns the set of all prefixes
    for which checks were generated.
    """
    emitted = set()
    generated_prefixes = set()
    for run_entry in prefix_list:
        run_prefixes = run_entry[0]
        tool_args = run_entry[1]
        for check_prefix_name in run_prefixes:
            for func in func_order[check_prefix_name]:
                # The func order can contain the same function several times;
                # a function already handled for this prefix is skipped.
                if (func, check_prefix_name) not in emitted:
                    if emitted:
                        output_lines.append(comment_string)

                    # The add_*_checks routines take a run list of
                    # (prefixes, tool args) tuples and emit checks for each
                    # prefix in it, implicitly assuming every run line
                    # produces something for every function.  That does not
                    # hold for generated functions (e.g. -fopenmp vs. no
                    # -fopenmp), so only the prefix of interest is passed.
                    # As a result checks are ordered by prefix rather than by
                    # function as in "normal" mode.
                    single_run = [([check_prefix_name], tool_args)]
                    for generated_prefix in check_generator(
                        output_lines, single_run, func
                    ):
                        emitted.add((func, generated_prefix))
                        generated_prefixes.add(generated_prefix)
    return generated_prefixes