[Reland][Runtimes] Merge 'compile_commands.json' files from runtimes build (#116303)
[llvm-project.git] / llvm / utils / UpdateTestChecks / common.py
blobb108a21dbc52b840db345560c4d18df450ceff70
1 import argparse
2 import bisect
3 import collections
4 import copy
5 import glob
6 import os
7 import re
8 import subprocess
9 import sys
10 import shlex
12 from typing import List, Mapping, Set
14 ##### Common utilities for update_*test_checks.py
17 _verbose = False
18 _prefix_filecheck_ir_name = ""
20 """
21 Version changelog:
23 1: Initial version, used by tests that don't specify --version explicitly.
24 2: --function-signature is now enabled by default and also checks return
25 type/attributes.
26 3: Opening parenthesis of function args is kept on the first LABEL line
27 in case arguments are split to a separate SAME line.
28 4: --check-globals now has a third option ('smart'). The others are now called
29 'none' and 'all'. 'smart' is the default.
30 5: Basic block labels are matched by FileCheck expressions
31 """
32 DEFAULT_VERSION = 5
# Analysis printer outputs that update_analyze_test_checks.py knows how to
# parse; any other analysis name triggers an "Unsupported analysis mode"
# warning in FunctionTestBuilder.process_run_line.
# (The closing brace was truncated in this view of the file; restored here.)
SUPPORTED_ANALYSES = {
    "Branch Probability Analysis",
    "Cost Model Analysis",
    "Loop Access Analysis",
    "Scalar Evolution Analysis",
}
class Regex(object):
    """Wrapper around a compiled regular expression.

    Plain compiled pattern objects cannot be deep-copied; this wrapper makes
    deep copy work (the immutable pattern object is simply shared), which is
    required for the deepcopy performed in do_scrub.
    """

    def __init__(self, regex):
        self.regex = regex

    def __deepcopy__(self, memo):
        # Shallow-copy the wrapper; compiled patterns are immutable, so the
        # copy can safely share the underlying regex object.
        duplicate = copy.copy(self)
        duplicate.regex = self.regex
        return duplicate

    def search(self, line):
        return self.regex.search(line)

    def sub(self, repl, line):
        return self.regex.sub(repl, line)

    def pattern(self):
        return self.regex.pattern

    def flags(self):
        return self.regex.flags
class Filter(Regex):
    """A Regex plus a polarity flag.

    When is_filter_out is False a match keeps the line in the generated
    checks; when True a match removes it.
    """

    def __init__(self, regex, is_filter_out):
        super(Filter, self).__init__(regex)
        self.is_filter_out = is_filter_out

    def __deepcopy__(self, memo):
        # Copy the Regex part via the base-class deepcopy, then the flag.
        duplicate = copy.deepcopy(super(Filter, self), memo)
        duplicate.is_filter_out = copy.deepcopy(self.is_filter_out, memo)
        return duplicate
86 def parse_commandline_args(parser):
87 class RegexAction(argparse.Action):
88 """Add a regular expression option value to a list of regular expressions.
89 This compiles the expression, wraps it in a Regex and adds it to the option
90 value list."""
92 def __init__(self, option_strings, dest, nargs=None, **kwargs):
93 if nargs is not None:
94 raise ValueError("nargs not allowed")
95 super(RegexAction, self).__init__(option_strings, dest, **kwargs)
97 def do_call(self, namespace, values, flags):
98 value_list = getattr(namespace, self.dest)
99 if value_list is None:
100 value_list = []
102 try:
103 value_list.append(Regex(re.compile(values, flags)))
104 except re.error as error:
105 raise ValueError(
106 "{}: Invalid regular expression '{}' ({})".format(
107 option_string, error.pattern, error.msg
111 setattr(namespace, self.dest, value_list)
113 def __call__(self, parser, namespace, values, option_string=None):
114 self.do_call(namespace, values, 0)
116 class FilterAction(RegexAction):
117 """Add a filter to a list of filter option values."""
119 def __init__(self, option_strings, dest, nargs=None, **kwargs):
120 super(FilterAction, self).__init__(option_strings, dest, nargs, **kwargs)
122 def __call__(self, parser, namespace, values, option_string=None):
123 super(FilterAction, self).__call__(parser, namespace, values, option_string)
125 value_list = getattr(namespace, self.dest)
127 is_filter_out = option_string == "--filter-out"
129 value_list[-1] = Filter(value_list[-1].regex, is_filter_out)
131 setattr(namespace, self.dest, value_list)
133 filter_group = parser.add_argument_group(
134 "filtering",
135 """Filters are applied to each output line according to the order given. The
136 first matching filter terminates filter processing for that current line.""",
139 filter_group.add_argument(
140 "--filter",
141 action=FilterAction,
142 dest="filters",
143 metavar="REGEX",
144 help="Only include lines matching REGEX (may be specified multiple times)",
146 filter_group.add_argument(
147 "--filter-out",
148 action=FilterAction,
149 dest="filters",
150 metavar="REGEX",
151 help="Exclude lines matching REGEX",
154 parser.add_argument(
155 "--include-generated-funcs",
156 action="store_true",
157 help="Output checks for functions not in source",
159 parser.add_argument(
160 "-v", "--verbose", action="store_true", help="Show verbose output"
162 parser.add_argument(
163 "-u",
164 "--update-only",
165 action="store_true",
166 help="Only update test if it was already autogened",
168 parser.add_argument(
169 "--force-update",
170 action="store_true",
171 help="Update test even if it was autogened by a different script",
173 parser.add_argument(
174 "--enable",
175 action="store_true",
176 dest="enabled",
177 default=True,
178 help="Activate CHECK line generation from this point forward",
180 parser.add_argument(
181 "--disable",
182 action="store_false",
183 dest="enabled",
184 help="Deactivate CHECK line generation from this point forward",
186 parser.add_argument(
187 "--replace-value-regex",
188 nargs="+",
189 default=[],
190 help="List of regular expressions to replace matching value names",
192 parser.add_argument(
193 "--prefix-filecheck-ir-name",
194 default="",
195 help="Add a prefix to FileCheck IR value names to avoid conflicts with scripted names",
197 parser.add_argument(
198 "--global-value-regex",
199 nargs="+",
200 default=[],
201 help="List of regular expressions that a global value declaration must match to generate a check (has no effect if checking globals is not enabled)",
203 parser.add_argument(
204 "--global-hex-value-regex",
205 nargs="+",
206 default=[],
207 help="List of regular expressions such that, for matching global value declarations, literal integer values should be encoded in hex in the associated FileCheck directives",
209 # FIXME: in 3.9, we can use argparse.BooleanOptionalAction. At that point,
210 # we need to rename the flag to just -generate-body-for-unused-prefixes.
211 parser.add_argument(
212 "--no-generate-body-for-unused-prefixes",
213 action="store_false",
214 dest="gen_unused_prefix_body",
215 default=True,
216 help="Generate a function body that always matches for unused prefixes. This is useful when unused prefixes are desired, and it avoids needing to annotate each FileCheck as allowing them.",
218 # This is the default when regenerating existing tests. The default when
219 # generating new tests is determined by DEFAULT_VERSION.
220 parser.add_argument(
221 "--version", type=int, default=1, help="The version of output format"
223 args = parser.parse_args()
224 # TODO: This should not be handled differently from the other options
225 global _verbose, _global_value_regex, _global_hex_value_regex
226 _verbose = args.verbose
227 _global_value_regex = args.global_value_regex
228 _global_hex_value_regex = args.global_hex_value_regex
229 return args
def parse_args(parser, argv):
    """Parse `argv` with `parser` and normalize version-dependent options."""
    args = parser.parse_args(argv)
    # Output-format version 2 made --function-signature implied.
    if args.version >= 2:
        args.function_signature = True
    # TODO: This should not be handled differently from the other options
    global _verbose, _global_value_regex, _global_hex_value_regex
    _verbose = args.verbose
    _global_value_regex = args.global_value_regex
    _global_hex_value_regex = args.global_hex_value_regex
    # Resolve the 'default' placeholder for --check-globals: 'smart' only
    # exists from output-format version 4 onward.
    if "check_globals" in args and args.check_globals == "default":
        args.check_globals = "smart" if args.version >= 4 else "none"
    return args
class InputLineInfo(object):
    """One input line bundled with its line number and the option state
    (args namespace and raw argv) in effect at that point of the file."""

    def __init__(self, line, line_number, args, argv):
        self.line, self.line_number = line, line_number
        self.args, self.argv = args, argv
class TestInfo(object):
    """State for one test file being updated: its parsed options, RUN lines,
    input lines, and the comment/advert strings used when rewriting it."""

    def __init__(
        self,
        test,
        parser,
        script_name,
        input_lines,
        args,
        argv,
        comment_prefix,
        argparse_callback,
    ):
        self.parser = parser
        self.argparse_callback = argparse_callback
        self.path = test
        self.args = args
        if args.prefix_filecheck_ir_name:
            # Mirror the option into the module global read by get_value_name.
            global _prefix_filecheck_ir_name
            _prefix_filecheck_ir_name = args.prefix_filecheck_ir_name
        self.argv = argv
        self.input_lines = input_lines
        self.run_lines = find_run_lines(test, self.input_lines)
        self.comment_prefix = comment_prefix
        if self.comment_prefix is None:
            # Infer the comment leader from the file extension.
            if self.path.endswith(".mir") or self.path.endswith(".txt"):
                self.comment_prefix = "#"
            elif self.path.endswith(".s"):
                self.comment_prefix = "//"
            else:
                self.comment_prefix = ";"
        # First-line advert marking a test as autogenerated by some script.
        self.autogenerated_note_prefix = self.comment_prefix + " " + UTC_ADVERT
        self.test_autogenerated_note = self.autogenerated_note_prefix + script_name
        self.test_autogenerated_note += get_autogennote_suffix(parser, self.args)
        # Doubled comment marker introducing the unused-prefix section.
        self.test_unused_note = (
            self.comment_prefix + self.comment_prefix + " " + UNUSED_NOTE
        )

    def ro_iterlines(self):
        """Yield an InputLineInfo per input line; args/argv for each line are
        recomputed via check_for_command, without mutating self."""
        for line_num, input_line in enumerate(self.input_lines):
            args, argv = check_for_command(
                input_line, self.parser, self.args, self.argv, self.argparse_callback
            )
            yield InputLineInfo(input_line, line_num, args, argv)

    def iterlines(self, output_lines):
        """Yield lines eligible for CHECK generation, appending pass-through
        lines (and the autogenerated note) to output_lines as a side effect.

        Note: mutates self.args/self.argv to the per-line state as it goes.
        """
        output_lines.append(self.test_autogenerated_note)
        for line_info in self.ro_iterlines():
            input_line = line_info.line
            # Discard any previous script advertising.
            if input_line.startswith(self.autogenerated_note_prefix):
                continue
            self.args = line_info.args
            self.argv = line_info.argv
            if not self.args.enabled:
                # CHECK generation disabled here (--disable): copy through.
                output_lines.append(input_line)
                continue
            yield line_info

    def get_checks_for_unused_prefixes(
        self, run_list, used_prefixes: List[str]
    ) -> List[str]:
        """Return filler check lines — one match-anything '{{.*}}' per prefix —
        for prefixes declared in run_list but absent from used_prefixes."""
        run_list = [element for element in run_list if element[0] is not None]
        unused_prefixes = set(
            [prefix for sublist in run_list for prefix in sublist[0]]
        ).difference(set(used_prefixes))

        ret = []
        if not unused_prefixes:
            return ret
        ret.append(self.test_unused_note)
        for unused in sorted(unused_prefixes):
            ret.append(
                "{comment} {prefix}: {match_everything}".format(
                    comment=self.comment_prefix,
                    prefix=unused,
                    match_everything=r"""{{.*}}""",
                )
            )
        return ret
def itertests(
    test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None
):
    """Yield a TestInfo for every test file matching the glob patterns.

    Skips files marked UTC_AVOID, files autogenerated by a different script
    (unless --force-update), and — under --update-only — files that were not
    autogenerated at all. Lines at/after the UNUSED_NOTE marker are dropped
    from the yielded input.
    """
    for pattern in test_patterns:
        # On Windows we must expand the patterns ourselves.
        tests_list = glob.glob(pattern)
        if not tests_list:
            warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
            continue
        for test in tests_list:
            with open(test) as f:
                input_lines = [l.rstrip() for l in f]
            first_line = input_lines[0] if input_lines else ""
            if UTC_AVOID in first_line:
                warn("Skipping test that must not be autogenerated: " + test)
                continue
            is_regenerate = UTC_ADVERT in first_line

            # If we're generating a new test, set the default version to the latest.
            argv = sys.argv[:]
            if not is_regenerate:
                argv.insert(1, "--version=" + str(DEFAULT_VERSION))

            args = parse_args(parser, argv[1:])
            if argparse_callback is not None:
                argparse_callback(args)
            if is_regenerate:
                if script_name not in first_line and not args.force_update:
                    warn(
                        "Skipping test which wasn't autogenerated by " + script_name,
                        test,
                    )
                    continue
                # Re-apply any UTC_ARGS recorded on the advert line.
                args, argv = check_for_command(
                    first_line, parser, args, argv, argparse_callback
                )
            elif args.update_only:
                assert UTC_ADVERT not in first_line
                warn("Skipping test which isn't autogenerated: " + test)
                continue
            final_input_lines = []
            for l in input_lines:
                # Everything from the unused-prefix note on is autogenerated.
                if UNUSED_NOTE in l:
                    break
                final_input_lines.append(l)
            yield TestInfo(
                test,
                parser,
                script_name,
                final_input_lines,
                args,
                argv,
                comment_prefix,
                argparse_callback,
            )
def should_add_line_to_output(
    input_line,
    prefix_set,
    skip_global_checks=False,
    skip_same_checks=False,
    comment_marker=";",
):
    """Decide whether a line of the original test body is copied to the
    updated test, or dropped because new CHECK lines will replace it."""
    stripped = input_line.strip()
    # Drop blank comment lines in the IR, except while scanning globals.
    if stripped == comment_marker and not skip_global_checks:
        return False
    # Drop the special double-comment separator line.
    if stripped == comment_marker + SEPARATOR:
        return False
    # Keep anything that is not an existing CHECK line for one of the
    # prefixes being regenerated — we're building our own.
    m = CHECK_RE.match(input_line)
    if not (m and m.group(1) in prefix_set):
        return True
    if skip_same_checks and CHECK_SAME_RE.match(input_line):
        # The previous CHECK line was removed, so don't leave this dangling
        return False
    if skip_global_checks:
        # Only drop checks that reference global value definitions.
        return re.search(r"(\[\[|@)", input_line) is None
    return False
def collect_original_check_lines(ti: TestInfo, prefix_set: set):
    """
    Collect pre-existing check lines into a dictionary `result` which is
    returned.

    result[func_name][prefix] is filled with a list of right-hand-sides of check
    lines.
    """
    result = collections.defaultdict(lambda: {})

    # State machine: a CHECK-LABEL opens a (prefix, function) run; subsequent
    # CHECK lines with the same prefix are appended to it.
    current_prefix = None
    current_function = None
    for input_line_info in ti.ro_iterlines():
        input_line = input_line_info.line
        if input_line.lstrip().startswith(";"):
            m = CHECK_RE.match(input_line)
            if m is not None:
                prefix = m.group(1)
                check_kind = m.group(2)
                line = input_line[m.end() :].strip()

                # A prefix change ends the run we were collecting.
                if prefix != current_prefix:
                    current_function = None
                    current_prefix = None

                if check_kind not in ["LABEL", "SAME"]:
                    if current_function is not None:
                        current_function.append(line)
                    continue

                # CHECK-SAME continues the previous line; nothing to record.
                if check_kind == "SAME":
                    continue

                if check_kind == "LABEL":
                    m = IR_FUNCTION_LABEL_RE.match(line)
                    if m is not None:
                        func_name = m.group(1)
                        if (
                            ti.args.function is not None
                            and func_name != ti.args.function
                        ):
                            # When filtering on a specific function, skip all others.
                            continue

                        current_prefix = prefix
                        current_function = result[func_name][prefix] = []
                        continue

        # Any non-CHECK line (or unmatched LABEL) terminates the run.
        current_function = None

    return result
# Perform lit-like substitutions
def getSubstitutions(sourcepath):
    """Return lit-style (pattern, replacement) pairs for a test path:
    %s -> the file, %S/%p -> its directory, %{pathsep} -> os.pathsep."""
    directory = os.path.dirname(sourcepath)
    return [
        ("%s", sourcepath),
        ("%S", directory),
        ("%p", directory),
        ("%{pathsep}", os.pathsep),
    ]
def applySubstitutions(s, substitutions):
    """Apply each (pattern, replacement) pair to `s`, in order."""
    result = s
    for pattern, replacement in substitutions:
        result = result.replace(pattern, replacement)
    return result
# Invoke the tool that is being tested.
def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False):
    """Run `exe` on test input `ir` and return its stdout as text.

    cmd_args may be a list (safe, no shell) or a string (run via the shell).
    preprocess_cmd, when given, is a shell command whose stdout is piped into
    the tool instead of the raw file. Lit-like %-substitutions (see
    getSubstitutions) are applied to both commands. Line endings in the
    output are normalized to '\\n'.
    """
    with open(ir) as ir_file:
        substitutions = getSubstitutions(ir)

        # TODO Remove the str form which is used by update_test_checks.py and
        # update_llc_test_checks.py
        # The safer list form is used by update_cc_test_checks.py
        if preprocess_cmd:
            # Allow pre-processing the IR file (e.g. using sed):
            assert isinstance(
                preprocess_cmd, str
            )  # TODO: use a list instead of using shell
            preprocess_cmd = applySubstitutions(preprocess_cmd, substitutions).strip()
            if verbose:
                print(
                    "Pre-processing input file: ",
                    ir,
                    " with command '",
                    preprocess_cmd,
                    "'",
                    sep="",
                    file=sys.stderr,
                )
            pp = subprocess.Popen(
                preprocess_cmd,
                shell=True,
                stdin=subprocess.DEVNULL,
                stdout=subprocess.PIPE,
            )
            # Feed the tool from the preprocessor's stdout instead of the file.
            ir_file = pp.stdout

        if isinstance(cmd_args, list):
            args = [applySubstitutions(a, substitutions) for a in cmd_args]
            stdout = subprocess.check_output([exe] + args, stdin=ir_file)
        else:
            stdout = subprocess.check_output(
                exe + " " + applySubstitutions(cmd_args, substitutions),
                shell=True,
                stdin=ir_file,
            )
    if sys.version_info[0] > 2:
        # FYI, if you crashed here with a decode error, your run line probably
        # results in bitcode or other binary format being written to the pipe.
        # For an opt test, you probably want to add -S or -disable-output.
        stdout = stdout.decode()
    # Fix line endings to unix CR style.
    return stdout.replace("\r\n", "\n")
##### LLVM IR parser
# Shared regular expressions for parsing RUN lines, existing CHECK lines and
# tool output. (Several re.compile(...) calls were truncated in this view of
# the file; their closing parentheses are restored here.)
RUN_LINE_RE = re.compile(r"^\s*(?://|[;#])\s*RUN:\s*(.*)$")
CHECK_PREFIX_RE = re.compile(r"--?check-prefix(?:es)?[= ](\S+)")
PREFIX_RE = re.compile("^[a-zA-Z0-9_-]+$")
CHECK_RE = re.compile(
    r"^\s*(?://|[;#])\s*([^:]+?)(?:-(NEXT|NOT|DAG|LABEL|SAME|EMPTY))?:"
)
CHECK_SAME_RE = re.compile(r"^\s*(?://|[;#])\s*([^:]+?)(?:-SAME)?:")

UTC_ARGS_KEY = "UTC_ARGS:"
UTC_ARGS_CMD = re.compile(r".*" + UTC_ARGS_KEY + r"\s*(?P<cmd>.*)\s*$")
UTC_ADVERT = "NOTE: Assertions have been autogenerated by "
UTC_AVOID = "NOTE: Do not autogenerate"
UNUSED_NOTE = "NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:"

OPT_FUNCTION_RE = re.compile(
    r"^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s():,]+?))?\s*define\s+(?P<funcdef_attrs_and_ret>[^@]*)@(?P<func>[\w.$-]+?)\s*"
    r"(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P<body>.*?)^\}$",
    flags=(re.M | re.S),
)

ANALYZE_FUNCTION_RE = re.compile(
    r"^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.$-]+?)\':"
    r"\s*\n(?P<body>.*)$",
    flags=(re.X | re.S),
)

LOOP_PASS_DEBUG_RE = re.compile(
    r"^\s*\'(?P<func>[\w.$-]+?)\'[^\n]*" r"\s*\n(?P<body>.*)$", flags=(re.X | re.S)
)

IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(')
IR_FUNCTION_LABEL_RE = re.compile(
    r'^\s*(?:define\s+(?:internal\s+)?[^@]*)?@"?([\w.$-]+)"?\s*\('
)
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
TRIPLE_ARG_RE = re.compile(r"-m?triple[= ]([^ ]+)")
MARCH_ARG_RE = re.compile(r"-march[= ]([^ ]+)")
DEBUG_ONLY_ARG_RE = re.compile(r"-debug-only[= ]([^ ]+)")

SCRUB_LEADING_WHITESPACE_RE = re.compile(r"^(\s+)")
SCRUB_WHITESPACE_RE = re.compile(r"(?!^(| \w))[ \t]+", flags=re.M)
SCRUB_PRESERVE_LEADING_WHITESPACE_RE = re.compile(r"((?!^)[ \t]*(\S))[ \t]+")
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r"[ \t]+$", flags=re.M)
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(
    r"([ \t]|(#[0-9]+))+$", flags=re.M
)
SCRUB_KILL_COMMENT_RE = re.compile(r"^ *#+ +kill:.*\n")
SCRUB_LOOP_COMMENT_RE = re.compile(
    r"# =>This Inner Loop Header:.*|# in Loop:.*", flags=re.M
)
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r"(?<=\S)+[ \t]*#$", flags=re.M)

SEPARATOR = "."
def error(msg, test_file=None):
    """Emit an ERROR-level diagnostic to stderr, optionally tagged with a file."""
    text = "{}: {}".format(msg, test_file) if test_file else msg
    print("ERROR: {}".format(text), file=sys.stderr)
def warn(msg, test_file=None):
    """Emit a WARNING-level diagnostic to stderr, optionally tagged with a file."""
    text = "{}: {}".format(msg, test_file) if test_file else msg
    print("WARNING: {}".format(text), file=sys.stderr)
def debug(*args, **kwargs):
    """Print only when module-level verbose mode is on; output defaults to stderr."""
    # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
    kwargs.setdefault("file", sys.stderr)
    if _verbose:
        print(*args, **kwargs)
def find_run_lines(test, lines):
    """Extract the RUN: commands from `lines`, joining backslash-continued
    RUN lines into a single command string."""
    debug("Scanning for RUN lines in test file:", test)
    raw_lines = [m.group(1) for m in (RUN_LINE_RE.match(l) for l in lines) if m]
    run_lines = []
    for raw in raw_lines:
        if run_lines and run_lines[-1].endswith("\\"):
            # Continuation: fold into the previous RUN command.
            run_lines[-1] = run_lines[-1].rstrip("\\") + " " + raw
        else:
            run_lines.append(raw)
    debug("Found {} RUN lines in {}:".format(len(run_lines), test))
    for cmd in run_lines:
        debug("  RUN: {}".format(cmd))
    return run_lines
def get_triple_from_march(march):
    """Map a -march value to a target triple by prefix; fall back to 'x86'
    with a warning on stderr."""
    triples = {
        "amdgcn": "amdgcn",
        "r600": "r600",
        "mips": "mips",
        "nvptx64": "nvptx64",
        "sparc": "sparc",
        "hexagon": "hexagon",
        "ve": "ve",
    }
    found = next(
        (triple for prefix, triple in triples.items() if march.startswith(prefix)),
        None,
    )
    if found is not None:
        return found
    print("Cannot find a triple. Assume 'x86'", file=sys.stderr)
    return "x86"
def apply_filters(line, filters):
    """Return True if `line` survives `filters`.

    The first filter whose regex matches decides the outcome: keep for a
    plain --filter, drop for a --filter-out. If nothing matched, the line is
    kept only when every filter was a filter-out.
    """
    saw_keep_filter = False
    for flt in filters:
        if not flt.is_filter_out:
            saw_keep_filter = True
        if flt.search(line):
            return not flt.is_filter_out
    # No filter matched: keep only if we never saw a keep-style filter.
    return not saw_keep_filter
def do_filter(body, filters):
    """Apply `filters` to `body` line by line; with no filters, return the
    body unchanged."""
    if not filters:
        return body
    surviving = [line for line in body.splitlines() if apply_filters(line, filters)]
    return "\n".join(surviving)
def scrub_body(body):
    """Normalize whitespace in a function body: collapse internal runs of
    spaces/tabs (keeping leading indentation), expand tabs to two spaces,
    and strip trailing whitespace."""
    # Scrub runs of whitespace out of the assembly, but leave the leading
    # whitespace in place.
    body = SCRUB_PRESERVE_LEADING_WHITESPACE_RE.sub(lambda m: m.group(2) + " ", body)
    # Expand the tabs used for indentation.
    body = body.expandtabs(2)
    # Strip trailing whitespace.
    return SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r"", body)
def do_scrub(body, scrubber, scrubber_args, extra):
    """Run `scrubber` on `body` with `scrubber_args`.

    When args are present, a deep copy is taken and `extra` is stored on the
    first arg's `extra_scrub` attribute so the caller's args stay untouched.
    """
    if not scrubber_args:
        return scrubber(body, *scrubber_args)
    local_args = copy.deepcopy(scrubber_args)
    local_args[0].extra_scrub = extra
    return scrubber(body, *local_args)
# Build up a dictionary of all the function bodies.
class function_body(object):
    """Scrubbed body of one function as produced by a RUN line, plus the
    pieces (signature, attributes, separator) needed to emit check lines."""

    def __init__(
        self,
        string,
        extra,
        funcdef_attrs_and_ret,
        args_and_sig,
        attrs,
        func_name_separator,
        ginfo,
    ):
        # Fully scrubbed body used for the generated checks.
        self.scrub = string
        # Body after the extra scrub pass; used for cross-RUN-line comparison.
        self.extrascrub = extra
        self.funcdef_attrs_and_ret = funcdef_attrs_and_ret
        self.args_and_sig = args_and_sig
        self.attrs = attrs
        self.func_name_separator = func_name_separator
        self._ginfo = ginfo

    def is_same_except_arg_names(
        self, extrascrub, funcdef_attrs_and_ret, args_and_sig, attrs
    ):
        """Return True if this body and the given one differ only in the names
        of local ('%') argument values, so they can share check lines."""
        arg_names = set()

        def drop_arg_names(match):
            # Remove local ('%') value names from the signature, recording
            # them; other value kinds are left in place.
            nameless_value = self._ginfo.get_nameless_value_from_match(match)
            if nameless_value.check_key == "%":
                arg_names.add(self._ginfo.get_name_from_match(match))
                substitute = ""
            else:
                substitute = match.group(2)
            return match.group(1) + substitute + match.group(match.lastindex)

        def repl_arg_names(match):
            # Remove uses of the recorded argument names from body text.
            nameless_value = self._ginfo.get_nameless_value_from_match(match)
            if (
                nameless_value.check_key == "%"
                and self._ginfo.get_name_from_match(match) in arg_names
            ):
                return match.group(1) + match.group(match.lastindex)
            return match.group(1) + match.group(2) + match.group(match.lastindex)

        if self.funcdef_attrs_and_ret != funcdef_attrs_and_ret:
            return False
        if self.attrs != attrs:
            return False

        regexp = self._ginfo.get_regexp()
        ans0 = regexp.sub(drop_arg_names, self.args_and_sig)
        ans1 = regexp.sub(drop_arg_names, args_and_sig)
        if ans0 != ans1:
            return False
        if self._ginfo.is_asm():
            # Check without replacements, the replacements are not applied to the
            # body for backend checks.
            return self.extrascrub == extrascrub

        # IR mode: compare bodies with argument names stripped and IR
        # end-of-line comments removed.
        es0 = regexp.sub(repl_arg_names, self.extrascrub)
        es1 = regexp.sub(repl_arg_names, extrascrub)
        es0 = SCRUB_IR_COMMENT_RE.sub(r"", es0)
        es1 = SCRUB_IR_COMMENT_RE.sub(r"", es1)
        return es0 == es1

    def __str__(self):
        return self.scrub
class FunctionTestBuilder:
    """Accumulates scrubbed function bodies per check prefix across all RUN
    lines of a test, detecting bodies that conflict between RUN lines."""

    def __init__(self, run_list, flags, scrubber_args, path, ginfo):
        self._verbose = flags.verbose
        self._record_args = flags.function_signature
        self._check_attributes = flags.check_attributes
        # Strip double-quotes if input was read by UTC_ARGS
        self._filters = (
            list(
                map(
                    lambda f: Filter(
                        re.compile(f.pattern().strip('"'), f.flags()), f.is_filter_out
                    ),
                    flags.filters,
                )
            )
            if flags.filters
            else []
        )
        self._scrubber_args = scrubber_args
        self._path = path
        self._ginfo = ginfo
        # Strip double-quotes if input was read by UTC_ARGS
        self._replace_value_regex = list(
            map(lambda x: x.strip('"'), flags.replace_value_regex)
        )
        # _func_dict[prefix][func] -> function_body, or None on conflict.
        self._func_dict = {}
        # _func_order[prefix] -> function names in order of first appearance.
        self._func_order = {}
        self._global_var_dict = {}
        self._processed_prefixes = set()
        for tuple in run_list:
            for prefix in tuple[0]:
                self._func_dict.update({prefix: dict()})
                self._func_order.update({prefix: []})
                self._global_var_dict.update({prefix: dict()})

    def finish_and_get_func_dict(self):
        # Warn once per prefix whose bodies conflicted on every function.
        for prefix in self.get_failed_prefixes():
            warn(
                "Prefix %s had conflicting output from different RUN lines for all functions in test %s"
                % (
                    prefix,
                    self._path,
                )
            )
        return self._func_dict

    def func_order(self):
        return self._func_order

    def global_var_dict(self):
        return self._global_var_dict

    def is_filtered(self):
        return bool(self._filters)

    def process_run_line(self, function_re, scrubber, raw_tool_output, prefixes):
        """Parse one RUN line's tool output with `function_re` and record each
        function body under every prefix of that RUN line."""
        build_global_values_dictionary(
            self._global_var_dict, raw_tool_output, prefixes, self._ginfo
        )
        for m in function_re.finditer(raw_tool_output):
            if not m:
                continue
            func = m.group("func")
            body = m.group("body")
            # func_name_separator is the string that is placed right after function name at the
            # beginning of assembly function definition. In most assemblies, that is just a
            # colon: `foo:`. But, for example, in nvptx it is a brace: `foo(`. If is_backend is
            # False, just assume that separator is an empty string.
            if self._ginfo.is_asm():
                # Use ':' as default separator.
                func_name_separator = (
                    m.group("func_name_separator")
                    if "func_name_separator" in m.groupdict()
                    else ":"
                )
            else:
                func_name_separator = ""
            attrs = m.group("attrs") if self._check_attributes else ""
            funcdef_attrs_and_ret = (
                m.group("funcdef_attrs_and_ret") if self._record_args else ""
            )
            # Determine if we print arguments, the opening brace, or nothing after the
            # function name
            if self._record_args and "args_and_sig" in m.groupdict():
                args_and_sig = scrub_body(m.group("args_and_sig").strip())
            elif "args_and_sig" in m.groupdict():
                args_and_sig = "("
            else:
                args_and_sig = ""
            filtered_body = do_filter(body, self._filters)
            scrubbed_body = do_scrub(
                filtered_body, scrubber, self._scrubber_args, extra=False
            )
            scrubbed_extra = do_scrub(
                filtered_body, scrubber, self._scrubber_args, extra=True
            )
            if "analysis" in m.groupdict():
                analysis = m.group("analysis")
                if analysis not in SUPPORTED_ANALYSES:
                    warn("Unsupported analysis mode: %r!" % (analysis,))
            if func.startswith("stress"):
                # We only use the last line of the function body for stress tests.
                scrubbed_body = "\n".join(scrubbed_body.splitlines()[-1:])
            if self._verbose:
                print("Processing function: " + func, file=sys.stderr)
                for l in scrubbed_body.splitlines():
                    print("  " + l, file=sys.stderr)
            for prefix in prefixes:
                # Replace function names matching the regex.
                for regex in self._replace_value_regex:
                    # Pattern that matches capture groups in the regex in leftmost order.
                    group_regex = re.compile(r"\(.*?\)")
                    # Replace function name with regex.
                    match = re.match(regex, func)
                    if match:
                        func_repl = regex
                        # Replace any capture groups with their matched strings.
                        for g in match.groups():
                            func_repl = group_regex.sub(
                                re.escape(g), func_repl, count=1
                            )
                        func = re.sub(func_repl, "{{" + func_repl + "}}", func)

                    # Replace all calls to regex matching functions.
                    matches = re.finditer(regex, scrubbed_body)
                    for match in matches:
                        func_repl = regex
                        # Replace any capture groups with their matched strings.
                        for g in match.groups():
                            func_repl = group_regex.sub(
                                re.escape(g), func_repl, count=1
                            )
                        # Substitute function call names that match the regex with the same
                        # capture groups set.
                        scrubbed_body = re.sub(
                            func_repl, "{{" + func_repl + "}}", scrubbed_body
                        )

                if func in self._func_dict[prefix]:
                    if self._func_dict[prefix][func] is not None and (
                        str(self._func_dict[prefix][func]) != scrubbed_body
                        or self._func_dict[prefix][func].args_and_sig != args_and_sig
                        or self._func_dict[prefix][func].attrs != attrs
                        or self._func_dict[prefix][func].funcdef_attrs_and_ret
                        != funcdef_attrs_and_ret
                    ):
                        if self._func_dict[prefix][func].is_same_except_arg_names(
                            scrubbed_extra,
                            funcdef_attrs_and_ret,
                            args_and_sig,
                            attrs,
                        ):
                            self._func_dict[prefix][func].scrub = scrubbed_extra
                            self._func_dict[prefix][func].args_and_sig = args_and_sig
                        else:
                            # This means a previous RUN line produced a body for this function
                            # that is different from the one produced by this current RUN line,
                            # so the body can't be common across RUN lines. We use None to
                            # indicate that.
                            self._func_dict[prefix][func] = None
                else:
                    if prefix not in self._processed_prefixes:
                        self._func_dict[prefix][func] = function_body(
                            scrubbed_body,
                            scrubbed_extra,
                            funcdef_attrs_and_ret,
                            args_and_sig,
                            attrs,
                            func_name_separator,
                            self._ginfo,
                        )
                        self._func_order[prefix].append(func)
                    else:
                        # An earlier RUN line used this check prefixes but didn't produce
                        # a body for this function. This happens in Clang tests that use
                        # preprocesser directives to exclude individual functions from some
                        # RUN lines.
                        self._func_dict[prefix][func] = None

    def processed_prefixes(self, prefixes):
        """
        Mark a set of prefixes as having had at least one applicable RUN line fully
        processed. This is used to filter out function bodies that don't have
        outputs for all RUN lines.
        """
        self._processed_prefixes.update(prefixes)

    def get_failed_prefixes(self):
        # This returns the list of those prefixes that failed to match any function,
        # because there were conflicting bodies produced by different RUN lines, in
        # all instances of the prefix.
        for prefix in self._func_dict:
            if self._func_dict[prefix] and (
                not [
                    fct
                    for fct in self._func_dict[prefix]
                    if self._func_dict[prefix][fct] is not None
                ]
            ):
                yield prefix
##### Generator of LLVM IR CHECK lines

# Matches an IR end-of-line comment: optional blanks, ';', rest of the line.
SCRUB_IR_COMMENT_RE = re.compile(r"\s*;.*")

# TODO: We should also derive check lines for global, debug, loop declarations, etc..
class NamelessValue:
    """
    A NamelessValue object represents a type of value in the IR whose "name" we
    generalize in the generated check lines; where the "name" could be an actual
    name (as in e.g. `@some_global` or `%x`) or just a number (as in e.g. `%12`
    or `!4`).
    """

    def __init__(
        self,
        check_prefix,
        check_key,
        ir_prefix,
        ir_regexp,
        global_ir_rhs_regexp,
        *,
        is_before_functions=False,
        is_number=False,
        replace_number_with_counter=False,
        match_literally=False,
        interlaced_with_previous=False,
        ir_suffix=r"",
    ):
        # Prefix used when inventing FileCheck variable names for this kind.
        self.check_prefix = check_prefix
        # Sigil keying the value kind; '%' marks function-local IR values
        # (see is_local_def_ir_value below).
        self.check_key = check_key
        # Regexes for the literal text surrounding / forming the value name.
        self.ir_prefix = ir_prefix
        self.ir_regexp = ir_regexp
        self.ir_suffix = ir_suffix
        # RHS pattern for global declarations; None presumably means this
        # kind never appears as a global definition — confirm against users.
        self.global_ir_rhs_regexp = global_ir_rhs_regexp
        self.is_before_functions = is_before_functions
        self.is_number = is_number
        # Some variable numbers (e.g. MCINST1234) will change based on unrelated
        # modifications to LLVM, replace those with an incrementing counter.
        self.replace_number_with_counter = replace_number_with_counter
        self.match_literally = match_literally
        self.interlaced_with_previous = interlaced_with_previous
        # Per-instance map from original number/name to its counter value.
        self.variable_mapping = {}

    # Return true if this kind of IR value is defined "locally" to functions,
    # which we assume is only the case precisely for LLVM IR local values.
    def is_local_def_ir_value(self):
        return self.check_key == "%"

    # Return the IR regexp we use for this kind or IR value, e.g., [\w.-]+? for locals
    def get_ir_regex(self):
        # for backwards compatibility we check locals with '.*'
        if self.is_local_def_ir_value():
            return ".*"
        return self.ir_regexp

    # Create a FileCheck variable name based on an IR name.
    def get_value_name(self, var: str, check_prefix: str):
        var = var.replace("!", "")
        if self.replace_number_with_counter:
            assert var
            replacement = self.variable_mapping.get(var, None)
            if replacement is None:
                # Replace variable with an incrementing counter
                replacement = str(len(self.variable_mapping) + 1)
                self.variable_mapping[var] = replacement
            var = replacement
        # This is a nameless value, prepend check_prefix.
        if var.isdigit():
            var = check_prefix + var
        else:
            # This is a named value that clashes with the check_prefix, prepend with
            # _prefix_filecheck_ir_name, if it has been defined.
            if (
                may_clash_with_default_check_prefix_name(check_prefix, var)
                and _prefix_filecheck_ir_name
            ):
                var = _prefix_filecheck_ir_name + var
            # '.'/'-' are not valid in FileCheck variable names; use '_'.
            var = var.replace(".", "_")
            var = var.replace("-", "_")
        return var.upper()

    def get_affixes_from_match(self, match):
        # Group 2 holds the full value text; peel off the literal prefix and
        # suffix so the middle can become a FileCheck substitution.
        prefix = re.match(self.ir_prefix, match.group(2)).group(0)
        suffix = re.search(self.ir_suffix + "$", match.group(2)).group(0)
        return prefix, suffix
class GeneralizerInfo:
    """
    A GeneralizerInfo object holds information about how check lines should be generalized
    (e.g., variable names replaced by FileCheck meta variables) as well as per-test-file
    state (e.g. information about IR global variables).
    """

    MODE_IR = 0
    MODE_ASM = 1
    MODE_ANALYZE = 2

    def __init__(
        self,
        version,
        mode,
        nameless_values: List[NamelessValue],
        regexp_prefix,
        regexp_suffix,
    ):
        self._version = version
        self._mode = mode
        self._nameless_values = nameless_values

        self._regexp_prefix = regexp_prefix
        self._regexp_suffix = regexp_suffix

        # Regexp matching any of the nameless values, and a second regexp
        # restricted to global values whose numbering is unstable across runs.
        self._regexp, _ = self._build_regexp(False, False)
        (
            self._unstable_globals_regexp,
            self._unstable_globals_values,
        ) = self._build_regexp(True, True)

    def _build_regexp(self, globals_only, unstable_only):
        """Build one alternation regexp over the selected nameless values.

        Returns (compiled_regexp, values), where values[i] corresponds to the
        i-th alternative, i.e. to capture group
        i + FIRST_NAMELESS_GROUP_IN_MATCH of the compiled regexp.
        """
        matches = []
        values = []
        for nameless_value in self._nameless_values:
            is_global = nameless_value.global_ir_rhs_regexp is not None
            if globals_only and not is_global:
                continue
            if unstable_only and nameless_value.match_literally:
                continue

            match = f"(?:{nameless_value.ir_prefix}({nameless_value.ir_regexp}){nameless_value.ir_suffix})"
            if self.is_ir() and not globals_only and is_global:
                # Global definitions in IR only match at the start of a line.
                match = "^" + match
            matches.append(match)
            values.append(nameless_value)

        regexp_string = r"|".join(matches)

        return (
            re.compile(
                self._regexp_prefix + r"(" + regexp_string + r")" + self._regexp_suffix
            ),
            values,
        )

    def get_version(self):
        return self._version

    def is_ir(self):
        return self._mode == GeneralizerInfo.MODE_IR

    def is_asm(self):
        return self._mode == GeneralizerInfo.MODE_ASM

    def is_analyze(self):
        return self._mode == GeneralizerInfo.MODE_ANALYZE

    def get_nameless_values(self):
        return self._nameless_values

    def get_regexp(self):
        return self._regexp

    def get_unstable_globals_regexp(self):
        return self._unstable_globals_regexp

    # The entire match is group 0, the prefix has one group (=1), the entire
    # IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start.
    FIRST_NAMELESS_GROUP_IN_MATCH = 3

    def get_match_info(self, match):
        """
        Returns (name, nameless_value) for the given match object
        """
        if match.re == self._regexp:
            values = self._nameless_values
        else:
            # BUGFIX: this used to be a bare comparison with no effect; it is
            # meant as a sanity check that the match was produced by the
            # unstable-globals regexp.
            assert match.re == self._unstable_globals_regexp
            values = self._unstable_globals_values
        for i, value in enumerate(values):
            g = match.group(i + GeneralizerInfo.FIRST_NAMELESS_GROUP_IN_MATCH)
            if g is not None:
                return g, value
        error("Unable to identify the kind of IR value from the match!")
        return None, None

    # See get_idx_from_match
    def get_name_from_match(self, match):
        return self.get_match_info(match)[0]

    def get_nameless_value_from_match(self, match) -> NamelessValue:
        return self.get_match_info(match)[1]
def make_ir_generalizer(version):
    """Build the GeneralizerInfo used to generalize LLVM IR test output."""
    #                 check_prefix  check_key  ir_prefix      ir_regexp    global_ir_rhs_regexp
    base_values = [
        NamelessValue(r"TMP", "%", r"%", r"[\w$.-]+?", None),
        NamelessValue(r"ATTR", "#", r"#", r"[0-9]+", None),
        NamelessValue(r"ATTR", "#", r"attributes #", r"[0-9]+", r"{[^}]*}"),
        NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", None),
        NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", r".+", is_before_functions=True),
        NamelessValue(
            r"GLOBNAMED",
            "@",
            r"@",
            r"[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*",
            r".+",
            is_before_functions=True,
            match_literally=True,
            interlaced_with_previous=True,
        ),
        NamelessValue(r"DBG", "!", r"!dbg ", r"![0-9]+", None),
        NamelessValue(r"DIASSIGNID", "!", r"!DIAssignID ", r"![0-9]+", None),
        NamelessValue(r"PROF", "!", r"!prof ", r"![0-9]+", None),
        NamelessValue(r"TBAA", "!", r"!tbaa ", r"![0-9]+", None),
        NamelessValue(r"TBAA_STRUCT", "!", r"!tbaa.struct ", r"![0-9]+", None),
        NamelessValue(r"RNG", "!", r"!range ", r"![0-9]+", None),
        NamelessValue(r"LOOP", "!", r"!llvm.loop ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"", r"![0-9]+", r"(?:distinct |)!.*"),
        NamelessValue(r"ACC_GRP", "!", r"!llvm.access.group ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"![a-z.]+ ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"[, (]", r"![0-9]+", None),
    ]

    values = []
    # Basic block labels are only generalized from check-line version 5 on.
    if version >= 5:
        values.append(NamelessValue(r"BB", "%", r"label %", r"[\w$.-]+?", None))
        values.append(
            NamelessValue(r"BB", "%", r"^", r"[\w$.-]+?", None, ir_suffix=r":")
        )
    values.extend(base_values)

    return GeneralizerInfo(
        version, GeneralizerInfo.MODE_IR, values, r"(\s*)", r"([,\s\(\)\}]|\Z)"
    )
def make_asm_generalizer(version):
    """Build the GeneralizerInfo used to generalize assembly/MC test output."""
    # MCInst opcode numbers and register ids shift with unrelated LLVM
    # changes, so both are replaced by incrementing counters instead of
    # being matched verbatim.
    mcinst = NamelessValue(
        r"MCINST",
        "Inst#",
        "<MCInst #",
        r"\d+",
        r".+",
        is_number=True,
        replace_number_with_counter=True,
    )
    mcreg = NamelessValue(
        r"MCREG",
        "Reg:",
        "<MCOperand Reg:",
        r"\d+",
        r".+",
        is_number=True,
        replace_number_with_counter=True,
    )

    return GeneralizerInfo(
        version,
        GeneralizerInfo.MODE_ASM,
        [mcinst, mcreg],
        r"((?:#|//)\s*)",
        r"([>\s]|\Z)",
    )
def make_analyze_generalizer(version):
    """Build the GeneralizerInfo used to generalize analysis-pass output."""
    # Hexadecimal group ids are address-like and unstable across runs;
    # replace them with an incrementing counter.
    group_id = NamelessValue(
        r"GRP",
        "#",
        r"",
        r"0x[0-9a-f]+",
        None,
        replace_number_with_counter=True,
    )

    return GeneralizerInfo(
        version,
        GeneralizerInfo.MODE_ANALYZE,
        [group_id],
        r"(\s*)",
        r"(\)?:)",
    )
# Return true if var clashes with the scripted FileCheck check_prefix.
def may_clash_with_default_check_prefix_name(check_prefix, var):
    """A clash is a var of the form `<check_prefix><digits>` (case-insensitive)."""
    if not check_prefix:
        # Preserve the original short-circuit: a falsy prefix is returned as-is.
        return check_prefix
    clash_pattern = r"^" + check_prefix + r"[0-9]+?$"
    return re.match(clash_pattern, var, re.IGNORECASE)
def find_diff_matching(lhs: List[str], rhs: List[str]) -> List[tuple]:
    """
    Find a large ordered matching between strings in lhs and rhs.

    Think of this as finding the *unchanged* lines in a diff, where the entries
    of lhs and rhs are lines of the files being diffed.

    Returns a list of matched (lhs_idx, rhs_idx) pairs.
    """

    if not lhs or not rhs:
        return []

    # Collect matches in reverse order.
    matches = []

    # First, collect a set of candidate matching edges. We limit this to a
    # constant multiple of the input size to avoid quadratic runtime.
    patterns = collections.defaultdict(lambda: ([], []))

    for idx in range(len(lhs)):
        patterns[lhs[idx]][0].append(idx)
    for idx in range(len(rhs)):
        patterns[rhs[idx]][1].append(idx)

    multiple_patterns = []

    candidates = []
    for pattern in patterns.values():
        if not pattern[0] or not pattern[1]:
            continue

        if len(pattern[0]) == len(pattern[1]) == 1:
            # A line that is unique on both sides is always a candidate edge.
            candidates.append((pattern[0][0], pattern[1][0]))
        else:
            multiple_patterns.append(pattern)

    # Consider ambiguous lines cheapest-first so the edge budget below is
    # spent on the least ambiguous ones.
    multiple_patterns.sort(key=lambda pattern: len(pattern[0]) * len(pattern[1]))

    for pattern in multiple_patterns:
        if len(candidates) + len(pattern[0]) * len(pattern[1]) > 2 * (
            len(lhs) + len(rhs)
        ):
            break
        for lhs_idx in pattern[0]:
            for rhs_idx in pattern[1]:
                candidates.append((lhs_idx, rhs_idx))

    if not candidates:
        # The LHS and RHS either share nothing in common, or lines are just too
        # identical. In that case, let's give up and not match anything.
        return []

    # Compute a maximal crossing-free matching via an algorithm that is
    # inspired by a mixture of dynamic programming and line-sweeping in
    # discrete geometry.
    #
    # I would be surprised if this algorithm didn't exist somewhere in the
    # literature, but I found it without consciously recalling any
    # references, so you'll have to make do with the explanation below.
    # Sorry.
    #
    # The underlying graph is bipartite:
    # - nodes on the LHS represent lines in the original check
    # - nodes on the RHS represent lines in the new (updated) check
    #
    # Nodes are implicitly sorted by the corresponding line number.
    # Edges (unique_matches) are sorted by the line number on the LHS.
    #
    # Here's the geometric intuition for the algorithm.
    #
    # * Plot the edges as points in the plane, with the original line
    #   number on the X axis and the updated line number on the Y axis.
    # * The goal is to find a longest "chain" of points where each point
    #   is strictly above and to the right of the previous point.
    # * The algorithm proceeds by sweeping a vertical line from left to
    #   right.
    # * The algorithm maintains a table where `table[N]` answers the
    #   question "What is currently the 'best' way to build a chain of N+1
    #   points to the left of the vertical line". Here, 'best' means
    #   that the last point of the chain is a as low as possible (minimal
    #   Y coordinate).
    # * `table[N]` is `(y, point_idx)` where `point_idx` is the index of
    #   the last point in the chain and `y` is its Y coordinate
    # * A key invariant is that the Y values in the table are
    #   monotonically increasing
    # * Thanks to these properties, the table can be used to answer the
    #   question "What is the longest chain that can be built to the left
    #   of the vertical line using only points below a certain Y value",
    #   using a binary search over the table.
    # * The algorithm also builds a backlink structure in which every point
    #   links back to the previous point on a best (longest) chain ending
    #   at that point
    #
    # The core loop of the algorithm sweeps the line and updates the table
    # and backlink structure for every point that we cross during the sweep.
    # Therefore, the algorithm is trivially O(M log M) in the number of
    # points.
    candidates.sort(key=lambda candidate: (candidate[0], -candidate[1]))

    backlinks = []
    table_rhs_idx = []
    table_candidate_idx = []
    for _, rhs_idx in candidates:
        candidate_idx = len(backlinks)
        ti = bisect.bisect_left(table_rhs_idx, rhs_idx)

        # Update the table to record a best chain ending in the current point.
        # There always is one, and if any of the previously visited points had
        # a higher Y coordinate, then there is always a previously recorded best
        # chain that can be improved upon by using the current point.
        #
        # There is only one case where there is some ambiguity. If the
        # pre-existing entry table[ti] has the same Y coordinate / rhs_idx as
        # the current point (this can only happen if the same line appeared
        # multiple times on the LHS), then we could choose to keep the
        # previously recorded best chain instead. That would bias the algorithm
        # differently but should have no systematic impact on the quality of the
        # result.
        if ti < len(table_rhs_idx):
            table_rhs_idx[ti] = rhs_idx
            table_candidate_idx[ti] = candidate_idx
        else:
            table_rhs_idx.append(rhs_idx)
            table_candidate_idx.append(candidate_idx)
        if ti > 0:
            backlinks.append(table_candidate_idx[ti - 1])
        else:
            backlinks.append(None)

    # Commit to names in the matching by walking the backlinks. Recursively
    # attempt to fill in more matches in-between.
    match_idx = table_candidate_idx[-1]
    while match_idx is not None:
        current = candidates[match_idx]
        matches.append(current)
        match_idx = backlinks[match_idx]

    matches.reverse()
    return matches
# Placeholder inserted where a FileCheck variable occurs while check lines are
# diffed and variable names are remapped.
VARIABLE_TAG = "[[@@]]"
# Matches a FileCheck metavariable use/def, e.g. [[NAME]] or [[NAME:regex]].
METAVAR_RE = re.compile(r"\[\[([A-Z0-9_]+)(?::[^]]+)?\]\]")
# Matches the (possibly empty) run of trailing digits of a variable name.
NUMERIC_SUFFIX_RE = re.compile(r"[0-9]*$")
class TestVar:
    """One FileCheck variable, plus the prefix/suffix text that has surrounded
    every occurrence of it seen so far (folded into its definition if stable)."""

    def __init__(self, nameless_value: NamelessValue, prefix: str, suffix: str):
        self._nameless_value = nameless_value
        self._prefix = prefix
        self._suffix = suffix

    def seen(self, nameless_value: NamelessValue, prefix: str, suffix: str):
        # As soon as one occurrence disagrees on the surrounding text, stop
        # folding that text into the variable definition.
        if self._prefix != prefix:
            self._prefix = ""
        if self._suffix != suffix:
            self._suffix = ""

    def get_variable_name(self, text):
        value = self._nameless_value
        return value.get_value_name(text, value.check_prefix)

    def _consume_folded_affixes(self, prefix, suffix):
        # Affix text that is folded into the variable definition must not be
        # emitted again around the capture; assert consistency and drop it.
        if self._prefix:
            assert self._prefix == prefix
            prefix = ""
        if self._suffix:
            assert self._suffix == suffix
            suffix = ""
        return prefix, suffix

    def get_def(self, name, prefix, suffix):
        # Numeric values use FileCheck's numeric-substitution syntax.
        if self._nameless_value.is_number:
            return f"{prefix}[[#{name}:]]{suffix}"
        prefix, suffix = self._consume_folded_affixes(prefix, suffix)
        return f"{prefix}[[{name}:{self._prefix}{self._nameless_value.get_ir_regex()}{self._suffix}]]{suffix}"

    def get_use(self, name, prefix, suffix):
        if self._nameless_value.is_number:
            return f"{prefix}[[#{name}]]{suffix}"
        prefix, suffix = self._consume_folded_affixes(prefix, suffix)
        return f"{prefix}[[{name}]]{suffix}"
class CheckValueInfo:
    """Describes one FileCheck variable occurrence within a check line."""

    def __init__(self, key, text, name: str, prefix: str, suffix: str):
        # Kind key of the value, e.g. '%' for locals.
        self.key = key
        # Raw IR text the FileCheck variable stands for (without affixes).
        self.text = text
        # The FileCheck variable's name.
        self.name = name
        # Affix text captured by the NamelessValue regular expression.
        self.prefix = prefix
        self.suffix = suffix
# Represent a check line in a way that allows us to compare check lines while
# ignoring some or all of the FileCheck variable names.
class CheckLineInfo:
    def __init__(self, line, values):
        # Line with all FileCheck variable name occurrences replaced by VARIABLE_TAG
        self.line: str = line
        # Information on each FileCheck variable name occurrences in the line,
        # in order of appearance (one entry per VARIABLE_TAG in `line`).
        self.values: List[CheckValueInfo] = values

    def __repr__(self):
        return f"CheckLineInfo(line={self.line}, self.values={self.values})"
def remap_metavar_names(
    old_line_infos: List[CheckLineInfo],
    new_line_infos: List[CheckLineInfo],
    committed_names: Set[str],
) -> Mapping[str, str]:
    """
    Map all FileCheck variable names that appear in new_line_infos to new
    FileCheck variable names in an attempt to reduce the diff from old_line_infos
    to new_line_infos.

    This is done by:
    * Matching old check lines and new check lines using a diffing algorithm
      applied after replacing names with wildcards.
    * Committing to variable names such that the matched lines become equal
      (without wildcards) if possible
    * This is done recursively to handle cases where many lines are equal
      after wildcard replacement

    committed_names is updated in place with every name this function commits.
    """
    # Initialize uncommitted identity mappings
    new_mapping = {}
    for line in new_line_infos:
        for value in line.values:
            new_mapping[value.name] = value.name

    # Recursively commit to the identity mapping or find a better one
    def recurse(old_begin, old_end, new_begin, new_end):
        if old_begin == old_end or new_begin == new_end:
            return

        # Find a matching of lines where uncommitted names are replaced
        # with a placeholder.
        def diffify_line(line, mapper):
            values = []
            for value in line.values:
                mapped = mapper(value.name)
                values.append(mapped if mapped in committed_names else "?")
            return line.line.strip() + " @@@ " + " @ ".join(values)

        lhs_lines = [
            diffify_line(line, lambda x: x)
            for line in old_line_infos[old_begin:old_end]
        ]
        rhs_lines = [
            diffify_line(line, lambda x: new_mapping[x])
            for line in new_line_infos[new_begin:new_end]
        ]

        candidate_matches = find_diff_matching(lhs_lines, rhs_lines)

        # Translate slice-relative indices back into absolute line indices.
        candidate_matches = [
            (old_begin + lhs_idx, new_begin + rhs_idx)
            for lhs_idx, rhs_idx in candidate_matches
        ]

        # Candidate matches may conflict if they require conflicting mappings of
        # names. We want to determine a large set of compatible candidates,
        # because that leads to a small diff.
        #
        # We think of the candidates as vertices in a conflict graph. The
        # conflict graph has edges between incompatible candidates. We want to
        # find a large independent set in this graph.
        #
        # Greedily selecting candidates and removing incompatible ones has the
        # disadvantage that making few bad decisions early on can have huge
        # consequences.
        #
        # Instead, we implicitly compute multiple independent sets by greedily
        # assigning a *coloring* to the conflict graph. Then, we select the
        # largest color class (which is the largest independent set we found),
        # commit to all candidates in it, and recurse.
        #
        # Note that we don't actually materialize the conflict graph. Instead,
        # each color class tracks the information needed to decide implicitly
        # whether a vertex conflicts (has an edge to) any of the vertices added
        # to the color class so far.
        class Color:
            def __init__(self):
                # (lhs_idx, rhs_idx) of matches in this color
                self.matches = []

                # rhs_name -> lhs_name mappings required by this color
                self.mapping = {}

                # lhs_names committed for this color
                self.committed = set()

        colors = []

        for lhs_idx, rhs_idx in candidate_matches:
            lhs_line = old_line_infos[lhs_idx]
            rhs_line = new_line_infos[rhs_idx]

            # We scan through the uncommitted names in the candidate line and
            # filter out the color classes to which the candidate could be
            # assigned.
            #
            # Simultaneously, we prepare a new color class in case the candidate
            # conflicts with all colors that have been established so far.
            compatible_colors = colors[:]
            new_color = Color()
            new_color.matches.append((lhs_idx, rhs_idx))

            for lhs_value, rhs_value in zip(lhs_line.values, rhs_line.values):
                if new_mapping[rhs_value.name] in committed_names:
                    # The new value has already been committed. If it was mapped
                    # to the same name as the original value, we can consider
                    # committing other values from this line. Otherwise, we
                    # should ignore this line.
                    if new_mapping[rhs_value.name] == lhs_value.name:
                        continue
                    else:
                        break

                if rhs_value.name in new_color.mapping:
                    # Same, but for a possible commit happening on the same line
                    if new_color.mapping[rhs_value.name] == lhs_value.name:
                        continue
                    else:
                        break

                if (
                    lhs_value.name in committed_names
                    or lhs_value.name in new_color.committed
                ):
                    # We can't map this value because the name we would map it
                    # to has already been committed for something else. Give up
                    # on this line.
                    break

                new_color.mapping[rhs_value.name] = lhs_value.name
                new_color.committed.add(lhs_value.name)

                color_idx = 0
                while color_idx < len(compatible_colors):
                    color = compatible_colors[color_idx]
                    compatible = True
                    if rhs_value.name in color.mapping:
                        compatible = color.mapping[rhs_value.name] == lhs_value.name
                    else:
                        compatible = lhs_value.name not in color.committed
                    if compatible:
                        color_idx += 1
                    else:
                        del compatible_colors[color_idx]
            else:
                # We never broke out of the loop, which means that at a minimum,
                # this line is viable standalone
                if compatible_colors:
                    color = max(compatible_colors, key=lambda color: len(color.matches))
                    color.mapping.update(new_color.mapping)
                    color.committed.update(new_color.committed)
                    color.matches.append((lhs_idx, rhs_idx))
                else:
                    colors.append(new_color)

        if colors:
            # Pick the largest color class. This gives us a large independent
            # (non-conflicting) set of candidate matches. Assign all names
            # required by the independent set and recurse.
            max_color = max(colors, key=lambda color: len(color.matches))

            for rhs_var, lhs_var in max_color.mapping.items():
                new_mapping[rhs_var] = lhs_var
                committed_names.add(lhs_var)

                if (
                    lhs_var != rhs_var
                    and lhs_var in new_mapping
                    and new_mapping[lhs_var] == lhs_var
                ):
                    new_mapping[lhs_var] = "conflict_" + lhs_var

            matches = (
                [(old_begin - 1, new_begin - 1)]
                + max_color.matches
                + [(old_end, new_end)]
            )

            for (lhs_prev, rhs_prev), (lhs_next, rhs_next) in zip(matches, matches[1:]):
                recurse(lhs_prev + 1, lhs_next, rhs_prev + 1, rhs_next)

    recurse(0, len(old_line_infos), 0, len(new_line_infos))

    # Commit to remaining names and resolve conflicts
    for new_name, mapped_name in new_mapping.items():
        if mapped_name in committed_names:
            continue
        if not mapped_name.startswith("conflict_"):
            assert mapped_name == new_name
            committed_names.add(mapped_name)

    for new_name, mapped_name in new_mapping.items():
        if mapped_name in committed_names:
            continue
        assert mapped_name.startswith("conflict_")

        # Resolve the conflict by appending the smallest unused numeric suffix
        # to the variable's base name.
        m = NUMERIC_SUFFIX_RE.search(new_name)
        base_name = new_name[: m.start()]
        suffix = int(new_name[m.start() :]) if m.start() != m.end() else 1
        while True:
            candidate = f"{base_name}{suffix}"
            if candidate not in committed_names:
                new_mapping[new_name] = candidate
                committed_names.add(candidate)
                break
            suffix += 1

    return new_mapping
def generalize_check_lines(
    lines,
    ginfo: GeneralizerInfo,
    vars_seen,
    global_vars_seen,
    preserve_names=False,
    original_check_lines=None,
    *,
    unstable_globals_only=False,
):
    """Replace value names/numbers in `lines` by FileCheck variables.

    Mutates and returns `lines`; also updates `vars_seen` (local values) and
    `global_vars_seen` (global values), both keyed by (name, check_key).
    When `original_check_lines` is given, variable names are chosen to
    minimize the diff against those pre-existing check lines.
    """
    if unstable_globals_only:
        regexp = ginfo.get_unstable_globals_regexp()
    else:
        regexp = ginfo.get_regexp()

    multiple_braces_re = re.compile(r"({{+)|(}}+)")

    def escape_braces(match_obj):
        return "{{" + re.escape(match_obj.group(0)) + "}}"

    if ginfo.is_ir():
        for i, line in enumerate(lines):
            # An IR variable named '%.' matches the FileCheck regex string.
            line = line.replace("%.", "%dot")
            # Rewrite requested global initializers to hexadecimal numeric checks.
            for regex in _global_hex_value_regex:
                if re.match("^@" + regex + " = ", line):
                    line = re.sub(
                        r"\bi([0-9]+) ([0-9]+)",
                        lambda m: "i"
                        + m.group(1)
                        + " [[#"
                        + hex(int(m.group(2)))
                        + "]]",
                        line,
                    )
                    break
            # Ignore any comments, since the check lines will too.
            scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r"", line)
            lines[i] = scrubbed_line

    if not preserve_names:
        # Names already assigned for previously seen local values.
        committed_names = set(
            test_var.get_variable_name(name)
            for (name, _), test_var in vars_seen.items()
        )
        # Keys whose first occurrence (the FileCheck def) is in these lines.
        defs = set()

        # Collect information about new check lines, and generalize global reference
        new_line_infos = []
        for line in lines:
            filtered_line = ""
            values = []
            while True:
                m = regexp.search(line)
                if m is None:
                    filtered_line += line
                    break

                name = ginfo.get_name_from_match(m)
                nameless_value = ginfo.get_nameless_value_from_match(m)
                prefix, suffix = nameless_value.get_affixes_from_match(m)
                if may_clash_with_default_check_prefix_name(
                    nameless_value.check_prefix, name
                ):
                    warn(
                        "Change IR value name '%s' or use --prefix-filecheck-ir-name to prevent possible conflict"
                        " with scripted FileCheck name." % (name,)
                    )

                # Record the variable as seen and (for locals) accumulate
                # prefixes/suffixes
                is_local_def = nameless_value.is_local_def_ir_value()
                if is_local_def:
                    vars_dict = vars_seen
                else:
                    vars_dict = global_vars_seen

                key = (name, nameless_value.check_key)

                if is_local_def:
                    test_prefix = prefix
                    test_suffix = suffix
                else:
                    test_prefix = ""
                    test_suffix = ""

                if key in vars_dict:
                    vars_dict[key].seen(nameless_value, test_prefix, test_suffix)
                else:
                    vars_dict[key] = TestVar(nameless_value, test_prefix, test_suffix)
                    defs.add(key)

                var = vars_dict[key].get_variable_name(name)

                # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
                filtered_line += (
                    line[: m.start()] + m.group(1) + VARIABLE_TAG + m.group(m.lastindex)
                )
                line = line[m.end() :]

                values.append(
                    CheckValueInfo(
                        key=nameless_value.check_key,
                        text=name,
                        name=var,
                        prefix=prefix,
                        suffix=suffix,
                    )
                )

            new_line_infos.append(CheckLineInfo(filtered_line, values))

        committed_names.update(
            test_var.get_variable_name(name)
            for (name, _), test_var in global_vars_seen.items()
        )

        # Collect information about original check lines, if any.
        orig_line_infos = []
        for line in original_check_lines or []:
            filtered_line = ""
            values = []
            while True:
                m = METAVAR_RE.search(line)
                if m is None:
                    filtered_line += line
                    break

                # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
                filtered_line += line[: m.start()] + VARIABLE_TAG
                line = line[m.end() :]
                values.append(
                    CheckValueInfo(
                        key=None,
                        text=None,
                        name=m.group(1),
                        prefix="",
                        suffix="",
                    )
                )

            orig_line_infos.append(CheckLineInfo(filtered_line, values))

        # Compute the variable name mapping
        mapping = remap_metavar_names(orig_line_infos, new_line_infos, committed_names)

        # Apply the variable name mapping: substitute each VARIABLE_TAG with
        # the corresponding FileCheck def (first occurrence) or use.
        for i, line_info in enumerate(new_line_infos):
            line_template = line_info.line
            line = ""

            for value in line_info.values:
                idx = line_template.find(VARIABLE_TAG)
                line += line_template[:idx]
                line_template = line_template[idx + len(VARIABLE_TAG) :]

                key = (value.text, value.key)
                if value.key == "%":
                    vars_dict = vars_seen
                else:
                    vars_dict = global_vars_seen

                if key in defs:
                    line += vars_dict[key].get_def(
                        mapping[value.name], value.prefix, value.suffix
                    )
                    defs.remove(key)
                else:
                    line += vars_dict[key].get_use(
                        mapping[value.name], value.prefix, value.suffix
                    )

            line += line_template

            lines[i] = line

    if ginfo.is_analyze():
        for i, _ in enumerate(lines):
            # Escape multiple {{ or }} as {{}} denotes a FileCheck regex.
            scrubbed_line = multiple_braces_re.sub(escape_braces, lines[i])
            lines[i] = scrubbed_line

    return lines
1887 def add_checks(
1888 output_lines,
1889 comment_marker,
1890 prefix_list,
1891 func_dict,
1892 func_name,
1893 check_label_format,
1894 ginfo,
1895 global_vars_seen_dict,
1896 is_filtered,
1897 preserve_names=False,
1898 original_check_lines: Mapping[str, List[str]] = {},
1900 # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well.
1901 prefix_exclusions = set()
1902 printed_prefixes = []
1903 for p in prefix_list:
1904 checkprefixes = p[0]
1905 # If not all checkprefixes of this run line produced the function we cannot check for it as it does not
1906 # exist for this run line. A subset of the check prefixes might know about the function but only because
1907 # other run lines created it.
1908 if any(
1909 map(
1910 lambda checkprefix: func_name not in func_dict[checkprefix],
1911 checkprefixes,
1914 prefix_exclusions |= set(checkprefixes)
1915 continue
1917 # prefix_exclusions is constructed, we can now emit the output
1918 for p in prefix_list:
1919 global_vars_seen = {}
1920 checkprefixes = p[0]
1921 for checkprefix in checkprefixes:
1922 if checkprefix in global_vars_seen_dict:
1923 global_vars_seen.update(global_vars_seen_dict[checkprefix])
1924 else:
1925 global_vars_seen_dict[checkprefix] = {}
1926 if checkprefix in printed_prefixes:
1927 break
1929 # Check if the prefix is excluded.
1930 if checkprefix in prefix_exclusions:
1931 continue
1933 # If we do not have output for this prefix we skip it.
1934 if not func_dict[checkprefix][func_name]:
1935 continue
1937 # Add some space between different check prefixes, but not after the last
1938 # check line (before the test code).
1939 if ginfo.is_asm():
1940 if len(printed_prefixes) != 0:
1941 output_lines.append(comment_marker)
1943 if checkprefix not in global_vars_seen_dict:
1944 global_vars_seen_dict[checkprefix] = {}
1946 global_vars_seen_before = [key for key in global_vars_seen.keys()]
1948 vars_seen = {}
1949 printed_prefixes.append(checkprefix)
1950 attrs = str(func_dict[checkprefix][func_name].attrs)
1951 attrs = "" if attrs == "None" else attrs
1952 if ginfo.get_version() > 1:
1953 funcdef_attrs_and_ret = func_dict[checkprefix][
1954 func_name
1955 ].funcdef_attrs_and_ret
1956 else:
1957 funcdef_attrs_and_ret = ""
1959 if attrs:
1960 output_lines.append(
1961 "%s %s: Function Attrs: %s" % (comment_marker, checkprefix, attrs)
1963 args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
1964 if args_and_sig:
1965 args_and_sig = generalize_check_lines(
1966 [args_and_sig],
1967 ginfo,
1968 vars_seen,
1969 global_vars_seen,
1970 preserve_names,
1971 original_check_lines=[],
1972 )[0]
1973 func_name_separator = func_dict[checkprefix][func_name].func_name_separator
1974 if "[[" in args_and_sig:
1975 # Captures in label lines are not supported, thus split into a -LABEL
1976 # and a separate -SAME line that contains the arguments with captures.
1977 args_and_sig_prefix = ""
1978 if ginfo.get_version() >= 3 and args_and_sig.startswith("("):
1979 # Ensure the "(" separating function name and arguments is in the
1980 # label line. This is required in case of function names that are
1981 # prefixes of each other. Otherwise, the label line for "foo" might
1982 # incorrectly match on "foo.specialized".
1983 args_and_sig_prefix = args_and_sig[0]
1984 args_and_sig = args_and_sig[1:]
1986 # Removing args_and_sig from the label match line requires
1987 # func_name_separator to be empty. Otherwise, the match will not work.
1988 assert func_name_separator == ""
1989 output_lines.append(
1990 check_label_format
1992 checkprefix,
1993 funcdef_attrs_and_ret,
1994 func_name,
1995 args_and_sig_prefix,
1996 func_name_separator,
1999 output_lines.append(
2000 "%s %s-SAME: %s" % (comment_marker, checkprefix, args_and_sig)
2002 else:
2003 output_lines.append(
2004 check_label_format
2006 checkprefix,
2007 funcdef_attrs_and_ret,
2008 func_name,
2009 args_and_sig,
2010 func_name_separator,
2013 func_body = str(func_dict[checkprefix][func_name]).splitlines()
2014 if not func_body:
2015 # We have filtered everything.
2016 continue
2018 # For ASM output, just emit the check lines.
2019 if ginfo.is_asm():
2020 body_start = 1
2021 if is_filtered:
2022 # For filtered output we don't add "-NEXT" so don't add extra spaces
2023 # before the first line.
2024 body_start = 0
2025 else:
2026 output_lines.append(
2027 "%s %s: %s" % (comment_marker, checkprefix, func_body[0])
2029 func_lines = generalize_check_lines(
2030 func_body[body_start:], ginfo, vars_seen, global_vars_seen
2032 for func_line in func_lines:
2033 if func_line.strip() == "":
2034 output_lines.append(
2035 "%s %s-EMPTY:" % (comment_marker, checkprefix)
2037 else:
2038 check_suffix = "-NEXT" if not is_filtered else ""
2039 output_lines.append(
2040 "%s %s%s: %s"
2041 % (comment_marker, checkprefix, check_suffix, func_line)
2043 # Remember new global variables we have not seen before
2044 for key in global_vars_seen:
2045 if key not in global_vars_seen_before:
2046 global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
2047 break
2048 # For analyze output, generalize the output, and emit CHECK-EMPTY lines as well.
2049 elif ginfo.is_analyze():
2050 func_body = generalize_check_lines(
2051 func_body, ginfo, vars_seen, global_vars_seen
2053 for func_line in func_body:
2054 if func_line.strip() == "":
2055 output_lines.append(
2056 "{} {}-EMPTY:".format(comment_marker, checkprefix)
2058 else:
2059 check_suffix = "-NEXT" if not is_filtered else ""
2060 output_lines.append(
2061 "{} {}{}: {}".format(
2062 comment_marker, checkprefix, check_suffix, func_line
2066 # Add space between different check prefixes and also before the first
2067 # line of code in the test function.
2068 output_lines.append(comment_marker)
2070 # Remember new global variables we have not seen before
2071 for key in global_vars_seen:
2072 if key not in global_vars_seen_before:
2073 global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
2074 break
2075 # For IR output, change all defs to FileCheck variables, so we're immune
2076 # to variable naming fashions.
2077 else:
2078 func_body = generalize_check_lines(
2079 func_body,
2080 ginfo,
2081 vars_seen,
2082 global_vars_seen,
2083 preserve_names,
2084 original_check_lines=original_check_lines.get(checkprefix),
2087 # This could be selectively enabled with an optional invocation argument.
2088 # Disabled for now: better to check everything. Be safe rather than sorry.
2090 # Handle the first line of the function body as a special case because
2091 # it's often just noise (a useless asm comment or entry label).
2092 # if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
2093 # is_blank_line = True
2094 # else:
2095 # output_lines.append('%s %s: %s' % (comment_marker, checkprefix, func_body[0]))
2096 # is_blank_line = False
2098 is_blank_line = False
2100 for func_line in func_body:
2101 if func_line.strip() == "":
2102 is_blank_line = True
2103 continue
2104 # Do not waste time checking IR comments.
2105 func_line = SCRUB_IR_COMMENT_RE.sub(r"", func_line)
2107 # Skip blank lines instead of checking them.
2108 if is_blank_line:
2109 output_lines.append(
2110 "{} {}: {}".format(
2111 comment_marker, checkprefix, func_line
2114 else:
2115 check_suffix = "-NEXT" if not is_filtered else ""
2116 output_lines.append(
2117 "{} {}{}: {}".format(
2118 comment_marker, checkprefix, check_suffix, func_line
2121 is_blank_line = False
2123 # Add space between different check prefixes and also before the first
2124 # line of code in the test function.
2125 output_lines.append(comment_marker)
2127 # Remember new global variables we have not seen before
2128 for key in global_vars_seen:
2129 if key not in global_vars_seen_before:
2130 global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
2131 break
2132 return printed_prefixes
def add_ir_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    preserve_names,
    function_sig,
    ginfo: GeneralizerInfo,
    global_vars_seen_dict,
    is_filtered,
    original_check_lines=None,
):
    """Append FileCheck lines for the IR output of *func_name* to output_lines.

    Builds the -LABEL format string appropriate for the UTC version in use and
    delegates the actual check emission to add_checks.

    original_check_lines previously used a mutable default ({}); it now uses
    None as the sentinel to avoid the shared-mutable-default pitfall.  Callers
    passing a dict are unaffected.
    """
    assert ginfo.is_ir()
    if original_check_lines is None:
        original_check_lines = {}
    # Label format is based on IR string.
    if function_sig and ginfo.get_version() > 1:
        # Version >= 2 checks the full signature (return type/attributes).
        function_def_regex = "define %s"
    elif function_sig:
        # Version 1 skips everything between 'define' and the function name.
        function_def_regex = "define {{[^@]+}}%s"
    else:
        function_def_regex = "%s"
    check_label_format = "{} %s-LABEL: {}@%s%s%s".format(
        comment_marker, function_def_regex
    )
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        check_label_format,
        ginfo,
        global_vars_seen_dict,
        is_filtered,
        preserve_names,
        original_check_lines=original_check_lines,
    )
def add_analyze_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    ginfo: GeneralizerInfo,
    is_filtered,
):
    """Append FileCheck lines for analyze-mode output of *func_name*."""
    assert ginfo.is_analyze()
    # Analysis output is quoted in the label line; no IR signature is matched.
    label_format = "{} %s-LABEL: '%s%s%s%s'".format(comment_marker)
    # Analysis output carries no global variables, so start from an empty map.
    seen_globals = {}
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        label_format,
        ginfo,
        seen_globals,
        is_filtered,
    )
def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes, ginfo):
    """Scrape global value definitions from *raw_tool_output* into
    glob_val_dict[prefix][check_prefix] as (start_offset, line) tuples.

    A prefix whose previously recorded lines conflict with the newly scraped
    ones is reset to None (or, for the last prefix, warned about) so that no
    checks are emitted for it.
    """
    for nv in ginfo.get_nameless_values():
        if nv.global_ir_rhs_regexp is None:
            continue

        # Match e.g. '@name = <rhs>' anchored to a whole line.
        matcher = re.compile(
            r"^"
            + nv.ir_prefix
            + nv.ir_regexp
            + r"\s=\s"
            + nv.global_ir_rhs_regexp
            + r"$",
            flags=(re.M),
        )
        # Attach each match's start offset so that CHECK lines can be sorted
        # properly even if matched by different nameless values.  This is
        # relevant for GLOB and GLOBNAMED since they may appear interlaced.
        found = [(m.start(), m.group(0)) for m in matcher.finditer(raw_tool_output)]

        for prefix in prefixes:
            if glob_val_dict[prefix] is None:
                continue
            existing = glob_val_dict[prefix]
            if nv.check_prefix in existing:
                if existing[nv.check_prefix] == found:
                    continue
                if prefix == prefixes[-1]:
                    warn("Found conflicting asm under the same prefix: %r!" % (prefix,))
                else:
                    existing[nv.check_prefix] = None
                continue
            existing[nv.check_prefix] = found
def filter_globals_according_to_preference(
    global_val_lines_w_index, global_vars_seen, nameless_value, global_check_setting
):
    """Filter (index, line) global definitions per --check-globals setting.

    'none' drops everything, 'all' keeps everything, and 'smart' keeps only
    globals transitively referenced by variables already seen in the checks.
    """
    if global_check_setting == "none":
        return []
    if global_check_setting == "all":
        return global_val_lines_w_index
    assert global_check_setting == "smart"

    if nameless_value.check_key == "#":
        # Attribute sets are usually better checked by --check-attributes.
        return []

    def parse(line):
        # Split '<prefix><name> = <rhs>' and collect names referenced in rhs.
        pattern = (
            "^"
            + nameless_value.ir_prefix
            + "("
            + nameless_value.ir_regexp
            + ") = ("
            + nameless_value.global_ir_rhs_regexp
            + ")"
        )
        m = re.match(pattern, line)
        return (m.group(1), re.findall(nameless_value.ir_regexp, m.group(2)))

    # Map each defined global to the globals its definition references.
    refs_of = {}
    for _, text in global_val_lines_w_index:
        name, refs = parse(text)
        refs_of[name] = refs

    # Compute the transitive closure of globals reachable from already-seen
    # variables of the matching check key, using an explicit worklist.
    visible = set()
    worklist = [
        var for var, key in global_vars_seen if key == nameless_value.check_key
    ]
    while worklist:
        var = worklist.pop()
        if var in visible:
            continue
        visible.add(var)
        worklist.extend(refs_of.get(var, ()))

    return [
        (idx, text)
        for idx, text in global_val_lines_w_index
        if parse(text)[0] in visible
    ]
# Regex filters applied to metadata lines to scrub values that vary between
# builds (compiler version strings, source directories) before emitting
# check lines.  Each entry is a (pattern, replacement) pair.
METADATA_FILTERS = [
    # Compiler version strings, e.g. '... clang version 15.0.0 (repo hash)'.
    # Prefaced with a glob as well, to capture an optional CLANG_VENDOR.
    (
        r"(?<=\")(.+ )?(\w+ version )[\d.]+(?:[^\" ]*)(?: \([^)]+\))?",
        r"{{.*}}\2{{.*}}",
    ),
    # !DIFile directories depend on the build machine; scrub them.
    (r'(!DIFile\(filename: ".+", directory: )".+"', r"\1{{.*}}"),
]
METADATA_FILTERS_RE = [(re.compile(f), r) for (f, r) in METADATA_FILTERS]
def filter_unstable_metadata(line):
    """Scrub build-dependent metadata values (version strings, source paths)
    from *line* by applying every METADATA_FILTERS_RE substitution in order."""
    for pattern, replacement in METADATA_FILTERS_RE:
        line = pattern.sub(replacement, line)
    return line
def flush_current_checks(output_lines, new_lines_w_index, comment_marker):
    """Append pending (offset, check-line) entries, sorted by source offset,
    after a separator line.  The pending list is emptied; no-op when empty."""
    if not new_lines_w_index:
        return
    output_lines.append(comment_marker + SEPARATOR)
    for _, check in sorted(new_lines_w_index):
        output_lines.append(check)
    new_lines_w_index.clear()
def add_global_checks(
    glob_val_dict,
    comment_marker,
    prefix_list,
    output_lines,
    ginfo: GeneralizerInfo,
    global_vars_seen_dict,
    preserve_names,
    is_before_functions,
    global_check_setting,
):
    """Emit CHECK lines for global values (globals, metadata, attributes).

    Returns the set of (check prefix, value check prefix) pairs for which
    checks were emitted.  global_vars_seen_dict is updated with variables
    first seen while generating these checks.
    """
    printed_prefixes = set()
    # Pending check lines per prefix; allows GLOB and GLOBNAMED, which may be
    # interlaced in the output, to be sorted correctly before flushing.
    output_lines_loc = {}
    for nv in ginfo.get_nameless_values():
        if nv.global_ir_rhs_regexp is None:
            continue
        if nv.is_before_functions != is_before_functions:
            continue
        for run_entry in prefix_list:
            global_vars_seen = {}
            checkprefixes = run_entry[0]
            if checkprefixes is None:
                continue
            for checkprefix in checkprefixes:
                if checkprefix in global_vars_seen_dict:
                    global_vars_seen.update(global_vars_seen_dict[checkprefix])
                else:
                    global_vars_seen_dict[checkprefix] = {}
                if (checkprefix, nv.check_prefix) in printed_prefixes:
                    break
                if not glob_val_dict[checkprefix]:
                    continue
                if nv.check_prefix not in glob_val_dict[checkprefix]:
                    continue
                if not glob_val_dict[checkprefix][nv.check_prefix]:
                    continue

                check_lines = []
                global_vars_seen_before = list(global_vars_seen.keys())
                lines_w_index = filter_globals_according_to_preference(
                    glob_val_dict[checkprefix][nv.check_prefix],
                    global_vars_seen_before,
                    nv,
                    global_check_setting,
                )
                for idx, line in lines_w_index:
                    if _global_value_regex:
                        # Honor the user-supplied --global-value-regex filter.
                        if not any(
                            re.match("^@" + regex + " = ", line)
                            or re.match("^!" + regex + " = ", line)
                            for regex in _global_value_regex
                        ):
                            continue
                    [new_line] = generalize_check_lines(
                        [line],
                        ginfo,
                        {},
                        global_vars_seen,
                        preserve_names,
                        unstable_globals_only=True,
                    )
                    new_line = filter_unstable_metadata(new_line)
                    check_lines.append(
                        (idx, "%s %s: %s" % (comment_marker, checkprefix, new_line))
                    )
                if not check_lines:
                    continue

                pending = output_lines_loc.setdefault(checkprefix, [])
                if not nv.interlaced_with_previous:
                    flush_current_checks(output_lines, pending, comment_marker)
                pending.extend(check_lines)

                printed_prefixes.add((checkprefix, nv.check_prefix))

                # Remember new global variables we have not seen before.
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break

    if printed_prefixes:
        # Flush any remaining pending checks: one prefix per run line.
        for run_entry in prefix_list:
            if run_entry[0] is None:
                continue
            for checkprefix in run_entry[0]:
                if checkprefix not in output_lines_loc:
                    continue
                flush_current_checks(
                    output_lines, output_lines_loc[checkprefix], comment_marker
                )
                break
        output_lines.append(comment_marker + SEPARATOR)
    return printed_prefixes
def check_prefix(prefix):
    """Warn when *prefix* is not a valid FileCheck prefix string."""
    if PREFIX_RE.match(prefix):
        return
    hint = ""
    if "," in prefix:
        # A comma strongly suggests the user meant a prefix *list*.
        hint = " Did you mean '--check-prefixes=" + prefix + "'?"
    warn(
        (
            "Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores."
            + hint
        )
        % (prefix)
    )
def get_check_prefixes(filecheck_cmd):
    """Extract all check prefixes from a FileCheck command line, defaulting
    to ["CHECK"] when the command specifies none."""
    prefixes = []
    for m in CHECK_PREFIX_RE.finditer(filecheck_cmd):
        prefixes.extend(m.group(1).split(","))
    return prefixes or ["CHECK"]
def verify_filecheck_prefixes(fc_cmd):
    """Validate every -check-prefix / -check-prefixes value in a FileCheck
    command, warning on invalid or duplicated prefixes."""
    for token in fc_cmd.split():
        if "check-prefix=" in token:
            check_prefix(token.split("=", 1)[1])
        elif "check-prefixes=" in token:
            prefix_values = token.split("=", 1)[1].split(",")
            for value in prefix_values:
                check_prefix(value)
                if prefix_values.count(value) > 1:
                    warn(
                        "Supplied prefix '%s' is not unique in the prefix list."
                        % (value,)
                    )
def get_autogennote_suffix(parser, args):
    """Build the ' UTC_ARGS: ...' suffix recording the non-default options in
    *args*, so reruns of the script can reproduce the same configuration."""
    note_args = ""
    for action in parser._actions:
        if not hasattr(args, action.dest):
            continue  # Ignore options such as --help that aren't included in args.
        # Ignore parameters such as paths to the binary or the list of tests.
        if action.dest in (
            "tests",
            "update_only",
            "tool_binary",
            "opt_binary",
            "llc_binary",
            "clang",
            "opt",
            "llvm_bin",
            "verbose",
            "force_update",
            "reset_variable_names",
            "llvm_mc_binary",
        ):
            continue
        value = getattr(args, action.dest)
        if action.dest == "check_globals":
            # --check-globals gained a mode argument in version 4; earlier
            # versions treated it as a flag whose default was 'none'.
            default_value = "none" if args.version < 4 else "smart"
            if value == default_value:
                continue
            note_args += action.option_strings[0] + " "
            if args.version < 4 and value == "all":
                continue
            note_args += "%s " % value
            continue
        if action.const is not None:
            # The action stores a constant (usually True/False); skip actions
            # with a different constant value (this happens with boolean
            # --foo/--no-foo option pairs).
            if value != action.const:
                continue
        if parser.get_default(action.dest) == value:
            continue  # Don't add default values.
        if action.dest == "function_signature" and args.version >= 2:
            continue  # Enabled by default in version 2.
        if action.dest == "filters":
            # Create a separate option for each filter element. The value is
            # a list of Filter objects.
            for elem in value:
                opt_name = "filter-out" if elem.is_filter_out else "filter"
                opt_value = elem.pattern()
                new_arg = '--%s "%s" ' % (opt_name, opt_value.strip('"'))
                if new_arg not in note_args:
                    note_args += new_arg
        else:
            note_args += action.option_strings[0] + " "
            if action.const is None:  # The action takes a parameter.
                if action.nargs == "+":
                    value = " ".join('"' + v.strip('"') + '"' for v in value)
                note_args += "%s " % value
    if note_args:
        note_args = " %s %s" % (UTC_ARGS_KEY, note_args[:-1])
    return note_args
def check_for_command(line, parser, args, argv, argparse_callback):
    """If *line* carries a UTC_ARGS command, fold its options into argv and
    re-parse.  Returns the (possibly updated) args and argv."""
    cmd_match = UTC_ARGS_CMD.match(line)
    if cmd_match:
        options = shlex.split(cmd_match.group("cmd").strip())
        argv.extend(opt for opt in options if opt)
        # Re-parse everything except the test paths themselves.
        args = parse_args(parser, [a for a in argv if a not in args.tests])
        if argparse_callback is not None:
            argparse_callback(args)
    return args, argv
def find_arg_in_test(test_info, get_arg_to_check, arg_string, is_global):
    """Return the value of an option for this test.

    get_arg_to_check extracts the option from a parsed args namespace.  If the
    option is absent from the test's own args and is_global is set, scan the
    whole test for a UTC_ARGS line setting it: such "global" options affect
    the entire generation of test checks, so if one exists anywhere in the
    test it is applied to everything.  A warning is printed when the option
    appears only after test input lines have already started.
    """
    result = get_arg_to_check(test_info.args)
    if not result and is_global:
        saw_line = False  # Becomes True once past a non-comment input line.
        for line_info in test_info.ro_iterlines():
            line = line_info.line
            if not line.startswith(";") and line.strip() != "":
                saw_line = True
            result = get_arg_to_check(line_info.args)
            if result:
                # Bug fix: this previously read `if warn and saw_line:` where
                # `warn` names the module-level warning function and is thus
                # always truthy -- the condition reduces to `saw_line`.
                if saw_line:
                    # We saw the option after already reading some test input
                    # lines. Warn about it.
                    print(
                        "WARNING: Found {} in line following test start: ".format(
                            arg_string
                        )
                        + line,
                        file=sys.stderr,
                    )
                    print(
                        "WARNING: Consider moving {} to top of file".format(arg_string),
                        file=sys.stderr,
                    )
                break
    return result
def dump_input_lines(output_lines, test_info, prefix_set, comment_string):
    """Copy the test's input lines into output_lines, dropping bare comment
    markers, separator lines, and stale check lines for known prefixes."""
    for info in test_info.iterlines(output_lines):
        text = info.line
        stripped = text.strip()
        if stripped == comment_string:
            continue
        if stripped == comment_string + SEPARATOR:
            continue
        if text.lstrip().startswith(comment_string):
            m = CHECK_RE.match(text)
            # Drop old auto-generated checks for prefixes we will regenerate.
            if m and m.group(1) in prefix_set:
                continue
        output_lines.append(text.rstrip("\n"))
def add_checks_at_end(
    output_lines, prefix_list, func_order, comment_string, check_generator
):
    """Emit checks for all functions at the end of the test, grouped by prefix.

    Returns the set of prefixes for which checks were actually generated.
    """
    added = set()
    generated_prefixes = set()
    for run_entry in prefix_list:
        run_prefixes, tool_args = run_entry[0], run_entry[1]
        for run_prefix in run_prefixes:
            for func in func_order[run_prefix]:
                # The func order can contain the same function several times;
                # emit checks only the first time we encounter it.
                if (func, run_prefix) in added:
                    continue
                if added:
                    output_lines.append(comment_string)

                # The add_*_checks routines expect a run list whose items are
                # tuples of (prefix list, tool args).  They implicitly assume
                # every run line generates output for every function, which
                # is not true for generated functions (e.g. -fopenmp vs. no
                # -fopenmp).  Therefore pass just the prefix we're interested
                # in, which groups all checks of one prefix together before
                # moving on to the next prefix -- ordered by prefix instead
                # of by function as in "normal" mode.
                for produced in check_generator(
                    output_lines, [([run_prefix], tool_args)], func
                ):
                    added.add((func, produced))
                    generated_prefixes.add(produced)
    return generated_prefixes