[Clang] Fix expansion of response files in -Wp after integrated-cc1 change
[llvm-project.git] / llvm / utils / UpdateTestChecks / common.py
blobae4621c76978e02f4a81f70befb82db64b5d3692
1 from __future__ import print_function
2 import re
3 import string
4 import subprocess
5 import sys
6 import copy
# On Python 3, replace the name `string` with a minimal stand-in class that
# exposes only `expandtabs` (bound to str.expandtabs), so that calls written as
# `string.expandtabs(text, tabsize)` work on both major versions. On Python 2
# the real `string` module is used.
if sys.version_info[0] > 2:
    class string:
        expandtabs = str.expandtabs
else:
    import string

##### Common utilities for update_*test_checks.py

# Module-wide verbosity flag; set by parse_commandline_args() and read by
# debug().
_verbose = False
def parse_commandline_args(parser):
    """Add the options shared by the update scripts to *parser* and parse.

    Registers ``-v/--verbose`` and ``-u/--update-only`` (both boolean
    switches), parses the process command line, stores the verbosity choice
    in the module-level ``_verbose`` flag, and returns the parsed namespace.
    """
    global _verbose
    shared_switches = (
        ('-v', '--verbose', 'Show verbose output'),
        ('-u', '--update-only',
         'Only update test if it was already autogened'),
    )
    for short_opt, long_opt, help_text in shared_switches:
        parser.add_argument(short_opt, long_opt, action='store_true',
                            help=help_text)
    parsed = parser.parse_args()
    _verbose = parsed.verbose
    return parsed
def should_add_line_to_output(input_line, prefix_set):
    """Return True if *input_line* should be kept in the regenerated test.

    Two kinds of line are dropped: a comment line that contains nothing but
    ';', and an existing check line whose prefix is in *prefix_set* (those
    are rebuilt by the script). Blank lines are deliberately not filtered.
    """
    if input_line.strip() == ';':
        return False
    check_match = CHECK_RE.match(input_line)
    # Only check lines that belong to one of our prefixes are discarded.
    is_our_check = bool(check_match) and check_match.group(1) in prefix_set
    return not is_our_check
43 # Invoke the tool that is being tested.
def invoke_tool(exe, cmd_args, ir):
    """Run *exe* with the file *ir* on stdin and return its stdout as text.

    *cmd_args* may be a list (executed directly, without a shell) or a
    string (appended to *exe* and executed through the shell).
    A non-zero exit status propagates as subprocess.CalledProcessError.
    """
    # TODO Remove the str form which is used by update_test_checks.py and
    # update_llc_test_checks.py
    # The safer list form is used by update_cc_test_checks.py
    if isinstance(cmd_args, list):
        command, via_shell = [exe] + cmd_args, False
    else:
        command, via_shell = exe + ' ' + cmd_args, True
    with open(ir) as ir_file:
        output = subprocess.check_output(command, shell=via_shell,
                                         stdin=ir_file)
    if sys.version_info[0] > 2:
        # check_output returns bytes on Python 3.
        output = output.decode()
    # Normalize Windows CRLF line endings to Unix LF.
    return output.replace('\r\n', '\n')
59 ##### LLVM IR parser
# Captures the command text of a RUN line written after a '//', ';' or '#'
# comment marker.
RUN_LINE_RE = re.compile(r'^\s*(?://|[;#])\s*RUN:\s*(.*)$')
# Captures the value passed via -check-prefix / --check-prefix(es), separated
# from the option by '=' or a single space.
CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?[= ](\S+)')
# A well-formed check prefix: letters, digits, underscores and hyphens only.
PREFIX_RE = re.compile('^[a-zA-Z0-9_-]+$')
# Captures the prefix of an existing check line, excluding an optional
# -NEXT/-NOT/-DAG/-LABEL/-SAME/-EMPTY suffix before the colon.
CHECK_RE = re.compile(r'^\s*(?://|[;#])\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME|-EMPTY)?:')

# Matches a whole IR function definition; named groups: 'func' (name after
# '@'), 'args_and_sig' (parenthesized argument list plus everything up to the
# opening brace) and 'body' (text between '{\n' and the closing '}' line).
OPT_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*'
    r'(?P<args_and_sig>\((\)|(.*?[\w\.\-]+?)\))[^{]*)\{\n(?P<body>.*?)^\}$',
    flags=(re.M | re.S))

# Matches "'<analysis>' for function '<func>':" followed by the report text;
# named groups: 'analysis', 'func' and 'body'.
ANALYZE_FUNCTION_RE = re.compile(
    r'^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w-]+?)\':'
    r'\s*\n(?P<body>.*)$',
    flags=(re.X | re.S))

# Captures the function name from the start of an IR 'define' line.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@(\w+)\s*\(')
# Captures the quoted value of a 'target triple = "..."' line.
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
# Capture the value of a -mtriple / -march command-line option.
TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)')

# Whitespace at the very start of the text.
SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
# Runs of spaces/tabs; the leading negative lookahead exempts some
# start-of-line positions so that indentation is not collapsed.
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(| \w))[ \t]+', flags=re.M)
# Spaces/tabs at the end of any line.
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
# Alias of the pattern above; this is the name scrub_body() uses.
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
# Like the trailing-whitespace pattern, but also consumes trailing '#<number>'
# tokens at the end of a line.
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(r'([ \t]|(#[0-9]+))+$', flags=re.M)
# A '# kill:' comment line at the start of the text, including its newline.
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
# The loop annotation comments '# =>This Inner Loop Header:' / '# in Loop:'.
SCRUB_LOOP_COMMENT_RE = re.compile(
    r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M)
def error(msg, test_file=None):
    """Write an ``ERROR:`` diagnostic to stderr.

    When *test_file* is truthy it is appended to the message as
    ``<msg>: <test_file>``.
    """
    text = '{}: {}'.format(msg, test_file) if test_file else msg
    print('ERROR: {}'.format(text), file=sys.stderr)
def warn(msg, test_file=None):
    """Write a ``WARNING:`` diagnostic to stderr.

    When *test_file* is truthy it is appended to the message as
    ``<msg>: <test_file>``.
    """
    text = '{}: {}'.format(msg, test_file) if test_file else msg
    print('WARNING: {}'.format(text), file=sys.stderr)
def debug(*args, **kwargs):
    """Forward the arguments to print(), but only in verbose mode.

    Output goes to stderr unless the caller supplies its own ``file=``.
    """
    # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
    kwargs.setdefault('file', sys.stderr)
    if not _verbose:
        return
    print(*args, **kwargs)
def find_run_lines(test, lines):
    """Collect the RUN commands found in *lines*, joining continuations.

    A RUN line ending in a backslash is merged with the following RUN line:
    the trailing backslashes are removed and the two parts are joined with a
    single space. *test* is only used in debug output.
    """
    debug('Scanning for RUN lines in test file:', test)
    fragments = []
    for text in lines:
        hit = RUN_LINE_RE.match(text)
        if hit:
            fragments.append(hit.group(1))

    run_lines = []
    for fragment in fragments:
        if run_lines and run_lines[-1].endswith('\\'):
            # Previous command continues onto this line.
            run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + fragment
        else:
            run_lines.append(fragment)

    debug('Found {} RUN lines in {}:'.format(len(run_lines), test))
    for command in run_lines:
        debug(' RUN: {}'.format(command))
    return run_lines
def scrub_body(body):
    """Normalize whitespace in *body* and return the result.

    Three passes, in order: collapse runs of spaces/tabs to one space
    (leaving leading whitespace in place), expand remaining tabs to a tab
    stop of 2, then strip trailing whitespace from every line.
    """
    collapsed = SCRUB_WHITESPACE_RE.sub(r' ', body)
    detabbed = string.expandtabs(collapsed, 2)
    return SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r'', detabbed)
def do_scrub(body, scrubber, scrubber_args, extra):
    """Call ``scrubber(body, *scrubber_args)`` with ``extra_scrub`` set.

    When *scrubber_args* is non-empty, a deep copy is made and the
    ``extra_scrub`` attribute of its first element is set to *extra*, so the
    caller's argument objects are never modified. With empty
    *scrubber_args* the scrubber is called with *body* alone.
    """
    if not scrubber_args:
        return scrubber(body, *scrubber_args)
    private_args = copy.deepcopy(scrubber_args)
    private_args[0].extra_scrub = extra
    return scrubber(body, *private_args)
140 # Build up a dictionary of all the function bodies.
class function_body(object):
    """Scrubbed tool output recorded for one function.

    Attributes:
      scrub: the scrubbed body; this is what ``str()`` returns.
      extrascrub: the body scrubbed with the extra-scrub option enabled.
      args_and_sig: the text recorded after the function name.
    """

    def __init__(self, string, extra, args_and_sig):
        self.scrub = string
        self.extrascrub = extra
        self.args_and_sig = args_and_sig

    def is_same_except_arg_names(self, extrascrub, args_and_sig):
        """Compare with another function's text, ignoring argument names.

        Value names found in either signature are removed from both
        signatures; if the remainders differ the result is False. Those same
        names are then removed from both extra-scrubbed bodies, IR comments
        are stripped, and the bodies are compared.
        """
        seen_names = set()

        def strip_and_record(hit):
            # Remember the name and drop the whole '%name' token.
            seen_names.add(hit.group(2))
            return hit.group(1) + hit.group(3)

        def strip_if_recorded(hit):
            lead, name, tail = hit.group(1, 2, 3)
            kept = '' if name in seen_names else name
            return lead + kept + tail

        own_sig = IR_VALUE_RE.sub(strip_and_record, self.args_and_sig)
        other_sig = IR_VALUE_RE.sub(strip_and_record, args_and_sig)
        if own_sig != other_sig:
            return False

        own_body, other_body = (
            SCRUB_IR_COMMENT_RE.sub(r'', IR_VALUE_RE.sub(strip_if_recorded, text))
            for text in (self.extrascrub, extrascrub))
        return own_body == other_body

    def __str__(self):
        return self.scrub
def build_function_body_dictionary(function_re, scrubber, scrubber_args, raw_tool_output, prefixes, func_dict, verbose, record_args):
    """Record the scrubbed body of every function found in the tool output.

    Each match of *function_re* in *raw_tool_output* must provide the named
    groups 'func' and 'body'; 'args_and_sig' and 'analysis' are optional.
    For every prefix in *prefixes*, ``func_dict[prefix][func]`` is set to a
    function_body object, or to None when a different body was already
    recorded for that function under a prefix that is not the last one.

    *func_dict* is updated in place; nothing is returned. With *verbose*
    set, each processed function and its scrubbed body are echoed to stderr.
    """
    for m in function_re.finditer(raw_tool_output):
        if not m:
            continue
        func = m.group('func')
        body = m.group('body')
        # Determine if we print arguments, the opening brace, or nothing after the function name
        if record_args and 'args_and_sig' in m.groupdict():
            args_and_sig = scrub_body(m.group('args_and_sig').strip())
        elif 'args_and_sig' in m.groupdict():
            args_and_sig = '('
        else:
            args_and_sig = ''
        # Scrub twice: once normally and once with the extra-scrub option set.
        scrubbed_body = do_scrub(body, scrubber, scrubber_args, extra = False)
        scrubbed_extra = do_scrub(body, scrubber, scrubber_args, extra = True)
        if 'analysis' in m.groupdict():
            analysis = m.group('analysis')
            if analysis.lower() != 'cost model analysis':
                warn('Unsupported analysis mode: %r!' % (analysis,))
        if func.startswith('stress'):
            # We only use the last line of the function body for stress tests.
            scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
        if verbose:
            print('Processing function: ' + func, file=sys.stderr)
            for l in scrubbed_body.splitlines():
                print(' ' + l, file=sys.stderr)
        for prefix in prefixes:
            # Conflict: this prefix already has an entry for the function and
            # its body or signature differs from what was just produced.
            if func in func_dict[prefix] and (str(func_dict[prefix][func]) != scrubbed_body or (func_dict[prefix][func] and func_dict[prefix][func].args_and_sig != args_and_sig)):
                if func_dict[prefix][func] and func_dict[prefix][func].is_same_except_arg_names(scrubbed_extra, args_and_sig):
                    # Only argument names differ: keep the existing entry but
                    # switch it to the extra-scrubbed text.
                    func_dict[prefix][func].scrub = scrubbed_extra
                    func_dict[prefix][func].args_and_sig = args_and_sig
                    continue
                else:
                    if prefix == prefixes[-1]:
                        # Last prefix: warn, then fall through so the new body
                        # overwrites the old one below.
                        warn('Found conflicting asm under the same prefix: %r!' % (prefix,))
                    else:
                        # Mark the function as unusable under this prefix.
                        func_dict[prefix][func] = None
                        continue

            func_dict[prefix][func] = function_body(scrubbed_body, scrubbed_extra, args_and_sig)
209 ##### Generator of LLVM IR CHECK lines
# An IR comment: optional whitespace, a ';', and the rest of the line.
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

# Match things that look like identifiers, but only if they are followed by
# spaces, commas, paren, or end of the string.
# Groups: 1 = leading whitespace, 2 = the name after '%', 3 = the terminator.
IR_VALUE_RE = re.compile(r'(\s+)%([\w\.\-]+?)([,\s\(\)]|\Z)')
217 # Create a FileCheck variable name based on an IR name.
def get_value_name(var):
    """Return the FileCheck variable name derived from the IR name *var*.

    All-digit names get a 'TMP' prefix; '.' and '-' become '_'; the result
    is upper-cased.
    """
    prefix = 'TMP' if var.isdigit() else ''
    return (prefix + var).replace('.', '_').replace('-', '_').upper()
226 # Create a FileCheck variable from regex.
def get_value_definition(var):
    """Return the FileCheck pattern that defines the variable for *var*."""
    name = get_value_name(var)
    return ''.join(('[[', name, ':%.*]]'))
231 # Use a FileCheck variable.
def get_value_use(var):
    """Return the FileCheck expression that refers to the variable for *var*."""
    name = get_value_name(var)
    return ''.join(('[[', name, ']]'))
235 # Replace IR value defs and uses with FileCheck variables.
def genericize_check_lines(lines, is_analyze, vars_seen):
    """Rewrite *lines* in place for use as check lines and return the list.

    IR comments are removed from every line. Unless *is_analyze* is true,
    each IR value is replaced by a FileCheck variable: a definition the
    first time its name is met, a use afterwards. *vars_seen* is the set of
    names already defined; it is updated as new names are encountered.
    """
    def to_filecheck_var(hit):
        # re.sub replaces the whole match, so the surrounding whitespace and
        # terminator (groups 1 and 3) must be handed back unchanged.
        name = hit.group(2)
        if name in vars_seen:
            replacement = get_value_use(name)
        else:
            vars_seen.add(name)
            replacement = get_value_definition(name)
        return hit.group(1) + replacement + hit.group(3)

    for index in range(len(lines)):
        # An IR variable named '%.' matches the FileCheck regex string.
        text = lines[index].replace('%.', '%dot')
        # Ignore any comments, since the check lines will too.
        text = SCRUB_IR_COMMENT_RE.sub(r'', text)
        if not is_analyze:
            text = IR_VALUE_RE.sub(to_filecheck_var, text)
        lines[index] = text
    return lines
def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, is_asm, is_analyze):
    """Append the check lines for *func_name* to *output_lines*.

    Args:
      output_lines: list that receives the generated lines (modified in place).
      comment_marker: comment leader placed at the start of every emitted line.
      prefix_list: one entry per RUN line; only element 0 of each entry (its
        list of check prefixes) is read here.
      func_dict: maps prefix -> {function name -> recorded body, or a falsy
        entry when no usable body exists}. May be updated with saved output.
      func_name: the function to emit checks for.
      check_label_format: %-style format taking (prefix, function name,
        signature text) that produces the label line.
      is_asm: emit the body verbatim as PREFIX / PREFIX-NEXT / PREFIX-EMPTY
        lines instead of genericizing it.
      is_analyze: forwarded to genericize_check_lines().

    NOTE(review): several format strings below contain a single space after
    the colon; any column-alignment padding may have been lost when this
    text was extracted. Confirm against the upstream file.
    """
    # prefix_blacklist are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well.
    prefix_blacklist = set()
    printed_prefixes = []
    for p in prefix_list:
        checkprefixes = p[0]
        # If not all checkprefixes of this run line produced the function we cannot check for it as it does not
        # exist for this run line. A subset of the check prefixes might know about the function but only because
        # other run lines created it.
        if any(map(lambda checkprefix: func_name not in func_dict[checkprefix], checkprefixes)):
            prefix_blacklist |= set(checkprefixes)
            continue

    # prefix_blacklist is constructed, we can now emit the output
    for p in prefix_list:
        checkprefixes = p[0]
        saved_output = None
        for checkprefix in checkprefixes:
            # Already emitted under this prefix: nothing more to do for this
            # run line.
            if checkprefix in printed_prefixes:
                break

            # prefix is blacklisted. We remember the output as we might need it later but we will not emit anything for the prefix.
            if checkprefix in prefix_blacklist:
                if not saved_output and func_name in func_dict[checkprefix]:
                    saved_output = func_dict[checkprefix][func_name]
                continue

            # If we do not have output for this prefix but there is one saved, we go ahead with this prefix and the saved output.
            if not func_dict[checkprefix][func_name]:
                if not saved_output:
                    continue
                func_dict[checkprefix][func_name] = saved_output

            # Add some space between different check prefixes, but not after the last
            # check line (before the test code).
            if is_asm:
                if len(printed_prefixes) != 0:
                    output_lines.append(comment_marker)

            # Variable definitions are tracked per emitted function, shared
            # between the signature and the body.
            vars_seen = set()
            printed_prefixes.append(checkprefix)
            args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
            args_and_sig = genericize_check_lines([args_and_sig], is_analyze, vars_seen)[0]
            if '[[' in args_and_sig:
                # The signature contains FileCheck variables, so it is emitted
                # on its own -SAME line after a label without signature text.
                output_lines.append(check_label_format % (checkprefix, func_name, ''))
                output_lines.append('%s %s-SAME: %s' % (comment_marker, checkprefix, args_and_sig))
            else:
                output_lines.append(check_label_format % (checkprefix, func_name, args_and_sig))
            func_body = str(func_dict[checkprefix][func_name]).splitlines()

            # For ASM output, just emit the check lines.
            if is_asm:
                output_lines.append('%s %s: %s' % (comment_marker, checkprefix, func_body[0]))
                for func_line in func_body[1:]:
                    if func_line.strip() == '':
                        output_lines.append('%s %s-EMPTY:' % (comment_marker, checkprefix))
                    else:
                        output_lines.append('%s %s-NEXT: %s' % (comment_marker, checkprefix, func_line))
                break

            # For IR output, change all defs to FileCheck variables, so we're immune
            # to variable naming fashions.
            func_body = genericize_check_lines(func_body, is_analyze, vars_seen)

            # This could be selectively enabled with an optional invocation argument.
            # Disabled for now: better to check everything. Be safe rather than sorry.

            # Handle the first line of the function body as a special case because
            # it's often just noise (a useless asm comment or entry label).
            #if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
            #  is_blank_line = True
            #else:
            #  output_lines.append('%s %s: %s' % (comment_marker, checkprefix, func_body[0]))
            #  is_blank_line = False

            is_blank_line = False

            for func_line in func_body:
                if func_line.strip() == '':
                    is_blank_line = True
                    continue
                # Do not waste time checking IR comments.
                func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)

                # Skip blank lines instead of checking them.
                # The first line after a blank gets a plain check rather than
                # a -NEXT check.
                if is_blank_line:
                    output_lines.append('{} {}: {}'.format(
                        comment_marker, checkprefix, func_line))
                else:
                    output_lines.append('{} {}-NEXT: {}'.format(
                        comment_marker, checkprefix, func_line))
                is_blank_line = False

            # Add space between different check prefixes and also before the first
            # line of code in the test function.
            output_lines.append(comment_marker)
            break
def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict,
                  func_name, preserve_names, function_sig):
    """Append IR check lines for *func_name* via add_checks().

    The label line matches ``@<name>``; with *function_sig* set it is
    preceded by a ``define`` pattern. *preserve_names* is passed to
    add_checks() as its ``is_analyze`` argument.
    """
    # Label format is based on IR string.
    if function_sig:
        function_def_regex = 'define {{[^@]+}}'
    else:
        function_def_regex = ''
    check_label_format = '{} %s-LABEL: {}@%s%s'.format(
        comment_marker, function_def_regex)
    add_checks(output_lines, comment_marker, prefix_list, func_dict,
               func_name, check_label_format, is_asm=False,
               is_analyze=preserve_names)
def add_analyze_checks(output_lines, comment_marker, prefix_list, func_dict, func_name):
    """Append analysis check lines for *func_name* via add_checks().

    The label line matches the function name in single quotes; the body is
    emitted with ``is_asm`` off and ``is_analyze`` on.
    """
    label_template = '{} %s-LABEL: \'%s%s\''
    check_label_format = label_template.format(comment_marker)
    add_checks(output_lines, comment_marker, prefix_list, func_dict,
               func_name, check_label_format, is_asm=False, is_analyze=True)
def check_prefix(prefix):
    """Warn if *prefix* is not a valid FileCheck prefix.

    A valid prefix consists only of alphanumeric characters, hyphens and
    underscores. When the invalid prefix contains a comma, the warning also
    suggests the ``--check-prefixes=`` spelling. Nothing is returned.
    """
    if PREFIX_RE.match(prefix):
        return
    # Interpolate the prefix BEFORE appending the hint: the hint embeds the
    # raw prefix text, and an invalid prefix may itself contain '%', which
    # must not be interpreted as a format directive.
    message = ("Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores." %
               (prefix,))
    if ',' in prefix:
        message += " Did you mean '--check-prefixes=" + prefix + "'?"
    warn(message)
def verify_filecheck_prefixes(fc_cmd):
    """Validate every check prefix named in the FileCheck command *fc_cmd*.

    Each whitespace-separated token containing ``check-prefix=`` or
    ``check-prefixes=`` is inspected: every prefix is passed to
    check_prefix(), and prefixes repeated within one ``check-prefixes=``
    list trigger a warning.
    """
    for token in fc_cmd.split():
        if "check-prefix=" in token:
            check_prefix(token.split('=', 1)[1])
            continue
        if "check-prefixes=" not in token:
            continue
        names = token.split('=', 1)[1].split(',')
        for name in names:
            check_prefix(name)
            if names.count(name) > 1:
                warn("Supplied prefix '%s' is not unique in the prefix list." % (name,))