[docs] Add LICENSE.txt to the root of the mono-repo
[llvm-project.git] / llvm / utils / update_mir_test_checks.py
blobadf8e4993a869bba2e7eca79bf967da86e1a0e40
1 #!/usr/bin/env python3
3 """Updates FileCheck checks in MIR tests.
5 This script is a utility to update MIR based tests with new FileCheck
6 patterns.
8 The checks added by this script will cover the entire body of each
9 function it handles. Virtual registers used are given names via
10 FileCheck patterns, so if you do want to check a subset of the body it
11 should be straightforward to trim out the irrelevant parts. None of
12 the YAML metadata will be checked, other than function names.
14 If there are multiple llc commands in a test, the full set of checks
15 will be repeated for each different check pattern. Checks for patterns
16 that are common between different commands will be left as-is by
17 default, or removed if the --remove-common-prefixes flag is provided.
18 """
20 from __future__ import print_function
22 import argparse
23 import collections
24 import glob
25 import os
26 import re
27 import subprocess
28 import sys
30 from UpdateTestChecks import common
# Matches a `name: <function>` line and captures the function name as `func`.
MIR_FUNC_NAME_RE = re.compile(r' *name: *(?P<func>[A-Za-z0-9_.-]+)')
# Matches the `body: |` line that introduces a function body.
MIR_BODY_BEGIN_RE = re.compile(r' *body: *\|')
# Matches a basic block label line such as `bb.0.entry:`.
MIR_BASIC_BLOCK_RE = re.compile(r' *bb\.[0-9]+.*:$')
# Matches a virtual register reference `%N`, optionally followed by a
# `:name` suffix and a parenthesised annotation. Group 1 is the bare `%N`.
VREG_RE = re.compile(r'(%[0-9]+)(?::[a-z0-9_]+)?(?:\([<>a-z0-9 ]+\))?')
# Regex fragment matching zero or more instruction flags, each followed by a
# single space.
MI_FLAGS_STR= (
    r'(frame-setup |frame-destroy |nnan |ninf |nsz |arcp |contract |afn '
    r'|reassoc |nuw |nsw |exact |fpexcept )*')
# Matches the start of a line that defines one or more virtual registers:
# a comma-separated list of vregs (`vregs`), ` = `, optional flags, and then
# the opcode (`opcode`).
VREG_DEF_RE = re.compile(
    r'^ *(?P<vregs>{0}(?:, {0})*) = '
    r'{1}(?P<opcode>[A-Zt][A-Za-z0-9_]+)'.format(VREG_RE.pattern, MI_FLAGS_STR))
# Matches the lines that the updater skips over at the top of a single-block
# function body before inserting checks: `;` comments, a block label,
# `key:` lines and blank lines.
MIR_PREFIX_DATA_RE = re.compile(r'^ *(;|bb.[0-9].*: *$|[a-z]+:( |$)|$)')

# Matches an IR `define ... @name(` line and captures the name as `func`.
IR_FUNC_NAME_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[A-Za-z0-9_.]+)\s*\(')
# Matches `;` comment lines and blank lines.
IR_PREFIX_DATA_RE = re.compile(r'^ *(;|$)')

# Matches one whole document in the tool output, from the `---` line to the
# `...` line, capturing the function name (`func`) and the text that follows
# `body: |` (`body`). Multiline and dot-all modes are both enabled.
MIR_FUNC_RE = re.compile(
    r'^---$'
    r'\n'
    r'^ *name: *(?P<func>[A-Za-z0-9_.-]+)$'
    r'.*?'
    r'^ *body: *\|\n'
    r'(?P<body>.*?)\n'
    r'^\.\.\.$',
    flags=(re.M | re.S))
class LLC:
  """Callable wrapper around an llc executable.

  Instances are created from the path (or name) of the binary and are then
  called with an argument string and an input file to obtain llc's output.
  """

  def __init__(self, bin):
    self.bin = bin

  def __call__(self, args, ir):
    """Run llc with `args`, feeding the file `ir` on stdin.

    Returns the captured standard output as text with Windows line endings
    converted to Unix ones. A non-zero exit status propagates as the
    exception raised by subprocess.check_output.
    """
    # The input arrives on stdin, so for .mir files the input language has to
    # be spelled out on the command line.
    if ir.endswith('.mir'):
      args = '{} -x mir'.format(args)
    command = '{} {}'.format(self.bin, args)
    with open(ir) as ir_file:
      stdout = subprocess.check_output(command, shell=True, stdin=ir_file)
    if sys.version_info[0] > 2:
      stdout = stdout.decode()
    # Normalise CRLF line endings to plain LF.
    return stdout.replace('\r\n', '\n')
class Run:
  """The pieces of one RUN line: check prefixes, command arguments, triple.

  Indexing is supported so that an instance can be unpacked like a
  three-element sequence: `prefixes, cmd_args, triple = run`.
  """

  def __init__(self, prefixes, cmd_args, triple):
    self.prefixes, self.cmd_args, self.triple = prefixes, cmd_args, triple

  def __getitem__(self, index):
    fields = [self.prefixes, self.cmd_args, self.triple]
    return fields[index]
def log(msg, verbose=True):
  """Write `msg` to standard error, unless `verbose` is false."""
  if not verbose:
    return
  print(msg, file=sys.stderr)
def find_triple_in_ir(lines, verbose=False):
  """Return the first capture of common.TRIPLE_IR_RE among `lines`.

  Lines are tried in order; the result is group 1 of the first match, or None
  when no line matches. `verbose` is accepted but not used.
  """
  matches = (common.TRIPLE_IR_RE.match(line) for line in lines)
  return next((found.group(1) for found in matches if found), None)
def build_run_list(test, run_lines, verbose=False):
  """Turn RUN lines of the form `llc ... | FileCheck ...` into Run objects.

  Lines without a pipe, lines whose first command is not llc, and lines whose
  second command is not FileCheck are skipped with a warning. For the others
  the triple (from -mtriple, else derived from -march), the llc arguments with
  the `%s` input placeholder removed, and the FileCheck prefixes (defaulting
  to CHECK) are recorded. `verbose` is accepted but not used.

  Returns the list of Run objects, in RUN-line order.
  """
  run_list = []
  all_prefixes = []
  for run_line in run_lines:
    if '|' not in run_line:
      common.warn('Skipping unparseable RUN line: ' + run_line)
      continue

    # Split at the first pipe only; anything after it belongs to FileCheck.
    llc_part, _, filecheck_part = run_line.partition('|')
    llc_cmd = llc_part.strip()
    filecheck_cmd = filecheck_part.strip()
    common.verify_filecheck_prefixes(filecheck_cmd)

    if not llc_cmd.startswith('llc '):
      common.warn('Skipping non-llc RUN line: {}'.format(run_line),
                  test_file=test)
      continue
    if not filecheck_cmd.startswith('FileCheck '):
      common.warn('Skipping non-FileChecked RUN line: {}'.format(run_line),
                  test_file=test)
      continue

    triple_match = common.TRIPLE_ARG_RE.search(llc_cmd)
    triple = triple_match.group(1) if triple_match else None
    # If we find -march but not -mtriple, use that.
    march_match = common.MARCH_ARG_RE.search(llc_cmd)
    if march_match and not triple:
      triple = '{}--'.format(march_match.group(1))

    cmd_args = llc_cmd[len('llc'):].strip()
    cmd_args = cmd_args.replace('< %s', '').replace('%s', '').strip()

    check_prefixes = []
    for prefix_match in common.CHECK_PREFIX_RE.finditer(filecheck_cmd):
      check_prefixes.extend(prefix_match.group(1).split(','))
    if not check_prefixes:
      check_prefixes = ['CHECK']
    all_prefixes.extend(check_prefixes)

    run_list.append(Run(check_prefixes, cmd_args, triple))

  # Within each run, order prefixes by how often they occur across all RUN
  # lines, most frequent first, so shared prefixes are preferred when printing.
  def negated_use_count(prefix):
    return -all_prefixes.count(prefix)

  for run in run_list:
    run.prefixes.sort(key=negated_use_count)

  return run_list
def find_functions_with_one_bb(lines, verbose=False):
  """Return the names of functions that contain exactly one basic block.

  Scans `lines` for `name:` lines and basic block labels, counting the labels
  seen after each name. `verbose` is accepted but not used.
  """
  # Each entry is [function name, number of block labels seen so far]. The
  # leading entry accounts for labels that appear before any `name:` line.
  tallies = [[None, 0]]
  for text in lines:
    name_match = MIR_FUNC_NAME_RE.match(text)
    if name_match:
      tallies.append([name_match.group('func'), 0])
    if MIR_BASIC_BLOCK_RE.match(text):
      tallies[-1][1] += 1
  return [name for name, block_count in tallies if block_count == 1]
def build_function_body_dictionary(test, raw_tool_output, triple, prefixes,
                                   func_dict, verbose):
  """Record the mangled body of every function in `raw_tool_output`.

  Each function body found by MIR_FUNC_RE has its virtual registers replaced
  by FileCheck variables: a definition becomes `[[NAME:%[0-9]+]]` and later
  uses become `[[NAME]]`. The result is stored in `func_dict[prefix][func]`
  for every prefix in `prefixes`; if a different body was already stored for
  that prefix and function, the entry is set to None instead.

  `test` and `triple` are accepted but not used.
  """
  for func_match in MIR_FUNC_RE.finditer(raw_tool_output):
    func = func_match.group('func')
    body = func_match.group('body')
    if verbose:
      log('Processing function: {}'.format(func))
      for body_line in body.splitlines():
        log(' {}'.format(body_line))

    # Vreg mangling
    vreg_map = {}
    mangled = []
    for func_line in body.splitlines(keepends=True):
      def_match = VREG_DEF_RE.match(func_line)
      if def_match:
        opcode = def_match.group('opcode')
        for vreg in VREG_RE.finditer(def_match.group('vregs')):
          number = vreg.group(1)
          name = mangle_vreg(opcode, vreg_map.values())
          vreg_map[number] = name
          # Only the first occurrence on the line is the definition.
          func_line = func_line.replace(
              number, '[[{}:%[0-9]+]]'.format(name), 1)
      # Every remaining mention of a known vreg is a use.
      for number, name in vreg_map.items():
        func_line = re.sub(r'{}\b'.format(number), '[[{}]]'.format(name),
                           func_line)
      mangled.append(func_line)
    body = ''.join(mangled)

    for prefix in prefixes:
      bodies = func_dict[prefix]
      if func not in bodies:
        bodies[func] = body
      elif bodies[func] != body:
        # Different runs disagree under this prefix: mark it unusable.
        bodies[func] = None
def add_checks_for_function(test, output_lines, run_list, func_dict, func_name,
                            single_bb, verbose=False):
  """Append check lines for `func_name` to `output_lines` and return it.

  For each run, its prefixes are tried in order and the first one with a
  recorded body for the function is emitted. A run stops early when it
  reaches a prefix that was already emitted. If a run has no usable prefix
  at all, a warning is issued.
  """
  emitted = set()
  for run in run_list:
    handled = False
    for prefix in run.prefixes:
      if prefix in emitted:
        handled = True
        break
      body = func_dict[prefix][func_name]
      if not body:
        continue
      emitted.add(prefix)
      log('Adding {} lines for {}'.format(prefix, func_name), verbose)
      add_check_lines(test, output_lines, prefix, func_name, single_bb,
                      body.splitlines())
      handled = True
      break
    if not handled:
      common.warn(
          'Found conflicting asm for function: {}'.format(func_name),
          test_file=test)
  return output_lines
def add_check_lines(test, output_lines, prefix, func_name, single_bb,
                    func_body):
  """Append the FileCheck lines for one function body to `output_lines`.

  Emits a `-LABEL` line naming the function, a plain check for the first
  non-blank body line and `-NEXT` checks for the rest. Check comments are
  indented like the first body line. When `single_bb` is set the first
  element of `func_body` is removed first (the list is modified in place).
  """
  if single_bb:
    # Don't bother checking the basic block label for a single BB
    del func_body[0]

  if not func_body:
    common.warn('Function has no instructions to check: {}'.format(func_name),
                test_file=test)
    return

  leading = func_body[0]
  indent = len(leading) - len(leading.lstrip(' '))
  # A check comment, indented the appropriate amount
  check = '{:>{}}; {}'.format('', indent, prefix)

  output_lines.append('{}-LABEL: name: {}'.format(check, func_name))

  seen_instruction = False
  for func_line in func_body:
    if func_line.strip():
      directive = check + '-NEXT' if seen_instruction else check
      seen_instruction = True
      output_lines.append(
          '{}: {}'.format(directive, func_line[indent:]).rstrip())
    else:
      # The mir printer prints leading whitespace so we can't use CHECK-EMPTY:
      output_lines.append(check + '-NEXT: {{' + func_line + '$}}')
def mangle_vreg(opcode, current_names):
  """Derive a FileCheck variable name for a vreg defined by `opcode`.

  The name is the opcode with a leading `G_` and a trailing `_PSEUDO`
  removed, with a few long opcodes abbreviated. If that base ends in a digit
  an underscore is appended so that a numeric suffix stays unambiguous. When
  `current_names` (any iterable of names already in use) contains names with
  the same base, the count of those names is appended to keep the result
  distinct.
  """
  base = opcode
  # Simplify some common prefixes and suffixes
  if base.startswith('G_'):
    base = base[len('G_'):]
  if base.endswith('_PSEUDO'):
    # Drop the suffix itself; slicing with a positive length would instead
    # keep only the first len('_PSEUDO') characters of the name.
    base = base[:-len('_PSEUDO')]
  if not base:
    # Stripping consumed the whole opcode; an empty name would produce an
    # invalid FileCheck variable, so fall back to the full opcode.
    base = opcode
  # Shorten some common opcodes with long-ish names
  base = dict(IMPLICIT_DEF='DEF',
              GLOBAL_VALUE='GV',
              CONSTANT='C',
              FCONSTANT='C',
              MERGE_VALUES='MV',
              UNMERGE_VALUES='UV',
              INTRINSIC='INT',
              INTRINSIC_W_SIDE_EFFECTS='INT',
              INSERT_VECTOR_ELT='IVEC',
              EXTRACT_VECTOR_ELT='EVEC',
              SHUFFLE_VECTOR='SHUF').get(base, base)
  # Avoid ambiguity when opcodes end in numbers
  if len(base.rstrip('0123456789')) < len(base):
    base += '_'

  # Number of existing names that share this base (ignoring their suffix).
  i = sum(1 for name in current_names if name.rstrip('0123456789') == base)
  if i:
    return '{}{}'.format(base, i)
  return base
def should_add_line_to_output(input_line, prefix_set):
  """Tell whether `input_line` should be copied to the rewritten test.

  Returns False for a line that common.CHECK_RE recognises and whose first
  captured group is in `prefix_set` (those checks are regenerated), and True
  for every other line.
  """
  check_match = common.CHECK_RE.match(input_line)
  is_regenerated = bool(check_match) and check_match.group(1) in prefix_set
  return not is_regenerated
def update_test_file(args, test):
  """Regenerate the FileCheck assertions of the test file `test` in place.

  `args` is the parsed command line; this function reads `args.update_only`,
  `args.verbose` and `args.llc` (a callable taking the llc arguments and the
  test path). The file is read, llc is run once per usable RUN line, and the
  file is rewritten with the old check lines for the handled prefixes removed
  and new ones inserted. The function returns early, leaving the file
  untouched, when the test was autogenerated by another script, when
  `update_only` is set and the test is not autogenerated, or when no triple
  can be determined.
  """
  with open(test) as fd:
    # Trailing whitespace (including the newline) is dropped from every line.
    input_lines = [l.rstrip() for l in fd]

  script_name = os.path.basename(__file__)
  first_line = input_lines[0] if input_lines else ""
  if 'autogenerated' in first_line and script_name not in first_line:
    common.warn("Skipping test which wasn't autogenerated by " +
                script_name + ": " + test)
    return

  if args.update_only:
    if not first_line or 'autogenerated' not in first_line:
      common.warn("Skipping test which isn't autogenerated: " + test)
      return

  triple_in_ir = find_triple_in_ir(input_lines, args.verbose)
  run_lines = common.find_run_lines(test, input_lines)
  run_list = build_run_list(test, run_lines, args.verbose)

  simple_functions = find_functions_with_one_bb(input_lines, args.verbose)

  # Maps each check prefix to a dict of function name -> mangled body.
  func_dict = {}
  for run in run_list:
    for prefix in run.prefixes:
      func_dict.update({prefix: dict()})
  # Run objects support indexing, which is what allows this unpacking.
  for prefixes, llc_args, triple_in_cmd in run_list:
    log('Extracted LLC cmd: llc {}'.format(llc_args), args.verbose)
    log('Extracted FileCheck prefixes: {}'.format(prefixes), args.verbose)

    # Note that llc is invoked before the triple check below.
    raw_tool_output = args.llc(llc_args, test)
    if not triple_in_cmd and not triple_in_ir:
      common.warn('No triple found: skipping file', test_file=test)
      return

    build_function_body_dictionary(test, raw_tool_output,
                                   triple_in_cmd or triple_in_ir,
                                   prefixes, func_dict, args.verbose)

  # The rewrite below is a line-by-line state machine. `state` names the part
  # of the file the current line belongs to and `func_name` is the function
  # being processed, if any.
  state = 'toplevel'
  func_name = None
  prefix_set = set([prefix for run in run_list for prefix in run.prefixes])
  log('Rewriting FileCheck prefixes: {}'.format(prefix_set), args.verbose)

  comment_char = '#' if test.endswith('.mir') else ';'
  autogenerated_note = ('{} NOTE: Assertions have been autogenerated by '
                        'utils/{}'.format(comment_char, script_name))
  output_lines = []
  output_lines.append(autogenerated_note)

  for input_line in input_lines:
    # The note was already emitted above; drop any existing copy.
    if input_line == autogenerated_note:
      continue

    if state == 'toplevel':
      m = IR_FUNC_NAME_RE.match(input_line)
      if m:
        state = 'ir function prefix'
        func_name = m.group('func')
      if input_line.rstrip('| \r\n') == '---':
        state = 'document'
      # Lines seen in this state are always kept, without consulting
      # should_add_line_to_output.
      output_lines.append(input_line)
    elif state == 'document':
      m = MIR_FUNC_NAME_RE.match(input_line)
      if m:
        state = 'mir function metadata'
        func_name = m.group('func')
      if input_line.strip() == '...':
        state = 'toplevel'
        func_name = None
      if should_add_line_to_output(input_line, prefix_set):
        output_lines.append(input_line)
    elif state == 'mir function metadata':
      if should_add_line_to_output(input_line, prefix_set):
        output_lines.append(input_line)
      m = MIR_BODY_BEGIN_RE.match(input_line)
      if m:
        if func_name in simple_functions:
          # If there's only one block, put the checks inside it
          state = 'mir function prefix'
          continue
        state = 'mir function body'
        add_checks_for_function(test, output_lines, run_list,
                                func_dict, func_name, single_bb=False,
                                verbose=args.verbose)
    elif state == 'mir function prefix':
      m = MIR_PREFIX_DATA_RE.match(input_line)
      if not m:
        # First line that is not prefix data: the checks go right before it.
        state = 'mir function body'
        add_checks_for_function(test, output_lines, run_list,
                                func_dict, func_name, single_bb=True,
                                verbose=args.verbose)

      if should_add_line_to_output(input_line, prefix_set):
        output_lines.append(input_line)
    elif state == 'mir function body':
      if input_line.strip() == '...':
        state = 'toplevel'
        func_name = None
      if should_add_line_to_output(input_line, prefix_set):
        output_lines.append(input_line)
    elif state == 'ir function prefix':
      m = IR_PREFIX_DATA_RE.match(input_line)
      if not m:
        # First line that is neither a comment nor blank: insert checks here.
        state = 'ir function body'
        add_checks_for_function(test, output_lines, run_list,
                                func_dict, func_name, single_bb=False,
                                verbose=args.verbose)

      if should_add_line_to_output(input_line, prefix_set):
        output_lines.append(input_line)
    elif state == 'ir function body':
      if input_line.strip() == '}':
        state = 'toplevel'
        func_name = None
      if should_add_line_to_output(input_line, prefix_set):
        output_lines.append(input_line)


  log('Writing {} lines to {}...'.format(len(output_lines), test), args.verbose)

  # Written in binary mode as UTF-8 with '\n' line endings.
  with open(test, 'wb') as fd:
    fd.writelines(['{}\n'.format(l).encode('utf-8') for l in output_lines])
def main():
  """Parse the command line and update every test file it names.

  Each positional argument is expanded as a glob pattern. If updating a file
  raises, a warning naming the file is issued and the exception is re-raised.
  """
  parser = argparse.ArgumentParser(
      description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
  parser.add_argument('--llc-binary', dest='llc', default='llc', type=LLC,
                      help='The "llc" binary to generate the test case with')
  parser.add_argument('tests', nargs='+')
  args = common.parse_commandline_args(parser)

  # Expand all patterns up front, before any file is modified.
  test_paths = []
  for pattern in args.tests:
    test_paths.extend(glob.glob(pattern))

  for test in test_paths:
    try:
      update_test_file(args, test)
    except Exception:
      common.warn('Error processing file', test_file=test)
      raise


if __name__ == '__main__':
  main()