llvm/utils/update_mca_test_checks.py

   1 #!/usr/bin/env python3
   2
   3 """A test case update script.
   4
   5 This script is a utility to update LLVM 'llvm-mca' based test cases with new
   6 FileCheck patterns.
   7 """
   8
   9 import argparse
  10 from collections import defaultdict
  11 import glob
  12 import os
  13 import sys
  14 import warnings
  15
  16 from UpdateTestChecks import common
  17
  18
  19 COMMENT_CHAR = '#'
  20 ADVERT_PREFIX = '{} NOTE: Assertions have been autogenerated by '.format(
  21     COMMENT_CHAR)
  22 ADVERT = '{}utils/{}'.format(ADVERT_PREFIX, os.path.basename(__file__))
  23
  24
  25 class Error(Exception):
  26   """ Generic Error that can be raised without printing a traceback.
  27   """
  28   pass
  29
  30
  31 def _warn(msg):
  32   """ Log a user warning to stderr.
  33   """
  34   warnings.warn(msg, Warning, stacklevel=2)
  35
  36
  37 def _configure_warnings(args):
  38   warnings.resetwarnings()
  39   if args.w:
  40     warnings.simplefilter('ignore')
  41   if args.Werror:
  42     warnings.simplefilter('error')
  43
  44
  45 def _showwarning(message, category, filename, lineno, file=None, line=None):
  46   """ Version of warnings.showwarning that won't attempt to print out the
  47       line at the location of the warning if the line text is not explicitly
  48       specified.
  49   """
  50   if file is None:
  51     file = sys.stderr
  52   if line is None:
  53     line = ''
  54   file.write(warnings.formatwarning(message, category, filename, lineno, line))
  55
  56
  57 def _parse_args():
  58   parser = argparse.ArgumentParser(description=__doc__)
  59   parser.add_argument('-w',
  60                       action='store_true',
  61                       help='suppress warnings')
  62   parser.add_argument('-Werror',
  63                       action='store_true',
  64                       help='promote warnings to errors')
  65   parser.add_argument('--llvm-mca-binary',
  66                       metavar='<path>',
  67                       default='llvm-mca',
  68                       help='the binary to use to generate the test case '
  69                            '(default: llvm-mca)')
  70   parser.add_argument('tests',
  71                       metavar='<test-path>',
  72                       nargs='+')
  73   args = common.parse_commandline_args(parser)
  74
  75   _configure_warnings(args)
  76
  77   if not args.llvm_mca_binary:
  78     raise Error('--llvm-mca-binary value cannot be empty string')
  79
  80   if 'llvm-mca' not in os.path.basename(args.llvm_mca_binary):
  81     _warn('unexpected binary name: {}'.format(args.llvm_mca_binary))
  82
  83   return args
  84
  85
  86 def _get_run_infos(run_lines, args):
  87   run_infos = []
  88   for run_line in run_lines:
  89     try:
  90       (tool_cmd, filecheck_cmd) = tuple([cmd.strip()
  91                                         for cmd in run_line.split('|', 1)])
  92     except ValueError:
  93       _warn('could not split tool and filecheck commands: {}'.format(run_line))
  94       continue
  95
  96     common.verify_filecheck_prefixes(filecheck_cmd)
  97     tool_basename = os.path.splitext(os.path.basename(args.llvm_mca_binary))[0]
  98
  99     if not tool_cmd.startswith(tool_basename + ' '):
 100       _warn('skipping non-{} RUN line: {}'.format(tool_basename, run_line))
 101       continue
 102
 103     if not filecheck_cmd.startswith('FileCheck '):
 104       _warn('skipping non-FileCheck RUN line: {}'.format(run_line))
 105       continue
 106
 107     tool_cmd_args = tool_cmd[len(tool_basename):].strip()
 108     tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()
 109
 110     check_prefixes = [item
 111                       for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd)
 112                       for item in m.group(1).split(',')]
 113     if not check_prefixes:
 114       check_prefixes = ['CHECK']
 115
 116     run_infos.append((check_prefixes, tool_cmd_args))
 117
 118   return run_infos
 119
 120
 121 def _break_down_block(block_info, common_prefix):
 122   """ Given a block_info, see if we can analyze it further to let us break it
 123       down by prefix per-line rather than per-block.
 124   """
 125   texts = block_info.keys()
 126   prefixes = list(block_info.values())
 127   # Split the lines from each of the incoming block_texts and zip them so that
 128   # each element contains the corresponding lines from each text.  E.g.
 129   #
 130   # block_text_1: A   # line 1
 131   #               B   # line 2
 132   #
 133   # block_text_2: A   # line 1
 134   #               C   # line 2
 135   #
 136   # would become:
 137   #
 138   # [(A, A),   # line 1
 139   #  (B, C)]   # line 2
 140   #
 141   line_tuples = list(zip(*list((text.splitlines() for text in texts))))
 142
 143   # To simplify output, we'll only proceed if the very first line of the block
 144   # texts is common to each of them.
 145   if len(set(line_tuples[0])) != 1:
 146     return []
 147
 148   result = []
 149   lresult = defaultdict(list)
 150   for i, line in enumerate(line_tuples):
 151     if len(set(line)) == 1:
 152       # We're about to output a line with the common prefix.  This is a sync
 153       # point so flush any batched-up lines one prefix at a time to the output
 154       # first.
 155       for prefix in sorted(lresult):
 156         result.extend(lresult[prefix])
 157       lresult = defaultdict(list)
 158
 159       # The line is common to each block so output with the common prefix.
 160       result.append((common_prefix, line[0]))
 161     else:
 162       # The line is not common to each block, or we don't have a common prefix.
 163       # If there are no prefixes available, warn and bail out.
 164       if not prefixes[0]:
 165         _warn('multiple lines not disambiguated by prefixes:\n{}\n'
 166               'Some blocks may be skipped entirely as a result.'.format(
 167                   '\n'.join('  - {}'.format(l) for l in line)))
 168         return []
 169
 170       # Iterate through the line from each of the blocks and add the line with
 171       # the corresponding prefix to the current batch of results so that we can
 172       # later output them per-prefix.
 173       for i, l in enumerate(line):
 174         for prefix in prefixes[i]:
 175           lresult[prefix].append((prefix, l))
 176
 177   # Flush any remaining batched-up lines one prefix at a time to the output.
 178   for prefix in sorted(lresult):
 179     result.extend(lresult[prefix])
 180   return result
 181
 182
 183 def _get_useful_prefix_info(run_infos):
 184   """ Given the run_infos, calculate any prefixes that are common to every one,
 185       and the length of the longest prefix string.
 186   """
 187   try:
 188     all_sets = [set(s) for s in list(zip(*run_infos))[0]]
 189     common_to_all = set.intersection(*all_sets)
 190     longest_prefix_len = max(len(p) for p in set.union(*all_sets))
 191   except IndexError:
 192     common_to_all = []
 193     longest_prefix_len = 0
 194   else:
 195     if len(common_to_all) > 1:
 196       _warn('Multiple prefixes common to all RUN lines: {}'.format(
 197           common_to_all))
 198     if common_to_all:
 199       common_to_all = sorted(common_to_all)[0]
 200   return common_to_all, longest_prefix_len
 201
 202
 203 def _align_matching_blocks(all_blocks, farthest_indexes):
 204   """ Some sub-sequences of blocks may be common to multiple lists of blocks,
 205       but at different indexes in each one.
 206
 207       For example, in the following case, A,B,E,F, and H are common to both
 208       sets, but only A and B would be identified as such due to the indexes
 209       matching:
 210
 211       index | 0 1 2 3 4 5 6
 212       ------+--------------
 213       setA  | A B C D E F H
 214       setB  | A B E F G H
 215
 216       This function attempts to align the indexes of matching blocks by
 217       inserting empty blocks into the block list. With this approach, A, B, E,
 218       F, and H would now be able to be identified as matching blocks:
 219
 220       index | 0 1 2 3 4 5 6 7
 221       ------+----------------
 222       setA  | A B C D E F   H
 223       setB  | A B     E F G H
 224   """
 225
 226   # "Farthest block analysis": essentially, iterate over all blocks and find
 227   # the highest index into a block list for the first instance of each block.
 228   # This is relatively expensive, but we're dealing with small numbers of
 229   # blocks so it doesn't make a perceivable difference to user time.
 230   for blocks in all_blocks.values():
 231     for block in blocks:
 232       if not block:
 233         continue
 234
 235       index = blocks.index(block)
 236
 237       if index > farthest_indexes[block]:
 238         farthest_indexes[block] = index
 239
 240   # Use the results of the above analysis to identify any blocks that can be
 241   # shunted along to match the farthest index value.
 242   for blocks in all_blocks.values():
 243     for index, block in enumerate(blocks):
 244       if not block:
 245         continue
 246
 247       changed = False
 248       # If the block has not already been subject to alignment (i.e. if the
 249       # previous block is not empty) then insert empty blocks until the index
 250       # matches the farthest index identified for that block.
 251       if (index > 0) and blocks[index - 1]:
 252         while(index < farthest_indexes[block]):
 253           blocks.insert(index, '')
 254           index += 1
 255           changed = True
 256
 257       if changed:
 258         # Bail out.  We'll need to re-do the farthest block analysis now that
 259         # we've inserted some blocks.
 260         return True
 261
 262   return False
 263
 264
 265 def _get_block_infos(run_infos, test_path, args, common_prefix):  # noqa
 266   """ For each run line, run the tool with the specified args and collect the
 267       output. We use the concept of 'blocks' for uniquing, where a block is
 268       a series of lines of text with no more than one newline character between
 269       each one.  For example:
 270
 271       This
 272       is
 273       one
 274       block
 275
 276       This is
 277       another block
 278
 279       This is yet another block
 280
 281       We then build up a 'block_infos' structure containing a dict where the
 282       text of each block is the key and a list of the sets of prefixes that may
 283       generate that particular block.  This then goes through a series of
 284       transformations to minimise the amount of CHECK lines that need to be
 285       written by taking advantage of common prefixes.
 286   """
 287
 288   def _block_key(tool_args, prefixes):
 289     """ Get a hashable key based on the current tool_args and prefixes.
 290     """
 291     return ' '.join([tool_args] + prefixes)
 292
 293   all_blocks = {}
 294   max_block_len = 0
 295
 296   # A cache of the furthest-back position in any block list of the first
 297   # instance of each block, indexed by the block itself.
 298   farthest_indexes = defaultdict(int)
 299
 300   # Run the tool for each run line to generate all of the blocks.
 301   for prefixes, tool_args in run_infos:
 302     key = _block_key(tool_args, prefixes)
 303     raw_tool_output = common.invoke_tool(args.llvm_mca_binary,
 304                                          tool_args,
 305                                          test_path)
 306
 307     # Replace any lines consisting of purely whitespace with empty lines.
 308     raw_tool_output = '\n'.join(line if line.strip() else ''
 309                                 for line in raw_tool_output.splitlines())
 310
 311     # Split blocks, stripping all trailing whitespace, but keeping preceding
 312     # whitespace except for newlines so that columns will line up visually.
 313     all_blocks[key] = [b.lstrip('\n').rstrip()
 314                        for b in raw_tool_output.split('\n\n')]
 315     max_block_len = max(max_block_len, len(all_blocks[key]))
 316
 317     # Attempt to align matching blocks until no more changes can be made.
 318     made_changes = True
 319     while made_changes:
 320       made_changes = _align_matching_blocks(all_blocks, farthest_indexes)
 321
 322   # If necessary, pad the lists of blocks with empty blocks so that they are
 323   # all the same length.
 324   for key in all_blocks:
 325     len_to_pad = max_block_len - len(all_blocks[key])
 326     all_blocks[key] += [''] * len_to_pad
 327
 328   # Create the block_infos structure where it is a nested dict in the form of:
 329   # block number -> block text -> list of prefix sets
 330   block_infos = defaultdict(lambda: defaultdict(list))
 331   for prefixes, tool_args in run_infos:
 332     key = _block_key(tool_args, prefixes)
 333     for block_num, block_text in enumerate(all_blocks[key]):
 334       block_infos[block_num][block_text].append(set(prefixes))
 335
 336   # Now go through the block_infos structure and attempt to smartly prune the
 337   # number of prefixes per block to the minimal set possible to output.
 338   for block_num in range(len(block_infos)):
 339     # When there are multiple block texts for a block num, remove any
 340     # prefixes that are common to more than one of them.
 341     # E.g. [ [{ALL,FOO}] , [{ALL,BAR}] ] -> [ [{FOO}] , [{BAR}] ]
 342     all_sets = [s for s in block_infos[block_num].values()]
 343     pruned_sets = []
 344
 345     for i, setlist in enumerate(all_sets):
 346       other_set_values = set([elem for j, setlist2 in enumerate(all_sets)
 347                               for set_ in setlist2 for elem in set_
 348                               if i != j])
 349       pruned_sets.append([s - other_set_values for s in setlist])
 350
 351     for i, block_text in enumerate(block_infos[block_num]):
 352
 353       # When a block text matches multiple sets of prefixes, try removing any
 354       # prefixes that aren't common to all of them.
 355       # E.g. [ {ALL,FOO} , {ALL,BAR} ] -> [{ALL}]
 356       common_values = set.intersection(*pruned_sets[i])
 357       if common_values:
 358         pruned_sets[i] = [common_values]
 359
 360       # Everything should be uniqued as much as possible by now.  Apply the
 361       # newly pruned sets to the block_infos structure.
 362       # If there are any blocks of text that still match multiple prefixes,
 363       # output a warning.
 364       current_set = set()
 365       for s in pruned_sets[i]:
 366         s = sorted(list(s))
 367         if s:
 368           current_set.add(s[0])
 369           if len(s) > 1:
 370             _warn('Multiple prefixes generating same output: {} '
 371                   '(discarding {})'.format(','.join(s), ','.join(s[1:])))
 372
 373       if block_text and not current_set:
 374         raise Error(
 375           'block not captured by existing prefixes:\n\n{}'.format(block_text))
 376       block_infos[block_num][block_text] = sorted(list(current_set))
 377
 378     # If we have multiple block_texts, try to break them down further to avoid
 379     # the case where we have very similar block_texts repeated after each
 380     # other.
 381     if common_prefix and len(block_infos[block_num]) > 1:
 382       # We'll only attempt this if each of the block_texts have the same number
 383       # of lines as each other.
 384       same_num_Lines = (len(set(len(k.splitlines())
 385                                 for k in block_infos[block_num].keys())) == 1)
 386       if same_num_Lines:
 387         breakdown = _break_down_block(block_infos[block_num], common_prefix)
 388         if breakdown:
 389           block_infos[block_num] = breakdown
 390
 391   return block_infos
 392
 393
 394 def _write_block(output, block, not_prefix_set, common_prefix, prefix_pad):
 395   """ Write an individual block, with correct padding on the prefixes.
 396       Returns a set of all of the prefixes that it has written.
 397   """
 398   end_prefix = ':     '
 399   previous_prefix = None
 400   num_lines_of_prefix = 0
 401   written_prefixes = set()
 402
 403   for prefix, line in block:
 404     if prefix in not_prefix_set:
 405       _warn('not writing for prefix {0} due to presence of "{0}-NOT:" '
 406             'in input file.'.format(prefix))
 407       continue
 408
 409     # If the previous line isn't already blank and we're writing more than one
 410     # line for the current prefix output a blank line first, unless either the
 411     # current of previous prefix is common to all.
 412     num_lines_of_prefix += 1
 413     if prefix != previous_prefix:
 414       if output and output[-1]:
 415         if num_lines_of_prefix > 1 or any(p == common_prefix
 416                                           for p in (prefix, previous_prefix)):
 417           output.append('')
 418       num_lines_of_prefix = 0
 419       previous_prefix = prefix
 420
 421     written_prefixes.add(prefix)
 422     output.append(
 423         '{} {}{}{} {}'.format(COMMENT_CHAR,
 424                               prefix,
 425                               end_prefix,
 426                               ' ' * (prefix_pad - len(prefix)),
 427                               line).rstrip())
 428     end_prefix = '-NEXT:'
 429
 430   output.append('')
 431   return written_prefixes
 432
 433
 434 def _write_output(test_path, input_lines, prefix_list, block_infos,  # noqa
 435                   args, common_prefix, prefix_pad):
 436   prefix_set = set([prefix for prefixes, _ in prefix_list
 437                     for prefix in prefixes])
 438   not_prefix_set = set()
 439
 440   output_lines = []
 441   for input_line in input_lines:
 442     if input_line.startswith(ADVERT_PREFIX):
 443       continue
 444
 445     if input_line.startswith(COMMENT_CHAR):
 446       m = common.CHECK_RE.match(input_line)
 447       try:
 448         prefix = m.group(1)
 449       except AttributeError:
 450         prefix = None
 451
 452       if '{}-NOT:'.format(prefix) in input_line:
 453         not_prefix_set.add(prefix)
 454
 455       if prefix not in prefix_set or prefix in not_prefix_set:
 456         output_lines.append(input_line)
 457         continue
 458
 459     if common.should_add_line_to_output(input_line, prefix_set):
 460       # This input line of the function body will go as-is into the output.
 461       # Except make leading whitespace uniform: 2 spaces.
 462       input_line = common.SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)
 463
 464       # Skip empty lines if the previous output line is also empty.
 465       if input_line or output_lines[-1]:
 466         output_lines.append(input_line)
 467     else:
 468       continue
 469
 470   # Add a blank line before the new checks if required.
 471   if len(output_lines) > 0 and output_lines[-1]:
 472     output_lines.append('')
 473
 474   output_check_lines = []
 475   used_prefixes = set()
 476   for block_num in range(len(block_infos)):
 477     if type(block_infos[block_num]) is list:
 478       # The block is of the type output from _break_down_block().
 479       used_prefixes |= _write_block(output_check_lines,
 480                                     block_infos[block_num],
 481                                     not_prefix_set,
 482                                     common_prefix,
 483                                     prefix_pad)
 484     else:
 485       # _break_down_block() was unable to do do anything so output the block
 486       # as-is.
 487
 488       # Rather than writing out each block as soon we encounter it, save it
 489       # indexed by prefix so that we can write all of the blocks out sorted by
 490       # prefix at the end.
 491       output_blocks = defaultdict(list)
 492
 493       for block_text in sorted(block_infos[block_num]):
 494
 495         if not block_text:
 496           continue
 497
 498         lines = block_text.split('\n')
 499         for prefix in block_infos[block_num][block_text]:
 500           assert prefix not in output_blocks
 501           used_prefixes |= _write_block(output_blocks[prefix],
 502                                         [(prefix, line) for line in lines],
 503                                         not_prefix_set,
 504                                         common_prefix,
 505                                         prefix_pad)
 506
 507       for prefix in sorted(output_blocks):
 508         output_check_lines.extend(output_blocks[prefix])
 509
 510   unused_prefixes = (prefix_set - not_prefix_set) - used_prefixes
 511   if unused_prefixes:
 512     raise Error('unused prefixes: {}'.format(sorted(unused_prefixes)))
 513
 514   if output_check_lines:
 515     output_lines.insert(0, ADVERT)
 516     output_lines.extend(output_check_lines)
 517
 518   # The file should not end with two newlines. It creates unnecessary churn.
 519   while len(output_lines) > 0 and output_lines[-1] == '':
 520     output_lines.pop()
 521
 522   if input_lines == output_lines:
 523     sys.stderr.write('            [unchanged]\n')
 524     return
 525   sys.stderr.write('      [{} lines total]\n'.format(len(output_lines)))
 526
 527   common.debug('Writing', len(output_lines), 'lines to', test_path, '..\n\n')
 528
 529   with open(test_path, 'wb') as f:
 530     f.writelines(['{}\n'.format(l).encode('utf-8') for l in output_lines])
 531
 532 def main():
 533   args = _parse_args()
 534   test_paths = [test for pattern in args.tests for test in glob.glob(pattern)]
 535   for test_path in test_paths:
 536     sys.stderr.write('Test: {}\n'.format(test_path))
 537
 538     # Call this per test. By default each warning will only be written once
 539     # per source location. Reset the warning filter so that now each warning
 540     # will be written once per source location per test.
 541     _configure_warnings(args)
 542
 543     if not os.path.isfile(test_path):
 544       raise Error('could not find test file: {}'.format(test_path))
 545
 546     with open(test_path) as f:
 547       input_lines = [l.rstrip() for l in f]
 548
 549     run_lines = common.find_run_lines(test_path, input_lines)
 550     run_infos = _get_run_infos(run_lines, args)
 551     common_prefix, prefix_pad = _get_useful_prefix_info(run_infos)
 552     block_infos = _get_block_infos(run_infos, test_path, args, common_prefix)
 553     _write_output(test_path,
 554                   input_lines,
 555                   run_infos,
 556                   block_infos,
 557                   args,
 558                   common_prefix,
 559                   prefix_pad)
 560
 561   return 0
 562
 563
 564 if __name__ == '__main__':
 565   try:
 566     warnings.showwarning = _showwarning
 567     sys.exit(main())
 568   except Error as e:
 569     sys.stdout.write('error: {}\n'.format(e))
 570     sys.exit(1)