utils/demangle_tree.py

   1 # Given a path to llvm-objdump and a directory tree, spider the directory tree
   2 # dumping every object file encountered with correct options needed to demangle
   3 # symbols in the object file, and collect statistics about failed / crashed
   4 # demanglings.  Useful for stress testing the demangler against a large corpus
   5 # of inputs.
   6
   7 import argparse
   8 import functools
   9 import os
  10 import re
  11 import sys
  12 import subprocess
  13 import traceback
  14 from multiprocessing import Pool
  15 import multiprocessing
  16
# Parsed command-line arguments.  Assigned by the `__main__` block after
# argparse runs, and read by go() (dir, extensions, objdump).
args = None
  18
  19 def parse_line(line):
  20     question = line.find('?')
  21     if question == -1:
  22         return None, None
  23
  24     open_paren = line.find('(', question)
  25     if open_paren == -1:
  26         return None, None
  27     close_paren = line.rfind(')', open_paren)
  28     if open_paren == -1:
  29         return None, None
  30     mangled = line[question : open_paren]
  31     demangled = line[open_paren+1 : close_paren]
  32     return mangled.strip(), demangled.strip()
  33
  34 class Result(object):
  35     def __init__(self):
  36         self.crashed = []
  37         self.file = None
  38         self.nsymbols = 0
  39         self.errors = set()
  40         self.nfiles = 0
  41
  42 class MapContext(object):
  43     def __init__(self):
  44         self.rincomplete = None
  45         self.rcumulative = Result()
  46         self.pending_objs = []
  47         self.npending = 0
  48
  49 def process_file(path, objdump):
  50     r = Result()
  51     r.file = path
  52
  53     popen_args = [objdump, '-t', '-demangle', path]
  54     p = subprocess.Popen(popen_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  55     stdout, stderr = p.communicate()
  56     if p.returncode != 0:
  57         r.crashed = [r.file]
  58         return r
  59
  60     output = stdout.decode('utf-8')
  61
  62     for line in output.splitlines():
  63         mangled, demangled = parse_line(line)
  64         if mangled is None:
  65             continue
  66         r.nsymbols += 1
  67         if "invalid mangled name" in demangled:
  68             r.errors.add(mangled)
  69     return r
  70
  71 def add_results(r1, r2):
  72     r1.crashed.extend(r2.crashed)
  73     r1.errors.update(r2.errors)
  74     r1.nsymbols += r2.nsymbols
  75     r1.nfiles += r2.nfiles
  76
  77 def print_result_row(directory, result):
  78     print("[{0} files, {1} crashes, {2} errors, {3} symbols]: '{4}'".format(
  79         result.nfiles, len(result.crashed), len(result.errors), result.nsymbols, directory))
  80
  81 def process_one_chunk(pool, chunk_size, objdump, context):
  82     objs = []
  83
  84     incomplete = False
  85     dir_results = {}
  86     ordered_dirs = []
  87     while context.npending > 0 and len(objs) < chunk_size:
  88         this_dir = context.pending_objs[0][0]
  89         ordered_dirs.append(this_dir)
  90         re = Result()
  91         if context.rincomplete is not None:
  92             re = context.rincomplete
  93             context.rincomplete = None
  94
  95         dir_results[this_dir] = re
  96         re.file = this_dir
  97
  98         nneeded = chunk_size - len(objs)
  99         objs_this_dir = context.pending_objs[0][1]
 100         navail = len(objs_this_dir)
 101         ntaken = min(nneeded, navail)
 102         objs.extend(objs_this_dir[0:ntaken])
 103         remaining_objs_this_dir = objs_this_dir[ntaken:]
 104         context.pending_objs[0] = (context.pending_objs[0][0], remaining_objs_this_dir)
 105         context.npending -= ntaken
 106         if ntaken == navail:
 107             context.pending_objs.pop(0)
 108         else:
 109             incomplete = True
 110
 111         re.nfiles += ntaken
 112
 113     assert(len(objs) == chunk_size or context.npending == 0)
 114
 115     copier = functools.partial(process_file, objdump=objdump)
 116     mapped_results = list(pool.map(copier, objs))
 117
 118     for mr in mapped_results:
 119         result_dir = os.path.dirname(mr.file)
 120         result_entry = dir_results[result_dir]
 121         add_results(result_entry, mr)
 122
 123     # It's only possible that a single item is incomplete, and it has to be the
 124     # last item.
 125     if incomplete:
 126         context.rincomplete = dir_results[ordered_dirs[-1]]
 127         ordered_dirs.pop()
 128
 129     # Now ordered_dirs contains a list of all directories which *did* complete.
 130     for c in ordered_dirs:
 131         re = dir_results[c]
 132         add_results(context.rcumulative, re)
 133         print_result_row(c, re)
 134
 135 def process_pending_files(pool, chunk_size, objdump, context):
 136     while context.npending >= chunk_size:
 137         process_one_chunk(pool, chunk_size, objdump, context)
 138
 139 def go():
 140     global args
 141
 142     obj_dir = args.dir
 143     extensions = args.extensions.split(',')
 144     extensions = [x if x[0] == '.' else '.' + x for x in extensions]
 145
 146
 147     pool_size = 48
 148     pool = Pool(processes=pool_size)
 149
 150     try:
 151         nfiles = 0
 152         context = MapContext()
 153
 154         for root, dirs, files in os.walk(obj_dir):
 155             root = os.path.normpath(root)
 156             pending = []
 157             for f in files:
 158                 file, ext = os.path.splitext(f)
 159                 if not ext in extensions:
 160                     continue
 161
 162                 nfiles += 1
 163                 full_path = os.path.join(root, f)
 164                 full_path = os.path.normpath(full_path)
 165                 pending.append(full_path)
 166
 167             # If this directory had no object files, just print a default
 168             # status line and continue with the next dir
 169             if len(pending) == 0:
 170                 print_result_row(root, Result())
 171                 continue
 172
 173             context.npending += len(pending)
 174             context.pending_objs.append((root, pending))
 175             # Drain the tasks, `pool_size` at a time, until we have less than
 176             # `pool_size` tasks remaining.
 177             process_pending_files(pool, pool_size, args.objdump, context)
 178
 179         assert(context.npending < pool_size);
 180         process_one_chunk(pool, pool_size, args.objdump, context)
 181
 182         total = context.rcumulative
 183         nfailed = len(total.errors)
 184         nsuccess = total.nsymbols - nfailed
 185         ncrashed = len(total.crashed)
 186
 187         if (nfailed > 0):
 188             print("Failures:")
 189             for m in sorted(total.errors):
 190                 print("  " + m)
 191         if (ncrashed > 0):
 192             print("Crashes:")
 193             for f in sorted(total.crashed):
 194                 print("  " + f)
 195         print("Summary:")
 196         spct = float(nsuccess)/float(total.nsymbols)
 197         fpct = float(nfailed)/float(total.nsymbols)
 198         cpct = float(ncrashed)/float(nfiles)
 199         print("Processed {0} object files.".format(nfiles))
 200         print("{0}/{1} symbols successfully demangled ({2:.4%})".format(nsuccess, total.nsymbols, spct))
 201         print("{0} symbols could not be demangled ({1:.4%})".format(nfailed, fpct))
 202         print("{0} files crashed while demangling ({1:.4%})".format(ncrashed, cpct))
 203
 204     except:
 205         traceback.print_exc()
 206
 207     pool.close()
 208     pool.join()
 209
 210 if __name__ == "__main__":
 211     def_obj = 'obj' if sys.platform == 'win32' else 'o'
 212
 213     parser = argparse.ArgumentParser(description='Demangle all symbols in a tree of object files, looking for failures.')
 214     parser.add_argument('dir', type=str, help='the root directory at which to start crawling')
 215     parser.add_argument('--objdump', type=str, help='path to llvm-objdump.  If not specified ' +
 216                         'the tool is located as if by `which llvm-objdump`.')
 217     parser.add_argument('--extensions', type=str, default=def_obj,
 218                         help='comma separated list of extensions to demangle (e.g. `o,obj`).  ' +
 219                         'By default this will be `obj` on Windows and `o` otherwise.')
 220
 221     args = parser.parse_args()
 222
 223
 224     multiprocessing.freeze_support()
 225     go()
 226