llvm/utils/demangle_tree.py

   1 # Given a path to llvm-objdump and a directory tree, spider the directory tree
   2 # dumping every object file encountered with correct options needed to demangle
   3 # symbols in the object file, and collect statistics about failed / crashed
   4 # demanglings.  Useful for stress testing the demangler against a large corpus
   5 # of inputs.
   6
   7 from __future__ import print_function
   8
   9 import argparse
  10 import functools
  11 import os
  12 import re
  13 import sys
  14 import subprocess
  15 import traceback
  16 from multiprocessing import Pool
  17 import multiprocessing
  18
# Parsed command-line arguments.  Assigned from parser.parse_args() in the
# __main__ block at the bottom of this file and read by go().
args = None
  20
  21
  22 def parse_line(line):
  23     question = line.find("?")
  24     if question == -1:
  25         return None, None
  26
  27     open_paren = line.find("(", question)
  28     if open_paren == -1:
  29         return None, None
  30     close_paren = line.rfind(")", open_paren)
  31     if open_paren == -1:
  32         return None, None
  33     mangled = line[question:open_paren]
  34     demangled = line[open_paren + 1 : close_paren]
  35     return mangled.strip(), demangled.strip()
  36
  37
  38 class Result(object):
  39     def __init__(self):
  40         self.crashed = []
  41         self.file = None
  42         self.nsymbols = 0
  43         self.errors = set()
  44         self.nfiles = 0
  45
  46
  47 class MapContext(object):
  48     def __init__(self):
  49         self.rincomplete = None
  50         self.rcumulative = Result()
  51         self.pending_objs = []
  52         self.npending = 0
  53
  54
  55 def process_file(path, objdump):
  56     r = Result()
  57     r.file = path
  58
  59     popen_args = [objdump, "-t", "-demangle", path]
  60     p = subprocess.Popen(popen_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  61     stdout, stderr = p.communicate()
  62     if p.returncode != 0:
  63         r.crashed = [r.file]
  64         return r
  65
  66     output = stdout.decode("utf-8")
  67
  68     for line in output.splitlines():
  69         mangled, demangled = parse_line(line)
  70         if mangled is None:
  71             continue
  72         r.nsymbols += 1
  73         if "invalid mangled name" in demangled:
  74             r.errors.add(mangled)
  75     return r
  76
  77
  78 def add_results(r1, r2):
  79     r1.crashed.extend(r2.crashed)
  80     r1.errors.update(r2.errors)
  81     r1.nsymbols += r2.nsymbols
  82     r1.nfiles += r2.nfiles
  83
  84
  85 def print_result_row(directory, result):
  86     print(
  87         "[{0} files, {1} crashes, {2} errors, {3} symbols]: '{4}'".format(
  88             result.nfiles,
  89             len(result.crashed),
  90             len(result.errors),
  91             result.nsymbols,
  92             directory,
  93         )
  94     )
  95
  96
  97 def process_one_chunk(pool, chunk_size, objdump, context):
  98     objs = []
  99
 100     incomplete = False
 101     dir_results = {}
 102     ordered_dirs = []
 103     while context.npending > 0 and len(objs) < chunk_size:
 104         this_dir = context.pending_objs[0][0]
 105         ordered_dirs.append(this_dir)
 106         re = Result()
 107         if context.rincomplete is not None:
 108             re = context.rincomplete
 109             context.rincomplete = None
 110
 111         dir_results[this_dir] = re
 112         re.file = this_dir
 113
 114         nneeded = chunk_size - len(objs)
 115         objs_this_dir = context.pending_objs[0][1]
 116         navail = len(objs_this_dir)
 117         ntaken = min(nneeded, navail)
 118         objs.extend(objs_this_dir[0:ntaken])
 119         remaining_objs_this_dir = objs_this_dir[ntaken:]
 120         context.pending_objs[0] = (context.pending_objs[0][0], remaining_objs_this_dir)
 121         context.npending -= ntaken
 122         if ntaken == navail:
 123             context.pending_objs.pop(0)
 124         else:
 125             incomplete = True
 126
 127         re.nfiles += ntaken
 128
 129     assert len(objs) == chunk_size or context.npending == 0
 130
 131     copier = functools.partial(process_file, objdump=objdump)
 132     mapped_results = list(pool.map(copier, objs))
 133
 134     for mr in mapped_results:
 135         result_dir = os.path.dirname(mr.file)
 136         result_entry = dir_results[result_dir]
 137         add_results(result_entry, mr)
 138
 139     # It's only possible that a single item is incomplete, and it has to be the
 140     # last item.
 141     if incomplete:
 142         context.rincomplete = dir_results[ordered_dirs[-1]]
 143         ordered_dirs.pop()
 144
 145     # Now ordered_dirs contains a list of all directories which *did* complete.
 146     for c in ordered_dirs:
 147         re = dir_results[c]
 148         add_results(context.rcumulative, re)
 149         print_result_row(c, re)
 150
 151
 152 def process_pending_files(pool, chunk_size, objdump, context):
 153     while context.npending >= chunk_size:
 154         process_one_chunk(pool, chunk_size, objdump, context)
 155
 156
 157 def go():
 158     global args
 159
 160     obj_dir = args.dir
 161     extensions = args.extensions.split(",")
 162     extensions = [x if x[0] == "." else "." + x for x in extensions]
 163
 164     pool_size = 48
 165     pool = Pool(processes=pool_size)
 166
 167     try:
 168         nfiles = 0
 169         context = MapContext()
 170
 171         for root, dirs, files in os.walk(obj_dir):
 172             root = os.path.normpath(root)
 173             pending = []
 174             for f in files:
 175                 file, ext = os.path.splitext(f)
 176                 if not ext in extensions:
 177                     continue
 178
 179                 nfiles += 1
 180                 full_path = os.path.join(root, f)
 181                 full_path = os.path.normpath(full_path)
 182                 pending.append(full_path)
 183
 184             # If this directory had no object files, just print a default
 185             # status line and continue with the next dir
 186             if len(pending) == 0:
 187                 print_result_row(root, Result())
 188                 continue
 189
 190             context.npending += len(pending)
 191             context.pending_objs.append((root, pending))
 192             # Drain the tasks, `pool_size` at a time, until we have less than
 193             # `pool_size` tasks remaining.
 194             process_pending_files(pool, pool_size, args.objdump, context)
 195
 196         assert context.npending < pool_size
 197         process_one_chunk(pool, pool_size, args.objdump, context)
 198
 199         total = context.rcumulative
 200         nfailed = len(total.errors)
 201         nsuccess = total.nsymbols - nfailed
 202         ncrashed = len(total.crashed)
 203
 204         if nfailed > 0:
 205             print("Failures:")
 206             for m in sorted(total.errors):
 207                 print("  " + m)
 208         if ncrashed > 0:
 209             print("Crashes:")
 210             for f in sorted(total.crashed):
 211                 print("  " + f)
 212         print("Summary:")
 213         spct = float(nsuccess) / float(total.nsymbols)
 214         fpct = float(nfailed) / float(total.nsymbols)
 215         cpct = float(ncrashed) / float(nfiles)
 216         print("Processed {0} object files.".format(nfiles))
 217         print(
 218             "{0}/{1} symbols successfully demangled ({2:.4%})".format(
 219                 nsuccess, total.nsymbols, spct
 220             )
 221         )
 222         print("{0} symbols could not be demangled ({1:.4%})".format(nfailed, fpct))
 223         print("{0} files crashed while demangling ({1:.4%})".format(ncrashed, cpct))
 224
 225     except:
 226         traceback.print_exc()
 227
 228     pool.close()
 229     pool.join()
 230
 231
 232 if __name__ == "__main__":
 233     def_obj = "obj" if sys.platform == "win32" else "o"
 234
 235     parser = argparse.ArgumentParser(
 236         description="Demangle all symbols in a tree of object files, looking for failures."
 237     )
 238     parser.add_argument(
 239         "dir", type=str, help="the root directory at which to start crawling"
 240     )
 241     parser.add_argument(
 242         "--objdump",
 243         type=str,
 244         help="path to llvm-objdump.  If not specified "
 245         + "the tool is located as if by `which llvm-objdump`.",
 246     )
 247     parser.add_argument(
 248         "--extensions",
 249         type=str,
 250         default=def_obj,
 251         help="comma separated list of extensions to demangle (e.g. `o,obj`).  "
 252         + "By default this will be `obj` on Windows and `o` otherwise.",
 253     )
 254
 255     args = parser.parse_args()
 256
 257     multiprocessing.freeze_support()
 258     go()