[llvm-exegesis][NFC] Simplify code at the cost of small code duplication
[llvm-core.git] / utils / demangle_tree.py
blob1185a233a419fa378df8f411bdcf1f94558c5be1
1 # Given a path to llvm-objdump and a directory tree, spider the directory tree
2 # dumping every object file encountered with correct options needed to demangle
3 # symbols in the object file, and collect statistics about failed / crashed
4 # demanglings. Useful for stress testing the demangler against a large corpus
5 # of inputs.
7 import argparse
8 import functools
9 import os
10 import re
11 import sys
12 import subprocess
13 import traceback
14 from multiprocessing import Pool
15 import multiprocessing
# Parsed command-line arguments. Stays None until the `__main__` block assigns
# the argparse namespace; go() reads `dir`, `extensions` and `objdump` from it.
args = None
def parse_line(line):
    """Split one line of dumped output into (mangled, demangled) names.

    The mangled name is taken to start at the first '?' in the line and to run
    up to the first '(' that follows it.  The demangled text is whatever sits
    between that '(' and the last ')' on the line.

    Returns a `(mangled, demangled)` tuple of whitespace-stripped strings, or
    `(None, None)` when the line has no '?', no '(' after the '?', or no ')'
    after that '('.
    """
    question = line.find('?')
    if question == -1:
        return None, None

    open_paren = line.find('(', question)
    if open_paren == -1:
        return None, None

    # Use the *last* ')' so that parentheses inside the demangled text (for
    # example a parameter list) stay part of the demangled string.
    close_paren = line.rfind(')', open_paren)
    if close_paren == -1:
        # Unterminated '(': without this check the slice below would use -1 as
        # its end index and silently drop the final character.
        return None, None

    mangled = line[question:open_paren]
    demangled = line[open_paren + 1:close_paren]
    return mangled.strip(), demangled.strip()
class Result(object):
    """Demangling statistics for a single object file or a group of them."""

    def __init__(self):
        # Path of the object file, or the directory, this result describes.
        self.file = None
        # Number of object files folded into this result.
        self.nfiles = 0
        # Number of symbol lines that were recognised.
        self.nsymbols = 0
        # Mangled names that could not be demangled.
        self.errors = set()
        # Paths of object files for which the dump tool exited with an error.
        self.crashed = []
class MapContext(object):
    """Mutable bookkeeping shared between successive chunks of work."""

    def __init__(self):
        # Queue of (directory, [object paths]) entries still to be processed,
        # together with the total number of object paths held in that queue.
        self.pending_objs = []
        self.npending = 0
        # Running totals over every directory that has been fully processed.
        self.rcumulative = Result()
        # Partial Result of a directory whose files straddle a chunk boundary,
        # or None when no directory is currently split.
        self.rincomplete = None
def process_file(path, objdump):
    """Dump one object file and collect demangling statistics for it.

    Runs `objdump -t -demangle path` and scans every output line with
    parse_line().

    path    -- path of the object file to dump.
    objdump -- path of the objdump executable to run.

    Returns a Result whose `file` is `path`.  If the tool exits with a
    non-zero status the file is recorded in `crashed` and no symbols are
    counted.  Otherwise `nsymbols` counts the recognised symbol lines and
    `errors` holds every mangled name whose demangled text contains
    "invalid mangled name".
    """
    r = Result()
    r.file = path

    popen_args = [objdump, '-t', '-demangle', path]
    p = subprocess.Popen(popen_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # stderr is captured only to keep the tool's diagnostics off our output.
    stdout, _ = p.communicate()
    if p.returncode != 0:
        r.crashed = [r.file]
        return r

    # Symbol tables may contain bytes that are not valid UTF-8.  Substitute
    # them instead of raising, since an exception here would propagate out of
    # the worker pool and abort the whole run.
    output = stdout.decode('utf-8', errors='replace')

    for line in output.splitlines():
        mangled, demangled = parse_line(line)
        if mangled is None:
            continue
        r.nsymbols += 1
        if "invalid mangled name" in demangled:
            r.errors.add(mangled)
    return r
def add_results(r1, r2):
    """Fold the statistics of `r2` into `r1` in place; `r2` is left untouched."""
    # Plain counters are summed.
    r1.nfiles = r1.nfiles + r2.nfiles
    r1.nsymbols = r1.nsymbols + r2.nsymbols
    # Collections are merged into r1's existing containers.
    r1.errors.update(r2.errors)
    r1.crashed.extend(r2.crashed)
def print_result_row(directory, result):
    """Print a one-line summary of `result`, labelled with `directory`."""
    template = "[{0} files, {1} crashes, {2} errors, {3} symbols]: '{4}'"
    fields = (
        result.nfiles,
        len(result.crashed),
        len(result.errors),
        result.nsymbols,
        directory,
    )
    print(template.format(*fields))
def process_one_chunk(pool, chunk_size, objdump, context):
    """Process up to `chunk_size` pending object files in parallel.

    Files are taken from the front of `context.pending_objs`, one directory
    at a time, until the chunk is full or nothing is pending.  They are then
    mapped over `pool` with process_file(), and each file's result is merged
    into the Result of the directory it was taken from.

    Every directory whose files have now all been processed is added to
    `context.rcumulative` and printed.  At most one directory, always the last
    one touched, can be left partially processed; its partial Result is stored
    in `context.rincomplete` and picked up again by the next call.

    pool       -- object providing `map(func, iterable)`.
    chunk_size -- maximum number of files to process in this call.
    objdump    -- path of the objdump executable, forwarded to process_file().
    context    -- MapContext holding the pending queue and running totals.
    """
    objs = []
    # obj_dirs[i] is the directory entry that objs[i] was taken from.  Recording
    # it explicitly avoids re-deriving it from the file path: os.path.dirname()
    # of a normalised path directly under '.' is '', which does not match the
    # '.' key used for that directory.
    obj_dirs = []

    incomplete = False
    dir_results = {}
    ordered_dirs = []
    while context.npending > 0 and len(objs) < chunk_size:
        this_dir, objs_this_dir = context.pending_objs[0]
        ordered_dirs.append(this_dir)
        dir_result = Result()
        if context.rincomplete is not None:
            # Resume the directory that was split by the previous chunk.
            dir_result = context.rincomplete
            context.rincomplete = None

        dir_results[this_dir] = dir_result
        dir_result.file = this_dir

        nneeded = chunk_size - len(objs)
        navail = len(objs_this_dir)
        ntaken = min(nneeded, navail)
        objs.extend(objs_this_dir[:ntaken])
        obj_dirs.extend([this_dir] * ntaken)
        context.pending_objs[0] = (this_dir, objs_this_dir[ntaken:])
        context.npending -= ntaken
        if ntaken == navail:
            context.pending_objs.pop(0)
        else:
            incomplete = True

        dir_result.nfiles += ntaken

    assert len(objs) == chunk_size or context.npending == 0

    copier = functools.partial(process_file, objdump=objdump)
    # pool.map() returns results in input order, so they line up with obj_dirs.
    mapped_results = pool.map(copier, objs)

    for owner_dir, mr in zip(obj_dirs, mapped_results):
        add_results(dir_results[owner_dir], mr)

    # It's only possible that a single item is incomplete, and it has to be the
    # last item.
    if incomplete:
        context.rincomplete = dir_results[ordered_dirs[-1]]
        ordered_dirs.pop()

    # Now ordered_dirs contains a list of all directories which *did* complete.
    for c in ordered_dirs:
        completed = dir_results[c]
        add_results(context.rcumulative, completed)
        print_result_row(c, completed)
def process_pending_files(pool, chunk_size, objdump, context):
    """Run full chunks until fewer than `chunk_size` files remain pending."""
    while True:
        if context.npending < chunk_size:
            # Not enough work left for a full chunk; leave it queued.
            return
        process_one_chunk(pool, chunk_size, objdump, context)
def go():
    """Walk `args.dir`, demangle every matching object file, print a report.

    Reads `dir`, `extensions` and `objdump` from the module-level `args`.
    For each directory a status row is printed once all of its object files
    have been processed.  After the walk, the failed symbols, the crashed
    files and an overall summary are printed.

    Any exception raised while processing is reported with a traceback rather
    than propagated, and the worker pool is always closed and joined.
    """
    obj_dir = args.dir

    # Normalise the comma separated list into a set of dotted suffixes,
    # ignoring empty entries such as those produced by a trailing comma.
    extensions = set()
    for ext in args.extensions.split(','):
        ext = ext.strip()
        if not ext:
            continue
        extensions.add(ext if ext.startswith('.') else '.' + ext)

    pool_size = 48
    pool = Pool(processes=pool_size)

    def ratio(part, whole):
        # An empty tree has nothing to divide by; report 0% instead of raising.
        return float(part) / float(whole) if whole else 0.0

    try:
        nfiles = 0
        context = MapContext()

        for root, _dirs, files in os.walk(obj_dir):
            root = os.path.normpath(root)
            pending = [
                os.path.normpath(os.path.join(root, f))
                for f in files
                if os.path.splitext(f)[1] in extensions
            ]
            nfiles += len(pending)

            # If this directory had no object files, just print a default
            # status line and continue with the next dir
            if not pending:
                print_result_row(root, Result())
                continue

            context.npending += len(pending)
            context.pending_objs.append((root, pending))
            # Drain the tasks, `pool_size` at a time, until we have less than
            # `pool_size` tasks remaining.
            process_pending_files(pool, pool_size, args.objdump, context)

        # Whatever is left is smaller than one full chunk; flush it.
        assert context.npending < pool_size
        process_one_chunk(pool, pool_size, args.objdump, context)

        total = context.rcumulative
        nfailed = len(total.errors)
        nsuccess = total.nsymbols - nfailed
        ncrashed = len(total.crashed)

        if nfailed > 0:
            print("Failures:")
            for m in sorted(total.errors):
                print("  " + m)
        if ncrashed > 0:
            print("Crashes:")
            for f in sorted(total.crashed):
                print("  " + f)
        print("Summary:")
        spct = ratio(nsuccess, total.nsymbols)
        fpct = ratio(nfailed, total.nsymbols)
        cpct = ratio(ncrashed, nfiles)
        print("Processed {0} object files.".format(nfiles))
        print("{0}/{1} symbols successfully demangled ({2:.4%})".format(nsuccess, total.nsymbols, spct))
        print("{0} symbols could not be demangled ({1:.4%})".format(nfailed, fpct))
        print("{0} files crashed while demangling ({1:.4%})".format(ncrashed, cpct))
    except Exception:
        # Report the failure but still fall through to the pool cleanup below.
        traceback.print_exc()
    finally:
        pool.close()
        pool.join()
if __name__ == "__main__":
    # The multiprocessing documentation asks for freeze_support() to be called
    # straight after the `__main__` guard, so do it before any other work.
    multiprocessing.freeze_support()

    def_obj = 'obj' if sys.platform == 'win32' else 'o'

    parser = argparse.ArgumentParser(description='Demangle all symbols in a tree of object files, looking for failures.')
    parser.add_argument('dir', type=str, help='the root directory at which to start crawling')
    parser.add_argument('--objdump', type=str, help='path to llvm-objdump.  If not specified ' +
                        'the tool is located as if by `which llvm-objdump`.')
    parser.add_argument('--extensions', type=str, default=def_obj,
                        help='comma separated list of extensions to demangle (e.g. `o,obj`).  ' +
                        'By default this will be `obj` on Windows and `o` otherwise.')

    args = parser.parse_args()

    if args.objdump is None:
        # Honour the documented fallback: search PATH for llvm-objdump rather
        # than handing None to subprocess.
        import shutil
        args.objdump = shutil.which('llvm-objdump')
        if args.objdump is None:
            parser.error('llvm-objdump was not found on PATH; pass --objdump explicitly')

    go()