Supervised user whitelists: Cleanup
[chromium-blink-merge.git] / tools / clang / blink_gc_plugin / process-graph.py
blobb4fb1e64116c96e2b817cadf04bc33b1a9158c68
1 #!/usr/bin/env python
2 # Copyright 2014 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 import argparse, os, sys, json, subprocess, pickle, StringIO
8 parser = argparse.ArgumentParser(
9 description =
10 "Process the Blink points-to graph generated by the Blink GC plugin.")
12 parser.add_argument(
13 '-', dest='use_stdin', action='store_true',
14 help='Read JSON graph files from stdin')
16 parser.add_argument(
17 '-c', '--detect-cycles', action='store_true',
18 help='Detect cycles containing GC roots')
20 parser.add_argument(
21 '-s', '--print-stats', action='store_true',
22 help='Statistics about ref-counted and traced objects')
24 parser.add_argument(
25 '-v', '--verbose', action='store_true',
26 help='Verbose output')
28 parser.add_argument(
29 '--ignore-cycles', default=None, metavar='FILE',
30 help='File with cycles to ignore')
32 parser.add_argument(
33 '--ignore-classes', nargs='*', default=[], metavar='CLASS',
34 help='Classes to ignore when detecting cycles')
36 parser.add_argument(
37 '--pickle-graph', default=None, metavar='FILE',
38 help='File to read/save the graph from/to')
40 parser.add_argument(
41 'files', metavar='FILE_OR_DIR', nargs='*', default=[],
42 help='JSON graph files or directories containing them')
44 # Command line args after parsing.
45 args = None
47 # Map from node labels to nodes.
48 graph = {}
50 # Set of root nodes.
51 roots = []
53 # List of cycles to ignore.
54 ignored_cycles = []
56 # Global flag to determine exit code.
57 global_reported_error = False
59 def set_reported_error(value):
60 global global_reported_error
61 global_reported_error = value
63 def reported_error():
64 return global_reported_error
66 def log(msg):
67 if args.verbose:
68 print msg
70 global_inc_copy = 0
71 def inc_copy():
72 global global_inc_copy
73 global_inc_copy += 1
75 def get_node(name):
76 return graph.setdefault(name, Node(name))
78 ptr_types = ('raw', 'ref', 'mem')
80 def inc_ptr(dst, ptr):
81 if ptr in ptr_types:
82 node = graph.get(dst)
83 if not node: return
84 node.counts[ptr] += 1
86 def add_counts(s1, s2):
87 for (k, v) in s2.iteritems():
88 s1[k] += s2[k]
90 # Representation of graph nodes. Basically a map of directed edges.
91 class Node:
92 def __init__(self, name):
93 self.name = name
94 self.edges = {}
95 self.reset()
96 def __repr__(self):
97 return "%s(%s) %s" % (self.name, self.visited, self.edges)
98 def update_node(self, decl):
99 # Currently we don't track any node info besides its edges.
100 pass
101 def update_edge(self, e):
102 new_edge = Edge(**e)
103 edge = self.edges.get(new_edge.key)
104 if edge:
105 # If an edge exist, its kind is the strongest of the two.
106 edge.kind = max(edge.kind, new_edge.kind)
107 else:
108 self.edges[new_edge.key] = new_edge
109 def super_edges(self):
110 return [ e for e in self.edges.itervalues() if e.is_super() ]
111 def subclass_edges(self):
112 return [ e for e in self.edges.itervalues() if e.is_subclass() ]
113 def reset(self):
114 self.cost = sys.maxint
115 self.visited = False
116 self.path = None
117 self.counts = {}
118 for ptr in ptr_types:
119 self.counts[ptr] = 0
120 def update_counts(self):
121 for e in self.edges.itervalues():
122 inc_ptr(e.dst, e.ptr)
124 # Representation of directed graph edges.
125 class Edge:
126 def __init__(self, **decl):
127 self.src = decl['src']
128 self.dst = decl['dst']
129 self.lbl = decl['lbl']
130 self.ptr = decl['ptr']
131 self.kind = decl['kind'] # 0 = weak, 1 = strong, 2 = root
132 self.loc = decl['loc']
133 # The label does not uniquely determine an edge from a node. We
134 # define the semi-unique key to be the concatenation of the
135 # label and dst name. This is sufficient to track the strongest
136 # edge to a particular type. For example, if the field A::m_f
137 # has type HashMap<WeakMember<B>, Member<B>> we will have a
138 # strong edge with key m_f#B from A to B.
139 self.key = '%s#%s' % (self.lbl, self.dst)
140 def __repr__(self):
141 return '%s (%s) => %s' % (self.src, self.lbl, self.dst)
142 def is_root(self):
143 return self.kind == 2
144 def is_weak(self):
145 return self.kind == 0
146 def keeps_alive(self):
147 return self.kind > 0
148 def is_subclass(self):
149 return self.lbl.startswith('<subclass>')
150 def is_super(self):
151 return self.lbl.startswith('<super>')
153 def parse_file(filename):
154 obj = json.load(open(filename))
155 return obj
157 def build_graphs_in_dir(dirname):
158 # TODO: Use plateform independent code, eg, os.walk
159 files = subprocess.check_output(
160 ['find', dirname, '-name', '*.graph.json']).split('\n')
161 log("Found %d files" % len(files))
162 for f in files:
163 f.strip()
164 if len(f) < 1:
165 continue
166 build_graph(f)
168 def build_graph(filename):
169 for decl in parse_file(filename):
170 if decl.has_key('name'):
171 # Add/update a node entry
172 name = decl['name']
173 node = get_node(name)
174 node.update_node(decl)
175 else:
176 # Add/update an edge entry
177 name = decl['src']
178 node = get_node(name)
179 node.update_edge(decl)
181 # Copy all non-weak edges from super classes to their subclasses.
182 # This causes all fields of a super to be considered fields of a
183 # derived class without tranitively relating derived classes with
184 # each other. For example, if B <: A, C <: A, and for some D, D => B,
185 # we don't want that to entail that D => C.
186 def copy_super_edges(edge):
187 if edge.is_weak() or not edge.is_super():
188 return
189 inc_copy()
190 # Make the super-class edge weak (prohibits processing twice).
191 edge.kind = 0
192 # If the super class is not in our graph exit early.
193 super_node = graph.get(edge.dst)
194 if super_node is None: return
195 # Recursively copy all super-class edges.
196 for e in super_node.super_edges():
197 copy_super_edges(e)
198 # Copy strong super-class edges (ignoring sub-class edges) to the sub class.
199 sub_node = graph[edge.src]
200 for e in super_node.edges.itervalues():
201 if e.keeps_alive() and not e.is_subclass():
202 new_edge = Edge(
203 src = sub_node.name,
204 dst = e.dst,
205 lbl = '%s <: %s' % (super_node.name, e.lbl),
206 ptr = e.ptr,
207 kind = e.kind,
208 loc = e.loc,
210 sub_node.edges[new_edge.key] = new_edge
211 # Add a strong sub-class edge.
212 sub_edge = Edge(
213 src = super_node.name,
214 dst = sub_node.name,
215 lbl = '<subclass>',
216 ptr = edge.ptr,
217 kind = 1,
218 loc = edge.loc,
220 super_node.edges[sub_edge.key] = sub_edge
222 def complete_graph():
223 for node in graph.itervalues():
224 for edge in node.super_edges():
225 copy_super_edges(edge)
226 for edge in node.edges.itervalues():
227 if edge.is_root():
228 roots.append(edge)
229 log("Copied edges down <super> edges for %d graph nodes" % global_inc_copy)
231 def reset_graph():
232 for n in graph.itervalues():
233 n.reset()
235 def shortest_path(start, end):
236 start.cost = 0
237 minlist = [start]
238 while len(minlist) > 0:
239 minlist.sort(key=lambda n: -n.cost)
240 current = minlist.pop()
241 current.visited = True
242 if current == end or current.cost >= end.cost + 1:
243 return
244 for e in current.edges.itervalues():
245 if not e.keeps_alive():
246 continue
247 dst = graph.get(e.dst)
248 if dst is None or dst.visited:
249 continue
250 if current.cost < dst.cost:
251 dst.cost = current.cost + 1
252 dst.path = e
253 minlist.append(dst)
255 def detect_cycles():
256 for root_edge in roots:
257 reset_graph()
258 # Mark ignored classes as already visited
259 for ignore in args.ignore_classes:
260 name = ignore.find("::") > 0 and ignore or ("blink::" + ignore)
261 node = graph.get(name)
262 if node:
263 node.visited = True
264 src = graph[root_edge.src]
265 dst = graph.get(root_edge.dst)
266 if src.visited:
267 continue
268 if root_edge.dst == "WTF::String":
269 continue
270 if dst is None:
271 print "\nPersistent root to incomplete destination object:"
272 print root_edge
273 set_reported_error(True)
274 continue
275 # Find the shortest path from the root target (dst) to its host (src)
276 shortest_path(dst, src)
277 if src.cost < sys.maxint:
278 report_cycle(root_edge)
280 def is_ignored_cycle(cycle):
281 for block in ignored_cycles:
282 if block_match(cycle, block):
283 return True
285 def block_match(b1, b2):
286 if len(b1) != len(b2):
287 return False
288 for (l1, l2) in zip(b1, b2):
289 if l1 != l2:
290 return False
291 return True
293 def report_cycle(root_edge):
294 dst = graph[root_edge.dst]
295 path = []
296 edge = root_edge
297 dst.path = None
298 while edge:
299 path.append(edge)
300 edge = graph[edge.src].path
301 path.append(root_edge)
302 path.reverse()
303 # Find the max loc length for pretty printing.
304 max_loc = 0
305 for p in path:
306 if len(p.loc) > max_loc:
307 max_loc = len(p.loc)
308 out = StringIO.StringIO()
309 for p in path[:-1]:
310 print >>out, (p.loc + ':').ljust(max_loc + 1), p
311 sout = out.getvalue()
312 if not is_ignored_cycle(sout):
313 print "\nFound a potentially leaking cycle starting from a GC root:\n", sout
314 set_reported_error(True)
316 def load_graph():
317 global graph
318 global roots
319 log("Reading graph from pickled file: " + args.pickle_graph)
320 dump = pickle.load(open(args.pickle_graph, 'rb'))
321 graph = dump[0]
322 roots = dump[1]
324 def save_graph():
325 log("Saving graph to pickle file: " + args.pickle_graph)
326 dump = (graph, roots)
327 pickle.dump(dump, open(args.pickle_graph, 'wb'))
329 def read_ignored_cycles():
330 global ignored_cycles
331 if not args.ignore_cycles:
332 return
333 log("Reading ignored cycles from file: " + args.ignore_cycles)
334 block = []
335 for l in open(args.ignore_cycles):
336 line = l.strip()
337 if not line or line.startswith('Found'):
338 if len(block) > 0:
339 ignored_cycles.append(block)
340 block = []
341 else:
342 block += l
343 if len(block) > 0:
344 ignored_cycles.append(block)
346 gc_bases = (
347 'blink::GarbageCollected',
348 'blink::GarbageCollectedFinalized',
349 'blink::GarbageCollectedMixin',
351 ref_bases = (
352 'WTF::RefCounted',
353 'WTF::ThreadSafeRefCounted',
355 gcref_bases = (
356 'blink::RefCountedGarbageCollected',
357 'blink::ThreadSafeRefCountedGarbageCollected',
359 ref_mixins = (
360 'blink::EventTarget',
361 'blink::EventTargetWithInlineData',
362 'blink::ActiveDOMObject',
365 def print_stats():
366 gcref_managed = []
367 ref_managed = []
368 gc_managed = []
369 hierarchies = []
371 for node in graph.itervalues():
372 node.update_counts()
373 for sup in node.super_edges():
374 if sup.dst in gcref_bases:
375 gcref_managed.append(node)
376 elif sup.dst in ref_bases:
377 ref_managed.append(node)
378 elif sup.dst in gc_bases:
379 gc_managed.append(node)
381 groups = [("GC manged ", gc_managed),
382 ("ref counted ", ref_managed),
383 ("in transition", gcref_managed)]
384 total = sum([len(g) for (s,g) in groups])
385 for (s, g) in groups:
386 percent = len(g) * 100 / total
387 print "%2d%% is %s (%d hierarchies)" % (percent, s, len(g))
389 for base in gcref_managed:
390 stats = dict({ 'classes': 0, 'ref-mixins': 0 })
391 for ptr in ptr_types: stats[ptr] = 0
392 hierarchy_stats(base, stats)
393 hierarchies.append((base, stats))
395 print "\nHierarchies in transition (RefCountedGarbageCollected):"
396 hierarchies.sort(key=lambda (n,s): -s['classes'])
397 for (node, stats) in hierarchies:
398 total = stats['mem'] + stats['ref'] + stats['raw']
399 print (
400 "%s %3d%% of %-30s: %3d cls, %3d mem, %3d ref, %3d raw, %3d ref-mixins" %
401 (stats['ref'] == 0 and stats['ref-mixins'] == 0 and "*" or " ",
402 total == 0 and 100 or stats['mem'] * 100 / total,
403 node.name.replace('blink::', ''),
404 stats['classes'],
405 stats['mem'],
406 stats['ref'],
407 stats['raw'],
408 stats['ref-mixins'],
411 def hierarchy_stats(node, stats):
412 if not node: return
413 stats['classes'] += 1
414 add_counts(stats, node.counts)
415 for edge in node.super_edges():
416 if edge.dst in ref_mixins:
417 stats['ref-mixins'] += 1
418 for edge in node.subclass_edges():
419 hierarchy_stats(graph.get(edge.dst), stats)
421 def main():
422 global args
423 args = parser.parse_args()
424 if not (args.detect_cycles or args.print_stats):
425 print "Please select an operation to perform (eg, -c to detect cycles)"
426 parser.print_help()
427 return 1
428 if args.pickle_graph and os.path.isfile(args.pickle_graph):
429 load_graph()
430 else:
431 if args.use_stdin:
432 log("Reading files from stdin")
433 for f in sys.stdin:
434 build_graph(f.strip())
435 else:
436 log("Reading files and directories from command line")
437 if len(args.files) == 0:
438 print "Please provide files or directores for building the graph"
439 parser.print_help()
440 return 1
441 for f in args.files:
442 if os.path.isdir(f):
443 log("Building graph from files in directory: " + f)
444 build_graphs_in_dir(f)
445 else:
446 log("Building graph from file: " + f)
447 build_graph(f)
448 log("Completing graph construction (%d graph nodes)" % len(graph))
449 complete_graph()
450 if args.pickle_graph:
451 save_graph()
452 if args.detect_cycles:
453 read_ignored_cycles()
454 log("Detecting cycles containg GC roots")
455 detect_cycles()
456 if args.print_stats:
457 log("Printing statistics")
458 print_stats()
459 if reported_error():
460 return 1
461 return 0
463 if __name__ == '__main__':
464 sys.exit(main())