# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
6 import argparse
, os
, sys
, json
, subprocess
, pickle
, StringIO
8 parser
= argparse
.ArgumentParser(
10 "Process the Blink points-to graph generated by the Blink GC plugin.")
13 '-', dest
='use_stdin', action
='store_true',
14 help='Read JSON graph files from stdin')
17 '-c', '--detect-cycles', action
='store_true',
18 help='Detect cycles containing GC roots')
21 '-s', '--print-stats', action
='store_true',
22 help='Statistics about ref-counted and traced objects')
25 '-v', '--verbose', action
='store_true',
26 help='Verbose output')
29 '--ignore-cycles', default
=None, metavar
='FILE',
30 help='File with cycles to ignore')
33 '--ignore-classes', nargs
='*', default
=[], metavar
='CLASS',
34 help='Classes to ignore when detecting cycles')
37 '--pickle-graph', default
=None, metavar
='FILE',
38 help='File to read/save the graph from/to')
41 'files', metavar
='FILE_OR_DIR', nargs
='*', default
=[],
42 help='JSON graph files or directories containing them')
44 # Command line args after parsing.
47 # Map from node labels to nodes.
53 # List of cycles to ignore.
56 # Global flag to determine exit code.
57 global_reported_error
= False
59 def set_reported_error(value
):
60 global global_reported_error
61 global_reported_error
= value
64 return global_reported_error
72 global global_inc_copy
76 return graph
.setdefault(name
, Node(name
))
78 ptr_types
= ('raw', 'ref', 'mem')
80 def inc_ptr(dst
, ptr
):
86 def add_counts(s1
, s2
):
87 for (k
, v
) in s2
.iteritems():
90 # Representation of graph nodes. Basically a map of directed edges.
92 def __init__(self
, name
):
97 return "%s(%s) %s" % (self
.name
, self
.visited
, self
.edges
)
98 def update_node(self
, decl
):
99 # Currently we don't track any node info besides its edges.
101 def update_edge(self
, e
):
103 edge
= self
.edges
.get(new_edge
.key
)
105 # If an edge exist, its kind is the strongest of the two.
106 edge
.kind
= max(edge
.kind
, new_edge
.kind
)
108 self
.edges
[new_edge
.key
] = new_edge
109 def super_edges(self
):
110 return [ e
for e
in self
.edges
.itervalues() if e
.is_super() ]
111 def subclass_edges(self
):
112 return [ e
for e
in self
.edges
.itervalues() if e
.is_subclass() ]
114 self
.cost
= sys
.maxint
118 for ptr
in ptr_types
:
120 def update_counts(self
):
121 for e
in self
.edges
.itervalues():
122 inc_ptr(e
.dst
, e
.ptr
)
124 # Representation of directed graph edges.
126 def __init__(self
, **decl
):
127 self
.src
= decl
['src']
128 self
.dst
= decl
['dst']
129 self
.lbl
= decl
['lbl']
130 self
.ptr
= decl
['ptr']
131 self
.kind
= decl
['kind'] # 0 = weak, 1 = strong, 2 = root
132 self
.loc
= decl
['loc']
133 # The label does not uniquely determine an edge from a node. We
134 # define the semi-unique key to be the concatenation of the
135 # label and dst name. This is sufficient to track the strongest
136 # edge to a particular type. For example, if the field A::m_f
137 # has type HashMap<WeakMember<B>, Member<B>> we will have a
138 # strong edge with key m_f#B from A to B.
139 self
.key
= '%s#%s' % (self
.lbl
, self
.dst
)
141 return '%s (%s) => %s' % (self
.src
, self
.lbl
, self
.dst
)
143 return self
.kind
== 2
145 return self
.kind
== 0
146 def keeps_alive(self
):
148 def is_subclass(self
):
149 return self
.lbl
.startswith('<subclass>')
151 return self
.lbl
.startswith('<super>')
153 def parse_file(filename
):
154 obj
= json
.load(open(filename
))
157 def build_graphs_in_dir(dirname
):
158 # TODO: Use plateform independent code, eg, os.walk
159 files
= subprocess
.check_output(
160 ['find', dirname
, '-name', '*.graph.json']).split('\n')
161 log("Found %d files" % len(files
))
168 def build_graph(filename
):
169 for decl
in parse_file(filename
):
170 if decl
.has_key('name'):
171 # Add/update a node entry
173 node
= get_node(name
)
174 node
.update_node(decl
)
176 # Add/update an edge entry
178 node
= get_node(name
)
179 node
.update_edge(decl
)
181 # Copy all non-weak edges from super classes to their subclasses.
182 # This causes all fields of a super to be considered fields of a
183 # derived class without tranitively relating derived classes with
184 # each other. For example, if B <: A, C <: A, and for some D, D => B,
185 # we don't want that to entail that D => C.
186 def copy_super_edges(edge
):
187 if edge
.is_weak() or not edge
.is_super():
190 # Make the super-class edge weak (prohibits processing twice).
192 # If the super class is not in our graph exit early.
193 super_node
= graph
.get(edge
.dst
)
194 if super_node
is None: return
195 # Recursively copy all super-class edges.
196 for e
in super_node
.super_edges():
198 # Copy strong super-class edges (ignoring sub-class edges) to the sub class.
199 sub_node
= graph
[edge
.src
]
200 for e
in super_node
.edges
.itervalues():
201 if e
.keeps_alive() and not e
.is_subclass():
205 lbl
= '%s <: %s' % (super_node
.name
, e
.lbl
),
210 sub_node
.edges
[new_edge
.key
] = new_edge
211 # Add a strong sub-class edge.
213 src
= super_node
.name
,
220 super_node
.edges
[sub_edge
.key
] = sub_edge
222 def complete_graph():
223 for node
in graph
.itervalues():
224 for edge
in node
.super_edges():
225 copy_super_edges(edge
)
226 for edge
in node
.edges
.itervalues():
229 log("Copied edges down <super> edges for %d graph nodes" % global_inc_copy
)
232 for n
in graph
.itervalues():
235 def shortest_path(start
, end
):
238 while len(minlist
) > 0:
239 minlist
.sort(key
=lambda n
: -n
.cost
)
240 current
= minlist
.pop()
241 current
.visited
= True
242 if current
== end
or current
.cost
>= end
.cost
+ 1:
244 for e
in current
.edges
.itervalues():
245 if not e
.keeps_alive():
247 dst
= graph
.get(e
.dst
)
248 if dst
is None or dst
.visited
:
250 if current
.cost
< dst
.cost
:
251 dst
.cost
= current
.cost
+ 1
256 for root_edge
in roots
:
258 # Mark ignored classes as already visited
259 for ignore
in args
.ignore_classes
:
260 name
= ignore
.find("::") > 0 and ignore
or ("blink::" + ignore
)
261 node
= graph
.get(name
)
264 src
= graph
[root_edge
.src
]
265 dst
= graph
.get(root_edge
.dst
)
268 if root_edge
.dst
== "WTF::String":
271 print "\nPersistent root to incomplete destination object:"
273 set_reported_error(True)
275 # Find the shortest path from the root target (dst) to its host (src)
276 shortest_path(dst
, src
)
277 if src
.cost
< sys
.maxint
:
278 report_cycle(root_edge
)
280 def is_ignored_cycle(cycle
):
281 for block
in ignored_cycles
:
282 if block_match(cycle
, block
):
285 def block_match(b1
, b2
):
286 if len(b1
) != len(b2
):
288 for (l1
, l2
) in zip(b1
, b2
):
293 def report_cycle(root_edge
):
294 dst
= graph
[root_edge
.dst
]
300 edge
= graph
[edge
.src
].path
301 path
.append(root_edge
)
303 # Find the max loc length for pretty printing.
306 if len(p
.loc
) > max_loc
:
308 out
= StringIO
.StringIO()
310 print >>out
, (p
.loc
+ ':').ljust(max_loc
+ 1), p
311 sout
= out
.getvalue()
312 if not is_ignored_cycle(sout
):
313 print "\nFound a potentially leaking cycle starting from a GC root:\n", sout
314 set_reported_error(True)
319 log("Reading graph from pickled file: " + args
.pickle_graph
)
320 dump
= pickle
.load(open(args
.pickle_graph
, 'rb'))
325 log("Saving graph to pickle file: " + args
.pickle_graph
)
326 dump
= (graph
, roots
)
327 pickle
.dump(dump
, open(args
.pickle_graph
, 'wb'))
329 def read_ignored_cycles():
330 global ignored_cycles
331 if not args
.ignore_cycles
:
333 log("Reading ignored cycles from file: " + args
.ignore_cycles
)
335 for l
in open(args
.ignore_cycles
):
337 if not line
or line
.startswith('Found'):
339 ignored_cycles
.append(block
)
344 ignored_cycles
.append(block
)
347 'blink::GarbageCollected',
348 'blink::GarbageCollectedFinalized',
349 'blink::GarbageCollectedMixin',
353 'WTF::ThreadSafeRefCounted',
356 'blink::RefCountedGarbageCollected',
357 'blink::ThreadSafeRefCountedGarbageCollected',
360 'blink::EventTarget',
361 'blink::EventTargetWithInlineData',
362 'blink::ActiveDOMObject',
371 for node
in graph
.itervalues():
373 for sup
in node
.super_edges():
374 if sup
.dst
in gcref_bases
:
375 gcref_managed
.append(node
)
376 elif sup
.dst
in ref_bases
:
377 ref_managed
.append(node
)
378 elif sup
.dst
in gc_bases
:
379 gc_managed
.append(node
)
381 groups
= [("GC manged ", gc_managed
),
382 ("ref counted ", ref_managed
),
383 ("in transition", gcref_managed
)]
384 total
= sum([len(g
) for (s
,g
) in groups
])
385 for (s
, g
) in groups
:
386 percent
= len(g
) * 100 / total
387 print "%2d%% is %s (%d hierarchies)" % (percent
, s
, len(g
))
389 for base
in gcref_managed
:
390 stats
= dict({ 'classes': 0, 'ref-mixins': 0 })
391 for ptr
in ptr_types
: stats
[ptr
] = 0
392 hierarchy_stats(base
, stats
)
393 hierarchies
.append((base
, stats
))
395 print "\nHierarchies in transition (RefCountedGarbageCollected):"
396 hierarchies
.sort(key
=lambda (n
,s
): -s
['classes'])
397 for (node
, stats
) in hierarchies
:
398 total
= stats
['mem'] + stats
['ref'] + stats
['raw']
400 "%s %3d%% of %-30s: %3d cls, %3d mem, %3d ref, %3d raw, %3d ref-mixins" %
401 (stats
['ref'] == 0 and stats
['ref-mixins'] == 0 and "*" or " ",
402 total
== 0 and 100 or stats
['mem'] * 100 / total
,
403 node
.name
.replace('blink::', ''),
411 def hierarchy_stats(node
, stats
):
413 stats
['classes'] += 1
414 add_counts(stats
, node
.counts
)
415 for edge
in node
.super_edges():
416 if edge
.dst
in ref_mixins
:
417 stats
['ref-mixins'] += 1
418 for edge
in node
.subclass_edges():
419 hierarchy_stats(graph
.get(edge
.dst
), stats
)
423 args
= parser
.parse_args()
424 if not (args
.detect_cycles
or args
.print_stats
):
425 print "Please select an operation to perform (eg, -c to detect cycles)"
428 if args
.pickle_graph
and os
.path
.isfile(args
.pickle_graph
):
432 log("Reading files from stdin")
434 build_graph(f
.strip())
436 log("Reading files and directories from command line")
437 if len(args
.files
) == 0:
438 print "Please provide files or directores for building the graph"
443 log("Building graph from files in directory: " + f
)
444 build_graphs_in_dir(f
)
446 log("Building graph from file: " + f
)
448 log("Completing graph construction (%d graph nodes)" % len(graph
))
450 if args
.pickle_graph
:
452 if args
.detect_cycles
:
453 read_ignored_cycles()
454 log("Detecting cycles containg GC roots")
457 log("Printing statistics")
463 if __name__
== '__main__':