More unit tests
[codimension.git] / thirdparty / gprof2dot / gprof2dot.py
blob1cf3fc585dc7dfc982528f08b269a9ce2f6e28b4
1 #!/usr/bin/env python
3 # Copyright 2008-2009 Jose Fonseca
5 # This program is free software: you can redistribute it and/or modify it
6 # under the terms of the GNU Lesser General Public License as published
7 # by the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Lesser General Public License for more details.
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 """Generate a dot graph from the output of several profilers."""
21 __author__ = "Jose Fonseca"
23 __version__ = "1.0"
26 import sys
27 import math
28 import os.path
29 import re
30 import textwrap
31 import optparse
32 import xml.parsers.expat
35 try:
36 # Debugging helper module
37 import debug
38 except ImportError:
39 pass
42 def times(x):
43 return "%dx" % (x,)
45 def percentage(p):
46 return "%.02f%%" % (p*100.0,)
48 def add(a, b):
49 return a + b
51 def equal(a, b):
52 if a == b:
53 return a
54 else:
55 return None
57 def fail(a, b):
58 assert False
61 tol = 2 ** -23
63 def ratio(numerator, denominator):
64 try:
65 ratio = float(numerator)/float(denominator)
66 except ZeroDivisionError:
67 # 0/0 is undefined, but 1.0 yields more useful results
68 return 1.0
69 if ratio < 0.0:
70 if ratio < -tol:
71 sys.stderr.write('warning: negative ratio (%s/%s)\n' % (numerator, denominator))
72 return 0.0
73 if ratio > 1.0:
74 if ratio > 1.0 + tol:
75 sys.stderr.write('warning: ratio greater than one (%s/%s)\n' % (numerator, denominator))
76 return 1.0
77 return ratio
80 class UndefinedEvent(Exception):
81 """Raised when attempting to get an event which is undefined."""
83 def __init__(self, event):
84 Exception.__init__(self)
85 self.event = event
87 def __str__(self):
88 return 'unspecified event %s' % self.event.name
91 class Event(object):
92 """Describe a kind of event, and its basic operations."""
94 def __init__(self, name, null, aggregator, formatter = str):
95 self.name = name
96 self._null = null
97 self._aggregator = aggregator
98 self._formatter = formatter
100 def __eq__(self, other):
101 return self is other
103 def __hash__(self):
104 return id(self)
106 def null(self):
107 return self._null
109 def aggregate(self, val1, val2):
110 """Aggregate two event values."""
111 assert val1 is not None
112 assert val2 is not None
113 return self._aggregator(val1, val2)
115 def format(self, val):
116 """Format an event value."""
117 assert val is not None
118 return self._formatter(val)
121 CALLS = Event("Calls", 0, add, times)
122 SAMPLES = Event("Samples", 0, add)
123 SAMPLES2 = Event("Samples", 0, add)
125 TIME = Event("Time", 0.0, add, lambda x: '(' + str(x) + ')')
126 TIME_RATIO = Event("Time ratio", 0.0, add, lambda x: '(' + percentage(x) + ')')
127 TOTAL_TIME = Event("Total time", 0.0, fail)
128 TOTAL_TIME_RATIO = Event("Total time ratio", 0.0, fail, percentage)
131 class Object(object):
132 """Base class for all objects in profile which can store events."""
134 def __init__(self, events=None):
135 if events is None:
136 self.events = {}
137 else:
138 self.events = events
140 def __hash__(self):
141 return id(self)
143 def __eq__(self, other):
144 return self is other
146 def __contains__(self, event):
147 return event in self.events
149 def __getitem__(self, event):
150 try:
151 return self.events[event]
152 except KeyError:
153 raise UndefinedEvent(event)
155 def __setitem__(self, event, value):
156 if value is None:
157 if event in self.events:
158 del self.events[event]
159 else:
160 self.events[event] = value
163 class Call(Object):
164 """A call between functions.
166 There should be at most one call object for every pair of functions.
169 def __init__(self, callee_id):
170 Object.__init__(self)
171 self.callee_id = callee_id
172 self.ratio = None
173 self.weight = None
176 class Function(Object):
177 """A function."""
179 def __init__(self, id, name):
180 Object.__init__(self)
181 self.id = id
182 self.index = None
183 self.name = name
184 self.module = None
185 self.process = None
186 self.calls = {}
187 self.called = None
188 self.primitive_called = None
189 self.weight = None
190 self.cycle = None
192 def add_call(self, call):
193 if call.callee_id in self.calls:
194 sys.stderr.write('warning: overwriting call from function %s to %s\n' % (str(self.id), str(call.callee_id)))
195 self.calls[call.callee_id] = call
197 def get_call(self, callee_id):
198 if not callee_id in self.calls:
199 call = Call(callee_id)
200 call[SAMPLES] = 0
201 call[SAMPLES2] = 0
202 call[CALLS] = 0
203 self.calls[callee_id] = call
204 return self.calls[callee_id]
206 _parenthesis_re = re.compile(r'\([^()]*\)')
207 _angles_re = re.compile(r'<[^<>]*>')
208 _const_re = re.compile(r'\s+const$')
210 def stripped_name(self):
211 """Remove extraneous information from C++ demangled function names."""
213 name = self.name
215 # Strip function parameters from name by recursively removing paired parenthesis
216 while True:
217 name, n = self._parenthesis_re.subn('', name)
218 if not n:
219 break
221 # Strip const qualifier
222 name = self._const_re.sub('', name)
224 # Strip template parameters from name by recursively removing paired angles
225 while True:
226 name, n = self._angles_re.subn('', name)
227 if not n:
228 break
230 return name
232 # TODO: write utility functions
234 def __repr__(self):
235 return self.name
238 class Cycle(Object):
239 """A cycle made from recursive function calls."""
241 def __init__(self):
242 Object.__init__(self)
243 # XXX: Do cycles need an id?
244 self.functions = set()
246 def add_function(self, function):
247 assert function not in self.functions
248 self.functions.add(function)
249 # XXX: Aggregate events?
250 if function.cycle is not None:
251 for other in function.cycle.functions:
252 if function not in self.functions:
253 self.add_function(other)
254 function.cycle = self
257 class Profile(Object):
258 """The whole profile."""
260 def __init__(self):
261 Object.__init__(self)
262 self.functions = {}
263 self.cycles = []
265 def add_function(self, function):
266 if function.id in self.functions:
267 sys.stderr.write('warning: overwriting function %s (id %s)\n' % (function.name, str(function.id)))
268 self.functions[function.id] = function
270 def add_cycle(self, cycle):
271 self.cycles.append(cycle)
273 def validate(self):
274 """Validate the edges."""
276 for function in self.functions.itervalues():
277 for callee_id in function.calls.keys():
278 assert function.calls[callee_id].callee_id == callee_id
279 if callee_id not in self.functions:
280 sys.stderr.write('warning: call to undefined function %s from function %s\n' % (str(callee_id), function.name))
281 del function.calls[callee_id]
283 def find_cycles(self):
284 """Find cycles using Tarjan's strongly connected components algorithm."""
286 # Apply the Tarjan's algorithm successively until all functions are visited
287 visited = set()
288 for function in self.functions.itervalues():
289 if function not in visited:
290 self._tarjan(function, 0, [], {}, {}, visited)
291 cycles = []
292 for function in self.functions.itervalues():
293 if function.cycle is not None and function.cycle not in cycles:
294 cycles.append(function.cycle)
295 self.cycles = cycles
296 if 0:
297 for cycle in cycles:
298 sys.stderr.write("Cycle:\n")
299 for member in cycle.functions:
300 sys.stderr.write("\tFunction %s\n" % member.name)
302 def _tarjan(self, function, order, stack, orders, lowlinks, visited):
303 """Tarjan's strongly connected components algorithm.
305 See also:
306 - http://en.wikipedia.org/wiki/Tarjan's_strongly_connected_components_algorithm
309 visited.add(function)
310 orders[function] = order
311 lowlinks[function] = order
312 order += 1
313 pos = len(stack)
314 stack.append(function)
315 for call in function.calls.itervalues():
316 callee = self.functions[call.callee_id]
317 # TODO: use a set to optimize lookup
318 if callee not in orders:
319 order = self._tarjan(callee, order, stack, orders, lowlinks, visited)
320 lowlinks[function] = min(lowlinks[function], lowlinks[callee])
321 elif callee in stack:
322 lowlinks[function] = min(lowlinks[function], orders[callee])
323 if lowlinks[function] == orders[function]:
324 # Strongly connected component found
325 members = stack[pos:]
326 del stack[pos:]
327 if len(members) > 1:
328 cycle = Cycle()
329 for member in members:
330 cycle.add_function(member)
331 return order
333 def call_ratios(self, event):
334 # Aggregate for incoming calls
335 cycle_totals = {}
336 for cycle in self.cycles:
337 cycle_totals[cycle] = 0.0
338 function_totals = {}
339 for function in self.functions.itervalues():
340 function_totals[function] = 0.0
341 for function in self.functions.itervalues():
342 for call in function.calls.itervalues():
343 if call.callee_id != function.id:
344 callee = self.functions[call.callee_id]
345 function_totals[callee] += call[event]
346 if callee.cycle is not None and callee.cycle is not function.cycle:
347 cycle_totals[callee.cycle] += call[event]
349 # Compute the ratios
350 for function in self.functions.itervalues():
351 for call in function.calls.itervalues():
352 assert call.ratio is None
353 if call.callee_id != function.id:
354 callee = self.functions[call.callee_id]
355 if callee.cycle is not None and callee.cycle is not function.cycle:
356 total = cycle_totals[callee.cycle]
357 else:
358 total = function_totals[callee]
359 call.ratio = ratio(call[event], total)
361 def integrate(self, outevent, inevent):
362 """Propagate function time ratio allong the function calls.
364 Must be called after finding the cycles.
366 See also:
367 - http://citeseer.ist.psu.edu/graham82gprof.html
370 # Sanity checking
371 assert outevent not in self
372 for function in self.functions.itervalues():
373 assert outevent not in function
374 assert inevent in function
375 for call in function.calls.itervalues():
376 assert outevent not in call
377 if call.callee_id != function.id:
378 assert call.ratio is not None
380 # Aggregate the input for each cycle
381 for cycle in self.cycles:
382 total = inevent.null()
383 for function in self.functions.itervalues():
384 total = inevent.aggregate(total, function[inevent])
385 self[inevent] = total
387 # Integrate along the edges
388 total = inevent.null()
389 for function in self.functions.itervalues():
390 total = inevent.aggregate(total, function[inevent])
391 self._integrate_function(function, outevent, inevent)
392 self[outevent] = total
394 def _integrate_function(self, function, outevent, inevent):
395 if function.cycle is not None:
396 return self._integrate_cycle(function.cycle, outevent, inevent)
397 else:
398 if outevent not in function:
399 total = function[inevent]
400 for call in function.calls.itervalues():
401 if call.callee_id != function.id:
402 total += self._integrate_call(call, outevent, inevent)
403 function[outevent] = total
404 return function[outevent]
406 def _integrate_call(self, call, outevent, inevent):
407 assert outevent not in call
408 assert call.ratio is not None
409 callee = self.functions[call.callee_id]
410 subtotal = call.ratio *self._integrate_function(callee, outevent, inevent)
411 call[outevent] = subtotal
412 return subtotal
414 def _integrate_cycle(self, cycle, outevent, inevent):
415 if outevent not in cycle:
417 # Compute the outevent for the whole cycle
418 total = inevent.null()
419 for member in cycle.functions:
420 subtotal = member[inevent]
421 for call in member.calls.itervalues():
422 callee = self.functions[call.callee_id]
423 if callee.cycle is not cycle:
424 subtotal += self._integrate_call(call, outevent, inevent)
425 total += subtotal
426 cycle[outevent] = total
428 # Compute the time propagated to callers of this cycle
429 callees = {}
430 for function in self.functions.itervalues():
431 if function.cycle is not cycle:
432 for call in function.calls.itervalues():
433 callee = self.functions[call.callee_id]
434 if callee.cycle is cycle:
435 try:
436 callees[callee] += call.ratio
437 except KeyError:
438 callees[callee] = call.ratio
440 for member in cycle.functions:
441 member[outevent] = outevent.null()
443 for callee, call_ratio in callees.iteritems():
444 ranks = {}
445 call_ratios = {}
446 partials = {}
447 self._rank_cycle_function(cycle, callee, 0, ranks)
448 self._call_ratios_cycle(cycle, callee, ranks, call_ratios, set())
449 partial = self._integrate_cycle_function(cycle, callee, call_ratio, partials, ranks, call_ratios, outevent, inevent)
450 assert partial == max(partials.values())
451 assert not total or abs(1.0 - partial/(call_ratio*total)) <= 0.001
453 return cycle[outevent]
455 def _rank_cycle_function(self, cycle, function, rank, ranks):
456 if function not in ranks or ranks[function] > rank:
457 ranks[function] = rank
458 for call in function.calls.itervalues():
459 if call.callee_id != function.id:
460 callee = self.functions[call.callee_id]
461 if callee.cycle is cycle:
462 self._rank_cycle_function(cycle, callee, rank + 1, ranks)
464 def _call_ratios_cycle(self, cycle, function, ranks, call_ratios, visited):
465 if function not in visited:
466 visited.add(function)
467 for call in function.calls.itervalues():
468 if call.callee_id != function.id:
469 callee = self.functions[call.callee_id]
470 if callee.cycle is cycle:
471 if ranks[callee] > ranks[function]:
472 call_ratios[callee] = call_ratios.get(callee, 0.0) + call.ratio
473 self._call_ratios_cycle(cycle, callee, ranks, call_ratios, visited)
475 def _integrate_cycle_function(self, cycle, function, partial_ratio, partials, ranks, call_ratios, outevent, inevent):
476 if function not in partials:
477 partial = partial_ratio*function[inevent]
478 for call in function.calls.itervalues():
479 if call.callee_id != function.id:
480 callee = self.functions[call.callee_id]
481 if callee.cycle is not cycle:
482 assert outevent in call
483 partial += partial_ratio*call[outevent]
484 else:
485 if ranks[callee] > ranks[function]:
486 callee_partial = self._integrate_cycle_function(cycle, callee, partial_ratio, partials, ranks, call_ratios, outevent, inevent)
487 call_ratio = ratio(call.ratio, call_ratios[callee])
488 call_partial = call_ratio*callee_partial
489 try:
490 call[outevent] += call_partial
491 except UndefinedEvent:
492 call[outevent] = call_partial
493 partial += call_partial
494 partials[function] = partial
495 try:
496 function[outevent] += partial
497 except UndefinedEvent:
498 function[outevent] = partial
499 return partials[function]
501 def aggregate(self, event):
502 """Aggregate an event for the whole profile."""
504 total = event.null()
505 for function in self.functions.itervalues():
506 try:
507 total = event.aggregate(total, function[event])
508 except UndefinedEvent:
509 return
510 self[event] = total
512 def ratio(self, outevent, inevent):
513 assert outevent not in self
514 assert inevent in self
515 for function in self.functions.itervalues():
516 assert outevent not in function
517 assert inevent in function
518 function[outevent] = ratio(function[inevent], self[inevent])
519 for call in function.calls.itervalues():
520 assert outevent not in call
521 if inevent in call:
522 call[outevent] = ratio(call[inevent], self[inevent])
523 self[outevent] = 1.0
525 def prune(self, node_thres, edge_thres):
526 """Prune the profile"""
528 # compute the prune ratios
529 for function in self.functions.itervalues():
530 try:
531 function.weight = function[TOTAL_TIME_RATIO]
532 except UndefinedEvent:
533 pass
535 for call in function.calls.itervalues():
536 callee = self.functions[call.callee_id]
538 if TOTAL_TIME_RATIO in call:
539 # handle exact cases first
540 call.weight = call[TOTAL_TIME_RATIO]
541 else:
542 try:
543 # make a safe estimate
544 call.weight = min(function[TOTAL_TIME_RATIO], callee[TOTAL_TIME_RATIO])
545 except UndefinedEvent:
546 pass
548 # prune the nodes
549 for function_id in self.functions.keys():
550 function = self.functions[function_id]
551 if function.weight is not None:
552 if function.weight < node_thres:
553 del self.functions[function_id]
555 # prune the egdes
556 for function in self.functions.itervalues():
557 for callee_id in function.calls.keys():
558 call = function.calls[callee_id]
559 if callee_id not in self.functions or call.weight is not None and call.weight < edge_thres:
560 del function.calls[callee_id]
562 def dump(self):
563 for function in self.functions.itervalues():
564 sys.stderr.write('Function %s:\n' % (function.name,))
565 self._dump_events(function.events)
566 for call in function.calls.itervalues():
567 callee = self.functions[call.callee_id]
568 sys.stderr.write(' Call %s:\n' % (callee.name,))
569 self._dump_events(call.events)
570 for cycle in self.cycles:
571 sys.stderr.write('Cycle:\n')
572 self._dump_events(cycle.events)
573 for function in cycle.functions:
574 sys.stderr.write(' Function %s\n' % (function.name,))
576 def _dump_events(self, events):
577 for event, value in events.iteritems():
578 sys.stderr.write(' %s: %s\n' % (event.name, event.format(value)))
581 class Struct:
582 """Masquerade a dictionary with a structure-like behavior."""
584 def __init__(self, attrs = None):
585 if attrs is None:
586 attrs = {}
587 self.__dict__['_attrs'] = attrs
589 def __getattr__(self, name):
590 try:
591 return self._attrs[name]
592 except KeyError:
593 raise AttributeError(name)
595 def __setattr__(self, name, value):
596 self._attrs[name] = value
598 def __str__(self):
599 return str(self._attrs)
601 def __repr__(self):
602 return repr(self._attrs)
605 class ParseError(Exception):
606 """Raised when parsing to signal mismatches."""
608 def __init__(self, msg, line):
609 self.msg = msg
610 # TODO: store more source line information
611 self.line = line
613 def __str__(self):
614 return '%s: %r' % (self.msg, self.line)
617 class Parser:
618 """Parser interface."""
620 def __init__(self):
621 pass
623 def parse(self):
624 raise NotImplementedError
627 class LineParser(Parser):
628 """Base class for parsers that read line-based formats."""
630 def __init__(self, file):
631 Parser.__init__(self)
632 self._file = file
633 self.__line = None
634 self.__eof = False
635 self.line_no = 0
637 def readline(self):
638 line = self._file.readline()
639 if not line:
640 self.__line = ''
641 self.__eof = True
642 else:
643 self.line_no += 1
644 self.__line = line.rstrip('\r\n')
646 def lookahead(self):
647 assert self.__line is not None
648 return self.__line
650 def consume(self):
651 assert self.__line is not None
652 line = self.__line
653 self.readline()
654 return line
656 def eof(self):
657 assert self.__line is not None
658 return self.__eof
661 XML_ELEMENT_START, XML_ELEMENT_END, XML_CHARACTER_DATA, XML_EOF = range(4)
664 class XmlToken:
666 def __init__(self, type, name_or_data, attrs = None, line = None, column = None):
667 assert type in (XML_ELEMENT_START, XML_ELEMENT_END, XML_CHARACTER_DATA, XML_EOF)
668 self.type = type
669 self.name_or_data = name_or_data
670 self.attrs = attrs
671 self.line = line
672 self.column = column
674 def __str__(self):
675 if self.type == XML_ELEMENT_START:
676 return '<' + self.name_or_data + ' ...>'
677 if self.type == XML_ELEMENT_END:
678 return '</' + self.name_or_data + '>'
679 if self.type == XML_CHARACTER_DATA:
680 return self.name_or_data
681 if self.type == XML_EOF:
682 return 'end of file'
683 assert 0
686 class XmlTokenizer:
687 """Expat based XML tokenizer."""
689 def __init__(self, fp, skip_ws = True):
690 self.fp = fp
691 self.tokens = []
692 self.index = 0
693 self.final = False
694 self.skip_ws = skip_ws
696 self.character_pos = 0, 0
697 self.character_data = ''
699 self.parser = xml.parsers.expat.ParserCreate()
700 self.parser.StartElementHandler = self.handle_element_start
701 self.parser.EndElementHandler = self.handle_element_end
702 self.parser.CharacterDataHandler = self.handle_character_data
704 def handle_element_start(self, name, attributes):
705 self.finish_character_data()
706 line, column = self.pos()
707 token = XmlToken(XML_ELEMENT_START, name, attributes, line, column)
708 self.tokens.append(token)
710 def handle_element_end(self, name):
711 self.finish_character_data()
712 line, column = self.pos()
713 token = XmlToken(XML_ELEMENT_END, name, None, line, column)
714 self.tokens.append(token)
716 def handle_character_data(self, data):
717 if not self.character_data:
718 self.character_pos = self.pos()
719 self.character_data += data
721 def finish_character_data(self):
722 if self.character_data:
723 if not self.skip_ws or not self.character_data.isspace():
724 line, column = self.character_pos
725 token = XmlToken(XML_CHARACTER_DATA, self.character_data, None, line, column)
726 self.tokens.append(token)
727 self.character_data = ''
729 def next(self):
730 size = 16*1024
731 while self.index >= len(self.tokens) and not self.final:
732 self.tokens = []
733 self.index = 0
734 data = self.fp.read(size)
735 self.final = len(data) < size
736 try:
737 self.parser.Parse(data, self.final)
738 except xml.parsers.expat.ExpatError, e:
739 #if e.code == xml.parsers.expat.errors.XML_ERROR_NO_ELEMENTS:
740 if e.code == 3:
741 pass
742 else:
743 raise e
744 if self.index >= len(self.tokens):
745 line, column = self.pos()
746 token = XmlToken(XML_EOF, None, None, line, column)
747 else:
748 token = self.tokens[self.index]
749 self.index += 1
750 return token
752 def pos(self):
753 return self.parser.CurrentLineNumber, self.parser.CurrentColumnNumber
756 class XmlTokenMismatch(Exception):
758 def __init__(self, expected, found):
759 self.expected = expected
760 self.found = found
762 def __str__(self):
763 return '%u:%u: %s expected, %s found' % (self.found.line, self.found.column, str(self.expected), str(self.found))
766 class XmlParser(Parser):
767 """Base XML document parser."""
769 def __init__(self, fp):
770 Parser.__init__(self)
771 self.tokenizer = XmlTokenizer(fp)
772 self.consume()
774 def consume(self):
775 self.token = self.tokenizer.next()
777 def match_element_start(self, name):
778 return self.token.type == XML_ELEMENT_START and self.token.name_or_data == name
780 def match_element_end(self, name):
781 return self.token.type == XML_ELEMENT_END and self.token.name_or_data == name
783 def element_start(self, name):
784 while self.token.type == XML_CHARACTER_DATA:
785 self.consume()
786 if self.token.type != XML_ELEMENT_START:
787 raise XmlTokenMismatch(XmlToken(XML_ELEMENT_START, name), self.token)
788 if self.token.name_or_data != name:
789 raise XmlTokenMismatch(XmlToken(XML_ELEMENT_START, name), self.token)
790 attrs = self.token.attrs
791 self.consume()
792 return attrs
794 def element_end(self, name):
795 while self.token.type == XML_CHARACTER_DATA:
796 self.consume()
797 if self.token.type != XML_ELEMENT_END:
798 raise XmlTokenMismatch(XmlToken(XML_ELEMENT_END, name), self.token)
799 if self.token.name_or_data != name:
800 raise XmlTokenMismatch(XmlToken(XML_ELEMENT_END, name), self.token)
801 self.consume()
803 def character_data(self, strip = True):
804 data = ''
805 while self.token.type == XML_CHARACTER_DATA:
806 data += self.token.name_or_data
807 self.consume()
808 if strip:
809 data = data.strip()
810 return data
813 class GprofParser(Parser):
814 """Parser for GNU gprof output.
816 See also:
817 - Chapter "Interpreting gprof's Output" from the GNU gprof manual
818 http://sourceware.org/binutils/docs-2.18/gprof/Call-Graph.html#Call-Graph
819 - File "cg_print.c" from the GNU gprof source code
820 http://sourceware.org/cgi-bin/cvsweb.cgi/~checkout~/src/gprof/cg_print.c?rev=1.12&cvsroot=src
823 def __init__(self, fp):
824 Parser.__init__(self)
825 self.fp = fp
826 self.functions = {}
827 self.cycles = {}
829 def readline(self):
830 line = self.fp.readline()
831 if not line:
832 sys.stderr.write('error: unexpected end of file\n')
833 sys.exit(1)
834 line = line.rstrip('\r\n')
835 return line
837 _int_re = re.compile(r'^\d+$')
838 _float_re = re.compile(r'^\d+\.\d+$')
840 def translate(self, mo):
841 """Extract a structure from a match object, while translating the types in the process."""
842 attrs = {}
843 groupdict = mo.groupdict()
844 for name, value in groupdict.iteritems():
845 if value is None:
846 value = None
847 elif self._int_re.match(value):
848 value = int(value)
849 elif self._float_re.match(value):
850 value = float(value)
851 attrs[name] = (value)
852 return Struct(attrs)
854 _cg_header_re = re.compile(
855 # original gprof header
856 r'^\s+called/total\s+parents\s*$|' +
857 r'^index\s+%time\s+self\s+descendents\s+called\+self\s+name\s+index\s*$|' +
858 r'^\s+called/total\s+children\s*$|' +
859 # GNU gprof header
860 r'^index\s+%\s+time\s+self\s+children\s+called\s+name\s*$'
863 _cg_ignore_re = re.compile(
864 # spontaneous
865 r'^\s+<spontaneous>\s*$|'
866 # internal calls (such as "mcount")
867 r'^.*\((\d+)\)$'
870 _cg_primary_re = re.compile(
871 r'^\[(?P<index>\d+)\]?' +
872 r'\s+(?P<percentage_time>\d+\.\d+)' +
873 r'\s+(?P<self>\d+\.\d+)' +
874 r'\s+(?P<descendants>\d+\.\d+)' +
875 r'\s+(?:(?P<called>\d+)(?:\+(?P<called_self>\d+))?)?' +
876 r'\s+(?P<name>\S.*?)' +
877 r'(?:\s+<cycle\s(?P<cycle>\d+)>)?' +
878 r'\s\[(\d+)\]$'
881 _cg_parent_re = re.compile(
882 r'^\s+(?P<self>\d+\.\d+)?' +
883 r'\s+(?P<descendants>\d+\.\d+)?' +
884 r'\s+(?P<called>\d+)(?:/(?P<called_total>\d+))?' +
885 r'\s+(?P<name>\S.*?)' +
886 r'(?:\s+<cycle\s(?P<cycle>\d+)>)?' +
887 r'\s\[(?P<index>\d+)\]$'
890 _cg_child_re = _cg_parent_re
892 _cg_cycle_header_re = re.compile(
893 r'^\[(?P<index>\d+)\]?' +
894 r'\s+(?P<percentage_time>\d+\.\d+)' +
895 r'\s+(?P<self>\d+\.\d+)' +
896 r'\s+(?P<descendants>\d+\.\d+)' +
897 r'\s+(?:(?P<called>\d+)(?:\+(?P<called_self>\d+))?)?' +
898 r'\s+<cycle\s(?P<cycle>\d+)\sas\sa\swhole>' +
899 r'\s\[(\d+)\]$'
902 _cg_cycle_member_re = re.compile(
903 r'^\s+(?P<self>\d+\.\d+)?' +
904 r'\s+(?P<descendants>\d+\.\d+)?' +
905 r'\s+(?P<called>\d+)(?:\+(?P<called_self>\d+))?' +
906 r'\s+(?P<name>\S.*?)' +
907 r'(?:\s+<cycle\s(?P<cycle>\d+)>)?' +
908 r'\s\[(?P<index>\d+)\]$'
911 _cg_sep_re = re.compile(r'^--+$')
913 def parse_function_entry(self, lines):
914 parents = []
915 children = []
917 while True:
918 if not lines:
919 sys.stderr.write('warning: unexpected end of entry\n')
920 line = lines.pop(0)
921 if line.startswith('['):
922 break
924 # read function parent line
925 mo = self._cg_parent_re.match(line)
926 if not mo:
927 if self._cg_ignore_re.match(line):
928 continue
929 sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line)
930 else:
931 parent = self.translate(mo)
932 parents.append(parent)
934 # read primary line
935 mo = self._cg_primary_re.match(line)
936 if not mo:
937 sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line)
938 return
939 else:
940 function = self.translate(mo)
942 while lines:
943 line = lines.pop(0)
945 # read function subroutine line
946 mo = self._cg_child_re.match(line)
947 if not mo:
948 if self._cg_ignore_re.match(line):
949 continue
950 sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line)
951 else:
952 child = self.translate(mo)
953 children.append(child)
955 function.parents = parents
956 function.children = children
958 self.functions[function.index] = function
960 def parse_cycle_entry(self, lines):
962 # read cycle header line
963 line = lines[0]
964 mo = self._cg_cycle_header_re.match(line)
965 if not mo:
966 sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line)
967 return
968 cycle = self.translate(mo)
970 # read cycle member lines
971 cycle.functions = []
972 for line in lines[1:]:
973 mo = self._cg_cycle_member_re.match(line)
974 if not mo:
975 sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line)
976 continue
977 call = self.translate(mo)
978 cycle.functions.append(call)
980 self.cycles[cycle.cycle] = cycle
982 def parse_cg_entry(self, lines):
983 if lines[0].startswith("["):
984 self.parse_cycle_entry(lines)
985 else:
986 self.parse_function_entry(lines)
988 def parse_cg(self):
989 """Parse the call graph."""
991 # skip call graph header
992 while not self._cg_header_re.match(self.readline()):
993 pass
994 line = self.readline()
995 while self._cg_header_re.match(line):
996 line = self.readline()
998 # process call graph entries
999 entry_lines = []
1000 while line != '\014': # form feed
1001 if line and not line.isspace():
1002 if self._cg_sep_re.match(line):
1003 self.parse_cg_entry(entry_lines)
1004 entry_lines = []
1005 else:
1006 entry_lines.append(line)
1007 line = self.readline()
1009 def parse(self):
1010 self.parse_cg()
1011 self.fp.close()
1013 profile = Profile()
1014 profile[TIME] = 0.0
1016 cycles = {}
1017 for index in self.cycles.iterkeys():
1018 cycles[index] = Cycle()
1020 for entry in self.functions.itervalues():
1021 # populate the function
1022 function = Function(entry.index, entry.name)
1023 function[TIME] = entry.self
1024 if entry.called is not None:
1025 function.called = entry.called
1026 if entry.called_self is not None:
1027 call = Call(entry.index)
1028 call[CALLS] = entry.called_self
1029 function.called += entry.called_self
1031 # populate the function calls
1032 for child in entry.children:
1033 call = Call(child.index)
1035 assert child.called is not None
1036 call[CALLS] = child.called
1038 if child.index not in self.functions:
1039 # NOTE: functions that were never called but were discovered by gprof's
1040 # static call graph analysis dont have a call graph entry so we need
1041 # to add them here
1042 missing = Function(child.index, child.name)
1043 function[TIME] = 0.0
1044 function.called = 0
1045 profile.add_function(missing)
1047 function.add_call(call)
1049 profile.add_function(function)
1051 if entry.cycle is not None:
1052 try:
1053 cycle = cycles[entry.cycle]
1054 except KeyError:
1055 sys.stderr.write('warning: <cycle %u as a whole> entry missing\n' % entry.cycle)
1056 cycle = Cycle()
1057 cycles[entry.cycle] = cycle
1058 cycle.add_function(function)
1060 profile[TIME] = profile[TIME] + function[TIME]
1062 for cycle in cycles.itervalues():
1063 profile.add_cycle(cycle)
1065 # Compute derived events
1066 profile.validate()
1067 profile.ratio(TIME_RATIO, TIME)
1068 profile.call_ratios(CALLS)
1069 profile.integrate(TOTAL_TIME, TIME)
1070 profile.ratio(TOTAL_TIME_RATIO, TOTAL_TIME)
1072 return profile
1075 class CallgrindParser(LineParser):
1076 """Parser for valgrind's callgrind tool.
1078 See also:
1079 - http://valgrind.org/docs/manual/cl-format.html
1082 _call_re = re.compile('^calls=\s*(\d+)\s+((\d+|\+\d+|-\d+|\*)\s+)+$')
1084 def __init__(self, infile):
1085 LineParser.__init__(self, infile)
1087 # Textual positions
1088 self.position_ids = {}
1089 self.positions = {}
1091 # Numeric positions
1092 self.num_positions = 1
1093 self.cost_positions = ['line']
1094 self.last_positions = [0]
1096 # Events
1097 self.num_events = 0
1098 self.cost_events = []
1100 self.profile = Profile()
1101 self.profile[SAMPLES] = 0
1103 def parse(self):
1104 # read lookahead
1105 self.readline()
1107 self.parse_key('version')
1108 self.parse_key('creator')
1109 while self.parse_part():
1110 pass
1111 if not self.eof():
1112 sys.stderr.write('warning: line %u: unexpected line\n' % self.line_no)
1113 sys.stderr.write('%s\n' % self.lookahead())
1115 # compute derived data
1116 self.profile.validate()
1117 self.profile.find_cycles()
1118 self.profile.ratio(TIME_RATIO, SAMPLES)
1119 self.profile.call_ratios(CALLS)
1120 self.profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO)
1122 return self.profile
1124 def parse_part(self):
1125 if not self.parse_header_line():
1126 return False
1127 while self.parse_header_line():
1128 pass
1129 if not self.parse_body_line():
1130 return False
1131 while self.parse_body_line():
1132 pass
1133 return True
1135 def parse_header_line(self):
1136 return \
1137 self.parse_empty() or \
1138 self.parse_comment() or \
1139 self.parse_part_detail() or \
1140 self.parse_description() or \
1141 self.parse_event_specification() or \
1142 self.parse_cost_line_def() or \
1143 self.parse_cost_summary()
1145 _detail_keys = set(('cmd', 'pid', 'thread', 'part'))
1147 def parse_part_detail(self):
1148 return self.parse_keys(self._detail_keys)
1150 def parse_description(self):
1151 return self.parse_key('desc') is not None
1153 def parse_event_specification(self):
1154 event = self.parse_key('event')
1155 if event is None:
1156 return False
1157 return True
1159 def parse_cost_line_def(self):
1160 pair = self.parse_keys(('events', 'positions'))
1161 if pair is None:
1162 return False
1163 key, value = pair
1164 items = value.split()
1165 if key == 'events':
1166 self.num_events = len(items)
1167 self.cost_events = items
1168 if key == 'positions':
1169 self.num_positions = len(items)
1170 self.cost_positions = items
1171 self.last_positions = [0]*self.num_positions
1172 return True
1174 def parse_cost_summary(self):
1175 pair = self.parse_keys(('summary', 'totals'))
1176 if pair is None:
1177 return False
1178 return True
1180 def parse_body_line(self):
1181 return \
1182 self.parse_empty() or \
1183 self.parse_comment() or \
1184 self.parse_cost_line() or \
1185 self.parse_position_spec() or \
1186 self.parse_association_spec()
1188 __subpos_re = r'(0x[0-9a-fA-F]+|\d+|\+\d+|-\d+|\*)'
1189 _cost_re = re.compile(r'^' +
1190 __subpos_re + r'( +' + __subpos_re + r')*' +
1191 r'( +\d+)*' +
1192 '$')
1194 def parse_cost_line(self, calls=None):
1195 line = self.lookahead().rstrip()
1196 mo = self._cost_re.match(line)
1197 if not mo:
1198 return False
1200 function = self.get_function()
1202 if calls is None:
1203 # Unlike other aspects, call object (cob) is relative not to the
1204 # last call object, but to the caller's object (ob), so try to
1205 # update it when processing a functions cost line
1206 try:
1207 self.positions['cob'] = self.positions['ob']
1208 except KeyError:
1209 pass
1211 values = line.split()
1212 assert len(values) <= self.num_positions + self.num_events
1214 positions = values[0 : self.num_positions]
1215 events = values[self.num_positions : ]
1216 events += ['0']*(self.num_events - len(events))
1218 for i in range(self.num_positions):
1219 position = positions[i]
1220 if position == '*':
1221 position = self.last_positions[i]
1222 elif position[0] in '-+':
1223 position = self.last_positions[i] + int(position)
1224 elif position.startswith('0x'):
1225 position = int(position, 16)
1226 else:
1227 position = int(position)
1228 self.last_positions[i] = position
1230 events = map(float, events)
1232 if calls is None:
1233 function[SAMPLES] += events[0]
1234 self.profile[SAMPLES] += events[0]
1235 else:
1236 callee = self.get_callee()
1237 callee.called += calls
1239 try:
1240 call = function.calls[callee.id]
1241 except KeyError:
1242 call = Call(callee.id)
1243 call[CALLS] = calls
1244 call[SAMPLES] = events[0]
1245 function.add_call(call)
1246 else:
1247 call[CALLS] += calls
1248 call[SAMPLES] += events[0]
1250 self.consume()
1251 return True
1253 def parse_association_spec(self):
1254 line = self.lookahead()
1255 if not line.startswith('calls='):
1256 return False
1258 _, values = line.split('=', 1)
1259 values = values.strip().split()
1260 calls = int(values[0])
1261 call_position = values[1:]
1262 self.consume()
1264 self.parse_cost_line(calls)
1266 return True
1268 _position_re = re.compile('^(?P<position>[cj]?(?:ob|fl|fi|fe|fn))=\s*(?:\((?P<id>\d+)\))?(?:\s*(?P<name>.+))?')
1270 _position_table_map = {
1271 'ob': 'ob',
1272 'fl': 'fl',
1273 'fi': 'fl',
1274 'fe': 'fl',
1275 'fn': 'fn',
1276 'cob': 'ob',
1277 'cfl': 'fl',
1278 'cfi': 'fl',
1279 'cfe': 'fl',
1280 'cfn': 'fn',
1281 'jfi': 'fl',
1284 _position_map = {
1285 'ob': 'ob',
1286 'fl': 'fl',
1287 'fi': 'fl',
1288 'fe': 'fl',
1289 'fn': 'fn',
1290 'cob': 'cob',
1291 'cfl': 'cfl',
1292 'cfi': 'cfl',
1293 'cfe': 'cfl',
1294 'cfn': 'cfn',
1295 'jfi': 'jfi',
1298 def parse_position_spec(self):
1299 line = self.lookahead()
1301 if line.startswith('jump=') or line.startswith('jcnd='):
1302 self.consume()
1303 return True
1305 mo = self._position_re.match(line)
1306 if not mo:
1307 return False
1309 position, id, name = mo.groups()
1310 if id:
1311 table = self._position_table_map[position]
1312 if name:
1313 self.position_ids[(table, id)] = name
1314 else:
1315 name = self.position_ids.get((table, id), '')
1316 self.positions[self._position_map[position]] = name
1318 self.consume()
1319 return True
1321 def parse_empty(self):
1322 if self.eof():
1323 return False
1324 line = self.lookahead()
1325 if line.strip():
1326 return False
1327 self.consume()
1328 return True
1330 def parse_comment(self):
1331 line = self.lookahead()
1332 if not line.startswith('#'):
1333 return False
1334 self.consume()
1335 return True
1337 _key_re = re.compile(r'^(\w+):')
1339 def parse_key(self, key):
1340 pair = self.parse_keys((key,))
1341 if not pair:
1342 return None
1343 key, value = pair
1344 return value
1345 line = self.lookahead()
1346 mo = self._key_re.match(line)
1347 if not mo:
1348 return None
1349 key, value = line.split(':', 1)
1350 if key not in keys:
1351 return None
1352 value = value.strip()
1353 self.consume()
1354 return key, value
1356 def parse_keys(self, keys):
1357 line = self.lookahead()
1358 mo = self._key_re.match(line)
1359 if not mo:
1360 return None
1361 key, value = line.split(':', 1)
1362 if key not in keys:
1363 return None
1364 value = value.strip()
1365 self.consume()
1366 return key, value
1368 def make_function(self, module, filename, name):
1369 # FIXME: module and filename are not being tracked reliably
1370 #id = '|'.join((module, filename, name))
1371 id = name
1372 try:
1373 function = self.profile.functions[id]
1374 except KeyError:
1375 function = Function(id, name)
1376 if module:
1377 function.module = os.path.basename(module)
1378 function[SAMPLES] = 0
1379 function.called = 0
1380 self.profile.add_function(function)
1381 return function
1383 def get_function(self):
1384 module = self.positions.get('ob', '')
1385 filename = self.positions.get('fl', '')
1386 function = self.positions.get('fn', '')
1387 return self.make_function(module, filename, function)
1389 def get_callee(self):
1390 module = self.positions.get('cob', '')
1391 filename = self.positions.get('cfi', '')
1392 function = self.positions.get('cfn', '')
1393 return self.make_function(module, filename, function)
1396 class PerfParser(LineParser):
1397 """Parser for linux perf callgraph output.
1399 It expects output generated with
1401 perf record -g
1402 perf script | gprof2dot.py --format=perf
1405 def __init__(self, infile):
1406 LineParser.__init__(self, infile)
1407 self.profile = Profile()
1409 def readline(self):
1410 # Override LineParser.readline to ignore comment lines
1411 while True:
1412 LineParser.readline(self)
1413 if self.eof() or not self.lookahead().startswith('#'):
1414 break
1416 def parse(self):
1417 # read lookahead
1418 self.readline()
1420 profile = self.profile
1421 profile[SAMPLES] = 0
1422 while not self.eof():
1423 self.parse_event()
1425 # compute derived data
1426 profile.validate()
1427 profile.find_cycles()
1428 profile.ratio(TIME_RATIO, SAMPLES)
1429 profile.call_ratios(SAMPLES2)
1430 profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO)
1432 return profile
1434 def parse_event(self):
1435 if self.eof():
1436 return
1438 line = self.consume()
1439 assert line
1441 callchain = self.parse_callchain()
1442 if not callchain:
1443 return
1445 callee = callchain[0]
1446 callee[SAMPLES] += 1
1447 self.profile[SAMPLES] += 1
1449 for caller in callchain[1:]:
1450 try:
1451 call = caller.calls[callee.id]
1452 except KeyError:
1453 call = Call(callee.id)
1454 call[SAMPLES2] = 1
1455 caller.add_call(call)
1456 else:
1457 call[SAMPLES2] += 1
1459 callee = caller
1461 def parse_callchain(self):
1462 callchain = []
1463 while self.lookahead():
1464 function = self.parse_call()
1465 if function is None:
1466 break
1467 callchain.append(function)
1468 if self.lookahead() == '':
1469 self.consume()
1470 return callchain
1472 call_re = re.compile(r'^\s+(?P<address>[0-9a-fA-F]+)\s+(?P<symbol>.*)\s+\((?P<module>[^)]*)\)$')
1474 def parse_call(self):
1475 line = self.consume()
1476 mo = self.call_re.match(line)
1477 assert mo
1478 if not mo:
1479 return None
1481 function_name = mo.group('symbol')
1482 if not function_name:
1483 function_name = mo.group('address')
1485 module = mo.group('module')
1487 function_id = function_name + ':' + module
1489 try:
1490 function = self.profile.functions[function_id]
1491 except KeyError:
1492 function = Function(function_id, function_name)
1493 function.module = os.path.basename(module)
1494 function[SAMPLES] = 0
1495 self.profile.add_function(function)
1497 return function
1500 class OprofileParser(LineParser):
1501 """Parser for oprofile callgraph output.
1503 See also:
1504 - http://oprofile.sourceforge.net/doc/opreport.html#opreport-callgraph
1507 _fields_re = {
1508 'samples': r'(\d+)',
1509 '%': r'(\S+)',
1510 'linenr info': r'(?P<source>\(no location information\)|\S+:\d+)',
1511 'image name': r'(?P<image>\S+(?:\s\(tgid:[^)]*\))?)',
1512 'app name': r'(?P<application>\S+)',
1513 'symbol name': r'(?P<symbol>\(no symbols\)|.+?)',
1516 def __init__(self, infile):
1517 LineParser.__init__(self, infile)
1518 self.entries = {}
1519 self.entry_re = None
1521 def add_entry(self, callers, function, callees):
1522 try:
1523 entry = self.entries[function.id]
1524 except KeyError:
1525 self.entries[function.id] = (callers, function, callees)
1526 else:
1527 callers_total, function_total, callees_total = entry
1528 self.update_subentries_dict(callers_total, callers)
1529 function_total.samples += function.samples
1530 self.update_subentries_dict(callees_total, callees)
1532 def update_subentries_dict(self, totals, partials):
1533 for partial in partials.itervalues():
1534 try:
1535 total = totals[partial.id]
1536 except KeyError:
1537 totals[partial.id] = partial
1538 else:
1539 total.samples += partial.samples
1541 def parse(self):
1542 # read lookahead
1543 self.readline()
1545 self.parse_header()
1546 while self.lookahead():
1547 self.parse_entry()
1549 profile = Profile()
1551 reverse_call_samples = {}
1553 # populate the profile
1554 profile[SAMPLES] = 0
1555 for _callers, _function, _callees in self.entries.itervalues():
1556 function = Function(_function.id, _function.name)
1557 function[SAMPLES] = _function.samples
1558 profile.add_function(function)
1559 profile[SAMPLES] += _function.samples
1561 if _function.application:
1562 function.process = os.path.basename(_function.application)
1563 if _function.image:
1564 function.module = os.path.basename(_function.image)
1566 total_callee_samples = 0
1567 for _callee in _callees.itervalues():
1568 total_callee_samples += _callee.samples
1570 for _callee in _callees.itervalues():
1571 if not _callee.self:
1572 call = Call(_callee.id)
1573 call[SAMPLES2] = _callee.samples
1574 function.add_call(call)
1576 # compute derived data
1577 profile.validate()
1578 profile.find_cycles()
1579 profile.ratio(TIME_RATIO, SAMPLES)
1580 profile.call_ratios(SAMPLES2)
1581 profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO)
1583 return profile
1585 def parse_header(self):
1586 while not self.match_header():
1587 self.consume()
1588 line = self.lookahead()
1589 fields = re.split(r'\s\s+', line)
1590 entry_re = r'^\s*' + r'\s+'.join([self._fields_re[field] for field in fields]) + r'(?P<self>\s+\[self\])?$'
1591 self.entry_re = re.compile(entry_re)
1592 self.skip_separator()
1594 def parse_entry(self):
1595 callers = self.parse_subentries()
1596 if self.match_primary():
1597 function = self.parse_subentry()
1598 if function is not None:
1599 callees = self.parse_subentries()
1600 self.add_entry(callers, function, callees)
1601 self.skip_separator()
1603 def parse_subentries(self):
1604 subentries = {}
1605 while self.match_secondary():
1606 subentry = self.parse_subentry()
1607 subentries[subentry.id] = subentry
1608 return subentries
1610 def parse_subentry(self):
1611 entry = Struct()
1612 line = self.consume()
1613 mo = self.entry_re.match(line)
1614 if not mo:
1615 raise ParseError('failed to parse', line)
1616 fields = mo.groupdict()
1617 entry.samples = int(mo.group(1))
1618 if 'source' in fields and fields['source'] != '(no location information)':
1619 source = fields['source']
1620 filename, lineno = source.split(':')
1621 entry.filename = filename
1622 entry.lineno = int(lineno)
1623 else:
1624 source = ''
1625 entry.filename = None
1626 entry.lineno = None
1627 entry.image = fields.get('image', '')
1628 entry.application = fields.get('application', '')
1629 if 'symbol' in fields and fields['symbol'] != '(no symbols)':
1630 entry.symbol = fields['symbol']
1631 else:
1632 entry.symbol = ''
1633 if entry.symbol.startswith('"') and entry.symbol.endswith('"'):
1634 entry.symbol = entry.symbol[1:-1]
1635 entry.id = ':'.join((entry.application, entry.image, source, entry.symbol))
1636 entry.self = fields.get('self', None) != None
1637 if entry.self:
1638 entry.id += ':self'
1639 if entry.symbol:
1640 entry.name = entry.symbol
1641 else:
1642 entry.name = entry.image
1643 return entry
1645 def skip_separator(self):
1646 while not self.match_separator():
1647 self.consume()
1648 self.consume()
1650 def match_header(self):
1651 line = self.lookahead()
1652 return line.startswith('samples')
1654 def match_separator(self):
1655 line = self.lookahead()
1656 return line == '-'*len(line)
1658 def match_primary(self):
1659 line = self.lookahead()
1660 return not line[:1].isspace()
1662 def match_secondary(self):
1663 line = self.lookahead()
1664 return line[:1].isspace()
1667 class HProfParser(LineParser):
1668 """Parser for java hprof output
1670 See also:
1671 - http://java.sun.com/developer/technicalArticles/Programming/HPROF.html
1674 trace_re = re.compile(r'\t(.*)\((.*):(.*)\)')
1675 trace_id_re = re.compile(r'^TRACE (\d+):$')
1677 def __init__(self, infile):
1678 LineParser.__init__(self, infile)
1679 self.traces = {}
1680 self.samples = {}
1682 def parse(self):
1683 # read lookahead
1684 self.readline()
1686 while not self.lookahead().startswith('------'): self.consume()
1687 while not self.lookahead().startswith('TRACE '): self.consume()
1689 self.parse_traces()
1691 while not self.lookahead().startswith('CPU'):
1692 self.consume()
1694 self.parse_samples()
1696 # populate the profile
1697 profile = Profile()
1698 profile[SAMPLES] = 0
1700 functions = {}
1702 # build up callgraph
1703 for id, trace in self.traces.iteritems():
1704 if not id in self.samples: continue
1705 mtime = self.samples[id][0]
1706 last = None
1708 for func, file, line in trace:
1709 if not func in functions:
1710 function = Function(func, func)
1711 function[SAMPLES] = 0
1712 profile.add_function(function)
1713 functions[func] = function
1715 function = functions[func]
1716 # allocate time to the deepest method in the trace
1717 if not last:
1718 function[SAMPLES] += mtime
1719 profile[SAMPLES] += mtime
1720 else:
1721 c = function.get_call(last)
1722 c[SAMPLES2] += mtime
1724 last = func
1726 # compute derived data
1727 profile.validate()
1728 profile.find_cycles()
1729 profile.ratio(TIME_RATIO, SAMPLES)
1730 profile.call_ratios(SAMPLES2)
1731 profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO)
1733 return profile
1735 def parse_traces(self):
1736 while self.lookahead().startswith('TRACE '):
1737 self.parse_trace()
1739 def parse_trace(self):
1740 l = self.consume()
1741 mo = self.trace_id_re.match(l)
1742 tid = mo.group(1)
1743 last = None
1744 trace = []
1746 while self.lookahead().startswith('\t'):
1747 l = self.consume()
1748 match = self.trace_re.search(l)
1749 if not match:
1750 #sys.stderr.write('Invalid line: %s\n' % l)
1751 break
1752 else:
1753 function_name, file, line = match.groups()
1754 trace += [(function_name, file, line)]
1756 self.traces[int(tid)] = trace
1758 def parse_samples(self):
1759 self.consume()
1760 self.consume()
1762 while not self.lookahead().startswith('CPU'):
1763 rank, percent_self, percent_accum, count, traceid, method = self.lookahead().split()
1764 self.samples[int(traceid)] = (int(count), method)
1765 self.consume()
1768 class SysprofParser(XmlParser):
1770 def __init__(self, stream):
1771 XmlParser.__init__(self, stream)
1773 def parse(self):
1774 objects = {}
1775 nodes = {}
1777 self.element_start('profile')
1778 while self.token.type == XML_ELEMENT_START:
1779 if self.token.name_or_data == 'objects':
1780 assert not objects
1781 objects = self.parse_items('objects')
1782 elif self.token.name_or_data == 'nodes':
1783 assert not nodes
1784 nodes = self.parse_items('nodes')
1785 else:
1786 self.parse_value(self.token.name_or_data)
1787 self.element_end('profile')
1789 return self.build_profile(objects, nodes)
1791 def parse_items(self, name):
1792 assert name[-1] == 's'
1793 items = {}
1794 self.element_start(name)
1795 while self.token.type == XML_ELEMENT_START:
1796 id, values = self.parse_item(name[:-1])
1797 assert id not in items
1798 items[id] = values
1799 self.element_end(name)
1800 return items
1802 def parse_item(self, name):
1803 attrs = self.element_start(name)
1804 id = int(attrs['id'])
1805 values = self.parse_values()
1806 self.element_end(name)
1807 return id, values
1809 def parse_values(self):
1810 values = {}
1811 while self.token.type == XML_ELEMENT_START:
1812 name = self.token.name_or_data
1813 value = self.parse_value(name)
1814 assert name not in values
1815 values[name] = value
1816 return values
1818 def parse_value(self, tag):
1819 self.element_start(tag)
1820 value = self.character_data()
1821 self.element_end(tag)
1822 if value.isdigit():
1823 return int(value)
1824 if value.startswith('"') and value.endswith('"'):
1825 return value[1:-1]
1826 return value
1828 def build_profile(self, objects, nodes):
1829 profile = Profile()
1831 profile[SAMPLES] = 0
1832 for id, object in objects.iteritems():
1833 # Ignore fake objects (process names, modules, "Everything", "kernel", etc.)
1834 if object['self'] == 0:
1835 continue
1837 function = Function(id, object['name'])
1838 function[SAMPLES] = object['self']
1839 profile.add_function(function)
1840 profile[SAMPLES] += function[SAMPLES]
1842 for id, node in nodes.iteritems():
1843 # Ignore fake calls
1844 if node['self'] == 0:
1845 continue
1847 # Find a non-ignored parent
1848 parent_id = node['parent']
1849 while parent_id != 0:
1850 parent = nodes[parent_id]
1851 caller_id = parent['object']
1852 if objects[caller_id]['self'] != 0:
1853 break
1854 parent_id = parent['parent']
1855 if parent_id == 0:
1856 continue
1858 callee_id = node['object']
1860 assert objects[caller_id]['self']
1861 assert objects[callee_id]['self']
1863 function = profile.functions[caller_id]
1865 samples = node['self']
1866 try:
1867 call = function.calls[callee_id]
1868 except KeyError:
1869 call = Call(callee_id)
1870 call[SAMPLES2] = samples
1871 function.add_call(call)
1872 else:
1873 call[SAMPLES2] += samples
1875 # Compute derived events
1876 profile.validate()
1877 profile.find_cycles()
1878 profile.ratio(TIME_RATIO, SAMPLES)
1879 profile.call_ratios(SAMPLES2)
1880 profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO)
1882 return profile
1885 class SharkParser(LineParser):
1886 """Parser for MacOSX Shark output.
1888 Author: tom@dbservice.com
1891 def __init__(self, infile):
1892 LineParser.__init__(self, infile)
1893 self.stack = []
1894 self.entries = {}
1896 def add_entry(self, function):
1897 try:
1898 entry = self.entries[function.id]
1899 except KeyError:
1900 self.entries[function.id] = (function, { })
1901 else:
1902 function_total, callees_total = entry
1903 function_total.samples += function.samples
1905 def add_callee(self, function, callee):
1906 func, callees = self.entries[function.id]
1907 try:
1908 entry = callees[callee.id]
1909 except KeyError:
1910 callees[callee.id] = callee
1911 else:
1912 entry.samples += callee.samples
1914 def parse(self):
1915 self.readline()
1916 self.readline()
1917 self.readline()
1918 self.readline()
1920 match = re.compile(r'(?P<prefix>[|+ ]*)(?P<samples>\d+), (?P<symbol>[^,]+), (?P<image>.*)')
1922 while self.lookahead():
1923 line = self.consume()
1924 mo = match.match(line)
1925 if not mo:
1926 raise ParseError('failed to parse', line)
1928 fields = mo.groupdict()
1929 prefix = len(fields.get('prefix', 0)) / 2 - 1
1931 symbol = str(fields.get('symbol', 0))
1932 image = str(fields.get('image', 0))
1934 entry = Struct()
1935 entry.id = ':'.join([symbol, image])
1936 entry.samples = int(fields.get('samples', 0))
1938 entry.name = symbol
1939 entry.image = image
1941 # adjust the callstack
1942 if prefix < len(self.stack):
1943 del self.stack[prefix:]
1945 if prefix == len(self.stack):
1946 self.stack.append(entry)
1948 # if the callstack has had an entry, it's this functions caller
1949 if prefix > 0:
1950 self.add_callee(self.stack[prefix - 1], entry)
1952 self.add_entry(entry)
1954 profile = Profile()
1955 profile[SAMPLES] = 0
1956 for _function, _callees in self.entries.itervalues():
1957 function = Function(_function.id, _function.name)
1958 function[SAMPLES] = _function.samples
1959 profile.add_function(function)
1960 profile[SAMPLES] += _function.samples
1962 if _function.image:
1963 function.module = os.path.basename(_function.image)
1965 for _callee in _callees.itervalues():
1966 call = Call(_callee.id)
1967 call[SAMPLES] = _callee.samples
1968 function.add_call(call)
1970 # compute derived data
1971 profile.validate()
1972 profile.find_cycles()
1973 profile.ratio(TIME_RATIO, SAMPLES)
1974 profile.call_ratios(SAMPLES)
1975 profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO)
1977 return profile
1980 class XPerfParser(Parser):
1981 """Parser for CSVs generted by XPerf, from Microsoft Windows Performance Tools.
1984 def __init__(self, stream):
1985 Parser.__init__(self)
1986 self.stream = stream
1987 self.profile = Profile()
1988 self.profile[SAMPLES] = 0
1989 self.column = {}
1991 def parse(self):
1992 import csv
1993 reader = csv.reader(
1994 self.stream,
1995 delimiter = ',',
1996 quotechar = None,
1997 escapechar = None,
1998 doublequote = False,
1999 skipinitialspace = True,
2000 lineterminator = '\r\n',
2001 quoting = csv.QUOTE_NONE)
2002 it = iter(reader)
2003 row = reader.next()
2004 self.parse_header(row)
2005 for row in it:
2006 self.parse_row(row)
2008 # compute derived data
2009 self.profile.validate()
2010 self.profile.find_cycles()
2011 self.profile.ratio(TIME_RATIO, SAMPLES)
2012 self.profile.call_ratios(SAMPLES2)
2013 self.profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO)
2015 return self.profile
2017 def parse_header(self, row):
2018 for column in range(len(row)):
2019 name = row[column]
2020 assert name not in self.column
2021 self.column[name] = column
2023 def parse_row(self, row):
2024 fields = {}
2025 for name, column in self.column.iteritems():
2026 value = row[column]
2027 for factory in int, float:
2028 try:
2029 value = factory(value)
2030 except ValueError:
2031 pass
2032 else:
2033 break
2034 fields[name] = value
2036 process = fields['Process Name']
2037 symbol = fields['Module'] + '!' + fields['Function']
2038 weight = fields['Weight']
2039 count = fields['Count']
2041 function = self.get_function(process, symbol)
2042 function[SAMPLES] += weight * count
2043 self.profile[SAMPLES] += weight * count
2045 stack = fields['Stack']
2046 if stack != '?':
2047 stack = stack.split('/')
2048 assert stack[0] == '[Root]'
2049 if stack[-1] != symbol:
2050 # XXX: some cases the sampled function does not appear in the stack
2051 stack.append(symbol)
2052 caller = None
2053 for symbol in stack[1:]:
2054 callee = self.get_function(process, symbol)
2055 if caller is not None:
2056 try:
2057 call = caller.calls[callee.id]
2058 except KeyError:
2059 call = Call(callee.id)
2060 call[SAMPLES2] = count
2061 caller.add_call(call)
2062 else:
2063 call[SAMPLES2] += count
2064 caller = callee
2066 def get_function(self, process, symbol):
2067 function_id = process + '!' + symbol
2069 try:
2070 function = self.profile.functions[function_id]
2071 except KeyError:
2072 module, name = symbol.split('!', 1)
2073 function = Function(function_id, name)
2074 function.process = process
2075 function.module = module
2076 function[SAMPLES] = 0
2077 self.profile.add_function(function)
2079 return function
2082 class SleepyParser(Parser):
2083 """Parser for GNU gprof output.
2085 See also:
2086 - http://www.codersnotes.com/sleepy/
2087 - http://sleepygraph.sourceforge.net/
2090 def __init__(self, filename):
2091 Parser.__init__(self)
2093 from zipfile import ZipFile
2095 self.database = ZipFile(filename)
2097 self.version_0_7 = 'Version 0.7 required' in self.database.namelist()
2099 self.symbols = {}
2100 self.calls = {}
2102 self.profile = Profile()
2104 _symbol_re = re.compile(
2105 r'^(?P<id>\w+)' +
2106 r'\s+"(?P<module>[^"]*)"' +
2107 r'\s+"(?P<procname>[^"]*)"' +
2108 r'\s+"(?P<sourcefile>[^"]*)"' +
2109 r'\s+(?P<sourceline>\d+)$'
2112 def parse_symbols(self):
2113 if self.version_0_7:
2114 symbols_txt = 'Symbols.txt'
2115 else:
2116 symbols_txt = 'symbols.txt'
2117 lines = self.database.read(symbols_txt).splitlines()
2118 for line in lines:
2119 mo = self._symbol_re.match(line)
2120 if mo:
2121 symbol_id, module, procname, sourcefile, sourceline = mo.groups()
2123 function_id = ':'.join([module, procname])
2125 try:
2126 function = self.profile.functions[function_id]
2127 except KeyError:
2128 function = Function(function_id, procname)
2129 function.module = module
2130 function[SAMPLES] = 0
2131 self.profile.add_function(function)
2133 self.symbols[symbol_id] = function
2135 def parse_callstacks(self):
2136 if self.version_0_7:
2137 callstacks_txt = 'Callstacks.txt'
2138 else:
2139 callstacks_txt = 'callstacks.txt'
2140 lines = self.database.read(callstacks_txt).splitlines()
2141 for line in lines:
2142 fields = line.split()
2143 samples = float(fields[0])
2144 callstack = fields[1:]
2146 callstack = [self.symbols[symbol_id] for symbol_id in callstack]
2148 callee = callstack[0]
2150 callee[SAMPLES] += samples
2151 self.profile[SAMPLES] += samples
2153 for caller in callstack[1:]:
2154 try:
2155 call = caller.calls[callee.id]
2156 except KeyError:
2157 call = Call(callee.id)
2158 call[SAMPLES2] = samples
2159 caller.add_call(call)
2160 else:
2161 call[SAMPLES2] += samples
2163 callee = caller
2165 def parse(self):
2166 profile = self.profile
2167 profile[SAMPLES] = 0
2169 self.parse_symbols()
2170 self.parse_callstacks()
2172 # Compute derived events
2173 profile.validate()
2174 profile.find_cycles()
2175 profile.ratio(TIME_RATIO, SAMPLES)
2176 profile.call_ratios(SAMPLES2)
2177 profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO)
2179 return profile
2182 class AQtimeTable:
2184 def __init__(self, name, fields):
2185 self.name = name
2187 self.fields = fields
2188 self.field_column = {}
2189 for column in range(len(fields)):
2190 self.field_column[fields[column]] = column
2191 self.rows = []
2193 def __len__(self):
2194 return len(self.rows)
2196 def __iter__(self):
2197 for values, children in self.rows:
2198 fields = {}
2199 for name, value in zip(self.fields, values):
2200 fields[name] = value
2201 children = dict([(child.name, child) for child in children])
2202 yield fields, children
2203 raise StopIteration
2205 def add_row(self, values, children=()):
2206 self.rows.append((values, children))
2209 class AQtimeParser(XmlParser):
2211 def __init__(self, stream):
2212 XmlParser.__init__(self, stream)
2213 self.tables = {}
2215 def parse(self):
2216 self.element_start('AQtime_Results')
2217 self.parse_headers()
2218 results = self.parse_results()
2219 self.element_end('AQtime_Results')
2220 return self.build_profile(results)
2222 def parse_headers(self):
2223 self.element_start('HEADERS')
2224 while self.token.type == XML_ELEMENT_START:
2225 self.parse_table_header()
2226 self.element_end('HEADERS')
2228 def parse_table_header(self):
2229 attrs = self.element_start('TABLE_HEADER')
2230 name = attrs['NAME']
2231 id = int(attrs['ID'])
2232 field_types = []
2233 field_names = []
2234 while self.token.type == XML_ELEMENT_START:
2235 field_type, field_name = self.parse_table_field()
2236 field_types.append(field_type)
2237 field_names.append(field_name)
2238 self.element_end('TABLE_HEADER')
2239 self.tables[id] = name, field_types, field_names
2241 def parse_table_field(self):
2242 attrs = self.element_start('TABLE_FIELD')
2243 type = attrs['TYPE']
2244 name = self.character_data()
2245 self.element_end('TABLE_FIELD')
2246 return type, name
2248 def parse_results(self):
2249 self.element_start('RESULTS')
2250 table = self.parse_data()
2251 self.element_end('RESULTS')
2252 return table
2254 def parse_data(self):
2255 rows = []
2256 attrs = self.element_start('DATA')
2257 table_id = int(attrs['TABLE_ID'])
2258 table_name, field_types, field_names = self.tables[table_id]
2259 table = AQtimeTable(table_name, field_names)
2260 while self.token.type == XML_ELEMENT_START:
2261 row, children = self.parse_row(field_types)
2262 table.add_row(row, children)
2263 self.element_end('DATA')
2264 return table
2266 def parse_row(self, field_types):
2267 row = [None]*len(field_types)
2268 children = []
2269 self.element_start('ROW')
2270 while self.token.type == XML_ELEMENT_START:
2271 if self.token.name_or_data == 'FIELD':
2272 field_id, field_value = self.parse_field(field_types)
2273 row[field_id] = field_value
2274 elif self.token.name_or_data == 'CHILDREN':
2275 children = self.parse_children()
2276 else:
2277 raise XmlTokenMismatch("<FIELD ...> or <CHILDREN ...>", self.token)
2278 self.element_end('ROW')
2279 return row, children
2281 def parse_field(self, field_types):
2282 attrs = self.element_start('FIELD')
2283 id = int(attrs['ID'])
2284 type = field_types[id]
2285 value = self.character_data()
2286 if type == 'Integer':
2287 value = int(value)
2288 elif type == 'Float':
2289 value = float(value)
2290 elif type == 'Address':
2291 value = int(value)
2292 elif type == 'String':
2293 pass
2294 else:
2295 assert False
2296 self.element_end('FIELD')
2297 return id, value
2299 def parse_children(self):
2300 children = []
2301 self.element_start('CHILDREN')
2302 while self.token.type == XML_ELEMENT_START:
2303 table = self.parse_data()
2304 assert table.name not in children
2305 children.append(table)
2306 self.element_end('CHILDREN')
2307 return children
2309 def build_profile(self, results):
2310 assert results.name == 'Routines'
2311 profile = Profile()
2312 profile[TIME] = 0.0
2313 for fields, tables in results:
2314 function = self.build_function(fields)
2315 children = tables['Children']
2316 for fields, _ in children:
2317 call = self.build_call(fields)
2318 function.add_call(call)
2319 profile.add_function(function)
2320 profile[TIME] = profile[TIME] + function[TIME]
2321 profile[TOTAL_TIME] = profile[TIME]
2322 profile.ratio(TOTAL_TIME_RATIO, TOTAL_TIME)
2323 return profile
2325 def build_function(self, fields):
2326 function = Function(self.build_id(fields), self.build_name(fields))
2327 function[TIME] = fields['Time']
2328 function[TOTAL_TIME] = fields['Time with Children']
2329 #function[TIME_RATIO] = fields['% Time']/100.0
2330 #function[TOTAL_TIME_RATIO] = fields['% with Children']/100.0
2331 return function
2333 def build_call(self, fields):
2334 call = Call(self.build_id(fields))
2335 call[TIME] = fields['Time']
2336 call[TOTAL_TIME] = fields['Time with Children']
2337 #call[TIME_RATIO] = fields['% Time']/100.0
2338 #call[TOTAL_TIME_RATIO] = fields['% with Children']/100.0
2339 return call
2341 def build_id(self, fields):
2342 return ':'.join([fields['Module Name'], fields['Unit Name'], fields['Routine Name']])
2344 def build_name(self, fields):
2345 # TODO: use more fields
2346 return fields['Routine Name']
2349 class PstatsParser:
2350 """Parser python profiling statistics saved with te pstats module."""
2352 def __init__(self, *filename):
2353 import pstats
2354 try:
2355 self.stats = pstats.Stats(*filename)
2356 except ValueError:
2357 import hotshot.stats
2358 self.stats = hotshot.stats.load(filename[0])
2359 self.profile = Profile()
2360 self.function_ids = {}
2362 def get_function_name(self, (filename, line, name)):
2363 funcname = ""
2364 if filename != "~":
2365 funcname = os.path.basename( filename ) + ":"
2366 if line != 0:
2367 funcname += str( line ) + ":"
2368 if filename == "~" and line == 0 and \
2369 name.startswith( '<' ) and name.endswith( '>' ):
2370 funcname += "{%s}" % name[ 1 : -1 ]
2371 else:
2372 funcname += name
2373 return funcname
2375 def get_function(self, key, index = None):
2376 try:
2377 id = self.function_ids[key]
2378 except KeyError:
2379 id = len(self.function_ids)
2380 name = self.get_function_name(key)
2381 function = Function(id, name)
2382 self.profile.functions[id] = function
2383 self.function_ids[key] = id
2384 else:
2385 function = self.profile.functions[id]
2387 if index is not None:
2388 function.index = index
2389 return function
2391 def parse(self):
2392 self.profile[TIME] = 0.0
2393 self.profile[TOTAL_TIME] = self.stats.total_tt
2394 index = 0
2395 for fn, (primitive_calls, actual_calls,
2396 tt, ct, callers) in self.stats.stats.iteritems():
2397 callee = self.get_function(fn, index)
2398 index += 1
2399 callee.called = actual_calls
2400 callee.primitive_called = primitive_calls
2401 callee[TOTAL_TIME] = ct
2402 callee[TIME] = tt
2403 self.profile[TIME] += tt
2404 self.profile[TOTAL_TIME] = max(self.profile[TOTAL_TIME], ct)
2405 for fn, value in callers.iteritems():
2406 caller = self.get_function(fn)
2407 call = Call(callee.id)
2408 if isinstance(value, tuple):
2409 for i in xrange(0, len(value), 4):
2410 actual_calls, primitive_calls, tt, ct = value[i:i+4]
2411 if CALLS in call:
2412 if callee.id != caller.id:
2413 call[CALLS] += primitive_calls
2414 else:
2415 # Recursive
2416 call[CALLS] += actual_calls
2417 else:
2418 if callee.id != caller.id:
2419 call[CALLS] = primitive_calls
2420 else:
2421 # Recursive
2422 call[CALLS] = actual_calls
2423 if TOTAL_TIME in call:
2424 call[TOTAL_TIME] += ct
2425 else:
2426 call[TOTAL_TIME] = ct
2427 else:
2428 call[CALLS] = value
2429 call[TOTAL_TIME] = ratio(value, actual_calls)*ct
2431 caller.add_call(call)
2432 #self.stats.print_stats()
2433 #self.stats.print_callees()
2435 # Compute derived events
2436 self.profile.validate()
2437 self.profile.ratio(TIME_RATIO, TIME)
2438 self.profile.ratio(TOTAL_TIME_RATIO, TOTAL_TIME)
2440 return self.profile
2443 class Theme:
2445 def __init__(self,
2446 bgcolor = (0.0, 0.0, 1.0),
2447 mincolor = (0.0, 0.0, 0.0),
2448 maxcolor = (0.0, 0.0, 1.0),
2449 fontname = "Arial",
2450 minfontsize = 10.0,
2451 maxfontsize = 10.0,
2452 minpenwidth = 0.5,
2453 maxpenwidth = 4.0,
2454 gamma = 2.2,
2455 skew = 1.0):
2456 self.bgcolor = bgcolor
2457 self.mincolor = mincolor
2458 self.maxcolor = maxcolor
2459 self.fontname = fontname
2460 self.minfontsize = minfontsize
2461 self.maxfontsize = maxfontsize
2462 self.minpenwidth = minpenwidth
2463 self.maxpenwidth = maxpenwidth
2464 self.gamma = gamma
2465 self.skew = skew
2467 def graph_bgcolor(self):
2468 return self.hsl_to_rgb(*self.bgcolor)
2470 def graph_fontname(self):
2471 return self.fontname
2473 def graph_fontsize(self):
2474 return self.minfontsize
2476 def node_bgcolor(self, weight):
2477 return self.color(weight)
2479 def node_fgcolor(self, weight):
2480 return self.graph_bgcolor()
2482 def node_fontsize(self, weight):
2483 return self.fontsize(weight)
2485 def edge_color(self, weight):
2486 return self.color(weight)
2488 def edge_fontsize(self, weight):
2489 return self.fontsize(weight)
2491 def edge_penwidth(self, weight):
2492 return max(weight*self.maxpenwidth, self.minpenwidth)
2494 def edge_arrowsize(self, weight):
2495 return 0.0
2496 # return 0.5 * math.sqrt(self.edge_penwidth(weight))
2498 def fontsize(self, weight):
2499 return max(weight**2 * self.maxfontsize, self.minfontsize)
2501 def color(self, weight):
2502 weight = min(max(weight, 0.0), 1.0)
2504 hmin, smin, lmin = self.mincolor
2505 hmax, smax, lmax = self.maxcolor
2507 if self.skew < 0:
2508 raise ValueError("Skew must be greater than 0")
2509 elif self.skew == 1.0:
2510 h = hmin + weight*(hmax - hmin)
2511 s = smin + weight*(smax - smin)
2512 l = lmin + weight*(lmax - lmin)
2513 else:
2514 base = self.skew
2515 h = hmin + ((hmax-hmin)*(-1.0 + (base ** weight)) / (base - 1.0))
2516 s = smin + ((smax-smin)*(-1.0 + (base ** weight)) / (base - 1.0))
2517 l = lmin + ((lmax-lmin)*(-1.0 + (base ** weight)) / (base - 1.0))
2519 return self.hsl_to_rgb(h, s, l)
2521 def hsl_to_rgb(self, h, s, l):
2522 """Convert a color from HSL color-model to RGB.
2524 See also:
2525 - http://www.w3.org/TR/css3-color/#hsl-color
2528 h = h % 1.0
2529 s = min(max(s, 0.0), 1.0)
2530 l = min(max(l, 0.0), 1.0)
2532 if l <= 0.5:
2533 m2 = l*(s + 1.0)
2534 else:
2535 m2 = l + s - l*s
2536 m1 = l*2.0 - m2
2537 r = self._hue_to_rgb(m1, m2, h + 1.0/3.0)
2538 g = self._hue_to_rgb(m1, m2, h)
2539 b = self._hue_to_rgb(m1, m2, h - 1.0/3.0)
2541 # Apply gamma correction
2542 r **= self.gamma
2543 g **= self.gamma
2544 b **= self.gamma
2546 return (r, g, b)
2548 def _hue_to_rgb(self, m1, m2, h):
2549 if h < 0.0:
2550 h += 1.0
2551 elif h > 1.0:
2552 h -= 1.0
2553 if h*6 < 1.0:
2554 return m1 + (m2 - m1)*h*6.0
2555 elif h*2 < 1.0:
2556 return m2
2557 elif h*3 < 2.0:
2558 return m1 + (m2 - m1)*(2.0/3.0 - h)*6.0
2559 else:
2560 return m1
2563 TEMPERATURE_COLORMAP = Theme(
2564 mincolor = (2.0/3.0, 0.80, 0.25), # dark blue
2565 maxcolor = (0.0, 1.0, 0.5), # satured red
2566 gamma = 1.0
2569 PINK_COLORMAP = Theme(
2570 mincolor = (0.0, 1.0, 0.90), # pink
2571 maxcolor = (0.0, 1.0, 0.5), # satured red
2574 GRAY_COLORMAP = Theme(
2575 mincolor = (0.0, 0.0, 0.85), # light gray
2576 maxcolor = (0.0, 0.0, 0.0), # black
2579 BW_COLORMAP = Theme(
2580 minfontsize = 8.0,
2581 maxfontsize = 24.0,
2582 mincolor = (0.0, 0.0, 0.0), # black
2583 maxcolor = (0.0, 0.0, 0.0), # black
2584 minpenwidth = 0.1,
2585 maxpenwidth = 8.0,
2589 class DotWriter:
2590 """Writer for the DOT language.
2592 See also:
2593 - "The DOT Language" specification
2594 http://www.graphviz.org/doc/info/lang.html
2597 strip = False
2598 wrap = False
2600 def __init__(self, fp):
2601 self.fp = fp
2603 def wrap_function_name(self, name):
2604 """Split the function name on multiple lines."""
2606 if len(name) > 32:
2607 ratio = 2.0/3.0
2608 height = max(int(len(name)/(1.0 - ratio) + 0.5), 1)
2609 width = max(len(name)/height, 32)
2610 # TODO: break lines in symbols
2611 name = textwrap.fill(name, width, break_long_words=False)
2613 # Take away spaces
2614 name = name.replace(", ", ",")
2615 name = name.replace("> >", ">>")
2616 name = name.replace("> >", ">>") # catch consecutive
2618 return name
2620 def graph(self, profile, theme):
2621 self.begin_graph()
2623 fontname = theme.graph_fontname()
2625 self.attr('graph', fontname=fontname, ranksep=0.25, nodesep=0.125)
2626 self.attr('node', fontname=fontname, shape="box", style="filled", fontcolor="white", width=0, height=0)
2627 self.attr('edge', fontname=fontname)
2629 for function in profile.functions.itervalues():
2630 labels = []
2631 if function.process is not None:
2632 labels.append(function.process)
2633 if function.module is not None:
2634 labels.append(function.module)
2636 if self.strip:
2637 function_name = function.stripped_name()
2638 else:
2639 function_name = function.name
2640 if self.wrap:
2641 function_name = self.wrap_function_name(function_name)
2642 labels.append(function_name)
2644 for event in TOTAL_TIME_RATIO, TIME_RATIO:
2645 if event in function.events:
2646 label = event.format(function[event])
2647 labels.append(label)
2648 if function.called is not None and \
2649 function.primitive_called is not None and \
2650 function.called != function.primitive_called:
2651 labels.append("%d/%dx" % (function.called, function.primitive_called))
2652 elif function.called is not None:
2653 labels.append("%dx" % (function.called,))
2655 if function.weight is not None:
2656 weight = function.weight
2657 else:
2658 weight = 0.0
2660 label = '\n'.join(labels)
2661 if function.index is not None:
2662 label += "--" + str( function.index )
2663 self.node(function.id,
2664 label = label,
2665 color = self.color(theme.node_bgcolor(weight)),
2666 fontcolor = self.color(theme.node_fgcolor(weight)),
2667 fontsize = "%.2f" % theme.node_fontsize(weight),
2670 for call in function.calls.itervalues():
2671 callee = profile.functions[call.callee_id]
2673 labels = []
2674 for event in TOTAL_TIME_RATIO, CALLS:
2675 if event in call.events:
2676 label = event.format(call[event])
2677 labels.append(label)
2679 if call.weight is not None:
2680 weight = call.weight
2681 elif callee.weight is not None:
2682 weight = callee.weight
2683 else:
2684 weight = 0.0
2686 label = '\n'.join(labels)
2688 self.edge(function.id, call.callee_id,
2689 label = label,
2690 color = self.color(theme.edge_color(weight)),
2691 fontcolor = self.color(theme.edge_color(weight)),
2692 fontsize = "%.2f" % theme.edge_fontsize(weight),
2693 penwidth = "%.2f" % theme.edge_penwidth(weight),
2694 labeldistance = "%.2f" % theme.edge_penwidth(weight),
2695 arrowsize = "%.2f" % theme.edge_arrowsize(weight),
2698 self.end_graph()
2700 def begin_graph(self):
2701 self.write('digraph {\n')
2703 def end_graph(self):
2704 self.write('}\n')
2706 def attr(self, what, **attrs):
2707 self.write("\t")
2708 self.write(what)
2709 self.attr_list(attrs)
2710 self.write(";\n")
2712 def node(self, node, **attrs):
2713 self.write("\t")
2714 self.id(node)
2715 self.attr_list(attrs)
2716 self.write(";\n")
2718 def edge(self, src, dst, **attrs):
2719 self.write("\t")
2720 self.id(src)
2721 self.write(" -> ")
2722 self.id(dst)
2723 self.attr_list(attrs)
2724 self.write(";\n")
2726 def attr_list(self, attrs):
2727 if not attrs:
2728 return
2729 self.write(' [')
2730 first = True
2731 for name, value in attrs.iteritems():
2732 if first:
2733 first = False
2734 else:
2735 self.write(", ")
2736 self.id(name)
2737 self.write('=')
2738 self.id(value)
2739 self.write(']')
2741 def id(self, id):
2742 if isinstance(id, (int, float)):
2743 s = str(id)
2744 elif isinstance(id, basestring):
2745 if id.isalnum() and not id.startswith('0x'):
2746 s = id
2747 else:
2748 s = self.escape(id)
2749 else:
2750 raise TypeError
2751 self.write(s)
2753 def color(self, (r, g, b)):
2755 def float2int(f):
2756 if f <= 0.0:
2757 return 0
2758 if f >= 1.0:
2759 return 255
2760 return int(255.0*f + 0.5)
2762 return "#" + "".join(["%02x" % float2int(c) for c in (r, g, b)])
2764 def escape(self, s):
2765 s = s.encode('utf-8')
2766 s = s.replace('\\', r'\\')
2767 s = s.replace('\n', r'\n')
2768 s = s.replace('\t', r'\t')
2769 s = s.replace('"', r'\"')
2770 return '"' + s + '"'
2772 def write(self, s):
2773 self.fp.write(s)
2776 class Main:
2777 """Main program."""
2779 themes = {
2780 "color": TEMPERATURE_COLORMAP,
2781 "pink": PINK_COLORMAP,
2782 "gray": GRAY_COLORMAP,
2783 "bw": BW_COLORMAP,
2786 def main(self):
2787 """Main program."""
2789 parser = optparse.OptionParser(
2790 usage="\n\t%prog [options] [file] ...",
2791 version="%%prog %s" % __version__)
2792 parser.add_option(
2793 '-o', '--output', metavar='FILE',
2794 type="string", dest="output",
2795 help="output filename [stdout]")
2796 parser.add_option(
2797 '-n', '--node-thres', metavar='PERCENTAGE',
2798 type="float", dest="node_thres", default=0.5,
2799 help="eliminate nodes below this threshold [default: %default]")
2800 parser.add_option(
2801 '-e', '--edge-thres', metavar='PERCENTAGE',
2802 type="float", dest="edge_thres", default=0.1,
2803 help="eliminate edges below this threshold [default: %default]")
2804 parser.add_option(
2805 '-f', '--format',
2806 type="choice", choices=('prof', 'callgrind', 'perf', 'oprofile', 'hprof', 'sysprof', 'pstats', 'shark', 'sleepy', 'aqtime', 'xperf'),
2807 dest="format", default="prof",
2808 help="profile format: prof, callgrind, oprofile, hprof, sysprof, shark, sleepy, aqtime, pstats, or xperf [default: %default]")
2809 parser.add_option(
2810 '-c', '--colormap',
2811 type="choice", choices=('color', 'pink', 'gray', 'bw'),
2812 dest="theme", default="color",
2813 help="color map: color, pink, gray, or bw [default: %default]")
2814 parser.add_option(
2815 '-s', '--strip',
2816 action="store_true",
2817 dest="strip", default=False,
2818 help="strip function parameters, template parameters, and const modifiers from demangled C++ function names")
2819 parser.add_option(
2820 '-w', '--wrap',
2821 action="store_true",
2822 dest="wrap", default=False,
2823 help="wrap function names")
2824 # add a new option to control skew of the colorization curve
2825 parser.add_option(
2826 '--skew',
2827 type="float", dest="theme_skew", default=1.0,
2828 help="skew the colorization curve. Values < 1.0 give more variety to lower percentages. Value > 1.0 give less variety to lower percentages")
2829 (self.options, self.args) = parser.parse_args(sys.argv[1:])
2831 if len(self.args) > 1 and self.options.format != 'pstats':
2832 parser.error('incorrect number of arguments')
2834 try:
2835 self.theme = self.themes[self.options.theme]
2836 except KeyError:
2837 parser.error('invalid colormap \'%s\'' % self.options.theme)
2839 # set skew on the theme now that it has been picked.
2840 if self.options.theme_skew:
2841 self.theme.skew = self.options.theme_skew
2843 if self.options.format == 'prof':
2844 if not self.args:
2845 fp = sys.stdin
2846 else:
2847 fp = open(self.args[0], 'rt')
2848 parser = GprofParser(fp)
2849 elif self.options.format == 'callgrind':
2850 if not self.args:
2851 fp = sys.stdin
2852 else:
2853 fp = open(self.args[0], 'rt')
2854 parser = CallgrindParser(fp)
2855 elif self.options.format == 'perf':
2856 if not self.args:
2857 fp = sys.stdin
2858 else:
2859 fp = open(self.args[0], 'rt')
2860 parser = PerfParser(fp)
2861 elif self.options.format == 'oprofile':
2862 if not self.args:
2863 fp = sys.stdin
2864 else:
2865 fp = open(self.args[0], 'rt')
2866 parser = OprofileParser(fp)
2867 elif self.options.format == 'sysprof':
2868 if not self.args:
2869 fp = sys.stdin
2870 else:
2871 fp = open(self.args[0], 'rt')
2872 parser = SysprofParser(fp)
2873 elif self.options.format == 'hprof':
2874 if not self.args:
2875 fp = sys.stdin
2876 else:
2877 fp = open(self.args[0], 'rt')
2878 parser = HProfParser(fp)
2879 elif self.options.format == 'pstats':
2880 if not self.args:
2881 parser.error('at least a file must be specified for pstats input')
2882 parser = PstatsParser(*self.args)
2883 elif self.options.format == 'xperf':
2884 if not self.args:
2885 fp = sys.stdin
2886 else:
2887 fp = open(self.args[0], 'rt')
2888 parser = XPerfParser(fp)
2889 elif self.options.format == 'shark':
2890 if not self.args:
2891 fp = sys.stdin
2892 else:
2893 fp = open(self.args[0], 'rt')
2894 parser = SharkParser(fp)
2895 elif self.options.format == 'sleepy':
2896 if len(self.args) != 1:
2897 parser.error('exactly one file must be specified for sleepy input')
2898 parser = SleepyParser(self.args[0])
2899 elif self.options.format == 'aqtime':
2900 if not self.args:
2901 fp = sys.stdin
2902 else:
2903 fp = open(self.args[0], 'rt')
2904 parser = AQtimeParser(fp)
2905 else:
2906 parser.error('invalid format \'%s\'' % self.options.format)
2908 self.profile = parser.parse()
2910 if self.options.output is None:
2911 self.output = sys.stdout
2912 else:
2913 self.output = open(self.options.output, 'wt')
2915 self.write_graph()
2917 def write_graph(self):
2918 dot = DotWriter(self.output)
2919 dot.strip = self.options.strip
2920 dot.wrap = self.options.wrap
2922 profile = self.profile
2923 profile.prune(self.options.node_thres/100.0, self.options.edge_thres/100.0)
2925 dot.graph(profile, self.theme)
2928 if __name__ == '__main__':
2929 Main().main()