thirdparty/gprof2dot/gprof2dot.py

   1 #!/usr/bin/env python
   2 #
   3 # Copyright 2008-2009 Jose Fonseca
   4 #
   5 # This program is free software: you can redistribute it and/or modify it
   6 # under the terms of the GNU Lesser General Public License as published
   7 # by the Free Software Foundation, either version 3 of the License, or
   8 # (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU Lesser General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU Lesser General Public License
  16 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  17 #
  18
  19 """Generate a dot graph from the output of several profilers."""
  20
  21 __author__ = "Jose Fonseca"
  22
  23 __version__ = "1.0"
  24
  25
  26 import sys
  27 import math
  28 import os.path
  29 import re
  30 import textwrap
  31 import optparse
  32 import xml.parsers.expat
  33
  34
  35 try:
  36     # Debugging helper module
  37     import debug
  38 except ImportError:
  39     pass
  40
  41
  42 def times(x):
  43     return "%dx" % (x,)
  44
  45 def percentage(p):
  46     return "%.02f%%" % (p*100.0,)
  47
  48 def add(a, b):
  49     return a + b
  50
  51 def equal(a, b):
  52     if a == b:
  53         return a
  54     else:
  55         return None
  56
  57 def fail(a, b):
  58     assert False
  59
  60
  61 tol = 2 ** -23
  62
  63 def ratio(numerator, denominator):
  64     try:
  65         ratio = float(numerator)/float(denominator)
  66     except ZeroDivisionError:
  67         # 0/0 is undefined, but 1.0 yields more useful results
  68         return 1.0
  69     if ratio < 0.0:
  70         if ratio < -tol:
  71             sys.stderr.write('warning: negative ratio (%s/%s)\n' % (numerator, denominator))
  72         return 0.0
  73     if ratio > 1.0:
  74         if ratio > 1.0 + tol:
  75             sys.stderr.write('warning: ratio greater than one (%s/%s)\n' % (numerator, denominator))
  76         return 1.0
  77     return ratio
  78
  79
  80 class UndefinedEvent(Exception):
  81     """Raised when attempting to get an event which is undefined."""
  82
  83     def __init__(self, event):
  84         Exception.__init__(self)
  85         self.event = event
  86
  87     def __str__(self):
  88         return 'unspecified event %s' % self.event.name
  89
  90
  91 class Event(object):
  92     """Describe a kind of event, and its basic operations."""
  93
  94     def __init__(self, name, null, aggregator, formatter = str):
  95         self.name = name
  96         self._null = null
  97         self._aggregator = aggregator
  98         self._formatter = formatter
  99
 100     def __eq__(self, other):
 101         return self is other
 102
 103     def __hash__(self):
 104         return id(self)
 105
 106     def null(self):
 107         return self._null
 108
 109     def aggregate(self, val1, val2):
 110         """Aggregate two event values."""
 111         assert val1 is not None
 112         assert val2 is not None
 113         return self._aggregator(val1, val2)
 114
 115     def format(self, val):
 116         """Format an event value."""
 117         assert val is not None
 118         return self._formatter(val)
 119
 120
 121 CALLS = Event("Calls", 0, add, times)
 122 SAMPLES = Event("Samples", 0, add)
 123 SAMPLES2 = Event("Samples", 0, add)
 124
 125 TIME = Event("Time", 0.0, add, lambda x: '(' + str(x) + ')')
 126 TIME_RATIO = Event("Time ratio", 0.0, add, lambda x: '(' + percentage(x) + ')')
 127 TOTAL_TIME = Event("Total time", 0.0, fail)
 128 TOTAL_TIME_RATIO = Event("Total time ratio", 0.0, fail, percentage)
 129
 130
 131 class Object(object):
 132     """Base class for all objects in profile which can store events."""
 133
 134     def __init__(self, events=None):
 135         if events is None:
 136             self.events = {}
 137         else:
 138             self.events = events
 139
 140     def __hash__(self):
 141         return id(self)
 142
 143     def __eq__(self, other):
 144         return self is other
 145
 146     def __contains__(self, event):
 147         return event in self.events
 148
 149     def __getitem__(self, event):
 150         try:
 151             return self.events[event]
 152         except KeyError:
 153             raise UndefinedEvent(event)
 154
 155     def __setitem__(self, event, value):
 156         if value is None:
 157             if event in self.events:
 158                 del self.events[event]
 159         else:
 160             self.events[event] = value
 161
 162
 163 class Call(Object):
 164     """A call between functions.
 165
 166     There should be at most one call object for every pair of functions.
 167     """
 168
 169     def __init__(self, callee_id):
 170         Object.__init__(self)
 171         self.callee_id = callee_id
 172         self.ratio = None
 173         self.weight = None
 174
 175
 176 class Function(Object):
 177     """A function."""
 178
 179     def __init__(self, id, name):
 180         Object.__init__(self)
 181         self.id = id
 182         self.index = None
 183         self.name = name
 184         self.module = None
 185         self.process = None
 186         self.calls = {}
 187         self.called = None
 188         self.primitive_called = None
 189         self.weight = None
 190         self.cycle = None
 191
 192     def add_call(self, call):
 193         if call.callee_id in self.calls:
 194             sys.stderr.write('warning: overwriting call from function %s to %s\n' % (str(self.id), str(call.callee_id)))
 195         self.calls[call.callee_id] = call
 196
 197     def get_call(self, callee_id):
 198         if not callee_id in self.calls:
 199             call = Call(callee_id)
 200             call[SAMPLES] = 0
 201             call[SAMPLES2] = 0
 202             call[CALLS] = 0
 203             self.calls[callee_id] = call
 204         return self.calls[callee_id]
 205
 206     _parenthesis_re = re.compile(r'\([^()]*\)')
 207     _angles_re = re.compile(r'<[^<>]*>')
 208     _const_re = re.compile(r'\s+const$')
 209
 210     def stripped_name(self):
 211         """Remove extraneous information from C++ demangled function names."""
 212
 213         name = self.name
 214
 215         # Strip function parameters from name by recursively removing paired parenthesis
 216         while True:
 217             name, n = self._parenthesis_re.subn('', name)
 218             if not n:
 219                 break
 220
 221         # Strip const qualifier
 222         name = self._const_re.sub('', name)
 223
 224         # Strip template parameters from name by recursively removing paired angles
 225         while True:
 226             name, n = self._angles_re.subn('', name)
 227             if not n:
 228                 break
 229
 230         return name
 231
 232     # TODO: write utility functions
 233
 234     def __repr__(self):
 235         return self.name
 236
 237
 238 class Cycle(Object):
 239     """A cycle made from recursive function calls."""
 240
 241     def __init__(self):
 242         Object.__init__(self)
 243         # XXX: Do cycles need an id?
 244         self.functions = set()
 245
 246     def add_function(self, function):
 247         assert function not in self.functions
 248         self.functions.add(function)
 249         # XXX: Aggregate events?
 250         if function.cycle is not None:
 251             for other in function.cycle.functions:
 252                 if function not in self.functions:
 253                     self.add_function(other)
 254         function.cycle = self
 255
 256
 257 class Profile(Object):
 258     """The whole profile."""
 259
 260     def __init__(self):
 261         Object.__init__(self)
 262         self.functions = {}
 263         self.cycles = []
 264
 265     def add_function(self, function):
 266         if function.id in self.functions:
 267             sys.stderr.write('warning: overwriting function %s (id %s)\n' % (function.name, str(function.id)))
 268         self.functions[function.id] = function
 269
 270     def add_cycle(self, cycle):
 271         self.cycles.append(cycle)
 272
 273     def validate(self):
 274         """Validate the edges."""
 275
 276         for function in self.functions.itervalues():
 277             for callee_id in function.calls.keys():
 278                 assert function.calls[callee_id].callee_id == callee_id
 279                 if callee_id not in self.functions:
 280                     sys.stderr.write('warning: call to undefined function %s from function %s\n' % (str(callee_id), function.name))
 281                     del function.calls[callee_id]
 282
 283     def find_cycles(self):
 284         """Find cycles using Tarjan's strongly connected components algorithm."""
 285
 286         # Apply the Tarjan's algorithm successively until all functions are visited
 287         visited = set()
 288         for function in self.functions.itervalues():
 289             if function not in visited:
 290                 self._tarjan(function, 0, [], {}, {}, visited)
 291         cycles = []
 292         for function in self.functions.itervalues():
 293             if function.cycle is not None and function.cycle not in cycles:
 294                 cycles.append(function.cycle)
 295         self.cycles = cycles
 296         if 0:
 297             for cycle in cycles:
 298                 sys.stderr.write("Cycle:\n")
 299                 for member in cycle.functions:
 300                     sys.stderr.write("\tFunction %s\n" % member.name)
 301
 302     def _tarjan(self, function, order, stack, orders, lowlinks, visited):
 303         """Tarjan's strongly connected components algorithm.
 304
 305         See also:
 306         - http://en.wikipedia.org/wiki/Tarjan's_strongly_connected_components_algorithm
 307         """
 308
 309         visited.add(function)
 310         orders[function] = order
 311         lowlinks[function] = order
 312         order += 1
 313         pos = len(stack)
 314         stack.append(function)
 315         for call in function.calls.itervalues():
 316             callee = self.functions[call.callee_id]
 317             # TODO: use a set to optimize lookup
 318             if callee not in orders:
 319                 order = self._tarjan(callee, order, stack, orders, lowlinks, visited)
 320                 lowlinks[function] = min(lowlinks[function], lowlinks[callee])
 321             elif callee in stack:
 322                 lowlinks[function] = min(lowlinks[function], orders[callee])
 323         if lowlinks[function] == orders[function]:
 324             # Strongly connected component found
 325             members = stack[pos:]
 326             del stack[pos:]
 327             if len(members) > 1:
 328                 cycle = Cycle()
 329                 for member in members:
 330                     cycle.add_function(member)
 331         return order
 332
 333     def call_ratios(self, event):
 334         # Aggregate for incoming calls
 335         cycle_totals = {}
 336         for cycle in self.cycles:
 337             cycle_totals[cycle] = 0.0
 338         function_totals = {}
 339         for function in self.functions.itervalues():
 340             function_totals[function] = 0.0
 341         for function in self.functions.itervalues():
 342             for call in function.calls.itervalues():
 343                 if call.callee_id != function.id:
 344                     callee = self.functions[call.callee_id]
 345                     function_totals[callee] += call[event]
 346                     if callee.cycle is not None and callee.cycle is not function.cycle:
 347                         cycle_totals[callee.cycle] += call[event]
 348
 349         # Compute the ratios
 350         for function in self.functions.itervalues():
 351             for call in function.calls.itervalues():
 352                 assert call.ratio is None
 353                 if call.callee_id != function.id:
 354                     callee = self.functions[call.callee_id]
 355                     if callee.cycle is not None and callee.cycle is not function.cycle:
 356                         total = cycle_totals[callee.cycle]
 357                     else:
 358                         total = function_totals[callee]
 359                     call.ratio = ratio(call[event], total)
 360
 361     def integrate(self, outevent, inevent):
 362         """Propagate function time ratio allong the function calls.
 363
 364         Must be called after finding the cycles.
 365
 366         See also:
 367         - http://citeseer.ist.psu.edu/graham82gprof.html
 368         """
 369
 370         # Sanity checking
 371         assert outevent not in self
 372         for function in self.functions.itervalues():
 373             assert outevent not in function
 374             assert inevent in function
 375             for call in function.calls.itervalues():
 376                 assert outevent not in call
 377                 if call.callee_id != function.id:
 378                     assert call.ratio is not None
 379
 380         # Aggregate the input for each cycle
 381         for cycle in self.cycles:
 382             total = inevent.null()
 383             for function in self.functions.itervalues():
 384                 total = inevent.aggregate(total, function[inevent])
 385             self[inevent] = total
 386
 387         # Integrate along the edges
 388         total = inevent.null()
 389         for function in self.functions.itervalues():
 390             total = inevent.aggregate(total, function[inevent])
 391             self._integrate_function(function, outevent, inevent)
 392         self[outevent] = total
 393
 394     def _integrate_function(self, function, outevent, inevent):
 395         if function.cycle is not None:
 396             return self._integrate_cycle(function.cycle, outevent, inevent)
 397         else:
 398             if outevent not in function:
 399                 total = function[inevent]
 400                 for call in function.calls.itervalues():
 401                     if call.callee_id != function.id:
 402                         total += self._integrate_call(call, outevent, inevent)
 403                 function[outevent] = total
 404             return function[outevent]
 405
 406     def _integrate_call(self, call, outevent, inevent):
 407         assert outevent not in call
 408         assert call.ratio is not None
 409         callee = self.functions[call.callee_id]
 410         subtotal = call.ratio *self._integrate_function(callee, outevent, inevent)
 411         call[outevent] = subtotal
 412         return subtotal
 413
 414     def _integrate_cycle(self, cycle, outevent, inevent):
 415         if outevent not in cycle:
 416
 417             # Compute the outevent for the whole cycle
 418             total = inevent.null()
 419             for member in cycle.functions:
 420                 subtotal = member[inevent]
 421                 for call in member.calls.itervalues():
 422                     callee = self.functions[call.callee_id]
 423                     if callee.cycle is not cycle:
 424                         subtotal += self._integrate_call(call, outevent, inevent)
 425                 total += subtotal
 426             cycle[outevent] = total
 427
 428             # Compute the time propagated to callers of this cycle
 429             callees = {}
 430             for function in self.functions.itervalues():
 431                 if function.cycle is not cycle:
 432                     for call in function.calls.itervalues():
 433                         callee = self.functions[call.callee_id]
 434                         if callee.cycle is cycle:
 435                             try:
 436                                 callees[callee] += call.ratio
 437                             except KeyError:
 438                                 callees[callee] = call.ratio
 439
 440             for member in cycle.functions:
 441                 member[outevent] = outevent.null()
 442
 443             for callee, call_ratio in callees.iteritems():
 444                 ranks = {}
 445                 call_ratios = {}
 446                 partials = {}
 447                 self._rank_cycle_function(cycle, callee, 0, ranks)
 448                 self._call_ratios_cycle(cycle, callee, ranks, call_ratios, set())
 449                 partial = self._integrate_cycle_function(cycle, callee, call_ratio, partials, ranks, call_ratios, outevent, inevent)
 450                 assert partial == max(partials.values())
 451                 assert not total or abs(1.0 - partial/(call_ratio*total)) <= 0.001
 452
 453         return cycle[outevent]
 454
 455     def _rank_cycle_function(self, cycle, function, rank, ranks):
 456         if function not in ranks or ranks[function] > rank:
 457             ranks[function] = rank
 458             for call in function.calls.itervalues():
 459                 if call.callee_id != function.id:
 460                     callee = self.functions[call.callee_id]
 461                     if callee.cycle is cycle:
 462                         self._rank_cycle_function(cycle, callee, rank + 1, ranks)
 463
 464     def _call_ratios_cycle(self, cycle, function, ranks, call_ratios, visited):
 465         if function not in visited:
 466             visited.add(function)
 467             for call in function.calls.itervalues():
 468                 if call.callee_id != function.id:
 469                     callee = self.functions[call.callee_id]
 470                     if callee.cycle is cycle:
 471                         if ranks[callee] > ranks[function]:
 472                             call_ratios[callee] = call_ratios.get(callee, 0.0) + call.ratio
 473                             self._call_ratios_cycle(cycle, callee, ranks, call_ratios, visited)
 474
 475     def _integrate_cycle_function(self, cycle, function, partial_ratio, partials, ranks, call_ratios, outevent, inevent):
 476         if function not in partials:
 477             partial = partial_ratio*function[inevent]
 478             for call in function.calls.itervalues():
 479                 if call.callee_id != function.id:
 480                     callee = self.functions[call.callee_id]
 481                     if callee.cycle is not cycle:
 482                         assert outevent in call
 483                         partial += partial_ratio*call[outevent]
 484                     else:
 485                         if ranks[callee] > ranks[function]:
 486                             callee_partial = self._integrate_cycle_function(cycle, callee, partial_ratio, partials, ranks, call_ratios, outevent, inevent)
 487                             call_ratio = ratio(call.ratio, call_ratios[callee])
 488                             call_partial = call_ratio*callee_partial
 489                             try:
 490                                 call[outevent] += call_partial
 491                             except UndefinedEvent:
 492                                 call[outevent] = call_partial
 493                             partial += call_partial
 494             partials[function] = partial
 495             try:
 496                 function[outevent] += partial
 497             except UndefinedEvent:
 498                 function[outevent] = partial
 499         return partials[function]
 500
 501     def aggregate(self, event):
 502         """Aggregate an event for the whole profile."""
 503
 504         total = event.null()
 505         for function in self.functions.itervalues():
 506             try:
 507                 total = event.aggregate(total, function[event])
 508             except UndefinedEvent:
 509                 return
 510         self[event] = total
 511
 512     def ratio(self, outevent, inevent):
 513         assert outevent not in self
 514         assert inevent in self
 515         for function in self.functions.itervalues():
 516             assert outevent not in function
 517             assert inevent in function
 518             function[outevent] = ratio(function[inevent], self[inevent])
 519             for call in function.calls.itervalues():
 520                 assert outevent not in call
 521                 if inevent in call:
 522                     call[outevent] = ratio(call[inevent], self[inevent])
 523         self[outevent] = 1.0
 524
 525     def prune(self, node_thres, edge_thres):
 526         """Prune the profile"""
 527
 528         # compute the prune ratios
 529         for function in self.functions.itervalues():
 530             try:
 531                 function.weight = function[TOTAL_TIME_RATIO]
 532             except UndefinedEvent:
 533                 pass
 534
 535             for call in function.calls.itervalues():
 536                 callee = self.functions[call.callee_id]
 537
 538                 if TOTAL_TIME_RATIO in call:
 539                     # handle exact cases first
 540                     call.weight = call[TOTAL_TIME_RATIO]
 541                 else:
 542                     try:
 543                         # make a safe estimate
 544                         call.weight = min(function[TOTAL_TIME_RATIO], callee[TOTAL_TIME_RATIO])
 545                     except UndefinedEvent:
 546                         pass
 547
 548         # prune the nodes
 549         for function_id in self.functions.keys():
 550             function = self.functions[function_id]
 551             if function.weight is not None:
 552                 if function.weight < node_thres:
 553                     del self.functions[function_id]
 554
 555         # prune the egdes
 556         for function in self.functions.itervalues():
 557             for callee_id in function.calls.keys():
 558                 call = function.calls[callee_id]
 559                 if callee_id not in self.functions or call.weight is not None and call.weight < edge_thres:
 560                     del function.calls[callee_id]
 561
 562     def dump(self):
 563         for function in self.functions.itervalues():
 564             sys.stderr.write('Function %s:\n' % (function.name,))
 565             self._dump_events(function.events)
 566             for call in function.calls.itervalues():
 567                 callee = self.functions[call.callee_id]
 568                 sys.stderr.write('  Call %s:\n' % (callee.name,))
 569                 self._dump_events(call.events)
 570         for cycle in self.cycles:
 571             sys.stderr.write('Cycle:\n')
 572             self._dump_events(cycle.events)
 573             for function in cycle.functions:
 574                 sys.stderr.write('  Function %s\n' % (function.name,))
 575
 576     def _dump_events(self, events):
 577         for event, value in events.iteritems():
 578             sys.stderr.write('    %s: %s\n' % (event.name, event.format(value)))
 579
 580
 581 class Struct:
 582     """Masquerade a dictionary with a structure-like behavior."""
 583
 584     def __init__(self, attrs = None):
 585         if attrs is None:
 586             attrs = {}
 587         self.__dict__['_attrs'] = attrs
 588
 589     def __getattr__(self, name):
 590         try:
 591             return self._attrs[name]
 592         except KeyError:
 593             raise AttributeError(name)
 594
 595     def __setattr__(self, name, value):
 596         self._attrs[name] = value
 597
 598     def __str__(self):
 599         return str(self._attrs)
 600
 601     def __repr__(self):
 602         return repr(self._attrs)
 603
 604
 605 class ParseError(Exception):
 606     """Raised when parsing to signal mismatches."""
 607
 608     def __init__(self, msg, line):
 609         self.msg = msg
 610         # TODO: store more source line information
 611         self.line = line
 612
 613     def __str__(self):
 614         return '%s: %r' % (self.msg, self.line)
 615
 616
 617 class Parser:
 618     """Parser interface."""
 619
 620     def __init__(self):
 621         pass
 622
 623     def parse(self):
 624         raise NotImplementedError
 625
 626
 627 class LineParser(Parser):
 628     """Base class for parsers that read line-based formats."""
 629
 630     def __init__(self, file):
 631         Parser.__init__(self)
 632         self._file = file
 633         self.__line = None
 634         self.__eof = False
 635         self.line_no = 0
 636
 637     def readline(self):
 638         line = self._file.readline()
 639         if not line:
 640             self.__line = ''
 641             self.__eof = True
 642         else:
 643             self.line_no += 1
 644         self.__line = line.rstrip('\r\n')
 645
 646     def lookahead(self):
 647         assert self.__line is not None
 648         return self.__line
 649
 650     def consume(self):
 651         assert self.__line is not None
 652         line = self.__line
 653         self.readline()
 654         return line
 655
 656     def eof(self):
 657         assert self.__line is not None
 658         return self.__eof
 659
 660
 661 XML_ELEMENT_START, XML_ELEMENT_END, XML_CHARACTER_DATA, XML_EOF = range(4)
 662
 663
 664 class XmlToken:
 665
 666     def __init__(self, type, name_or_data, attrs = None, line = None, column = None):
 667         assert type in (XML_ELEMENT_START, XML_ELEMENT_END, XML_CHARACTER_DATA, XML_EOF)
 668         self.type = type
 669         self.name_or_data = name_or_data
 670         self.attrs = attrs
 671         self.line = line
 672         self.column = column
 673
 674     def __str__(self):
 675         if self.type == XML_ELEMENT_START:
 676             return '<' + self.name_or_data + ' ...>'
 677         if self.type == XML_ELEMENT_END:
 678             return '</' + self.name_or_data + '>'
 679         if self.type == XML_CHARACTER_DATA:
 680             return self.name_or_data
 681         if self.type == XML_EOF:
 682             return 'end of file'
 683         assert 0
 684
 685
 686 class XmlTokenizer:
 687     """Expat based XML tokenizer."""
 688
 689     def __init__(self, fp, skip_ws = True):
 690         self.fp = fp
 691         self.tokens = []
 692         self.index = 0
 693         self.final = False
 694         self.skip_ws = skip_ws
 695
 696         self.character_pos = 0, 0
 697         self.character_data = ''
 698
 699         self.parser = xml.parsers.expat.ParserCreate()
 700         self.parser.StartElementHandler  = self.handle_element_start
 701         self.parser.EndElementHandler    = self.handle_element_end
 702         self.parser.CharacterDataHandler = self.handle_character_data
 703
 704     def handle_element_start(self, name, attributes):
 705         self.finish_character_data()
 706         line, column = self.pos()
 707         token = XmlToken(XML_ELEMENT_START, name, attributes, line, column)
 708         self.tokens.append(token)
 709
 710     def handle_element_end(self, name):
 711         self.finish_character_data()
 712         line, column = self.pos()
 713         token = XmlToken(XML_ELEMENT_END, name, None, line, column)
 714         self.tokens.append(token)
 715
 716     def handle_character_data(self, data):
 717         if not self.character_data:
 718             self.character_pos = self.pos()
 719         self.character_data += data
 720
 721     def finish_character_data(self):
 722         if self.character_data:
 723             if not self.skip_ws or not self.character_data.isspace():
 724                 line, column = self.character_pos
 725                 token = XmlToken(XML_CHARACTER_DATA, self.character_data, None, line, column)
 726                 self.tokens.append(token)
 727             self.character_data = ''
 728
 729     def next(self):
 730         size = 16*1024
 731         while self.index >= len(self.tokens) and not self.final:
 732             self.tokens = []
 733             self.index = 0
 734             data = self.fp.read(size)
 735             self.final = len(data) < size
 736             try:
 737                 self.parser.Parse(data, self.final)
 738             except xml.parsers.expat.ExpatError, e:
 739                 #if e.code == xml.parsers.expat.errors.XML_ERROR_NO_ELEMENTS:
 740                 if e.code == 3:
 741                     pass
 742                 else:
 743                     raise e
 744         if self.index >= len(self.tokens):
 745             line, column = self.pos()
 746             token = XmlToken(XML_EOF, None, None, line, column)
 747         else:
 748             token = self.tokens[self.index]
 749             self.index += 1
 750         return token
 751
 752     def pos(self):
 753         return self.parser.CurrentLineNumber, self.parser.CurrentColumnNumber
 754
 755
 756 class XmlTokenMismatch(Exception):
 757
 758     def __init__(self, expected, found):
 759         self.expected = expected
 760         self.found = found
 761
 762     def __str__(self):
 763         return '%u:%u: %s expected, %s found' % (self.found.line, self.found.column, str(self.expected), str(self.found))
 764
 765
 766 class XmlParser(Parser):
 767     """Base XML document parser."""
 768
 769     def __init__(self, fp):
 770         Parser.__init__(self)
 771         self.tokenizer = XmlTokenizer(fp)
 772         self.consume()
 773
 774     def consume(self):
 775         self.token = self.tokenizer.next()
 776
 777     def match_element_start(self, name):
 778         return self.token.type == XML_ELEMENT_START and self.token.name_or_data == name
 779
 780     def match_element_end(self, name):
 781         return self.token.type == XML_ELEMENT_END and self.token.name_or_data == name
 782
 783     def element_start(self, name):
 784         while self.token.type == XML_CHARACTER_DATA:
 785             self.consume()
 786         if self.token.type != XML_ELEMENT_START:
 787             raise XmlTokenMismatch(XmlToken(XML_ELEMENT_START, name), self.token)
 788         if self.token.name_or_data != name:
 789             raise XmlTokenMismatch(XmlToken(XML_ELEMENT_START, name), self.token)
 790         attrs = self.token.attrs
 791         self.consume()
 792         return attrs
 793
 794     def element_end(self, name):
 795         while self.token.type == XML_CHARACTER_DATA:
 796             self.consume()
 797         if self.token.type != XML_ELEMENT_END:
 798             raise XmlTokenMismatch(XmlToken(XML_ELEMENT_END, name), self.token)
 799         if self.token.name_or_data != name:
 800             raise XmlTokenMismatch(XmlToken(XML_ELEMENT_END, name), self.token)
 801         self.consume()
 802
 803     def character_data(self, strip = True):
 804         data = ''
 805         while self.token.type == XML_CHARACTER_DATA:
 806             data += self.token.name_or_data
 807             self.consume()
 808         if strip:
 809             data = data.strip()
 810         return data
 811
 812
 813 class GprofParser(Parser):
 814     """Parser for GNU gprof output.
 815
 816     See also:
 817     - Chapter "Interpreting gprof's Output" from the GNU gprof manual
 818       http://sourceware.org/binutils/docs-2.18/gprof/Call-Graph.html#Call-Graph
 819     - File "cg_print.c" from the GNU gprof source code
 820       http://sourceware.org/cgi-bin/cvsweb.cgi/~checkout~/src/gprof/cg_print.c?rev=1.12&cvsroot=src
 821     """
 822
 823     def __init__(self, fp):
 824         Parser.__init__(self)
 825         self.fp = fp
 826         self.functions = {}
 827         self.cycles = {}
 828
 829     def readline(self):
 830         line = self.fp.readline()
 831         if not line:
 832             sys.stderr.write('error: unexpected end of file\n')
 833             sys.exit(1)
 834         line = line.rstrip('\r\n')
 835         return line
 836
 837     _int_re = re.compile(r'^\d+$')
 838     _float_re = re.compile(r'^\d+\.\d+$')
 839
 840     def translate(self, mo):
 841         """Extract a structure from a match object, while translating the types in the process."""
 842         attrs = {}
 843         groupdict = mo.groupdict()
 844         for name, value in groupdict.iteritems():
 845             if value is None:
 846                 value = None
 847             elif self._int_re.match(value):
 848                 value = int(value)
 849             elif self._float_re.match(value):
 850                 value = float(value)
 851             attrs[name] = (value)
 852         return Struct(attrs)
 853
 854     _cg_header_re = re.compile(
 855         # original gprof header
 856         r'^\s+called/total\s+parents\s*$|' +
 857         r'^index\s+%time\s+self\s+descendents\s+called\+self\s+name\s+index\s*$|' +
 858         r'^\s+called/total\s+children\s*$|' +
 859         # GNU gprof header
 860         r'^index\s+%\s+time\s+self\s+children\s+called\s+name\s*$'
 861     )
 862
 863     _cg_ignore_re = re.compile(
 864         # spontaneous
 865         r'^\s+<spontaneous>\s*$|'
 866         # internal calls (such as "mcount")
 867         r'^.*\((\d+)\)$'
 868     )
 869
 870     _cg_primary_re = re.compile(
 871         r'^\[(?P<index>\d+)\]?' +
 872         r'\s+(?P<percentage_time>\d+\.\d+)' +
 873         r'\s+(?P<self>\d+\.\d+)' +
 874         r'\s+(?P<descendants>\d+\.\d+)' +
 875         r'\s+(?:(?P<called>\d+)(?:\+(?P<called_self>\d+))?)?' +
 876         r'\s+(?P<name>\S.*?)' +
 877         r'(?:\s+<cycle\s(?P<cycle>\d+)>)?' +
 878         r'\s\[(\d+)\]$'
 879     )
 880
 881     _cg_parent_re = re.compile(
 882         r'^\s+(?P<self>\d+\.\d+)?' +
 883         r'\s+(?P<descendants>\d+\.\d+)?' +
 884         r'\s+(?P<called>\d+)(?:/(?P<called_total>\d+))?' +
 885         r'\s+(?P<name>\S.*?)' +
 886         r'(?:\s+<cycle\s(?P<cycle>\d+)>)?' +
 887         r'\s\[(?P<index>\d+)\]$'
 888     )
 889
 890     _cg_child_re = _cg_parent_re
 891
 892     _cg_cycle_header_re = re.compile(
 893         r'^\[(?P<index>\d+)\]?' +
 894         r'\s+(?P<percentage_time>\d+\.\d+)' +
 895         r'\s+(?P<self>\d+\.\d+)' +
 896         r'\s+(?P<descendants>\d+\.\d+)' +
 897         r'\s+(?:(?P<called>\d+)(?:\+(?P<called_self>\d+))?)?' +
 898         r'\s+<cycle\s(?P<cycle>\d+)\sas\sa\swhole>' +
 899         r'\s\[(\d+)\]$'
 900     )
 901
 902     _cg_cycle_member_re = re.compile(
 903         r'^\s+(?P<self>\d+\.\d+)?' +
 904         r'\s+(?P<descendants>\d+\.\d+)?' +
 905         r'\s+(?P<called>\d+)(?:\+(?P<called_self>\d+))?' +
 906         r'\s+(?P<name>\S.*?)' +
 907         r'(?:\s+<cycle\s(?P<cycle>\d+)>)?' +
 908         r'\s\[(?P<index>\d+)\]$'
 909     )
 910
 911     _cg_sep_re = re.compile(r'^--+$')
 912
 913     def parse_function_entry(self, lines):
 914         parents = []
 915         children = []
 916
 917         while True:
 918             if not lines:
 919                 sys.stderr.write('warning: unexpected end of entry\n')
 920             line = lines.pop(0)
 921             if line.startswith('['):
 922                 break
 923
 924             # read function parent line
 925             mo = self._cg_parent_re.match(line)
 926             if not mo:
 927                 if self._cg_ignore_re.match(line):
 928                     continue
 929                 sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line)
 930             else:
 931                 parent = self.translate(mo)
 932                 parents.append(parent)
 933
 934         # read primary line
 935         mo = self._cg_primary_re.match(line)
 936         if not mo:
 937             sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line)
 938             return
 939         else:
 940             function = self.translate(mo)
 941
 942         while lines:
 943             line = lines.pop(0)
 944
 945             # read function subroutine line
 946             mo = self._cg_child_re.match(line)
 947             if not mo:
 948                 if self._cg_ignore_re.match(line):
 949                     continue
 950                 sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line)
 951             else:
 952                 child = self.translate(mo)
 953                 children.append(child)
 954
 955         function.parents = parents
 956         function.children = children
 957
 958         self.functions[function.index] = function
 959
 960     def parse_cycle_entry(self, lines):
 961
 962         # read cycle header line
 963         line = lines[0]
 964         mo = self._cg_cycle_header_re.match(line)
 965         if not mo:
 966             sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line)
 967             return
 968         cycle = self.translate(mo)
 969
 970         # read cycle member lines
 971         cycle.functions = []
 972         for line in lines[1:]:
 973             mo = self._cg_cycle_member_re.match(line)
 974             if not mo:
 975                 sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line)
 976                 continue
 977             call = self.translate(mo)
 978             cycle.functions.append(call)
 979
 980         self.cycles[cycle.cycle] = cycle
 981
 982     def parse_cg_entry(self, lines):
 983         if lines[0].startswith("["):
 984             self.parse_cycle_entry(lines)
 985         else:
 986             self.parse_function_entry(lines)
 987
 988     def parse_cg(self):
 989         """Parse the call graph."""
 990
 991         # skip call graph header
 992         while not self._cg_header_re.match(self.readline()):
 993             pass
 994         line = self.readline()
 995         while self._cg_header_re.match(line):
 996             line = self.readline()
 997
 998         # process call graph entries
 999         entry_lines = []
1000         while line != '\014': # form feed
1001             if line and not line.isspace():
1002                 if self._cg_sep_re.match(line):
1003                     self.parse_cg_entry(entry_lines)
1004                     entry_lines = []
1005                 else:
1006                     entry_lines.append(line)
1007             line = self.readline()
1008
1009     def parse(self):
1010         self.parse_cg()
1011         self.fp.close()
1012
1013         profile = Profile()
1014         profile[TIME] = 0.0
1015
1016         cycles = {}
1017         for index in self.cycles.iterkeys():
1018             cycles[index] = Cycle()
1019
1020         for entry in self.functions.itervalues():
1021             # populate the function
1022             function = Function(entry.index, entry.name)
1023             function[TIME] = entry.self
1024             if entry.called is not None:
1025                 function.called = entry.called
1026             if entry.called_self is not None:
1027                 call = Call(entry.index)
1028                 call[CALLS] = entry.called_self
1029                 function.called += entry.called_self
1030
1031             # populate the function calls
1032             for child in entry.children:
1033                 call = Call(child.index)
1034
1035                 assert child.called is not None
1036                 call[CALLS] = child.called
1037
1038                 if child.index not in self.functions:
1039                     # NOTE: functions that were never called but were discovered by gprof's
1040                     # static call graph analysis dont have a call graph entry so we need
1041                     # to add them here
1042                     missing = Function(child.index, child.name)
1043                     function[TIME] = 0.0
1044                     function.called = 0
1045                     profile.add_function(missing)
1046
1047                 function.add_call(call)
1048
1049             profile.add_function(function)
1050
1051             if entry.cycle is not None:
1052                 try:
1053                     cycle = cycles[entry.cycle]
1054                 except KeyError:
1055                     sys.stderr.write('warning: <cycle %u as a whole> entry missing\n' % entry.cycle)
1056                     cycle = Cycle()
1057                     cycles[entry.cycle] = cycle
1058                 cycle.add_function(function)
1059
1060             profile[TIME] = profile[TIME] + function[TIME]
1061
1062         for cycle in cycles.itervalues():
1063             profile.add_cycle(cycle)
1064
1065         # Compute derived events
1066         profile.validate()
1067         profile.ratio(TIME_RATIO, TIME)
1068         profile.call_ratios(CALLS)
1069         profile.integrate(TOTAL_TIME, TIME)
1070         profile.ratio(TOTAL_TIME_RATIO, TOTAL_TIME)
1071
1072         return profile
1073
1074
1075 class CallgrindParser(LineParser):
1076     """Parser for valgrind's callgrind tool.
1077
1078     See also:
1079     - http://valgrind.org/docs/manual/cl-format.html
1080     """
1081
1082     _call_re = re.compile('^calls=\s*(\d+)\s+((\d+|\+\d+|-\d+|\*)\s+)+$')
1083
1084     def __init__(self, infile):
1085         LineParser.__init__(self, infile)
1086
1087         # Textual positions
1088         self.position_ids = {}
1089         self.positions = {}
1090
1091         # Numeric positions
1092         self.num_positions = 1
1093         self.cost_positions = ['line']
1094         self.last_positions = [0]
1095
1096         # Events
1097         self.num_events = 0
1098         self.cost_events = []
1099
1100         self.profile = Profile()
1101         self.profile[SAMPLES] = 0
1102
1103     def parse(self):
1104         # read lookahead
1105         self.readline()
1106
1107         self.parse_key('version')
1108         self.parse_key('creator')
1109         while self.parse_part():
1110             pass
1111         if not self.eof():
1112             sys.stderr.write('warning: line %u: unexpected line\n' % self.line_no)
1113             sys.stderr.write('%s\n' % self.lookahead())
1114
1115         # compute derived data
1116         self.profile.validate()
1117         self.profile.find_cycles()
1118         self.profile.ratio(TIME_RATIO, SAMPLES)
1119         self.profile.call_ratios(CALLS)
1120         self.profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO)
1121
1122         return self.profile
1123
1124     def parse_part(self):
1125         if not self.parse_header_line():
1126             return False
1127         while self.parse_header_line():
1128             pass
1129         if not self.parse_body_line():
1130             return False
1131         while self.parse_body_line():
1132             pass
1133         return True
1134
1135     def parse_header_line(self):
1136         return \
1137             self.parse_empty() or \
1138             self.parse_comment() or \
1139             self.parse_part_detail() or \
1140             self.parse_description() or \
1141             self.parse_event_specification() or \
1142             self.parse_cost_line_def() or \
1143             self.parse_cost_summary()
1144
1145     _detail_keys = set(('cmd', 'pid', 'thread', 'part'))
1146
1147     def parse_part_detail(self):
1148         return self.parse_keys(self._detail_keys)
1149
1150     def parse_description(self):
1151         return self.parse_key('desc') is not None
1152
1153     def parse_event_specification(self):
1154         event = self.parse_key('event')
1155         if event is None:
1156             return False
1157         return True
1158
1159     def parse_cost_line_def(self):
1160         pair = self.parse_keys(('events', 'positions'))
1161         if pair is None:
1162             return False
1163         key, value = pair
1164         items = value.split()
1165         if key == 'events':
1166             self.num_events = len(items)
1167             self.cost_events = items
1168         if key == 'positions':
1169             self.num_positions = len(items)
1170             self.cost_positions = items
1171             self.last_positions = [0]*self.num_positions
1172         return True
1173
1174     def parse_cost_summary(self):
1175         pair = self.parse_keys(('summary', 'totals'))
1176         if pair is None:
1177             return False
1178         return True
1179
1180     def parse_body_line(self):
1181         return \
1182             self.parse_empty() or \
1183             self.parse_comment() or \
1184             self.parse_cost_line() or \
1185             self.parse_position_spec() or \
1186             self.parse_association_spec()
1187
1188     __subpos_re = r'(0x[0-9a-fA-F]+|\d+|\+\d+|-\d+|\*)'
1189     _cost_re = re.compile(r'^' +
1190         __subpos_re + r'( +' + __subpos_re + r')*' +
1191         r'( +\d+)*' +
1192     '$')
1193
1194     def parse_cost_line(self, calls=None):
1195         line = self.lookahead().rstrip()
1196         mo = self._cost_re.match(line)
1197         if not mo:
1198             return False
1199
1200         function = self.get_function()
1201
1202         if calls is None:
1203             # Unlike other aspects, call object (cob) is relative not to the
1204             # last call object, but to the caller's object (ob), so try to
1205             # update it when processing a functions cost line
1206             try:
1207                 self.positions['cob'] = self.positions['ob']
1208             except KeyError:
1209                 pass
1210
1211         values = line.split()
1212         assert len(values) <= self.num_positions + self.num_events
1213
1214         positions = values[0 : self.num_positions]
1215         events = values[self.num_positions : ]
1216         events += ['0']*(self.num_events - len(events))
1217
1218         for i in range(self.num_positions):
1219             position = positions[i]
1220             if position == '*':
1221                 position = self.last_positions[i]
1222             elif position[0] in '-+':
1223                 position = self.last_positions[i] + int(position)
1224             elif position.startswith('0x'):
1225                 position = int(position, 16)
1226             else:
1227                 position = int(position)
1228             self.last_positions[i] = position
1229
1230         events = map(float, events)
1231
1232         if calls is None:
1233             function[SAMPLES] += events[0]
1234             self.profile[SAMPLES] += events[0]
1235         else:
1236             callee = self.get_callee()
1237             callee.called += calls
1238
1239             try:
1240                 call = function.calls[callee.id]
1241             except KeyError:
1242                 call = Call(callee.id)
1243                 call[CALLS] = calls
1244                 call[SAMPLES] = events[0]
1245                 function.add_call(call)
1246             else:
1247                 call[CALLS] += calls
1248                 call[SAMPLES] += events[0]
1249
1250         self.consume()
1251         return True
1252
1253     def parse_association_spec(self):
1254         line = self.lookahead()
1255         if not line.startswith('calls='):
1256             return False
1257
1258         _, values = line.split('=', 1)
1259         values = values.strip().split()
1260         calls = int(values[0])
1261         call_position = values[1:]
1262         self.consume()
1263
1264         self.parse_cost_line(calls)
1265
1266         return True
1267
1268     _position_re = re.compile('^(?P<position>[cj]?(?:ob|fl|fi|fe|fn))=\s*(?:\((?P<id>\d+)\))?(?:\s*(?P<name>.+))?')
1269
1270     _position_table_map = {
1271         'ob': 'ob',
1272         'fl': 'fl',
1273         'fi': 'fl',
1274         'fe': 'fl',
1275         'fn': 'fn',
1276         'cob': 'ob',
1277         'cfl': 'fl',
1278         'cfi': 'fl',
1279         'cfe': 'fl',
1280         'cfn': 'fn',
1281         'jfi': 'fl',
1282     }
1283
1284     _position_map = {
1285         'ob': 'ob',
1286         'fl': 'fl',
1287         'fi': 'fl',
1288         'fe': 'fl',
1289         'fn': 'fn',
1290         'cob': 'cob',
1291         'cfl': 'cfl',
1292         'cfi': 'cfl',
1293         'cfe': 'cfl',
1294         'cfn': 'cfn',
1295         'jfi': 'jfi',
1296     }
1297
1298     def parse_position_spec(self):
1299         line = self.lookahead()
1300
1301         if line.startswith('jump=') or line.startswith('jcnd='):
1302             self.consume()
1303             return True
1304
1305         mo = self._position_re.match(line)
1306         if not mo:
1307             return False
1308
1309         position, id, name = mo.groups()
1310         if id:
1311             table = self._position_table_map[position]
1312             if name:
1313                 self.position_ids[(table, id)] = name
1314             else:
1315                 name = self.position_ids.get((table, id), '')
1316         self.positions[self._position_map[position]] = name
1317
1318         self.consume()
1319         return True
1320
1321     def parse_empty(self):
1322         if self.eof():
1323             return False
1324         line = self.lookahead()
1325         if line.strip():
1326             return False
1327         self.consume()
1328         return True
1329
1330     def parse_comment(self):
1331         line = self.lookahead()
1332         if not line.startswith('#'):
1333             return False
1334         self.consume()
1335         return True
1336
1337     _key_re = re.compile(r'^(\w+):')
1338
1339     def parse_key(self, key):
1340         pair = self.parse_keys((key,))
1341         if not pair:
1342             return None
1343         key, value = pair
1344         return value
1345         line = self.lookahead()
1346         mo = self._key_re.match(line)
1347         if not mo:
1348             return None
1349         key, value = line.split(':', 1)
1350         if key not in keys:
1351             return None
1352         value = value.strip()
1353         self.consume()
1354         return key, value
1355
1356     def parse_keys(self, keys):
1357         line = self.lookahead()
1358         mo = self._key_re.match(line)
1359         if not mo:
1360             return None
1361         key, value = line.split(':', 1)
1362         if key not in keys:
1363             return None
1364         value = value.strip()
1365         self.consume()
1366         return key, value
1367
1368     def make_function(self, module, filename, name):
1369         # FIXME: module and filename are not being tracked reliably
1370         #id = '|'.join((module, filename, name))
1371         id = name
1372         try:
1373             function = self.profile.functions[id]
1374         except KeyError:
1375             function = Function(id, name)
1376             if module:
1377                 function.module = os.path.basename(module)
1378             function[SAMPLES] = 0
1379             function.called = 0
1380             self.profile.add_function(function)
1381         return function
1382
1383     def get_function(self):
1384         module = self.positions.get('ob', '')
1385         filename = self.positions.get('fl', '')
1386         function = self.positions.get('fn', '')
1387         return self.make_function(module, filename, function)
1388
1389     def get_callee(self):
1390         module = self.positions.get('cob', '')
1391         filename = self.positions.get('cfi', '')
1392         function = self.positions.get('cfn', '')
1393         return self.make_function(module, filename, function)
1394
1395
1396 class PerfParser(LineParser):
1397     """Parser for linux perf callgraph output.
1398
1399     It expects output generated with
1400
1401         perf record -g
1402         perf script | gprof2dot.py --format=perf
1403     """
1404
1405     def __init__(self, infile):
1406         LineParser.__init__(self, infile)
1407         self.profile = Profile()
1408
1409     def readline(self):
1410         # Override LineParser.readline to ignore comment lines
1411         while True:
1412             LineParser.readline(self)
1413             if self.eof() or not self.lookahead().startswith('#'):
1414                 break
1415
1416     def parse(self):
1417         # read lookahead
1418         self.readline()
1419
1420         profile = self.profile
1421         profile[SAMPLES] = 0
1422         while not self.eof():
1423             self.parse_event()
1424
1425         # compute derived data
1426         profile.validate()
1427         profile.find_cycles()
1428         profile.ratio(TIME_RATIO, SAMPLES)
1429         profile.call_ratios(SAMPLES2)
1430         profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO)
1431
1432         return profile
1433
1434     def parse_event(self):
1435         if self.eof():
1436             return
1437
1438         line = self.consume()
1439         assert line
1440
1441         callchain = self.parse_callchain()
1442         if not callchain:
1443             return
1444
1445         callee = callchain[0]
1446         callee[SAMPLES] += 1
1447         self.profile[SAMPLES] += 1
1448
1449         for caller in callchain[1:]:
1450             try:
1451                 call = caller.calls[callee.id]
1452             except KeyError:
1453                 call = Call(callee.id)
1454                 call[SAMPLES2] = 1
1455                 caller.add_call(call)
1456             else:
1457                 call[SAMPLES2] += 1
1458
1459             callee = caller
1460
1461     def parse_callchain(self):
1462         callchain = []
1463         while self.lookahead():
1464             function = self.parse_call()
1465             if function is None:
1466                 break
1467             callchain.append(function)
1468         if self.lookahead() == '':
1469             self.consume()
1470         return callchain
1471
1472     call_re = re.compile(r'^\s+(?P<address>[0-9a-fA-F]+)\s+(?P<symbol>.*)\s+\((?P<module>[^)]*)\)$')
1473
1474     def parse_call(self):
1475         line = self.consume()
1476         mo = self.call_re.match(line)
1477         assert mo
1478         if not mo:
1479             return None
1480
1481         function_name = mo.group('symbol')
1482         if not function_name:
1483             function_name = mo.group('address')
1484
1485         module = mo.group('module')
1486
1487         function_id = function_name + ':' + module
1488
1489         try:
1490             function = self.profile.functions[function_id]
1491         except KeyError:
1492             function = Function(function_id, function_name)
1493             function.module = os.path.basename(module)
1494             function[SAMPLES] = 0
1495             self.profile.add_function(function)
1496
1497         return function
1498
1499
1500 class OprofileParser(LineParser):
1501     """Parser for oprofile callgraph output.
1502
1503     See also:
1504     - http://oprofile.sourceforge.net/doc/opreport.html#opreport-callgraph
1505     """
1506
1507     _fields_re = {
1508         'samples': r'(\d+)',
1509         '%': r'(\S+)',
1510         'linenr info': r'(?P<source>\(no location information\)|\S+:\d+)',
1511         'image name': r'(?P<image>\S+(?:\s\(tgid:[^)]*\))?)',
1512         'app name': r'(?P<application>\S+)',
1513         'symbol name': r'(?P<symbol>\(no symbols\)|.+?)',
1514     }
1515
1516     def __init__(self, infile):
1517         LineParser.__init__(self, infile)
1518         self.entries = {}
1519         self.entry_re = None
1520
1521     def add_entry(self, callers, function, callees):
1522         try:
1523             entry = self.entries[function.id]
1524         except KeyError:
1525             self.entries[function.id] = (callers, function, callees)
1526         else:
1527             callers_total, function_total, callees_total = entry
1528             self.update_subentries_dict(callers_total, callers)
1529             function_total.samples += function.samples
1530             self.update_subentries_dict(callees_total, callees)
1531
1532     def update_subentries_dict(self, totals, partials):
1533         for partial in partials.itervalues():
1534             try:
1535                 total = totals[partial.id]
1536             except KeyError:
1537                 totals[partial.id] = partial
1538             else:
1539                 total.samples += partial.samples
1540
1541     def parse(self):
1542         # read lookahead
1543         self.readline()
1544
1545         self.parse_header()
1546         while self.lookahead():
1547             self.parse_entry()
1548
1549         profile = Profile()
1550
1551         reverse_call_samples = {}
1552
1553         # populate the profile
1554         profile[SAMPLES] = 0
1555         for _callers, _function, _callees in self.entries.itervalues():
1556             function = Function(_function.id, _function.name)
1557             function[SAMPLES] = _function.samples
1558             profile.add_function(function)
1559             profile[SAMPLES] += _function.samples
1560
1561             if _function.application:
1562                 function.process = os.path.basename(_function.application)
1563             if _function.image:
1564                 function.module = os.path.basename(_function.image)
1565
1566             total_callee_samples = 0
1567             for _callee in _callees.itervalues():
1568                 total_callee_samples += _callee.samples
1569
1570             for _callee in _callees.itervalues():
1571                 if not _callee.self:
1572                     call = Call(_callee.id)
1573                     call[SAMPLES2] = _callee.samples
1574                     function.add_call(call)
1575
1576         # compute derived data
1577         profile.validate()
1578         profile.find_cycles()
1579         profile.ratio(TIME_RATIO, SAMPLES)
1580         profile.call_ratios(SAMPLES2)
1581         profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO)
1582
1583         return profile
1584
1585     def parse_header(self):
1586         while not self.match_header():
1587             self.consume()
1588         line = self.lookahead()
1589         fields = re.split(r'\s\s+', line)
1590         entry_re = r'^\s*' + r'\s+'.join([self._fields_re[field] for field in fields]) + r'(?P<self>\s+\[self\])?$'
1591         self.entry_re = re.compile(entry_re)
1592         self.skip_separator()
1593
1594     def parse_entry(self):
1595         callers = self.parse_subentries()
1596         if self.match_primary():
1597             function = self.parse_subentry()
1598             if function is not None:
1599                 callees = self.parse_subentries()
1600                 self.add_entry(callers, function, callees)
1601         self.skip_separator()
1602
1603     def parse_subentries(self):
1604         subentries = {}
1605         while self.match_secondary():
1606             subentry = self.parse_subentry()
1607             subentries[subentry.id] = subentry
1608         return subentries
1609
1610     def parse_subentry(self):
1611         entry = Struct()
1612         line = self.consume()
1613         mo = self.entry_re.match(line)
1614         if not mo:
1615             raise ParseError('failed to parse', line)
1616         fields = mo.groupdict()
1617         entry.samples = int(mo.group(1))
1618         if 'source' in fields and fields['source'] != '(no location information)':
1619             source = fields['source']
1620             filename, lineno = source.split(':')
1621             entry.filename = filename
1622             entry.lineno = int(lineno)
1623         else:
1624             source = ''
1625             entry.filename = None
1626             entry.lineno = None
1627         entry.image = fields.get('image', '')
1628         entry.application = fields.get('application', '')
1629         if 'symbol' in fields and fields['symbol'] != '(no symbols)':
1630             entry.symbol = fields['symbol']
1631         else:
1632             entry.symbol = ''
1633         if entry.symbol.startswith('"') and entry.symbol.endswith('"'):
1634             entry.symbol = entry.symbol[1:-1]
1635         entry.id = ':'.join((entry.application, entry.image, source, entry.symbol))
1636         entry.self = fields.get('self', None) != None
1637         if entry.self:
1638             entry.id += ':self'
1639         if entry.symbol:
1640             entry.name = entry.symbol
1641         else:
1642             entry.name = entry.image
1643         return entry
1644
1645     def skip_separator(self):
1646         while not self.match_separator():
1647             self.consume()
1648         self.consume()
1649
1650     def match_header(self):
1651         line = self.lookahead()
1652         return line.startswith('samples')
1653
1654     def match_separator(self):
1655         line = self.lookahead()
1656         return line == '-'*len(line)
1657
1658     def match_primary(self):
1659         line = self.lookahead()
1660         return not line[:1].isspace()
1661
1662     def match_secondary(self):
1663         line = self.lookahead()
1664         return line[:1].isspace()
1665
1666
1667 class HProfParser(LineParser):
1668     """Parser for java hprof output
1669
1670     See also:
1671     - http://java.sun.com/developer/technicalArticles/Programming/HPROF.html
1672     """
1673
1674     trace_re = re.compile(r'\t(.*)\((.*):(.*)\)')
1675     trace_id_re = re.compile(r'^TRACE (\d+):$')
1676
1677     def __init__(self, infile):
1678         LineParser.__init__(self, infile)
1679         self.traces = {}
1680         self.samples = {}
1681
1682     def parse(self):
1683         # read lookahead
1684         self.readline()
1685
1686         while not self.lookahead().startswith('------'): self.consume()
1687         while not self.lookahead().startswith('TRACE '): self.consume()
1688
1689         self.parse_traces()
1690
1691         while not self.lookahead().startswith('CPU'):
1692             self.consume()
1693
1694         self.parse_samples()
1695
1696         # populate the profile
1697         profile = Profile()
1698         profile[SAMPLES] = 0
1699
1700         functions = {}
1701
1702         # build up callgraph
1703         for id, trace in self.traces.iteritems():
1704             if not id in self.samples: continue
1705             mtime = self.samples[id][0]
1706             last = None
1707
1708             for func, file, line in trace:
1709                 if not func in functions:
1710                     function = Function(func, func)
1711                     function[SAMPLES] = 0
1712                     profile.add_function(function)
1713                     functions[func] = function
1714
1715                 function = functions[func]
1716                 # allocate time to the deepest method in the trace
1717                 if not last:
1718                     function[SAMPLES] += mtime
1719                     profile[SAMPLES] += mtime
1720                 else:
1721                     c = function.get_call(last)
1722                     c[SAMPLES2] += mtime
1723
1724                 last = func
1725
1726         # compute derived data
1727         profile.validate()
1728         profile.find_cycles()
1729         profile.ratio(TIME_RATIO, SAMPLES)
1730         profile.call_ratios(SAMPLES2)
1731         profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO)
1732
1733         return profile
1734
1735     def parse_traces(self):
1736         while self.lookahead().startswith('TRACE '):
1737             self.parse_trace()
1738
1739     def parse_trace(self):
1740         l = self.consume()
1741         mo = self.trace_id_re.match(l)
1742         tid = mo.group(1)
1743         last = None
1744         trace = []
1745
1746         while self.lookahead().startswith('\t'):
1747             l = self.consume()
1748             match = self.trace_re.search(l)
1749             if not match:
1750                 #sys.stderr.write('Invalid line: %s\n' % l)
1751                 break
1752             else:
1753                 function_name, file, line = match.groups()
1754                 trace += [(function_name, file, line)]
1755
1756         self.traces[int(tid)] = trace
1757
1758     def parse_samples(self):
1759         self.consume()
1760         self.consume()
1761
1762         while not self.lookahead().startswith('CPU'):
1763             rank, percent_self, percent_accum, count, traceid, method = self.lookahead().split()
1764             self.samples[int(traceid)] = (int(count), method)
1765             self.consume()
1766
1767
1768 class SysprofParser(XmlParser):
1769
1770     def __init__(self, stream):
1771         XmlParser.__init__(self, stream)
1772
1773     def parse(self):
1774         objects = {}
1775         nodes = {}
1776
1777         self.element_start('profile')
1778         while self.token.type == XML_ELEMENT_START:
1779             if self.token.name_or_data == 'objects':
1780                 assert not objects
1781                 objects = self.parse_items('objects')
1782             elif self.token.name_or_data == 'nodes':
1783                 assert not nodes
1784                 nodes = self.parse_items('nodes')
1785             else:
1786                 self.parse_value(self.token.name_or_data)
1787         self.element_end('profile')
1788
1789         return self.build_profile(objects, nodes)
1790
1791     def parse_items(self, name):
1792         assert name[-1] == 's'
1793         items = {}
1794         self.element_start(name)
1795         while self.token.type == XML_ELEMENT_START:
1796             id, values = self.parse_item(name[:-1])
1797             assert id not in items
1798             items[id] = values
1799         self.element_end(name)
1800         return items
1801
1802     def parse_item(self, name):
1803         attrs = self.element_start(name)
1804         id = int(attrs['id'])
1805         values = self.parse_values()
1806         self.element_end(name)
1807         return id, values
1808
1809     def parse_values(self):
1810         values = {}
1811         while self.token.type == XML_ELEMENT_START:
1812             name = self.token.name_or_data
1813             value = self.parse_value(name)
1814             assert name not in values
1815             values[name] = value
1816         return values
1817
1818     def parse_value(self, tag):
1819         self.element_start(tag)
1820         value = self.character_data()
1821         self.element_end(tag)
1822         if value.isdigit():
1823             return int(value)
1824         if value.startswith('"') and value.endswith('"'):
1825             return value[1:-1]
1826         return value
1827
1828     def build_profile(self, objects, nodes):
1829         profile = Profile()
1830
1831         profile[SAMPLES] = 0
1832         for id, object in objects.iteritems():
1833             # Ignore fake objects (process names, modules, "Everything", "kernel", etc.)
1834             if object['self'] == 0:
1835                 continue
1836
1837             function = Function(id, object['name'])
1838             function[SAMPLES] = object['self']
1839             profile.add_function(function)
1840             profile[SAMPLES] += function[SAMPLES]
1841
1842         for id, node in nodes.iteritems():
1843             # Ignore fake calls
1844             if node['self'] == 0:
1845                 continue
1846
1847             # Find a non-ignored parent
1848             parent_id = node['parent']
1849             while parent_id != 0:
1850                 parent = nodes[parent_id]
1851                 caller_id = parent['object']
1852                 if objects[caller_id]['self'] != 0:
1853                     break
1854                 parent_id = parent['parent']
1855             if parent_id == 0:
1856                 continue
1857
1858             callee_id = node['object']
1859
1860             assert objects[caller_id]['self']
1861             assert objects[callee_id]['self']
1862
1863             function = profile.functions[caller_id]
1864
1865             samples = node['self']
1866             try:
1867                 call = function.calls[callee_id]
1868             except KeyError:
1869                 call = Call(callee_id)
1870                 call[SAMPLES2] = samples
1871                 function.add_call(call)
1872             else:
1873                 call[SAMPLES2] += samples
1874
1875         # Compute derived events
1876         profile.validate()
1877         profile.find_cycles()
1878         profile.ratio(TIME_RATIO, SAMPLES)
1879         profile.call_ratios(SAMPLES2)
1880         profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO)
1881
1882         return profile
1883
1884
1885 class SharkParser(LineParser):
1886     """Parser for MacOSX Shark output.
1887
1888     Author: tom@dbservice.com
1889     """
1890
1891     def __init__(self, infile):
1892         LineParser.__init__(self, infile)
1893         self.stack = []
1894         self.entries = {}
1895
1896     def add_entry(self, function):
1897         try:
1898             entry = self.entries[function.id]
1899         except KeyError:
1900             self.entries[function.id] = (function, { })
1901         else:
1902             function_total, callees_total = entry
1903             function_total.samples += function.samples
1904
1905     def add_callee(self, function, callee):
1906         func, callees = self.entries[function.id]
1907         try:
1908             entry = callees[callee.id]
1909         except KeyError:
1910             callees[callee.id] = callee
1911         else:
1912             entry.samples += callee.samples
1913
1914     def parse(self):
1915         self.readline()
1916         self.readline()
1917         self.readline()
1918         self.readline()
1919
1920         match = re.compile(r'(?P<prefix>[|+ ]*)(?P<samples>\d+), (?P<symbol>[^,]+), (?P<image>.*)')
1921
1922         while self.lookahead():
1923             line = self.consume()
1924             mo = match.match(line)
1925             if not mo:
1926                 raise ParseError('failed to parse', line)
1927
1928             fields = mo.groupdict()
1929             prefix = len(fields.get('prefix', 0)) / 2 - 1
1930
1931             symbol = str(fields.get('symbol', 0))
1932             image = str(fields.get('image', 0))
1933
1934             entry = Struct()
1935             entry.id = ':'.join([symbol, image])
1936             entry.samples = int(fields.get('samples', 0))
1937
1938             entry.name = symbol
1939             entry.image = image
1940
1941             # adjust the callstack
1942             if prefix < len(self.stack):
1943                 del self.stack[prefix:]
1944
1945             if prefix == len(self.stack):
1946                 self.stack.append(entry)
1947
1948             # if the callstack has had an entry, it's this functions caller
1949             if prefix > 0:
1950                 self.add_callee(self.stack[prefix - 1], entry)
1951
1952             self.add_entry(entry)
1953
1954         profile = Profile()
1955         profile[SAMPLES] = 0
1956         for _function, _callees in self.entries.itervalues():
1957             function = Function(_function.id, _function.name)
1958             function[SAMPLES] = _function.samples
1959             profile.add_function(function)
1960             profile[SAMPLES] += _function.samples
1961
1962             if _function.image:
1963                 function.module = os.path.basename(_function.image)
1964
1965             for _callee in _callees.itervalues():
1966                 call = Call(_callee.id)
1967                 call[SAMPLES] = _callee.samples
1968                 function.add_call(call)
1969
1970         # compute derived data
1971         profile.validate()
1972         profile.find_cycles()
1973         profile.ratio(TIME_RATIO, SAMPLES)
1974         profile.call_ratios(SAMPLES)
1975         profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO)
1976
1977         return profile
1978
1979
1980 class XPerfParser(Parser):
1981     """Parser for CSVs generted by XPerf, from Microsoft Windows Performance Tools.
1982     """
1983
1984     def __init__(self, stream):
1985         Parser.__init__(self)
1986         self.stream = stream
1987         self.profile = Profile()
1988         self.profile[SAMPLES] = 0
1989         self.column = {}
1990
1991     def parse(self):
1992         import csv
1993         reader = csv.reader(
1994             self.stream,
1995             delimiter = ',',
1996             quotechar = None,
1997             escapechar = None,
1998             doublequote = False,
1999             skipinitialspace = True,
2000             lineterminator = '\r\n',
2001             quoting = csv.QUOTE_NONE)
2002         it = iter(reader)
2003         row = reader.next()
2004         self.parse_header(row)
2005         for row in it:
2006             self.parse_row(row)
2007
2008         # compute derived data
2009         self.profile.validate()
2010         self.profile.find_cycles()
2011         self.profile.ratio(TIME_RATIO, SAMPLES)
2012         self.profile.call_ratios(SAMPLES2)
2013         self.profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO)
2014
2015         return self.profile
2016
2017     def parse_header(self, row):
2018         for column in range(len(row)):
2019             name = row[column]
2020             assert name not in self.column
2021             self.column[name] = column
2022
2023     def parse_row(self, row):
2024         fields = {}
2025         for name, column in self.column.iteritems():
2026             value = row[column]
2027             for factory in int, float:
2028                 try:
2029                     value = factory(value)
2030                 except ValueError:
2031                     pass
2032                 else:
2033                     break
2034             fields[name] = value
2035
2036         process = fields['Process Name']
2037         symbol = fields['Module'] + '!' + fields['Function']
2038         weight = fields['Weight']
2039         count = fields['Count']
2040
2041         function = self.get_function(process, symbol)
2042         function[SAMPLES] += weight * count
2043         self.profile[SAMPLES] += weight * count
2044
2045         stack = fields['Stack']
2046         if stack != '?':
2047             stack = stack.split('/')
2048             assert stack[0] == '[Root]'
2049             if stack[-1] != symbol:
2050                 # XXX: some cases the sampled function does not appear in the stack
2051                 stack.append(symbol)
2052             caller = None
2053             for symbol in stack[1:]:
2054                 callee = self.get_function(process, symbol)
2055                 if caller is not None:
2056                     try:
2057                         call = caller.calls[callee.id]
2058                     except KeyError:
2059                         call = Call(callee.id)
2060                         call[SAMPLES2] = count
2061                         caller.add_call(call)
2062                     else:
2063                         call[SAMPLES2] += count
2064                 caller = callee
2065
2066     def get_function(self, process, symbol):
2067         function_id = process + '!' + symbol
2068
2069         try:
2070             function = self.profile.functions[function_id]
2071         except KeyError:
2072             module, name = symbol.split('!', 1)
2073             function = Function(function_id, name)
2074             function.process = process
2075             function.module = module
2076             function[SAMPLES] = 0
2077             self.profile.add_function(function)
2078
2079         return function
2080
2081
2082 class SleepyParser(Parser):
2083     """Parser for GNU gprof output.
2084
2085     See also:
2086     - http://www.codersnotes.com/sleepy/
2087     - http://sleepygraph.sourceforge.net/
2088     """
2089
2090     def __init__(self, filename):
2091         Parser.__init__(self)
2092
2093         from zipfile import ZipFile
2094
2095         self.database = ZipFile(filename)
2096
2097         self.version_0_7 = 'Version 0.7 required' in self.database.namelist()
2098
2099         self.symbols = {}
2100         self.calls = {}
2101
2102         self.profile = Profile()
2103
2104     _symbol_re = re.compile(
2105         r'^(?P<id>\w+)' +
2106         r'\s+"(?P<module>[^"]*)"' +
2107         r'\s+"(?P<procname>[^"]*)"' +
2108         r'\s+"(?P<sourcefile>[^"]*)"' +
2109         r'\s+(?P<sourceline>\d+)$'
2110     )
2111
2112     def parse_symbols(self):
2113         if self.version_0_7:
2114             symbols_txt = 'Symbols.txt'
2115         else:
2116             symbols_txt = 'symbols.txt'
2117         lines = self.database.read(symbols_txt).splitlines()
2118         for line in lines:
2119             mo = self._symbol_re.match(line)
2120             if mo:
2121                 symbol_id, module, procname, sourcefile, sourceline = mo.groups()
2122
2123                 function_id = ':'.join([module, procname])
2124
2125                 try:
2126                     function = self.profile.functions[function_id]
2127                 except KeyError:
2128                     function = Function(function_id, procname)
2129                     function.module = module
2130                     function[SAMPLES] = 0
2131                     self.profile.add_function(function)
2132
2133                 self.symbols[symbol_id] = function
2134
2135     def parse_callstacks(self):
2136         if self.version_0_7:
2137             callstacks_txt = 'Callstacks.txt'
2138         else:
2139             callstacks_txt = 'callstacks.txt'
2140         lines = self.database.read(callstacks_txt).splitlines()
2141         for line in lines:
2142             fields = line.split()
2143             samples = float(fields[0])
2144             callstack = fields[1:]
2145
2146             callstack = [self.symbols[symbol_id] for symbol_id in callstack]
2147
2148             callee = callstack[0]
2149
2150             callee[SAMPLES] += samples
2151             self.profile[SAMPLES] += samples
2152
2153             for caller in callstack[1:]:
2154                 try:
2155                     call = caller.calls[callee.id]
2156                 except KeyError:
2157                     call = Call(callee.id)
2158                     call[SAMPLES2] = samples
2159                     caller.add_call(call)
2160                 else:
2161                     call[SAMPLES2] += samples
2162
2163                 callee = caller
2164
2165     def parse(self):
2166         profile = self.profile
2167         profile[SAMPLES] = 0
2168
2169         self.parse_symbols()
2170         self.parse_callstacks()
2171
2172         # Compute derived events
2173         profile.validate()
2174         profile.find_cycles()
2175         profile.ratio(TIME_RATIO, SAMPLES)
2176         profile.call_ratios(SAMPLES2)
2177         profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO)
2178
2179         return profile
2180
2181
2182 class AQtimeTable:
2183
2184     def __init__(self, name, fields):
2185         self.name = name
2186
2187         self.fields = fields
2188         self.field_column = {}
2189         for column in range(len(fields)):
2190             self.field_column[fields[column]] = column
2191         self.rows = []
2192
2193     def __len__(self):
2194         return len(self.rows)
2195
2196     def __iter__(self):
2197         for values, children in self.rows:
2198             fields = {}
2199             for name, value in zip(self.fields, values):
2200                 fields[name] = value
2201             children = dict([(child.name, child) for child in children])
2202             yield fields, children
2203         raise StopIteration
2204
2205     def add_row(self, values, children=()):
2206         self.rows.append((values, children))
2207
2208
2209 class AQtimeParser(XmlParser):
2210
2211     def __init__(self, stream):
2212         XmlParser.__init__(self, stream)
2213         self.tables = {}
2214
2215     def parse(self):
2216         self.element_start('AQtime_Results')
2217         self.parse_headers()
2218         results = self.parse_results()
2219         self.element_end('AQtime_Results')
2220         return self.build_profile(results)
2221
2222     def parse_headers(self):
2223         self.element_start('HEADERS')
2224         while self.token.type == XML_ELEMENT_START:
2225             self.parse_table_header()
2226         self.element_end('HEADERS')
2227
2228     def parse_table_header(self):
2229         attrs = self.element_start('TABLE_HEADER')
2230         name = attrs['NAME']
2231         id = int(attrs['ID'])
2232         field_types = []
2233         field_names = []
2234         while self.token.type == XML_ELEMENT_START:
2235             field_type, field_name = self.parse_table_field()
2236             field_types.append(field_type)
2237             field_names.append(field_name)
2238         self.element_end('TABLE_HEADER')
2239         self.tables[id] = name, field_types, field_names
2240
2241     def parse_table_field(self):
2242         attrs = self.element_start('TABLE_FIELD')
2243         type = attrs['TYPE']
2244         name = self.character_data()
2245         self.element_end('TABLE_FIELD')
2246         return type, name
2247
2248     def parse_results(self):
2249         self.element_start('RESULTS')
2250         table = self.parse_data()
2251         self.element_end('RESULTS')
2252         return table
2253
2254     def parse_data(self):
2255         rows = []
2256         attrs = self.element_start('DATA')
2257         table_id = int(attrs['TABLE_ID'])
2258         table_name, field_types, field_names = self.tables[table_id]
2259         table = AQtimeTable(table_name, field_names)
2260         while self.token.type == XML_ELEMENT_START:
2261             row, children = self.parse_row(field_types)
2262             table.add_row(row, children)
2263         self.element_end('DATA')
2264         return table
2265
2266     def parse_row(self, field_types):
2267         row = [None]*len(field_types)
2268         children = []
2269         self.element_start('ROW')
2270         while self.token.type == XML_ELEMENT_START:
2271             if self.token.name_or_data == 'FIELD':
2272                 field_id, field_value = self.parse_field(field_types)
2273                 row[field_id] = field_value
2274             elif self.token.name_or_data == 'CHILDREN':
2275                 children = self.parse_children()
2276             else:
2277                 raise XmlTokenMismatch("<FIELD ...> or <CHILDREN ...>", self.token)
2278         self.element_end('ROW')
2279         return row, children
2280
2281     def parse_field(self, field_types):
2282         attrs = self.element_start('FIELD')
2283         id = int(attrs['ID'])
2284         type = field_types[id]
2285         value = self.character_data()
2286         if type == 'Integer':
2287             value = int(value)
2288         elif type == 'Float':
2289             value = float(value)
2290         elif type == 'Address':
2291             value = int(value)
2292         elif type == 'String':
2293             pass
2294         else:
2295             assert False
2296         self.element_end('FIELD')
2297         return id, value
2298
2299     def parse_children(self):
2300         children = []
2301         self.element_start('CHILDREN')
2302         while self.token.type == XML_ELEMENT_START:
2303             table = self.parse_data()
2304             assert table.name not in children
2305             children.append(table)
2306         self.element_end('CHILDREN')
2307         return children
2308
2309     def build_profile(self, results):
2310         assert results.name == 'Routines'
2311         profile = Profile()
2312         profile[TIME] = 0.0
2313         for fields, tables in results:
2314             function = self.build_function(fields)
2315             children = tables['Children']
2316             for fields, _ in children:
2317                 call = self.build_call(fields)
2318                 function.add_call(call)
2319             profile.add_function(function)
2320             profile[TIME] = profile[TIME] + function[TIME]
2321         profile[TOTAL_TIME] = profile[TIME]
2322         profile.ratio(TOTAL_TIME_RATIO, TOTAL_TIME)
2323         return profile
2324
2325     def build_function(self, fields):
2326         function = Function(self.build_id(fields), self.build_name(fields))
2327         function[TIME] = fields['Time']
2328         function[TOTAL_TIME] = fields['Time with Children']
2329         #function[TIME_RATIO] = fields['% Time']/100.0
2330         #function[TOTAL_TIME_RATIO] = fields['% with Children']/100.0
2331         return function
2332
2333     def build_call(self, fields):
2334         call = Call(self.build_id(fields))
2335         call[TIME] = fields['Time']
2336         call[TOTAL_TIME] = fields['Time with Children']
2337         #call[TIME_RATIO] = fields['% Time']/100.0
2338         #call[TOTAL_TIME_RATIO] = fields['% with Children']/100.0
2339         return call
2340
2341     def build_id(self, fields):
2342         return ':'.join([fields['Module Name'], fields['Unit Name'], fields['Routine Name']])
2343
2344     def build_name(self, fields):
2345         # TODO: use more fields
2346         return fields['Routine Name']
2347
2348
2349 class PstatsParser:
2350     """Parser python profiling statistics saved with te pstats module."""
2351
2352     def __init__(self, *filename):
2353         import pstats
2354         try:
2355             self.stats = pstats.Stats(*filename)
2356         except ValueError:
2357             import hotshot.stats
2358             self.stats = hotshot.stats.load(filename[0])
2359         self.profile = Profile()
2360         self.function_ids = {}
2361
2362     def get_function_name(self, (filename, line, name)):
2363         funcname = ""
2364         if filename != "~":
2365             funcname = os.path.basename( filename ) + ":"
2366         if line != 0:
2367             funcname += str( line ) + ":"
2368         if filename == "~" and line == 0 and \
2369            name.startswith( '<' ) and name.endswith( '>' ):
2370             funcname += "{%s}" % name[ 1 : -1 ]
2371         else:
2372             funcname += name
2373         return funcname
2374
2375     def get_function(self, key, index = None):
2376         try:
2377             id = self.function_ids[key]
2378         except KeyError:
2379             id = len(self.function_ids)
2380             name = self.get_function_name(key)
2381             function = Function(id, name)
2382             self.profile.functions[id] = function
2383             self.function_ids[key] = id
2384         else:
2385             function = self.profile.functions[id]
2386
2387         if index is not None:
2388             function.index = index
2389         return function
2390
2391     def parse(self):
2392         self.profile[TIME] = 0.0
2393         self.profile[TOTAL_TIME] = self.stats.total_tt
2394         index = 0
2395         for fn, (primitive_calls, actual_calls,
2396                  tt, ct, callers) in self.stats.stats.iteritems():
2397             callee = self.get_function(fn, index)
2398             index += 1
2399             callee.called = actual_calls
2400             callee.primitive_called = primitive_calls
2401             callee[TOTAL_TIME] = ct
2402             callee[TIME] = tt
2403             self.profile[TIME] += tt
2404             self.profile[TOTAL_TIME] = max(self.profile[TOTAL_TIME], ct)
2405             for fn, value in callers.iteritems():
2406                 caller = self.get_function(fn)
2407                 call = Call(callee.id)
2408                 if isinstance(value, tuple):
2409                     for i in xrange(0, len(value), 4):
2410                         actual_calls, primitive_calls, tt, ct = value[i:i+4]
2411                         if CALLS in call:
2412                             if callee.id != caller.id:
2413                                 call[CALLS] += primitive_calls
2414                             else:
2415                                 # Recursive
2416                                 call[CALLS] += actual_calls
2417                         else:
2418                             if callee.id != caller.id:
2419                                 call[CALLS] = primitive_calls
2420                             else:
2421                                 # Recursive
2422                                 call[CALLS] = actual_calls
2423                         if TOTAL_TIME in call:
2424                             call[TOTAL_TIME] += ct
2425                         else:
2426                             call[TOTAL_TIME] = ct
2427                 else:
2428                     call[CALLS] = value
2429                     call[TOTAL_TIME] = ratio(value, actual_calls)*ct
2430
2431                 caller.add_call(call)
2432         #self.stats.print_stats()
2433         #self.stats.print_callees()
2434
2435         # Compute derived events
2436         self.profile.validate()
2437         self.profile.ratio(TIME_RATIO, TIME)
2438         self.profile.ratio(TOTAL_TIME_RATIO, TOTAL_TIME)
2439
2440         return self.profile
2441
2442
2443 class Theme:
2444
2445     def __init__(self,
2446             bgcolor = (0.0, 0.0, 1.0),
2447             mincolor = (0.0, 0.0, 0.0),
2448             maxcolor = (0.0, 0.0, 1.0),
2449             fontname = "Arial",
2450             minfontsize = 10.0,
2451             maxfontsize = 10.0,
2452             minpenwidth = 0.5,
2453             maxpenwidth = 4.0,
2454             gamma = 2.2,
2455             skew = 1.0):
2456         self.bgcolor = bgcolor
2457         self.mincolor = mincolor
2458         self.maxcolor = maxcolor
2459         self.fontname = fontname
2460         self.minfontsize = minfontsize
2461         self.maxfontsize = maxfontsize
2462         self.minpenwidth = minpenwidth
2463         self.maxpenwidth = maxpenwidth
2464         self.gamma = gamma
2465         self.skew = skew
2466
2467     def graph_bgcolor(self):
2468         return self.hsl_to_rgb(*self.bgcolor)
2469
2470     def graph_fontname(self):
2471         return self.fontname
2472
2473     def graph_fontsize(self):
2474         return self.minfontsize
2475
2476     def node_bgcolor(self, weight):
2477         return self.color(weight)
2478
2479     def node_fgcolor(self, weight):
2480         return self.graph_bgcolor()
2481
2482     def node_fontsize(self, weight):
2483         return self.fontsize(weight)
2484
2485     def edge_color(self, weight):
2486         return self.color(weight)
2487
2488     def edge_fontsize(self, weight):
2489         return self.fontsize(weight)
2490
2491     def edge_penwidth(self, weight):
2492         return max(weight*self.maxpenwidth, self.minpenwidth)
2493
2494     def edge_arrowsize(self, weight):
2495         return 0.0
2496         # return 0.5 * math.sqrt(self.edge_penwidth(weight))
2497
2498     def fontsize(self, weight):
2499         return max(weight**2 * self.maxfontsize, self.minfontsize)
2500
2501     def color(self, weight):
2502         weight = min(max(weight, 0.0), 1.0)
2503
2504         hmin, smin, lmin = self.mincolor
2505         hmax, smax, lmax = self.maxcolor
2506
2507         if self.skew < 0:
2508             raise ValueError("Skew must be greater than 0")
2509         elif self.skew == 1.0:
2510             h = hmin + weight*(hmax - hmin)
2511             s = smin + weight*(smax - smin)
2512             l = lmin + weight*(lmax - lmin)
2513         else:
2514             base = self.skew
2515             h = hmin + ((hmax-hmin)*(-1.0 + (base ** weight)) / (base - 1.0))
2516             s = smin + ((smax-smin)*(-1.0 + (base ** weight)) / (base - 1.0))
2517             l = lmin + ((lmax-lmin)*(-1.0 + (base ** weight)) / (base - 1.0))
2518
2519         return self.hsl_to_rgb(h, s, l)
2520
2521     def hsl_to_rgb(self, h, s, l):
2522         """Convert a color from HSL color-model to RGB.
2523
2524         See also:
2525         - http://www.w3.org/TR/css3-color/#hsl-color
2526         """
2527
2528         h = h % 1.0
2529         s = min(max(s, 0.0), 1.0)
2530         l = min(max(l, 0.0), 1.0)
2531
2532         if l <= 0.5:
2533             m2 = l*(s + 1.0)
2534         else:
2535             m2 = l + s - l*s
2536         m1 = l*2.0 - m2
2537         r = self._hue_to_rgb(m1, m2, h + 1.0/3.0)
2538         g = self._hue_to_rgb(m1, m2, h)
2539         b = self._hue_to_rgb(m1, m2, h - 1.0/3.0)
2540
2541         # Apply gamma correction
2542         r **= self.gamma
2543         g **= self.gamma
2544         b **= self.gamma
2545
2546         return (r, g, b)
2547
2548     def _hue_to_rgb(self, m1, m2, h):
2549         if h < 0.0:
2550             h += 1.0
2551         elif h > 1.0:
2552             h -= 1.0
2553         if h*6 < 1.0:
2554             return m1 + (m2 - m1)*h*6.0
2555         elif h*2 < 1.0:
2556             return m2
2557         elif h*3 < 2.0:
2558             return m1 + (m2 - m1)*(2.0/3.0 - h)*6.0
2559         else:
2560             return m1
2561
2562
2563 TEMPERATURE_COLORMAP = Theme(
2564     mincolor = (2.0/3.0, 0.80, 0.25), # dark blue
2565     maxcolor = (0.0, 1.0, 0.5), # satured red
2566     gamma = 1.0
2567 )
2568
2569 PINK_COLORMAP = Theme(
2570     mincolor = (0.0, 1.0, 0.90), # pink
2571     maxcolor = (0.0, 1.0, 0.5), # satured red
2572 )
2573
2574 GRAY_COLORMAP = Theme(
2575     mincolor = (0.0, 0.0, 0.85), # light gray
2576     maxcolor = (0.0, 0.0, 0.0), # black
2577 )
2578
2579 BW_COLORMAP = Theme(
2580     minfontsize = 8.0,
2581     maxfontsize = 24.0,
2582     mincolor = (0.0, 0.0, 0.0), # black
2583     maxcolor = (0.0, 0.0, 0.0), # black
2584     minpenwidth = 0.1,
2585     maxpenwidth = 8.0,
2586 )
2587
2588
2589 class DotWriter:
2590     """Writer for the DOT language.
2591
2592     See also:
2593     - "The DOT Language" specification
2594       http://www.graphviz.org/doc/info/lang.html
2595     """
2596
2597     strip = False
2598     wrap = False
2599
2600     def __init__(self, fp):
2601         self.fp = fp
2602
2603     def wrap_function_name(self, name):
2604         """Split the function name on multiple lines."""
2605
2606         if len(name) > 32:
2607             ratio = 2.0/3.0
2608             height = max(int(len(name)/(1.0 - ratio) + 0.5), 1)
2609             width = max(len(name)/height, 32)
2610             # TODO: break lines in symbols
2611             name = textwrap.fill(name, width, break_long_words=False)
2612
2613         # Take away spaces
2614         name = name.replace(", ", ",")
2615         name = name.replace("> >", ">>")
2616         name = name.replace("> >", ">>") # catch consecutive
2617
2618         return name
2619
2620     def graph(self, profile, theme):
2621         self.begin_graph()
2622
2623         fontname = theme.graph_fontname()
2624
2625         self.attr('graph', fontname=fontname, ranksep=0.25, nodesep=0.125)
2626         self.attr('node', fontname=fontname, shape="box", style="filled", fontcolor="white", width=0, height=0)
2627         self.attr('edge', fontname=fontname)
2628
2629         for function in profile.functions.itervalues():
2630             labels = []
2631             if function.process is not None:
2632                 labels.append(function.process)
2633             if function.module is not None:
2634                 labels.append(function.module)
2635
2636             if self.strip:
2637                 function_name = function.stripped_name()
2638             else:
2639                 function_name = function.name
2640             if self.wrap:
2641                 function_name = self.wrap_function_name(function_name)
2642             labels.append(function_name)
2643
2644             for event in TOTAL_TIME_RATIO, TIME_RATIO:
2645                 if event in function.events:
2646                     label = event.format(function[event])
2647                     labels.append(label)
2648             if function.called is not None and \
2649                function.primitive_called is not None and \
2650                function.called != function.primitive_called:
2651                 labels.append("%d/%dx" % (function.called, function.primitive_called))
2652             elif function.called is not None:
2653                 labels.append("%dx" % (function.called,))
2654
2655             if function.weight is not None:
2656                 weight = function.weight
2657             else:
2658                 weight = 0.0
2659
2660             label = '\n'.join(labels)
2661             if function.index is not None:
2662                 label += "--" + str( function.index )
2663             self.node(function.id,
2664                 label = label,
2665                 color = self.color(theme.node_bgcolor(weight)),
2666                 fontcolor = self.color(theme.node_fgcolor(weight)),
2667                 fontsize = "%.2f" % theme.node_fontsize(weight),
2668             )
2669
2670             for call in function.calls.itervalues():
2671                 callee = profile.functions[call.callee_id]
2672
2673                 labels = []
2674                 for event in TOTAL_TIME_RATIO, CALLS:
2675                     if event in call.events:
2676                         label = event.format(call[event])
2677                         labels.append(label)
2678
2679                 if call.weight is not None:
2680                     weight = call.weight
2681                 elif callee.weight is not None:
2682                     weight = callee.weight
2683                 else:
2684                     weight = 0.0
2685
2686                 label = '\n'.join(labels)
2687
2688                 self.edge(function.id, call.callee_id,
2689                     label = label,
2690                     color = self.color(theme.edge_color(weight)),
2691                     fontcolor = self.color(theme.edge_color(weight)),
2692                     fontsize = "%.2f" % theme.edge_fontsize(weight),
2693                     penwidth = "%.2f" % theme.edge_penwidth(weight),
2694                     labeldistance = "%.2f" % theme.edge_penwidth(weight),
2695                     arrowsize = "%.2f" % theme.edge_arrowsize(weight),
2696                 )
2697
2698         self.end_graph()
2699
2700     def begin_graph(self):
2701         self.write('digraph {\n')
2702
2703     def end_graph(self):
2704         self.write('}\n')
2705
2706     def attr(self, what, **attrs):
2707         self.write("\t")
2708         self.write(what)
2709         self.attr_list(attrs)
2710         self.write(";\n")
2711
2712     def node(self, node, **attrs):
2713         self.write("\t")
2714         self.id(node)
2715         self.attr_list(attrs)
2716         self.write(";\n")
2717
2718     def edge(self, src, dst, **attrs):
2719         self.write("\t")
2720         self.id(src)
2721         self.write(" -> ")
2722         self.id(dst)
2723         self.attr_list(attrs)
2724         self.write(";\n")
2725
2726     def attr_list(self, attrs):
2727         if not attrs:
2728             return
2729         self.write(' [')
2730         first = True
2731         for name, value in attrs.iteritems():
2732             if first:
2733                 first = False
2734             else:
2735                 self.write(", ")
2736             self.id(name)
2737             self.write('=')
2738             self.id(value)
2739         self.write(']')
2740
2741     def id(self, id):
2742         if isinstance(id, (int, float)):
2743             s = str(id)
2744         elif isinstance(id, basestring):
2745             if id.isalnum() and not id.startswith('0x'):
2746                 s = id
2747             else:
2748                 s = self.escape(id)
2749         else:
2750             raise TypeError
2751         self.write(s)
2752
2753     def color(self, (r, g, b)):
2754
2755         def float2int(f):
2756             if f <= 0.0:
2757                 return 0
2758             if f >= 1.0:
2759                 return 255
2760             return int(255.0*f + 0.5)
2761
2762         return "#" + "".join(["%02x" % float2int(c) for c in (r, g, b)])
2763
2764     def escape(self, s):
2765         s = s.encode('utf-8')
2766         s = s.replace('\\', r'\\')
2767         s = s.replace('\n', r'\n')
2768         s = s.replace('\t', r'\t')
2769         s = s.replace('"', r'\"')
2770         return '"' + s + '"'
2771
2772     def write(self, s):
2773         self.fp.write(s)
2774
2775
2776 class Main:
2777     """Main program."""
2778
2779     themes = {
2780             "color": TEMPERATURE_COLORMAP,
2781             "pink": PINK_COLORMAP,
2782             "gray": GRAY_COLORMAP,
2783             "bw": BW_COLORMAP,
2784     }
2785
2786     def main(self):
2787         """Main program."""
2788
2789         parser = optparse.OptionParser(
2790             usage="\n\t%prog [options] [file] ...",
2791             version="%%prog %s" % __version__)
2792         parser.add_option(
2793             '-o', '--output', metavar='FILE',
2794             type="string", dest="output",
2795             help="output filename [stdout]")
2796         parser.add_option(
2797             '-n', '--node-thres', metavar='PERCENTAGE',
2798             type="float", dest="node_thres", default=0.5,
2799             help="eliminate nodes below this threshold [default: %default]")
2800         parser.add_option(
2801             '-e', '--edge-thres', metavar='PERCENTAGE',
2802             type="float", dest="edge_thres", default=0.1,
2803             help="eliminate edges below this threshold [default: %default]")
2804         parser.add_option(
2805             '-f', '--format',
2806             type="choice", choices=('prof', 'callgrind', 'perf', 'oprofile', 'hprof', 'sysprof', 'pstats', 'shark', 'sleepy', 'aqtime', 'xperf'),
2807             dest="format", default="prof",
2808             help="profile format: prof, callgrind, oprofile, hprof, sysprof, shark, sleepy, aqtime, pstats, or xperf [default: %default]")
2809         parser.add_option(
2810             '-c', '--colormap',
2811             type="choice", choices=('color', 'pink', 'gray', 'bw'),
2812             dest="theme", default="color",
2813             help="color map: color, pink, gray, or bw [default: %default]")
2814         parser.add_option(
2815             '-s', '--strip',
2816             action="store_true",
2817             dest="strip", default=False,
2818             help="strip function parameters, template parameters, and const modifiers from demangled C++ function names")
2819         parser.add_option(
2820             '-w', '--wrap',
2821             action="store_true",
2822             dest="wrap", default=False,
2823             help="wrap function names")
2824         # add a new option to control skew of the colorization curve
2825         parser.add_option(
2826             '--skew',
2827             type="float", dest="theme_skew", default=1.0,
2828             help="skew the colorization curve.  Values < 1.0 give more variety to lower percentages.  Value > 1.0 give less variety to lower percentages")
2829         (self.options, self.args) = parser.parse_args(sys.argv[1:])
2830
2831         if len(self.args) > 1 and self.options.format != 'pstats':
2832             parser.error('incorrect number of arguments')
2833
2834         try:
2835             self.theme = self.themes[self.options.theme]
2836         except KeyError:
2837             parser.error('invalid colormap \'%s\'' % self.options.theme)
2838
2839         # set skew on the theme now that it has been picked.
2840         if self.options.theme_skew:
2841             self.theme.skew = self.options.theme_skew
2842
2843         if self.options.format == 'prof':
2844             if not self.args:
2845                 fp = sys.stdin
2846             else:
2847                 fp = open(self.args[0], 'rt')
2848             parser = GprofParser(fp)
2849         elif self.options.format == 'callgrind':
2850             if not self.args:
2851                 fp = sys.stdin
2852             else:
2853                 fp = open(self.args[0], 'rt')
2854             parser = CallgrindParser(fp)
2855         elif self.options.format == 'perf':
2856             if not self.args:
2857                 fp = sys.stdin
2858             else:
2859                 fp = open(self.args[0], 'rt')
2860             parser = PerfParser(fp)
2861         elif self.options.format == 'oprofile':
2862             if not self.args:
2863                 fp = sys.stdin
2864             else:
2865                 fp = open(self.args[0], 'rt')
2866             parser = OprofileParser(fp)
2867         elif self.options.format == 'sysprof':
2868             if not self.args:
2869                 fp = sys.stdin
2870             else:
2871                 fp = open(self.args[0], 'rt')
2872             parser = SysprofParser(fp)
2873         elif self.options.format == 'hprof':
2874             if not self.args:
2875                 fp = sys.stdin
2876             else:
2877                 fp = open(self.args[0], 'rt')
2878             parser = HProfParser(fp)
2879         elif self.options.format == 'pstats':
2880             if not self.args:
2881                 parser.error('at least a file must be specified for pstats input')
2882             parser = PstatsParser(*self.args)
2883         elif self.options.format == 'xperf':
2884             if not self.args:
2885                 fp = sys.stdin
2886             else:
2887                 fp = open(self.args[0], 'rt')
2888             parser = XPerfParser(fp)
2889         elif self.options.format == 'shark':
2890             if not self.args:
2891                 fp = sys.stdin
2892             else:
2893                 fp = open(self.args[0], 'rt')
2894             parser = SharkParser(fp)
2895         elif self.options.format == 'sleepy':
2896             if len(self.args) != 1:
2897                 parser.error('exactly one file must be specified for sleepy input')
2898             parser = SleepyParser(self.args[0])
2899         elif self.options.format == 'aqtime':
2900             if not self.args:
2901                 fp = sys.stdin
2902             else:
2903                 fp = open(self.args[0], 'rt')
2904             parser = AQtimeParser(fp)
2905         else:
2906             parser.error('invalid format \'%s\'' % self.options.format)
2907
2908         self.profile = parser.parse()
2909
2910         if self.options.output is None:
2911             self.output = sys.stdout
2912         else:
2913             self.output = open(self.options.output, 'wt')
2914
2915         self.write_graph()
2916
2917     def write_graph(self):
2918         dot = DotWriter(self.output)
2919         dot.strip = self.options.strip
2920         dot.wrap = self.options.wrap
2921
2922         profile = self.profile
2923         profile.prune(self.options.node_thres/100.0, self.options.edge_thres/100.0)
2924
2925         dot.graph(profile, self.theme)
2926
2927
2928 if __name__ == '__main__':
2929     Main().main()