Lib/pstats.py

   1 """Class for printing reports on profiled python code."""
   2
   3 # Class for printing reports on profiled python code. rev 1.0  4/1/94
   4 #
   5 # Based on prior profile module by Sjoerd Mullender...
   6 #   which was hacked somewhat by: Guido van Rossum
   7 #
   8 # see profile.doc and profile.py for more info.
   9
  10 # Copyright 1994, by InfoSeek Corporation, all rights reserved.
  11 # Written by James Roskind
  12 #
  13 # Permission to use, copy, modify, and distribute this Python software
  14 # and its associated documentation for any purpose (subject to the
  15 # restriction in the following sentence) without fee is hereby granted,
  16 # provided that the above copyright notice appears in all copies, and
  17 # that both that copyright notice and this permission notice appear in
  18 # supporting documentation, and that the name of InfoSeek not be used in
  19 # advertising or publicity pertaining to distribution of the software
  20 # without specific, written prior permission.  This permission is
  21 # explicitly restricted to the copying and modification of the software
  22 # to remain in Python, compiled Python, or other languages (such as C)
  23 # wherein the modified or derived code is exclusively imported into a
  24 # Python module.
  25 #
  26 # INFOSEEK CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
  27 # SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
  28 # FITNESS. IN NO EVENT SHALL INFOSEEK CORPORATION BE LIABLE FOR ANY
  29 # SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
  30 # RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
  31 # CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
  32 # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  33
  34
  35 import os
  36 import time
  37 import marshal
  38 import re
  39
  40 __all__ = ["Stats"]
  41
  42 class Stats:
  43     """This class is used for creating reports from data generated by the
  44     Profile class.  It is a "friend" of that class, and imports data either
  45     by direct access to members of Profile class, or by reading in a dictionary
  46     that was emitted (via marshal) from the Profile class.
  47
  48     The big change from the previous Profiler (in terms of raw functionality)
  49     is that an "add()" method has been provided to combine Stats from
  50     several distinct profile runs.  Both the constructor and the add()
  51     method now take arbitrarily many file names as arguments.
  52
  53     All the print methods now take an argument that indicates how many lines
  54     to print.  If the arg is a floating point number between 0 and 1.0, then
  55     it is taken as a decimal percentage of the available lines to be printed
  56     (e.g., .1 means print 10% of all available lines).  If it is an integer,
  57     it is taken to mean the number of lines of data that you wish to have
  58     printed.
  59
  60     The sort_stats() method now processes some additional options (i.e., in
  61     addition to the old -1, 0, 1, or 2).  It takes an arbitrary number of quoted
  62     strings to select the sort order.  For example sort_stats('time', 'name')
  63     sorts on the major key of "internal function time", and on the minor
  64     key of 'the name of the function'.  Look at the two tables in sort_stats()
  65     and get_sort_arg_defs(self) for more examples.
  66
  67     All methods now return "self",  so you can string together commands like:
  68         Stats('foo', 'goo').strip_dirs().sort_stats('calls').\
  69                             print_stats(5).print_callers(5)
  70     """
  71
  72     def __init__(self, *args):
  73         if not len(args):
  74             arg = None
  75         else:
  76             arg = args[0]
  77             args = args[1:]
  78         self.init(arg)
  79         apply(self.add, args)
  80
  81     def init(self, arg):
  82         self.all_callees = None  # calc only if needed
  83         self.files = []
  84         self.fcn_list = None
  85         self.total_tt = 0
  86         self.total_calls = 0
  87         self.prim_calls = 0
  88         self.max_name_len = 0
  89         self.top_level = {}
  90         self.stats = {}
  91         self.sort_arg_dict = {}
  92         self.load_stats(arg)
  93         trouble = 1
  94         try:
  95             self.get_top_level_stats()
  96             trouble = 0
  97         finally:
  98             if trouble:
  99                 print "Invalid timing data",
 100                 if self.files: print self.files[-1],
 101                 print
 102
 103     def load_stats(self, arg):
 104         if not arg:  self.stats = {}
 105         elif type(arg) == type(""):
 106             f = open(arg, 'rb')
 107             self.stats = marshal.load(f)
 108             f.close()
 109             try:
 110                 file_stats = os.stat(arg)
 111                 arg = time.ctime(file_stats[8]) + "    " + arg
 112             except:  # in case this is not unix
 113                 pass
 114             self.files = [ arg ]
 115         elif hasattr(arg, 'create_stats'):
 116             arg.create_stats()
 117             self.stats = arg.stats
 118             arg.stats = {}
 119         if not self.stats:
 120             raise TypeError,  "Cannot create or construct a " \
 121                       + `self.__class__` \
 122                       + " object from '" + `arg` + "'"
 123         return
 124
 125     def get_top_level_stats(self):
 126         for func, (cc, nc, tt, ct, callers) in self.stats.items():
 127             self.total_calls += nc
 128             self.prim_calls  += cc
 129             self.total_tt    += tt
 130             if callers.has_key(("jprofile", 0, "profiler")):
 131                 self.top_level[func] = None
 132             if len(func_std_string(func)) > self.max_name_len:
 133                 self.max_name_len = len(func_std_string(func))
 134
 135     def add(self, *arg_list):
 136         if not arg_list: return self
 137         if len(arg_list) > 1: apply(self.add, arg_list[1:])
 138         other = arg_list[0]
 139         if type(self) != type(other) or self.__class__ != other.__class__:
 140             other = Stats(other)
 141         self.files += other.files
 142         self.total_calls += other.total_calls
 143         self.prim_calls += other.prim_calls
 144         self.total_tt += other.total_tt
 145         for func in other.top_level.keys():
 146             self.top_level[func] = None
 147
 148         if self.max_name_len < other.max_name_len:
 149             self.max_name_len = other.max_name_len
 150
 151         self.fcn_list = None
 152
 153         for func in other.stats.keys():
 154             if self.stats.has_key(func):
 155                 old_func_stat = self.stats[func]
 156             else:
 157                 old_func_stat = (0, 0, 0, 0, {},)
 158             self.stats[func] = add_func_stats(old_func_stat, other.stats[func])
 159         return self
 160
 161     # list the tuple indices and directions for sorting,
 162     # along with some printable description
 163     sort_arg_dict_default = {
 164               "calls"     : (((1,-1),              ), "call count"),
 165               "cumulative": (((3,-1),              ), "cumulative time"),
 166               "file"      : (((4, 1),              ), "file name"),
 167               "line"      : (((5, 1),              ), "line number"),
 168               "module"    : (((4, 1),              ), "file name"),
 169               "name"      : (((6, 1),              ), "function name"),
 170               "nfl"       : (((6, 1),(4, 1),(5, 1),), "name/file/line"),
 171               "pcalls"    : (((0,-1),              ), "call count"),
 172               "stdname"   : (((7, 1),              ), "standard name"),
 173               "time"      : (((2,-1),              ), "internal time"),
 174               }
 175
 176     def get_sort_arg_defs(self):
 177         """Expand all abbreviations that are unique."""
 178         if not self.sort_arg_dict:
 179             self.sort_arg_dict = dict = {}
 180             bad_list = {}
 181             for word in self.sort_arg_dict_default.keys():
 182                 fragment = word
 183                 while fragment:
 184                     if not fragment:
 185                         break
 186                     if dict.has_key(fragment):
 187                         bad_list[fragment] = 0
 188                         break
 189                     dict[fragment] = self.sort_arg_dict_default[word]
 190                     fragment = fragment[:-1]
 191             for word in bad_list.keys():
 192                 del dict[word]
 193         return self.sort_arg_dict
 194
 195     def sort_stats(self, *field):
 196         if not field:
 197             self.fcn_list = 0
 198             return self
 199         if len(field) == 1 and type(field[0]) == type(1):
 200             # Be compatible with old profiler
 201             field = [ {-1: "stdname",
 202                       0:"calls",
 203                       1:"time",
 204                       2: "cumulative" }  [ field[0] ] ]
 205
 206         sort_arg_defs = self.get_sort_arg_defs()
 207         sort_tuple = ()
 208         self.sort_type = ""
 209         connector = ""
 210         for word in field:
 211             sort_tuple = sort_tuple + sort_arg_defs[word][0]
 212             self.sort_type += connector + sort_arg_defs[word][1]
 213             connector = ", "
 214
 215         stats_list = []
 216         for func in self.stats.keys():
 217             cc, nc, tt, ct, callers = self.stats[func]
 218             stats_list.append((cc, nc, tt, ct) + func +
 219                               (func_std_string(func), func))
 220
 221         stats_list.sort(TupleComp(sort_tuple).compare)
 222
 223         self.fcn_list = fcn_list = []
 224         for tuple in stats_list:
 225             fcn_list.append(tuple[-1])
 226         return self
 227
 228     def reverse_order(self):
 229         if self.fcn_list:
 230             self.fcn_list.reverse()
 231         return self
 232
 233     def strip_dirs(self):
 234         oldstats = self.stats
 235         self.stats = newstats = {}
 236         max_name_len = 0
 237         for func in oldstats.keys():
 238             cc, nc, tt, ct, callers = oldstats[func]
 239             newfunc = func_strip_path(func)
 240             if len(func_std_string(newfunc)) > max_name_len:
 241                 max_name_len = len(func_std_string(newfunc))
 242             newcallers = {}
 243             for func2 in callers.keys():
 244                 newcallers[func_strip_path(func2)] = callers[func2]
 245
 246             if newstats.has_key(newfunc):
 247                 newstats[newfunc] = add_func_stats(
 248                                         newstats[newfunc],
 249                                         (cc, nc, tt, ct, newcallers))
 250             else:
 251                 newstats[newfunc] = (cc, nc, tt, ct, newcallers)
 252         old_top = self.top_level
 253         self.top_level = new_top = {}
 254         for func in old_top.keys():
 255             new_top[func_strip_path(func)] = None
 256
 257         self.max_name_len = max_name_len
 258
 259         self.fcn_list = None
 260         self.all_callees = None
 261         return self
 262
 263     def calc_callees(self):
 264         if self.all_callees: return
 265         self.all_callees = all_callees = {}
 266         for func in self.stats.keys():
 267             if not all_callees.has_key(func):
 268                 all_callees[func] = {}
 269             cc, nc, tt, ct, callers = self.stats[func]
 270             for func2 in callers.keys():
 271                 if not all_callees.has_key(func2):
 272                     all_callees[func2] = {}
 273                 all_callees[func2][func]  = callers[func2]
 274         return
 275
 276     #******************************************************************
 277     # The following functions support actual printing of reports
 278     #******************************************************************
 279
 280     # Optional "amount" is either a line count, or a percentage of lines.
 281
 282     def eval_print_amount(self, sel, list, msg):
 283         new_list = list
 284         if type(sel) == type(""):
 285             new_list = []
 286             for func in list:
 287                 if re.search(sel, func_std_string(func)):
 288                     new_list.append(func)
 289         else:
 290             count = len(list)
 291             if type(sel) == type(1.0) and 0.0 <= sel < 1.0:
 292                 count = int(count * sel + .5)
 293                 new_list = list[:count]
 294             elif type(sel) == type(1) and 0 <= sel < count:
 295                 count = sel
 296                 new_list = list[:count]
 297         if len(list) != len(new_list):
 298             msg = msg + "   List reduced from " + `len(list)` \
 299                       + " to " + `len(new_list)` + \
 300                       " due to restriction <" + `sel` + ">\n"
 301
 302         return new_list, msg
 303
 304     def get_print_list(self, sel_list):
 305         width = self.max_name_len
 306         if self.fcn_list:
 307             list = self.fcn_list[:]
 308             msg = "   Ordered by: " + self.sort_type + '\n'
 309         else:
 310             list = self.stats.keys()
 311             msg = "   Random listing order was used\n"
 312
 313         for selection in sel_list:
 314             list, msg = self.eval_print_amount(selection, list, msg)
 315
 316         count = len(list)
 317
 318         if not list:
 319             return 0, list
 320         print msg
 321         if count < len(self.stats):
 322             width = 0
 323             for func in list:
 324                 if  len(func_std_string(func)) > width:
 325                     width = len(func_std_string(func))
 326         return width+2, list
 327
 328     def print_stats(self, *amount):
 329         for filename in self.files:
 330             print filename
 331         if self.files: print
 332         indent = ' ' * 8
 333         for func in self.top_level.keys():
 334             print indent, func_get_function_name(func)
 335
 336         print indent, self.total_calls, "function calls",
 337         if self.total_calls != self.prim_calls:
 338             print "(%d primitive calls)" % self.prim_calls,
 339         print "in %.3f CPU seconds" % self.total_tt
 340         print
 341         width, list = self.get_print_list(amount)
 342         if list:
 343             self.print_title()
 344             for func in list:
 345                 self.print_line(func)
 346             print
 347             print
 348         return self
 349
 350     def print_callees(self, *amount):
 351         width, list = self.get_print_list(amount)
 352         if list:
 353             self.calc_callees()
 354
 355             self.print_call_heading(width, "called...")
 356             for func in list:
 357                 if self.all_callees.has_key(func):
 358                     self.print_call_line(width, func, self.all_callees[func])
 359                 else:
 360                     self.print_call_line(width, func, {})
 361             print
 362             print
 363         return self
 364
 365     def print_callers(self, *amount):
 366         width, list = self.get_print_list(amount)
 367         if list:
 368             self.print_call_heading(width, "was called by...")
 369             for func in list:
 370                 cc, nc, tt, ct, callers = self.stats[func]
 371                 self.print_call_line(width, func, callers)
 372             print
 373             print
 374         return self
 375
 376     def print_call_heading(self, name_size, column_title):
 377         print "Function ".ljust(name_size) + column_title
 378
 379     def print_call_line(self, name_size, source, call_dict):
 380         print func_std_string(source).ljust(name_size),
 381         if not call_dict:
 382             print "--"
 383             return
 384         clist = call_dict.keys()
 385         clist.sort()
 386         name_size = name_size + 1
 387         indent = ""
 388         for func in clist:
 389             name = func_std_string(func)
 390             print indent*name_size + name + '(' \
 391                       + `call_dict[func]`+')', \
 392                       f8(self.stats[func][3])
 393             indent = " "
 394
 395     def print_title(self):
 396         print '   ncalls  tottime  percall  cumtime  percall', \
 397               'filename:lineno(function)'
 398
 399     def print_line(self, func):  # hack : should print percentages
 400         cc, nc, tt, ct, callers = self.stats[func]
 401         c = str(nc)
 402         if nc != cc:
 403             c = c + '/' + str(cc)
 404         print c.rjust(9),
 405         print f8(tt),
 406         if nc == 0:
 407             print ' '*8,
 408         else:
 409             print f8(tt/nc),
 410         print f8(ct),
 411         if cc == 0:
 412             print ' '*8,
 413         else:
 414             print f8(ct/cc),
 415         print func_std_string(func)
 416
 417     def ignore(self):
 418         # Deprecated since 1.5.1 -- see the docs.
 419         pass # has no return value, so use at end of line :-)
 420
 421 class TupleComp:
 422     """This class provides a generic function for comparing any two tuples.
 423     Each instance records a list of tuple-indices (from most significant
 424     to least significant), and sort direction (ascending or decending) for
 425     each tuple-index.  The compare functions can then be used as the function
 426     argument to the system sort() function when a list of tuples need to be
 427     sorted in the instances order."""
 428
 429     def __init__(self, comp_select_list):
 430         self.comp_select_list = comp_select_list
 431
 432     def compare (self, left, right):
 433         for index, direction in self.comp_select_list:
 434             l = left[index]
 435             r = right[index]
 436             if l < r:
 437                 return -direction
 438             if l > r:
 439                 return direction
 440         return 0
 441
 442 #**************************************************************************
 443 # func_name is a triple (file:string, line:int, name:string)
 444
 445 def func_strip_path(func_name):
 446     file, line, name = func_name
 447     return os.path.basename(file), line, name
 448
 449 def func_get_function_name(func):
 450     return func[2]
 451
 452 def func_std_string(func_name): # match what old profile produced
 453     return "%s:%d(%s)" % func_name
 454
 455 #**************************************************************************
  456 # The following functions combine statistics for pairs of functions.
 457 # The bulk of the processing involves correctly handling "call" lists,
 458 # such as callers and callees.
 459 #**************************************************************************
 460
 461 def add_func_stats(target, source):
 462     """Add together all the stats for two profile entries."""
 463     cc, nc, tt, ct, callers = source
 464     t_cc, t_nc, t_tt, t_ct, t_callers = target
 465     return (cc+t_cc, nc+t_nc, tt+t_tt, ct+t_ct,
 466               add_callers(t_callers, callers))
 467
 468 def add_callers(target, source):
 469     """Combine two caller lists in a single list."""
 470     new_callers = {}
 471     for func in target.keys():
 472         new_callers[func] = target[func]
 473     for func in source.keys():
 474         if new_callers.has_key(func):
 475             new_callers[func] = source[func] + new_callers[func]
 476         else:
 477             new_callers[func] = source[func]
 478     return new_callers
 479
 480 def count_calls(callers):
 481     """Sum the caller statistics to get total number of calls received."""
 482     nc = 0
 483     for func in callers.keys():
 484         nc += callers[func]
 485     return nc
 486
 487 #**************************************************************************
 488 # The following functions support printing of reports
 489 #**************************************************************************
 490
 491 def f8(x):
 492     return "%8.3f" % x
 493
 494 #**************************************************************************
 495 # Statistics browser added by ESR, April 2001
 496 #**************************************************************************
 497
 498 if __name__ == '__main__':
 499     import cmd
 500     try:
 501         import readline
 502     except ImportError:
 503         pass
 504
 505     class ProfileBrowser(cmd.Cmd):
 506         def __init__(self, profile=None):
 507             cmd.Cmd.__init__(self)
 508             self.prompt = "% "
 509             if profile:
 510                 self.stats = Stats(profile)
 511             else:
 512                 self.stats = None
 513
 514         def generic(self, fn, line):
 515             args = line.split()
 516             processed = []
 517             for term in args:
 518                 try:
 519                     processed.append(int(term))
 520                     continue
 521                 except ValueError:
 522                     pass
 523                 try:
 524                     frac = float(term)
 525                     if frac > 1 or frac < 0:
 526                         print "Fraction argument mus be in [0, 1]"
 527                         continue
 528                     processed.append(frac)
 529                     continue
 530                 except ValueError:
 531                     pass
 532                 processed.append(term)
 533             if self.stats:
 534                 apply(getattr(self.stats, fn), processed)
 535             else:
 536                 print "No statistics object is loaded."
 537             return 0
 538         def generic_help(self):
 539             print "Arguments may be:"
 540             print "* An integer maximum number of entries to print."
 541             print "* A decimal fractional number between 0 and 1, controlling"
 542             print "  what fraction of selected entries to print."
 543             print "* A regular expression; only entries with function names"
 544             print "  that match it are printed."
 545
 546         def do_add(self, line):
 547             self.stats.add(line)
 548             return 0
 549         def help_add(self):
 550             print "Add profile info from given file to current statistics object."
 551
 552         def do_callees(self, line):
 553             return self.generic('print_callees', line)
 554         def help_callees(self):
 555             print "Print callees statistics from the current stat object."
 556             self.generic_help()
 557
 558         def do_callers(self, line):
 559             return self.generic('print_callers', line)
 560         def help_callers(self):
 561             print "Print callers statistics from the current stat object."
 562             self.generic_help()
 563
 564         def do_EOF(self, line):
 565             print ""
 566             return 1
 567         def help_EOF(self):
 568             print "Leave the profile brower."
 569
 570         def do_quit(self, line):
 571             return 1
 572         def help_quit(self):
 573             print "Leave the profile brower."
 574
 575         def do_read(self, line):
 576             if line:
 577                 try:
 578                     self.stats = Stats(line)
 579                 except IOError, args:
 580                     print args[1]
 581                     return
 582                 self.prompt = line + "% "
 583             elif len(self.prompt) > 2:
 584                 line = self.prompt[-2:]
 585             else:
 586                 print "No statistics object is current -- cannot reload."
 587             return 0
 588         def help_read(self):
 589             print "Read in profile data from a specified file."
 590
 591         def do_reverse(self, line):
 592             self.stats.reverse_order()
 593             return 0
 594         def help_reverse(self):
 595             print "Reverse the sort order of the profiling report."
 596
 597         def do_sort(self, line):
 598             abbrevs = self.stats.get_sort_arg_defs().keys()
 599             if line and not filter(lambda x,a=abbrevs: x not in a,line.split()):
 600                 apply(self.stats.sort_stats, line.split())
 601             else:
 602                 print "Valid sort keys (unique prefixes are accepted):"
 603                 for (key, value) in Stats.sort_arg_dict_default.items():
 604                     print "%s -- %s" % (key, value[1])
 605             return 0
 606         def help_sort(self):
 607             print "Sort profile data according to specified keys."
 608             print "(Typing `sort' without arguments lists valid keys.)"
 609         def complete_sort(self, text, *args):
 610             return [a for a in Stats.sort_arg_dict_default.keys() if a.startswith(text)]
 611
 612         def do_stats(self, line):
 613             return self.generic('print_stats', line)
 614         def help_stats(self):
 615             print "Print statistics from the current stat object."
 616             self.generic_help()
 617
 618         def do_strip(self, line):
 619             self.stats.strip_dirs()
 620             return 0
 621         def help_strip(self):
 622             print "Strip leading path information from filenames in the report."
 623
 624         def postcmd(self, stop, line):
 625             if stop:
 626                 return stop
 627             return None
 628
 629     import sys
 630     print "Welcome to the profile statistics browser."
 631     if len(sys.argv) > 1:
 632         initprofile = sys.argv[1]
 633     else:
 634         initprofile = None
 635     try:
 636         ProfileBrowser(initprofile).cmdloop()
 637         print "Goodbye."
 638     except KeyboardInterrupt:
 639         pass
 640
 641 # That's all, folks.