Lib/pstats.py

   1 """Class for printing reports on profiled python code."""
   2
   3 # Class for printing reports on profiled python code. rev 1.0  4/1/94
   4 #
   5 # Based on prior profile module by Sjoerd Mullender...
   6 #   which was hacked somewhat by: Guido van Rossum
   7 #
   8 # see profile.doc and profile.py for more info.
   9
  10 # Copyright 1994, by InfoSeek Corporation, all rights reserved.
  11 # Written by James Roskind
  12 #
  13 # Permission to use, copy, modify, and distribute this Python software
  14 # and its associated documentation for any purpose (subject to the
  15 # restriction in the following sentence) without fee is hereby granted,
  16 # provided that the above copyright notice appears in all copies, and
  17 # that both that copyright notice and this permission notice appear in
  18 # supporting documentation, and that the name of InfoSeek not be used in
  19 # advertising or publicity pertaining to distribution of the software
  20 # without specific, written prior permission.  This permission is
  21 # explicitly restricted to the copying and modification of the software
  22 # to remain in Python, compiled Python, or other languages (such as C)
  23 # wherein the modified or derived code is exclusively imported into a
  24 # Python module.
  25 #
  26 # INFOSEEK CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
  27 # SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
  28 # FITNESS. IN NO EVENT SHALL INFOSEEK CORPORATION BE LIABLE FOR ANY
  29 # SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
  30 # RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
  31 # CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
  32 # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  33
  34
  35 import os
  36 import time
  37 import marshal
  38 import re
  39
  40 __all__ = ["Stats"]
  41
  42 class Stats:
  43     """This class is used for creating reports from data generated by the
  44     Profile class.  It is a "friend" of that class, and imports data either
  45     by direct access to members of Profile class, or by reading in a dictionary
  46     that was emitted (via marshal) from the Profile class.
  47
  48     The big change from the previous Profiler (in terms of raw functionality)
  49     is that an "add()" method has been provided to combine Stats from
  50     several distinct profile runs.  Both the constructor and the add()
  51     method now take arbitrarily many file names as arguments.
  52
  53     All the print methods now take an argument that indicates how many lines
  54     to print.  If the arg is a floating point number between 0 and 1.0, then
  55     it is taken as a decimal percentage of the available lines to be printed
  56     (e.g., .1 means print 10% of all available lines).  If it is an integer,
  57     it is taken to mean the number of lines of data that you wish to have
  58     printed.
  59
  60     The sort_stats() method now processes some additional options (i.e., in
  61     addition to the old -1, 0, 1, or 2).  It takes an arbitrary number of quoted
  62     strings to select the sort order.  For example sort_stats('time', 'name')
  63     sorts on the major key of "internal function time", and on the minor
  64     key of 'the name of the function'.  Look at the two tables in sort_stats()
  65     and get_sort_arg_defs(self) for more examples.
  66
  67     All methods now return "self",  so you can string together commands like:
  68         Stats('foo', 'goo').strip_dirs().sort_stats('calls').\
  69                             print_stats(5).print_callers(5)
  70     """
  71
  72     def __init__(self, *args):
  73         if not len(args):
  74             arg = None
  75         else:
  76             arg = args[0]
  77             args = args[1:]
  78         self.init(arg)
  79         self.add(*args)
  80
  81     def init(self, arg):
  82         self.all_callees = None  # calc only if needed
  83         self.files = []
  84         self.fcn_list = None
  85         self.total_tt = 0
  86         self.total_calls = 0
  87         self.prim_calls = 0
  88         self.max_name_len = 0
  89         self.top_level = {}
  90         self.stats = {}
  91         self.sort_arg_dict = {}
  92         self.load_stats(arg)
  93         trouble = 1
  94         try:
  95             self.get_top_level_stats()
  96             trouble = 0
  97         finally:
  98             if trouble:
  99                 print "Invalid timing data",
 100                 if self.files: print self.files[-1],
 101                 print
 102
 103     def load_stats(self, arg):
 104         if not arg:  self.stats = {}
 105         elif type(arg) == type(""):
 106             f = open(arg, 'rb')
 107             self.stats = marshal.load(f)
 108             f.close()
 109             try:
 110                 file_stats = os.stat(arg)
 111                 arg = time.ctime(file_stats.st_mtime) + "    " + arg
 112             except:  # in case this is not unix
 113                 pass
 114             self.files = [ arg ]
 115         elif hasattr(arg, 'create_stats'):
 116             arg.create_stats()
 117             self.stats = arg.stats
 118             arg.stats = {}
 119         if not self.stats:
 120             raise TypeError,  "Cannot create or construct a " \
 121                       + `self.__class__` \
 122                       + " object from '" + `arg` + "'"
 123         return
 124
 125     def get_top_level_stats(self):
 126         for func, (cc, nc, tt, ct, callers) in self.stats.items():
 127             self.total_calls += nc
 128             self.prim_calls  += cc
 129             self.total_tt    += tt
 130             if callers.has_key(("jprofile", 0, "profiler")):
 131                 self.top_level[func] = None
 132             if len(func_std_string(func)) > self.max_name_len:
 133                 self.max_name_len = len(func_std_string(func))
 134
 135     def add(self, *arg_list):
 136         if not arg_list: return self
 137         if len(arg_list) > 1: self.add(*arg_list[1:])
 138         other = arg_list[0]
 139         if type(self) != type(other) or self.__class__ != other.__class__:
 140             other = Stats(other)
 141         self.files += other.files
 142         self.total_calls += other.total_calls
 143         self.prim_calls += other.prim_calls
 144         self.total_tt += other.total_tt
 145         for func in other.top_level:
 146             self.top_level[func] = None
 147
 148         if self.max_name_len < other.max_name_len:
 149             self.max_name_len = other.max_name_len
 150
 151         self.fcn_list = None
 152
 153         for func, stat in other.stats.iteritems():
 154             if func in self.stats:
 155                 old_func_stat = self.stats[func]
 156             else:
 157                 old_func_stat = (0, 0, 0, 0, {},)
 158             self.stats[func] = add_func_stats(old_func_stat, stat)
 159         return self
 160
 161     # list the tuple indices and directions for sorting,
 162     # along with some printable description
 163     sort_arg_dict_default = {
 164               "calls"     : (((1,-1),              ), "call count"),
 165               "cumulative": (((3,-1),              ), "cumulative time"),
 166               "file"      : (((4, 1),              ), "file name"),
 167               "line"      : (((5, 1),              ), "line number"),
 168               "module"    : (((4, 1),              ), "file name"),
 169               "name"      : (((6, 1),              ), "function name"),
 170               "nfl"       : (((6, 1),(4, 1),(5, 1),), "name/file/line"),
 171               "pcalls"    : (((0,-1),              ), "call count"),
 172               "stdname"   : (((7, 1),              ), "standard name"),
 173               "time"      : (((2,-1),              ), "internal time"),
 174               }
 175
 176     def get_sort_arg_defs(self):
 177         """Expand all abbreviations that are unique."""
 178         if not self.sort_arg_dict:
 179             self.sort_arg_dict = dict = {}
 180             bad_list = {}
 181             for word, tup in self.sort_arg_dict_default.iteritems():
 182                 fragment = word
 183                 while fragment:
 184                     if not fragment:
 185                         break
 186                     if fragment in dict:
 187                         bad_list[fragment] = 0
 188                         break
 189                     dict[fragment] = tup
 190                     fragment = fragment[:-1]
 191             for word in bad_list:
 192                 del dict[word]
 193         return self.sort_arg_dict
 194
 195     def sort_stats(self, *field):
 196         if not field:
 197             self.fcn_list = 0
 198             return self
 199         if len(field) == 1 and type(field[0]) == type(1):
 200             # Be compatible with old profiler
 201             field = [ {-1: "stdname",
 202                       0:"calls",
 203                       1:"time",
 204                       2: "cumulative" }  [ field[0] ] ]
 205
 206         sort_arg_defs = self.get_sort_arg_defs()
 207         sort_tuple = ()
 208         self.sort_type = ""
 209         connector = ""
 210         for word in field:
 211             sort_tuple = sort_tuple + sort_arg_defs[word][0]
 212             self.sort_type += connector + sort_arg_defs[word][1]
 213             connector = ", "
 214
 215         stats_list = []
 216         for func, (cc, nc, tt, ct, callers) in self.stats.iteritems():
 217             stats_list.append((cc, nc, tt, ct) + func +
 218                               (func_std_string(func), func))
 219
 220         stats_list.sort(TupleComp(sort_tuple).compare)
 221
 222         self.fcn_list = fcn_list = []
 223         for tuple in stats_list:
 224             fcn_list.append(tuple[-1])
 225         return self
 226
 227     def reverse_order(self):
 228         if self.fcn_list:
 229             self.fcn_list.reverse()
 230         return self
 231
 232     def strip_dirs(self):
 233         oldstats = self.stats
 234         self.stats = newstats = {}
 235         max_name_len = 0
 236         for func, (cc, nc, tt, ct, callers) in oldstats.iteritems():
 237             newfunc = func_strip_path(func)
 238             if len(func_std_string(newfunc)) > max_name_len:
 239                 max_name_len = len(func_std_string(newfunc))
 240             newcallers = {}
 241             for func2, caller in callers.iteritems():
 242                 newcallers[func_strip_path(func2)] = caller
 243
 244             if newfunc in newstats:
 245                 newstats[newfunc] = add_func_stats(
 246                                         newstats[newfunc],
 247                                         (cc, nc, tt, ct, newcallers))
 248             else:
 249                 newstats[newfunc] = (cc, nc, tt, ct, newcallers)
 250         old_top = self.top_level
 251         self.top_level = new_top = {}
 252         for func in old_top:
 253             new_top[func_strip_path(func)] = None
 254
 255         self.max_name_len = max_name_len
 256
 257         self.fcn_list = None
 258         self.all_callees = None
 259         return self
 260
 261     def calc_callees(self):
 262         if self.all_callees: return
 263         self.all_callees = all_callees = {}
 264         for func, (cc, nc, tt, ct, callers) in self.stats.iteritems():
 265             if not func in all_callees:
 266                 all_callees[func] = {}
 267             for func2, caller in callers.iteritems():
 268                 if not func2 in all_callees:
 269                     all_callees[func2] = {}
 270                 all_callees[func2][func]  = caller
 271         return
 272
 273     #******************************************************************
 274     # The following functions support actual printing of reports
 275     #******************************************************************
 276
 277     # Optional "amount" is either a line count, or a percentage of lines.
 278
 279     def eval_print_amount(self, sel, list, msg):
 280         new_list = list
 281         if type(sel) == type(""):
 282             new_list = []
 283             for func in list:
 284                 if re.search(sel, func_std_string(func)):
 285                     new_list.append(func)
 286         else:
 287             count = len(list)
 288             if type(sel) == type(1.0) and 0.0 <= sel < 1.0:
 289                 count = int(count * sel + .5)
 290                 new_list = list[:count]
 291             elif type(sel) == type(1) and 0 <= sel < count:
 292                 count = sel
 293                 new_list = list[:count]
 294         if len(list) != len(new_list):
 295             msg = msg + "   List reduced from " + `len(list)` \
 296                       + " to " + `len(new_list)` + \
 297                       " due to restriction <" + `sel` + ">\n"
 298
 299         return new_list, msg
 300
 301     def get_print_list(self, sel_list):
 302         width = self.max_name_len
 303         if self.fcn_list:
 304             list = self.fcn_list[:]
 305             msg = "   Ordered by: " + self.sort_type + '\n'
 306         else:
 307             list = self.stats.keys()
 308             msg = "   Random listing order was used\n"
 309
 310         for selection in sel_list:
 311             list, msg = self.eval_print_amount(selection, list, msg)
 312
 313         count = len(list)
 314
 315         if not list:
 316             return 0, list
 317         print msg
 318         if count < len(self.stats):
 319             width = 0
 320             for func in list:
 321                 if  len(func_std_string(func)) > width:
 322                     width = len(func_std_string(func))
 323         return width+2, list
 324
 325     def print_stats(self, *amount):
 326         for filename in self.files:
 327             print filename
 328         if self.files: print
 329         indent = ' ' * 8
 330         for func in self.top_level:
 331             print indent, func_get_function_name(func)
 332
 333         print indent, self.total_calls, "function calls",
 334         if self.total_calls != self.prim_calls:
 335             print "(%d primitive calls)" % self.prim_calls,
 336         print "in %.3f CPU seconds" % self.total_tt
 337         print
 338         width, list = self.get_print_list(amount)
 339         if list:
 340             self.print_title()
 341             for func in list:
 342                 self.print_line(func)
 343             print
 344             print
 345         return self
 346
 347     def print_callees(self, *amount):
 348         width, list = self.get_print_list(amount)
 349         if list:
 350             self.calc_callees()
 351
 352             self.print_call_heading(width, "called...")
 353             for func in list:
 354                 if func in self.all_callees:
 355                     self.print_call_line(width, func, self.all_callees[func])
 356                 else:
 357                     self.print_call_line(width, func, {})
 358             print
 359             print
 360         return self
 361
 362     def print_callers(self, *amount):
 363         width, list = self.get_print_list(amount)
 364         if list:
 365             self.print_call_heading(width, "was called by...")
 366             for func in list:
 367                 cc, nc, tt, ct, callers = self.stats[func]
 368                 self.print_call_line(width, func, callers)
 369             print
 370             print
 371         return self
 372
 373     def print_call_heading(self, name_size, column_title):
 374         print "Function ".ljust(name_size) + column_title
 375
 376     def print_call_line(self, name_size, source, call_dict):
 377         print func_std_string(source).ljust(name_size),
 378         if not call_dict:
 379             print "--"
 380             return
 381         clist = call_dict.keys()
 382         clist.sort()
 383         name_size = name_size + 1
 384         indent = ""
 385         for func in clist:
 386             name = func_std_string(func)
 387             print indent*name_size + name + '(' \
 388                       + `call_dict[func]`+')', \
 389                       f8(self.stats[func][3])
 390             indent = " "
 391
 392     def print_title(self):
 393         print '   ncalls  tottime  percall  cumtime  percall', \
 394               'filename:lineno(function)'
 395
 396     def print_line(self, func):  # hack : should print percentages
 397         cc, nc, tt, ct, callers = self.stats[func]
 398         c = str(nc)
 399         if nc != cc:
 400             c = c + '/' + str(cc)
 401         print c.rjust(9),
 402         print f8(tt),
 403         if nc == 0:
 404             print ' '*8,
 405         else:
 406             print f8(tt/nc),
 407         print f8(ct),
 408         if cc == 0:
 409             print ' '*8,
 410         else:
 411             print f8(ct/cc),
 412         print func_std_string(func)
 413
 414     def ignore(self):
 415         # Deprecated since 1.5.1 -- see the docs.
 416         pass # has no return value, so use at end of line :-)
 417
 418 class TupleComp:
 419     """This class provides a generic function for comparing any two tuples.
 420     Each instance records a list of tuple-indices (from most significant
 421     to least significant), and sort direction (ascending or decending) for
 422     each tuple-index.  The compare functions can then be used as the function
 423     argument to the system sort() function when a list of tuples need to be
 424     sorted in the instances order."""
 425
 426     def __init__(self, comp_select_list):
 427         self.comp_select_list = comp_select_list
 428
 429     def compare (self, left, right):
 430         for index, direction in self.comp_select_list:
 431             l = left[index]
 432             r = right[index]
 433             if l < r:
 434                 return -direction
 435             if l > r:
 436                 return direction
 437         return 0
 438
 439 #**************************************************************************
 440 # func_name is a triple (file:string, line:int, name:string)
 441
 442 def func_strip_path(func_name):
 443     file, line, name = func_name
 444     return os.path.basename(file), line, name
 445
 446 def func_get_function_name(func):
 447     return func[2]
 448
 449 def func_std_string(func_name): # match what old profile produced
 450     return "%s:%d(%s)" % func_name
 451
 452 #**************************************************************************
  453 # The following functions combine statistics for pairs of functions.
 454 # The bulk of the processing involves correctly handling "call" lists,
 455 # such as callers and callees.
 456 #**************************************************************************
 457
 458 def add_func_stats(target, source):
 459     """Add together all the stats for two profile entries."""
 460     cc, nc, tt, ct, callers = source
 461     t_cc, t_nc, t_tt, t_ct, t_callers = target
 462     return (cc+t_cc, nc+t_nc, tt+t_tt, ct+t_ct,
 463               add_callers(t_callers, callers))
 464
 465 def add_callers(target, source):
 466     """Combine two caller lists in a single list."""
 467     new_callers = {}
 468     for func, caller in target.iteritems():
 469         new_callers[func] = caller
 470     for func, caller in source.iteritems():
 471         if func in new_callers:
 472             new_callers[func] = caller + new_callers[func]
 473         else:
 474             new_callers[func] = caller
 475     return new_callers
 476
 477 def count_calls(callers):
 478     """Sum the caller statistics to get total number of calls received."""
 479     nc = 0
 480     for calls in callers.itervalues():
 481         nc += calls
 482     return nc
 483
 484 #**************************************************************************
 485 # The following functions support printing of reports
 486 #**************************************************************************
 487
 488 def f8(x):
 489     return "%8.3f" % x
 490
 491 #**************************************************************************
 492 # Statistics browser added by ESR, April 2001
 493 #**************************************************************************
 494
 495 if __name__ == '__main__':
 496     import cmd
 497     try:
 498         import readline
 499     except ImportError:
 500         pass
 501
 502     class ProfileBrowser(cmd.Cmd):
 503         def __init__(self, profile=None):
 504             cmd.Cmd.__init__(self)
 505             self.prompt = "% "
 506             if profile is not None:
 507                 self.stats = Stats(profile)
 508             else:
 509                 self.stats = None
 510
 511         def generic(self, fn, line):
 512             args = line.split()
 513             processed = []
 514             for term in args:
 515                 try:
 516                     processed.append(int(term))
 517                     continue
 518                 except ValueError:
 519                     pass
 520                 try:
 521                     frac = float(term)
 522                     if frac > 1 or frac < 0:
 523                         print "Fraction argument mus be in [0, 1]"
 524                         continue
 525                     processed.append(frac)
 526                     continue
 527                 except ValueError:
 528                     pass
 529                 processed.append(term)
 530             if self.stats:
 531                 getattr(self.stats, fn)(*processed)
 532             else:
 533                 print "No statistics object is loaded."
 534             return 0
 535         def generic_help(self):
 536             print "Arguments may be:"
 537             print "* An integer maximum number of entries to print."
 538             print "* A decimal fractional number between 0 and 1, controlling"
 539             print "  what fraction of selected entries to print."
 540             print "* A regular expression; only entries with function names"
 541             print "  that match it are printed."
 542
 543         def do_add(self, line):
 544             self.stats.add(line)
 545             return 0
 546         def help_add(self):
 547             print "Add profile info from given file to current statistics object."
 548
 549         def do_callees(self, line):
 550             return self.generic('print_callees', line)
 551         def help_callees(self):
 552             print "Print callees statistics from the current stat object."
 553             self.generic_help()
 554
 555         def do_callers(self, line):
 556             return self.generic('print_callers', line)
 557         def help_callers(self):
 558             print "Print callers statistics from the current stat object."
 559             self.generic_help()
 560
 561         def do_EOF(self, line):
 562             print ""
 563             return 1
 564         def help_EOF(self):
 565             print "Leave the profile brower."
 566
 567         def do_quit(self, line):
 568             return 1
 569         def help_quit(self):
 570             print "Leave the profile brower."
 571
 572         def do_read(self, line):
 573             if line:
 574                 try:
 575                     self.stats = Stats(line)
 576                 except IOError, args:
 577                     print args[1]
 578                     return
 579                 self.prompt = line + "% "
 580             elif len(self.prompt) > 2:
 581                 line = self.prompt[-2:]
 582             else:
 583                 print "No statistics object is current -- cannot reload."
 584             return 0
 585         def help_read(self):
 586             print "Read in profile data from a specified file."
 587
 588         def do_reverse(self, line):
 589             self.stats.reverse_order()
 590             return 0
 591         def help_reverse(self):
 592             print "Reverse the sort order of the profiling report."
 593
 594         def do_sort(self, line):
 595             abbrevs = self.stats.get_sort_arg_defs()
 596             if line and not filter(lambda x,a=abbrevs: x not in a,line.split()):
 597                 self.stats.sort_stats(*line.split())
 598             else:
 599                 print "Valid sort keys (unique prefixes are accepted):"
 600                 for (key, value) in Stats.sort_arg_dict_default.iteritems():
 601                     print "%s -- %s" % (key, value[1])
 602             return 0
 603         def help_sort(self):
 604             print "Sort profile data according to specified keys."
 605             print "(Typing `sort' without arguments lists valid keys.)"
 606         def complete_sort(self, text, *args):
 607             return [a for a in Stats.sort_arg_dict_default if a.startswith(text)]
 608
 609         def do_stats(self, line):
 610             return self.generic('print_stats', line)
 611         def help_stats(self):
 612             print "Print statistics from the current stat object."
 613             self.generic_help()
 614
 615         def do_strip(self, line):
 616             self.stats.strip_dirs()
 617             return 0
 618         def help_strip(self):
 619             print "Strip leading path information from filenames in the report."
 620
 621         def postcmd(self, stop, line):
 622             if stop:
 623                 return stop
 624             return None
 625
 626     import sys
 627     print "Welcome to the profile statistics browser."
 628     if len(sys.argv) > 1:
 629         initprofile = sys.argv[1]
 630     else:
 631         initprofile = None
 632     try:
 633         ProfileBrowser(initprofile).cmdloop()
 634         print "Goodbye."
 635     except KeyboardInterrupt:
 636         pass
 637
 638 # That's all, folks.