Lib/pstats.py

   1 """Class for printing reports on profiled python code."""
   2
   3 # Class for printing reports on profiled python code. rev 1.0  4/1/94
   4 #
   5 # Based on prior profile module by Sjoerd Mullender...
   6 #   which was hacked somewhat by: Guido van Rossum
   7 #
   8 # see profile.doc and profile.py for more info.
   9
  10 # Copyright 1994, by InfoSeek Corporation, all rights reserved.
  11 # Written by James Roskind
  12 #
  13 # Permission to use, copy, modify, and distribute this Python software
  14 # and its associated documentation for any purpose (subject to the
  15 # restriction in the following sentence) without fee is hereby granted,
  16 # provided that the above copyright notice appears in all copies, and
  17 # that both that copyright notice and this permission notice appear in
  18 # supporting documentation, and that the name of InfoSeek not be used in
  19 # advertising or publicity pertaining to distribution of the software
  20 # without specific, written prior permission.  This permission is
  21 # explicitly restricted to the copying and modification of the software
  22 # to remain in Python, compiled Python, or other languages (such as C)
  23 # wherein the modified or derived code is exclusively imported into a
  24 # Python module.
  25 #
  26 # INFOSEEK CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
  27 # SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
  28 # FITNESS. IN NO EVENT SHALL INFOSEEK CORPORATION BE LIABLE FOR ANY
  29 # SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
  30 # RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
  31 # CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
  32 # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  33
  34
  35 import os
  36 import time
  37 import marshal
  38 import re
  39
  40 import fpformat
  41
  42 __all__ = ["Stats"]
  43
  44 class Stats:
  45     """This class is used for creating reports from data generated by the
  46     Profile class.  It is a "friend" of that class, and imports data either
  47     by direct access to members of Profile class, or by reading in a dictionary
  48     that was emitted (via marshal) from the Profile class.
  49
  50     The big change from the previous Profiler (in terms of raw functionality)
  51     is that an "add()" method has been provided to combine Stats from
  52     several distinct profile runs.  Both the constructor and the add()
  53     method now take arbitrarily many file names as arguments.
  54
  55     All the print methods now take an argument that indicates how many lines
  56     to print.  If the arg is a floating point number between 0 and 1.0, then
  57     it is taken as a decimal percentage of the available lines to be printed
  58     (e.g., .1 means print 10% of all available lines).  If it is an integer,
  59     it is taken to mean the number of lines of data that you wish to have
  60     printed.
  61
  62     The sort_stats() method now processes some additional options (i.e., in
  63     addition to the old -1, 0, 1, or 2).  It takes an arbitrary number of quoted
  64     strings to select the sort order.  For example sort_stats('time', 'name')
  65     sorts on the major key of "internal function time", and on the minor
  66     key of 'the name of the function'.  Look at the two tables in sort_stats()
  67     and get_sort_arg_defs(self) for more examples.
  68
  69     All methods now return "self",  so you can string together commands like:
  70         Stats('foo', 'goo').strip_dirs().sort_stats('calls').\
  71                             print_stats(5).print_callers(5)
  72     """
  73
  74     def __init__(self, *args):
  75         if not len(args):
  76             arg = None
  77         else:
  78             arg = args[0]
  79             args = args[1:]
  80         self.init(arg)
  81         apply(self.add, args).ignore()
  82
  83     def init(self, arg):
  84         self.all_callees = None  # calc only if needed
  85         self.files = []
  86         self.fcn_list = None
  87         self.total_tt = 0
  88         self.total_calls = 0
  89         self.prim_calls = 0
  90         self.max_name_len = 0
  91         self.top_level = {}
  92         self.stats = {}
  93         self.sort_arg_dict = {}
  94         self.load_stats(arg)
  95         trouble = 1
  96         try:
  97             self.get_top_level_stats()
  98             trouble = 0
  99         finally:
 100             if trouble:
 101                 print "Invalid timing data",
 102                 if self.files: print self.files[-1],
 103                 print
 104
 105
 106     def load_stats(self, arg):
 107         if not arg:  self.stats = {}
 108         elif type(arg) == type(""):
 109             f = open(arg, 'rb')
 110             self.stats = marshal.load(f)
 111             f.close()
 112             try:
 113                 file_stats = os.stat(arg)
 114                 arg = time.ctime(file_stats[8]) + "    " + arg
 115             except:  # in case this is not unix
 116                 pass
 117             self.files = [ arg ]
 118         elif hasattr(arg, 'create_stats'):
 119             arg.create_stats()
 120             self.stats = arg.stats
 121             arg.stats = {}
 122         if not self.stats:
 123             raise TypeError,  "Cannot create or construct a " \
 124                       + `self.__class__` \
 125                       + " object from '" + `arg` + "'"
 126         return
 127
 128     def get_top_level_stats(self):
 129         for func in self.stats.keys():
 130             cc, nc, tt, ct, callers = self.stats[func]
 131             self.total_calls = self.total_calls + nc
 132             self.prim_calls  = self.prim_calls  + cc
 133             self.total_tt    = self.total_tt    + tt
 134             if callers.has_key(("jprofile", 0, "profiler")):
 135                 self.top_level[func] = None
 136             if len(func_std_string(func)) > self.max_name_len:
 137                 self.max_name_len = len(func_std_string(func))
 138
 139     def add(self, *arg_list):
 140         if not arg_list: return self
 141         if len(arg_list) > 1: apply(self.add, arg_list[1:])
 142         other = arg_list[0]
 143         if type(self) != type(other) or \
 144                   self.__class__ != other.__class__:
 145             other = Stats(other)
 146         self.files = self.files + other.files
 147         self.total_calls = self.total_calls + other.total_calls
 148         self.prim_calls = self.prim_calls + other.prim_calls
 149         self.total_tt = self.total_tt + other.total_tt
 150         for func in other.top_level.keys():
 151             self.top_level[func] = None
 152
 153         if self.max_name_len < other.max_name_len:
 154             self.max_name_len = other.max_name_len
 155
 156         self.fcn_list = None
 157
 158         for func in other.stats.keys():
 159             if self.stats.has_key(func):
 160                 old_func_stat = self.stats[func]
 161             else:
 162                 old_func_stat = (0, 0, 0, 0, {},)
 163             self.stats[func] = add_func_stats(old_func_stat, \
 164                       other.stats[func])
 165         return self
 166
 167
 168
 169     # list the tuple indices and directions for sorting,
 170     # along with some printable description
 171     sort_arg_dict_default = {\
 172               "calls"     : (((1,-1),              ), "call count"),\
 173               "cumulative": (((3,-1),              ), "cumulative time"),\
 174               "file"      : (((4, 1),              ), "file name"),\
 175               "line"      : (((5, 1),              ), "line number"),\
 176               "module"    : (((4, 1),              ), "file name"),\
 177               "name"      : (((6, 1),              ), "function name"),\
 178               "nfl"       : (((6, 1),(4, 1),(5, 1),), "name/file/line"), \
 179               "pcalls"    : (((0,-1),              ), "call count"),\
 180               "stdname"   : (((7, 1),              ), "standard name"),\
 181               "time"      : (((2,-1),              ), "internal time"),\
 182               }
 183
 184     def get_sort_arg_defs(self):
 185         """Expand all abbreviations that are unique."""
 186         if not self.sort_arg_dict:
 187             self.sort_arg_dict = dict = {}
 188             std_list = dict.keys()
 189             bad_list = {}
 190             for word in self.sort_arg_dict_default.keys():
 191                 fragment = word
 192                 while fragment:
 193                     if not fragment:
 194                         break
 195                     if dict.has_key(fragment):
 196                         bad_list[fragment] = 0
 197                         break
 198                     dict[fragment] = self. \
 199                               sort_arg_dict_default[word]
 200                     fragment = fragment[:-1]
 201             for word in bad_list.keys():
 202                 del dict[word]
 203         return self.sort_arg_dict
 204
 205
 206     def sort_stats(self, *field):
 207         if not field:
 208             self.fcn_list = 0
 209             return self
 210         if len(field) == 1 and type(field[0]) == type(1):
 211             # Be compatible with old profiler
 212             field = [ {-1: "stdname", \
 213                       0:"calls", \
 214                       1:"time", \
 215                       2: "cumulative" }  [ field[0] ] ]
 216
 217         sort_arg_defs = self.get_sort_arg_defs()
 218         sort_tuple = ()
 219         self.sort_type = ""
 220         connector = ""
 221         for word in field:
 222             sort_tuple = sort_tuple + sort_arg_defs[word][0]
 223             self.sort_type = self.sort_type + connector + \
 224                       sort_arg_defs[word][1]
 225             connector = ", "
 226
 227         stats_list = []
 228         for func in self.stats.keys():
 229             cc, nc, tt, ct, callers = self.stats[func]
 230             stats_list.append((cc, nc, tt, ct) + func_split(func) \
 231                                + (func_std_string(func), func,)  )
 232
 233         stats_list.sort(TupleComp(sort_tuple).compare)
 234
 235         self.fcn_list = fcn_list = []
 236         for tuple in stats_list:
 237             fcn_list.append(tuple[-1])
 238         return self
 239
 240
 241     def reverse_order(self):
 242         if self.fcn_list: self.fcn_list.reverse()
 243         return self
 244
 245     def strip_dirs(self):
 246         oldstats = self.stats
 247         self.stats = newstats = {}
 248         max_name_len = 0
 249         for func in oldstats.keys():
 250             cc, nc, tt, ct, callers = oldstats[func]
 251             newfunc = func_strip_path(func)
 252             if len(func_std_string(newfunc)) > max_name_len:
 253                 max_name_len = len(func_std_string(newfunc))
 254             newcallers = {}
 255             for func2 in callers.keys():
 256                 newcallers[func_strip_path(func2)] = \
 257                           callers[func2]
 258
 259             if newstats.has_key(newfunc):
 260                 newstats[newfunc] = add_func_stats( \
 261                           newstats[newfunc],\
 262                           (cc, nc, tt, ct, newcallers))
 263             else:
 264                 newstats[newfunc] = (cc, nc, tt, ct, newcallers)
 265         old_top = self.top_level
 266         self.top_level = new_top = {}
 267         for func in old_top.keys():
 268             new_top[func_strip_path(func)] = None
 269
 270         self.max_name_len = max_name_len
 271
 272         self.fcn_list = None
 273         self.all_callees = None
 274         return self
 275
 276
 277
 278     def calc_callees(self):
 279         if self.all_callees: return
 280         self.all_callees = all_callees = {}
 281         for func in self.stats.keys():
 282             if not all_callees.has_key(func):
 283                 all_callees[func] = {}
 284             cc, nc, tt, ct, callers = self.stats[func]
 285             for func2 in callers.keys():
 286                 if not all_callees.has_key(func2):
 287                     all_callees[func2] = {}
 288                 all_callees[func2][func]  = callers[func2]
 289         return
 290
 291     #******************************************************************
 292     # The following functions support actual printing of reports
 293     #******************************************************************
 294
 295     # Optional "amount" is either a line count, or a percentage of lines.
 296
 297     def eval_print_amount(self, sel, list, msg):
 298         new_list = list
 299         if type(sel) == type(""):
 300             new_list = []
 301             for func in list:
 302                 if re.search(sel, func_std_string(func)):
 303                     new_list.append(func)
 304         else:
 305             count = len(list)
 306             if type(sel) == type(1.0) and 0.0 <= sel < 1.0:
 307                 count = int (count * sel + .5)
 308                 new_list = list[:count]
 309             elif type(sel) == type(1) and 0 <= sel < count:
 310                 count = sel
 311                 new_list = list[:count]
 312         if len(list) != len(new_list):
 313             msg = msg + "   List reduced from " + `len(list)` \
 314                       + " to " + `len(new_list)` + \
 315                       " due to restriction <" + `sel` + ">\n"
 316
 317         return new_list, msg
 318
 319
 320
 321     def get_print_list(self, sel_list):
 322         width = self.max_name_len
 323         if self.fcn_list:
 324             list = self.fcn_list[:]
 325             msg = "   Ordered by: " + self.sort_type + '\n'
 326         else:
 327             list = self.stats.keys()
 328             msg = "   Random listing order was used\n"
 329
 330         for selection in sel_list:
 331             list,msg = self.eval_print_amount(selection, list, msg)
 332
 333         count = len(list)
 334
 335         if not list:
 336             return 0, list
 337         print msg
 338         if count < len(self.stats):
 339             width = 0
 340             for func in list:
 341                 if  len(func_std_string(func)) > width:
 342                     width = len(func_std_string(func))
 343         return width+2, list
 344
 345     def print_stats(self, *amount):
 346         for filename in self.files:
 347             print filename
 348         if self.files: print
 349         indent = "        "
 350         for func in self.top_level.keys():
 351             print indent, func_get_function_name(func)
 352
 353         print  indent, self.total_calls, "function calls",
 354         if self.total_calls != self.prim_calls:
 355             print "(" + `self.prim_calls`, "primitive calls)",
 356         print "in", fpformat.fix(self.total_tt, 3), "CPU seconds"
 357         print
 358         width, list = self.get_print_list(amount)
 359         if list:
 360             self.print_title()
 361             for func in list:
 362                 self.print_line(func)
 363             print
 364             print
 365         return self
 366
 367
 368     def print_callees(self, *amount):
 369         width, list = self.get_print_list(amount)
 370         if list:
 371             self.calc_callees()
 372
 373             self.print_call_heading(width, "called...")
 374             for func in list:
 375                 if self.all_callees.has_key(func):
 376                     self.print_call_line(width, \
 377                               func, self.all_callees[func])
 378                 else:
 379                     self.print_call_line(width, func, {})
 380             print
 381             print
 382         return self
 383
 384     def print_callers(self, *amount):
 385         width, list = self.get_print_list(amount)
 386         if list:
 387             self.print_call_heading(width, "was called by...")
 388             for func in list:
 389                 cc, nc, tt, ct, callers = self.stats[func]
 390                 self.print_call_line(width, func, callers)
 391             print
 392             print
 393         return self
 394
 395     def print_call_heading(self, name_size, column_title):
 396         print "Function ".ljust(name_size) + column_title
 397
 398
 399     def print_call_line(self, name_size, source, call_dict):
 400         print func_std_string(source).ljust(name_size),
 401         if not call_dict:
 402             print "--"
 403             return
 404         clist = call_dict.keys()
 405         clist.sort()
 406         name_size = name_size + 1
 407         indent = ""
 408         for func in clist:
 409             name = func_std_string(func)
 410             print indent*name_size + name + '(' \
 411                       + `call_dict[func]`+')', \
 412                       f8(self.stats[func][3])
 413             indent = " "
 414
 415
 416
 417     def print_title(self):
 418         print 'ncalls'.rjust(9),
 419         print 'tottime'.rjust(8),
 420         print 'percall'.rjust(8),
 421         print 'cumtime'.rjust(8),
 422         print 'percall'.rjust(8),
 423         print 'filename:lineno(function)'
 424
 425
 426     def print_line(self, func):  # hack : should print percentages
 427         cc, nc, tt, ct, callers = self.stats[func]
 428         c = `nc`
 429         if nc != cc:
 430             c = c + '/' + `cc`
 431         print c.rjust(9),
 432         print f8(tt),
 433         if nc == 0:
 434             print ' '*8,
 435         else:
 436             print f8(tt/nc),
 437         print f8(ct),
 438         if cc == 0:
 439             print ' '*8,
 440         else:
 441             print f8(ct/cc),
 442         print func_std_string(func)
 443
 444
 445     def ignore(self):
 446         pass # has no return value, so use at end of line :-)
 447
 448
 449 class TupleComp:
 450     """This class provides a generic function for comparing any two tuples.
 451     Each instance records a list of tuple-indices (from most significant
 452     to least significant), and sort direction (ascending or decending) for
 453     each tuple-index.  The compare functions can then be used as the function
 454     argument to the system sort() function when a list of tuples need to be
 455     sorted in the instances order."""
 456
 457     def __init__(self, comp_select_list):
 458         self.comp_select_list = comp_select_list
 459
 460     def compare (self, left, right):
 461         for index, direction in self.comp_select_list:
 462             l = left[index]
 463             r = right[index]
 464             if l < r:
 465                 return -direction
 466             if l > r:
 467                 return direction
 468         return 0
 469
 470
 471
 472 #**************************************************************************
 473
 474 def func_strip_path(func_name):
 475     file, line, name = func_name
 476     return os.path.basename(file), line, name
 477
 478 def func_get_function_name(func):
 479     return func[2]
 480
 481 def func_std_string(func_name): # match what old profile produced
 482     file, line, name = func_name
 483     return file + ":" + `line` + "(" + name + ")"
 484
 485 def func_split(func_name):
 486     return func_name
 487
 488 #**************************************************************************
 489 # The following functions combine statistics for pairs of functions.
 490 # The bulk of the processing involves correctly handling "call" lists,
 491 # such as callers and callees.
 492 #**************************************************************************
 493
 494 def add_func_stats(target, source):
 495     """Add together all the stats for two profile entries."""
 496     cc, nc, tt, ct, callers = source
 497     t_cc, t_nc, t_tt, t_ct, t_callers = target
 498     return (cc+t_cc, nc+t_nc, tt+t_tt, ct+t_ct, \
 499               add_callers(t_callers, callers))
 500
 501
 502 def add_callers(target, source):
 503     """Combine two caller lists in a single list."""
 504     new_callers = {}
 505     for func in target.keys():
 506         new_callers[func] = target[func]
 507     for func in source.keys():
 508         if new_callers.has_key(func):
 509             new_callers[func] = source[func] + new_callers[func]
 510         else:
 511             new_callers[func] = source[func]
 512     return new_callers
 513
 514 def count_calls(callers):
 515     """Sum the caller statistics to get total number of calls received."""
 516     nc = 0
 517     for func in callers.keys():
 518         nc = nc + callers[func]
 519     return nc
 520
 521 #**************************************************************************
 522 # The following functions support printing of reports
 523 #**************************************************************************
 524
 525 def f8(x):
 526     return fpformat.fix(x, 3).rjust(8)