Lib/pstats.py

   1 """Class for printing reports on profiled python code."""
   2
   3 # Class for printing reports on profiled python code. rev 1.0  4/1/94
   4 #
   5 # Based on prior profile module by Sjoerd Mullender...
   6 #   which was hacked somewhat by: Guido van Rossum
   7 #
   8 # see profile.doc and profile.py for more info.
   9
  10 # Copyright 1994, by InfoSeek Corporation, all rights reserved.
  11 # Written by James Roskind
  12 #
  13 # Permission to use, copy, modify, and distribute this Python software
  14 # and its associated documentation for any purpose (subject to the
  15 # restriction in the following sentence) without fee is hereby granted,
  16 # provided that the above copyright notice appears in all copies, and
  17 # that both that copyright notice and this permission notice appear in
  18 # supporting documentation, and that the name of InfoSeek not be used in
  19 # advertising or publicity pertaining to distribution of the software
  20 # without specific, written prior permission.  This permission is
  21 # explicitly restricted to the copying and modification of the software
  22 # to remain in Python, compiled Python, or other languages (such as C)
  23 # wherein the modified or derived code is exclusively imported into a
  24 # Python module.
  25 #
  26 # INFOSEEK CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
  27 # SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
  28 # FITNESS. IN NO EVENT SHALL INFOSEEK CORPORATION BE LIABLE FOR ANY
  29 # SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
  30 # RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
  31 # CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
  32 # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  33
  34
  35 import os
  36 import time
  37 import string
  38 import marshal
  39 import re
  40
  41 import fpformat
  42
  43 class Stats:
  44         """This class is used for creating reports from data generated by the
  45         Profile class.  It is a "friend" of that class, and imports data either
  46         by direct access to members of Profile class, or by reading in a dictionary
  47         that was emitted (via marshal) from the Profile class.
  48
  49         The big change from the previous Profiler (in terms of raw functionality)
  50         is that an "add()" method has been provided to combine Stats from
  51         several distinct profile runs.  Both the constructor and the add()
  52         method now take arbitrarily many file names as arguments.
  53
  54         All the print methods now take an argument that indicates how many lines
  55         to print.  If the arg is a floating point number between 0 and 1.0, then
  56         it is taken as a decimal percentage of the available lines to be printed
  57         (e.g., .1 means print 10% of all available lines).  If it is an integer,
  58         it is taken to mean the number of lines of data that you wish to have
  59         printed.
  60
  61         The sort_stats() method now processes some additional options (i.e., in
  62         addition to the old -1, 0, 1, or 2).  It takes an arbitrary number of quoted
  63         strings to select the sort order.  For example sort_stats('time', 'name')
  64         sorts on the major key of "internal function time", and on the minor
  65         key of 'the name of the function'.  Look at the two tables in sort_stats()
  66         and get_sort_arg_defs(self) for more examples.
  67
  68         All methods now return "self",  so you can string together commands like:
  69             Stats('foo', 'goo').strip_dirs().sort_stats('calls').\
  70                                 print_stats(5).print_callers(5)
  71         """
  72
  73         def __init__(self, *args):
  74                 if not len(args):
  75                         arg = None
  76                 else:
  77                         arg = args[0]
  78                         args = args[1:]
  79                 self.init(arg)
  80                 apply(self.add, args).ignore()
  81
  82         def init(self, arg):
  83                 self.all_callees = None  # calc only if needed
  84                 self.files = []
  85                 self.fcn_list = None
  86                 self.total_tt = 0
  87                 self.total_calls = 0
  88                 self.prim_calls = 0
  89                 self.max_name_len = 0
  90                 self.top_level = {}
  91                 self.stats = {}
  92                 self.sort_arg_dict = {}
  93                 self.load_stats(arg)
  94                 trouble = 1
  95                 try:
  96                         self.get_top_level_stats()
  97                         trouble = 0
  98                 finally:
  99                         if trouble:
 100                                 print "Invalid timing data",
 101                                 if self.files: print self.files[-1],
 102                                 print
 103
 104
 105         def load_stats(self, arg):
 106                 if not arg:  self.stats = {}
 107                 elif type(arg) == type(""):
 108                         f = open(arg, 'rb')
 109                         self.stats = marshal.load(f)
 110                         f.close()
 111                         try:
 112                                 file_stats = os.stat(arg)
 113                                 arg = time.ctime(file_stats[8]) + "    " + arg
 114                         except:  # in case this is not unix
 115                                 pass
 116                         self.files = [ arg ]
 117                 elif hasattr(arg, 'create_stats'):
 118                         arg.create_stats()
 119                         self.stats = arg.stats
 120                         arg.stats = {}
 121                 if not self.stats:
 122                         raise TypeError,  "Cannot create or construct a " \
 123                                   + `self.__class__` \
 124                                   + " object from '" + `arg` + "'"
 125                 return
 126
 127         def get_top_level_stats(self):
 128                 for func in self.stats.keys():
 129                         cc, nc, tt, ct, callers = self.stats[func]
 130                         self.total_calls = self.total_calls + nc
 131                         self.prim_calls  = self.prim_calls  + cc
 132                         self.total_tt    = self.total_tt    + tt
 133                         if callers.has_key(("jprofile", 0, "profiler")):
 134                                 self.top_level[func] = None
 135                         if len(func_std_string(func)) > self.max_name_len:
 136                                 self.max_name_len = len(func_std_string(func))
 137
 138         def add(self, *arg_list):
 139                 if not arg_list: return self
 140                 if len(arg_list) > 1: apply(self.add, arg_list[1:])
 141                 other = arg_list[0]
 142                 if type(self) != type(other) or \
 143                           self.__class__ != other.__class__:
 144                         other = Stats(other)
 145                 self.files = self.files + other.files
 146                 self.total_calls = self.total_calls + other.total_calls
 147                 self.prim_calls = self.prim_calls + other.prim_calls
 148                 self.total_tt = self.total_tt + other.total_tt
 149                 for func in other.top_level.keys():
 150                         self.top_level[func] = None
 151
 152                 if self.max_name_len < other.max_name_len:
 153                         self.max_name_len = other.max_name_len
 154
 155                 self.fcn_list = None
 156
 157                 for func in other.stats.keys():
 158                         if self.stats.has_key(func):
 159                                 old_func_stat = self.stats[func]
 160                         else:
 161                                 old_func_stat = (0, 0, 0, 0, {},)
 162                         self.stats[func] = add_func_stats(old_func_stat, \
 163                                   other.stats[func])
 164                 return self
 165
 166
 167
 168         # list the tuple indices and directions for sorting,
 169         # along with some printable description
 170         sort_arg_dict_default = {\
 171                   "calls"     : (((1,-1),              ), "call count"),\
 172                   "cumulative": (((3,-1),              ), "cumulative time"),\
 173                   "file"      : (((4, 1),              ), "file name"),\
 174                   "line"      : (((5, 1),              ), "line number"),\
 175                   "module"    : (((4, 1),              ), "file name"),\
 176                   "name"      : (((6, 1),              ), "function name"),\
 177                   "nfl"       : (((6, 1),(4, 1),(5, 1),), "name/file/line"), \
 178                   "pcalls"    : (((0,-1),              ), "call count"),\
 179                   "stdname"   : (((7, 1),              ), "standard name"),\
 180                   "time"      : (((2,-1),              ), "internal time"),\
 181                   }
 182
 183         def get_sort_arg_defs(self):
 184                 """Expand all abbreviations that are unique."""
 185                 if not self.sort_arg_dict:
 186                         self.sort_arg_dict = dict = {}
 187                         std_list = dict.keys()
 188                         bad_list = {}
 189                         for word in self.sort_arg_dict_default.keys():
 190                                 fragment = word
 191                                 while fragment:
 192                                         if not fragment:
 193                                                 break
 194                                         if dict.has_key(fragment):
 195                                                 bad_list[fragment] = 0
 196                                                 break
 197                                         dict[fragment] = self. \
 198                                                   sort_arg_dict_default[word]
 199                                         fragment = fragment[:-1]
 200                         for word in bad_list.keys():
 201                                 del dict[word]
 202                 return self.sort_arg_dict
 203
 204
 205         def sort_stats(self, *field):
 206                 if not field:
 207                         self.fcn_list = 0
 208                         return self
 209                 if len(field) == 1 and type(field[0]) == type(1):
 210                         # Be compatible with old profiler
 211                         field = [ {-1: "stdname", \
 212                                   0:"calls", \
 213                                   1:"time", \
 214                                   2: "cumulative" }  [ field[0] ] ]
 215
 216                 sort_arg_defs = self.get_sort_arg_defs()
 217                 sort_tuple = ()
 218                 self.sort_type = ""
 219                 connector = ""
 220                 for word in field:
 221                         sort_tuple = sort_tuple + sort_arg_defs[word][0]
 222                         self.sort_type = self.sort_type + connector + \
 223                                   sort_arg_defs[word][1]
 224                         connector = ", "
 225
 226                 stats_list = []
 227                 for func in self.stats.keys():
 228                         cc, nc, tt, ct, callers = self.stats[func]
 229                         stats_list.append((cc, nc, tt, ct) + func_split(func) \
 230                                            + (func_std_string(func), func,)  )
 231
 232                 stats_list.sort(TupleComp(sort_tuple).compare)
 233
 234                 self.fcn_list = fcn_list = []
 235                 for tuple in stats_list:
 236                         fcn_list.append(tuple[-1])
 237                 return self
 238
 239
 240         def reverse_order(self):
 241                 if self.fcn_list: self.fcn_list.reverse()
 242                 return self
 243
 244         def strip_dirs(self):
 245                 oldstats = self.stats
 246                 self.stats = newstats = {}
 247                 max_name_len = 0
 248                 for func in oldstats.keys():
 249                         cc, nc, tt, ct, callers = oldstats[func]
 250                         newfunc = func_strip_path(func)
 251                         if len(func_std_string(newfunc)) > max_name_len:
 252                                 max_name_len = len(func_std_string(newfunc))
 253                         newcallers = {}
 254                         for func2 in callers.keys():
 255                                 newcallers[func_strip_path(func2)] = \
 256                                           callers[func2]
 257
 258                         if newstats.has_key(newfunc):
 259                                 newstats[newfunc] = add_func_stats( \
 260                                           newstats[newfunc],\
 261                                           (cc, nc, tt, ct, newcallers))
 262                         else:
 263                                 newstats[newfunc] = (cc, nc, tt, ct, newcallers)
 264                 old_top = self.top_level
 265                 self.top_level = new_top = {}
 266                 for func in old_top.keys():
 267                         new_top[func_strip_path(func)] = None
 268
 269                 self.max_name_len = max_name_len
 270
 271                 self.fcn_list = None
 272                 self.all_callees = None
 273                 return self
 274
 275
 276
 277         def calc_callees(self):
 278                 if self.all_callees: return
 279                 self.all_callees = all_callees = {}
 280                 for func in self.stats.keys():
 281                         if not all_callees.has_key(func):
 282                                 all_callees[func] = {}
 283                         cc, nc, tt, ct, callers = self.stats[func]
 284                         for func2 in callers.keys():
 285                                 if not all_callees.has_key(func2):
 286                                         all_callees[func2] = {}
 287                                 all_callees[func2][func]  = callers[func2]
 288                 return
 289
 290         #******************************************************************
 291         # The following functions support actual printing of reports
 292         #******************************************************************
 293
 294         # Optional "amount" is either a line count, or a percentage of lines.
 295
 296         def eval_print_amount(self, sel, list, msg):
 297                 new_list = list
 298                 if type(sel) == type(""):
 299                         new_list = []
 300                         for func in list:
 301                                 if re.search(sel, func_std_string(func)):
 302                                         new_list.append(func)
 303                 else:
 304                         count = len(list)
 305                         if type(sel) == type(1.0) and 0.0 <= sel < 1.0:
 306                                 count = int (count * sel + .5)
 307                                 new_list = list[:count]
 308                         elif type(sel) == type(1) and 0 <= sel < count:
 309                                 count = sel
 310                                 new_list = list[:count]
 311                 if len(list) != len(new_list):
 312                         msg = msg + "   List reduced from " + `len(list)` \
 313                                   + " to " + `len(new_list)` + \
 314                                   " due to restriction <" + `sel` + ">\n"
 315
 316                 return new_list, msg
 317
 318
 319
 320         def get_print_list(self, sel_list):
 321                 width = self.max_name_len
 322                 if self.fcn_list:
 323                         list = self.fcn_list[:]
 324                         msg = "   Ordered by: " + self.sort_type + '\n'
 325                 else:
 326                         list = self.stats.keys()
 327                         msg = "   Random listing order was used\n"
 328
 329                 for selection in sel_list:
 330                         list,msg = self.eval_print_amount(selection, list, msg)
 331
 332                 count = len(list)
 333
 334                 if not list:
 335                         return 0, list
 336                 print msg
 337                 if count < len(self.stats):
 338                         width = 0
 339                         for func in list:
 340                                 if  len(func_std_string(func)) > width:
 341                                         width = len(func_std_string(func))
 342                 return width+2, list
 343
 344         def print_stats(self, *amount):
 345                 for filename in self.files:
 346                         print filename
 347                 if self.files: print
 348                 indent = "        "
 349                 for func in self.top_level.keys():
 350                         print indent, func_get_function_name(func)
 351
 352                 print  indent, self.total_calls, "function calls",
 353                 if self.total_calls != self.prim_calls:
 354                         print "(" + `self.prim_calls`, "primitive calls)",
 355                 print "in", fpformat.fix(self.total_tt, 3), "CPU seconds"
 356                 print
 357                 width, list = self.get_print_list(amount)
 358                 if list:
 359                         self.print_title()
 360                         for func in list:
 361                                 self.print_line(func)
 362                         print
 363                         print
 364                 return self
 365
 366
 367         def print_callees(self, *amount):
 368                 width, list = self.get_print_list(amount)
 369                 if list:
 370                         self.calc_callees()
 371
 372                         self.print_call_heading(width, "called...")
 373                         for func in list:
 374                                 if self.all_callees.has_key(func):
 375                                         self.print_call_line(width, \
 376                                                   func, self.all_callees[func])
 377                                 else:
 378                                         self.print_call_line(width, func, {})
 379                         print
 380                         print
 381                 return self
 382
 383         def print_callers(self, *amount):
 384                 width, list = self.get_print_list(amount)
 385                 if list:
 386                         self.print_call_heading(width, "was called by...")
 387                         for func in list:
 388                                 cc, nc, tt, ct, callers = self.stats[func]
 389                                 self.print_call_line(width, func, callers)
 390                         print
 391                         print
 392                 return self
 393
 394         def print_call_heading(self, name_size, column_title):
 395                 print string.ljust("Function ", name_size) + column_title
 396
 397
 398         def print_call_line(self, name_size, source, call_dict):
 399                 print string.ljust(func_std_string(source), name_size),
 400                 if not call_dict:
 401                         print "--"
 402                         return
 403                 clist = call_dict.keys()
 404                 clist.sort()
 405                 name_size = name_size + 1
 406                 indent = ""
 407                 for func in clist:
 408                         name = func_std_string(func)
 409                         print indent*name_size + name + '(' \
 410                                   + `call_dict[func]`+')', \
 411                                   f8(self.stats[func][3])
 412                         indent = " "
 413
 414
 415
 416         def print_title(self):
 417                 print string.rjust('ncalls', 9),
 418                 print string.rjust('tottime', 8),
 419                 print string.rjust('percall', 8),
 420                 print string.rjust('cumtime', 8),
 421                 print string.rjust('percall', 8),
 422                 print 'filename:lineno(function)'
 423
 424
 425         def print_line(self, func):  # hack : should print percentages
 426                 cc, nc, tt, ct, callers = self.stats[func]
 427                 c = `nc`
 428                 if nc != cc:
 429                         c = c + '/' + `cc`
 430                 print string.rjust(c, 9),
 431                 print f8(tt),
 432                 if nc == 0:
 433                         print ' '*8,
 434                 else:
 435                         print f8(tt/nc),
 436                 print f8(ct),
 437                 if cc == 0:
 438                         print ' '*8,
 439                 else:
 440                         print f8(ct/cc),
 441                 print func_std_string(func)
 442
 443
 444         def ignore(self):
 445                 pass # has no return value, so use at end of line :-)
 446
 447
 448 class TupleComp:
 449         """This class provides a generic function for comparing any two tuples.
 450         Each instance records a list of tuple-indices (from most significant
 451         to least significant), and sort direction (ascending or decending) for
 452         each tuple-index.  The compare functions can then be used as the function
 453         argument to the system sort() function when a list of tuples need to be
 454         sorted in the instances order."""
 455
 456         def __init__(self, comp_select_list):
 457                 self.comp_select_list = comp_select_list
 458
 459         def compare (self, left, right):
 460                 for index, direction in self.comp_select_list:
 461                         l = left[index]
 462                         r = right[index]
 463                         if l < r:
 464                                 return -direction
 465                         if l > r:
 466                                 return direction
 467                 return 0
 468
 469
 470
 471 #**************************************************************************
 472
 473 def func_strip_path(func_name):
 474         file, line, name = func_name
 475         return os.path.basename(file), line, name
 476
 477 def func_get_function_name(func):
 478         return func[2]
 479
 480 def func_std_string(func_name): # match what old profile produced
 481         file, line, name = func_name
 482         return file + ":" + `line` + "(" + name + ")"
 483
 484 def func_split(func_name):
 485         return func_name
 486
 487 #**************************************************************************
  488 # The following functions combine statistics for pairs of functions.
 489 # The bulk of the processing involves correctly handling "call" lists,
 490 # such as callers and callees.
 491 #**************************************************************************
 492
 493 def add_func_stats(target, source):
 494         """Add together all the stats for two profile entries."""
 495         cc, nc, tt, ct, callers = source
 496         t_cc, t_nc, t_tt, t_ct, t_callers = target
 497         return (cc+t_cc, nc+t_nc, tt+t_tt, ct+t_ct, \
 498                   add_callers(t_callers, callers))
 499
 500
 501 def add_callers(target, source):
 502         """Combine two caller lists in a single list."""
 503         new_callers = {}
 504         for func in target.keys():
 505                 new_callers[func] = target[func]
 506         for func in source.keys():
 507                 if new_callers.has_key(func):
 508                         new_callers[func] = source[func] + new_callers[func]
 509                 else:
 510                         new_callers[func] = source[func]
 511         return new_callers
 512
 513 def count_calls(callers):
 514         """Sum the caller statistics to get total number of calls received."""
 515         nc = 0
 516         for func in callers.keys():
 517                 nc = nc + callers[func]
 518         return nc
 519
 520 #**************************************************************************
 521 # The following functions support printing of reports
 522 #**************************************************************************
 523
 524 def f8(x):
 525         return string.rjust(fpformat.fix(x, 3), 8)
 526