tools/binary_size/explain_binary_size_delta.py

   1 #!/usr/bin/env python
   2 # Copyright 2014 The Chromium Authors. All rights reserved.
   3 # Use of this source code is governed by a BSD-style license that can be
   4 # found in the LICENSE file.
   5
   6 """Describe the size difference of two binaries.
   7
   8 Generates a description of the size difference of two binaries based
   9 on the difference of the size of various symbols.
  10
  11 This tool needs "nm" dumps of each binary with full symbol
  12 information. You can obtain the necessary dumps by running the
  13 run_binary_size_analysis.py script upon each binary, with the
  14 "--nm-out" parameter set to the location in which you want to save the
  15 dumps. Example:
  16
  17   # obtain symbol data from first binary in /tmp/nm1.dump
  18   cd $CHECKOUT1_SRC
  19   ninja -C out/Release binary_size_tool
  20   tools/binary_size/run_binary_size_analysis.py \
  21       --library <path_to_library> \
  22       --destdir /tmp/throwaway \
  23       --nm-out /tmp/nm1.dump
  24
  25   # obtain symbol data from second binary in /tmp/nm2.dump
  26   cd $CHECKOUT2_SRC
  27   ninja -C out/Release binary_size_tool
  28   tools/binary_size/run_binary_size_analysis.py \
  29       --library <path_to_library> \
  30       --destdir /tmp/throwaway \
  31       --nm-out /tmp/nm2.dump
  32
  33   # cleanup useless files
  34   rm -r /tmp/throwaway
  35
  36   # run this tool
  37   explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump
  38 """
  39
  40 import collections
  41 import operator
  42 import optparse
  43 import os
  44 import sys
  45
  46 import binary_size_utils
  47
  48
  49 def Compare(symbols1, symbols2):
  50   """Executes a comparison of the symbols in symbols1 and symbols2.
  51
  52   Returns:
  53       tuple of lists: (added_symbols, removed_symbols, changed_symbols, others)
  54   """
  55   added = [] # tuples
  56   removed = [] # tuples
  57   changed = [] # tuples
  58   unchanged = [] # tuples
  59
  60   cache1 = {}
  61   cache2 = {}
  62   # Make a map of (file, symbol_type) : (symbol_name, symbol_size)
  63   for cache, symbols in ((cache1, symbols1), (cache2, symbols2)):
  64     for symbol_name, symbol_type, symbol_size, file_path in symbols:
  65       if 'vtable for ' in symbol_name:
  66         symbol_type = '@' # hack to categorize these separately
  67       if file_path:
  68         file_path = os.path.normpath(file_path)
  69         if sys.platform.startswith('win'):
  70           file_path = file_path.replace('\\', '/')
  71       else:
  72         file_path = '(No Path)'
  73       key = (file_path, symbol_type)
  74       bucket = cache.setdefault(key, {})
  75       size_list = bucket.setdefault(symbol_name, [])
  76       size_list.append(symbol_size)
  77
  78   # Now diff them. We iterate over the elements in cache1. For each symbol
  79   # that we find in cache2, we record whether it was deleted, changed, or
  80   # unchanged. We then remove it from cache2; all the symbols that remain
  81   # in cache2 at the end of the iteration over cache1 are the 'new' symbols.
  82   for key, bucket1 in cache1.items():
  83     bucket2 = cache2.get(key)
  84     if not bucket2:
  85       # A file was removed. Everything in bucket1 is dead.
  86       for symbol_name, symbol_size_list in bucket1.items():
  87         for symbol_size in symbol_size_list:
  88           removed.append((key[0], key[1], symbol_name, symbol_size, None))
  89     else:
  90       # File still exists, look for changes within.
  91       for symbol_name, symbol_size_list in bucket1.items():
  92         size_list2 = bucket2.get(symbol_name)
  93         if size_list2 is None:
  94           # Symbol no longer exists in bucket2.
  95           for symbol_size in symbol_size_list:
  96             removed.append((key[0], key[1], symbol_name, symbol_size, None))
  97         else:
  98           del bucket2[symbol_name] # Symbol is not new, delete from cache2.
  99           if len(symbol_size_list) == 1 and len(size_list2) == 1:
 100             symbol_size = symbol_size_list[0]
 101             size2 = size_list2[0]
 102             if symbol_size != size2:
 103               # Symbol has change size in bucket.
 104               changed.append((key[0], key[1], symbol_name, symbol_size, size2))
 105             else:
 106               # Symbol is unchanged.
 107               unchanged.append((key[0], key[1], symbol_name, symbol_size,
 108                                 size2))
 109           else:
 110             # Complex comparison for when a symbol exists multiple times
 111             # in the same file (where file can be "unknown file").
 112             symbol_size_counter = collections.Counter(symbol_size_list)
 113             delta_counter = collections.Counter(symbol_size_list)
 114             delta_counter.subtract(size_list2)
 115             for symbol_size in sorted(delta_counter.keys()):
 116               delta = delta_counter[symbol_size]
 117               unchanged_count = symbol_size_counter[symbol_size]
 118               if delta > 0:
 119                 unchanged_count -= delta
 120               for _ in range(unchanged_count):
 121                 unchanged.append((key[0], key[1], symbol_name, symbol_size,
 122                                   symbol_size))
 123               if delta > 0: # Used to be more of these than there is now.
 124                 for _ in range(delta):
 125                   removed.append((key[0], key[1], symbol_name, symbol_size,
 126                                   None))
 127               elif delta < 0: # More of this (symbol,size) now.
 128                 for _ in range(-delta):
 129                   added.append((key[0], key[1], symbol_name, None, symbol_size))
 130
 131           if len(bucket2) == 0:
 132             del cache1[key] # Entire bucket is empty, delete from cache2
 133
 134   # We have now analyzed all symbols that are in cache1 and removed all of
 135   # the encountered symbols from cache2. What's left in cache2 is the new
 136   # symbols.
 137   for key, bucket2 in cache2.iteritems():
 138     for symbol_name, symbol_size_list in bucket2.items():
 139       for symbol_size in symbol_size_list:
 140         added.append((key[0], key[1], symbol_name, None, symbol_size))
 141   return (added, removed, changed, unchanged)
 142
 143 def DeltaStr(number):
 144   """Returns the number as a string with a '+' prefix if it's > 0 and
 145   a '-' prefix if it's < 0."""
 146   result = str(number)
 147   if number > 0:
 148     result = '+' + result
 149   return result
 150
 151
 152 class CrunchStatsData(object):
 153   """Stores a summary of data of a certain kind."""
 154   def __init__(self, symbols):
 155     self.symbols = symbols
 156     self.sources = set()
 157     self.before_size = 0
 158     self.after_size = 0
 159     self.symbols_by_path = {}
 160
 161
 162 def CrunchStats(added, removed, changed, unchanged, showsources, showsymbols):
 163   """Outputs to stdout a summary of changes based on the symbol lists."""
 164   # Split changed into grown and shrunk because that is easier to
 165   # discuss.
 166   grown = []
 167   shrunk = []
 168   for item in changed:
 169     file_path, symbol_type, symbol_name, size1, size2 = item
 170     if size1 < size2:
 171       grown.append(item)
 172     else:
 173       shrunk.append(item)
 174
 175   new_symbols = CrunchStatsData(added)
 176   removed_symbols = CrunchStatsData(removed)
 177   grown_symbols = CrunchStatsData(grown)
 178   shrunk_symbols = CrunchStatsData(shrunk)
 179   sections = [new_symbols, removed_symbols, grown_symbols, shrunk_symbols]
 180   for section in sections:
 181     for file_path, symbol_type, symbol_name, size1, size2 in section.symbols:
 182       section.sources.add(file_path)
 183       if size1 is not None:
 184         section.before_size += size1
 185       if size2 is not None:
 186         section.after_size += size2
 187       bucket = section.symbols_by_path.setdefault(file_path, [])
 188       bucket.append((symbol_name, symbol_type, size1, size2))
 189
 190   total_change = sum(s.after_size - s.before_size for s in sections)
 191   summary = 'Total change: %s bytes' % DeltaStr(total_change)
 192   print(summary)
 193   print('=' * len(summary))
 194   for section in sections:
 195     if not section.symbols:
 196       continue
 197     if section.before_size == 0:
 198       description = ('added, totalling %s bytes' % DeltaStr(section.after_size))
 199     elif section.after_size == 0:
 200       description = ('removed, totalling %s bytes' %
 201                      DeltaStr(-section.before_size))
 202     else:
 203       if section.after_size > section.before_size:
 204         type_str = 'grown'
 205       else:
 206         type_str = 'shrunk'
 207       description = ('%s, for a net change of %s bytes '
 208                      '(%d bytes before, %d bytes after)' %
 209             (type_str, DeltaStr(section.after_size - section.before_size),
 210              section.before_size, section.after_size))
 211     print('  %d %s across %d sources' %
 212           (len(section.symbols), description, len(section.sources)))
 213
 214   maybe_unchanged_sources = set()
 215   unchanged_symbols_size = 0
 216   for file_path, symbol_type, symbol_name, size1, size2 in unchanged:
 217     maybe_unchanged_sources.add(file_path)
 218     unchanged_symbols_size += size1 # == size2
 219   print('  %d unchanged, totalling %d bytes' %
 220         (len(unchanged), unchanged_symbols_size))
 221
 222   # High level analysis, always output.
 223   unchanged_sources = maybe_unchanged_sources
 224   for section in sections:
 225     unchanged_sources = unchanged_sources - section.sources
 226   new_sources = (new_symbols.sources -
 227     maybe_unchanged_sources -
 228     removed_symbols.sources)
 229   removed_sources = (removed_symbols.sources -
 230     maybe_unchanged_sources -
 231     new_symbols.sources)
 232   partially_changed_sources = (grown_symbols.sources |
 233     shrunk_symbols.sources | new_symbols.sources |
 234     removed_symbols.sources) - removed_sources - new_sources
 235   allFiles = set()
 236   for section in sections:
 237     allFiles = allFiles | section.sources
 238   allFiles = allFiles | maybe_unchanged_sources
 239   print 'Source stats:'
 240   print('  %d sources encountered.' % len(allFiles))
 241   print('  %d completely new.' % len(new_sources))
 242   print('  %d removed completely.' % len(removed_sources))
 243   print('  %d partially changed.' % len(partially_changed_sources))
 244   print('  %d completely unchanged.' % len(unchanged_sources))
 245   remainder = (allFiles - new_sources - removed_sources -
 246     partially_changed_sources - unchanged_sources)
 247   assert len(remainder) == 0
 248
 249   if not showsources:
 250     return  # Per-source analysis, only if requested
 251   print 'Per-source Analysis:'
 252   delta_by_path = {}
 253   for section in sections:
 254     for path in section.symbols_by_path:
 255       entry = delta_by_path.get(path)
 256       if not entry:
 257         entry = {'plus': 0, 'minus': 0}
 258         delta_by_path[path] = entry
 259       for symbol_name, symbol_type, size1, size2 in \
 260             section.symbols_by_path[path]:
 261         if size1 is None:
 262           delta = size2
 263         elif size2 is None:
 264           delta = -size1
 265         else:
 266           delta = size2 - size1
 267
 268         if delta > 0:
 269           entry['plus'] += delta
 270         else:
 271           entry['minus'] += (-1 * delta)
 272
 273   def delta_sort_key(item):
 274     _path, size_data = item
 275     growth = size_data['plus'] - size_data['minus']
 276     return growth
 277
 278   for path, size_data in sorted(delta_by_path.iteritems(), key=delta_sort_key,
 279                                 reverse=True):
 280     gain = size_data['plus']
 281     loss = size_data['minus']
 282     delta = size_data['plus'] - size_data['minus']
 283     header = ' %s - Source: %s - (gained %d, lost %d)' % (DeltaStr(delta),
 284                                                           path, gain, loss)
 285     divider = '-' * len(header)
 286     print ''
 287     print divider
 288     print header
 289     print divider
 290     if showsymbols:
 291       if path in new_symbols.symbols_by_path:
 292         print '  New symbols:'
 293         for symbol_name, symbol_type, size1, size2 in \
 294             sorted(new_symbols.symbols_by_path[path],
 295                    key=operator.itemgetter(3),
 296                    reverse=True):
 297           print ('   %8s: %s type=%s, size=%d bytes' %
 298                  (DeltaStr(size2), symbol_name, symbol_type, size2))
 299       if path in removed_symbols.symbols_by_path:
 300         print '  Removed symbols:'
 301         for symbol_name, symbol_type, size1, size2 in \
 302             sorted(removed_symbols.symbols_by_path[path],
 303                    key=operator.itemgetter(2)):
 304           print ('   %8s: %s type=%s, size=%d bytes' %
 305                  (DeltaStr(-size1), symbol_name, symbol_type, size1))
 306       for (changed_symbols_by_path, type_str) in [
 307         (grown_symbols.symbols_by_path, "Grown"),
 308         (shrunk_symbols.symbols_by_path, "Shrunk")]:
 309         if path in changed_symbols_by_path:
 310           print '  %s symbols:' % type_str
 311           def changed_symbol_sortkey(item):
 312             symbol_name, _symbol_type, size1, size2 = item
 313             return (size1 - size2, symbol_name)
 314           for symbol_name, symbol_type, size1, size2 in \
 315               sorted(changed_symbols_by_path[path], key=changed_symbol_sortkey):
 316             print ('   %8s: %s type=%s, (was %d bytes, now %d bytes)'
 317                    % (DeltaStr(size2 - size1), symbol_name,
 318                       symbol_type, size1, size2))
 319
 320
 321 def main():
 322   usage = """%prog [options]
 323
 324   Analyzes the symbolic differences between two binary files
 325   (typically, not necessarily, two different builds of the same
 326   library) and produces a detailed description of symbols that have
 327   been added, removed, or whose size has changed.
 328
 329   Example:
 330        explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump
 331
 332   Options are available via '--help'.
 333   """
 334   parser = optparse.OptionParser(usage=usage)
 335   parser.add_option('--nm1', metavar='PATH',
 336                     help='the nm dump of the first library')
 337   parser.add_option('--nm2', metavar='PATH',
 338                     help='the nm dump of the second library')
 339   parser.add_option('--showsources', action='store_true', default=False,
 340                     help='show per-source statistics')
 341   parser.add_option('--showsymbols', action='store_true', default=False,
 342                     help='show all symbol information; implies --showfiles')
 343   parser.add_option('--verbose', action='store_true', default=False,
 344                     help='output internal debugging stuff')
 345   opts, _args = parser.parse_args()
 346
 347   if not opts.nm1:
 348     parser.error('--nm1 is required')
 349   if not opts.nm2:
 350     parser.error('--nm2 is required')
 351   symbols = []
 352   for path in [opts.nm1, opts.nm2]:
 353     with file(path, 'r') as nm_input:
 354       if opts.verbose:
 355         print 'parsing ' + path + '...'
 356       symbols.append(list(binary_size_utils.ParseNm(nm_input)))
 357   (added, removed, changed, unchanged) = Compare(symbols[0], symbols[1])
 358   CrunchStats(added, removed, changed, unchanged,
 359     opts.showsources | opts.showsymbols, opts.showsymbols)
 360
 361 if __name__ == '__main__':
 362   sys.exit(main())