Roll src/third_party/WebKit d9c6159:8139f33 (svn 201974:201975)
[chromium-blink-merge.git] / tools / binary_size / explain_binary_size_delta.py
blob45c1236271f1b9ca843df6cc154a850f26ca5317
1 #!/usr/bin/env python
2 # Copyright 2014 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Describe the size difference of two binaries.
8 Generates a description of the size difference of two binaries based
9 on the difference of the size of various symbols.
11 This tool needs "nm" dumps of each binary with full symbol
12 information. You can obtain the necessary dumps by running the
13 run_binary_size_analysis.py script upon each binary, with the
14 "--nm-out" parameter set to the location in which you want to save the
15 dumps. Example:
17 # obtain symbol data from first binary in /tmp/nm1.dump
18 cd $CHECKOUT1_SRC
19 ninja -C out/Release binary_size_tool
20 tools/binary_size/run_binary_size_analysis \
21 --library <path_to_library>
22 --destdir /tmp/throwaway
23 --nm-out /tmp/nm1.dump
25 # obtain symbol data from second binary in /tmp/nm2.dump
26 cd $CHECKOUT2_SRC
27 ninja -C out/Release binary_size_tool
28 tools/binary_size/run_binary_size_analysis \
29 --library <path_to_library>
30 --destdir /tmp/throwaway
31 --nm-out /tmp/nm2.dump
33 # cleanup useless files
34 rm -r /tmp/throwaway
36 # run this tool
37 explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump
38 """
40 import collections
41 from collections import Counter
42 from math import ceil
43 import operator
44 import optparse
45 import os
46 import sys
48 import binary_size_utils
51 def CalculateSharedAddresses(symbols):
52 """Checks how many symbols share the same memory space. This returns a
53 Counter result where result[address] will tell you how many times address was
54 used by symbols."""
55 count = Counter()
56 for _, _, _, _, address in symbols:
57 count[address] += 1
59 return count
62 def CalculateEffectiveSize(share_count, address, symbol_size):
63 """Given a raw symbol_size and an address, this method returns the
64 size we should blame on this symbol considering it might share the
65 machine code/data with other symbols. Using the raw symbol_size for
66 each symbol would in those cases over estimate the true cost of that
67 block.
69 """
70 shared_count = share_count[address]
71 if shared_count == 1:
72 return symbol_size
74 assert shared_count > 1
75 return int(ceil(symbol_size / float(shared_count)))
77 class SymbolDelta(object):
78 """Stores old size, new size and some metadata."""
79 def __init__(self, shared):
80 self.old_size = None
81 self.new_size = None
82 self.shares_space_with_other_symbols = shared
84 def __eq__(self, other):
85 return (self.old_size == other.old_size and
86 self.new_size == other.new_size and
87 self.shares_space_with_other_symbols ==
88 other.shares_space_with_other_symbols)
90 def __ne__(self, other):
91 return not self.__eq__(other)
93 def copy_symbol_delta(self):
94 symbol_delta = SymbolDelta(self.shares_space_with_other_symbols)
95 symbol_delta.old_size = self.old_size
96 symbol_delta.new_size = self.new_size
97 return symbol_delta
99 class DeltaInfo(SymbolDelta):
100 """Summary of a the change for one symbol between two instances."""
101 def __init__(self, file_path, symbol_type, symbol_name, shared):
102 SymbolDelta.__init__(self, shared)
103 self.file_path = file_path
104 self.symbol_type = symbol_type
105 self.symbol_name = symbol_name
107 def __eq__(self, other):
108 return (self.file_path == other.file_path and
109 self.symbol_type == other.symbol_type and
110 self.symbol_name == other.symbol_name and
111 SymbolDelta.__eq__(self, other))
113 def __ne__(self, other):
114 return not self.__eq__(other)
116 def ExtractSymbolDelta(self):
117 """Returns a copy of the SymbolDelta for this DeltaInfo."""
118 return SymbolDelta.copy_symbol_delta(self)
120 def Compare(symbols1, symbols2):
121 """Executes a comparison of the symbols in symbols1 and symbols2.
123 Returns:
124 tuple of lists: (added_symbols, removed_symbols, changed_symbols, others)
125 where each list contains DeltaInfo objects.
127 added = [] # tuples
128 removed = [] # tuples
129 changed = [] # tuples
130 unchanged = [] # tuples
132 cache1 = {}
133 cache2 = {}
134 # Make a map of (file, symbol_type) : (symbol_name, effective_symbol_size)
135 share_count1 = CalculateSharedAddresses(symbols1)
136 share_count2 = CalculateSharedAddresses(symbols2)
137 for cache, symbols, share_count in ((cache1, symbols1, share_count1),
138 (cache2, symbols2, share_count2)):
139 for symbol_name, symbol_type, symbol_size, file_path, address in symbols:
140 if 'vtable for ' in symbol_name:
141 symbol_type = '@' # hack to categorize these separately
142 if file_path:
143 file_path = os.path.normpath(file_path)
144 if sys.platform.startswith('win'):
145 file_path = file_path.replace('\\', '/')
146 else:
147 file_path = '(No Path)'
148 # Take into consideration that multiple symbols might share the same
149 # block of code.
150 effective_symbol_size = CalculateEffectiveSize(share_count, address,
151 symbol_size)
152 key = (file_path, symbol_type)
153 bucket = cache.setdefault(key, {})
154 size_list = bucket.setdefault(symbol_name, [])
155 size_list.append((effective_symbol_size,
156 effective_symbol_size != symbol_size))
158 # Now diff them. We iterate over the elements in cache1. For each symbol
159 # that we find in cache2, we record whether it was deleted, changed, or
160 # unchanged. We then remove it from cache2; all the symbols that remain
161 # in cache2 at the end of the iteration over cache1 are the 'new' symbols.
162 for key, bucket1 in cache1.items():
163 bucket2 = cache2.get(key)
164 file_path, symbol_type = key;
165 if not bucket2:
166 # A file was removed. Everything in bucket1 is dead.
167 for symbol_name, symbol_size_list in bucket1.items():
168 for (symbol_size, shared) in symbol_size_list:
169 delta_info = DeltaInfo(file_path, symbol_type, symbol_name, shared)
170 delta_info.old_size = symbol_size
171 removed.append(delta_info)
172 else:
173 # File still exists, look for changes within.
174 for symbol_name, symbol_size_list in bucket1.items():
175 size_list2 = bucket2.get(symbol_name)
176 if size_list2 is None:
177 # Symbol no longer exists in bucket2.
178 for (symbol_size, shared) in symbol_size_list:
179 delta_info = DeltaInfo(file_path, symbol_type, symbol_name, shared)
180 delta_info.old_size = symbol_size
181 removed.append(delta_info)
182 else:
183 del bucket2[symbol_name] # Symbol is not new, delete from cache2.
184 if len(symbol_size_list) == 1 and len(size_list2) == 1:
185 symbol_size, shared1 = symbol_size_list[0]
186 size2, shared2 = size_list2[0]
187 delta_info = DeltaInfo(file_path, symbol_type, symbol_name,
188 shared1 or shared2)
189 delta_info.old_size = symbol_size
190 delta_info.new_size = size2
191 if symbol_size != size2:
192 # Symbol has change size in bucket.
193 changed.append(delta_info)
194 else:
195 # Symbol is unchanged.
196 unchanged.append(delta_info)
197 else:
198 # Complex comparison for when a symbol exists multiple times
199 # in the same file (where file can be "unknown file").
200 symbol_size_counter = collections.Counter(symbol_size_list)
201 delta_counter = collections.Counter(symbol_size_list)
202 delta_counter.subtract(size_list2)
203 for delta_counter_key in sorted(delta_counter.keys()):
204 delta = delta_counter[delta_counter_key]
205 unchanged_count = symbol_size_counter[delta_counter_key]
206 (symbol_size, shared) = delta_counter_key
207 if delta > 0:
208 unchanged_count -= delta
209 for _ in range(unchanged_count):
210 delta_info = DeltaInfo(file_path, symbol_type,
211 symbol_name, shared)
212 delta_info.old_size = symbol_size
213 delta_info.new_size = symbol_size
214 unchanged.append(delta_info)
215 if delta > 0: # Used to be more of these than there is now.
216 for _ in range(delta):
217 delta_info = DeltaInfo(file_path, symbol_type,
218 symbol_name, shared)
219 delta_info.old_size = symbol_size
220 removed.append(delta_info)
221 elif delta < 0: # More of this (symbol,size) now.
222 for _ in range(-delta):
223 delta_info = DeltaInfo(file_path, symbol_type,
224 symbol_name, shared)
225 delta_info.new_size = symbol_size
226 added.append(delta_info)
228 if len(bucket2) == 0:
229 del cache1[key] # Entire bucket is empty, delete from cache2
231 # We have now analyzed all symbols that are in cache1 and removed all of
232 # the encountered symbols from cache2. What's left in cache2 is the new
233 # symbols.
234 for key, bucket2 in cache2.iteritems():
235 file_path, symbol_type = key;
236 for symbol_name, symbol_size_list in bucket2.items():
237 for (symbol_size, shared) in symbol_size_list:
238 delta_info = DeltaInfo(file_path, symbol_type, symbol_name, shared)
239 delta_info.new_size = symbol_size
240 added.append(delta_info)
241 return (added, removed, changed, unchanged)
244 def DeltaStr(number):
245 """Returns the number as a string with a '+' prefix if it's > 0 and
246 a '-' prefix if it's < 0."""
247 result = str(number)
248 if number > 0:
249 result = '+' + result
250 return result
253 def SharedInfoStr(symbol_info):
254 """Returns a string (prefixed by space) explaining that numbers are
255 adjusted because of shared space between symbols, or an empty string
256 if space had not been shared."""
258 if symbol_info.shares_space_with_other_symbols:
259 return " (adjusted sizes because of memory sharing)"
261 return ""
263 class CrunchStatsData(object):
264 """Stores a summary of data of a certain kind."""
265 def __init__(self, symbols):
266 self.symbols = symbols
267 self.sources = set()
268 self.before_size = 0
269 self.after_size = 0
270 self.symbols_by_path = {}
273 def CrunchStats(added, removed, changed, unchanged, showsources, showsymbols):
274 """Outputs to stdout a summary of changes based on the symbol lists."""
275 # Split changed into grown and shrunk because that is easier to
276 # discuss.
277 grown = []
278 shrunk = []
279 for item in changed:
280 if item.old_size < item.new_size:
281 grown.append(item)
282 else:
283 shrunk.append(item)
285 new_symbols = CrunchStatsData(added)
286 removed_symbols = CrunchStatsData(removed)
287 grown_symbols = CrunchStatsData(grown)
288 shrunk_symbols = CrunchStatsData(shrunk)
289 sections = [new_symbols, removed_symbols, grown_symbols, shrunk_symbols]
290 for section in sections:
291 for item in section.symbols:
292 section.sources.add(item.file_path)
293 if item.old_size is not None:
294 section.before_size += item.old_size
295 if item.new_size is not None:
296 section.after_size += item.new_size
297 bucket = section.symbols_by_path.setdefault(item.file_path, [])
298 bucket.append((item.symbol_name, item.symbol_type,
299 item.ExtractSymbolDelta()))
301 total_change = sum(s.after_size - s.before_size for s in sections)
302 summary = 'Total change: %s bytes' % DeltaStr(total_change)
303 print(summary)
304 print('=' * len(summary))
305 for section in sections:
306 if not section.symbols:
307 continue
308 if section.before_size == 0:
309 description = ('added, totalling %s bytes' % DeltaStr(section.after_size))
310 elif section.after_size == 0:
311 description = ('removed, totalling %s bytes' %
312 DeltaStr(-section.before_size))
313 else:
314 if section.after_size > section.before_size:
315 type_str = 'grown'
316 else:
317 type_str = 'shrunk'
318 description = ('%s, for a net change of %s bytes '
319 '(%d bytes before, %d bytes after)' %
320 (type_str, DeltaStr(section.after_size - section.before_size),
321 section.before_size, section.after_size))
322 print(' %d %s across %d sources' %
323 (len(section.symbols), description, len(section.sources)))
325 maybe_unchanged_sources = set()
326 unchanged_symbols_size = 0
327 for item in unchanged:
328 maybe_unchanged_sources.add(item.file_path)
329 unchanged_symbols_size += item.old_size # == item.new_size
330 print(' %d unchanged, totalling %d bytes' %
331 (len(unchanged), unchanged_symbols_size))
333 # High level analysis, always output.
334 unchanged_sources = maybe_unchanged_sources
335 for section in sections:
336 unchanged_sources = unchanged_sources - section.sources
337 new_sources = (new_symbols.sources -
338 maybe_unchanged_sources -
339 removed_symbols.sources)
340 removed_sources = (removed_symbols.sources -
341 maybe_unchanged_sources -
342 new_symbols.sources)
343 partially_changed_sources = (grown_symbols.sources |
344 shrunk_symbols.sources | new_symbols.sources |
345 removed_symbols.sources) - removed_sources - new_sources
346 allFiles = set()
347 for section in sections:
348 allFiles = allFiles | section.sources
349 allFiles = allFiles | maybe_unchanged_sources
350 print 'Source stats:'
351 print(' %d sources encountered.' % len(allFiles))
352 print(' %d completely new.' % len(new_sources))
353 print(' %d removed completely.' % len(removed_sources))
354 print(' %d partially changed.' % len(partially_changed_sources))
355 print(' %d completely unchanged.' % len(unchanged_sources))
356 remainder = (allFiles - new_sources - removed_sources -
357 partially_changed_sources - unchanged_sources)
358 assert len(remainder) == 0
360 if not showsources:
361 return # Per-source analysis, only if requested
362 print 'Per-source Analysis:'
363 delta_by_path = {}
364 for section in sections:
365 for path in section.symbols_by_path:
366 entry = delta_by_path.get(path)
367 if not entry:
368 entry = {'plus': 0, 'minus': 0}
369 delta_by_path[path] = entry
370 for symbol_name, symbol_type, symbol_delta in \
371 section.symbols_by_path[path]:
372 if symbol_delta.old_size is None:
373 delta = symbol_delta.new_size
374 elif symbol_delta.new_size is None:
375 delta = -symbol_delta.old_size
376 else:
377 delta = symbol_delta.new_size - symbol_delta.old_size
379 if delta > 0:
380 entry['plus'] += delta
381 else:
382 entry['minus'] += (-1 * delta)
384 def delta_sort_key(item):
385 _path, size_data = item
386 growth = size_data['plus'] - size_data['minus']
387 return growth
389 for path, size_data in sorted(delta_by_path.iteritems(), key=delta_sort_key,
390 reverse=True):
391 gain = size_data['plus']
392 loss = size_data['minus']
393 delta = size_data['plus'] - size_data['minus']
394 header = ' %s - Source: %s - (gained %d, lost %d)' % (DeltaStr(delta),
395 path, gain, loss)
396 divider = '-' * len(header)
397 print ''
398 print divider
399 print header
400 print divider
401 if showsymbols:
402 def ExtractNewSize(tup):
403 symbol_delta = tup[2]
404 return symbol_delta.new_size
405 def ExtractOldSize(tup):
406 symbol_delta = tup[2]
407 return symbol_delta.old_size
408 if path in new_symbols.symbols_by_path:
409 print ' New symbols:'
410 for symbol_name, symbol_type, symbol_delta in \
411 sorted(new_symbols.symbols_by_path[path],
412 key=ExtractNewSize,
413 reverse=True):
414 print (' %8s: %s type=%s, size=%d bytes%s' %
415 (DeltaStr(symbol_delta.new_size), symbol_name, symbol_type,
416 symbol_delta.new_size, SharedInfoStr(symbol_delta)))
417 if path in removed_symbols.symbols_by_path:
418 print ' Removed symbols:'
419 for symbol_name, symbol_type, symbol_delta in \
420 sorted(removed_symbols.symbols_by_path[path],
421 key=ExtractOldSize):
422 print (' %8s: %s type=%s, size=%d bytes%s' %
423 (DeltaStr(-symbol_delta.old_size), symbol_name, symbol_type,
424 symbol_delta.old_size,
425 SharedInfoStr(symbol_delta)))
426 for (changed_symbols_by_path, type_str) in [
427 (grown_symbols.symbols_by_path, "Grown"),
428 (shrunk_symbols.symbols_by_path, "Shrunk")]:
429 if path in changed_symbols_by_path:
430 print ' %s symbols:' % type_str
431 def changed_symbol_sortkey(item):
432 symbol_name, _symbol_type, symbol_delta = item
433 return (symbol_delta.old_size - symbol_delta.new_size, symbol_name)
434 for symbol_name, symbol_type, symbol_delta in \
435 sorted(changed_symbols_by_path[path], key=changed_symbol_sortkey):
436 print (' %8s: %s type=%s, (was %d bytes, now %d bytes)%s'
437 % (DeltaStr(symbol_delta.new_size - symbol_delta.old_size),
438 symbol_name, symbol_type,
439 symbol_delta.old_size, symbol_delta.new_size,
440 SharedInfoStr(symbol_delta)))
443 def main():
444 usage = """%prog [options]
446 Analyzes the symbolic differences between two binary files
447 (typically, not necessarily, two different builds of the same
448 library) and produces a detailed description of symbols that have
449 been added, removed, or whose size has changed.
451 Example:
452 explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump
454 Options are available via '--help'.
456 parser = optparse.OptionParser(usage=usage)
457 parser.add_option('--nm1', metavar='PATH',
458 help='the nm dump of the first library')
459 parser.add_option('--nm2', metavar='PATH',
460 help='the nm dump of the second library')
461 parser.add_option('--showsources', action='store_true', default=False,
462 help='show per-source statistics')
463 parser.add_option('--showsymbols', action='store_true', default=False,
464 help='show all symbol information; implies --showsources')
465 parser.add_option('--verbose', action='store_true', default=False,
466 help='output internal debugging stuff')
467 opts, _args = parser.parse_args()
469 if not opts.nm1:
470 parser.error('--nm1 is required')
471 if not opts.nm2:
472 parser.error('--nm2 is required')
473 symbols = []
474 for path in [opts.nm1, opts.nm2]:
475 with file(path, 'r') as nm_input:
476 if opts.verbose:
477 print 'parsing ' + path + '...'
478 symbols.append(list(binary_size_utils.ParseNm(nm_input)))
479 (added, removed, changed, unchanged) = Compare(symbols[0], symbols[1])
480 CrunchStats(added, removed, changed, unchanged,
481 opts.showsources | opts.showsymbols, opts.showsymbols)
483 if __name__ == '__main__':
484 sys.exit(main())