Explicitly add python-numpy dependency to install-build-deps.
[chromium-blink-merge.git] / tools / binary_size / explain_binary_size_delta.py
blobcb99fe6cc93981b3bec3ecff66e8ea0cb1a9fa07
1 #!/usr/bin/env python
2 # Copyright 2014 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Describe the size difference of two binaries.
8 Generates a description of the size difference of two binaries based
9 on the difference of the size of various symbols.
11 This tool needs "nm" dumps of each binary with full symbol
12 information. You can obtain the necessary dumps by running the
13 run_binary_size_analysis.py script upon each binary, with the
14 "--nm-out" parameter set to the location in which you want to save the
15 dumps. Example:
17 # obtain symbol data from first binary in /tmp/nm1.dump
18 cd $CHECKOUT1_SRC
19 ninja -C out/Release binary_size_tool
20 tools/binary_size/run_binary_size_analysis.py \
21 --library <path_to_library> \
22 --destdir /tmp/throwaway \
23 --nm-out /tmp/nm1.dump
25 # obtain symbol data from second binary in /tmp/nm2.dump
26 cd $CHECKOUT2_SRC
27 ninja -C out/Release binary_size_tool
28 tools/binary_size/run_binary_size_analysis.py \
29 --library <path_to_library> \
30 --destdir /tmp/throwaway \
31 --nm-out /tmp/nm2.dump
33 # cleanup useless files
34 rm -r /tmp/throwaway
36 # run this tool
37 explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump
38 """
40 import collections
41 import operator
42 import optparse
43 import os
44 import sys
46 import binary_size_utils
def _BuildSymbolCache(symbols):
  """Groups symbols as {(file_path, symbol_type): {symbol_name: [sizes]}}.

  A symbol name may occur several times in the same file, hence the list of
  sizes per name.
  """
  cache = {}
  for symbol_name, symbol_type, symbol_size, file_path in symbols:
    if 'vtable for ' in symbol_name:
      symbol_type = '@'  # hack to categorize these separately
    if file_path:
      file_path = os.path.normpath(file_path)
    else:
      file_path = '(No Path)'
    bucket = cache.setdefault((file_path, symbol_type), {})
    bucket.setdefault(symbol_name, []).append(symbol_size)
  return cache


def Compare(symbols1, symbols2):
  """Executes a comparison of the symbols in symbols1 and symbols2.

  Args:
    symbols1: iterable of (symbol_name, symbol_type, symbol_size, file_path)
        tuples for the first binary.
    symbols2: iterable of tuples of the same shape for the second binary.

  Returns:
    tuple of lists: (added_symbols, removed_symbols, changed_symbols, others)
    Every list entry is a tuple
    (file_path, symbol_type, symbol_name, size1, size2), where size1 is None
    for added symbols and size2 is None for removed symbols.
  """
  added = []  # tuples
  removed = []  # tuples
  changed = []  # tuples
  unchanged = []  # tuples

  cache1 = _BuildSymbolCache(symbols1)
  cache2 = _BuildSymbolCache(symbols2)

  # Now diff them. We iterate over the elements in cache1. For each symbol
  # that we find in cache2, we record whether it was deleted, changed, or
  # unchanged. We then remove it from cache2; all the symbols that remain
  # in cache2 at the end of the iteration over cache1 are the 'new' symbols.
  for key, bucket1 in cache1.items():
    file_path, symbol_type = key
    bucket2 = cache2.get(key)
    if not bucket2:
      # A file was removed. Everything in bucket1 is dead.
      for symbol_name, symbol_size_list in bucket1.items():
        for symbol_size in symbol_size_list:
          removed.append(
              (file_path, symbol_type, symbol_name, symbol_size, None))
      continue

    # File still exists, look for changes within.
    for symbol_name, symbol_size_list in bucket1.items():
      # Popping marks the symbol as "not new" by removing it from cache2.
      size_list2 = bucket2.pop(symbol_name, None)
      if size_list2 is None:
        # Symbol no longer exists in bucket2.
        for symbol_size in symbol_size_list:
          removed.append(
              (file_path, symbol_type, symbol_name, symbol_size, None))
      elif len(symbol_size_list) == 1 and len(size_list2) == 1:
        symbol_size = symbol_size_list[0]
        size2 = size_list2[0]
        entry = (file_path, symbol_type, symbol_name, symbol_size, size2)
        if symbol_size != size2:
          # Symbol has changed size in bucket.
          changed.append(entry)
        else:
          # Symbol is unchanged.
          unchanged.append(entry)
      else:
        # Complex comparison for when a symbol exists multiple times
        # in the same file (where file can be "unknown file").
        symbol_size_counter = collections.Counter(symbol_size_list)
        # delta_counter[size] = occurrences before - occurrences after.
        delta_counter = collections.Counter(symbol_size_list)
        delta_counter.subtract(size_list2)
        for symbol_size in sorted(delta_counter):
          delta = delta_counter[symbol_size]
          unchanged_count = symbol_size_counter[symbol_size]
          if delta > 0:
            unchanged_count -= delta
          for _ in range(unchanged_count):
            unchanged.append(
                (file_path, symbol_type, symbol_name, symbol_size,
                 symbol_size))
          if delta > 0:  # Used to be more of these than there is now.
            for _ in range(delta):
              removed.append(
                  (file_path, symbol_type, symbol_name, symbol_size, None))
          elif delta < 0:  # More of this (symbol,size) now.
            for _ in range(-delta):
              added.append(
                  (file_path, symbol_type, symbol_name, None, symbol_size))

    if not bucket2:
      # Entire bucket is empty, delete from cache2. This must not touch
      # cache1: it is the dictionary currently being iterated.
      del cache2[key]

  # We have now analyzed all symbols that are in cache1 and removed all of
  # the encountered symbols from cache2. What's left in cache2 is the new
  # symbols.
  for key, bucket2 in cache2.items():
    for symbol_name, symbol_size_list in bucket2.items():
      for symbol_size in symbol_size_list:
        added.append((key[0], key[1], symbol_name, None, symbol_size))
  return (added, removed, changed, unchanged)
def DeltaStr(number):
  """Returns the number as a string with an explicit sign.

  Positive numbers get a '+' prefix. Negative numbers keep the '-' that
  str() already emits, and zero is returned without any sign.
  """
  result = str(number)
  if number > 0:
    result = '+' + result
  return result
class CrunchStatsData(object):
  """Stores a summary of data of a certain kind."""

  def __init__(self, symbols):
    # The symbol tuples this summary describes, stored as given.
    self.symbols = symbols
    # The remaining fields start empty and are filled in by the caller.
    self.sources = set()  # Source file paths that contain the symbols.
    self.before_size = 0  # Accumulated size in the first binary.
    self.after_size = 0  # Accumulated size in the second binary.
    self.symbols_by_path = {}  # Source path -> list of symbol tuples.
def CrunchStats(added, removed, changed, unchanged, showsources, showsymbols):
  """Outputs to stdout a summary of changes based on the symbol lists.

  Every symbol list holds tuples of the form
  (file_path, symbol_type, symbol_name, size1, size2). Either size may be
  None in |added| (no size1) and |removed| (no size2).

  Args:
    added: symbols that only exist in the second binary.
    removed: symbols that only exist in the first binary.
    changed: symbols whose size differs between the binaries.
    unchanged: symbols with the same size in both binaries.
    showsources: if true, also print the per-source analysis.
    showsymbols: if true, list the individual symbols under each source.
        Only consulted when showsources is true.
  """
  # Split changed into grown and shrunk because that is easier to
  # discuss.
  grown = []
  shrunk = []
  for item in changed:
    _file_path, _symbol_type, _symbol_name, size1, size2 = item
    if size1 < size2:
      grown.append(item)
    else:
      shrunk.append(item)

  new_symbols = CrunchStatsData(added)
  removed_symbols = CrunchStatsData(removed)
  grown_symbols = CrunchStatsData(grown)
  shrunk_symbols = CrunchStatsData(shrunk)
  sections = [new_symbols, removed_symbols, grown_symbols, shrunk_symbols]
  for section in sections:
    for file_path, symbol_type, symbol_name, size1, size2 in section.symbols:
      section.sources.add(file_path)
      if size1 is not None:
        section.before_size += size1
      if size2 is not None:
        section.after_size += size2
      bucket = section.symbols_by_path.setdefault(file_path, [])
      bucket.append((symbol_name, symbol_type, size1, size2))

  total_change = sum(s.after_size - s.before_size for s in sections)
  summary = 'Total change: %s bytes' % DeltaStr(total_change)
  print(summary)
  print('=' * len(summary))
  for section in sections:
    if not section.symbols:
      continue
    if section.before_size == 0:
      description = ('added, totalling %s bytes' % DeltaStr(section.after_size))
    elif section.after_size == 0:
      description = ('removed, totalling %s bytes' %
                     DeltaStr(-section.before_size))
    else:
      if section.after_size > section.before_size:
        type_str = 'grown'
      else:
        type_str = 'shrunk'
      description = ('%s, for a net change of %s bytes '
                     '(%d bytes before, %d bytes after)' %
                     (type_str,
                      DeltaStr(section.after_size - section.before_size),
                      section.before_size, section.after_size))
    print(' %d %s across %d sources' %
          (len(section.symbols), description, len(section.sources)))

  maybe_unchanged_sources = set()
  unchanged_symbols_size = 0
  for file_path, _symbol_type, _symbol_name, size1, _size2 in unchanged:
    maybe_unchanged_sources.add(file_path)
    unchanged_symbols_size += size1  # == size2
  print(' %d unchanged, totalling %d bytes' %
        (len(unchanged), unchanged_symbols_size))

  # High level analysis, always output.
  unchanged_sources = maybe_unchanged_sources
  for section in sections:
    unchanged_sources = unchanged_sources - section.sources
  new_sources = (new_symbols.sources -
                 maybe_unchanged_sources -
                 removed_symbols.sources)
  removed_sources = (removed_symbols.sources -
                     maybe_unchanged_sources -
                     new_symbols.sources)
  partially_changed_sources = (grown_symbols.sources |
                               shrunk_symbols.sources | new_symbols.sources |
                               removed_symbols.sources) - removed_sources - new_sources
  all_files = set()
  for section in sections:
    all_files = all_files | section.sources
  all_files = all_files | maybe_unchanged_sources
  print('Source stats:')
  print(' %d sources encountered.' % len(all_files))
  print(' %d completely new.' % len(new_sources))
  print(' %d removed completely.' % len(removed_sources))
  print(' %d partially changed.' % len(partially_changed_sources))
  print(' %d completely unchanged.' % len(unchanged_sources))
  # Sanity check: the four categories above must cover every source.
  remainder = (all_files - new_sources - removed_sources -
               partially_changed_sources - unchanged_sources)
  assert len(remainder) == 0

  if not showsources:
    return  # Per-source analysis, only if requested
  print('Per-source Analysis:')
  # Maps path -> total bytes gained ('plus') and lost ('minus') in that source.
  delta_by_path = {}
  for section in sections:
    for path in section.symbols_by_path:
      entry = delta_by_path.setdefault(path, {'plus': 0, 'minus': 0})
      for _symbol_name, _symbol_type, size1, size2 in \
          section.symbols_by_path[path]:
        if size1 is None:
          delta = size2
        elif size2 is None:
          delta = -size1
        else:
          delta = size2 - size1

        if delta > 0:
          entry['plus'] += delta
        else:
          entry['minus'] += (-1 * delta)

  def delta_sort_key(item):
    """Orders sources by their net growth."""
    _path, size_data = item
    return size_data['plus'] - size_data['minus']

  def changed_symbol_sortkey(item):
    """Orders changed symbols by (size1 - size2), then by name."""
    symbol_name, _symbol_type, size1, size2 = item
    return (size1 - size2, symbol_name)

  # Sources with the largest net growth are listed first.
  for path, size_data in sorted(delta_by_path.items(), key=delta_sort_key,
                                reverse=True):
    gain = size_data['plus']
    loss = size_data['minus']
    delta = gain - loss
    header = ' %s - Source: %s - (gained %d, lost %d)' % (DeltaStr(delta),
                                                          path, gain, loss)
    divider = '-' * len(header)
    print('')
    print(divider)
    print(header)
    print(divider)
    if not showsymbols:
      continue

    if path in new_symbols.symbols_by_path:
      print(' New symbols:')
      for symbol_name, symbol_type, size1, size2 in \
          sorted(new_symbols.symbols_by_path[path],
                 key=operator.itemgetter(3),
                 reverse=True):
        print(' %8s: %s type=%s, size=%d bytes' %
              (DeltaStr(size2), symbol_name, symbol_type, size2))
    if path in removed_symbols.symbols_by_path:
      print(' Removed symbols:')
      for symbol_name, symbol_type, size1, size2 in \
          sorted(removed_symbols.symbols_by_path[path],
                 key=operator.itemgetter(2)):
        print(' %8s: %s type=%s, size=%d bytes' %
              (DeltaStr(-size1), symbol_name, symbol_type, size1))
    for (changed_symbols_by_path, type_str) in [
        (grown_symbols.symbols_by_path, "Grown"),
        (shrunk_symbols.symbols_by_path, "Shrunk")]:
      if path in changed_symbols_by_path:
        print(' %s symbols:' % type_str)
        for symbol_name, symbol_type, size1, size2 in \
            sorted(changed_symbols_by_path[path], key=changed_symbol_sortkey):
          print(' %8s: %s type=%s, (was %d bytes, now %d bytes)'
                % (DeltaStr(size2 - size1), symbol_name,
                   symbol_type, size1, size2))
def main():
  """Parses the command line, diffs the two nm dumps and prints the report.

  Exits through parser.error() when --nm1 or --nm2 is missing. Returns None,
  which the caller's sys.exit() turns into exit status 0.
  """
  usage = """%prog [options]

  Analyzes the symbolic differences between two binary files
  (typically, not necessarily, two different builds of the same
  library) and produces a detailed description of symbols that have
  been added, removed, or whose size has changed.

  Example:
       explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump

  Options are available via '--help'.
  """
  parser = optparse.OptionParser(usage=usage)
  parser.add_option('--nm1', metavar='PATH',
                    help='the nm dump of the first library')
  parser.add_option('--nm2', metavar='PATH',
                    help='the nm dump of the second library')
  parser.add_option('--showsources', action='store_true', default=False,
                    help='show per-source statistics')
  parser.add_option('--showsymbols', action='store_true', default=False,
                    help='show all symbol information; implies --showsources')
  parser.add_option('--verbose', action='store_true', default=False,
                    help='output internal debugging stuff')
  opts, _args = parser.parse_args()

  if not opts.nm1:
    parser.error('--nm1 is required')
  if not opts.nm2:
    parser.error('--nm2 is required')
  symbols = []
  for path in [opts.nm1, opts.nm2]:
    # open() rather than file(): the latter only exists on Python 2.
    with open(path, 'r') as nm_input:
      if opts.verbose:
        print('parsing ' + path + '...')
      symbols.append(list(binary_size_utils.ParseNm(nm_input)))
  (added, removed, changed, unchanged) = Compare(symbols[0], symbols[1])
  # --showsymbols implies --showsources: symbols are listed per source.
  CrunchStats(added, removed, changed, unchanged,
              opts.showsources or opts.showsymbols, opts.showsymbols)
# Run the tool only when executed as a script, not when imported.
if __name__ == '__main__':
  sys.exit(main())