2 # Copyright 2014 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
"""Describe the size difference of two binaries.

Generates a description of the size difference of two binaries based
on the difference of the size of various symbols.

This tool needs "nm" dumps of each binary with full symbol
information. You can obtain the necessary dumps by running the
run_binary_size_analysis.py script upon each binary, with the
"--nm-out" parameter set to the location in which you want to save the
dumps. Example:

  # obtain symbol data from first binary in /tmp/nm1.dump
  cd $CHECKOUT1_SRC
  ninja -C out/Release binary_size_tool
  tools/binary_size/run_binary_size_analysis \
      --library <path_to_library>
      --destdir /tmp/throwaway
      --nm-out /tmp/nm1.dump

  # obtain symbol data from second binary in /tmp/nm2.dump
  cd $CHECKOUT2_SRC
  ninja -C out/Release binary_size_tool
  tools/binary_size/run_binary_size_analysis \
      --library <path_to_library>
      --destdir /tmp/throwaway
      --nm-out /tmp/nm2.dump

  # cleanup useless files
  rm -r /tmp/throwaway

  # run this tool
  explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump
"""
import collections
import operator
import optparse
import os
import sys

import binary_size_utils
def Compare(symbols1, symbols2):
  """Executes a comparison of the symbols in symbols1 and symbols2.

  Args:
    symbols1: iterable of (symbol_name, symbol_type, symbol_size, file_path)
        tuples for the first ("before") binary.
    symbols2: iterable of the same shape for the second ("after") binary.

  Returns:
    tuple of lists: (added_symbols, removed_symbols, changed_symbols, others)
    Each entry is a (file_path, symbol_type, symbol_name, size1, size2)
    tuple. size1 is None for added symbols and size2 is None for removed
    symbols; for unchanged symbols size1 == size2.
  """
  added = []  # tuples
  removed = []  # tuples
  changed = []  # tuples
  unchanged = []  # tuples

  # Make a map of (file, symbol_type) : (symbol_name, symbol_size)
  cache1 = _BuildSymbolCache(symbols1)
  cache2 = _BuildSymbolCache(symbols2)

  # Now diff them. We iterate over the elements in cache1. For each symbol
  # that we find in cache2, we record whether it was deleted, changed, or
  # unchanged. We then remove it from cache2; all the symbols that remain
  # in cache2 at the end of the iteration over cache1 are the 'new' symbols.
  for key, bucket1 in cache1.items():
    file_path, symbol_type = key
    bucket2 = cache2.get(key)
    if not bucket2:
      # A file was removed. Everything in bucket1 is dead.
      for symbol_name, symbol_size_list in bucket1.items():
        for symbol_size in symbol_size_list:
          removed.append(
              (file_path, symbol_type, symbol_name, symbol_size, None))
      continue

    # File still exists, look for changes within.
    for symbol_name, symbol_size_list in bucket1.items():
      # Popping also marks the symbol as "not new" in cache2.
      size_list2 = bucket2.pop(symbol_name, None)
      if size_list2 is None:
        # Symbol no longer exists in bucket2.
        for symbol_size in symbol_size_list:
          removed.append(
              (file_path, symbol_type, symbol_name, symbol_size, None))
      elif len(symbol_size_list) == 1 and len(size_list2) == 1:
        symbol_size = symbol_size_list[0]
        size2 = size_list2[0]
        record = (file_path, symbol_type, symbol_name, symbol_size, size2)
        if symbol_size != size2:
          # Symbol has changed size in bucket.
          changed.append(record)
        else:
          # Symbol is unchanged.
          unchanged.append(record)
      else:
        # Complex comparison for when a symbol exists multiple times
        # in the same file (where file can be "unknown file").
        symbol_size_counter = collections.Counter(symbol_size_list)
        delta_counter = collections.Counter(symbol_size_list)
        delta_counter.subtract(size_list2)
        for symbol_size in sorted(delta_counter.keys()):
          delta = delta_counter[symbol_size]
          # Occurrences present on both sides: min(count before, count after).
          unchanged_count = symbol_size_counter[symbol_size]
          if delta > 0:
            unchanged_count -= delta
          for _ in range(unchanged_count):
            unchanged.append((file_path, symbol_type, symbol_name,
                              symbol_size, symbol_size))
          if delta > 0:  # Used to be more of these than there is now.
            for _ in range(delta):
              removed.append((file_path, symbol_type, symbol_name,
                              symbol_size, None))
          elif delta < 0:  # More of this (symbol,size) now.
            for _ in range(-delta):
              added.append((file_path, symbol_type, symbol_name,
                            None, symbol_size))

    if not bucket2:
      # Entire bucket is empty, delete from cache2. Deleting from cache2
      # (not cache1) keeps the dict we are iterating over untouched.
      del cache2[key]

  # We have now analyzed all symbols that are in cache1 and removed all of
  # the encountered symbols from cache2. What's left in cache2 is the new
  # symbols.
  for key, bucket2 in cache2.items():
    for symbol_name, symbol_size_list in bucket2.items():
      for symbol_size in symbol_size_list:
        added.append((key[0], key[1], symbol_name, None, symbol_size))
  return (added, removed, changed, unchanged)


def _BuildSymbolCache(symbols):
  """Maps (file_path, symbol_type) -> {symbol_name: [symbol_size, ...]}."""
  cache = {}
  for symbol_name, symbol_type, symbol_size, file_path in symbols:
    if 'vtable for ' in symbol_name:
      symbol_type = '@'  # hack to categorize these separately
    if file_path:
      file_path = os.path.normpath(file_path)
      if sys.platform.startswith('win'):
        file_path = file_path.replace('\\', '/')
    else:
      file_path = '(No Path)'
    bucket = cache.setdefault((file_path, symbol_type), {})
    bucket.setdefault(symbol_name, []).append(symbol_size)
  return cache
def DeltaStr(number):
  """Returns the number as a string with a '+' prefix if it's > 0 and
  a '-' prefix if it's < 0.

  Zero is returned without any sign prefix.
  """
  result = str(number)
  if number > 0:
    # str() already supplies the '-' for negatives; only '+' must be added.
    result = '+' + result
  return result
class CrunchStatsData(object):
  """Stores a summary of data of a certain kind."""

  def __init__(self, symbols):
    # The symbol tuples belonging to this category, as passed in.
    self.symbols = symbols
    # Accumulators below start empty; CrunchStats fills them in while
    # walking self.symbols.
    self.sources = set()  # file paths that contributed a symbol
    self.before_size = 0  # summed size1 of the symbols
    self.after_size = 0  # summed size2 of the symbols
    self.symbols_by_path = {}  # file path -> list of symbol tuples
def CrunchStats(added, removed, changed, unchanged, showsources, showsymbols):
  """Outputs to stdout a summary of changes based on the symbol lists.

  Args:
    added: list of (file_path, symbol_type, symbol_name, size1, size2)
        tuples for new symbols (size1 is None).
    removed: same shape, for removed symbols (size2 is None).
    changed: same shape, for symbols whose size differs.
    unchanged: same shape, for symbols whose size is the same.
    showsources: if true, also print a per-source breakdown.
    showsymbols: if true, list the individual symbols under each source in
        the per-source breakdown (only consulted when showsources is true).
  """
  # Split changed into grown and shrunk because that is easier to
  # discuss.
  grown = []
  shrunk = []
  for item in changed:
    _file_path, _symbol_type, _symbol_name, size1, size2 = item
    if size1 < size2:
      grown.append(item)
    else:
      shrunk.append(item)

  new_symbols = CrunchStatsData(added)
  removed_symbols = CrunchStatsData(removed)
  grown_symbols = CrunchStatsData(grown)
  shrunk_symbols = CrunchStatsData(shrunk)
  sections = [new_symbols, removed_symbols, grown_symbols, shrunk_symbols]
  for section in sections:
    for file_path, symbol_type, symbol_name, size1, size2 in section.symbols:
      section.sources.add(file_path)
      if size1 is not None:
        section.before_size += size1
      if size2 is not None:
        section.after_size += size2
      bucket = section.symbols_by_path.setdefault(file_path, [])
      bucket.append((symbol_name, symbol_type, size1, size2))

  total_change = sum(s.after_size - s.before_size for s in sections)
  summary = 'Total change: %s bytes' % DeltaStr(total_change)
  print(summary)
  print('=' * len(summary))
  for section in sections:
    if not section.symbols:
      continue
    if section.before_size == 0:
      description = ('added, totalling %s bytes' % DeltaStr(section.after_size))
    elif section.after_size == 0:
      description = ('removed, totalling %s bytes' %
                     DeltaStr(-section.before_size))
    else:
      if section.after_size > section.before_size:
        type_str = 'grown'
      else:
        type_str = 'shrunk'
      description = ('%s, for a net change of %s bytes '
                     '(%d bytes before, %d bytes after)' %
                     (type_str,
                      DeltaStr(section.after_size - section.before_size),
                      section.before_size, section.after_size))
    print(' %d %s across %d sources' %
          (len(section.symbols), description, len(section.sources)))

  maybe_unchanged_sources = set()
  unchanged_symbols_size = 0
  for file_path, _symbol_type, _symbol_name, size1, _size2 in unchanged:
    maybe_unchanged_sources.add(file_path)
    unchanged_symbols_size += size1  # == size2
  print(' %d unchanged, totalling %d bytes' %
        (len(unchanged), unchanged_symbols_size))

  # High level analysis, always output.
  # A source is completely unchanged only if no section touched it. Build a
  # new set so maybe_unchanged_sources itself is left intact for use below.
  unchanged_sources = maybe_unchanged_sources.difference(
      *[section.sources for section in sections])
  new_sources = (new_symbols.sources -
                 maybe_unchanged_sources -
                 removed_symbols.sources)
  removed_sources = (removed_symbols.sources -
                     maybe_unchanged_sources -
                     new_symbols.sources)
  partially_changed_sources = (grown_symbols.sources |
                               shrunk_symbols.sources | new_symbols.sources |
                               removed_symbols.sources
                              ) - removed_sources - new_sources
  allFiles = set()
  for section in sections:
    allFiles = allFiles | section.sources
  allFiles = allFiles | maybe_unchanged_sources
  print('Source stats:')
  print(' %d sources encountered.' % len(allFiles))
  print(' %d completely new.' % len(new_sources))
  print(' %d removed completely.' % len(removed_sources))
  print(' %d partially changed.' % len(partially_changed_sources))
  print(' %d completely unchanged.' % len(unchanged_sources))
  # Sanity check: the four categories must cover every source seen.
  remainder = (allFiles - new_sources - removed_sources -
               partially_changed_sources - unchanged_sources)
  assert len(remainder) == 0

  if not showsources:
    return  # Per-source analysis, only if requested
  _PrintPerSourceAnalysis(new_symbols, removed_symbols, grown_symbols,
                          shrunk_symbols, showsymbols)


def _PrintPerSourceAnalysis(new_symbols, removed_symbols, grown_symbols,
                            shrunk_symbols, showsymbols):
  """Prints gained/lost bytes per source and, optionally, each symbol."""
  print('Per-source Analysis:')
  delta_by_path = {}
  for section in (new_symbols, removed_symbols, grown_symbols,
                  shrunk_symbols):
    for path in section.symbols_by_path:
      entry = delta_by_path.setdefault(path, {'plus': 0, 'minus': 0})
      for _symbol_name, _symbol_type, size1, size2 in \
          section.symbols_by_path[path]:
        # Added symbols carry size1 None, removed symbols size2 None.
        if size1 is None:
          delta = size2
        elif size2 is None:
          delta = -size1
        else:
          delta = size2 - size1

        if delta > 0:
          entry['plus'] += delta
        else:
          entry['minus'] += (-1 * delta)

  def delta_sort_key(item):
    _path, size_data = item
    growth = size_data['plus'] - size_data['minus']
    return growth

  def changed_symbol_sortkey(item):
    symbol_name, _symbol_type, size1, size2 = item
    return (size1 - size2, symbol_name)

  # Sources with the largest net growth are reported first.
  for path, size_data in sorted(delta_by_path.items(), key=delta_sort_key,
                                reverse=True):
    gain = size_data['plus']
    loss = size_data['minus']
    delta = size_data['plus'] - size_data['minus']
    header = ' %s - Source: %s - (gained %d, lost %d)' % (DeltaStr(delta),
                                                          path, gain, loss)
    divider = '-' * len(header)
    print('')
    print(divider)
    print(header)
    print(divider)
    if not showsymbols:
      continue
    if path in new_symbols.symbols_by_path:
      print(' New symbols:')
      for symbol_name, symbol_type, size1, size2 in \
          sorted(new_symbols.symbols_by_path[path],
                 key=operator.itemgetter(3),
                 reverse=True):
        print(' %8s: %s type=%s, size=%d bytes' %
              (DeltaStr(size2), symbol_name, symbol_type, size2))
    if path in removed_symbols.symbols_by_path:
      print(' Removed symbols:')
      for symbol_name, symbol_type, size1, size2 in \
          sorted(removed_symbols.symbols_by_path[path],
                 key=operator.itemgetter(2)):
        print(' %8s: %s type=%s, size=%d bytes' %
              (DeltaStr(-size1), symbol_name, symbol_type, size1))
    for (changed_symbols_by_path, type_str) in [
        (grown_symbols.symbols_by_path, "Grown"),
        (shrunk_symbols.symbols_by_path, "Shrunk")]:
      if path in changed_symbols_by_path:
        print(' %s symbols:' % type_str)
        for symbol_name, symbol_type, size1, size2 in \
            sorted(changed_symbols_by_path[path], key=changed_symbol_sortkey):
          print(' %8s: %s type=%s, (was %d bytes, now %d bytes)'
                % (DeltaStr(size2 - size1), symbol_name,
                   symbol_type, size1, size2))
def main():
  """Parses the command line, diffs the two nm dumps and prints the report.

  Both --nm1 and --nm2 are mandatory; parser.error() is called when either
  is missing.

  Returns:
    None, so that sys.exit(main()) exits with status 0.
  """
  usage = """%prog [options]

  Analyzes the symbolic differences between two binary files
  (typically, not necessarily, two different builds of the same
  library) and produces a detailed description of symbols that have
  been added, removed, or whose size has changed.

  Example:
       explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump

  Options are available via '--help'.
  """
  parser = optparse.OptionParser(usage=usage)
  parser.add_option('--nm1', metavar='PATH',
                    help='the nm dump of the first library')
  parser.add_option('--nm2', metavar='PATH',
                    help='the nm dump of the second library')
  parser.add_option('--showsources', action='store_true', default=False,
                    help='show per-source statistics')
  parser.add_option('--showsymbols', action='store_true', default=False,
                    help='show all symbol information; implies --showsources')
  parser.add_option('--verbose', action='store_true', default=False,
                    help='output internal debugging stuff')
  opts, _args = parser.parse_args()

  if not opts.nm1:
    parser.error('--nm1 is required')
  if not opts.nm2:
    parser.error('--nm2 is required')

  symbols = []
  for path in [opts.nm1, opts.nm2]:
    # open() rather than the Python 2-only file() builtin.
    with open(path, 'r') as nm_input:
      if opts.verbose:
        print('parsing ' + path + '...')
      symbols.append(list(binary_size_utils.ParseNm(nm_input)))
  (added, removed, changed, unchanged) = Compare(symbols[0], symbols[1])
  # Asking for symbols implies the per-source breakdown they are listed in.
  CrunchStats(added, removed, changed, unchanged,
              opts.showsources or opts.showsymbols, opts.showsymbols)


if __name__ == '__main__':
  sys.exit(main())