2 # Copyright 2014 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
"""Describe the size difference of two binaries.

Generates a description of the size difference of two binaries based
on the difference of the size of various symbols.

This tool needs "nm" dumps of each binary with full symbol
information. You can obtain the necessary dumps by running the
run_binary_size_analysis.py script upon each binary, with the
"--nm-out" parameter set to the location in which you want to save the
dump. Example:

  # obtain symbol data from first binary in /tmp/nm1.dump
  ninja -C out/Release binary_size_tool
  tools/binary_size/run_binary_size_analysis \
      --library <path_to_library> \
      --destdir /tmp/throwaway \
      --nm-out /tmp/nm1.dump

  # obtain symbol data from second binary in /tmp/nm2.dump
  ninja -C out/Release binary_size_tool
  tools/binary_size/run_binary_size_analysis \
      --library <path_to_library> \
      --destdir /tmp/throwaway \
      --nm-out /tmp/nm2.dump

  # cleanup useless files
  rm -r /tmp/throwaway

  # run this tool
  explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump
"""
44 import binary_size_utils
def Compare(symbols1, symbols2):
  """Executes a comparison of the symbols in symbols1 and symbols2.

  Args:
    symbols1: iterable of (symbol_name, symbol_type, symbol_size, file_path)
        tuples describing the first binary.
    symbols2: iterable of the same shape describing the second binary.

  Returns:
    tuple of lists: (added_symbols, removed_symbols, changed_symbols, others)
    Every list holds (file_path, symbol_type, symbol_name, size1, size2)
    tuples. size1 is None for added symbols, size2 is None for removed
    symbols, and "others" are the symbols whose size did not change.
  """
  added = []  # tuples
  removed = []  # tuples
  changed = []  # tuples
  unchanged = []  # tuples

  cache1 = {}
  cache2 = {}
  # Make a map of (file, symbol_type) : {symbol_name: symbol_size}
  for cache, symbols in ((cache1, symbols1), (cache2, symbols2)):
    for symbol_name, symbol_type, symbol_size, file_path in symbols:
      if 'vtable for ' in symbol_name:
        symbol_type = '@'  # hack to categorize these separately
      if file_path:
        file_path = os.path.normpath(file_path)
      else:
        file_path = '(No Path)'
      key = (file_path, symbol_type)
      bucket = cache.setdefault(key, {})
      bucket[symbol_name] = symbol_size

  # Now diff them. We iterate over the elements in cache1. For each symbol
  # that we find in cache2, we record whether it was deleted, changed, or
  # unchanged. We then remove it from cache2; all the symbols that remain
  # in cache2 at the end of the iteration over cache1 are the 'new' symbols.
  for key, bucket1 in cache1.items():
    file_path, symbol_type = key
    bucket2 = cache2.get(key)
    if not bucket2:
      # A file was removed. Everything in bucket1 is dead.
      for symbol_name, symbol_size in bucket1.items():
        removed.append((file_path, symbol_type, symbol_name, symbol_size,
                        None))
      continue

    # File still exists, look for changes within.
    for symbol_name, symbol_size in bucket1.items():
      # Popping both looks the symbol up and marks it as "not new".
      size2 = bucket2.pop(symbol_name, None)
      if size2 is None:
        # Symbol no longer exists in bucket2.
        removed.append((file_path, symbol_type, symbol_name, symbol_size,
                        None))
      elif symbol_size != size2:
        # Symbol has changed size in bucket.
        changed.append((file_path, symbol_type, symbol_name, symbol_size,
                        size2))
      else:
        # Symbol is unchanged.
        unchanged.append((file_path, symbol_type, symbol_name, symbol_size,
                          size2))
    if not bucket2:
      # Entire bucket is empty, delete it from cache2. Only cache2 is
      # mutated here: cache1 is being iterated and must keep its size.
      del cache2[key]

  # We have now analyzed all symbols that are in cache1 and removed all of
  # the encountered symbols from cache2. What's left in cache2 is the new
  # symbols.
  for (file_path, symbol_type), bucket2 in cache2.items():
    for symbol_name, symbol_size in bucket2.items():
      added.append((file_path, symbol_type, symbol_name, None, symbol_size))
  return (added, removed, changed, unchanged)
def CrunchStats(added, removed, changed, unchanged, showsources, showsymbols):
  """Outputs to stdout a summary of changes based on the symbol lists.

  Args:
    added: list of (file_path, symbol_type, symbol_name, size1, size2)
        tuples for new symbols; only size2 is read.
    removed: list of the same shape for deleted symbols; only size1 is read.
    changed: list of the same shape for symbols whose size differs.
    unchanged: list of the same shape for symbols whose size is the same;
        only size1 is read.
    showsources: if true, also print a per-source breakdown.
    showsymbols: if true, list the individual symbols under each source.
        Only consulted when showsources is true.
  """
  print('Symbol statistics:')
  sources_with_new_symbols = set()
  new_symbols_size = 0
  new_symbols_by_path = {}
  for file_path, symbol_type, symbol_name, size1, size2 in added:
    sources_with_new_symbols.add(file_path)
    new_symbols_size += size2
    bucket = new_symbols_by_path.setdefault(file_path, [])
    bucket.append((symbol_name, symbol_type, None, size2))
  print(' %d added, totalling %d bytes across %d sources' %
        (len(added), new_symbols_size, len(sources_with_new_symbols)))

  sources_with_removed_symbols = set()
  removed_symbols_size = 0
  removed_symbols_by_path = {}
  for file_path, symbol_type, symbol_name, size1, size2 in removed:
    sources_with_removed_symbols.add(file_path)
    removed_symbols_size += size1
    bucket = removed_symbols_by_path.setdefault(file_path, [])
    bucket.append((symbol_name, symbol_type, size1, None))
  print(' %d removed, totalling %d bytes removed across %d sources' %
        (len(removed), removed_symbols_size,
         len(sources_with_removed_symbols)))

  sources_with_changed_symbols = set()
  before_size = 0
  after_size = 0
  changed_symbols_by_path = {}
  for file_path, symbol_type, symbol_name, size1, size2 in changed:
    sources_with_changed_symbols.add(file_path)
    before_size += size1
    after_size += size2
    bucket = changed_symbols_by_path.setdefault(file_path, [])
    bucket.append((symbol_name, symbol_type, size1, size2))
  print(' %d changed, resulting in a net change of %d bytes '
        '(%d bytes before, %d bytes after) across %d sources' %
        (len(changed), (after_size - before_size), before_size, after_size,
         len(sources_with_changed_symbols)))

  maybe_unchanged_sources = set()
  unchanged_symbols_size = 0
  for file_path, symbol_type, symbol_name, size1, size2 in unchanged:
    maybe_unchanged_sources.add(file_path)
    unchanged_symbols_size += size1  # == size2
  print(' %d unchanged, totalling %d bytes' %
        (len(unchanged), unchanged_symbols_size))

  # High level analysis, always output.
  unchanged_sources = (maybe_unchanged_sources -
                       sources_with_changed_symbols -
                       sources_with_removed_symbols -
                       sources_with_new_symbols)
  new_sources = (sources_with_new_symbols -
                 maybe_unchanged_sources -
                 sources_with_removed_symbols)
  removed_sources = (sources_with_removed_symbols -
                     maybe_unchanged_sources -
                     sources_with_new_symbols)
  partially_changed_sources = (sources_with_changed_symbols |
                               sources_with_new_symbols |
                               sources_with_removed_symbols
                              ) - removed_sources - new_sources
  all_sources = (sources_with_new_symbols |
                 sources_with_removed_symbols |
                 sources_with_changed_symbols |
                 maybe_unchanged_sources)
  print('Source stats:')
  print(' %d sources encountered.' % len(all_sources))
  print(' %d completely new.' % len(new_sources))
  print(' %d removed completely.' % len(removed_sources))
  print(' %d partially changed.' % len(partially_changed_sources))
  print(' %d completely unchanged.' % len(unchanged_sources))
  # Sanity check: the four categories above must cover every source.
  remainder = (all_sources - new_sources - removed_sources -
               partially_changed_sources - unchanged_sources)
  assert len(remainder) == 0

  if not showsources:
    return  # Per-source analysis, only if requested
  print('Per-source Analysis:')

  # Maps path -> {'plus': bytes gained, 'minus': bytes lost}.
  delta_by_path = {}

  def entry_for(path):
    return delta_by_path.setdefault(path, {'plus': 0, 'minus': 0})

  for path, symbols in new_symbols_by_path.items():
    entry = entry_for(path)
    for symbol_name, symbol_type, size1, size2 in symbols:
      entry['plus'] += size2
  for path, symbols in removed_symbols_by_path.items():
    entry = entry_for(path)
    for symbol_name, symbol_type, size1, size2 in symbols:
      entry['minus'] += size1
  for path, symbols in changed_symbols_by_path.items():
    entry = entry_for(path)
    for symbol_name, symbol_type, size1, size2 in symbols:
      delta = size2 - size1
      if delta > 0:
        entry['plus'] += delta
      else:
        entry['minus'] += (-1 * delta)

  def sortkey(item):
    # Largest growth first, ties broken by symbol name.
    symbol_name, _symbol_type, size1, size2 = item
    return (size1 - size2, symbol_name)

  for path in sorted(delta_by_path):
    print(' Source: ' + path)
    size_data = delta_by_path[path]
    gain = size_data['plus']
    loss = size_data['minus']
    delta = size_data['plus'] - size_data['minus']
    print(' Change: %d bytes (gained %d, lost %d)' % (delta, gain, loss))
    if not showsymbols:
      continue
    if path in new_symbols_by_path:
      print(' New symbols:')
      for symbol_name, symbol_type, size1, size2 in \
          new_symbols_by_path[path]:
        print(' %s type=%s, size=%d bytes' %
              (symbol_name, symbol_type, size2))
    if path in removed_symbols_by_path:
      print(' Removed symbols:')
      for symbol_name, symbol_type, size1, size2 in \
          removed_symbols_by_path[path]:
        print(' %s type=%s, size=%d bytes' %
              (symbol_name, symbol_type, size1))
    if path in changed_symbols_by_path:
      print(' Changed symbols:')
      for symbol_name, symbol_type, size1, size2 in \
          sorted(changed_symbols_by_path[path], key=sortkey):
        print(' %s type=%s, delta=%d bytes (was %d bytes, now %d bytes)'
              % (symbol_name, symbol_type, (size2 - size1), size1, size2))
def main():
  """Parses the command line, loads both nm dumps and prints the size report.

  Exits through parser.error() when --nm1 or --nm2 is not supplied.
  """
  usage = """%prog [options]

Analyzes the symbolic differences between two binary files
(typically, not necessarily, two different builds of the same
library) and produces a detailed description of symbols that have
been added, removed, or whose size has changed.

Example:
  explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump

Options are available via '--help'.
"""
  parser = optparse.OptionParser(usage=usage)
  parser.add_option('--nm1', metavar='PATH',
                    help='the nm dump of the first library')
  parser.add_option('--nm2', metavar='PATH',
                    help='the nm dump of the second library')
  parser.add_option('--showsources', action='store_true', default=False,
                    help='show per-source statistics')
  # The help text used to say "--showfiles", an option that does not exist.
  parser.add_option('--showsymbols', action='store_true', default=False,
                    help='show all symbol information; implies --showsources')
  parser.add_option('--verbose', action='store_true', default=False,
                    help='output internal debugging stuff')
  opts, _args = parser.parse_args()

  if not opts.nm1:
    parser.error('--nm1 is required')
  if not opts.nm2:
    parser.error('--nm2 is required')

  symbols = []
  for path in [opts.nm1, opts.nm2]:
    # open() rather than the Python 2 only file() builtin.
    with open(path, 'r') as nm_input:
      if opts.verbose:
        print('parsing ' + path + '...')
      # Materialize the parse result while the dump is still open.
      symbols.append(list(binary_size_utils.ParseNm(nm_input)))
  (added, removed, changed, unchanged) = Compare(symbols[0], symbols[1])
  # --showsymbols implies --showsources.
  CrunchStats(added, removed, changed, unchanged,
              opts.showsources or opts.showsymbols, opts.showsymbols)


if __name__ == '__main__':
  main()