Add ICU message format support
[chromium-blink-merge.git] / tools / linux / dump-static-initializers.py
blobb71d06274821d74d4548f655a85048541de9e820
1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Dump functions called by static initializers in a Linux Release binary.
8 Usage example:
9 tools/linux/dump-static-initializers.py out/Release/chrome
11 A brief overview of static initialization:
12 1) the compiler writes out, per object file, a function that contains
13 the static initializers for that file.
14 2) the compiler also writes out a pointer to that function in a special
15 section.
16 3) at link time, the linker concatenates the function pointer sections
17 into a single list of all initializers.
18 4) at run time, on startup the binary runs all function pointers.
20 The functions in (1) all have mangled names of the form
21 _GLOBAL__I_foobar.cc
22 using objdump, we can disassemble those functions and dump all symbols that
23 they reference.
24 """
26 import optparse
27 import re
28 import subprocess
29 import sys
# A map of symbol => informative text about it.
# Keys are demangled symbol names exactly as they appear in the report; the
# value is appended in square brackets after the symbol.
NOTES = {
  '__cxa_atexit@plt': 'registers a dtor to run at exit',
  'std::__ioinit': '#includes <iostream>, use <ostream> instead',
}
def _IsGitWorkspace():
  """Return True if `git rev-parse` succeeds in the current directory.

  A missing or unrunnable git executable is reported as False instead of
  aborting the script at import time.
  """
  try:
    git = subprocess.Popen(['git', 'rev-parse'], stderr=subprocess.PIPE)
  except OSError:
    return False
  # communicate() drains stderr before waiting, so git can never block on a
  # full pipe.
  git.communicate()
  return git.returncode == 0


# Determine whether this is a git checkout (as opposed to e.g. svn).
IS_GIT_WORKSPACE = _IsGitWorkspace()
class Demangler(object):
  """A wrapper around c++filt to provide a function to demangle symbols.

  One c++filt process is started per instance and reused for every call:
  each Demangle() writes one line to its stdin and reads one line back from
  its stdout.
  """
  def __init__(self, toolchain):
    """Start the c++filt child process.

    Args:
      toolchain: string prepended to 'c++filt' to form the executable name
          (empty string for the default toolchain).
    """
    # universal_newlines=True puts both pipes in text mode so that Demangle()
    # exchanges str objects on Python 2 and Python 3 alike.
    self.cppfilt = subprocess.Popen([toolchain + 'c++filt'],
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    universal_newlines=True)

  def Demangle(self, sym):
    """Given mangled symbol |sym|, return its demangled form.

    The reply is returned with surrounding whitespace stripped.
    """
    self.cppfilt.stdin.write(sym + '\n')
    # Push the request to the child right away; if the pipe is buffered the
    # readline() below would otherwise wait forever for a reply.
    self.cppfilt.stdin.flush()
    return self.cppfilt.stdout.readline().strip()
# Matches for example: "cert_logger.pb.cc", capturing "cert_logger".
protobuf_filename_re = re.compile(r'(.*)\.pb\.cc$')
def QualifyFilenameAsProto(filename):
  """Attempt to qualify a bare |filename| with a src-relative path, assuming it
  is a protoc-generated file.

  The lookup asks `git ls-files` for '*/<basename>.proto'.  If exactly one
  file matches, that path (the .proto file, as printed by git) is returned.
  Otherwise -- not a git checkout, not a '.pb.cc' name, no hit, or several
  hits -- the original filename is returned."""
  if not IS_GIT_WORKSPACE:
    return filename
  match = protobuf_filename_re.match(filename)
  if not match:
    return filename
  # group(1) is the captured stem as a string; groups() would give a tuple.
  basename = match.group(1)
  gitlsfiles = subprocess.Popen(
      ['git', 'ls-files', '--', '*/%s.proto' % basename],
      stdout=subprocess.PIPE, universal_newlines=True)
  # communicate() reads all output and reaps the child, so no process or
  # pipe is left behind regardless of how many hits there are.
  output, _ = gitlsfiles.communicate()
  hits = output.splitlines()
  if len(hits) != 1:
    return filename  # No hit, or multiple hits: can't help.
  return hits[0].strip()
# Regex matching the substring of a symbol's demangled text representation most
# likely to appear in a source file.
# Example: "v8::internal::Builtins::InitBuiltinFunctionTable()" becomes
# "InitBuiltinFunctionTable", since the first (optional & non-capturing) group
# picks up any ::-qualification and the last fragment picks up a suffix that
# starts with an opener.
symbol_code_name_re = re.compile(r'^(?:[^(<[]*::)?([^:(<[]*).*?$')
def QualifyFilename(filename, symbol):
  """Given a bare filename and a symbol that occurs in it, attempt to qualify
  it with a src-relative path.

  The lookup runs `git grep -l` for the short form of |symbol| restricted to
  paths matching '*/<filename>'.  If exactly one file is listed, that path is
  returned.  If this is not a git checkout, or zero or more than one file
  matches, the original filename is returned."""
  if not IS_GIT_WORKSPACE:
    return filename
  match = symbol_code_name_re.match(symbol)
  if not match:
    return filename
  symbol = match.group(1)
  # '-e' marks the next argument as the pattern, so a symbol can never be
  # mistaken for a command-line option.
  gitgrep = subprocess.Popen(
      ['git', 'grep', '-l', '-e', symbol, '--', '*/%s' % filename],
      stdout=subprocess.PIPE, universal_newlines=True)
  # communicate() reads all output and reaps the child, so no process or
  # pipe is left behind regardless of how many hits there are.
  output, _ = gitgrep.communicate()
  hits = output.splitlines()
  if len(hits) != 1:
    return filename  # No candidate, or more than one: return bare filename.
  return hits[0].strip()
# Pattern for the `nm -S` lines that describe static-initializer functions:
# "<address> <size> t <initializer symbol>".  The trailing group captures the
# source file name embedded in the symbol.
# See test_ParseNmLine for examples.
nm_re = re.compile(r'(\S+) (\S+) t (?:_ZN12)?_GLOBAL__(?:sub_)?I_(.*)')
def ParseNmLine(line):
  """Parse one line of nm output.

  Returns a (file, start, size) tuple when the line names a static
  initializer, with start and size converted from hexadecimal; returns None
  for any other line."""
  found = nm_re.match(line)
  if found is None:
    return None
  start_hex, size_hex, source_file = found.groups()
  return (source_file, int(start_hex, 16), int(size_hex, 16))
def test_ParseNmLine():
  """Check ParseNmLine against sample nm lines for both symbol spellings."""
  samples = (
      ('0000000001919920 0000000000000008 t '
       '_ZN12_GLOBAL__I_safe_browsing_service.cc',
       ('safe_browsing_service.cc', 26319136, 8)),
      ('00000000026b9eb0 0000000000000024 t '
       '_GLOBAL__sub_I_extension_specifics.pb.cc',
       ('extension_specifics.pb.cc', 40607408, 36)),
  )
  for nm_line, expected in samples:
    parse = ParseNmLine(nm_line)
    # On failure the assertion message is the value that was actually parsed.
    assert parse == expected, parse

# The self-test is cheap, so it runs unconditionally when the module loads.
test_ParseNmLine()
def ParseNm(toolchain, binary):
  """Given a binary, yield static initializers as (file, start, size) tuples.

  Runs `<toolchain>nm -S <binary>` and passes every output line through
  ParseNmLine; lines that do not describe a static initializer are skipped.

  Args:
    toolchain: string prepended to 'nm' to form the executable name.
    binary: path of the binary to inspect.
  """
  # Text mode, so ParseNmLine receives str lines on Python 2 and 3 alike.
  nm = subprocess.Popen([toolchain + 'nm', '-S', binary],
                        stdout=subprocess.PIPE, universal_newlines=True)
  try:
    for line in nm.stdout:
      parse = ParseNmLine(line)
      if parse:
        yield parse
  finally:
    # Runs when the output is exhausted and also when the caller abandons the
    # generator early: release the pipe and reap the child.
    nm.stdout.close()
    nm.wait()
# Regex matching objdump output for the symbols we're interested in.
# Example line:
# 12354ab: (disassembly, including <FunctionReference>)
# Because '.*' is greedy, the last <...> on the line is the one captured.
disassembly_re = re.compile(r'^\s+[0-9a-f]+:.*<(\S+)>')

# References that carry no useful information for the report.
_UNINFORMATIVE_REF_PREFIXES = ('.LC', '_DYNAMIC')

# Every spelling of a static-initializer symbol that nm_re accepts.  A
# reference to one of these is most likely a jump inside the initializer.
_INITIALIZER_REF_PREFIXES = (
    '_GLOBAL__I_',
    '_GLOBAL__sub_I_',
    '_ZN12_GLOBAL__I_',
    '_ZN12_GLOBAL__sub_I_',
)

def ExtractSymbolReferences(toolchain, binary, start, end):
  """Given a span of addresses, returns symbol references from disassembly.

  Runs `<toolchain>objdump <binary> --disassemble` limited to [start, end)
  and collects the symbol named in angle brackets on each instruction line.

  Args:
    toolchain: string prepended to 'objdump' to form the executable name.
    binary: path of the binary to disassemble.
    start: first address of the span (integer).
    end: stop address of the span (integer).

  Returns:
    A sorted list of the distinct referenced symbols, still mangled.

  Raises:
    RuntimeError: the disassembly mentions
        __static_initialization_and_destruction, which suggests a Debug
        binary.
  """
  cmd = [toolchain + 'objdump', binary, '--disassemble',
         '--start-address=0x%x' % start, '--stop-address=0x%x' % end]
  # Text mode, so the string tests below work on Python 2 and 3 alike.
  objdump = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                             universal_newlines=True)

  refs = set()
  try:
    for line in objdump.stdout:
      if '__static_initialization_and_destruction' in line:
        raise RuntimeError('code mentions '
                           '__static_initialization_and_destruction; '
                           'did you accidentally run this on a Debug binary?')
      match = disassembly_re.search(line)
      if not match:
        continue
      ref = match.group(1)
      if ref.startswith(_UNINFORMATIVE_REF_PREFIXES):
        # Ignore these, they are uninformative.
        continue
      if ref.startswith(_INITIALIZER_REF_PREFIXES):
        # Probably a relative jump within this function.
        continue
      refs.add(ref)
  finally:
    # Release the pipe and reap the child on both the normal and error paths.
    objdump.stdout.close()
    objdump.wait()

  return sorted(refs)
def main():
  """Parse the command line and print the static-initializer report.

  For every static initializer that nm finds in the binary, the symbols it
  references are disassembled, demangled, annotated from NOTES and printed.
  With --diffable, files are processed in sorted order and every output line
  starts with '# ' and the file name.

  Exits through parser.error() (status 2) unless exactly one positional
  argument is given.

  Returns:
    0, for use as the process exit status.
  """
  parser = optparse.OptionParser(usage='%prog [option] filename')
  parser.add_option('-d', '--diffable', dest='diffable',
                    action='store_true', default=False,
                    help='Prints the filename on each line, for more easily '
                    'diff-able output. (Used by sizes.py)')
  parser.add_option('-t', '--toolchain-prefix', dest='toolchain',
                    action='store', default='',
                    help='Toolchain prefix to prepend to all tool invocations '
                    '(nm, objdump, c++filt).')
  opts, args = parser.parse_args()
  if len(args) != 1:
    # parser.error() prints the usage message and exits; it does not return.
    parser.error('missing filename argument')
  binary = args[0]

  # Writing to sys.stdout directly produces the same bytes as the print
  # statement while remaining valid syntax on Python 3.
  out = sys.stdout.write

  demangler = Demangler(opts.toolchain)
  file_count = 0
  initializer_count = 0

  files = ParseNm(opts.toolchain, binary)
  if opts.diffable:
    files = sorted(files)
  for filename, addr, size in files:
    file_count += 1
    ref_output = []

    qualified_filename = QualifyFilenameAsProto(filename)

    if size == 2:
      # gcc generates a two-byte 'repz retq' initializer when there is a
      # ctor even when the ctor is empty. This is fixed in gcc 4.6, but
      # Android uses gcc 4.4.
      ref_output.append('[empty ctor, but it still has cost on gcc <4.6]')
    else:
      for ref in ExtractSymbolReferences(opts.toolchain, binary, addr,
                                         addr+size):
        initializer_count += 1

        ref = demangler.Demangle(ref)
        # Keep trying to qualify the file name with each symbol until one of
        # them yields a path.
        if qualified_filename == filename:
          qualified_filename = QualifyFilename(filename, ref)

        note = ''
        if ref in NOTES:
          note = NOTES[ref]
        elif ref.endswith('_2eproto()'):
          note = 'protocol compiler bug: crbug.com/105626'

        if note:
          ref_output.append('%s [%s]' % (ref, note))
        else:
          ref_output.append(ref)

    if opts.diffable:
      if ref_output:
        out('\n'.join('# ' + qualified_filename + ' ' + r
                      for r in ref_output) + '\n')
      else:
        out('# %s: (empty initializer list)\n' % qualified_filename)
    else:
      out('%s (initializer offset 0x%x size 0x%x)\n' % (qualified_filename,
                                                        addr, size))
      # One line per reference, followed by a blank separator line.
      out(''.join(' %s\n' % r for r in ref_output) + '\n')

  summary = 'Found %d static initializers in %d files.\n' % (
      initializer_count, file_count)
  if opts.diffable:
    summary = '# ' + summary
  out(summary)

  return 0

if '__main__' == __name__:
  sys.exit(main())