1 # a waf tool to extract symbols from object files or libraries
2 # using nm, producing a set of exposed defined/undefined symbols
4 import os
, re
, subprocess
5 from waflib
import Utils
, Build
, Options
, Logs
, Errors
6 from waflib
.Logs
import debug
7 from samba_utils
import TO_LIST
, LOCAL_CACHE
, get_tgt_list
9 # these are the data structures used in symbols.py:
11 # bld.env.symbol_map : dictionary mapping public symbol names to list of
12 # subsystem names where that symbol exists
14 # t.in_library : list of libraries that t is in
16 # bld.env.public_symbols: set of public symbols for each subsystem
17 # bld.env.used_symbols : set of used symbols for each subsystem
19 # bld.env.syslib_symbols: dictionary mapping system library name to set of symbols
21 # bld.env.library_dict : dictionary mapping built library paths to subsystem names
23 # LOCAL_CACHE(bld, 'TARGET_TYPE') : dictionary mapping subsystem name to target type
26 def symbols_extract(bld
, objfiles
, dynamic
=False):
27 '''extract symbols from objfile, returning a dictionary containing
28 the set of undefined and public symbols for each file'''
32 # see if we can get some results from the nm cache
33 if not bld
.env
.nm_cache
:
36 objfiles
= set(objfiles
).copy()
40 if obj
in bld
.env
.nm_cache
:
41 ret
[obj
] = bld
.env
.nm_cache
[obj
].copy()
46 if len(objfiles
) == 0:
51 # needed for some .so files
53 cmd
.extend(list(objfiles
))
55 nmpipe
= subprocess
.Popen(cmd
, stdout
=subprocess
.PIPE
).stdout
56 if len(objfiles
) == 1:
57 filename
= list(objfiles
)[0]
58 ret
[filename
] = { "PUBLIC": set(), "UNDEFINED" : set()}
62 if line
.endswith(b
':'):
64 ret
[filename
] = { "PUBLIC": set(), "UNDEFINED" : set() }
66 cols
= line
.split(b
" ")
69 # see if the line starts with an address
76 if symbol_type
in b
"BDGTRVWSi":
78 ret
[filename
]["PUBLIC"].add(symbol
)
79 elif symbol_type
in b
"U":
80 ret
[filename
]["UNDEFINED"].add(symbol
)
85 bld
.env
.nm_cache
[obj
] = ret
[obj
].copy()
87 bld
.env
.nm_cache
[obj
] = { "PUBLIC": set(), "UNDEFINED" : set() }
93 if name
.find(".objlist") != -1:
98 def find_ldd_path(bld
, libname
, binary
):
99 '''find the path to the syslib we will link against'''
101 if not bld
.env
.syslib_paths
:
102 bld
.env
.syslib_paths
= {}
103 if libname
in bld
.env
.syslib_paths
:
104 return bld
.env
.syslib_paths
[libname
]
106 lddpipe
= subprocess
.Popen(['ldd', binary
], stdout
=subprocess
.PIPE
).stdout
109 cols
= line
.split(b
" ")
110 if len(cols
) < 3 or cols
[1] != b
"=>":
112 if cols
[0].startswith(b
"libc."):
114 bld
.env
.libc_path
= cols
[2]
115 if cols
[0].startswith(libname
):
117 bld
.env
.syslib_paths
[libname
] = ret
121 # some regular expressions for parsing readelf output
122 re_sharedlib
= re
.compile(rb
'Shared library: \[(.*)\]')
# readelf reports the search path as either `Library rpath` or `Library runpath`
124 re_rpath
= re
.compile(rb
'Library (rpath|runpath): \[(.*)\]')
126 def get_libs(bld
, binname
):
127 '''find the list of linked libraries for any binary or library
128 binname is the path to the binary/library on disk
130 We do this using readelf instead of ldd as we need to avoid recursing
131 into system libraries
134 # see if we can get the result from the ldd cache
135 if not bld
.env
.lib_cache
:
136 bld
.env
.lib_cache
= {}
137 if binname
in bld
.env
.lib_cache
:
138 return bld
.env
.lib_cache
[binname
].copy()
143 elfpipe
= subprocess
.Popen(['readelf', '--dynamic', binname
], stdout
=subprocess
.PIPE
).stdout
145 m
= re_sharedlib
.search(line
)
148 m
= re_rpath
.search(line
)
150 # output from Popen is always bytestr even in py3
151 rpath
.extend(m
.group(2).split(b
":"))
157 path
= os
.path
.join(r
, lib
)
158 if os
.path
.exists(path
):
159 ret
.add(os
.path
.realpath(path
))
163 # we didn't find this lib using rpath. It is probably a system
164 # library, so to find the path to it we either need to use ldd
165 # or we need to start parsing /etc/ld.so.conf* ourselves. We'll
166 # use ldd for now, even though it is slow
167 path
= find_ldd_path(bld
, lib
, binname
)
169 ret
.add(os
.path
.realpath(path
))
171 bld
.env
.lib_cache
[binname
] = ret
.copy()
176 def get_libs_recursive(bld
, binname
, seen
):
177 '''find the recursive list of linked libraries for any binary or library
178 binname is the path to the binary/library on disk. seen is a set used
183 ret
= get_libs(bld
, binname
)
186 # we don't want to recurse into system libraries. If a system
187 # library that we use (eg. libcups) happens to use another library
188 # (such as libkrb5) which contains common symbols with our own
189 # libraries, then that is not an error
190 if lib
in bld
.env
.library_dict
:
191 ret
= ret
.union(get_libs_recursive(bld
, lib
, seen
))
def find_syslib_path(bld, libname, deps):
    '''return the path of the system library that our targets link against

    bld is the build context, libname is the syslib name and deps is the
    list of targets that use that syslib. The result is whatever
    find_ldd_path() returns for the chosen binary.
    '''
    # we cannot know where the linker found the library, so we take the
    # link output of the first target that depends on it and let
    # find_ldd_path() look the library up in that binary
    first_output = deps[0].link_task.outputs[0]
    linkpath = first_output.abspath(bld.env)

    # the python library is looked up with the python version appended
    if libname == "python":
        libname = libname + bld.env.PYTHON_VERSION

    lib_prefix = "lib%s" % libname.lower()
    return find_ldd_path(bld, lib_prefix, linkpath)
209 def build_symbol_sets(bld
, tgt_list
):
210 '''build the public_symbols and undefined_symbols attributes for each target'''
212 if bld
.env
.public_symbols
:
215 objlist
= [] # list of object file
216 objmap
= {} # map from object filename to target (subsystem) name
219 t
.public_symbols
= set()
220 t
.undefined_symbols
= set()
221 t
.used_symbols
= set()
222 for tsk
in getattr(t
, 'compiled_tasks', []):
223 for output
in tsk
.outputs
:
224 objpath
= output
.abspath(bld
.env
)
225 objlist
.append(objpath
)
228 symbols
= symbols_extract(bld
, objlist
)
231 t
.public_symbols
= t
.public_symbols
.union(symbols
[obj
]["PUBLIC"])
232 t
.undefined_symbols
= t
.undefined_symbols
.union(symbols
[obj
]["UNDEFINED"])
233 t
.used_symbols
= t
.used_symbols
.union(symbols
[obj
]["UNDEFINED"])
235 t
.undefined_symbols
= t
.undefined_symbols
.difference(t
.public_symbols
)
237 # and the reverse map of public symbols to subsystem name
238 bld
.env
.symbol_map
= {}
241 for s
in t
.public_symbols
:
242 if not s
in bld
.env
.symbol_map
:
243 bld
.env
.symbol_map
[s
] = []
244 bld
.env
.symbol_map
[s
].append(real_name(t
.sname
))
246 targets
= LOCAL_CACHE(bld
, 'TARGET_TYPE')
248 bld
.env
.public_symbols
= {}
250 name
= real_name(t
.sname
)
251 if name
in bld
.env
.public_symbols
:
252 bld
.env
.public_symbols
[name
] = bld
.env
.public_symbols
[name
].union(t
.public_symbols
)
254 bld
.env
.public_symbols
[name
] = t
.public_symbols
255 if t
.samba_type
in ['LIBRARY', 'PLUGIN']:
256 for dep
in t
.add_objects
:
257 t2
= bld
.get_tgen_by_name(dep
)
258 bld
.ASSERT(t2
is not None, "Library '%s' has unknown dependency '%s'" % (name
, dep
))
259 bld
.env
.public_symbols
[name
] = bld
.env
.public_symbols
[name
].union(t2
.public_symbols
)
261 bld
.env
.used_symbols
= {}
263 name
= real_name(t
.sname
)
264 if name
in bld
.env
.used_symbols
:
265 bld
.env
.used_symbols
[name
] = bld
.env
.used_symbols
[name
].union(t
.used_symbols
)
267 bld
.env
.used_symbols
[name
] = t
.used_symbols
268 if t
.samba_type
in ['LIBRARY', 'PLUGIN']:
269 for dep
in t
.add_objects
:
270 t2
= bld
.get_tgen_by_name(dep
)
271 bld
.ASSERT(t2
is not None, "Library '%s' has unknown dependency '%s'" % (name
, dep
))
272 bld
.env
.used_symbols
[name
] = bld
.env
.used_symbols
[name
].union(t2
.used_symbols
)
275 def build_library_dict(bld
, tgt_list
):
276 '''build the library_dict dictionary'''
278 if bld
.env
.library_dict
:
281 bld
.env
.library_dict
= {}
284 if t
.samba_type
in [ 'LIBRARY', 'PLUGIN', 'PYTHON' ]:
285 linkpath
= os
.path
.realpath(t
.link_task
.outputs
[0].abspath(bld
.env
))
286 bld
.env
.library_dict
[linkpath
] = t
.sname
289 def build_syslib_sets(bld
, tgt_list
):
290 '''build the public_symbols for all syslibs'''
292 if bld
.env
.syslib_symbols
:
295 # work out what syslibs we depend on, and what targets those are used in
299 if getattr(t
, 'uselib', []) and t
.samba_type
in [ 'LIBRARY', 'PLUGIN', 'BINARY', 'PYTHON' ]:
301 if lib
in ['PYEMBED', 'PYEXT']:
303 if not lib
in syslibs
:
305 syslibs
[lib
].append(t
)
307 # work out the paths to each syslib
310 path
= find_syslib_path(bld
, lib
, syslibs
[lib
])
312 Logs
.warn("Unable to find syslib path for %s" % lib
)
314 syslib_paths
.append(path
)
315 objmap
[path
] = lib
.lower()
318 syslib_paths
.append(bld
.env
.libc_path
)
319 objmap
[bld
.env
.libc_path
] = 'c'
321 symbols
= symbols_extract(bld
, syslib_paths
, dynamic
=True)
323 # keep a map of syslib names to public symbols
324 bld
.env
.syslib_symbols
= {}
326 bld
.env
.syslib_symbols
[lib
] = symbols
[lib
]["PUBLIC"]
328 # add to the map of symbols to dependencies
330 for sym
in symbols
[lib
]["PUBLIC"]:
331 if not sym
in bld
.env
.symbol_map
:
332 bld
.env
.symbol_map
[sym
] = []
333 bld
.env
.symbol_map
[sym
].append(objmap
[lib
])
335 # keep the libc symbols as well, as these are useful for some of the
337 bld
.env
.libc_symbols
= symbols
[bld
.env
.libc_path
]["PUBLIC"]
339 # add to the combined map of dependency name to public_symbols
340 for lib
in bld
.env
.syslib_symbols
:
341 bld
.env
.public_symbols
[objmap
[lib
]] = bld
.env
.syslib_symbols
[lib
]
344 def build_autodeps(bld
, t
):
345 '''build the set of dependencies for a target'''
347 name
= real_name(t
.sname
)
349 targets
= LOCAL_CACHE(bld
, 'TARGET_TYPE')
351 for sym
in t
.undefined_symbols
:
352 if sym
in t
.public_symbols
:
354 if sym
in bld
.env
.symbol_map
:
355 depname
= bld
.env
.symbol_map
[sym
]
356 if depname
== [ name
]:
357 # self dependencies aren't interesting
359 if t
.in_library
== depname
:
360 # no need to depend on the library we are part of
362 if depname
[0] in ['c', 'python']:
363 # these don't go into autodeps
365 if targets
[depname
[0]] in [ 'SYSLIB' ]:
368 t2
= bld
.get_tgen_by_name(depname
[0])
369 if len(t2
.in_library
) != 1:
372 if t2
.in_library
== t
.in_library
:
373 # if we're part of the same library, we don't need to autodep
375 deps
.add(t2
.in_library
[0])
379 def build_library_names(bld
, tgt_list
):
380 '''add a in_library attribute to all targets that are part of a library'''
382 if bld
.env
.done_build_library_names
:
389 if t
.samba_type
in ['LIBRARY', 'PLUGIN']:
390 for obj
in t
.samba_deps_extended
:
391 t2
= bld
.get_tgen_by_name(obj
)
392 if t2
and t2
.samba_type
in [ 'SUBSYSTEM', 'BUILTIN', 'ASN1' ]:
393 if not t
.sname
in t2
.in_library
:
394 t2
.in_library
.append(t
.sname
)
395 bld
.env
.done_build_library_names
= True
398 def check_library_deps(bld
, t
):
399 '''check that all the autodeps that have mutual dependency of this
400 target are in the same library as the target'''
402 name
= real_name(t
.sname
)
404 if len(t
.in_library
) > 1:
405 Logs
.warn("WARNING: Target '%s' in multiple libraries: %s" % (t
.sname
, t
.in_library
))
407 for dep
in t
.autodeps
:
408 t2
= bld
.get_tgen_by_name(dep
)
411 for dep2
in t2
.autodeps
:
412 if dep2
== name
and t
.in_library
!= t2
.in_library
:
413 Logs
.warn("WARNING: mutual dependency %s <=> %s" % (name
, real_name(t2
.sname
)))
414 Logs
.warn("Libraries should match. %s != %s" % (t
.in_library
, t2
.in_library
))
415 # raise Errors.WafError("illegal mutual dependency")
418 def check_syslib_collisions(bld
, tgt_list
):
419 '''check if a target has any symbol collisions with a syslib
421 We do not want any code in Samba to use a symbol name from a
422 system library. The chance of that causing problems is just too
423 high. Note that libreplace uses a rep_XX approach of renaming
429 for lib
in bld
.env
.syslib_symbols
:
430 common
= t
.public_symbols
.intersection(bld
.env
.syslib_symbols
[lib
])
432 Logs
.error("ERROR: Target '%s' has symbols '%s' which is also in syslib '%s'" % (t
.sname
, common
, lib
))
435 raise Errors
.WafError("symbols in common with system libraries")
438 def check_dependencies(bld
, t
):
439 '''check for dependencies that should be changed'''
441 if bld
.get_tgen_by_name(t
.sname
+ ".objlist"):
444 targets
= LOCAL_CACHE(bld
, 'TARGET_TYPE')
446 remaining
= t
.undefined_symbols
.copy()
447 remaining
= remaining
.difference(t
.public_symbols
)
449 sname
= real_name(t
.sname
)
451 deps
= set(t
.samba_deps
)
452 for d
in t
.samba_deps
:
453 if targets
[d
] in [ 'EMPTY', 'DISABLED', 'SYSLIB', 'GENERATOR' ]:
455 bld
.ASSERT(d
in bld
.env
.public_symbols
, "Failed to find symbol list for dependency '%s'" % d
)
456 diff
= remaining
.intersection(bld
.env
.public_symbols
[d
])
457 if not diff
and targets
[sname
] != 'LIBRARY':
458 Logs
.info("Target '%s' has no dependency on %s" % (sname
, d
))
460 remaining
= remaining
.difference(diff
)
462 t
.unsatisfied_symbols
= set()
464 for sym
in remaining
:
465 if sym
in bld
.env
.symbol_map
:
466 dep
= bld
.env
.symbol_map
[sym
]
467 if not dep
[0] in needed
:
468 needed
[dep
[0]] = set()
469 needed
[dep
[0]].add(sym
)
471 t
.unsatisfied_symbols
.add(sym
)
474 Logs
.info("Target '%s' should add dep '%s' for symbols %s" % (sname
, dep
, " ".join(needed
[dep
])))
478 def check_syslib_dependencies(bld
, t
):
479 '''check for syslib depenencies'''
481 if bld
.get_tgen_by_name(t
.sname
+ ".objlist"):
484 sname
= real_name(t
.sname
)
488 features
= TO_LIST(t
.features
)
489 if 'pyembed' in features
or 'pyext' in features
:
490 if 'python' in bld
.env
.public_symbols
:
491 t
.unsatisfied_symbols
= t
.unsatisfied_symbols
.difference(bld
.env
.public_symbols
['python'])
494 for sym
in t
.unsatisfied_symbols
:
495 if sym
in bld
.env
.symbol_map
:
496 dep
= bld
.env
.symbol_map
[sym
][0]
499 if not dep
in needed
:
506 Logs
.info("Target '%s' should add syslib dep '%s' for symbols %s" % (sname
, dep
, " ".join(needed
[dep
])))
509 debug("deps: Target '%s' has unsatisfied symbols: %s" % (sname
, " ".join(remaining
)))
513 def symbols_symbolcheck(task
):
514 '''check the internal dependency lists'''
516 tgt_list
= get_tgt_list(bld
)
518 build_symbol_sets(bld
, tgt_list
)
519 build_library_names(bld
, tgt_list
)
523 if getattr(t
, 'source', ''):
524 build_autodeps(bld
, t
)
527 check_dependencies(bld
, t
)
530 check_library_deps(bld
, t
)
532 def symbols_syslibcheck(task
):
533 '''check the syslib dependencies'''
535 tgt_list
= get_tgt_list(bld
)
537 build_syslib_sets(bld
, tgt_list
)
538 check_syslib_collisions(bld
, tgt_list
)
541 check_syslib_dependencies(bld
, t
)
544 def symbols_whyneeded(task
):
545 """check why 'target' needs to link to 'subsystem'"""
547 tgt_list
= get_tgt_list(bld
)
549 why
= Options
.options
.WHYNEEDED
.split(":")
551 raise Errors
.WafError("usage: WHYNEEDED=TARGET:DEPENDENCY")
555 build_symbol_sets(bld
, tgt_list
)
556 build_library_names(bld
, tgt_list
)
557 build_syslib_sets(bld
, tgt_list
)
559 Logs
.info("Checking why %s needs to link to %s" % (target
, subsystem
))
560 if not target
in bld
.env
.used_symbols
:
561 Logs
.warn("unable to find target '%s' in used_symbols dict" % target
)
563 if not subsystem
in bld
.env
.public_symbols
:
564 Logs
.warn("unable to find subsystem '%s' in public_symbols dict" % subsystem
)
566 overlap
= bld
.env
.used_symbols
[target
].intersection(bld
.env
.public_symbols
[subsystem
])
568 Logs
.info("target '%s' doesn't use any public symbols from '%s'" % (target
, subsystem
))
570 Logs
.info("target '%s' uses symbols %s from '%s'" % (target
, overlap
, subsystem
))
573 def report_duplicate(bld
, binname
, sym
, libs
, fail_on_error
):
574 '''report duplicated symbols'''
575 if sym
in ['_init', '_fini', '_edata', '_end', '__bss_start']:
579 if lib
in bld
.env
.library_dict
:
580 libnames
.append(bld
.env
.library_dict
[lib
])
584 raise Errors
.WafError("%s: Symbol %s linked in multiple libraries %s" % (binname
, sym
, libnames
))
586 print("%s: Symbol %s linked in multiple libraries %s" % (binname
, sym
, libnames
))
589 def symbols_dupcheck_binary(bld
, binname
, fail_on_error
):
590 '''check for duplicated symbols in one binary'''
592 libs
= get_libs_recursive(bld
, binname
, set())
593 symlist
= symbols_extract(bld
, libs
, dynamic
=True)
596 for libpath
in symlist
:
597 for sym
in symlist
[libpath
]['PUBLIC']:
598 if sym
== '_GLOBAL_OFFSET_TABLE_':
600 if not sym
in symmap
:
602 symmap
[sym
].add(libpath
)
604 if len(symmap
[sym
]) > 1:
605 for libpath
in symmap
[sym
]:
606 if libpath
in bld
.env
.library_dict
:
607 report_duplicate(bld
, binname
, sym
, symmap
[sym
], fail_on_error
)
610 def symbols_dupcheck(task
, fail_on_error
=False):
611 '''check for symbols defined in two different subsystems'''
613 tgt_list
= get_tgt_list(bld
)
615 targets
= LOCAL_CACHE(bld
, 'TARGET_TYPE')
617 build_library_dict(bld
, tgt_list
)
619 if t
.samba_type
== 'BINARY':
620 binname
= os
.path
.relpath(t
.link_task
.outputs
[0].abspath(bld
.env
), os
.getcwd())
621 symbols_dupcheck_binary(bld
, binname
, fail_on_error
)
def symbols_dupcheck_fatal(task):
    '''run the duplicate symbol check in its failing mode

    This is symbols_dupcheck() with fail_on_error switched on, for use as
    a task rule where duplicated symbols must not be tolerated.
    '''
    # fail_on_error is the second parameter of symbols_dupcheck()
    symbols_dupcheck(task, True)
629 def SYMBOL_CHECK(bld
):
630 '''check our dependency lists'''
631 if Options
.options
.SYMBOLCHECK
:
632 bld
.SET_BUILD_GROUP('symbolcheck')
633 task
= bld(rule
=symbols_symbolcheck
, always
=True, name
='symbol checking')
636 bld
.SET_BUILD_GROUP('syslibcheck')
637 task
= bld(rule
=symbols_syslibcheck
, always
=True, name
='syslib checking')
640 bld
.SET_BUILD_GROUP('syslibcheck')
641 task
= bld(rule
=symbols_dupcheck
, always
=True, name
='symbol duplicate checking')
644 if Options
.options
.WHYNEEDED
:
645 bld
.SET_BUILD_GROUP('syslibcheck')
646 task
= bld(rule
=symbols_whyneeded
, always
=True, name
='check why a dependency is needed')
650 Build
.BuildContext
.SYMBOL_CHECK
= SYMBOL_CHECK
652 def DUP_SYMBOL_CHECK(bld
):
653 if Options
.options
.DUP_SYMBOLCHECK
and bld
.env
.DEVELOPER
:
654 '''check for duplicate symbols'''
655 bld
.SET_BUILD_GROUP('syslibcheck')
656 task
= bld(rule
=symbols_dupcheck_fatal
, always
=True, name
='symbol duplicate checking')
659 Build
.BuildContext
.DUP_SYMBOL_CHECK
= DUP_SYMBOL_CHECK