LinkageMap.findConsumers catches shadowed obj keys.
[revdep-rebuild-reimplementation.git] / vartree.py.2.2_rc8.patch
blob9a9965809cb491da955d369c128e691779d42b2a
1 --- vartree.py.2.2_rc8 2008-08-20 20:49:18.000000000 -0500
2 +++ pym/portage/dbapi/vartree.py 2008-08-23 01:02:09.000000000 -0500
3 @@ -139,14 +139,77 @@
4 return rValue
6 class LinkageMap(object):
8 + """Models dynamic linker dependencies."""
10 def __init__(self, vardbapi):
11 self._dbapi = vardbapi
12 self._libs = {}
13 self._obj_properties = {}
14 - self._defpath = getlibpaths()
16 + self._defpath = set(getlibpaths())
17 + self._obj_key_cache = {}
19 + class _ObjectKey(object):
21 + """Helper class used as _obj_properties keys for objects."""
23 + def __init__(self, object):
24 + """
25 + This takes a path to an object.
27 + @param object: path to a file
28 + @type object: string (example: '/usr/bin/bar')
30 + """
31 + self._key = self._generate_object_key(object)
33 + def __hash__(self):
34 + return hash(self._key)
36 + def __eq__(self, other):
37 + return self._key == other._key
39 + def __ne__(self, other):
40 + return self._key != other._key
42 + def _generate_object_key(self, object):
43 + """
44 + Generate object key for a given object.
46 + @param object: path to a file
47 + @type object: string (example: '/usr/bin/bar')
48 + @rtype: 2-tuple of types (long, int) if object exists. string if
49 + object does not exist.
50 + @return:
51 + 1. 2-tuple of object's device and inode from a stat call, if object
52 + exists.
53 + 2. realpath of object if object does not exist.
55 + """
56 + try:
57 + object_stat = os.stat(object)
58 + except OSError:
59 + # Use the realpath as the key if the file does not exist on the
60 + # filesystem.
61 + return os.path.realpath(object)
62 + # Return a tuple of the device and inode.
63 + return (object_stat.st_dev, object_stat.st_ino)
65 + def file_exists(self):
66 + """
67 + Determine if the file for this key exists on the filesystem.
69 + @rtype: Boolean
70 + @return:
71 + 1. True if the file exists.
72 + 2. False if the file does not exist or is a broken symlink.
74 + """
75 + return isinstance(self._key, tuple)
77 def rebuild(self, include_file=None):
78 libs = {}
79 + obj_key_cache = {}
80 obj_properties = {}
81 lines = []
82 for cpv in self._dbapi.cpv_all():
83 @@ -176,97 +239,109 @@
84 # insufficient field length
85 continue
86 arch = fields[0]
87 - obj = os.path.realpath(fields[1])
88 + obj = fields[1]
89 + obj_key = self._ObjectKey(obj)
90 soname = fields[2]
91 - path = filter(None, fields[3].replace(
92 + path = set([normalize_path(x)
93 + for x in filter(None, fields[3].replace(
94 "${ORIGIN}", os.path.dirname(obj)).replace(
95 - "$ORIGIN", os.path.dirname(obj)).split(":"))
96 + "$ORIGIN", os.path.dirname(obj)).split(":"))])
97 needed = filter(None, fields[4].split(","))
98 if soname:
99 - libs.setdefault(soname, {arch: {"providers": [], "consumers": []}})
100 - libs[soname].setdefault(arch, {"providers": [], "consumers": []})
101 - libs[soname][arch]["providers"].append(obj)
102 + libs.setdefault(soname, \
103 + {arch: {"providers": set(), "consumers": set()}})
104 + libs[soname].setdefault(arch, \
105 + {"providers": set(), "consumers": set()})
106 + libs[soname][arch]["providers"].add(obj_key)
107 for x in needed:
108 - libs.setdefault(x, {arch: {"providers": [], "consumers": []}})
109 - libs[x].setdefault(arch, {"providers": [], "consumers": []})
110 - libs[x][arch]["consumers"].append(obj)
111 - obj_properties[obj] = (arch, needed, path, soname)
113 + libs.setdefault(x, \
114 + {arch: {"providers": set(), "consumers": set()}})
115 + libs[x].setdefault(arch, {"providers": set(), "consumers": set()})
116 + libs[x][arch]["consumers"].add(obj_key)
117 + obj_key_cache.setdefault(obj, obj_key)
118 + # All object paths are added into the obj_properties tuple
119 + obj_properties.setdefault(obj_key, \
120 + (arch, needed, path, soname, set()))[4].add(obj)
122 self._libs = libs
123 self._obj_properties = obj_properties
124 + self._obj_key_cache = obj_key_cache
126 - def listBrokenBinaries(self):
127 + def listBrokenBinaries(self, debug=False):
129 Find binaries and their needed sonames, which have no providers.
131 + @param debug: Boolean to enable debug output
132 + @type debug: Boolean
133 @rtype: dict (example: {'/usr/bin/foo': set(['libbar.so'])})
134 @return: The return value is an object -> set-of-sonames mapping, where
135 object is a broken binary and the set consists of sonames needed by
136 object that have no corresponding libraries to fulfill the dependency.
139 - class LibraryCache(object):
140 + class _LibraryCache(object):
143 - Caches sonames and realpaths associated with paths.
144 + Caches properties associated with paths.
146 - The purpose of this class is to prevent multiple calls of
147 - os.path.realpath and os.path.isfile on the same paths.
148 + The purpose of this class is to prevent multiple instances of
149 + _ObjectKey for the same paths.
153 def __init__(cache_self):
154 cache_self.cache = {}
156 - def get(cache_self, path):
157 + def get(cache_self, obj):
159 - Caches and returns the soname and realpath for a path.
160 + Caches and returns properties associated with an object.
162 - @param path: absolute path (can be symlink)
163 - @type path: string (example: '/usr/lib/libfoo.so')
164 - @rtype: 3-tuple with types (string or None, string, boolean)
165 - @return: 3-tuple with the following components:
166 - 1. soname as a string or None if it does not exist,
167 - 2. realpath as a string,
168 - 3. the result of os.path.isfile(realpath)
169 - (example: ('libfoo.so.1', '/usr/lib/libfoo.so.1.5.1', True))
170 + @param obj: absolute path (can be symlink)
171 + @type obj: string (example: '/usr/lib/libfoo.so')
172 + @rtype: 4-tuple with types
173 + (string or None, string or None, _ObjectKey, Boolean)
174 + @return: 4-tuple with the following components:
175 + 1. arch as a string or None if it does not exist,
176 + 2. soname as a string or None if it does not exist,
177 + 3. obj_key as an _ObjectKey instance,
178 + 4. Boolean representing whether the object exists.
179 + (example: ('X86_64', 'libfoo.so.1', obj_key, True))
182 - if path in cache_self.cache:
183 - return cache_self.cache[path]
184 + if obj in cache_self.cache:
185 + return cache_self.cache[obj]
186 else:
187 - realpath = os.path.realpath(path)
188 + if obj in self._obj_key_cache:
189 + obj_key = self._obj_key_cache.get(obj)
190 + else:
191 + obj_key = self._ObjectKey(obj)
192 # Check that the library exists on the filesystem.
193 - if os.path.isfile(realpath):
194 - # Get the soname from LinkageMap._obj_properties if it
195 - # exists. Otherwise, None.
196 - soname = self._obj_properties.get(realpath, (None,)*3)[3]
197 - # Both path and realpath are cached and the result is
198 - # returned.
199 - cache_self.cache.setdefault(realpath, \
200 - (soname, realpath, True))
201 - return cache_self.cache.setdefault(path, \
202 - (soname, realpath, True))
203 + if obj_key.file_exists():
204 + # Get the arch and soname from LinkageMap._obj_properties if
205 + # it exists. Otherwise, None.
206 + arch, _, _, soname, _ = \
207 + self._obj_properties.get(obj_key, (None,)*5)
208 + return cache_self.cache.setdefault(obj, \
209 + (arch, soname, obj_key, True))
210 else:
211 - # realpath is not cached here, because the majority of cases
212 - # where realpath is not a file, path is the same as realpath.
213 - # Thus storing twice slows down the cache performance.
214 - return cache_self.cache.setdefault(path, \
215 - (None, realpath, False))
216 + return cache_self.cache.setdefault(obj, \
217 + (None, None, obj_key, False))
219 - debug = False
220 rValue = {}
221 - cache = LibraryCache()
222 + cache = _LibraryCache()
223 providers = self.listProviders()
225 - # Iterate over all binaries and their providers.
226 - for obj, sonames in providers.items():
227 + # Iterate over all obj_keys and their providers.
228 + for obj_key, sonames in providers.items():
229 + arch, _, path, _, objs = self._obj_properties[obj_key]
230 + path = path.union(self._defpath)
231 # Iterate over each needed soname and the set of library paths that
232 # fulfill the soname to determine if the dependency is broken.
233 for soname, libraries in sonames.items():
234 # validLibraries is used to store libraries, which satisfy soname,
235 # so if no valid libraries are found, the soname is not satisfied
236 - # for obj. Thus obj must be emerged.
237 + # for obj_key. If unsatisfied, objects associated with obj_key
238 + # must be emerged.
239 validLibraries = set()
240 # It could be the case that the library to satisfy the soname is
241 # not in the obj's runpath, but a symlink to the library is (eg
242 @@ -274,67 +349,60 @@
243 # does not catalog symlinks, broken or missing symlinks may go
244 # unnoticed. As a result of these cases, check that a file with
245 # the same name as the soname exists in obj's runpath.
246 - path = self._obj_properties[obj][2] + self._defpath
247 - for d in path:
248 - cachedSoname, cachedRealpath, cachedExists = \
249 - cache.get(os.path.join(d, soname))
250 - # Check that the this library provides the needed soname. Doing
251 + # XXX If we catalog symlinks in LinkageMap, this could be improved.
252 + for directory in path:
253 + cachedArch, cachedSoname, cachedKey, cachedExists = \
254 + cache.get(os.path.join(directory, soname))
255 + # Check that this library provides the needed soname. Doing
256 # this, however, will cause consumers of libraries missing
257 # sonames to be unnecessarily emerged. (eg libmix.so)
258 - if cachedSoname == soname:
259 - validLibraries.add(cachedRealpath)
260 - if debug and cachedRealpath not in libraries:
261 + if cachedSoname == soname and cachedArch == arch:
262 + validLibraries.add(cachedKey)
263 + if debug and cachedKey not in \
264 + set(map(self._obj_key_cache.get, libraries)):
265 + # XXX This is most often due to soname symlinks not in
266 + # a library's directory. We could catalog symlinks in
267 + # LinkageMap to avoid checking for this edge case here.
268 print "Found provider outside of findProviders:", \
269 - os.path.join(d, soname), "->", cachedRealpath
270 + os.path.join(directory, soname), "->", \
271 + self._obj_properties[cachedKey][4], libraries
272 # A valid library has been found, so there is no need to
273 # continue.
274 break
275 - if debug and cachedRealpath in self._obj_properties:
276 + if debug and cachedArch == arch and \
277 + cachedKey in self._obj_properties:
278 print "Broken symlink or missing/bad soname:", \
279 - os.path.join(d, soname), '->', cachedRealpath, \
280 - "with soname", cachedSoname, "but expecting", soname
281 + os.path.join(directory, soname), '->', \
282 + self._obj_properties[cachedKey], "with soname", \
283 + cachedSoname, "but expecting", soname
284 # This conditional checks if there are no libraries to satisfy the
285 # soname (empty set).
286 if not validLibraries:
287 - rValue.setdefault(obj, set()).add(soname)
288 + for obj in objs:
289 + rValue.setdefault(obj, set()).add(soname)
290 # If no valid libraries have been found by this point, then
291 # there are no files named with the soname within obj's runpath,
292 # but if there are libraries (from the providers mapping), it is
293 - # likely that symlinks or the actual libraries are missing.
294 - # Thus possible symlinks and missing libraries are added to
295 - # rValue in order to emerge corrupt library packages.
296 + # likely that soname symlinks or the actual libraries are
297 + # missing or broken. Thus those libraries are added to rValue
298 + # in order to emerge corrupt library packages.
299 for lib in libraries:
300 - cachedSoname, cachedRealpath, cachedExists = cache.get(lib)
301 - if not cachedExists:
302 - # The library's package needs to be emerged to repair the
303 - # missing library.
304 - rValue.setdefault(lib, set()).add(soname)
305 - else:
306 - # A library providing the soname exists in the obj's
307 - # runpath, but no file named as the soname exists, so add
308 - # the path constructed from the lib's directory and the
309 - # soname to rValue to fix cases of vanishing (or modified)
310 - # symlinks. This path is not guaranteed to exist, but it
311 - # follows the symlink convention found in the majority of
312 - # packages.
313 - rValue.setdefault(os.path.join(os.path.dirname(lib), \
314 - soname), set()).add(soname)
315 + rValue.setdefault(lib, set()).add(soname)
316 if debug:
317 - if not cachedExists:
318 + if not os.path.isfile(lib):
319 print "Missing library:", lib
320 else:
321 print "Possibly missing symlink:", \
322 os.path.join(os.path.dirname(lib), soname)
324 return rValue
326 def listProviders(self):
328 - Find the providers for all binaries.
329 + Find the providers for all object keys in LinkageMap.
331 @rtype: dict (example:
332 - {'/usr/bin/foo': {'libbar.so': set(['/lib/libbar.so.1.5'])}})
333 - @return: The return value is an object -> providers mapping, where
334 + {(123L, 456L): {'libbar.so': set(['/lib/libbar.so.1.5'])}})
335 + @return: The return value is an object key -> providers mapping, where
336 providers is a mapping of soname -> set-of-library-paths returned
337 from the findProviders method.
339 @@ -342,118 +410,183 @@
340 rValue = {}
341 if not self._libs:
342 self.rebuild()
343 - # Iterate over all binaries within LinkageMap.
344 - for obj in self._obj_properties:
345 - rValue.setdefault(obj, self.findProviders(obj))
346 + # Iterate over all object keys within LinkageMap.
347 + for obj_key in self._obj_properties:
348 + rValue.setdefault(obj_key, self.findProviders(obj_key))
349 return rValue
351 def isMasterLink(self, obj):
352 + """
353 + Determine whether an object is a master link.
355 + @param obj: absolute path to an object
356 + @type obj: string (example: '/usr/bin/foo')
357 + @rtype: Boolean
358 + @return:
359 + 1. True if obj is a master link
360 + 2. False if obj is not a master link
362 + """
363 basename = os.path.basename(obj)
364 - if obj not in self._obj_properties:
365 - obj = os.path.realpath(obj)
366 - if obj not in self._obj_properties:
367 - raise KeyError("%s not in object list" % obj)
368 - soname = self._obj_properties[obj][3]
369 + obj_key = self._ObjectKey(obj)
370 + if obj_key not in self._obj_properties:
371 + raise KeyError("%s (%s) not in object list" % (obj_key, obj))
372 + soname = self._obj_properties[obj_key][3]
373 return (len(basename) < len(soname))
376 def listLibraryObjects(self):
377 + """
378 + Return a list of library objects.
380 + Known limitation: library objects lacking an soname are not included.
382 + @rtype: list of strings
383 + @return: list of paths to all providers
385 + """
386 rValue = []
387 if not self._libs:
388 self.rebuild()
389 for soname in self._libs:
390 for arch in self._libs[soname]:
391 - rValue.extend(self._libs[soname][arch]["providers"])
392 + for obj_key in self._libs[soname][arch]["providers"]:
393 + rValue.extend(self._obj_properties[obj_key][4])
394 return rValue
396 def getSoname(self, obj):
397 + """
398 + Return the soname associated with an object.
400 + @param obj: absolute path to an object
401 + @type obj: string (example: '/usr/bin/bar')
402 + @rtype: string
403 + @return: soname as a string
405 + """
406 if not self._libs:
407 self.rebuild()
408 - if obj not in self._obj_properties:
409 - obj = os.path.realpath(obj)
410 - if obj not in self._obj_properties:
411 - raise KeyError("%s not in object list" % obj)
412 - arch, needed, path, soname = self._obj_properties[obj]
413 - return soname
414 + if obj not in self._obj_key_cache:
415 + raise KeyError("%s not in object list" % obj)
416 + return self._obj_properties[self._obj_key_cache[obj]][3]
418 def findProviders(self, obj):
419 - if not self._libs:
420 - self.rebuild()
421 + """
422 + Find providers for an object or object key.
424 + This method may be called with a key from _obj_properties.
426 - realpath_cache = {}
427 - def realpath(p):
428 - real_path = realpath_cache.get(p)
429 - if real_path is None:
430 - real_path = os.path.realpath(p)
431 - realpath_cache[p] = real_path
432 - return real_path
433 + In some cases, not all valid libraries are returned. This may occur when
434 + an soname symlink referencing a library is in an object's runpath while
435 + the actual library is not. We should consider cataloging symlinks within
436 + LinkageMap as this would avoid those cases and would be a better model of
437 + library dependencies (since the dynamic linker actually searches for
438 + files named with the soname in the runpaths).
440 + @param obj: absolute path to an object or a key from _obj_properties
441 + @type obj: string (example: '/usr/bin/bar') or _ObjectKey
442 + @rtype: dict (example: {'libbar.so': set(['/lib/libbar.so.1.5'])})
443 + @return: The return value is a soname -> set-of-library-paths, where
444 + set-of-library-paths satisfy soname.
446 + """
447 rValue = {}
448 - if obj not in self._obj_properties:
449 - obj = realpath(obj)
450 - if obj not in self._obj_properties:
451 - raise KeyError("%s not in object list" % obj)
452 - arch, needed, path, soname = self._obj_properties[obj]
453 - path = path[:]
454 - path.extend(self._defpath)
455 - path = set(realpath(x) for x in path)
456 - for x in needed:
457 - rValue[x] = set()
458 - if x not in self._libs or arch not in self._libs[x]:
459 - continue
460 - for y in self._libs[x][arch]["providers"]:
461 - if x[0] == os.sep and realpath(x) == realpath(y):
462 - rValue[x].add(y)
463 - elif realpath(os.path.dirname(y)) in path:
464 - rValue[x].add(y)
466 + if not self._libs:
467 + self.rebuild()
469 + # Determine the obj_key from the arguments.
470 + if isinstance(obj, self._ObjectKey):
471 + obj_key = obj
472 + if obj_key not in self._obj_properties:
473 + raise KeyError("%s not in object list" % obj_key)
474 + else:
475 + obj_key = self._obj_key_cache.get(obj)
476 + if obj_key not in self._obj_properties:
477 + obj_key = self._ObjectKey(obj)
478 + if obj_key not in self._obj_properties:
479 + raise KeyError("%s (%s) not in object list" % (obj_key, obj))
481 + arch, needed, path, _, _ = self._obj_properties[obj_key]
482 + path = path.union(self._defpath)
483 + for soname in needed:
484 + rValue[soname] = set()
485 + if soname not in self._libs or arch not in self._libs[soname]:
486 + continue
487 + # For each potential provider of the soname, add it to rValue if it
488 + # resides in the obj's runpath.
489 + for provider_key in self._libs[soname][arch]["providers"]:
490 + providers = self._obj_properties[provider_key][4]
491 + for provider in providers:
492 + if os.path.dirname(provider) in path:
493 + rValue[soname].add(provider)
494 return rValue
497 def findConsumers(self, obj):
498 + """
499 + Find consumers of an object or object key.
501 + This method may be called with a key from _obj_properties.
503 + In some cases, not all consumers are returned. This may occur when
504 + an soname symlink referencing a library is in an object's runpath while
505 + the actual library is not.
507 + @param obj: absolute path to an object or a key from _obj_properties
508 + @type obj: string (example: '/usr/bin/bar') or _ObjectKey
509 + @rtype: set of strings (example: set(['/bin/foo', '/usr/bin/bar']))
510 + @return: The return value is the set of paths to objects that consume
511 + (link against) the soname provided by obj.
513 + """
514 + rValue = set()
516 if not self._libs:
517 self.rebuild()
519 - realpath_cache = {}
520 - def realpath(p):
521 - real_path = realpath_cache.get(p)
522 - if real_path is None:
523 - real_path = os.path.realpath(p)
524 - realpath_cache[p] = real_path
525 - return real_path
527 - if obj not in self._obj_properties:
528 - obj = realpath(obj)
529 - if obj not in self._obj_properties:
530 - raise KeyError("%s not in object list" % obj)
531 + # Determine the obj_key and the set of objects matching the arguments.
532 + if isinstance(obj, self._ObjectKey):
533 + obj_key = obj
534 + if obj_key not in self._obj_properties:
535 + raise KeyError("%s not in object list" % obj_key)
536 + objs = self._obj_properties[obj_key][4]
537 + else:
538 + objs = set([obj])
539 + obj_key = self._obj_key_cache.get(obj)
540 + if obj_key not in self._obj_properties:
541 + obj_key = self._ObjectKey(obj)
542 + if obj_key not in self._obj_properties:
543 + raise KeyError("%s (%s) not in object list" % (obj_key, obj))
545 + # Determine the directory(ies) from the set of objects.
546 + objs_dirs = set([os.path.dirname(x) for x in objs])
548 # If there is another version of this lib with the
549 # same soname and the master link points to that
550 # other version, this lib will be shadowed and won't
551 # have any consumers.
552 - arch, needed, path, soname = self._obj_properties[obj]
553 - obj_dir = os.path.dirname(obj)
554 - master_link = os.path.join(obj_dir, soname)
555 - try:
556 - master_st = os.stat(master_link)
557 - obj_st = os.stat(obj)
558 - except OSError:
559 - pass
560 - else:
561 - if (obj_st.st_dev, obj_st.st_ino) != \
562 - (master_st.st_dev, master_st.st_ino):
563 - return set()
564 + soname = self._obj_properties[obj_key][3]
565 + shadowed_library = True
566 + for obj_dir in objs_dirs:
567 + master_link = os.path.join(obj_dir, soname)
568 + master_link_obj_key = self._ObjectKey(master_link)
569 + if obj_key == master_link_obj_key:
570 + shadowed_library = False
571 + break
572 + if shadowed_library:
573 + return set()
575 - rValue = set()
576 - for soname in self._libs:
577 - for arch in self._libs[soname]:
578 - if obj in self._libs[soname][arch]["providers"]:
579 - for x in self._libs[soname][arch]["consumers"]:
580 - path = self._obj_properties[x][2]
581 - path = [realpath(y) for y in path+self._defpath]
582 - if soname[0] == os.sep and realpath(soname) == realpath(obj):
583 - rValue.add(x)
584 - elif realpath(obj_dir) in path:
585 - rValue.add(x)
586 + arch, _, _, soname, _ = self._obj_properties[obj_key]
587 + if soname in self._libs and arch in self._libs[soname]:
588 + # For each potential consumer, add it to rValue if an object from the
589 + # arguments resides in the consumer's runpath.
590 + for consumer_key in self._libs[soname][arch]["consumers"]:
591 + _, _, path, _, consumer_objs = \
592 + self._obj_properties[consumer_key]
593 + path = path.union(self._defpath)
594 + if objs_dirs.intersection(path):
595 + rValue.update(consumer_objs)
596 return rValue
599 class vardbapi(dbapi):
601 _excluded_dirs = ["CVS", "lost+found"]