Patched up LinkageMap.listBrokenBinaries
[revdep-rebuild-reimplementation.git] / vartree.py.2.2_rc6.patch
blobd6e8bb4de5997eda0b12fd223193c45f6bdcaf5b
1 --- vartree.py.2.2_rc6 2008-08-01 15:41:03.000000000 -0500
2 +++ pym/portage/dbapi/vartree.py 2008-08-14 01:49:42.000000000 -0500
3 @@ -139,9 +139,11 @@
4 self._libs = {}
5 self._obj_properties = {}
6 self._defpath = getlibpaths()
7 -
8 + self._obj_key_cache = {}
10 def rebuild(self, include_file=None):
11 libs = {}
12 + obj_key_cache = {}
13 obj_properties = {}
14 lines = []
15 for cpv in self._dbapi.cpv_all():
16 @@ -171,130 +173,389 @@
17 # insufficient field length
18 continue
19 arch = fields[0]
20 - obj = os.path.realpath(fields[1])
21 + obj = fields[1]
22 soname = fields[2]
23 - path = fields[3].replace("${ORIGIN}", os.path.dirname(obj)).replace("$ORIGIN", os.path.dirname(obj)).split(":")
24 - needed = fields[4].split(",")
25 + path = filter(None, fields[3].replace(
26 + "${ORIGIN}", os.path.dirname(obj)).replace(
27 + "$ORIGIN", os.path.dirname(obj)).split(":"))
28 + needed = filter(None, fields[4].split(","))
29 + obj_key = self._generateObjKey(obj)
30 + # We ignore NEEDED entries that do not exist on the filesystem.
31 + if obj_key is None:
32 + continue
33 if soname:
34 - libs.setdefault(soname, {arch: {"providers": [], "consumers": []}})
35 - libs[soname].setdefault(arch, {"providers": [], "consumers": []})
36 - libs[soname][arch]["providers"].append(obj)
37 + libs.setdefault(soname, \
38 + {arch: {"providers": set(), "consumers": set()}})
39 + libs[soname].setdefault(arch, \
40 + {"providers": set(), "consumers": set()})
41 + # XXX append obj or key?
42 + # XXX maybe use sets vs lists
43 + libs[soname][arch]["providers"].add(obj_key)
44 for x in needed:
45 - libs.setdefault(x, {arch: {"providers": [], "consumers": []}})
46 - libs[x].setdefault(arch, {"providers": [], "consumers": []})
47 - libs[x][arch]["consumers"].append(obj)
48 - obj_properties[obj] = (arch, needed, path, soname)
50 + libs.setdefault(x, \
51 + {arch: {"providers": set(), "consumers": set()}})
52 + libs[x].setdefault(arch, {"providers": set(), "consumers": set()})
53 + libs[x][arch]["consumers"].add(obj_key)
54 + obj_key_cache.setdefault(obj, obj_key)
55 + # All object paths are added into the obj_properties tuple
56 + obj_properties.setdefault(obj_key, \
57 + (arch, needed, path, soname, set()))[4].add(obj)
58 +# obj_properties[obj] = (arch, needed, path, soname)
60 self._libs = libs
61 self._obj_properties = obj_properties
62 + # XXX Should we reset the cache on each rebuild?
63 + self._obj_key_cache = obj_key_cache
65 + def _generateObjKey(self, path):
66 + """
67 + Generate obj key for a given path.
69 + @param path: path to an existing file
70 + @type path: string (example: '/usr/bin/bar')
71 + @rtype: 2-tuple of longs or string
72 + @return: If path exists, a 2-tuple of path's inode and device from a stat
73 + call is returned. Otherwise, the path is returned.
75 + """
76 + try:
77 + return os.stat(path)[1:3]
78 + except OSError:
79 +# from portage.output import teal
80 +# writemsg(bold(red("Error in ")) + \
81 +# bold(teal("_generateObjKey. Stat failed on %s" % path)) + '\n')
82 + return path
84 + def listBrokenBinaries(self):
85 + """
86 + Find binaries and their needed sonames, which have no providers.
88 + @rtype: dict (example: {'/usr/bin/foo': set(['libbar.so'])})
89 + @return: The return value is an object -> set-of-sonames mapping, where
90 + object is a broken binary and the set consists of sonames needed by
91 + object that have no corresponding libraries to fulfill the dependency.
93 + """
94 + class LibraryCache(object):
96 + """
97 + Caches sonames and object keys associated with paths.
99 + The purpose of this class is to prevent multiple calls of
100 + os.stat (via _generateObjKey) on the same paths.
102 + """
104 + def __init__(cache_self):
105 + cache_self.cache = {}
107 + def get(cache_self, path):
108 + """
109 + Caches and returns the soname and obj_key for a path.
111 + @param path: absolute path (can be symlink)
112 + @type path: string (example: '/usr/lib/libfoo.so')
113 + @rtype: 3-tuple with types (string or None, 2-tuple or string, boolean)
114 + @return: 3-tuple with the following components:
115 + 1. soname as a string or None if it does not exist,
116 + 2. obj_key as 2-tuple, or the path itself if it could not be stat'ed,
117 + 3. True if obj_key is a 2-tuple (the path could be stat'ed), else False
118 + (example: ('libfoo.so.1', (123L, 456L), True))
120 + """
121 + if path in cache_self.cache:
122 + return cache_self.cache[path]
123 + else:
124 + if path in self._obj_key_cache:
125 + obj_key = self._obj_key_cache.get(path)
126 + else:
127 + obj_key = self._generateObjKey(path)
128 + # Check that the library exists on the filesystem.
129 + if isinstance(obj_key, tuple):
130 + # Get the soname from LinkageMap._obj_properties if it
131 + # exists. Otherwise, None.
132 + soname = self._obj_properties.get(obj_key, (None,)*3)[3]
133 + # The result is cached under path and then
134 + # returned.
135 + return cache_self.cache.setdefault(path, \
136 + (soname, obj_key, True))
137 + else:
138 + # realpath is not cached here, because the majority of cases
139 + # where realpath is not a file, path is the same as realpath.
140 + # Thus storing twice slows down the cache performance.
141 + return cache_self.cache.setdefault(path, \
142 + (None, obj_key, False))
144 + debug = False
145 + rValue = {}
146 + cache = LibraryCache()
147 + providers = self.listProviders()
149 + # Iterate over all binaries and their providers.
150 + for obj_key, sonames in providers.items():
151 + # Iterate over each needed soname and the set of library paths that
152 + # fulfill the soname to determine if the dependency is broken.
153 + for soname, libraries in sonames.items():
154 + # validLibraries is used to store libraries, which satisfy soname,
155 + # so if no valid libraries are found, the soname is not satisfied
156 + # for obj_key. Thus objs associated with obj_key must be emerged.
157 + validLibraries = set()
158 + # It could be the case that the library to satisfy the soname is
159 + # not in the obj's runpath, but a symlink to the library is (eg
160 + # libnvidia-tls.so.1 in nvidia-drivers). Also, since LinkageMap
161 + # does not catalog symlinks, broken or missing symlinks may go
162 + # unnoticed. As a result of these cases, check that a file with
163 + # the same name as the soname exists in obj's runpath.
164 + path = self._obj_properties[obj_key][2] + self._defpath
165 + for directory in path:
166 + cachedSoname, cachedKey, cachedExists = \
167 + cache.get(os.path.join(directory, soname))
168 + # Check that this library provides the needed soname. Doing
169 + # this, however, will cause consumers of libraries missing
170 + # sonames to be unnecessarily emerged. (eg libmix.so)
171 + if cachedSoname == soname:
172 + validLibraries.add(cachedKey)
173 + if debug and cachedKey not in \
174 + map(self._obj_key_cache.get, libraries):
175 + print "Found provider outside of findProviders:", \
176 + os.path.join(directory, soname), "->", \
177 + cachedKey
178 + # A valid library has been found, so there is no need to
179 + # continue.
180 + break
181 + if debug and cachedKey in self._obj_properties:
182 + print "Broken symlink or missing/bad soname:", \
183 + os.path.join(directory, soname), '->', \
184 + cachedKey, "with soname", cachedSoname, \
185 + "but expecting", soname
186 + # This conditional checks if there are no libraries to satisfy the
187 + # soname (empty set).
188 + if not validLibraries:
189 + for obj in self._obj_properties[obj_key][4]:
190 + rValue.setdefault(obj, set()).add(soname)
191 + # If no valid libraries have been found by this point, then
192 + # there are no files named with the soname within obj's runpath,
193 + # but if there are libraries (from the providers mapping), it is
194 + # likely that symlinks or the actual libraries are missing.
195 + # Thus possible symlinks and missing libraries are added to
196 + # rValue in order to emerge corrupt library packages.
197 + for lib in libraries:
198 + cachedSoname, cachedKey, cachedExists = cache.get(lib)
199 + if not cachedExists:
200 + # The library's package needs to be emerged to repair the
201 + # missing library.
202 + rValue.setdefault(lib, set()).add(soname)
203 + else:
204 + # A library providing the soname exists in the obj's
205 + # runpath, but no file named as the soname exists, so add
206 + # the path constructed from the lib's directory and the
207 + # soname to rValue to fix cases of vanishing (or modified)
208 + # symlinks. This path is not guaranteed to exist, but it
209 + # follows the symlink convention found in the majority of
210 + # packages.
211 + rValue.setdefault(os.path.join(os.path.dirname(lib), \
212 + soname), set()).add(soname)
213 + if debug:
214 + if not cachedExists:
215 + print "Missing library:", lib
216 + else:
217 + print "Possibly missing symlink:", \
218 + os.path.join(os.path.dirname(lib), soname)
220 + return rValue
222 + def listProviders(self):
223 + """
224 + Find the providers for all keys in LinkageMap.
226 + @rtype: dict (example:
227 + {(123L, 456L): {'libbar.so': set(['/lib/libbar.so.1.5'])}})
228 + @return: The return value is an object key -> providers mapping, where
229 + providers is a mapping of soname -> set-of-library-paths returned
230 + from the findProviders method.
232 + """
233 + rValue = {}
234 + if not self._libs:
235 + self.rebuild()
236 + # Iterate over all binaries within LinkageMap.
237 + for obj_key in self._obj_properties:
238 + # XXX remove this
239 + if len(self._obj_properties[obj_key][4]) != 1:
240 + writemsg(bold(red(self._obj_properties[obj_key])))
241 + rValue.setdefault(obj_key, self.findProviders(obj_key=obj_key))
242 + return rValue
244 def isMasterLink(self, obj):
245 basename = os.path.basename(obj)
246 - if obj not in self._obj_properties:
247 - obj = os.path.realpath(obj)
248 - if obj not in self._obj_properties:
249 - raise KeyError("%s not in object list" % obj)
250 - soname = self._obj_properties[obj][3]
251 + obj_key = self._generateObjKey(obj)
252 + if obj_key not in self._obj_properties:
253 + raise KeyError("%s (%s) not in object list" % (obj_key, obj))
254 + soname = self._obj_properties[obj_key][3]
255 return (len(basename) < len(soname))
258 def listLibraryObjects(self):
259 rValue = []
260 if not self._libs:
261 self.rebuild()
262 for soname in self._libs:
263 for arch in self._libs[soname]:
264 - rValue.extend(self._libs[soname][arch]["providers"])
265 + for obj_key in self._libs[soname][arch]["providers"]:
266 + rValue.extend(self._obj_properties[obj_key][4])
267 + # XXX Should be done. need testing
268 + #rValue.extend(self._libs[soname][arch]["providers"])
269 return rValue
271 def getSoname(self, obj):
272 if not self._libs:
273 self.rebuild()
274 - if obj not in self._obj_properties:
275 - obj = realpath(obj)
276 - if obj not in self._obj_properties:
277 - raise KeyError("%s not in object list" % obj)
278 - arch, needed, path, soname = self._obj_properties[obj]
279 - return soname
280 + if obj not in self._obj_key_cache:
281 + raise KeyError("%s not in object list" % obj)
282 + return self._obj_properties[self._obj_key_cache[obj]][3]
284 + def findProviders(self, obj=None, obj_key=None):
285 + """
286 + Find providers for an object or object key.
288 + This method should be called with either an obj or obj_key. If called
289 + with both, the obj_key is ignored. If called with neither, KeyError is
290 + raised as if an invalid obj was passed.
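292 + @param obj: path of an object
293 + @type obj: string (example: '/usr/bin/bar')
294 + @param obj_key: key of an object, as generated by _generateObjKey
295 + @type obj_key: 2-tuple of longs or string (example: (123L, 456L))
296 + @rtype: dict (example: {'libbar.so': set(['/lib/libbar.so.1.5'])})
297 + @return: The return value is a soname -> set-of-library-paths mapping.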
299 + """
300 + rValue = {}
302 - def findProviders(self, obj):
303 if not self._libs:
304 self.rebuild()
306 - realpath_cache = {}
307 - def realpath(p):
308 - real_path = realpath_cache.get(p)
309 - if real_path is None:
310 - real_path = os.path.realpath(p)
311 - realpath_cache[p] = real_path
312 - return real_path
313 + if obj is not None:
314 + obj_key = self._obj_key_cache.get(obj)
315 + if obj_key not in self._obj_properties:
316 + obj_key = self._generateObjKey(obj)
317 + if obj_key not in self._obj_properties:
318 + raise KeyError("%s (%s) not in object list" % (obj_key, obj))
319 + elif obj_key not in self._obj_properties:
320 + raise KeyError("%s not in object list" % obj_key)
322 +# realpath_cache = {}
323 +# def realpath(p):
324 +# real_path = realpath_cache.get(p)
325 +# if real_path is None:
326 +# real_path = os.path.realpath(p)
327 +# realpath_cache[p] = real_path
328 +# return real_path
330 - rValue = {}
331 - if obj not in self._obj_properties:
332 - obj = realpath(obj)
333 - if obj not in self._obj_properties:
334 - raise KeyError("%s not in object list" % obj)
335 - arch, needed, path, soname = self._obj_properties[obj]
336 + arch, needed, path, soname, objs = self._obj_properties[obj_key]
337 path = path[:]
338 path.extend(self._defpath)
339 - path = set(realpath(x) for x in path)
340 + # XXX Why?
341 + # path = set(realpath(x) for x in path)
342 for x in needed:
343 rValue[x] = set()
344 if x not in self._libs or arch not in self._libs[x]:
345 continue
346 for y in self._libs[x][arch]["providers"]:
347 - if x[0] == os.sep and realpath(x) == realpath(y):
348 - rValue[x].add(y)
349 - elif realpath(os.path.dirname(y)) in path:
350 - rValue[x].add(y)
351 + objs = self._obj_properties[y][4]
352 + # XXX x is an soname, so it should never start with os.sep, right?
353 + #if x[0] == os.sep and realpath(x) == realpath(y):
354 + # rValue[x].add(y)
355 + for o in objs:
356 + if os.path.dirname(o) in path:
357 + rValue[x].add(o)
359 return rValue
361 - def findConsumers(self, obj):
363 + def findConsumers(self, obj=None, obj_key=None):
364 + """
365 + Find consumers of an object or object key.
367 + This method should be called with either an obj or obj_key. If called
368 + with both, the obj_key is ignored. If called with neither, KeyError is
369 + raised as if an invalid obj was passed.
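371 + @param obj: path of an object
372 + @type obj: string (example: '/usr/lib/libbar.so.1.5')
373 + @param obj_key: key of an object, as generated by _generateObjKey
374 + @type obj_key: 2-tuple of longs or string (example: (123L, 456L))
375 + @rtype: set of strings (example: set(['/bin/foo', '/usr/bin/bar']))
376 + @return: The return value is the set of paths of objects that consume the given object.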
378 + """
379 + rValue = set()
381 if not self._libs:
382 self.rebuild()
384 - realpath_cache = {}
385 - def realpath(p):
386 - real_path = realpath_cache.get(p)
387 - if real_path is None:
388 - real_path = os.path.realpath(p)
389 - realpath_cache[p] = real_path
390 - return real_path
392 - if obj not in self._obj_properties:
393 - obj = realpath(obj)
394 - if obj not in self._obj_properties:
395 - raise KeyError("%s not in object list" % obj)
396 +# realpath_cache = {}
397 +# def realpath(p):
398 +# real_path = realpath_cache.get(p)
399 +# if real_path is None:
400 +# real_path = os.path.realpath(p)
401 +# realpath_cache[p] = real_path
402 +# return real_path
404 + if obj is not None:
405 + a = set([obj])
406 + obj_key = self._obj_key_cache.get(obj)
407 + if obj_key not in self._obj_properties:
408 + obj_key = self._generateObjKey(obj)
409 + if obj_key not in self._obj_properties:
410 + raise KeyError("%s (%s) not in object list" % (obj_key, obj))
411 + else:
412 + if obj_key not in self._obj_properties:
413 + raise KeyError("%s not in object list" % obj_key)
414 + a = self._obj_properties[obj_key][4]
416 + b = set()
417 + for x in a.copy():
418 + b.add((os.path.dirname(x), x))
420 # If there is another version of this lib with the
421 # same soname and the master link points to that
422 # other version, this lib will be shadowed and won't
423 # have any consumers.
424 - arch, needed, path, soname = self._obj_properties[obj]
425 - obj_dir = os.path.dirname(obj)
426 - master_link = os.path.join(obj_dir, soname)
427 - try:
428 - master_st = os.stat(master_link)
429 - obj_st = os.stat(obj)
430 - except OSError:
431 - pass
432 - else:
433 - if (obj_st.st_dev, obj_st.st_ino) != \
434 - (master_st.st_dev, master_st.st_ino):
435 - return set()
436 + # Only necessary if given obj, which may be a symlink.
437 + if obj:
438 + arch, needed, path, soname, objs = self._obj_properties[obj_key]
439 + obj_dir = os.path.dirname(obj)
440 + master_link = os.path.join(obj_dir, soname)
441 + try:
442 + master_st = os.stat(master_link)
443 + obj_st = os.stat(obj)
444 + except OSError:
445 + pass
446 + else:
447 + if (obj_st.st_dev, obj_st.st_ino) != \
448 + (master_st.st_dev, master_st.st_ino):
449 + return set()
451 - rValue = set()
452 for soname in self._libs:
453 for arch in self._libs[soname]:
454 - if obj in self._libs[soname][arch]["providers"]:
455 + # XXX
456 + if obj_key in self._libs[soname][arch]["providers"]:
457 for x in self._libs[soname][arch]["consumers"]:
458 - path = self._obj_properties[x][2]
459 - path = [realpath(y) for y in path+self._defpath]
460 - if soname[0] == os.sep and realpath(soname) == realpath(obj):
461 - rValue.add(x)
462 - elif realpath(obj_dir) in path:
463 - rValue.add(x)
464 + _, _, path, _, objs = self._obj_properties[x]
465 + # XXX necessary?
466 + #path = [realpath(y) for y in path+self._defpath]
467 + path = path + self._defpath
468 + # XXX x is an soname, so it should never start with os.sep,
469 + # right?
470 + #if soname[0] == os.sep and realpath(soname) == realpath(obj):
471 + # rValue.add(x)
472 + #if realpath(obj_dir) in path:
473 + # rValue.add(x)
474 + for y in objs:
475 + for z in b:
476 + if z[0] in path:
477 + rValue.add(y)
478 return rValue
481 class vardbapi(dbapi):
483 _excluded_dirs = ["CVS", "lost+found"]
484 @@ -1667,11 +1928,14 @@
485 writemsg("!!! FAILED postrm: %s\n" % retval, noiselevel=-1)
486 return retval
488 - # regenerate reverse NEEDED map
489 + # regenerate reverse NEEDED map before removing files for the purpose
490 + # of finding preserved libraries that have no consumers. The
491 + # libraries found will be removed after unmerging files.
492 self.vartree.dbapi.linkmap.rebuild()
494 - # remove preserved libraries that don't have any consumers left
495 - # FIXME: this code is quite ugly and can likely be optimized in several ways
496 + # Find preserved libraries that don't have any consumers left.
497 + # FIXME: This code is quite ugly and can likely be optimized in
498 + # several ways
499 plib_dict = plib_registry.getPreservedLibs()
500 for cpv in plib_dict:
501 plib_dict[cpv].sort()
502 @@ -1707,6 +1971,7 @@
503 break
504 if not keep:
505 unlink_list.append(f)
506 + # Remove preserved libraries that don't have any consumers left.
507 for obj in unlink_list:
508 try:
509 if os.path.islink(obj):
510 @@ -1720,6 +1985,7 @@
511 pass
512 else:
513 raise e
515 plib_registry.pruneNonExisting()
517 finally: