[SyncFS] Build indexes from FileTracker entries on disk.
[chromium-blink-merge.git] / mojo / tools / pylib / transitive_hash.py
blob93e8dc4e75ed3f327163afa5b05e9e7a34f7ffa6
1 # Copyright 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
5 import logging
6 import subprocess
7 import sys
9 from hashlib import sha256
10 from os.path import basename, realpath
# Root logger (getLogger() called without a name); every helper in this module
# sends its debug output through it.
_logging = logging.getLogger()
14 # Based on/taken from
15 # http://code.activestate.com/recipes/578231-probably-the-fastest-memoization-decorator-in-the-/
16 # (with cosmetic changes).
def _memoize(f):
  """Wraps the one-argument function |f| so each result is computed once.

  The argument is used as a dictionary key, so it must be hashable. If |f|
  raises, nothing is stored and the exception propagates to the caller.
  """
  class _ResultCache(dict):
    # dict only calls __missing__ on a failed lookup, so cache hits never
    # re-enter Python code.
    def __missing__(self, arg):
      result = f(arg)
      self[arg] = result
      return result

  cache = _ResultCache()
  return cache.__getitem__
@_memoize
def _file_hash(filename):
  """Returns the SHA-256 digest of |filename| as reported by `sha256sum -b`.

  Successful results are cached per filename argument by the memoization
  decorator. A failing sha256sum invocation raises from
  subprocess.check_output().
  """
  _logging.debug("Hashing %s ...", filename)
  output = subprocess.check_output(['sha256sum', '-b', filename])
  # The digest is taken to be the first whitespace-delimited field of the
  # command's output; the rest of the line is ignored.
  digest = output.split(None, 1)[0]
  _logging.debug(" => %s", digest)
  return digest
@_memoize
def _get_dependencies(filename):
  """Returns the list of file paths that `ldd` reports for |filename|.

  Every output line containing a '/' contributes one path: the text from the
  first '/' up to the next whitespace. Lines without a '/' are only logged at
  debug level. Successful results are cached per filename argument.
  """
  _logging.debug("Getting dependencies for %s ...", filename)
  # NOTE: ldd(1) documents that it should not be run on untrusted executables.
  ldd_output = subprocess.check_output(['ldd', filename])
  dependencies = []
  for entry in ldd_output.splitlines():
    slash = entry.find('/')
    if slash >= 0:
      # entry[slash:] starts with '/', so the split always yields a first field.
      dependencies.append(entry[slash:].split(None, 1)[0])
    else:
      _logging.debug(" => no file found in line: %s", entry)
  _logging.debug(" => %s", dependencies)
  return dependencies
def transitive_hash(filename):
  """Returns a hex string combining the hashes of |filename| and its libraries.

  Starting from |filename|, every file reported (directly or indirectly) as a
  dependency is resolved with realpath() and hashed. The distinct hashes are
  sorted, joined with '|' and hashed once more with SHA-256, so the result does
  not depend on the order in which files were visited."""
  seen_hashes = set()
  pending = [filename]
  while pending:
    path = realpath(pending.pop())
    digest = _file_hash(path)
    if digest not in seen_hashes:
      _logging.debug("Haven't seen %s (%s) ...", path, digest)
      seen_hashes.add(digest)
      # Only files with a new hash have their dependencies followed.
      pending.extend(_get_dependencies(path))
    else:
      _logging.debug("Already seen %s (%s) ...", path, digest)
  combined = '|'.join(sorted(seen_hashes))
  return sha256(combined).hexdigest()
def main(argv):
  """Prints the transitive hash of every file named on the command line.

  For each file one line is written to stdout: "<hash> <filename>" on success,
  or "ERROR <filename>" if hashing that file raised an exception. With no file
  arguments a usage message is printed instead.

  Args:
    argv: Argument list in sys.argv form; argv[0] is the program name and the
        remaining entries are the files to hash.

  Returns:
    0 if all files were hashed (or only the usage text was shown), 1 if at
    least one file could not be hashed.
  """
  logging.basicConfig()
  # Uncomment to debug:
  # _logging.setLevel(logging.DEBUG)

  if len(argv) < 2:
    # argv may be completely empty (e.g. when called programmatically), in
    # which case there is no program name to derive; fall back to a fixed one.
    program = basename(argv[0]) if argv else 'transitive_hash.py'
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("""\
Usage: %s [file] ...

Prints the \"transitive\" hash of each (executable) file. The transitive
hash is a hash of the file and all the shared libraries on which it
depends (done in an order-independent way).""" % program)
    return 0

  rv = 0
  for filename in argv[1:]:
    try:
      file_hash = transitive_hash(filename)
    except Exception:
      # Catch Exception rather than everything, so KeyboardInterrupt and
      # SystemExit still stop the run instead of being reported as "ERROR".
      # The cause is kept available for when debug logging is enabled.
      logging.debug("Failed to hash %s", filename, exc_info=True)
      print("ERROR %s" % filename)
      rv = 1
    else:
      print("%s %s" % (file_hash, filename))
  return rv
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == '__main__':
  exit_status = main(sys.argv)
  sys.exit(exit_status)