Compute and compare md5 hashes on new files.
[asure.git] / asure.py
blobe3f1c1da6da79465a1fba6e8e15cc3e7174443da
1 #! /usr/bin/env python
3 # Directory integrity scanner.
5 from stat import *
6 import os
7 import sys
8 from os.path import join
10 from cPickle import dump, load
11 import gzip
13 import hashing
def walk(top):
    """Generate the scan events for the tree rooted at `top`.

    `top` itself is stat'ed without following symlinks and is reported
    under the name '.'; every event produced by walker() is relayed
    unchanged.
    """
    root_stat = os.lstat(top)
    for event in walker(top, '.', root_stat):
        yield event
def walker(path, name, dirstat):
    """Directory tree generator.

    At one point, this started as a copy of os.walk from Python's
    library.  Even the arguments are different now.

    Arguments:
      path    -- filesystem path of the directory to list.
      name    -- name under which this directory is reported.
      dirstat -- lstat result of the directory itself.

    Yields, in this order:
      ('d', name, info)  on entering the directory,
      the events of every subdirectory (recursively, sorted by name),
      ('-', name, info)  for every entry that is not a directory,
      ('u',)             on leaving the directory.

    A directory that cannot be listed produces a warning on stderr and
    no events at all.  An entry that cannot be stat'ed (for example
    because it disappeared after the directory was listed) produces a
    warning and is skipped instead of aborting the whole scan.
    """
    try:
        names = os.listdir(path)
    except OSError:
        sys.stderr.write("Warning, can't read dir: %s\n" % path)
        return

    # The verification algorithm requires the names to be sorted.
    names.sort()

    # The scanner's own data files live in the top directory and must
    # not show up in the scan.
    own_files = ("0sure.dat.gz", "0sure.bak.gz", "0sure.0.gz")

    # Stat each name found, and put the result in one of two lists.
    dirs, nondirs = [], []
    for onename in names:
        if path == '.' and onename in own_files:
            continue
        fullname = join(path, onename)
        try:
            st = os.lstat(fullname)
        except OSError:
            sys.stderr.write("Warning, can't stat: %s\n" % fullname)
            continue
        if S_ISDIR(st.st_mode):
            dirs.append((onename, st))
        else:
            nondirs.append((onename, st))

    # Indicate "entering" the directory.
    yield 'd', name, convert_stat(dirstat)

    # Then recursively walk into all of the subdirectories.  A
    # subdirectory on a different device than its parent is skipped
    # entirely (no events are produced for it).
    for (onename, st) in dirs:
        if st.st_dev == dirstat.st_dev:
            for x in walker(join(path, onename), onename, st):
                yield x

    # Then yield each entry that is not a subdirectory.
    for (onename, st) in nondirs:
        yield '-', onename, convert_stat(st)

    # Last, yield the leaving.
    yield ('u',)
def convert_stat(st):
    """Convert the passed stat info into an association of the
    information itself.

    Does not do anything that requires reading the file (such as
    readlink or md5).

    Returns a dict whose 'kind' is one of 'dir', 'file', 'lnk', 'sock',
    'fifo', 'blk' or 'chr'.  Every kind except 'lnk' carries 'uid',
    'gid' and 'perm'; 'file' adds 'mtime', 'ctime' and 'ino'; 'blk' and
    'chr' add 'devmaj' and 'devmin'.

    Raises ValueError for a mode that matches none of these kinds.
    """
    mode = st.st_mode

    # Symlinks record nothing but their kind here.
    if S_ISLNK(mode):
        return {'kind': 'lnk'}

    info = {'uid': st.st_uid,
            'gid': st.st_gid,
            'perm': S_IMODE(mode)}

    # Every predicate must be *called* with the mode: a bare function
    # object is always true, which used to classify every fifo and
    # device node as a socket.
    if S_ISDIR(mode):
        info['kind'] = 'dir'
    elif S_ISREG(mode):
        info['kind'] = 'file'
        info['mtime'] = st.st_mtime
        info['ctime'] = st.st_ctime
        info['ino'] = st.st_ino
    elif S_ISSOCK(mode):
        info['kind'] = 'sock'
    elif S_ISFIFO(mode):
        info['kind'] = 'fifo'
    elif S_ISBLK(mode) or S_ISCHR(mode):
        if S_ISBLK(mode):
            info['kind'] = 'blk'
        else:
            info['kind'] = 'chr'
        info['devmaj'] = os.major(st.st_rdev)
        info['devmin'] = os.minor(st.st_rdev)
    else:
        raise ValueError("Unknown file kind")
    return info
def empty_tree():
    """Generate the events of an empty tree.

    The tree consists of the root directory '.' alone, which carries no
    attributes: one enter event followed by one leave event.
    """
    for event in (('d', '.', {}), ('u',)):
        yield event
def empty_generator():
    """Return a generator that finishes without producing anything."""
    # The never-taken yield is what makes this function a generator.
    if False:
        yield ()
class comparer:
    """Class for comparing two directory iterations.  Keeps track of
    state, and allows child classes to define handlers for the various
    types of differences found.

    Both iterations are streams of event tuples whose first element is
    a tag: 'd' (entering a directory), '-' (a non-directory) or 'u'
    (leaving the current directory).  'd' and '-' events carry the entry
    name at index 1.  The merge logic assumes that, inside a directory,
    both streams list subdirectories before non-directories and that
    each group is sorted by name.

    run() is a generator; it relays whatever the handlers yield.
    """

    def __init__(self, left, right):
        self.__left = left
        self.__right = right

    # Default handlers for the 6 possible changes (or not changes)
    # that can happen in a directory.  The adds and deletes take an
    # additional argument that will be set to true if this added or
    # removed entity is contained in an entirely new directory.  Some
    # handlers may want to avoid printing verbose messages for the
    # contents of added or deleted directories, and can use this
    # value.  Every handler returns an iterable of results.
    def handle_same_dir(self, path, a, b):
        return empty_generator()

    def handle_delete_dir(self, path, a, recursing):
        return empty_generator()

    def handle_add_dir(self, path, a, recursing):
        return empty_generator()

    def handle_same_nondir(self, path, a, b):
        return empty_generator()

    def handle_delete_nondir(self, path, a, recursing):
        return empty_generator()

    def handle_add_nondir(self, path, a, recursing):
        return empty_generator()

    def handle_leave(self, path, recursing):
        return empty_generator()

    def run(self):
        """Compare the two iterations, yielding the handlers' results.

        Raises ValueError if either iteration is empty or does not
        start with a directory event.
        """
        a = next(self.__left, None)
        if a is None or a[0] != 'd':
            raise ValueError("Scan doesn't start with a directory")
        b = next(self.__right, None)
        if b is None or b[0] != 'd':
            raise ValueError("Tree walk doesn't start with a directory")
        for x in self.handle_same_dir(".", a, b):
            yield x
        for x in self.__run(b[1], 1):
            yield x

    def __run(self, path, depth):
        """Iterate both pairs of directories equally

        Processes the contents of a single directory, recursively
        calling itself to handle child directories.  Returns with both
        iterators advanced past the 'u' node that ends the dir."""
        a = next(self.__left)
        b = next(self.__right)

        while True:
            ta, tb = a[0], b[0]

            if ta not in ('d', '-', 'u') or tb not in ('d', '-', 'u'):
                # Malformed input; kept as a hard exit as before.
                print("Unhandled case: '%s' and '%s'" % (ta, tb))
                sys.exit(2)

            if ta == 'u' and tb == 'u':
                # Both are leaving the directory.
                for x in self.handle_leave(path, False):
                    yield x
                return

            elif ta == 'd' and tb == 'd' and a[1] == b[1]:
                # Same directory on both sides: walk into it.
                for x in self.handle_same_dir(path, a, b):
                    yield x
                for x in self.__run(os.path.join(path, a[1]), depth + 1):
                    yield x
                a = next(self.__left)
                b = next(self.__right)

            elif ta == 'd' and (tb != 'd' or a[1] < b[1]):
                # A directory has been deleted: the right side is
                # either past its directories or at a later name.
                for x in self.handle_delete_dir(path, a, False):
                    yield x
                for x in self.delete_whole_dir(self.__left,
                                               os.path.join(path, a[1])):
                    yield x
                a = next(self.__left)

            elif tb == 'd':
                # A directory has been added (mirror of the case above).
                for x in self.handle_add_dir(path, b, False):
                    yield x
                for x in self.add_whole_dir(self.__right,
                                            os.path.join(path, b[1])):
                    yield x
                b = next(self.__right)

            # From here on neither side is at a directory, and at
            # least one side is at a non-directory.
            elif ta == '-' and tb == '-' and a[1] == b[1]:
                # Same non-dir on both sides.
                for x in self.handle_same_nondir(path, a, b):
                    yield x
                a = next(self.__left)
                b = next(self.__right)

            elif ta == '-' and (tb == 'u' or a[1] < b[1]):
                # Deleted non-dir.
                for x in self.handle_delete_nondir(path, a, False):
                    yield x
                a = next(self.__left)

            else:
                # Added non-dir.
                for x in self.handle_add_nondir(path, b, False):
                    yield x
                b = next(self.__right)

    def add_whole_dir(self, iter, path):
        "Consume entries until this directory has been added"
        while True:
            a = next(iter)
            if a[0] == 'u':
                for x in self.handle_leave(path, True):
                    yield x
                return
            elif a[0] == 'd':
                for x in self.handle_add_dir(path, a, True):
                    yield x
                for x in self.add_whole_dir(iter, os.path.join(path, a[1])):
                    yield x
            else:
                for x in self.handle_add_nondir(path, a, True):
                    yield x

    def delete_whole_dir(self, iter, path):
        "Consume entries until this directory has been deleted"
        while True:
            a = next(iter)
            if a[0] == 'u':
                for x in self.handle_leave(path, True):
                    yield x
                return
            elif a[0] == 'd':
                for x in self.handle_delete_dir(path, a, True):
                    yield x
                for x in self.delete_whole_dir(iter, os.path.join(path, a[1])):
                    yield x
            else:
                for x in self.handle_delete_nondir(path, a, True):
                    yield x
# For each entry kind, the attributes that must be present and equal in
# both entries for the entries to be considered unchanged.
__must_match = {
    'dir': ['uid', 'gid', 'perm'],
    'file': ['uid', 'gid', 'mtime', 'perm', 'md5'],
    'lnk': ['targ'],
    'sock': ['uid', 'gid', 'perm'],
    'fifo': ['uid', 'gid', 'perm'],
    'blk': ['uid', 'gid', 'perm', 'devmaj', 'devmin'],
    'chr': ['uid', 'gid', 'perm', 'devmaj', 'devmin'],
    }

def compare_entries(path, a, b):
    """Yield report lines describing how entry `b` differs from `a`.

    `a` and `b` are attribute dicts, each with a 'kind' key; `path` is
    only used in the report text.

    If the kinds differ, a '-' line for the old kind and a '+' line for
    the new kind are produced.  Otherwise one line lists, in brackets,
    every required attribute that is missing from either entry or that
    has a different value; for a differing symlink target the old and
    new targets follow when available.  Nothing is yielded when the
    entries match.
    """
    if a['kind'] != b['kind']:
        yield "- %-4s %s" % (a['kind'], path)
        yield "+ %-4s %s" % (b['kind'], path)
        return

    # An attribute absent from either side counts as a mismatch.
    misses = [item for item in __must_match[a['kind']]
              if item not in a or item not in b or a[item] != b[item]]
    if misses:
        yield "[%s] %s" % (",".join(misses), path)
        if 'targ' in misses:
            if 'targ' in a:
                yield " old targ: %s" % a['targ']
            if 'targ' in b:
                yield " new targ: %s" % b['targ']
class check_comparer(comparer):
    """Comparer for comparing either two trees, or a tree and a
    filesystem.  'right' should be the newer tree.

    Yields strings giving the tree differences.  Entries inside a
    directory that was itself added or deleted are not reported
    individually.
    """

    def handle_same_dir(self, path, a, b):
        full = os.path.join(path, a[1])
        return compare_entries(full, a[2], b[2])

    def handle_delete_dir(self, path, a, recursing):
        if not recursing:
            yield "- dir %s" % (os.path.join(path, a[1]))

    def handle_add_dir(self, path, a, recursing):
        if not recursing:
            yield "+ dir %s" % (os.path.join(path, a[1]))

    def handle_same_nondir(self, path, a, b):
        full = os.path.join(path, a[1])
        return compare_entries(full, a[2], b[2])

    def handle_delete_nondir(self, path, a, recursing):
        if not recursing:
            yield "- %s" % (os.path.join(path, a[1]))

    def handle_add_nondir(self, path, a, recursing):
        if not recursing:
            yield "+ %s" % (os.path.join(path, a[1]))
def update_link(assoc, path, name):
    """Record the target of a symlink entry.

    When `assoc` describes a symlink ('kind' is 'lnk'), its 'targ' is
    set to the link target read from `name` inside directory `path`.
    Any other kind of entry is left untouched.
    """
    if assoc['kind'] != 'lnk':
        return
    link_path = os.path.join(path, name)
    assoc['targ'] = os.readlink(link_path)
class update_comparer(comparer):
    """Yields a tree equivalent to the right tree, which should be
    coming from a live filesystem.  Fills in symlink destinations and
    file md5sums (if possible).

    The delete handlers are not overridden here, so entries present
    only in the left tree do not appear in the output.
    """
    # NOTE(review): handle_same_nondir does not set 'md5' on file
    # entries, while handle_add_nondir does -- confirm this is intended.

    def handle_same_dir(self, path, a, b):
        yield b

    def handle_add_dir(self, path, a, recursing):
        yield a

    def handle_same_nondir(self, path, a, b):
        update_link(b[2], path, b[1])
        yield b

    def handle_add_nondir(self, path, a, recursing):
        info = a[2]
        update_link(info, path, a[1])
        if info['kind'] == 'file':
            info['md5'] = hashing.hashof(os.path.join(path, a[1]))
        yield a

    def handle_leave(self, path, recursing):
        yield ('u',)
# Format identifier written by writer() as the first pickled object of a
# scan file; reader() refuses files whose header differs from it.
version = 'Asure scan version 1.0'
def reader(path):
    """Iterate over a previously written dump

    Opens the gzip-compressed file at `path`, checks its version header
    and yields every pickled item that follows, until end of file.  The
    file is closed when the iteration finishes, fails, or the generator
    is closed.

    Raises ValueError if the header does not equal `version`.
    """
    # NOTE: unpickling executes whatever the file describes; only read
    # scan files that come from a trusted source.
    fd = gzip.open(path, 'rb')
    try:
        vers = load(fd)
        if version != vers:
            raise ValueError("incompatible version of asure file")
        while True:
            try:
                item = load(fd)
            except EOFError:
                # Normal end of the dump.
                return
            yield item
    finally:
        fd.close()
def writer(path, iter):
    """Write the given item (probably assembled iterator)

    Creates the gzip-compressed file `path`, writes the `version`
    header and then pickles every item of `iter` into it.  The file is
    always closed before returning, even if producing or dumping an
    item fails, so the data is complete on disk before a caller renames
    the file.
    """
    fd = gzip.open(path, 'wb')
    try:
        dump(version, fd, -1)
        for item in iter:
            dump(item, fd, -1)
    finally:
        # The original `fd.close` lacked the call parentheses and
        # therefore never closed the file.
        fd.close()
def fresh_scan():
    """Perform a fresh scan of the filesystem

    The scan of '.' is written to a scratch file first; the existing
    scan file is then moved to the backup name and the scratch file
    takes its place.
    """
    scratch = '0sure.0.gz'
    current = '0sure.dat.gz'
    backup = '0sure.bak.gz'

    scan = update_comparer(empty_tree(), walk('.'))
    writer(scratch, scan.run())
    try:
        os.rename(current, backup)
    except OSError:
        # Best effort: e.g. there is no earlier scan to keep.
        pass
    os.rename(scratch, current)
def check_scan():
    """Perform a scan of the filesystem, and compare it with the scan
    file.  Reports differences, one per line, on standard output."""
    recorded = reader('0sure.dat.gz')
    live = update_comparer(empty_tree(), walk('.')).run()
    differences = check_comparer(recorded, live).run()
    for line in differences:
        print(line)
def signoff():
    """Compare the previous scan with the current.

    Reads the backup scan file and the current scan file and prints
    each difference on standard output."""
    older = reader('0sure.bak.gz')
    newer = reader('0sure.dat.gz')
    differences = check_comparer(older, newer).run()
    for line in differences:
        print(line)
def main(argv):
    """Run the command named by the single argument in `argv`.

    `argv` is the argument list without the program name.  Anything
    other than exactly one recognised command prints the usage text and
    exits (see usage()).
    """
    if len(argv) != 1:
        usage()
    command = argv[0]
    if command == 'scan':
        fresh_scan()
    elif command == 'update':
        # Placeholder: no update is performed yet.
        print("Update")
    elif command == 'check':
        check_scan()
    elif command == 'signoff':
        signoff()
    elif command == 'show':
        for i in reader('0sure.dat.gz'):
            print(i)
    else:
        # An unknown command used to be ignored silently.
        usage()
def usage():
    """Print the command-line usage and exit with status 1."""
    # Lists every command main() accepts; 'signoff' and 'show' used to
    # be missing from this text.
    print("Usage: asure {scan|update|check|signoff|show}")
    sys.exit(1)
if __name__ == '__main__':
    # Script entry point: hand the command-line arguments (without the
    # program name) to main().
    main(sys.argv[1:])