3 # Directory integrity scanner.
8 from os
.path
import join
10 from cPickle
import dump
, load
16 """Root of directory generator"""
17 topstat
= os
.lstat(top
)
18 for x
in walker(top
, '.', topstat
):
21 def walker(path
, name
, dirstat
):
22 """Directory tree generator.
24 At one point, this started as a copy of os.walk from Python's
25 library. Even the arguments are different now.
29 names
= os
.listdir(path
)
31 sys
.stderr
.write("Warning, can't read dir: %s\n" % path
)
34 # The verification algorithm requires the names to be sorted.
37 # Stat each name found, and put the result in one of two lists.
38 dirs
, nondirs
= [], []
40 if path
== '.' and (onename
== "0sure.dat.gz" or
41 onename
== "0sure.bak.gz" or
42 onename
== "0sure.0.gz"):
44 st
= os
.lstat(join(path
, onename
))
45 if S_ISDIR(st
.st_mode
):
46 dirs
.append((onename
, st
))
48 nondirs
.append((onename
, st
))
50 # Indicate "entering" the directory.
51 yield 'd', name
, convert_stat(dirstat
)
53 # Then recursively walk into all of the subdirectories.
54 for (onename
, st
) in dirs
:
55 subpath
= join(path
, onename
)
56 if st
.st_dev
== dirstat
.st_dev
:
57 for x
in walker(subpath
, onename
, st
):
60 # Then yield each entry that is not a subdirectory.
61 for (onename
, st
) in nondirs
:
62 yield '-', onename
, convert_stat(st
)
64 # Last, yield the leaving.
67 # Convert the passed stat info into an association of the information
68 # itself. Does not do anything that requires reading the file (such
69 # as readlink or md5).
71 if S_ISDIR(st
.st_mode
):
72 return { 'kind': 'dir',
75 'perm': S_IMODE(st
.st_mode
) }
77 elif S_ISREG(st
.st_mode
):
78 return { 'kind': 'file',
84 'perm': S_IMODE(st
.st_mode
) }
86 elif S_ISLNK(st
.st_mode
):
87 return { 'kind': 'lnk' }
90 return { 'kind': 'sock',
93 'perm': S_IMODE(st
.st_mode
) }
96 return { 'kind': 'fifo',
99 'perm': S_IMODE(st
.st_mode
) }
102 return { 'kind': 'blk',
105 'devmaj': os
.major(st
.st_rdev
),
106 'devmin': os
.minor(st
.st_rdev
),
107 'perm': S_IMODE(st
.st_mode
) }
110 return { 'kind': 'chr',
113 'devmaj': os
.major(st
.st_rdev
),
114 'devmin': os
.minor(st
.st_rdev
),
115 'perm': S_IMODE(st
.st_mode
) }
118 raise "Unknown file kind"
121 """Make an empty tree. No meaningful attributes for the root
127 def empty_generator():
# Flags passed as the 'mode' argument of handle_leave: 1 for a directory
# that was added, 2 for one that was deleted, and both bits set (3) when
# the directory exists on both sides.
mode_add = 1
mode_delete = 2
mode_both = mode_add | mode_delete
134 """Class for comparing two directory iterations. Keeps track of
135 state, and allows child classes to define handlers for the various
136 types of differences found."""
138 def __init__(self
, left
, right
):
142 # Default handlers for the 6 possible changes (or not changes)
143 # that can happen in a directory. The adds and deletes take an
144 # additional argument that will be set to true if this added or
145 # removed entity is contained in an entirely new directory. Some
146 # handlers may want to avoid printing verbose messages for the
147 # contents of added or deleted directories, and can use this
def handle_same_dir(self, path, a, b):
    """Default handler for a directory present on both sides.

    'a' and 'b' are the left and right entries for the directory.
    Does nothing itself and hands back the result of
    empty_generator(); child classes override it to react.
    """
    nothing = empty_generator()
    return nothing
def handle_delete_dir(self, path, a, recursing):
    """Default handler for a directory found only on the left side.

    'recursing' is False when called from the main comparison and True
    when called while consuming a whole deleted directory.  Does
    nothing itself and hands back the result of empty_generator().
    """
    nothing = empty_generator()
    return nothing
def handle_add_dir(self, path, a, recursing):
    """Default handler for a directory found only on the right side.

    'recursing' is False when called from the main comparison and True
    when called while consuming a whole added directory.  Does nothing
    itself and hands back the result of empty_generator().
    """
    nothing = empty_generator()
    return nothing
def handle_same_nondir(self, path, a, b):
    """Default handler for a non-directory present on both sides.

    'a' and 'b' are the left and right entries for the same name.
    Does nothing itself and hands back the result of
    empty_generator(); child classes override it to react.
    """
    nothing = empty_generator()
    return nothing
def handle_delete_nondir(self, path, a, recursing):
    """Default handler for a non-directory found only on the left side.

    'recursing' is False when called from the main comparison and True
    when called while consuming a whole deleted directory.  Does
    nothing itself and hands back the result of empty_generator().
    """
    nothing = empty_generator()
    return nothing
def handle_add_nondir(self, path, a, recursing):
    """Default handler for a non-directory found only on the right side.

    'recursing' is False when called from the main comparison and True
    when called while consuming a whole added directory.  Does nothing
    itself and hands back the result of empty_generator().
    """
    nothing = empty_generator()
    return nothing
167 def handle_leave(self
, path
, mode
):
168 """Handle the leaving of a directory. Instead of 'recursing',
169 the mode is defined as 'mode_add' (1) for add, 'mode_delete'
170 (2) for delete, or these two or'd together 'mode_both' (3) for
172 return empty_generator()
175 a
= self
.__left
.next()
177 raise "Scan doesn't start with a directory"
178 b
= self
.__right
.next()
180 raise "Tree walk doesn't start with a directory"
181 for x
in self
.handle_same_dir(".", a
, b
):
183 for x
in self
.__run
(b
[1], 1):
186 def __run(self
, path
, depth
):
187 """Iterate both pairs of directories equally
189 Processes the contents of a single directory, recursively
190 calling itself to handle child directories. Returns with both
191 iterators advanced past the 'u' node that ends the dir."""
192 # print "run(%d): '%s'" % (depth, path)
193 a
= self
.__left
.next()
194 b
= self
.__right
.next()
197 # print "Comparing (%d) %s and %s" % (depth, a, b)
198 if a
[0] == 'u' and b
[0] == 'u':
199 # Both are leaving the directory.
200 # print "leave(%d): '%s'" % (depth, path)
201 for x
in self
.handle_leave(path
, mode_both
):
205 elif a
[0] == 'd' and b
[0] == 'd':
206 # Both looking at a directory entry.
209 # if the name is the same, walk the tree.
210 for x
in self
.handle_same_dir(path
, a
, b
):
212 for x
in self
.__run
(os
.path
.join(path
, a
[1]), depth
+ 1):
214 a
= self
.__left
.next()
215 b
= self
.__right
.next()
219 # A directory has been deleted.
220 for x
in self
.handle_delete_dir(path
, a
, False):
222 for x
in self
.delete_whole_dir(self
.__left
,
223 os
.path
.join(path
, a
[1])):
225 a
= self
.__left
.next()
229 # A directory has been added.
230 for x
in self
.handle_add_dir(path
, b
, False):
233 for x
in self
.add_whole_dir(self
.__right
,
234 os
.path
.join(path
, b
[1])):
236 b
= self
.__right
.next()
239 elif a
[0] == '-' and b
[0] == '-':
240 # Both are looking at a non-dir.
244 for x
in self
.handle_same_nondir(path
, a
, b
):
246 a
= self
.__left
.next()
247 b
= self
.__right
.next()
252 for x
in self
.handle_delete_nondir(path
, a
, False):
254 a
= self
.__left
.next()
259 for x
in self
.handle_add_nondir(path
, b
, False):
261 b
= self
.__right
.next()
264 elif a
[0] == '-' and b
[0] == 'u':
265 for x
in self
.handle_delete_nondir(path
, a
, False):
267 a
= self
.__left
.next()
270 elif a
[0] == 'u' and b
[0] == '-':
271 for x
in self
.handle_add_nondir(path
, b
, False):
273 b
= self
.__right
.next()
276 elif a
[0] == 'd' and (b
[0] == '-' or b
[0] == 'u'):
277 for x
in self
.handle_delete_dir(path
, a
, False):
279 for x
in self
.delete_whole_dir(self
.__left
,
280 os
.path
.join(path
, a
[1])):
282 a
= self
.__left
.next()
285 elif (a
[0] == '-' or a
[0] == 'u') and b
[0] == 'd':
286 for x
in self
.handle_add_dir(path
, b
, False):
288 for x
in self
.add_whole_dir(self
.__right
,
289 os
.path
.join(path
, b
[1])):
291 b
= self
.__right
.next()
295 print "Unhandled case: '%s' and '%s'" % (a
[0], b
[0])
298 def add_whole_dir(self
, iter, path
):
299 "Consume entries until this directory has been added"
300 # print "add_whole_dir: %s" % path
304 for x
in self
.handle_leave(path
, mode_add
):
308 for x
in self
.handle_add_dir(path
, a
, True):
310 for x
in self
.add_whole_dir(iter, os
.path
.join(path
, a
[1])):
313 for x
in self
.handle_add_nondir(path
, a
, True):
316 def delete_whole_dir(self
, iter, path
):
317 "Consume entries until this directory has been deleted"
318 # print "delete_whole_dir: %s" % path
322 for x
in self
.handle_leave(path
, mode_delete
):
326 for x
in self
.handle_delete_dir(path
, a
, True):
328 for x
in self
.delete_whole_dir(iter, os
.path
.join(path
, a
[1])):
331 for x
in self
.handle_delete_nondir(path
, a
, True):
335 'dir': ['uid', 'gid', 'perm'],
336 'file': ['uid', 'gid', 'mtime', 'perm', 'md5'],
338 'sock': ['uid', 'gid', 'perm'],
339 'fifo': ['uid', 'gid', 'perm'],
340 'blk': ['uid', 'gid', 'perm', 'devmaj', 'devmin'],
341 'chr': ['uid', 'gid', 'perm', 'devmaj', 'devmin'],
343 def compare_entries(path
, a
, b
):
344 if a
['kind'] != b
['kind']:
345 yield "- %-20s %s" % (a
['kind'], path
)
346 yield "+ %-20s %s" % (b
['kind'], path
)
349 for item
in __must_match
[a
['kind']]:
350 if not (a
.has_key(item
) and b
.has_key(item
)):
352 elif a
[item
] != b
[item
]:
355 yield " [%-18s] %s" % (",".join(misses
), path
)
357 if a
.has_key('targ'):
358 yield " old targ: %s" % a
['targ']
359 if b
.has_key('targ'):
360 yield " new targ: %s" % b
['targ']
363 class check_comparer(comparer
):
364 """Comparer for comparing either two trees, or a tree and a
365 filesystem. 'right' should be the newer tree.
366 Yields strings giving the tree differences.
def handle_same_dir(self, path, a, b):
    """Report differences for a directory present on both sides.

    Joins 'path' with the entry name (a[1]) and passes that, together
    with the attribute records of both entries (a[2] and b[2]), to
    compare_entries, returning its result.
    """
    full_path = os.path.join(path, a[1])
    old_info, new_info = a[2], b[2]
    return compare_entries(full_path, old_info, new_info)
371 def handle_delete_dir(self
, path
, a
, recursing
):
375 yield "- %-20s %s" % ('dir', os
.path
.join(path
, a
[1]))
376 def handle_add_dir(self
, path
, a
, recursing
):
380 yield "+ %-20s %s" % ('dir', os
.path
.join(path
, a
[1]))
def handle_same_nondir(self, path, a, b):
    """Report differences for a non-directory present on both sides.

    Joins 'path' with the entry name (a[1]) and passes that, together
    with the attribute records of both entries (a[2] and b[2]), to
    compare_entries, returning its result.
    """
    full_path = os.path.join(path, a[1])
    old_info, new_info = a[2], b[2]
    return compare_entries(full_path, old_info, new_info)
384 def handle_delete_nondir(self
, path
, a
, recursing
):
388 yield "- %-20s %s" % (a
[2]['kind'], os
.path
.join(path
, a
[1]))
389 def handle_add_nondir(self
, path
, a
, recursing
):
393 yield "+ %-20s %s" % (a
[2]['kind'], os
.path
.join(path
, a
[1]))
def update_link(assoc, path, name):
    """Fill in the symlink target of a link entry.

    When assoc['kind'] is 'lnk', reads the link located at 'name'
    inside 'path' and stores its target under assoc['targ'].  Entries
    of any other kind are left untouched.
    """
    if assoc['kind'] != 'lnk':
        return
    link_path = os.path.join(path, name)
    assoc['targ'] = os.readlink(link_path)
def same_inode(a, b):
    """Tell whether two entries agree on 'kind', 'ino' and 'ctime'.

    The fields are compared in that order and the comparison stops at
    the first mismatch, so later keys are not looked up once an
    earlier one differs.
    """
    return all(a[field] == b[field] for field in ('kind', 'ino', 'ctime'))
406 class update_comparer(comparer
):
407 """Yields a tree equivalent to the right tree, which should be
408 coming from a live filesystem. Fills in symlink destinations and
409 file md5sums (if possible)."""
411 def handle_same_dir(self
, path
, a
, b
):
415 def handle_add_dir(self
, path
, a
, recursing
):
419 def handle_same_nondir(self
, path
, a
, b
):
420 update_link(b
[2], path
, b
[1])
421 if b
[2]['kind'] == 'file':
422 if same_inode(a
[2], b
[2]):
423 b
[2]['md5'] = a
[2]['md5']
426 b
[2]['md5'] = hashing
.hashof(os
.path
.join(path
, b
[1]))
428 b
[2]['md5'] = '[error]'
432 def handle_add_nondir(self
, path
, a
, recursing
):
433 update_link(a
[2], path
, a
[1])
434 if a
[2]['kind'] == 'file':
436 a
[2]['md5'] = hashing
.hashof(os
.path
.join(path
, a
[1]))
438 a
[2]['md5'] = '[error]'
442 def handle_leave(self
, path
, mode
):
443 if (mode
& mode_add
) != 0:
# Format tag dumped first by writer_new; it matches the key in the
# 'readers' table for the 1.1 format.
version = 'Asure scan version 1.1'
459 for item
in load(fd
):
465 'Asure scan version 1.0': read1_0
,
466 'Asure scan version 1.1': read1_1
}
469 """Iterate over a previously written dump"""
470 fd
= gzip
.open(path
, 'rb')
472 if readers
.has_key(vers
):
473 for item
in readers
[vers
](fd
):
476 raise "Unsupported version of asure file"
481 def writer_new(path
, iter):
482 """Write the given item (probably assembled iterator)"""
483 fd
= gzip
.open(path
, 'wb')
484 dump(version
, fd
, use_protocol
)
488 if len(items
) >= 100:
489 dump(items
, fd
, use_protocol
)
492 dump(items
, fd
, use_protocol
)
495 def writer_old(path
, iter):
496 """Write the given item (probably assembled iterator)"""
497 fd
= gzip
.open(path
, 'wb')
498 dump('Asure scan version 1.0', fd
, use_protocol
)
500 dump(item
, fd
, use_protocol
)
def writer(path, iter):
    """Write the items of 'iter' to the scan file at 'path'.

    Thin entry point that delegates to writer_new.
    """
    writer_new(path, iter)
507 """Cycle through the names"""
509 os
.rename('0sure.dat.gz', '0sure.bak.gz')
512 os
.rename('0sure.0.gz', '0sure.dat.gz')
515 """Perform a fresh scan of the filesystem"""
516 tree
= update_comparer(empty_tree(), walk('.'))
517 writer('0sure.0.gz', tree
.run())
521 """Perform a scan of the filesystem, and compare it with the scan
522 file. Reports differences."""
523 prior
= reader('0sure.dat.gz')
524 cur
= update_comparer(empty_tree(), walk('.')).run()
525 # compare_trees(prior, cur)
526 for x
in check_comparer(prior
, cur
).run():
530 """Scan filesystem, but also read the previous scan to cache md5
531 hashes of files that haven't had any inode changes"""
532 prior
= reader('0sure.dat.gz')
533 cur
= update_comparer(prior
, walk('.')).run()
534 writer('0sure.0.gz', cur
)
538 """Compare the previous scan with the current."""
539 prior
= reader('0sure.bak.gz')
540 cur
= reader('0sure.dat.gz')
541 for x
in check_comparer(prior
, cur
).run():
545 """Show the contents of the scan file"""
547 for i
in reader('0sure.dat.gz'):
550 print "%s%s" % (" " * indent
, i
)
555 """Just read the scan file, doing nothing with it"""
556 for i
in reader('0sure.dat.gz'):
560 """Copy the latest scan, can be used to update to a newer storage
562 writer('0sure.0.gz', reader('0sure.dat.gz'))
563 os
.rename('0sure.0.gz', '0sure.dat.gz')
577 if commands
.has_key(argv
[0]):
583 print "Usage: asure {%s}" % '|'.join(commands
.keys())
586 if __name__
== '__main__':