3 # Directory integrity scanner.
9 from os
.path
import join
11 from cPickle
import dump
, load
18 """Root of directory generator"""
19 topstat
= os
.lstat(top
)
20 for x
in walker(top
, '.', topstat
):
23 def walker(path
, name
, dirstat
):
24 """Directory tree generator.
26 At one point, this started as a copy of os.walk from Python's
27 library. Even the arguments are different now.
31 names
= os
.listdir(path
)
33 sys
.stderr
.write("Warning, can't read dir: %s\n" % path
)
36 # The verification algorithm requires the names to be sorted.
39 # Stat each name found, and put the result in one of two lists.
40 dirs
, nondirs
= [], []
42 if path
== '.' and (onename
== "0sure.dat.gz" or
43 onename
== "0sure.bak.gz" or
44 onename
== "0sure.0.gz"):
46 st
= os
.lstat(join(path
, onename
))
47 if S_ISDIR(st
.st_mode
):
48 dirs
.append((onename
, st
))
50 nondirs
.append((onename
, st
))
52 # Indicate "entering" the directory.
53 yield 'd', name
, convert_stat(dirstat
)
55 # Then recursively walk into all of the subdirectories.
56 for (onename
, st
) in dirs
:
57 subpath
= join(path
, onename
)
58 if st
.st_dev
== dirstat
.st_dev
:
59 for x
in walker(subpath
, onename
, st
):
62 # Then yield each entry that is not a subdirectory.
63 for (onename
, st
) in nondirs
:
64 yield '-', onename
, convert_stat(st
)
66 # Last, yield the leaving.
69 # Convert the passed stat info into an association of the information
70 # itself. Does not do anything that requires reading the file (such
71 # as readlink or md5).
73 if S_ISDIR(st
.st_mode
):
74 return { 'kind': 'dir',
77 'perm': S_IMODE(st
.st_mode
) }
79 elif S_ISREG(st
.st_mode
):
80 return { 'kind': 'file',
86 'perm': S_IMODE(st
.st_mode
) }
88 elif S_ISLNK(st
.st_mode
):
89 return { 'kind': 'lnk' }
92 return { 'kind': 'sock',
95 'perm': S_IMODE(st
.st_mode
) }
98 return { 'kind': 'fifo',
101 'perm': S_IMODE(st
.st_mode
) }
104 return { 'kind': 'blk',
107 'devmaj': os
.major(st
.st_rdev
),
108 'devmin': os
.minor(st
.st_rdev
),
109 'perm': S_IMODE(st
.st_mode
) }
112 return { 'kind': 'chr',
115 'devmaj': os
.major(st
.st_rdev
),
116 'devmin': os
.minor(st
.st_rdev
),
117 'perm': S_IMODE(st
.st_mode
) }
120 raise "Unknown file kind"
123 """Make an empty tree. No meaningful attributes for the root
129 def empty_generator():
133 mode_add
, mode_delete
, mode_both
= (1, 2, 3)
136 """Class for comparing two directory iterations. Keeps track of
137 state, and allows child classes to define handlers for the various
138 types of differences found."""
140 def __init__(self
, left
, right
):
144 # Default handlers for the 6 possible changes (or not changes)
145 # that can happen in a directory. The adds and deletes take an
146 # additional argument that will be set to true if this added or
147 # removed entity is contained in an entirely new directory. Some
148 # handlers may want to avoid printing verbose messages for the
149 # contents of added or deleted directories, and can use this
def handle_same_dir(self, path, a, b):
    """Default handler for a directory entry seen in both iterations.

    `path` is the directory being processed; `a` and `b` are the entries
    taken from the left and right iterations.  This base version reports
    nothing: it hands back the result of empty_generator(), which callers
    iterate over.  Child classes override it to act on the pair.
    """
    nothing = empty_generator()
    return nothing
def handle_delete_dir(self, path, a, recursing):
    """Default handler for a directory found only in the left iteration.

    `a` is the entry for the deleted directory, located under `path`.
    `recursing` is true when the entry sits inside a directory that is
    itself being reported as deleted, so verbose handlers can stay quiet.
    This base version reports nothing: it hands back the result of
    empty_generator(), which callers iterate over.
    """
    nothing = empty_generator()
    return nothing
def handle_add_dir(self, path, a, recursing):
    """Default handler for a directory found only in the right iteration.

    `a` is the entry for the added directory, located under `path`.
    `recursing` is true when the entry sits inside a directory that is
    itself being reported as added, so verbose handlers can stay quiet.
    This base version reports nothing: it hands back the result of
    empty_generator(), which callers iterate over.
    """
    nothing = empty_generator()
    return nothing
def handle_same_nondir(self, path, a, b):
    """Default handler for a non-directory entry seen in both iterations.

    `path` is the directory being processed; `a` and `b` are the entries
    taken from the left and right iterations.  This base version reports
    nothing: it hands back the result of empty_generator(), which callers
    iterate over.  Child classes override it to act on the pair.
    """
    nothing = empty_generator()
    return nothing
def handle_delete_nondir(self, path, a, recursing):
    """Default handler for a non-directory found only in the left iteration.

    `a` is the entry for the deleted item, located under `path`.
    `recursing` is true when the entry sits inside a directory that is
    itself being reported as deleted, so verbose handlers can stay quiet.
    This base version reports nothing: it hands back the result of
    empty_generator(), which callers iterate over.
    """
    nothing = empty_generator()
    return nothing
def handle_add_nondir(self, path, a, recursing):
    """Default handler for a non-directory found only in the right iteration.

    `a` is the entry for the added item, located under `path`.
    `recursing` is true when the entry sits inside a directory that is
    itself being reported as added, so verbose handlers can stay quiet.
    This base version reports nothing: it hands back the result of
    empty_generator(), which callers iterate over.
    """
    nothing = empty_generator()
    return nothing
169 def handle_leave(self
, path
, mode
):
170 """Handle the leaving of a directory. Instead of 'recursing',
171 the mode is defined as 'mode_add' (1) for add, 'mode_delete'
172 (2) for delete, or these two or'd together 'mode_both' (3) for
174 return empty_generator()
177 a
= self
.__left
.next()
179 raise "Scan doesn't start with a directory"
180 b
= self
.__right
.next()
182 raise "Tree walk doesn't start with a directory"
183 for x
in self
.handle_same_dir(".", a
, b
):
185 for x
in self
.__run
(b
[1], 1):
188 def __run(self
, path
, depth
):
189 """Iterate both pairs of directories equally
191 Processes the contents of a single directory, recursively
192 calling itself to handle child directories. Returns with both
193 iterators advanced past the 'u' node that ends the dir."""
194 # print "run(%d): '%s'" % (depth, path)
195 a
= self
.__left
.next()
196 b
= self
.__right
.next()
199 # print "Comparing (%d) %s and %s" % (depth, a, b)
200 if a
[0] == 'u' and b
[0] == 'u':
201 # Both are leaving the directory.
202 # print "leave(%d): '%s'" % (depth, path)
203 for x
in self
.handle_leave(path
, mode_both
):
207 elif a
[0] == 'd' and b
[0] == 'd':
208 # Both looking at a directory entry.
211 # if the name is the same, walk the tree.
212 for x
in self
.handle_same_dir(path
, a
, b
):
214 for x
in self
.__run
(os
.path
.join(path
, a
[1]), depth
+ 1):
216 a
= self
.__left
.next()
217 b
= self
.__right
.next()
221 # A directory has been deleted.
222 for x
in self
.handle_delete_dir(path
, a
, False):
224 for x
in self
.delete_whole_dir(self
.__left
,
225 os
.path
.join(path
, a
[1])):
227 a
= self
.__left
.next()
231 # A directory has been added.
232 for x
in self
.handle_add_dir(path
, b
, False):
235 for x
in self
.add_whole_dir(self
.__right
,
236 os
.path
.join(path
, b
[1])):
238 b
= self
.__right
.next()
241 elif a
[0] == '-' and b
[0] == '-':
242 # Both are looking at a non-dir.
246 for x
in self
.handle_same_nondir(path
, a
, b
):
248 a
= self
.__left
.next()
249 b
= self
.__right
.next()
254 for x
in self
.handle_delete_nondir(path
, a
, False):
256 a
= self
.__left
.next()
261 for x
in self
.handle_add_nondir(path
, b
, False):
263 b
= self
.__right
.next()
266 elif a
[0] == '-' and b
[0] == 'u':
267 for x
in self
.handle_delete_nondir(path
, a
, False):
269 a
= self
.__left
.next()
272 elif a
[0] == 'u' and b
[0] == '-':
273 for x
in self
.handle_add_nondir(path
, b
, False):
275 b
= self
.__right
.next()
278 elif a
[0] == 'd' and (b
[0] == '-' or b
[0] == 'u'):
279 for x
in self
.handle_delete_dir(path
, a
, False):
281 for x
in self
.delete_whole_dir(self
.__left
,
282 os
.path
.join(path
, a
[1])):
284 a
= self
.__left
.next()
287 elif (a
[0] == '-' or a
[0] == 'u') and b
[0] == 'd':
288 for x
in self
.handle_add_dir(path
, b
, False):
290 for x
in self
.add_whole_dir(self
.__right
,
291 os
.path
.join(path
, b
[1])):
293 b
= self
.__right
.next()
297 print "Unhandled case: '%s' and '%s'" % (a
[0], b
[0])
300 def add_whole_dir(self
, iter, path
):
301 "Consume entries until this directory has been added"
302 # print "add_whole_dir: %s" % path
306 for x
in self
.handle_leave(path
, mode_add
):
310 for x
in self
.handle_add_dir(path
, a
, True):
312 for x
in self
.add_whole_dir(iter, os
.path
.join(path
, a
[1])):
315 for x
in self
.handle_add_nondir(path
, a
, True):
318 def delete_whole_dir(self
, iter, path
):
319 "Consume entries until this directory has been deleted"
320 # print "delete_whole_dir: %s" % path
324 for x
in self
.handle_leave(path
, mode_delete
):
328 for x
in self
.handle_delete_dir(path
, a
, True):
330 for x
in self
.delete_whole_dir(iter, os
.path
.join(path
, a
[1])):
333 for x
in self
.handle_delete_nondir(path
, a
, True):
337 'dir': ['uid', 'gid', 'perm'],
338 'file': ['uid', 'gid', 'mtime', 'perm', 'md5'],
340 'sock': ['uid', 'gid', 'perm'],
341 'fifo': ['uid', 'gid', 'perm'],
342 'blk': ['uid', 'gid', 'perm', 'devmaj', 'devmin'],
343 'chr': ['uid', 'gid', 'perm', 'devmaj', 'devmin'],
345 def compare_entries(path
, a
, b
):
346 if a
['kind'] != b
['kind']:
347 yield "- %-20s %s" % (a
['kind'], path
)
348 yield "+ %-20s %s" % (b
['kind'], path
)
351 for item
in __must_match
[a
['kind']]:
352 if not (a
.has_key(item
) and b
.has_key(item
)):
354 elif a
[item
] != b
[item
]:
355 # Python 2.5 stat is returning subseconds, which tar
356 # doesn't backup. We can check this later, but for now,
357 # just ignore the subsecond portions
358 if (item
== 'mtime' and
359 math
.floor(a
[item
]) == math
.floor(b
[item
])):
364 yield " [%-18s] %s" % (",".join(misses
), path
)
366 if a
.has_key('targ'):
367 yield " old targ: %s" % a
['targ']
368 if b
.has_key('targ'):
369 yield " new targ: %s" % b
['targ']
372 class check_comparer(comparer
):
373 """Comparer for comparing either two trees, or a tree and a
374 filesystem. 'right' should be the newer tree.
375 Yields strings giving the tree differences.
def handle_same_dir(self, path, a, b):
    """Report attribute differences for a directory present on both sides.

    Joins `path` with the entry name (a[1]) and delegates to
    compare_entries with the two attribute dictionaries (a[2] and b[2]),
    returning whatever that generator yields.
    """
    full_path = os.path.join(path, a[1])
    return compare_entries(full_path, a[2], b[2])
380 def handle_delete_dir(self
, path
, a
, recursing
):
384 yield "- %-20s %s" % ('dir', os
.path
.join(path
, a
[1]))
385 def handle_add_dir(self
, path
, a
, recursing
):
389 yield "+ %-20s %s" % ('dir', os
.path
.join(path
, a
[1]))
def handle_same_nondir(self, path, a, b):
    """Report attribute differences for a non-directory present on both sides.

    Joins `path` with the entry name (a[1]) and delegates to
    compare_entries with the two attribute dictionaries (a[2] and b[2]),
    returning whatever that generator yields.
    """
    full_path = os.path.join(path, a[1])
    return compare_entries(full_path, a[2], b[2])
393 def handle_delete_nondir(self
, path
, a
, recursing
):
397 yield "- %-20s %s" % (a
[2]['kind'], os
.path
.join(path
, a
[1]))
398 def handle_add_nondir(self
, path
, a
, recursing
):
402 yield "+ %-20s %s" % (a
[2]['kind'], os
.path
.join(path
, a
[1]))
def update_link(assoc, path, name):
    """Record a symlink's target in its attribute dictionary.

    When assoc['kind'] is 'lnk', the link at path/name is read with
    os.readlink and the result is stored under assoc['targ'].  Entries of
    any other kind are left untouched.  `assoc` is modified in place and
    nothing is returned.
    """
    if assoc['kind'] != 'lnk':
        return
    link_path = os.path.join(path, name)
    assoc['targ'] = os.readlink(link_path)
408 def same_inode(a
, b
):
409 """Do these two nodes reference what appears to be the same,
411 return (a
['kind'] == b
['kind'] and
412 a
['ino'] == b
['ino'] and
413 a
['ctime'] == b
['ctime'])
415 class update_comparer(comparer
):
416 """Yields a tree equivalent to the right tree, which should be
417 coming from a live filesystem. Fills in symlink destinations and
418 file md5sums (if possible)."""
420 def handle_same_dir(self
, path
, a
, b
):
424 def handle_add_dir(self
, path
, a
, recursing
):
428 def handle_same_nondir(self
, path
, a
, b
):
429 update_link(b
[2], path
, b
[1])
430 if b
[2]['kind'] == 'file':
431 if same_inode(a
[2], b
[2]):
432 b
[2]['md5'] = a
[2]['md5']
435 b
[2]['md5'] = hashing
.hashof(os
.path
.join(path
, b
[1]))
437 b
[2]['md5'] = '[error]'
441 def handle_add_nondir(self
, path
, a
, recursing
):
442 update_link(a
[2], path
, a
[1])
443 if a
[2]['kind'] == 'file':
445 a
[2]['md5'] = hashing
.hashof(os
.path
.join(path
, a
[1]))
447 a
[2]['md5'] = '[error]'
451 def handle_leave(self
, path
, mode
):
452 if (mode
& mode_add
) != 0:
456 file_version
= 'Asure scan version 1.1'
468 for item
in load(fd
):
474 'Asure scan version 1.0': read1_0
,
475 'Asure scan version 1.1': read1_1
}
478 """Iterate over a previously written dump"""
479 fd
= gzip
.open(path
, 'rb')
481 if readers
.has_key(vers
):
482 for item
in readers
[vers
](fd
):
485 raise "Unsupported version of asure file"
490 def writer_new(path
, iter):
491 """Write the given item (probably assembled iterator)"""
492 fd
= gzip
.open(path
, 'wb')
493 dump(file_version
, fd
, use_protocol
)
497 if len(items
) >= 100:
498 dump(items
, fd
, use_protocol
)
501 dump(items
, fd
, use_protocol
)
504 def writer_old(path
, iter):
505 """Write the given item (probably assembled iterator)"""
506 fd
= gzip
.open(path
, 'wb')
507 dump('Asure scan version 1.0', fd
, use_protocol
)
509 dump(item
, fd
, use_protocol
)
def writer(path, iter):
    """Write the entries produced by `iter` to the scan file at `path`.

    Thin front end over writer_new, which performs the actual dump.
    """
    writer_new(path, iter)
516 """Cycle through the names"""
518 os
.rename('0sure.dat.gz', '0sure.bak.gz')
521 os
.rename('0sure.0.gz', '0sure.dat.gz')
524 """Perform a fresh scan of the filesystem"""
525 tree
= update_comparer(empty_tree(), walk('.'))
526 writer('0sure.0.gz', tree
.run())
530 """Perform a scan of the filesystem, and compare it with the scan
531 file. reports differences."""
532 prior
= reader('0sure.dat.gz')
533 cur
= update_comparer(empty_tree(), walk('.')).run()
534 # compare_trees(prior, cur)
535 for x
in check_comparer(prior
, cur
).run():
539 """Scan filesystem, but also read the previous scan to cache md5
540 hashes of files that haven't had any inode changes"""
541 prior
= reader('0sure.dat.gz')
542 cur
= update_comparer(prior
, walk('.')).run()
543 writer('0sure.0.gz', cur
)
547 """Compare the previous scan with the current."""
548 prior
= reader('0sure.bak.gz')
549 cur
= reader('0sure.dat.gz')
550 for x
in check_comparer(prior
, cur
).run():
554 """Show the contents of the scan file"""
556 for i
in reader('0sure.dat.gz'):
559 print "%s%s" % (" " * indent
, i
)
564 """Just read the scan file, doing nothing with it"""
565 for i
in reader('0sure.dat.gz'):
569 """Copy the latest scan, can be used to update to a newer storage
571 writer('0sure.0.gz', reader('0sure.dat.gz'))
572 os
.rename('0sure.0.gz', '0sure.dat.gz')
586 if commands
.has_key(argv
[0]):
592 print "Asure, version %s" % version
.version
593 print "Usage: asure {%s}" % '|'.join(commands
.keys())
596 if __name__
== '__main__':