Convert tree comparer into a generator.
[asure.git] / asure.py
blob82298e425c5c7659c79e5482d7486915b917000e
1 #! /usr/bin/env python
3 # Directory integrity scanner.
5 from stat import *
6 import os
7 import sys
8 from os.path import join
10 from cPickle import dump, load
11 import gzip
def walk(top):
    """Generate the scan events for the tree rooted at `top`.

    The root is lstat'ed once; its result is handed to walker() so the
    walk can tell which subdirectories live on the same device.  The
    root directory itself is always reported under the name '.'.
    """
    root_stat = os.lstat(top)
    events = walker(top, '.', root_stat)
    for event in events:
        yield event
def walker(path, name, topstat):
    """Directory tree generator.

    At one point, this started as a copy of os.walk from Python's
    library.  Even the arguments are different now.

    Arguments:
      path    -- filesystem path of the directory to list.
      name    -- name to report for this directory in its 'd' event.
      topstat -- lstat result of the root of the walk; subdirectories
                 whose st_dev differs from it are not reported at all.

    Yields, in order:
      ('d', name)   entering this directory,
      the complete event stream of each subdirectory (sorted by name),
      ('-', entry)  for each entry that is not a directory (sorted),
      ('u',)        leaving this directory.

    A directory that cannot be listed produces a warning on stderr and
    no events.  An entry that cannot be lstat'ed (for instance because
    it vanished after the listing was taken) produces a warning on
    stderr and is skipped.
    """
    try:
        names = os.listdir(path)
    except OSError:
        sys.stderr.write("Warning, can't read dir: %s\n" % path)
        return

    # The verification algorithm requires the names to be sorted.
    names.sort()

    # The scanner's own data files in the top directory are not part of
    # the tree being verified.
    own_files = ("0sure.dat.gz", "0sure.bak.gz", "0sure.0.gz")

    # Stat each name found, and put the result in one of two lists.
    dirs, nondirs = [], []
    for onename in names:
        if path == '.' and onename in own_files:
            continue
        fullname = join(path, onename)
        try:
            st = os.lstat(fullname)
        except OSError:
            # Do not abort the whole scan because one entry went away.
            sys.stderr.write("Warning, can't stat: %s\n" % fullname)
            continue
        if S_ISDIR(st.st_mode):
            dirs.append((onename, st))
        else:
            nondirs.append((onename, st))

    # Indicate "entering" the directory.
    yield 'd', name

    # Then recursively walk into all of the subdirectories that are on
    # the same device as the root of the walk.
    for (onename, st) in dirs:
        if st.st_dev == topstat.st_dev:
            for x in walker(join(path, onename), onename, topstat):
                yield x

    # Then yield each entry that is not a subdirectory.
    for (onename, st) in nondirs:
        yield '-', onename

    # Last, yield the leaving.
    yield ('u',)
def empty_generator():
    """Return a generator that finishes immediately without yielding."""
    # The unreachable yield is what makes this function a generator.
    if False:
        yield ()
class comparer:
    """Class for comparing two directory iterations.  Keeps track of
    state, and allows child classes to define handlers for the various
    types of differences found.

    Both iterations are streams of tuples: ('d', name) when entering a
    directory, ('-', name) for a non-directory, and ('u',) when leaving
    the current directory.  Within one directory the subdirectories
    come first, then the non-directories, each group sorted by name;
    the comparison relies on that order.

    'left' is the older stream and 'right' the newer one: an entry only
    in 'left' is reported as deleted, one only in 'right' as added."""

    # Position of each kind of entry within a directory listing.
    _tag_rank = {'d': 0, '-': 1, 'u': 2}

    def __init__(self, left, right):
        self.__left = left
        self.__right = right

    # Default handlers for the 6 possible changes (or not changes)
    # that can happen in a directory.  Each handler returns an iterable
    # whose items are passed through to the caller of run().  'path' is
    # the directory containing the entry, and 'a' (and 'b') the entry
    # tuple itself.  The adds and deletes take an additional argument
    # that will be set to true if this added or removed entity is
    # contained in an entirely new (or entirely removed) directory.
    # Some handlers may want to avoid printing verbose messages for the
    # contents of added or deleted directories, and can use this value.
    def handle_same_dir(self, path, a, b):
        return empty_generator()

    def handle_delete_dir(self, path, a, recursing):
        return empty_generator()

    def handle_add_dir(self, path, a, recursing):
        return empty_generator()

    def handle_same_nondir(self, path, a, b):
        return empty_generator()

    def handle_delete_nondir(self, path, a, recursing):
        return empty_generator()

    def handle_add_nondir(self, path, a, recursing):
        return empty_generator()

    def run(self):
        """Check that both streams start with a directory and return a
        generator over everything the handlers yield.

        Raises ValueError if either stream starts with something other
        than a 'd' entry."""
        a = next(self.__left)
        if a[0] != 'd':
            raise ValueError("Scan doesn't start with a directory")
        b = next(self.__right)
        if b[0] != 'd':
            raise ValueError("Tree walk doesn't start with a directory")
        return self.__run(b[1], 1)

    def __sort_key(self, entry):
        """Return a key that orders entries the way a listing does."""
        tag = entry[0]
        if tag not in self._tag_rank:
            print("Unhandled case!!!")
            sys.exit(2)
        if tag == 'u':
            # The end marker carries no name and sorts after everything.
            return (self._tag_rank[tag], '')
        return (self._tag_rank[tag], entry[1])

    def __run(self, path, depth):
        """Iterate both pairs of directories equally

        Processes the contents of a single directory, recursively
        calling itself to handle child directories.  Returns with both
        iterators advanced past the 'u' node that ends the dir."""
        a = next(self.__left)
        b = next(self.__right)

        while True:
            key_a = self.__sort_key(a)
            key_b = self.__sort_key(b)

            if key_a == key_b:
                if a[0] == 'u':
                    # Both are leaving the directory.
                    return
                if a[0] == 'd':
                    # Same directory on both sides, walk into it.
                    for x in self.handle_same_dir(path, a, b):
                        yield x
                    for x in self.__run(os.path.join(path, a[1]), depth + 1):
                        yield x
                else:
                    # Same non-directory on both sides.
                    for x in self.handle_same_nondir(path, a, b):
                        yield x
                a = next(self.__left)
                b = next(self.__right)

            elif key_a < key_b:
                # The left entry has no counterpart: it was deleted.
                # This includes a directory on the left while the right
                # side is already at its non-directories or at its end.
                if a[0] == 'd':
                    for x in self.handle_delete_dir(path, a, False):
                        yield x
                    gone = os.path.join(path, a[1])
                    for x in self.delete_whole_dir(self.__left, gone):
                        yield x
                else:
                    for x in self.handle_delete_nondir(path, a, False):
                        yield x
                a = next(self.__left)

            else:
                # The right entry has no counterpart: it was added.
                if b[0] == 'd':
                    for x in self.handle_add_dir(path, b, False):
                        yield x
                    new = os.path.join(path, b[1])
                    for x in self.add_whole_dir(self.__right, new):
                        yield x
                else:
                    for x in self.handle_add_nondir(path, b, False):
                        yield x
                b = next(self.__right)

    def add_whole_dir(self, iter, path):
        """Consume entries until this directory has been added

        'iter' must be positioned just after the 'd' entry of the added
        directory, and 'path' is the path of that directory.  Every
        entry up to and including the matching 'u' is consumed and
        reported through the add handlers with recursing set."""
        while True:
            a = next(iter)
            if a[0] == 'u':
                return
            elif a[0] == 'd':
                for x in self.handle_add_dir(path, a, True):
                    yield x
                for x in self.add_whole_dir(iter, os.path.join(path, a[1])):
                    yield x
            else:
                for x in self.handle_add_nondir(path, a, True):
                    yield x

    def delete_whole_dir(self, iter, path):
        """Consume entries until this directory has been deleted

        'iter' must be positioned just after the 'd' entry of the
        deleted directory, and 'path' is the path of that directory.
        Every entry up to and including the matching 'u' is consumed and
        reported through the delete handlers with recursing set."""
        while True:
            a = next(iter)
            if a[0] == 'u':
                return
            elif a[0] == 'd':
                for x in self.handle_delete_dir(path, a, True):
                    yield x
                for x in self.delete_whole_dir(iter, os.path.join(path, a[1])):
                    yield x
            else:
                for x in self.handle_delete_nondir(path, a, True):
                    yield x
class check_comparer(comparer):
    """Comparer for comparing either two trees, or a tree and a
    filesystem.  'right' should be the newer tree.
    Yields strings giving the tree differences.

    Unchanged entries produce no output.  Entries that live inside an
    added or deleted directory (recursing is true) produce no output
    either; only the directory itself is reported."""

    def handle_same_dir(self, path, a, b):
        return empty_generator()

    def handle_delete_dir(self, path, a, recursing):
        if not recursing:
            yield "- dir %s" % (os.path.join(path, a[1]))

    def handle_add_dir(self, path, a, recursing):
        if not recursing:
            yield "+ dir %s" % (os.path.join(path, a[1]))

    def handle_same_nondir(self, path, a, b):
        return empty_generator()

    def handle_delete_nondir(self, path, a, recursing):
        if not recursing:
            yield "- %s" % (os.path.join(path, a[1]))

    def handle_add_nondir(self, path, a, recursing):
        if not recursing:
            yield "+ %s" % (os.path.join(path, a[1]))
# Format tag stored as the first pickled object of every scan file;
# reader() refuses files whose tag differs.
version = 'Asure scan version 1.0'

def reader(path):
    """Iterate over a previously written dump

    Opens the gzip-compressed file at 'path', checks that its first
    pickled object equals 'version', then yields every following
    pickled object until the end of the file.  The file is closed when
    the generator finishes or is closed.

    Raises ValueError if the version tag does not match."""
    fd = gzip.open(path, 'rb')
    try:
        # NOTE: unpickling can run arbitrary code.  Only read scan files
        # from a trusted source.
        vers = load(fd)
        if version != vers:
            raise ValueError("incompatible version of asure file")
        try:
            while True:
                yield load(fd)
        except EOFError:
            # Running out of pickled objects is the normal end of data.
            return
    finally:
        fd.close()
def writer(path, tmppath, iter):
    """Write the given item (probably assembled iterator)

    Pickles 'version' followed by every item of 'iter' into a
    gzip-compressed file at 'tmppath', closes it, and then renames it
    to 'path'.  If writing fails, the exception propagates and 'path'
    is left untouched."""
    fd = gzip.open(tmppath, 'wb')
    try:
        dump(version, fd, -1)
        for item in iter:
            dump(item, fd, -1)
    finally:
        # The gzip stream must be closed before the rename, otherwise
        # the compressed data may still be incomplete on disk.
        fd.close()
    os.rename(tmppath, path)
def fresh_scan():
    """Scan the current directory and store the result.

    The scan is written to '0sure.0.gz' first and then moved to
    '0sure.dat.gz' by writer()."""
    tree = walk('.')
    writer('0sure.dat.gz', '0sure.0.gz', tree)
def check_scan():
    """Compare the stored scan in '0sure.dat.gz' with a new walk of the
    current directory, printing one line per difference found."""
    differences = check_comparer(reader('0sure.dat.gz'), walk('.')).run()
    for line in differences:
        print(line)
def main(argv):
    """Run the subcommand named by the single element of 'argv'.

    scan   -- write a fresh scan of the current directory.
    update -- not implemented here; only prints "Update".
    check  -- compare the stored scan with the current directory.
    show   -- print every entry of the stored scan.

    A wrong number of arguments or an unknown subcommand calls usage().
    """
    if len(argv) != 1:
        usage()
    command = argv[0]
    if command == 'scan':
        fresh_scan()
    elif command == 'update':
        print("Update")
    elif command == 'check':
        check_scan()
    elif command == 'show':
        for i in reader('0sure.dat.gz'):
            print(i)
    else:
        # A mistyped subcommand must not look like a successful run.
        usage()
def usage():
    """Show the command-line synopsis on standard output and terminate
    the program with exit status 1."""
    sys.stdout.write("Usage: asure {scan|update|check}\n")
    raise SystemExit(1)
if __name__ == '__main__':
    # Run as a script: hand everything after the program name to main().
    main(sys.argv[1:])