Write primitive tree comparison.
[asure.git] / asure.py
blob7aea9bacf8e83fde9604127ff065fb70eedc755a
1 #! /usr/bin/env python
3 # Directory integrity scanner.
5 from stat import *
6 import os
7 import sys
8 from os.path import join
10 from cPickle import dump, load
11 import gzip
def walk(top):
    """Generate the scan events for the directory tree rooted at `top`.

    The root directory is reported under the name '.', and its lstat
    result is handed to walker() as the reference stat for the whole walk.
    """
    root_stat = os.lstat(top)
    events = walker(top, '.', root_stat)
    for event in events:
        yield event
def walker(path, name, topstat):
    """Directory tree generator.

    Yields a flat stream of tuples describing the tree found at `path`:

      ('d', name)  -- entering the directory reported as `name`
      ('-', name)  -- a non-directory entry of the current directory
      ('u',)       -- leaving the current directory

    Within one directory the subdirectories come first (each walked
    recursively, in sorted name order), followed by the non-directories
    in sorted name order.  A subdirectory whose st_dev differs from
    `topstat.st_dev` is not descended into and produces no events.

    A directory that cannot be listed produces a warning on stderr and no
    events at all.  An entry that cannot be lstat'ed (for instance because
    it disappeared after the directory was listed) produces a warning on
    stderr and is left out of the stream instead of aborting the walk.

    Arguments:
      path    -- filesystem path of the directory to read
      name    -- name to report in this directory's 'd' event
      topstat -- stat result of the walk's root, used for the st_dev check
    """
    try:
        names = os.listdir(path)
    except OSError:
        sys.stderr.write("Warning, can't read dir: %s\n" % path)
        return

    # The verification algorithm requires the names to be sorted.
    names.sort()

    # Stat each name found, and put the result in one of two lists.
    dirs, nondirs = [], []
    for onename in names:
        fullpath = join(path, onename)
        try:
            st = os.lstat(fullpath)
        except OSError:
            # The entry went away (or became unreadable) between listdir
            # and lstat; skip it rather than killing the whole scan.
            sys.stderr.write("Warning, can't stat: %s\n" % fullpath)
            continue
        if S_ISDIR(st.st_mode):
            dirs.append((onename, st))
        else:
            nondirs.append((onename, st))

    # Indicate "entering" the directory.
    yield 'd', name

    # Then recursively walk into all of the subdirectories that live on
    # the same device as the root of the walk.
    for onename, st in dirs:
        if st.st_dev == topstat.st_dev:
            for x in walker(join(path, onename), onename, topstat):
                yield x

    # Then yield each entry that is not a subdirectory.
    for onename, st in nondirs:
        yield '-', onename

    # Last, yield the leaving.
    yield ('u',)
def compare_trees(prior, cur):
    """Compare two scanned trees.

    `prior` and `cur` are iterators over scan events (tuples whose first
    element is 'd', '-' or 'u').  Each stream must begin with a 'd' event
    for its root directory; the remainder of both streams is then handed
    to comp_walk(), which prints the differences it finds.

    Raises:
      ValueError -- if either stream is empty or does not start with a
                    'd' event.
    """
    # next() with a default turns an empty stream into a clear error
    # instead of leaking StopIteration to the caller.
    a = next(prior, None)
    if a is None or a[0] != 'd':
        raise ValueError("Scan doesn't start with a directory")
    b = next(cur, None)
    if b is None or b[0] != 'd':
        raise ValueError("Tree walk doesn't start with a directory")

    # We don't concern ourselves with whether the names are the same
    # at this point.
    comp_walk(prior, cur)
def comp_walk(prior, cur, depth=1):
    """Compare the contents of one directory in two event streams.

    `prior` and `cur` are iterators positioned just after the 'd' event
    of the directory being compared.  Both are advanced in step; every
    difference is printed to stdout.  Returns once both streams have
    produced the 'u' event that leaves this directory.

    The comparison relies on the ordering produced by walker(): within a
    directory all subdirectories come first, then all non-directories,
    each group sorted by name.

    Arguments:
      prior -- iterator over the previously recorded events
      cur   -- iterator over the events of the current tree
      depth -- nesting level of this call (informational only)

    Exits the process with status 2 if an event pair is seen that none of
    the cases below recognises (i.e. an unknown event kind).
    """
    # print(...) with a single argument behaves the same on Python 2 and 3.
    a = next(prior)
    b = next(cur)
    while True:
        akind, bkind = a[0], b[0]

        # Both are 'leave' nodes: this directory is finished on both sides.
        if akind == 'u' and bkind == 'u':
            return

        if akind == 'd' and bkind == 'd':
            # Both are looking at a child subdirectory.
            if a[1] == b[1]:
                # Same name, just walk this tree.
                comp_walk(prior, cur, depth + 1)
                a = next(prior)
                b = next(cur)
            elif a[1] < b[1]:
                # A directory has been deleted.
                print("Delete dir: %s" % a[1])
                consume_dir(prior)
                a = next(prior)
            else:
                # A directory has been added.
                print("Add dir: %s" % b[1])
                consume_dir(cur)
                b = next(cur)

        elif akind == '-' and bkind == '-':
            # Both are looking at a non-dir.
            if a[1] == b[1]:
                # Same name, all is well.
                a = next(prior)
                b = next(cur)
            elif a[1] < b[1]:
                print("Delete nondir: %s" % a[1])
                a = next(prior)
            else:
                print("Add nondir: %s" % b[1])
                b = next(cur)

        elif akind == '-' and bkind == 'u':
            print("Delete nondir: %s" % a[1])
            a = next(prior)

        elif akind == 'u' and bkind == '-':
            print("New nondir: %s" % b[1])
            b = next(cur)

        elif akind == 'd' and (bkind == '-' or bkind == 'u'):
            # cur has no subdirectories left (it is already at its
            # non-dirs, or at the end of the directory), so the one prior
            # is looking at must have been deleted.
            print("Delete dir: %s" % a[1])
            consume_dir(prior)
            a = next(prior)

        elif (akind == '-' or akind == 'u') and bkind == 'd':
            # prior has no subdirectories left, so this one is new.
            print("Add dir: %s" % b[1])
            consume_dir(cur)
            b = next(cur)

        else:
            print("Unhandled case: prior: %s, cur: %s" % (a[0], b[0]))
            sys.exit(2)


def consume_dir(iter):
    """Consume entries until this directory has been exhausted.

    `iter` is an event iterator positioned just after a 'd' event.  All
    events up to and including the matching 'u' are discarded, including
    the complete contents of any nested directories.
    """
    while True:
        a = next(iter)
        if a[0] == 'u':
            return
        elif a[0] == 'd':
            # Skip the nested directory, including its own 'u'.
            consume_dir(iter)
# Format identifier.  writer() stores it as the first pickled record of a
# scan file, and reader() refuses any file whose first record differs.
version = 'Asure scan version 1.0'


def reader(path):
    """Iterate over a previously written dump.

    Opens the gzip-compressed file at `path`, checks that its first
    pickled record equals `version`, and then yields every following
    pickled record until the end of the file.  The file is closed when
    the generator finishes or is discarded.

    Raises:
      ValueError -- if the stored version record does not match.

    NOTE: the records are unpickled, and unpickling can execute arbitrary
    code.  Only read scan files that come from a trusted source.
    """
    fd = gzip.open(path, 'rb')
    try:
        vers = load(fd)
        if version != vers:
            raise ValueError("incompatible version of asure file")
        while True:
            try:
                item = load(fd)
            except EOFError:
                # Normal end of the record stream.
                return
            yield item
    finally:
        fd.close()


def writer(path, tmppath, iter):
    """Write a scan file containing every item produced by `iter`.

    The `version` record is written first, followed by one pickled record
    per item, all gzip-compressed.  The data goes to `tmppath` and is
    renamed onto `path` only after the file has been closed, so the
    rename never happens for a file that failed part-way through.

    Arguments:
      path    -- final name of the scan file
      tmppath -- temporary name used while writing
      iter    -- iterable of picklable items
    """
    fd = gzip.open(tmppath, 'wb')
    try:
        # Protocol -1 selects the highest pickle protocol available.
        dump(version, fd, -1)
        for item in iter:
            dump(item, fd, -1)
    finally:
        # Closing is what flushes the compressed data and gzip trailer.
        fd.close()
    os.rename(tmppath, path)
def fresh_scan():
    """Scan the current directory and store the result.

    The events from walk('.') are written to 'asure.0.gz', which writer()
    then renames to 'asure.dat.gz'.
    """
    events = walk('.')
    writer('asure.dat.gz', 'asure.0.gz', events)
def check_scan():
    """Compare the current directory against the stored scan.

    Reads the recorded events from 'asure.dat.gz', walks '.' afresh, and
    lets compare_trees() report the differences between the two.
    """
    compare_trees(reader('asure.dat.gz'), walk('.'))
def main(argv):
    """Run the command named by the single command-line argument.

    `argv` is the argument list without the program name.  Recognised
    commands:

      scan   -- perform a fresh scan (fresh_scan)
      update -- currently only prints "Update"
      check  -- compare the tree with the stored scan (check_scan)
      show   -- print every record of 'asure.dat.gz'

    Anything else, or a wrong number of arguments, ends in usage(),
    which prints the usage line and exits.
    """
    if len(argv) != 1:
        usage()
    command = argv[0]
    # print(...) with a single argument behaves the same on Python 2 and 3.
    if command == 'scan':
        fresh_scan()
    elif command == 'update':
        print("Update")
    elif command == 'check':
        check_scan()
    elif command == 'show':
        for i in reader('asure.dat.gz'):
            print(i)
    else:
        # Previously an unknown command was silently ignored.
        usage()
def usage():
    """Print the command-line usage summary and exit with status 1."""
    # 'show' is listed because main() accepts it as a command.
    print("Usage: asure {scan|update|check|show}")
    sys.exit(1)
if __name__ == '__main__':
    # Executed as a script: pass the command-line arguments, without the
    # program name, to main().
    main(sys.argv[1:])