Follow-up to r29036: Now that the "mergeinfo" transaction file is no
[svn.git] / tools / dev / scramble-tree.py
blob949c5eb453a46cb67916c9d4e57206bb97ecafdb
1 #!/usr/bin/env python
3 # scramble-tree.py: (See scramble-tree.py --help.)
5 # Makes multiple random file changes to a directory tree, for testing.
7 # This script will add some new files, remove some existing files, add
8 # text to some existing files, and delete text from some existing
9 # files. It will also leave some files completely untouched.
11 # The exact set of changes made is always the same for identical trees,
12 # where "identical" means the names of files and directories are the
13 # same, and they are arranged in the same tree structure (the actual
14 # contents of files may differ). If two are not identical, the sets of
15 # changes scramble-tree.py will make may differ arbitrarily.
17 # Directories named .svn/ and CVS/ are ignored.
19 # Example scenario, starting with a pristine Subversion working copy:
21 # $ ls
22 # foo/
23 # $ svn st foo
24 # $ cp -r foo bar
25 # $ svn st bar
26 # $ scramble-tree.py foo
27 # $ svn st foo
28 # [... see lots of scary status output ...]
29 # $ scramble-tree.py bar
30 # [... see the exact same scary status output ...]
31 # $ scramble-tree.py foo
32 # [... see a new bunch of scary status output ...]
33 # $
35 import os
36 import sys
37 import getopt
38 try:
39 my_getopt = getopt.gnu_getopt
40 except AttributeError:
41 my_getopt = getopt.getopt
42 import random
43 import md5
44 import base64
class VCActions:
    """Base interface for version-control hooks; each hook here does nothing."""

    def __init__(self):
        # There is no state to initialise.
        return None

    def add_file(self, path):
        """Hook for putting the already-existing file PATH under version control."""
        return None

    def remove_file(self, path):
        """Hook for removing PATH from version control and deleting it."""
        return None
class NoVCActions(VCActions):
    """Actions for a tree under no version control: removal just deletes."""

    def remove_file(self, path):
        """Delete the file PATH from disk."""
        # os.remove and os.unlink are the same operation.
        os.remove(path)
class CVSActions(VCActions):
    """Perform file additions and removals by running the "cvs" client."""

    def _run_cvs(self, args, path):
        """Run "cvs ARGS BASENAME" inside the directory that contains PATH.

        The command is passed as an argument list rather than a shell
        string, so file names containing quotes, '$', backticks and the
        like reach cvs unmodified.  The child's working directory is set
        with cwd=, so this process's own working directory is never
        changed.  The command's exit status is ignored.
        """
        import subprocess
        dirname, basename = os.path.split(path)
        workdir = os.path.join(os.getcwd(), dirname)
        try:
            subprocess.call(['cvs'] + args + [basename], cwd=workdir)
        except OSError:
            # The command could not be started at all; report and carry on.
            sys.stderr.write("scramble-tree.py: cannot run 'cvs' in '%s': %s\n"
                             % (workdir, sys.exc_info()[1]))

    def add_file(self, path):
        """Schedule the existing file PATH for addition with "cvs add"."""
        self._run_cvs(['-Q', 'add', '-m', 'Adding file to repository'], path)

    def remove_file(self, path):
        """Delete PATH and schedule its removal with "cvs rm -f"."""
        self._run_cvs(['-Q', 'rm', '-f'], path)
class SVNActions(VCActions):
    """Perform file additions and removals by running the "svn" client."""

    def _run_svn(self, args):
        """Run "svn ARGS", ignoring the command's exit status.

        The command is passed as an argument list rather than a shell
        string, so paths containing quotes, '$', backticks and the like
        reach svn unmodified.
        """
        import subprocess
        try:
            subprocess.call(['svn'] + args)
        except OSError:
            # The command could not be started at all; report and carry on.
            sys.stderr.write("scramble-tree.py: cannot run 'svn': %s\n"
                             % (sys.exc_info()[1],))

    def add_file(self, path):
        """Schedule the existing file PATH for addition with "svn add"."""
        self._run_svn(['add', '--quiet', path])

    def remove_file(self, path):
        """Delete PATH from disk, then schedule its removal with "svn rm"."""
        os.remove(path)
        self._run_svn(['rm', '--quiet', '--force', path])
class hashDir:
    """Given a directory, creates a string containing all directories
    and files under that directory (sorted alphanumerically) and makes a
    base64-encoded md5 hash of the resulting string.  Directories named
    .svn and CVS, and everything beneath them, are left out.  Call
    hashDir.gen_seed() to generate a seed value for this tree."""

    # Names of version-control administrative directories to ignore.
    _ignored_dirs = ('.svn', 'CVS')

    def __init__(self, rootdir):
        """Record the path (relative to ROOTDIR) of every directory and
        file under ROOTDIR in self.allfiles."""
        self.allfiles = []
        baselen = len(rootdir)
        for dirpath, dirnames, filenames in os.walk(rootdir):
            # Prune in place so that os.walk never descends into the
            # administrative directories.
            dirnames[:] = [name for name in dirnames
                           if name not in self._ignored_dirs]
            self.walker_callback(baselen, dirpath, dirnames + filenames)

    def gen_seed(self):
        """Return the base64-encoded MD5 hash of the sorted tree listing,
        with the trailing '==' padding stripped."""
        import hashlib
        self.allfiles.sort()
        listing = ''.join(self.allfiles)
        if not isinstance(listing, bytes):
            # Text strings must be encoded before they can be hashed.
            listing = listing.encode('utf-8', 'surrogateescape')
        encoded = base64.b64encode(hashlib.md5(listing).digest())
        if not isinstance(encoded, str):
            encoded = encoded.decode('ascii')
        # A 16-byte digest always encodes with two '=' padding characters.
        return encoded[:-2]

    def walker_callback(self, baselen, dirname, fnames):
        """Append DIRNAME and each non-directory entry of FNAMES found in
        it to self.allfiles, with the first BASELEN characters (the root
        directory prefix) removed.  Does nothing if DIRNAME is itself
        named .svn or CVS."""
        if os.path.basename(dirname) in self._ignored_dirs:
            return
        self.allfiles.append(dirname[baselen:])
        for filename in fnames:
            path = os.path.join(dirname, filename)
            # Directories are recorded when they are visited themselves.
            if not os.path.isdir(path):
                self.allfiles.append(path[baselen:])
class Scrambler:
    """Collect scheduled tree changes and carry them out pseudo-randomly.

    Operations are queued with schedule_addition() and schedule_munge()
    and performed by enact().  Every random choice is drawn from one
    random.Random instance seeded with the SEED given to the constructor.
    """

    def __init__(self, seed, vc_actions, dry_run, quiet):
        """Create a scrambler.

        SEED seeds the random number generator and is printed unless
        QUIET is true.  VC_ACTIONS is the object whose add_file() and
        remove_file() methods are called for additions and deletions.
        If DRY_RUN is true, operations are reported but the disk is not
        changed.
        """
        if not quiet:
            print('SEED: ' + seed)

        self.rand = random.Random(seed)
        self.vc_actions = vc_actions
        self.dry_run = dry_run
        self.quiet = quiet
        self.ops = []   ### ["add" | "munge", path]
        self.greeking = """
======================================================================
This is some text that was inserted into this file by the lovely and
talented scramble-tree.py script.
======================================================================
"""

    ### Helpers
    def shrink_list(self, list, remove_count):
        """Remove REMOVE_COUNT randomly chosen items from LIST, in place,
        and return LIST.  If LIST has REMOVE_COUNT or fewer items it is
        left untouched and a new empty list is returned instead."""
        if len(list) <= remove_count:
            return []
        for i in range(remove_count):
            # randrange() excludes its upper bound, so passing len(list)
            # makes every index, including the last, a candidate.
            j = self.rand.randrange(len(list))
            del list[j]
        return list

    def _new_file_path(self, dir):
        """Return the first "newfile.NNNNN.txt" path in DIR that does not
        exist yet.  Raises Exception if all such names are taken."""
        for i in range(99999):
            path = os.path.join(dir, "newfile.%05d.txt" % i)
            if not os.path.exists(path):
                return path
        raise Exception("Ran out of unique new filenames in directory '%s'" % dir)

    def _make_new_file(self, dir):
        """Create a new file holding the greeking text in DIR and return
        its path."""
        path = self._new_file_path(dir)
        fh = open(path, 'w')
        try:
            fh.write(self.greeking)
        finally:
            fh.close()
        return path

    ### File Mungers
    def _mod_append_to_file(self, path):
        """Append the greeking text to the file PATH."""
        if not self.quiet:
            print('append_to_file: %s' % (path,))
        if self.dry_run:
            return
        fh = open(path, "a")
        try:
            fh.write(self.greeking)
        finally:
            fh.close()

    def _mod_remove_from_file(self, path):
        """Remove five randomly chosen lines from the file PATH; a file
        with five or fewer lines is emptied."""
        if not self.quiet:
            print('remove_from_file: %s' % (path,))
        if self.dry_run:
            # NOTE: a real run draws random numbers in shrink_list() below
            # that a dry run skips, so the mungers chosen for later files
            # can differ between a dry run and a real run.
            return
        fh = open(path, "r")
        try:
            lines = fh.readlines()
        finally:
            fh.close()
        lines = self.shrink_list(lines, 5)
        fh = open(path, "w")
        try:
            fh.writelines(lines)
        finally:
            fh.close()

    def _mod_delete_file(self, path):
        """Remove the file PATH via the version-control actions object."""
        if not self.quiet:
            print('delete_file: %s' % (path,))
        if self.dry_run:
            return
        self.vc_actions.remove_file(path)

    ### Public Interfaces
    def get_randomizer(self):
        """Return the random.Random instance used for all choices."""
        return self.rand

    def schedule_munge(self, path):
        """Queue the file PATH for a randomly chosen modification."""
        self.ops.append(tuple(["munge", path]))

    def schedule_addition(self, dir):
        """Queue the creation of a new file in the directory DIR."""
        self.ops.append(tuple(["add", dir]))

    def enact(self, limit):
        """Carry out the queued operations.

        LIMIT of 0 means do nothing.  A positive LIMIT no larger than the
        number of queued operations randomly discards operations until
        LIMIT remain.  Any other value (None, negative, or larger than
        the queue) leaves the queue as it is.
        """
        num_ops = len(self.ops)
        if limit == 0:
            return
        if limit is not None and 0 < limit <= num_ops:
            self.ops = self.shrink_list(self.ops, num_ops - limit)

        # Appending and removing text are each three times as likely as
        # deleting the file outright.
        file_mungers = [self._mod_append_to_file,
                        self._mod_append_to_file,
                        self._mod_append_to_file,
                        self._mod_remove_from_file,
                        self._mod_remove_from_file,
                        self._mod_remove_from_file,
                        self._mod_delete_file,
                        ]
        for op, path in self.ops:
            if op == "add":
                if self.dry_run:
                    # Only work out the name; a dry run must not create
                    # the file, and must go on to the remaining ops.
                    path = self._new_file_path(path)
                else:
                    path = self._make_new_file(path)
                if not self.quiet:
                    print("add_file: %s" % (path,))
                if not self.dry_run:
                    self.vc_actions.add_file(path)
            elif op == "munge":
                self.rand.choice(file_mungers)(path)
def usage(retcode=255):
    """Print the command-line help to stdout and exit with status RETCODE."""
    help_lines = [
        'Usage: %s [OPTIONS] DIRECTORY' % (sys.argv[0]),
        '',
        'Options:',
        ' --help, -h : Show this usage message.',
        ' --seed ARG : Use seed ARG to scramble the tree.',
        ' --use-svn : Use Subversion (as "svn") to perform file additions',
        ' and removals.',
        ' --use-cvs : Use CVS (as "cvs") to perform file additions',
        ' and removals.',
        ' --dry-run : Don\'t actually change the disk.',
        ' --limit N : Limit the scrambling to a maximum of N operations.',
        ' --quiet, -q : Run in stealth mode!',
    ]
    for line in help_lines:
        # A single parenthesised argument prints identically whether
        # print is a statement (Python 2) or a function (Python 3).
        print(line)
    sys.exit(retcode)
def walker_callback(scrambler, dirname, fnames):
    """Randomly schedule changes for one directory of the tree.

    With a one-in-five chance a new file is scheduled for addition to
    DIRNAME, and each non-directory entry of FNAMES has a one-in-three
    chance of being scheduled for munging.  Nothing is scheduled, and no
    random numbers are drawn, when any component of DIRNAME is named
    .svn or CVS.

    FNAMES is sorted in place so that the random draws are matched to
    files in an order that does not depend on how the filesystem happens
    to list them.
    """
    # Compare whole path components: a directory that merely has "CVS"
    # or ".svn" somewhere in its name is still scrambled.
    for component in os.path.normpath(dirname).split(os.sep):
        if component in ('.svn', 'CVS'):
            return
    fnames.sort()
    rand = scrambler.get_randomizer()
    if rand.randrange(5) == 1:
        scrambler.schedule_addition(dirname)
    for filename in fnames:
        path = os.path.join(dirname, filename)
        # Directories never consume a random draw (short-circuit "and").
        if not os.path.isdir(path) and rand.randrange(3) == 1:
            scrambler.schedule_munge(path)
def main():
    """Parse the command line, then scramble the requested directory tree."""
    seed = None
    vc_actions = NoVCActions()
    dry_run = 0
    quiet = 0
    limit = None

    # Mm... option parsing.
    try:
        optlist, args = my_getopt(sys.argv[1:], "hq",
                                  ['seed=', 'use-svn', 'use-cvs',
                                   'help', 'quiet', 'dry-run', 'limit='])
    except getopt.GetoptError:
        # Unknown option or missing argument: explain, then show usage
        # (which exits) rather than dying with a traceback.
        sys.stderr.write('%s: %s\n' % (sys.argv[0], sys.exc_info()[1]))
        usage()
    for opt, arg in optlist:
        if opt == '--help' or opt == '-h':
            usage(0)
        elif opt == '--seed':
            seed = arg
        elif opt == '--use-svn':
            vc_actions = SVNActions()
        elif opt == '--use-cvs':
            vc_actions = CVSActions()
        elif opt == '--dry-run':
            dry_run = 1
        elif opt == '--limit':
            try:
                limit = int(arg)
            except ValueError:
                sys.stderr.write("%s: --limit expects an integer, not '%s'\n"
                                 % (sys.argv[0], arg))
                usage()
        elif opt == '--quiet' or opt == '-q':
            quiet = 1

    # We need exactly one path to work with, here.
    if len(args) != 1:
        usage()
    rootdir = args[0]

    # If a seed wasn't provided, calculate one.
    if seed is None:
        seed = hashDir(rootdir).gen_seed()
    scrambler = Scrambler(seed, vc_actions, dry_run, quiet)

    # Visit directories and files in sorted order so that the random
    # draws do not depend on how the filesystem happens to list entries.
    for dirpath, dirnames, filenames in os.walk(rootdir):
        # Assigning to the slice prunes and orders os.walk's descent.
        dirnames[:] = sorted([name for name in dirnames
                              if name not in ('.svn', 'CVS')])
        walker_callback(scrambler, dirpath, sorted(dirnames + filenames))
    scrambler.enact(limit)
# Scramble a tree only when run as a script, not when imported as a module.
if __name__ == "__main__":
    main()