1 #!/usr/bin/env python2.4
3 # svn-fast-backup: use rsync snapshots for very fast FSFS repository backup.
4 # Multiple FSFS backups share data via hardlinks, meaning old backups are
5 # almost free, since a newer revision of a repository is almost a complete
6 # superset of an older revision.
8 # This is good for replacing incremental log-dump+restore-style backups
9 # because it is just as space-conserving and even faster; there is no
10 # inter-backup state (old backups are essentially caches); each backup
11 # directory is self-contained. It keeps the same interface as svn-hot-backup
12 # (if you use --force), but only works for FSFS repositories.
14 # Author: Karl Chen <quarl@quarl.org>
16 ## quarl 2005-08-17 initial version
17 ## quarl 2005-09-01 refactor, documentation; new options: --force, --keep,
18 ## --simulate, --trace
21 # $LastChangedRevision$
25 # Originally based on svn-hot-backup.py, whose copyright notice states:
27 # ====================================================================
28 # Copyright (c) 2000-2004 CollabNet. All rights reserved.
30 # This software is licensed as described in the file COPYING, which
31 # you should have received as part of this distribution. The terms
32 # are also available at http://subversion.tigris.org/license-1.html.
33 # If newer versions of this license are posted there, you may use a
34 # newer version instead, at your option.
36 # This software consists of voluntary contributions made by many
37 # individuals. For exact contribution history, see the revision
38 # history and logs, available at http://subversion.tigris.org/.
39 # ====================================================================
41 ######################################################################
45 import subprocess # python2.4
47 ######################################################################
# External programs used by this script.  svnlook and rsync are placed at the
# front of the argument lists that this file hands to subprocess, so a full
# path may be substituted for the bare command name.  svnadmin is configured
# here but is not referenced in the visible portion of this file.

# Path to svnlook
svnlook = 'svnlook'
# Path to svnadmin
svnadmin = 'svnadmin'
# Path to rsync
rsync = 'rsync'
54 ######################################################################
55 # Command line arguments
58 raise SystemExit("""Syntax: %s [OPTIONS] repos_path backup_dir
60 Makes a hot backup of a Subversion FSFS repository at REPOS_PATH to
63 If a previous version exists, make hard links of its files using rsync.
64 As multiple FSFS backups share data via hardlinks, old backups use
65 almost no space, since a newer revision of a repository is almost a complete
66 superset of an older revision (excluding direct repository modifications).
68 Keeps up to N backups and deletes the rest. (N includes the current backup.)
71 -h, --help This screen
72 -q, --quiet Quieter than usual
73 -k, --keep=N Keep N backups instead of 64
74 -k, --keep=all Keep all backups (never delete any)
75 -f, --force Make a new backup even if one with current revision exists
76 -t, --trace Show actions
77 -s, --simulate Don't perform actions
83 def default_options():
87 options.simulate = False
89 options.keep = 64 # Number of backups to keep around
92 def parse_commandline():
93 options = default_options()
96 opts, args = getopt.getopt(sys.argv[1:], 'qhk:fts', ['quiet', 'help', 'keep=', 'force',
98 except getopt.GetoptError, e:
99 print >>sys.stderr, "Error:", e
103 if o == '-h' or o == '--help':
105 elif o == '-q' or o == '--quiet':
107 elif o == '-f' or o == '--force':
109 elif o == '-t' or o == '--trace':
111 elif o == '-s' or o == '--simulate':
112 options.simulate = True
113 elif o == '-k' or o == '--keep':
114 if a.strip().lower() == 'all':
117 options.keep = int(a)
119 raise Exception("Internal error")
125 options.repo_dir = args[0]
127 # Where to store the repository backup. The backup will be placed in a
128 # *subdirectory* of this location, named after the youngest revision.
130 options.backup_dir = os.path.abspath(args[1])
132 options.repo = os.path.basename(os.path.abspath(options.repo_dir))
136 def comparator(a, b):
137 # We pass in filenames so there is never a case where they are equal.
138 regexp = re.compile("-(?P<revision>[0-9]+)(-(?P<increment>[0-9]+))?$")
139 matcha = regexp.search(a)
140 matchb = regexp.search(b)
141 reva = int(matcha.groupdict()['revision'])
142 revb = int(matchb.groupdict()['revision'])
148 inca = matcha.groupdict()['increment']
149 incb = matchb.groupdict()['increment']
154 elif (int(inca) < int(incb)):
160 return subprocess.Popen(command, stdout=subprocess.PIPE).communicate()[0].strip()
162 def readfile(filename):
164 return open(filename).read().strip()
170 print >>sys.stderr, '#', cmd
173 return subprocess.call(cmd)
176 runcmd(['rm', '-r', path])
def get_youngest_revision():
    '''Return the youngest revision reported by svnlook for the repository.

    Both the 'db/fs-type' lookup and the "." argument given to svnlook are
    relative, so the current working directory must already be the
    repository root when this is called.

    Returns whatever pipe() yields for the command `svnlook youngest .`.

    Raises SystemExit if the contents of db/fs-type, as returned by
    readfile(), are not exactly 'fsfs'.
    '''
    fs_type = readfile(os.path.join('db', 'fs-type'))
    if fs_type == 'fsfs':
        return pipe([svnlook, "youngest", "."])

    # This script only works for FSFS repositories; refuse anything else.
    raise SystemExit("Path '%s' doesn't contain a FSFS repository"
                     % options.repo_dir)
def list_repo_backups():
    '''Return a sorted list of backups for this repository.

    Scans options.backup_dir for entries named "<repo>-<rev>" or
    "<repo>-<rev>-<increment>", where <repo> is options.repo, and returns
    the bare entry names (not full paths) ordered by comparator().  Callers
    in this file treat the last element as the latest backup.
    '''
    # options.repo is the basename of the repository path given on the
    # command line, i.e. arbitrary text.  Escape it so that characters such
    # as '.' or '+' are matched literally instead of being interpreted as
    # regex syntax, which could select another repository's backups or fail
    # to compile.
    regexp = re.compile(re.escape(options.repo) + "-[0-9]+(-[0-9]+)?$")
    directory_list = [x for x in os.listdir(options.backup_dir) if regexp.match(x)]
    directory_list.sort(comparator)
    return directory_list
191 def delete_old_backups():
192 if options.keep <= 0:
195 for item in list_repo_backups()[:-options.keep]:
196 old_backup_subdir = os.path.join(options.backup_dir, item)
197 print " Removing old backup: ", old_backup_subdir
198 deltree(old_backup_subdir)
200 def find_next_backup_name(youngest):
201 # If there is already a backup of this revision, then append the next
202 # highest increment to the path. We still need to do a backup because the
203 # repository might have changed despite no new revision having been
204 # created. We find the highest increment and add one rather than start
205 # from 1 and increment because the starting increments may have already
206 # been removed due to options.keep.
208 regexp = re.compile(options.repo + "-" + youngest + "(-(?P<increment>[0-9]+))?$")
209 directory_list = os.listdir(options.backup_dir)
210 young_list = [ x for x in directory_list if regexp.match(x) ]
211 young_list.sort(comparator)
214 return "%s-%s" %(options.repo, youngest)
216 # Backups for this revision exist already.
218 if not options.force:
219 if not options.quiet:
220 print "Backup already exists at",young_list[-1]
223 increment = int(regexp.match(young_list[-1]).groupdict()['increment'] or '0')
225 return "%s-%s-%d" %(options.repo, youngest, increment+1)
227 def do_rsync_backup():
228 youngest = get_youngest_revision()
230 if not options.quiet:
231 print "Beginning hot backup of '%s' (youngest revision is %s)..." %(options.repo, youngest),
233 backup_subdir = os.path.join(options.backup_dir, find_next_backup_name(youngest))
234 backup_tmpdir = backup_subdir + '.tmp'
236 if os.path.exists(backup_tmpdir):
237 raise SystemExit("%s: Backup in progress? '%s' exists -- aborting."%(sys.argv[0],backup_tmpdir))
239 if not options.simulate:
240 os.mkdir(backup_tmpdir) # ensures atomicity
242 if os.path.exists(backup_subdir):
243 # Check again after doing mkdir (which serves as a mutex acquire) --
244 # just in case another process just finished the same backup.
245 if not options.quiet:
246 print "Backup already exists at",backup_subdir
249 previous_backups = list_repo_backups()
251 ### Use rsync to make a copy.
252 # We need to copy the 'current' file first.
253 # Don't copy the transactions/ directory.
254 # See http://svn.collab.net/repos/svn/trunk/notes/fsfs
256 rsync_dest = os.path.join(backup_tmpdir,'')
258 # copy db/current. -R tells rsync to use relative pathnames.
259 if runcmd([rsync, '-aR', 'db/current', rsync_dest]):
260 raise "%s: rsync failed" %sys.argv[0]
262 # Now copy everything else.
264 '--exclude', 'db/current',
265 '--exclude', 'db/transactions/*',
266 '--exclude', 'db/log.*',
268 # If there's a previous backup, make hard links against the latest.
270 cmd += ['--link-dest', os.path.join(options.backup_dir, previous_backups[-1])]
273 raise "%s: rsync failed" %sys.argv[0]
275 # Rename to final name.
276 if not options.simulate:
277 os.rename(backup_tmpdir, backup_subdir)
279 print "Finished backup to", backup_subdir
282 options = parse_commandline()
283 os.chdir(options.repo_dir)