3 # hot-backup.py: perform a "hot" backup of a Subversion repository
4 # and clean any old Berkeley DB logfiles after the
5 # backup completes, if the repository backend is Berkeley DB.
8 # Subversion is a tool for revision control.
9 # See http://subversion.tigris.org for more information.
11 # ====================================================================
12 # Copyright (c) 2000-2007 CollabNet. All rights reserved.
14 # This software is licensed as described in the file COPYING, which
15 # you should have received as part of this distribution. The terms
16 # are also available at http://subversion.tigris.org/license-1.html.
17 # If newer versions of this license are posted there, you may use a
18 # newer version instead, at your option.
20 # This software consists of voluntary contributions made by many
21 # individuals. For exact contribution history, see the revision
22 # history and logs, available at http://subversion.tigris.org/.
23 # ====================================================================
28 # $LastChangedRevision$
30 ######################################################################
32 import sys
, os
, getopt
, stat
, string
, re
, time
, shutil
34 # Try to import the subprocess module. It works better than os.popen3
35 # and os.spawnl on Windows when spaces appear in any of the svnadmin,
36 # svnlook or repository paths. os.popen3 and os.spawnl are still used
37 # to support Python 2.3 and older which do not provide the subprocess
38 # module. have_subprocess is set to 1 or 0 to support older Python
39 # versions that do not have True and False.
46 ######################################################################
# Path to svnlook utility.  Kept as a raw string because the placeholder
# is substituted at build time and the result may contain backslashes.
svnlook = r"@SVN_BINDIR@/svnlook"

# Path to svnadmin utility (raw string for the same reason as above).
svnadmin = r"@SVN_BINDIR@/svnadmin"

# Default number of backups to keep around (0 for "keep them all").
# The SVN_HOTBACKUP_BACKUPS_NUMBER environment variable, when set,
# overrides the built-in default of 64.
num_backups = int(os.getenv("SVN_HOTBACKUP_BACKUPS_NUMBER", 64))
58 # Archive types/extensions
65 # Chmod recursively on a whole subtree
def chmod_tree(path, mode, mask):
    """Recursively adjust the permission bits of everything below PATH.

    For every file and directory found under PATH (PATH itself is left
    untouched) the bits in MASK are cleared from the current mode and the
    bits in MODE are then set.  Symbolic links are skipped, and symlinked
    directories are not descended into.
    """
    # os.walk replaces the deprecated os.path.walk (which no longer exists
    # in Python 3).  With the default top-down order a directory is chmod'ed
    # while its parent is being processed, i.e. before it is listed itself.
    for dirpath, dirnames, filenames in os.walk(path):
        for name in dirnames + filenames:
            fullname = os.path.join(dirpath, name)
            if not os.path.islink(fullname):
                new_mode = (os.stat(fullname)[stat.ST_MODE] & ~mask) | mode
                os.chmod(fullname, new_mode)
76 # For clearing away read-only directories
77 def safe_rmtree(dirname
, retry
=0):
78 "Remove the tree at DIRNAME, making it writable first"
80 chmod_tree(dirname
, 0666, 0666)
81 shutil
.rmtree(dirname
)
83 if not os
.path
.exists(dirname
):
87 for delay
in (0.5, 1, 2, 4):
98 ######################################################################
99 # Command line arguments
101 def usage(out
= sys
.stdout
):
102 scriptname
= os
.path
.basename(sys
.argv
[0])
104 """USAGE: %s [OPTIONS] REPOS_PATH BACKUP_PATH
106 Create a backup of the repository at REPOS_PATH in a subdirectory of
107 the BACKUP_PATH location, named after the youngest revision.
110 --archive-type=FMT Create an archive of the backup. FMT can be one of:
111 bz2 : Creates a bzip2 compressed tar file.
112 gz : Creates a gzip compressed tar file.
113 zip : Creates a compressed zip file.
114 --num-backups=N Number of prior backups to keep around (0 to keep all).
115 --help -h Print this help message and exit.
121 opts
, args
= getopt
.gnu_getopt(sys
.argv
[1:], "h?", ["archive-type=",
124 except getopt
.GetoptError
, e
:
125 print >> sys
.stderr
, "ERROR: %s\n" % e
132 if o
== "--archive-type":
134 elif o
== "--num-backups":
136 elif o
in ("-h", "--help", "-?"):
141 print >> sys
.stderr
, "ERROR: only two arguments allowed.\n"
147 repo
= os
.path
.basename(os
.path
.abspath(repo_dir
))
149 # Where to store the repository backup. The backup will be placed in
150 # a *subdirectory* of this location, named after the youngest revision.
154 # Added to the filename regexp, set when using --archive-type.
157 # Do we want to create an archive of the backup
159 if archive_map
.has_key(archive_type
):
160 # Additionally find files with the archive extension.
161 ext_re
= "(" + re
.escape(archive_map
[archive_type
]) + ")?"
163 print >> sys
.stderr
, "Unknown archive type '%s'.\n\n" % archive_type
168 ######################################################################
171 def comparator(a
, b
):
172 # We pass in filenames so there is never a case where they are equal.
173 regexp
= re
.compile("-(?P<revision>[0-9]+)(-(?P<increment>[0-9]+))?" +
175 matcha
= regexp
.search(a
)
176 matchb
= regexp
.search(b
)
177 reva
= int(matcha
.groupdict()['revision'])
178 revb
= int(matchb
.groupdict()['revision'])
184 inca
= matcha
.groupdict()['increment']
185 incb
= matchb
.groupdict()['increment']
190 elif (int(inca
) < int(incb
)):
195 def get_youngest_revision():
196 """Examine the repository REPO_DIR using the svnlook binary
197 specified by SVNLOOK, and return the youngest revision."""
200 p
= subprocess
.Popen([svnlook
, 'youngest', repo_dir
],
201 stdin
=subprocess
.PIPE
,
202 stdout
=subprocess
.PIPE
,
203 stderr
=subprocess
.PIPE
)
204 infile
, outfile
, errfile
= p
.stdin
, p
.stdout
, p
.stderr
206 infile
, outfile
, errfile
= os
.popen3(svnlook
+ " youngest " + repo_dir
)
208 stdout_lines
= outfile
.readlines()
209 stderr_lines
= errfile
.readlines()
215 raise Exception("Unable to find the youngest revision for repository '%s'"
216 ": %s" % (repo_dir
, string
.rstrip(stderr_lines
[0])))
218 return string
.strip(stdout_lines
[0])
220 ######################################################################
223 print "Beginning hot backup of '"+ repo_dir
+ "'."
226 ### Step 1: get the youngest revision.
229 youngest
= get_youngest_revision()
231 print >> sys
.stderr
, str(e
)
234 print "Youngest revision is", youngest
### Step 2: Find next available backup path

backup_subdir = os.path.join(backup_dir, repo + "-" + youngest)

# If there is already a backup of this revision, then append the
# next highest increment to the path. We still need to do a backup
# because the repository might have changed despite no new revision
# having been created. We find the highest increment and add one
# rather than start from 1 and increment because the starting
# increments may have already been removed due to num_backups.

# The repository name is escaped so that regex metacharacters in it
# (for example "." or "+") are matched literally.
regexp = re.compile("^" + re.escape(repo) + "-" + youngest +
                    "(-(?P<increment>[0-9]+))?" + ext_re + "$")
directory_list = os.listdir(backup_dir)
young_list = [x for x in directory_list if regexp.search(x)]
if young_list:
    # comparator orders the names by revision, then increment, so the
    # last element carries the highest increment used so far.
    young_list.sort(comparator)
    increment = regexp.search(young_list.pop()).groupdict()['increment']
    if increment:
        backup_subdir = os.path.join(backup_dir, repo + "-" + youngest + "-"
                                     + str(int(increment) + 1))
    else:
        # Only the un-suffixed backup exists so far; start numbering at 1.
        backup_subdir = os.path.join(backup_dir, repo + "-" + youngest + "-1")
261 ### Step 3: Ask subversion to make a hot copy of a repository.
264 print "Backing up repository to '" + backup_subdir
+ "'..."
266 err_code
= subprocess
.call([svnadmin
, "hotcopy", repo_dir
,
267 backup_subdir
, "--clean-logs"])
269 err_code
= os
.spawnl(os
.P_WAIT
, svnadmin
, "svnadmin", "hotcopy", repo_dir
,
270 backup_subdir
, "--clean-logs")
272 print >> sys
.stderr
, "Unable to backup the repository."
278 ### Step 4: Make an archive of the backup if required.
280 archive_path
= backup_subdir
+ archive_map
[archive_type
]
283 print "Archiving backup to '" + archive_path
+ "'..."
284 if archive_type
== 'gz' or archive_type
== 'bz2':
287 tar
= tarfile
.open(archive_path
, 'w:' + archive_type
)
288 tar
.add(backup_subdir
, os
.path
.basename(backup_subdir
))
290 except ImportError, e
:
291 err_msg
= "Import failed: " + str(e
)
293 except tarfile
.TarError
, e
:
294 err_msg
= "Tar failed: " + str(e
)
297 elif archive_type
== 'zip':
301 def add_to_zip(baton
, dirname
, names
):
303 root
= os
.path
.join(baton
[1], '')
306 path
= os
.path
.join(dirname
, file)
307 if os
.path
.isfile(path
):
308 zp
.write(path
, path
[len(root
):])
309 elif os
.path
.isdir(path
) and os
.path
.islink(path
):
310 os
.path
.walk(path
, add_to_zip
, (zp
, path
))
312 zp
= zipfile
.ZipFile(archive_path
, 'w', zipfile
.ZIP_DEFLATED
)
313 os
.path
.walk(backup_subdir
, add_to_zip
, (zp
, backup_dir
))
315 except ImportError, e
:
316 err_msg
= "Import failed: " + str(e
)
318 except zipfile
.error
, e
:
319 err_msg
= "Zip failed: " + str(e
)
324 print >> sys
.stderr
, \
325 "Unable to create an archive for the backup.\n" + err_msg
328 print "Archive created, removing backup '" + backup_subdir
+ "'..."
329 safe_rmtree(backup_subdir
, 1)
### Step 5: finally, remove all repository backups other than the last
###         num_backups.

# num_backups == 0 means "keep them all".  Without this guard the slice
# below would delete nothing from old_list and every backup would be
# removed.
if num_backups > 0:
    # The repository name is escaped so that regex metacharacters in it
    # (for example "." or "+") are matched literally.
    regexp = re.compile("^" + re.escape(repo) + "-[0-9]+(-[0-9]+)?" +
                        ext_re + "$")
    directory_list = os.listdir(backup_dir)
    old_list = [x for x in directory_list if regexp.search(x)]
    old_list.sort(comparator)
    # Drop the newest num_backups entries from the list; whatever is left
    # is what gets deleted.
    del old_list[max(0, len(old_list) - num_backups):]
    for item in old_list:
        old_backup_item = os.path.join(backup_dir, item)
        # Parenthesised single-argument form prints the same text as the
        # bare print statement.
        print("Removing old backup: " + old_backup_item)
        if os.path.isdir(old_backup_item):
            safe_rmtree(old_backup_item, 1)
        else:
            # Archived backups are plain files.
            os.remove(old_backup_item)