Can accept /export and /export/ as valid URL now
[booki.git] / tools / rcs_import.py
blobc187a1e1f6c800003eb182b18536633942bc0064
1 #!/usr/bin/python
2 """Read TWiki RCS files, and import the data into git.
4 cd path/for/new/git/repo
5 git init --shared=all
6 rcs_import.py path/to/twiki/data | git-fast-import
8 # now the data is in git but not visible yet
10 git checkout
12 # examine it with one of these:
14 gitk
15 git log
16 git gui
18 # to start again at any point:
20 rm -rf .git
21 # and if you have already done the `git checkout`:
22 rm -r *
24 """
# When true, a file whose history cannot be extracted is reported on stderr
# and skipped instead of aborting the import.  Set from --force when the
# module is run as a script.
FORCE = False
28 import traceback
29 import os, sys
31 from rcs.core import thoeny_filter, Version
def recursive_history_generator(path, rfilter=None):
    """Yield the RCS history of every acceptable file found under *path*.

    The process working directory is changed to *path* and the tree is
    walked from '.', so the file names handed to extract() are relative
    to *path*.  The previous working directory is not restored.

    rfilter is passed through to extract(); it should return true if a
    revision is acceptable (based on metadata).

    If extract() raises and the module-level FORCE flag is false, the
    exception propagates.  If FORCE is true, the traceback and a short
    notice are written to stderr and that file is skipped.

    extract and acceptable_file are looked up as module globals when the
    generator runs; in this file they are imported by the __main__ block.
    """
    os.chdir(path)
    for root, dirs, files in os.walk('.'):
        for f in files:
            if not acceptable_file(f):
                continue
            # Only the parse itself is guarded; the yield sits in the else
            # branch so nothing raised at the yield point is mistaken for a
            # parse error.
            try:
                vs = extract(os.path.join(root, f), rfilter)
            except Exception:
                if not FORCE:
                    raise
                traceback.print_exc()
                sys.stderr.write("Continuing, but ignoring %r...\n" % f)
            else:
                yield vs
def sorted_history(path, rfilter=None):
    """Process every revision found under *path* in chronological order.

    All histories are gathered first; the revisions are then ordered by
    int(v.date) and to_git() is called on each one in turn.
    """
    collected = []
    for history in recursive_history_generator(path, rfilter):
        collected += history
    # NOTE(review): revisions with equal dates are ordered by comparing the
    # revision objects themselves (second tuple element) -- confirm that
    # ordering is well defined for Version.
    dated = sorted((int(v.date), v) for v in collected)
    for _stamp, version in dated:
        version.to_git()
def file_by_file_history(path, rfilter=None):
    """Process each file's complete history before starting the next file.

    Every history is walked in the reverse of the order in which extract()
    returned it, calling to_git() on each revision.
    NOTE(review): presumably extract() returns newest-first, so this emits
    oldest-first -- confirm.
    """
    for history in recursive_history_generator(path, rfilter):
        oldest_first = reversed(history)
        for version in oldest_first:
            version.to_git()
69 def book_as_branch_history(path, rfilter=None):
70 """Complete each files history before beginning on the next"""
71 branch = None
72 for vs in recursive_history_generator(path, rfilter):
73 if not vs:
74 continue
75 d = os.path.dirname(vs[0].name)
76 if d != branch:
77 #new book, new branch
78 branch = d
79 print "reset %s" % branch
81 for v in reversed(vs):
82 print v
83 v.to_git(branch, strip_dir=True)
# Maps each --sort-mode command-line value to the function that implements it.
SORT_MODES = {
    'sorted': sorted_history,
    'by-file': file_by_file_history,
    'branches': book_as_branch_history,
}
94 # TODO: try slow import (via working dir)
95 # try one branch at a time import
if __name__ == '__main__':
    # Command-line entry point: parse the options, select the parser backend
    # and sort mode, then run the import over each directory argument.
    from optparse import OptionParser

    parser = OptionParser()
    parser.add_option("-t", "--no-thoeny", action="store_true",
                      help="ignore TWiki housekeeping commits", default=False)
    parser.add_option("-m", "--sort-mode", metavar="MODE",
                      help="use sort mode, MODE %s." % (SORT_MODES.keys(),),
                      default='sorted')
    parser.add_option("-r", "--use-rcs", action="store_true",
                      help="Use rcs subprocesses (slow, canonical).", default=False)
    parser.add_option("-f", "--force", action="store_true",
                      help="Don't give up on parsing errors.", default=False)
    parser.add_option("-w", "--working-tree", action="store_true",
                      help="Use working tree for commits.", default=False)
    options, dirs = parser.parse_args()

    # Look the mode up in the shared table (the one the help text advertises)
    # and turn an unknown mode into a usage error rather than a KeyError
    # traceback.
    try:
        sorter = SORT_MODES[options.sort_mode]
    except KeyError:
        parser.error("unknown sort mode %r; choose one of: %s"
                     % (options.sort_mode, ", ".join(sorted(SORT_MODES))))

    # These become module globals; recursive_history_generator() looks them
    # up by name when it runs.
    if options.use_rcs:
        from rcs.subprocess_parse import extract, acceptable_file
    else:
        from rcs.parse import extract, acceptable_file

    FORCE = options.force

    if options.no_thoeny:
        rfilter = thoeny_filter
    else:
        rfilter = None

    if options.working_tree:
        Version.to_git = Version.to_git_slow

    # recursive_history_generator() chdir()s into the directory it is given,
    # so relative arguments after the first would otherwise be resolved
    # against the previous argument.  Make them absolute before starting.
    targets = [os.path.abspath(d) for d in dirs]
    for target in targets:
        sorter(target, rfilter)