5 darcs-fast-export - darcs backend for fast data importers
7 Copyright (c) 2008, 2009 Miklos Vajna <vmiklos@frugalware.org>
8 Copyright (c) 2008 Matthias Andree <matthias.andree@gmx.de>
10 This program is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26 import xml
.dom
.minidom
27 import xml
.parsers
.expat
42 sys
.setdefaultencoding("utf-8")
48 self
.export_marks
= []
49 self
.import_marks
= []
51 def get_patchname(self
, patch
):
54 if patch
.attributes
['inverted'].value
== 'True':
56 cs
= patch
.getElementsByTagName("name")[0].childNodes
58 ret
.append(s
+ cs
[0].data
)
59 lines
= patch
.getElementsByTagName("comment")
61 for i
in lines
[0].childNodes
[0].data
.split('\n'):
62 if not i
.startswith("Ignore-this: "):
64 return "\n".join(ret
).encode('utf-8')
66 def get_author(self
, patch
):
67 """darcs allows any freeform string, but fast-import has a more
68 strict format, so fix up broken author names here."""
70 author
= patch
.attributes
['author'].value
71 if author
in self
.authormap
:
72 author
= self
.authormap
[author
]
74 author
= "darcs-fast-export <darcs-fast-export>"
76 elif not ">" in author
:
77 author
= "%s <%s>" % (author
.split('@')[0], author
)
78 # avoid double quoting
79 elif author
[0] == '"' and author
[-1] == '"':
82 elif author
[-1] != '>':
83 author
= author
[author
.index('>')+2:] + ' ' + author
[:author
.index('>')+1]
84 return author
.encode('utf-8')
def get_date(self, patch):
    """Convert a darcs date string to seconds since the epoch.

    patch is the date string itself.  It is first parsed as a compact
    14-digit stamp (%Y%m%d%H%M%S).  If that fails, its first 19 and
    last 5 characters are joined and parsed as '%a %b %d %H:%M:%S %Y',
    which drops whatever sits between the time and the year.

    The parsed fields are handed to calendar.timegm, i.e. they are
    interpreted as UTC.  A string matching neither layout raises
    ValueError from the second time.strptime call.
    """
    try:
        parsed = time.strptime(patch, "%Y%m%d%H%M%S")
    except ValueError:
        legacy = patch[:19] + patch[-5:]
        parsed = time.strptime(legacy, '%a %b %d %H:%M:%S %Y')
    return calendar.timegm(parsed)
93 def progress(self
, s
):
94 print "progress [%s] %s" % (time
.strftime("%Y-%m-%d %H:%M:%S", time
.localtime()), s
)
98 self
.logsock
.write("[%s] %s" % (time
.strftime("%Y-%m-%d %H:%M:%S", time
.localtime()), s
))
101 def parse_inventory(self
, sock
=None):
106 sock
= self
.open(os
.path
.join(self
.origin
, "_darcs", "hashed_inventory"))
107 for i
in sock
.readlines():
108 if i
.startswith("hash"):
109 buf
.insert(0, i
[6:-1])
110 if i
.startswith("Starting with inventory:"):
117 self
.hashes
.insert(0, i
)
119 sock
= self
.gzip_open(os
.path
.join(self
.origin
, "_darcs", "inventories", prev
))
120 self
.parse_inventory(sock
)
122 # this is like gzip.open but supports urls as well
def gzip_open(self, path):
    """Open a gzip-compressed resource given a local path or a URL.

    A path that exists on the local filesystem is opened directly with
    gzip.open.  Anything else is fetched with urllib.urlopen, read
    completely into memory, and wrapped in a gzip.GzipFile over that
    in-memory copy.

    Returns a file-like object that yields the decompressed data.
    """
    if os.path.exists(path):
        return gzip.open(path)
    remote = urllib.urlopen(path)
    try:
        # The payload is buffered in full, so the url handle can be
        # released before any decompression happens.
        compressed = remote.read()
    finally:
        remote.close()
    return gzip.GzipFile(fileobj=StringIO.StringIO(compressed))
130 # this is like os.path.exists but supports urls as well
131 def path_exists(self
, path
):
132 if os
.path
.exists(path
):
136 urllib2
.urlopen(urllib2
.Request(path
))
138 except urllib2
.HTTPError
, e
:
141 # this is like open, but supports urls as well
142 def open(self
, path
):
143 if os
.path
.exists(path
):
146 return urllib
.urlopen(path
)
def handle_opts(self):
    """Parse the command line and derive the converter's settings.

    Reads sys.argv through optparse and sets self.options, self.args,
    self.origin, self.working, self.logsock, self.git_branch and
    self.prognum.  self.authormap is replaced only when --authors-file
    is given.  A missing darcsrepo argument is reported through
    OptionParser.error.

    Side effect: the log file is opened in append mode and kept open
    as self.logsock.
    """
    usage = "%prog [options] darcsrepo"
    opp = optparse.OptionParser(usage=usage)
    opp.add_option("--import-marks", metavar="IFILE",
        help="read state for incremental imports from IFILE")
    opp.add_option("--export-marks", metavar="OFILE",
        help="write state for incremental imports to OFILE")
    opp.add_option("--encoding",
        help="encoding of log [default: %default], if unspecified and input isn't utf-8, guess")
    opp.add_option("--authors-file", metavar="F",
        help="read author transformations in old=new format from F")
    opp.add_option("--working", metavar="W",
        help="working directory which is removed at the end of non-incremental conversions")
    opp.add_option("--logfile", metavar="L",
        help="log file which contains the output of external programs invoked during the conversion")
    opp.add_option("--git-branch", metavar="B",
        help="git branch [default: refs/heads/master]")
    opp.add_option("--progress", metavar="P",
        help="insert progress statements after every n commit [default: 100]")
    (self.options, self.args) = opp.parse_args()
    if len(self.args) < 1:
        opp.error("darcsrepo required")

    # Read the author mapping file in gitauthors format,
    # i.e. in=out (one entry per line).
    if self.options.authors_file:
        authormap = {}
        sock = open(self.options.authors_file)
        try:
            for raw in sock:
                entry = raw.strip()
                # Blank lines and lines without '=' carry no mapping;
                # skip them instead of failing on the unpack below.
                if '=' not in entry:
                    continue
                old, new = entry.split('=', 1)
                authormap[old] = new
        finally:
            sock.close()
        self.authormap = authormap

    # Local repositories are addressed by absolute path; URLs are kept
    # as given, minus leading/trailing slashes.
    if "://" not in self.args[0]:
        self.origin = os.path.abspath(self.args[0])
    else:
        self.origin = self.args[0].strip('/')

    # Default working dir and log file are named after the repository:
    # next to it for a local path, after its last path component (so
    # relative to the current directory) for a URL.
    if "://" not in self.origin:
        base = self.origin
    else:
        base = os.path.split(self.origin)[-1]

    if self.options.working:
        self.working = os.path.abspath(self.options.working)
    else:
        self.working = "%s.darcs" % base

    if self.options.logfile:
        logfile = os.path.abspath(self.options.logfile)
    else:
        logfile = "%s.log" % base
    self.logsock = open(logfile, "a")

    if self.options.git_branch:
        self.git_branch = self.options.git_branch
    else:
        self.git_branch = "refs/heads/master"

    if self.options.progress:
        self.prognum = int(self.options.progress)
    else:
        # Matches the default advertised in the --progress help text.
        self.prognum = 100
208 def handle_import_marks(self
):
209 if self
.options
.import_marks
:
210 sock
= open(self
.options
.import_marks
)
211 for i
in sock
.readlines():
215 self
.import_marks
.append(line
.split(' ')[1])
216 self
.export_marks
.append(line
)
219 def get_patches(self
):
220 self
.progress("getting list of patches")
221 if not len(self
.import_marks
):
222 sock
= os
.popen("darcs changes --xml --reverse --repo %s" % self
.origin
)
224 sock
= os
.popen("darcs changes --xml --reverse --repo %s --from-match 'hash %s'" % (self
.origin
, self
.import_marks
[-1]))
227 # this is hackish. we need to escape some bad chars, otherwise the xml
229 buf
= buf
.replace('\x1b', '^[')
230 if self
.options
.encoding
:
231 xmldoc
= xml
.dom
.minidom
.parseString(unicode(buf
, self
.options
.encoding
).encode('utf-8'))
234 xmldoc
= xml
.dom
.minidom
.parseString(buf
)
235 except xml
.parsers
.expat
.ExpatError
:
239 sys
.exit("Error, encoding is not utf-8. Please " +
240 "either specify it with the --encoding " +
241 "option or install chardet.")
242 self
.progress("encoding is not utf8, guessing charset")
243 encoding
= chardet
.detect(buf
)['encoding']
244 self
.progress("detected encoding is %s" % encoding
)
245 xmldoc
= xml
.dom
.minidom
.parseString(unicode(buf
, encoding
).encode('utf-8'))
247 return xmldoc
.getElementsByTagName('patch')
249 def setup_workdir(self
):
251 self
.oldfashionedpatch
= True
252 self
.cwd
= os
.getcwd()
253 if self
.path_exists(os
.path
.join(self
.origin
, "_darcs", "format")):
254 sock
= self
.open(os
.path
.join(self
.origin
, "_darcs", "format"))
255 format
= [x
.strip() for x
in sock
]
257 darcs2
= 'darcs-2' in format
258 self
.oldfashionedpatch
= not 'hashed' in format
259 if not self
.oldfashionedpatch
:
260 self
.progress("parsing the inventory")
261 if "://" not in self
.origin
:
262 os
.chdir(self
.origin
)
263 self
.parse_inventory()
264 if not self
.options
.import_marks
or not os
.path
.exists(self
.working
):
265 # init the tmp darcs repo
266 os
.mkdir(self
.working
)
267 os
.chdir(self
.working
)
269 os
.system("darcs init --darcs-2")
271 os
.system("darcs init --hashed")
273 os
.chdir(self
.working
)
274 if self
.options
.import_marks
:
275 sock
= os
.popen("darcs pull -a --match 'hash %s' %s" % (self
.import_marks
[-1], self
.origin
))
276 self
.log("Building/updating working directory:\n%s" % sock
.read())
279 def sanitize_ref(self
, ref
):
280 ref
= re
.sub('[\x00-\x1f\x7f ~^:/\\*?[]+', '_', ref
)
281 ref
= re
.sub('^[._]+', '_', ref
)
282 ref
= re
.sub('[._]+$', '_', ref
)
283 ref
= re
.sub('[.][.]', '__', ref
)
284 ref
= re
.sub('@\{', '_{', ref
)
286 ref
= re
.sub('[._]+lock$', '_lock', ref
)
292 def export_patches(self
):
293 patches
= self
.get_patches()
294 # this is the number of the NEXT patch
296 if len(self
.import_marks
):
297 patches
= patches
[1:]
298 count
= len(self
.import_marks
) + 1
299 if len(self
.export_marks
):
300 # this is the mark number of the NEXT patch
301 markcount
= int(self
.export_marks
[-1].split(' ')[0][1:]) + 1
304 # this may be huge and we need it many times
305 patchnum
= len(patches
)
307 if not len(self
.import_marks
):
308 self
.progress("starting export, repo has %d patches" % patchnum
)
310 self
.progress("continuing export, %d patches to convert" % patchnum
)
314 hash = i
.attributes
['hash'].value
315 if not hash.endswith(".gz"):
317 buf
= ["\nNew patches:\n"]
318 if self
.oldfashionedpatch
:
319 sock
= self
.gzip_open(os
.path
.join(self
.origin
, "_darcs", "patches", hash))
321 sock
= self
.gzip_open(os
.path
.join(self
.origin
, "_darcs", "patches", self
.hashes
[count
-1]))
322 buf
.append(sock
.read())
324 sock
= os
.popen("darcs changes --context")
325 buf
.append(sock
.read())
327 sock
= subprocess
.Popen(["darcs", "apply", "--allow-conflicts"], stdin
=subprocess
.PIPE
, stdout
=subprocess
.PIPE
)
328 sock
.stdin
.write("".join(buf
))
330 self
.log("Applying %s:\n%s" % (hash, sock
.stdout
.read()))
332 message
= self
.get_patchname(i
)
334 print "commit %s" % self
.git_branch
335 print "mark :%s" % markcount
336 if self
.options
.export_marks
:
337 self
.export_marks
.append(":%s %s" % (markcount
, hash))
338 date
= self
.get_date(i
.attributes
['date'].value
)
339 print "committer %s %s +0000" % (self
.get_author(i
), date
)
340 print "data %d\n%s" % (len(message
), message
)
342 print "from :%s" % (markcount
-1)
347 for (root
, dirs
, files
) in os
.walk ("."):
349 j
= os
.path
.normpath(os
.path
.join(root
, f
))
350 if j
.startswith("_darcs") or "-darcs-backup" in j
:
356 # darcs does not track the executable bit :/
357 print "M 644 inline %s" % j
358 print "data %s\n%s" % (len(buf
), buf
)
359 if message
[:4] == "TAG ":
360 tag
= self
.sanitize_ref(message
[4:].strip().split('\n')[0])
362 print "from :%s" % markcount
363 print "tagger %s %s +0000" % (self
.get_author(i
), date
)
364 print "data %d\n%s" % (len(message
), message
)
365 if count
% self
.prognum
== 0:
366 self
.progress("%d/%d patches" % (count
, patchnum
))
372 if not self
.options
.export_marks
:
373 shutil
.rmtree(self
.working
)
376 def handle_export_marks(self
):
377 if self
.options
.export_marks
:
378 self
.progress("writing export marks")
379 sock
= open(self
.options
.export_marks
, 'w')
380 sock
.write("\n".join(self
.export_marks
))
384 self
.progress("finished")
388 self
.handle_import_marks()
390 self
.export_patches()
391 self
.handle_export_marks()
393 if __name__
== "__main__":