archive: handle commits with an empty tree
[git/jnareb-git.git] / contrib / remote-helpers / git-remote-hg
blobc7006000a6f4a19fa4e5f4bf3fc543d4756a1f4d
1 #!/usr/bin/env python
3 # Copyright (c) 2012 Felipe Contreras
6 # Inspired by Rocco Rutte's hg-fast-export
8 # Just copy to your ~/bin, or anywhere in your $PATH.
9 # Then you can clone with:
10 # git clone hg::/path/to/mercurial/repo/
12 from mercurial import hg, ui, bookmarks, context, util, encoding
14 import re
15 import sys
16 import os
17 import json
18 import shutil
19 import subprocess
20 import urllib
23 # If you want to switch to hg-git compatibility mode:
24 # git config --global remote-hg.hg-git-compat true
26 # git:
27 # Sensible defaults for git.
28 # hg bookmarks are exported as git branches, hg branches are prefixed
29 # with 'branches/', HEAD is a special case.
31 # hg:
32 # Emulate hg-git.
33 # Only hg bookmarks are exported as git branches.
34 # Commits are modified to preserve hg information and allow bidirectionality.
# Patterns used to split author/committer identities.  They are written as
# raw strings so that regex escapes such as \w and \d are not interpreted as
# (invalid) string escapes.

# Leading run of characters up to the first '<' or '>'.
NAME_RE = re.compile(r'^([^<>]+)')
# 'Name <email>' with an optional name: group 1 = name, group 2 = email.
AUTHOR_RE = re.compile(r'^([^<>]+?)? ?<([^<>]*)>$')
# Looser form: group 3 captures any text following the closing '>'.
AUTHOR_HG_RE = re.compile(r'^(.*?) ?<(.*?)(?:>(.+)?)?$')
# Ident line: '<word> [name ]<email> <digits> <+-digits>'.
RAW_AUTHOR_RE = re.compile(r'^(\w+) (?:(.+)? )?<(.*)> (\d+) ([+-]\d+)')
def die(msg, *args):
    """Write 'ERROR: <message>' to stderr and exit with status 1.

    msg is a %-style format string; args are interpolated into it.
    """
    text = msg % args
    sys.stderr.write('ERROR: %s\n' % text)
    sys.exit(1)
def warn(msg, *args):
    """Write 'WARNING: <message>' to stderr without terminating.

    msg is a %-style format string; args are interpolated into it.
    """
    text = msg % args
    sys.stderr.write('WARNING: %s\n' % text)
def gitmode(flags):
    """Map hg file flags to a git file mode string.

    'l' in flags selects the symlink mode and takes precedence over 'x'
    (executable); anything else is a regular file.
    """
    if 'l' in flags:
        return '120000'
    if 'x' in flags:
        return '100755'
    return '100644'
def gittz(tz):
    """Format an offset in seconds as a '+hhmm' / '-hhmm' string.

    The sign is inverted: a negative tz produces a '+' result and a positive
    tz produces a '-' result.

    The sign is handled separately from the magnitude.  Floor division and
    modulo on a negative value would round the hour away from zero and
    produce a complementary minute count (12600 -> '-0430' rather than
    '-0330').
    """
    offset = -tz
    sign = '-' if offset < 0 else '+'
    hours, rest = divmod(abs(offset), 3600)
    return '%s%02d%02d' % (sign, hours, rest // 60)
def hgmode(mode):
    """Map a git file mode string to hg file flags.

    Returns 'x' for executables, 'l' for symlinks and '' for anything else.

    Both the plain six-digit spelling (the one gitmode() in this file uses)
    and the zero-padded spelling that the table originally listed are
    accepted, so neither form silently loses the flag.
    """
    m = {
        '100755': 'x', '0100755': 'x',
        '120000': 'l', '0120000': 'l',
    }
    return m.get(mode, '')
def get_config(config):
    """Return the raw stdout of `git config --get <config>`.

    The output is returned unmodified (including any trailing newline); the
    exit status of git is not inspected.
    """
    proc = subprocess.Popen(['git', 'config', '--get', config],
                            stdout=subprocess.PIPE)
    stdout_data = proc.communicate()[0]
    return stdout_data
class Marks:
    """Persistent bookkeeping of revision <-> mark mappings and ref tips.

    State is kept in a JSON file at `path` with three keys:
    'tips' (ref name -> tip), 'marks' (str(rev) -> mark) and 'last-mark'.
    `rev_marks` is the in-memory reverse mapping (mark -> rev) and is not
    stored; it is rebuilt from 'marks' on load.
    """

    def __init__(self, path):
        self.path = path
        self.tips = {}
        self.marks = {}
        self.rev_marks = {}
        self.last_mark = 0

        self.load()

    def load(self):
        """Load state from self.path; a missing file leaves the object empty."""
        if not os.path.exists(self.path):
            return

        # Close the file deterministically instead of relying on the
        # garbage collector.
        with open(self.path) as f:
            tmp = json.load(f)

        self.tips = tmp['tips']
        self.marks = tmp['marks']
        self.last_mark = tmp['last-mark']

        for rev, mark in self.marks.items():
            self.rev_marks[mark] = int(rev)

    def dict(self):
        """Return the serializable state."""
        return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }

    def store(self):
        """Write the state to self.path as JSON."""
        with open(self.path, 'w') as f:
            json.dump(self.dict(), f)

    def __str__(self):
        return str(self.dict())

    def from_rev(self, rev):
        """Return the mark recorded for rev (KeyError if there is none)."""
        return self.marks[str(rev)]

    def to_rev(self, mark):
        """Return the revision recorded for mark (KeyError if there is none)."""
        return self.rev_marks[mark]

    def get_mark(self, rev):
        """Allocate the next mark for rev and return it."""
        self.last_mark += 1
        self.marks[str(rev)] = self.last_mark
        # Keep the reverse mapping in step, as new_mark() and load() do, so
        # to_rev() also works for marks allocated during this run.
        self.rev_marks[self.last_mark] = rev
        return self.last_mark

    def new_mark(self, rev, mark):
        """Record an externally chosen mark for rev."""
        self.marks[str(rev)] = mark
        self.rev_marks[mark] = rev
        self.last_mark = mark

    def is_marked(self, rev):
        """Return True if rev already has a mark."""
        return str(rev) in self.marks

    def get_tip(self, branch):
        """Return the stored tip for branch, or 0 if none is stored."""
        return self.tips.get(branch, 0)

    def set_tip(self, branch, tip):
        """Remember tip as the tip for branch."""
        self.tips[branch] = tip
class Parser:
    """Line-oriented reader over the command stream arriving on stdin.

    `line` always holds the current (whitespace-stripped) line; the methods
    below inspect it or advance to the next one.
    """

    def __init__(self, repo):
        self.repo = repo
        self.line = self.get_line()

    def get_line(self):
        """Read one line from stdin, stripped of surrounding whitespace."""
        return sys.stdin.readline().strip()

    def __getitem__(self, i):
        """Return the i-th whitespace-separated word of the current line."""
        return self.line.split()[i]

    def check(self, word):
        """Return True if the current line starts with word."""
        return self.line.startswith(word)

    def each_block(self, separator):
        """Yield lines, reading a new one after each, until one equals separator."""
        while self.line != separator:
            yield self.line
            self.line = self.get_line()

    def __iter__(self):
        # A block ends at an empty line (which is also what EOF looks like
        # after get_line() strips it).
        return self.each_block('')

    def next(self):
        """Advance to the next line; a 'done' line is recorded as None."""
        self.line = self.get_line()
        if self.line == 'done':
            self.line = None

    def get_mark(self):
        """Return the integer following the first ':' on the current line."""
        i = self.line.index(':') + 1
        return int(self.line[i:])

    def get_data(self):
        """Read the payload announced by a 'data <size>' line.

        Returns None when the current line is not a data command.
        """
        if not self.check('data'):
            return None
        i = self.line.index(' ') + 1
        size = int(self.line[i:])
        return sys.stdin.read(size)

    @staticmethod
    def _hg_tz(tz):
        """Convert a '+hhmm' / '-hhmm' string to seconds with the sign inverted.

        Hours and minutes are taken from the absolute value.  Floor division
        and modulo on a negative number would turn '-0330' into 2h50m
        instead of 3h30m.
        """
        offset = int(tz)
        sign = -1 if offset < 0 else 1
        hours, minutes = divmod(abs(offset), 100)
        return -sign * (hours * 3600 + minutes * 60)

    def get_author(self):
        """Parse the current ident line.

        Returns (user, timestamp, tz) where tz is in seconds with the sign
        inverted relative to the '+-hhmm' field, or None if the line does
        not match RAW_AUTHOR_RE.
        """
        ex = None
        m = RAW_AUTHOR_RE.match(self.line)
        if not m:
            return None
        _, name, email, date, tz = m.groups()
        if name and 'ext:' in name:
            # Undo the ' ext:(...)' suffix that fixup_user_hg() appends.
            m = re.match(r'^(.+?) ext:\((.+)\)$', name)
            if m:
                name = m.group(1)
                ex = urllib.unquote(m.group(2))

        if email != bad_mail:
            if name:
                user = '%s <%s>' % (name, email)
            else:
                user = '<%s>' % (email)
        else:
            # The placeholder address is dropped again.
            user = name

        if ex:
            user += ex

        return (user, int(date), self._hg_tz(tz))
def export_file(fc):
    """Print an inline 'M' (file modify) command plus data block for fc.

    fc must provide data(), flags() and path().
    """
    contents = fc.data()
    filemode = gitmode(fc.flags())
    # Single-argument print(...) is identical to the print statement here.
    print("M %s inline %s" % (filemode, fc.path()))
    print("data %d" % len(contents))
    print(contents)
def get_filechanges(repo, ctx, parent):
    """Compare ctx's manifest against that of repo[parent].

    Returns (changed, removed): `changed` is the set of paths that are new
    or whose flags or manifest entry differ, `removed` the set of paths that
    only exist in the parent manifest.
    """
    current = ctx.manifest()
    # Work on a copy: entries are deleted as they are matched, so whatever
    # is left at the end was removed.
    previous = repo[parent].manifest().copy()

    changed = set()
    for path in current:
        if path not in previous:
            changed.add(path)
            continue
        if current.flags(path) != previous.flags(path) or current[path] != previous[path]:
            changed.add(path)
        del previous[path]

    return changed, set(previous.keys())
def fixup_user_git(user):
    """Split user into (name, mail) for the default ('git') mode.

    Double quotes are removed first.  A 'Name <mail>' string yields both
    parts; otherwise only the text before any '<' or '>' is used as the name
    and mail is None.  (None, None) is returned when nothing matches.
    """
    user = user.replace('"', '')

    full = AUTHOR_RE.match(user)
    if full:
        return (full.group(1), full.group(2).strip())

    name_only = NAME_RE.match(user)
    if name_only:
        return (name_only.group(1).strip(), None)

    return (None, None)
def fixup_user_hg(user):
    """Split user into (name, mail) for the 'hg' compatibility mode.

    Text following the closing '>' is preserved by appending a URL-quoted
    ' ext:(...)' suffix to the name.  Without a '<' in user, the whole
    sanitized string is the name, and also the mail if it contains '@'.
    """
    def sanitize(name):
        # stole this from hg-git
        return re.sub('[<>\n]', '?', name.lstrip('< ').rstrip('> '))

    m = AUTHOR_HG_RE.match(user)
    if not m:
        name = sanitize(user)
        mail = name if '@' in user else None
        return (name, mail)

    name = sanitize(m.group(1))
    mail = sanitize(m.group(2))
    trailer = m.group(3)
    if trailer:
        name += ' ext:(' + urllib.quote(trailer) + ')'
    return (name, mail)
def fixup_user(user):
    """Return user normalized to 'Name <mail>'.

    The splitting function depends on the global mode; an empty name or
    mail is replaced by the bad_name / bad_mail placeholder.
    """
    splitter = fixup_user_git if mode == 'git' else fixup_user_hg
    name, mail = splitter(user)
    return '%s <%s>' % (name or bad_name, mail or bad_mail)
def get_repo(url, alias):
    """Return a local Mercurial repository object for url.

    Local urls are opened directly.  For anything else a clone is kept in
    <dirname>/clone: it is created on first use and pulled into afterwards.
    In that case the global `peer` is set to the remote peer.  alias is
    accepted but not used.
    """
    global dirname, peer

    myui = ui.ui()
    myui.setconfig('ui', 'interactive', 'off')

    if hg.islocal(url):
        return hg.repository(myui, url)

    local_path = os.path.join(dirname, 'clone')
    if os.path.exists(local_path):
        # Existing clone: refresh it from the remote.
        repo = hg.repository(myui, local_path)
        peer = hg.peer(myui, {}, url)
        repo.pull(peer, heads=None, force=True)
    else:
        peer, dstpeer = hg.clone(myui, {}, url, local_path, update=False, pull=True)
        repo = dstpeer.local()

    return repo
def rev_to_mark(rev):
    """Look up the mark recorded for rev in the global marks table."""
    mark = marks.from_rev(rev)
    return mark
def mark_to_rev(mark):
    """Look up the revision recorded for mark in the global marks table."""
    rev = marks.to_rev(mark)
    return rev
def export_ref(repo, name, kind, head):
    """Print commit commands for every unmarked revision up to head.

    The ref is named <prefix>/<kind>/<name>.  Revisions between the stored
    tip for that ref and head.rev() that do not have a mark yet are written
    as 'commit' commands.  Afterwards the ref is reset to head and head's
    revision is stored as the new tip.
    """
    global prefix, marks, mode

    ename = '%s/%s' % (kind, name)
    tip = marks.get_tip(ename)
    head_rev = head.rev()

    # mercurial takes too much time checking this
    if tip and tip == head_rev:
        # nothing to do
        return

    revs = [r for r in xrange(tip, head_rev + 1) if not marks.is_marked(r)]
    count = 0

    for rev in revs:

        c = repo[rev]
        (manifest, user, (time, tz), files, desc, extra) = repo.changelog.read(c.node())
        rev_branch = extra['branch']

        author = "%s %d %s" % (fixup_user(user), time, gittz(tz))
        if 'committer' in extra:
            user, time, tz = extra['committer'].rsplit(' ', 2)
            committer = "%s %s %s" % (user, time, gittz(int(tz)))
        else:
            committer = author

        parents = [p for p in repo.changelog.parentrevs(rev) if p >= 0]

        if len(parents) == 0:
            # Root commit: every file in the manifest is new.
            modified = c.manifest().keys()
            removed = []
        else:
            modified, removed = get_filechanges(repo, c, parents[0])

        if mode == 'hg':
            # Append branch, rename and extra information after an
            # '--HG--' marker in the commit message.
            extra_msg = ''

            if rev_branch != 'default':
                extra_msg += 'branch : %s\n' % rev_branch

            renames = []
            for f in c.files():
                if f not in c.manifest():
                    continue
                rename = c.filectx(f).renamed()
                if rename:
                    renames.append((rename[0], f))

            for e in renames:
                extra_msg += "rename : %s => %s\n" % e

            for key, value in extra.iteritems():
                if key in ('author', 'committer', 'encoding', 'message', 'branch', 'hg-git'):
                    continue
                else:
                    extra_msg += "extra : %s : %s\n" % (key, urllib.quote(value))

            desc += '\n'
            if extra_msg:
                desc += '\n--HG--\n' + extra_msg

        if len(parents) == 0 and rev:
            # A parentless revision other than rev 0: start the ref afresh.
            print('reset %s/%s' % (prefix, ename))

        print("commit %s/%s" % (prefix, ename))
        print("mark :%d" % (marks.get_mark(rev)))
        print("author %s" % (author))
        print("committer %s" % (committer))
        print("data %d" % (len(desc)))
        print(desc)

        if len(parents) > 0:
            print("from :%s" % (rev_to_mark(parents[0])))
            if len(parents) > 1:
                print("merge :%s" % (rev_to_mark(parents[1])))

        for f in modified:
            export_file(c.filectx(f))
        for f in removed:
            print("D %s" % (f))
        print("")

        count += 1
        if (count % 100 == 0):
            print("progress revision %d '%s' (%d/%d)" % (rev, name, count, len(revs)))
            print("#############################################################")

    # make sure the ref is updated
    #
    # Use the head revision explicitly.  The loop variable is unbound when
    # nothing new was exported, and otherwise names the last *newly
    # exported* revision, which is not the head if the head already had a
    # mark.
    print("reset %s/%s" % (prefix, ename))
    print("from :%u" % rev_to_mark(head_rev))
    print("")

    marks.set_tip(ename, head_rev)
def export_tag(repo, tag):
    """Export the history leading to tag under the 'tags' namespace."""
    head = repo[tag]
    export_ref(repo, tag, 'tags', head)
def export_bookmark(repo, bmark):
    """Export the bookmark bmark, using the head recorded in global bmarks."""
    export_ref(repo, bmark, 'bookmarks', bmarks[bmark])
def export_branch(repo, branch):
    """Export branch under the 'branches' namespace, up to its tip."""
    head = repo[get_branch_tip(repo, branch)]
    export_ref(repo, branch, 'branches', head)
def export_head(repo):
    """Export the (name, node) pair that list_head() stored in g_head."""
    global g_head
    head_name, head_node = g_head[0], g_head[1]
    export_ref(repo, head_name, 'bookmarks', head_node)
def do_capabilities(parser):
    """Answer the 'capabilities' command.

    Prints the import/export capabilities, the refspecs mapping branches,
    bookmarks and tags below the private prefix, and the marks file
    options, followed by a terminating blank line.
    """
    global prefix, dirname

    lines = [
        "import",
        "export",
        "refspec refs/heads/branches/*:%s/branches/*" % prefix,
        "refspec refs/heads/*:%s/bookmarks/*" % prefix,
        "refspec refs/tags/*:%s/tags/*" % prefix,
    ]
    for entry in lines:
        print(entry)

    path = os.path.join(dirname, 'marks-git')

    # Only request an import when the marks file already exists.
    if os.path.exists(path):
        print("*import-marks %s" % path)
    print("*export-marks %s" % path)

    print("")
def get_branch_tip(repo, branch):
    """Return the tip of branch from the global branches table.

    Returns None when the branch is unknown or has no heads.  With several
    heads a warning is printed and repo.branchtip() is used when available;
    otherwise the first recorded head is returned.
    """
    global branches

    heads = branches.get(branch, None)
    if not heads:
        return None

    # verify there's only one head
    if (len(heads) > 1):
        # Pass the name as a format argument: warn() applies '%' itself, so
        # a pre-formatted message containing '%' would raise.
        warn("Branch '%s' has more than one head, consider merging", branch)
        # older versions of mercurial don't have this
        if hasattr(repo, "branchtip"):
            return repo.branchtip(branch)

    return heads[0]
def list_head(repo, cur):
    """Print the symbolic HEAD line and remember it in g_head.

    The current bookmark is used when there is one.  Otherwise a bookmark
    is faked from the current branch name cur ('default' becomes 'master')
    pointing at '.' or, failing that, 'tip'.  Nothing is printed when
    neither gives a usable node.
    """
    global g_head, bmarks

    head = bookmarks.readcurrent(repo)
    if head:
        node = repo[head]
    else:
        # fake bookmark from current branch
        node = repo['.'] or repo['tip']
        if not node:
            return
        head = 'master' if cur == 'default' else cur
        bmarks[head] = node

    print("@refs/heads/%s HEAD" % head)
    g_head = (head, node)
def do_list(parser):
    """Answer the 'list' command.

    Fills the global bmarks (and, when track_branches is set, branches)
    tables and prints HEAD, the branches, the bookmarks and all tags except
    'tip', followed by a terminating blank line.
    """
    global branches, bmarks, mode, track_branches

    repo = parser.repo
    for bmark, node in bookmarks.listbookmarks(repo).iteritems():
        bmarks[bmark] = repo[node]

    list_head(repo, repo.dirstate.branch())

    if track_branches:
        for branch in repo.branchmap():
            heads = repo.branchheads(branch)
            if len(heads):
                branches[branch] = heads

        for branch in branches:
            print("? refs/heads/branches/%s" % branch)

    for bmark in bmarks:
        print("? refs/heads/%s" % bmark)

    for tag, node in repo.tagslist():
        if tag != 'tip':
            print("? refs/tags/%s" % tag)

    print("")
def do_import(parser):
    """Answer a batch of 'import <ref>' commands.

    Prints the feature header, then exports every requested ref and finally
    prints 'done'.  Mercurial's encoding is switched to utf-8 for the
    duration of the export.
    """
    repo = parser.repo

    path = os.path.join(dirname, 'marks-git')

    print("feature done")
    if os.path.exists(path):
        print("feature import-marks=%s" % path)
    print("feature export-marks=%s" % path)
    sys.stdout.flush()

    saved_encoding = encoding.encoding
    encoding.encoding = 'utf-8'

    # Checked in order: the branches prefix must come before the general
    # heads prefix it is a special case of.
    exporters = (
        ('refs/heads/branches/', export_branch),
        ('refs/heads/', export_bookmark),
        ('refs/tags/', export_tag),
    )

    # lets get all the import lines
    while parser.check('import'):
        ref = parser[1]

        if ref == 'HEAD':
            export_head(repo)
        else:
            for ref_prefix, exporter in exporters:
                if ref.startswith(ref_prefix):
                    exporter(repo, ref[len(ref_prefix):])
                    break

        parser.next()

    encoding.encoding = saved_encoding

    print('done')
def parse_blob(parser):
    """Read one 'blob' command and store its data in blob_marks by mark."""
    global blob_marks

    parser.next()
    blob_mark = parser.get_mark()
    parser.next()
    blob_marks[blob_mark] = parser.get_data()
    parser.next()
def get_merge_files(repo, p1, p2, files):
    """Add files touched by repo[p1] to files, in place.

    Every path listed by repo[p1].files() that is still in its manifest and
    is not already a key of files gets an entry {'ctx': repo[p1][path]}.
    p2 is accepted but not used.
    """
    for path in repo[p1].files():
        if path in files:
            continue
        if path not in repo[p1].manifest():
            continue
        files[path] = { 'ctx' : repo[p1][path] }
def parse_commit(parser):
    """Read one 'commit' command and create the matching hg changeset.

    Parses mark, author, committer, message, parents and file commands from
    the stream, commits the result through an in-memory context, records
    the new node for the ref in parsed_refs and registers the commit mark.
    """
    global marks, blob_marks, bmarks, parsed_refs
    global mode

    from_mark = merge_mark = None

    ref = parser[1]
    parser.next()

    commit_mark = parser.get_mark()
    parser.next()
    author = parser.get_author()
    parser.next()
    committer = parser.get_author()
    parser.next()
    data = parser.get_data()
    parser.next()
    if parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()
    if parser.check('merge'):
        merge_mark = parser.get_mark()
        parser.next()
        if parser.check('merge'):
            die('octopus merges are not supported yet')

    files = {}

    for line in parser:
        if parser.check('M'):
            t, m, mark_ref, path = line.split(' ', 3)
            mark = int(mark_ref[1:])
            f = { 'mode' : hgmode(m), 'data' : blob_marks[mark] }
        elif parser.check('D'):
            # Split only once so that paths containing spaces stay intact.
            t, path = line.split(' ', 1)
            f = { 'deleted' : True }
        else:
            # die() formats the message itself; pass the line as argument
            # so a '%' in it cannot break the formatting.
            die('Unknown file command: %s', line)
        files[path] = f

    def getfilectx(repo, memctx, f):
        # Callback handed to memctx: returns the file context for path f,
        # or raises IOError for a deleted file.
        of = files[f]
        if 'deleted' in of:
            raise IOError
        if 'ctx' in of:
            return of['ctx']
        is_exec = of['mode'] == 'x'
        is_link = of['mode'] == 'l'
        rename = of.get('rename', None)
        return context.memfilectx(f, of['data'],
                is_link, is_exec, rename)

    repo = parser.repo

    user, date, tz = author
    extra = {}

    if committer != author:
        extra['committer'] = "%s %u %u" % committer

    if from_mark:
        p1 = repo.changelog.node(mark_to_rev(from_mark))
    else:
        p1 = '\0' * 20

    if merge_mark:
        p2 = repo.changelog.node(mark_to_rev(merge_mark))
    else:
        p2 = '\0' * 20

    #
    # If files changed from any of the parents, hg wants to know, but in git if
    # nothing changed from the first parent, nothing changed.
    #
    if merge_mark:
        get_merge_files(repo, p1, p2, files)

    if mode == 'hg':
        # Recover the information stored after the '--HG--' marker.
        i = data.find('\n--HG--\n')
        if i >= 0:
            tmp = data[i + len('\n--HG--\n'):].strip()
            # Split on the first ' : ' only: 'extra : key : value' lines
            # carry a second separator that is split off below.
            for k, v in [e.split(' : ', 1) for e in tmp.split('\n')]:
                if k == 'rename':
                    old, new = v.split(' => ', 1)
                    files[new]['rename'] = old
                elif k == 'branch':
                    extra[k] = v
                elif k == 'extra':
                    ek, ev = v.split(' : ', 1)
                    extra[ek] = urllib.unquote(ev)
            data = data[:i]

    ctx = context.memctx(repo, (p1, p2), data,
            files.keys(), getfilectx,
            user, (date, tz), extra)

    tmp = encoding.encoding
    encoding.encoding = 'utf-8'

    node = repo.commitctx(ctx)

    encoding.encoding = tmp

    rev = repo[node].rev()

    parsed_refs[ref] = node

    marks.new_mark(rev, commit_mark)
def parse_reset(parser):
    """Read one 'reset' command.

    A reset directly followed by a commit is handed to parse_commit().  A
    reset with a 'from' line records the referenced node for the ref in
    parsed_refs.  Anything else is ignored.
    """
    ref = parser[1]
    parser.next()
    # ugh
    if parser.check('commit'):
        parse_commit(parser)
    elif parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()

        rev = mark_to_rev(from_mark)
        parsed_refs[ref] = parser.repo.changelog.node(rev)
def parse_tag(parser):
    """Read one 'tag' command and discard it.

    The name, mark, tagger and data lines are consumed in order so the
    stream stays in sync; none of the values is used.
    """
    _name = parser[1]
    parser.next()
    _from_mark = parser.get_mark()
    parser.next()
    _tagger = parser.get_author()
    parser.next()
    _data = parser.get_data()
    parser.next()

    # nothing to do
def do_export(parser):
    """Answer the 'export' command.

    Consumes the incoming stream up to 'done', creating changesets as it
    goes, then updates bookmarks and tags for the refs that were seen and
    prints 'ok <ref>' for each accepted ref.  If a remote peer is set, the
    result is pushed to it.
    """
    global parsed_refs, bmarks, peer

    parser.next()

    for line in parser.each_block('done'):
        if parser.check('blob'):
            parse_blob(parser)
        elif parser.check('commit'):
            parse_commit(parser)
        elif parser.check('reset'):
            parse_reset(parser)
        elif parser.check('tag'):
            parse_tag(parser)
        elif parser.check('feature'):
            pass
        else:
            # die() formats the message itself; pass the line as argument
            # so a '%' in it cannot break the formatting.
            die('unhandled export command: %s', line)

    for ref, node in parsed_refs.iteritems():
        # Same prefix (with trailing slash) as do_import(), so a bookmark
        # whose name merely starts with 'branches' is still a bookmark.
        if ref.startswith('refs/heads/branches/'):
            pass
        elif ref.startswith('refs/heads/'):
            bmark = ref[len('refs/heads/'):]
            if bmark in bmarks:
                old = bmarks[bmark].hex()
            else:
                old = ''
            if not bookmarks.pushbookmark(parser.repo, bmark, old, node):
                continue
        elif ref.startswith('refs/tags/'):
            tag = ref[len('refs/tags/'):]
            parser.repo.tag([tag], node, None, True, None, {})
        else:
            # transport-helper/fast-export bugs
            continue
        print("ok %s" % ref)

    print("")

    if peer:
        parser.repo.push(peer, force=False)
def main(args):
    """Entry point of the helper.

    args[1] is the remote alias and args[2] the url.  Sets up the global
    state, opens the repository and then answers commands read from stdin
    until an empty line (or EOF).  Marks are stored on exit unless the
    remote is a temporary one, in which case its directory is removed.
    """
    global prefix, dirname, branches, bmarks
    global marks, blob_marks, parsed_refs
    global peer, mode, bad_mail, bad_name
    global track_branches

    alias = args[1]
    url = args[2]
    peer = None

    hg_git_compat = False
    track_branches = True
    try:
        if get_config('remote-hg.hg-git-compat') == 'true\n':
            hg_git_compat = True
            track_branches = False
        if get_config('remote-hg.track-branches') == 'false\n':
            track_branches = False
    except subprocess.CalledProcessError:
        pass

    if hg_git_compat:
        mode = 'hg'
        bad_mail = 'none@none'
        bad_name = ''
    else:
        mode = 'git'
        bad_mail = 'unknown'
        bad_name = 'Unknown'

    # An alias that is just the url behind a 4-character prefix is treated
    # as temporary: state lives under a hash of the alias and is removed on
    # exit.
    if alias[4:] == url:
        is_tmp = True
        alias = util.sha1(alias).hexdigest()
    else:
        is_tmp = False

    gitdir = os.environ['GIT_DIR']
    dirname = os.path.join(gitdir, 'hg', alias)
    branches = {}
    bmarks = {}
    blob_marks = {}
    parsed_refs = {}

    repo = get_repo(url, alias)
    prefix = 'refs/hg/%s' % alias

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    marks_path = os.path.join(dirname, 'marks-hg')
    marks = Marks(marks_path)

    parser = Parser(repo)
    for line in parser:
        if parser.check('capabilities'):
            do_capabilities(parser)
        elif parser.check('list'):
            do_list(parser)
        elif parser.check('import'):
            do_import(parser)
        elif parser.check('export'):
            do_export(parser)
        else:
            # die() formats the message itself; pass the line as argument
            # so a '%' in it cannot break the formatting.
            die('unhandled command: %s', line)
        sys.stdout.flush()

    if not is_tmp:
        marks.store()
    else:
        shutil.rmtree(dirname)
# Only run the helper when executed as a script, so the module can be
# imported (e.g. for testing) without starting to read commands from stdin.
if __name__ == '__main__':
    sys.exit(main(sys.argv))