hg2git.py: Use git-rev-parse to get SHA1s instead of reading files below refs/ directly
[fast-export/fast-export-unix-compliant.git] / hg2git.py
blob c58b4e7bd2e2b23987ff12dc3aed8b68c4584596
1 #!/usr/bin/env python
3 # Copyright (c) 2007 Rocco Rutte <pdmef@gmx.net>
4 # License: MIT <http://www.opensource.org/licenses/mit-license.php>
6 from mercurial import repo,hg,cmdutil,util,ui,revlog,node
7 import re
8 import os
9 import sys
# Name of the git branch that hg's default 'HEAD' branch is mapped to.
cfg_master = 'master'

# Loose pattern for a user field of the form 'Name <mail>'; group 1 is the
# name, group 2 the address including its angle brackets.
user_re = re.compile(r'([^<]+) (<[^>]+>)$')

# Loose pattern for a name that is wrapped entirely in double quotes;
# group 1 is the name without the quotes.
user_clean_re = re.compile(r'^["]([^"]+)["]$')
def setup_repo(url):
    """Open the Mercurial repository at ``url``.

    Returns a ``(ui, repo)`` tuple: the non-interactive ui object created
    here and the repository object opened through it.
    """
    repo_ui = ui.ui(interactive=False)
    repository = hg.repository(repo_ui, url)
    return repo_ui, repository
def fixup_user(user, authors):
    """Normalize a user string to the form ``Name <mail>``.

    ``authors`` is either ``None`` or a mapping used to translate ``user``
    before normalization; users missing from the mapping are kept as they
    are.  The result is always ``'<name> <mail>'`` with the mail part
    enclosed in angle brackets.
    """
    if authors is not None:
        # Translate through the authors table, falling back to the
        # current value of 'user' when there is no entry for it.
        user = authors.get(user, user)

    parsed = user_re.match(user)
    if parsed is not None:
        # Already in 'Name <mail>' syntax, nothing to invent.
        name, mail = parsed.group(1), parsed.group(2)
    elif '@' in user:
        # Bare address: use it both as the name and as the mail.
        name, mail = user, '<%s>' % user
    else:
        # Bare name without any address: attach a placeholder mail.
        name, mail = user, '<devnull@localhost>'

    # Drop double quotes that enclose the whole name.
    unquoted = user_clean_re.match(name)
    if unquoted is not None:
        name = unquoted.group(1)
    return '%s %s' % (name, mail)
def get_branch(name):
    """Map an hg branch name to the git branch name to use.

    ``'HEAD'`` (which may come from CVS imports into hg), ``'default'`` and
    the empty string are all mapped to ``cfg_master``; every other name is
    returned unchanged.
    """
    if name in ('HEAD', 'default', ''):
        return cfg_master
    return name
def get_changeset(ui, repo, revision, authors=None):
    """Look up ``revision`` in ``repo`` and return its metadata as a tuple.

    ``authors`` is handed to ``fixup_user`` unchanged; ``None`` (the
    default) means no author mapping is applied.  ``ui`` is accepted but
    not used by this function.

    Returns ``(node, manifest, user, (time, tz), files, desc, branch,
    extra)`` where ``user`` is the result of ``fixup_user``, ``tz`` is the
    timezone offset formatted as ``+HHMM`` / ``-HHMM`` and ``branch`` is the
    ``'branch'`` entry of ``extra`` (``'master'`` if absent) passed through
    ``get_branch``.
    """
    # Named rev_node so the imported 'node' module is not shadowed.
    rev_node = repo.lookup(revision)
    (manifest, user, (time, timezone), files, desc, extra) = \
        repo.changelog.read(rev_node)

    # The stored offset is negated for output (presumably hg keeps it with
    # the opposite sign of the +HHMM notation -- confirm against the
    # Mercurial documentation).  Sign and magnitude are formatted
    # separately: floor division of a negative offset that is not a whole
    # number of hours rounds the hour away from zero, e.g. a stored 12600
    # would come out as -0430 instead of -0330.
    offset = -timezone
    sign = '-' if offset < 0 else '+'
    hours, remainder = divmod(abs(offset), 3600)
    tz = '%s%02d%02d' % (sign, hours, remainder // 60)

    branch = get_branch(extra.get('branch', 'master'))
    return (rev_node, manifest, fixup_user(user, authors), (time, tz),
            files, desc, branch, extra)
def load_cache(filename):
    """Read a cache file of ``:key value`` lines into a dict.

    Each valid line consists of exactly two space-separated fields, the
    first of which starts with ``:``.  The key is stored without the
    leading ``:`` and the value without its trailing newline.  Invalid
    lines are reported on stderr with their line number and skipped.

    Returns an empty dict if ``filename`` does not exist.
    """
    cache = {}
    if not os.path.exists(filename):
        return cache
    # 'with' guarantees the file is closed even if reading fails.
    with open(filename, 'r') as f:
        for lineno, line in enumerate(f, 1):
            fields = line.split(' ')
            # startswith() rather than fields[0][0]: a line beginning with
            # a blank yields an empty first field, and indexing it would
            # raise IndexError instead of reporting the bad line.
            if len(fields) != 2 or not fields[0].startswith(':'):
                sys.stderr.write('Invalid file format in [%s], line %d\n'
                                 % (filename, lineno))
                continue
            # put key:value in cache, key without the leading ':'
            cache[fields[0][1:]] = fields[1].split('\n')[0]
    return cache
def save_cache(filename, cache):
    """Write ``cache`` to ``filename`` as one ``:key value`` line per entry.

    Keys and values are converted with ``str()``.  An existing file is
    overwritten.
    """
    # A plain loop instead of map(): map() is lazy on Python 3, so using it
    # only for its side effects would write nothing at all there.
    with open(filename, 'w') as f:
        for key, value in cache.items():
            f.write(':%s %s\n' % (str(key), str(value)))
def get_git_sha1(name, type='heads'):
    """Return the SHA1 that ``refs/<type>/<name>`` points to, or ``None``.

    The ref is resolved with ``git rev-parse --verify`` so that packed refs
    are supported.  ``GIT_DIR`` is taken from the environment and defaults
    to ``/dev/null`` when unset.  ``None`` is returned when git cannot be
    run, exits with an error, or prints nothing.
    """
    # Local import so this function does not depend on the file's import
    # block being changed.
    import subprocess

    ref = 'refs/%s/%s' % (type, name)
    env = dict(os.environ)
    env['GIT_DIR'] = os.getenv('GIT_DIR', '/dev/null')
    try:
        with open(os.devnull, 'w') as devnull:
            # Pass an argument list instead of a shell command line: ref
            # names and GIT_DIR may contain quotes or shell metacharacters
            # which must never be interpreted by a shell.
            proc = subprocess.Popen(
                ['git', 'rev-parse', '--verify', ref],
                stdout=subprocess.PIPE, stderr=devnull, env=env,
                universal_newlines=True)
            output = proc.communicate()[0]
    except (IOError, OSError):
        # git is missing or could not be started
        return None
    if proc.returncode != 0 or not output:
        return None
    first_line = output.split('\n', 1)[0]
    if not first_line:
        return None
    return first_line[0:40]