hg-{fast-export,reset}.sh: add git --exec-path to PATH
[girocco-hg-fast-export.git] / hg2git.py
blobd8198c10dabd8dda7476cb67554d74864ddc5395
1 #!/usr/bin/env python
3 # Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others.
4 # License: MIT <http://www.opensource.org/licenses/mit-license.php>
6 from mercurial import hg,ui
7 import re
8 import os
9 import sys
10 from subprocess import PIPE,Popen
# Git branch name substituted for Mercurial's unnamed/default branch.
cfg_master = 'master'

# Optional prefix for branch names; when non-empty it is joined to the
# branch name with a '/'.
origin_name = ''

# Email address used when a changeset author has no usable email.
unknown_addr = 'unknown'

# Splits "name <email>" style input into four groups:
#   1: text before the first '<' (a literal "<at>" is not treated as '<')
#   2: text after '<' when no '>' follows it
#   3: text between the first '<' and the last '>'
#   4: text after the last '>'
split_name_re = re.compile(
    r'^((?:[^<]|<at>)*?)(?:<(?!at>)(?:(?:([^>]*)|(?:(.*)>(.*)))))?$',
    re.DOTALL | re.IGNORECASE)

# Characters stripped from both ends of a name or email: every control
# character below 0x20 followed by space . , : ; < > " \ and '
git_crud = ''.join(map(chr, range(0x20))) + ' .,:;<>"\\' + "'"

# Characters deleted from anywhere inside a name or email.
git_delch_re = re.compile(r'[<>\n]+', re.DOTALL)

# Captures everything before the first '@', or before a spelled out "<at>"
# that stands alone (surrounded by whitespace or the string boundaries).
spelled_at_re = re.compile(
    r'^(.*?)(?:@|(?:(?:^|\s)<at>(?:\s|$)))',
    re.IGNORECASE)
def set_default_branch(name):
    """Replace the module-wide default git branch name with *name*."""
    global cfg_master
    cfg_master = name
def set_origin_name(name):
    """Replace the module-wide origin name with *name*."""
    global origin_name
    origin_name = name
def setup_repo(url):
    """Open the Mercurial repository at *url* with a non-interactive ui.

    Returns a (ui, repository) tuple.
    """
    try:
        myui = ui.ui(interactive=False)
    except TypeError:
        # This ui.ui() does not accept the keyword argument, so build a
        # plain ui and switch interactivity off through its configuration.
        myui = ui.ui()
        myui.setconfig('ui', 'interactive', 'off')
    repo = hg.repository(myui, url)
    return myui, repo
def gitname(name):
    """Clean *name* the way Git cleans a user name or email.

    Git strips "crud" characters off both the beginning and the end of the
    value and then deletes any '<', '>' and '\\n' characters that remain,
    before it combines the name with the email surrounded by '<' and '>'.
    The same cleaning is applied to the name and to the email.
    """
    trimmed = name.strip(git_crud)
    return git_delch_re.sub('', trimmed)
def set_unknown_addr(addr):
    """Set the fallback email address used for authors without one.

    *addr* is cleaned with gitname() first.  Returns True when the cleaned
    address is non-empty and was stored; returns False (leaving the current
    fallback untouched) when *addr* is None or is empty after cleaning.
    """
    global unknown_addr
    if addr is None:
        return False
    cleaned = gitname(addr)
    if cleaned == '':
        return False
    unknown_addr = cleaned
    return True
# Split the combined name and email input into a separate name and email and
# apply Git's rules to each part. The idea is to use anything to the left of
# the first '<' and to the right of the last '>' as the name. Anything between
# the first '<' and the last '>' is treated as the email. If there is no '<'
# but the name contains '@' (which may be spelled out) then treat the entire
# thing as an email with no name. If the detected name is empty then anything
# up to the first '@' (which may be spelled out) in the email is used for the
# name. Failing that the entire email is used for the name.
#
# Returns a two-element list [name, email]. The email may be '' even when the
# name is not.
def split_name_email(combined):
    name = ''
    # email is the cleaned address, rawemail the text it was cleaned from
    email,rawemail = '',''
    match = split_name_re.match(combined)
    if match:
        # left:  text before the first '<' (a literal "<at>" is not a '<')
        # rest:  text after '<' when no '>' follows it
        # mid:   text between the first '<' and the last '>'
        # right: text after the last '>'
        left,rest,mid,right = match.groups()
        if rest != None:
            # "name <email" with the closing '>' missing
            name = gitname(left)
            rawemail = rest
            email = gitname(rawemail)
        elif mid != None:
            # "name <email> more name": both outer parts form the name
            name = gitname(left.rstrip() + ' ' + right.lstrip())
            rawemail = mid
            email = gitname(rawemail)
        else:
            # No '<' at all: everything is the name ...
            name = gitname(left)
            if email == '' and spelled_at_re.match(left):
                # ... unless it contains an '@', in which case it is taken
                # as the email and the name is derived from it below
                rawemail = left
                email = name
                name = ''
        if name == '':
            # Derive the name from the part of the email before the '@'
            at = spelled_at_re.match(rawemail)
            if at:
                name = gitname(at.group(1))
            if name == '':
                name = email
            # We do this test to be compatible with the previous behavior of hg2git.py
            # When it's given a <email> without any name and email does not contain '@'
            # then it sets the email to the unknown address
            if (len(left) < 2 or left[-1] != ' ') and not at:
                email = ''
    return [name, email]
def fixup_user(user, authors):
    """Turn a Mercurial user string into a git "name <email>" identity.

    Surrounding double quotes are removed from *user*.  When *authors* is
    not None it is used as a mapping from the original user string to its
    replacement; users missing from the mapping are kept unchanged.  The
    result is split with split_name_email() and any missing part is filled
    in, so that neither the name nor the email of the returned identity is
    empty.
    """
    user = user.strip('"')
    if authors is not None:
        # Look the user up in the authors table, defaulting to the user
        # itself when there is no entry for it.
        user = authors.get(user, user)
    name, mail = split_name_email(user)
    if not mail:
        # No email address available: substitute unknown_addr.
        mail = unknown_addr
    if not name:
        # Git does not like an empty name either.  split_name_email only
        # returns an empty name together with an empty email, which needs
        # input that is empty or made of "crud" characters only, so this
        # is just a safety net.
        name = '-'
    return '%s <%s>' % (name, mail)
def get_branch(name):
    """Map a Mercurial branch name onto the git branch name to use.

    'HEAD', 'default' and '' are replaced by the configured default branch
    (cfg_master).  When an origin name is configured the result is
    prefixed with it and a '/'.
    """
    # 'HEAD' is the result of a bug in mutt's cvs->hg conversion,
    # other CVS imports may need it, too
    if name in ('HEAD', 'default', ''):
        name = cfg_master
    if not origin_name:
        return name
    return origin_name + '/' + name
def get_changeset(ui, repo, revision, authors=None):
    """Read one changeset from *repo* and return it in git-friendly form.

    *revision* is resolved with repo.lookup() and the changelog entry is
    read with repo.changelog.read().  *authors* is an optional mapping that
    is handed to fixup_user(); an empty mapping is used when it is omitted.
    *ui* is accepted for interface compatibility and is not used.

    Returns the tuple
    (node, manifest, user, (time, tz), files, desc, branch, extra)
    where user has been passed through fixup_user(), tz is the UTC offset
    formatted as "+HHMM" / "-HHMM" and branch has been passed through
    get_branch().
    """
    if authors is None:
        authors = {}
    node = repo.lookup(revision)
    (manifest, user, (time, timezone), files, desc, extra) = \
        repo.changelog.read(node)
    # The stored offset has the opposite sign of the one git expects, so
    # negate it.  Format the magnitude and the sign separately: flooring a
    # negative offset to whole hours while taking the minutes from a
    # non-negative modulo turns e.g. -03:30 into "-0430".
    offset = -timezone
    if offset < 0:
        sign = '-'
        offset = -offset
    else:
        sign = '+'
    hours, seconds = divmod(offset, 3600)
    tz = '%s%02d%02d' % (sign, hours, seconds // 60)
    branch = get_branch(extra.get('branch', 'master'))
    return (node, manifest, fixup_user(user, authors), (time, tz), files,
            desc, branch, extra)
def mangle_key(key):
    """Return *key* unchanged; the default key transformation of load_cache()."""
    return key


def load_cache(filename, get_key=mangle_key):
    """Load a cache file written as one ':<key> <value>' entry per line.

    *get_key* is applied to every key (without its leading ':') before it
    is stored.  Lines that do not consist of exactly two space separated
    fields, or whose first field does not start with ':', are reported on
    stderr together with their line number and skipped.

    Returns a dict mapping the transformed keys to the value strings; the
    dict is empty when *filename* does not exist.
    """
    cache = {}
    if not os.path.exists(filename):
        return cache
    f = open(filename, 'r')
    try:
        lineno = 0
        for line in f:
            lineno += 1
            fields = line.split(' ')
            # startswith() also copes with an empty first field (a line
            # that begins with a space), which indexing [0] would not.
            if len(fields) != 2 or not fields[0].startswith(':'):
                sys.stderr.write('Invalid file format in [%s], line %d\n' % (filename, lineno))
                continue
            # put key:value in cache, key without the leading ':' and the
            # value without its trailing newline
            cache[get_key(fields[0][1:])] = fields[1].split('\n')[0]
    finally:
        f.close()
    return cache
def save_cache(filename, cache):
    """Write *cache* to *filename*, one ':<key> <value>' entry per line.

    Keys and values are converted with str().  Any existing file content
    is replaced.
    """
    f = open(filename, 'w+')
    try:
        # An explicit loop: map() is lazy on Python 3, so using it only
        # for its side effects would write nothing there.
        for key in cache.keys():
            f.write(':%s %s\n' % (str(key), str(cache.get(key))))
    finally:
        f.close()
def get_git_sha1(name, type='heads'):
    """Return the object name that refs/<type>/<name> points to.

    The ref is resolved by running "git rev-parse --verify --quiet" and
    the first 40 characters of its output are returned.  Returns None when
    git prints nothing or when running git fails.
    """
    try:
        # use git-rev-parse to support packed refs
        ref = "refs/%s/%s" % (type, name)
        proc = Popen(["git", "rev-parse", "--verify", "--quiet", ref], stdout=PIPE)
        output = proc.communicate()[0]
    except Exception:
        # Best effort: any failure to run git counts as "no such ref", but
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        return None
    if not output:
        return None
    return output[0:40]