3 # Copyright (c) 2007 Rocco Rutte <pdmef@gmx.net>
6 """hg2git.py - A mercurial-to-git filter for git-fast-import(1)
7 Usage: hg2git.py <hg repo url> <marks file> <heads file> <tip file>
10 from mercurial
import repo
,hg
,cmdutil
,util
,ui
,revlog
,node
11 from tempfile
import mkstemp
16 # silly regex to see if user field has email address
17 user_re
=re
.compile('[^<]+ <[^>]+>$')
18 # git branch for hg's default 'HEAD' branch
20 # insert 'checkpoint' command after this many commits
21 cfg_checkpoint_count
=1000
24 sys
.stderr
.write(__doc__
)
29 return myui
,hg
.repository(myui
,url
)
31 def get_changeset(ui
,repo
,revision
):
37 if user_re
.match(user
)==None:
39 return user
+' <none@none>'
40 return user
+' <'+user
+'>'
42 node
=repo
.lookup(revision
)
43 (manifest
,user
,(time
,timezone
),files
,desc
,extra
)=repo
.changelog
.read(node
)
44 tz
="%+03d%02d" % (-timezone
/ 3600, ((-timezone
% 3600) / 60))
45 branch
=get_branch(extra
.get('branch','master'))
46 return (manifest
,fixup_user(user
),(time
,tz
),files
,desc
,branch
,extra
)
49 return x
and '100755' or '100644'
53 #map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n'))
55 def checkpoint(count
):
57 if count
%cfg_checkpoint
_count
==0:
58 sys
.stderr
.write("Checkpoint after %d commits\n" % count
)
def get_parent_mark(parent, marks):
    """Return the reference to use for a parent revision.

    The parent's number plus one is looked up (as a string key) in
    ``marks``. When an entry exists, its cached value is returned;
    otherwise the ``:<number>`` mark syntax for that same number is
    returned.
    """
    mark_number = parent + 1
    session_mark = ':%d' % mark_number
    cache_key = str(mark_number)
    if cache_key in marks:
        return marks[cache_key]
    return session_mark
70 """See if two revisions of a file are not equal."""
71 return node
.hex(f1
)!=node
.hex(f2
)
73 def outer_set(dleft
,dright
,l
,c
,r
):
74 """Loop over our repository and find all changed and missing files."""
75 for left
in dleft
.keys():
76 right
=dright
.get(left
,None)
78 # we have the file but our parent hasn't: add to left set
80 elif mismatch(dleft
[left
],right
):
81 # we have it but checksums mismatch: add to center set
83 for right
in dright
.keys():
84 left
=dleft
.get(right
,None)
86 # if parent has file but we don't: add to right set
88 # change is already handled when comparing child against parent
91 def get_filechanges(repo
,revision
,parents
,mleft
):
92 """Given some repository and revision, find all changed/deleted files."""
96 mright
=repo
.changectx(p
).manifest()
101 l
,c
,r
=outer_set(mleft
,mright
,l
,c
,r
)
104 def export_commit(ui
,repo
,revision
,marks
,heads
,last
,max,count
):
105 (_
,user
,(time
,timezone
),files
,desc
,branch
,_
)=get_changeset(ui
,repo
,revision
)
106 parents
=repo
.changelog
.parentrevs(revision
)
108 wr('commit refs/heads/%s' % branch
)
109 wr('mark :%d' % (revision
+1))
110 wr('committer %s %d %s' % (user
,time
,timezone
))
111 wr('data %d' % (len(desc
)+1)) # wtf?
115 src
=heads
.get(branch
,'')
118 # if we have a cached head, this is an incremental import: initialize it
119 # and kill reference so we won't init it again
122 sys
.stderr
.write('Initializing branch [%s] to parent [%s]\n' %
124 link
=src
# avoid making a merge commit for incremental import
125 elif link
=='' and not heads
.has_key(branch
) and revision
>0:
126 # newly created branch and not the first one: connect to parent
127 tmp
=get_parent_mark(parents
[0],marks
)
129 sys
.stderr
.write('Link new branch [%s] to parent [%s]\n' %
131 link
=tmp
# avoid making a merge commit for branch fork
134 l
=last
.get(branch
,revision
)
136 # 1) as this commit implicitly is the child of the most recent
137 # commit of this branch, ignore this parent
138 # 2) ignore nonexistent parents
140 if p
==l
or p
==revision
or p
<0:
142 tmp
=get_parent_mark(p
,marks
)
143 # if we fork off a branch, don't merge with our parent via 'merge'
144 # as we have 'from' already above
147 sys
.stderr
.write('Merging branch [%s] with parent [%s] from [r%d]\n' %
151 last
[branch
]=revision
153 # we need this later to write out tags
154 marks
[str(revision
)]=':%d'%(revision
+1)
156 ctx
=repo
.changectx(str(revision
))
158 added
,changed
,removed
=get_filechanges(repo
,revision
,parents
,man
)
160 sys
.stderr
.write('Exporting revision %d with %d/%d/%d added/changed/removed files\n' %
161 (revision
,len(added
),len(changed
),len(removed
)))
163 for a
in added
+changed
:
166 wr('M %s inline %s' % (gitmode(man
.execf(a
)),a
))
167 wr('data %d' % len(d
)) # had some trouble with size()
174 return checkpoint(count
)
176 def export_tags(ui
,repo
,marks_cache
,start
,end
,count
):
179 # ignore latest revision
180 if tag
=='tip': continue
181 rev
=repo
.changelog
.rev(node
)
182 # ignore those tags not in our import range
183 if rev
<start
or rev
>=end
: continue
185 ref
=marks_cache
.get(str(rev
),None)
187 sys
.stderr
.write('Failed to find reference for creating tag'
188 ' %s at r%d\n' % (tag
,rev
))
190 (_
,user
,(time
,timezone
),_
,desc
,branch
,_
)=get_changeset(ui
,repo
,rev
)
191 sys
.stderr
.write('Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag
,rev
,ref
))
194 wr('tagger %s %d %s' % (user
,time
,timezone
))
195 msg
='hg2git created tag %s for hg revision %d on branch %s on (summary):\n\t%s' % (tag
,
196 rev
,branch
,desc
.split('\n')[0])
197 wr('data %d' % (len(msg
)+1))
200 count
=checkpoint(count
)
203 def load_cache(filename
):
205 if not os
.path
.exists(filename
):
209 for line
in f
.readlines():
211 fields
=line
.split(' ')
212 if fields
==None or not len(fields
)==2 or fields
[0][0]!=':':
213 sys
.stderr
.write('Invalid file format in [%s], line %d\n' % (filename
,l
))
215 # put key:value in cache, key without ^:
216 cache
[fields
[0][1:]]=fields
[1].split('\n')[0]
def save_cache(filename, cache):
    """Write a cache mapping to ``filename``, one entry per line.

    Each entry is written as ``:<key> <value>`` followed by a newline,
    with both key and value converted via ``str()``. An existing file is
    truncated.

    Parameters:
      filename -- path of the file to (over)write
      cache    -- mapping whose entries are written out
    """
    f = open(filename, 'w+')
    try:
        # Explicit loop instead of map(): the writes are side effects and
        # must happen even where map() is lazy.
        for key in cache.keys():
            f.write(':%s %s\n' % (str(key), str(cache.get(key))))
    finally:
        # Always release the handle so the data is flushed to disk, even
        # if a write fails part-way through.
        f.close()
225 def verify_heads(ui
,repo
,cache
):
227 f
=open(os
.getenv('GIT_DIR','/dev/null')+'/refs/heads/'+branch
)
228 sha1
=f
.readlines()[0].split('\n')[0]
232 for b
in cache
.keys():
233 sys
.stderr
.write('Verifying branch [%s]\n' % b
)
237 sys
.stderr
.write('Warning: Branch [%s] modified outside hg2git:'
238 '\n%s (repo) != %s (cache)\n' % (b
,sha1
,c
))
241 if __name__
=='__main__':
242 if len(sys
.argv
)!=6: sys
.exit(usage(1))
243 repourl
,m
,marksfile
,headsfile
,tipfile
=sys
.argv
[1:]
246 marks_cache
=load_cache(marksfile
)
247 heads_cache
=load_cache(headsfile
)
248 state_cache
=load_cache(tipfile
)
250 ui
,repo
=setup_repo(repourl
)
252 if not verify_heads(ui
,repo
,heads_cache
):
255 tip
=repo
.changelog
.count()
257 min=int(state_cache
.get('tip',0))
264 for rev
in range(min,max):
265 c
=export_commit(ui
,repo
,rev
,marks_cache
,heads_cache
,last
,tip
,c
)
267 c
=export_tags(ui
,repo
,marks_cache
,min,max,c
)
269 sys
.stderr
.write('Issued %d commands\n' % c
)
271 state_cache
['tip']=max
272 state_cache
['repo']=repourl
273 save_cache(tipfile
,state_cache
)