3 # Copyright (c) 2007 Rocco Rutte <pdmef@gmx.net>
6 # This file is deprecated after merging it into:
7 # http://repo.or.cz/w/fast-export.git
9 """hg2git.py - A mercurial-to-git filter for git-fast-import(1)
10 Usage: hg2git.py <hg repo url> <marks file> <heads file> <tip file>
13 from mercurial
import repo
,hg
,cmdutil
,util
,ui
,revlog
,node
14 from tempfile
import mkstemp
15 from optparse
import OptionParser
# Recognizes a "Signed-off-by: <who>" line of a log message (the O and B
# may be upper or lower case); group 1 is the text after the colon+space.
sob_re = re.compile('^Signed-[Oo]ff-[Bb]y: (.+)$')
# Recognizes a user field of the form "Name <mail>"; group 1 is the name,
# group 2 is the address including its angle brackets.
user_re = re.compile('([^<]+) (<[^>]+>)$')
# Recognizes a name wrapped in double quotes; group 1 is the bare name.
user_clean_re = re.compile('^["]([^"]+)["]$')
26 # git branch for hg's default 'HEAD' branch
# Issue a 'checkpoint' command each time this many commits have been
# written; 0 means never issue one.
cfg_checkpoint_count = 0
# Print a progress message each time this many file contents have been
# written.
cfg_export_boundary = 1000
34 sys
.stderr
.write(__doc__
)
39 return myui
,hg
.repository(myui
,url
)
41 def fixup_user(user
,authors
):
43 # if we have an authors table, try to get mapping
44 # by defaulting to the current value of 'user'
45 user
=authors
.get(user
,user
)
46 name
,mail
,m
='','',user_re
.match(user
)
# if we don't have 'Name <mail>' syntax, use 'user
# <devnull@localhost>' if user contains no '@' and
# 'user <user>' otherwise
53 mail
='<devnull@localhost>'
57 # if we have 'Name <mail>' syntax, everything is fine :)
58 name
,mail
=m
.group(1),m
.group(2)
60 # remove any silly quoting from username
61 m2
=user_clean_re
.match(name
)
64 return '%s %s' % (name
,mail
)
def get_changeset(ui,repo,revision,authors=None):
  """Read one changeset from `repo` and return it in export-ready form.

  `revision` is resolved with repo.lookup() and its changelog entry is
  read. The result is the tuple

    (node, manifest, user, (time, tz), files, desc, branch, extra)

  where `user` has been run through fixup_user() together with `authors`,
  `tz` is the offset rendered as '+HHMM' or '-HHMM', and `branch` is
  get_branch() applied to the 'branch' entry of `extra` ('master' if
  there is none). `ui` is accepted but not used here.

  `authors` defaults to an empty mapping when omitted or None.
  """
  if authors is None:
    authors={}
  revnode=repo.lookup(revision)
  (manifest,user,(time,timezone),files,desc,extra)=repo.changelog.read(revnode)
  # The stored offset is negated for output, exactly as before. Hours and
  # minutes are taken from the absolute value: flooring the signed value
  # gave the wrong hour for negative offsets that are not whole hours
  # (-3:30 came out as '-0430', -0:30 as '-0130').
  offset=-timezone
  if offset<0:
    sign='-'
  else:
    sign='+'
  hours,minutes=divmod(abs(int(offset))//60,60)
  tz='%s%02d%02d' % (sign,hours,minutes)
  branch=get_branch(extra.get('branch','master'))
  return (revnode,manifest,fixup_user(user,authors),(time,tz),files,desc,branch,extra)
79 return x
and '100755' or '100644'
83 #map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n'))
85 def checkpoint(count
):
87 if cfg_checkpoint_count
>0 and count
%cfg_checkpoint
_count
==0:
88 sys
.stderr
.write("Checkpoint after %d commits\n" % count
)
def get_parent_mark(parent,marks):
  """Return the reference to use for revision `parent`.

  Marks are numbered revision+1. If `marks` holds an entry under that
  number (as a string key), the cached value (the SHA1 from the cache) is
  returned; otherwise the revision belongs to the current session and the
  ':<number>' mark syntax is returned."""
  number=parent+1
  key=str(number)
  if key in marks:
    return marks[key]
  return ':%d' % number
100 """See if two revisions of a file are not equal."""
101 return node
.hex(f1
)!=node
.hex(f2
)
103 def outer_set(dleft
,dright
,l
,c
,r
):
104 """Loop over our repository and find all changed and missing files."""
105 for left
in dleft
.keys():
106 right
=dright
.get(left
,None)
108 # we have the file but our parent hasn't: add to left set
110 elif mismatch(dleft
[left
],right
):
111 # we have it but checksums mismatch: add to center set
113 for right
in dright
.keys():
114 left
=dleft
.get(right
,None)
116 # if parent has file but we don't: add to right set
118 # change is already handled when comparing child against parent
121 def get_filechanges(repo
,revision
,parents
,mleft
):
122 """Given some repository and revision, find all changed/deleted files."""
126 mright
=repo
.changectx(p
).manifest()
131 l
,c
,r
=outer_set(mleft
,mright
,l
,c
,r
)
134 def get_author(logmessage
,committer
,authors
):
"""As git distinguishes between author and committer of a patch, try to
136 extract author by detecting Signed-off-by lines.
138 This walks from the end of the log message towards the top skipping
139 empty lines. Upon the first non-empty line, it walks all Signed-off-by
140 lines upwards to find the first one. For that (if found), it extracts
141 authorship information the usual way (authors table, cleaning, etc.)
143 If no Signed-off-by line is found, this defaults to the committer.
145 This may sound stupid (and it somehow is), but in log messages we
accidentally may have lines in the middle starting with
147 "Signed-off-by: foo" and thus matching our detection regex. Prevent
150 loglines
=logmessage
.split('\n')
152 # from tail walk to top skipping empty lines
155 if len(loglines
[i
].strip())==0: continue
158 # walk further upwards to find first sob line, store in 'first'
161 m
=sob_re
.match(loglines
[i
])
165 # if the last non-empty line matches our Signed-Off-by regex: extract username
167 r
=fixup_user(first
.group(1),authors
)
171 def export_file_contents(ctx
,manifest
,files
):
176 fctx
=ctx
.filectx(file)
178 wr('M %s inline %s' % (gitmode(manifest
.execf(file)),file))
179 wr('data %d' % len(d
)) # had some trouble with size()
182 if count
%cfg_export
_boundary
==0:
183 sys
.stderr
.write('Exported %d/%d files\n' % (count
,max))
184 if max>cfg_export_boundary
:
185 sys
.stderr
.write('Exported %d/%d files\n' % (count
,max))
187 def is_merge(parents
):
189 for parent
in parents
:
194 def export_commit(ui
,repo
,revision
,marks
,heads
,last
,max,count
,authors
,sob
):
195 (revnode
,_
,user
,(time
,timezone
),files
,desc
,branch
,_
)=get_changeset(ui
,repo
,revision
,authors
)
196 parents
=repo
.changelog
.parentrevs(revision
)
198 wr('commit refs/heads/%s' % branch
)
199 wr('mark :%d' % (revision
+1))
201 wr('author %s %d %s' % (get_author(desc
,user
,authors
),time
,timezone
))
202 wr('committer %s %d %s' % (user
,time
,timezone
))
203 wr('data %d' % (len(desc
)+1)) # wtf?
207 src
=heads
.get(branch
,'')
210 # if we have a cached head, this is an incremental import: initialize it
211 # and kill reference so we won't init it again
214 sys
.stderr
.write('Initializing branch [%s] to parent [%s]\n' %
216 link
=src
# avoid making a merge commit for incremental import
217 elif link
=='' and not heads
.has_key(branch
) and revision
>0:
218 # newly created branch and not the first one: connect to parent
219 tmp
=get_parent_mark(parents
[0],marks
)
221 sys
.stderr
.write('Link new branch [%s] to parent [%s]\n' %
223 link
=tmp
# avoid making a merge commit for branch fork
226 l
=last
.get(branch
,revision
)
# 1) as this commit implicitly is the child of the most recent
229 # commit of this branch, ignore this parent
230 # 2) ignore nonexistent parents
232 if p
==l
or p
==revision
or p
<0:
234 tmp
=get_parent_mark(p
,marks
)
235 # if we fork off a branch, don't merge with our parent via 'merge'
236 # as we have 'from' already above
239 sys
.stderr
.write('Merging branch [%s] with parent [%s] from [r%d]\n' %
243 last
[branch
]=revision
245 # we need this later to write out tags
246 marks
[str(revision
)]=':%d'%(revision
+1)
248 ctx
=repo
.changectx(str(revision
))
250 added
,changed
,removed
,type=[],[],[],''
253 # first revision: feed in full manifest
256 elif is_merge(parents
):
257 # later merge revision: feed in changed manifest
258 # for many files comparing checksums is expensive so only do it for
259 # merges where we really need it due to hg's revlog logic
260 added
,changed
,removed
=get_filechanges(repo
,revision
,parents
,man
)
261 type='thorough delta'
263 # later non-merge revision: feed in changed manifest
264 # if we have exactly one parent, just take the changes from the
265 # manifest without expensively comparing checksums
266 f
=repo
.status(repo
.lookup(parents
[0]),revnode
)[:3]
267 added
,changed
,removed
=f
[1],f
[0],f
[2]
270 sys
.stderr
.write('Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n' %
271 (type,revision
+1,max,len(added
),len(changed
),len(removed
)))
273 map(lambda r
: wr('D %s' % r
),removed
)
274 export_file_contents(ctx
,man
,added
+changed
)
277 return checkpoint(count
)
279 def export_tags(ui
,repo
,marks_cache
,start
,end
,count
,authors
):
282 # ignore latest revision
283 if tag
=='tip': continue
284 rev
=repo
.changelog
.rev(node
)
285 # ignore those tags not in our import range
286 if rev
<start
or rev
>=end
: continue
288 ref
=get_parent_mark(rev
,marks_cache
)
290 sys
.stderr
.write('Failed to find reference for creating tag'
291 ' %s at r%d\n' % (tag
,rev
))
293 sys
.stderr
.write('Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag
,rev
,ref
))
294 wr('reset refs/tags/%s' % tag
)
297 count
=checkpoint(count
)
300 def load_authors(filename
):
302 if not os
.path
.exists(filename
):
306 lre
=re
.compile('^([^=]+)[ ]*=[ ]*(.+)$')
307 for line
in f
.readlines():
311 sys
.stderr
.write('Invalid file format in [%s], line %d\n' % (filename
,l
))
313 # put key:value in cache, key without ^:
314 cache
[m
.group(1).strip()]=m
.group(2).strip()
316 sys
.stderr
.write('Loaded %d authors\n' % l
)
319 def load_cache(filename
):
321 if not os
.path
.exists(filename
):
325 for line
in f
.readlines():
327 fields
=line
.split(' ')
328 if fields
==None or not len(fields
)==2 or fields
[0][0]!=':':
329 sys
.stderr
.write('Invalid file format in [%s], line %d\n' % (filename
,l
))
331 # put key:value in cache, key without ^:
332 cache
[fields
[0][1:]]=fields
[1].split('\n')[0]
def save_cache(filename,cache):
  """Write `cache` to `filename`, one ':<key> <value>' line per entry.

  Keys and values are converted with str(). The file is opened in 'w+'
  mode, so any previous content is replaced. Entries are written in the
  mapping's own iteration order.
  """
  f=open(filename,'w+')
  try:
    # plain loop instead of map(): map() is lazy on Python 3 and would
    # write nothing there
    for key in cache.keys():
      f.write(':%s %s\n' % (str(key),str(cache.get(key))))
  finally:
    # release the handle even if a write fails
    f.close()
341 def verify_heads(ui
,repo
,cache
,force
):
344 f
=open(os
.getenv('GIT_DIR','/dev/null')+'/refs/heads/'+branch
)
345 sha1
=f
.readlines()[0].split('\n')[0]
351 branches
=repo
.branchtags()
352 l
=[(-repo
.changelog
.rev(n
), n
, t
) for t
, n
in branches
.items()]
355 # get list of hg's branches to verify, don't take all git has
360 if sha1
!=None and c
!=None:
361 sys
.stderr
.write('Verifying branch [%s]\n' % b
)
363 sys
.stderr
.write('Error: Branch [%s] modified outside hg2git:'
364 '\n%s (repo) != %s (cache)\n' % (b
,sha1
,c
))
365 if not force
: return False
367 # verify that branch has exactly one head
369 for h
in repo
.heads():
370 (_
,_
,_
,_
,_
,_
,branch
,_
)=get_changeset(ui
,repo
,h
)
371 if t
.get(branch
,False):
372 sys
.stderr
.write('Error: repository has at least one unnamed head: hg r%s\n' %
373 repo
.changelog
.rev(h
))
374 if not force
: return False
379 def hg2git(repourl
,m
,marksfile
,headsfile
,tipfile
,authors
={},sob
=False,force
=False):
382 marks_cache
=load_cache(marksfile
)
383 heads_cache
=load_cache(headsfile
)
384 state_cache
=load_cache(tipfile
)
386 ui
,repo
=setup_repo(repourl
)
388 if not verify_heads(ui
,repo
,heads_cache
,force
):
391 tip
=repo
.changelog
.count()
393 min=int(state_cache
.get('tip',0))
400 for rev
in range(min,max):
401 c
=export_commit(ui
,repo
,rev
,marks_cache
,heads_cache
,last
,max,c
,authors
,sob
)
403 c
=export_tags(ui
,repo
,marks_cache
,min,max,c
,authors
)
405 sys
.stderr
.write('Issued %d commands\n' % c
)
407 state_cache
['tip']=max
408 state_cache
['repo']=repourl
409 save_cache(tipfile
,state_cache
)
413 if __name__
=='__main__':
414 def bail(parser
,opt
):
415 sys
.stderr
.write('Error: No %s option given\n' % opt
)
419 parser
=OptionParser()
421 parser
.add_option("-m","--max",type="int",dest
="max",
422 help="Maximum hg revision to import")
423 parser
.add_option("--marks",dest
="marksfile",
424 help="File to read git-fast-import's marks from")
425 parser
.add_option("--heads",dest
="headsfile",
426 help="File to read last run's git heads from")
427 parser
.add_option("--status",dest
="statusfile",
428 help="File to read status from")
429 parser
.add_option("-r","--repo",dest
="repourl",
430 help="URL of repo to import")
431 parser
.add_option("-s",action
="store_true",dest
="sob",
432 default
=False,help="Enable parsing Signed-off-by lines")
433 parser
.add_option("-A","--authors",dest
="authorfile",
434 help="Read authormap from AUTHORFILE")
435 parser
.add_option("-f","--force",action
="store_true",dest
="force",
436 default
=False,help="Ignore validation errors by force")
438 (options
,args
)=parser
.parse_args()
441 if options
.max!=None: m
=options
.max
# All four options are mandatory. Each check tests the option it reports;
# previously every check tested options.marksfile, so a missing --heads,
# --status or --repo went unnoticed.
if options.marksfile is None: bail(parser,'--marks')
if options.headsfile is None: bail(parser,'--heads')
if options.statusfile is None: bail(parser,'--status')
if options.repourl is None: bail(parser,'--repo')
449 if options
.authorfile
!=None:
450 a
=load_authors(options
.authorfile
)
452 sys
.exit(hg2git(options
.repourl
,m
,options
.marksfile
,options
.headsfile
,
453 options
.statusfile
,authors
=a
,sob
=options
.sob
,force
=options
.force
))