3 # Copyright (c) 2007 Han-Wen Nienhuys <hanwen@xs4all.nl>
5 # Distributed under terms of the GNU General Public License
6 # This program comes with NO WARRANTY.
# - use binary search to find the from-patch in case of a conflict.
21 import distutils
.version
26 import xml
.dom
.minidom
28 import gdbm
as dbmodule
31 from email
.utils
import parsedate_tz
32 from calendar
import timegm
34 ################################################################
40 mail_to_name_dict
= {}
45 ################################################################
48 class PullConflict (Exception):
50 class CommandFailed (Exception):
54 sys
.stderr
.write (s
+ '\n')
56 def get_cli_options ():
57 class MyOP(optparse
.OptionParser
):
59 optparse
.OptionParser
.print_help (self
)
63 This tool is a conversion utility for Darcs repositories, importing
64 them in chronological order. It requires a Git version that has
65 git-fast-import. It does not support incremental updating.
69 * repositories with skewed timestamps, or different patches with
70 equal timestamps will confuse darcs2git.
71 * does not respect file modes or time zones.
72 * too slow. See source code for instructions to speed it up.
73 * probably doesn\'t work on partial repositories
75 Report new bugs to hanwen@xs4all.nl
79 Copyright (c) 2007 Han-Wen Nienhuys <hanwen@xs4all.nl>.
80 Distributed under terms of the GNU General Public License
81 This program comes with NO WARRANTY.
86 p
.usage
='''darcs2git [OPTIONS] DARCS-REPO'''
87 p
.description
='''Convert darcs repo to git.'''
def update_map (option, opt, value, parser):
    """Load EMAIL=NAME pairs from a text file into mail_to_name_dict.

    The signature is the one optparse uses for option callbacks; only
    VALUE (the path of the authors file) is used.  Presumably this is
    the callback behind -a/--authors -- confirm at the add_option call.

    Each non-blank line is split at the first '=' only, so a name may
    itself contain '='.  A non-blank line without any '=' raises
    ValueError.
    """
    f = open (value)
    try:
        lines = f.readlines ()
    finally:
        # don't leak the file handle, even when reading fails.
        f.close ()

    for l in lines:
        l = l.strip ()
        if not l:
            # tolerate blank lines, e.g. a trailing newline at EOF.
            continue
        (mail, name) = l.split ('=', 1)
        mail_to_name_dict[mail] = name
94 p
.add_option ('-a', '--authors', action
='callback',
98 help='read a text file, containing EMAIL=NAME lines')
100 p
.add_option ('--checkpoint-frequency', action
='store',
101 dest
='checkpoint_frequency',
104 help='how often should the git importer be synced?\n'
105 'Default is 0 (no limit)'
108 p
.add_option ('-d', '--destination', action
='store',
111 dest
='target_git_repo',
112 help='where to put the resulting Git repo.')
114 p
.add_option ('--verbose', action
='store_true',
117 help='show commands as they are invoked')
119 p
.add_option ('--history-window', action
='store',
120 dest
='history_window',
123 help='Look back this many patches as conflict ancestors.\n'
124 'Default is 0 (no limit)'
127 p
.add_option ('--debug', action
='store_true',
130 help="""add patch numbers to commit messages;
131 don\'t clean conversion repo;
135 options
, args
= p
.parse_args ()
140 if len(urlparse
.urlparse(args
[0])) == 0:
141 raise NotImplementedError,"We support local DARCS repos only."
143 git_version
= distutils
.version
.LooseVersion(os
.popen("git --version","r").read().strip().split(" ")[-1])
144 ideal_version
= distutils
.version
.LooseVersion("1.5.0")
145 if git_version
<ideal_version
:
146 raise RuntimeError,"You need git >= 1.5.0 for this."
148 options
.basename
= os
.path
.basename (os
.path
.normpath (args
[0])).replace ('.darcs', '')
149 if not options
.target_git_repo
:
150 options
.target_git_repo
= options
.basename
+ '.git'
154 name
= options
.target_git_repo
.replace ('.git', '.log')
155 if name
== options
.target_git_repo
:
158 progress ("Shell log to %s" % name
)
159 log_file
= open (name
, 'w')
161 return (options
, args
)
163 def read_pipe (cmd
, ignore_errors
=False):
165 progress ('pipe %s' % cmd
)
166 pipe
= os
.popen (cmd
)
169 if pipe
.close () and not ignore_errors
:
170 raise CommandFailed ("Pipe failed: %s" % cmd
)
174 def system (c
, ignore_error
=0, timed
=0):
181 log_file
.write ('%s\n' % c
)
184 if os
.system (c
) and not ignore_error
:
185 raise CommandFailed ("Command failed: %s" % c
)
def darcs_date_to_git (x):
    """Turn a darcs timestamp into the seconds-since-epoch string git wants.

    X is a 14-digit 'YYYYMMDDHHMMSS' string.  It is interpreted as UTC
    (timegm), and the result is the integer epoch time rendered as a
    decimal string.
    """
    parsed = time.strptime (x, '%Y%m%d%H%M%S')
    epoch_seconds = int (timegm (parsed))
    return '%d' % epoch_seconds
def darcs_timezone (x) :
    """Return the UTC offset of date string X in git's '+HHMM' form.

    X is parsed with email.utils.parsedate_tz.  When X cannot be parsed,
    or carries no usable zone information (parsedate_tz reports the
    offset as None), '+0000' is returned instead of failing.
    """
    parsed = parsedate_tz (x)
    offset = None
    if parsed is not None:
        offset = parsed[9]
    if offset is None:
        return '+0000'

    # Split the magnitude and add the sign afterwards: floor division
    # and modulo on a negative offset would turn -0330 into -0430.
    sign = '+'
    if offset < 0:
        sign = '-'
    (hours, minutes) = divmod (abs (offset) // 60, 60)
    return '%s%02d%02d' % (sign, hours, minutes)
195 ################################################################
198 class DarcsConversionRepo
:
199 """Representation of a Darcs repo.
201 The repo is thought to be ordered, and supports methods for
202 going back (obliterate) and forward (pull).
206 def __init__ (self
, dir, patches
):
207 self
.dir = os
.path
.abspath (dir)
208 self
.patches
= patches
209 self
._current
_number
= -1
211 self
._inventory
_dict
= None
213 self
._short
_id
_dict
= dict ((p
.short_id (), p
) for p
in patches
)
def is_contiguous (self):
    """Tell whether the repo holds exactly patches 0 .. current number.

    True only when the inventory has _current_number + 1 entries and
    contains_contiguous () accepts _current_number; the latter is not
    consulted when the entry count already disagrees.
    """
    inventory_size = len (self.inventory_dict ())
    if inventory_size != self._current_number + 1:
        return False
    return self.contains_contiguous (self._current_number)
219 def contains_contiguous (self
, num
):
220 if not self
._is
_valid
:
223 darcs_dir
= self
.dir + '/_darcs'
224 if not os
.path
.exists (darcs_dir
):
227 for p
in self
.patches
[:num
+ 1]:
228 if not self
.has_patch (p
):
def has_patch (self, p):
    """Return True when patch P is listed in this repo's inventory.

    P is looked up by its short_id () among the keys of
    inventory_dict ().  The repo must be in a valid state
    (self._is_valid); this is asserted.
    """
    assert self._is_valid

    inventory = self.inventory_dict ()
    # 'in' replaces dict.has_key (), which no longer exists in Python 3.
    return p.short_id () in inventory
def pristine_tree (self):
    """Return the path of the _darcs/pristine directory below self.dir."""
    repo_root = self.dir
    return repo_root + '/_darcs/pristine'
241 def go_back_to (self
, dest
):
243 # at 4, len = 5, go to 2: count == 2
244 count
= len (self
.inventory_dict()) - dest
- 1
246 assert self
._is
_valid
252 progress ('Rewinding %d patches' % count
)
253 system ('cd %(dir)s && echo ay|darcs obliterate --ignore-times --last %(count)d' % locals ())
254 d
= self
.inventory_dict ()
255 for p
in self
.patches
[dest
+1:self
._current
_number
+1]:
261 self
._current
_number
= dest
264 system ('rm -rf %s' % self
.dir)
268 system ('rsync -a %(dir)s/_darcs/pristine/ %(dir)s/' % locals ())
270 def pull (self
, patch
):
271 id = patch
.attributes
['hash']
272 source_repo
= patch
.dir
275 progress ('Pull patch %d' % patch
.number
)
276 system ('cd %(dir)s && darcs pull --ignore-times --quiet --all --match "hash %(id)s" %(source_repo)s ' % locals ())
278 self
._current
_number
= patch
.number
280 ## must reread: the pull may have pulled in others.
281 self
._inventory
_dict
= None
283 def go_forward_to (self
, num
):
284 d
= self
.inventory_dict ()
289 for p
in self
.patches
[0:num
+1]:
290 if not d
.has_key (p
.short_id ()):
294 pull_str
= ' || '.join (['hash %s' % p
.id () for p
in pull_me
])
296 src
= self
.patches
[0].dir
298 progress ('Pulling %d patches to go to %d' % (len (pull_me
), num
))
299 system ('darcs pull --all --repo %(dir)s --match "%(pull_str)s" %(src)s' % locals ())
301 def create_fresh (self
):
303 system ('rm -rf %(dir)s && mkdir %(dir)s && darcs init --repo %(dir)s'
305 self
._is
_valid
= True
306 self
._current
_number
= -1
307 self
._inventory
_dict
= {}
309 def inventory (self
):
310 darcs_dir
= self
.dir + '/_darcs'
312 for f
in [darcs_dir
+ '/inventory'] + glob
.glob (darcs_dir
+ '/inventories/*'):
313 i
+= open (f
).read ()
316 def inventory_dict (self
):
317 if type (self
._inventory
_dict
) != type ({}):
318 self
._inventory
_dict
= {}
321 self
._inventory
_dict
[m
.group (1)] = self
._short
_id
_dict
[m
.group(1)]
323 re
.sub (r
'\n([^*\n]+\*[*-][0-9]+)', note_patch
, self
.inventory ())
324 return self
._inventory
_dict
326 def start_at (self
, num
):
328 """Move the repo to NUM.
330 This uses the fishy technique of writing the inventory and
331 constructing the pristine tree with 'darcs repair'
333 progress ('Starting afresh at %d' % num
)
337 iv
= open (dir + '/_darcs/inventory', 'w')
339 log_file
.write ("# messing with _darcs/inventory")
341 for p
in self
.patches
[:num
+1]:
342 os
.link (p
.filename (), dir + '/_darcs/patches/' + os
.path
.basename (p
.filename ()))
343 iv
.write (p
.header ())
344 self
._inventory
_dict
[p
.short_id ()] = p
347 system ('darcs repair --repo %(dir)s --quiet' % locals ())
349 self
._current
_number
= num
350 self
._is
_valid
= True
352 def go_to (self
, dest
):
353 contiguous
= self
.is_contiguous ()
355 if not self
._is
_valid
:
357 elif dest
== self
._current
_number
and contiguous
:
359 elif (self
.contains_contiguous (dest
)):
360 self
.go_back_to (dest
)
361 elif dest
- len (self
.inventory_dict ()) < dest
/ 100:
362 self
.go_forward_to (dest
)
367 def go_from_to (self
, from_patch
, to_patch
):
369 """Move the repo to FROM_PATCH, then go to TO_PATCH. Raise
370 PullConflict if conflict is detected
373 progress ('Trying %s -> %s' % (from_patch
, to_patch
))
375 source
= to_patch
.dir
378 self
.go_to (from_patch
.number
)
384 success
= 'No conflicts to resolve' in read_pipe ('cd %(dir)s && echo y|darcs resolve' % locals ())
385 except CommandFailed
:
386 self
._is
_valid
= False
387 raise PullConflict ()
390 raise PullConflict ()
394 return 'patch %d' % self
.number
396 def __init__ (self
, xml
, dir):
401 self
._contents
= None
402 for (nm
, value
) in xml
.attributes
.items():
403 self
.attributes
[nm
] = value
405 # fixme: ugh attributes vs. methods.
406 self
.extract_author ()
407 self
.extract_message ()
411 return self
.attributes
['hash']
415 if self
.attributes
['inverted'] == 'True':
418 return '%s*%s%s' % (self
.attributes
['author'], inv
, self
.attributes
['hash'].split ('-')[0])
421 return self
.dir + '/_darcs/patches/' + self
.attributes
['hash']
424 if type (self
._contents
) != type (''):
425 f
= gzip
.open (self
.filename ())
426 self
._contents
= f
.read ()
428 return self
._contents
431 lines
= self
.contents ().split ('\n')
434 committer
= lines
[1] + '\n'
435 committer
= re
.sub ('] {\n$', ']\n', committer
)
436 committer
= re
.sub ('] *\n$', ']\n', committer
)
438 if not committer
.endswith (']\n'):
445 header
= name
+ '\n' + committer
449 assert header
[-1] == '\n'
452 def extract_author (self
):
453 mail
= self
.attributes
['author']
455 m
= re
.search ("^(.*) <(.*)>$", mail
)
462 name
= mail_to_name_dict
[mail
]
464 name
= mail
.split ('@')[0]
466 self
.author_name
= name
467 self
.author_mail
= mail
def extract_time (self):
    """Set self.date to '<epoch seconds> <zone>' for this patch.

    The epoch part comes from the 'date' attribute via
    darcs_date_to_git (), the zone part from the 'local_date'
    attribute via darcs_timezone ().
    """
    attrs = self.attributes
    stamp = darcs_date_to_git (attrs['date'])
    zone = darcs_timezone (attrs['local_date'])
    self.date = stamp + ' ' + zone
473 patch_name
= '(no comment)'
475 name_elt
= self
.xml
.getElementsByTagName ('name')[0]
476 patch_name
= name_elt
.childNodes
[0].data
481 def extract_message (self
):
482 patch_name
= self
.name ()
483 comment_elts
= self
.xml
.getElementsByTagName ('comment')
486 comment
= comment_elts
[0].childNodes
[0].data
488 if self
.attributes
['inverted'] == 'True':
489 patch_name
= 'UNDO: ' + patch_name
491 self
.message
= '%s\n\n%s' % (patch_name
, comment
)
494 patch_name
= self
.name ()
495 if patch_name
.startswith ("TAG "):
497 tag
= re
.sub (r
'\s', '_', tag
).strip ()
498 tag
= re
.sub (r
':', '_', tag
).strip ()
502 def get_darcs_patches (darcs_repo
):
503 progress ('reading patches.')
505 xml_string
= read_pipe ('darcs changes --xml --reverse --repo ' + darcs_repo
)
507 dom
= xml
.dom
.minidom
.parseString(xml_string
)
508 xmls
= dom
.documentElement
.getElementsByTagName('patch')
510 patches
= [DarcsPatch (x
, darcs_repo
) for x
in xmls
]
519 ################################################################
523 def __init__ (self
, parent
, darcs_patch
):
525 self
.darcs_patch
= darcs_patch
527 self
.depth
= parent
.depth
+ 1
532 return self
.darcs_patch
.number
534 def parent_patch (self
):
536 return self
.parent
.darcs_patch
540 def common_ancestor (a
, b
):
542 if a
.depth
< b
.depth
:
544 elif a
.depth
> b
.depth
:
def export_checkpoint (gfi):
    """Write a 'checkpoint' command, followed by a blank line, to GFI.

    GFI is any object with a write () method; the script opens it as a
    pipe to git-fast-import.
    """
    command = 'checkpoint\n\n'
    gfi.write (command)
561 def export_tree (tree
, gfi
):
562 tree
= os
.path
.normpath (tree
)
563 gfi
.write ('deleteall\n')
564 for (root
, dirs
, files
) in os
.walk (tree
):
566 rf
= os
.path
.normpath (os
.path
.join (root
, f
))
567 s
= open (rf
).read ()
568 rf
= rf
.replace (tree
+ '/', '')
570 gfi
.write ('M 644 inline %s\n' % rf
)
571 gfi
.write ('data %d\n%s\n' % (len (s
), s
))
575 def export_commit (repo
, patch
, last_patch
, gfi
):
576 gfi
.write ('commit refs/heads/darcstmp%d\n' % patch
.number
)
577 gfi
.write ('mark :%d\n' % (patch
.number
+ 1))
578 gfi
.write ('committer %s <%s> %s\n' % (patch
.author_name
,
584 msg
+= '\n\n#%d\n' % patch
.number
586 gfi
.write ('data %d\n%s\n' % (len (msg
), msg
))
590 for (n
, p
) in pending_patches
.items ():
591 if repo
.has_patch (p
):
593 del pending_patches
[n
]
597 and git_commits
.has_key (last_patch
.number
)):
598 mergers
= [last_patch
.number
]
601 gfi
.write ('from :%d\n' % (mergers
[0] + 1))
602 for m
in mergers
[1:]:
603 gfi
.write ('merge :%d\n' % (m
+ 1))
605 pending_patches
[patch
.number
] = patch
606 export_tree (repo
.pristine_tree (), gfi
)
611 n
= last_patch
.number
612 git_commits
[patch
.number
] = GitCommit (git_commits
.get (n
, None),
615 def export_pending (gfi
):
616 if len (pending_patches
.items ()) == 1:
617 gfi
.write ('reset refs/heads/master\n')
618 gfi
.write ('from :%d\n\n' % (pending_patches
.values()[0].number
+1))
620 progress ("Creating branch master")
623 for (n
, p
) in pending_patches
.items ():
624 gfi
.write ('reset refs/heads/master%d\n' % n
)
625 gfi
.write ('from :%d\n\n' % (n
+1))
627 progress ("Creating branch master%d" % n
)
629 patches
= pending_patches
.values()
631 gfi
.write ('commit refs/heads/master\n')
632 gfi
.write ('committer %s <%s> %s\n' % (patch
.author_name
,
636 gfi
.write ('data %d\n%s\n' % (len(msg
), msg
))
637 gfi
.write ('from :%d\n' % (patch
.number
+ 1))
638 for p
in patches
[1:]:
639 gfi
.write ('merge :%d\n' % (p
.number
+ 1))
642 def export_tag (patch
, gfi
):
643 gfi
.write ('tag %s\n' % patch
.tag_name ())
644 gfi
.write ('from :%d\n' % (patch
.number
+ 1))
645 gfi
.write ('tagger %s <%s> %s\n' % (patch
.author_name
,
648 gfi
.write ('data %d\n%s\n' % (len (patch
.message
),
651 ################################################################
653 def test_conversion (darcs_repo
, git_repo
):
654 gd
= options
.basename
+ '.checkouttmp.git'
655 system ('rm -rf %(gd)s && git clone %(git_repo)s %(gd)s' % locals ())
656 diff
= read_pipe ('diff --exclude .git -urN %(gd)s %(darcs_repo)s/_darcs/pristine' % locals (), ignore_errors
=True)
657 system ('rm -rf %(gd)s' % locals ())
659 progress ("Conversion introduced changes: %s" % diff
)
662 progress ("Checkout matches pristine darcs tree.")
665 (options
, args
) = get_cli_options ()
667 darcs_repo
= os
.path
.abspath (args
[0])
668 git_repo
= os
.path
.abspath (options
.target_git_repo
)
670 if os
.path
.exists (git_repo
):
671 system ('rm -rf %(git_repo)s' % locals ())
673 system ('mkdir %(git_repo)s && cd %(git_repo)s && git --bare init' % locals ())
674 system ('git --git-dir %(git_repo)s repo-config core.logAllRefUpdates false' % locals ())
676 os
.environ
['GIT_DIR'] = git_repo
678 gfi
= os
.popen ('git-fast-import --quiet', 'w')
680 patches
= get_darcs_patches (darcs_repo
)
681 conv_repo
= DarcsConversionRepo (options
.basename
+ ".tmpdarcs", patches
)
682 conv_repo
.start_at (-1)
689 combinations
= [(v
, w
) for v
in pending_patches
.values ()
690 for w
in pending_patches
.values ()]
691 candidates
= [common_ancestor (git_commits
[c
[0].number
], git_commits
[c
[1].number
]) for c
in combinations
]
692 candidates
= sorted ([(-a
.darcs_patch
.number
, a
) for a
in candidates
])
693 for (depth
, c
) in candidates
:
696 conv_repo
.go_from_to (q
, p
)
699 parent_number
= q
.number
700 progress ('Found existing common parent as predecessor')
706 ## no branches found where we could attach.
707 ## try previous commits one by one.
709 parent_number
= p
.number
- 2
711 if parent_number
>= 0:
712 parent_patch
= patches
[parent_number
]
715 conv_repo
.go_from_to (parent_patch
, p
)
718 ## simplistic, may not be enough.
719 progress ('conflict, going one back')
722 if parent_number
< 0:
725 if (options
.history_window
726 and parent_number
< p
.number
- options
.history_window
):
731 if parent_number
>= 0 or p
.number
== 0:
732 progress ('Export %d -> %d (total %d)' % (parent_number
,
733 p
.number
, len (patches
)))
734 export_commit (conv_repo
, p
, parent_patch
, gfi
)
738 if options
.checkpoint_frequency
and p
.number
% options
.checkpoint_frequency
== 0:
739 export_checkpoint (gfi
)
741 progress ("Can't import patch %d, need conflict resolution patch?" % p
.number
)
746 for f
in glob
.glob ('%(git_repo)s/refs/heads/darcstmp*' % locals ()):
749 test_conversion (darcs_repo
, git_repo
)
751 if not options
.debug
: