5 darcs2git -- Darcs to git converter.
7 Copyright (c) 2007 Han-Wen Nienhuys <hanwen@xs4all.nl>
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
31 # - use binary search to find from-patch in case of conflict.
35 from datetime
import datetime
36 from time
import strptime
38 import distutils
.version
43 import xml
.dom
.minidom
49 ################################################################
55 mail_to_name_dict
= {}
60 ################################################################
63 class PullConflict (Exception):
65 class CommandFailed (Exception):
69 sys
.stderr
.write (s
+ '\n')
71 def get_cli_options ():
72 class MyOP(optparse
.OptionParser
):
74 optparse
.OptionParser
.print_help (self
)
78 This tool is a conversion utility for Darcs repositories, importing
79 them in chronological order. It requires a Git version that has
80 git fast-import. It does not support incremental updating.
84 * repositories with skewed timestamps, or different patches with
85 equal timestamps will confuse darcs2git.
86 * does not respect file modes or time zones.
87 * too slow. See source code for instructions to speed it up.
88 * probably doesn\'t work on partial repositories
90 Report new bugs to hanwen@xs4all.nl
94 Copyright (c) 2007 Han-Wen Nienhuys <hanwen@xs4all.nl>.
95 Distributed under terms of the GNU General Public License
96 This program comes with NO WARRANTY.
101 p
.usage
='''darcs2git [OPTIONS] DARCS-REPO'''
102 p
.description
='''Convert darcs repo to git.'''
104 def update_map (option
, opt
, value
, parser
):
105 for l
in open (value
).readlines ():
106 (mail
, name
) = tuple (l
.strip ().split ('='))
107 mail_to_name_dict
[mail
] = name
109 p
.add_option ('-a', '--authors', action
='callback',
113 help='read a text file, containing EMAIL=NAME lines')
115 p
.add_option ('--checkpoint-frequency', action
='store',
116 dest
='checkpoint_frequency',
119 help='how often should the git importer be synced?\n'
120 'Default is 0 (no limit)'
123 p
.add_option ('-d', '--destination', action
='store',
126 dest
='target_git_repo',
127 help='where to put the resulting Git repo.')
129 p
.add_option ('--verbose', action
='store_true',
132 help='show commands as they are invoked')
134 p
.add_option ('--history-window', action
='store',
135 dest
='history_window',
138 help='Look back this many patches as conflict ancestors.\n'
139 'Default is 0 (no limit)')
141 p
.add_option ('--debug', action
='store_true',
144 help="""add patch numbers to commit messages;
145 don\'t clean conversion repo;
148 p
.add_option ('-e', '--encoding', action
='store',
152 help='specify encoding, for example ISO-8859-2')
155 options
, args
= p
.parse_args ()
160 if len(urlparse
.urlparse(args
[0])) == 0:
161 raise NotImplementedError, "We support local DARCS repos only."
163 git_version
= distutils
.version
.LooseVersion(
164 os
.popen("git --version","r").read().strip().split(" ")[-1])
165 ideal_version
= distutils
.version
.LooseVersion("1.5.0")
166 if git_version
<ideal_version
:
167 raise RuntimeError,"You need git >= 1.5.0 for this."
169 options
.basename
= os
.path
.basename (
170 os
.path
.normpath (args
[0])).replace ('.darcs', '')
171 if not options
.target_git_repo
:
172 options
.target_git_repo
= options
.basename
+ '.git'
176 name
= options
.target_git_repo
.replace ('.git', '.log')
177 if name
== options
.target_git_repo
:
180 progress ("Shell log to %s" % name
)
181 log_file
= open (name
, 'w')
183 return (options
, args
)
185 def read_pipe (cmd
, ignore_errors
=False):
187 progress ('pipe %s' % cmd
)
188 pipe
= os
.popen (cmd
)
191 if pipe
.close () and not ignore_errors
:
192 raise CommandFailed ("Pipe failed: %s" % cmd
)
196 def system (c
, ignore_error
=0, timed
=0):
203 log_file
.write ('%s\n' % c
)
206 if os
.system (c
) and not ignore_error
:
207 raise CommandFailed ("Command failed: %s" % c
)
def darcs_date_to_git (x):
    """Convert a darcs timestamp into the epoch-seconds string git expects.

    X is a 14-digit 'YYYYMMDDHHMMSS' string.  The return value is the
    number of seconds since the Unix epoch, formatted as a decimal
    string.  ValueError (from time.strptime) is raised when X does not
    match that format.
    """
    # Imported here because nothing else in this function's surroundings
    # is known to need the module.
    import calendar

    t = time.strptime (x, '%Y%m%d%H%M%S')
    # The stamp is interpreted as UTC.  time.mktime would interpret it
    # as local time, making the result depend on the timezone of the
    # machine that happens to run the conversion.
    # NOTE(review): darcs documents these patch dates as UTC -- confirm.
    return '%d' % calendar.timegm (t)
213 def darcs_timezone (x
) :
214 time
.strptime (x
, '%a %b %d %H:%M:%S %Z %Y')
219 ################################################################
222 ## http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/521889
224 PATCH_DATE_FORMAT
= '%Y%m%d%H%M%S'
227 \[ # Patch start indicator
228 (?P<name>[^\n]+)\n # Patch name (rest of same line)
229 (?P<author>[^\*]+) # Patch author
230 \* # Author/date separator
231 (?P<inverted>[-\*]) # Inverted patch indicator
232 (?P<date>\d{14}) # Patch date
233 (?:\n(?P<comment>(?:^\ [^\n]*\n)+))? # Optional long comment
234 \] # Patch end indicator
236 patch_re
= re
.compile(patch_pattern
, re
.VERBOSE | re
.MULTILINE
)
237 tidy_comment_re
= re
.compile(r
'^ ', re
.MULTILINE
)
def parse_inventory(inventory):
    """
    Walk the text of a darcs inventory file and yield one
    ``InventoryPatch`` per patch entry matched by ``patch_re``.

    The 'inverted' field is turned into a boolean (true when the marker
    is '-'); a present comment has ``tidy_comment_re`` matches removed
    and surrounding whitespace stripped.
    """
    for entry in patch_re.finditer(inventory):
        fields = entry.groupdict()
        raw_comment = fields['comment']
        if raw_comment is not None:
            fields['comment'] = tidy_comment_re.sub('', raw_comment).strip()
        fields['inverted'] = fields['inverted'] == '-'
        yield InventoryPatch(**fields)
def fix_braindead_darcs_escapes(s):
    r"""Undo darcs' bracket escapes in S.

    Every occurrence of ``[_\xx_]`` (xx being two lowercase hex digits)
    is replaced by the single character with that code.  S is passed
    through str() first; the result is returned as a new string.
    """
    escaped_char = r'\[_\\([0-9a-f][0-9a-f])_\]'
    text = str(s)
    return re.sub(escaped_char,
                  lambda found: chr(int(found.group(1), 16)),
                  text)
258 class InventoryPatch
:
260 Patch details, as defined in a darcs inventory file.
262 Attribute names match those generated by the
263 ``darcs changes --xml-output`` command.
266 def __init__(self
, name
, author
, date
, inverted
, comment
=None):
269 self
.date
= datetime(*strptime(date
, PATCH_DATE_FORMAT
)[:6])
270 self
.inverted
= inverted
271 self
.comment
= comment
def complete_patch_details(self):
    """Return name, author, date, comment and inversion flag as one string.

    The date is rendered with PATCH_DATE_FORMAT.  The comment (if any)
    is flattened: split on newlines, each line right-stripped, and the
    pieces joined with no separator.  The final character is 't' for an
    inverted patch and 'f' otherwise.
    """
    # NOTE(review): other code in this class reads this name as an
    # attribute, so a @property decorator presumably sits above this
    # def -- confirm.
    stamp = self.date.strftime(PATCH_DATE_FORMAT)

    if self.comment:
        pieces = [line.rstrip() for line in self.comment.split('\n')]
        flat_comment = ''.join(pieces)
    else:
        flat_comment = ''

    if self.inverted:
        flag = 't'
    else:
        flag = 'f'

    return '%s%s%s%s%s' % (self.name, self.author, stamp, flat_comment, flag)
289 return unicode('%s%s*%s%s' % (self
.name
, self
.author
, inv
, self
.hash.split ('-')[0]), options
.encoding
)
294 Calculates the filename of the gzipped file containing patch
295 contents in the repository's ``patches`` directory.
297 This consists of the patch date, a partial SHA-1 hash of the
298 patch author and a full SHA-1 hash of the complete patch
302 date_str
= self
.date
.strftime(PATCH_DATE_FORMAT
)
303 return '%s-%s-%s.gz' % (date_str
,
304 sha
.new(self
.author
).hexdigest()[:5],
305 sha
.new(self
.complete_patch_details
).hexdigest())
307 ## http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/521889
309 class DarcsConversionRepo
:
310 """Representation of a Darcs repo.
312 The repo is thought to be ordered, and supports methods for
313 going back (obliterate) and forward (pull).
317 def __init__ (self
, dir, patches
):
318 self
.dir = os
.path
.abspath (dir)
319 self
.patches
= patches
320 self
._current
_number
= -1
322 self
._inventory
_dict
= None
323 self
._short
_id
_dict
= dict ((p
.short_id (), p
) for p
in patches
)
326 if not options
.debug
:
327 system ('rm -fr %s' % self
.dir)
def is_contiguous(self):
    """Check that the inventory size and contents agree with _current_number.

    The result is false unless the inventory dictionary has exactly
    _current_number + 1 entries; in that case the answer is whatever
    contains_contiguous() returns for _current_number.
    """
    inventory_size = len(self.inventory_dict())
    if inventory_size != self._current_number + 1:
        return False
    return self.contains_contiguous(self._current_number)
333 def contains_contiguous (self
, num
):
334 if not self
._is
_valid
:
337 darcs_dir
= self
.dir + '/_darcs'
338 if not os
.path
.exists (darcs_dir
):
341 for p
in self
.patches
[:num
+ 1]:
342 if not self
.has_patch (p
):
def has_patch(self, p):
    """Return whether patch P appears in this repo's inventory dictionary.

    P is looked up by its short_id().  The repo must currently be
    marked valid; otherwise the assertion fails.
    """
    assert self._is_valid

    key = p.short_id()
    return key in self.inventory_dict()
def pristine_tree(self):
    """Return the path of the '_darcs/pristine' directory under self.dir."""
    suffix = '/_darcs/pristine'
    return ''.join([self.dir, suffix])
355 def go_back_to (self
, dest
):
357 # at 4, len = 5, go to 2: count == 2
358 count
= len (self
.inventory_dict()) - dest
- 1
360 assert self
._is
_valid
366 progress ('Rewinding %d patches' % count
)
367 system ('cd %(dir)s && darcs revert --all' % locals())
368 system ('cd %(dir)s && yes|darcs obliterate -a --ignore-times --last %(count)d' % locals ())
369 system ('cd %(dir)s && darcs revert -a' % locals())
370 d
= self
.inventory_dict ()
371 for p
in self
.patches
[dest
+1:self
._current
_number
+1]:
377 self
._current
_number
= dest
380 system ('rm -rf %s' % self
.dir)
384 system ('rsync -a %(dir)s/_darcs/pristine/ %(dir)s/' % locals ())
386 def pull (self
, patch
):
387 id = patch
.attributes
['hash']
388 source_repo
= patch
.dir
391 progress ('Pull patch %d' % patch
.number
)
392 system ('cd %(dir)s && darcs revert --all' % locals())
393 system ('cd %(dir)s && darcs pull --ignore-times --quiet --all --match "hash %(id)s" %(source_repo)s ' % locals ())
395 self
._current
_number
= patch
.number
397 ## must reread: the pull may have pulled in others.
398 self
._inventory
_dict
= None
400 def go_forward_to (self
, num
):
401 d
= self
.inventory_dict ()
406 for p
in self
.patches
[0:num
+1]:
407 if not d
.has_key (p
.short_id ()):
411 pull_str
= ' || '.join (['hash %s' % p
.id () for p
in pull_me
])
413 src
= self
.patches
[0].dir
415 progress ('Pulling %d patches to go to %d' % (len (pull_me
), num
))
416 system ('darcs revert --repo %(dir)s --all' % locals ())
417 system ('darcs pull --all --repo %(dir)s --match "%(pull_str)s" %(src)s' % locals ())
419 def create_fresh (self
):
421 system ('rm -rf %(dir)s && mkdir %(dir)s && darcs init --repo %(dir)s'
423 self
._is
_valid
= True
424 self
._current
_number
= -1
425 self
._inventory
_dict
= {}
427 def inventory (self
):
428 darcs_dir
= self
.dir + '/_darcs'
430 for f
in [darcs_dir
+ '/inventory'] + glob
.glob (darcs_dir
+ '/inventories/*'):
431 i
+= open (f
).read ()
434 def inventory_dict (self
):
436 if type (self
._inventory
_dict
) != type ({}):
437 self
._inventory
_dict
= {}
439 for p
in parse_inventory(self
.inventory()):
443 self
._inventory
_dict
[key
] = self
._short
_id
_dict
[key
]
445 print 'key not found', key
446 print self
._short
_id
_dict
449 return self
._inventory
_dict
451 def start_at (self
, num
):
452 """Move the repo to NUM.
454 This uses the fishy technique of writing the inventory and
455 constructing the pristine tree with 'darcs repair'
457 progress ('Starting afresh at %d' % num
)
461 iv
= open (dir + '/_darcs/inventory', 'w')
463 log_file
.write ("# messing with _darcs/inventory")
465 for p
in self
.patches
[:num
+1]:
466 os
.link (p
.filename (), dir + '/_darcs/patches/' + os
.path
.basename (p
.filename ()))
467 iv
.write (p
.header ())
468 self
._inventory
_dict
[p
.short_id ()] = p
471 system ('darcs revert --repo %(dir)s --all' % locals())
472 system ('darcs repair --repo %(dir)s --quiet' % locals ())
474 self
._current
_number
= num
475 self
._is
_valid
= True
477 def go_to (self
, dest
):
478 if not self
._is
_valid
:
480 elif dest
== self
._current
_number
and self
.is_contiguous ():
482 elif (self
.contains_contiguous (dest
)):
483 self
.go_back_to (dest
)
484 elif dest
- len (self
.inventory_dict ()) < dest
/ 100:
485 self
.go_forward_to (dest
)
490 def go_from_to (self
, from_patch
, to_patch
):
492 """Move the repo to FROM_PATCH, then go to TO_PATCH. Raise
493 PullConflict if conflict is detected
496 progress ('Trying %s -> %s' % (from_patch
, to_patch
))
498 source
= to_patch
.dir
501 self
.go_to (from_patch
.number
)
507 success
= 'No conflicts to resolve' in read_pipe ('cd %(dir)s && echo y|darcs resolve' % locals ())
508 except CommandFailed
:
509 self
._is
_valid
= False
510 raise PullConflict ()
513 raise PullConflict ()
517 return 'patch %d' % self
.number
519 def __init__ (self
, xml
, dir):
524 self
._contents
= None
525 for (nm
, value
) in xml
.attributes
.items():
526 self
.attributes
[nm
] = value
528 # fixme: ugh attributes vs. methods.
529 self
.extract_author ()
530 self
.extract_message ()
534 return self
.attributes
['hash']
538 if self
.attributes
['inverted'] == 'True':
541 return '%s%s*%s%s' % (self
.name(), self
.attributes
['author'], inv
, self
.attributes
['hash'].split ('-')[0])
544 return self
.dir + '/_darcs/patches/' + self
.attributes
['hash']
547 if type (self
._contents
) != type (''):
548 f
= gzip
.open (self
.filename ())
549 self
._contents
= f
.read ()
551 return self
._contents
554 lines
= self
.contents ().split ('\n')
557 committer
= lines
[1] + '\n'
558 committer
= re
.sub ('] {\n$', ']\n', committer
)
559 committer
= re
.sub ('] *\n$', ']\n', committer
)
561 if not committer
.endswith (']\n'):
568 header
= name
+ '\n' + committer
572 assert header
[-1] == '\n'
575 def extract_author (self
):
576 mail
= self
.attributes
['author']
578 m
= re
.search ("^(.*) <(.*)>$", mail
)
585 name
= mail_to_name_dict
[mail
]
587 name
= mail
.split ('@')[0]
589 self
.author_name
= name
591 # mail addresses should be plain strings.
592 self
.author_mail
= mail
.encode('utf-8')
def extract_time(self):
    """Compute self.date from the patch's 'date' and 'local_date' attributes.

    The stored value is darcs_date_to_git() of 'date', a single space,
    and darcs_timezone() of 'local_date'.
    """
    seconds = darcs_date_to_git(self.attributes['date'])
    zone = darcs_timezone(self.attributes['local_date'])
    self.date = ' '.join((seconds, zone))
600 name_elt
= self
.xml
.getElementsByTagName ('name')[0]
601 patch_name
= unicode(fix_braindead_darcs_escapes(str(name_elt
.childNodes
[0].data
)), options
.encoding
)
606 def extract_message (self
):
607 patch_name
= self
.name ()
608 comment_elts
= self
.xml
.getElementsByTagName ('comment')
611 comment
= comment_elts
[0].childNodes
[0].data
613 if self
.attributes
['inverted'] == 'True':
614 patch_name
= 'UNDO: ' + patch_name
616 self
.message
= '%s\n\n%s' % (patch_name
, comment
)
619 patch_name
= self
.name ()
620 if patch_name
.startswith ("TAG "):
622 tag
= re
.sub (r
'\s', '_', tag
).strip ()
623 tag
= re
.sub (r
':', '_', tag
).strip ()
627 def get_darcs_patches (darcs_repo
):
628 progress ('reading patches.')
630 xml_string
= '<?xml version="1.0" encoding="%s" ?>' % options
.encoding
631 xml_string
+= read_pipe ('darcs changes --xml --reverse --repo ' + darcs_repo
)
633 dom
= xml
.dom
.minidom
.parseString(xml_string
)
634 xmls
= dom
.documentElement
.getElementsByTagName('patch')
636 patches
= [DarcsPatch (x
, darcs_repo
) for x
in xmls
]
645 ################################################################
649 def __init__ (self
, parent
, darcs_patch
):
651 self
.darcs_patch
= darcs_patch
653 self
.depth
= parent
.depth
+ 1
658 return self
.darcs_patch
.number
660 def parent_patch (self
):
662 return self
.parent
.darcs_patch
666 def common_ancestor (a
, b
):
668 if a
.depth
< b
.depth
:
670 elif a
.depth
> b
.depth
:
def export_checkpoint(gfi):
    """Emit a 'checkpoint' command, terminated by a blank line, on GFI.

    GFI is any object with a write() method taking a string.
    """
    emit = gfi.write
    emit('checkpoint\n\n')
687 def export_tree (tree
, gfi
):
688 tree
= os
.path
.normpath (tree
)
689 gfi
.write ('deleteall\n')
690 for (root
, dirs
, files
) in os
.walk (tree
):
692 rf
= os
.path
.normpath (os
.path
.join (root
, f
))
693 s
= open (rf
).read ()
694 rf
= rf
.replace (tree
+ '/', '')
696 gfi
.write ('M 644 inline %s\n' % rf
)
697 gfi
.write ('data %d\n%s\n' % (len (s
), s
))
701 def export_commit (repo
, patch
, last_patch
, gfi
):
702 gfi
.write ('commit refs/heads/darcstmp%d\n' % patch
.number
)
703 gfi
.write ('mark :%d\n' % (patch
.number
+ 1))
705 raw_name
= patch
.author_name
.encode('UTF-8')
706 gfi
.write ('committer %s <%s> %s\n' % (raw_name
,
712 msg
+= '\n\n#%d\n' % patch
.number
714 msg
= msg
.encode('utf-8')
715 gfi
.write ('data %d\n%s\n' % (len (msg
), msg
))
717 for (n
, p
) in pending_patches
.items ():
718 if repo
.has_patch (p
):
720 del pending_patches
[n
]
724 and git_commits
.has_key (last_patch
.number
)):
725 mergers
= [last_patch
.number
]
728 gfi
.write ('from :%d\n' % (mergers
[0] + 1))
729 for m
in mergers
[1:]:
730 gfi
.write ('merge :%d\n' % (m
+ 1))
732 pending_patches
[patch
.number
] = patch
733 export_tree (repo
.pristine_tree (), gfi
)
737 n
= last_patch
.number
738 git_commits
[patch
.number
] = GitCommit (git_commits
.get (n
, None),
741 def export_pending (gfi
):
742 if len (pending_patches
.items ()) == 1:
743 gfi
.write ('reset refs/heads/master\n')
744 gfi
.write ('from :%d\n\n' % (pending_patches
.values()[0].number
+1))
746 progress ("Creating branch master")
749 for (n
, p
) in pending_patches
.items ():
750 gfi
.write ('reset refs/heads/master%d\n' % n
)
751 gfi
.write ('from :%d\n\n' % (n
+1))
753 progress ("Creating branch master%d" % n
)
755 patches
= pending_patches
.values()
757 gfi
.write ('commit refs/heads/master\n')
758 gfi
.write ('committer %s <%s> %s\n' % (patch
.author_name
,
762 gfi
.write ('data %d\n%s\n' % (len(msg
), msg
))
763 gfi
.write ('from :%d\n' % (patch
.number
+ 1))
764 for p
in patches
[1:]:
765 gfi
.write ('merge :%d\n' % (p
.number
+ 1))
768 def export_tag (patch
, gfi
):
769 gfi
.write ('tag %s\n' % patch
.tag_name ())
770 gfi
.write ('from :%d\n' % (patch
.number
+ 1))
771 gfi
.write ('tagger %s <%s> %s\n' % (patch
.author_name
,
775 raw_message
= patch
.message
.encode('utf-8')
776 gfi
.write ('data %d\n%s\n' % (len (raw_message
),
779 ################################################################
782 def test_conversion (darcs_repo
, git_repo
):
783 pristine
= '%(darcs_repo)s/_darcs/pristine' % locals ()
784 if not os
.path
.exists (pristine
):
785 progress ("darcs repository does not contain pristine tree?!")
788 gd
= options
.basename
+ '.checkouttmp.git'
789 system ('rm -rf %(gd)s && git clone %(git_repo)s %(gd)s' % locals ())
790 diff_cmd
= 'diff --exclude .git -urN %(gd)s %(pristine)s' % locals ()
791 diff
= read_pipe (diff_cmd
, ignore_errors
=True)
792 system ('rm -rf %(gd)s' % locals ())
795 if len (diff
) > 1024:
796 diff
= diff
[:512] + '\n...\n' + diff
[-512:]
798 progress ("Conversion introduced changes: %s" % diff
)
801 progress ("Checkout matches pristine darcs tree.")
804 (options
, args
) = get_cli_options ()
806 darcs_repo
= os
.path
.abspath (args
[0])
807 git_repo
= os
.path
.abspath (options
.target_git_repo
)
809 if os
.path
.exists (git_repo
):
810 system ('rm -rf %(git_repo)s' % locals ())
812 system ('mkdir %(git_repo)s && cd %(git_repo)s && git --bare init' % locals ())
813 system ('git --git-dir %(git_repo)s repo-config core.logAllRefUpdates false' % locals ())
815 os
.environ
['DARCS_DONT_ESCAPE_8BIT'] = '1'
816 os
.environ
['GIT_DIR'] = git_repo
822 gfi
= os
.popen ('git fast-import %s' % quiet
, 'w')
824 patches
= get_darcs_patches (darcs_repo
)
825 conv_repo
= DarcsConversionRepo (options
.basename
+ ".tmpdarcs", patches
)
826 conv_repo
.start_at (-1)
832 combinations
= [(v
, w
) for v
in pending_patches
.values ()
833 for w
in pending_patches
.values ()]
834 candidates
= [common_ancestor (git_commits
[c
[0].number
], git_commits
[c
[1].number
]) for c
in combinations
]
835 candidates
= sorted ([(-a
.darcs_patch
.number
, a
) for a
in candidates
])
836 for (depth
, c
) in candidates
:
839 conv_repo
.go_from_to (q
, p
)
842 parent_number
= q
.number
843 progress ('Found existing common parent as predecessor')
849 ## no branches found where we could attach.
850 ## try previous commits one by one.
852 parent_number
= p
.number
- 2
854 if parent_number
>= 0:
855 parent_patch
= patches
[parent_number
]
858 conv_repo
.go_from_to (parent_patch
, p
)
862 ## simplistic, may not be enough.
863 progress ('conflict, going one back')
866 if parent_number
< 0:
869 if (options
.history_window
870 and parent_number
< p
.number
- options
.history_window
):
875 if parent_number
>= 0 or p
.number
== 0:
876 progress ('Export %d -> %d (total %d)' % (parent_number
,
877 p
.number
, len (patches
)))
878 export_commit (conv_repo
, p
, parent_patch
, gfi
)
882 if options
.checkpoint_frequency
and p
.number
% options
.checkpoint_frequency
== 0:
883 export_checkpoint (gfi
)
885 progress ("Can't import patch %d, need conflict resolution patch?"
890 for f
in glob
.glob ('%(git_repo)s/refs/heads/darcstmp*' % locals ()):
893 test_conversion (darcs_repo
, git_repo
)
895 if not options
.debug
: