1 # -*- coding: utf-8 -*-
3 # Copyright (C) 2008 Canonical Ltd
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 2 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
18 # Based on bzr-fast-export
19 # Copyright (c) 2008 Adeodato Simó
21 # Permission is hereby granted, free of charge, to any person obtaining
22 # a copy of this software and associated documentation files (the
23 # "Software"), to deal in the Software without restriction, including
24 # without limitation the rights to use, copy, modify, merge, publish,
25 # distribute, sublicense, and/or sell copies of the Software, and to
26 # permit persons to whom the Software is furnished to do so, subject to
27 # the following conditions:
29 # The above copyright notice and this permission notice shall be included
30 # in all copies or substantial portions of the Software.
32 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
33 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
34 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
35 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
36 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
37 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
38 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 # vim: fileencoding=utf-8
42 """Core engine for the fast-export command."""
44 # TODO: if a new_git_branch below gets merged repeatedly, the tip of the branch
45 # is not updated (because the parent of commit is already merged, so we don't
46 # set new_git_branch to the previously used name)
48 from email
.Utils
import parseaddr
52 import bzrlib
.revision
61 from bzrlib
.plugins
.fastimport
import (
66 from fastimport
import commands
67 from bzrlib
.plugins
.fastimport
.helpers
import (
73 def _get_output_stream(destination
):
74 if destination
is None or destination
== '-':
75 return binary_stream(sys
.stdout
)
76 elif destination
.endswith('gz'):
78 return gzip
.open(destination
, 'wb')
80 return open(destination
, 'wb')
83 def check_ref_format(refname
):
84 """Check if a refname is correctly formatted.
86 Implements all the same rules as git-check-ref-format[1].
88 [1] http://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html
90 :param refname: The refname to check
91 :return: True if refname is valid, False otherwise
93 # These could be combined into one big expression, but are listed separately
95 if '/.' in refname
or refname
.startswith('.'):
97 if '/' not in refname
:
102 if ord(c
) < 040 or c
in '\177 ~^:?*[':
104 if refname
[-1] in '/.':
106 if refname
.endswith('.lock'):
115 def sanitize_ref_name_for_git(refname
):
116 """Rewrite refname so that it will be accepted by git-fast-import.
117 For the detailed rules see check_ref_format.
119 By rewriting the refname we are breaking uniqueness guarantees provided by bzr
120 so we have to manually
121 verify that resulting ref names are unique.
123 :param refname: refname to rewrite
126 new_refname
= re
.sub(
127 # '/.' in refname or startswith '.'
132 r
"|[" + "".join([chr(x
) for x
in range(040)]) + r
"]"
147 class BzrFastExporter(object):
149 def __init__(self
, source
, outf
, ref
=None, checkpoint
=-1,
150 import_marks_file
=None, export_marks_file
=None, revision
=None,
151 verbose
=False, plain_format
=False, rewrite_tags
=False,
152 no_tags
=False, baseline
=False):
153 """Export branch data in fast import format.
155 :param plain_format: if True, 'classic' fast-import format is
156 used without any extended features; if False, the generated
157 data is richer and includes information like multiple
158 authors, revision properties, etc.
159 :param rewrite_tags: if True and if plain_format is set, tag names
160 will be rewritten to be git-compatible.
161 Otherwise tags which aren't valid for git will be skipped if
163 :param no_tags: if True tags won't be exported at all
168 self
.checkpoint
= checkpoint
169 self
.import_marks_file
= import_marks_file
170 self
.export_marks_file
= export_marks_file
171 self
.revision
= revision
172 self
.excluded_revisions
= set()
173 self
.plain_format
= plain_format
174 self
.rewrite_tags
= rewrite_tags
175 self
.no_tags
= no_tags
176 self
.baseline
= baseline
177 self
._multi
_author
_api
_available
= hasattr(bzrlib
.revision
.Revision
,
178 'get_apparent_authors')
179 self
.properties_to_exclude
= ['authors', 'author']
181 # Progress reporting stuff
182 self
.verbose
= verbose
184 self
.progress_every
= 100
186 self
.progress_every
= 1000
187 self
._start
_time
= time
.time()
188 self
._commit
_total
= 0
190 # Load the marks and initialise things accordingly
191 self
.revid_to_mark
= {}
192 self
.branch_names
= {}
193 if self
.import_marks_file
:
194 marks_info
= marks_file
.import_marks(self
.import_marks_file
)
195 if marks_info
is not None:
196 self
.revid_to_mark
= dict((r
, m
) for m
, r
in
198 # These are no longer included in the marks file
199 #self.branch_names = marks_info[1]
201 def interesting_history(self
):
203 rev1
, rev2
= builtins
._get
_revision
_range
(self
.revision
,
204 self
.branch
, "fast-export")
205 start_rev_id
= rev1
.rev_id
206 end_rev_id
= rev2
.rev_id
210 self
.note("Calculating the revisions to include ...")
211 view_revisions
= [rev_id
for rev_id
, _
, _
, _
in
212 self
.branch
.iter_merge_sorted_revisions(end_rev_id
, start_rev_id
)]
213 view_revisions
.reverse()
214 # If a starting point was given, we need to later check that we don't
215 # start emitting revisions from before that point. Collect the
216 # revisions to exclude now ...
217 if start_rev_id
is not None:
218 self
.note("Calculating the revisions to exclude ...")
219 self
.excluded_revisions
= set([rev_id
for rev_id
, _
, _
, _
in
220 self
.branch
.iter_merge_sorted_revisions(start_rev_id
)])
222 # needed so the first relative commit knows its parent
223 self
.excluded_revisions
.remove(start_rev_id
)
224 view_revisions
.insert(0, start_rev_id
)
225 return list(view_revisions
)
229 self
.branch
.repository
.lock_read()
231 interesting
= self
.interesting_history()
232 self
._commit
_total
= len(interesting
)
233 self
.note("Starting export of %d revisions ..." %
235 if not self
.plain_format
:
238 self
.emit_baseline(interesting
.pop(0), self
.ref
)
239 for revid
in interesting
:
240 self
.emit_commit(revid
, self
.ref
)
241 if self
.branch
.supports_tags() and not self
.no_tags
:
244 self
.branch
.repository
.unlock()
246 # Save the marks if requested
def note(self, msg, *args):
    """Emit an informational message prefixed with the time of day.

    :param msg: the message (or format string) to report
    :param args: extra arguments handed on unchanged to trace.note
    """
    stamped = "%s %s" % (self._time_of_day(), msg)
    trace.note(stamped, *args)
def warning(self, msg, *args):
    """Emit a warning prefixed with the time of day and 'WARNING:'.

    :param msg: the message (or format string) to report
    :param args: extra arguments handed on unchanged to trace.warning
    """
    stamped = "%s WARNING: %s" % (self._time_of_day(), msg)
    trace.warning(stamped, *args)
260 def _time_of_day(self
):
261 """Time of day as a string."""
262 # Note: this is a separate method so tests can patch in a fixed value
263 return time
.strftime("%H:%M:%S")
265 def report_progress(self
, commit_count
, details
=''):
266 if commit_count
and commit_count
% self
.progress_every
== 0:
267 if self
._commit
_total
:
268 counts
= "%d/%d" % (commit_count
, self
._commit
_total
)
270 counts
= "%d" % (commit_count
,)
271 minutes
= (time
.time() - self
._start
_time
) / 60
272 rate
= commit_count
* 1.0 / minutes
274 rate_str
= "at %.0f/minute " % rate
276 rate_str
= "at %.1f/minute " % rate
277 self
.note("%s commits exported %s%s" % (counts
, rate_str
, details
))
def dump_stats(self):
    """Report how many revisions were recorded and how long it took.

    The count is the number of entries in self.revid_to_mark; the
    elapsed time is measured from self._start_time.
    """
    time_required = progress.str_tdelta(time.time() - self._start_time)
    exported = len(self.revid_to_mark)
    noun = single_plural(exported, "revision", "revisions")
    self.note("Exported %d %s in %s", exported, noun, time_required)
def print_cmd(self, cmd):
    """Write the repr of cmd, followed by a newline, to self.outf."""
    line = "%r\n" % cmd
    self.outf.write(line)
289 def _save_marks(self
):
290 if self
.export_marks_file
:
291 revision_ids
= dict((m
, r
) for r
, m
in self
.revid_to_mark
.items())
292 marks_file
.export_marks(self
.export_marks_file
, revision_ids
)
294 def is_empty_dir(self
, tree
, path
):
295 path_id
= tree
.path2id(path
)
297 self
.warning("Skipping empty_dir detection - no file_id for %s" %
301 # Continue if path is not a directory
302 if tree
.kind(path_id
) != 'directory':
305 # Use treewalk to find the contents of our directory
306 contents
= list(tree
.walkdirs(prefix
=path
))[0]
307 if len(contents
[1]) == 0:
def emit_features(self):
    """Print one FeatureCommand per name in commands.FEATURE_NAMES.

    The names are emitted in sorted order.
    """
    ordered_names = sorted(commands.FEATURE_NAMES)
    for name in ordered_names:
        feature_cmd = commands.FeatureCommand(name)
        self.print_cmd(feature_cmd)
316 def emit_baseline(self
, revid
, ref
):
317 # Emit a full source tree of the first commit's parent
318 revobj
= self
.branch
.repository
.get_revision(revid
)
320 self
.revid_to_mark
[revid
] = mark
321 file_cmds
= self
._get
_filecommands
(bzrlib
.revision
.NULL_REVISION
, revid
)
322 self
.print_cmd(self
._get
_commit
_command
(ref
, mark
, revobj
, file_cmds
))
324 def emit_commit(self
, revid
, ref
):
325 if revid
in self
.revid_to_mark
or revid
in self
.excluded_revisions
:
328 # Get the Revision object
330 revobj
= self
.branch
.repository
.get_revision(revid
)
331 except bazErrors
.NoSuchRevision
:
332 # This is a ghost revision. Mark it as not found and next!
333 self
.revid_to_mark
[revid
] = -1
336 # Get the primary parent
337 # TODO: Consider the excluded revisions when deciding the parents.
338 # Currently, a commit with parents that are excluded ought to be
339 # triggering the ref calculation below (and it is not).
341 ncommits
= len(self
.revid_to_mark
)
342 nparents
= len(revobj
.parent_ids
)
345 # This is a parentless commit but it's not the first one
346 # output. We need to create a new temporary branch for it
347 # otherwise git-fast-import will assume the previous commit
348 # was this one's parent
349 ref
= self
._next
_tmp
_ref
()
350 parent
= bzrlib
.revision
.NULL_REVISION
352 parent
= revobj
.parent_ids
[0]
356 self
.revid_to_mark
[revid
] = mark
357 file_cmds
= self
._get
_filecommands
(parent
, revid
)
358 self
.print_cmd(self
._get
_commit
_command
(ref
, mark
, revobj
, file_cmds
))
360 # Report progress and checkpoint if it's time for that
361 self
.report_progress(ncommits
)
362 if (self
.checkpoint
> 0 and ncommits
363 and ncommits
% self
.checkpoint
== 0):
364 self
.note("Exported %i commits - adding checkpoint to output"
367 self
.print_cmd(commands
.CheckpointCommand())
369 def _get_name_email(self
, user
):
370 if user
.find('<') == -1:
371 # If the email isn't inside <>, we need to use it as the name
372 # in order for things to round-trip correctly.
373 # (note: parseaddr('a@b.com') => name:'', email: 'a@b.com')
377 name
, email
= parseaddr(user
)
378 return name
.encode("utf-8"), email
.encode("utf-8")
380 def _get_commit_command(self
, git_ref
, mark
, revobj
, file_cmds
):
381 # Get the committer and author info
382 committer
= revobj
.committer
383 name
, email
= self
._get
_name
_email
(committer
)
384 committer_info
= (name
, email
, revobj
.timestamp
, revobj
.timezone
)
385 if self
._multi
_author
_api
_available
:
386 more_authors
= revobj
.get_apparent_authors()
387 author
= more_authors
.pop(0)
390 author
= revobj
.get_apparent_author()
391 if not self
.plain_format
and more_authors
:
392 name
, email
= self
._get
_name
_email
(author
)
393 author_info
= (name
, email
, revobj
.timestamp
, revobj
.timezone
)
394 more_author_info
= []
395 for a
in more_authors
:
396 name
, email
= self
._get
_name
_email
(a
)
397 more_author_info
.append(
398 (name
, email
, revobj
.timestamp
, revobj
.timezone
))
399 elif author
!= committer
:
400 name
, email
= self
._get
_name
_email
(author
)
401 author_info
= (name
, email
, revobj
.timestamp
, revobj
.timezone
)
402 more_author_info
= None
405 more_author_info
= None
407 # Get the parents in terms of marks
408 non_ghost_parents
= []
409 for p
in revobj
.parent_ids
:
410 if p
in self
.excluded_revisions
:
413 parent_mark
= self
.revid_to_mark
[p
]
414 non_ghost_parents
.append(":%s" % parent_mark
)
418 if non_ghost_parents
:
419 from_
= non_ghost_parents
[0]
420 merges
= non_ghost_parents
[1:]
425 # Filter the revision properties. Some metadata (like the
426 # author information) is already exposed in other ways so
427 # don't repeat it here.
428 if self
.plain_format
:
431 properties
= revobj
.properties
432 for prop
in self
.properties_to_exclude
:
438 # Build and return the result
439 return commands
.CommitCommand(git_ref
, mark
, author_info
,
440 committer_info
, revobj
.message
.encode("utf-8"), from_
, merges
, iter(file_cmds
),
441 more_authors
=more_author_info
, properties
=properties
)
443 def _get_revision_trees(self
, parent
, revision_id
):
445 tree_old
= self
.branch
.repository
.revision_tree(parent
)
446 except bazErrors
.UnexpectedInventoryFormat
:
447 self
.warning("Parent is malformed - diffing against previous parent")
448 # We can't find the old parent. Let's diff against his parent
449 pp
= self
.branch
.repository
.get_revision(parent
)
450 tree_old
= self
.branch
.repository
.revision_tree(pp
.parent_ids
[0])
453 tree_new
= self
.branch
.repository
.revision_tree(revision_id
)
454 except bazErrors
.UnexpectedInventoryFormat
:
455 # We can't really do anything anymore
456 self
.warning("Revision %s is malformed - skipping" % revision_id
)
457 return tree_old
, tree_new
459 def _get_filecommands(self
, parent
, revision_id
):
460 """Get the list of FileCommands for the changes between two revisions."""
461 tree_old
, tree_new
= self
._get
_revision
_trees
(parent
, revision_id
)
462 if not(tree_old
and tree_new
):
463 # Something is wrong with this revision - ignore the filecommands
466 changes
= tree_new
.changes_from(tree_old
)
468 # Make "modified" have 3-tuples, as added does
469 my_modified
= [ x
[0:3] for x
in changes
.modified
]
471 # The potential interaction between renames and deletes is messy.
473 file_cmds
, rd_modifies
, renamed
= self
._process
_renames
_and
_deletes
(
474 changes
.renamed
, changes
.removed
, revision_id
, tree_old
)
476 # Map kind changes to a delete followed by an add
477 for path
, id_
, kind1
, kind2
in changes
.kind_changed
:
478 path
= self
._adjust
_path
_for
_renames
(path
, renamed
, revision_id
)
479 # IGC: I don't understand why a delete is needed here.
480 # In fact, it seems harmful? If you uncomment this line,
481 # please file a bug explaining why you needed to.
482 #file_cmds.append(commands.FileDeleteCommand(path))
483 my_modified
.append((path
, id_
, kind2
))
485 # Record modifications
486 for path
, id_
, kind
in changes
.added
+ my_modified
+ rd_modifies
:
488 text
= tree_new
.get_file_text(id_
)
489 file_cmds
.append(commands
.FileModifyCommand(path
.encode("utf-8"),
490 helpers
.kind_to_mode('file', tree_new
.is_executable(id_
)),
492 elif kind
== 'symlink':
493 file_cmds
.append(commands
.FileModifyCommand(path
.encode("utf-8"),
494 helpers
.kind_to_mode('symlink', False),
495 None, tree_new
.get_symlink_target(id_
)))
496 elif kind
== 'directory':
497 if not self
.plain_format
:
498 file_cmds
.append(commands
.FileModifyCommand(path
.encode("utf-8"),
499 helpers
.kind_to_mode('directory', False),
502 self
.warning("cannot export '%s' of kind %s yet - ignoring" %
506 def _process_renames_and_deletes(self
, renames
, deletes
,
507 revision_id
, tree_old
):
512 # See https://bugs.edge.launchpad.net/bzr-fastimport/+bug/268933.
513 # In a nutshell, there are several nasty cases:
515 # 1) bzr rm a; bzr mv b a; bzr commit
516 # 2) bzr mv x/y z; bzr rm x; commit
518 # The first must come out with the delete first like this:
523 # The second case must come out with the rename first like this:
528 # So outputting all deletes first or all renames first won't work.
529 # Instead, we need to make multiple passes over the various lists to
530 # get the ordering right.
534 deleted_paths
= set([p
for p
, _
, _
in deletes
])
535 for (oldpath
, newpath
, id_
, kind
,
536 text_modified
, meta_modified
) in renames
:
537 emit
= kind
!= 'directory' or not self
.plain_format
538 if newpath
in deleted_paths
:
540 file_cmds
.append(commands
.FileDeleteCommand(newpath
.encode("utf-8")))
541 deleted_paths
.remove(newpath
)
542 if (self
.is_empty_dir(tree_old
, oldpath
)):
543 self
.note("Skipping empty dir %s in rev %s" % (oldpath
,
546 #oldpath = self._adjust_path_for_renames(oldpath, renamed,
548 renamed
.append([oldpath
, newpath
])
549 old_to_new
[oldpath
] = newpath
552 commands
.FileRenameCommand(oldpath
.encode("utf-8"), newpath
.encode("utf-8")))
553 if text_modified
or meta_modified
:
554 modifies
.append((newpath
, id_
, kind
))
556 # Renaming a directory implies all children must be renamed.
557 # Note: changes_from() doesn't handle this
558 if kind
== 'directory' and tree_old
.kind(id_
) == 'directory':
559 for p
, e
in tree_old
.inventory
.iter_entries_by_dir(from_dir
=id_
):
560 if e
.kind
== 'directory' and self
.plain_format
:
562 old_child_path
= osutils
.pathjoin(oldpath
, p
)
563 new_child_path
= osutils
.pathjoin(newpath
, p
)
564 must_be_renamed
[old_child_path
] = new_child_path
566 # Add children not already renamed
568 renamed_already
= set(old_to_new
.keys())
569 still_to_be_renamed
= set(must_be_renamed
.keys()) - renamed_already
570 for old_child_path
in sorted(still_to_be_renamed
):
571 new_child_path
= must_be_renamed
[old_child_path
]
573 self
.note("implicitly renaming %s => %s" % (old_child_path
,
575 file_cmds
.append(commands
.FileRenameCommand(old_child_path
.encode("utf-8"),
576 new_child_path
.encode("utf-8")))
578 # Record remaining deletes
579 for path
, id_
, kind
in deletes
:
580 if path
not in deleted_paths
:
582 if kind
== 'directory' and self
.plain_format
:
584 #path = self._adjust_path_for_renames(path, renamed, revision_id)
585 file_cmds
.append(commands
.FileDeleteCommand(path
.encode("utf-8")))
586 return file_cmds
, modifies
, renamed
588 def _adjust_path_for_renames(self
, path
, renamed
, revision_id
):
589 # If a previous rename is found, we should adjust the path
590 for old
, new
in renamed
:
592 self
.note("Changing path %s given rename to %s in revision %s"
593 % (path
, new
, revision_id
))
595 elif path
.startswith(old
+ '/'):
597 "Adjusting path %s given rename of %s to %s in revision %s"
598 % (path
, old
, new
, revision_id
))
599 path
= path
.replace(old
+ "/", new
+ "/")
603 for tag
, revid
in self
.branch
.tags
.get_tag_dict().items():
605 mark
= self
.revid_to_mark
[revid
]
607 self
.warning('not creating tag %r pointing to non-existent '
608 'revision %s' % (tag
, revid
))
610 git_ref
= 'refs/tags/%s' % tag
.encode("utf-8")
611 if self
.plain_format
and not check_ref_format(git_ref
):
612 if self
.rewrite_tags
:
613 new_ref
= sanitize_ref_name_for_git(git_ref
)
614 self
.warning('tag %r is exported as %r to be valid in git.',
618 self
.warning('not creating tag %r as its name would not be '
619 'valid in git.', git_ref
)
621 self
.print_cmd(commands
.ResetCommand(git_ref
, ":" + str(mark
)))
623 def _next_tmp_ref(self
):
624 """Return a unique branch name. The name will start with "tmp"."""
626 if prefix
not in self
.branch_names
:
627 self
.branch_names
[prefix
] = 0
629 self
.branch_names
[prefix
] += 1
630 prefix
= '%s.%d' % (prefix
, self
.branch_names
[prefix
])
631 return 'refs/heads/%s' % prefix