1 # Copyright (C) 2008 Canonical Ltd
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation; either version 2 of the License, or
6 # (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 """CommitHandlers that build and save revisions & their inventories."""
from bzrlib import (
    debug,
    errors,
    generate_ids,
    inventory,
    osutils,
    revision,
    serializer,
    )
from bzrlib.trace import (
    mutter,
    note,
    warning,
    )
from fastimport import (
    helpers,
    processor,
    )
from bzrlib.plugins.fastimport.helpers import (
    mode_to_kind,
    )

# Newer bzr serializers escape XML-invalid characters themselves; older
# ones need us to pre-escape commit messages (see build_revision below).
_serializer_handles_escaping = hasattr(serializer.Serializer,
    'squashes_xml_invalid_characters')
def copy_inventory(inv):
    """Return a mutable deep copy of an inventory.

    :param inv: the inventory to copy; it is not modified
    :return: a new Inventory with the same revision_id and a copy of
        every entry, safe to mutate without corrupting the original
    """
    entries = inv.iter_entries_by_dir()
    # Start from an empty (root-less) inventory so the copied root entry
    # from entries is added like any other entry.
    inv = inventory.Inventory(None, inv.revision_id)
    for path, inv_entry in entries:
        inv.add(inv_entry.copy())
    # Bug fix: the mangled source dropped this return, leaving callers
    # (e.g. InventoryCommitHandler.pre_process_files) with None.
    return inv
class GenericCommitHandler(processor.CommitHandler):
    """Base class for Bazaar CommitHandlers."""

    def __init__(self, command, cache_mgr, rev_store, verbose=False,
        prune_empty_dirs=True):
        super(GenericCommitHandler, self).__init__(command)
        self.cache_mgr = cache_mgr
        self.rev_store = rev_store
        self.verbose = verbose
        self.branch_ref = command.ref
        self.prune_empty_dirs = prune_empty_dirs
        # This tracks path->file-id for things we're creating this commit.
        # If the same path is created multiple times, we need to warn the
        # user and add it just once.
        # If a path is added then renamed or copied, we need to handle that.
        self._new_file_ids = {}
        # This tracks path->file-id for things we're modifying this commit.
        # If a path is modified then renamed or copied, we need the make
        # sure we grab the new content.
        self._modified_file_ids = {}
        # This tracks the paths for things we're deleting this commit.
        # If the same path is added or the destination of a rename say,
        # then a fresh file-id is required.
        self._paths_deleted_this_commit = set()

    def mutter(self, msg, *args):
        """Output a mutter but add context."""
        msg = "%s (%s)" % (msg, self.command.id)
        mutter(msg, *args)

    def debug(self, msg, *args):
        """Output a mutter if the appropriate -D option was given."""
        if "fast-import" in debug.debug_flags:
            msg = "%s (%s)" % (msg, self.command.id)
            mutter(msg, *args)

    def note(self, msg, *args):
        """Output a note but add context."""
        msg = "%s (%s)" % (msg, self.command.id)
        note(msg, *args)

    def warning(self, msg, *args):
        """Output a warning but add context."""
        msg = "%s (%s)" % (msg, self.command.id)
        warning(msg, *args)

    def pre_process_files(self):
        """Prepare for committing."""
        self.revision_id = self.gen_revision_id()
        # cache of texts for this commit, indexed by file-id
        self.data_for_commit = {}
        #if self.rev_store.expects_rich_root():
        self.data_for_commit[inventory.ROOT_ID] = []

        # Track the heads and get the real parent list
        parents = self.cache_mgr.reftracker.track_heads(self.command)

        # Convert the parent commit-ids to bzr revision-ids
        self.parents = [self.cache_mgr.lookup_committish(p)
            for p in parents]
        self.debug("%s id: %s, parents: %s", self.command.id,
            self.revision_id, str(self.parents))

        # Tell the RevisionStore we're starting a new commit
        self.revision = self.build_revision()
        self.parent_invs = [self.get_inventory(p) for p in self.parents]
        self.rev_store.start_new_revision(self.revision, self.parents,
            self.parent_invs)

        # cache of per-file parents for this commit, indexed by file-id
        self.per_file_parents_for_commit = {}
        if self.rev_store.expects_rich_root():
            self.per_file_parents_for_commit[inventory.ROOT_ID] = ()

        # Keep the basis inventory. This needs to be treated as read-only.
        if len(self.parents) == 0:
            self.basis_inventory = self._init_inventory()
        else:
            self.basis_inventory = self.get_inventory(self.parents[0])
        if hasattr(self.basis_inventory, "root_id"):
            self.inventory_root_id = self.basis_inventory.root_id
        else:
            self.inventory_root_id = self.basis_inventory.root.file_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = {}

    def _init_inventory(self):
        """Create an empty starting inventory for this revision."""
        return self.rev_store.init_inventory(self.revision_id)

    def get_inventory(self, revision_id):
        """Get the inventory for a revision id."""
        try:
            inv = self.cache_mgr.inventories[revision_id]
        except KeyError:
            if self.verbose:
                self.mutter("get_inventory cache miss for %s", revision_id)
            # Not cached so reconstruct from the RevisionStore
            inv = self.rev_store.get_inventory(revision_id)
            self.cache_mgr.inventories[revision_id] = inv
        return inv

    def _get_data(self, file_id):
        """Get the data bytes for a file-id."""
        return self.data_for_commit[file_id]

    def _get_lines(self, file_id):
        """Get the lines for a file-id."""
        return osutils.split_lines(self._get_data(file_id))

    def _get_per_file_parents(self, file_id):
        """Get the per-file parents recorded for a file-id."""
        return self.per_file_parents_for_commit[file_id]

    def _get_inventories(self, revision_ids):
        """Get the inventories for revision-ids.

        This is a callback used by the RepositoryStore to
        speed up inventory reconstruction.

        :return: (present, inventories) where present lists the
            revision-ids actually found and inventories is the
            matching list of Inventory objects
        """
        present = []
        inventories = []
        # If an inventory is in the cache, we assume it was
        # successfully loaded into the revision store
        for revision_id in revision_ids:
            try:
                inv = self.cache_mgr.inventories[revision_id]
                present.append(revision_id)
            except KeyError:
                if self.verbose:
                    self.note("get_inventories cache miss for %s", revision_id)
                # Not cached so reconstruct from the revision store
                try:
                    inv = self.get_inventory(revision_id)
                    present.append(revision_id)
                except Exception:
                    # Best-effort fallback: treat an unreconstructable
                    # inventory as empty rather than aborting the import.
                    inv = self._init_inventory()
                self.cache_mgr.inventories[revision_id] = inv
            inventories.append(inv)
        return present, inventories

    def bzr_file_id_and_new(self, path):
        """Get a Bazaar file identifier and new flag for a path.

        :return: file_id, is_new where
          is_new = True if the file_id is newly created
        """
        if path not in self._paths_deleted_this_commit:
            # Try file-ids renamed in this commit
            id = self._modified_file_ids.get(path)
            if id is not None:
                return id, False

            # Try the basis inventory
            id = self.basis_inventory.path2id(path)
            if id is not None:
                return id, False

            # Try the other inventories
            if len(self.parents) > 1:
                for inv in self.parent_invs[1:]:
                    # Bug fix: was querying self.basis_inventory again,
                    # ignoring the loop variable; query each parent inv.
                    id = inv.path2id(path)
                    if id is not None:
                        return id, False

        # Doesn't exist yet so create it
        dirname, basename = osutils.split(path)
        id = generate_ids.gen_file_id(basename)
        self.debug("Generated new file id %s for '%s' in revision-id '%s'",
            id, path, self.revision_id)
        self._new_file_ids[path] = id
        return id, True

    def bzr_file_id(self, path):
        """Get a Bazaar file identifier for a path."""
        return self.bzr_file_id_and_new(path)[0]

    def _utf8_decode(self, field, value):
        """Decode value as utf-8, replacing bad characters with a warning."""
        try:
            return value.decode('utf-8')
        except UnicodeDecodeError:
            # The spec says fields are *typically* utf8 encoded
            # but that isn't enforced by git-fast-export (at least)
            self.warning("%s not in utf8 - replacing unknown "
                "characters" % (field,))
            return value.decode('utf-8', 'replace')

    def _decode_path(self, path):
        """Decode a path as utf-8, replacing bad characters with a warning."""
        try:
            return path.decode('utf-8')
        except UnicodeDecodeError:
            # The spec says fields are *typically* utf8 encoded
            # but that isn't enforced by git-fast-export (at least)
            self.warning("path %r not in utf8 - replacing unknown "
                "characters" % (path,))
            return path.decode('utf-8', 'replace')

    def _format_name_email(self, section, name, email):
        """Format name & email as a string."""
        name = self._utf8_decode("%s name" % section, name)
        email = self._utf8_decode("%s email" % section, email)
        if email:
            return "%s <%s>" % (name, email)
        else:
            return name

    def gen_revision_id(self):
        """Generate a revision id.

        Subclasses may override this to produce deterministic ids say.
        """
        committer = self.command.committer
        # Perhaps 'who' being the person running the import is ok? If so,
        # it might be a bit quicker and give slightly better compression?
        who = self._format_name_email("committer", committer[0], committer[1])
        timestamp = committer[2]
        return generate_ids.gen_revision_id(who, timestamp)

    def build_revision(self):
        """Build the Revision object for this commit."""
        rev_props = self._legal_revision_properties(self.command.properties)
        if 'branch-nick' not in rev_props:
            rev_props['branch-nick'] = self.cache_mgr.branch_mapper.git_to_bzr(
                self.branch_ref)
        self._save_author_info(rev_props)
        committer = self.command.committer
        who = self._format_name_email("committer", committer[0], committer[1])
        try:
            message = self.command.message.decode("utf-8")
        except UnicodeDecodeError:
            self.warning(
                "commit message not in utf8 - replacing unknown characters")
            message = self.command.message.decode('utf-8', 'replace')
        if not _serializer_handles_escaping:
            # We need to assume the bad ol' days
            message = helpers.escape_commit_message(message)
        return revision.Revision(
            timestamp=committer[2],
            timezone=committer[3],
            committer=who,
            message=message,
            revision_id=self.revision_id,
            properties=rev_props,
            parent_ids=self.parents)

    def _legal_revision_properties(self, props):
        """Clean-up any revision properties we can't handle."""
        # For now, we just check for None because that's not allowed in 2.0rc1
        result = {}
        if props is not None:
            for name, value in props.items():
                if value is None:
                    self.warning(
                        "converting None to empty string for property %s"
                        % (name,))
                    result[name] = ''
                else:
                    result[name] = value
        return result

    def _save_author_info(self, rev_props):
        """Record author information (if any) in the revision properties."""
        author = self.command.author
        if author is None:
            return
        if self.command.more_authors:
            authors = [author] + self.command.more_authors
            author_ids = [self._format_name_email("author", a[0], a[1])
                for a in authors]
        elif author != self.command.committer:
            author_ids = [self._format_name_email("author", author[0],
                author[1])]
        else:
            return
        # If we reach here, there are authors worth storing
        rev_props['authors'] = "\n".join(author_ids)

    def _modify_item(self, path, kind, is_executable, data, inv):
        """Add to or change an item in the inventory."""
        # If we've already added this, warn the user that we're ignoring it.
        # In the future, it might be nice to double check that the new data
        # is the same as the old but, frankly, exporters should be fixed
        # not to produce bad data streams in the first place ...
        existing = self._new_file_ids.get(path)
        if existing:
            # We don't warn about directories because it's fine for them
            # to be created already by a previous rename
            if kind != 'directory':
                self.warning("%s already added in this commit - ignoring" %
                    (path,))
            return

        # Create the new InventoryEntry
        basename, parent_id = self._ensure_directory(path, inv)
        file_id = self.bzr_file_id(path)
        ie = inventory.make_entry(kind, basename, parent_id, file_id)
        ie.revision = self.revision_id
        if kind == 'file':
            ie.executable = is_executable
            # lines = osutils.split_lines(data)
            ie.text_sha1 = osutils.sha_string(data)
            ie.text_size = len(data)
            self.data_for_commit[file_id] = data
        elif kind == 'directory':
            self.directory_entries[path] = ie
            # There are no lines stored for a directory so
            # make sure the cache used by get_lines knows that
            self.data_for_commit[file_id] = ''
        elif kind == 'symlink':
            ie.symlink_target = self._decode_path(data)
            # There are no lines stored for a symlink so
            # make sure the cache used by get_lines knows that
            self.data_for_commit[file_id] = ''
        else:
            self.warning("Cannot import items of kind '%s' yet - ignoring '%s'"
                % (kind, path))
            return

        # Record it
        if inv.has_id(file_id):
            old_ie = inv[file_id]
            if old_ie.kind == 'directory':
                self.record_delete(path, old_ie)
            self.record_changed(path, ie, parent_id)
        else:
            try:
                self.record_new(path, ie)
            except:
                # Emit diagnostics before propagating the failure
                print("failed to add path '%s' with entry '%s' in command %s"
                    % (path, ie, self.command.id))
                print("parent's children are:\n%r\n" % (ie.parent_id.children,))
                raise

    def _ensure_directory(self, path, inv):
        """Ensure that the containing directory exists for 'path'"""
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, self.inventory_root_id
        try:
            ie = self._get_directory_entry(inv, dirname)
        except KeyError:
            # We will create this entry, since it doesn't exist
            pass
        else:
            return basename, ie.file_id

        # No directory existed, we will just create one, first, make sure
        # the parent exists
        dir_basename, parent_id = self._ensure_directory(dirname, inv)
        dir_file_id = self.bzr_file_id(dirname)
        ie = inventory.entry_factory['directory'](dir_file_id,
            dir_basename, parent_id)
        ie.revision = self.revision_id
        self.directory_entries[dirname] = ie
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.data_for_commit[dir_file_id] = ''

        # It's possible that a file or symlink with that file-id
        # already exists. If it does, we need to delete it.
        if inv.has_id(dir_file_id):
            self.record_delete(dirname, ie)
        self.record_new(dirname, ie)
        return basename, ie.file_id

    def _get_directory_entry(self, inv, dirname):
        """Get the inventory entry for a directory.

        Raises KeyError if dirname is not a directory in inv.
        """
        result = self.directory_entries.get(dirname)
        if result is None:
            if dirname in self._paths_deleted_this_commit:
                raise KeyError
            try:
                file_id = inv.path2id(dirname)
            except errors.NoSuchId:
                # In a CHKInventory, this is raised if there's no root yet
                raise KeyError
            if file_id is None:
                raise KeyError
            result = inv[file_id]
            # dirname must be a directory for us to return it
            if result.kind == 'directory':
                self.directory_entries[dirname] = result
            else:
                raise KeyError
        return result

    def _delete_item(self, path, inv):
        """Record the deletion of path, if it exists."""
        newly_added = self._new_file_ids.get(path)
        if newly_added:
            # We've only just added this path earlier in this commit.
            file_id = newly_added
            # note: delta entries look like (old, new, file-id, ie)
            ie = self._delta_entries_by_fileid[file_id][3]
        else:
            file_id = inv.path2id(path)
            if file_id is None:
                self.mutter("ignoring delete of %s as not in inventory", path)
                return
            try:
                ie = inv[file_id]
            except errors.NoSuchId:
                self.mutter("ignoring delete of %s as not in inventory", path)
                return
        self.record_delete(path, ie)

    def _copy_item(self, src_path, dest_path, inv):
        """Record a copy of src_path to dest_path."""
        newly_changed = self._new_file_ids.get(src_path) or \
            self._modified_file_ids.get(src_path)
        if newly_changed:
            # We've only just added/changed this path earlier in this commit.
            file_id = newly_changed
            # note: delta entries look like (old, new, file-id, ie)
            ie = self._delta_entries_by_fileid[file_id][3]
        else:
            file_id = inv.path2id(src_path)
            if file_id is None:
                self.warning("ignoring copy of %s to %s - source does not exist",
                    src_path, dest_path)
                return
            ie = inv[file_id]
        kind = ie.kind
        if kind == 'file':
            if newly_changed:
                content = self.data_for_commit[file_id]
            else:
                content = self.rev_store.get_file_text(self.parents[0], file_id)
            self._modify_item(dest_path, kind, ie.executable, content, inv)
        elif kind == 'symlink':
            self._modify_item(dest_path, kind, False,
                ie.symlink_target.encode("utf-8"), inv)
        else:
            self.warning("ignoring copy of %s %s - feature not yet supported",
                kind, dest_path)

    def _rename_item(self, old_path, new_path, inv):
        """Record a rename of old_path to new_path."""
        existing = self._new_file_ids.get(old_path) or \
            self._modified_file_ids.get(old_path)
        if existing:
            # We've only just added/modified this path earlier in this commit.
            # Change the add/modify of old_path to an add of new_path
            self._rename_pending_change(old_path, new_path, existing)
            return

        file_id = inv.path2id(old_path)
        if file_id is None:
            self.warning(
                "ignoring rename of %s to %s - old path does not exist" %
                (old_path, new_path))
            return
        ie = inv[file_id]
        rev_id = ie.revision
        new_file_id = inv.path2id(new_path)
        if new_file_id is not None:
            self.record_delete(new_path, inv[new_file_id])
        self.record_rename(old_path, new_path, file_id, ie)

        # The revision-id for this entry will be/has been updated and
        # that means the loader then needs to know what the "new" text is.
        # We therefore must go back to the revision store to get it.
        lines = self.rev_store.get_file_lines(rev_id, file_id)
        self.data_for_commit[file_id] = ''.join(lines)

    def _delete_all_items(self, inv):
        """Record deletion of every entry in inv except the root."""
        if len(inv) == 0:
            return
        for path, ie in inv.iter_entries_by_dir():
            if path != "":
                self.record_delete(path, ie)

    def _warn_unless_in_merges(self, fileid, path):
        """Warn about a delete unless fileid exists in a merge parent."""
        if len(self.parents) <= 1:
            return
        for parent in self.parents[1:]:
            if fileid in self.get_inventory(parent):
                return
        self.warning("ignoring delete of %s as not in parent inventories", path)
class InventoryCommitHandler(GenericCommitHandler):
    """A CommitHandler that builds and saves Inventory objects."""

    def pre_process_files(self):
        """Prepare a mutable inventory for this commit."""
        super(InventoryCommitHandler, self).pre_process_files()

        # Seed the inventory from the previous one. Note that
        # the parent class version of pre_process_files() has
        # already set the right basis_inventory for this branch
        # but we need to copy it in order to mutate it safely
        # without corrupting the cached inventory value.
        if len(self.parents) == 0:
            self.inventory = self.basis_inventory
        else:
            self.inventory = copy_inventory(self.basis_inventory)
        self.inventory_root = self.inventory.root

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())

        # Initialise the inventory revision info as required
        if self.rev_store.expects_rich_root():
            self.inventory.revision_id = self.revision_id
        else:
            # In this revision store, root entries have no knit or weave.
            # When serializing out to disk and back in, root.revision is
            # always the new revision_id.
            self.inventory.root.revision = self.revision_id

    def post_process_files(self):
        """Save the revision."""
        self.cache_mgr.inventories[self.revision_id] = self.inventory
        self.rev_store.load(self.revision, self.inventory, None,
            lambda file_id: self._get_data(file_id),
            lambda file_id: self._get_per_file_parents(file_id),
            lambda revision_ids: self._get_inventories(revision_ids))

    def record_new(self, path, ie):
        """Add a new entry to the inventory."""
        try:
            # If this is a merge, the file was most likely added already.
            # The per-file parent(s) must therefore be calculated and
            # we can't assume there are none.
            per_file_parents, ie.revision = \
                self.rev_store.get_parents_and_revision_for_entry(ie)
            self.per_file_parents_for_commit[ie.file_id] = per_file_parents
            self.inventory.add(ie)
        except errors.DuplicateFileId:
            # Directory already exists as a file or symlink
            del self.inventory[ie.file_id]
            # Try again
            self.inventory.add(ie)

    def record_changed(self, path, ie, parent_id):
        """Replace an existing inventory entry in place."""
        # HACK: no API for this (del+add does more than it needs to)
        per_file_parents, ie.revision = \
            self.rev_store.get_parents_and_revision_for_entry(ie)
        self.per_file_parents_for_commit[ie.file_id] = per_file_parents
        self.inventory._byid[ie.file_id] = ie
        parent_ie = self.inventory._byid[parent_id]
        parent_ie.children[ie.name] = ie

    def record_delete(self, path, ie):
        """Remove an entry (and any children) from the inventory."""
        self.inventory.remove_recursive_id(ie.file_id)

    def record_rename(self, old_path, new_path, file_id, ie):
        """Move an entry to its new path within the inventory."""
        # For a rename, the revision-id is always the new one so
        # no need to change/set it here
        ie.revision = self.revision_id
        per_file_parents, _ = \
            self.rev_store.get_parents_and_revision_for_entry(ie)
        self.per_file_parents_for_commit[file_id] = per_file_parents
        new_basename, new_parent_id = self._ensure_directory(new_path,
            self.inventory)
        self.inventory.rename(file_id, new_parent_id, new_basename)

    def modify_handler(self, filecmd):
        """Handle a filemodify command."""
        if filecmd.dataref is not None:
            data = self.cache_mgr.fetch_blob(filecmd.dataref)
        else:
            data = filecmd.data
        self.debug("modifying %s", filecmd.path)
        (kind, is_executable) = mode_to_kind(filecmd.mode)
        self._modify_item(self._decode_path(filecmd.path), kind,
            is_executable, data, self.inventory)

    def delete_handler(self, filecmd):
        """Handle a filedelete command."""
        self.debug("deleting %s", filecmd.path)
        self._delete_item(self._decode_path(filecmd.path), self.inventory)

    def copy_handler(self, filecmd):
        """Handle a filecopy command."""
        src_path = self._decode_path(filecmd.src_path)
        dest_path = self._decode_path(filecmd.dest_path)
        self.debug("copying %s to %s", src_path, dest_path)
        self._copy_item(src_path, dest_path, self.inventory)

    def rename_handler(self, filecmd):
        """Handle a filerename command."""
        old_path = self._decode_path(filecmd.old_path)
        new_path = self._decode_path(filecmd.new_path)
        self.debug("renaming %s to %s", old_path, new_path)
        self._rename_item(old_path, new_path, self.inventory)

    def deleteall_handler(self, filecmd):
        """Handle a filedeleteall command."""
        self.debug("deleting all files (and also all directories)")
        self._delete_all_items(self.inventory)
class InventoryDeltaCommitHandler(GenericCommitHandler):
    """A CommitHandler that builds Inventories by applying a delta."""

    def pre_process_files(self):
        """Prepare the delta accumulators for this commit."""
        super(InventoryDeltaCommitHandler, self).pre_process_files()
        self._dirs_that_might_become_empty = set()

        # A given file-id can only appear once so we accumulate
        # the entries in a dict then build the actual delta at the end
        self._delta_entries_by_fileid = {}
        if len(self.parents) == 0 or not self.rev_store.expects_rich_root():
            if self.parents:
                old_path = ''
            else:
                old_path = None
            # Need to explicitly add the root entry for the first revision
            # and for non rich-root inventories
            root_id = inventory.ROOT_ID
            root_ie = inventory.InventoryDirectory(root_id, u'', None)
            root_ie.revision = self.revision_id
            self._add_entry((old_path, '', root_id, root_ie))

    def post_process_files(self):
        """Save the revision."""
        delta = self._get_final_delta()
        inv = self.rev_store.load_using_delta(self.revision,
            self.basis_inventory, delta, None,
            self._get_data,
            self._get_per_file_parents,
            self._get_inventories)
        self.cache_mgr.inventories[self.revision_id] = inv
        #print "committed %s" % self.revision_id

    def _get_final_delta(self):
        """Generate the final delta.

        Smart post-processing of changes, e.g. pruning of directories
        that would become empty, goes here.
        """
        delta = list(self._delta_entries_by_fileid.values())
        if self.prune_empty_dirs and self._dirs_that_might_become_empty:
            candidates = self._dirs_that_might_become_empty
            never_born = set()
            while candidates:
                parent_dirs_that_might_become_empty = set()
                for path, file_id in self._empty_after_delta(delta, candidates):
                    newly_added = self._new_file_ids.get(path)
                    if newly_added:
                        # Added and pruned within this commit - drop it later
                        never_born.add(newly_added)
                    else:
                        delta.append((path, None, file_id, None))
                    parent_dir = osutils.dirname(path)
                    if parent_dir:
                        parent_dirs_that_might_become_empty.add(parent_dir)
                candidates = parent_dirs_that_might_become_empty
            # Clean up entries that got deleted before they were ever added
            if never_born:
                delta = [de for de in delta if de[2] not in never_born]
        return delta

    def _empty_after_delta(self, delta, candidates):
        """Return (path, file_id) for candidate dirs empty after delta."""
        #self.mutter("delta so far is:\n%s" % "\n".join([str(de) for de in delta]))
        #self.mutter("candidates for deletion are:\n%s" % "\n".join([c for c in candidates]))
        new_inv = self._get_proposed_inventory(delta)
        result = []
        for dir in candidates:
            file_id = new_inv.path2id(dir)
            if file_id is None:
                continue
            ie = new_inv[file_id]
            if ie.kind != 'directory':
                continue
            if len(ie.children) == 0:
                result.append((dir, file_id))
                if self.verbose:
                    self.note("pruning empty directory %s" % (dir,))
        return result

    def _get_proposed_inventory(self, delta):
        """Build a throwaway inventory reflecting delta, for analysis only."""
        if len(self.parents):
            # new_inv = self.basis_inventory._get_mutable_inventory()
            # Note that this will create unreferenced chk pages if we end up
            # deleting entries, because this 'test' inventory won't end up
            # used. However, it is cheaper than having to create a full copy of
            # the inventory for every commit.
            new_inv = self.basis_inventory.create_by_apply_delta(delta,
                'not-a-valid-revision-id:')
        else:
            new_inv = inventory.Inventory(revision_id=self.revision_id)
            # This is set in the delta so remove it to prevent a duplicate
            del new_inv[inventory.ROOT_ID]
            try:
                new_inv.apply_delta(delta)
            except errors.InconsistentDelta:
                self.mutter("INCONSISTENT DELTA IS:\n%s" % "\n".join([str(de) for de in delta]))
                raise
        return new_inv

    def _add_entry(self, entry):
        """Accumulate one (old-path, new-path, file-id, ie) delta entry."""
        # We need to combine the data if multiple entries have the same file-id.
        # For example, a rename followed by a modification looks like:
        #
        # (x, y, f, e) & (y, y, f, g) => (x, y, f, g)
        #
        # Likewise, a modification followed by a rename looks like:
        #
        # (x, x, f, e) & (x, y, f, g) => (x, y, f, g)
        #
        # Here's a rename followed by a delete and a modification followed by
        # a delete:
        #
        # (x, y, f, e) & (y, None, f, None) => (x, None, f, None)
        # (x, x, f, e) & (x, None, f, None) => (x, None, f, None)
        #
        # In summary, we use the original old-path, new new-path and new ie
        # when combining entries.
        old_path = entry[0]
        new_path = entry[1]
        file_id = entry[2]
        ie = entry[3]
        existing = self._delta_entries_by_fileid.get(file_id, None)
        if existing is not None:
            old_path = existing[0]
            entry = (old_path, new_path, file_id, ie)
            if new_path is None and old_path is None:
                # This is a delete cancelling a previous add
                del self._delta_entries_by_fileid[file_id]
                parent_dir = osutils.dirname(existing[1])
                self.mutter("cancelling add of %s with parent %s" % (existing[1], parent_dir))
                if parent_dir:
                    self._dirs_that_might_become_empty.add(parent_dir)
                return
        self._delta_entries_by_fileid[file_id] = entry

        # Collect parent directories that might become empty
        if new_path is None:
            # delete
            parent_dir = osutils.dirname(old_path)
            # note: no need to check the root
            if parent_dir:
                self._dirs_that_might_become_empty.add(parent_dir)
        elif old_path is not None and old_path != new_path:
            # rename
            old_parent_dir = osutils.dirname(old_path)
            new_parent_dir = osutils.dirname(new_path)
            if old_parent_dir and old_parent_dir != new_parent_dir:
                self._dirs_that_might_become_empty.add(old_parent_dir)

        # Calculate the per-file parents, if not already done
        if file_id in self.per_file_parents_for_commit:
            return
        if old_path is None:
            # add
            # If this is a merge, the file was most likely added already.
            # The per-file parent(s) must therefore be calculated and
            # we can't assume there are none.
            per_file_parents, ie.revision = \
                self.rev_store.get_parents_and_revision_for_entry(ie)
            self.per_file_parents_for_commit[file_id] = per_file_parents
        elif new_path is None:
            # delete
            pass
        elif old_path != new_path:
            # rename
            per_file_parents, _ = \
                self.rev_store.get_parents_and_revision_for_entry(ie)
            self.per_file_parents_for_commit[file_id] = per_file_parents
        else:
            # modify
            per_file_parents, ie.revision = \
                self.rev_store.get_parents_and_revision_for_entry(ie)
            self.per_file_parents_for_commit[file_id] = per_file_parents

    def record_new(self, path, ie):
        """Record the addition of path."""
        self._add_entry((None, path, ie.file_id, ie))

    def record_changed(self, path, ie, parent_id=None):
        """Record a content change of path."""
        self._add_entry((path, path, ie.file_id, ie))
        self._modified_file_ids[path] = ie.file_id

    def record_delete(self, path, ie):
        """Record the deletion of path and, for a directory, its children."""
        self._add_entry((path, None, ie.file_id, None))
        self._paths_deleted_this_commit.add(path)
        if ie.kind == 'directory':
            try:
                del self.directory_entries[path]
            except KeyError:
                pass
            for child_relpath, entry in \
                self.basis_inventory.iter_entries_by_dir(from_dir=ie):
                child_path = osutils.pathjoin(path, child_relpath)
                self._add_entry((child_path, None, entry.file_id, None))
                self._paths_deleted_this_commit.add(child_path)
                if entry.kind == 'directory':
                    try:
                        del self.directory_entries[child_path]
                    except KeyError:
                        pass

    def record_rename(self, old_path, new_path, file_id, old_ie):
        """Record the rename of old_path to new_path."""
        new_ie = old_ie.copy()
        new_basename, new_parent_id = self._ensure_directory(new_path,
            self.basis_inventory)
        new_ie.name = new_basename
        new_ie.parent_id = new_parent_id
        new_ie.revision = self.revision_id
        self._add_entry((old_path, new_path, file_id, new_ie))
        self._modified_file_ids[new_path] = file_id
        self._paths_deleted_this_commit.discard(new_path)
        if new_ie.kind == 'directory':
            self.directory_entries[new_path] = new_ie

    def _rename_pending_change(self, old_path, new_path, file_id):
        """Instead of adding/modifying old-path, add new-path instead."""
        # note: delta entries look like (old, new, file-id, ie)
        old_ie = self._delta_entries_by_fileid[file_id][3]

        # Delete the old path. Note that this might trigger implicit
        # deletion of newly created parents that could now become empty.
        self.record_delete(old_path, old_ie)

        # Update the dictionaries used for tracking new file-ids
        if old_path in self._new_file_ids:
            del self._new_file_ids[old_path]
        else:
            del self._modified_file_ids[old_path]
        self._new_file_ids[new_path] = file_id

        # Create the new InventoryEntry
        kind = old_ie.kind
        basename, parent_id = self._ensure_directory(new_path,
            self.basis_inventory)
        ie = inventory.make_entry(kind, basename, parent_id, file_id)
        ie.revision = self.revision_id
        if kind == 'file':
            ie.executable = old_ie.executable
            ie.text_sha1 = old_ie.text_sha1
            ie.text_size = old_ie.text_size
        elif kind == 'symlink':
            ie.symlink_target = old_ie.symlink_target

        # Record it
        self.record_new(new_path, ie)

    def modify_handler(self, filecmd):
        """Handle a filemodify command."""
        (kind, executable) = mode_to_kind(filecmd.mode)
        if filecmd.dataref is not None:
            if kind == "directory":
                data = None
            elif kind == "tree-reference":
                data = filecmd.dataref
            else:
                data = self.cache_mgr.fetch_blob(filecmd.dataref)
        else:
            data = filecmd.data
        self.debug("modifying %s", filecmd.path)
        decoded_path = self._decode_path(filecmd.path)
        self._modify_item(decoded_path, kind,
            executable, data, self.basis_inventory)

    def delete_handler(self, filecmd):
        """Handle a filedelete command."""
        self.debug("deleting %s", filecmd.path)
        self._delete_item(
            self._decode_path(filecmd.path), self.basis_inventory)

    def copy_handler(self, filecmd):
        """Handle a filecopy command."""
        src_path = self._decode_path(filecmd.src_path)
        dest_path = self._decode_path(filecmd.dest_path)
        self.debug("copying %s to %s", src_path, dest_path)
        self._copy_item(src_path, dest_path, self.basis_inventory)

    def rename_handler(self, filecmd):
        """Handle a filerename command."""
        old_path = self._decode_path(filecmd.old_path)
        new_path = self._decode_path(filecmd.new_path)
        self.debug("renaming %s to %s", old_path, new_path)
        self._rename_item(old_path, new_path, self.basis_inventory)

    def deleteall_handler(self, filecmd):
        """Handle a filedeleteall command."""
        self.debug("deleting all files (and also all directories)")
        self._delete_all_items(self.basis_inventory)