# Copyright (C) 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""A manager of caches."""


from bzrlib import lru_cache, trace
from bzrlib.plugins.fastimport import helpers


class CacheManager(object):

    def __init__(self, info=None, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        """
        self.verbose = verbose

        # dataref -> data. dataref is either :mark or the sha-1.
        # Sticky blobs aren't removed after being referenced.
        self._blobs = {}
        self._sticky_blobs = {}

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import commit-ids -> revision-id lookup table
        # we need to keep all of these but they are small
        self.revision_ids = {}

        # (path, branch_ref) -> file-ids - as generated.
        # (Use store_file_id/fetch_fileid methods rather than direct access.)

        # Head tracking: last ref, last id per ref & map of commit ids to ref*s*
        self.last_ref = None
        self.last_ids = {}
        self.heads = {}

        # Work out the blobs to make sticky - None means all
        self._blob_ref_counts = {}
        if info is not None:
            try:
                blobs_by_counts = info['Blob reference counts']
                # The parser hands values back as lists, already parsed
                for count, blob_list in blobs_by_counts.items():
                    n = int(count)
                    for b in blob_list:
                        self._blob_ref_counts[b] = n
            except KeyError:
                # info not in file - possible when no blobs used
                pass
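        # (Illustrative note, not from the original source: the
        # 'Blob reference counts' section written by the --info processor is
        # assumed to map a usage count to a list of blob marks, roughly
        #     1 = :100, :101
        #     2 = :102
        # so each listed blob starts out expecting that many fetches.)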

    def dump_stats(self, note=trace.note):
        """Dump some statistics about what we cached."""
        # TODO: add in inventory statistics
        note("Cache statistics:")
        self._show_stats_for(self._sticky_blobs, "sticky blobs", note=note)
        self._show_stats_for(self.revision_ids, "revision-ids", note=note)
        # These aren't interesting so omit from the output, at least for now
        #self._show_stats_for(self._blobs, "other blobs", note=note)
        #self._show_stats_for(self.last_ids, "last-ids", note=note)
        #self._show_stats_for(self.heads, "heads", note=note)

    def _show_stats_for(self, dict, label, note=trace.note, tuple_key=False):
        """Dump statistics about a given dictionary.

        Both the key and value need to support len().
        """
        count = len(dict)
        if tuple_key:
            size = sum(map(len, (''.join(k) for k in dict.keys())))
        else:
            size = sum(map(len, dict.keys()))
        size += sum(map(len, dict.values()))
        size = size * 1.0 / 1024
        unit = 'K'
        if size > 1024:
            size = size / 1024
            unit = 'M'
            if size > 1024:
                size = size / 1024
                unit = 'G'
        note("    %-12s: %8.1f %s (%d %s)" % (label, size, unit, count,
            helpers.single_plural(count, "item", "items")))
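        # (Illustrative only, not in the original source: a line produced by
        # the note() call above looks roughly like
        #     sticky blobs:      1.5 K (3 items)
        # with the size scaled to K, M or G as computed above.)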

    def clear_all(self):
        """Free up any memory used by the caches."""
        self._blobs.clear()
        self._sticky_blobs.clear()
        self.revision_ids.clear()
        self.last_ids.clear()
        self.heads.clear()
        self.inventories.clear()

    def store_blob(self, id, data):
        """Store a blob of data."""
        # Note: If we're not reference counting, everything has to be sticky
        if not self._blob_ref_counts or id in self._blob_ref_counts:
            self._sticky_blobs[id] = data
        elif data == '':
            # Empty data is always sticky
            self._sticky_blobs[id] = data
        else:
            self._blobs[id] = data

    def fetch_blob(self, id):
        """Fetch a blob of data."""
        try:
            b = self._sticky_blobs[id]
            if self._blob_ref_counts and b != '':
                self._blob_ref_counts[id] -= 1
                if self._blob_ref_counts[id] == 0:
                    del self._sticky_blobs[id]
            return b
        except KeyError:
            return self._blobs.pop(id)
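
    # (Illustrative note, not from the original source: non-sticky blobs are
    # consumed on first fetch via pop(), while sticky blobs survive until
    # their reference count, when known, drops to zero. For example, with a
    # manager built without --info hints, everything stays cached:
    #
    #   manager = CacheManager()
    #   manager.store_blob(':1', 'hello')   # sticky, since no ref counts
    #   manager.fetch_blob(':1')            # -> 'hello', still cached
    #   manager.fetch_blob(':1')            # -> 'hello' again
    # )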

    def track_heads(self, cmd):
        """Track the repository heads given a CommitCommand.

        :param cmd: the CommitCommand
        :return: the list of parents in terms of commit-ids
        """
        # Get the true set of parents
        if cmd.from_ is not None:
            parents = [cmd.from_]
        else:
            last_id = self.last_ids.get(cmd.ref)
            if last_id is not None:
                parents = [last_id]
            else:
                parents = []
        parents.extend(cmd.merges)

        # Track the heads
        self.track_heads_for_ref(cmd.ref, cmd.id, parents)
        return parents

    def track_heads_for_ref(self, cmd_ref, cmd_id, parents=None):
        if parents is not None:
            for parent in parents:
                if parent in self.heads:
                    del self.heads[parent]
        self.heads.setdefault(cmd_id, set()).add(cmd_ref)
        self.last_ids[cmd_ref] = cmd_id
        self.last_ref = cmd_ref
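
# (Illustrative usage sketch, not part of the original module: two commits in
# sequence on one ref leave a single head for that ref. The mark strings and
# ref name below are made up for the example.)
#
#   manager = CacheManager()
#   manager.track_heads_for_ref('refs/heads/master', ':1')
#   manager.track_heads_for_ref('refs/heads/master', ':2', parents=[':1'])
#   # manager.heads    == {':2': set(['refs/heads/master'])}
#   # manager.last_ids == {'refs/heads/master': ':2'}
#   # manager.last_ref == 'refs/heads/master'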