# Copyright (C) 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""A manager of caches."""


from bzrlib import lru_cache, trace
from bzrlib.plugins.fastimport import helpers


class CacheManager(object):

    def __init__(self, info=None, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        """
        self.verbose = verbose

        # dataref -> data. dataref is either :mark or the sha-1.
        # Sticky blobs aren't removed after being referenced.
        self._blobs = {}
        self._sticky_blobs = {}

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import commit-ids -> revision-id lookup table
        # we need to keep all of these but they are small
        self.revision_ids = {}

        # (path, branch_ref) -> file-ids - as generated.
        # (Use store_file_id/fetch_fileid methods rather than direct access.)

        # Head tracking: last ref, last id per ref & map of commit ids to ref*s*
        self.last_ref = None
        self.last_ids = {}
        self.heads = {}

        # Work out the blobs to make sticky - None means all
        self._blob_ref_counts = {}
        if info is not None:
            try:
                blobs_by_counts = info['Blob reference counts']
                # The parser hands values back as lists, already parsed
                for count, blob_list in blobs_by_counts.items():
                    n = int(count)
                    for b in blob_list:
                        self._blob_ref_counts[b] = n
            except KeyError:
                # info not in file - possible when no blobs used
                pass
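        # (Illustrative note, not from the original source: the
        # 'Blob reference counts' section written by the --info processor is
        # assumed to map a usage count to a list of blob marks, roughly
        #     1 = :100, :101
        #     2 = :102
        # so each listed blob starts out expecting that many fetches.)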

    def dump_stats(self, note=trace.note):
        """Dump some statistics about what we cached."""
        # TODO: add in inventory statistics
        note("Cache statistics:")
        self._show_stats_for(self._sticky_blobs, "sticky blobs", note=note)
        self._show_stats_for(self.revision_ids, "revision-ids", note=note)
        # These aren't interesting so omit from the output, at least for now
        #self._show_stats_for(self._blobs, "other blobs", note=note)
        #self._show_stats_for(self.last_ids, "last-ids", note=note)
        #self._show_stats_for(self.heads, "heads", note=note)

    def _show_stats_for(self, dict, label, note=trace.note, tuple_key=False):
        """Dump statistics about a given dictionary.

        Both the key and value need to support len().
        """
        count = len(dict)
        if tuple_key:
            size = sum(map(len, (''.join(k) for k in dict.keys())))
        else:
            size = sum(map(len, dict.keys()))
        size += sum(map(len, dict.values()))
        size = size * 1.0 / 1024
        unit = 'K'
        if size > 1024:
            size = size / 1024
            unit = 'M'
            if size > 1024:
                size = size / 1024
                unit = 'G'
        note("    %-12s: %8.1f %s (%d %s)" % (label, size, unit, count,
            helpers.single_plural(count, "item", "items")))
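        # (Illustrative only, not in the original source: a line produced by
        # the note() call above looks roughly like
        #     sticky blobs:      1.5 K (3 items)
        # with the size scaled to K, M or G as computed above.)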

    def clear_all(self):
        """Free up any memory used by the caches."""
        self._blobs.clear()
        self._sticky_blobs.clear()
        self.revision_ids.clear()
        self.last_ids.clear()
        self.heads.clear()
        self.inventories.clear()

    def store_blob(self, id, data):
        """Store a blob of data."""
        # Note: If we're not reference counting, everything has to be sticky
        if not self._blob_ref_counts or id in self._blob_ref_counts:
            self._sticky_blobs[id] = data
        elif data == '':
            # Empty data is always sticky
            self._sticky_blobs[id] = data
        else:
            self._blobs[id] = data

    def fetch_blob(self, id):
        """Fetch a blob of data."""
        try:
            b = self._sticky_blobs[id]
            if self._blob_ref_counts and b != '':
                self._blob_ref_counts[id] -= 1
                if self._blob_ref_counts[id] == 0:
                    del self._sticky_blobs[id]
            return b
        except KeyError:
            return self._blobs.pop(id)
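
    # (Illustrative note, not from the original source: non-sticky blobs are
    # consumed on first fetch via pop(), while sticky blobs survive until
    # their reference count, when known, drops to zero. For example, with a
    # manager built without --info hints, everything stays cached:
    #
    #   manager = CacheManager()
    #   manager.store_blob(':1', 'hello')   # sticky, since no ref counts
    #   manager.fetch_blob(':1')            # -> 'hello', still cached
    #   manager.fetch_blob(':1')            # -> 'hello' again
    # )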

    def track_heads(self, cmd):
        """Track the repository heads given a CommitCommand.

        :param cmd: the CommitCommand
        :return: the list of parents in terms of commit-ids
        """
        # Get the true set of parents
        if cmd.from_ is not None:
            parents = [cmd.from_]
        else:
            last_id = self.last_ids.get(cmd.ref)
            if last_id is not None:
                parents = [last_id]
            else:
                parents = []
        parents.extend(cmd.merges)

        # Track the heads
        self.track_heads_for_ref(cmd.ref, cmd.id, parents)
        return parents

    def track_heads_for_ref(self, cmd_ref, cmd_id, parents=None):
        if parents is not None:
            for parent in parents:
                if parent in self.heads:
                    del self.heads[parent]
        self.heads.setdefault(cmd_id, set()).add(cmd_ref)
        self.last_ids[cmd_ref] = cmd_id
        self.last_ref = cmd_ref
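
# (Illustrative usage sketch, not part of the original module: two commits in
# sequence on one ref leave a single head for that ref. The mark strings and
# ref name below are made up for the example.)
#
#   manager = CacheManager()
#   manager.track_heads_for_ref('refs/heads/master', ':1')
#   manager.track_heads_for_ref('refs/heads/master', ':2', parents=[':1'])
#   # manager.heads    == {':2': set(['refs/heads/master'])}
#   # manager.last_ids == {'refs/heads/master': ':2'}
#   # manager.last_ref == 'refs/heads/master'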