The tenth batch
[git/gitster.git] / split-index.c
blob 120c8190b187bc8c6c73630f06f5dd4dc994df30
1 #define USE_THE_REPOSITORY_VARIABLE
3 #include "git-compat-util.h"
4 #include "gettext.h"
5 #include "hash.h"
6 #include "mem-pool.h"
7 #include "read-cache-ll.h"
8 #include "split-index.h"
9 #include "strbuf.h"
10 #include "ewah/ewok.h"
12 struct split_index *init_split_index(struct index_state *istate)
14 if (!istate->split_index) {
15 if (istate->sparse_index)
16 die(_("cannot use split index with a sparse index"));
18 CALLOC_ARRAY(istate->split_index, 1);
19 istate->split_index->refcount = 1;
21 return istate->split_index;
24 int read_link_extension(struct index_state *istate,
25 const void *data_, unsigned long sz)
27 const unsigned char *data = data_;
28 struct split_index *si;
29 int ret;
31 if (sz < the_hash_algo->rawsz)
32 return error("corrupt link extension (too short)");
33 si = init_split_index(istate);
34 oidread(&si->base_oid, data, the_repository->hash_algo);
35 data += the_hash_algo->rawsz;
36 sz -= the_hash_algo->rawsz;
37 if (!sz)
38 return 0;
39 si->delete_bitmap = ewah_new();
40 ret = ewah_read_mmap(si->delete_bitmap, data, sz);
41 if (ret < 0)
42 return error("corrupt delete bitmap in link extension");
43 data += ret;
44 sz -= ret;
45 si->replace_bitmap = ewah_new();
46 ret = ewah_read_mmap(si->replace_bitmap, data, sz);
47 if (ret < 0)
48 return error("corrupt replace bitmap in link extension");
49 if (ret != sz)
50 return error("garbage at the end of link extension");
51 return 0;
54 int write_link_extension(struct strbuf *sb,
55 struct index_state *istate)
57 struct split_index *si = istate->split_index;
58 strbuf_add(sb, si->base_oid.hash, the_hash_algo->rawsz);
59 if (!si->delete_bitmap && !si->replace_bitmap)
60 return 0;
61 ewah_serialize_strbuf(si->delete_bitmap, sb);
62 ewah_serialize_strbuf(si->replace_bitmap, sb);
63 return 0;
66 static void mark_base_index_entries(struct index_state *base)
68 int i;
70 * To keep track of the shared entries between
71 * istate->base->cache[] and istate->cache[], base entry
72 * position is stored in each base entry. All positions start
73 * from 1 instead of 0, which is reserved to say "this is a new
74 * entry".
76 for (i = 0; i < base->cache_nr; i++)
77 base->cache[i]->index = i + 1;
80 void move_cache_to_base_index(struct index_state *istate)
82 struct split_index *si = istate->split_index;
83 int i;
86 * If there was a previous base index, then transfer ownership of allocated
87 * entries to the parent index.
89 if (si->base &&
90 si->base->ce_mem_pool) {
92 if (!istate->ce_mem_pool) {
93 istate->ce_mem_pool = xmalloc(sizeof(struct mem_pool));
94 mem_pool_init(istate->ce_mem_pool, 0);
97 mem_pool_combine(istate->ce_mem_pool, istate->split_index->base->ce_mem_pool);
100 ALLOC_ARRAY(si->base, 1);
101 index_state_init(si->base, istate->repo);
102 si->base->version = istate->version;
103 /* zero timestamp disables racy test in ce_write_index() */
104 si->base->timestamp = istate->timestamp;
105 ALLOC_GROW(si->base->cache, istate->cache_nr, si->base->cache_alloc);
106 si->base->cache_nr = istate->cache_nr;
109 * The mem_pool needs to move with the allocated entries.
111 si->base->ce_mem_pool = istate->ce_mem_pool;
112 istate->ce_mem_pool = NULL;
114 COPY_ARRAY(si->base->cache, istate->cache, istate->cache_nr);
115 mark_base_index_entries(si->base);
116 for (i = 0; i < si->base->cache_nr; i++)
117 si->base->cache[i]->ce_flags &= ~CE_UPDATE_IN_BASE;
120 static void mark_entry_for_delete(size_t pos, void *data)
122 struct index_state *istate = data;
123 if (pos >= istate->cache_nr)
124 die("position for delete %d exceeds base index size %d",
125 (int)pos, istate->cache_nr);
126 istate->cache[pos]->ce_flags |= CE_REMOVE;
127 istate->split_index->nr_deletions++;
130 static void replace_entry(size_t pos, void *data)
132 struct index_state *istate = data;
133 struct split_index *si = istate->split_index;
134 struct cache_entry *dst, *src;
136 if (pos >= istate->cache_nr)
137 die("position for replacement %d exceeds base index size %d",
138 (int)pos, istate->cache_nr);
139 if (si->nr_replacements >= si->saved_cache_nr)
140 die("too many replacements (%d vs %d)",
141 si->nr_replacements, si->saved_cache_nr);
142 dst = istate->cache[pos];
143 if (dst->ce_flags & CE_REMOVE)
144 die("entry %d is marked as both replaced and deleted",
145 (int)pos);
146 src = si->saved_cache[si->nr_replacements];
147 if (ce_namelen(src))
148 die("corrupt link extension, entry %d should have "
149 "zero length name", (int)pos);
150 src->index = pos + 1;
151 src->ce_flags |= CE_UPDATE_IN_BASE;
152 src->ce_namelen = dst->ce_namelen;
153 copy_cache_entry(dst, src);
154 discard_cache_entry(src);
155 si->nr_replacements++;
158 void merge_base_index(struct index_state *istate)
160 struct split_index *si = istate->split_index;
161 unsigned int i;
163 mark_base_index_entries(si->base);
165 si->saved_cache = istate->cache;
166 si->saved_cache_nr = istate->cache_nr;
167 istate->cache_nr = si->base->cache_nr;
168 istate->cache = NULL;
169 istate->cache_alloc = 0;
170 ALLOC_GROW(istate->cache, istate->cache_nr, istate->cache_alloc);
171 COPY_ARRAY(istate->cache, si->base->cache, istate->cache_nr);
173 si->nr_deletions = 0;
174 si->nr_replacements = 0;
175 ewah_each_bit(si->replace_bitmap, replace_entry, istate);
176 ewah_each_bit(si->delete_bitmap, mark_entry_for_delete, istate);
177 if (si->nr_deletions)
178 remove_marked_cache_entries(istate, 0);
180 for (i = si->nr_replacements; i < si->saved_cache_nr; i++) {
181 if (!ce_namelen(si->saved_cache[i]))
182 die("corrupt link extension, entry %d should "
183 "have non-zero length name", i);
184 add_index_entry(istate, si->saved_cache[i],
185 ADD_CACHE_OK_TO_ADD |
186 ADD_CACHE_KEEP_CACHE_TREE |
188 * we may have to replay what
189 * merge-recursive.c:update_stages()
190 * does, which has this flag on
192 ADD_CACHE_SKIP_DFCHECK);
193 si->saved_cache[i] = NULL;
196 ewah_free(si->delete_bitmap);
197 ewah_free(si->replace_bitmap);
198 FREE_AND_NULL(si->saved_cache);
199 si->delete_bitmap = NULL;
200 si->replace_bitmap = NULL;
201 si->saved_cache_nr = 0;
205 * Compare most of the fields in two cache entries, i.e. all except the
206 * hashmap_entry and the name.
208 static int compare_ce_content(struct cache_entry *a, struct cache_entry *b)
210 const unsigned int ondisk_flags = CE_STAGEMASK | CE_VALID |
211 CE_EXTENDED_FLAGS;
212 unsigned int ce_flags = a->ce_flags;
213 unsigned int base_flags = b->ce_flags;
214 int ret;
216 /* only on-disk flags matter */
217 a->ce_flags &= ondisk_flags;
218 b->ce_flags &= ondisk_flags;
219 ret = memcmp(&a->ce_stat_data, &b->ce_stat_data,
220 offsetof(struct cache_entry, name) -
221 offsetof(struct cache_entry, oid)) ||
222 !oideq(&a->oid, &b->oid);
223 a->ce_flags = ce_flags;
224 b->ce_flags = base_flags;
226 return ret;
229 void prepare_to_write_split_index(struct index_state *istate)
231 struct split_index *si = init_split_index(istate);
232 struct cache_entry **entries = NULL, *ce;
233 int i, nr_entries = 0, nr_alloc = 0;
235 si->delete_bitmap = ewah_new();
236 si->replace_bitmap = ewah_new();
238 if (si->base) {
239 /* Go through istate->cache[] and mark CE_MATCHED to
240 * entry with positive index. We'll go through
241 * base->cache[] later to delete all entries in base
242 * that are not marked with either CE_MATCHED or
243 * CE_UPDATE_IN_BASE. If istate->cache[i] is a
244 * duplicate, deduplicate it.
246 for (i = 0; i < istate->cache_nr; i++) {
247 struct cache_entry *base;
248 ce = istate->cache[i];
249 if (!ce->index) {
251 * During simple update index operations this
252 * is a cache entry that is not present in
253 * the shared index. It will be added to the
254 * split index.
256 * However, it might also represent a file
257 * that already has a cache entry in the
258 * shared index, but a new index has just
259 * been constructed by unpack_trees(), and
260 * this entry now refers to different content
261 * than what was recorded in the original
262 * index, e.g. during 'read-tree -m HEAD^' or
263 * 'checkout HEAD^'. In this case the
264 * original entry in the shared index will be
265 * marked as deleted, and this entry will be
266 * added to the split index.
268 continue;
270 if (ce->index > si->base->cache_nr) {
271 BUG("ce refers to a shared ce at %d, which is beyond the shared index size %d",
272 ce->index, si->base->cache_nr);
274 ce->ce_flags |= CE_MATCHED; /* or "shared" */
275 base = si->base->cache[ce->index - 1];
276 if (ce == base) {
277 /* The entry is present in the shared index. */
278 if (ce->ce_flags & CE_UPDATE_IN_BASE) {
280 * Already marked for inclusion in
281 * the split index, either because
282 * the corresponding file was
283 * modified and the cached stat data
284 * was refreshed, or because there
285 * is already a replacement entry in
286 * the split index.
287 * Nothing more to do here.
289 } else if (!ce_uptodate(ce) &&
290 is_racy_timestamp(istate, ce)) {
292 * A racily clean cache entry stored
293 * only in the shared index: it must
294 * be added to the split index, so
295 * the subsequent do_write_index()
296 * can smudge its stat data.
298 ce->ce_flags |= CE_UPDATE_IN_BASE;
299 } else {
301 * The entry is only present in the
302 * shared index and it was not
303 * refreshed.
304 * Just leave it there.
307 continue;
309 if (ce->ce_namelen != base->ce_namelen ||
310 strcmp(ce->name, base->name)) {
311 ce->index = 0;
312 continue;
315 * This is the copy of a cache entry that is present
316 * in the shared index, created by unpack_trees()
317 * while it constructed a new index.
319 if (ce->ce_flags & CE_UPDATE_IN_BASE) {
321 * Already marked for inclusion in the split
322 * index, either because the corresponding
323 * file was modified and the cached stat data
324 * was refreshed, or because the original
325 * entry already had a replacement entry in
326 * the split index.
327 * Nothing to do.
329 } else if (!ce_uptodate(ce) &&
330 is_racy_timestamp(istate, ce)) {
332 * A copy of a racily clean cache entry from
333 * the shared index. It must be added to
334 * the split index, so the subsequent
335 * do_write_index() can smudge its stat data.
337 ce->ce_flags |= CE_UPDATE_IN_BASE;
338 } else {
340 * Thoroughly compare the cached data to see
341 * whether it should be marked for inclusion
342 * in the split index.
344 * This comparison might be unnecessary, as
345 * code paths modifying the cached data do
346 * set CE_UPDATE_IN_BASE as well.
348 if (compare_ce_content(ce, base))
349 ce->ce_flags |= CE_UPDATE_IN_BASE;
351 discard_cache_entry(base);
352 si->base->cache[ce->index - 1] = ce;
354 for (i = 0; i < si->base->cache_nr; i++) {
355 ce = si->base->cache[i];
356 if ((ce->ce_flags & CE_REMOVE) ||
357 !(ce->ce_flags & CE_MATCHED))
358 ewah_set(si->delete_bitmap, i);
359 else if (ce->ce_flags & CE_UPDATE_IN_BASE) {
360 ewah_set(si->replace_bitmap, i);
361 ce->ce_flags |= CE_STRIP_NAME;
362 ALLOC_GROW(entries, nr_entries+1, nr_alloc);
363 entries[nr_entries++] = ce;
365 if (is_null_oid(&ce->oid))
366 istate->drop_cache_tree = 1;
370 for (i = 0; i < istate->cache_nr; i++) {
371 ce = istate->cache[i];
372 if ((!si->base || !ce->index) && !(ce->ce_flags & CE_REMOVE)) {
373 assert(!(ce->ce_flags & CE_STRIP_NAME));
374 ALLOC_GROW(entries, nr_entries+1, nr_alloc);
375 entries[nr_entries++] = ce;
377 ce->ce_flags &= ~CE_MATCHED;
381 * take cache[] out temporarily, put entries[] in its place
382 * for writing
384 si->saved_cache = istate->cache;
385 si->saved_cache_nr = istate->cache_nr;
386 istate->cache = entries;
387 istate->cache_nr = nr_entries;
390 void finish_writing_split_index(struct index_state *istate)
392 struct split_index *si = init_split_index(istate);
394 ewah_free(si->delete_bitmap);
395 ewah_free(si->replace_bitmap);
396 si->delete_bitmap = NULL;
397 si->replace_bitmap = NULL;
398 free(istate->cache);
399 istate->cache = si->saved_cache;
400 istate->cache_nr = si->saved_cache_nr;
403 void discard_split_index(struct index_state *istate)
405 struct split_index *si = istate->split_index;
406 if (!si)
407 return;
408 istate->split_index = NULL;
409 si->refcount--;
410 if (si->refcount)
411 return;
412 if (si->base) {
413 discard_index(si->base);
414 free(si->base);
416 free(si);
419 void save_or_free_index_entry(struct index_state *istate, struct cache_entry *ce)
421 if (ce->index &&
422 istate->split_index &&
423 istate->split_index->base &&
424 ce->index <= istate->split_index->base->cache_nr &&
425 ce == istate->split_index->base->cache[ce->index - 1])
426 ce->ce_flags |= CE_REMOVE;
427 else
428 discard_cache_entry(ce);
431 void replace_index_entry_in_base(struct index_state *istate,
432 struct cache_entry *old_entry,
433 struct cache_entry *new_entry)
435 if (old_entry->index &&
436 istate->split_index &&
437 istate->split_index->base &&
438 old_entry->index <= istate->split_index->base->cache_nr) {
439 new_entry->index = old_entry->index;
440 if (old_entry != istate->split_index->base->cache[new_entry->index - 1])
441 discard_cache_entry(istate->split_index->base->cache[new_entry->index - 1]);
442 istate->split_index->base->cache[new_entry->index - 1] = new_entry;
446 void add_split_index(struct index_state *istate)
448 if (!istate->split_index) {
449 init_split_index(istate);
450 istate->cache_changed |= SPLIT_INDEX_ORDERED;
454 void remove_split_index(struct index_state *istate)
456 if (istate->split_index) {
457 if (istate->split_index->base) {
459 * When removing the split index, we need to move
460 * ownership of the mem_pool associated with the
461 * base index to the main index. There may be cache entries
462 * allocated from the base's memory pool that are shared with
463 * the_index.cache[].
465 mem_pool_combine(istate->ce_mem_pool,
466 istate->split_index->base->ce_mem_pool);
469 * The split index no longer owns the mem_pool backing
470 * its cache array. As we are discarding this index,
471 * mark the index as having no cache entries, so it
472 * will not attempt to clean up the cache entries or
473 * validate them.
475 istate->split_index->base->cache_nr = 0;
479 * We can discard the split index because its
480 * memory pool has been incorporated into the
481 * memory pool associated with the the_index.
483 discard_split_index(istate);
485 istate->cache_changed |= SOMETHING_CHANGED;