Git 2.47-rc0
[git/gitster.git] / midx.c
blob67e0d64004666d288278fd9ea9f27ad988b7e56c
1 #define USE_THE_REPOSITORY_VARIABLE
3 #include "git-compat-util.h"
4 #include "config.h"
5 #include "dir.h"
6 #include "hex.h"
7 #include "packfile.h"
8 #include "object-file.h"
9 #include "hash-lookup.h"
10 #include "midx.h"
11 #include "progress.h"
12 #include "trace2.h"
13 #include "chunk-format.h"
14 #include "pack-bitmap.h"
15 #include "pack-revindex.h"
17 int midx_checksum_valid(struct multi_pack_index *m);
18 void clear_midx_files_ext(const char *object_dir, const char *ext,
19 const char *keep_hash);
20 void clear_incremental_midx_files_ext(const char *object_dir, const char *ext,
21 char **keep_hashes,
22 uint32_t hashes_nr);
23 int cmp_idx_or_pack_name(const char *idx_or_pack_name,
24 const char *idx_name);
26 const unsigned char *get_midx_checksum(struct multi_pack_index *m)
28 return m->data + m->data_len - the_hash_algo->rawsz;
/*
 * Append the path of the plain (extension-less) multi-pack-index file
 * below `object_dir` to `out`.
 */
void get_midx_filename(struct strbuf *out, const char *object_dir)
{
	const unsigned char *no_hash = NULL;
	const char *no_ext = NULL;

	get_midx_filename_ext(out, object_dir, no_hash, no_ext);
}
/*
 * Append "<object_dir>/pack/multi-pack-index" to `out`. When `ext` is
 * non-NULL, additionally append "-<hex of hash>.<ext>"; `hash` is only
 * read in that case.
 */
void get_midx_filename_ext(struct strbuf *out, const char *object_dir,
			   const unsigned char *hash, const char *ext)
{
	strbuf_addf(out, "%s/pack/multi-pack-index", object_dir);
	if (!ext)
		return;

	strbuf_addf(out, "-%s.%s", hash_to_hex(hash), ext);
}
44 static int midx_read_oid_fanout(const unsigned char *chunk_start,
45 size_t chunk_size, void *data)
47 int i;
48 struct multi_pack_index *m = data;
49 m->chunk_oid_fanout = (uint32_t *)chunk_start;
51 if (chunk_size != 4 * 256) {
52 error(_("multi-pack-index OID fanout is of the wrong size"));
53 return 1;
55 for (i = 0; i < 255; i++) {
56 uint32_t oid_fanout1 = ntohl(m->chunk_oid_fanout[i]);
57 uint32_t oid_fanout2 = ntohl(m->chunk_oid_fanout[i+1]);
59 if (oid_fanout1 > oid_fanout2) {
60 error(_("oid fanout out of order: fanout[%d] = %"PRIx32" > %"PRIx32" = fanout[%d]"),
61 i, oid_fanout1, oid_fanout2, i + 1);
62 return 1;
65 m->num_objects = ntohl(m->chunk_oid_fanout[255]);
66 return 0;
69 static int midx_read_oid_lookup(const unsigned char *chunk_start,
70 size_t chunk_size, void *data)
72 struct multi_pack_index *m = data;
73 m->chunk_oid_lookup = chunk_start;
75 if (chunk_size != st_mult(m->hash_len, m->num_objects)) {
76 error(_("multi-pack-index OID lookup chunk is the wrong size"));
77 return 1;
79 return 0;
82 static int midx_read_object_offsets(const unsigned char *chunk_start,
83 size_t chunk_size, void *data)
85 struct multi_pack_index *m = data;
86 m->chunk_object_offsets = chunk_start;
88 if (chunk_size != st_mult(m->num_objects, MIDX_CHUNK_OFFSET_WIDTH)) {
89 error(_("multi-pack-index object offset chunk is the wrong size"));
90 return 1;
92 return 0;
95 #define MIDX_MIN_SIZE (MIDX_HEADER_SIZE + the_hash_algo->rawsz)
97 static struct multi_pack_index *load_multi_pack_index_one(const char *object_dir,
98 const char *midx_name,
99 int local)
101 struct multi_pack_index *m = NULL;
102 int fd;
103 struct stat st;
104 size_t midx_size;
105 void *midx_map = NULL;
106 uint32_t hash_version;
107 uint32_t i;
108 const char *cur_pack_name;
109 struct chunkfile *cf = NULL;
111 fd = git_open(midx_name);
113 if (fd < 0)
114 goto cleanup_fail;
115 if (fstat(fd, &st)) {
116 error_errno(_("failed to read %s"), midx_name);
117 goto cleanup_fail;
120 midx_size = xsize_t(st.st_size);
122 if (midx_size < MIDX_MIN_SIZE) {
123 error(_("multi-pack-index file %s is too small"), midx_name);
124 goto cleanup_fail;
127 midx_map = xmmap(NULL, midx_size, PROT_READ, MAP_PRIVATE, fd, 0);
128 close(fd);
130 FLEX_ALLOC_STR(m, object_dir, object_dir);
131 m->data = midx_map;
132 m->data_len = midx_size;
133 m->local = local;
135 m->signature = get_be32(m->data);
136 if (m->signature != MIDX_SIGNATURE)
137 die(_("multi-pack-index signature 0x%08x does not match signature 0x%08x"),
138 m->signature, MIDX_SIGNATURE);
140 m->version = m->data[MIDX_BYTE_FILE_VERSION];
141 if (m->version != MIDX_VERSION)
142 die(_("multi-pack-index version %d not recognized"),
143 m->version);
145 hash_version = m->data[MIDX_BYTE_HASH_VERSION];
146 if (hash_version != oid_version(the_hash_algo)) {
147 error(_("multi-pack-index hash version %u does not match version %u"),
148 hash_version, oid_version(the_hash_algo));
149 goto cleanup_fail;
151 m->hash_len = the_hash_algo->rawsz;
153 m->num_chunks = m->data[MIDX_BYTE_NUM_CHUNKS];
155 m->num_packs = get_be32(m->data + MIDX_BYTE_NUM_PACKS);
157 m->preferred_pack_idx = -1;
159 cf = init_chunkfile(NULL);
161 if (read_table_of_contents(cf, m->data, midx_size,
162 MIDX_HEADER_SIZE, m->num_chunks,
163 MIDX_CHUNK_ALIGNMENT))
164 goto cleanup_fail;
166 if (pair_chunk(cf, MIDX_CHUNKID_PACKNAMES, &m->chunk_pack_names, &m->chunk_pack_names_len))
167 die(_("multi-pack-index required pack-name chunk missing or corrupted"));
168 if (read_chunk(cf, MIDX_CHUNKID_OIDFANOUT, midx_read_oid_fanout, m))
169 die(_("multi-pack-index required OID fanout chunk missing or corrupted"));
170 if (read_chunk(cf, MIDX_CHUNKID_OIDLOOKUP, midx_read_oid_lookup, m))
171 die(_("multi-pack-index required OID lookup chunk missing or corrupted"));
172 if (read_chunk(cf, MIDX_CHUNKID_OBJECTOFFSETS, midx_read_object_offsets, m))
173 die(_("multi-pack-index required object offsets chunk missing or corrupted"));
175 pair_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets,
176 &m->chunk_large_offsets_len);
177 if (git_env_bool("GIT_TEST_MIDX_READ_BTMP", 1))
178 pair_chunk(cf, MIDX_CHUNKID_BITMAPPEDPACKS,
179 (const unsigned char **)&m->chunk_bitmapped_packs,
180 &m->chunk_bitmapped_packs_len);
182 if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1))
183 pair_chunk(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex,
184 &m->chunk_revindex_len);
186 CALLOC_ARRAY(m->pack_names, m->num_packs);
187 CALLOC_ARRAY(m->packs, m->num_packs);
189 cur_pack_name = (const char *)m->chunk_pack_names;
190 for (i = 0; i < m->num_packs; i++) {
191 const char *end;
192 size_t avail = m->chunk_pack_names_len -
193 (cur_pack_name - (const char *)m->chunk_pack_names);
195 m->pack_names[i] = cur_pack_name;
197 end = memchr(cur_pack_name, '\0', avail);
198 if (!end)
199 die(_("multi-pack-index pack-name chunk is too short"));
200 cur_pack_name = end + 1;
202 if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0)
203 die(_("multi-pack-index pack names out of order: '%s' before '%s'"),
204 m->pack_names[i - 1],
205 m->pack_names[i]);
208 trace2_data_intmax("midx", the_repository, "load/num_packs", m->num_packs);
209 trace2_data_intmax("midx", the_repository, "load/num_objects", m->num_objects);
211 free_chunkfile(cf);
212 return m;
214 cleanup_fail:
215 free(m);
216 free_chunkfile(cf);
217 if (midx_map)
218 munmap(midx_map, midx_size);
219 if (0 <= fd)
220 close(fd);
221 return NULL;
/*
 * Append the directory that holds the layers of an incremental MIDX chain,
 * "<object_dir>/pack/multi-pack-index.d", to `buf`.
 */
void get_midx_chain_dirname(struct strbuf *buf, const char *object_dir)
{
	static const char chain_dir_fmt[] = "%s/pack/multi-pack-index.d";

	strbuf_addf(buf, chain_dir_fmt, object_dir);
}
/*
 * Append the path of the chain file, "multi-pack-index-chain" inside the
 * chain directory of `object_dir`, to `buf`.
 */
void get_midx_chain_filename(struct strbuf *buf, const char *object_dir)
{
	static const char chain_basename[] = "/multi-pack-index-chain";

	get_midx_chain_dirname(buf, object_dir);
	strbuf_addstr(buf, chain_basename);
}
/*
 * Append the path of one chain layer (or a file belonging to it),
 * "multi-pack-index-<hex of hash>.<ext>" inside the chain directory of
 * `object_dir`, to `buf`.
 */
void get_split_midx_filename_ext(struct strbuf *buf, const char *object_dir,
				 const unsigned char *hash, const char *ext)
{
	const char *hex;

	get_midx_chain_dirname(buf, object_dir);

	hex = hash_to_hex(hash);
	strbuf_addf(buf, "/multi-pack-index-%s.%s", hex, ext);
}
242 static int open_multi_pack_index_chain(const char *chain_file,
243 int *fd, struct stat *st)
245 *fd = git_open(chain_file);
246 if (*fd < 0)
247 return 0;
248 if (fstat(*fd, st)) {
249 close(*fd);
250 return 0;
252 if (st->st_size < the_hash_algo->hexsz) {
253 close(*fd);
254 if (!st->st_size) {
255 /* treat empty files the same as missing */
256 errno = ENOENT;
257 } else {
258 warning(_("multi-pack-index chain file too small"));
259 errno = EINVAL;
261 return 0;
263 return 1;
266 static int add_midx_to_chain(struct multi_pack_index *midx,
267 struct multi_pack_index *midx_chain)
269 if (midx_chain) {
270 if (unsigned_add_overflows(midx_chain->num_packs,
271 midx_chain->num_packs_in_base)) {
272 warning(_("pack count in base MIDX too high: %"PRIuMAX),
273 (uintmax_t)midx_chain->num_packs_in_base);
274 return 0;
276 if (unsigned_add_overflows(midx_chain->num_objects,
277 midx_chain->num_objects_in_base)) {
278 warning(_("object count in base MIDX too high: %"PRIuMAX),
279 (uintmax_t)midx_chain->num_objects_in_base);
280 return 0;
282 midx->num_packs_in_base = midx_chain->num_packs +
283 midx_chain->num_packs_in_base;
284 midx->num_objects_in_base = midx_chain->num_objects +
285 midx_chain->num_objects_in_base;
288 midx->base_midx = midx_chain;
289 midx->has_chain = 1;
291 return 1;
294 static struct multi_pack_index *load_midx_chain_fd_st(const char *object_dir,
295 int local,
296 int fd, struct stat *st,
297 int *incomplete_chain)
299 struct multi_pack_index *midx_chain = NULL;
300 struct strbuf buf = STRBUF_INIT;
301 int valid = 1;
302 uint32_t i, count;
303 FILE *fp = xfdopen(fd, "r");
305 count = st->st_size / (the_hash_algo->hexsz + 1);
307 for (i = 0; i < count; i++) {
308 struct multi_pack_index *m;
309 struct object_id layer;
311 if (strbuf_getline_lf(&buf, fp) == EOF)
312 break;
314 if (get_oid_hex(buf.buf, &layer)) {
315 warning(_("invalid multi-pack-index chain: line '%s' "
316 "not a hash"),
317 buf.buf);
318 valid = 0;
319 break;
322 valid = 0;
324 strbuf_reset(&buf);
325 get_split_midx_filename_ext(&buf, object_dir, layer.hash,
326 MIDX_EXT_MIDX);
327 m = load_multi_pack_index_one(object_dir, buf.buf, local);
329 if (m) {
330 if (add_midx_to_chain(m, midx_chain)) {
331 midx_chain = m;
332 valid = 1;
333 } else {
334 close_midx(m);
337 if (!valid) {
338 warning(_("unable to find all multi-pack index files"));
339 break;
343 fclose(fp);
344 strbuf_release(&buf);
346 *incomplete_chain = !valid;
347 return midx_chain;
350 static struct multi_pack_index *load_multi_pack_index_chain(const char *object_dir,
351 int local)
353 struct strbuf chain_file = STRBUF_INIT;
354 struct stat st;
355 int fd;
356 struct multi_pack_index *m = NULL;
358 get_midx_chain_filename(&chain_file, object_dir);
359 if (open_multi_pack_index_chain(chain_file.buf, &fd, &st)) {
360 int incomplete;
361 /* ownership of fd is taken over by load function */
362 m = load_midx_chain_fd_st(object_dir, local, fd, &st,
363 &incomplete);
366 strbuf_release(&chain_file);
367 return m;
370 struct multi_pack_index *load_multi_pack_index(const char *object_dir,
371 int local)
373 struct strbuf midx_name = STRBUF_INIT;
374 struct multi_pack_index *m;
376 get_midx_filename(&midx_name, object_dir);
378 m = load_multi_pack_index_one(object_dir, midx_name.buf, local);
379 if (!m)
380 m = load_multi_pack_index_chain(object_dir, local);
382 strbuf_release(&midx_name);
384 return m;
387 void close_midx(struct multi_pack_index *m)
389 uint32_t i;
391 if (!m)
392 return;
394 close_midx(m->next);
395 close_midx(m->base_midx);
397 munmap((unsigned char *)m->data, m->data_len);
399 for (i = 0; i < m->num_packs; i++) {
400 if (m->packs[i])
401 m->packs[i]->multi_pack_index = 0;
403 FREE_AND_NULL(m->packs);
404 FREE_AND_NULL(m->pack_names);
405 free(m);
408 static uint32_t midx_for_object(struct multi_pack_index **_m, uint32_t pos)
410 struct multi_pack_index *m = *_m;
411 while (m && pos < m->num_objects_in_base)
412 m = m->base_midx;
414 if (!m)
415 BUG("NULL multi-pack-index for object position: %"PRIu32, pos);
417 if (pos >= m->num_objects + m->num_objects_in_base)
418 die(_("invalid MIDX object position, MIDX is likely corrupt"));
420 *_m = m;
422 return pos - m->num_objects_in_base;
425 static uint32_t midx_for_pack(struct multi_pack_index **_m,
426 uint32_t pack_int_id)
428 struct multi_pack_index *m = *_m;
429 while (m && pack_int_id < m->num_packs_in_base)
430 m = m->base_midx;
432 if (!m)
433 BUG("NULL multi-pack-index for pack ID: %"PRIu32, pack_int_id);
435 if (pack_int_id >= m->num_packs + m->num_packs_in_base)
436 die(_("bad pack-int-id: %u (%u total packs)"),
437 pack_int_id, m->num_packs + m->num_packs_in_base);
439 *_m = m;
441 return pack_int_id - m->num_packs_in_base;
444 int prepare_midx_pack(struct repository *r, struct multi_pack_index *m,
445 uint32_t pack_int_id)
447 struct strbuf pack_name = STRBUF_INIT;
448 struct packed_git *p;
450 pack_int_id = midx_for_pack(&m, pack_int_id);
452 if (m->packs[pack_int_id])
453 return 0;
455 strbuf_addf(&pack_name, "%s/pack/%s", m->object_dir,
456 m->pack_names[pack_int_id]);
458 p = add_packed_git(pack_name.buf, pack_name.len, m->local);
459 strbuf_release(&pack_name);
461 if (!p)
462 return 1;
464 p->multi_pack_index = 1;
465 m->packs[pack_int_id] = p;
466 install_packed_git(r, p);
467 list_add_tail(&p->mru, &r->objects->packed_git_mru);
469 return 0;
472 struct packed_git *nth_midxed_pack(struct multi_pack_index *m,
473 uint32_t pack_int_id)
475 uint32_t local_pack_int_id = midx_for_pack(&m, pack_int_id);
476 return m->packs[local_pack_int_id];
479 #define MIDX_CHUNK_BITMAPPED_PACKS_WIDTH (2 * sizeof(uint32_t))
481 int nth_bitmapped_pack(struct repository *r, struct multi_pack_index *m,
482 struct bitmapped_pack *bp, uint32_t pack_int_id)
484 uint32_t local_pack_int_id = midx_for_pack(&m, pack_int_id);
486 if (!m->chunk_bitmapped_packs)
487 return error(_("MIDX does not contain the BTMP chunk"));
489 if (prepare_midx_pack(r, m, pack_int_id))
490 return error(_("could not load bitmapped pack %"PRIu32), pack_int_id);
492 bp->p = m->packs[local_pack_int_id];
493 bp->bitmap_pos = get_be32((char *)m->chunk_bitmapped_packs +
494 MIDX_CHUNK_BITMAPPED_PACKS_WIDTH * local_pack_int_id);
495 bp->bitmap_nr = get_be32((char *)m->chunk_bitmapped_packs +
496 MIDX_CHUNK_BITMAPPED_PACKS_WIDTH * local_pack_int_id +
497 sizeof(uint32_t));
498 bp->pack_int_id = pack_int_id;
499 bp->from_midx = m;
501 return 0;
504 int bsearch_one_midx(const struct object_id *oid, struct multi_pack_index *m,
505 uint32_t *result)
507 int ret = bsearch_hash(oid->hash, m->chunk_oid_fanout,
508 m->chunk_oid_lookup, the_hash_algo->rawsz,
509 result);
510 if (result)
511 *result += m->num_objects_in_base;
512 return ret;
515 int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m,
516 uint32_t *result)
518 for (; m; m = m->base_midx)
519 if (bsearch_one_midx(oid, m, result))
520 return 1;
521 return 0;
/* Return non-zero if `oid` is present in `m` or any of its base layers. */
int midx_has_oid(struct multi_pack_index *m, const struct object_id *oid)
{
	uint32_t *no_position = NULL;

	return bsearch_midx(oid, m, no_position);
}
529 struct object_id *nth_midxed_object_oid(struct object_id *oid,
530 struct multi_pack_index *m,
531 uint32_t n)
533 if (n >= m->num_objects + m->num_objects_in_base)
534 return NULL;
536 n = midx_for_object(&m, n);
538 oidread(oid, m->chunk_oid_lookup + st_mult(m->hash_len, n),
539 the_repository->hash_algo);
540 return oid;
543 off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos)
545 const unsigned char *offset_data;
546 uint32_t offset32;
548 pos = midx_for_object(&m, pos);
550 offset_data = m->chunk_object_offsets + (off_t)pos * MIDX_CHUNK_OFFSET_WIDTH;
551 offset32 = get_be32(offset_data + sizeof(uint32_t));
553 if (m->chunk_large_offsets && offset32 & MIDX_LARGE_OFFSET_NEEDED) {
554 if (sizeof(off_t) < sizeof(uint64_t))
555 die(_("multi-pack-index stores a 64-bit offset, but off_t is too small"));
557 offset32 ^= MIDX_LARGE_OFFSET_NEEDED;
558 if (offset32 >= m->chunk_large_offsets_len / sizeof(uint64_t))
559 die(_("multi-pack-index large offset out of bounds"));
560 return get_be64(m->chunk_large_offsets + sizeof(uint64_t) * offset32);
563 return offset32;
566 uint32_t nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos)
568 pos = midx_for_object(&m, pos);
570 return m->num_packs_in_base + get_be32(m->chunk_object_offsets +
571 (off_t)pos * MIDX_CHUNK_OFFSET_WIDTH);
574 int fill_midx_entry(struct repository *r,
575 const struct object_id *oid,
576 struct pack_entry *e,
577 struct multi_pack_index *m)
579 uint32_t pos;
580 uint32_t pack_int_id;
581 struct packed_git *p;
583 if (!bsearch_midx(oid, m, &pos))
584 return 0;
586 midx_for_object(&m, pos);
587 pack_int_id = nth_midxed_pack_int_id(m, pos);
589 if (prepare_midx_pack(r, m, pack_int_id))
590 return 0;
591 p = m->packs[pack_int_id - m->num_packs_in_base];
594 * We are about to tell the caller where they can locate the
595 * requested object. We better make sure the packfile is
596 * still here and can be accessed before supplying that
597 * answer, as it may have been deleted since the MIDX was
598 * loaded!
600 if (!is_pack_valid(p))
601 return 0;
603 if (oidset_size(&p->bad_objects) &&
604 oidset_contains(&p->bad_objects, oid))
605 return 0;
607 e->offset = nth_midxed_offset(m, pos);
608 e->p = p;
610 return 1;
/*
 * Compare a ".idx" or ".pack" file name against a ".idx" file name.
 *
 * Returns 0 when the names are identical, or when they are identical up to
 * a point where `idx_name` continues with exactly "idx" and
 * `idx_or_pack_name` with exactly "pack". Otherwise returns the strcmp()
 * ordering of the two names, so the function can drive a binary search
 * over a plainly sorted list of ".idx" names.
 */
int cmp_idx_or_pack_name(const char *idx_or_pack_name,
			 const char *idx_name)
{
	size_t common = 0;
	const char *name_rest, *idx_rest;

	/* Measure the shared prefix of both names. */
	while (idx_name[common] && idx_name[common] == idx_or_pack_name[common])
		common++;

	name_rest = idx_or_pack_name + common;
	idx_rest = idx_name + common;

	/*
	 * A shared "pack-1234." prefix followed by "pack" on one side and
	 * "idx" on the other counts as a match. Identical names need no
	 * special case: both tails are empty and the strcmp() below
	 * returns 0. (Technically "foopack" matches "fooidx" too, but such
	 * names are not expected to occur.)
	 */
	if (!strcmp(idx_rest, "idx") && !strcmp(name_rest, "pack"))
		return 0;

	/*
	 * Comparing the tails orders by the first differing character,
	 * exactly as strcmp() on the full names would.
	 */
	return strcmp(name_rest, idx_rest);
}
645 static int midx_contains_pack_1(struct multi_pack_index *m,
646 const char *idx_or_pack_name)
648 uint32_t first = 0, last = m->num_packs;
650 while (first < last) {
651 uint32_t mid = first + (last - first) / 2;
652 const char *current;
653 int cmp;
655 current = m->pack_names[mid];
656 cmp = cmp_idx_or_pack_name(idx_or_pack_name, current);
657 if (!cmp)
658 return 1;
659 if (cmp > 0) {
660 first = mid + 1;
661 continue;
663 last = mid;
666 return 0;
669 int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name)
671 for (; m; m = m->base_midx)
672 if (midx_contains_pack_1(m, idx_or_pack_name))
673 return 1;
674 return 0;
677 int midx_preferred_pack(struct multi_pack_index *m, uint32_t *pack_int_id)
679 if (m->preferred_pack_idx == -1) {
680 uint32_t midx_pos;
681 if (load_midx_revindex(m) < 0) {
682 m->preferred_pack_idx = -2;
683 return -1;
686 midx_pos = pack_pos_to_midx(m, m->num_objects_in_base);
688 m->preferred_pack_idx = nth_midxed_pack_int_id(m, midx_pos);
690 } else if (m->preferred_pack_idx == -2)
691 return -1; /* no revindex */
693 *pack_int_id = m->preferred_pack_idx;
694 return 0;
697 int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, int local)
699 struct multi_pack_index *m;
700 struct multi_pack_index *m_search;
702 prepare_repo_settings(r);
703 if (!r->settings.core_multi_pack_index)
704 return 0;
706 for (m_search = r->objects->multi_pack_index; m_search; m_search = m_search->next)
707 if (!strcmp(object_dir, m_search->object_dir))
708 return 1;
710 m = load_multi_pack_index(object_dir, local);
712 if (m) {
713 struct multi_pack_index *mp = r->objects->multi_pack_index;
714 if (mp) {
715 m->next = mp->next;
716 mp->next = m;
717 } else
718 r->objects->multi_pack_index = m;
719 return 1;
722 return 0;
725 int midx_checksum_valid(struct multi_pack_index *m)
727 return hashfile_checksum_valid(m->data, m->data_len);
/* Callback state for clear_midx_file_ext(). */
struct clear_midx_data {
	char **keep;		/* file names that must not be removed */
	uint32_t keep_nr;	/* number of entries in `keep` */
	const char *ext;	/* only names ending in this are considered */
};
736 static void clear_midx_file_ext(const char *full_path, size_t full_path_len UNUSED,
737 const char *file_name, void *_data)
739 struct clear_midx_data *data = _data;
740 uint32_t i;
742 if (!(starts_with(file_name, "multi-pack-index-") &&
743 ends_with(file_name, data->ext)))
744 return;
745 for (i = 0; i < data->keep_nr; i++) {
746 if (!strcmp(data->keep[i], file_name))
747 return;
749 if (unlink(full_path))
750 die_errno(_("failed to remove %s"), full_path);
753 void clear_midx_files_ext(const char *object_dir, const char *ext,
754 const char *keep_hash)
756 struct clear_midx_data data;
757 memset(&data, 0, sizeof(struct clear_midx_data));
759 if (keep_hash) {
760 ALLOC_ARRAY(data.keep, 1);
762 data.keep[0] = xstrfmt("multi-pack-index-%s.%s", keep_hash, ext);
763 data.keep_nr = 1;
765 data.ext = ext;
767 for_each_file_in_pack_dir(object_dir,
768 clear_midx_file_ext,
769 &data);
771 if (keep_hash)
772 free(data.keep[0]);
773 free(data.keep);
776 void clear_incremental_midx_files_ext(const char *object_dir, const char *ext,
777 char **keep_hashes,
778 uint32_t hashes_nr)
780 struct clear_midx_data data;
781 uint32_t i;
783 memset(&data, 0, sizeof(struct clear_midx_data));
785 ALLOC_ARRAY(data.keep, hashes_nr);
786 for (i = 0; i < hashes_nr; i++)
787 data.keep[i] = xstrfmt("multi-pack-index-%s.%s", keep_hashes[i],
788 ext);
789 data.keep_nr = hashes_nr;
790 data.ext = ext;
792 for_each_file_in_pack_subdir(object_dir, "multi-pack-index.d",
793 clear_midx_file_ext, &data);
795 for (i = 0; i < hashes_nr; i++)
796 free(data.keep[i]);
797 free(data.keep);
800 void clear_midx_file(struct repository *r)
802 struct strbuf midx = STRBUF_INIT;
804 get_midx_filename(&midx, r->objects->odb->path);
806 if (r->objects && r->objects->multi_pack_index) {
807 close_midx(r->objects->multi_pack_index);
808 r->objects->multi_pack_index = NULL;
811 if (remove_path(midx.buf))
812 die(_("failed to clear multi-pack-index at %s"), midx.buf);
814 clear_midx_files_ext(r->objects->odb->path, MIDX_EXT_BITMAP, NULL);
815 clear_midx_files_ext(r->objects->odb->path, MIDX_EXT_REV, NULL);
817 strbuf_release(&midx);
/* Set to 1 by midx_report(); verify_midx_file() returns it. */
static int verify_midx_error;

/*
 * Record that verification found a problem and print the printf-style
 * message, followed by a newline, to stderr.
 */
__attribute__((format (printf, 1, 2)))
static void midx_report(const char *fmt, ...)
{
	va_list args;

	verify_midx_error = 1;

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);

	fputc('\n', stderr);
}
/* An object's MIDX position paired with the ID of the pack holding it. */
struct pair_pos_vs_id
{
	uint32_t pos;
	uint32_t pack_int_id;
};

/*
 * qsort() comparator ordering pairs by descending pack_int_id.
 *
 * The IDs are compared explicitly instead of being subtracted: the
 * difference of two uint32_t values does not fit in an int once it
 * exceeds INT_MAX, which would flip the sign of the result and make the
 * ordering inconsistent.
 */
static int compare_pair_pos_vs_id(const void *_a, const void *_b)
{
	const struct pair_pos_vs_id *a = _a;
	const struct pair_pos_vs_id *b = _b;

	if (b->pack_int_id > a->pack_int_id)
		return 1;
	if (b->pack_int_id < a->pack_int_id)
		return -1;
	return 0;
}
848 * Limit calls to display_progress() for performance reasons.
849 * The interval here was arbitrarily chosen.
851 #define SPARSE_PROGRESS_INTERVAL (1 << 12)
852 #define midx_display_sparse_progress(progress, n) \
853 do { \
854 uint64_t _n = (n); \
855 if ((_n & (SPARSE_PROGRESS_INTERVAL - 1)) == 0) \
856 display_progress(progress, _n); \
857 } while (0)
859 int verify_midx_file(struct repository *r, const char *object_dir, unsigned flags)
861 struct pair_pos_vs_id *pairs = NULL;
862 uint32_t i;
863 struct progress *progress = NULL;
864 struct multi_pack_index *m = load_multi_pack_index(object_dir, 1);
865 struct multi_pack_index *curr;
866 verify_midx_error = 0;
868 if (!m) {
869 int result = 0;
870 struct stat sb;
871 struct strbuf filename = STRBUF_INIT;
873 get_midx_filename(&filename, object_dir);
875 if (!stat(filename.buf, &sb)) {
876 error(_("multi-pack-index file exists, but failed to parse"));
877 result = 1;
879 strbuf_release(&filename);
880 return result;
883 if (!midx_checksum_valid(m))
884 midx_report(_("incorrect checksum"));
886 if (flags & MIDX_PROGRESS)
887 progress = start_delayed_progress(_("Looking for referenced packfiles"),
888 m->num_packs + m->num_packs_in_base);
889 for (i = 0; i < m->num_packs + m->num_packs_in_base; i++) {
890 if (prepare_midx_pack(r, m, i))
891 midx_report("failed to load pack in position %d", i);
893 display_progress(progress, i + 1);
895 stop_progress(&progress);
897 if (m->num_objects == 0) {
898 midx_report(_("the midx contains no oid"));
900 * Remaining tests assume that we have objects, so we can
901 * return here.
903 goto cleanup;
906 if (flags & MIDX_PROGRESS)
907 progress = start_sparse_progress(_("Verifying OID order in multi-pack-index"),
908 m->num_objects - 1);
910 for (curr = m; curr; curr = curr->base_midx) {
911 for (i = 0; i < m->num_objects - 1; i++) {
912 struct object_id oid1, oid2;
914 nth_midxed_object_oid(&oid1, m, m->num_objects_in_base + i);
915 nth_midxed_object_oid(&oid2, m, m->num_objects_in_base + i + 1);
917 if (oidcmp(&oid1, &oid2) >= 0)
918 midx_report(_("oid lookup out of order: oid[%d] = %s >= %s = oid[%d]"),
919 i, oid_to_hex(&oid1), oid_to_hex(&oid2), i + 1);
921 midx_display_sparse_progress(progress, i + 1);
924 stop_progress(&progress);
927 * Create an array mapping each object to its packfile id. Sort it
928 * to group the objects by packfile. Use this permutation to visit
929 * each of the objects and only require 1 packfile to be open at a
930 * time.
932 ALLOC_ARRAY(pairs, m->num_objects + m->num_objects_in_base);
933 for (i = 0; i < m->num_objects + m->num_objects_in_base; i++) {
934 pairs[i].pos = i;
935 pairs[i].pack_int_id = nth_midxed_pack_int_id(m, i);
938 if (flags & MIDX_PROGRESS)
939 progress = start_sparse_progress(_("Sorting objects by packfile"),
940 m->num_objects);
941 display_progress(progress, 0); /* TODO: Measure QSORT() progress */
942 QSORT(pairs, m->num_objects, compare_pair_pos_vs_id);
943 stop_progress(&progress);
945 if (flags & MIDX_PROGRESS)
946 progress = start_sparse_progress(_("Verifying object offsets"), m->num_objects);
947 for (i = 0; i < m->num_objects + m->num_objects_in_base; i++) {
948 struct object_id oid;
949 struct pack_entry e;
950 off_t m_offset, p_offset;
952 if (i > 0 && pairs[i-1].pack_int_id != pairs[i].pack_int_id &&
953 nth_midxed_pack(m, pairs[i-1].pack_int_id)) {
954 uint32_t pack_int_id = pairs[i-1].pack_int_id;
955 struct packed_git *p = nth_midxed_pack(m, pack_int_id);
957 close_pack_fd(p);
958 close_pack_index(p);
961 nth_midxed_object_oid(&oid, m, pairs[i].pos);
963 if (!fill_midx_entry(r, &oid, &e, m)) {
964 midx_report(_("failed to load pack entry for oid[%d] = %s"),
965 pairs[i].pos, oid_to_hex(&oid));
966 continue;
969 if (open_pack_index(e.p)) {
970 midx_report(_("failed to load pack-index for packfile %s"),
971 e.p->pack_name);
972 break;
975 m_offset = e.offset;
976 p_offset = find_pack_entry_one(oid.hash, e.p);
978 if (m_offset != p_offset)
979 midx_report(_("incorrect object offset for oid[%d] = %s: %"PRIx64" != %"PRIx64),
980 pairs[i].pos, oid_to_hex(&oid), m_offset, p_offset);
982 midx_display_sparse_progress(progress, i + 1);
984 stop_progress(&progress);
986 cleanup:
987 free(pairs);
988 close_midx(m);
990 return verify_midx_error;