hash.h: scaffolding for _unsafe hashing variants
[git/gitster.git] / midx.c
blobca98bfd7c644fece17d47679c6044747c3798c16
1 #define USE_THE_REPOSITORY_VARIABLE
3 #include "git-compat-util.h"
4 #include "config.h"
5 #include "dir.h"
6 #include "hex.h"
7 #include "packfile.h"
8 #include "object-file.h"
9 #include "hash-lookup.h"
10 #include "midx.h"
11 #include "progress.h"
12 #include "trace2.h"
13 #include "chunk-format.h"
14 #include "pack-bitmap.h"
15 #include "pack-revindex.h"
/*
 * Prototypes for non-static helpers that are defined further down in
 * this file.
 */
int midx_checksum_valid(struct multi_pack_index *m);
void clear_midx_files_ext(const char *object_dir,
			  const char *ext,
			  const char *keep_hash);
void clear_incremental_midx_files_ext(const char *object_dir,
				      const char *ext,
				      char **keep_hashes,
				      uint32_t hashes_nr);
int cmp_idx_or_pack_name(const char *idx_or_pack_name,
			 const char *idx_name);
26 const unsigned char *get_midx_checksum(struct multi_pack_index *m)
28 return m->data + m->data_len - the_hash_algo->rawsz;
/*
 * Append the path of the plain multi-pack-index file under `object_dir`
 * to `out` (no hash, no extension suffix).
 */
void get_midx_filename(struct strbuf *out, const char *object_dir)
{
	const unsigned char *no_hash = NULL;
	const char *no_ext = NULL;

	get_midx_filename_ext(out, object_dir, no_hash, no_ext);
}
/*
 * Append "<object_dir>/pack/multi-pack-index" to `out`.  When `ext` is
 * non-NULL, additionally append "-<hex of hash>.<ext>"; `hash` is only
 * looked at in that case.
 */
void get_midx_filename_ext(struct strbuf *out, const char *object_dir,
			   const unsigned char *hash, const char *ext)
{
	strbuf_addf(out, "%s/pack/multi-pack-index", object_dir);
	if (!ext)
		return;
	strbuf_addf(out, "-%s.%s", hash_to_hex(hash), ext);
}
44 static int midx_read_oid_fanout(const unsigned char *chunk_start,
45 size_t chunk_size, void *data)
47 int i;
48 struct multi_pack_index *m = data;
49 m->chunk_oid_fanout = (uint32_t *)chunk_start;
51 if (chunk_size != 4 * 256) {
52 error(_("multi-pack-index OID fanout is of the wrong size"));
53 return 1;
55 for (i = 0; i < 255; i++) {
56 uint32_t oid_fanout1 = ntohl(m->chunk_oid_fanout[i]);
57 uint32_t oid_fanout2 = ntohl(m->chunk_oid_fanout[i+1]);
59 if (oid_fanout1 > oid_fanout2) {
60 error(_("oid fanout out of order: fanout[%d] = %"PRIx32" > %"PRIx32" = fanout[%d]"),
61 i, oid_fanout1, oid_fanout2, i + 1);
62 return 1;
65 m->num_objects = ntohl(m->chunk_oid_fanout[255]);
66 return 0;
69 static int midx_read_oid_lookup(const unsigned char *chunk_start,
70 size_t chunk_size, void *data)
72 struct multi_pack_index *m = data;
73 m->chunk_oid_lookup = chunk_start;
75 if (chunk_size != st_mult(m->hash_len, m->num_objects)) {
76 error(_("multi-pack-index OID lookup chunk is the wrong size"));
77 return 1;
79 return 0;
82 static int midx_read_object_offsets(const unsigned char *chunk_start,
83 size_t chunk_size, void *data)
85 struct multi_pack_index *m = data;
86 m->chunk_object_offsets = chunk_start;
88 if (chunk_size != st_mult(m->num_objects, MIDX_CHUNK_OFFSET_WIDTH)) {
89 error(_("multi-pack-index object offset chunk is the wrong size"));
90 return 1;
92 return 0;
95 #define MIDX_MIN_SIZE (MIDX_HEADER_SIZE + the_hash_algo->rawsz)
97 static struct multi_pack_index *load_multi_pack_index_one(const char *object_dir,
98 const char *midx_name,
99 int local)
101 struct multi_pack_index *m = NULL;
102 int fd;
103 struct stat st;
104 size_t midx_size;
105 void *midx_map = NULL;
106 uint32_t hash_version;
107 uint32_t i;
108 const char *cur_pack_name;
109 struct chunkfile *cf = NULL;
111 fd = git_open(midx_name);
113 if (fd < 0)
114 goto cleanup_fail;
115 if (fstat(fd, &st)) {
116 error_errno(_("failed to read %s"), midx_name);
117 goto cleanup_fail;
120 midx_size = xsize_t(st.st_size);
122 if (midx_size < MIDX_MIN_SIZE) {
123 error(_("multi-pack-index file %s is too small"), midx_name);
124 goto cleanup_fail;
127 midx_map = xmmap(NULL, midx_size, PROT_READ, MAP_PRIVATE, fd, 0);
128 close(fd);
130 FLEX_ALLOC_STR(m, object_dir, object_dir);
131 m->data = midx_map;
132 m->data_len = midx_size;
133 m->local = local;
135 m->signature = get_be32(m->data);
136 if (m->signature != MIDX_SIGNATURE)
137 die(_("multi-pack-index signature 0x%08x does not match signature 0x%08x"),
138 m->signature, MIDX_SIGNATURE);
140 m->version = m->data[MIDX_BYTE_FILE_VERSION];
141 if (m->version != MIDX_VERSION)
142 die(_("multi-pack-index version %d not recognized"),
143 m->version);
145 hash_version = m->data[MIDX_BYTE_HASH_VERSION];
146 if (hash_version != oid_version(the_hash_algo)) {
147 error(_("multi-pack-index hash version %u does not match version %u"),
148 hash_version, oid_version(the_hash_algo));
149 goto cleanup_fail;
151 m->hash_len = the_hash_algo->rawsz;
153 m->num_chunks = m->data[MIDX_BYTE_NUM_CHUNKS];
155 m->num_packs = get_be32(m->data + MIDX_BYTE_NUM_PACKS);
157 m->preferred_pack_idx = -1;
159 cf = init_chunkfile(NULL);
161 if (read_table_of_contents(cf, m->data, midx_size,
162 MIDX_HEADER_SIZE, m->num_chunks,
163 MIDX_CHUNK_ALIGNMENT))
164 goto cleanup_fail;
166 if (pair_chunk(cf, MIDX_CHUNKID_PACKNAMES, &m->chunk_pack_names, &m->chunk_pack_names_len))
167 die(_("multi-pack-index required pack-name chunk missing or corrupted"));
168 if (read_chunk(cf, MIDX_CHUNKID_OIDFANOUT, midx_read_oid_fanout, m))
169 die(_("multi-pack-index required OID fanout chunk missing or corrupted"));
170 if (read_chunk(cf, MIDX_CHUNKID_OIDLOOKUP, midx_read_oid_lookup, m))
171 die(_("multi-pack-index required OID lookup chunk missing or corrupted"));
172 if (read_chunk(cf, MIDX_CHUNKID_OBJECTOFFSETS, midx_read_object_offsets, m))
173 die(_("multi-pack-index required object offsets chunk missing or corrupted"));
175 pair_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets,
176 &m->chunk_large_offsets_len);
177 if (git_env_bool("GIT_TEST_MIDX_READ_BTMP", 1))
178 pair_chunk(cf, MIDX_CHUNKID_BITMAPPEDPACKS,
179 (const unsigned char **)&m->chunk_bitmapped_packs,
180 &m->chunk_bitmapped_packs_len);
182 if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1))
183 pair_chunk(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex,
184 &m->chunk_revindex_len);
186 CALLOC_ARRAY(m->pack_names, m->num_packs);
187 CALLOC_ARRAY(m->packs, m->num_packs);
189 cur_pack_name = (const char *)m->chunk_pack_names;
190 for (i = 0; i < m->num_packs; i++) {
191 const char *end;
192 size_t avail = m->chunk_pack_names_len -
193 (cur_pack_name - (const char *)m->chunk_pack_names);
195 m->pack_names[i] = cur_pack_name;
197 end = memchr(cur_pack_name, '\0', avail);
198 if (!end)
199 die(_("multi-pack-index pack-name chunk is too short"));
200 cur_pack_name = end + 1;
202 if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0)
203 die(_("multi-pack-index pack names out of order: '%s' before '%s'"),
204 m->pack_names[i - 1],
205 m->pack_names[i]);
208 trace2_data_intmax("midx", the_repository, "load/num_packs", m->num_packs);
209 trace2_data_intmax("midx", the_repository, "load/num_objects", m->num_objects);
211 free_chunkfile(cf);
212 return m;
214 cleanup_fail:
215 free(m);
216 free_chunkfile(cf);
217 if (midx_map)
218 munmap(midx_map, midx_size);
219 if (0 <= fd)
220 close(fd);
221 return NULL;
/*
 * Append "<object_dir>/pack/multi-pack-index.d" to `buf`.
 */
void get_midx_chain_dirname(struct strbuf *buf, const char *object_dir)
{
	const char *dir = object_dir;

	strbuf_addf(buf, "%s/pack/multi-pack-index.d", dir);
}
/*
 * Append the path of the "multi-pack-index-chain" file, which sits
 * inside the chain directory of `object_dir`, to `buf`.
 */
void get_midx_chain_filename(struct strbuf *buf, const char *object_dir)
{
	static const char chain_basename[] = "/multi-pack-index-chain";

	get_midx_chain_dirname(buf, object_dir);
	strbuf_addstr(buf, chain_basename);
}
/*
 * Append "<chain dir>/multi-pack-index-<hex of hash>.<ext>" to `buf`,
 * where the chain directory is the one produced by
 * get_midx_chain_dirname() for `object_dir`.
 */
void get_split_midx_filename_ext(struct strbuf *buf, const char *object_dir,
				 const unsigned char *hash, const char *ext)
{
	const char *hex;

	get_midx_chain_dirname(buf, object_dir);
	hex = hash_to_hex(hash);
	strbuf_addf(buf, "/multi-pack-index-%s.%s", hex, ext);
}
242 static int open_multi_pack_index_chain(const char *chain_file,
243 int *fd, struct stat *st)
245 *fd = git_open(chain_file);
246 if (*fd < 0)
247 return 0;
248 if (fstat(*fd, st)) {
249 close(*fd);
250 return 0;
252 if (st->st_size < the_hash_algo->hexsz) {
253 close(*fd);
254 if (!st->st_size) {
255 /* treat empty files the same as missing */
256 errno = ENOENT;
257 } else {
258 warning(_("multi-pack-index chain file too small"));
259 errno = EINVAL;
261 return 0;
263 return 1;
266 static int add_midx_to_chain(struct multi_pack_index *midx,
267 struct multi_pack_index *midx_chain)
269 if (midx_chain) {
270 if (unsigned_add_overflows(midx_chain->num_packs,
271 midx_chain->num_packs_in_base)) {
272 warning(_("pack count in base MIDX too high: %"PRIuMAX),
273 (uintmax_t)midx_chain->num_packs_in_base);
274 return 0;
276 if (unsigned_add_overflows(midx_chain->num_objects,
277 midx_chain->num_objects_in_base)) {
278 warning(_("object count in base MIDX too high: %"PRIuMAX),
279 (uintmax_t)midx_chain->num_objects_in_base);
280 return 0;
282 midx->num_packs_in_base = midx_chain->num_packs +
283 midx_chain->num_packs_in_base;
284 midx->num_objects_in_base = midx_chain->num_objects +
285 midx_chain->num_objects_in_base;
288 midx->base_midx = midx_chain;
289 midx->has_chain = 1;
291 return 1;
294 static struct multi_pack_index *load_midx_chain_fd_st(const char *object_dir,
295 int local,
296 int fd, struct stat *st,
297 int *incomplete_chain)
299 struct multi_pack_index *midx_chain = NULL;
300 struct strbuf buf = STRBUF_INIT;
301 int valid = 1;
302 uint32_t i, count;
303 FILE *fp = xfdopen(fd, "r");
305 count = st->st_size / (the_hash_algo->hexsz + 1);
307 for (i = 0; i < count; i++) {
308 struct multi_pack_index *m;
309 struct object_id layer;
311 if (strbuf_getline_lf(&buf, fp) == EOF)
312 break;
314 if (get_oid_hex(buf.buf, &layer)) {
315 warning(_("invalid multi-pack-index chain: line '%s' "
316 "not a hash"),
317 buf.buf);
318 valid = 0;
319 break;
322 valid = 0;
324 strbuf_reset(&buf);
325 get_split_midx_filename_ext(&buf, object_dir, layer.hash,
326 MIDX_EXT_MIDX);
327 m = load_multi_pack_index_one(object_dir, buf.buf, local);
329 if (m) {
330 if (add_midx_to_chain(m, midx_chain)) {
331 midx_chain = m;
332 valid = 1;
333 } else {
334 close_midx(m);
337 if (!valid) {
338 warning(_("unable to find all multi-pack index files"));
339 break;
343 fclose(fp);
344 strbuf_release(&buf);
346 *incomplete_chain = !valid;
347 return midx_chain;
350 static struct multi_pack_index *load_multi_pack_index_chain(const char *object_dir,
351 int local)
353 struct strbuf chain_file = STRBUF_INIT;
354 struct stat st;
355 int fd;
356 struct multi_pack_index *m = NULL;
358 get_midx_chain_filename(&chain_file, object_dir);
359 if (open_multi_pack_index_chain(chain_file.buf, &fd, &st)) {
360 int incomplete;
361 /* ownership of fd is taken over by load function */
362 m = load_midx_chain_fd_st(object_dir, local, fd, &st,
363 &incomplete);
366 strbuf_release(&chain_file);
367 return m;
370 struct multi_pack_index *load_multi_pack_index(const char *object_dir,
371 int local)
373 struct strbuf midx_name = STRBUF_INIT;
374 struct multi_pack_index *m;
376 get_midx_filename(&midx_name, object_dir);
378 m = load_multi_pack_index_one(object_dir, midx_name.buf, local);
379 if (!m)
380 m = load_multi_pack_index_chain(object_dir, local);
382 strbuf_release(&midx_name);
384 return m;
387 void close_midx(struct multi_pack_index *m)
389 uint32_t i;
391 if (!m)
392 return;
394 close_midx(m->next);
395 close_midx(m->base_midx);
397 munmap((unsigned char *)m->data, m->data_len);
399 for (i = 0; i < m->num_packs; i++) {
400 if (m->packs[i])
401 m->packs[i]->multi_pack_index = 0;
403 FREE_AND_NULL(m->packs);
404 FREE_AND_NULL(m->pack_names);
405 free(m);
408 static uint32_t midx_for_object(struct multi_pack_index **_m, uint32_t pos)
410 struct multi_pack_index *m = *_m;
411 while (m && pos < m->num_objects_in_base)
412 m = m->base_midx;
414 if (!m)
415 BUG("NULL multi-pack-index for object position: %"PRIu32, pos);
417 if (pos >= m->num_objects + m->num_objects_in_base)
418 die(_("invalid MIDX object position, MIDX is likely corrupt"));
420 *_m = m;
422 return pos - m->num_objects_in_base;
425 static uint32_t midx_for_pack(struct multi_pack_index **_m,
426 uint32_t pack_int_id)
428 struct multi_pack_index *m = *_m;
429 while (m && pack_int_id < m->num_packs_in_base)
430 m = m->base_midx;
432 if (!m)
433 BUG("NULL multi-pack-index for pack ID: %"PRIu32, pack_int_id);
435 if (pack_int_id >= m->num_packs + m->num_packs_in_base)
436 die(_("bad pack-int-id: %u (%u total packs)"),
437 pack_int_id, m->num_packs + m->num_packs_in_base);
439 *_m = m;
441 return pack_int_id - m->num_packs_in_base;
444 int prepare_midx_pack(struct repository *r, struct multi_pack_index *m,
445 uint32_t pack_int_id)
447 struct strbuf pack_name = STRBUF_INIT;
448 struct packed_git *p;
450 pack_int_id = midx_for_pack(&m, pack_int_id);
452 if (m->packs[pack_int_id])
453 return 0;
455 strbuf_addf(&pack_name, "%s/pack/%s", m->object_dir,
456 m->pack_names[pack_int_id]);
458 p = add_packed_git(pack_name.buf, pack_name.len, m->local);
459 strbuf_release(&pack_name);
461 if (!p)
462 return 1;
464 p->multi_pack_index = 1;
465 m->packs[pack_int_id] = p;
466 install_packed_git(r, p);
467 list_add_tail(&p->mru, &r->objects->packed_git_mru);
469 return 0;
472 struct packed_git *nth_midxed_pack(struct multi_pack_index *m,
473 uint32_t pack_int_id)
475 uint32_t local_pack_int_id = midx_for_pack(&m, pack_int_id);
476 return m->packs[local_pack_int_id];
479 #define MIDX_CHUNK_BITMAPPED_PACKS_WIDTH (2 * sizeof(uint32_t))
481 int nth_bitmapped_pack(struct repository *r, struct multi_pack_index *m,
482 struct bitmapped_pack *bp, uint32_t pack_int_id)
484 uint32_t local_pack_int_id = midx_for_pack(&m, pack_int_id);
486 if (!m->chunk_bitmapped_packs)
487 return error(_("MIDX does not contain the BTMP chunk"));
489 if (prepare_midx_pack(r, m, pack_int_id))
490 return error(_("could not load bitmapped pack %"PRIu32), pack_int_id);
492 bp->p = m->packs[local_pack_int_id];
493 bp->bitmap_pos = get_be32((char *)m->chunk_bitmapped_packs +
494 MIDX_CHUNK_BITMAPPED_PACKS_WIDTH * local_pack_int_id);
495 bp->bitmap_nr = get_be32((char *)m->chunk_bitmapped_packs +
496 MIDX_CHUNK_BITMAPPED_PACKS_WIDTH * local_pack_int_id +
497 sizeof(uint32_t));
498 bp->pack_int_id = pack_int_id;
500 return 0;
503 int bsearch_one_midx(const struct object_id *oid, struct multi_pack_index *m,
504 uint32_t *result)
506 int ret = bsearch_hash(oid->hash, m->chunk_oid_fanout,
507 m->chunk_oid_lookup, the_hash_algo->rawsz,
508 result);
509 if (result)
510 *result += m->num_objects_in_base;
511 return ret;
514 int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m,
515 uint32_t *result)
517 for (; m; m = m->base_midx)
518 if (bsearch_one_midx(oid, m, result))
519 return 1;
520 return 0;
/*
 * Report whether `oid` is found by bsearch_midx() in `m` or one of its
 * base layers; the position of the match is not requested.
 */
int midx_has_oid(struct multi_pack_index *m, const struct object_id *oid)
{
	uint32_t *no_position = NULL;

	return bsearch_midx(oid, m, no_position);
}
528 struct object_id *nth_midxed_object_oid(struct object_id *oid,
529 struct multi_pack_index *m,
530 uint32_t n)
532 if (n >= m->num_objects + m->num_objects_in_base)
533 return NULL;
535 n = midx_for_object(&m, n);
537 oidread(oid, m->chunk_oid_lookup + st_mult(m->hash_len, n),
538 the_repository->hash_algo);
539 return oid;
542 off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos)
544 const unsigned char *offset_data;
545 uint32_t offset32;
547 pos = midx_for_object(&m, pos);
549 offset_data = m->chunk_object_offsets + (off_t)pos * MIDX_CHUNK_OFFSET_WIDTH;
550 offset32 = get_be32(offset_data + sizeof(uint32_t));
552 if (m->chunk_large_offsets && offset32 & MIDX_LARGE_OFFSET_NEEDED) {
553 if (sizeof(off_t) < sizeof(uint64_t))
554 die(_("multi-pack-index stores a 64-bit offset, but off_t is too small"));
556 offset32 ^= MIDX_LARGE_OFFSET_NEEDED;
557 if (offset32 >= m->chunk_large_offsets_len / sizeof(uint64_t))
558 die(_("multi-pack-index large offset out of bounds"));
559 return get_be64(m->chunk_large_offsets + sizeof(uint64_t) * offset32);
562 return offset32;
565 uint32_t nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos)
567 pos = midx_for_object(&m, pos);
569 return m->num_packs_in_base + get_be32(m->chunk_object_offsets +
570 (off_t)pos * MIDX_CHUNK_OFFSET_WIDTH);
573 int fill_midx_entry(struct repository *r,
574 const struct object_id *oid,
575 struct pack_entry *e,
576 struct multi_pack_index *m)
578 uint32_t pos;
579 uint32_t pack_int_id;
580 struct packed_git *p;
582 if (!bsearch_midx(oid, m, &pos))
583 return 0;
585 midx_for_object(&m, pos);
586 pack_int_id = nth_midxed_pack_int_id(m, pos);
588 if (prepare_midx_pack(r, m, pack_int_id))
589 return 0;
590 p = m->packs[pack_int_id - m->num_packs_in_base];
593 * We are about to tell the caller where they can locate the
594 * requested object. We better make sure the packfile is
595 * still here and can be accessed before supplying that
596 * answer, as it may have been deleted since the MIDX was
597 * loaded!
599 if (!is_pack_valid(p))
600 return 0;
602 if (oidset_size(&p->bad_objects) &&
603 oidset_contains(&p->bad_objects, oid))
604 return 0;
606 e->offset = nth_midxed_offset(m, pos);
607 e->p = p;
609 return 1;
/*
 * Compare `idx_or_pack_name` against `idx_name`, treating "foo.pack"
 * and "foo.idx" as equal to an `idx_name` of "foo.idx".
 *
 * Returns 0 on such a match; otherwise the result of strcmp() on the
 * parts that follow the common prefix, which has the same sign as a
 * strcmp() of the full strings.
 */
int cmp_idx_or_pack_name(const char *idx_or_pack_name,
			 const char *idx_name)
{
	const char *name_rest, *idx_rest;
	size_t common = 0;

	/* Measure the prefix the two names share. */
	while (idx_name[common] &&
	       idx_name[common] == idx_or_pack_name[common])
		common++;

	idx_rest = idx_name + common;
	name_rest = idx_or_pack_name + common;

	/*
	 * A shared "pack-1234." followed by "idx" on one side and "pack"
	 * on the other is a match as well.  The "idx" versus "idx" case
	 * needs no special handling: the prefix scan consumes both names
	 * completely and the final strcmp() returns 0.
	 *
	 * Technically this also matches "fooidx" and "foopack", but we'd
	 * never have such names in the first place.
	 */
	if (strcmp(idx_rest, "idx") == 0 && strcmp(name_rest, "pack") == 0)
		return 0;

	/*
	 * Ordering is decided by the first differing character, exactly
	 * as a raw strcmp() would, so this comparison is suitable for a
	 * binary search over a naively-sorted list.
	 */
	return strcmp(name_rest, idx_rest);
}
644 static int midx_contains_pack_1(struct multi_pack_index *m,
645 const char *idx_or_pack_name)
647 uint32_t first = 0, last = m->num_packs;
649 while (first < last) {
650 uint32_t mid = first + (last - first) / 2;
651 const char *current;
652 int cmp;
654 current = m->pack_names[mid];
655 cmp = cmp_idx_or_pack_name(idx_or_pack_name, current);
656 if (!cmp)
657 return 1;
658 if (cmp > 0) {
659 first = mid + 1;
660 continue;
662 last = mid;
665 return 0;
668 int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name)
670 for (; m; m = m->base_midx)
671 if (midx_contains_pack_1(m, idx_or_pack_name))
672 return 1;
673 return 0;
676 int midx_preferred_pack(struct multi_pack_index *m, uint32_t *pack_int_id)
678 if (m->preferred_pack_idx == -1) {
679 uint32_t midx_pos;
680 if (load_midx_revindex(m) < 0) {
681 m->preferred_pack_idx = -2;
682 return -1;
685 midx_pos = pack_pos_to_midx(m, m->num_objects_in_base);
687 m->preferred_pack_idx = nth_midxed_pack_int_id(m, midx_pos);
689 } else if (m->preferred_pack_idx == -2)
690 return -1; /* no revindex */
692 *pack_int_id = m->preferred_pack_idx;
693 return 0;
696 int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, int local)
698 struct multi_pack_index *m;
699 struct multi_pack_index *m_search;
701 prepare_repo_settings(r);
702 if (!r->settings.core_multi_pack_index)
703 return 0;
705 for (m_search = r->objects->multi_pack_index; m_search; m_search = m_search->next)
706 if (!strcmp(object_dir, m_search->object_dir))
707 return 1;
709 m = load_multi_pack_index(object_dir, local);
711 if (m) {
712 struct multi_pack_index *mp = r->objects->multi_pack_index;
713 if (mp) {
714 m->next = mp->next;
715 mp->next = m;
716 } else
717 r->objects->multi_pack_index = m;
718 return 1;
721 return 0;
724 int midx_checksum_valid(struct multi_pack_index *m)
726 return hashfile_checksum_valid(m->data, m->data_len);
729 struct clear_midx_data {
730 char **keep;
731 uint32_t keep_nr;
732 const char *ext;
735 static void clear_midx_file_ext(const char *full_path, size_t full_path_len UNUSED,
736 const char *file_name, void *_data)
738 struct clear_midx_data *data = _data;
739 uint32_t i;
741 if (!(starts_with(file_name, "multi-pack-index-") &&
742 ends_with(file_name, data->ext)))
743 return;
744 for (i = 0; i < data->keep_nr; i++) {
745 if (!strcmp(data->keep[i], file_name))
746 return;
748 if (unlink(full_path))
749 die_errno(_("failed to remove %s"), full_path);
752 void clear_midx_files_ext(const char *object_dir, const char *ext,
753 const char *keep_hash)
755 struct clear_midx_data data;
756 memset(&data, 0, sizeof(struct clear_midx_data));
758 if (keep_hash) {
759 ALLOC_ARRAY(data.keep, 1);
761 data.keep[0] = xstrfmt("multi-pack-index-%s.%s", keep_hash, ext);
762 data.keep_nr = 1;
764 data.ext = ext;
766 for_each_file_in_pack_dir(object_dir,
767 clear_midx_file_ext,
768 &data);
770 if (keep_hash)
771 free(data.keep[0]);
772 free(data.keep);
775 void clear_incremental_midx_files_ext(const char *object_dir, const char *ext,
776 char **keep_hashes,
777 uint32_t hashes_nr)
779 struct clear_midx_data data;
780 uint32_t i;
782 memset(&data, 0, sizeof(struct clear_midx_data));
784 ALLOC_ARRAY(data.keep, hashes_nr);
785 for (i = 0; i < hashes_nr; i++)
786 data.keep[i] = xstrfmt("multi-pack-index-%s.%s", keep_hashes[i],
787 ext);
788 data.keep_nr = hashes_nr;
789 data.ext = ext;
791 for_each_file_in_pack_subdir(object_dir, "multi-pack-index.d",
792 clear_midx_file_ext, &data);
794 for (i = 0; i < hashes_nr; i++)
795 free(data.keep[i]);
796 free(data.keep);
799 void clear_midx_file(struct repository *r)
801 struct strbuf midx = STRBUF_INIT;
803 get_midx_filename(&midx, r->objects->odb->path);
805 if (r->objects && r->objects->multi_pack_index) {
806 close_midx(r->objects->multi_pack_index);
807 r->objects->multi_pack_index = NULL;
810 if (remove_path(midx.buf))
811 die(_("failed to clear multi-pack-index at %s"), midx.buf);
813 clear_midx_files_ext(r->objects->odb->path, MIDX_EXT_BITMAP, NULL);
814 clear_midx_files_ext(r->objects->odb->path, MIDX_EXT_REV, NULL);
816 strbuf_release(&midx);
/* Set to 1 by midx_report(); verify_midx_file() resets and returns it. */
static int verify_midx_error;

/*
 * Print a printf-style message followed by a newline on stderr and
 * record that verification has found a problem.
 */
__attribute__((format (printf, 1, 2)))
static void midx_report(const char *fmt, ...)
{
	va_list args;

	verify_midx_error = 1;

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);

	fprintf(stderr, "\n");
}
/* An object position paired with the id of the pack that holds it. */
struct pair_pos_vs_id
{
	uint32_t pos;
	uint32_t pack_int_id;
};

/*
 * qsort() comparator ordering pairs by pack_int_id, highest id first.
 *
 * The ids are compared explicitly rather than subtracted: the
 * difference of two uint32_t values does not fit in an int once the
 * ids are more than INT_MAX apart, which would flip the sign of the
 * result.
 */
static int compare_pair_pos_vs_id(const void *_a, const void *_b)
{
	const struct pair_pos_vs_id *a = _a;
	const struct pair_pos_vs_id *b = _b;

	if (b->pack_int_id > a->pack_int_id)
		return 1;
	if (b->pack_int_id < a->pack_int_id)
		return -1;
	return 0;
}
/*
 * Calling display_progress() for every single item is costly, so only
 * do it for every SPARSE_PROGRESS_INTERVAL-th value of `n`.
 * The interval itself was arbitrarily chosen.
 */
#define SPARSE_PROGRESS_INTERVAL (1 << 12)
#define midx_display_sparse_progress(progress, n) \
	do { \
		uint64_t _n = (n); \
		if (!(_n % SPARSE_PROGRESS_INTERVAL)) \
			display_progress(progress, _n); \
	} while (0)
858 int verify_midx_file(struct repository *r, const char *object_dir, unsigned flags)
860 struct pair_pos_vs_id *pairs = NULL;
861 uint32_t i;
862 struct progress *progress = NULL;
863 struct multi_pack_index *m = load_multi_pack_index(object_dir, 1);
864 struct multi_pack_index *curr;
865 verify_midx_error = 0;
867 if (!m) {
868 int result = 0;
869 struct stat sb;
870 struct strbuf filename = STRBUF_INIT;
872 get_midx_filename(&filename, object_dir);
874 if (!stat(filename.buf, &sb)) {
875 error(_("multi-pack-index file exists, but failed to parse"));
876 result = 1;
878 strbuf_release(&filename);
879 return result;
882 if (!midx_checksum_valid(m))
883 midx_report(_("incorrect checksum"));
885 if (flags & MIDX_PROGRESS)
886 progress = start_delayed_progress(_("Looking for referenced packfiles"),
887 m->num_packs + m->num_packs_in_base);
888 for (i = 0; i < m->num_packs + m->num_packs_in_base; i++) {
889 if (prepare_midx_pack(r, m, i))
890 midx_report("failed to load pack in position %d", i);
892 display_progress(progress, i + 1);
894 stop_progress(&progress);
896 if (m->num_objects == 0) {
897 midx_report(_("the midx contains no oid"));
899 * Remaining tests assume that we have objects, so we can
900 * return here.
902 goto cleanup;
905 if (flags & MIDX_PROGRESS)
906 progress = start_sparse_progress(_("Verifying OID order in multi-pack-index"),
907 m->num_objects - 1);
909 for (curr = m; curr; curr = curr->base_midx) {
910 for (i = 0; i < m->num_objects - 1; i++) {
911 struct object_id oid1, oid2;
913 nth_midxed_object_oid(&oid1, m, m->num_objects_in_base + i);
914 nth_midxed_object_oid(&oid2, m, m->num_objects_in_base + i + 1);
916 if (oidcmp(&oid1, &oid2) >= 0)
917 midx_report(_("oid lookup out of order: oid[%d] = %s >= %s = oid[%d]"),
918 i, oid_to_hex(&oid1), oid_to_hex(&oid2), i + 1);
920 midx_display_sparse_progress(progress, i + 1);
923 stop_progress(&progress);
926 * Create an array mapping each object to its packfile id. Sort it
927 * to group the objects by packfile. Use this permutation to visit
928 * each of the objects and only require 1 packfile to be open at a
929 * time.
931 ALLOC_ARRAY(pairs, m->num_objects + m->num_objects_in_base);
932 for (i = 0; i < m->num_objects + m->num_objects_in_base; i++) {
933 pairs[i].pos = i;
934 pairs[i].pack_int_id = nth_midxed_pack_int_id(m, i);
937 if (flags & MIDX_PROGRESS)
938 progress = start_sparse_progress(_("Sorting objects by packfile"),
939 m->num_objects);
940 display_progress(progress, 0); /* TODO: Measure QSORT() progress */
941 QSORT(pairs, m->num_objects, compare_pair_pos_vs_id);
942 stop_progress(&progress);
944 if (flags & MIDX_PROGRESS)
945 progress = start_sparse_progress(_("Verifying object offsets"), m->num_objects);
946 for (i = 0; i < m->num_objects + m->num_objects_in_base; i++) {
947 struct object_id oid;
948 struct pack_entry e;
949 off_t m_offset, p_offset;
951 if (i > 0 && pairs[i-1].pack_int_id != pairs[i].pack_int_id &&
952 nth_midxed_pack(m, pairs[i-1].pack_int_id)) {
953 uint32_t pack_int_id = pairs[i-1].pack_int_id;
954 struct packed_git *p = nth_midxed_pack(m, pack_int_id);
956 close_pack_fd(p);
957 close_pack_index(p);
960 nth_midxed_object_oid(&oid, m, pairs[i].pos);
962 if (!fill_midx_entry(r, &oid, &e, m)) {
963 midx_report(_("failed to load pack entry for oid[%d] = %s"),
964 pairs[i].pos, oid_to_hex(&oid));
965 continue;
968 if (open_pack_index(e.p)) {
969 midx_report(_("failed to load pack-index for packfile %s"),
970 e.p->pack_name);
971 break;
974 m_offset = e.offset;
975 p_offset = find_pack_entry_one(oid.hash, e.p);
977 if (m_offset != p_offset)
978 midx_report(_("incorrect object offset for oid[%d] = %s: %"PRIx64" != %"PRIx64),
979 pairs[i].pos, oid_to_hex(&oid), m_offset, p_offset);
981 midx_display_sparse_progress(progress, i + 1);
983 stop_progress(&progress);
985 cleanup:
986 free(pairs);
987 close_midx(m);
989 return verify_midx_error;