Sync with 'maint'
[git/gitster.git] / midx.c
blobe82d4f2e65496b3e26c6e21c42e274406111aa3b
1 #define USE_THE_REPOSITORY_VARIABLE
3 #include "git-compat-util.h"
4 #include "config.h"
5 #include "dir.h"
6 #include "hex.h"
7 #include "packfile.h"
8 #include "object-file.h"
9 #include "hash-lookup.h"
10 #include "midx.h"
11 #include "progress.h"
12 #include "trace2.h"
13 #include "chunk-format.h"
14 #include "pack-bitmap.h"
15 #include "pack-revindex.h"
/*
 * Prototypes for non-static helpers that are defined further down in this
 * file.
 */
int midx_checksum_valid(struct multi_pack_index *m);

void clear_midx_files_ext(const char *object_dir,
			  const char *ext,
			  const char *keep_hash);

void clear_incremental_midx_files_ext(const char *object_dir,
				      const char *ext,
				      char **keep_hashes,
				      uint32_t hashes_nr);

int cmp_idx_or_pack_name(const char *idx_or_pack_name,
			 const char *idx_name);
26 const unsigned char *get_midx_checksum(struct multi_pack_index *m)
28 return m->data + m->data_len - the_hash_algo->rawsz;
/*
 * Append the path of the plain (no hash, no extension) multi-pack-index
 * file under object_dir to "out".
 */
void get_midx_filename(struct strbuf *out, const char *object_dir)
{
	const unsigned char *no_hash = NULL;
	const char *no_ext = NULL;

	get_midx_filename_ext(out, object_dir, no_hash, no_ext);
}
/*
 * Append "<object_dir>/pack/multi-pack-index" to "out".  When "ext" is
 * non-NULL, "-<hex of hash>.<ext>" is appended as well; "hash" is only
 * consulted in that case.
 */
void get_midx_filename_ext(struct strbuf *out, const char *object_dir,
			   const unsigned char *hash, const char *ext)
{
	strbuf_addf(out, "%s/pack/multi-pack-index", object_dir);
	if (!ext)
		return;
	strbuf_addf(out, "-%s.%s", hash_to_hex(hash), ext);
}
44 static int midx_read_oid_fanout(const unsigned char *chunk_start,
45 size_t chunk_size, void *data)
47 int i;
48 struct multi_pack_index *m = data;
49 m->chunk_oid_fanout = (uint32_t *)chunk_start;
51 if (chunk_size != 4 * 256) {
52 error(_("multi-pack-index OID fanout is of the wrong size"));
53 return 1;
55 for (i = 0; i < 255; i++) {
56 uint32_t oid_fanout1 = ntohl(m->chunk_oid_fanout[i]);
57 uint32_t oid_fanout2 = ntohl(m->chunk_oid_fanout[i+1]);
59 if (oid_fanout1 > oid_fanout2) {
60 error(_("oid fanout out of order: fanout[%d] = %"PRIx32" > %"PRIx32" = fanout[%d]"),
61 i, oid_fanout1, oid_fanout2, i + 1);
62 return 1;
65 m->num_objects = ntohl(m->chunk_oid_fanout[255]);
66 return 0;
69 static int midx_read_oid_lookup(const unsigned char *chunk_start,
70 size_t chunk_size, void *data)
72 struct multi_pack_index *m = data;
73 m->chunk_oid_lookup = chunk_start;
75 if (chunk_size != st_mult(m->hash_len, m->num_objects)) {
76 error(_("multi-pack-index OID lookup chunk is the wrong size"));
77 return 1;
79 return 0;
82 static int midx_read_object_offsets(const unsigned char *chunk_start,
83 size_t chunk_size, void *data)
85 struct multi_pack_index *m = data;
86 m->chunk_object_offsets = chunk_start;
88 if (chunk_size != st_mult(m->num_objects, MIDX_CHUNK_OFFSET_WIDTH)) {
89 error(_("multi-pack-index object offset chunk is the wrong size"));
90 return 1;
92 return 0;
95 #define MIDX_MIN_SIZE (MIDX_HEADER_SIZE + the_hash_algo->rawsz)
97 static struct multi_pack_index *load_multi_pack_index_one(const char *object_dir,
98 const char *midx_name,
99 int local)
101 struct multi_pack_index *m = NULL;
102 int fd;
103 struct stat st;
104 size_t midx_size;
105 void *midx_map = NULL;
106 uint32_t hash_version;
107 uint32_t i;
108 const char *cur_pack_name;
109 struct chunkfile *cf = NULL;
111 fd = git_open(midx_name);
113 if (fd < 0)
114 goto cleanup_fail;
115 if (fstat(fd, &st)) {
116 error_errno(_("failed to read %s"), midx_name);
117 goto cleanup_fail;
120 midx_size = xsize_t(st.st_size);
122 if (midx_size < MIDX_MIN_SIZE) {
123 error(_("multi-pack-index file %s is too small"), midx_name);
124 goto cleanup_fail;
127 midx_map = xmmap(NULL, midx_size, PROT_READ, MAP_PRIVATE, fd, 0);
128 close(fd);
130 FLEX_ALLOC_STR(m, object_dir, object_dir);
131 m->data = midx_map;
132 m->data_len = midx_size;
133 m->local = local;
135 m->signature = get_be32(m->data);
136 if (m->signature != MIDX_SIGNATURE)
137 die(_("multi-pack-index signature 0x%08x does not match signature 0x%08x"),
138 m->signature, MIDX_SIGNATURE);
140 m->version = m->data[MIDX_BYTE_FILE_VERSION];
141 if (m->version != MIDX_VERSION)
142 die(_("multi-pack-index version %d not recognized"),
143 m->version);
145 hash_version = m->data[MIDX_BYTE_HASH_VERSION];
146 if (hash_version != oid_version(the_hash_algo)) {
147 error(_("multi-pack-index hash version %u does not match version %u"),
148 hash_version, oid_version(the_hash_algo));
149 goto cleanup_fail;
151 m->hash_len = the_hash_algo->rawsz;
153 m->num_chunks = m->data[MIDX_BYTE_NUM_CHUNKS];
155 m->num_packs = get_be32(m->data + MIDX_BYTE_NUM_PACKS);
157 m->preferred_pack_idx = -1;
159 cf = init_chunkfile(NULL);
161 if (read_table_of_contents(cf, m->data, midx_size,
162 MIDX_HEADER_SIZE, m->num_chunks,
163 MIDX_CHUNK_ALIGNMENT))
164 goto cleanup_fail;
166 if (pair_chunk(cf, MIDX_CHUNKID_PACKNAMES, &m->chunk_pack_names, &m->chunk_pack_names_len))
167 die(_("multi-pack-index required pack-name chunk missing or corrupted"));
168 if (read_chunk(cf, MIDX_CHUNKID_OIDFANOUT, midx_read_oid_fanout, m))
169 die(_("multi-pack-index required OID fanout chunk missing or corrupted"));
170 if (read_chunk(cf, MIDX_CHUNKID_OIDLOOKUP, midx_read_oid_lookup, m))
171 die(_("multi-pack-index required OID lookup chunk missing or corrupted"));
172 if (read_chunk(cf, MIDX_CHUNKID_OBJECTOFFSETS, midx_read_object_offsets, m))
173 die(_("multi-pack-index required object offsets chunk missing or corrupted"));
175 pair_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets,
176 &m->chunk_large_offsets_len);
177 if (git_env_bool("GIT_TEST_MIDX_READ_BTMP", 1))
178 pair_chunk(cf, MIDX_CHUNKID_BITMAPPEDPACKS,
179 (const unsigned char **)&m->chunk_bitmapped_packs,
180 &m->chunk_bitmapped_packs_len);
182 if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1))
183 pair_chunk(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex,
184 &m->chunk_revindex_len);
186 CALLOC_ARRAY(m->pack_names, m->num_packs);
187 CALLOC_ARRAY(m->packs, m->num_packs);
189 cur_pack_name = (const char *)m->chunk_pack_names;
190 for (i = 0; i < m->num_packs; i++) {
191 const char *end;
192 size_t avail = m->chunk_pack_names_len -
193 (cur_pack_name - (const char *)m->chunk_pack_names);
195 m->pack_names[i] = cur_pack_name;
197 end = memchr(cur_pack_name, '\0', avail);
198 if (!end)
199 die(_("multi-pack-index pack-name chunk is too short"));
200 cur_pack_name = end + 1;
202 if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0)
203 die(_("multi-pack-index pack names out of order: '%s' before '%s'"),
204 m->pack_names[i - 1],
205 m->pack_names[i]);
208 trace2_data_intmax("midx", the_repository, "load/num_packs", m->num_packs);
209 trace2_data_intmax("midx", the_repository, "load/num_objects", m->num_objects);
211 free_chunkfile(cf);
212 return m;
214 cleanup_fail:
215 free(m);
216 free_chunkfile(cf);
217 if (midx_map)
218 munmap(midx_map, midx_size);
219 if (0 <= fd)
220 close(fd);
221 return NULL;
/*
 * Append the path of the directory that holds incremental MIDX layers,
 * "<object_dir>/pack/multi-pack-index.d", to "buf".
 */
void get_midx_chain_dirname(struct strbuf *buf, const char *object_dir)
{
	strbuf_addf(buf, "%s/pack/multi-pack-index.d",
		    object_dir);
}
/*
 * Append the path of the chain file, which lives inside the MIDX chain
 * directory and is named "multi-pack-index-chain", to "buf".
 */
void get_midx_chain_filename(struct strbuf *buf, const char *object_dir)
{
	static const char chain_basename[] = "/multi-pack-index-chain";

	get_midx_chain_dirname(buf, object_dir);
	strbuf_addstr(buf, chain_basename);
}
/*
 * Append the path of one file belonging to an incremental MIDX layer to
 * "buf": "<chain dir>/multi-pack-index-<hex of hash>.<ext>".
 */
void get_split_midx_filename_ext(struct strbuf *buf, const char *object_dir,
				 const unsigned char *hash, const char *ext)
{
	const char *hex;

	get_midx_chain_dirname(buf, object_dir);
	hex = hash_to_hex(hash);
	strbuf_addf(buf, "/multi-pack-index-%s.%s", hex, ext);
}
242 static int open_multi_pack_index_chain(const char *chain_file,
243 int *fd, struct stat *st)
245 *fd = git_open(chain_file);
246 if (*fd < 0)
247 return 0;
248 if (fstat(*fd, st)) {
249 close(*fd);
250 return 0;
252 if (st->st_size < the_hash_algo->hexsz) {
253 close(*fd);
254 if (!st->st_size) {
255 /* treat empty files the same as missing */
256 errno = ENOENT;
257 } else {
258 warning(_("multi-pack-index chain file too small"));
259 errno = EINVAL;
261 return 0;
263 return 1;
266 static int add_midx_to_chain(struct multi_pack_index *midx,
267 struct multi_pack_index *midx_chain)
269 if (midx_chain) {
270 if (unsigned_add_overflows(midx_chain->num_packs,
271 midx_chain->num_packs_in_base)) {
272 warning(_("pack count in base MIDX too high: %"PRIuMAX),
273 (uintmax_t)midx_chain->num_packs_in_base);
274 return 0;
276 if (unsigned_add_overflows(midx_chain->num_objects,
277 midx_chain->num_objects_in_base)) {
278 warning(_("object count in base MIDX too high: %"PRIuMAX),
279 (uintmax_t)midx_chain->num_objects_in_base);
280 return 0;
282 midx->num_packs_in_base = midx_chain->num_packs +
283 midx_chain->num_packs_in_base;
284 midx->num_objects_in_base = midx_chain->num_objects +
285 midx_chain->num_objects_in_base;
288 midx->base_midx = midx_chain;
289 midx->has_chain = 1;
291 return 1;
294 static struct multi_pack_index *load_midx_chain_fd_st(const char *object_dir,
295 int local,
296 int fd, struct stat *st,
297 int *incomplete_chain)
299 struct multi_pack_index *midx_chain = NULL;
300 struct strbuf buf = STRBUF_INIT;
301 int valid = 1;
302 uint32_t i, count;
303 FILE *fp = xfdopen(fd, "r");
305 count = st->st_size / (the_hash_algo->hexsz + 1);
307 for (i = 0; i < count; i++) {
308 struct multi_pack_index *m;
309 struct object_id layer;
311 if (strbuf_getline_lf(&buf, fp) == EOF)
312 break;
314 if (get_oid_hex(buf.buf, &layer)) {
315 warning(_("invalid multi-pack-index chain: line '%s' "
316 "not a hash"),
317 buf.buf);
318 valid = 0;
319 break;
322 valid = 0;
324 strbuf_reset(&buf);
325 get_split_midx_filename_ext(&buf, object_dir, layer.hash,
326 MIDX_EXT_MIDX);
327 m = load_multi_pack_index_one(object_dir, buf.buf, local);
329 if (m) {
330 if (add_midx_to_chain(m, midx_chain)) {
331 midx_chain = m;
332 valid = 1;
333 } else {
334 close_midx(m);
337 if (!valid) {
338 warning(_("unable to find all multi-pack index files"));
339 break;
343 fclose(fp);
344 strbuf_release(&buf);
346 *incomplete_chain = !valid;
347 return midx_chain;
350 static struct multi_pack_index *load_multi_pack_index_chain(const char *object_dir,
351 int local)
353 struct strbuf chain_file = STRBUF_INIT;
354 struct stat st;
355 int fd;
356 struct multi_pack_index *m = NULL;
358 get_midx_chain_filename(&chain_file, object_dir);
359 if (open_multi_pack_index_chain(chain_file.buf, &fd, &st)) {
360 int incomplete;
361 /* ownership of fd is taken over by load function */
362 m = load_midx_chain_fd_st(object_dir, local, fd, &st,
363 &incomplete);
366 strbuf_release(&chain_file);
367 return m;
370 struct multi_pack_index *load_multi_pack_index(const char *object_dir,
371 int local)
373 struct strbuf midx_name = STRBUF_INIT;
374 struct multi_pack_index *m;
376 get_midx_filename(&midx_name, object_dir);
378 m = load_multi_pack_index_one(object_dir, midx_name.buf, local);
379 if (!m)
380 m = load_multi_pack_index_chain(object_dir, local);
382 strbuf_release(&midx_name);
384 return m;
387 void close_midx(struct multi_pack_index *m)
389 uint32_t i;
391 if (!m)
392 return;
394 close_midx(m->next);
395 close_midx(m->base_midx);
397 munmap((unsigned char *)m->data, m->data_len);
399 for (i = 0; i < m->num_packs; i++) {
400 if (m->packs[i])
401 m->packs[i]->multi_pack_index = 0;
403 FREE_AND_NULL(m->packs);
404 FREE_AND_NULL(m->pack_names);
405 free(m);
408 static uint32_t midx_for_object(struct multi_pack_index **_m, uint32_t pos)
410 struct multi_pack_index *m = *_m;
411 while (m && pos < m->num_objects_in_base)
412 m = m->base_midx;
414 if (!m)
415 BUG("NULL multi-pack-index for object position: %"PRIu32, pos);
417 if (pos >= m->num_objects + m->num_objects_in_base)
418 die(_("invalid MIDX object position, MIDX is likely corrupt"));
420 *_m = m;
422 return pos - m->num_objects_in_base;
425 static uint32_t midx_for_pack(struct multi_pack_index **_m,
426 uint32_t pack_int_id)
428 struct multi_pack_index *m = *_m;
429 while (m && pack_int_id < m->num_packs_in_base)
430 m = m->base_midx;
432 if (!m)
433 BUG("NULL multi-pack-index for pack ID: %"PRIu32, pack_int_id);
435 if (pack_int_id >= m->num_packs + m->num_packs_in_base)
436 die(_("bad pack-int-id: %u (%u total packs)"),
437 pack_int_id, m->num_packs + m->num_packs_in_base);
439 *_m = m;
441 return pack_int_id - m->num_packs_in_base;
444 int prepare_midx_pack(struct repository *r, struct multi_pack_index *m,
445 uint32_t pack_int_id)
447 struct strbuf pack_name = STRBUF_INIT;
448 struct strbuf key = STRBUF_INIT;
449 struct packed_git *p;
451 pack_int_id = midx_for_pack(&m, pack_int_id);
453 if (m->packs[pack_int_id])
454 return 0;
456 strbuf_addf(&pack_name, "%s/pack/%s", m->object_dir,
457 m->pack_names[pack_int_id]);
459 /* pack_map holds the ".pack" name, but we have the .idx */
460 strbuf_addbuf(&key, &pack_name);
461 strbuf_strip_suffix(&key, ".idx");
462 strbuf_addstr(&key, ".pack");
463 p = hashmap_get_entry_from_hash(&r->objects->pack_map,
464 strhash(key.buf), key.buf,
465 struct packed_git, packmap_ent);
466 if (!p) {
467 p = add_packed_git(pack_name.buf, pack_name.len, m->local);
468 if (p) {
469 install_packed_git(r, p);
470 list_add_tail(&p->mru, &r->objects->packed_git_mru);
474 strbuf_release(&pack_name);
475 strbuf_release(&key);
477 if (!p)
478 return 1;
480 p->multi_pack_index = 1;
481 m->packs[pack_int_id] = p;
483 return 0;
486 struct packed_git *nth_midxed_pack(struct multi_pack_index *m,
487 uint32_t pack_int_id)
489 uint32_t local_pack_int_id = midx_for_pack(&m, pack_int_id);
490 return m->packs[local_pack_int_id];
493 #define MIDX_CHUNK_BITMAPPED_PACKS_WIDTH (2 * sizeof(uint32_t))
495 int nth_bitmapped_pack(struct repository *r, struct multi_pack_index *m,
496 struct bitmapped_pack *bp, uint32_t pack_int_id)
498 uint32_t local_pack_int_id = midx_for_pack(&m, pack_int_id);
500 if (!m->chunk_bitmapped_packs)
501 return error(_("MIDX does not contain the BTMP chunk"));
503 if (prepare_midx_pack(r, m, pack_int_id))
504 return error(_("could not load bitmapped pack %"PRIu32), pack_int_id);
506 bp->p = m->packs[local_pack_int_id];
507 bp->bitmap_pos = get_be32((char *)m->chunk_bitmapped_packs +
508 MIDX_CHUNK_BITMAPPED_PACKS_WIDTH * local_pack_int_id);
509 bp->bitmap_nr = get_be32((char *)m->chunk_bitmapped_packs +
510 MIDX_CHUNK_BITMAPPED_PACKS_WIDTH * local_pack_int_id +
511 sizeof(uint32_t));
512 bp->pack_int_id = pack_int_id;
513 bp->from_midx = m;
515 return 0;
518 int bsearch_one_midx(const struct object_id *oid, struct multi_pack_index *m,
519 uint32_t *result)
521 int ret = bsearch_hash(oid->hash, m->chunk_oid_fanout,
522 m->chunk_oid_lookup, the_hash_algo->rawsz,
523 result);
524 if (result)
525 *result += m->num_objects_in_base;
526 return ret;
529 int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m,
530 uint32_t *result)
532 for (; m; m = m->base_midx)
533 if (bsearch_one_midx(oid, m, result))
534 return 1;
535 return 0;
/*
 * Report whether "oid" is present anywhere in the MIDX chain starting at
 * "m"; the position of the match is not needed and not requested.
 */
int midx_has_oid(struct multi_pack_index *m, const struct object_id *oid)
{
	uint32_t *no_position = NULL;

	return bsearch_midx(oid, m, no_position);
}
543 struct object_id *nth_midxed_object_oid(struct object_id *oid,
544 struct multi_pack_index *m,
545 uint32_t n)
547 if (n >= m->num_objects + m->num_objects_in_base)
548 return NULL;
550 n = midx_for_object(&m, n);
552 oidread(oid, m->chunk_oid_lookup + st_mult(m->hash_len, n),
553 the_repository->hash_algo);
554 return oid;
557 off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos)
559 const unsigned char *offset_data;
560 uint32_t offset32;
562 pos = midx_for_object(&m, pos);
564 offset_data = m->chunk_object_offsets + (off_t)pos * MIDX_CHUNK_OFFSET_WIDTH;
565 offset32 = get_be32(offset_data + sizeof(uint32_t));
567 if (m->chunk_large_offsets && offset32 & MIDX_LARGE_OFFSET_NEEDED) {
568 if (sizeof(off_t) < sizeof(uint64_t))
569 die(_("multi-pack-index stores a 64-bit offset, but off_t is too small"));
571 offset32 ^= MIDX_LARGE_OFFSET_NEEDED;
572 if (offset32 >= m->chunk_large_offsets_len / sizeof(uint64_t))
573 die(_("multi-pack-index large offset out of bounds"));
574 return get_be64(m->chunk_large_offsets + sizeof(uint64_t) * offset32);
577 return offset32;
580 uint32_t nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos)
582 pos = midx_for_object(&m, pos);
584 return m->num_packs_in_base + get_be32(m->chunk_object_offsets +
585 (off_t)pos * MIDX_CHUNK_OFFSET_WIDTH);
588 int fill_midx_entry(struct repository *r,
589 const struct object_id *oid,
590 struct pack_entry *e,
591 struct multi_pack_index *m)
593 uint32_t pos;
594 uint32_t pack_int_id;
595 struct packed_git *p;
597 if (!bsearch_midx(oid, m, &pos))
598 return 0;
600 midx_for_object(&m, pos);
601 pack_int_id = nth_midxed_pack_int_id(m, pos);
603 if (prepare_midx_pack(r, m, pack_int_id))
604 return 0;
605 p = m->packs[pack_int_id - m->num_packs_in_base];
608 * We are about to tell the caller where they can locate the
609 * requested object. We better make sure the packfile is
610 * still here and can be accessed before supplying that
611 * answer, as it may have been deleted since the MIDX was
612 * loaded!
614 if (!is_pack_valid(p))
615 return 0;
617 if (oidset_size(&p->bad_objects) &&
618 oidset_contains(&p->bad_objects, oid))
619 return 0;
621 e->offset = nth_midxed_offset(m, pos);
622 e->p = p;
624 return 1;
/*
 * Compare "idx_or_pack_name" against "idx_name" such that "foo.idx"
 * matches either "foo.pack" or "foo.idx".  Returns 0 on a match, and
 * otherwise the result of strcmp() on the parts that follow the common
 * prefix.
 */
int cmp_idx_or_pack_name(const char *idx_or_pack_name,
			 const char *idx_name)
{
	size_t common = 0;
	const char *name_tail, *idx_tail;

	/* Measure the initial matching prefix. */
	while (idx_name[common] &&
	       idx_name[common] == idx_or_pack_name[common])
		common++;

	idx_tail = idx_name + common;
	name_tail = idx_or_pack_name + common;

	/*
	 * After a partial match of "pack-1234." the leftovers may be "idx"
	 * and "pack" respectively, which counts as a match too.  A full
	 * "idx"/"idx" match never reaches this test with those tails; it is
	 * handled by the final strcmp() returning 0.
	 *
	 * Technically this also accepts "fooidx" vs "foopack", but such
	 * names never occur in practice.
	 */
	if (!strcmp(idx_tail, "idx") && !strcmp(name_tail, "pack"))
		return 0;

	/*
	 * Comparing the tails orders by the first differing character, so
	 * the result agrees with a plain strcmp() of the full names and can
	 * be used to binary search a naively sorted list.
	 */
	return strcmp(name_tail, idx_tail);
}
659 static int midx_contains_pack_1(struct multi_pack_index *m,
660 const char *idx_or_pack_name)
662 uint32_t first = 0, last = m->num_packs;
664 while (first < last) {
665 uint32_t mid = first + (last - first) / 2;
666 const char *current;
667 int cmp;
669 current = m->pack_names[mid];
670 cmp = cmp_idx_or_pack_name(idx_or_pack_name, current);
671 if (!cmp)
672 return 1;
673 if (cmp > 0) {
674 first = mid + 1;
675 continue;
677 last = mid;
680 return 0;
683 int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name)
685 for (; m; m = m->base_midx)
686 if (midx_contains_pack_1(m, idx_or_pack_name))
687 return 1;
688 return 0;
691 int midx_preferred_pack(struct multi_pack_index *m, uint32_t *pack_int_id)
693 if (m->preferred_pack_idx == -1) {
694 uint32_t midx_pos;
695 if (load_midx_revindex(m) < 0) {
696 m->preferred_pack_idx = -2;
697 return -1;
700 midx_pos = pack_pos_to_midx(m, m->num_objects_in_base);
702 m->preferred_pack_idx = nth_midxed_pack_int_id(m, midx_pos);
704 } else if (m->preferred_pack_idx == -2)
705 return -1; /* no revindex */
707 *pack_int_id = m->preferred_pack_idx;
708 return 0;
711 int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, int local)
713 struct multi_pack_index *m;
714 struct multi_pack_index *m_search;
716 prepare_repo_settings(r);
717 if (!r->settings.core_multi_pack_index)
718 return 0;
720 for (m_search = r->objects->multi_pack_index; m_search; m_search = m_search->next)
721 if (!strcmp(object_dir, m_search->object_dir))
722 return 1;
724 m = load_multi_pack_index(object_dir, local);
726 if (m) {
727 struct multi_pack_index *mp = r->objects->multi_pack_index;
728 if (mp) {
729 m->next = mp->next;
730 mp->next = m;
731 } else
732 r->objects->multi_pack_index = m;
733 return 1;
736 return 0;
739 int midx_checksum_valid(struct multi_pack_index *m)
741 return hashfile_checksum_valid(m->data, m->data_len);
/* State handed to clear_midx_file_ext() while walking a pack directory. */
struct clear_midx_data {
	/* File names (not paths) that must not be removed. */
	char **keep;
	/* Number of entries in "keep". */
	uint32_t keep_nr;
	/* Only file names ending in this string are candidates for removal. */
	const char *ext;
};
750 static void clear_midx_file_ext(const char *full_path, size_t full_path_len UNUSED,
751 const char *file_name, void *_data)
753 struct clear_midx_data *data = _data;
754 uint32_t i;
756 if (!(starts_with(file_name, "multi-pack-index-") &&
757 ends_with(file_name, data->ext)))
758 return;
759 for (i = 0; i < data->keep_nr; i++) {
760 if (!strcmp(data->keep[i], file_name))
761 return;
763 if (unlink(full_path))
764 die_errno(_("failed to remove %s"), full_path);
767 void clear_midx_files_ext(const char *object_dir, const char *ext,
768 const char *keep_hash)
770 struct clear_midx_data data;
771 memset(&data, 0, sizeof(struct clear_midx_data));
773 if (keep_hash) {
774 ALLOC_ARRAY(data.keep, 1);
776 data.keep[0] = xstrfmt("multi-pack-index-%s.%s", keep_hash, ext);
777 data.keep_nr = 1;
779 data.ext = ext;
781 for_each_file_in_pack_dir(object_dir,
782 clear_midx_file_ext,
783 &data);
785 if (keep_hash)
786 free(data.keep[0]);
787 free(data.keep);
790 void clear_incremental_midx_files_ext(const char *object_dir, const char *ext,
791 char **keep_hashes,
792 uint32_t hashes_nr)
794 struct clear_midx_data data;
795 uint32_t i;
797 memset(&data, 0, sizeof(struct clear_midx_data));
799 ALLOC_ARRAY(data.keep, hashes_nr);
800 for (i = 0; i < hashes_nr; i++)
801 data.keep[i] = xstrfmt("multi-pack-index-%s.%s", keep_hashes[i],
802 ext);
803 data.keep_nr = hashes_nr;
804 data.ext = ext;
806 for_each_file_in_pack_subdir(object_dir, "multi-pack-index.d",
807 clear_midx_file_ext, &data);
809 for (i = 0; i < hashes_nr; i++)
810 free(data.keep[i]);
811 free(data.keep);
814 void clear_midx_file(struct repository *r)
816 struct strbuf midx = STRBUF_INIT;
818 get_midx_filename(&midx, r->objects->odb->path);
820 if (r->objects && r->objects->multi_pack_index) {
821 close_midx(r->objects->multi_pack_index);
822 r->objects->multi_pack_index = NULL;
825 if (remove_path(midx.buf))
826 die(_("failed to clear multi-pack-index at %s"), midx.buf);
828 clear_midx_files_ext(r->objects->odb->path, MIDX_EXT_BITMAP, NULL);
829 clear_midx_files_ext(r->objects->odb->path, MIDX_EXT_REV, NULL);
831 strbuf_release(&midx);
/* Set to 1 by midx_report(); reset and returned by verify_midx_file(). */
static int verify_midx_error;

/*
 * Print a printf-style message followed by a newline to stderr and record
 * that a verification problem was seen.
 */
__attribute__((format (printf, 1, 2)))
static void midx_report(const char *fmt, ...)
{
	va_list args;

	verify_midx_error = 1;

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);

	fputc('\n', stderr);
}
/* Associates an object's MIDX position with the pack that holds it. */
struct pair_pos_vs_id
{
	uint32_t pos;
	uint32_t pack_int_id;
};

/*
 * qsort()-style comparator ordering pairs by descending pack_int_id.
 *
 * The ids are unsigned 32-bit values, so returning their difference as an
 * int yields the wrong sign once they are 2^31 or more apart; compare
 * explicitly instead.
 */
static int compare_pair_pos_vs_id(const void *_a, const void *_b)
{
	const struct pair_pos_vs_id *a = _a;
	const struct pair_pos_vs_id *b = _b;

	if (b->pack_int_id > a->pack_int_id)
		return 1;
	if (b->pack_int_id < a->pack_int_id)
		return -1;
	return 0;
}
/*
 * Calling display_progress() for every single item is needlessly
 * expensive, so only forward every SPARSE_PROGRESS_INTERVAL-th value.
 * The interval itself was picked arbitrarily.
 */
#define SPARSE_PROGRESS_INTERVAL (1 << 12)
#define midx_display_sparse_progress(progress, n) \
	do { \
		uint64_t sparse_n_ = (n); \
		if (!(sparse_n_ % SPARSE_PROGRESS_INTERVAL)) \
			display_progress(progress, sparse_n_); \
	} while (0)
873 int verify_midx_file(struct repository *r, const char *object_dir, unsigned flags)
875 struct pair_pos_vs_id *pairs = NULL;
876 uint32_t i;
877 struct progress *progress = NULL;
878 struct multi_pack_index *m = load_multi_pack_index(object_dir, 1);
879 struct multi_pack_index *curr;
880 verify_midx_error = 0;
882 if (!m) {
883 int result = 0;
884 struct stat sb;
885 struct strbuf filename = STRBUF_INIT;
887 get_midx_filename(&filename, object_dir);
889 if (!stat(filename.buf, &sb)) {
890 error(_("multi-pack-index file exists, but failed to parse"));
891 result = 1;
893 strbuf_release(&filename);
894 return result;
897 if (!midx_checksum_valid(m))
898 midx_report(_("incorrect checksum"));
900 if (flags & MIDX_PROGRESS)
901 progress = start_delayed_progress(_("Looking for referenced packfiles"),
902 m->num_packs + m->num_packs_in_base);
903 for (i = 0; i < m->num_packs + m->num_packs_in_base; i++) {
904 if (prepare_midx_pack(r, m, i))
905 midx_report("failed to load pack in position %d", i);
907 display_progress(progress, i + 1);
909 stop_progress(&progress);
911 if (m->num_objects == 0) {
912 midx_report(_("the midx contains no oid"));
914 * Remaining tests assume that we have objects, so we can
915 * return here.
917 goto cleanup;
920 if (flags & MIDX_PROGRESS)
921 progress = start_sparse_progress(_("Verifying OID order in multi-pack-index"),
922 m->num_objects - 1);
924 for (curr = m; curr; curr = curr->base_midx) {
925 for (i = 0; i < m->num_objects - 1; i++) {
926 struct object_id oid1, oid2;
928 nth_midxed_object_oid(&oid1, m, m->num_objects_in_base + i);
929 nth_midxed_object_oid(&oid2, m, m->num_objects_in_base + i + 1);
931 if (oidcmp(&oid1, &oid2) >= 0)
932 midx_report(_("oid lookup out of order: oid[%d] = %s >= %s = oid[%d]"),
933 i, oid_to_hex(&oid1), oid_to_hex(&oid2), i + 1);
935 midx_display_sparse_progress(progress, i + 1);
938 stop_progress(&progress);
941 * Create an array mapping each object to its packfile id. Sort it
942 * to group the objects by packfile. Use this permutation to visit
943 * each of the objects and only require 1 packfile to be open at a
944 * time.
946 ALLOC_ARRAY(pairs, m->num_objects + m->num_objects_in_base);
947 for (i = 0; i < m->num_objects + m->num_objects_in_base; i++) {
948 pairs[i].pos = i;
949 pairs[i].pack_int_id = nth_midxed_pack_int_id(m, i);
952 if (flags & MIDX_PROGRESS)
953 progress = start_sparse_progress(_("Sorting objects by packfile"),
954 m->num_objects);
955 display_progress(progress, 0); /* TODO: Measure QSORT() progress */
956 QSORT(pairs, m->num_objects, compare_pair_pos_vs_id);
957 stop_progress(&progress);
959 if (flags & MIDX_PROGRESS)
960 progress = start_sparse_progress(_("Verifying object offsets"), m->num_objects);
961 for (i = 0; i < m->num_objects + m->num_objects_in_base; i++) {
962 struct object_id oid;
963 struct pack_entry e;
964 off_t m_offset, p_offset;
966 if (i > 0 && pairs[i-1].pack_int_id != pairs[i].pack_int_id &&
967 nth_midxed_pack(m, pairs[i-1].pack_int_id)) {
968 uint32_t pack_int_id = pairs[i-1].pack_int_id;
969 struct packed_git *p = nth_midxed_pack(m, pack_int_id);
971 close_pack_fd(p);
972 close_pack_index(p);
975 nth_midxed_object_oid(&oid, m, pairs[i].pos);
977 if (!fill_midx_entry(r, &oid, &e, m)) {
978 midx_report(_("failed to load pack entry for oid[%d] = %s"),
979 pairs[i].pos, oid_to_hex(&oid));
980 continue;
983 if (open_pack_index(e.p)) {
984 midx_report(_("failed to load pack-index for packfile %s"),
985 e.p->pack_name);
986 break;
989 m_offset = e.offset;
990 p_offset = find_pack_entry_one(&oid, e.p);
992 if (m_offset != p_offset)
993 midx_report(_("incorrect object offset for oid[%d] = %s: %"PRIx64" != %"PRIx64),
994 pairs[i].pos, oid_to_hex(&oid), m_offset, p_offset);
996 midx_display_sparse_progress(progress, i + 1);
998 stop_progress(&progress);
1000 cleanup:
1001 free(pairs);
1002 close_midx(m);
1004 return verify_midx_error;