2 * Copyright (c) 2019 Ori Bernstein <ori@openbsd.org>
3 * Copyright (c) 2020, 2022 Stefan Sperling <stsp@openbsd.org>
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 #include "got_compat.h"
20 #include <sys/queue.h>
23 #include <sys/types.h>
43 #include "got_error.h"
44 #include "got_object.h"
46 #include "got_lib_hash.h"
47 #include "got_lib_delta.h"
48 #include "got_lib_inflate.h"
49 #include "got_lib_object.h"
50 #include "got_lib_object_parse.h"
51 #include "got_lib_object_idset.h"
52 #include "got_lib_privsep.h"
53 #include "got_lib_pack.h"
54 #include "got_lib_ratelimit.h"
55 #include "got_lib_pack_index.h"
56 #include "got_lib_delta_cache.h"
/*
 * Bookkeeping record for one object found in the pack file being indexed.
 *
 * It carries the object ID (meaningful only once the object has been
 * resolved), the position and lengths of the on-disk type+size header and
 * of the object data which follows it, and, for deltified objects, a
 * reference to the delta base: the base object ID for ref deltas, or the
 * base offset and the length of its on-disk encoding for offset deltas.
 */
58 struct got_indexed_object
{
59 struct got_object_id id
;
62 * Has this object been fully resolved?
63 * If so, we know its ID, otherwise we don't and 'id' is invalid.
67 /* Offset of type+size field for this object in pack file. */
70 /* Type+size values parsed from pack file. */
74 /* Length of on-disk type+size data. */
77 /* Length of object data following type+size. */
85 struct got_object_id ref_id
;
88 /* For offset deltas. */
90 size_t base_offsetlen
;
/*
 * Store the 32-bit value n into the buffer at b.
 * NOTE(review): the name and the callers (which place the pack index magic
 * and version 4 bytes apart before writing 8 bytes) suggest a 4-byte
 * big-endian encoding -- confirm against the function body.
 */
96 putbe32(char *b
, uint32_t n
)
/*
 * Consume len bytes from file descriptor fd and checksum them.
 *
 * The data is read in chunks no larger than the local buffer. Each chunk
 * is folded into the running CRC32 stored at *crc and fed to the hash
 * context ctx. A failing read(2) is reported as an errno-based error.
 * The loop counter is decremented by the number of bytes actually read,
 * so short reads simply cause further iterations.
 */
104 static const struct got_error
*
105 read_checksum(uint32_t *crc
, struct got_hash
*ctx
, int fd
, size_t len
)
111 for (n
= len
; n
> 0; n
-= r
){
112 r
= read(fd
, buf
, n
> sizeof(buf
) ? sizeof(buf
) : n
);
114 return got_error_from_errno("read");
118 *crc
= crc32(*crc
, buf
, r
);
120 got_hash_update(ctx
, buf
, r
);
/*
 * Feed len bytes read from the stdio stream f into the hash context ctx.
 *
 * The stream is read with fread(3) in chunks no larger than the local
 * buffer, starting at the current file position. A read problem is
 * reported via got_ferror() with GOT_ERR_IO as the fallback error code.
 * NOTE(review): the exact condition which triggers the error return is
 * not spelled out here -- presumably a zero-length read; confirm.
 */
126 static const struct got_error
*
127 read_file_digest(struct got_hash
*ctx
, FILE *f
, size_t len
)
132 for (n
= len
; n
> 0; n
-= r
) {
133 r
= fread(buf
, 1, n
> sizeof(buf
) ? sizeof(buf
) : n
, f
);
137 return got_ferror(f
, GOT_ERR_IO
);
139 got_hash_update(ctx
, buf
, r
);
/*
 * Read the packed object located at obj->off and record what can be learned
 * about it without resolving deltas.
 *
 * The on-disk type+size field is parsed into obj->type, obj->size and
 * obj->tslen. All on-disk bytes which belong to the object are folded into
 * obj->crc and into pack_hash_ctx, the running hash of the whole pack file.
 * Two access paths exist side by side: one reads from the memory mapping
 * pack->map at mapoff, the other reads from the descriptor pack->fd.
 *
 * What happens next depends on the object type:
 *  - Blob, commit, tree and tag objects are inflated. If obj->size exceeds
 *    GOT_DELTA_RESULT_SIZE_CACHED_MAX the data goes to tmpfile, otherwise
 *    into memory. The object ID is then computed over a header of the form
 *    "<type label> <size>" including its terminating NUL byte, followed by
 *    the inflated data, and stored in obj->id.
 *  - Ref deltas get an ID filled with 0xff bytes as a placeholder, the ID of
 *    the delta base is copied into obj->delta.ref.ref_id, the delta data is
 *    inflated only to learn its on-disk length, and obj->len is extended by
 *    the digest length.
 *  - Offset deltas get the same placeholder ID, the base offset is parsed
 *    into obj->delta.ofs, the delta data is inflated only to learn its
 *    on-disk length, and obj->len is extended by the length of the encoded
 *    base offset.
 *  - Any other type yields GOT_ERR_OBJ_TYPE.
 *
 * Errors: errno-based errors for failing lseek/fseek/read/asprintf calls,
 * GOT_ERR_BAD_PACKFILE if delta base information would extend past the end
 * of the pack file or cannot be read in full, and GOT_ERR_RANGE if advancing
 * mapoff would overflow.
 */
145 static const struct got_error
*
146 read_packed_object(struct got_pack
*pack
, struct got_indexed_object
*obj
,
147 FILE *tmpfile
, struct got_hash
*pack_hash_ctx
)
149 const struct got_error
*err
= NULL
;
151 uint8_t *data
= NULL
;
156 const char *obj_label
;
157 size_t mapoff
= obj
->off
;
158 struct got_inflate_checksum csum
;
161 memset(&csum
, 0, sizeof(csum
));
162 csum
.input_ctx
= pack_hash_ctx
;
163 csum
.input_crc
= &obj
->crc
;
165 digest_len
= got_hash_digest_length(pack
->algo
);
167 err
= got_pack_parse_object_type_and_size(&obj
->type
, &obj
->size
,
168 &obj
->tslen
, pack
, obj
->off
);
173 obj
->crc
= crc32(obj
->crc
, pack
->map
+ mapoff
, obj
->tslen
);
174 got_hash_update(pack_hash_ctx
, pack
->map
+ mapoff
, obj
->tslen
);
175 mapoff
+= obj
->tslen
;
177 /* XXX Seek back and get the CRC of on-disk type+size bytes. */
178 if (lseek(pack
->fd
, obj
->off
, SEEK_SET
) == -1)
179 return got_error_from_errno("lseek");
180 err
= read_checksum(&obj
->crc
, pack_hash_ctx
,
181 pack
->fd
, obj
->tslen
);
/* Non-deltified objects: inflate the data and compute the object ID. */
187 case GOT_OBJ_TYPE_BLOB
:
188 case GOT_OBJ_TYPE_COMMIT
:
189 case GOT_OBJ_TYPE_TREE
:
190 case GOT_OBJ_TYPE_TAG
:
191 if (obj
->size
> GOT_DELTA_RESULT_SIZE_CACHED_MAX
) {
192 if (fseek(tmpfile
, 0L, SEEK_SET
) == -1) {
193 err
= got_error_from_errno("fseek");
197 err
= got_inflate_to_file_mmap(&datalen
,
198 &obj
->len
, &csum
, pack
->map
, mapoff
,
199 pack
->filesize
- mapoff
, tmpfile
);
201 err
= got_inflate_to_file_fd(&datalen
,
202 &obj
->len
, &csum
, pack
->fd
, tmpfile
);
206 err
= got_inflate_to_mem_mmap(&data
, &datalen
,
207 &obj
->len
, &csum
, pack
->map
, mapoff
,
208 pack
->filesize
- mapoff
);
210 err
= got_inflate_to_mem_fd(&data
, &datalen
,
211 &obj
->len
, &csum
, obj
->size
, pack
->fd
);
216 got_hash_init(&ctx
, pack
->algo
);
217 err
= got_object_type_label(&obj_label
, obj
->type
);
222 if (asprintf(&header
, "%s %lld", obj_label
,
223 (long long)obj
->size
) == -1) {
224 err
= got_error_from_errno("asprintf");
/* The hashed header includes its terminating NUL byte. */
228 headerlen
= strlen(header
) + 1;
229 got_hash_update(&ctx
, header
, headerlen
);
230 if (obj
->size
> GOT_DELTA_RESULT_SIZE_CACHED_MAX
) {
231 err
= read_file_digest(&ctx
, tmpfile
, datalen
);
238 got_hash_update(&ctx
, data
, datalen
);
239 got_hash_final_object_id(&ctx
, &obj
->id
);
/* Ref delta: the ID stays a 0xff placeholder; remember the base ID. */
243 case GOT_OBJ_TYPE_REF_DELTA
:
244 memset(obj
->id
.hash
, 0xff, digest_len
);
245 obj
->id
.algo
= pack
->algo
;
247 if (mapoff
+ digest_len
>= pack
->filesize
) {
248 err
= got_error(GOT_ERR_BAD_PACKFILE
);
251 memcpy(obj
->delta
.ref
.ref_id
.hash
, pack
->map
+ mapoff
,
253 obj
->crc
= crc32(obj
->crc
, pack
->map
+ mapoff
,
255 got_hash_update(pack_hash_ctx
, pack
->map
+ mapoff
,
257 mapoff
+= digest_len
;
258 err
= got_inflate_to_mem_mmap(NULL
, &datalen
,
259 &obj
->len
, &csum
, pack
->map
, mapoff
,
260 pack
->filesize
- mapoff
);
264 n
= read(pack
->fd
, obj
->delta
.ref
.ref_id
.hash
,
267 err
= got_error_from_errno("read");
270 if (n
< digest_len
) {
271 err
= got_error(GOT_ERR_BAD_PACKFILE
);
274 obj
->crc
= crc32(obj
->crc
, obj
->delta
.ref
.ref_id
.hash
,
276 got_hash_update(pack_hash_ctx
,
277 obj
->delta
.ref
.ref_id
.hash
, digest_len
);
278 err
= got_inflate_to_mem_fd(NULL
, &datalen
, &obj
->len
,
279 &csum
, obj
->size
, pack
->fd
);
283 obj
->len
+= digest_len
;
/* Offset delta: the ID stays a 0xff placeholder; remember the base offset. */
285 case GOT_OBJ_TYPE_OFFSET_DELTA
:
286 memset(obj
->id
.hash
, 0xff, digest_len
);
287 obj
->id
.algo
= pack
->algo
;
288 err
= got_pack_parse_offset_delta(&obj
->delta
.ofs
.base_offset
,
289 &obj
->delta
.ofs
.base_offsetlen
, pack
, obj
->off
,
295 if (mapoff
+ obj
->delta
.ofs
.base_offsetlen
>=
297 err
= got_error(GOT_ERR_BAD_PACKFILE
);
301 if (mapoff
+ obj
->delta
.ofs
.base_offsetlen
>
303 err
= got_error_fmt(GOT_ERR_RANGE
,
304 "mapoff %lld would overflow size_t",
306 + obj
->delta
.ofs
.base_offsetlen
);
310 obj
->crc
= crc32(obj
->crc
, pack
->map
+ mapoff
,
311 obj
->delta
.ofs
.base_offsetlen
);
312 got_hash_update(pack_hash_ctx
, pack
->map
+ mapoff
,
313 obj
->delta
.ofs
.base_offsetlen
);
314 mapoff
+= obj
->delta
.ofs
.base_offsetlen
;
315 err
= got_inflate_to_mem_mmap(NULL
, &datalen
,
316 &obj
->len
, &csum
, pack
->map
, mapoff
,
317 pack
->filesize
- mapoff
);
322 * XXX Seek back and get CRC and hash digest
323 * of on-disk offset bytes.
325 if (lseek(pack
->fd
, obj
->off
+ obj
->tslen
, SEEK_SET
)
327 err
= got_error_from_errno("lseek");
330 err
= read_checksum(&obj
->crc
, pack_hash_ctx
,
331 pack
->fd
, obj
->delta
.ofs
.base_offsetlen
);
335 err
= got_inflate_to_mem_fd(NULL
, &datalen
, &obj
->len
,
336 &csum
, obj
->size
, pack
->fd
);
340 obj
->len
+= obj
->delta
.ofs
.base_offsetlen
;
343 err
= got_error(GOT_ERR_OBJ_TYPE
);
/*
 * Write len bytes from buf to file descriptor fd while also feeding the
 * same bytes into the hash context ctx.
 *
 * The hash is updated before write(2) is attempted. A failing write is
 * reported as an errno-based error; GOT_ERR_IO is returned as well.
 * NOTE(review): GOT_ERR_IO presumably covers a short write -- confirm
 * the condition guarding that return.
 */
350 const struct got_error
*
351 got_pack_hwrite(int fd
, void *buf
, int len
, struct got_hash
*ctx
)
355 got_hash_update(ctx
, buf
, len
);
357 w
= write(fd
, buf
, len
);
359 return got_error_from_errno("write");
361 return got_error(GOT_ERR_IO
);
/*
 * Compute the object ID of a deltified object by applying its delta chain.
 *
 * The delta chain for obj is resolved with the help of the in-progress pack
 * index packidx, bounded by GOT_DELTA_CHAIN_RECURSION_MAX. If the largest
 * result in the chain exceeds GOT_DELTA_RESULT_SIZE_CACHED_MAX the chain is
 * applied using the files tmpfile, delta_base_file and delta_accum_file
 * (the latter two are rewound first); otherwise it is applied in memory.
 * The object ID is the hash of a header "<base type label> <length>",
 * including the terminating NUL byte, followed by the reconstructed data,
 * and is stored in obj->id. Finally all entries are removed from the
 * delta chain list.
 *
 * NOTE(review): the header is formatted with "%zd"; if len is declared as
 * size_t the matching conversion would be "%zu" -- confirm the declaration.
 */
366 static const struct got_error
*
367 resolve_deltified_object(struct got_pack
*pack
, struct got_packidx
*packidx
,
368 struct got_indexed_object
*obj
, FILE *tmpfile
, FILE *delta_base_file
,
369 FILE *delta_accum_file
)
371 const struct got_error
*err
= NULL
;
372 struct got_delta_chain deltas
;
373 struct got_delta
*delta
;
381 const char *obj_label
;
384 STAILQ_INIT(&deltas
.entries
);
386 err
= got_pack_resolve_delta_chain(&deltas
, packidx
, pack
,
387 obj
->off
, obj
->tslen
, obj
->type
, obj
->size
,
388 GOT_DELTA_CHAIN_RECURSION_MAX
);
392 err
= got_pack_get_delta_chain_max_size(&max_size
, &deltas
, pack
);
395 if (max_size
> GOT_DELTA_RESULT_SIZE_CACHED_MAX
) {
397 rewind(delta_base_file
);
398 rewind(delta_accum_file
);
399 err
= got_pack_dump_delta_chain_to_file(&len
, &deltas
,
400 pack
, tmpfile
, delta_base_file
, delta_accum_file
);
404 err
= got_pack_dump_delta_chain_to_mem(&buf
, &len
,
410 err
= got_delta_chain_get_base_type(&base_obj_type
, &deltas
);
413 err
= got_object_type_label(&obj_label
, base_obj_type
);
416 if (asprintf(&header
, "%s %zd", obj_label
, len
) == -1) {
417 err
= got_error_from_errno("asprintf");
420 headerlen
= strlen(header
) + 1;
421 got_hash_init(&ctx
, pack
->algo
);
422 got_hash_update(&ctx
, header
, headerlen
);
423 if (max_size
> GOT_DELTA_RESULT_SIZE_CACHED_MAX
) {
424 err
= read_file_digest(&ctx
, tmpfile
, len
);
428 got_hash_update(&ctx
, buf
, len
);
429 got_hash_final_object_id(&ctx
, &obj
->id
);
433 while (!STAILQ_EMPTY(&deltas
.entries
)) {
434 delta
= STAILQ_FIRST(&deltas
.entries
);
435 STAILQ_REMOVE_HEAD(&deltas
.entries
, entry
);
/*
 * Binary search over the sorted object IDs of the in-progress pack index.
 *
 * The number of indexed objects is taken from the last fanout table entry.
 * The lower search bound can be seeded from the fanout entry preceding the
 * first byte of the hash, which skips all IDs with a smaller first byte.
 * Candidate IDs are compared with memcmp() over the digest length of the
 * index hash algorithm. Returns -1 if the ID is already present.
 * NOTE(review): for an ID which is not present the return value is
 * presumably the slot where it should be inserted -- confirm.
 */
441 /* Determine the slot in the pack index a given object ID should use. */
443 find_object_idx(struct got_packidx
*packidx
, uint8_t *hash
)
445 u_int8_t id0
= hash
[0];
446 uint32_t nindexed
= be32toh(packidx
->hdr
.fanout_table
[0xff]);
447 int left
= 0, right
= nindexed
- 1;
449 size_t digest_len
= got_hash_digest_length(packidx
->algo
);
452 left
= be32toh(packidx
->hdr
.fanout_table
[id0
- 1]);
454 while (left
<= right
) {
457 i
= ((left
+ right
) / 2);
458 oid
= packidx
->hdr
.sorted_ids
+ i
* digest_len
;
460 cmp
= memcmp(hash
, oid
, digest_len
);
462 return -1; /* object already indexed */
/*
 * Debugging aid: dump the in-progress pack index to stderr.
 *
 * Prints, in this order, the hex form of every indexed object ID, every
 * entry of the offsets table (entries flagged with
 * GOT_PACKIDX_OFFSET_VAL_IS_LARGE_IDX are shown together with the 64-bit
 * value they refer to in the large offsets table), and all 256 fanout
 * table entries. Stored values are converted from big-endian for display.
 */
474 print_packidx(struct got_packidx
*packidx
)
476 uint32_t nindexed
= be32toh(packidx
->hdr
.fanout_table
[0xff]);
477 size_t digest_len
= got_hash_digest_length(packidx
->algo
);
480 fprintf(stderr
, "object IDs:\n");
481 for (i
= 0; i
< nindexed
; i
++) {
482 char hex
[GOT_HASH_DIGEST_STRING_MAXLEN
];
483 got_hash_digest_to_str(packidx
->hdr
.sorted_ids
+ i
* digest_len
,
484 hex
, sizeof(hex
), packidx
->algo
);
485 fprintf(stderr
, "%s\n", hex
);
487 fprintf(stderr
, "\n");
489 fprintf(stderr
, "object offsets:\n");
490 for (i
= 0; i
< nindexed
; i
++) {
491 uint32_t offset
= be32toh(packidx
->hdr
.offsets
[i
]);
492 if (offset
& GOT_PACKIDX_OFFSET_VAL_IS_LARGE_IDX
) {
493 int j
= offset
& GOT_PACKIDX_OFFSET_VAL_MASK
;
494 fprintf(stderr
, "%u -> %llu\n", offset
,
495 be64toh(packidx
->hdr
.large_offsets
[j
]));
497 fprintf(stderr
, "%u\n", offset
);
499 fprintf(stderr
, "\n");
501 fprintf(stderr
, "fanout table:");
502 for (i
= 0; i
<= 0xff; i
++)
503 fprintf(stderr
, " %u", be32toh(packidx
->hdr
.fanout_table
[i
]));
504 fprintf(stderr
, "\n");
/*
 * Store obj in slot idx of the in-progress pack index.
 *
 * The object ID hash is copied into the sorted ID table and the CRC is
 * stored in big-endian form. Offsets below
 * GOT_PACKIDX_OFFSET_VAL_IS_LARGE_IDX are stored directly in the offsets
 * table; otherwise the offsets table receives the index of a new entry in
 * the large offsets table, marked with the
 * GOT_PACKIDX_OFFSET_VAL_IS_LARGE_IDX flag, and packidx->nlargeobj is
 * incremented. Finally every fanout table entry from the first byte of the
 * object ID up to 0xff is incremented by one.
 *
 * Side effect on the argument: obj->id.algo is set to the index algorithm.
 */
509 add_indexed_object(struct got_packidx
*packidx
, uint32_t idx
,
510 struct got_indexed_object
*obj
)
514 size_t digest_len
= got_hash_digest_length(packidx
->algo
);
516 oid
= packidx
->hdr
.sorted_ids
+ idx
* digest_len
;
517 memcpy(oid
, obj
->id
.hash
, digest_len
);
518 obj
->id
.algo
= packidx
->algo
;
519 packidx
->hdr
.crc32
[idx
] = htobe32(obj
->crc
);
520 if (obj
->off
< GOT_PACKIDX_OFFSET_VAL_IS_LARGE_IDX
)
521 packidx
->hdr
.offsets
[idx
] = htobe32(obj
->off
);
523 packidx
->hdr
.offsets
[idx
] = htobe32(packidx
->nlargeobj
|
524 GOT_PACKIDX_OFFSET_VAL_IS_LARGE_IDX
);
525 packidx
->hdr
.large_offsets
[packidx
->nlargeobj
] =
527 packidx
->nlargeobj
++;
530 for (i
= obj
->id
.hash
[0]; i
<= 0xff; i
++) {
531 uint32_t n
= be32toh(packidx
->hdr
.fanout_table
[i
]);
532 packidx
->hdr
.fanout_table
[i
] = htobe32(n
+ 1);
/*
 * Comparison callback which orders two struct got_indexed_object records
 * by object ID, delegating to got_object_id_cmp().
 * NOTE(review): presumably the comparator handed to qsort() when the pack
 * index is built -- confirm at the call site.
 */
537 indexed_obj_cmp(const void *pa
, const void *pb
)
539 struct got_indexed_object
*a
, *b
;
541 a
= (struct got_indexed_object
*)pa
;
542 b
= (struct got_indexed_object
*)pb
;
543 return got_object_id_cmp(&a
->id
, &b
->id
);
/*
 * Rebuild the in-progress pack index from scratch.
 *
 * The objects array is sorted in place, the fanout table is zeroed and the
 * large object counter is reset. The objects are then visited in sorted
 * order and added to consecutive index slots via add_indexed_object().
 * NOTE(review): the loop presumably skips objects whose ID is not known
 * yet, since idx advances separately from the loop counter -- confirm.
 */
547 make_packidx(struct got_packidx
*packidx
, uint32_t nobj
,
548 struct got_indexed_object
*objects
)
550 struct got_indexed_object
*obj
;
554 qsort(objects
, nobj
, sizeof(struct got_indexed_object
),
557 memset(packidx
->hdr
.fanout_table
, 0,
558 GOT_PACKIDX_V2_FANOUT_TABLE_ITEMS
* sizeof(uint32_t));
559 packidx
->nlargeobj
= 0;
561 for (i
= 0; i
< nobj
; i
++) {
564 add_indexed_object(packidx
, idx
++, obj
);
/*
 * Insert a newly resolved object into the in-progress pack index while
 * keeping the ID table sorted.
 *
 * The target slot comes from find_object_idx(); nothing happens if the
 * object is already indexed. Otherwise the sorted ID entries and the
 * offsets entries from that slot onwards are shifted up by one position
 * with memmove() and the object is stored in the freed slot via
 * add_indexed_object().
 *
 * NOTE(review): the crc32 table is not shifted along with the ID and
 * offsets tables here. This is presumably tolerable because the index is
 * rebuilt with make_packidx() before it is written out -- confirm.
 */
569 update_packidx(struct got_packidx
*packidx
, uint32_t nobj
,
570 struct got_indexed_object
*obj
)
573 uint32_t nindexed
= be32toh(packidx
->hdr
.fanout_table
[0xff]);
574 size_t digest_len
= got_hash_digest_length(packidx
->algo
);
577 idx
= find_object_idx(packidx
, obj
->id
.hash
);
579 return; /* object already indexed */
581 from
= packidx
->hdr
.sorted_ids
+ idx
* digest_len
;
582 to
= from
+ digest_len
;
583 memmove(to
, from
, digest_len
* (nindexed
- idx
));
584 memmove(&packidx
->hdr
.offsets
[idx
+ 1], &packidx
->hdr
.offsets
[idx
],
585 sizeof(uint32_t) * (nindexed
- idx
));
587 add_indexed_object(packidx
, idx
, obj
);
/*
 * Forward indexing progress counters to the caller-supplied callback.
 *
 * The rate limiter rl is consulted through got_ratelimit_check() first.
 * The callback receives progress_arg followed by the object counters.
 * NOTE(review): the callback is presumably skipped while the rate limiter
 * reports that too little time has elapsed, and the limiter is presumably
 * bypassed when rl is NULL (the final report passes NULL) -- confirm.
 */
590 static const struct got_error
*
591 report_progress(uint32_t nobj_total
, uint32_t nobj_indexed
, uint32_t nobj_loose
,
592 uint32_t nobj_resolved
, struct got_ratelimit
*rl
,
593 got_pack_index_progress_cb progress_cb
, void *progress_arg
)
595 const struct got_error
*err
;
599 err
= got_ratelimit_check(&elapsed
, rl
);
604 return progress_cb(progress_arg
, nobj_total
, nobj_indexed
, nobj_loose
,
608 const struct got_error
*
609 got_pack_index(struct got_pack
*pack
, int idxfd
, FILE *tmpfile
,
610 FILE *delta_base_file
, FILE *delta_accum_file
,
611 struct got_object_id
*pack_hash_expected
,
612 got_pack_index_progress_cb progress_cb
, void *progress_arg
,
613 struct got_ratelimit
*rl
)
615 const struct got_error
*err
;
616 struct got_packfile_hdr hdr
;
617 struct got_packidx packidx
;
619 struct got_object_id pack_hash
;
620 uint32_t nobj
, nvalid
, nloose
, nresolved
= 0, i
;
621 struct got_indexed_object
*objects
= NULL
, *obj
;
623 uint8_t packidx_hash
[GOT_HASH_DIGEST_MAXLEN
];
625 int pass
, have_ref_deltas
= 0, first_delta_idx
= -1;
627 int p_indexed
= 0, last_p_indexed
= -1;
628 int p_resolved
= 0, last_p_resolved
= -1;
631 /* This has to be signed for lseek(2) later */
632 digest_len
= got_hash_digest_length(pack
->algo
);
634 /* Require that pack file header and hash trailer are present. */
635 if (pack
->filesize
< sizeof(hdr
) + digest_len
)
636 return got_error_msg(GOT_ERR_BAD_PACKFILE
,
640 memcpy(&hdr
, pack
->map
, sizeof(hdr
));
641 mapoff
+= sizeof(hdr
);
643 r
= read(pack
->fd
, &hdr
, sizeof(hdr
));
645 return got_error_from_errno("read");
647 return got_error_msg(GOT_ERR_BAD_PACKFILE
,
651 if (hdr
.signature
!= htobe32(GOT_PACKFILE_SIGNATURE
))
652 return got_error_msg(GOT_ERR_BAD_PACKFILE
,
653 "bad packfile signature");
654 if (hdr
.version
!= htobe32(GOT_PACKFILE_VERSION
))
655 return got_error_msg(GOT_ERR_BAD_PACKFILE
,
656 "bad packfile version");
657 nobj
= be32toh(hdr
.nobjects
);
659 return got_error_msg(GOT_ERR_BAD_PACKFILE
,
660 "bad packfile with zero objects");
662 /* We compute the hash of pack file contents and verify later on. */
663 got_hash_init(&ctx
, pack
->algo
);
664 got_hash_update(&ctx
, &hdr
, sizeof(hdr
));
667 * Create an in-memory pack index which will grow as object
668 * IDs in the pack file are discovered. Only fields used to
669 * read deltified objects will be needed by the pack.c library
670 * code, so setting up just a pack index header is sufficient.
672 memset(&packidx
, 0, sizeof(packidx
));
673 packidx
.hdr
.magic
= malloc(sizeof(uint32_t));
674 if (packidx
.hdr
.magic
== NULL
)
675 return got_error_from_errno("malloc");
676 *packidx
.hdr
.magic
= htobe32(GOT_PACKIDX_V2_MAGIC
);
677 packidx
.hdr
.version
= malloc(sizeof(uint32_t));
678 if (packidx
.hdr
.version
== NULL
) {
679 err
= got_error_from_errno("malloc");
682 *packidx
.hdr
.version
= htobe32(GOT_PACKIDX_VERSION
);
683 packidx
.hdr
.fanout_table
= calloc(GOT_PACKIDX_V2_FANOUT_TABLE_ITEMS
,
685 if (packidx
.hdr
.fanout_table
== NULL
) {
686 err
= got_error_from_errno("calloc");
689 packidx
.hdr
.sorted_ids
= calloc(nobj
, digest_len
);
690 if (packidx
.hdr
.sorted_ids
== NULL
) {
691 err
= got_error_from_errno("calloc");
694 packidx
.hdr
.crc32
= calloc(nobj
, sizeof(uint32_t));
695 if (packidx
.hdr
.crc32
== NULL
) {
696 err
= got_error_from_errno("calloc");
699 packidx
.hdr
.offsets
= calloc(nobj
, sizeof(uint32_t));
700 if (packidx
.hdr
.offsets
== NULL
) {
701 err
= got_error_from_errno("calloc");
704 packidx
.algo
= pack
->algo
;
705 /* Large offsets table is empty for pack files < 2 GB. */
706 if (pack
->filesize
>= GOT_PACKIDX_OFFSET_VAL_IS_LARGE_IDX
) {
707 packidx
.hdr
.large_offsets
= calloc(nobj
, sizeof(uint64_t));
708 if (packidx
.hdr
.large_offsets
== NULL
) {
709 err
= got_error_from_errno("calloc");
716 objects
= calloc(nobj
, sizeof(struct got_indexed_object
));
718 return got_error_from_errno("calloc");
721 * First pass: locate all objects and identify un-deltified objects.
723 * When this pass has completed we will know offset, type, size, and
724 * CRC information for all objects in this pack file. We won't know
725 * any of the actual object IDs of deltified objects yet since we
726 * will not yet attempt to combine deltas.
729 for (i
= 0; i
< nobj
; i
++) {
730 /* Don't send too many progress privsep messages. */
731 p_indexed
= ((i
+ 1) * 100) / nobj
;
732 if (p_indexed
!= last_p_indexed
) {
733 err
= report_progress(nobj
, i
+ 1, nloose
, 0,
734 rl
, progress_cb
, progress_arg
);
737 last_p_indexed
= p_indexed
;
741 obj
->crc
= crc32(0L, NULL
, 0);
743 /* Store offset to type+size information for this object. */
747 obj
->off
= lseek(pack
->fd
, 0, SEEK_CUR
);
748 if (obj
->off
== -1) {
749 err
= got_error_from_errno("lseek");
754 err
= read_packed_object(pack
, obj
, tmpfile
, &ctx
);
759 mapoff
+= obj
->tslen
+ obj
->len
;
761 if (lseek(pack
->fd
, obj
->off
+ obj
->tslen
+ obj
->len
,
763 err
= got_error_from_errno("lseek");
768 if (obj
->type
== GOT_OBJ_TYPE_BLOB
||
769 obj
->type
== GOT_OBJ_TYPE_TREE
||
770 obj
->type
== GOT_OBJ_TYPE_COMMIT
||
771 obj
->type
== GOT_OBJ_TYPE_TAG
) {
775 if (first_delta_idx
== -1)
777 if (obj
->type
== GOT_OBJ_TYPE_REF_DELTA
)
784 * Having done a full pass over the pack file, we can now
785 * verify its checksum.
787 got_hash_final_object_id(&ctx
, &pack_hash
);
789 if (got_object_id_cmp(pack_hash_expected
, &pack_hash
) != 0) {
790 err
= got_error(GOT_ERR_PACKFILE_CSUM
);
794 /* Verify the hash checksum stored at the end of the pack file. */
796 if (pack
->filesize
> SIZE_MAX
) {
797 err
= got_error_fmt(GOT_ERR_RANGE
,
798 "filesize %lld overflows size_t",
799 (long long)pack
->filesize
);
803 memcpy(pack_hash_expected
, pack
->map
+
804 pack
->filesize
- digest_len
,
808 if (lseek(pack
->fd
, -digest_len
, SEEK_END
) == -1) {
809 err
= got_error_from_errno("lseek");
812 n
= read(pack
->fd
, pack_hash_expected
, digest_len
);
814 err
= got_error_from_errno("read");
817 if (n
!= digest_len
) {
818 err
= got_error(GOT_ERR_IO
);
822 if (got_object_id_cmp(pack_hash_expected
, &pack_hash
) != 0) {
823 err
= got_error_msg(GOT_ERR_BAD_PACKFILE
,
824 "bad checksum in pack file trailer");
828 if (first_delta_idx
== -1)
831 /* In order to resolve ref deltas we need an in-progress pack index. */
833 make_packidx(&packidx
, nobj
, objects
);
836 * Second pass: We can now resolve deltas to compute the IDs of
837 * objects which appear in deltified form. Because deltas can be
838 * chained this pass may require a couple of iterations until all
839 * IDs of deltified objects have been discovered.
842 while (nvalid
!= nobj
) {
845 * This loop will only run once unless the pack file
846 * contains ref deltas which refer to objects located
847 * later in the pack file, which is unusual.
848 * Offset deltas can always be resolved in one pass
849 * unless the packfile is corrupt.
851 for (i
= first_delta_idx
; i
< nobj
; i
++) {
853 if (obj
->type
!= GOT_OBJ_TYPE_REF_DELTA
&&
854 obj
->type
!= GOT_OBJ_TYPE_OFFSET_DELTA
)
860 if (pack
->map
== NULL
&& lseek(pack
->fd
,
861 obj
->off
+ obj
->tslen
, SEEK_SET
) == -1) {
862 err
= got_error_from_errno("lseek");
866 err
= resolve_deltified_object(pack
, &packidx
, obj
,
867 tmpfile
, delta_base_file
, delta_accum_file
);
869 if (err
->code
!= GOT_ERR_NO_OBJ
)
872 * We cannot resolve this object yet because
873 * a delta base is unknown. Try again later.
881 update_packidx(&packidx
, nobj
, obj
);
882 /* Don't send too many progress privsep messages. */
883 p_resolved
= ((nresolved
+ n
) * 100) / nobj
;
884 if (p_resolved
!= last_p_resolved
) {
885 err
= report_progress(nobj
, nobj
,
886 nloose
, nresolved
+ n
, rl
,
887 progress_cb
, progress_arg
);
890 last_p_resolved
= p_resolved
;
894 if (pass
++ > 3 && n
== 0) {
895 err
= got_error_msg(GOT_ERR_BAD_PACKFILE
,
896 "could not resolve any of deltas; packfile could "
904 if (nloose
+ nresolved
!= nobj
) {
906 snprintf(msg
, sizeof(msg
), "discovered only %d of %d objects",
907 nloose
+ nresolved
, nobj
);
908 err
= got_error_msg(GOT_ERR_BAD_PACKFILE
, msg
);
912 err
= report_progress(nobj
, nobj
, nloose
, nresolved
, NULL
,
913 progress_cb
, progress_arg
);
917 make_packidx(&packidx
, nobj
, objects
);
922 got_hash_init(&ctx
, pack
->algo
);
923 putbe32(buf
, GOT_PACKIDX_V2_MAGIC
);
924 putbe32(buf
+ 4, GOT_PACKIDX_VERSION
);
925 err
= got_pack_hwrite(idxfd
, buf
, 8, &ctx
);
928 err
= got_pack_hwrite(idxfd
, packidx
.hdr
.fanout_table
,
929 GOT_PACKIDX_V2_FANOUT_TABLE_ITEMS
* sizeof(uint32_t), &ctx
);
932 err
= got_pack_hwrite(idxfd
, packidx
.hdr
.sorted_ids
,
933 nobj
* digest_len
, &ctx
);
936 err
= got_pack_hwrite(idxfd
, packidx
.hdr
.crc32
,
937 nobj
* sizeof(uint32_t), &ctx
);
940 err
= got_pack_hwrite(idxfd
, packidx
.hdr
.offsets
,
941 nobj
* sizeof(uint32_t), &ctx
);
944 if (packidx
.nlargeobj
> 0) {
945 err
= got_pack_hwrite(idxfd
, packidx
.hdr
.large_offsets
,
946 packidx
.nlargeobj
* sizeof(uint64_t), &ctx
);
950 err
= got_pack_hwrite(idxfd
, &pack_hash
.hash
, digest_len
, &ctx
);
954 got_hash_final(&ctx
, packidx_hash
);
955 w
= write(idxfd
, packidx_hash
, digest_len
);
957 err
= got_error_from_errno("write");
960 if (w
!= digest_len
) {
961 err
= got_error(GOT_ERR_IO
);
966 free(packidx
.hdr
.magic
);
967 free(packidx
.hdr
.version
);
968 free(packidx
.hdr
.fanout_table
);
969 free(packidx
.hdr
.sorted_ids
);
970 free(packidx
.hdr
.offsets
);
971 free(packidx
.hdr
.large_offsets
);