1 #define USE_THE_REPOSITORY_VARIABLE
5 #include "environment.h"
17 #include "streaming.h"
18 #include "thread-utils.h"
20 #include "pack-revindex.h"
21 #include "object-file.h"
22 #include "object-store-ll.h"
23 #include "oid-array.h"
26 #include "replace-object.h"
27 #include "tree-walk.h"
28 #include "promisor-remote.h"
29 #include "run-command.h"
33 static const char index_pack_usage
[] =
34 "git index-pack [-v] [-o <index-file>] [--keep | --keep=<msg>] [--[no-]rev-index] [--verify] [--strict[=<msg-id>=<severity>...]] [--fsck-objects[=<msg-id>=<severity>...]] (<pack-file> | --stdin [--fix-thin] [<pack-file>])";
37 struct pack_idx_entry idx
;
39 unsigned char hdr_size
;
41 signed char real_type
;
50 /* Initialized by make_base(). */
51 struct base_data
*base
;
52 struct object_entry
*obj
;
53 int ref_first
, ref_last
;
54 int ofs_first
, ofs_last
;
56 * Threads should increment retain_data if they are about to call
57 * patch_delta() using this struct's data as a base, and decrement this
58 * when they are done. While retain_data is nonzero, this struct's data
59 * will not be freed even if the delta base cache limit is exceeded.
63 * The number of direct children that have not been fully processed
64 * (entered work_head, entered done_head, left done_head). When this
65 * number reaches zero, this struct base_data can be freed.
67 int children_remaining
;
69 /* Not initialized by make_base(). */
70 struct list_head list
;
76 * Stack of struct base_data that have unprocessed children.
77 * threaded_second_pass() uses this as a source of work (the other being the
80 * Guarded by work_mutex.
82 static LIST_HEAD(work_head
);
85 * Stack of struct base_data that have children, all of whom have been
86 * processed or are being processed, and at least one child is being processed.
87 * These struct base_data must be kept around until the last child is
90 * Guarded by work_mutex.
92 static LIST_HEAD(done_head
);
95 * All threads share one delta base cache.
97 * base_cache_used is guarded by work_mutex, and base_cache_limit is read-only
100 static size_t base_cache_used
;
101 static size_t base_cache_limit
;
103 struct thread_local
{
108 /* Remember to update object flag allocation in object.h */
109 #define FLAG_LINK (1u<<20)
110 #define FLAG_CHECKED (1u<<21)
112 struct ofs_delta_entry
{
117 struct ref_delta_entry
{
118 struct object_id oid
;
122 static struct object_entry
*objects
;
123 static struct object_stat
*obj_stat
;
124 static struct ofs_delta_entry
*ofs_deltas
;
125 static struct ref_delta_entry
*ref_deltas
;
126 static struct thread_local nothread_data
;
127 static int nr_objects
;
128 static int nr_ofs_deltas
;
129 static int nr_ref_deltas
;
130 static int ref_deltas_alloc
;
131 static int nr_resolved_deltas
;
132 static int nr_threads
;
134 static int from_stdin
;
136 static int do_fsck_object
;
137 static struct fsck_options fsck_options
= FSCK_OPTIONS_MISSING_GITMODULES
;
139 static const char *progress_title
;
140 static int show_resolving_progress
;
141 static int show_stat
;
142 static int check_self_contained_and_connected
;
144 static struct progress
*progress
;
146 /* We always read in 4kB chunks. */
147 static unsigned char input_buffer
[4096];
148 static unsigned int input_offset
, input_len
;
149 static off_t consumed_bytes
;
150 static off_t max_input_size
;
151 static unsigned deepest_delta
;
152 static git_hash_ctx input_ctx
;
153 static uint32_t input_crc32
;
154 static int input_fd
, output_fd
;
155 static const char *curr_pack
;
158 * local_links is guarded by read_mutex, and record_local_links is read-only in
161 static struct oidset local_links
= OIDSET_INIT
;
162 static int record_local_links
;
164 static struct thread_local
*thread_data
;
165 static int nr_dispatched
;
166 static int threads_active
;
168 static pthread_mutex_t read_mutex
;
169 #define read_lock() lock_mutex(&read_mutex)
170 #define read_unlock() unlock_mutex(&read_mutex)
172 static pthread_mutex_t counter_mutex
;
173 #define counter_lock() lock_mutex(&counter_mutex)
174 #define counter_unlock() unlock_mutex(&counter_mutex)
176 static pthread_mutex_t work_mutex
;
177 #define work_lock() lock_mutex(&work_mutex)
178 #define work_unlock() unlock_mutex(&work_mutex)
180 static pthread_mutex_t deepest_delta_mutex
;
181 #define deepest_delta_lock() lock_mutex(&deepest_delta_mutex)
182 #define deepest_delta_unlock() unlock_mutex(&deepest_delta_mutex)
184 static pthread_key_t key
;
186 static inline void lock_mutex(pthread_mutex_t
*mutex
)
189 pthread_mutex_lock(mutex
);
192 static inline void unlock_mutex(pthread_mutex_t
*mutex
)
195 pthread_mutex_unlock(mutex
);
199 * Mutex and conditional variable can't be statically-initialized on Windows.
201 static void init_thread(void)
204 init_recursive_mutex(&read_mutex
);
205 pthread_mutex_init(&counter_mutex
, NULL
);
206 pthread_mutex_init(&work_mutex
, NULL
);
208 pthread_mutex_init(&deepest_delta_mutex
, NULL
);
209 pthread_key_create(&key
, NULL
);
210 CALLOC_ARRAY(thread_data
, nr_threads
);
211 for (i
= 0; i
< nr_threads
; i
++) {
212 thread_data
[i
].pack_fd
= xopen(curr_pack
, O_RDONLY
);
218 static void cleanup_thread(void)
224 pthread_mutex_destroy(&read_mutex
);
225 pthread_mutex_destroy(&counter_mutex
);
226 pthread_mutex_destroy(&work_mutex
);
228 pthread_mutex_destroy(&deepest_delta_mutex
);
229 for (i
= 0; i
< nr_threads
; i
++)
230 close(thread_data
[i
].pack_fd
);
231 pthread_key_delete(key
);
235 static int mark_link(struct object
*obj
, enum object_type type
,
237 struct fsck_options
*options UNUSED
)
242 if (type
!= OBJ_ANY
&& obj
->type
!= type
)
243 die(_("object type mismatch at %s"), oid_to_hex(&obj
->oid
));
245 obj
->flags
|= FLAG_LINK
;
249 /* The content of each linked object must have been checked
250 or it must be already present in the object database */
251 static unsigned check_object(struct object
*obj
)
256 if (!(obj
->flags
& FLAG_LINK
))
259 if (!(obj
->flags
& FLAG_CHECKED
)) {
261 int type
= oid_object_info(the_repository
, &obj
->oid
, &size
);
263 die(_("did not receive expected object %s"),
264 oid_to_hex(&obj
->oid
));
265 if (type
!= obj
->type
)
266 die(_("object %s: expected type %s, found %s"),
267 oid_to_hex(&obj
->oid
),
268 type_name(obj
->type
), type_name(type
));
269 obj
->flags
|= FLAG_CHECKED
;
276 static unsigned check_objects(void)
278 unsigned i
, max
, foreign_nr
= 0;
280 max
= get_max_object_index();
283 progress
= start_delayed_progress(_("Checking objects"), max
);
285 for (i
= 0; i
< max
; i
++) {
286 foreign_nr
+= check_object(get_indexed_object(i
));
287 display_progress(progress
, i
+ 1);
290 stop_progress(&progress
);
295 /* Discard current buffer used content. */
296 static void flush(void)
300 write_or_die(output_fd
, input_buffer
, input_offset
);
301 the_hash_algo
->update_fn(&input_ctx
, input_buffer
, input_offset
);
302 memmove(input_buffer
, input_buffer
+ input_offset
, input_len
);
308 * Make sure at least "min" bytes are available in the buffer, and
309 * return the pointer to the buffer.
311 static void *fill(int min
)
313 if (min
<= input_len
)
314 return input_buffer
+ input_offset
;
315 if (min
> sizeof(input_buffer
))
316 die(Q_("cannot fill %d byte",
317 "cannot fill %d bytes",
322 ssize_t ret
= xread(input_fd
, input_buffer
+ input_len
,
323 sizeof(input_buffer
) - input_len
);
327 die_errno(_("read error on input"));
331 display_throughput(progress
, consumed_bytes
+ input_len
);
332 } while (input_len
< min
);
336 static void use(int bytes
)
338 if (bytes
> input_len
)
339 die(_("used more bytes than were available"));
340 input_crc32
= crc32(input_crc32
, input_buffer
+ input_offset
, bytes
);
342 input_offset
+= bytes
;
344 /* make sure off_t is sufficiently large not to wrap */
345 if (signed_add_overflows(consumed_bytes
, bytes
))
346 die(_("pack too large for current definition of off_t"));
347 consumed_bytes
+= bytes
;
348 if (max_input_size
&& consumed_bytes
> max_input_size
) {
349 struct strbuf size_limit
= STRBUF_INIT
;
350 strbuf_humanise_bytes(&size_limit
, max_input_size
);
351 die(_("pack exceeds maximum allowed size (%s)"),
356 static const char *open_pack_file(const char *pack_name
)
361 struct strbuf tmp_file
= STRBUF_INIT
;
362 output_fd
= odb_mkstemp(&tmp_file
,
363 "pack/tmp_pack_XXXXXX");
364 pack_name
= strbuf_detach(&tmp_file
, NULL
);
366 output_fd
= xopen(pack_name
, O_CREAT
|O_EXCL
|O_RDWR
, 0600);
368 nothread_data
.pack_fd
= output_fd
;
370 input_fd
= xopen(pack_name
, O_RDONLY
);
372 nothread_data
.pack_fd
= input_fd
;
374 the_hash_algo
->init_fn(&input_ctx
);
378 static void parse_pack_header(void)
380 struct pack_header
*hdr
= fill(sizeof(struct pack_header
));
382 /* Header consistency check */
383 if (hdr
->hdr_signature
!= htonl(PACK_SIGNATURE
))
384 die(_("pack signature mismatch"));
385 if (!pack_version_ok(hdr
->hdr_version
))
386 die(_("pack version %"PRIu32
" unsupported"),
387 ntohl(hdr
->hdr_version
));
389 nr_objects
= ntohl(hdr
->hdr_entries
);
390 use(sizeof(struct pack_header
));
393 __attribute__((format (printf
, 2, 3)))
394 static NORETURN
void bad_object(off_t offset
, const char *format
, ...)
399 va_start(params
, format
);
400 vsnprintf(buf
, sizeof(buf
), format
, params
);
402 die(_("pack has bad object at offset %"PRIuMAX
": %s"),
403 (uintmax_t)offset
, buf
);
406 static inline struct thread_local
*get_thread_data(void)
410 return pthread_getspecific(key
);
411 assert(!threads_active
&&
412 "This should only be reached when all threads are gone");
414 return ¬hread_data
;
417 static void set_thread_data(struct thread_local
*data
)
420 pthread_setspecific(key
, data
);
423 static void free_base_data(struct base_data
*c
)
426 FREE_AND_NULL(c
->data
);
427 base_cache_used
-= c
->size
;
431 static void prune_base_data(struct base_data
*retain
)
433 struct list_head
*pos
;
435 if (base_cache_used
<= base_cache_limit
)
438 list_for_each_prev(pos
, &done_head
) {
439 struct base_data
*b
= list_entry(pos
, struct base_data
, list
);
440 if (b
->retain_data
|| b
== retain
)
444 if (base_cache_used
<= base_cache_limit
)
449 list_for_each_prev(pos
, &work_head
) {
450 struct base_data
*b
= list_entry(pos
, struct base_data
, list
);
451 if (b
->retain_data
|| b
== retain
)
455 if (base_cache_used
<= base_cache_limit
)
461 static int is_delta_type(enum object_type type
)
463 return (type
== OBJ_REF_DELTA
|| type
== OBJ_OFS_DELTA
);
466 static void *unpack_entry_data(off_t offset
, unsigned long size
,
467 enum object_type type
, struct object_id
*oid
)
469 static char fixed_buf
[8192];
477 if (!is_delta_type(type
)) {
478 hdrlen
= format_object_header(hdr
, sizeof(hdr
), type
, size
);
479 the_hash_algo
->init_fn(&c
);
480 the_hash_algo
->update_fn(&c
, hdr
, hdrlen
);
483 if (type
== OBJ_BLOB
&& size
> big_file_threshold
)
486 buf
= xmallocz(size
);
488 memset(&stream
, 0, sizeof(stream
));
489 git_inflate_init(&stream
);
490 stream
.next_out
= buf
;
491 stream
.avail_out
= buf
== fixed_buf
? sizeof(fixed_buf
) : size
;
494 unsigned char *last_out
= stream
.next_out
;
495 stream
.next_in
= fill(1);
496 stream
.avail_in
= input_len
;
497 status
= git_inflate(&stream
, 0);
498 use(input_len
- stream
.avail_in
);
500 the_hash_algo
->update_fn(&c
, last_out
, stream
.next_out
- last_out
);
501 if (buf
== fixed_buf
) {
502 stream
.next_out
= buf
;
503 stream
.avail_out
= sizeof(fixed_buf
);
505 } while (status
== Z_OK
);
506 if (stream
.total_out
!= size
|| status
!= Z_STREAM_END
)
507 bad_object(offset
, _("inflate returned %d"), status
);
508 git_inflate_end(&stream
);
510 the_hash_algo
->final_oid_fn(oid
, &c
);
511 return buf
== fixed_buf
? NULL
: buf
;
514 static void *unpack_raw_entry(struct object_entry
*obj
,
516 struct object_id
*ref_oid
,
517 struct object_id
*oid
)
520 unsigned long size
, c
;
525 obj
->idx
.offset
= consumed_bytes
;
526 input_crc32
= crc32(0, NULL
, 0);
531 obj
->type
= (c
>> 4) & 7;
538 size
+= (c
& 0x7f) << shift
;
545 oidread(ref_oid
, fill(the_hash_algo
->rawsz
),
546 the_repository
->hash_algo
);
547 use(the_hash_algo
->rawsz
);
553 base_offset
= c
& 127;
556 if (!base_offset
|| MSB(base_offset
, 7))
557 bad_object(obj
->idx
.offset
, _("offset value overflow for delta base object"));
561 base_offset
= (base_offset
<< 7) + (c
& 127);
563 *ofs_offset
= obj
->idx
.offset
- base_offset
;
564 if (*ofs_offset
<= 0 || *ofs_offset
>= obj
->idx
.offset
)
565 bad_object(obj
->idx
.offset
, _("delta base offset is out of bound"));
573 bad_object(obj
->idx
.offset
, _("unknown object type %d"), obj
->type
);
575 obj
->hdr_size
= consumed_bytes
- obj
->idx
.offset
;
577 data
= unpack_entry_data(obj
->idx
.offset
, obj
->size
, obj
->type
, oid
);
578 obj
->idx
.crc32
= input_crc32
;
582 static void *unpack_data(struct object_entry
*obj
,
583 int (*consume
)(const unsigned char *, unsigned long, void *),
586 off_t from
= obj
[0].idx
.offset
+ obj
[0].hdr_size
;
587 off_t len
= obj
[1].idx
.offset
- from
;
588 unsigned char *data
, *inbuf
;
592 data
= xmallocz(consume
? 64*1024 : obj
->size
);
593 inbuf
= xmalloc((len
< 64*1024) ? (int)len
: 64*1024);
595 memset(&stream
, 0, sizeof(stream
));
596 git_inflate_init(&stream
);
597 stream
.next_out
= data
;
598 stream
.avail_out
= consume
? 64*1024 : obj
->size
;
601 ssize_t n
= (len
< 64*1024) ? (ssize_t
)len
: 64*1024;
602 n
= xpread(get_thread_data()->pack_fd
, inbuf
, n
, from
);
604 die_errno(_("cannot pread pack file"));
606 die(Q_("premature end of pack file, %"PRIuMAX
" byte missing",
607 "premature end of pack file, %"PRIuMAX
" bytes missing",
612 stream
.next_in
= inbuf
;
615 status
= git_inflate(&stream
, 0);
618 status
= git_inflate(&stream
, 0);
619 if (consume(data
, stream
.next_out
- data
, cb_data
)) {
624 stream
.next_out
= data
;
625 stream
.avail_out
= 64*1024;
626 } while (status
== Z_OK
&& stream
.avail_in
);
628 } while (len
&& status
== Z_OK
&& !stream
.avail_in
);
630 /* This has been inflated OK when first encountered, so... */
631 if (status
!= Z_STREAM_END
|| stream
.total_out
!= obj
->size
)
632 die(_("serious inflate inconsistency"));
634 git_inflate_end(&stream
);
642 static void *get_data_from_pack(struct object_entry
*obj
)
644 return unpack_data(obj
, NULL
, NULL
);
647 static int compare_ofs_delta_bases(off_t offset1
, off_t offset2
,
648 enum object_type type1
,
649 enum object_type type2
)
651 int cmp
= type1
- type2
;
654 return offset1
< offset2
? -1 :
655 offset1
> offset2
? 1 :
659 static int find_ofs_delta(const off_t offset
)
661 int first
= 0, last
= nr_ofs_deltas
;
663 while (first
< last
) {
664 int next
= first
+ (last
- first
) / 2;
665 struct ofs_delta_entry
*delta
= &ofs_deltas
[next
];
668 cmp
= compare_ofs_delta_bases(offset
, delta
->offset
,
670 objects
[delta
->obj_no
].type
);
682 static void find_ofs_delta_children(off_t offset
,
683 int *first_index
, int *last_index
)
685 int first
= find_ofs_delta(offset
);
687 int end
= nr_ofs_deltas
- 1;
694 while (first
> 0 && ofs_deltas
[first
- 1].offset
== offset
)
696 while (last
< end
&& ofs_deltas
[last
+ 1].offset
== offset
)
698 *first_index
= first
;
702 static int compare_ref_delta_bases(const struct object_id
*oid1
,
703 const struct object_id
*oid2
,
704 enum object_type type1
,
705 enum object_type type2
)
707 int cmp
= type1
- type2
;
710 return oidcmp(oid1
, oid2
);
713 static int find_ref_delta(const struct object_id
*oid
)
715 int first
= 0, last
= nr_ref_deltas
;
717 while (first
< last
) {
718 int next
= first
+ (last
- first
) / 2;
719 struct ref_delta_entry
*delta
= &ref_deltas
[next
];
722 cmp
= compare_ref_delta_bases(oid
, &delta
->oid
,
724 objects
[delta
->obj_no
].type
);
736 static void find_ref_delta_children(const struct object_id
*oid
,
737 int *first_index
, int *last_index
)
739 int first
= find_ref_delta(oid
);
741 int end
= nr_ref_deltas
- 1;
748 while (first
> 0 && oideq(&ref_deltas
[first
- 1].oid
, oid
))
750 while (last
< end
&& oideq(&ref_deltas
[last
+ 1].oid
, oid
))
752 *first_index
= first
;
756 struct compare_data
{
757 struct object_entry
*entry
;
758 struct git_istream
*st
;
760 unsigned long buf_size
;
763 static int compare_objects(const unsigned char *buf
, unsigned long size
,
766 struct compare_data
*data
= cb_data
;
768 if (data
->buf_size
< size
) {
770 data
->buf
= xmalloc(size
);
771 data
->buf_size
= size
;
775 ssize_t len
= read_istream(data
->st
, data
->buf
, size
);
777 die(_("SHA1 COLLISION FOUND WITH %s !"),
778 oid_to_hex(&data
->entry
->idx
.oid
));
780 die(_("unable to read %s"),
781 oid_to_hex(&data
->entry
->idx
.oid
));
782 if (memcmp(buf
, data
->buf
, len
))
783 die(_("SHA1 COLLISION FOUND WITH %s !"),
784 oid_to_hex(&data
->entry
->idx
.oid
));
791 static int check_collison(struct object_entry
*entry
)
793 struct compare_data data
;
794 enum object_type type
;
797 if (entry
->size
<= big_file_threshold
|| entry
->type
!= OBJ_BLOB
)
800 memset(&data
, 0, sizeof(data
));
802 data
.st
= open_istream(the_repository
, &entry
->idx
.oid
, &type
, &size
,
806 if (size
!= entry
->size
|| type
!= entry
->type
)
807 die(_("SHA1 COLLISION FOUND WITH %s !"),
808 oid_to_hex(&entry
->idx
.oid
));
809 unpack_data(entry
, compare_objects
, &data
);
810 close_istream(data
.st
);
815 static void record_if_local_object(const struct object_id
*oid
)
817 struct object_info info
= OBJECT_INFO_INIT
;
818 if (oid_object_info_extended(the_repository
, oid
, &info
, 0))
819 /* Missing; assume it is a promisor object */
821 if (info
.whence
== OI_PACKED
&& info
.u
.packed
.pack
->pack_promisor
)
823 oidset_insert(&local_links
, oid
);
826 static void do_record_local_links(struct object
*obj
)
828 if (obj
->type
== OBJ_TREE
) {
829 struct tree
*tree
= (struct tree
*)obj
;
830 struct tree_desc desc
;
831 struct name_entry entry
;
832 if (init_tree_desc_gently(&desc
, &tree
->object
.oid
,
833 tree
->buffer
, tree
->size
, 0))
835 * Error messages are given when packs are
836 * verified, so do not print any here.
839 while (tree_entry_gently(&desc
, &entry
))
840 record_if_local_object(&entry
.oid
);
841 } else if (obj
->type
== OBJ_COMMIT
) {
842 struct commit
*commit
= (struct commit
*) obj
;
843 struct commit_list
*parents
= commit
->parents
;
845 for (; parents
; parents
= parents
->next
)
846 record_if_local_object(&parents
->item
->object
.oid
);
847 } else if (obj
->type
== OBJ_TAG
) {
848 struct tag
*tag
= (struct tag
*) obj
;
849 record_if_local_object(get_tagged_oid(tag
));
853 static void sha1_object(const void *data
, struct object_entry
*obj_entry
,
854 unsigned long size
, enum object_type type
,
855 const struct object_id
*oid
)
857 void *new_data
= NULL
;
858 int collision_test_needed
= 0;
860 assert(data
|| obj_entry
);
862 if (startup_info
->have_repository
) {
864 collision_test_needed
=
865 repo_has_object_file_with_flags(the_repository
, oid
,
870 if (collision_test_needed
&& !data
) {
872 if (!check_collison(obj_entry
))
873 collision_test_needed
= 0;
876 if (collision_test_needed
) {
878 enum object_type has_type
;
879 unsigned long has_size
;
881 has_type
= oid_object_info(the_repository
, oid
, &has_size
);
883 die(_("cannot read existing object info %s"), oid_to_hex(oid
));
884 if (has_type
!= type
|| has_size
!= size
)
885 die(_("SHA1 COLLISION FOUND WITH %s !"), oid_to_hex(oid
));
886 has_data
= repo_read_object_file(the_repository
, oid
,
887 &has_type
, &has_size
);
890 data
= new_data
= get_data_from_pack(obj_entry
);
892 die(_("cannot read existing object %s"), oid_to_hex(oid
));
893 if (size
!= has_size
|| type
!= has_type
||
894 memcmp(data
, has_data
, size
) != 0)
895 die(_("SHA1 COLLISION FOUND WITH %s !"), oid_to_hex(oid
));
899 if (strict
|| do_fsck_object
|| record_local_links
) {
901 if (type
== OBJ_BLOB
) {
902 struct blob
*blob
= lookup_blob(the_repository
, oid
);
904 blob
->object
.flags
|= FLAG_CHECKED
;
906 die(_("invalid blob object %s"), oid_to_hex(oid
));
907 if (do_fsck_object
&&
908 fsck_object(&blob
->object
, (void *)data
, size
, &fsck_options
))
909 die(_("fsck error in packed object"));
913 void *buf
= (void *) data
;
915 assert(data
&& "data can only be NULL for large _blobs_");
918 * we do not need to free the memory here, as the
919 * buf is deleted by the caller.
921 obj
= parse_object_buffer(the_repository
, oid
, type
,
925 die(_("invalid %s"), type_name(type
));
926 if (do_fsck_object
&&
927 fsck_object(obj
, buf
, size
, &fsck_options
))
928 die(_("fsck error in packed object"));
929 if (strict
&& fsck_walk(obj
, NULL
, &fsck_options
))
930 die(_("Not all child objects of %s are reachable"), oid_to_hex(&obj
->oid
));
931 if (record_local_links
)
932 do_record_local_links(obj
);
934 if (obj
->type
== OBJ_TREE
) {
935 struct tree
*item
= (struct tree
*) obj
;
939 if (obj
->type
== OBJ_COMMIT
) {
940 struct commit
*commit
= (struct commit
*) obj
;
941 if (detach_commit_buffer(commit
, NULL
) != data
)
942 BUG("parse_object_buffer transmogrified our buffer");
944 obj
->flags
|= FLAG_CHECKED
;
953 * Ensure that this node has been reconstructed and return its contents.
955 * In the typical and best case, this node would already be reconstructed
956 * (through the invocation to resolve_delta() in threaded_second_pass()) and it
957 * would not be pruned. However, if pruning of this node was necessary due to
958 * reaching delta_base_cache_limit, this function will find the closest
959 * ancestor with reconstructed data that has not been pruned (or if there is
960 * none, the ultimate base object), and reconstruct each node in the delta
961 * chain in order to generate the reconstructed data for this node.
963 static void *get_base_data(struct base_data
*c
)
966 struct object_entry
*obj
= c
->obj
;
967 struct base_data
**delta
= NULL
;
968 int delta_nr
= 0, delta_alloc
= 0;
970 while (is_delta_type(c
->obj
->type
) && !c
->data
) {
971 ALLOC_GROW(delta
, delta_nr
+ 1, delta_alloc
);
972 delta
[delta_nr
++] = c
;
976 c
->data
= get_data_from_pack(obj
);
978 base_cache_used
+= c
->size
;
981 for (; delta_nr
> 0; delta_nr
--) {
983 c
= delta
[delta_nr
- 1];
985 base
= get_base_data(c
->base
);
986 raw
= get_data_from_pack(obj
);
987 c
->data
= patch_delta(
993 bad_object(obj
->idx
.offset
, _("failed to apply delta"));
994 base_cache_used
+= c
->size
;
1002 static struct base_data
*make_base(struct object_entry
*obj
,
1003 struct base_data
*parent
)
1005 struct base_data
*base
= xcalloc(1, sizeof(struct base_data
));
1006 base
->base
= parent
;
1008 find_ref_delta_children(&obj
->idx
.oid
,
1009 &base
->ref_first
, &base
->ref_last
);
1010 find_ofs_delta_children(obj
->idx
.offset
,
1011 &base
->ofs_first
, &base
->ofs_last
);
1012 base
->children_remaining
= base
->ref_last
- base
->ref_first
+
1013 base
->ofs_last
- base
->ofs_first
+ 2;
1017 static struct base_data
*resolve_delta(struct object_entry
*delta_obj
,
1018 struct base_data
*base
)
1020 void *delta_data
, *result_data
;
1021 struct base_data
*result
;
1022 unsigned long result_size
;
1025 int i
= delta_obj
- objects
;
1026 int j
= base
->obj
- objects
;
1027 obj_stat
[i
].delta_depth
= obj_stat
[j
].delta_depth
+ 1;
1028 deepest_delta_lock();
1029 if (deepest_delta
< obj_stat
[i
].delta_depth
)
1030 deepest_delta
= obj_stat
[i
].delta_depth
;
1031 deepest_delta_unlock();
1032 obj_stat
[i
].base_object_no
= j
;
1034 delta_data
= get_data_from_pack(delta_obj
);
1036 result_data
= patch_delta(base
->data
, base
->size
,
1037 delta_data
, delta_obj
->size
, &result_size
);
1040 bad_object(delta_obj
->idx
.offset
, _("failed to apply delta"));
1041 hash_object_file(the_hash_algo
, result_data
, result_size
,
1042 delta_obj
->real_type
, &delta_obj
->idx
.oid
);
1043 sha1_object(result_data
, NULL
, result_size
, delta_obj
->real_type
,
1044 &delta_obj
->idx
.oid
);
1046 result
= make_base(delta_obj
, base
);
1047 result
->data
= result_data
;
1048 result
->size
= result_size
;
1051 nr_resolved_deltas
++;
1057 static int compare_ofs_delta_entry(const void *a
, const void *b
)
1059 const struct ofs_delta_entry
*delta_a
= a
;
1060 const struct ofs_delta_entry
*delta_b
= b
;
1062 return delta_a
->offset
< delta_b
->offset
? -1 :
1063 delta_a
->offset
> delta_b
->offset
? 1 :
1067 static int compare_ref_delta_entry(const void *a
, const void *b
)
1069 const struct ref_delta_entry
*delta_a
= a
;
1070 const struct ref_delta_entry
*delta_b
= b
;
1072 return oidcmp(&delta_a
->oid
, &delta_b
->oid
);
1075 static void *threaded_second_pass(void *data
)
1078 set_thread_data(data
);
1080 struct base_data
*parent
= NULL
;
1081 struct object_entry
*child_obj
;
1082 struct base_data
*child
;
1085 display_progress(progress
, nr_resolved_deltas
);
1089 if (list_empty(&work_head
)) {
1091 * Take an object from the object array.
1093 while (nr_dispatched
< nr_objects
&&
1094 is_delta_type(objects
[nr_dispatched
].type
))
1096 if (nr_dispatched
>= nr_objects
) {
1100 child_obj
= &objects
[nr_dispatched
++];
1103 * Peek at the top of the stack, and take a child from
1106 parent
= list_first_entry(&work_head
, struct base_data
,
1109 if (parent
->ref_first
<= parent
->ref_last
) {
1110 int offset
= ref_deltas
[parent
->ref_first
++].obj_no
;
1111 child_obj
= objects
+ offset
;
1112 if (child_obj
->real_type
!= OBJ_REF_DELTA
)
1113 die("REF_DELTA at offset %"PRIuMAX
" already resolved (duplicate base %s?)",
1114 (uintmax_t) child_obj
->idx
.offset
,
1115 oid_to_hex(&parent
->obj
->idx
.oid
));
1116 child_obj
->real_type
= parent
->obj
->real_type
;
1118 child_obj
= objects
+
1119 ofs_deltas
[parent
->ofs_first
++].obj_no
;
1120 assert(child_obj
->real_type
== OBJ_OFS_DELTA
);
1121 child_obj
->real_type
= parent
->obj
->real_type
;
1124 if (parent
->ref_first
> parent
->ref_last
&&
1125 parent
->ofs_first
> parent
->ofs_last
) {
1127 * This parent has run out of children, so move
1130 list_del(&parent
->list
);
1131 list_add(&parent
->list
, &done_head
);
1135 * Ensure that the parent has data, since we will need
1138 * NEEDSWORK: If parent data needs to be reloaded, this
1139 * prolongs the time that the current thread spends in
1140 * the mutex. A mitigating factor is that parent data
1141 * needs to be reloaded only if the delta base cache
1142 * limit is exceeded, so in the typical case, this does
1145 get_base_data(parent
);
1146 parent
->retain_data
++;
1151 child
= resolve_delta(child_obj
, parent
);
1152 if (!child
->children_remaining
)
1153 FREE_AND_NULL(child
->data
);
1155 child
= make_base(child_obj
, NULL
);
1156 if (child
->children_remaining
) {
1158 * Since this child has its own delta children,
1159 * we will need this data in the future.
1160 * Inflate now so that future iterations will
1161 * have access to this object's data while
1162 * outside the work mutex.
1164 child
->data
= get_data_from_pack(child_obj
);
1165 child
->size
= child_obj
->size
;
1171 parent
->retain_data
--;
1174 * This child has its own children, so add it to
1177 list_add(&child
->list
, &work_head
);
1178 base_cache_used
+= child
->size
;
1179 prune_base_data(NULL
);
1180 free_base_data(child
);
1183 * This child does not have its own children. It may be
1184 * the last descendant of its ancestors; free those
1187 struct base_data
*p
= parent
;
1190 struct base_data
*next_p
;
1192 p
->children_remaining
--;
1193 if (p
->children_remaining
)
1203 FREE_AND_NULL(child
);
1212 * - find locations of all objects;
1213 * - calculate SHA1 of all non-delta objects;
1214 * - remember base (SHA1 or offset) for all deltas.
1216 static void parse_pack_objects(unsigned char *hash
)
1218 int i
, nr_delays
= 0;
1219 struct ofs_delta_entry
*ofs_delta
= ofs_deltas
;
1220 struct object_id ref_delta_oid
;
1222 git_hash_ctx tmp_ctx
;
1225 progress
= start_progress(
1226 progress_title
? progress_title
:
1227 from_stdin
? _("Receiving objects") : _("Indexing objects"),
1229 for (i
= 0; i
< nr_objects
; i
++) {
1230 struct object_entry
*obj
= &objects
[i
];
1231 void *data
= unpack_raw_entry(obj
, &ofs_delta
->offset
,
1234 obj
->real_type
= obj
->type
;
1235 if (obj
->type
== OBJ_OFS_DELTA
) {
1237 ofs_delta
->obj_no
= i
;
1239 } else if (obj
->type
== OBJ_REF_DELTA
) {
1240 ALLOC_GROW(ref_deltas
, nr_ref_deltas
+ 1, ref_deltas_alloc
);
1241 oidcpy(&ref_deltas
[nr_ref_deltas
].oid
, &ref_delta_oid
);
1242 ref_deltas
[nr_ref_deltas
].obj_no
= i
;
1245 /* large blobs, check later */
1246 obj
->real_type
= OBJ_BAD
;
1249 sha1_object(data
, NULL
, obj
->size
, obj
->type
,
1252 display_progress(progress
, i
+1);
1254 objects
[i
].idx
.offset
= consumed_bytes
;
1255 stop_progress(&progress
);
1257 /* Check pack integrity */
1259 the_hash_algo
->init_fn(&tmp_ctx
);
1260 the_hash_algo
->clone_fn(&tmp_ctx
, &input_ctx
);
1261 the_hash_algo
->final_fn(hash
, &tmp_ctx
);
1262 if (!hasheq(fill(the_hash_algo
->rawsz
), hash
, the_repository
->hash_algo
))
1263 die(_("pack is corrupted (SHA1 mismatch)"));
1264 use(the_hash_algo
->rawsz
);
1266 /* If input_fd is a file, we should have reached its end now. */
1267 if (fstat(input_fd
, &st
))
1268 die_errno(_("cannot fstat packfile"));
1269 if (S_ISREG(st
.st_mode
) &&
1270 lseek(input_fd
, 0, SEEK_CUR
) - input_len
!= st
.st_size
)
1271 die(_("pack has junk at the end"));
1273 for (i
= 0; i
< nr_objects
; i
++) {
1274 struct object_entry
*obj
= &objects
[i
];
1275 if (obj
->real_type
!= OBJ_BAD
)
1277 obj
->real_type
= obj
->type
;
1278 sha1_object(NULL
, obj
, obj
->size
, obj
->type
,
1283 die(_("confusion beyond insanity in parse_pack_objects()"));
1288 * - for all non-delta objects, look if it is used as a base for
1290 * - if used as a base, uncompress the object and apply all deltas,
1291 * recursively checking if the resulting object is used as a base
1292 * for some more deltas.
1294 static void resolve_deltas(void)
1298 if (!nr_ofs_deltas
&& !nr_ref_deltas
)
1301 /* Sort deltas by base SHA1/offset for fast searching */
1302 QSORT(ofs_deltas
, nr_ofs_deltas
, compare_ofs_delta_entry
);
1303 QSORT(ref_deltas
, nr_ref_deltas
, compare_ref_delta_entry
);
1305 if (verbose
|| show_resolving_progress
)
1306 progress
= start_progress(_("Resolving deltas"),
1307 nr_ref_deltas
+ nr_ofs_deltas
);
1310 base_cache_limit
= delta_base_cache_limit
* nr_threads
;
1311 if (nr_threads
> 1 || getenv("GIT_FORCE_THREADS")) {
1314 for (i
= 0; i
< nr_threads
; i
++) {
1315 int ret
= pthread_create(&thread_data
[i
].thread
, NULL
,
1316 threaded_second_pass
, thread_data
+ i
);
1318 die(_("unable to create thread: %s"),
1322 for (i
= 0; i
< nr_threads
; i
++)
1323 pthread_join(thread_data
[i
].thread
, NULL
);
1327 threaded_second_pass(¬hread_data
);
1332 * - append objects to convert thin pack to full pack if required
1333 * - write the final pack hash
1335 static void fix_unresolved_deltas(struct hashfile
*f
);
1336 static void conclude_pack(int fix_thin_pack
, const char *curr_pack
, unsigned char *pack_hash
)
1338 if (nr_ref_deltas
+ nr_ofs_deltas
== nr_resolved_deltas
) {
1339 stop_progress(&progress
);
1340 /* Flush remaining pack final hash. */
1345 if (fix_thin_pack
) {
1347 unsigned char read_hash
[GIT_MAX_RAWSZ
], tail_hash
[GIT_MAX_RAWSZ
];
1348 struct strbuf msg
= STRBUF_INIT
;
1349 int nr_unresolved
= nr_ofs_deltas
+ nr_ref_deltas
- nr_resolved_deltas
;
1350 int nr_objects_initial
= nr_objects
;
1351 if (nr_unresolved
<= 0)
1352 die(_("confusion beyond insanity"));
1353 REALLOC_ARRAY(objects
, nr_objects
+ nr_unresolved
+ 1);
1354 memset(objects
+ nr_objects
+ 1, 0,
1355 nr_unresolved
* sizeof(*objects
));
1356 f
= hashfd(output_fd
, curr_pack
);
1357 fix_unresolved_deltas(f
);
1358 strbuf_addf(&msg
, Q_("completed with %d local object",
1359 "completed with %d local objects",
1360 nr_objects
- nr_objects_initial
),
1361 nr_objects
- nr_objects_initial
);
1362 stop_progress_msg(&progress
, msg
.buf
);
1363 strbuf_release(&msg
);
1364 finalize_hashfile(f
, tail_hash
, FSYNC_COMPONENT_PACK
, 0);
1365 hashcpy(read_hash
, pack_hash
, the_repository
->hash_algo
);
1366 fixup_pack_header_footer(output_fd
, pack_hash
,
1367 curr_pack
, nr_objects
,
1368 read_hash
, consumed_bytes
-the_hash_algo
->rawsz
);
1369 if (!hasheq(read_hash
, tail_hash
, the_repository
->hash_algo
))
1370 die(_("Unexpected tail checksum for %s "
1371 "(disk corruption?)"), curr_pack
);
1373 if (nr_ofs_deltas
+ nr_ref_deltas
!= nr_resolved_deltas
)
1374 die(Q_("pack has %d unresolved delta",
1375 "pack has %d unresolved deltas",
1376 nr_ofs_deltas
+ nr_ref_deltas
- nr_resolved_deltas
),
1377 nr_ofs_deltas
+ nr_ref_deltas
- nr_resolved_deltas
);
1380 static int write_compressed(struct hashfile
*f
, void *in
, unsigned int size
)
1384 unsigned char outbuf
[4096];
1386 git_deflate_init(&stream
, zlib_compression_level
);
1387 stream
.next_in
= in
;
1388 stream
.avail_in
= size
;
1391 stream
.next_out
= outbuf
;
1392 stream
.avail_out
= sizeof(outbuf
);
1393 status
= git_deflate(&stream
, Z_FINISH
);
1394 hashwrite(f
, outbuf
, sizeof(outbuf
) - stream
.avail_out
);
1395 } while (status
== Z_OK
);
1397 if (status
!= Z_STREAM_END
)
1398 die(_("unable to deflate appended object (%d)"), status
);
1399 size
= stream
.total_out
;
1400 git_deflate_end(&stream
);
1404 static struct object_entry
*append_obj_to_pack(struct hashfile
*f
,
1405 const unsigned char *sha1
, void *buf
,
1406 unsigned long size
, enum object_type type
)
1408 struct object_entry
*obj
= &objects
[nr_objects
++];
1409 unsigned char header
[10];
1410 unsigned long s
= size
;
1412 unsigned char c
= (type
<< 4) | (s
& 15);
1415 header
[n
++] = c
| 0x80;
1421 hashwrite(f
, header
, n
);
1423 obj
[0].hdr_size
= n
;
1425 obj
[0].real_type
= type
;
1426 obj
[1].idx
.offset
= obj
[0].idx
.offset
+ n
;
1427 obj
[1].idx
.offset
+= write_compressed(f
, buf
, size
);
1428 obj
[0].idx
.crc32
= crc32_end(f
);
1430 oidread(&obj
->idx
.oid
, sha1
, the_repository
->hash_algo
);
1434 static int delta_pos_compare(const void *_a
, const void *_b
)
1436 struct ref_delta_entry
*a
= *(struct ref_delta_entry
**)_a
;
1437 struct ref_delta_entry
*b
= *(struct ref_delta_entry
**)_b
;
1438 return a
->obj_no
- b
->obj_no
;
1441 static void fix_unresolved_deltas(struct hashfile
*f
)
1443 struct ref_delta_entry
**sorted_by_pos
;
1447 * Since many unresolved deltas may well be themselves base objects
1448 * for more unresolved deltas, we really want to include the
1449 * smallest number of base objects that would cover as much delta
1450 * as possible by picking the
1451 * trunc deltas first, allowing for other deltas to resolve without
1452 * additional base objects. Since most base objects are to be found
1453 * before deltas depending on them, a good heuristic is to start
1454 * resolving deltas in the same order as their position in the pack.
1456 ALLOC_ARRAY(sorted_by_pos
, nr_ref_deltas
);
1457 for (i
= 0; i
< nr_ref_deltas
; i
++)
1458 sorted_by_pos
[i
] = &ref_deltas
[i
];
1459 QSORT(sorted_by_pos
, nr_ref_deltas
, delta_pos_compare
);
1461 if (repo_has_promisor_remote(the_repository
)) {
1463 * Prefetch the delta bases.
1465 struct oid_array to_fetch
= OID_ARRAY_INIT
;
1466 for (i
= 0; i
< nr_ref_deltas
; i
++) {
1467 struct ref_delta_entry
*d
= sorted_by_pos
[i
];
1468 if (!oid_object_info_extended(the_repository
, &d
->oid
,
1470 OBJECT_INFO_FOR_PREFETCH
))
1472 oid_array_append(&to_fetch
, &d
->oid
);
1474 promisor_remote_get_direct(the_repository
,
1475 to_fetch
.oid
, to_fetch
.nr
);
1476 oid_array_clear(&to_fetch
);
1479 for (i
= 0; i
< nr_ref_deltas
; i
++) {
1480 struct ref_delta_entry
*d
= sorted_by_pos
[i
];
1481 enum object_type type
;
1485 if (objects
[d
->obj_no
].real_type
!= OBJ_REF_DELTA
)
1487 data
= repo_read_object_file(the_repository
, &d
->oid
, &type
,
1492 if (check_object_signature(the_repository
, &d
->oid
, data
, size
,
1494 die(_("local object %s is corrupt"), oid_to_hex(&d
->oid
));
1497 * Add this as an object to the objects array and call
1498 * threaded_second_pass() (which will pick up the added
1501 append_obj_to_pack(f
, d
->oid
.hash
, data
, size
, type
);
1503 threaded_second_pass(NULL
);
1505 display_progress(progress
, nr_resolved_deltas
);
1507 free(sorted_by_pos
);
1510 static const char *derive_filename(const char *pack_name
, const char *strip
,
1511 const char *suffix
, struct strbuf
*buf
)
1514 if (!strip_suffix(pack_name
, strip
, &len
) || !len
||
1515 pack_name
[len
- 1] != '.')
1516 die(_("packfile name '%s' does not end with '.%s'"),
1518 strbuf_add(buf
, pack_name
, len
);
1519 strbuf_addstr(buf
, suffix
);
1523 static void write_special_file(const char *suffix
, const char *msg
,
1524 const char *pack_name
, const unsigned char *hash
,
1525 const char **report
)
1527 struct strbuf name_buf
= STRBUF_INIT
;
1528 const char *filename
;
1530 int msg_len
= strlen(msg
);
1533 filename
= derive_filename(pack_name
, "pack", suffix
, &name_buf
);
1535 filename
= odb_pack_name(&name_buf
, hash
, suffix
);
1537 fd
= odb_pack_keep(filename
);
1539 if (errno
!= EEXIST
)
1540 die_errno(_("cannot write %s file '%s'"),
1544 write_or_die(fd
, msg
, msg_len
);
1545 write_or_die(fd
, "\n", 1);
1548 die_errno(_("cannot close written %s file '%s'"),
1553 strbuf_release(&name_buf
);
/*
 * Move the temporary file `curr_name` to its final name.
 *
 * When `*final_name` is NULL it is first set to the name that
 * odb_pack_name() builds in `name` from `hash` and `ext`.  When the
 * final and current names are already the same string, nothing is moved;
 * the file is only made read-only (0444) if `make_read_only_if_same` is
 * set.  Dies if finalize_object_file() reports failure.
 */
static void rename_tmp_packfile(const char **final_name,
				const char *curr_name,
				struct strbuf *name, unsigned char *hash,
				const char *ext, int make_read_only_if_same)
{
	int already_in_place = *final_name && !strcmp(*final_name, curr_name);

	if (already_in_place) {
		if (make_read_only_if_same)
			chmod(*final_name, 0444);
		return;
	}

	if (!*final_name)
		*final_name = odb_pack_name(name, hash, ext);
	if (finalize_object_file(curr_name, *final_name))
		die(_("unable to rename temporary '*.%s' file to '%s'"),
		    ext, *final_name);
}
1572 static void final(const char *final_pack_name
, const char *curr_pack_name
,
1573 const char *final_index_name
, const char *curr_index_name
,
1574 const char *final_rev_index_name
, const char *curr_rev_index_name
,
1575 const char *keep_msg
, const char *promisor_msg
,
1576 unsigned char *hash
)
1578 const char *report
= "pack";
1579 struct strbuf pack_name
= STRBUF_INIT
;
1580 struct strbuf index_name
= STRBUF_INIT
;
1581 struct strbuf rev_index_name
= STRBUF_INIT
;
1586 fsync_component_or_die(FSYNC_COMPONENT_PACK
, output_fd
, curr_pack_name
);
1587 if (close(output_fd
))
1588 die_errno(_("error while closing pack file"));
1592 write_special_file("keep", keep_msg
, final_pack_name
, hash
,
1595 write_special_file("promisor", promisor_msg
, final_pack_name
,
1598 rename_tmp_packfile(&final_pack_name
, curr_pack_name
, &pack_name
,
1599 hash
, "pack", from_stdin
);
1600 if (curr_rev_index_name
)
1601 rename_tmp_packfile(&final_rev_index_name
, curr_rev_index_name
,
1602 &rev_index_name
, hash
, "rev", 1);
1603 rename_tmp_packfile(&final_index_name
, curr_index_name
, &index_name
,
1606 if (do_fsck_object
) {
1607 struct packed_git
*p
;
1608 p
= add_packed_git(final_index_name
, strlen(final_index_name
), 0);
1610 install_packed_git(the_repository
, p
);
1614 printf("%s\n", hash_to_hex(hash
));
1616 struct strbuf buf
= STRBUF_INIT
;
1618 strbuf_addf(&buf
, "%s\t%s\n", report
, hash_to_hex(hash
));
1619 write_or_die(1, buf
.buf
, buf
.len
);
1620 strbuf_release(&buf
);
1622 /* Write the last part of the buffer to stdout */
1623 write_in_full(1, input_buffer
+ input_offset
, input_len
);
1626 strbuf_release(&rev_index_name
);
1627 strbuf_release(&index_name
);
1628 strbuf_release(&pack_name
);
1631 static int git_index_pack_config(const char *k
, const char *v
,
1632 const struct config_context
*ctx
, void *cb
)
1634 struct pack_idx_option
*opts
= cb
;
1636 if (!strcmp(k
, "pack.indexversion")) {
1637 opts
->version
= git_config_int(k
, v
, ctx
->kvi
);
1638 if (opts
->version
> 2)
1639 die(_("bad pack.indexVersion=%"PRIu32
), opts
->version
);
1642 if (!strcmp(k
, "pack.threads")) {
1643 nr_threads
= git_config_int(k
, v
, ctx
->kvi
);
1645 die(_("invalid number of threads specified (%d)"),
1647 if (!HAVE_THREADS
&& nr_threads
!= 1) {
1648 warning(_("no threads support, ignoring %s"), k
);
1653 if (!strcmp(k
, "pack.writereverseindex")) {
1654 if (git_config_bool(k
, v
))
1655 opts
->flags
|= WRITE_REV
;
1657 opts
->flags
&= ~WRITE_REV
;
1659 return git_default_config(k
, v
, ctx
, cb
);
/*
 * Sort callback for arrays of uint32_t: returns -1, 0 or 1 when the
 * first value is smaller than, equal to or larger than the second.
 */
static int cmp_uint32(const void *a_, const void *b_)
{
	const uint32_t *lhs = a_;
	const uint32_t *rhs = b_;

	if (*lhs < *rhs)
		return -1;
	return *lhs > *rhs;
}
1670 static void read_v2_anomalous_offsets(struct packed_git
*p
,
1671 struct pack_idx_option
*opts
)
1673 const uint32_t *idx1
, *idx2
;
1676 /* The address of the 4-byte offset table */
1677 idx1
= (((const uint32_t *)((const uint8_t *)p
->index_data
+ p
->crc_offset
))
1678 + (size_t)p
->num_objects
/* CRC32 table */
1681 /* The address of the 8-byte offset table */
1682 idx2
= idx1
+ p
->num_objects
;
1684 for (i
= 0; i
< p
->num_objects
; i
++) {
1685 uint32_t off
= ntohl(idx1
[i
]);
1686 if (!(off
& 0x80000000))
1688 off
= off
& 0x7fffffff;
1689 check_pack_index_ptr(p
, &idx2
[off
* 2]);
1693 * The real offset is ntohl(idx2[off * 2]) in high 4
1694 * octets, and ntohl(idx2[off * 2 + 1]) in low 4
1695 * octets. But idx2[off * 2] is Zero!!!
1697 ALLOC_GROW(opts
->anomaly
, opts
->anomaly_nr
+ 1, opts
->anomaly_alloc
);
1698 opts
->anomaly
[opts
->anomaly_nr
++] = ntohl(idx2
[off
* 2 + 1]);
1701 QSORT(opts
->anomaly
, opts
->anomaly_nr
, cmp_uint32
);
1704 static void read_idx_option(struct pack_idx_option
*opts
, const char *pack_name
)
1706 struct packed_git
*p
= add_packed_git(pack_name
, strlen(pack_name
), 1);
1709 die(_("Cannot open existing pack file '%s'"), pack_name
);
1710 if (open_pack_index(p
))
1711 die(_("Cannot open existing pack idx file for '%s'"), pack_name
);
1713 /* Read the attributes from the existing idx file */
1714 opts
->version
= p
->index_version
;
1716 if (opts
->version
== 2)
1717 read_v2_anomalous_offsets(p
, opts
);
1720 * Get rid of the idx file as we do not need it anymore.
1721 * NEEDSWORK: extract this bit from free_pack_by_name() in
1722 * object-file.c, perhaps? It shouldn't matter very much as we
1723 * know we haven't installed this pack (hence we never have
1724 * read anything from it).
1726 close_pack_index(p
);
1730 static void show_pack_info(int stat_only
)
1732 int i
, baseobjects
= nr_objects
- nr_ref_deltas
- nr_ofs_deltas
;
1733 unsigned long *chain_histogram
= NULL
;
1736 CALLOC_ARRAY(chain_histogram
, deepest_delta
);
1738 for (i
= 0; i
< nr_objects
; i
++) {
1739 struct object_entry
*obj
= &objects
[i
];
1741 if (is_delta_type(obj
->type
))
1742 chain_histogram
[obj_stat
[i
].delta_depth
- 1]++;
1745 printf("%s %-6s %"PRIuMAX
" %"PRIuMAX
" %"PRIuMAX
,
1746 oid_to_hex(&obj
->idx
.oid
),
1747 type_name(obj
->real_type
), (uintmax_t)obj
->size
,
1748 (uintmax_t)(obj
[1].idx
.offset
- obj
->idx
.offset
),
1749 (uintmax_t)obj
->idx
.offset
);
1750 if (is_delta_type(obj
->type
)) {
1751 struct object_entry
*bobj
= &objects
[obj_stat
[i
].base_object_no
];
1752 printf(" %u %s", obj_stat
[i
].delta_depth
,
1753 oid_to_hex(&bobj
->idx
.oid
));
1759 printf_ln(Q_("non delta: %d object",
1760 "non delta: %d objects",
1763 for (i
= 0; i
< deepest_delta
; i
++) {
1764 if (!chain_histogram
[i
])
1766 printf_ln(Q_("chain length = %d: %lu object",
1767 "chain length = %d: %lu objects",
1768 chain_histogram
[i
]),
1770 chain_histogram
[i
]);
1772 free(chain_histogram
);
1775 static void repack_local_links(void)
1777 struct child_process cmd
= CHILD_PROCESS_INIT
;
1779 struct strbuf line
= STRBUF_INIT
;
1780 struct oidset_iter iter
;
1781 struct object_id
*oid
;
1784 if (!oidset_size(&local_links
))
1787 base_name
= mkpathdup("%s/pack/pack", repo_get_object_directory(the_repository
));
1789 strvec_push(&cmd
.args
, "pack-objects");
1790 strvec_push(&cmd
.args
, "--exclude-promisor-objects-best-effort");
1791 strvec_push(&cmd
.args
, base_name
);
1795 if (start_command(&cmd
))
1796 die(_("could not start pack-objects to repack local links"));
1798 oidset_iter_init(&local_links
, &iter
);
1799 while ((oid
= oidset_iter_next(&iter
))) {
1800 if (write_in_full(cmd
.in
, oid_to_hex(oid
), the_hash_algo
->hexsz
) < 0 ||
1801 write_in_full(cmd
.in
, "\n", 1) < 0)
1802 die(_("failed to feed local object to pack-objects"));
1806 out
= xfdopen(cmd
.out
, "r");
1807 while (strbuf_getline_lf(&line
, out
) != EOF
) {
1808 unsigned char binary
[GIT_MAX_RAWSZ
];
1809 if (line
.len
!= the_hash_algo
->hexsz
||
1810 !hex_to_bytes(binary
, line
.buf
, line
.len
))
1811 die(_("index-pack: Expecting full hex object ID lines only from pack-objects."));
1814 * pack-objects creates the .pack and .idx files, but not the
1815 * .promisor file. Create the .promisor file, which is empty.
1817 write_special_file("promisor", "", NULL
, binary
, NULL
);
1821 if (finish_command(&cmd
))
1822 die(_("could not finish pack-objects to repack local links"));
1823 strbuf_release(&line
);
1827 int cmd_index_pack(int argc
,
1830 struct repository
*repo UNUSED
)
1832 int i
, fix_thin_pack
= 0, verify
= 0, stat_only
= 0, rev_index
;
1833 const char *curr_index
;
1834 char *curr_rev_index
= NULL
;
1835 const char *index_name
= NULL
, *pack_name
= NULL
, *rev_index_name
= NULL
;
1836 const char *keep_msg
= NULL
;
1837 const char *promisor_msg
= NULL
;
1838 struct strbuf index_name_buf
= STRBUF_INIT
;
1839 struct strbuf rev_index_name_buf
= STRBUF_INIT
;
1840 struct pack_idx_entry
**idx_objects
;
1841 struct pack_idx_option opts
;
1842 unsigned char pack_hash
[GIT_MAX_RAWSZ
];
1843 unsigned foreign_nr
= 1; /* zero is a "good" value, assume bad */
1844 int report_end_of_input
= 0;
1848 * index-pack never needs to fetch missing objects except when
1849 * REF_DELTA bases are missing (which are explicitly handled). It only
1850 * accesses the repo to do hash collision checks and to check which
1851 * REF_DELTA bases need to be fetched.
1853 fetch_if_missing
= 0;
1855 if (argc
== 2 && !strcmp(argv
[1], "-h"))
1856 usage(index_pack_usage
);
1858 disable_replace_refs();
1859 fsck_options
.walk
= mark_link
;
1861 reset_pack_idx_option(&opts
);
1862 opts
.flags
|= WRITE_REV
;
1863 git_config(git_index_pack_config
, &opts
);
1864 if (prefix
&& chdir(prefix
))
1865 die(_("Cannot come back to cwd"));
1867 if (git_env_bool(GIT_TEST_NO_WRITE_REV_INDEX
, 0))
1870 rev_index
= !!(opts
.flags
& (WRITE_REV_VERIFY
| WRITE_REV
));
1872 for (i
= 1; i
< argc
; i
++) {
1873 const char *arg
= argv
[i
];
1876 if (!strcmp(arg
, "--stdin")) {
1878 } else if (!strcmp(arg
, "--fix-thin")) {
1880 } else if (skip_to_optional_arg(arg
, "--strict", &arg
)) {
1883 fsck_set_msg_types(&fsck_options
, arg
);
1884 } else if (!strcmp(arg
, "--check-self-contained-and-connected")) {
1886 check_self_contained_and_connected
= 1;
1887 } else if (skip_to_optional_arg(arg
, "--fsck-objects", &arg
)) {
1889 fsck_set_msg_types(&fsck_options
, arg
);
1890 } else if (!strcmp(arg
, "--verify")) {
1892 } else if (!strcmp(arg
, "--verify-stat")) {
1895 } else if (!strcmp(arg
, "--verify-stat-only")) {
1899 } else if (skip_to_optional_arg(arg
, "--keep", &keep_msg
)) {
1900 ; /* nothing to do */
1901 } else if (skip_to_optional_arg(arg
, "--promisor", &promisor_msg
)) {
1902 record_local_links
= 1;
1903 } else if (starts_with(arg
, "--threads=")) {
1905 nr_threads
= strtoul(arg
+10, &end
, 0);
1906 if (!arg
[10] || *end
|| nr_threads
< 0)
1907 usage(index_pack_usage
);
1908 if (!HAVE_THREADS
&& nr_threads
!= 1) {
1909 warning(_("no threads support, ignoring %s"), arg
);
1912 } else if (starts_with(arg
, "--pack_header=")) {
1913 struct pack_header
*hdr
;
1916 hdr
= (struct pack_header
*)input_buffer
;
1917 hdr
->hdr_signature
= htonl(PACK_SIGNATURE
);
1918 hdr
->hdr_version
= htonl(strtoul(arg
+ 14, &c
, 10));
1920 die(_("bad %s"), arg
);
1921 hdr
->hdr_entries
= htonl(strtoul(c
+ 1, &c
, 10));
1923 die(_("bad %s"), arg
);
1924 input_len
= sizeof(*hdr
);
1925 } else if (!strcmp(arg
, "-v")) {
1927 } else if (!strcmp(arg
, "--progress-title")) {
1928 if (progress_title
|| (i
+1) >= argc
)
1929 usage(index_pack_usage
);
1930 progress_title
= argv
[++i
];
1931 } else if (!strcmp(arg
, "--show-resolving-progress")) {
1932 show_resolving_progress
= 1;
1933 } else if (!strcmp(arg
, "--report-end-of-input")) {
1934 report_end_of_input
= 1;
1935 } else if (!strcmp(arg
, "-o")) {
1936 if (index_name
|| (i
+1) >= argc
)
1937 usage(index_pack_usage
);
1938 index_name
= argv
[++i
];
1939 } else if (starts_with(arg
, "--index-version=")) {
1941 opts
.version
= strtoul(arg
+ 16, &c
, 10);
1942 if (opts
.version
> 2)
1943 die(_("bad %s"), arg
);
1945 opts
.off32_limit
= strtoul(c
+1, &c
, 0);
1946 if (*c
|| opts
.off32_limit
& 0x80000000)
1947 die(_("bad %s"), arg
);
1948 } else if (skip_prefix(arg
, "--max-input-size=", &arg
)) {
1949 max_input_size
= strtoumax(arg
, NULL
, 10);
1950 } else if (skip_prefix(arg
, "--object-format=", &arg
)) {
1951 hash_algo
= hash_algo_by_name(arg
);
1952 if (hash_algo
== GIT_HASH_UNKNOWN
)
1953 die(_("unknown hash algorithm '%s'"), arg
);
1954 repo_set_hash_algo(the_repository
, hash_algo
);
1955 } else if (!strcmp(arg
, "--rev-index")) {
1957 } else if (!strcmp(arg
, "--no-rev-index")) {
1960 usage(index_pack_usage
);
1965 usage(index_pack_usage
);
1969 if (!pack_name
&& !from_stdin
)
1970 usage(index_pack_usage
);
1971 if (fix_thin_pack
&& !from_stdin
)
1972 die(_("the option '%s' requires '%s'"), "--fix-thin", "--stdin");
1973 if (from_stdin
&& !startup_info
->have_repository
)
1974 die(_("--stdin requires a git repository"));
1975 if (from_stdin
&& hash_algo
)
1976 die(_("options '%s' and '%s' cannot be used together"), "--object-format", "--stdin");
1977 if (!index_name
&& pack_name
)
1978 index_name
= derive_filename(pack_name
, "pack", "idx", &index_name_buf
);
1981 * Packfiles and indices do not carry enough information to be able to
1982 * identify their object hash. So when we are neither in a repository
1983 * nor has the user told us which object hash to use we have no other
1984 * choice but to guess the object hash.
1986 if (!the_repository
->hash_algo
)
1987 repo_set_hash_algo(the_repository
, GIT_HASH_SHA1
);
1989 opts
.flags
&= ~(WRITE_REV
| WRITE_REV_VERIFY
);
1991 opts
.flags
|= verify
? WRITE_REV_VERIFY
: WRITE_REV
;
1993 rev_index_name
= derive_filename(index_name
,
1995 &rev_index_name_buf
);
2000 die(_("--verify with no packfile name given"));
2001 read_idx_option(&opts
, index_name
);
2002 opts
.flags
|= WRITE_IDX_VERIFY
| WRITE_IDX_STRICT
;
2005 opts
.flags
|= WRITE_IDX_STRICT
;
2007 if (HAVE_THREADS
&& !nr_threads
) {
2008 nr_threads
= online_cpus();
2010 * Experiments show that going above 20 threads doesn't help,
2011 * no matter how many cores you have. Below that, we tend to
2012 * max at half the number of online_cpus(), presumably because
2013 * half of those are hyperthreads rather than full cores. We'll
2014 * never reduce the level below "3", though, to match a
2015 * historical value that nobody complained about.
2018 ; /* too few cores to consider capping */
2019 else if (nr_threads
< 6)
2020 nr_threads
= 3; /* historic cap */
2021 else if (nr_threads
< 40)
2024 nr_threads
= 20; /* hard cap */
2027 curr_pack
= open_pack_file(pack_name
);
2028 parse_pack_header();
2029 CALLOC_ARRAY(objects
, st_add(nr_objects
, 1));
2031 CALLOC_ARRAY(obj_stat
, st_add(nr_objects
, 1));
2032 CALLOC_ARRAY(ofs_deltas
, nr_objects
);
2033 parse_pack_objects(pack_hash
);
2034 if (report_end_of_input
)
2035 write_in_full(2, "\0", 1);
2037 conclude_pack(fix_thin_pack
, curr_pack
, pack_hash
);
2041 foreign_nr
= check_objects();
2044 show_pack_info(stat_only
);
2046 ALLOC_ARRAY(idx_objects
, nr_objects
);
2047 for (i
= 0; i
< nr_objects
; i
++)
2048 idx_objects
[i
] = &objects
[i
].idx
;
2049 curr_index
= write_idx_file(index_name
, idx_objects
, nr_objects
, &opts
, pack_hash
);
2051 curr_rev_index
= write_rev_file(rev_index_name
, idx_objects
,
2052 nr_objects
, pack_hash
,
2057 final(pack_name
, curr_pack
,
2058 index_name
, curr_index
,
2059 rev_index_name
, curr_rev_index
,
2060 keep_msg
, promisor_msg
,
2065 if (do_fsck_object
&& fsck_finish(&fsck_options
))
2066 die(_("fsck error in pack objects"));
2070 strbuf_release(&index_name_buf
);
2071 strbuf_release(&rev_index_name_buf
);
2073 free((void *) curr_pack
);
2075 free((void *) curr_index
);
2076 free(curr_rev_index
);
2078 repack_local_links();
2081 * Let the caller know this pack is not self contained
2083 if (check_self_contained_and_connected
&& foreign_nr
)