git-p4: improve import performance by avoiding reallocations.
[git/git-bigfiles.git] / builtin-fetch-pack.c
blob c2e5adc8847e8ffa94bd009121b7a295f3f0512e
1 #include "cache.h"
2 #include "refs.h"
3 #include "pkt-line.h"
4 #include "commit.h"
5 #include "tag.h"
6 #include "exec_cmd.h"
7 #include "pack.h"
8 #include "sideband.h"
9 #include "fetch-pack.h"
10 #include "remote.h"
11 #include "run-command.h"
13 static int transfer_unpack_limit = -1;
14 static int fetch_unpack_limit = -1;
15 static int unpack_limit = 100;
16 static struct fetch_pack_args args = {
17 /* .uploadpack = */ "git-upload-pack",
20 static const char fetch_pack_usage[] =
21 "git fetch-pack [--all] [--quiet|-q] [--keep|-k] [--thin] [--include-tag] [--upload-pack=<git-upload-pack>] [--depth=<n>] [--no-progress] [-v] [<host>:]<directory> [<refs>...]";
23 #define COMPLETE (1U << 0)
24 #define COMMON (1U << 1)
25 #define COMMON_REF (1U << 2)
26 #define SEEN (1U << 3)
27 #define POPPED (1U << 4)
29 static int marked;
32 * After sending this many "have"s if we do not get any new ACK , we
33 * give up traversing our history.
35 #define MAX_IN_VAIN 256
37 static struct commit_list *rev_list;
38 static int non_common_revs, multi_ack, use_sideband;
40 static void rev_list_push(struct commit *commit, int mark)
42 if (!(commit->object.flags & mark)) {
43 commit->object.flags |= mark;
45 if (!(commit->object.parsed))
46 if (parse_commit(commit))
47 return;
49 insert_by_date(commit, &rev_list);
51 if (!(commit->object.flags & COMMON))
52 non_common_revs++;
56 static int rev_list_insert_ref(const char *path, const unsigned char *sha1, int flag, void *cb_data)
58 struct object *o = deref_tag(parse_object(sha1), path, 0);
60 if (o && o->type == OBJ_COMMIT)
61 rev_list_push((struct commit *)o, SEEN);
63 return 0;
66 static int clear_marks(const char *path, const unsigned char *sha1, int flag, void *cb_data)
68 struct object *o = deref_tag(parse_object(sha1), path, 0);
70 if (o && o->type == OBJ_COMMIT)
71 clear_commit_marks((struct commit *)o,
72 COMMON | COMMON_REF | SEEN | POPPED);
73 return 0;
77 This function marks a rev and its ancestors as common.
78 In some cases, it is desirable to mark only the ancestors (for example
79 when only the server does not yet know that they are common).
82 static void mark_common(struct commit *commit,
83 int ancestors_only, int dont_parse)
85 if (commit != NULL && !(commit->object.flags & COMMON)) {
86 struct object *o = (struct object *)commit;
88 if (!ancestors_only)
89 o->flags |= COMMON;
91 if (!(o->flags & SEEN))
92 rev_list_push(commit, SEEN);
93 else {
94 struct commit_list *parents;
96 if (!ancestors_only && !(o->flags & POPPED))
97 non_common_revs--;
98 if (!o->parsed && !dont_parse)
99 if (parse_commit(commit))
100 return;
102 for (parents = commit->parents;
103 parents;
104 parents = parents->next)
105 mark_common(parents->item, 0, dont_parse);
111 Get the next rev to send, ignoring the common.
114 static const unsigned char* get_rev(void)
116 struct commit *commit = NULL;
118 while (commit == NULL) {
119 unsigned int mark;
120 struct commit_list *parents;
122 if (rev_list == NULL || non_common_revs == 0)
123 return NULL;
125 commit = rev_list->item;
126 if (!commit->object.parsed)
127 parse_commit(commit);
128 parents = commit->parents;
130 commit->object.flags |= POPPED;
131 if (!(commit->object.flags & COMMON))
132 non_common_revs--;
134 if (commit->object.flags & COMMON) {
135 /* do not send "have", and ignore ancestors */
136 commit = NULL;
137 mark = COMMON | SEEN;
138 } else if (commit->object.flags & COMMON_REF)
139 /* send "have", and ignore ancestors */
140 mark = COMMON | SEEN;
141 else
142 /* send "have", also for its ancestors */
143 mark = SEEN;
145 while (parents) {
146 if (!(parents->item->object.flags & SEEN))
147 rev_list_push(parents->item, mark);
148 if (mark & COMMON)
149 mark_common(parents->item, 1, 0);
150 parents = parents->next;
153 rev_list = rev_list->next;
156 return commit->object.sha1;
159 static int find_common(int fd[2], unsigned char *result_sha1,
160 struct ref *refs)
162 int fetching;
163 int count = 0, flushes = 0, retval;
164 const unsigned char *sha1;
165 unsigned in_vain = 0;
166 int got_continue = 0;
168 if (marked)
169 for_each_ref(clear_marks, NULL);
170 marked = 1;
172 for_each_ref(rev_list_insert_ref, NULL);
174 fetching = 0;
175 for ( ; refs ; refs = refs->next) {
176 unsigned char *remote = refs->old_sha1;
177 struct object *o;
180 * If that object is complete (i.e. it is an ancestor of a
181 * local ref), we tell them we have it but do not have to
182 * tell them about its ancestors, which they already know
183 * about.
185 * We use lookup_object here because we are only
186 * interested in the case we *know* the object is
187 * reachable and we have already scanned it.
189 if (((o = lookup_object(remote)) != NULL) &&
190 (o->flags & COMPLETE)) {
191 continue;
194 if (!fetching)
195 packet_write(fd[1], "want %s%s%s%s%s%s%s%s\n",
196 sha1_to_hex(remote),
197 (multi_ack ? " multi_ack" : ""),
198 (use_sideband == 2 ? " side-band-64k" : ""),
199 (use_sideband == 1 ? " side-band" : ""),
200 (args.use_thin_pack ? " thin-pack" : ""),
201 (args.no_progress ? " no-progress" : ""),
202 (args.include_tag ? " include-tag" : ""),
203 " ofs-delta");
204 else
205 packet_write(fd[1], "want %s\n", sha1_to_hex(remote));
206 fetching++;
208 if (is_repository_shallow())
209 write_shallow_commits(fd[1], 1);
210 if (args.depth > 0)
211 packet_write(fd[1], "deepen %d", args.depth);
212 packet_flush(fd[1]);
213 if (!fetching)
214 return 1;
216 if (args.depth > 0) {
217 char line[1024];
218 unsigned char sha1[20];
220 while (packet_read_line(fd[0], line, sizeof(line))) {
221 if (!prefixcmp(line, "shallow ")) {
222 if (get_sha1_hex(line + 8, sha1))
223 die("invalid shallow line: %s", line);
224 register_shallow(sha1);
225 continue;
227 if (!prefixcmp(line, "unshallow ")) {
228 if (get_sha1_hex(line + 10, sha1))
229 die("invalid unshallow line: %s", line);
230 if (!lookup_object(sha1))
231 die("object not found: %s", line);
232 /* make sure that it is parsed as shallow */
233 if (!parse_object(sha1))
234 die("error in object: %s", line);
235 if (unregister_shallow(sha1))
236 die("no shallow found: %s", line);
237 continue;
239 die("expected shallow/unshallow, got %s", line);
243 flushes = 0;
244 retval = -1;
245 while ((sha1 = get_rev())) {
246 packet_write(fd[1], "have %s\n", sha1_to_hex(sha1));
247 if (args.verbose)
248 fprintf(stderr, "have %s\n", sha1_to_hex(sha1));
249 in_vain++;
250 if (!(31 & ++count)) {
251 int ack;
253 packet_flush(fd[1]);
254 flushes++;
257 * We keep one window "ahead" of the other side, and
258 * will wait for an ACK only on the next one
260 if (count == 32)
261 continue;
263 do {
264 ack = get_ack(fd[0], result_sha1);
265 if (args.verbose && ack)
266 fprintf(stderr, "got ack %d %s\n", ack,
267 sha1_to_hex(result_sha1));
268 if (ack == 1) {
269 flushes = 0;
270 multi_ack = 0;
271 retval = 0;
272 goto done;
273 } else if (ack == 2) {
274 struct commit *commit =
275 lookup_commit(result_sha1);
276 mark_common(commit, 0, 1);
277 retval = 0;
278 in_vain = 0;
279 got_continue = 1;
281 } while (ack);
282 flushes--;
283 if (got_continue && MAX_IN_VAIN < in_vain) {
284 if (args.verbose)
285 fprintf(stderr, "giving up\n");
286 break; /* give up */
290 done:
291 packet_write(fd[1], "done\n");
292 if (args.verbose)
293 fprintf(stderr, "done\n");
294 if (retval != 0) {
295 multi_ack = 0;
296 flushes++;
298 while (flushes || multi_ack) {
299 int ack = get_ack(fd[0], result_sha1);
300 if (ack) {
301 if (args.verbose)
302 fprintf(stderr, "got ack (%d) %s\n", ack,
303 sha1_to_hex(result_sha1));
304 if (ack == 1)
305 return 0;
306 multi_ack = 1;
307 continue;
309 flushes--;
311 /* it is no error to fetch into a completely empty repo */
312 return count ? retval : 0;
315 static struct commit_list *complete;
317 static int mark_complete(const char *path, const unsigned char *sha1, int flag, void *cb_data)
319 struct object *o = parse_object(sha1);
321 while (o && o->type == OBJ_TAG) {
322 struct tag *t = (struct tag *) o;
323 if (!t->tagged)
324 break; /* broken repository */
325 o->flags |= COMPLETE;
326 o = parse_object(t->tagged->sha1);
328 if (o && o->type == OBJ_COMMIT) {
329 struct commit *commit = (struct commit *)o;
330 commit->object.flags |= COMPLETE;
331 insert_by_date(commit, &complete);
333 return 0;
336 static void mark_recent_complete_commits(unsigned long cutoff)
338 while (complete && cutoff <= complete->item->date) {
339 if (args.verbose)
340 fprintf(stderr, "Marking %s as complete\n",
341 sha1_to_hex(complete->item->object.sha1));
342 pop_most_recent_commit(&complete, COMPLETE);
346 static void filter_refs(struct ref **refs, int nr_match, char **match)
348 struct ref **return_refs;
349 struct ref *newlist = NULL;
350 struct ref **newtail = &newlist;
351 struct ref *ref, *next;
352 struct ref *fastarray[32];
354 if (nr_match && !args.fetch_all) {
355 if (ARRAY_SIZE(fastarray) < nr_match)
356 return_refs = xcalloc(nr_match, sizeof(struct ref *));
357 else {
358 return_refs = fastarray;
359 memset(return_refs, 0, sizeof(struct ref *) * nr_match);
362 else
363 return_refs = NULL;
365 for (ref = *refs; ref; ref = next) {
366 next = ref->next;
367 if (!memcmp(ref->name, "refs/", 5) &&
368 check_ref_format(ref->name + 5))
369 ; /* trash */
370 else if (args.fetch_all &&
371 (!args.depth || prefixcmp(ref->name, "refs/tags/") )) {
372 *newtail = ref;
373 ref->next = NULL;
374 newtail = &ref->next;
375 continue;
377 else {
378 int order = path_match(ref->name, nr_match, match);
379 if (order) {
380 return_refs[order-1] = ref;
381 continue; /* we will link it later */
384 free(ref);
387 if (!args.fetch_all) {
388 int i;
389 for (i = 0; i < nr_match; i++) {
390 ref = return_refs[i];
391 if (ref) {
392 *newtail = ref;
393 ref->next = NULL;
394 newtail = &ref->next;
397 if (return_refs != fastarray)
398 free(return_refs);
400 *refs = newlist;
403 static int everything_local(struct ref **refs, int nr_match, char **match)
405 struct ref *ref;
406 int retval;
407 unsigned long cutoff = 0;
409 save_commit_buffer = 0;
411 for (ref = *refs; ref; ref = ref->next) {
412 struct object *o;
414 o = parse_object(ref->old_sha1);
415 if (!o)
416 continue;
418 /* We already have it -- which may mean that we were
419 * in sync with the other side at some time after
420 * that (it is OK if we guess wrong here).
422 if (o->type == OBJ_COMMIT) {
423 struct commit *commit = (struct commit *)o;
424 if (!cutoff || cutoff < commit->date)
425 cutoff = commit->date;
429 if (!args.depth) {
430 for_each_ref(mark_complete, NULL);
431 if (cutoff)
432 mark_recent_complete_commits(cutoff);
436 * Mark all complete remote refs as common refs.
437 * Don't mark them common yet; the server has to be told so first.
439 for (ref = *refs; ref; ref = ref->next) {
440 struct object *o = deref_tag(lookup_object(ref->old_sha1),
441 NULL, 0);
443 if (!o || o->type != OBJ_COMMIT || !(o->flags & COMPLETE))
444 continue;
446 if (!(o->flags & SEEN)) {
447 rev_list_push((struct commit *)o, COMMON_REF | SEEN);
449 mark_common((struct commit *)o, 1, 1);
453 filter_refs(refs, nr_match, match);
455 for (retval = 1, ref = *refs; ref ; ref = ref->next) {
456 const unsigned char *remote = ref->old_sha1;
457 unsigned char local[20];
458 struct object *o;
460 o = lookup_object(remote);
461 if (!o || !(o->flags & COMPLETE)) {
462 retval = 0;
463 if (!args.verbose)
464 continue;
465 fprintf(stderr,
466 "want %s (%s)\n", sha1_to_hex(remote),
467 ref->name);
468 continue;
471 hashcpy(ref->new_sha1, local);
472 if (!args.verbose)
473 continue;
474 fprintf(stderr,
475 "already have %s (%s)\n", sha1_to_hex(remote),
476 ref->name);
478 return retval;
/*
 * Async worker: `data` is the connection's fd pair.  Hands the read
 * end (element 0) and the output descriptor `fd` to recv_sideband()
 * and returns its result.
 */
static int sideband_demux(int fd, void *data)
{
	const int *conn_fd = data;
	int in = conn_fd[0];

	return recv_sideband("fetch-pack", in, fd, 2);
}
488 static int get_pack(int xd[2], char **pack_lockfile)
490 struct async demux;
491 const char *argv[20];
492 char keep_arg[256];
493 char hdr_arg[256];
494 const char **av;
495 int do_keep = args.keep_pack;
496 struct child_process cmd;
498 memset(&demux, 0, sizeof(demux));
499 if (use_sideband) {
500 /* xd[] is talking with upload-pack; subprocess reads from
501 * xd[0], spits out band#2 to stderr, and feeds us band#1
502 * through demux->out.
504 demux.proc = sideband_demux;
505 demux.data = xd;
506 if (start_async(&demux))
507 die("fetch-pack: unable to fork off sideband"
508 " demultiplexer");
510 else
511 demux.out = xd[0];
513 memset(&cmd, 0, sizeof(cmd));
514 cmd.argv = argv;
515 av = argv;
516 *hdr_arg = 0;
517 if (!args.keep_pack && unpack_limit) {
518 struct pack_header header;
520 if (read_pack_header(demux.out, &header))
521 die("protocol error: bad pack header");
522 snprintf(hdr_arg, sizeof(hdr_arg),
523 "--pack_header=%"PRIu32",%"PRIu32,
524 ntohl(header.hdr_version), ntohl(header.hdr_entries));
525 if (ntohl(header.hdr_entries) < unpack_limit)
526 do_keep = 0;
527 else
528 do_keep = 1;
531 if (do_keep) {
532 if (pack_lockfile)
533 cmd.out = -1;
534 *av++ = "index-pack";
535 *av++ = "--stdin";
536 if (!args.quiet && !args.no_progress)
537 *av++ = "-v";
538 if (args.use_thin_pack)
539 *av++ = "--fix-thin";
540 if (args.lock_pack || unpack_limit) {
541 int s = sprintf(keep_arg,
542 "--keep=fetch-pack %"PRIuMAX " on ", (uintmax_t) getpid());
543 if (gethostname(keep_arg + s, sizeof(keep_arg) - s))
544 strcpy(keep_arg + s, "localhost");
545 *av++ = keep_arg;
548 else {
549 *av++ = "unpack-objects";
550 if (args.quiet)
551 *av++ = "-q";
553 if (*hdr_arg)
554 *av++ = hdr_arg;
555 *av++ = NULL;
557 cmd.in = demux.out;
558 cmd.git_cmd = 1;
559 if (start_command(&cmd))
560 die("fetch-pack: unable to fork off %s", argv[0]);
561 if (do_keep && pack_lockfile) {
562 *pack_lockfile = index_pack_lockfile(cmd.out);
563 close(cmd.out);
566 if (finish_command(&cmd))
567 die("%s failed", argv[0]);
568 if (use_sideband && finish_async(&demux))
569 die("error in sideband demultiplexer");
570 return 0;
573 static struct ref *do_fetch_pack(int fd[2],
574 const struct ref *orig_ref,
575 int nr_match,
576 char **match,
577 char **pack_lockfile)
579 struct ref *ref = copy_ref_list(orig_ref);
580 unsigned char sha1[20];
582 if (is_repository_shallow() && !server_supports("shallow"))
583 die("Server does not support shallow clients");
584 if (server_supports("multi_ack")) {
585 if (args.verbose)
586 fprintf(stderr, "Server supports multi_ack\n");
587 multi_ack = 1;
589 if (server_supports("side-band-64k")) {
590 if (args.verbose)
591 fprintf(stderr, "Server supports side-band-64k\n");
592 use_sideband = 2;
594 else if (server_supports("side-band")) {
595 if (args.verbose)
596 fprintf(stderr, "Server supports side-band\n");
597 use_sideband = 1;
599 if (everything_local(&ref, nr_match, match)) {
600 packet_flush(fd[1]);
601 goto all_done;
603 if (find_common(fd, sha1, ref) < 0)
604 if (!args.keep_pack)
605 /* When cloning, it is not unusual to have
606 * no common commit.
608 fprintf(stderr, "warning: no common commits\n");
610 if (get_pack(fd, pack_lockfile))
611 die("git fetch-pack: fetch failed.");
613 all_done:
614 return ref;
/*
 * Compact heads[0..nr_heads) in place so that each distinct string
 * appears once, keeping first occurrences in their original order.
 *
 * Slots past the returned count are set to NULL.  cmd_fetch_pack()
 * walks the array with its original length afterwards and treats any
 * non-empty entry as an unmatched ref, so a leftover duplicate
 * pointer there would be reported as "no such remote ref".
 *
 * Returns the number of unique entries.
 */
static int remove_duplicates(int nr_heads, char **heads)
{
	int src, dst;

	for (src = dst = 0; src < nr_heads; src++) {
		/* If heads[src] is different from any of
		 * heads[0..dst], push it in.
		 */
		int i;
		for (i = 0; i < dst; i++) {
			if (!strcmp(heads[i], heads[src]))
				break;
		}
		if (i < dst)
			continue;
		if (src != dst)
			heads[dst] = heads[src];
		dst++;
	}

	/* Clear the now-unused tail. */
	for (src = dst; src < nr_heads; src++)
		heads[src] = NULL;

	return dst;
}
639 static int fetch_pack_config(const char *var, const char *value, void *cb)
641 if (strcmp(var, "fetch.unpacklimit") == 0) {
642 fetch_unpack_limit = git_config_int(var, value);
643 return 0;
646 if (strcmp(var, "transfer.unpacklimit") == 0) {
647 transfer_unpack_limit = git_config_int(var, value);
648 return 0;
651 return git_default_config(var, value, cb);
654 static struct lock_file lock;
656 static void fetch_pack_setup(void)
658 static int did_setup;
659 if (did_setup)
660 return;
661 git_config(fetch_pack_config, NULL);
662 if (0 <= transfer_unpack_limit)
663 unpack_limit = transfer_unpack_limit;
664 else if (0 <= fetch_unpack_limit)
665 unpack_limit = fetch_unpack_limit;
666 did_setup = 1;
669 int cmd_fetch_pack(int argc, const char **argv, const char *prefix)
671 int i, ret, nr_heads;
672 struct ref *ref = NULL;
673 char *dest = NULL, **heads;
674 int fd[2];
675 struct child_process *conn;
677 nr_heads = 0;
678 heads = NULL;
679 for (i = 1; i < argc; i++) {
680 const char *arg = argv[i];
682 if (*arg == '-') {
683 if (!prefixcmp(arg, "--upload-pack=")) {
684 args.uploadpack = arg + 14;
685 continue;
687 if (!prefixcmp(arg, "--exec=")) {
688 args.uploadpack = arg + 7;
689 continue;
691 if (!strcmp("--quiet", arg) || !strcmp("-q", arg)) {
692 args.quiet = 1;
693 continue;
695 if (!strcmp("--keep", arg) || !strcmp("-k", arg)) {
696 args.lock_pack = args.keep_pack;
697 args.keep_pack = 1;
698 continue;
700 if (!strcmp("--thin", arg)) {
701 args.use_thin_pack = 1;
702 continue;
704 if (!strcmp("--include-tag", arg)) {
705 args.include_tag = 1;
706 continue;
708 if (!strcmp("--all", arg)) {
709 args.fetch_all = 1;
710 continue;
712 if (!strcmp("-v", arg)) {
713 args.verbose = 1;
714 continue;
716 if (!prefixcmp(arg, "--depth=")) {
717 args.depth = strtol(arg + 8, NULL, 0);
718 continue;
720 if (!strcmp("--no-progress", arg)) {
721 args.no_progress = 1;
722 continue;
724 usage(fetch_pack_usage);
726 dest = (char *)arg;
727 heads = (char **)(argv + i + 1);
728 nr_heads = argc - i - 1;
729 break;
731 if (!dest)
732 usage(fetch_pack_usage);
734 conn = git_connect(fd, (char *)dest, args.uploadpack,
735 args.verbose ? CONNECT_VERBOSE : 0);
736 if (conn) {
737 get_remote_heads(fd[0], &ref, 0, NULL, 0, NULL);
739 ref = fetch_pack(&args, fd, conn, ref, dest, nr_heads, heads, NULL);
740 close(fd[0]);
741 close(fd[1]);
742 if (finish_connect(conn))
743 ref = NULL;
744 } else {
745 ref = NULL;
747 ret = !ref;
749 if (!ret && nr_heads) {
750 /* If the heads to pull were given, we should have
751 * consumed all of them by matching the remote.
752 * Otherwise, 'git fetch remote no-such-ref' would
753 * silently succeed without issuing an error.
755 for (i = 0; i < nr_heads; i++)
756 if (heads[i] && heads[i][0]) {
757 error("no such remote ref %s", heads[i]);
758 ret = 1;
761 while (ref) {
762 printf("%s %s\n",
763 sha1_to_hex(ref->old_sha1), ref->name);
764 ref = ref->next;
767 return ret;
770 struct ref *fetch_pack(struct fetch_pack_args *my_args,
771 int fd[], struct child_process *conn,
772 const struct ref *ref,
773 const char *dest,
774 int nr_heads,
775 char **heads,
776 char **pack_lockfile)
778 struct stat st;
779 struct ref *ref_cpy;
781 fetch_pack_setup();
782 if (&args != my_args)
783 memcpy(&args, my_args, sizeof(args));
784 if (args.depth > 0) {
785 if (stat(git_path("shallow"), &st))
786 st.st_mtime = 0;
789 if (heads && nr_heads)
790 nr_heads = remove_duplicates(nr_heads, heads);
791 if (!ref) {
792 packet_flush(fd[1]);
793 die("no matching remote head");
795 ref_cpy = do_fetch_pack(fd, ref, nr_heads, heads, pack_lockfile);
797 if (args.depth > 0) {
798 struct cache_time mtime;
799 char *shallow = git_path("shallow");
800 int fd;
802 mtime.sec = st.st_mtime;
803 #ifdef USE_NSEC
804 mtime.usec = st.st_mtim.usec;
805 #endif
806 if (stat(shallow, &st)) {
807 if (mtime.sec)
808 die("shallow file was removed during fetch");
809 } else if (st.st_mtime != mtime.sec
810 #ifdef USE_NSEC
811 || st.st_mtim.usec != mtime.usec
812 #endif
814 die("shallow file was changed during fetch");
816 fd = hold_lock_file_for_update(&lock, shallow,
817 LOCK_DIE_ON_ERROR);
818 if (!write_shallow_commits(fd, 0)) {
819 unlink(shallow);
820 rollback_lock_file(&lock);
821 } else {
822 commit_lock_file(&lock);
826 reprepare_packed_git();
827 return ref_cpy;