2 * Copyright (c) 2020 Stefan Sperling <stsp@openbsd.org>
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 #include "got_compat.h"
19 #include <sys/types.h>
20 #include <sys/queue.h>
23 #include <sys/socket.h>
36 #include "got_error.h"
37 #include "got_cancel.h"
38 #include "got_object.h"
39 #include "got_reference.h"
40 #include "got_repository.h"
41 #include "got_repository_admin.h"
42 #include "got_opentemp.h"
45 #include "got_lib_delta.h"
46 #include "got_lib_hash.h"
47 #include "got_lib_object.h"
48 #include "got_lib_object_idset.h"
49 #include "got_lib_object_cache.h"
50 #include "got_lib_pack.h"
51 #include "got_lib_privsep.h"
52 #include "got_lib_repository.h"
53 #include "got_lib_ratelimit.h"
54 #include "got_lib_pack_create.h"
55 #include "got_lib_lockfile.h"
58 #define nitems(_a) (sizeof((_a)) / sizeof((_a)[0]))
61 static const struct got_error
*
62 get_reflist_object_ids(struct got_object_id
***ids
, int *nobjects
,
63 unsigned int wanted_obj_type_mask
, struct got_reflist_head
*refs
,
64 struct got_repository
*repo
,
65 got_cancel_cb cancel_cb
, void *cancel_arg
)
67 const struct got_error
*err
= NULL
;
68 const size_t alloc_chunksz
= 256;
70 struct got_reflist_entry
*re
;
76 err
= got_reflist_sort(refs
,
77 got_ref_cmp_by_commit_timestamp_descending
, repo
);
81 *ids
= reallocarray(NULL
, alloc_chunksz
, sizeof(struct got_object_id
*));
83 return got_error_from_errno("reallocarray");
84 nalloc
= alloc_chunksz
;
86 TAILQ_FOREACH(re
, refs
, entry
) {
87 struct got_object_id
*id
;
90 err
= cancel_cb(cancel_arg
);
95 err
= got_ref_resolve(&id
, repo
, re
->ref
);
99 if (wanted_obj_type_mask
!= GOT_OBJ_TYPE_ANY
) {
101 err
= got_object_get_type(&obj_type
, repo
, id
);
104 if ((wanted_obj_type_mask
& (1 << obj_type
)) == 0) {
111 if (nalloc
<= *nobjects
) {
112 struct got_object_id
**new;
113 new = recallocarray(*ids
, nalloc
,
114 nalloc
+ alloc_chunksz
,
115 sizeof(struct got_object_id
*));
117 err
= got_error_from_errno(
122 nalloc
+= alloc_chunksz
;
124 (*ids
)[*nobjects
] = id
;
125 if ((*ids
)[*nobjects
] == NULL
) {
126 err
= got_error_from_errno("got_object_id_dup");
133 for (i
= 0; i
< *nobjects
; i
++)
142 static const struct got_error
*
143 create_temp_packfile(int *packfd
, char **tmpfile_path
,
144 struct got_repository
*repo
)
146 const struct got_error
*err
= NULL
;
151 if (asprintf(&path
, "%s/%s/packing.pack",
152 got_repo_get_path_git_dir(repo
), GOT_OBJECTS_PACK_DIR
) == -1)
153 return got_error_from_errno("asprintf");
155 err
= got_opentemp_named_fd(tmpfile_path
, packfd
, path
, "");
159 if (fchmod(*packfd
, GOT_DEFAULT_PACK_MODE
) == -1)
160 err
= got_error_from_errno2("fchmod", *tmpfile_path
);
168 *tmpfile_path
= NULL
;
173 static const struct got_error
*
174 install_packfile(FILE **packfile
, int *packfd
, char **packfile_path
,
175 char **tmpfile_path
, struct got_object_id
*pack_hash
,
176 struct got_repository
*repo
)
178 const struct got_error
*err
;
181 err
= got_object_id_str(&hash_str
, pack_hash
);
185 if (asprintf(packfile_path
, "%s/%s/pack-%s.pack",
186 got_repo_get_path_git_dir(repo
), GOT_OBJECTS_PACK_DIR
,
188 err
= got_error_from_errno("asprintf");
192 if (lseek(*packfd
, 0L, SEEK_SET
) == -1) {
193 err
= got_error_from_errno("lseek");
197 if (rename(*tmpfile_path
, *packfile_path
) == -1) {
198 err
= got_error_from_errno3("rename", *tmpfile_path
,
204 *tmpfile_path
= NULL
;
206 *packfile
= fdopen(*packfd
, "w");
207 if (*packfile
== NULL
) {
208 err
= got_error_from_errno2("fdopen", *packfile_path
);
217 const struct got_error
*
218 got_repo_pack_objects(FILE **packfile
, struct got_object_id
**pack_hash
,
219 struct got_reflist_head
*include_refs
,
220 struct got_reflist_head
*exclude_refs
, struct got_repository
*repo
,
221 int loose_obj_only
, int force_refdelta
,
222 got_pack_progress_cb progress_cb
, void *progress_arg
,
223 got_cancel_cb cancel_cb
, void *cancel_arg
)
225 const struct got_error
*err
= NULL
;
226 struct got_object_id
**ours
= NULL
, **theirs
= NULL
;
227 int nours
= 0, ntheirs
= 0, packfd
= -1, i
;
228 char *tmpfile_path
= NULL
, *packfile_path
= NULL
;
229 FILE *delta_cache
= NULL
;
230 struct got_ratelimit rl
;
235 got_ratelimit_init(&rl
, 0, 500);
237 err
= create_temp_packfile(&packfd
, &tmpfile_path
, repo
);
241 delta_cache
= got_opentemp();
242 if (delta_cache
== NULL
) {
243 err
= got_error_from_errno("got_opentemp");
247 err
= get_reflist_object_ids(&ours
, &nours
,
248 (1 << GOT_OBJ_TYPE_COMMIT
) | (1 << GOT_OBJ_TYPE_TAG
),
249 include_refs
, repo
, cancel_cb
, cancel_arg
);
254 err
= got_error(GOT_ERR_CANNOT_PACK
);
258 if (!TAILQ_EMPTY(exclude_refs
)) {
259 err
= get_reflist_object_ids(&theirs
, &ntheirs
,
260 (1 << GOT_OBJ_TYPE_COMMIT
) | (1 << GOT_OBJ_TYPE_TAG
),
262 cancel_cb
, cancel_arg
);
267 *pack_hash
= calloc(1, sizeof(**pack_hash
));
268 if (*pack_hash
== NULL
) {
269 err
= got_error_from_errno("calloc");
272 err
= got_pack_create(*pack_hash
, packfd
, delta_cache
,
273 theirs
, ntheirs
, ours
, nours
, repo
, loose_obj_only
,
274 0, force_refdelta
, progress_cb
, progress_arg
, &rl
,
275 cancel_cb
, cancel_arg
);
279 err
= install_packfile(packfile
, &packfd
, &packfile_path
,
280 &tmpfile_path
, *pack_hash
, repo
);
282 for (i
= 0; i
< nours
; i
++)
285 for (i
= 0; i
< ntheirs
; i
++)
288 if (packfd
!= -1 && close(packfd
) == -1 && err
== NULL
)
289 err
= got_error_from_errno2("close",
290 packfile_path
? packfile_path
: tmpfile_path
);
291 if (delta_cache
&& fclose(delta_cache
) == EOF
&& err
== NULL
)
292 err
= got_error_from_errno("fclose");
293 if (tmpfile_path
&& unlink(tmpfile_path
) == -1 && err
== NULL
)
294 err
= got_error_from_errno2("unlink", tmpfile_path
);
307 const struct got_error
*
308 got_repo_index_pack(char **idxpath
, FILE *packfile
,
309 struct got_object_id
*pack_hash
, struct got_repository
*repo
,
310 got_pack_index_progress_cb progress_cb
, void *progress_arg
,
311 got_cancel_cb cancel_cb
, void *cancel_arg
)
316 int npackfd
= -1, idxfd
= -1, nidxfd
= -1;
318 int idxstatus
, done
= 0;
319 int nobj_total
= 0, nobj_indexed
= 0, nobj_loose
= 0, nobj_resolved
= 0;
320 const struct got_error
*err
;
321 struct imsgbuf idxibuf
;
323 char *tmpidxpath
= NULL
;
324 char *packfile_path
= NULL
, *id_str
= NULL
;
325 const char *repo_path
= got_repo_get_path_git_dir(repo
);
329 memset(&idxibuf
, 0, sizeof(idxibuf
));
331 for (i
= 0; i
< nitems(tmpfds
); i
++)
334 if (asprintf(&path
, "%s/%s/indexing.idx",
335 repo_path
, GOT_OBJECTS_PACK_DIR
) == -1) {
336 err
= got_error_from_errno("asprintf");
339 err
= got_opentemp_named_fd(&tmpidxpath
, &idxfd
, path
, "");
343 if (fchmod(idxfd
, GOT_DEFAULT_PACK_MODE
) == -1) {
344 err
= got_error_from_errno2("fchmod", tmpidxpath
);
350 err
= got_error_from_errno("dup");
354 for (i
= 0; i
< nitems(tmpfds
); i
++) {
355 tmpfds
[i
] = got_opentempfd();
356 if (tmpfds
[i
] == -1) {
357 err
= got_error_from_errno("got_opentempfd");
362 err
= got_object_id_str(&id_str
, pack_hash
);
366 if (asprintf(&packfile_path
, "%s/%s/pack-%s.pack",
367 repo_path
, GOT_OBJECTS_PACK_DIR
, id_str
) == -1) {
368 err
= got_error_from_errno("asprintf");
372 if (fstat(fileno(packfile
), &sb
) == -1) {
373 err
= got_error_from_errno2("fstat", packfile_path
);
377 if (asprintf(idxpath
, "%s/%s/pack-%s.idx",
378 repo_path
, GOT_OBJECTS_PACK_DIR
, id_str
) == -1) {
379 err
= got_error_from_errno("asprintf");
383 if (socketpair(AF_UNIX
, SOCK_STREAM
, PF_UNSPEC
, imsg_idxfds
) == -1) {
384 err
= got_error_from_errno("socketpair");
389 err
= got_error_from_errno("fork");
391 } else if (idxpid
== 0)
392 got_privsep_exec_child(imsg_idxfds
,
393 GOT_PATH_PROG_INDEX_PACK
, packfile_path
);
394 if (close(imsg_idxfds
[1]) == -1) {
395 err
= got_error_from_errno("close");
398 if (imsgbuf_init(&idxibuf
, imsg_idxfds
[0]) == -1) {
399 err
= got_error_from_errno("imsgbuf_init");
402 imsgbuf_allow_fdpass(&idxibuf
);
404 npackfd
= dup(fileno(packfile
));
406 err
= got_error_from_errno("dup");
409 err
= got_privsep_send_index_pack_req(&idxibuf
, pack_hash
, npackfd
);
413 err
= got_privsep_send_index_pack_outfd(&idxibuf
, nidxfd
);
417 for (i
= 0; i
< nitems(tmpfds
); i
++) {
418 err
= got_privsep_send_tmpfd(&idxibuf
, tmpfds
[i
]);
426 err
= cancel_cb(cancel_arg
);
431 err
= got_privsep_recv_index_progress(&done
, &nobj_total
,
432 &nobj_indexed
, &nobj_loose
, &nobj_resolved
,
436 if (nobj_indexed
!= 0) {
437 err
= progress_cb(progress_arg
, sb
.st_size
,
438 nobj_total
, nobj_indexed
, nobj_loose
,
445 err
= progress_cb(progress_arg
, sb
.st_size
,
446 nobj_total
, nobj_indexed
, nobj_loose
,
447 nobj_resolved
, done
);
451 if (close(imsg_idxfds
[0]) == -1) {
452 err
= got_error_from_errno("close");
455 if (waitpid(idxpid
, &idxstatus
, 0) == -1) {
456 err
= got_error_from_errno("waitpid");
460 if (rename(tmpidxpath
, *idxpath
) == -1) {
461 err
= got_error_from_errno3("rename", tmpidxpath
, *idxpath
);
469 imsgbuf_clear(&idxibuf
);
470 if (tmpidxpath
&& unlink(tmpidxpath
) == -1 && err
== NULL
)
471 err
= got_error_from_errno2("unlink", tmpidxpath
);
472 if (npackfd
!= -1 && close(npackfd
) == -1 && err
== NULL
)
473 err
= got_error_from_errno("close");
474 if (idxfd
!= -1 && close(idxfd
) == -1 && err
== NULL
)
475 err
= got_error_from_errno("close");
476 for (i
= 0; i
< nitems(tmpfds
); i
++) {
477 if (tmpfds
[i
] != -1 && close(tmpfds
[i
]) == -1 && err
== NULL
)
478 err
= got_error_from_errno("close");
485 const struct got_error
*
486 got_repo_find_pack(FILE **packfile
, struct got_object_id
**pack_hash
,
487 struct got_repository
*repo
, const char *packfile_path
)
489 const struct got_error
*err
= NULL
;
490 const char *packdir_path
= NULL
;
491 char *packfile_name
= NULL
, *p
, *dot
;
492 struct got_object_id id
;
498 packdir_path
= got_repo_get_path_objects_pack(repo
);
499 if (packdir_path
== NULL
)
500 return got_error_from_errno("got_repo_get_path_objects_pack");
502 if (!got_path_is_child(packfile_path
, packdir_path
,
503 strlen(packdir_path
))) {
504 err
= got_error_path(packfile_path
, GOT_ERR_BAD_PATH
);
509 err
= got_path_basename(&packfile_name
, packfile_path
);
514 if (strncmp(p
, "pack-", 5) != 0) {
515 err
= got_error_fmt(GOT_ERR_BAD_PATH
,
516 "'%s' is not a valid pack file name",
521 dot
= strchr(p
, '.');
523 err
= got_error_fmt(GOT_ERR_BAD_PATH
,
524 "'%s' is not a valid pack file name",
528 if (strcmp(dot
+ 1, "pack") != 0) {
529 err
= got_error_fmt(GOT_ERR_BAD_PATH
,
530 "'%s' is not a valid pack file name",
535 if (!got_parse_object_id(&id
, p
, repo
->algo
)) {
536 err
= got_error_fmt(GOT_ERR_BAD_PATH
,
537 "'%s' is not a valid pack file name",
542 *pack_hash
= got_object_id_dup(&id
);
543 if (*pack_hash
== NULL
) {
544 err
= got_error_from_errno("got_object_id_dup");
548 packfd
= open(packfile_path
, O_RDONLY
| O_NOFOLLOW
| O_CLOEXEC
);
550 err
= got_error_from_errno2("open", packfile_path
);
554 *packfile
= fdopen(packfd
, "r");
555 if (*packfile
== NULL
) {
556 err
= got_error_from_errno2("fdopen", packfile_path
);
561 if (packfd
!= -1 && close(packfd
) == -1 && err
== NULL
)
562 err
= got_error_from_errno2("close", packfile_path
);
571 const struct got_error
*
572 got_repo_list_pack(FILE *packfile
, struct got_object_id
*pack_hash
,
573 struct got_repository
*repo
, got_pack_list_cb list_cb
, void *list_arg
,
574 got_cancel_cb cancel_cb
, void *cancel_arg
)
576 const struct got_error
*err
= NULL
;
577 char *id_str
= NULL
, *idxpath
= NULL
, *packpath
= NULL
;
578 struct got_packidx
*packidx
= NULL
;
579 struct got_pack
*pack
= NULL
;
581 size_t digest_len
= got_hash_digest_length(repo
->algo
);
583 err
= got_object_id_str(&id_str
, pack_hash
);
587 if (asprintf(&packpath
, "%s/pack-%s.pack",
588 GOT_OBJECTS_PACK_DIR
, id_str
) == -1) {
589 err
= got_error_from_errno("asprintf");
592 if (asprintf(&idxpath
, "%s/pack-%s.idx",
593 GOT_OBJECTS_PACK_DIR
, id_str
) == -1) {
594 err
= got_error_from_errno("asprintf");
598 err
= got_packidx_open(&packidx
, got_repo_get_fd(repo
), idxpath
, 1,
603 err
= got_repo_cache_pack(&pack
, repo
, packpath
, packidx
);
607 nobj
= be32toh(packidx
->hdr
.fanout_table
[0xff]);
608 for (i
= 0; i
< nobj
; i
++) {
610 struct got_object_id id
, base_id
;
611 off_t offset
, base_offset
= 0;
617 err
= cancel_cb(cancel_arg
);
621 oid
= packidx
->hdr
.sorted_ids
+ i
* digest_len
;
622 id
.algo
= repo
->algo
;
623 memcpy(id
.hash
, oid
, digest_len
);
625 offset
= got_packidx_get_object_offset(packidx
, i
);
627 err
= got_error(GOT_ERR_BAD_PACKIDX
);
631 err
= got_pack_parse_object_type_and_size(&type
, &size
, &tslen
,
637 case GOT_OBJ_TYPE_OFFSET_DELTA
:
638 err
= got_pack_parse_offset_delta(&base_offset
, &len
,
639 pack
, offset
, tslen
);
643 case GOT_OBJ_TYPE_REF_DELTA
:
644 err
= got_pack_parse_ref_delta(&base_id
,
645 pack
, offset
, tslen
);
650 err
= (*list_cb
)(list_arg
, &id
, type
, offset
, size
,
651 base_offset
, &base_id
);
661 got_packidx_close(packidx
);
665 static const struct got_error
*
666 repo_cleanup_lock(struct got_repository
*repo
, struct got_lockfile
**lk
)
668 const struct got_error
*err
;
669 char myname
[_POSIX_HOST_NAME_MAX
+ 1];
671 if (gethostname(myname
, sizeof(myname
)) == -1)
672 return got_error_from_errno("gethostname");
674 err
= got_lockfile_lock(lk
, "gc.pid", got_repo_get_fd(repo
));
679 * Git uses this info to provide some verbiage when it finds a
680 * lock during `git gc --force', so don't try too hard to avoid
681 * short writes and don't care if a race happens between the
682 * lockfile creation and the write itself.
684 if (dprintf((*lk
)->fd
, "%d %s", getpid(), myname
) < 0)
685 return got_error_from_errno("dprintf");
690 static const struct got_error
*
691 report_cleanup_progress(got_cleanup_progress_cb progress_cb
,
692 void *progress_arg
, struct got_ratelimit
*rl
,
693 int ncommits
, int nloose
, int npurged
, int nredundant
)
695 const struct got_error
*err
;
698 if (progress_cb
== NULL
)
701 err
= got_ratelimit_check(&elapsed
, rl
);
705 return progress_cb(progress_arg
, ncommits
, nloose
, npurged
,
709 static const struct got_error
*
710 get_loose_object_ids(struct got_object_idset
**loose_ids
,
711 off_t
*ondisk_size
, int ncommits
,
712 got_cleanup_progress_cb progress_cb
, void *progress_arg
,
713 struct got_ratelimit
*rl
, struct got_repository
*repo
)
715 const struct got_error
*err
= NULL
;
716 char *path_objects
= NULL
, *path
= NULL
;
718 struct got_object
*obj
= NULL
;
719 struct got_object_id id
;
724 *loose_ids
= got_object_idset_alloc();
725 if (*loose_ids
== NULL
)
726 return got_error_from_errno("got_object_idset_alloc");
728 path_objects
= got_repo_get_path_objects(repo
);
729 if (path_objects
== NULL
) {
730 err
= got_error_from_errno("got_repo_get_path_objects");
734 for (i
= 0; i
<= 0xff; i
++) {
737 if (asprintf(&path
, "%s/%.2x", path_objects
, i
) == -1) {
738 err
= got_error_from_errno("asprintf");
744 if (errno
== ENOENT
) {
748 err
= got_error_from_errno2("opendir", path
);
752 while ((dent
= readdir(dir
)) != NULL
) {
755 if (strcmp(dent
->d_name
, ".") == 0 ||
756 strcmp(dent
->d_name
, "..") == 0)
759 if (asprintf(&id_str
, "%.2x%s", i
, dent
->d_name
) == -1) {
760 err
= got_error_from_errno("asprintf");
764 if (!got_parse_object_id(&id
, id_str
, repo
->algo
)) {
770 err
= got_object_open_loose_fd(&fd
, &id
, repo
);
773 if (fstat(fd
, &sb
) == -1) {
774 err
= got_error_from_errno("fstat");
777 err
= got_object_read_header_privsep(&obj
, &id
, repo
,
781 fd
= -1; /* already closed */
784 case GOT_OBJ_TYPE_COMMIT
:
785 case GOT_OBJ_TYPE_TREE
:
786 case GOT_OBJ_TYPE_BLOB
:
787 case GOT_OBJ_TYPE_TAG
:
790 err
= got_error_fmt(GOT_ERR_OBJ_TYPE
,
794 got_object_close(obj
);
796 (*ondisk_size
) += sb
.st_size
;
797 err
= got_object_idset_add(*loose_ids
, &id
, NULL
);
800 err
= report_cleanup_progress(progress_cb
,
801 progress_arg
, rl
, ncommits
,
802 got_object_idset_num_elements(*loose_ids
),
808 if (closedir(dir
) != 0) {
809 err
= got_error_from_errno("closedir");
818 if (dir
&& closedir(dir
) != 0 && err
== NULL
)
819 err
= got_error_from_errno("closedir");
820 if (fd
!= -1 && close(fd
) == -1 && err
== NULL
)
821 err
= got_error_from_errno("close");
823 got_object_idset_free(*loose_ids
);
827 got_object_close(obj
);
833 static const struct got_error
*
834 load_tree_entries(struct got_object_id_queue
*ids
,
835 struct got_object_idset
*traversed_ids
, struct got_object_id
*tree_id
,
836 const char *dpath
, struct got_repository
*repo
,
837 got_cancel_cb cancel_cb
, void *cancel_arg
)
839 const struct got_error
*err
;
840 struct got_tree_object
*tree
;
844 err
= got_object_open_as_tree(&tree
, repo
, tree_id
);
848 for (i
= 0; i
< got_object_tree_get_nentries(tree
); i
++) {
849 struct got_tree_entry
*e
= got_object_tree_get_entry(tree
, i
);
850 struct got_object_id
*id
= got_tree_entry_get_id(e
);
851 mode_t mode
= got_tree_entry_get_mode(e
);
854 err
= (*cancel_cb
)(cancel_arg
);
859 if (got_object_tree_entry_is_symlink(e
) ||
860 got_object_tree_entry_is_submodule(e
) ||
861 got_object_idset_contains(traversed_ids
, id
))
864 if (asprintf(&p
, "%s%s%s", dpath
, dpath
[0] != '\0' ? "/" : "",
865 got_tree_entry_get_name(e
)) == -1) {
866 err
= got_error_from_errno("asprintf");
871 struct got_object_qid
*qid
;
872 err
= got_object_qid_alloc(&qid
, id
);
875 STAILQ_INSERT_TAIL(ids
, qid
, entry
);
876 } else if (S_ISREG(mode
)) {
877 /* This blob is referenced. */
878 err
= got_object_idset_add(traversed_ids
, id
, NULL
);
886 got_object_tree_close(tree
);
891 static const struct got_error
*
892 load_tree(struct got_object_idset
*traversed_ids
,
893 struct got_object_id
*tree_id
,
894 const char *dpath
, struct got_repository
*repo
,
895 got_cancel_cb cancel_cb
, void *cancel_arg
)
897 const struct got_error
*err
= NULL
;
898 struct got_object_id_queue tree_ids
;
899 struct got_object_qid
*qid
;
901 err
= got_object_qid_alloc(&qid
, tree_id
);
905 STAILQ_INIT(&tree_ids
);
906 STAILQ_INSERT_TAIL(&tree_ids
, qid
, entry
);
908 while (!STAILQ_EMPTY(&tree_ids
)) {
910 err
= (*cancel_cb
)(cancel_arg
);
915 qid
= STAILQ_FIRST(&tree_ids
);
916 STAILQ_REMOVE_HEAD(&tree_ids
, entry
);
918 if (got_object_idset_contains(traversed_ids
, &qid
->id
)) {
919 got_object_qid_free(qid
);
923 err
= got_object_idset_add(traversed_ids
, &qid
->id
, NULL
);
925 got_object_qid_free(qid
);
929 err
= load_tree_entries(&tree_ids
, traversed_ids
,
930 &qid
->id
, dpath
, repo
, cancel_cb
, cancel_arg
);
931 got_object_qid_free(qid
);
936 got_object_id_queue_free(&tree_ids
);
940 static const struct got_error
*
941 load_commit_or_tag(int *ncommits
, struct got_object_idset
*traversed_ids
,
942 struct got_object_id
*id
, struct got_repository
*repo
,
943 got_cleanup_progress_cb progress_cb
, void *progress_arg
,
944 struct got_ratelimit
*rl
, got_cancel_cb cancel_cb
, void *cancel_arg
)
946 const struct got_error
*err
;
947 struct got_commit_object
*commit
= NULL
;
948 struct got_tag_object
*tag
= NULL
;
949 struct got_object_id
*tree_id
= NULL
;
950 struct got_object_id_queue ids
;
951 struct got_object_qid
*qid
;
954 err
= got_object_qid_alloc(&qid
, id
);
959 STAILQ_INSERT_TAIL(&ids
, qid
, entry
);
961 while (!STAILQ_EMPTY(&ids
)) {
963 err
= (*cancel_cb
)(cancel_arg
);
968 qid
= STAILQ_FIRST(&ids
);
969 STAILQ_REMOVE_HEAD(&ids
, entry
);
971 if (got_object_idset_contains(traversed_ids
, &qid
->id
)) {
972 got_object_qid_free(qid
);
977 err
= got_object_idset_add(traversed_ids
, &qid
->id
, NULL
);
981 err
= got_object_get_type(&obj_type
, repo
, &qid
->id
);
985 case GOT_OBJ_TYPE_COMMIT
:
986 err
= got_object_open_as_commit(&commit
, repo
,
990 tree_id
= got_object_commit_get_tree_id(commit
);
992 case GOT_OBJ_TYPE_TAG
:
993 err
= got_object_open_as_tag(&tag
, repo
, &qid
->id
);
996 /* tree_id will be set below */
998 case GOT_OBJ_TYPE_TREE
:
1001 case GOT_OBJ_TYPE_BLOB
:
1005 /* should not happen */
1006 err
= got_error(GOT_ERR_OBJ_TYPE
);
1011 struct got_object_id
*id
;
1013 obj_type
= got_object_tag_get_object_type(tag
);
1014 while (obj_type
== GOT_OBJ_TYPE_TAG
) {
1015 struct got_tag_object
*next_tag
;
1017 id
= got_object_tag_get_object_id(tag
);
1018 if (!got_object_idset_contains(traversed_ids
,
1020 err
= got_object_idset_add(
1021 traversed_ids
, id
, NULL
);
1026 err
= got_object_open_as_tag(&next_tag
, repo
,
1031 got_object_tag_close(tag
);
1033 obj_type
= got_object_tag_get_object_type(tag
);
1035 id
= got_object_tag_get_object_id(tag
);
1037 case GOT_OBJ_TYPE_COMMIT
:
1038 err
= got_object_open_as_commit(&commit
, repo
,
1042 tree_id
= got_object_commit_get_tree_id(commit
);
1044 case GOT_OBJ_TYPE_TREE
:
1047 case GOT_OBJ_TYPE_BLOB
:
1048 if (got_object_idset_contains(traversed_ids
,
1051 err
= got_object_idset_add(traversed_ids
, id
,
1057 /* should not happen */
1058 err
= got_error(GOT_ERR_OBJ_TYPE
);
1061 } else if (tree_id
== NULL
) {
1062 /* Blob which has already been marked as traversed. */
1067 err
= load_tree(traversed_ids
, tree_id
, "",
1068 repo
, cancel_cb
, cancel_arg
);
1075 (*ncommits
)++; /* scanned tags are counted as commits */
1077 err
= report_cleanup_progress(progress_cb
, progress_arg
, rl
,
1078 *ncommits
, -1, -1, -1);
1083 /* Find parent commits to scan. */
1084 const struct got_object_id_queue
*parent_ids
;
1085 parent_ids
= got_object_commit_get_parent_ids(commit
);
1086 err
= got_object_id_queue_copy(parent_ids
, &ids
);
1089 got_object_commit_close(commit
);
1093 got_object_tag_close(tag
);
1096 got_object_qid_free(qid
);
1101 got_object_qid_free(qid
);
1103 got_object_commit_close(commit
);
1105 got_object_tag_close(tag
);
1106 got_object_id_queue_free(&ids
);
1110 static const struct got_error
*
1111 is_object_packed(int *packed
, struct got_repository
*repo
,
1112 struct got_object_id
*id
)
1114 const struct got_error
*err
;
1115 struct got_object
*obj
;
1119 err
= got_object_open_packed(&obj
, id
, repo
);
1121 if (err
->code
== GOT_ERR_NO_OBJ
)
1125 got_object_close(obj
);
1130 struct purge_loose_object_arg
{
1131 struct got_repository
*repo
;
1132 got_cleanup_progress_cb progress_cb
;
1134 struct got_ratelimit
*rl
;
1135 struct got_object_idset
*traversed_ids
;
1146 static const struct got_error
*
1147 purge_loose_object(struct got_object_id
*id
, void *data
, void *arg
)
1149 struct purge_loose_object_arg
*a
= arg
;
1150 const struct got_error
*err
, *unlock_err
= NULL
;
1152 int packed
, fd
= -1;
1154 struct got_lockfile
*lf
= NULL
;
1156 err
= is_object_packed(&packed
, a
->repo
, id
);
1160 if (!packed
&& got_object_idset_contains(a
->traversed_ids
, id
))
1166 err
= got_object_get_path(&path
, id
, a
->repo
);
1170 err
= got_object_open_loose_fd(&fd
, id
, a
->repo
);
1174 if (fstat(fd
, &sb
) == -1) {
1175 err
= got_error_from_errno("fstat");
1180 * Do not delete objects which are younger than our maximum
1181 * modification time threshold. This prevents a race where
1182 * new objects which are being added to the repository
1183 * concurrently would be deleted.
1185 if (a
->ignore_mtime
|| sb
.st_mtime
<= a
->max_mtime
) {
1187 err
= got_lockfile_lock(&lf
, path
, -1);
1190 if (unlink(path
) == -1) {
1191 err
= got_error_from_errno2("unlink", path
);
1197 a
->size_purged
+= sb
.st_size
;
1198 err
= report_cleanup_progress(a
->progress_cb
, a
->progress_arg
,
1199 a
->rl
, a
->ncommits
, a
->nloose
, a
->npurged
, -1);
1204 if (fd
!= -1 && close(fd
) == -1 && err
== NULL
)
1205 err
= got_error_from_errno("close");
1208 unlock_err
= got_lockfile_unlock(lf
, -1);
1209 return err
? err
: unlock_err
;
1212 static const struct got_error
*
1213 repo_purge_unreferenced_loose_objects(struct got_repository
*repo
,
1214 struct got_object_idset
*traversed_ids
,
1215 off_t
*size_before
, off_t
*size_after
, int ncommits
, int *nloose
,
1216 int *npacked
, int *npurged
, int dry_run
, int ignore_mtime
,
1217 time_t max_mtime
, struct got_ratelimit
*rl
,
1218 got_cleanup_progress_cb progress_cb
, void *progress_arg
,
1219 got_cancel_cb cancel_cb
, void *cancel_arg
)
1221 const struct got_error
*err
;
1222 struct got_object_idset
*loose_ids
;
1223 struct purge_loose_object_arg arg
;
1225 err
= get_loose_object_ids(&loose_ids
, size_before
, ncommits
,
1226 progress_cb
, progress_arg
, rl
, repo
);
1229 *nloose
= got_object_idset_num_elements(loose_ids
);
1231 got_object_idset_free(loose_ids
);
1233 err
= progress_cb(progress_arg
, 0, 0, 0, -1);
1240 memset(&arg
, 0, sizeof(arg
));
1242 arg
.progress_arg
= progress_arg
;
1243 arg
.progress_cb
= progress_cb
;
1245 arg
.traversed_ids
= traversed_ids
;
1246 arg
.nloose
= *nloose
;
1249 arg
.size_purged
= 0;
1250 arg
.dry_run
= dry_run
;
1251 arg
.max_mtime
= max_mtime
;
1252 arg
.ignore_mtime
= ignore_mtime
;
1253 err
= got_object_idset_for_each(loose_ids
, purge_loose_object
, &arg
);
1257 *size_after
= *size_before
- arg
.size_purged
;
1258 *npacked
= arg
.npacked
;
1259 *npurged
= arg
.npurged
;
1261 /* Produce a final progress report. */
1263 err
= progress_cb(progress_arg
, ncommits
, *nloose
,
1269 got_object_idset_free(loose_ids
);
1273 static const struct got_error
*
1274 purge_redundant_pack(struct got_repository
*repo
, const char *packidx_path
,
1275 int dry_run
, int ignore_mtime
, time_t max_mtime
,
1276 int *remove
, off_t
*size_before
, off_t
*size_after
)
1278 static const char *ext
[] = {".idx", ".pack", ".rev", ".bitmap",
1279 ".promisor", ".mtimes"};
1281 char *dot
, path
[PATH_MAX
];
1284 if (strlcpy(path
, packidx_path
, sizeof(path
)) >= sizeof(path
))
1285 return got_error(GOT_ERR_NO_SPACE
);
1288 * Do not delete pack files which are younger than our maximum
1289 * modification time threshold. This prevents a race where a
1290 * new pack file which is being added to the repository
1291 * concurrently would be deleted.
1293 if (fstatat(got_repo_get_fd(repo
), path
, &sb
, 0) == -1) {
1294 if (errno
== ENOENT
)
1296 return got_error_from_errno2("fstatat", path
);
1298 if (!ignore_mtime
&& sb
.st_mtime
> max_mtime
)
1302 * For compatibility with Git, if a matching .keep file exists,
1303 * don't delete the packfile.
1305 dot
= strrchr(path
, '.');
1307 if (strlcat(path
, ".keep", sizeof(path
)) >= sizeof(path
))
1308 return got_error(GOT_ERR_NO_SPACE
);
1309 if (faccessat(got_repo_get_fd(repo
), path
, F_OK
, 0) == 0)
1312 for (i
= 0; i
< nitems(ext
); ++i
) {
1315 if (strlcat(path
, ext
[i
], sizeof(path
)) >=
1317 return got_error(GOT_ERR_NO_SPACE
);
1319 if (fstatat(got_repo_get_fd(repo
), path
, &sb
, 0) ==
1321 if (errno
== ENOENT
)
1323 return got_error_from_errno2("fstatat", path
);
1326 *size_before
+= sb
.st_size
;
1328 *size_after
+= sb
.st_size
;
1335 if (unlinkat(got_repo_get_fd(repo
), path
, 0) == -1) {
1336 if (errno
== ENOENT
)
1338 return got_error_from_errno2("unlinkat",
1346 static const struct got_error
*
1347 pack_is_redundant(int *redundant
, struct got_repository
*repo
,
1348 struct got_object_idset
*traversed_ids
,
1349 const char *packidx_path
, struct got_object_idset
*idset
)
1351 const struct got_error
*err
;
1352 struct got_packidx
*packidx
;
1354 struct got_object_id id
;
1356 size_t digest_len
= got_hash_digest_length(repo
->algo
);
1360 err
= got_repo_get_packidx(&packidx
, packidx_path
, repo
);
1364 nobjects
= be32toh(packidx
->hdr
.fanout_table
[0xff]);
1365 for (i
= 0; i
< nobjects
; ++i
) {
1366 pid
= packidx
->hdr
.sorted_ids
+ i
* digest_len
;
1368 memset(&id
, 0, sizeof(id
));
1369 memcpy(&id
.hash
, pid
, digest_len
);
1370 id
.algo
= repo
->algo
;
1372 if (got_object_idset_contains(idset
, &id
))
1375 if (!got_object_idset_contains(traversed_ids
, &id
))
1379 err
= got_object_idset_add(idset
, &id
, NULL
);
1393 pack_info_cmp(const void *a
, const void *b
)
1395 const struct pack_info
*pa
, *pb
;
1399 if (pa
->nobjects
== pb
->nobjects
)
1400 return strcmp(pa
->path
, pb
->path
);
1401 if (pa
->nobjects
> pb
->nobjects
)
1406 static const struct got_error
*
1407 repo_purge_redundant_packfiles(struct got_repository
*repo
,
1408 struct got_object_idset
*traversed_ids
,
1409 off_t
*size_before
, off_t
*size_after
, int dry_run
, int ignore_mtime
,
1410 time_t max_mtime
, int nloose
, int ncommits
, int npurged
,
1411 struct got_ratelimit
*rl
,
1412 got_cleanup_progress_cb progress_cb
, void *progress_arg
,
1413 got_cancel_cb cancel_cb
, void *cancel_arg
)
1415 const struct got_error
*err
;
1416 struct pack_info
*pinfo
, *sorted
= NULL
;
1417 struct got_packidx
*packidx
;
1418 struct got_object_idset
*idset
= NULL
;
1419 struct got_pathlist_entry
*pe
;
1421 int remove
, redundant_packs
= 0;
1424 RB_FOREACH(pe
, got_pathlist_head
, &repo
->packidx_paths
)
1430 sorted
= calloc(npacks
, sizeof(*sorted
));
1432 return got_error_from_errno("calloc");
1435 RB_FOREACH(pe
, got_pathlist_head
, &repo
->packidx_paths
) {
1436 err
= got_repo_get_packidx(&packidx
, pe
->path
, repo
);
1440 pinfo
= &sorted
[i
++];
1441 pinfo
->path
= pe
->path
;
1442 pinfo
->nobjects
= be32toh(packidx
->hdr
.fanout_table
[0xff]);
1444 qsort(sorted
, npacks
, sizeof(*sorted
), pack_info_cmp
);
1446 idset
= got_object_idset_alloc();
1447 if (idset
== NULL
) {
1448 err
= got_error_from_errno("got_object_idset_alloc");
1452 for (i
= 0; i
< npacks
; ++i
) {
1454 err
= (*cancel_cb
)(cancel_arg
);
1459 err
= pack_is_redundant(&remove
, repo
, traversed_ids
,
1460 sorted
[i
].path
, idset
);
1463 err
= purge_redundant_pack(repo
, sorted
[i
].path
, dry_run
,
1464 ignore_mtime
, max_mtime
, &remove
, size_before
, size_after
);
1469 err
= report_cleanup_progress(progress_cb
, progress_arg
,
1470 rl
, ncommits
, nloose
, npurged
, ++redundant_packs
);
1475 /* Produce a final progress report. */
1477 err
= progress_cb(progress_arg
, ncommits
, nloose
, npurged
,
1485 got_object_idset_free(idset
);
/*
 * Clean up a repository: build and index a pack file from the object IDs
 * obtained from the repository's reference list, then run the helpers
 * which purge unreferenced loose objects and redundant pack files.
 *
 * loose_before and loose_after are handed to
 * repo_purge_unreferenced_loose_objects(); pack_before and pack_after are
 * handed to repo_purge_redundant_packfiles(). ncommits is updated by
 * load_commit_or_tag(); nloose and npacked are passed by pointer to the
 * loose object purge helper. dry_run and ignore_mtime are forwarded to
 * both purge helpers.
 *
 * progress_cb reports cleanup progress, pack_progress_cb is given to
 * got_pack_create() and index_progress_cb to got_repo_index_pack().
 * cancel_cb/cancel_arg are forwarded to every long-running helper.
 *
 * Errors are reported through the returned got_error pointer.
 */
1489 const struct got_error
*
1490 got_repo_cleanup(struct got_repository
*repo
,
1491 off_t
*loose_before
, off_t
*loose_after
,
1492 off_t
*pack_before
, off_t
*pack_after
,
1493 int *ncommits
, int *nloose
, int *npacked
, int dry_run
, int ignore_mtime
,
1494 got_cleanup_progress_cb progress_cb
, void *progress_arg
,
1495 got_pack_progress_cb pack_progress_cb
, void *pack_progress_arg
,
1496 got_pack_index_progress_cb index_progress_cb
, void *index_progress_arg
,
1497 got_cancel_cb cancel_cb
, void *cancel_arg
)
1499 const struct got_error
*unlock_err
, *err
= NULL
;
1500 struct got_lockfile
*lk
= NULL
;
1501 struct got_ratelimit rl
;
1502 struct got_reflist_head refs
;
1503 struct got_object_idset
*traversed_ids
= NULL
;
1504 struct got_reflist_entry
*re
;
1505 struct got_object_id
**referenced_ids
;
1507 int npurged
= 0, packfd
= -1;
1508 char *tmpfile_path
= NULL
, *packfile_path
= NULL
, *idxpath
= NULL
;
1509 FILE *delta_cache
= NULL
, *packfile
= NULL
;
1510 struct got_object_id pack_hash
;
1511 time_t max_mtime
= 0;
/* One rate limiter is shared by all helpers which report progress below. */
1514 got_ratelimit_init(&rl
, 0, 500);
1515 memset(&pack_hash
, 0, sizeof(pack_hash
));
/* Take the cleanup lock; it is released with got_lockfile_unlock() below. */
1525 err
= repo_cleanup_lock(repo
, &lk
);
/* Temporary pack file and delta cache, both consumed by got_pack_create(). */
1529 err
= create_temp_packfile(&packfd
, &tmpfile_path
, repo
);
1533 delta_cache
= got_opentemp();
1534 if (delta_cache
== NULL
) {
1535 err
= got_error_from_errno("got_opentemp");
/* ID set passed to load_commit_or_tag() and to both purge helpers. */
1539 traversed_ids
= got_object_idset_alloc();
1540 if (traversed_ids
== NULL
) {
1541 err
= got_error_from_errno("got_object_idset_alloc");
/* List references with an empty namespace string, ordered by name. */
1545 err
= got_ref_list(&refs
, repo
, "", got_ref_cmp_by_name
, NULL
);
/* Unless mtimes are ignored, look for the newest reference mtime. */
1548 if (!ignore_mtime
) {
1549 TAILQ_FOREACH(re
, &refs
, entry
) {
1550 time_t mtime
= got_ref_get_mtime(re
->ref
);
1551 if (mtime
> max_mtime
)
1555 * For safety, keep objects created within 10 minutes
1556 * before the youngest reference was created.
1558 if (max_mtime
>= 600)
/* Collect the IDs of objects of any type from the reference list. */
1562 err
= get_reflist_object_ids(&referenced_ids
, &nreferenced
,
1563 GOT_OBJ_TYPE_ANY
, &refs
, repo
, cancel_cb
, cancel_arg
);
/* Hand each referenced object ID to load_commit_or_tag(). */
1567 for (i
= 0; i
< nreferenced
; i
++) {
1568 struct got_object_id
*id
= referenced_ids
[i
];
1569 err
= load_commit_or_tag(ncommits
, traversed_ids
,
1570 id
, repo
, progress_cb
, progress_arg
, &rl
,
1571 cancel_cb
, cancel_arg
);
/* Create a pack from the referenced IDs, written to packfd. */
1576 err
= got_pack_create(&pack_hash
, packfd
, delta_cache
,
1577 NULL
, 0, referenced_ids
, nreferenced
, repo
, 0,
1578 0, 0, pack_progress_cb
, pack_progress_arg
,
1579 &rl
, cancel_cb
, cancel_arg
);
/*
 * install_packfile() receives both the temporary and the final pack
 * path along with the pack hash; the resulting pack file is then indexed.
 */
1583 err
= install_packfile(&packfile
, &packfd
, &packfile_path
,
1584 &tmpfile_path
, &pack_hash
, repo
);
1588 err
= got_repo_index_pack(&idxpath
, packfile
, &pack_hash
, repo
,
1589 index_progress_cb
, index_progress_arg
,
1590 cancel_cb
, cancel_arg
);
/* Re-list pack index paths into repo->packidx_paths. */
1594 err
= got_repo_list_packidx(&repo
->packidx_paths
, repo
);
/* Purge loose objects first, then redundant pack files. */
1598 err
= repo_purge_unreferenced_loose_objects(repo
, traversed_ids
,
1599 loose_before
, loose_after
, *ncommits
, nloose
, npacked
, &npurged
,
1600 dry_run
, ignore_mtime
, max_mtime
, &rl
, progress_cb
, progress_arg
,
1601 cancel_cb
, cancel_arg
);
1605 err
= repo_purge_redundant_packfiles(repo
, traversed_ids
,
1606 pack_before
, pack_after
, dry_run
, ignore_mtime
, max_mtime
,
1607 *nloose
, *ncommits
, npurged
, &rl
, progress_cb
, progress_arg
,
1608 cancel_cb
, cancel_arg
);
/*
 * Remove the newly written pack index and pack file again. An error
 * from unlinking the pack file is only recorded if err is still NULL.
 * NOTE(review): presumably this only happens for dry runs -- confirm.
 */
1613 if (idxpath
&& unlink(idxpath
) == -1)
1614 err
= got_error_from_errno2("unlink", idxpath
);
1615 if (packfile_path
&& unlink(packfile_path
) == -1 && err
== NULL
)
1616 err
= got_error_from_errno2("unlink", packfile_path
);
/* Release the cleanup lock. */
1620 unlock_err
= got_lockfile_unlock(lk
, got_repo_get_fd(repo
));
/*
 * Release remaining resources. Failures here are only recorded when
 * no earlier error is pending (err == NULL).
 */
1625 got_object_idset_free(traversed_ids
);
1626 if (packfd
!= -1 && close(packfd
) == -1 && err
== NULL
)
1627 err
= got_error_from_errno2("close",
1628 packfile_path
? packfile_path
: tmpfile_path
);
1629 if (delta_cache
&& fclose(delta_cache
) == EOF
&& err
== NULL
)
1630 err
= got_error_from_errno("fclose");
1631 if (tmpfile_path
&& unlink(tmpfile_path
) == -1 && err
== NULL
)
1632 err
= got_error_from_errno2("unlink", tmpfile_path
);
1634 free(packfile_path
);
/*
 * Scan the repository's pack directory for pack index files whose
 * corresponding pack file cannot be found, unlink such index files,
 * and report the full path of each one via progress_cb.
 *
 * cancel_cb is invoked while iterating over directory entries.
 * NOTE(review): how dry_run gates the unlinkat() call should be
 * confirmed against the full function body.
 *
 * Errors are reported through the returned got_error pointer.
 */
1639 const struct got_error
*
1640 got_repo_remove_lonely_packidx(struct got_repository
*repo
, int dry_run
,
1641 got_lonely_packidx_progress_cb progress_cb
, void *progress_arg
,
1642 got_cancel_cb cancel_cb
, void *cancel_arg
)
1644 const struct got_error
*err
= NULL
;
1645 DIR *packdir
= NULL
;
1646 struct dirent
*dent
;
1647 char *pack_relpath
= NULL
;
/* Open the pack directory relative to the repository's file descriptor. */
1651 packdir_fd
= openat(got_repo_get_fd(repo
),
1652 GOT_OBJECTS_PACK_DIR
, O_DIRECTORY
| O_CLOEXEC
);
/* ENOENT is distinguished from other openat() failures. */
1653 if (packdir_fd
== -1) {
1654 if (errno
== ENOENT
)
1656 return got_error_from_errno_fmt("openat: %s/%s",
1657 got_repo_get_path_git_dir(repo
),
1658 GOT_OBJECTS_PACK_DIR
);
/* Obtain a DIR stream for the descriptor so readdir() can be used. */
1661 packdir
= fdopendir(packdir_fd
);
1662 if (packdir
== NULL
) {
1663 err
= got_error_from_errno("fdopendir");
1668 while ((dent
= readdir(packdir
)) != NULL
) {
/* Give the caller a chance to cancel between directory entries. */
1670 err
= cancel_cb(cancel_arg
);
/* Only pack index file names for the repository's object format matter. */
1675 if (!got_repo_is_packidx_filename(dent
->d_name
,
1676 strlen(dent
->d_name
),
1677 got_repo_get_object_format(repo
)))
/* Derive the path of the pack file which belongs to this index. */
1680 err
= got_packidx_get_packfile_path(&pack_relpath
,
/* fstatat() succeeding means the pack file exists. */
1685 if (fstatat(packdir_fd
, pack_relpath
, &sb
, 0) != -1) {
1687 pack_relpath
= NULL
;
/* Any fstatat() failure other than ENOENT is reported as an error. */
1690 if (errno
!= ENOENT
) {
1691 err
= got_error_from_errno_fmt("fstatat: %s/%s/%s",
1692 got_repo_get_path_git_dir(repo
),
1693 GOT_OBJECTS_PACK_DIR
,
/* Unlink the index by its entry name within the pack directory. */
1699 if (unlinkat(packdir_fd
, dent
->d_name
, 0) == -1) {
1700 err
= got_error_from_errno("unlinkat");
/* Build the full path of the index file for the progress callback. */
1706 if (asprintf(&path
, "%s/%s/%s",
1707 got_repo_get_path_git_dir(repo
),
1708 GOT_OBJECTS_PACK_DIR
,
1709 dent
->d_name
) == -1) {
1710 err
= got_error_from_errno("asprintf");
1713 err
= progress_cb(progress_arg
, path
);
1719 pack_relpath
= NULL
;
/* Close the directory stream; a pending error takes precedence. */
1722 if (packdir
&& closedir(packdir
) != 0 && err
== NULL
)
1723 err
= got_error_from_errno("closedir");