2 * Copyright (c) 2023 Omar Polo <op@openbsd.org>
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 #include "got_compat.h"
19 #include <sys/queue.h>
20 #include <sys/socket.h>
23 #include <sys/types.h>
35 #include "got_error.h"
36 #include "got_cancel.h"
37 #include "got_object.h"
38 #include "got_opentemp.h"
40 #include "got_reference.h"
41 #include "got_repository.h"
42 #include "got_repository_load.h"
44 #include "got_lib_delta.h"
45 #include "got_lib_hash.h"
46 #include "got_lib_object.h"
47 #include "got_lib_object_cache.h"
48 #include "got_lib_pack.h"
49 #include "got_lib_ratelimit.h"
50 #include "got_lib_repository.h"
51 #include "got_lib_privsep.h"
53 #define GIT_BUNDLE_SIGNATURE_V2 "# v2 git bundle\n"
54 #define GIT_BUNDLE_SIGNATURE_V3 "# v3 git bundle\n"
57 #define nitems(_a) (sizeof((_a)) / sizeof((_a)[0]))
61 #define ssizeof(_x) ((ssize_t)(sizeof(_x)))
/*
 * Open a named temporary file used while loading a pack into repo.
 *
 * The name template is "<git dir>/<GOT_OBJECTS_PACK_DIR>/loading", built
 * from got_repo_get_path_git_dir(repo); it is handed to
 * got_opentemp_named_fd() together with ext, path and fd.  The descriptor
 * in *fd is then switched to GOT_DEFAULT_FILE_MODE with fchmod().
 *
 * Errors visible here: an errno-based error tagged "snprintf" when the
 * template cannot be formatted into p, and one tagged "fchmod" when the
 * mode change fails.
 *
 * NOTE(review): presumably got_opentemp_named_fd() fills in *path and *fd
 * and the caller owns both afterwards -- confirm against its definition.
 */
64 static const struct got_error
*
65 temp_file(int *fd
, char **path
, const char *ext
, struct got_repository
*repo
)
67 const struct got_error
*err
;
/* Build the template; r is the snprintf() result checked just below. */
73 r
= snprintf(p
, sizeof(p
), "%s/%s/loading",
74 got_repo_get_path_git_dir(repo
), GOT_OBJECTS_PACK_DIR
);
/* Both a negative result and a result that does not fit in p are errors. */
75 if (r
< 0 || (size_t)r
>= sizeof(p
))
76 return got_error_from_errno("snprintf");
78 err
= got_opentemp_named_fd(path
, fd
, p
, ext
);
/* Apply the default file mode to the freshly opened descriptor. */
82 if (fchmod(*fd
, GOT_DEFAULT_FILE_MODE
) == -1)
83 return got_error_from_errno("fchmod");
/*
 * Report load progress to the caller-supplied callback.
 *
 * progress_cb is compared against NULL and the rate limiter rl is
 * consulted through got_ratelimit_check() before the callback runs.
 * When the callback is invoked it receives progress_arg, packsiz and the
 * four object counters (total, indexed, loose, resolved) unchanged, and
 * its result is returned to the caller.
 *
 * NOTE(review): what is returned when progress_cb is NULL or when the
 * rate limiter suppresses the report is not visible in this extract;
 * presumably NULL (no error) -- confirm.
 */
88 static const struct got_error
*
89 load_report_progress(got_load_progress_cb progress_cb
, void *progress_arg
,
90 struct got_ratelimit
*rl
, off_t packsiz
, int nobj_total
,
91 int nobj_indexed
, int nobj_loose
, int nobj_resolved
)
93 const struct got_error
*err
;
/* A missing callback is checked for before anything else happens. */
96 if (progress_cb
== NULL
)
/* Ask the rate limiter about rl; the answer is stored in elapsed. */
99 err
= got_ratelimit_check(&elapsed
, rl
);
103 return progress_cb(progress_arg
, packsiz
, nobj_total
, nobj_indexed
,
104 nobj_loose
, nobj_resolved
);
/*
 * Copy pack data from the stream in to the descriptor outfd while
 * hashing it with algo.
 *
 * Data is read with fread() into a BUFSIZ-sized buffer.  The most recent
 * digest_len bytes are held back in hashbuf instead of being hashed and
 * written right away, because they may be the checksum that ends the
 * pack.  Held-back bytes are hashed and written only once a later read
 * shows that more data follows them.
 *
 * After the copy, the computed hash is stored in *id with
 * got_hash_final_object_id() and compared against an id built from the
 * held-back bytes; a mismatch yields GOT_ERR_PACKIDX_CSUM.  On a match
 * the held-back checksum is appended to outfd.
 *
 * cancel_cb(cancel_arg) is called before reading.  Progress is reported
 * through load_report_progress() with *tot as the pack size and zero for
 * all object counters, and once more through progress_cb near the end.
 * GOT_ERR_BAD_PACKFILE is returned from two places whose conditions are
 * not visible in this extract.
 *
 * NOTE(review): every write() result is only compared against -1, so a
 * short write would go unnoticed -- confirm that outfd cannot short-write.
 * NOTE(review): the last progress_cb call is made directly, while
 * load_report_progress() checks progress_cb against NULL; whether this
 * call is guarded is not visible here -- confirm.
 */
107 static const struct got_error
*
108 copypack(FILE *in
, int outfd
, off_t
*tot
, struct got_object_id
*id
,
109 enum got_hash_algorithm algo
, struct got_ratelimit
*rl
,
110 got_load_progress_cb progress_cb
, void *progress_arg
,
111 got_cancel_cb cancel_cb
, void *cancel_arg
)
113 const struct got_error
*err
;
114 struct got_hash hash
;
115 struct got_object_id expected_id
;
116 char buf
[BUFSIZ
], hashbuf
[GOT_HASH_DIGEST_MAXLEN
];
117 size_t r
, digest_len
, hashlen
= 0;
/* The digest length depends on the hash algorithm in use. */
120 digest_len
= got_hash_digest_length(algo
);
121 got_hash_init(&hash
, algo
);
/* Give the caller a chance to cancel before reading more input. */
124 err
= cancel_cb(cancel_arg
);
128 r
= fread(buf
, 1, sizeof(buf
), in
);
133 * An expected a checksum sits at the end of the pack
134 * file. Since we don't know the file size ahead of
135 * time we have to keep digest_len bytes buffered and
136 * avoid mixing those bytes int our hash computation
137 * until we know for sure that additional pack file
140 * We can assume that BUFSIZE is greater than
141 * digest_len and that a short read means that we've
145 if (r
>= digest_len
) {
/*
 * More data arrived, so the bytes held back earlier were pack data
 * after all: hash and write them.  hashlen starts out as 0, which makes
 * this a zero-length update and write the first time through.
 */
147 got_hash_update(&hash
, hashbuf
, hashlen
);
148 if (write(outfd
, hashbuf
, hashlen
) == -1)
149 return got_error_from_errno("write");
/* Hold back digest_len bytes of this chunk as the new checksum candidate. */
152 memcpy(hashbuf
, &buf
[r
], digest_len
);
153 hashlen
= digest_len
;
/* Hash and write the r bytes at the start of buf. */
156 got_hash_update(&hash
, buf
, r
);
157 if (write(outfd
, buf
, r
) == -1)
158 return got_error_from_errno("write");
/* Only the running size is known at this stage; object counters are 0. */
160 err
= load_report_progress(progress_cb
, progress_arg
,
161 rl
, *tot
, 0, 0, 0, 0);
169 return got_error(GOT_ERR_BAD_PACKFILE
);
171 /* short read, we've reached EOF */
/*
 * The first r held-back bytes are hashed and written as pack data; the
 * remaining held-back bytes are then moved to the front of hashbuf and
 * the r new bytes appended, giving digest_len bytes in hashbuf again.
 */
173 got_hash_update(&hash
, hashbuf
, r
);
174 if (write(outfd
, hashbuf
, r
) == -1)
175 return got_error_from_errno("write");
177 memmove(&hashbuf
[0], &hashbuf
[r
], digest_len
- r
);
178 memcpy(&hashbuf
[digest_len
- r
], buf
, r
);
183 return got_error(GOT_ERR_BAD_PACKFILE
);
/* Finish the hash; the computed id is handed back through id. */
185 got_hash_final_object_id(&hash
, id
);
/* Build the expected id from the held-back trailing bytes. */
187 memset(&expected_id
, 0, sizeof(expected_id
));
188 expected_id
.algo
= algo
;
189 memcpy(&expected_id
.hash
, hashbuf
, digest_len
);
/* The computed hash must equal the checksum stored in the stream. */
191 if (got_object_id_cmp(id
, &expected_id
) != 0)
192 return got_error(GOT_ERR_PACKIDX_CSUM
);
194 /* re-add the expected hash at the end of the pack */
195 if (write(outfd
, hashbuf
, digest_len
) == -1)
196 return got_error_from_errno("write");
/* Report once more, this time calling the callback directly. */
199 err
= progress_cb(progress_arg
, *tot
, 0, 0, 0, 0);
206 const struct got_error
*
207 got_repo_load(FILE *in
, struct got_pathlist_head
*refs_found
,
208 struct got_repository
*repo
, int list_refs_only
, int noop
,
209 got_load_progress_cb progress_cb
, void *progress_arg
,
210 got_cancel_cb cancel_cb
, void *cancel_arg
)
212 const struct got_error
*err
= NULL
;
213 struct got_object_id id
;
214 struct got_object
*obj
;
215 struct got_packfile_hdr pack_hdr
;
216 struct got_ratelimit rl
;
217 struct imsgbuf idxibuf
;
218 const char *repo_path
;
219 char *packpath
= NULL
, *idxpath
= NULL
;
220 char *tmppackpath
= NULL
, *tmpidxpath
= NULL
;
221 int packfd
= -1, idxfd
= -1;
222 char *spc
, *refname
, *id_str
= NULL
;
226 size_t i
, digest_len
;
229 int tmpfds
[3] = {-1, -1, -1};
230 int imsg_idxfds
[2] = {-1, -1};
231 int ch
, done
, nobj
, idxstatus
;
233 enum got_hash_algorithm repo_algo
, bundle_algo
;
235 got_ratelimit_init(&rl
, 0, 500);
236 repo_algo
= got_repo_get_object_format(repo
);
237 digest_len
= got_hash_digest_length(repo_algo
);
238 repo_path
= got_repo_get_path_git_dir(repo
);
240 /* bundles will use v3 and a capability to advertise sha256 */
241 bundle_algo
= GOT_HASH_SHA1
;
243 linelen
= getline(&line
, &linesize
, in
);
245 err
= got_ferror(in
, GOT_ERR_IO
);
249 if (strcmp(line
, GIT_BUNDLE_SIGNATURE_V2
) != 0 &&
250 strcmp(line
, GIT_BUNDLE_SIGNATURE_V3
) != 0) {
251 err
= got_error(GOT_ERR_BUNDLE_FORMAT
);
255 /* Parse the capabilities */
266 linelen
= getline(&line
, &linesize
, in
);
268 err
= got_ferror(in
, GOT_ERR_IO
);
272 if (line
[linelen
- 1] == '\n')
273 line
[linelen
- 1] = '\0';
276 val
= strchr(key
, '=');
278 err
= got_error_path(key
, GOT_ERR_UNKNOWN_CAPA
);
282 if (!strcmp(key
, "object-format")) {
283 if (!strcmp(val
, "sha1")) {
284 bundle_algo
= GOT_HASH_SHA1
;
287 if (!strcmp(val
, "sha256")) {
288 bundle_algo
= GOT_HASH_SHA256
;
292 err
= got_error_path(key
, GOT_ERR_UNKNOWN_CAPA
);
296 if (bundle_algo
!= repo_algo
) {
297 err
= got_error(GOT_ERR_OBJECT_FORMAT
);
301 /* Parse the prerequisite */
310 linelen
= getline(&line
, &linesize
, in
);
312 err
= got_ferror(in
, GOT_ERR_IO
);
316 if (line
[linelen
- 1] == '\n')
317 line
[linelen
- 1] = '\0';
319 if (!got_parse_object_id(&id
, line
, repo_algo
)) {
320 err
= got_error_path(line
, GOT_ERR_BAD_OBJ_ID_STR
);
324 err
= got_object_open(&obj
, repo
, &id
);
327 got_object_close(obj
);
330 /* Read references */
332 struct got_object_id
*id
;
335 linelen
= getline(&line
, &linesize
, in
);
337 err
= got_ferror(in
, GOT_ERR_IO
);
340 if (line
[linelen
- 1] == '\n')
341 line
[linelen
- 1] = '\0';
345 spc
= strchr(line
, ' ');
347 err
= got_error(GOT_ERR_IO
);
353 if (!got_ref_name_is_valid(refname
)) {
354 err
= got_error(GOT_ERR_BAD_REF_DATA
);
358 id
= malloc(sizeof(*id
));
360 err
= got_error_from_errno("malloc");
364 if (!got_parse_object_id(id
, line
, repo_algo
)) {
366 err
= got_error(GOT_ERR_BAD_OBJ_ID_STR
);
370 dup
= strdup(refname
);
373 err
= got_error_from_errno("strdup");
377 err
= got_pathlist_append(refs_found
, dup
, id
);
388 err
= temp_file(&packfd
, &tmppackpath
, ".pack", repo
);
392 err
= temp_file(&idxfd
, &tmpidxpath
, ".idx", repo
);
396 err
= copypack(in
, packfd
, &packsiz
, &id
, repo_algo
, &rl
,
397 progress_cb
, progress_arg
, cancel_cb
, cancel_arg
);
401 if (lseek(packfd
, 0, SEEK_SET
) == -1) {
402 err
= got_error_from_errno("lseek");
406 /* Safety checks on the pack's content. */
407 if (packsiz
<= ssizeof(pack_hdr
) + digest_len
) {
408 err
= got_error_msg(GOT_ERR_BAD_PACKFILE
, "short pack file");
412 n
= read(packfd
, &pack_hdr
, ssizeof(pack_hdr
));
414 err
= got_error_from_errno("read");
417 if (n
!= ssizeof(pack_hdr
)) {
418 err
= got_error(GOT_ERR_IO
);
421 if (pack_hdr
.signature
!= htobe32(GOT_PACKFILE_SIGNATURE
)) {
422 err
= got_error_msg(GOT_ERR_BAD_PACKFILE
,
423 "bad pack file signature");
426 if (pack_hdr
.version
!= htobe32(GOT_PACKFILE_VERSION
)) {
427 err
= got_error_msg(GOT_ERR_BAD_PACKFILE
,
428 "bad pack file version");
431 nobj
= be32toh(pack_hdr
.nobjects
);
433 packsiz
> ssizeof(pack_hdr
) + digest_len
) {
434 err
= got_error_msg(GOT_ERR_BAD_PACKFILE
,
435 "bad pack file with zero objects");
439 packsiz
<= ssizeof(pack_hdr
) + digest_len
) {
440 err
= got_error_msg(GOT_ERR_BAD_PACKFILE
,
441 "empty pack file with non-zero object count");
445 /* nothing to do if there are no objects. */
449 for (i
= 0; i
< nitems(tmpfds
); i
++) {
450 tmpfds
[i
] = got_opentempfd();
451 if (tmpfds
[i
] == -1) {
452 err
= got_error_from_errno("got_opentempfd");
457 if (lseek(packfd
, 0, SEEK_SET
) == -1) {
458 err
= got_error_from_errno("lseek");
462 if (socketpair(AF_UNIX
, SOCK_STREAM
, PF_UNSPEC
, imsg_idxfds
) == -1) {
463 err
= got_error_from_errno("socketpair");
468 err
= got_error_from_errno("fork");
470 } else if (idxpid
== 0)
471 got_privsep_exec_child(imsg_idxfds
,
472 GOT_PATH_PROG_INDEX_PACK
, tmppackpath
);
473 if (close(imsg_idxfds
[1]) == -1) {
474 err
= got_error_from_errno("close");
478 imsg_init(&idxibuf
, imsg_idxfds
[0]);
480 err
= got_privsep_send_index_pack_req(&idxibuf
, &id
, packfd
);
485 err
= got_privsep_send_index_pack_outfd(&idxibuf
, idxfd
);
490 for (i
= 0; i
< nitems(tmpfds
); i
++) {
491 err
= got_privsep_send_tmpfd(&idxibuf
, tmpfds
[i
]);
499 int nobj_total
, nobj_indexed
, nobj_loose
, nobj_resolved
;
501 err
= got_privsep_recv_index_progress(&done
, &nobj_total
,
502 &nobj_indexed
, &nobj_loose
, &nobj_resolved
, &idxibuf
);
505 if (nobj_indexed
!= 0) {
506 err
= load_report_progress(progress_cb
, progress_arg
,
507 &rl
, packsiz
, nobj_total
, nobj_indexed
,
508 nobj_loose
, nobj_resolved
);
513 if (close(imsg_idxfds
[0]) == -1) {
514 err
= got_error_from_errno("close");
518 if (waitpid(idxpid
, &idxstatus
, 0) == -1) {
519 err
= got_error_from_errno("waitpid");
526 err
= got_object_id_str(&id_str
, &id
);
530 if (asprintf(&packpath
, "%s/%s/pack-%s.pack", repo_path
,
531 GOT_OBJECTS_PACK_DIR
, id_str
) == -1) {
532 err
= got_error_from_errno("asprintf");
536 if (asprintf(&idxpath
, "%s/%s/pack-%s.idx", repo_path
,
537 GOT_OBJECTS_PACK_DIR
, id_str
) == -1) {
538 err
= got_error_from_errno("asprintf");
542 if (rename(tmppackpath
, packpath
) == -1) {
543 err
= got_error_from_errno3("rename", tmppackpath
, packpath
);
549 if (rename(tmpidxpath
, idxpath
) == -1) {
550 err
= got_error_from_errno3("rename", tmpidxpath
, idxpath
);
562 if (tmppackpath
&& unlink(tmppackpath
) == -1 && err
== NULL
)
563 err
= got_error_from_errno2("unlink", tmppackpath
);
564 if (packfd
!= -1 && close(packfd
) == -1 && err
== NULL
)
565 err
= got_error_from_errno("close");
568 if (tmpidxpath
&& unlink(tmpidxpath
) == -1 && err
== NULL
)
569 err
= got_error_from_errno2("unlink", tmpidxpath
);
570 if (idxfd
!= -1 && close(idxfd
) == -1 && err
== NULL
)
571 err
= got_error_from_errno("close");
574 if (imsg_idxfds
[0] != -1 && close(imsg_idxfds
[0]) == -1 && err
== NULL
)
575 err
= got_error_from_errno("close");
576 if (imsg_idxfds
[1] != -1 && close(imsg_idxfds
[1]) == -1 && err
== NULL
)
577 err
= got_error_from_errno("close");
579 for (i
= 0; i
< nitems(tmpfds
); ++i
)
580 if (tmpfds
[i
] != -1 && close(tmpfds
[i
]) == -1 && err
== NULL
)
581 err
= got_error_from_errno("close");