update TODO list
[got-portable.git] / lib / object_parse.c
blobc0b1855c518c2630ff4a38ada9e593cbd10c74bd
/*
 * Copyright (c) 2018, 2019, 2020 Stefan Sperling <stsp@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
17 #include "got_compat.h"
19 #include <sys/types.h>
20 #include <sys/stat.h>
21 #include <sys/queue.h>
22 #include <sys/uio.h>
23 #include <sys/socket.h>
24 #include <sys/wait.h>
25 #include <sys/mman.h>
27 #include <errno.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <stdint.h>
32 #include <zlib.h>
33 #include <ctype.h>
34 #include <limits.h>
35 #include <time.h>
36 #include <unistd.h>
38 #include "got_error.h"
39 #include "got_object.h"
40 #include "got_repository.h"
41 #include "got_opentemp.h"
42 #include "got_path.h"
44 #include "got_lib_hash.h"
45 #include "got_lib_delta.h"
46 #include "got_lib_inflate.h"
47 #include "got_lib_object.h"
48 #include "got_lib_object_parse.h"
49 #include "got_lib_object_cache.h"
50 #include "got_lib_pack.h"
51 #include "got_lib_repository.h"
53 #ifndef nitems
54 #define nitems(_a) (sizeof(_a) / sizeof((_a)[0]))
55 #endif
57 struct got_object_id *
58 got_object_id_dup(struct got_object_id *id1)
60 struct got_object_id *id2;
62 id2 = malloc(sizeof(*id2));
63 if (id2 == NULL)
64 return NULL;
65 memcpy(id2, id1, sizeof(*id2));
66 return id2;
69 int
70 got_object_id_cmp(const struct got_object_id *id1,
71 const struct got_object_id *id2)
73 return memcmp(id1->sha1, id2->sha1, SHA1_DIGEST_LENGTH);
76 const struct got_error *
77 got_object_qid_alloc_partial(struct got_object_qid **qid)
79 *qid = malloc(sizeof(**qid));
80 if (*qid == NULL)
81 return got_error_from_errno("malloc");
83 (*qid)->data = NULL;
84 return NULL;
87 const struct got_error *
88 got_object_id_str(char **outbuf, struct got_object_id *id)
90 static const size_t len = GOT_OBJECT_ID_HEX_MAXLEN;
92 *outbuf = malloc(len);
93 if (*outbuf == NULL)
94 return got_error_from_errno("malloc");
96 if (got_object_id_hex(id, *outbuf, len) == NULL) {
97 free(*outbuf);
98 *outbuf = NULL;
99 return got_error(GOT_ERR_BAD_OBJ_ID_STR);
102 return NULL;
105 char *
106 got_object_id_hex(struct got_object_id *id, char *buf, size_t len)
108 return got_sha1_digest_to_str(id->sha1, buf, len);
111 const struct got_error *
112 got_object_type_label(const char **label, int obj_type)
114 const struct got_error *err = NULL;
116 switch (obj_type) {
117 case GOT_OBJ_TYPE_BLOB:
118 *label = GOT_OBJ_LABEL_BLOB;
119 break;
120 case GOT_OBJ_TYPE_TREE:
121 *label = GOT_OBJ_LABEL_TREE;
122 break;
123 case GOT_OBJ_TYPE_COMMIT:
124 *label = GOT_OBJ_LABEL_COMMIT;
125 break;
126 case GOT_OBJ_TYPE_TAG:
127 *label = GOT_OBJ_LABEL_TAG;
128 break;
129 default:
130 *label = NULL;
131 err = got_error(GOT_ERR_OBJ_TYPE);
132 break;
135 return err;
138 void
139 got_object_close(struct got_object *obj)
141 if (obj->refcnt > 0) {
142 obj->refcnt--;
143 if (obj->refcnt > 0)
144 return;
147 if (obj->flags & GOT_OBJ_FLAG_DELTIFIED) {
148 struct got_delta *delta;
149 while (!STAILQ_EMPTY(&obj->deltas.entries)) {
150 delta = STAILQ_FIRST(&obj->deltas.entries);
151 STAILQ_REMOVE_HEAD(&obj->deltas.entries, entry);
152 free(delta);
155 free(obj);
158 const struct got_error *
159 got_object_raw_close(struct got_raw_object *obj)
161 const struct got_error *err = NULL;
163 if (obj->refcnt > 0) {
164 obj->refcnt--;
165 if (obj->refcnt > 0)
166 return NULL;
169 if (obj->close_cb)
170 obj->close_cb(obj);
172 if (obj->f == NULL) {
173 if (obj->fd != -1) {
174 if (munmap(obj->data, obj->hdrlen + obj->size) == -1)
175 err = got_error_from_errno("munmap");
176 if (close(obj->fd) == -1 && err == NULL)
177 err = got_error_from_errno("close");
178 } else
179 free(obj->data);
180 } else {
181 if (fclose(obj->f) == EOF && err == NULL)
182 err = got_error_from_errno("fclose");
184 free(obj);
185 return err;
/*
 * Release a queue entry. Only the entry itself is freed; its data
 * pointer is not touched.
 */
void
got_object_qid_free(struct got_object_qid *qid)
{
	void *mem = qid;

	free(mem);
}
194 void
195 got_object_id_queue_free(struct got_object_id_queue *ids)
197 struct got_object_qid *qid;
199 while (!STAILQ_EMPTY(ids)) {
200 qid = STAILQ_FIRST(ids);
201 STAILQ_REMOVE_HEAD(ids, entry);
202 got_object_qid_free(qid);
206 const struct got_error *
207 got_object_parse_header(struct got_object **obj, char *buf, size_t len)
209 const char *obj_labels[] = {
210 GOT_OBJ_LABEL_COMMIT,
211 GOT_OBJ_LABEL_TREE,
212 GOT_OBJ_LABEL_BLOB,
213 GOT_OBJ_LABEL_TAG,
215 const int obj_types[] = {
216 GOT_OBJ_TYPE_COMMIT,
217 GOT_OBJ_TYPE_TREE,
218 GOT_OBJ_TYPE_BLOB,
219 GOT_OBJ_TYPE_TAG,
221 int type = 0;
222 size_t size = 0;
223 size_t i;
224 char *end;
226 *obj = NULL;
228 end = memchr(buf, '\0', len);
229 if (end == NULL)
230 return got_error(GOT_ERR_BAD_OBJ_HDR);
232 for (i = 0; i < nitems(obj_labels); i++) {
233 const char *label = obj_labels[i];
234 size_t label_len = strlen(label);
235 const char *errstr;
237 if (len <= label_len || buf + label_len >= end ||
238 strncmp(buf, label, label_len) != 0)
239 continue;
241 type = obj_types[i];
242 size = strtonum(buf + label_len, 0, LONG_MAX, &errstr);
243 if (errstr != NULL)
244 return got_error(GOT_ERR_BAD_OBJ_HDR);
245 break;
248 if (type == 0)
249 return got_error(GOT_ERR_BAD_OBJ_HDR);
251 *obj = calloc(1, sizeof(**obj));
252 if (*obj == NULL)
253 return got_error_from_errno("calloc");
254 (*obj)->type = type;
255 (*obj)->hdrlen = end - buf + 1;
256 (*obj)->size = size;
257 return NULL;
260 const struct got_error *
261 got_object_read_header(struct got_object **obj, int fd)
263 const struct got_error *err;
264 struct got_inflate_buf zb;
265 uint8_t *buf;
266 const size_t zbsize = 64;
267 size_t outlen, totlen;
268 int nbuf = 1;
270 *obj = NULL;
272 buf = malloc(zbsize);
273 if (buf == NULL)
274 return got_error_from_errno("malloc");
275 buf[0] = '\0';
277 err = got_inflate_init(&zb, buf, zbsize, NULL);
278 if (err)
279 return err;
281 totlen = 0;
282 do {
283 err = got_inflate_read_fd(&zb, fd, &outlen, NULL);
284 if (err)
285 goto done;
286 if (outlen == 0)
287 break;
288 totlen += outlen;
289 if (memchr(zb.outbuf, '\0', outlen) == NULL) {
290 uint8_t *newbuf;
291 nbuf++;
292 newbuf = recallocarray(buf, nbuf - 1, nbuf, zbsize);
293 if (newbuf == NULL) {
294 err = got_error_from_errno("recallocarray");
295 goto done;
297 buf = newbuf;
298 zb.outbuf = newbuf + totlen;
299 zb.outlen = (nbuf * zbsize) - totlen;
301 } while (memchr(zb.outbuf, '\0', outlen) == NULL);
303 err = got_object_parse_header(obj, buf, totlen);
304 done:
305 free(buf);
306 got_inflate_end(&zb);
307 return err;
310 const struct got_error *
311 got_object_read_raw(uint8_t **outbuf, off_t *size, size_t *hdrlen,
312 size_t max_in_mem_size, int outfd, struct got_object_id *expected_id,
313 int infd)
315 const struct got_error *err = NULL;
316 struct got_object *obj;
317 struct got_inflate_checksum csum;
318 struct got_object_id id;
319 struct got_hash ctx;
320 size_t len, consumed;
321 FILE *f = NULL;
323 *outbuf = NULL;
324 *size = 0;
325 *hdrlen = 0;
327 got_hash_init(&ctx, GOT_HASH_SHA1);
328 memset(&csum, 0, sizeof(csum));
329 csum.output_ctx = &ctx;
331 if (lseek(infd, SEEK_SET, 0) == -1)
332 return got_error_from_errno("lseek");
334 err = got_object_read_header(&obj, infd);
335 if (err)
336 return err;
338 if (lseek(infd, SEEK_SET, 0) == -1)
339 return got_error_from_errno("lseek");
341 if (obj->size + obj->hdrlen <= max_in_mem_size) {
342 err = got_inflate_to_mem_fd(outbuf, &len, &consumed, &csum,
343 obj->size + obj->hdrlen, infd);
344 } else {
345 int fd;
347 * XXX This uses an extra file descriptor for no good reason.
348 * We should have got_inflate_fd_to_fd().
350 fd = dup(infd);
351 if (fd == -1)
352 return got_error_from_errno("dup");
353 f = fdopen(fd, "r");
354 if (f == NULL) {
355 err = got_error_from_errno("fdopen");
356 abort();
357 close(fd);
358 goto done;
360 err = got_inflate_to_fd(&len, f, &csum, outfd);
362 if (err)
363 goto done;
365 if (len < obj->hdrlen || len != obj->hdrlen + obj->size) {
366 err = got_error(GOT_ERR_BAD_OBJ_HDR);
367 goto done;
370 got_hash_final_object_id(&ctx, &id);
371 if (got_object_id_cmp(expected_id, &id) != 0) {
372 err = got_error_checksum(expected_id);
373 goto done;
376 *size = obj->size;
377 *hdrlen = obj->hdrlen;
378 done:
379 got_object_close(obj);
380 if (f && fclose(f) == EOF && err == NULL)
381 err = got_error_from_errno("fclose");
382 return err;
385 struct got_commit_object *
386 got_object_commit_alloc_partial(void)
388 struct got_commit_object *commit;
390 commit = calloc(1, sizeof(*commit));
391 if (commit == NULL)
392 return NULL;
393 commit->tree_id = malloc(sizeof(*commit->tree_id));
394 if (commit->tree_id == NULL) {
395 free(commit);
396 return NULL;
399 STAILQ_INIT(&commit->parent_ids);
401 return commit;
404 const struct got_error *
405 got_object_commit_add_parent(struct got_commit_object *commit,
406 const char *id_str)
408 const struct got_error *err = NULL;
409 struct got_object_qid *qid;
411 err = got_object_qid_alloc_partial(&qid);
412 if (err)
413 return err;
415 if (!got_parse_object_id(&qid->id, id_str, GOT_HASH_SHA1)) {
416 err = got_error(GOT_ERR_BAD_OBJ_DATA);
417 got_object_qid_free(qid);
418 return err;
421 STAILQ_INSERT_TAIL(&commit->parent_ids, qid, entry);
422 commit->nparents++;
424 return NULL;
427 static const struct got_error *
428 parse_gmtoff(time_t *gmtoff, const char *tzstr)
430 int sign = 1;
431 const char *p = tzstr;
432 time_t h, m;
434 *gmtoff = 0;
436 if (*p == '-')
437 sign = -1;
438 else if (*p != '+')
439 return got_error(GOT_ERR_BAD_OBJ_DATA);
440 p++;
441 if (!isdigit((unsigned char)*p) &&
442 !isdigit((unsigned char)*(p + 1)))
443 return got_error(GOT_ERR_BAD_OBJ_DATA);
444 h = (((*p - '0') * 10) + (*(p + 1) - '0'));
446 p += 2;
447 if (!isdigit((unsigned char)*p) &&
448 !isdigit((unsigned char)*(p + 1)))
449 return got_error(GOT_ERR_BAD_OBJ_DATA);
450 m = ((*p - '0') * 10) + (*(p + 1) - '0');
452 *gmtoff = (h * 60 * 60 + m * 60) * sign;
453 return NULL;
456 static const struct got_error *
457 parse_commit_time(time_t *time, time_t *gmtoff, char *committer)
459 const struct got_error *err = NULL;
460 const char *errstr;
461 char *space, *tzstr;
463 /* Parse and strip off trailing timezone indicator string. */
464 space = strrchr(committer, ' ');
465 if (space == NULL)
466 return got_error(GOT_ERR_BAD_OBJ_DATA);
467 tzstr = strdup(space + 1);
468 if (tzstr == NULL)
469 return got_error_from_errno("strdup");
470 err = parse_gmtoff(gmtoff, tzstr);
471 free(tzstr);
472 if (err) {
473 if (err->code != GOT_ERR_BAD_OBJ_DATA)
474 return err;
475 /* Old versions of Git omitted the timestamp. */
476 *time = 0;
477 *gmtoff = 0;
478 return NULL;
480 *space = '\0';
482 /* Timestamp is separated from committer name + email by space. */
483 space = strrchr(committer, ' ');
484 if (space == NULL)
485 return got_error(GOT_ERR_BAD_OBJ_DATA);
487 /* Timestamp parsed here is expressed as UNIX timestamp (UTC). */
488 *time = strtonum(space + 1, 0, INT64_MAX, &errstr);
489 if (errstr)
490 return got_error(GOT_ERR_BAD_OBJ_DATA);
492 /* Strip off parsed time information, leaving just author and email. */
493 *space = '\0';
495 return NULL;
498 void
499 got_object_commit_close(struct got_commit_object *commit)
501 if (commit->refcnt > 0) {
502 commit->refcnt--;
503 if (commit->refcnt > 0)
504 return;
507 got_object_id_queue_free(&commit->parent_ids);
508 free(commit->tree_id);
509 free(commit->author);
510 free(commit->committer);
511 free(commit->logmsg);
512 free(commit);
515 struct got_object_id *
516 got_object_commit_get_tree_id(struct got_commit_object *commit)
518 return commit->tree_id;
522 got_object_commit_get_nparents(struct got_commit_object *commit)
524 return commit->nparents;
527 const struct got_object_id_queue *
528 got_object_commit_get_parent_ids(struct got_commit_object *commit)
530 return &commit->parent_ids;
533 const char *
534 got_object_commit_get_author(struct got_commit_object *commit)
536 return commit->author;
539 time_t
540 got_object_commit_get_author_time(struct got_commit_object *commit)
542 return commit->author_time;
545 time_t got_object_commit_get_author_gmtoff(struct got_commit_object *commit)
547 return commit->author_gmtoff;
550 const char *
551 got_object_commit_get_committer(struct got_commit_object *commit)
553 return commit->committer;
556 time_t
557 got_object_commit_get_committer_time(struct got_commit_object *commit)
559 return commit->committer_time;
562 time_t
563 got_object_commit_get_committer_gmtoff(struct got_commit_object *commit)
565 return commit->committer_gmtoff;
568 const struct got_error *
569 got_object_commit_get_logmsg(char **logmsg, struct got_commit_object *commit)
571 const struct got_error *err = NULL;
572 const char *src;
573 char *dst;
574 size_t len;
576 len = strlen(commit->logmsg);
577 *logmsg = malloc(len + 2); /* leave room for a trailing \n and \0 */
578 if (*logmsg == NULL)
579 return got_error_from_errno("malloc");
582 * Strip out unusual headers. Headers are separated from the commit
583 * message body by a single empty line.
585 src = commit->logmsg;
586 dst = *logmsg;
587 while (*src != '\0' && *src != '\n') {
588 int copy_header = 1, eol = 0;
589 if (strncmp(src, GOT_COMMIT_LABEL_TREE,
590 strlen(GOT_COMMIT_LABEL_TREE)) != 0 &&
591 strncmp(src, GOT_COMMIT_LABEL_AUTHOR,
592 strlen(GOT_COMMIT_LABEL_AUTHOR)) != 0 &&
593 strncmp(src, GOT_COMMIT_LABEL_PARENT,
594 strlen(GOT_COMMIT_LABEL_PARENT)) != 0 &&
595 strncmp(src, GOT_COMMIT_LABEL_COMMITTER,
596 strlen(GOT_COMMIT_LABEL_COMMITTER)) != 0)
597 copy_header = 0;
599 while (*src != '\0' && !eol) {
600 if (copy_header) {
601 *dst = *src;
602 dst++;
604 if (*src == '\n')
605 eol = 1;
606 src++;
609 *dst = '\0';
611 if (strlcat(*logmsg, src, len + 1) >= len + 1) {
612 err = got_error(GOT_ERR_NO_SPACE);
613 goto done;
616 /* Trim redundant trailing whitespace. */
617 len = strlen(*logmsg);
618 while (len > 1 && isspace((unsigned char)(*logmsg)[len - 2]) &&
619 isspace((unsigned char)(*logmsg)[len - 1])) {
620 (*logmsg)[len - 1] = '\0';
621 len--;
624 /* Append a trailing newline if missing. */
625 if (len > 0 && (*logmsg)[len - 1] != '\n') {
626 (*logmsg)[len] = '\n';
627 (*logmsg)[len + 1] = '\0';
629 done:
630 if (err) {
631 free(*logmsg);
632 *logmsg = NULL;
634 return err;
637 const char *
638 got_object_commit_get_logmsg_raw(struct got_commit_object *commit)
640 return commit->logmsg;
643 const struct got_error *
644 got_object_parse_commit(struct got_commit_object **commit, char *buf,
645 size_t len)
647 const struct got_error *err = NULL;
648 enum got_hash_algorithm algo = GOT_HASH_SHA1;
649 char *s = buf;
650 size_t label_len;
651 ssize_t remain = (ssize_t)len;
653 if (remain == 0)
654 return got_error(GOT_ERR_BAD_OBJ_DATA);
656 *commit = got_object_commit_alloc_partial();
657 if (*commit == NULL)
658 return got_error_from_errno("got_object_commit_alloc_partial");
660 label_len = strlen(GOT_COMMIT_LABEL_TREE);
661 if (strncmp(s, GOT_COMMIT_LABEL_TREE, label_len) == 0) {
662 remain -= label_len;
663 if (remain < SHA1_DIGEST_STRING_LENGTH) {
664 err = got_error(GOT_ERR_BAD_OBJ_DATA);
665 goto done;
667 s += label_len;
668 if (!got_parse_object_id((*commit)->tree_id, s, algo)) {
669 err = got_error(GOT_ERR_BAD_OBJ_DATA);
670 goto done;
672 remain -= SHA1_DIGEST_STRING_LENGTH;
673 s += SHA1_DIGEST_STRING_LENGTH;
674 } else {
675 err = got_error(GOT_ERR_BAD_OBJ_DATA);
676 goto done;
679 label_len = strlen(GOT_COMMIT_LABEL_PARENT);
680 while (strncmp(s, GOT_COMMIT_LABEL_PARENT, label_len) == 0) {
681 remain -= label_len;
682 if (remain < SHA1_DIGEST_STRING_LENGTH) {
683 err = got_error(GOT_ERR_BAD_OBJ_DATA);
684 goto done;
686 s += label_len;
687 err = got_object_commit_add_parent(*commit, s);
688 if (err)
689 goto done;
691 remain -= SHA1_DIGEST_STRING_LENGTH;
692 s += SHA1_DIGEST_STRING_LENGTH;
695 label_len = strlen(GOT_COMMIT_LABEL_AUTHOR);
696 if (strncmp(s, GOT_COMMIT_LABEL_AUTHOR, label_len) == 0) {
697 char *p;
698 size_t slen;
700 remain -= label_len;
701 if (remain <= 0) {
702 err = got_error(GOT_ERR_BAD_OBJ_DATA);
703 goto done;
705 s += label_len;
706 p = memchr(s, '\n', remain);
707 if (p == NULL) {
708 err = got_error(GOT_ERR_BAD_OBJ_DATA);
709 goto done;
711 *p = '\0';
712 slen = strlen(s);
713 err = parse_commit_time(&(*commit)->author_time,
714 &(*commit)->author_gmtoff, s);
715 if (err)
716 goto done;
717 (*commit)->author = strdup(s);
718 if ((*commit)->author == NULL) {
719 err = got_error_from_errno("strdup");
720 goto done;
722 s += slen + 1;
723 remain -= slen + 1;
726 label_len = strlen(GOT_COMMIT_LABEL_COMMITTER);
727 if (strncmp(s, GOT_COMMIT_LABEL_COMMITTER, label_len) == 0) {
728 char *p;
729 size_t slen;
731 remain -= label_len;
732 if (remain <= 0) {
733 err = got_error(GOT_ERR_BAD_OBJ_DATA);
734 goto done;
736 s += label_len;
737 p = memchr(s, '\n', remain);
738 if (p == NULL) {
739 err = got_error(GOT_ERR_BAD_OBJ_DATA);
740 goto done;
742 *p = '\0';
743 slen = strlen(s);
744 err = parse_commit_time(&(*commit)->committer_time,
745 &(*commit)->committer_gmtoff, s);
746 if (err)
747 goto done;
748 (*commit)->committer = strdup(s);
749 if ((*commit)->committer == NULL) {
750 err = got_error_from_errno("strdup");
751 goto done;
753 s += slen + 1;
754 remain -= slen + 1;
757 (*commit)->logmsg = strndup(s, remain);
758 if ((*commit)->logmsg == NULL) {
759 err = got_error_from_errno("strndup");
760 goto done;
762 done:
763 if (err) {
764 got_object_commit_close(*commit);
765 *commit = NULL;
767 return err;
770 const struct got_error *
771 got_object_read_commit(struct got_commit_object **commit, int fd,
772 struct got_object_id *expected_id, size_t expected_size)
774 struct got_object *obj = NULL;
775 const struct got_error *err = NULL;
776 size_t len;
777 uint8_t *p;
778 struct got_inflate_checksum csum;
779 struct got_hash ctx;
780 struct got_object_id id;
782 got_hash_init(&ctx, GOT_HASH_SHA1);
783 memset(&csum, 0, sizeof(csum));
784 csum.output_ctx = &ctx;
786 err = got_inflate_to_mem_fd(&p, &len, NULL, &csum, expected_size, fd);
787 if (err)
788 return err;
790 got_hash_final_object_id(&ctx, &id);
791 if (got_object_id_cmp(expected_id, &id) != 0) {
792 err = got_error_checksum(expected_id);
793 goto done;
796 err = got_object_parse_header(&obj, p, len);
797 if (err)
798 goto done;
800 if (len < obj->hdrlen + obj->size) {
801 err = got_error(GOT_ERR_BAD_OBJ_DATA);
802 goto done;
805 if (obj->type != GOT_OBJ_TYPE_COMMIT) {
806 err = got_error(GOT_ERR_OBJ_TYPE);
807 goto done;
810 /* Skip object header. */
811 len -= obj->hdrlen;
812 err = got_object_parse_commit(commit, p + obj->hdrlen, len);
813 done:
814 free(p);
815 if (obj)
816 got_object_close(obj);
817 return err;
820 void
821 got_object_tree_close(struct got_tree_object *tree)
823 if (tree->refcnt > 0) {
824 tree->refcnt--;
825 if (tree->refcnt > 0)
826 return;
829 free(tree->entries);
830 free(tree);
833 static const struct got_error *
834 parse_tree_entry(struct got_parsed_tree_entry *pte, size_t *elen, char *buf,
835 size_t maxlen)
837 char *p, *space;
839 *elen = 0;
841 *elen = strnlen(buf, maxlen) + 1;
842 if (*elen > maxlen)
843 return got_error(GOT_ERR_BAD_OBJ_DATA);
845 space = memchr(buf, ' ', *elen);
846 if (space == NULL || space <= buf)
847 return got_error(GOT_ERR_BAD_OBJ_DATA);
849 pte->mode = 0;
850 p = buf;
851 while (p < space) {
852 if (*p < '0' || *p > '7')
853 return got_error(GOT_ERR_BAD_OBJ_DATA);
854 pte->mode <<= 3;
855 pte->mode |= *p - '0';
856 p++;
859 if (*elen > maxlen || maxlen - *elen < SHA1_DIGEST_LENGTH)
860 return got_error(GOT_ERR_BAD_OBJ_DATA);
862 pte->name = space + 1;
863 pte->namelen = strlen(pte->name);
864 buf += *elen;
865 pte->id = buf;
866 *elen += SHA1_DIGEST_LENGTH;
867 return NULL;
870 static int
871 pte_cmp(const void *pa, const void *pb)
873 const struct got_parsed_tree_entry *a = pa, *b = pb;
875 return got_path_cmp(a->name, b->name, a->namelen, b->namelen);
878 const struct got_error *
879 got_object_parse_tree(struct got_parsed_tree_entry **entries, size_t *nentries,
880 size_t *nentries_alloc, uint8_t *buf, size_t len)
882 const struct got_error *err = NULL;
883 size_t remain = len;
884 const size_t nalloc = 16;
885 struct got_parsed_tree_entry *pte;
886 int i;
888 *nentries = 0;
889 if (remain == 0)
890 return NULL; /* tree is empty */
892 while (remain > 0) {
893 size_t elen;
895 if (*nentries >= *nentries_alloc) {
896 pte = recallocarray(*entries, *nentries_alloc,
897 *nentries_alloc + nalloc, sizeof(**entries));
898 if (pte == NULL) {
899 err = got_error_from_errno("recallocarray");
900 goto done;
902 *entries = pte;
903 *nentries_alloc += nalloc;
906 pte = &(*entries)[*nentries];
907 err = parse_tree_entry(pte, &elen, buf, remain);
908 if (err)
909 goto done;
910 buf += elen;
911 remain -= elen;
912 (*nentries)++;
915 if (remain != 0) {
916 err = got_error(GOT_ERR_BAD_OBJ_DATA);
917 goto done;
920 if (*nentries > 1) {
921 mergesort(*entries, *nentries, sizeof(**entries), pte_cmp);
923 for (i = 0; i < *nentries - 1; i++) {
924 struct got_parsed_tree_entry *prev = &(*entries)[i];
925 pte = &(*entries)[i + 1];
926 if (got_path_cmp(prev->name, pte->name,
927 prev->namelen, pte->namelen) == 0) {
928 err = got_error(GOT_ERR_TREE_DUP_ENTRY);
929 break;
933 done:
934 if (err)
935 *nentries = 0;
936 return err;
939 const struct got_error *
940 got_object_read_tree(struct got_parsed_tree_entry **entries, size_t *nentries,
941 size_t *nentries_alloc, uint8_t **p, int fd,
942 struct got_object_id *expected_id)
944 const struct got_error *err = NULL;
945 struct got_object *obj = NULL;
946 size_t len;
947 struct got_inflate_checksum csum;
948 struct got_hash ctx;
949 struct got_object_id id;
951 got_hash_init(&ctx, GOT_HASH_SHA1);
952 memset(&csum, 0, sizeof(csum));
953 csum.output_ctx = &ctx;
955 err = got_inflate_to_mem_fd(p, &len, NULL, &csum, 0, fd);
956 if (err)
957 return err;
959 got_hash_final_object_id(&ctx, &id);
960 if (got_object_id_cmp(expected_id, &id) != 0) {
961 err = got_error_checksum(expected_id);
962 goto done;
965 err = got_object_parse_header(&obj, *p, len);
966 if (err)
967 goto done;
969 if (len < obj->hdrlen + obj->size) {
970 err = got_error(GOT_ERR_BAD_OBJ_DATA);
971 goto done;
974 /* Skip object header. */
975 len -= obj->hdrlen;
976 err = got_object_parse_tree(entries, nentries, nentries_alloc,
977 *p + obj->hdrlen, len);
978 done:
979 if (obj)
980 got_object_close(obj);
981 return err;
984 void
985 got_object_tag_close(struct got_tag_object *tag)
987 if (tag->refcnt > 0) {
988 tag->refcnt--;
989 if (tag->refcnt > 0)
990 return;
993 free(tag->tag);
994 free(tag->tagger);
995 free(tag->tagmsg);
996 free(tag);
999 const struct got_error *
1000 got_object_parse_tag(struct got_tag_object **tag, uint8_t *buf, size_t len)
1002 const struct got_error *err = NULL;
1003 enum got_hash_algorithm algo = GOT_HASH_SHA1;
1004 size_t remain = len;
1005 char *s = buf;
1006 size_t label_len;
1008 if (remain == 0)
1009 return got_error(GOT_ERR_BAD_OBJ_DATA);
1011 *tag = calloc(1, sizeof(**tag));
1012 if (*tag == NULL)
1013 return got_error_from_errno("calloc");
1015 label_len = strlen(GOT_TAG_LABEL_OBJECT);
1016 if (strncmp(s, GOT_TAG_LABEL_OBJECT, label_len) == 0) {
1017 remain -= label_len;
1018 if (remain < SHA1_DIGEST_STRING_LENGTH) {
1019 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1020 goto done;
1022 s += label_len;
1023 if (!got_parse_object_id(&(*tag)->id, s, algo)) {
1024 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1025 goto done;
1027 remain -= SHA1_DIGEST_STRING_LENGTH;
1028 s += SHA1_DIGEST_STRING_LENGTH;
1029 } else {
1030 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1031 goto done;
1034 if (remain <= 0) {
1035 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1036 goto done;
1039 label_len = strlen(GOT_TAG_LABEL_TYPE);
1040 if (strncmp(s, GOT_TAG_LABEL_TYPE, label_len) == 0) {
1041 remain -= label_len;
1042 if (remain <= 0) {
1043 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1044 goto done;
1046 s += label_len;
1047 if (strncmp(s, GOT_OBJ_LABEL_COMMIT,
1048 strlen(GOT_OBJ_LABEL_COMMIT)) == 0) {
1049 (*tag)->obj_type = GOT_OBJ_TYPE_COMMIT;
1050 label_len = strlen(GOT_OBJ_LABEL_COMMIT);
1051 s += label_len;
1052 remain -= label_len;
1053 } else if (strncmp(s, GOT_OBJ_LABEL_TREE,
1054 strlen(GOT_OBJ_LABEL_TREE)) == 0) {
1055 (*tag)->obj_type = GOT_OBJ_TYPE_TREE;
1056 label_len = strlen(GOT_OBJ_LABEL_TREE);
1057 s += label_len;
1058 remain -= label_len;
1059 } else if (strncmp(s, GOT_OBJ_LABEL_BLOB,
1060 strlen(GOT_OBJ_LABEL_BLOB)) == 0) {
1061 (*tag)->obj_type = GOT_OBJ_TYPE_BLOB;
1062 label_len = strlen(GOT_OBJ_LABEL_BLOB);
1063 s += label_len;
1064 remain -= label_len;
1065 } else if (strncmp(s, GOT_OBJ_LABEL_TAG,
1066 strlen(GOT_OBJ_LABEL_TAG)) == 0) {
1067 (*tag)->obj_type = GOT_OBJ_TYPE_TAG;
1068 label_len = strlen(GOT_OBJ_LABEL_TAG);
1069 s += label_len;
1070 remain -= label_len;
1071 } else {
1072 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1073 goto done;
1076 if (remain <= 0 || *s != '\n') {
1077 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1078 goto done;
1080 s++;
1081 remain--;
1082 if (remain <= 0) {
1083 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1084 goto done;
1086 } else {
1087 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1088 goto done;
1091 label_len = strlen(GOT_TAG_LABEL_TAG);
1092 if (strncmp(s, GOT_TAG_LABEL_TAG, label_len) == 0) {
1093 char *p;
1094 size_t slen;
1095 remain -= label_len;
1096 if (remain <= 0) {
1097 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1098 goto done;
1100 s += label_len;
1101 p = memchr(s, '\n', remain);
1102 if (p == NULL) {
1103 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1104 goto done;
1106 *p = '\0';
1107 slen = strlen(s);
1108 (*tag)->tag = strndup(s, slen);
1109 if ((*tag)->tag == NULL) {
1110 err = got_error_from_errno("strndup");
1111 goto done;
1113 s += slen + 1;
1114 remain -= slen + 1;
1115 if (remain <= 0) {
1116 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1117 goto done;
1119 } else {
1120 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1121 goto done;
1124 label_len = strlen(GOT_TAG_LABEL_TAGGER);
1125 if (strncmp(s, GOT_TAG_LABEL_TAGGER, label_len) == 0) {
1126 char *p;
1127 size_t slen;
1129 remain -= label_len;
1130 if (remain <= 0) {
1131 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1132 goto done;
1134 s += label_len;
1135 p = memchr(s, '\n', remain);
1136 if (p == NULL) {
1137 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1138 goto done;
1140 *p = '\0';
1141 slen = strlen(s);
1142 err = parse_commit_time(&(*tag)->tagger_time,
1143 &(*tag)->tagger_gmtoff, s);
1144 if (err)
1145 goto done;
1146 (*tag)->tagger = strdup(s);
1147 if ((*tag)->tagger == NULL) {
1148 err = got_error_from_errno("strdup");
1149 goto done;
1151 s += slen + 1;
1152 remain -= slen + 1;
1153 if (remain < 0) {
1154 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1155 goto done;
1157 } else {
1158 /* Some old tags in the Linux git repo have no tagger. */
1159 (*tag)->tagger = strdup("");
1160 if ((*tag)->tagger == NULL) {
1161 err = got_error_from_errno("strdup");
1162 goto done;
1166 (*tag)->tagmsg = strndup(s, remain);
1167 if ((*tag)->tagmsg == NULL) {
1168 err = got_error_from_errno("strndup");
1169 goto done;
1171 done:
1172 if (err) {
1173 got_object_tag_close(*tag);
1174 *tag = NULL;
1176 return err;
1179 const struct got_error *
1180 got_object_read_tag(struct got_tag_object **tag, int fd,
1181 struct got_object_id *expected_id, size_t expected_size)
1183 const struct got_error *err = NULL;
1184 struct got_object *obj = NULL;
1185 size_t len;
1186 uint8_t *p;
1187 struct got_inflate_checksum csum;
1188 struct got_hash ctx;
1189 struct got_object_id id;
1191 got_hash_init(&ctx, GOT_HASH_SHA1);
1192 memset(&csum, 0, sizeof(csum));
1193 csum.output_ctx = &ctx;
1195 err = got_inflate_to_mem_fd(&p, &len, NULL, &csum,
1196 expected_size, fd);
1197 if (err)
1198 return err;
1200 got_hash_final_object_id(&ctx, &id);
1201 if (got_object_id_cmp(expected_id, &id) != 0) {
1202 err = got_error_checksum(expected_id);
1203 goto done;
1206 err = got_object_parse_header(&obj, p, len);
1207 if (err)
1208 goto done;
1210 if (len < obj->hdrlen + obj->size) {
1211 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1212 goto done;
1215 /* Skip object header. */
1216 len -= obj->hdrlen;
1217 err = got_object_parse_tag(tag, p + obj->hdrlen, len);
1218 done:
1219 free(p);
1220 if (obj)
1221 got_object_close(obj);
1222 return err;
1225 const struct got_error *
1226 got_read_file_to_mem(uint8_t **outbuf, size_t *outlen, FILE *f)
1228 const struct got_error *err = NULL;
1229 static const size_t blocksize = 512;
1230 size_t n, total, remain;
1231 uint8_t *buf;
1233 *outbuf = NULL;
1234 *outlen = 0;
1236 buf = malloc(blocksize);
1237 if (buf == NULL)
1238 return got_error_from_errno("malloc");
1240 remain = blocksize;
1241 total = 0;
1242 for (;;) {
1243 if (remain == 0) {
1244 uint8_t *newbuf;
1245 newbuf = reallocarray(buf, 1, total + blocksize);
1246 if (newbuf == NULL) {
1247 err = got_error_from_errno("reallocarray");
1248 goto done;
1250 buf = newbuf;
1251 remain += blocksize;
1253 n = fread(buf + total, 1, remain, f);
1254 if (n == 0) {
1255 if (ferror(f)) {
1256 err = got_ferror(f, GOT_ERR_IO);
1257 goto done;
1259 break; /* EOF */
1261 remain -= n;
1262 total += n;
1265 done:
1266 if (err == NULL) {
1267 *outbuf = buf;
1268 *outlen = total;
1269 } else
1270 free(buf);
1271 return err;