bump version number
[got-portable.git] / lib / object_parse.c
blob00ef12292ee99c13b11a184350a24f3e4abf2fdf
/*
 * Copyright (c) 2018, 2019, 2020 Stefan Sperling <stsp@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
17 #include "got_compat.h"
19 #include <sys/types.h>
20 #include <sys/stat.h>
21 #include <sys/queue.h>
22 #include <sys/uio.h>
23 #include <sys/socket.h>
24 #include <sys/wait.h>
25 #include <sys/mman.h>
27 #include <errno.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <stdint.h>
32 #include <zlib.h>
33 #include <ctype.h>
34 #include <limits.h>
35 #include <time.h>
36 #include <unistd.h>
38 #include "got_error.h"
39 #include "got_object.h"
40 #include "got_repository.h"
41 #include "got_opentemp.h"
42 #include "got_path.h"
44 #include "got_lib_hash.h"
45 #include "got_lib_delta.h"
46 #include "got_lib_inflate.h"
47 #include "got_lib_object.h"
48 #include "got_lib_object_parse.h"
49 #include "got_lib_object_qid.h"
50 #include "got_lib_object_cache.h"
51 #include "got_lib_pack.h"
52 #include "got_lib_repository.h"
/* Number of elements in a true array (not a pointer); fallback definition. */
#if !defined(nitems)
#define nitems(_a) (sizeof(_a) / sizeof((_a)[0]))
#endif
58 const struct got_error *
59 got_object_type_label(const char **label, int obj_type)
61 const struct got_error *err = NULL;
63 switch (obj_type) {
64 case GOT_OBJ_TYPE_BLOB:
65 *label = GOT_OBJ_LABEL_BLOB;
66 break;
67 case GOT_OBJ_TYPE_TREE:
68 *label = GOT_OBJ_LABEL_TREE;
69 break;
70 case GOT_OBJ_TYPE_COMMIT:
71 *label = GOT_OBJ_LABEL_COMMIT;
72 break;
73 case GOT_OBJ_TYPE_TAG:
74 *label = GOT_OBJ_LABEL_TAG;
75 break;
76 default:
77 *label = NULL;
78 err = got_error(GOT_ERR_OBJ_TYPE);
79 break;
82 return err;
85 void
86 got_object_close(struct got_object *obj)
88 if (obj->refcnt > 0) {
89 obj->refcnt--;
90 if (obj->refcnt > 0)
91 return;
94 if (obj->flags & GOT_OBJ_FLAG_DELTIFIED) {
95 struct got_delta *delta;
96 while (!STAILQ_EMPTY(&obj->deltas.entries)) {
97 delta = STAILQ_FIRST(&obj->deltas.entries);
98 STAILQ_REMOVE_HEAD(&obj->deltas.entries, entry);
99 free(delta);
102 free(obj);
105 const struct got_error *
106 got_object_raw_close(struct got_raw_object *obj)
108 const struct got_error *err = NULL;
110 if (obj->refcnt > 0) {
111 obj->refcnt--;
112 if (obj->refcnt > 0)
113 return NULL;
116 if (obj->close_cb)
117 obj->close_cb(obj);
119 if (obj->f == NULL) {
120 if (obj->fd != -1) {
121 if (munmap(obj->data, obj->hdrlen + obj->size) == -1)
122 err = got_error_from_errno("munmap");
123 if (close(obj->fd) == -1 && err == NULL)
124 err = got_error_from_errno("close");
125 } else
126 free(obj->data);
127 } else {
128 if (fclose(obj->f) == EOF && err == NULL)
129 err = got_error_from_errno("fclose");
131 free(obj);
132 return err;
135 const struct got_error *
136 got_object_parse_header(struct got_object **obj, char *buf, size_t len)
138 const char *obj_labels[] = {
139 GOT_OBJ_LABEL_COMMIT,
140 GOT_OBJ_LABEL_TREE,
141 GOT_OBJ_LABEL_BLOB,
142 GOT_OBJ_LABEL_TAG,
144 const int obj_types[] = {
145 GOT_OBJ_TYPE_COMMIT,
146 GOT_OBJ_TYPE_TREE,
147 GOT_OBJ_TYPE_BLOB,
148 GOT_OBJ_TYPE_TAG,
150 int type = 0;
151 size_t size = 0;
152 size_t i;
153 char *end;
155 *obj = NULL;
157 end = memchr(buf, '\0', len);
158 if (end == NULL)
159 return got_error(GOT_ERR_BAD_OBJ_HDR);
161 for (i = 0; i < nitems(obj_labels); i++) {
162 const char *label = obj_labels[i];
163 size_t label_len = strlen(label);
164 const char *errstr;
166 if (len <= label_len || buf + label_len >= end ||
167 strncmp(buf, label, label_len) != 0)
168 continue;
170 type = obj_types[i];
171 size = strtonum(buf + label_len, 0, LONG_MAX, &errstr);
172 if (errstr != NULL)
173 return got_error(GOT_ERR_BAD_OBJ_HDR);
174 break;
177 if (type == 0)
178 return got_error(GOT_ERR_BAD_OBJ_HDR);
180 *obj = calloc(1, sizeof(**obj));
181 if (*obj == NULL)
182 return got_error_from_errno("calloc");
183 (*obj)->type = type;
184 (*obj)->hdrlen = end - buf + 1;
185 (*obj)->size = size;
186 return NULL;
189 const struct got_error *
190 got_object_read_header(struct got_object **obj, int fd)
192 const struct got_error *err;
193 struct got_inflate_buf zb;
194 uint8_t *buf;
195 const size_t zbsize = 64;
196 size_t outlen, totlen;
197 int nbuf = 1;
199 *obj = NULL;
201 buf = malloc(zbsize);
202 if (buf == NULL)
203 return got_error_from_errno("malloc");
204 buf[0] = '\0';
206 err = got_inflate_init(&zb, buf, zbsize, NULL);
207 if (err)
208 return err;
210 totlen = 0;
211 do {
212 err = got_inflate_read_fd(&zb, fd, &outlen, NULL);
213 if (err)
214 goto done;
215 if (outlen == 0)
216 break;
217 totlen += outlen;
218 if (memchr(zb.outbuf, '\0', outlen) == NULL) {
219 uint8_t *newbuf;
220 nbuf++;
221 newbuf = recallocarray(buf, nbuf - 1, nbuf, zbsize);
222 if (newbuf == NULL) {
223 err = got_error_from_errno("recallocarray");
224 goto done;
226 buf = newbuf;
227 zb.outbuf = newbuf + totlen;
228 zb.outlen = (nbuf * zbsize) - totlen;
230 } while (memchr(zb.outbuf, '\0', outlen) == NULL);
232 err = got_object_parse_header(obj, buf, totlen);
233 done:
234 free(buf);
235 got_inflate_end(&zb);
236 return err;
239 const struct got_error *
240 got_object_read_raw(uint8_t **outbuf, off_t *size, size_t *hdrlen,
241 size_t max_in_mem_size, int outfd, struct got_object_id *expected_id,
242 int infd)
244 const struct got_error *err = NULL;
245 struct got_object *obj;
246 struct got_inflate_checksum csum;
247 struct got_object_id id;
248 struct got_hash ctx;
249 size_t len, consumed;
250 FILE *f = NULL;
252 *outbuf = NULL;
253 *size = 0;
254 *hdrlen = 0;
256 got_hash_init(&ctx, expected_id->algo);
257 memset(&csum, 0, sizeof(csum));
258 csum.output_ctx = &ctx;
260 if (lseek(infd, SEEK_SET, 0) == -1)
261 return got_error_from_errno("lseek");
263 err = got_object_read_header(&obj, infd);
264 if (err)
265 return err;
267 if (lseek(infd, SEEK_SET, 0) == -1)
268 return got_error_from_errno("lseek");
270 if (obj->size + obj->hdrlen <= max_in_mem_size) {
271 err = got_inflate_to_mem_fd(outbuf, &len, &consumed, &csum,
272 obj->size + obj->hdrlen, infd);
273 } else {
274 int fd;
276 * XXX This uses an extra file descriptor for no good reason.
277 * We should have got_inflate_fd_to_fd().
279 fd = dup(infd);
280 if (fd == -1)
281 return got_error_from_errno("dup");
282 f = fdopen(fd, "r");
283 if (f == NULL) {
284 err = got_error_from_errno("fdopen");
285 close(fd);
286 goto done;
288 err = got_inflate_to_fd(&len, f, &csum, outfd);
290 if (err)
291 goto done;
293 if (len < obj->hdrlen || len != obj->hdrlen + obj->size) {
294 err = got_error(GOT_ERR_BAD_OBJ_HDR);
295 goto done;
298 got_hash_final_object_id(&ctx, &id);
299 if (got_object_id_cmp(expected_id, &id) != 0) {
300 err = got_error_checksum(expected_id);
301 goto done;
304 *size = obj->size;
305 *hdrlen = obj->hdrlen;
306 done:
307 got_object_close(obj);
308 if (f && fclose(f) == EOF && err == NULL)
309 err = got_error_from_errno("fclose");
310 return err;
313 struct got_commit_object *
314 got_object_commit_alloc_partial(void)
316 struct got_commit_object *commit;
318 commit = calloc(1, sizeof(*commit));
319 if (commit == NULL)
320 return NULL;
321 commit->tree_id = malloc(sizeof(*commit->tree_id));
322 if (commit->tree_id == NULL) {
323 free(commit);
324 return NULL;
327 STAILQ_INIT(&commit->parent_ids);
329 return commit;
332 const struct got_error *
333 got_object_commit_add_parent(struct got_commit_object *commit,
334 const char *id_str, enum got_hash_algorithm algo)
336 const struct got_error *err = NULL;
337 struct got_object_qid *qid;
339 err = got_object_qid_alloc_partial(&qid);
340 if (err)
341 return err;
343 if (!got_parse_object_id(&qid->id, id_str, algo)) {
344 err = got_error(GOT_ERR_BAD_OBJ_DATA);
345 got_object_qid_free(qid);
346 return err;
349 STAILQ_INSERT_TAIL(&commit->parent_ids, qid, entry);
350 commit->nparents++;
352 return NULL;
355 static const struct got_error *
356 parse_gmtoff(time_t *gmtoff, const char *tzstr)
358 int sign = 1;
359 const char *p = tzstr;
360 time_t h, m;
362 *gmtoff = 0;
364 if (*p == '-')
365 sign = -1;
366 else if (*p != '+')
367 return got_error(GOT_ERR_BAD_OBJ_DATA);
368 p++;
369 if (!isdigit((unsigned char)*p) &&
370 !isdigit((unsigned char)*(p + 1)))
371 return got_error(GOT_ERR_BAD_OBJ_DATA);
372 h = (((*p - '0') * 10) + (*(p + 1) - '0'));
374 p += 2;
375 if (!isdigit((unsigned char)*p) &&
376 !isdigit((unsigned char)*(p + 1)))
377 return got_error(GOT_ERR_BAD_OBJ_DATA);
378 m = ((*p - '0') * 10) + (*(p + 1) - '0');
380 *gmtoff = (h * 60 * 60 + m * 60) * sign;
381 return NULL;
384 static const struct got_error *
385 parse_commit_time(time_t *time, time_t *gmtoff, char *committer)
387 const struct got_error *err = NULL;
388 const char *errstr;
389 char *space, *tzstr;
391 /* Parse and strip off trailing timezone indicator string. */
392 space = strrchr(committer, ' ');
393 if (space == NULL)
394 return got_error(GOT_ERR_BAD_OBJ_DATA);
395 tzstr = strdup(space + 1);
396 if (tzstr == NULL)
397 return got_error_from_errno("strdup");
398 err = parse_gmtoff(gmtoff, tzstr);
399 free(tzstr);
400 if (err) {
401 if (err->code != GOT_ERR_BAD_OBJ_DATA)
402 return err;
403 /* Old versions of Git omitted the timestamp. */
404 *time = 0;
405 *gmtoff = 0;
406 return NULL;
408 *space = '\0';
410 /* Timestamp is separated from committer name + email by space. */
411 space = strrchr(committer, ' ');
412 if (space == NULL)
413 return got_error(GOT_ERR_BAD_OBJ_DATA);
415 /* Timestamp parsed here is expressed as UNIX timestamp (UTC). */
416 *time = strtonum(space + 1, 0, INT64_MAX, &errstr);
417 if (errstr)
418 return got_error(GOT_ERR_BAD_OBJ_DATA);
420 /* Strip off parsed time information, leaving just author and email. */
421 *space = '\0';
423 return NULL;
426 void
427 got_object_commit_close(struct got_commit_object *commit)
429 if (commit->refcnt > 0) {
430 commit->refcnt--;
431 if (commit->refcnt > 0)
432 return;
435 got_object_id_queue_free(&commit->parent_ids);
436 free(commit->tree_id);
437 free(commit->author);
438 free(commit->committer);
439 free(commit->logmsg);
440 free(commit);
443 struct got_object_id *
444 got_object_commit_get_tree_id(struct got_commit_object *commit)
446 return commit->tree_id;
450 got_object_commit_get_nparents(struct got_commit_object *commit)
452 return commit->nparents;
455 const struct got_object_id_queue *
456 got_object_commit_get_parent_ids(struct got_commit_object *commit)
458 return &commit->parent_ids;
461 const char *
462 got_object_commit_get_author(struct got_commit_object *commit)
464 return commit->author;
467 time_t
468 got_object_commit_get_author_time(struct got_commit_object *commit)
470 return commit->author_time;
473 time_t got_object_commit_get_author_gmtoff(struct got_commit_object *commit)
475 return commit->author_gmtoff;
478 const char *
479 got_object_commit_get_committer(struct got_commit_object *commit)
481 return commit->committer;
484 time_t
485 got_object_commit_get_committer_time(struct got_commit_object *commit)
487 return commit->committer_time;
490 time_t
491 got_object_commit_get_committer_gmtoff(struct got_commit_object *commit)
493 return commit->committer_gmtoff;
496 const struct got_error *
497 got_object_commit_get_logmsg(char **logmsg, struct got_commit_object *commit)
499 const struct got_error *err = NULL;
500 const char *src;
501 char *dst;
502 size_t len;
504 len = strlen(commit->logmsg);
505 *logmsg = malloc(len + 2); /* leave room for a trailing \n and \0 */
506 if (*logmsg == NULL)
507 return got_error_from_errno("malloc");
510 * Strip out unusual headers. Headers are separated from the commit
511 * message body by a single empty line.
513 src = commit->logmsg;
514 dst = *logmsg;
515 while (*src != '\0' && *src != '\n') {
516 int copy_header = 1, eol = 0;
517 if (strncmp(src, GOT_COMMIT_LABEL_TREE,
518 strlen(GOT_COMMIT_LABEL_TREE)) != 0 &&
519 strncmp(src, GOT_COMMIT_LABEL_AUTHOR,
520 strlen(GOT_COMMIT_LABEL_AUTHOR)) != 0 &&
521 strncmp(src, GOT_COMMIT_LABEL_PARENT,
522 strlen(GOT_COMMIT_LABEL_PARENT)) != 0 &&
523 strncmp(src, GOT_COMMIT_LABEL_COMMITTER,
524 strlen(GOT_COMMIT_LABEL_COMMITTER)) != 0)
525 copy_header = 0;
527 while (*src != '\0' && !eol) {
528 if (copy_header) {
529 *dst = *src;
530 dst++;
532 if (*src == '\n')
533 eol = 1;
534 src++;
537 *dst = '\0';
539 if (strlcat(*logmsg, src, len + 1) >= len + 1) {
540 err = got_error(GOT_ERR_NO_SPACE);
541 goto done;
544 /* Trim redundant trailing whitespace. */
545 len = strlen(*logmsg);
546 while (len > 1 && isspace((unsigned char)(*logmsg)[len - 2]) &&
547 isspace((unsigned char)(*logmsg)[len - 1])) {
548 (*logmsg)[len - 1] = '\0';
549 len--;
552 /* Append a trailing newline if missing. */
553 if (len > 0 && (*logmsg)[len - 1] != '\n') {
554 (*logmsg)[len] = '\n';
555 (*logmsg)[len + 1] = '\0';
557 done:
558 if (err) {
559 free(*logmsg);
560 *logmsg = NULL;
562 return err;
565 const char *
566 got_object_commit_get_logmsg_raw(struct got_commit_object *commit)
568 return commit->logmsg;
571 const struct got_error *
572 got_object_parse_commit(struct got_commit_object **commit, char *buf,
573 size_t len, enum got_hash_algorithm algo)
575 const struct got_error *err = NULL;
576 char *s = buf;
577 size_t label_len, digest_string_len;
578 ssize_t remain = (ssize_t)len;
580 digest_string_len = got_hash_digest_string_length(algo);
582 if (remain == 0)
583 return got_error(GOT_ERR_BAD_OBJ_DATA);
585 *commit = got_object_commit_alloc_partial();
586 if (*commit == NULL)
587 return got_error_from_errno("got_object_commit_alloc_partial");
589 label_len = strlen(GOT_COMMIT_LABEL_TREE);
590 if (strncmp(s, GOT_COMMIT_LABEL_TREE, label_len) == 0) {
591 remain -= label_len;
592 if (remain < digest_string_len) {
593 err = got_error(GOT_ERR_BAD_OBJ_DATA);
594 goto done;
596 s += label_len;
597 if (!got_parse_object_id((*commit)->tree_id, s, algo)) {
598 err = got_error(GOT_ERR_BAD_OBJ_DATA);
599 goto done;
601 remain -= digest_string_len;
602 s += digest_string_len;
603 } else {
604 err = got_error(GOT_ERR_BAD_OBJ_DATA);
605 goto done;
608 label_len = strlen(GOT_COMMIT_LABEL_PARENT);
609 while (strncmp(s, GOT_COMMIT_LABEL_PARENT, label_len) == 0) {
610 remain -= label_len;
611 if (remain < digest_string_len) {
612 err = got_error(GOT_ERR_BAD_OBJ_DATA);
613 goto done;
615 s += label_len;
616 err = got_object_commit_add_parent(*commit, s, algo);
617 if (err)
618 goto done;
620 remain -= digest_string_len;
621 s += digest_string_len;
624 label_len = strlen(GOT_COMMIT_LABEL_AUTHOR);
625 if (strncmp(s, GOT_COMMIT_LABEL_AUTHOR, label_len) == 0) {
626 char *p;
627 size_t slen;
629 remain -= label_len;
630 if (remain <= 0) {
631 err = got_error(GOT_ERR_BAD_OBJ_DATA);
632 goto done;
634 s += label_len;
635 p = memchr(s, '\n', remain);
636 if (p == NULL) {
637 err = got_error(GOT_ERR_BAD_OBJ_DATA);
638 goto done;
640 *p = '\0';
641 slen = strlen(s);
642 err = parse_commit_time(&(*commit)->author_time,
643 &(*commit)->author_gmtoff, s);
644 if (err)
645 goto done;
646 (*commit)->author = strdup(s);
647 if ((*commit)->author == NULL) {
648 err = got_error_from_errno("strdup");
649 goto done;
651 s += slen + 1;
652 remain -= slen + 1;
655 label_len = strlen(GOT_COMMIT_LABEL_COMMITTER);
656 if (strncmp(s, GOT_COMMIT_LABEL_COMMITTER, label_len) == 0) {
657 char *p;
658 size_t slen;
660 remain -= label_len;
661 if (remain <= 0) {
662 err = got_error(GOT_ERR_BAD_OBJ_DATA);
663 goto done;
665 s += label_len;
666 p = memchr(s, '\n', remain);
667 if (p == NULL) {
668 err = got_error(GOT_ERR_BAD_OBJ_DATA);
669 goto done;
671 *p = '\0';
672 slen = strlen(s);
673 err = parse_commit_time(&(*commit)->committer_time,
674 &(*commit)->committer_gmtoff, s);
675 if (err)
676 goto done;
677 (*commit)->committer = strdup(s);
678 if ((*commit)->committer == NULL) {
679 err = got_error_from_errno("strdup");
680 goto done;
682 s += slen + 1;
683 remain -= slen + 1;
686 (*commit)->logmsg = strndup(s, remain);
687 if ((*commit)->logmsg == NULL) {
688 err = got_error_from_errno("strndup");
689 goto done;
691 done:
692 if (err) {
693 got_object_commit_close(*commit);
694 *commit = NULL;
696 return err;
699 const struct got_error *
700 got_object_read_commit(struct got_commit_object **commit, int fd,
701 struct got_object_id *expected_id, size_t expected_size)
703 struct got_object *obj = NULL;
704 const struct got_error *err = NULL;
705 size_t len;
706 uint8_t *p;
707 struct got_inflate_checksum csum;
708 struct got_hash ctx;
709 struct got_object_id id;
711 got_hash_init(&ctx, expected_id->algo);
712 memset(&csum, 0, sizeof(csum));
713 csum.output_ctx = &ctx;
715 err = got_inflate_to_mem_fd(&p, &len, NULL, &csum, expected_size, fd);
716 if (err)
717 return err;
719 got_hash_final_object_id(&ctx, &id);
720 if (got_object_id_cmp(expected_id, &id) != 0) {
721 err = got_error_checksum(expected_id);
722 goto done;
725 err = got_object_parse_header(&obj, p, len);
726 if (err)
727 goto done;
729 if (len < obj->hdrlen + obj->size) {
730 err = got_error(GOT_ERR_BAD_OBJ_DATA);
731 goto done;
734 if (obj->type != GOT_OBJ_TYPE_COMMIT) {
735 err = got_error(GOT_ERR_OBJ_TYPE);
736 goto done;
739 /* Skip object header. */
740 len -= obj->hdrlen;
741 err = got_object_parse_commit(commit, p + obj->hdrlen, len,
742 expected_id->algo);
743 done:
744 free(p);
745 if (obj)
746 got_object_close(obj);
747 return err;
750 void
751 got_object_tree_close(struct got_tree_object *tree)
753 if (tree->refcnt > 0) {
754 tree->refcnt--;
755 if (tree->refcnt > 0)
756 return;
759 free(tree->entries);
760 free(tree);
763 const struct got_error *
764 got_object_parse_tree_entry(struct got_parsed_tree_entry *pte, size_t *elen,
765 char *buf, size_t maxlen, size_t digest_len, enum got_hash_algorithm algo)
767 char *p, *space;
769 *elen = 0;
771 *elen = strnlen(buf, maxlen) + 1;
772 if (*elen > maxlen)
773 return got_error(GOT_ERR_BAD_OBJ_DATA);
775 space = memchr(buf, ' ', *elen);
776 if (space == NULL || space <= buf)
777 return got_error(GOT_ERR_BAD_OBJ_DATA);
779 pte->mode = 0;
780 p = buf;
781 while (p < space) {
782 if (*p < '0' || *p > '7')
783 return got_error(GOT_ERR_BAD_OBJ_DATA);
784 pte->mode <<= 3;
785 pte->mode |= *p - '0';
786 p++;
789 if (*elen > maxlen || maxlen - *elen < digest_len)
790 return got_error(GOT_ERR_BAD_OBJ_DATA);
792 pte->name = space + 1;
793 pte->namelen = strlen(pte->name);
794 buf += *elen;
795 pte->id = buf;
796 pte->digest_len = digest_len;
797 pte->algo = algo;
798 *elen += digest_len;
799 return NULL;
802 static int
803 pte_cmp(const void *pa, const void *pb)
805 const struct got_parsed_tree_entry *a = pa, *b = pb;
807 return got_path_cmp(a->name, b->name, a->namelen, b->namelen);
810 const struct got_error *
811 got_object_parse_tree(struct got_parsed_tree_entry **entries, size_t *nentries,
812 size_t *nentries_alloc, uint8_t *buf, size_t len,
813 enum got_hash_algorithm algo)
815 const struct got_error *err = NULL;
816 size_t digest_len, remain = len;
817 const size_t nalloc = 16;
818 struct got_parsed_tree_entry *pte;
819 int i;
821 digest_len = got_hash_digest_length(algo);
823 *nentries = 0;
824 if (remain == 0)
825 return NULL; /* tree is empty */
827 while (remain > 0) {
828 size_t elen;
830 if (*nentries >= *nentries_alloc) {
831 pte = recallocarray(*entries, *nentries_alloc,
832 *nentries_alloc + nalloc, sizeof(**entries));
833 if (pte == NULL) {
834 err = got_error_from_errno("recallocarray");
835 goto done;
837 *entries = pte;
838 *nentries_alloc += nalloc;
841 pte = &(*entries)[*nentries];
842 err = got_object_parse_tree_entry(pte, &elen, buf, remain,
843 digest_len, algo);
844 if (err)
845 goto done;
846 buf += elen;
847 remain -= elen;
848 (*nentries)++;
851 if (remain != 0) {
852 err = got_error(GOT_ERR_BAD_OBJ_DATA);
853 goto done;
856 if (*nentries > 1) {
857 mergesort(*entries, *nentries, sizeof(**entries), pte_cmp);
859 for (i = 0; i < *nentries - 1; i++) {
860 struct got_parsed_tree_entry *prev = &(*entries)[i];
861 pte = &(*entries)[i + 1];
862 if (got_path_cmp(prev->name, pte->name,
863 prev->namelen, pte->namelen) == 0) {
864 err = got_error(GOT_ERR_TREE_DUP_ENTRY);
865 break;
869 done:
870 if (err)
871 *nentries = 0;
872 return err;
875 const struct got_error *
876 got_object_read_tree(struct got_parsed_tree_entry **entries, size_t *nentries,
877 size_t *nentries_alloc, uint8_t **p, int fd,
878 struct got_object_id *expected_id)
880 const struct got_error *err = NULL;
881 struct got_object *obj = NULL;
882 size_t len;
883 struct got_inflate_checksum csum;
884 struct got_hash ctx;
885 struct got_object_id id;
887 got_hash_init(&ctx, expected_id->algo);
888 memset(&csum, 0, sizeof(csum));
889 csum.output_ctx = &ctx;
891 err = got_inflate_to_mem_fd(p, &len, NULL, &csum, 0, fd);
892 if (err)
893 return err;
895 got_hash_final_object_id(&ctx, &id);
896 if (got_object_id_cmp(expected_id, &id) != 0) {
897 err = got_error_checksum(expected_id);
898 goto done;
901 err = got_object_parse_header(&obj, *p, len);
902 if (err)
903 goto done;
905 if (len < obj->hdrlen + obj->size) {
906 err = got_error(GOT_ERR_BAD_OBJ_DATA);
907 goto done;
910 /* Skip object header. */
911 len -= obj->hdrlen;
912 err = got_object_parse_tree(entries, nentries, nentries_alloc,
913 *p + obj->hdrlen, len, expected_id->algo);
914 done:
915 if (obj)
916 got_object_close(obj);
917 return err;
920 void
921 got_object_tag_close(struct got_tag_object *tag)
923 if (tag->refcnt > 0) {
924 tag->refcnt--;
925 if (tag->refcnt > 0)
926 return;
929 free(tag->tag);
930 free(tag->tagger);
931 free(tag->tagmsg);
932 free(tag);
935 const struct got_error *
936 got_object_parse_tag(struct got_tag_object **tag, uint8_t *buf, size_t len,
937 enum got_hash_algorithm algo)
939 const struct got_error *err = NULL;
940 size_t remain = len;
941 char *s = buf;
942 size_t label_len, digest_string_len;
944 digest_string_len = got_hash_digest_string_length(algo);
946 if (remain == 0)
947 return got_error(GOT_ERR_BAD_OBJ_DATA);
949 *tag = calloc(1, sizeof(**tag));
950 if (*tag == NULL)
951 return got_error_from_errno("calloc");
953 label_len = strlen(GOT_TAG_LABEL_OBJECT);
954 if (strncmp(s, GOT_TAG_LABEL_OBJECT, label_len) == 0) {
955 remain -= label_len;
956 if (remain < digest_string_len) {
957 err = got_error(GOT_ERR_BAD_OBJ_DATA);
958 goto done;
960 s += label_len;
961 if (!got_parse_object_id(&(*tag)->id, s, algo)) {
962 err = got_error(GOT_ERR_BAD_OBJ_DATA);
963 goto done;
965 remain -= digest_string_len;
966 s += digest_string_len;
967 } else {
968 err = got_error(GOT_ERR_BAD_OBJ_DATA);
969 goto done;
972 if (remain <= 0) {
973 err = got_error(GOT_ERR_BAD_OBJ_DATA);
974 goto done;
977 label_len = strlen(GOT_TAG_LABEL_TYPE);
978 if (strncmp(s, GOT_TAG_LABEL_TYPE, label_len) == 0) {
979 remain -= label_len;
980 if (remain <= 0) {
981 err = got_error(GOT_ERR_BAD_OBJ_DATA);
982 goto done;
984 s += label_len;
985 if (strncmp(s, GOT_OBJ_LABEL_COMMIT,
986 strlen(GOT_OBJ_LABEL_COMMIT)) == 0) {
987 (*tag)->obj_type = GOT_OBJ_TYPE_COMMIT;
988 label_len = strlen(GOT_OBJ_LABEL_COMMIT);
989 s += label_len;
990 remain -= label_len;
991 } else if (strncmp(s, GOT_OBJ_LABEL_TREE,
992 strlen(GOT_OBJ_LABEL_TREE)) == 0) {
993 (*tag)->obj_type = GOT_OBJ_TYPE_TREE;
994 label_len = strlen(GOT_OBJ_LABEL_TREE);
995 s += label_len;
996 remain -= label_len;
997 } else if (strncmp(s, GOT_OBJ_LABEL_BLOB,
998 strlen(GOT_OBJ_LABEL_BLOB)) == 0) {
999 (*tag)->obj_type = GOT_OBJ_TYPE_BLOB;
1000 label_len = strlen(GOT_OBJ_LABEL_BLOB);
1001 s += label_len;
1002 remain -= label_len;
1003 } else if (strncmp(s, GOT_OBJ_LABEL_TAG,
1004 strlen(GOT_OBJ_LABEL_TAG)) == 0) {
1005 (*tag)->obj_type = GOT_OBJ_TYPE_TAG;
1006 label_len = strlen(GOT_OBJ_LABEL_TAG);
1007 s += label_len;
1008 remain -= label_len;
1009 } else {
1010 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1011 goto done;
1014 if (remain <= 0 || *s != '\n') {
1015 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1016 goto done;
1018 s++;
1019 remain--;
1020 if (remain <= 0) {
1021 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1022 goto done;
1024 } else {
1025 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1026 goto done;
1029 label_len = strlen(GOT_TAG_LABEL_TAG);
1030 if (strncmp(s, GOT_TAG_LABEL_TAG, label_len) == 0) {
1031 char *p;
1032 size_t slen;
1033 remain -= label_len;
1034 if (remain <= 0) {
1035 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1036 goto done;
1038 s += label_len;
1039 p = memchr(s, '\n', remain);
1040 if (p == NULL) {
1041 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1042 goto done;
1044 *p = '\0';
1045 slen = strlen(s);
1046 (*tag)->tag = strndup(s, slen);
1047 if ((*tag)->tag == NULL) {
1048 err = got_error_from_errno("strndup");
1049 goto done;
1051 s += slen + 1;
1052 remain -= slen + 1;
1053 if (remain <= 0) {
1054 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1055 goto done;
1057 } else {
1058 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1059 goto done;
1062 label_len = strlen(GOT_TAG_LABEL_TAGGER);
1063 if (strncmp(s, GOT_TAG_LABEL_TAGGER, label_len) == 0) {
1064 char *p;
1065 size_t slen;
1067 remain -= label_len;
1068 if (remain <= 0) {
1069 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1070 goto done;
1072 s += label_len;
1073 p = memchr(s, '\n', remain);
1074 if (p == NULL) {
1075 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1076 goto done;
1078 *p = '\0';
1079 slen = strlen(s);
1080 err = parse_commit_time(&(*tag)->tagger_time,
1081 &(*tag)->tagger_gmtoff, s);
1082 if (err)
1083 goto done;
1084 (*tag)->tagger = strdup(s);
1085 if ((*tag)->tagger == NULL) {
1086 err = got_error_from_errno("strdup");
1087 goto done;
1089 s += slen + 1;
1090 remain -= slen + 1;
1091 if (remain < 0) {
1092 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1093 goto done;
1095 } else {
1096 /* Some old tags in the Linux git repo have no tagger. */
1097 (*tag)->tagger = strdup("");
1098 if ((*tag)->tagger == NULL) {
1099 err = got_error_from_errno("strdup");
1100 goto done;
1104 (*tag)->tagmsg = strndup(s, remain);
1105 if ((*tag)->tagmsg == NULL) {
1106 err = got_error_from_errno("strndup");
1107 goto done;
1109 done:
1110 if (err) {
1111 got_object_tag_close(*tag);
1112 *tag = NULL;
1114 return err;
1117 const struct got_error *
1118 got_object_read_tag(struct got_tag_object **tag, int fd,
1119 struct got_object_id *expected_id, size_t expected_size)
1121 const struct got_error *err = NULL;
1122 struct got_object *obj = NULL;
1123 size_t len;
1124 uint8_t *p;
1125 struct got_inflate_checksum csum;
1126 struct got_hash ctx;
1127 struct got_object_id id;
1129 got_hash_init(&ctx, expected_id->algo);
1130 memset(&csum, 0, sizeof(csum));
1131 csum.output_ctx = &ctx;
1133 err = got_inflate_to_mem_fd(&p, &len, NULL, &csum,
1134 expected_size, fd);
1135 if (err)
1136 return err;
1138 got_hash_final_object_id(&ctx, &id);
1139 if (got_object_id_cmp(expected_id, &id) != 0) {
1140 err = got_error_checksum(expected_id);
1141 goto done;
1144 err = got_object_parse_header(&obj, p, len);
1145 if (err)
1146 goto done;
1148 if (len < obj->hdrlen + obj->size) {
1149 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1150 goto done;
1153 /* Skip object header. */
1154 len -= obj->hdrlen;
1155 err = got_object_parse_tag(tag, p + obj->hdrlen, len,
1156 expected_id->algo);
1157 done:
1158 free(p);
1159 if (obj)
1160 got_object_close(obj);
1161 return err;