2 * Copyright (c) 2018, 2019, 2020 Stefan Sperling <stsp@openbsd.org>
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 #include <sys/types.h>
27 #include "got_compat.h"
29 #include "got_error.h"
30 #include "got_object.h"
31 #include "got_cancel.h"
32 #include "got_commit_graph.h"
35 #include "got_lib_delta.h"
36 #include "got_lib_inflate.h"
37 #include "got_lib_object.h"
38 #include "got_lib_object_idset.h"
40 struct got_commit_graph_node
{
41 struct got_object_id id
;
43 /* Used only during iteration. */
45 TAILQ_ENTRY(got_commit_graph_node
) entry
;
48 TAILQ_HEAD(got_commit_graph_iter_list
, got_commit_graph_node
);
50 struct got_commit_graph_branch_tip
{
51 struct got_object_id
*commit_id
;
52 struct got_commit_object
*commit
;
53 struct got_commit_graph_node
*new_node
;
56 struct got_commit_graph
{
57 /* The set of all commits we have traversed. */
58 struct got_object_idset
*node_ids
;
61 #define GOT_COMMIT_GRAPH_FIRST_PARENT_TRAVERSAL 0x01
64 * A set of object IDs of known parent commits which we have not yet
65 * traversed. Each commit ID in this set represents a branch in commit
66 * history: Either the first-parent branch of the head node, or another
67 * branch corresponding to a traversed merge commit for which we have
68 * not traversed a branch point commit yet.
70 * Whenever we add a commit with a matching ID to the graph, we remove
71 * its corresponding element from this set, and add new elements for
72 * each of that commit's parent commits which were not traversed yet.
74 * When API users ask us to fetch more commits, we fetch commits from
75 * all currently open branches. This allows API users to process
76 * commits in linear order even though the history contains branches.
78 struct got_object_idset
*open_branches
;
80 /* Array of branch tips for fetch_commits_from_open_branches(). */
81 struct got_commit_graph_branch_tip
*tips
;
84 /* Path of tree entry of interest to the API user. */
88 * Nodes which will be passed to the API user next, sorted by
91 struct got_commit_graph_iter_list iter_list
;
94 static const struct got_error
*
95 detect_changed_path(int *changed
, struct got_commit_object
*commit
,
96 struct got_object_id
*commit_id
, const char *path
,
97 struct got_repository
*repo
)
99 const struct got_error
*err
= NULL
;
100 struct got_commit_object
*pcommit
= NULL
;
101 struct got_tree_object
*tree
= NULL
, *ptree
= NULL
;
102 struct got_object_qid
*pid
;
104 if (got_path_is_root_dir(path
)) {
111 pid
= STAILQ_FIRST(&commit
->parent_ids
);
113 struct got_object_id
*obj_id
;
114 err
= got_object_id_by_path(&obj_id
, repo
, commit
, path
);
116 if (err
->code
== GOT_ERR_NO_TREE_ENTRY
)
119 *changed
= 1; /* The path was created in this commit. */
124 err
= got_object_open_as_tree(&tree
, repo
, commit
->tree_id
);
128 err
= got_object_open_as_commit(&pcommit
, repo
, &pid
->id
);
132 err
= got_object_open_as_tree(&ptree
, repo
, pcommit
->tree_id
);
136 err
= got_object_tree_path_changed(changed
, tree
, ptree
, path
, repo
);
139 got_object_tree_close(tree
);
141 got_object_tree_close(ptree
);
143 got_object_commit_close(pcommit
);
148 add_node_to_iter_list(struct got_commit_graph
*graph
,
149 struct got_commit_graph_node
*node
, time_t committer_time
)
151 struct got_commit_graph_node
*n
, *next
;
153 node
->timestamp
= committer_time
;
155 n
= TAILQ_FIRST(&graph
->iter_list
);
157 next
= TAILQ_NEXT(n
, entry
);
158 if (next
&& node
->timestamp
>= next
->timestamp
) {
159 TAILQ_INSERT_BEFORE(next
, node
, entry
);
164 TAILQ_INSERT_TAIL(&graph
->iter_list
, node
, entry
);
167 static const struct got_error
*
168 add_node(struct got_commit_graph_node
**new_node
,
169 struct got_commit_graph
*graph
, struct got_object_id
*commit_id
,
170 struct got_repository
*repo
)
172 const struct got_error
*err
= NULL
;
173 struct got_commit_graph_node
*node
;
177 node
= calloc(1, sizeof(*node
));
179 return got_error_from_errno("calloc");
181 memcpy(&node
->id
, commit_id
, sizeof(node
->id
));
182 err
= got_object_idset_add(graph
->node_ids
, &node
->id
, NULL
);
191 * Ask got-read-pack to traverse first-parent history until a commit is
192 * encountered which modified graph->path, or until the pack file runs
193 * out of relevant commits. This is faster than sending an individual
194 * request for each commit stored in the pack file.
196 static const struct got_error
*
197 packed_first_parent_traversal(int *ncommits_traversed
,
198 struct got_commit_graph
*graph
, struct got_object_id
*commit_id
,
199 struct got_repository
*repo
)
201 const struct got_error
*err
= NULL
;
202 struct got_object_id_queue traversed_commits
;
203 struct got_object_qid
*qid
;
205 STAILQ_INIT(&traversed_commits
);
206 *ncommits_traversed
= 0;
208 err
= got_traverse_packed_commits(&traversed_commits
,
209 commit_id
, graph
->path
, repo
);
213 /* Add all traversed commits to the graph... */
214 STAILQ_FOREACH(qid
, &traversed_commits
, entry
) {
215 struct got_commit_graph_node
*node
;
217 if (got_object_idset_contains(graph
->open_branches
, &qid
->id
))
219 if (got_object_idset_contains(graph
->node_ids
, &qid
->id
))
222 (*ncommits_traversed
)++;
224 /* ... except the last commit is the new branch tip. */
225 if (STAILQ_NEXT(qid
, entry
) == NULL
) {
226 err
= got_object_idset_add(graph
->open_branches
,
231 err
= add_node(&node
, graph
, &qid
->id
, repo
);
236 got_object_id_queue_free(&traversed_commits
);
240 static const struct got_error
*
241 close_branch(struct got_commit_graph
*graph
, struct got_object_id
*commit_id
)
243 const struct got_error
*err
;
245 err
= got_object_idset_remove(NULL
, graph
->open_branches
, commit_id
);
246 if (err
&& err
->code
!= GOT_ERR_NO_OBJ
)
251 static const struct got_error
*
252 advance_branch(struct got_commit_graph
*graph
, struct got_object_id
*commit_id
,
253 struct got_commit_object
*commit
, struct got_repository
*repo
)
255 const struct got_error
*err
;
256 struct got_object_qid
*qid
;
258 err
= close_branch(graph
, commit_id
);
262 if (graph
->flags
& GOT_COMMIT_GRAPH_FIRST_PARENT_TRAVERSAL
) {
263 qid
= STAILQ_FIRST(&commit
->parent_ids
);
265 got_object_idset_contains(graph
->open_branches
, &qid
->id
))
268 * The root directory always changes by definition, and when
269 * logging the root we want to traverse consecutive commits
270 * even if they point at the same tree.
271 * But if we are looking for a specific path then we can avoid
272 * fetching packed commits which did not modify the path and
273 * only fetch their IDs. This speeds up 'got blame'.
275 if (!got_path_is_root_dir(graph
->path
) &&
276 (commit
->flags
& GOT_COMMIT_FLAG_PACKED
)) {
278 err
= packed_first_parent_traversal(&ncommits
,
279 graph
, &qid
->id
, repo
);
280 if (err
|| ncommits
> 0)
283 return got_object_idset_add(graph
->open_branches
,
288 * If we are graphing commits for a specific path, skip branches
289 * which do not contribute any content to this path.
291 if (commit
->nparents
> 1 && !got_path_is_root_dir(graph
->path
)) {
292 struct got_object_id
*merged_id
, *prev_id
= NULL
;
293 int branches_differ
= 0;
295 err
= got_object_id_by_path(&merged_id
, repo
, commit
,
300 STAILQ_FOREACH(qid
, &commit
->parent_ids
, entry
) {
301 struct got_object_id
*id
= NULL
;
302 struct got_commit_object
*pcommit
= NULL
;
304 if (got_object_idset_contains(graph
->open_branches
,
308 err
= got_object_open_as_commit(&pcommit
, repo
,
315 err
= got_object_id_by_path(&id
, repo
, pcommit
,
317 got_object_commit_close(pcommit
);
320 if (err
->code
== GOT_ERR_NO_TREE_ENTRY
) {
330 if (!branches_differ
&&
331 got_object_id_cmp(id
, prev_id
) != 0)
338 * If a branch has created the merged content we can
339 * skip any other branches.
341 if (got_object_id_cmp(merged_id
, id
) == 0) {
342 err
= got_object_idset_add(graph
->open_branches
,
356 * If the path's content is the same on all branches,
357 * follow the first parent only.
359 if (!branches_differ
) {
360 qid
= STAILQ_FIRST(&commit
->parent_ids
);
363 if (got_object_idset_contains(graph
->open_branches
,
366 if (got_object_idset_contains(graph
->node_ids
,
368 return NULL
; /* parent already traversed */
369 return got_object_idset_add(graph
->open_branches
,
374 STAILQ_FOREACH(qid
, &commit
->parent_ids
, entry
) {
375 if (got_object_idset_contains(graph
->open_branches
, &qid
->id
))
377 if (got_object_idset_contains(graph
->node_ids
, &qid
->id
))
378 continue; /* parent already traversed */
379 err
= got_object_idset_add(graph
->open_branches
, &qid
->id
,
388 const struct got_error
*
389 got_commit_graph_open(struct got_commit_graph
**graph
,
390 const char *path
, int first_parent_traversal
)
392 const struct got_error
*err
= NULL
;
394 *graph
= calloc(1, sizeof(**graph
));
396 return got_error_from_errno("calloc");
398 TAILQ_INIT(&(*graph
)->iter_list
);
400 (*graph
)->path
= strdup(path
);
401 if ((*graph
)->path
== NULL
) {
402 err
= got_error_from_errno("strdup");
406 (*graph
)->node_ids
= got_object_idset_alloc();
407 if ((*graph
)->node_ids
== NULL
) {
408 err
= got_error_from_errno("got_object_idset_alloc");
412 (*graph
)->open_branches
= got_object_idset_alloc();
413 if ((*graph
)->open_branches
== NULL
) {
414 err
= got_error_from_errno("got_object_idset_alloc");
418 if (first_parent_traversal
)
419 (*graph
)->flags
|= GOT_COMMIT_GRAPH_FIRST_PARENT_TRAVERSAL
;
422 got_commit_graph_close(*graph
);
428 struct add_branch_tip_arg
{
429 struct got_commit_graph_branch_tip
*tips
;
431 struct got_repository
*repo
;
432 struct got_commit_graph
*graph
;
435 static const struct got_error
*
436 add_branch_tip(struct got_object_id
*commit_id
, void *data
, void *arg
)
438 const struct got_error
*err
;
439 struct add_branch_tip_arg
*a
= arg
;
440 struct got_commit_graph_node
*new_node
;
441 struct got_commit_object
*commit
;
443 err
= got_object_open_as_commit(&commit
, a
->repo
, commit_id
);
447 err
= add_node(&new_node
, a
->graph
, commit_id
, a
->repo
);
451 a
->tips
[a
->ntips
].commit_id
= new_node
? &new_node
->id
: NULL
;
452 a
->tips
[a
->ntips
].commit
= commit
;
453 a
->tips
[a
->ntips
].new_node
= new_node
;
459 static const struct got_error
*
460 fetch_commits_from_open_branches(struct got_commit_graph
*graph
,
461 struct got_repository
*repo
, got_cancel_cb cancel_cb
, void *cancel_arg
)
463 const struct got_error
*err
;
464 struct add_branch_tip_arg arg
;
467 ntips
= got_object_idset_num_elements(graph
->open_branches
);
471 /* (Re-)allocate branch tips array if necessary. */
472 if (graph
->ntips
< ntips
) {
473 struct got_commit_graph_branch_tip
*tips
;
474 tips
= recallocarray(graph
->tips
, graph
->ntips
, ntips
,
477 return got_error_from_errno("recallocarray");
479 graph
->ntips
= ntips
;
481 arg
.tips
= graph
->tips
;
482 arg
.ntips
= 0; /* add_branch_tip() will increment */
485 err
= got_object_idset_for_each(graph
->open_branches
, add_branch_tip
,
490 for (i
= 0; i
< arg
.ntips
; i
++) {
491 struct got_object_id
*commit_id
;
492 struct got_commit_object
*commit
;
493 struct got_commit_graph_node
*new_node
;
497 err
= (*cancel_cb
)(cancel_arg
);
502 commit_id
= arg
.tips
[i
].commit_id
;
503 commit
= arg
.tips
[i
].commit
;
504 new_node
= arg
.tips
[i
].new_node
;
506 err
= detect_changed_path(&changed
, commit
, commit_id
,
509 if (err
->code
!= GOT_ERR_NO_OBJ
)
512 * History of the path stops here on the current
513 * branch. Keep going on other branches.
515 err
= close_branch(graph
, commit_id
);
521 add_node_to_iter_list(graph
, new_node
,
522 got_object_commit_get_committer_time(commit
));
523 err
= advance_branch(graph
, commit_id
, commit
, repo
);
528 for (i
= 0; i
< arg
.ntips
; i
++)
529 got_object_commit_close(arg
.tips
[i
].commit
);
534 got_commit_graph_close(struct got_commit_graph
*graph
)
536 if (graph
->open_branches
)
537 got_object_idset_free(graph
->open_branches
);
539 got_object_idset_free(graph
->node_ids
);
545 const struct got_error
*
546 got_commit_graph_iter_start(struct got_commit_graph
*graph
,
547 struct got_object_id
*id
, struct got_repository
*repo
,
548 got_cancel_cb cancel_cb
, void *cancel_arg
)
550 const struct got_error
*err
= NULL
;
552 if (!TAILQ_EMPTY(&graph
->iter_list
))
553 return got_error(GOT_ERR_ITER_BUSY
);
555 err
= got_object_idset_add(graph
->open_branches
, id
, NULL
);
559 /* Locate first commit which changed graph->path. */
560 while (TAILQ_EMPTY(&graph
->iter_list
) &&
561 got_object_idset_num_elements(graph
->open_branches
) > 0) {
562 err
= fetch_commits_from_open_branches(graph
, repo
,
563 cancel_cb
, cancel_arg
);
568 if (TAILQ_EMPTY(&graph
->iter_list
)) {
570 if (got_path_is_root_dir(graph
->path
))
571 return got_error_no_obj(id
);
573 while (path
[0] == '/')
575 return got_error_path(path
, GOT_ERR_NO_TREE_ENTRY
);
581 const struct got_error
*
582 got_commit_graph_iter_next(struct got_object_id
**id
,
583 struct got_commit_graph
*graph
, struct got_repository
*repo
,
584 got_cancel_cb cancel_cb
, void *cancel_arg
)
586 const struct got_error
*err
= NULL
;
587 struct got_commit_graph_node
*node
;
591 node
= TAILQ_FIRST(&graph
->iter_list
);
593 /* We are done iterating, or iteration was not started. */
594 return got_error(GOT_ERR_ITER_COMPLETED
);
597 while (TAILQ_NEXT(node
, entry
) == NULL
&&
598 got_object_idset_num_elements(graph
->open_branches
) > 0) {
599 err
= fetch_commits_from_open_branches(graph
, repo
,
600 cancel_cb
, cancel_arg
);
606 TAILQ_REMOVE(&graph
->iter_list
, node
, entry
);
610 const struct got_error
*
611 got_commit_graph_find_youngest_common_ancestor(struct got_object_id
**yca_id
,
612 struct got_object_id
*commit_id
, struct got_object_id
*commit_id2
,
613 int first_parent_traversal
,
614 struct got_repository
*repo
, got_cancel_cb cancel_cb
, void *cancel_arg
)
616 const struct got_error
*err
= NULL
;
617 struct got_commit_graph
*graph
= NULL
, *graph2
= NULL
;
618 int completed
= 0, completed2
= 0;
619 struct got_object_idset
*commit_ids
;
623 commit_ids
= got_object_idset_alloc();
624 if (commit_ids
== NULL
)
625 return got_error_from_errno("got_object_idset_alloc");
627 err
= got_commit_graph_open(&graph
, "/", first_parent_traversal
);
631 err
= got_commit_graph_open(&graph2
, "/", first_parent_traversal
);
635 err
= got_commit_graph_iter_start(graph
, commit_id
, repo
,
636 cancel_cb
, cancel_arg
);
640 err
= got_commit_graph_iter_start(graph2
, commit_id2
, repo
,
641 cancel_cb
, cancel_arg
);
646 struct got_object_id
*id
= NULL
, *id2
= NULL
;
649 err
= (*cancel_cb
)(cancel_arg
);
655 err
= got_commit_graph_iter_next(&id
, graph
, repo
,
656 cancel_cb
, cancel_arg
);
658 if (err
->code
!= GOT_ERR_ITER_COMPLETED
)
666 err
= got_commit_graph_iter_next(&id2
, graph2
, repo
,
667 cancel_cb
, cancel_arg
);
669 if (err
->code
!= GOT_ERR_ITER_COMPLETED
)
677 if (got_object_idset_contains(commit_ids
, id
)) {
678 *yca_id
= got_object_id_dup(id
);
681 err
= got_error_from_errno("got_object_id_dup");
685 err
= got_object_idset_add(commit_ids
, id
, NULL
);
690 if (got_object_idset_contains(commit_ids
, id2
)) {
691 *yca_id
= got_object_id_dup(id2
);
694 err
= got_error_from_errno("got_object_id_dup");
698 err
= got_object_idset_add(commit_ids
, id2
, NULL
);
703 if (completed
&& completed2
) {
704 err
= got_error(GOT_ERR_ANCESTRY
);
710 got_object_idset_free(commit_ids
);
712 got_commit_graph_close(graph
);
714 got_commit_graph_close(graph2
);