1 #define USE_THE_REPOSITORY_VARIABLE
3 #include "git-compat-util.h"
4 #include "pseudo-merge.h"
9 #include "string-list.h"
11 #include "pack-bitmap.h"
17 #define DEFAULT_PSEUDO_MERGE_DECAY 1.0
18 #define DEFAULT_PSEUDO_MERGE_MAX_MERGES 64
19 #define DEFAULT_PSEUDO_MERGE_SAMPLE_RATE 1
20 #define DEFAULT_PSEUDO_MERGE_THRESHOLD approxidate("1.week.ago")
21 #define DEFAULT_PSEUDO_MERGE_STABLE_THRESHOLD approxidate("1.month.ago")
22 #define DEFAULT_PSEUDO_MERGE_STABLE_SIZE 512
24 static double gitexp(double base
, int exp
)
38 static uint32_t pseudo_merge_group_size(const struct pseudo_merge_group
*group
,
39 const struct pseudo_merge_matches
*matches
,
46 * The size of pseudo-merge groups decays according to a power series,
51 * , where 'n' is the n-th pseudo-merge group, 'f(n)' is its size, 'k'
52 * is the decay rate, and 'C' is a scaling value.
54 * The value of C depends on the number of groups, decay rate, and total
55 * number of commits. It is computed such that if there are M and N
56 * total groups and commits, respectively, that:
58 * N = f(0) + f(1) + ... f(M-1)
60 * Rearranging to isolate C, we get:
62 * N = \sum_{n=1}^M C / n^k
64 * N / C = \sum_{n=1}^M n^-k
66 * C = N / \sum_{n=1}^M n^-k
68 * For example, if we have a decay rate of 'k' being equal to 1.5, 'N'
69 * total commits equal to 10,000, and 'M' being equal to 6 groups, then
70 * the (rounded) group sizes are:
72 * { 5469, 1934, 1053, 684, 489, 372 }
74 * increasing the number of total groups, say to 10, scales the group
75 * sizes appropriately:
77 * { 5012, 1772, 964, 626, 448, 341, 271, 221, 186, 158 }
79 for (n
= 0; n
< group
->max_merges
; n
++)
80 C
+= 1.0 / gitexp(n
+ 1, group
->decay
);
81 C
= matches
->unstable_nr
/ C
;
83 return (uint32_t)((C
/ gitexp(i
+ 1, group
->decay
)) + 0.5);
86 static void pseudo_merge_group_init(struct pseudo_merge_group
*group
)
88 memset(group
, 0, sizeof(struct pseudo_merge_group
));
90 strmap_init_with_options(&group
->matches
, NULL
, 1);
92 group
->decay
= DEFAULT_PSEUDO_MERGE_DECAY
;
93 group
->max_merges
= DEFAULT_PSEUDO_MERGE_MAX_MERGES
;
94 group
->sample_rate
= DEFAULT_PSEUDO_MERGE_SAMPLE_RATE
;
95 group
->threshold
= DEFAULT_PSEUDO_MERGE_THRESHOLD
;
96 group
->stable_threshold
= DEFAULT_PSEUDO_MERGE_STABLE_THRESHOLD
;
97 group
->stable_size
= DEFAULT_PSEUDO_MERGE_STABLE_SIZE
;
100 void pseudo_merge_group_release(struct pseudo_merge_group
*group
)
102 struct hashmap_iter iter
;
103 struct strmap_entry
*e
;
105 regfree(group
->pattern
);
106 free(group
->pattern
);
108 strmap_for_each_entry(&group
->matches
, &iter
, e
) {
109 struct pseudo_merge_matches
*matches
= e
->value
;
110 free(matches
->stable
);
111 free(matches
->unstable
);
114 strmap_clear(&group
->matches
, 0);
119 static int pseudo_merge_config(const char *var
, const char *value
,
120 const struct config_context
*ctx
,
123 struct string_list
*list
= cb_data
;
124 struct string_list_item
*item
;
125 struct pseudo_merge_group
*group
;
126 struct strbuf buf
= STRBUF_INIT
;
127 const char *sub
, *key
;
131 if (parse_config_key(var
, "bitmappseudomerge", &sub
, &sub_len
, &key
))
137 strbuf_add(&buf
, sub
, sub_len
);
139 item
= string_list_lookup(list
, buf
.buf
);
141 item
= string_list_insert(list
, buf
.buf
);
143 item
->util
= xmalloc(sizeof(struct pseudo_merge_group
));
144 pseudo_merge_group_init(item
->util
);
149 if (!strcmp(key
, "pattern")) {
150 struct strbuf re
= STRBUF_INIT
;
152 free(group
->pattern
);
154 strbuf_addch(&re
, '^');
155 strbuf_addstr(&re
, value
);
157 group
->pattern
= xcalloc(1, sizeof(regex_t
));
158 if (regcomp(group
->pattern
, re
.buf
, REG_EXTENDED
))
159 die(_("failed to load pseudo-merge regex for %s: '%s'"),
163 } else if (!strcmp(key
, "decay")) {
164 group
->decay
= git_config_double(var
, value
, ctx
->kvi
);
165 if (group
->decay
< 0) {
166 warning(_("%s must be non-negative, using default"), var
);
167 group
->decay
= DEFAULT_PSEUDO_MERGE_DECAY
;
169 } else if (!strcmp(key
, "samplerate")) {
170 group
->sample_rate
= git_config_double(var
, value
, ctx
->kvi
);
171 if (!(0 <= group
->sample_rate
&& group
->sample_rate
<= 1)) {
172 warning(_("%s must be between 0 and 1, using default"), var
);
173 group
->sample_rate
= DEFAULT_PSEUDO_MERGE_SAMPLE_RATE
;
175 } else if (!strcmp(key
, "threshold")) {
176 if (git_config_expiry_date(&group
->threshold
, var
, value
)) {
180 } else if (!strcmp(key
, "maxmerges")) {
181 group
->max_merges
= git_config_int(var
, value
, ctx
->kvi
);
182 if (group
->max_merges
< 0) {
183 warning(_("%s must be non-negative, using default"), var
);
184 group
->max_merges
= DEFAULT_PSEUDO_MERGE_MAX_MERGES
;
186 } else if (!strcmp(key
, "stablethreshold")) {
187 if (git_config_expiry_date(&group
->stable_threshold
, var
, value
)) {
191 } else if (!strcmp(key
, "stablesize")) {
192 group
->stable_size
= git_config_int(var
, value
, ctx
->kvi
);
193 if (group
->stable_size
<= 0) {
194 warning(_("%s must be positive, using default"), var
);
195 group
->stable_size
= DEFAULT_PSEUDO_MERGE_STABLE_SIZE
;
200 strbuf_release(&buf
);
205 void load_pseudo_merges_from_config(struct repository
*r
,
206 struct string_list
*list
)
208 struct string_list_item
*item
;
210 repo_config(r
, pseudo_merge_config
, list
);
212 for_each_string_list_item(item
, list
) {
213 struct pseudo_merge_group
*group
= item
->util
;
215 die(_("pseudo-merge group '%s' missing required pattern"),
217 if (group
->threshold
< group
->stable_threshold
)
218 die(_("pseudo-merge group '%s' has unstable threshold "
219 "before stable one"), item
->string
);
223 static int find_pseudo_merge_group_for_ref(const char *refname
,
224 const char *referent UNUSED
,
225 const struct object_id
*oid
,
229 struct bitmap_writer
*writer
= _data
;
230 struct object_id peeled
;
235 if (!peel_iterated_oid(the_repository
, oid
, &peeled
))
238 c
= lookup_commit(the_repository
, oid
);
241 if (!packlist_find(writer
->to_pack
, oid
))
244 has_bitmap
= bitmap_writer_has_bitmapped_object_id(writer
, oid
);
246 for (i
= 0; i
< writer
->pseudo_merge_groups
.nr
; i
++) {
247 struct pseudo_merge_group
*group
;
248 struct pseudo_merge_matches
*matches
;
249 struct strbuf group_name
= STRBUF_INIT
;
250 regmatch_t captures
[16];
253 group
= writer
->pseudo_merge_groups
.items
[i
].util
;
254 if (regexec(group
->pattern
, refname
, ARRAY_SIZE(captures
),
258 if (captures
[ARRAY_SIZE(captures
) - 1].rm_so
!= -1)
259 warning(_("pseudo-merge regex from config has too many capture "
260 "groups (max=%"PRIuMAX
")"),
261 (uintmax_t)ARRAY_SIZE(captures
) - 2);
263 for (j
= !!group
->pattern
->re_nsub
; j
< ARRAY_SIZE(captures
); j
++) {
264 regmatch_t
*match
= &captures
[j
];
265 if (match
->rm_so
== -1)
269 strbuf_addch(&group_name
, '-');
271 strbuf_add(&group_name
, refname
+ match
->rm_so
,
272 match
->rm_eo
- match
->rm_so
);
275 matches
= strmap_get(&group
->matches
, group_name
.buf
);
277 matches
= xcalloc(1, sizeof(*matches
));
278 strmap_put(&group
->matches
, group_name
.buf
,
282 if (c
->date
<= group
->stable_threshold
) {
283 ALLOC_GROW(matches
->stable
, matches
->stable_nr
+ 1,
284 matches
->stable_alloc
);
285 matches
->stable
[matches
->stable_nr
++] = c
;
286 } else if (c
->date
<= group
->threshold
&& !has_bitmap
) {
287 ALLOC_GROW(matches
->unstable
, matches
->unstable_nr
+ 1,
288 matches
->unstable_alloc
);
289 matches
->unstable
[matches
->unstable_nr
++] = c
;
292 strbuf_release(&group_name
);
298 static struct commit
*push_pseudo_merge(struct pseudo_merge_group
*group
)
300 struct commit
*merge
;
302 ALLOC_GROW(group
->merges
, group
->merges_nr
+ 1, group
->merges_alloc
);
304 merge
= alloc_commit_node(the_repository
);
305 merge
->object
.parsed
= 1;
306 merge
->object
.flags
|= BITMAP_PSEUDO_MERGE
;
308 group
->merges
[group
->merges_nr
++] = merge
;
313 static struct pseudo_merge_commit_idx
*pseudo_merge_idx(kh_oid_map_t
*pseudo_merge_commits
,
314 const struct object_id
*oid
)
317 struct pseudo_merge_commit_idx
*pmc
;
319 khiter_t hash_pos
= kh_put_oid_map(pseudo_merge_commits
, *oid
,
323 CALLOC_ARRAY(pmc
, 1);
324 kh_value(pseudo_merge_commits
, hash_pos
) = pmc
;
326 pmc
= kh_value(pseudo_merge_commits
, hash_pos
);
332 #define MIN_PSEUDO_MERGE_SIZE 8
334 static void select_pseudo_merges_1(struct bitmap_writer
*writer
,
335 struct pseudo_merge_group
*group
,
336 struct pseudo_merge_matches
*matches
)
339 uint32_t stable_merges_nr
;
341 if (!matches
->stable_nr
&& !matches
->unstable_nr
)
342 return; /* all tips in this group already have bitmaps */
344 stable_merges_nr
= matches
->stable_nr
/ group
->stable_size
;
345 if (matches
->stable_nr
% group
->stable_size
)
348 /* make stable_merges_nr pseudo merges for stable commits */
349 for (i
= 0, j
= 0; i
< stable_merges_nr
; i
++) {
350 struct commit
*merge
;
351 struct commit_list
**p
;
353 merge
= push_pseudo_merge(group
);
357 * For each pseudo-merge created above, add parents to the
358 * allocated commit node from the stable set of commits
359 * (un-bitmapped, newer than the stable threshold).
363 struct pseudo_merge_commit_idx
*pmc
;
365 if (j
>= matches
->stable_nr
)
368 c
= matches
->stable
[j
++];
370 * Here and below, make sure that we keep our mapping of
371 * commits -> pseudo-merge(s) which include the key'd
374 pmc
= pseudo_merge_idx(writer
->pseudo_merge_commits
,
377 ALLOC_GROW(pmc
->pseudo_merge
, pmc
->nr
+ 1, pmc
->alloc
);
379 pmc
->pseudo_merge
[pmc
->nr
++] = writer
->pseudo_merges_nr
;
380 p
= commit_list_append(c
, p
);
381 } while (j
% group
->stable_size
);
383 if (merge
->parents
) {
384 bitmap_writer_push_commit(writer
, merge
, 1);
385 writer
->pseudo_merges_nr
++;
389 /* make up to group->max_merges pseudo merges for unstable commits */
390 for (i
= 0, j
= 0; i
< group
->max_merges
; i
++) {
391 struct commit
*merge
;
392 struct commit_list
**p
;
395 merge
= push_pseudo_merge(group
);
398 size
= pseudo_merge_group_size(group
, matches
, i
);
399 end
= size
< MIN_PSEUDO_MERGE_SIZE
? matches
->unstable_nr
: j
+ size
;
402 * For each pseudo-merge commit created above, add parents to
403 * the allocated commit node from the unstable set of commits
404 * (newer than the stable threshold).
406 * Account for the sample rate, since not every candidate from
407 * the set of stable commits will be included as a pseudo-merge
410 for (; j
< end
&& j
< matches
->unstable_nr
; j
++) {
411 struct commit
*c
= matches
->unstable
[j
];
412 struct pseudo_merge_commit_idx
*pmc
;
414 if (j
% (uint32_t)(1.0 / group
->sample_rate
))
417 pmc
= pseudo_merge_idx(writer
->pseudo_merge_commits
,
420 ALLOC_GROW(pmc
->pseudo_merge
, pmc
->nr
+ 1, pmc
->alloc
);
422 pmc
->pseudo_merge
[pmc
->nr
++] = writer
->pseudo_merges_nr
;
423 p
= commit_list_append(c
, p
);
426 if (merge
->parents
) {
427 bitmap_writer_push_commit(writer
, merge
, 1);
428 writer
->pseudo_merges_nr
++; }
429 if (end
>= matches
->unstable_nr
)
434 static int commit_date_cmp(const void *va
, const void *vb
)
436 timestamp_t a
= (*(const struct commit
**)va
)->date
;
437 timestamp_t b
= (*(const struct commit
**)vb
)->date
;
446 static void sort_pseudo_merge_matches(struct pseudo_merge_matches
*matches
)
448 QSORT(matches
->stable
, matches
->stable_nr
, commit_date_cmp
);
449 QSORT(matches
->unstable
, matches
->unstable_nr
, commit_date_cmp
);
452 void select_pseudo_merges(struct bitmap_writer
*writer
)
454 struct progress
*progress
= NULL
;
457 if (!writer
->pseudo_merge_groups
.nr
)
460 if (writer
->show_progress
)
461 progress
= start_progress("Selecting pseudo-merge commits",
462 writer
->pseudo_merge_groups
.nr
);
464 refs_for_each_ref(get_main_ref_store(the_repository
),
465 find_pseudo_merge_group_for_ref
, writer
);
467 for (i
= 0; i
< writer
->pseudo_merge_groups
.nr
; i
++) {
468 struct pseudo_merge_group
*group
;
469 struct hashmap_iter iter
;
470 struct strmap_entry
*e
;
472 group
= writer
->pseudo_merge_groups
.items
[i
].util
;
473 strmap_for_each_entry(&group
->matches
, &iter
, e
) {
474 struct pseudo_merge_matches
*matches
= e
->value
;
476 sort_pseudo_merge_matches(matches
);
478 select_pseudo_merges_1(writer
, group
, matches
);
481 display_progress(progress
, i
+ 1);
484 stop_progress(&progress
);
487 void free_pseudo_merge_map(struct pseudo_merge_map
*pm
)
490 for (i
= 0; i
< pm
->nr
; i
++) {
491 ewah_pool_free(pm
->v
[i
].commits
);
492 ewah_pool_free(pm
->v
[i
].bitmap
);
497 struct pseudo_merge_commit_ext
{
499 const unsigned char *ptr
;
502 static int pseudo_merge_ext_at(const struct pseudo_merge_map
*pm
,
503 struct pseudo_merge_commit_ext
*ext
, size_t at
)
505 if (at
>= pm
->map_size
)
506 return error(_("extended pseudo-merge read out-of-bounds "
507 "(%"PRIuMAX
" >= %"PRIuMAX
")"),
508 (uintmax_t)at
, (uintmax_t)pm
->map_size
);
509 if (at
+ 4 >= pm
->map_size
)
510 return error(_("extended pseudo-merge entry is too short "
511 "(%"PRIuMAX
" >= %"PRIuMAX
")"),
512 (uintmax_t)(at
+ 4), (uintmax_t)pm
->map_size
);
514 ext
->nr
= get_be32(pm
->map
+ at
);
515 ext
->ptr
= pm
->map
+ at
+ sizeof(uint32_t);
520 struct ewah_bitmap
*pseudo_merge_bitmap(const struct pseudo_merge_map
*pm
,
521 struct pseudo_merge
*merge
)
523 if (!merge
->loaded_commits
)
524 BUG("cannot use unloaded pseudo-merge bitmap");
526 if (!merge
->loaded_bitmap
) {
527 size_t at
= merge
->bitmap_at
;
529 merge
->bitmap
= read_bitmap(pm
->map
, pm
->map_size
, &at
);
530 merge
->loaded_bitmap
= 1;
533 return merge
->bitmap
;
536 struct pseudo_merge
*use_pseudo_merge(const struct pseudo_merge_map
*pm
,
537 struct pseudo_merge
*merge
)
539 if (!merge
->loaded_commits
) {
540 size_t pos
= merge
->at
;
542 merge
->commits
= read_bitmap(pm
->map
, pm
->map_size
, &pos
);
543 merge
->bitmap_at
= pos
;
544 merge
->loaded_commits
= 1;
549 static struct pseudo_merge
*pseudo_merge_at(const struct pseudo_merge_map
*pm
,
550 struct object_id
*oid
,
557 size_t mi
= lo
+ (hi
- lo
) / 2;
558 size_t got
= pm
->v
[mi
].at
;
561 return use_pseudo_merge(pm
, &pm
->v
[mi
]);
568 warning(_("could not find pseudo-merge for commit %s at offset %"PRIuMAX
),
569 oid_to_hex(oid
), (uintmax_t)want
);
574 struct pseudo_merge_commit
{
576 uint64_t pseudo_merge_ofs
;
579 #define PSEUDO_MERGE_COMMIT_RAWSZ (sizeof(uint32_t)+sizeof(uint64_t))
581 static void read_pseudo_merge_commit_at(struct pseudo_merge_commit
*merge
,
582 const unsigned char *at
)
584 merge
->commit_pos
= get_be32(at
);
585 merge
->pseudo_merge_ofs
= get_be64(at
+ sizeof(uint32_t));
588 static int nth_pseudo_merge_ext(const struct pseudo_merge_map
*pm
,
589 struct pseudo_merge_commit_ext
*ext
,
590 struct pseudo_merge_commit
*merge
,
596 return error(_("extended pseudo-merge lookup out-of-bounds "
597 "(%"PRIu32
" >= %"PRIu32
")"), n
, ext
->nr
);
599 ofs
= get_be64(ext
->ptr
+ st_mult(n
, sizeof(uint64_t)));
600 if (ofs
>= pm
->map_size
)
601 return error(_("out-of-bounds read: (%"PRIuMAX
" >= %"PRIuMAX
")"),
602 (uintmax_t)ofs
, (uintmax_t)pm
->map_size
);
604 read_pseudo_merge_commit_at(merge
, pm
->map
+ ofs
);
609 static unsigned apply_pseudo_merge(const struct pseudo_merge_map
*pm
,
610 struct pseudo_merge
*merge
,
611 struct bitmap
*result
,
612 struct bitmap
*roots
)
614 if (merge
->satisfied
)
617 if (!ewah_bitmap_is_subset(merge
->commits
, roots
? roots
: result
))
620 bitmap_or_ewah(result
, pseudo_merge_bitmap(pm
, merge
));
622 bitmap_or_ewah(roots
, pseudo_merge_bitmap(pm
, merge
));
623 merge
->satisfied
= 1;
628 static int pseudo_merge_commit_cmp(const void *va
, const void *vb
)
630 struct pseudo_merge_commit merge
;
631 uint32_t key
= *(uint32_t*)va
;
633 read_pseudo_merge_commit_at(&merge
, vb
);
635 if (key
< merge
.commit_pos
)
637 if (key
> merge
.commit_pos
)
642 static struct pseudo_merge_commit
*find_pseudo_merge(const struct pseudo_merge_map
*pm
,
648 return bsearch(&pos
, pm
->commits
, pm
->commits_nr
,
649 PSEUDO_MERGE_COMMIT_RAWSZ
, pseudo_merge_commit_cmp
);
652 int apply_pseudo_merges_for_commit(const struct pseudo_merge_map
*pm
,
653 struct bitmap
*result
,
654 struct commit
*commit
, uint32_t commit_pos
)
656 struct pseudo_merge
*merge
;
657 struct pseudo_merge_commit
*merge_commit
;
660 merge_commit
= find_pseudo_merge(pm
, commit_pos
);
664 if (merge_commit
->pseudo_merge_ofs
& ((uint64_t)1<<63)) {
665 struct pseudo_merge_commit_ext ext
= { 0 };
666 off_t ofs
= merge_commit
->pseudo_merge_ofs
& ~((uint64_t)1<<63);
669 if (pseudo_merge_ext_at(pm
, &ext
, ofs
) < -1) {
670 warning(_("could not read extended pseudo-merge table "
672 oid_to_hex(&commit
->object
.oid
));
676 for (i
= 0; i
< ext
.nr
; i
++) {
677 if (nth_pseudo_merge_ext(pm
, &ext
, merge_commit
, i
) < 0)
680 merge
= pseudo_merge_at(pm
, &commit
->object
.oid
,
681 merge_commit
->pseudo_merge_ofs
);
686 if (apply_pseudo_merge(pm
, merge
, result
, NULL
))
690 merge
= pseudo_merge_at(pm
, &commit
->object
.oid
,
691 merge_commit
->pseudo_merge_ofs
);
696 if (apply_pseudo_merge(pm
, merge
, result
, NULL
))
701 cascade_pseudo_merges(pm
, result
, NULL
);
706 int cascade_pseudo_merges(const struct pseudo_merge_map
*pm
,
707 struct bitmap
*result
,
708 struct bitmap
*roots
)
710 unsigned any_satisfied
;
714 struct pseudo_merge
*merge
;
719 for (i
= 0; i
< pm
->nr
; i
++) {
720 merge
= use_pseudo_merge(pm
, &pm
->v
[i
]);
721 if (apply_pseudo_merge(pm
, merge
, result
, roots
)) {
726 } while (any_satisfied
);
731 struct pseudo_merge
*pseudo_merge_for_parents(const struct pseudo_merge_map
*pm
,
732 struct bitmap
*parents
)
734 struct pseudo_merge
*match
= NULL
;
741 * NOTE: this loop is quadratic in the worst-case (where no
742 * matching pseudo-merge bitmaps are found), but in practice
743 * this is OK for a few reasons:
745 * - Rejecting pseudo-merge bitmaps that do not match the
746 * given commit is done quickly (i.e. `bitmap_equals_ewah()`
747 * returns early when we know the two bitmaps aren't equal.
749 * - Already matched pseudo-merge bitmaps (which we track with
750 * the `->satisfied` bit here) are skipped as potential
753 * - The number of pseudo-merges should be small (in the
754 * hundreds for most repositories).
756 * If in the future this semi-quadratic behavior does become a
757 * problem, another approach would be to keep track of which
758 * pseudo-merges are still "viable" after enumerating the
759 * pseudo-merge commit's parents:
761 * - A pseudo-merge bitmap becomes non-viable when the bit(s)
762 * corresponding to one or more parent(s) of the given
763 * commit are not set in a candidate pseudo-merge's commits
766 * - After processing all bits, enumerate the remaining set of
767 * viable pseudo-merge bitmaps, and check that their
768 * popcount() matches the number of parents in the given
771 for (i
= 0; i
< pm
->nr
; i
++) {
772 struct pseudo_merge
*candidate
= use_pseudo_merge(pm
, &pm
->v
[i
]);
773 if (!candidate
|| candidate
->satisfied
)
775 if (!bitmap_equals_ewah(parents
, candidate
->commits
))
779 match
->satisfied
= 1;