1 #define USE_THE_REPOSITORY_VARIABLE
3 #include "git-compat-util.h"
4 #include "pseudo-merge.h"
9 #include "string-list.h"
11 #include "pack-bitmap.h"
17 #define DEFAULT_PSEUDO_MERGE_DECAY 1.0
18 #define DEFAULT_PSEUDO_MERGE_MAX_MERGES 64
19 #define DEFAULT_PSEUDO_MERGE_SAMPLE_RATE 1
20 #define DEFAULT_PSEUDO_MERGE_THRESHOLD approxidate("1.week.ago")
21 #define DEFAULT_PSEUDO_MERGE_STABLE_THRESHOLD approxidate("1.month.ago")
22 #define DEFAULT_PSEUDO_MERGE_STABLE_SIZE 512
24 static double gitexp(double base
, int exp
)
38 static uint32_t pseudo_merge_group_size(const struct pseudo_merge_group
*group
,
39 const struct pseudo_merge_matches
*matches
,
46 * The size of pseudo-merge groups decays according to a power series,
51 * , where 'n' is the n-th pseudo-merge group, 'f(n)' is its size, 'k'
52 * is the decay rate, and 'C' is a scaling value.
54 * The value of C depends on the number of groups, decay rate, and total
55 * number of commits. It is computed such that if there are M and N
56 * total groups and commits, respectively, that:
58 * N = f(0) + f(1) + ... f(M-1)
60 * Rearranging to isolate C, we get:
62 * N = \sum_{n=1}^M C / n^k
64 * N / C = \sum_{n=1}^M n^-k
66 * C = N / \sum_{n=1}^M n^-k
68 * For example, if we have a decay rate of 'k' being equal to 1.5, 'N'
69 * total commits equal to 10,000, and 'M' being equal to 6 groups, then
70 * the (rounded) group sizes are:
72 * { 5469, 1934, 1053, 684, 489, 372 }
74 * increasing the number of total groups, say to 10, scales the group
75 * sizes appropriately:
77 * { 5012, 1772, 964, 626, 448, 341, 271, 221, 186, 158 }
79 for (n
= 0; n
< group
->max_merges
; n
++)
80 C
+= 1.0 / gitexp(n
+ 1, group
->decay
);
81 C
= matches
->unstable_nr
/ C
;
83 return (uint32_t)((C
/ gitexp(i
+ 1, group
->decay
)) + 0.5);
86 static void pseudo_merge_group_init(struct pseudo_merge_group
*group
)
88 memset(group
, 0, sizeof(struct pseudo_merge_group
));
90 strmap_init_with_options(&group
->matches
, NULL
, 0);
92 group
->decay
= DEFAULT_PSEUDO_MERGE_DECAY
;
93 group
->max_merges
= DEFAULT_PSEUDO_MERGE_MAX_MERGES
;
94 group
->sample_rate
= DEFAULT_PSEUDO_MERGE_SAMPLE_RATE
;
95 group
->threshold
= DEFAULT_PSEUDO_MERGE_THRESHOLD
;
96 group
->stable_threshold
= DEFAULT_PSEUDO_MERGE_STABLE_THRESHOLD
;
97 group
->stable_size
= DEFAULT_PSEUDO_MERGE_STABLE_SIZE
;
100 static int pseudo_merge_config(const char *var
, const char *value
,
101 const struct config_context
*ctx
,
104 struct string_list
*list
= cb_data
;
105 struct string_list_item
*item
;
106 struct pseudo_merge_group
*group
;
107 struct strbuf buf
= STRBUF_INIT
;
108 const char *sub
, *key
;
112 if (parse_config_key(var
, "bitmappseudomerge", &sub
, &sub_len
, &key
))
118 strbuf_add(&buf
, sub
, sub_len
);
120 item
= string_list_lookup(list
, buf
.buf
);
122 item
= string_list_insert(list
, buf
.buf
);
124 item
->util
= xmalloc(sizeof(struct pseudo_merge_group
));
125 pseudo_merge_group_init(item
->util
);
130 if (!strcmp(key
, "pattern")) {
131 struct strbuf re
= STRBUF_INIT
;
133 free(group
->pattern
);
135 strbuf_addch(&re
, '^');
136 strbuf_addstr(&re
, value
);
138 group
->pattern
= xcalloc(1, sizeof(regex_t
));
139 if (regcomp(group
->pattern
, re
.buf
, REG_EXTENDED
))
140 die(_("failed to load pseudo-merge regex for %s: '%s'"),
144 } else if (!strcmp(key
, "decay")) {
145 group
->decay
= git_config_double(var
, value
, ctx
->kvi
);
146 if (group
->decay
< 0) {
147 warning(_("%s must be non-negative, using default"), var
);
148 group
->decay
= DEFAULT_PSEUDO_MERGE_DECAY
;
150 } else if (!strcmp(key
, "samplerate")) {
151 group
->sample_rate
= git_config_double(var
, value
, ctx
->kvi
);
152 if (!(0 <= group
->sample_rate
&& group
->sample_rate
<= 1)) {
153 warning(_("%s must be between 0 and 1, using default"), var
);
154 group
->sample_rate
= DEFAULT_PSEUDO_MERGE_SAMPLE_RATE
;
156 } else if (!strcmp(key
, "threshold")) {
157 if (git_config_expiry_date(&group
->threshold
, var
, value
)) {
161 } else if (!strcmp(key
, "maxmerges")) {
162 group
->max_merges
= git_config_int(var
, value
, ctx
->kvi
);
163 if (group
->max_merges
< 0) {
164 warning(_("%s must be non-negative, using default"), var
);
165 group
->max_merges
= DEFAULT_PSEUDO_MERGE_MAX_MERGES
;
167 } else if (!strcmp(key
, "stablethreshold")) {
168 if (git_config_expiry_date(&group
->stable_threshold
, var
, value
)) {
172 } else if (!strcmp(key
, "stablesize")) {
173 group
->stable_size
= git_config_int(var
, value
, ctx
->kvi
);
174 if (group
->stable_size
<= 0) {
175 warning(_("%s must be positive, using default"), var
);
176 group
->stable_size
= DEFAULT_PSEUDO_MERGE_STABLE_SIZE
;
181 strbuf_release(&buf
);
186 void load_pseudo_merges_from_config(struct repository
*r
,
187 struct string_list
*list
)
189 struct string_list_item
*item
;
191 repo_config(r
, pseudo_merge_config
, list
);
193 for_each_string_list_item(item
, list
) {
194 struct pseudo_merge_group
*group
= item
->util
;
196 die(_("pseudo-merge group '%s' missing required pattern"),
198 if (group
->threshold
< group
->stable_threshold
)
199 die(_("pseudo-merge group '%s' has unstable threshold "
200 "before stable one"), item
->string
);
204 static int find_pseudo_merge_group_for_ref(const char *refname
,
205 const char *referent UNUSED
,
206 const struct object_id
*oid
,
210 struct bitmap_writer
*writer
= _data
;
211 struct object_id peeled
;
216 if (!peel_iterated_oid(the_repository
, oid
, &peeled
))
219 c
= lookup_commit(the_repository
, oid
);
222 if (!packlist_find(writer
->to_pack
, oid
))
225 has_bitmap
= bitmap_writer_has_bitmapped_object_id(writer
, oid
);
227 for (i
= 0; i
< writer
->pseudo_merge_groups
.nr
; i
++) {
228 struct pseudo_merge_group
*group
;
229 struct pseudo_merge_matches
*matches
;
230 struct strbuf group_name
= STRBUF_INIT
;
231 regmatch_t captures
[16];
234 group
= writer
->pseudo_merge_groups
.items
[i
].util
;
235 if (regexec(group
->pattern
, refname
, ARRAY_SIZE(captures
),
239 if (captures
[ARRAY_SIZE(captures
) - 1].rm_so
!= -1)
240 warning(_("pseudo-merge regex from config has too many capture "
241 "groups (max=%"PRIuMAX
")"),
242 (uintmax_t)ARRAY_SIZE(captures
) - 2);
244 for (j
= !!group
->pattern
->re_nsub
; j
< ARRAY_SIZE(captures
); j
++) {
245 regmatch_t
*match
= &captures
[j
];
246 if (match
->rm_so
== -1)
250 strbuf_addch(&group_name
, '-');
252 strbuf_add(&group_name
, refname
+ match
->rm_so
,
253 match
->rm_eo
- match
->rm_so
);
256 matches
= strmap_get(&group
->matches
, group_name
.buf
);
258 matches
= xcalloc(1, sizeof(*matches
));
259 strmap_put(&group
->matches
, strbuf_detach(&group_name
, NULL
),
263 if (c
->date
<= group
->stable_threshold
) {
264 ALLOC_GROW(matches
->stable
, matches
->stable_nr
+ 1,
265 matches
->stable_alloc
);
266 matches
->stable
[matches
->stable_nr
++] = c
;
267 } else if (c
->date
<= group
->threshold
&& !has_bitmap
) {
268 ALLOC_GROW(matches
->unstable
, matches
->unstable_nr
+ 1,
269 matches
->unstable_alloc
);
270 matches
->unstable
[matches
->unstable_nr
++] = c
;
273 strbuf_release(&group_name
);
279 static struct commit
*push_pseudo_merge(struct pseudo_merge_group
*group
)
281 struct commit
*merge
;
283 ALLOC_GROW(group
->merges
, group
->merges_nr
+ 1, group
->merges_alloc
);
285 merge
= alloc_commit_node(the_repository
);
286 merge
->object
.parsed
= 1;
287 merge
->object
.flags
|= BITMAP_PSEUDO_MERGE
;
289 group
->merges
[group
->merges_nr
++] = merge
;
294 static struct pseudo_merge_commit_idx
*pseudo_merge_idx(kh_oid_map_t
*pseudo_merge_commits
,
295 const struct object_id
*oid
)
298 struct pseudo_merge_commit_idx
*pmc
;
300 khiter_t hash_pos
= kh_put_oid_map(pseudo_merge_commits
, *oid
,
304 CALLOC_ARRAY(pmc
, 1);
305 kh_value(pseudo_merge_commits
, hash_pos
) = pmc
;
307 pmc
= kh_value(pseudo_merge_commits
, hash_pos
);
313 #define MIN_PSEUDO_MERGE_SIZE 8
315 static void select_pseudo_merges_1(struct bitmap_writer
*writer
,
316 struct pseudo_merge_group
*group
,
317 struct pseudo_merge_matches
*matches
)
320 uint32_t stable_merges_nr
;
322 if (!matches
->stable_nr
&& !matches
->unstable_nr
)
323 return; /* all tips in this group already have bitmaps */
325 stable_merges_nr
= matches
->stable_nr
/ group
->stable_size
;
326 if (matches
->stable_nr
% group
->stable_size
)
329 /* make stable_merges_nr pseudo merges for stable commits */
330 for (i
= 0, j
= 0; i
< stable_merges_nr
; i
++) {
331 struct commit
*merge
;
332 struct commit_list
**p
;
334 merge
= push_pseudo_merge(group
);
338 * For each pseudo-merge created above, add parents to the
339 * allocated commit node from the stable set of commits
340 * (un-bitmapped, newer than the stable threshold).
344 struct pseudo_merge_commit_idx
*pmc
;
346 if (j
>= matches
->stable_nr
)
349 c
= matches
->stable
[j
++];
351 * Here and below, make sure that we keep our mapping of
352 * commits -> pseudo-merge(s) which include the key'd
355 pmc
= pseudo_merge_idx(writer
->pseudo_merge_commits
,
358 ALLOC_GROW(pmc
->pseudo_merge
, pmc
->nr
+ 1, pmc
->alloc
);
360 pmc
->pseudo_merge
[pmc
->nr
++] = writer
->pseudo_merges_nr
;
361 p
= commit_list_append(c
, p
);
362 } while (j
% group
->stable_size
);
364 if (merge
->parents
) {
365 bitmap_writer_push_commit(writer
, merge
, 1);
366 writer
->pseudo_merges_nr
++;
370 /* make up to group->max_merges pseudo merges for unstable commits */
371 for (i
= 0, j
= 0; i
< group
->max_merges
; i
++) {
372 struct commit
*merge
;
373 struct commit_list
**p
;
376 merge
= push_pseudo_merge(group
);
379 size
= pseudo_merge_group_size(group
, matches
, i
);
380 end
= size
< MIN_PSEUDO_MERGE_SIZE
? matches
->unstable_nr
: j
+ size
;
383 * For each pseudo-merge commit created above, add parents to
384 * the allocated commit node from the unstable set of commits
385 * (newer than the stable threshold).
387 * Account for the sample rate, since not every candidate from
388 * the set of stable commits will be included as a pseudo-merge
391 for (; j
< end
&& j
< matches
->unstable_nr
; j
++) {
392 struct commit
*c
= matches
->unstable
[j
];
393 struct pseudo_merge_commit_idx
*pmc
;
395 if (j
% (uint32_t)(1.0 / group
->sample_rate
))
398 pmc
= pseudo_merge_idx(writer
->pseudo_merge_commits
,
401 ALLOC_GROW(pmc
->pseudo_merge
, pmc
->nr
+ 1, pmc
->alloc
);
403 pmc
->pseudo_merge
[pmc
->nr
++] = writer
->pseudo_merges_nr
;
404 p
= commit_list_append(c
, p
);
407 if (merge
->parents
) {
408 bitmap_writer_push_commit(writer
, merge
, 1);
409 writer
->pseudo_merges_nr
++; }
410 if (end
>= matches
->unstable_nr
)
415 static int commit_date_cmp(const void *va
, const void *vb
)
417 timestamp_t a
= (*(const struct commit
**)va
)->date
;
418 timestamp_t b
= (*(const struct commit
**)vb
)->date
;
427 static void sort_pseudo_merge_matches(struct pseudo_merge_matches
*matches
)
429 QSORT(matches
->stable
, matches
->stable_nr
, commit_date_cmp
);
430 QSORT(matches
->unstable
, matches
->unstable_nr
, commit_date_cmp
);
433 void select_pseudo_merges(struct bitmap_writer
*writer
)
435 struct progress
*progress
= NULL
;
438 if (!writer
->pseudo_merge_groups
.nr
)
441 if (writer
->show_progress
)
442 progress
= start_progress("Selecting pseudo-merge commits",
443 writer
->pseudo_merge_groups
.nr
);
445 refs_for_each_ref(get_main_ref_store(the_repository
),
446 find_pseudo_merge_group_for_ref
, writer
);
448 for (i
= 0; i
< writer
->pseudo_merge_groups
.nr
; i
++) {
449 struct pseudo_merge_group
*group
;
450 struct hashmap_iter iter
;
451 struct strmap_entry
*e
;
453 group
= writer
->pseudo_merge_groups
.items
[i
].util
;
454 strmap_for_each_entry(&group
->matches
, &iter
, e
) {
455 struct pseudo_merge_matches
*matches
= e
->value
;
457 sort_pseudo_merge_matches(matches
);
459 select_pseudo_merges_1(writer
, group
, matches
);
462 display_progress(progress
, i
+ 1);
465 stop_progress(&progress
);
468 void free_pseudo_merge_map(struct pseudo_merge_map
*pm
)
471 for (i
= 0; i
< pm
->nr
; i
++) {
472 ewah_pool_free(pm
->v
[i
].commits
);
473 ewah_pool_free(pm
->v
[i
].bitmap
);
478 struct pseudo_merge_commit_ext
{
480 const unsigned char *ptr
;
483 static int pseudo_merge_ext_at(const struct pseudo_merge_map
*pm
,
484 struct pseudo_merge_commit_ext
*ext
, size_t at
)
486 if (at
>= pm
->map_size
)
487 return error(_("extended pseudo-merge read out-of-bounds "
488 "(%"PRIuMAX
" >= %"PRIuMAX
")"),
489 (uintmax_t)at
, (uintmax_t)pm
->map_size
);
490 if (at
+ 4 >= pm
->map_size
)
491 return error(_("extended pseudo-merge entry is too short "
492 "(%"PRIuMAX
" >= %"PRIuMAX
")"),
493 (uintmax_t)(at
+ 4), (uintmax_t)pm
->map_size
);
495 ext
->nr
= get_be32(pm
->map
+ at
);
496 ext
->ptr
= pm
->map
+ at
+ sizeof(uint32_t);
501 struct ewah_bitmap
*pseudo_merge_bitmap(const struct pseudo_merge_map
*pm
,
502 struct pseudo_merge
*merge
)
504 if (!merge
->loaded_commits
)
505 BUG("cannot use unloaded pseudo-merge bitmap");
507 if (!merge
->loaded_bitmap
) {
508 size_t at
= merge
->bitmap_at
;
510 merge
->bitmap
= read_bitmap(pm
->map
, pm
->map_size
, &at
);
511 merge
->loaded_bitmap
= 1;
514 return merge
->bitmap
;
517 struct pseudo_merge
*use_pseudo_merge(const struct pseudo_merge_map
*pm
,
518 struct pseudo_merge
*merge
)
520 if (!merge
->loaded_commits
) {
521 size_t pos
= merge
->at
;
523 merge
->commits
= read_bitmap(pm
->map
, pm
->map_size
, &pos
);
524 merge
->bitmap_at
= pos
;
525 merge
->loaded_commits
= 1;
530 static struct pseudo_merge
*pseudo_merge_at(const struct pseudo_merge_map
*pm
,
531 struct object_id
*oid
,
538 size_t mi
= lo
+ (hi
- lo
) / 2;
539 size_t got
= pm
->v
[mi
].at
;
542 return use_pseudo_merge(pm
, &pm
->v
[mi
]);
549 warning(_("could not find pseudo-merge for commit %s at offset %"PRIuMAX
),
550 oid_to_hex(oid
), (uintmax_t)want
);
555 struct pseudo_merge_commit
{
557 uint64_t pseudo_merge_ofs
;
560 #define PSEUDO_MERGE_COMMIT_RAWSZ (sizeof(uint32_t)+sizeof(uint64_t))
562 static void read_pseudo_merge_commit_at(struct pseudo_merge_commit
*merge
,
563 const unsigned char *at
)
565 merge
->commit_pos
= get_be32(at
);
566 merge
->pseudo_merge_ofs
= get_be64(at
+ sizeof(uint32_t));
569 static int nth_pseudo_merge_ext(const struct pseudo_merge_map
*pm
,
570 struct pseudo_merge_commit_ext
*ext
,
571 struct pseudo_merge_commit
*merge
,
577 return error(_("extended pseudo-merge lookup out-of-bounds "
578 "(%"PRIu32
" >= %"PRIu32
")"), n
, ext
->nr
);
580 ofs
= get_be64(ext
->ptr
+ st_mult(n
, sizeof(uint64_t)));
581 if (ofs
>= pm
->map_size
)
582 return error(_("out-of-bounds read: (%"PRIuMAX
" >= %"PRIuMAX
")"),
583 (uintmax_t)ofs
, (uintmax_t)pm
->map_size
);
585 read_pseudo_merge_commit_at(merge
, pm
->map
+ ofs
);
590 static unsigned apply_pseudo_merge(const struct pseudo_merge_map
*pm
,
591 struct pseudo_merge
*merge
,
592 struct bitmap
*result
,
593 struct bitmap
*roots
)
595 if (merge
->satisfied
)
598 if (!ewah_bitmap_is_subset(merge
->commits
, roots
? roots
: result
))
601 bitmap_or_ewah(result
, pseudo_merge_bitmap(pm
, merge
));
603 bitmap_or_ewah(roots
, pseudo_merge_bitmap(pm
, merge
));
604 merge
->satisfied
= 1;
609 static int pseudo_merge_commit_cmp(const void *va
, const void *vb
)
611 struct pseudo_merge_commit merge
;
612 uint32_t key
= *(uint32_t*)va
;
614 read_pseudo_merge_commit_at(&merge
, vb
);
616 if (key
< merge
.commit_pos
)
618 if (key
> merge
.commit_pos
)
623 static struct pseudo_merge_commit
*find_pseudo_merge(const struct pseudo_merge_map
*pm
,
629 return bsearch(&pos
, pm
->commits
, pm
->commits_nr
,
630 PSEUDO_MERGE_COMMIT_RAWSZ
, pseudo_merge_commit_cmp
);
633 int apply_pseudo_merges_for_commit(const struct pseudo_merge_map
*pm
,
634 struct bitmap
*result
,
635 struct commit
*commit
, uint32_t commit_pos
)
637 struct pseudo_merge
*merge
;
638 struct pseudo_merge_commit
*merge_commit
;
641 merge_commit
= find_pseudo_merge(pm
, commit_pos
);
645 if (merge_commit
->pseudo_merge_ofs
& ((uint64_t)1<<63)) {
646 struct pseudo_merge_commit_ext ext
= { 0 };
647 off_t ofs
= merge_commit
->pseudo_merge_ofs
& ~((uint64_t)1<<63);
650 if (pseudo_merge_ext_at(pm
, &ext
, ofs
) < -1) {
651 warning(_("could not read extended pseudo-merge table "
653 oid_to_hex(&commit
->object
.oid
));
657 for (i
= 0; i
< ext
.nr
; i
++) {
658 if (nth_pseudo_merge_ext(pm
, &ext
, merge_commit
, i
) < 0)
661 merge
= pseudo_merge_at(pm
, &commit
->object
.oid
,
662 merge_commit
->pseudo_merge_ofs
);
667 if (apply_pseudo_merge(pm
, merge
, result
, NULL
))
671 merge
= pseudo_merge_at(pm
, &commit
->object
.oid
,
672 merge_commit
->pseudo_merge_ofs
);
677 if (apply_pseudo_merge(pm
, merge
, result
, NULL
))
682 cascade_pseudo_merges(pm
, result
, NULL
);
687 int cascade_pseudo_merges(const struct pseudo_merge_map
*pm
,
688 struct bitmap
*result
,
689 struct bitmap
*roots
)
691 unsigned any_satisfied
;
695 struct pseudo_merge
*merge
;
700 for (i
= 0; i
< pm
->nr
; i
++) {
701 merge
= use_pseudo_merge(pm
, &pm
->v
[i
]);
702 if (apply_pseudo_merge(pm
, merge
, result
, roots
)) {
707 } while (any_satisfied
);
712 struct pseudo_merge
*pseudo_merge_for_parents(const struct pseudo_merge_map
*pm
,
713 struct bitmap
*parents
)
715 struct pseudo_merge
*match
= NULL
;
722 * NOTE: this loop is quadratic in the worst-case (where no
723 * matching pseudo-merge bitmaps are found), but in practice
724 * this is OK for a few reasons:
726 * - Rejecting pseudo-merge bitmaps that do not match the
727 * given commit is done quickly (i.e. `bitmap_equals_ewah()`
728 * returns early when we know the two bitmaps aren't equal.
730 * - Already matched pseudo-merge bitmaps (which we track with
731 * the `->satisfied` bit here) are skipped as potential
734 * - The number of pseudo-merges should be small (in the
735 * hundreds for most repositories).
737 * If in the future this semi-quadratic behavior does become a
738 * problem, another approach would be to keep track of which
739 * pseudo-merges are still "viable" after enumerating the
740 * pseudo-merge commit's parents:
742 * - A pseudo-merge bitmap becomes non-viable when the bit(s)
743 * corresponding to one or more parent(s) of the given
744 * commit are not set in a candidate pseudo-merge's commits
747 * - After processing all bits, enumerate the remaining set of
748 * viable pseudo-merge bitmaps, and check that their
749 * popcount() matches the number of parents in the given
752 for (i
= 0; i
< pm
->nr
; i
++) {
753 struct pseudo_merge
*candidate
= use_pseudo_merge(pm
, &pm
->v
[i
]);
754 if (!candidate
|| candidate
->satisfied
)
756 if (!bitmap_equals_ewah(parents
, candidate
->commits
))
760 match
->satisfied
= 1;