/*
Copyright 2020 Google LLC

Use of this source code is governed by a BSD-style
license that can be found in the LICENSE file or at
https://developers.google.com/open-source/licenses/bsd
*/

#include "stack.h"

#include "../write-or-die.h"
#include "system.h"
#include "constants.h"
#include "merged.h"
#include "reader.h"
#include "reftable-error.h"
#include "reftable-record.h"
#include "reftable-merged.h"
#include "writer.h"
#include "tempfile.h"

static int stack_try_add(struct reftable_stack *st,
			 int (*write_table)(struct reftable_writer *wr,
					    void *arg),
			 void *arg);
static int stack_write_compact(struct reftable_stack *st,
			       struct reftable_writer *wr,
			       size_t first, size_t last,
			       struct reftable_log_expiry_config *config);
static void reftable_addition_close(struct reftable_addition *add);
static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st,
					     int reuse_open);

static void stack_filename(struct strbuf *dest, struct reftable_stack *st,
			   const char *name)
{
	strbuf_reset(dest);
	strbuf_addstr(dest, st->reftable_dir);
	strbuf_addstr(dest, "/");
	strbuf_addstr(dest, name);
}

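/*
 * Byte-sink callbacks handed to the reftable writer: both interpret their
 * opaque argument as a pointer to the file descriptor being written to.
 */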
static ssize_t reftable_fd_write(void *arg, const void *data, size_t sz)
{
	int *fdp = (int *)arg;
	return write_in_full(*fdp, data, sz);
}

static int reftable_fd_flush(void *arg)
{
	int *fdp = (int *)arg;

	return fsync_component(FSYNC_COMPONENT_REFERENCE, *fdp);
}

int reftable_new_stack(struct reftable_stack **dest, const char *dir,
		       const struct reftable_write_options *_opts)
{
	struct reftable_stack *p = reftable_calloc(1, sizeof(*p));
	struct strbuf list_file_name = STRBUF_INIT;
	struct reftable_write_options opts = {0};
	int err = 0;

	if (_opts)
		opts = *_opts;
	if (opts.hash_id == 0)
		opts.hash_id = GIT_SHA1_FORMAT_ID;

	*dest = NULL;

	strbuf_reset(&list_file_name);
	strbuf_addstr(&list_file_name, dir);
	strbuf_addstr(&list_file_name, "/tables.list");

	p->list_file = strbuf_detach(&list_file_name, NULL);
	p->list_fd = -1;
	p->reftable_dir = xstrdup(dir);
	p->opts = opts;

	err = reftable_stack_reload_maybe_reuse(p, 1);
	if (err < 0) {
		reftable_stack_destroy(p);
	} else {
		*dest = p;
	}
	return err;
}

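/*
 * Read the entire file behind `fd` into memory and split it into a
 * NULL-terminated array of lines. The caller owns the result and releases
 * it with free_names().
 */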
static int fd_read_lines(int fd, char ***namesp)
{
	off_t size = lseek(fd, 0, SEEK_END);
	char *buf = NULL;
	int err = 0;
	if (size < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}
	err = lseek(fd, 0, SEEK_SET);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	REFTABLE_ALLOC_ARRAY(buf, size + 1);
	if (read_in_full(fd, buf, size) != size) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}
	buf[size] = 0;

	parse_names(buf, size, namesp);

done:
	reftable_free(buf);
	return err;
}

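/*
 * Like fd_read_lines(), but opens the file by name. A missing file is not
 * an error: it yields an empty list, matching a stack without any tables.
 */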
int read_lines(const char *filename, char ***namesp)
{
	int fd = open(filename, O_RDONLY);
	int err = 0;
	if (fd < 0) {
		if (errno == ENOENT) {
			REFTABLE_CALLOC_ARRAY(*namesp, 1);
			return 0;
		}

		return REFTABLE_IO_ERROR;
	}
	err = fd_read_lines(fd, namesp);
	close(fd);
	return err;
}

void reftable_stack_init_ref_iterator(struct reftable_stack *st,
				      struct reftable_iterator *it)
{
	merged_table_init_iter(reftable_stack_merged_table(st),
			       it, BLOCK_TYPE_REF);
}

void reftable_stack_init_log_iterator(struct reftable_stack *st,
				      struct reftable_iterator *it)
{
	merged_table_init_iter(reftable_stack_merged_table(st),
			       it, BLOCK_TYPE_LOG);
}

struct reftable_merged_table *
reftable_stack_merged_table(struct reftable_stack *st)
{
	return st->merged;
}

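/* Check whether `name` occurs in the NULL-terminated list `names`. */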
static int has_name(char **names, const char *name)
{
	while (*names) {
		if (!strcmp(*names, name))
			return 1;
		names++;
	}
	return 0;
}

/* Close and free the stack */
void reftable_stack_destroy(struct reftable_stack *st)
{
	char **names = NULL;
	int err = 0;
	if (st->merged) {
		reftable_merged_table_free(st->merged);
		st->merged = NULL;
	}

	err = read_lines(st->list_file, &names);
	if (err < 0) {
		FREE_AND_NULL(names);
	}

	if (st->readers) {
		int i = 0;
		struct strbuf filename = STRBUF_INIT;
		for (i = 0; i < st->readers_len; i++) {
			const char *name = reader_name(st->readers[i]);
			strbuf_reset(&filename);
			if (names && !has_name(names, name)) {
				stack_filename(&filename, st, name);
			}
			reftable_reader_decref(st->readers[i]);

			if (filename.len) {
				/* On Windows, can only unlink after closing. */
				unlink(filename.buf);
			}
		}
		strbuf_release(&filename);
		st->readers_len = 0;
		FREE_AND_NULL(st->readers);
	}

	if (st->list_fd >= 0) {
		close(st->list_fd);
		st->list_fd = -1;
	}

	FREE_AND_NULL(st->list_file);
	FREE_AND_NULL(st->reftable_dir);
	reftable_free(st);
	free_names(names);
}

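/*
 * Create a shallow copy of the stack's reader array so that readers can be
 * matched up and reused when the stack is reloaded.
 */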
static struct reftable_reader **stack_copy_readers(struct reftable_stack *st,
						   int cur_len)
{
	struct reftable_reader **cur = reftable_calloc(cur_len, sizeof(*cur));
	int i = 0;
	for (i = 0; i < cur_len; i++) {
		cur[i] = st->readers[i];
	}
	return cur;
}

static int reftable_stack_reload_once(struct reftable_stack *st,
				      const char **names,
				      int reuse_open)
{
	size_t cur_len = !st->merged ? 0 : st->merged->readers_len;
	struct reftable_reader **cur = stack_copy_readers(st, cur_len);
	struct reftable_reader **reused = NULL;
	size_t reused_len = 0, reused_alloc = 0;
	size_t names_len = names_length(names);
	struct reftable_reader **new_readers =
		reftable_calloc(names_len, sizeof(*new_readers));
	size_t new_readers_len = 0;
	struct reftable_merged_table *new_merged = NULL;
	struct strbuf table_path = STRBUF_INIT;
	int err = 0;
	size_t i;

	while (*names) {
		struct reftable_reader *rd = NULL;
		const char *name = *names++;

		/* this is linear; we assume compaction keeps the number of
		   tables under control so this is not quadratic. */
		for (i = 0; reuse_open && i < cur_len; i++) {
			if (cur[i] && 0 == strcmp(cur[i]->name, name)) {
				rd = cur[i];
				cur[i] = NULL;

				/*
				 * When reloading the stack fails, we end up
				 * releasing all new readers. This also
				 * includes the reused readers, even though
				 * they are still in use by the old stack. We
				 * thus need to keep them alive here, which we
				 * do by bumping their refcount.
				 */
				REFTABLE_ALLOC_GROW(reused, reused_len + 1, reused_alloc);
				reused[reused_len++] = rd;
				reftable_reader_incref(rd);
				break;
			}
		}

		if (!rd) {
			struct reftable_block_source src = { NULL };
			stack_filename(&table_path, st, name);

			err = reftable_block_source_from_file(&src,
							      table_path.buf);
			if (err < 0)
				goto done;

			err = reftable_reader_new(&rd, &src, name);
			if (err < 0)
				goto done;
		}

		new_readers[new_readers_len] = rd;
		new_readers_len++;
	}

	/* success! */
	err = reftable_merged_table_new(&new_merged, new_readers,
					new_readers_len, st->opts.hash_id);
	if (err < 0)
		goto done;

	/*
	 * Close the old, non-reused readers and proactively try to unlink
	 * them. This is done for systems like Windows, where the underlying
	 * file of such an open reader wouldn't have been possible to be
	 * unlinked by the compacting process.
	 */
	for (i = 0; i < cur_len; i++) {
		if (cur[i]) {
			const char *name = reader_name(cur[i]);
			stack_filename(&table_path, st, name);
			reftable_reader_decref(cur[i]);
			unlink(table_path.buf);
		}
	}

	/* Update the stack to point to the new tables. */
	if (st->merged)
		reftable_merged_table_free(st->merged);
	new_merged->suppress_deletions = 1;
	st->merged = new_merged;

	if (st->readers)
		reftable_free(st->readers);
	st->readers = new_readers;
	st->readers_len = new_readers_len;
	new_readers = NULL;
	new_readers_len = 0;

	/*
	 * Decrement the refcount of reused readers again. This only needs to
	 * happen on the successful case, because on the unsuccessful one we
	 * decrement their refcount via `new_readers`.
	 */
	for (i = 0; i < reused_len; i++)
		reftable_reader_decref(reused[i]);

done:
	for (i = 0; i < new_readers_len; i++)
		reftable_reader_decref(new_readers[i]);
	reftable_free(new_readers);
	reftable_free(reused);
	reftable_free(cur);
	strbuf_release(&table_path);
	return err;
}

/* return negative if a before b. */
static int tv_cmp(struct timeval *a, struct timeval *b)
{
	time_t diff = a->tv_sec - b->tv_sec;
	int udiff = a->tv_usec - b->tv_usec;

	if (diff != 0)
		return diff;

	return udiff;
}

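/*
 * Reload the stack from "tables.list", retrying with randomized backoff for
 * roughly three seconds when a concurrent writer keeps changing the list
 * underneath us. With `reuse_open` set, readers for unchanged tables are
 * carried over instead of being reopened.
 */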
static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st,
					     int reuse_open)
{
	char **names = NULL, **names_after = NULL;
	struct timeval deadline;
	int64_t delay = 0;
	int tries = 0, err;
	int fd = -1;

	err = gettimeofday(&deadline, NULL);
	if (err < 0)
		goto out;
	deadline.tv_sec += 3;

	while (1) {
		struct timeval now;

		err = gettimeofday(&now, NULL);
		if (err < 0)
			goto out;

		/*
		 * Only look at deadlines after the first few times. This
		 * simplifies debugging in GDB.
		 */
		tries++;
		if (tries > 3 && tv_cmp(&now, &deadline) >= 0)
			goto out;

		fd = open(st->list_file, O_RDONLY);
		if (fd < 0) {
			if (errno != ENOENT) {
				err = REFTABLE_IO_ERROR;
				goto out;
			}

			REFTABLE_CALLOC_ARRAY(names, 1);
		} else {
			err = fd_read_lines(fd, &names);
			if (err < 0)
				goto out;
		}

		err = reftable_stack_reload_once(st, (const char **) names, reuse_open);
		if (!err)
			break;
		if (err != REFTABLE_NOT_EXIST_ERROR)
			goto out;

		/*
		 * REFTABLE_NOT_EXIST_ERROR can be caused by a concurrent
		 * writer. Check if there was one by checking if the name list
		 * changed.
		 */
		err = read_lines(st->list_file, &names_after);
		if (err < 0)
			goto out;
		if (names_equal((const char **) names_after,
				(const char **) names)) {
			err = REFTABLE_NOT_EXIST_ERROR;
			goto out;
		}

		free_names(names);
		names = NULL;
		free_names(names_after);
		names_after = NULL;
		close(fd);
		fd = -1;

		delay = delay + (delay * rand()) / RAND_MAX + 1;
		sleep_millisec(delay);
	}

out:
	/*
	 * Invalidate the stat cache. It is sufficient to only close the file
	 * descriptor and keep the cached stat info because we never use the
	 * latter when the former is negative.
	 */
	if (st->list_fd >= 0) {
		close(st->list_fd);
		st->list_fd = -1;
	}

	/*
	 * Cache stat information in case it provides a useful signal to us.
	 * According to POSIX, "The st_ino and st_dev fields taken together
	 * uniquely identify the file within the system." That being said,
	 * Windows is not POSIX compliant and we do not have these fields
	 * available. So the information we have there is insufficient to
	 * determine whether two file descriptors point to the same file.
	 *
	 * While we could fall back to using other signals like the file's
	 * mtime, those are not sufficient to avoid races. We thus refrain from
	 * using the stat cache on such systems and fall back to the secondary
	 * caching mechanism, which is to check whether contents of the file
	 * have changed.
	 *
	 * On other systems which are POSIX compliant we must keep the file
	 * descriptor open. This is to avoid a race condition where two
	 * processes access the reftable stack at the same point in time:
	 *
	 *  1. A reads the reftable stack and caches its stat info.
	 *
	 *  2. B updates the stack, appending a new table to "tables.list".
	 *     This will both use a new inode and result in a different file
	 *     size, thus invalidating A's cache in theory.
	 *
	 *  3. B decides to auto-compact the stack and merges two tables. The
	 *     file size now matches what A has cached again. Furthermore, the
	 *     filesystem may decide to recycle the inode number of the file
	 *     we have replaced in (2) because it is not in use anymore.
	 *
	 *  4. A reloads the reftable stack. Neither the inode number nor the
	 *     file size changed. If the timestamps did not change either then
	 *     we think the cached copy of our stack is up-to-date.
	 *
	 * By keeping the file descriptor open the inode number cannot be
	 * recycled, mitigating the race.
	 */
	if (!err && fd >= 0 && !fstat(fd, &st->list_st) &&
	    st->list_st.st_dev && st->list_st.st_ino) {
		st->list_fd = fd;
		fd = -1;
	}

	if (fd >= 0)
		close(fd);
	free_names(names);
	free_names(names_after);
	return err;
}

/* -1 = error
 0 = up to date
 1 = changed. */
static int stack_uptodate(struct reftable_stack *st)
{
	char **names = NULL;
	int err;
	int i = 0;

	/*
	 * When we have cached stat information available then we use it to
	 * verify whether the file has been rewritten.
	 *
	 * Note that we explicitly do not want to use `stat_validity_check()`
	 * and friends here because they may end up not comparing the `st_dev`
	 * and `st_ino` fields. These functions thus cannot guarantee that we
	 * indeed still have the same file.
	 */
	if (st->list_fd >= 0) {
		struct stat list_st;

		if (stat(st->list_file, &list_st) < 0) {
			/*
			 * It's fine for "tables.list" to not exist. In that
			 * case, we have to refresh when the loaded stack has
			 * any readers.
			 */
			if (errno == ENOENT)
				return !!st->readers_len;
			return REFTABLE_IO_ERROR;
		}

		/*
		 * When "tables.list" refers to the same file we can assume
		 * that it didn't change. This is because we always use
		 * rename(3P) to update the file and never write to it
		 * directly.
		 */
		if (st->list_st.st_dev == list_st.st_dev &&
		    st->list_st.st_ino == list_st.st_ino)
			return 0;
	}

	err = read_lines(st->list_file, &names);
	if (err < 0)
		return err;

	for (i = 0; i < st->readers_len; i++) {
		if (!names[i]) {
			err = 1;
			goto done;
		}

		if (strcmp(st->readers[i]->name, names[i])) {
			err = 1;
			goto done;
		}
	}

	if (names[st->merged->readers_len]) {
		err = 1;
		goto done;
	}

done:
	free_names(names);
	return err;
}

int reftable_stack_reload(struct reftable_stack *st)
{
	int err = stack_uptodate(st);
	if (err > 0)
		return reftable_stack_reload_maybe_reuse(st, 1);
	return err;
}

int reftable_stack_add(struct reftable_stack *st,
		       int (*write)(struct reftable_writer *wr, void *arg),
		       void *arg)
{
	int err = stack_try_add(st, write, arg);
	if (err < 0) {
		if (err == REFTABLE_OUTDATED_ERROR) {
			/* Ignore error return, we want to propagate
			   REFTABLE_OUTDATED_ERROR.
			 */
			reftable_stack_reload(st);
		}
		return err;
	}

	return 0;
}

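/*
 * A minimal usage sketch for reftable_stack_add() above; the callback name
 * and record setup are illustrative, not part of this file. The callback
 * receives a writer whose update-index limits must cover the records it
 * adds:
 *
 *	static int write_one_ref(struct reftable_writer *wr, void *arg)
 *	{
 *		struct reftable_ref_record *ref = arg;
 *		reftable_writer_set_limits(wr, ref->update_index,
 *					   ref->update_index);
 *		return reftable_writer_add_ref(wr, ref);
 *	}
 *
 *	err = reftable_stack_add(st, &write_one_ref, &ref);
 */
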
static void format_name(struct strbuf *dest, uint64_t min, uint64_t max)
{
	char buf[100];
	uint32_t rnd = (uint32_t)git_rand();
	snprintf(buf, sizeof(buf), "0x%012" PRIx64 "-0x%012" PRIx64 "-%08x",
		 min, max, rnd);
	strbuf_reset(dest);
	strbuf_addstr(dest, buf);
}

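/*
 * A transaction that adds new tables to the stack: table files accumulate
 * in `new_tables` while "tables.list" stays locked, and they only become
 * visible once the addition is committed.
 */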
struct reftable_addition {
	struct lock_file tables_list_lock;
	struct reftable_stack *stack;

	char **new_tables;
	size_t new_tables_len, new_tables_cap;
	uint64_t next_update_index;
};

#define REFTABLE_ADDITION_INIT {0}

static int reftable_stack_init_addition(struct reftable_addition *add,
					struct reftable_stack *st)
{
	struct strbuf lock_file_name = STRBUF_INIT;
	int err;

	add->stack = st;

	err = hold_lock_file_for_update(&add->tables_list_lock, st->list_file,
					LOCK_NO_DEREF);
	if (err < 0) {
		if (errno == EEXIST) {
			err = REFTABLE_LOCK_ERROR;
		} else {
			err = REFTABLE_IO_ERROR;
		}
		goto done;
	}
	if (st->opts.default_permissions) {
		if (chmod(get_lock_file_path(&add->tables_list_lock),
			  st->opts.default_permissions) < 0) {
			err = REFTABLE_IO_ERROR;
			goto done;
		}
	}

	err = stack_uptodate(st);
	if (err < 0)
		goto done;
	if (err > 0) {
		err = REFTABLE_OUTDATED_ERROR;
		goto done;
	}

	add->next_update_index = reftable_stack_next_update_index(st);
done:
	if (err) {
		reftable_addition_close(add);
	}
	strbuf_release(&lock_file_name);
	return err;
}

static void reftable_addition_close(struct reftable_addition *add)
{
	struct strbuf nm = STRBUF_INIT;
	size_t i;

	for (i = 0; i < add->new_tables_len; i++) {
		stack_filename(&nm, add->stack, add->new_tables[i]);
		unlink(nm.buf);
		reftable_free(add->new_tables[i]);
		add->new_tables[i] = NULL;
	}
	reftable_free(add->new_tables);
	add->new_tables = NULL;
	add->new_tables_len = 0;
	add->new_tables_cap = 0;

	rollback_lock_file(&add->tables_list_lock);
	strbuf_release(&nm);
}

void reftable_addition_destroy(struct reftable_addition *add)
{
	if (!add) {
		return;
	}
	reftable_addition_close(add);
	reftable_free(add);
}

int reftable_addition_commit(struct reftable_addition *add)
{
	struct strbuf table_list = STRBUF_INIT;
	int lock_file_fd = get_lock_file_fd(&add->tables_list_lock);
	int err = 0;
	size_t i;

	if (add->new_tables_len == 0)
		goto done;

	for (i = 0; i < add->stack->merged->readers_len; i++) {
		strbuf_addstr(&table_list, add->stack->readers[i]->name);
		strbuf_addstr(&table_list, "\n");
	}
	for (i = 0; i < add->new_tables_len; i++) {
		strbuf_addstr(&table_list, add->new_tables[i]);
		strbuf_addstr(&table_list, "\n");
	}

	err = write_in_full(lock_file_fd, table_list.buf, table_list.len);
	strbuf_release(&table_list);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	err = fsync_component(FSYNC_COMPONENT_REFERENCE, lock_file_fd);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	err = commit_lock_file(&add->tables_list_lock);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	/* success, no more state to clean up. */
	for (i = 0; i < add->new_tables_len; i++)
		reftable_free(add->new_tables[i]);
	reftable_free(add->new_tables);
	add->new_tables = NULL;
	add->new_tables_len = 0;
	add->new_tables_cap = 0;

	err = reftable_stack_reload_maybe_reuse(add->stack, 1);
	if (err)
		goto done;

	if (!add->stack->opts.disable_auto_compact) {
		/*
		 * Auto-compact the stack to keep the number of tables in
		 * control. It is possible that a concurrent writer is already
		 * trying to compact parts of the stack, which would lead to a
		 * `REFTABLE_LOCK_ERROR` because parts of the stack are locked
		 * already. This is a benign error though, so we ignore it.
		 */
		err = reftable_stack_auto_compact(add->stack);
		if (err < 0 && err != REFTABLE_LOCK_ERROR)
			goto done;
		err = 0;
	}

done:
	reftable_addition_close(add);
	return err;
}

int reftable_stack_new_addition(struct reftable_addition **dest,
				struct reftable_stack *st)
{
	int err = 0;
	struct reftable_addition empty = REFTABLE_ADDITION_INIT;
	REFTABLE_CALLOC_ARRAY(*dest, 1);
	**dest = empty;
	err = reftable_stack_init_addition(*dest, st);
	if (err) {
		reftable_free(*dest);
		*dest = NULL;
	}
	return err;
}

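/*
 * One-shot variant of the addition API: set up an addition, write a single
 * table through `write_table` and commit it right away.
 */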
static int stack_try_add(struct reftable_stack *st,
			 int (*write_table)(struct reftable_writer *wr,
					    void *arg),
			 void *arg)
{
	struct reftable_addition add = REFTABLE_ADDITION_INIT;
	int err = reftable_stack_init_addition(&add, st);
	if (err < 0)
		goto done;

	err = reftable_addition_add(&add, write_table, arg);
	if (err < 0)
		goto done;

	err = reftable_addition_commit(&add);
done:
	reftable_addition_close(&add);
	return err;
}

int reftable_addition_add(struct reftable_addition *add,
			  int (*write_table)(struct reftable_writer *wr,
					     void *arg),
			  void *arg)
{
	struct strbuf temp_tab_file_name = STRBUF_INIT;
	struct strbuf tab_file_name = STRBUF_INIT;
	struct strbuf next_name = STRBUF_INIT;
	struct reftable_writer *wr = NULL;
	struct tempfile *tab_file = NULL;
	int err = 0;
	int tab_fd;

	strbuf_reset(&next_name);
	format_name(&next_name, add->next_update_index, add->next_update_index);

	stack_filename(&temp_tab_file_name, add->stack, next_name.buf);
	strbuf_addstr(&temp_tab_file_name, ".temp.XXXXXX");

	tab_file = mks_tempfile(temp_tab_file_name.buf);
	if (!tab_file) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}
	if (add->stack->opts.default_permissions) {
		if (chmod(get_tempfile_path(tab_file),
			  add->stack->opts.default_permissions)) {
			err = REFTABLE_IO_ERROR;
			goto done;
		}
	}
	tab_fd = get_tempfile_fd(tab_file);

	wr = reftable_new_writer(reftable_fd_write, reftable_fd_flush, &tab_fd,
				 &add->stack->opts);
	err = write_table(wr, arg);
	if (err < 0)
		goto done;

	err = reftable_writer_close(wr);
	if (err == REFTABLE_EMPTY_TABLE_ERROR) {
		err = 0;
		goto done;
	}
	if (err < 0)
		goto done;

	err = close_tempfile_gently(tab_file);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	if (wr->min_update_index < add->next_update_index) {
		err = REFTABLE_API_ERROR;
		goto done;
	}

	format_name(&next_name, wr->min_update_index, wr->max_update_index);
	strbuf_addstr(&next_name, ".ref");
	stack_filename(&tab_file_name, add->stack, next_name.buf);

	/*
	  On Windows, this relies on rand() picking a unique destination name.
	  Maybe we should do a retry loop as well?
	 */
	err = rename_tempfile(&tab_file, tab_file_name.buf);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	REFTABLE_ALLOC_GROW(add->new_tables, add->new_tables_len + 1,
			    add->new_tables_cap);
	add->new_tables[add->new_tables_len++] = strbuf_detach(&next_name, NULL);
done:
	delete_tempfile(&tab_file);
	strbuf_release(&temp_tab_file_name);
	strbuf_release(&tab_file_name);
	strbuf_release(&next_name);
	reftable_writer_free(wr);
	return err;
}

uint64_t reftable_stack_next_update_index(struct reftable_stack *st)
{
	int sz = st->merged->readers_len;
	if (sz > 0)
		return reftable_reader_max_update_index(st->readers[sz - 1]) +
		       1;
	return 1;
}

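/*
 * Write the merged contents of the tables in `[first, last]` into a
 * tempfile in the reftable directory and hand it back via `tab_file_out`;
 * the caller renames it into place.
 */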
static int stack_compact_locked(struct reftable_stack *st,
				size_t first, size_t last,
				struct reftable_log_expiry_config *config,
				struct tempfile **tab_file_out)
{
	struct strbuf next_name = STRBUF_INIT;
	struct strbuf tab_file_path = STRBUF_INIT;
	struct reftable_writer *wr = NULL;
	struct tempfile *tab_file;
	int tab_fd, err = 0;

	format_name(&next_name,
		    reftable_reader_min_update_index(st->readers[first]),
		    reftable_reader_max_update_index(st->readers[last]));
	stack_filename(&tab_file_path, st, next_name.buf);
	strbuf_addstr(&tab_file_path, ".temp.XXXXXX");

	tab_file = mks_tempfile(tab_file_path.buf);
	if (!tab_file) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}
	tab_fd = get_tempfile_fd(tab_file);

	if (st->opts.default_permissions &&
	    chmod(get_tempfile_path(tab_file), st->opts.default_permissions) < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	wr = reftable_new_writer(reftable_fd_write, reftable_fd_flush,
				 &tab_fd, &st->opts);
	err = stack_write_compact(st, wr, first, last, config);
	if (err < 0)
		goto done;

	err = reftable_writer_close(wr);
	if (err < 0)
		goto done;

	err = close_tempfile_gently(tab_file);
	if (err < 0)
		goto done;

	*tab_file_out = tab_file;
	tab_file = NULL;

done:
	delete_tempfile(&tab_file);
	reftable_writer_free(wr);
	strbuf_release(&next_name);
	strbuf_release(&tab_file_path);
	return err;
}

static int stack_write_compact(struct reftable_stack *st,
			       struct reftable_writer *wr,
			       size_t first, size_t last,
			       struct reftable_log_expiry_config *config)
{
	struct reftable_merged_table *mt = NULL;
	struct reftable_iterator it = { NULL };
	struct reftable_ref_record ref = { NULL };
	struct reftable_log_record log = { NULL };
	size_t subtabs_len = last - first + 1;
	uint64_t entries = 0;
	int err = 0;

	for (size_t i = first; i <= last; i++)
		st->stats.bytes += st->readers[i]->size;
	reftable_writer_set_limits(wr, st->readers[first]->min_update_index,
				   st->readers[last]->max_update_index);

	err = reftable_merged_table_new(&mt, st->readers + first, subtabs_len,
					st->opts.hash_id);
	if (err < 0)
		goto done;

	merged_table_init_iter(mt, &it, BLOCK_TYPE_REF);
	err = reftable_iterator_seek_ref(&it, "");
	if (err < 0)
		goto done;

	while (1) {
		err = reftable_iterator_next_ref(&it, &ref);
		if (err > 0) {
			err = 0;
			break;
		}
		if (err < 0)
			goto done;

		if (first == 0 && reftable_ref_record_is_deletion(&ref)) {
			continue;
		}

		err = reftable_writer_add_ref(wr, &ref);
		if (err < 0)
			goto done;
		entries++;
	}
	reftable_iterator_destroy(&it);

	merged_table_init_iter(mt, &it, BLOCK_TYPE_LOG);
	err = reftable_iterator_seek_log(&it, "");
	if (err < 0)
		goto done;

	while (1) {
		err = reftable_iterator_next_log(&it, &log);
		if (err > 0) {
			err = 0;
			break;
		}
		if (err < 0)
			goto done;
		if (first == 0 && reftable_log_record_is_deletion(&log)) {
			continue;
		}

		if (config && config->min_update_index > 0 &&
		    log.update_index < config->min_update_index) {
			continue;
		}

		if (config && config->time > 0 &&
		    log.value.update.time < config->time) {
			continue;
		}

		err = reftable_writer_add_log(wr, &log);
		if (err < 0)
			goto done;
		entries++;
	}

done:
	reftable_iterator_destroy(&it);
	if (mt)
		reftable_merged_table_free(mt);
	reftable_ref_record_release(&ref);
	reftable_log_record_release(&log);
	st->stats.entries_written += entries;
	return err;
}

enum stack_compact_range_flags {
	/*
	 * Perform a best-effort compaction. That is, even if we cannot lock
	 * all tables in the specified range, we will try to compact the
	 * remaining slice.
	 */
	STACK_COMPACT_RANGE_BEST_EFFORT = (1 << 0),
};

/*
 * Compact all tables in the inclusive range `[first, last]` into a single
 * new table.
 *
 * This function returns `0` on success or a code `< 0` on failure. When the
 * stack or any of the tables in the specified range are already locked then
 * this function returns `REFTABLE_LOCK_ERROR`. This is a benign error that
 * callers can either ignore, or they may choose to retry compaction after some
 * amount of time.
 */
static int stack_compact_range(struct reftable_stack *st,
			       size_t first, size_t last,
			       struct reftable_log_expiry_config *expiry,
			       unsigned int flags)
{
	struct strbuf tables_list_buf = STRBUF_INIT;
	struct strbuf new_table_name = STRBUF_INIT;
	struct strbuf new_table_path = STRBUF_INIT;
	struct strbuf table_name = STRBUF_INIT;
	struct lock_file tables_list_lock = LOCK_INIT;
	struct lock_file *table_locks = NULL;
	struct tempfile *new_table = NULL;
	int is_empty_table = 0, err = 0;
	size_t first_to_replace, last_to_replace;
	size_t i, nlocks = 0;
	char **names = NULL;

	if (first > last || (!expiry && first == last)) {
		err = 0;
		goto done;
	}

	st->stats.attempts++;

	/*
	 * Hold the lock so that we can read "tables.list" and lock all tables
	 * which are part of the user-specified range.
	 */
	err = hold_lock_file_for_update(&tables_list_lock, st->list_file,
					LOCK_NO_DEREF);
	if (err < 0) {
		if (errno == EEXIST)
			err = REFTABLE_LOCK_ERROR;
		else
			err = REFTABLE_IO_ERROR;
		goto done;
	}

	err = stack_uptodate(st);
	if (err)
		goto done;

	/*
	 * Lock all tables in the user-provided range. This is the slice of our
	 * stack which we'll compact.
	 *
	 * Note that we lock tables in reverse order from last to first. The
	 * intent behind this is to allow a newer process to perform best
	 * effort compaction of tables that it has added in the case where an
	 * older process is still busy compacting tables which are preexisting
	 * from the point of view of the newer process.
	 */
	REFTABLE_CALLOC_ARRAY(table_locks, last - first + 1);
	for (i = last + 1; i > first; i--) {
		stack_filename(&table_name, st, reader_name(st->readers[i - 1]));

		err = hold_lock_file_for_update(&table_locks[nlocks],
						table_name.buf, LOCK_NO_DEREF);
		if (err < 0) {
			/*
			 * When the table is locked already we may do a
			 * best-effort compaction and compact only the tables
			 * that we have managed to lock so far. This of course
			 * requires that we have been able to lock at least two
			 * tables, otherwise there would be nothing to compact.
			 * In that case, we return a lock error to our caller.
			 */
			if (errno == EEXIST && last - (i - 1) >= 2 &&
			    flags & STACK_COMPACT_RANGE_BEST_EFFORT) {
				err = 0;
				/*
				 * The subtraction is to offset the index, the
				 * addition is to only compact up to the table
				 * of the preceding iteration. They obviously
				 * cancel each other out, but that may be
				 * non-obvious when it was omitted.
				 */
				first = (i - 1) + 1;
				break;
			} else if (errno == EEXIST) {
				err = REFTABLE_LOCK_ERROR;
				goto done;
			} else {
				err = REFTABLE_IO_ERROR;
				goto done;
			}
		}

		/*
		 * We need to close the lockfiles as we might otherwise easily
		 * run into file descriptor exhaustion when we compress a lot
		 * of tables.
		 */
		err = close_lock_file_gently(&table_locks[nlocks++]);
		if (err < 0) {
			err = REFTABLE_IO_ERROR;
			goto done;
		}
	}

	/*
	 * We have locked all tables in our range and can thus release the
	 * "tables.list" lock while compacting the locked tables. This allows
	 * concurrent updates to the stack to proceed.
	 */
	err = rollback_lock_file(&tables_list_lock);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	/*
	 * Compact the now-locked tables into a new table. Note that compacting
	 * these tables may end up with an empty new table in case tombstones
	 * end up cancelling out all refs in that range.
	 */
	err = stack_compact_locked(st, first, last, expiry, &new_table);
	if (err < 0) {
		if (err != REFTABLE_EMPTY_TABLE_ERROR)
			goto done;
		is_empty_table = 1;
	}

	/*
	 * Now that we have written the new, compacted table we need to re-lock
	 * "tables.list". We'll then replace the compacted range of tables with
	 * the new table.
	 */
	err = hold_lock_file_for_update(&tables_list_lock, st->list_file,
					LOCK_NO_DEREF);
	if (err < 0) {
		if (errno == EEXIST)
			err = REFTABLE_LOCK_ERROR;
		else
			err = REFTABLE_IO_ERROR;
		goto done;
	}

	if (st->opts.default_permissions) {
		if (chmod(get_lock_file_path(&tables_list_lock),
			  st->opts.default_permissions) < 0) {
			err = REFTABLE_IO_ERROR;
			goto done;
		}
	}

	/*
	 * As we have unlocked the stack while compacting our slice of tables
	 * it may have happened that a concurrently running process has updated
	 * the stack while we were compacting. In that case, we need to check
	 * whether the tables that we have just compacted still exist in the
	 * stack in the exact same order as we have compacted them.
	 *
	 * If they do exist, then it is fine to continue and replace those
	 * tables with our compacted version. If they don't, then we need to
	 * abort.
	 */
	err = stack_uptodate(st);
	if (err < 0)
		goto done;
	if (err > 0) {
		ssize_t new_offset = -1;
		int fd;

		fd = open(st->list_file, O_RDONLY);
		if (fd < 0) {
			err = REFTABLE_IO_ERROR;
			goto done;
		}

		err = fd_read_lines(fd, &names);
		close(fd);
		if (err < 0)
			goto done;

		/*
		 * Search for the offset of the first table that we have
		 * compacted in the updated "tables.list" file.
		 */
		for (size_t i = 0; names[i]; i++) {
			if (strcmp(names[i], st->readers[first]->name))
				continue;

			/*
			 * We have found the first entry. Verify that all the
			 * subsequent tables we have compacted still exist in
			 * the modified stack in the exact same order as we
			 * have compacted them.
			 */
			for (size_t j = 1; j < last - first + 1; j++) {
				const char *old = first + j < st->merged->readers_len ?
					st->readers[first + j]->name : NULL;
				const char *new = names[i + j];

				/*
				 * If some entries are missing or in case the tables
				 * have changed then we need to bail out. Again, this
				 * shouldn't ever happen because we have locked the
				 * tables we are compacting.
				 */
				if (!old || !new || strcmp(old, new)) {
					err = REFTABLE_OUTDATED_ERROR;
					goto done;
				}
			}

			new_offset = i;
			break;
		}

		/*
		 * In case we didn't find our compacted tables in the stack we
		 * need to bail out. In theory, this should have never happened
		 * because we locked the tables we are compacting.
		 */
		if (new_offset < 0) {
			err = REFTABLE_OUTDATED_ERROR;
			goto done;
		}

		/*
		 * We have found the new range that we want to replace, so
		 * let's update the range of tables that we want to replace.
		 */
		first_to_replace = new_offset;
		last_to_replace = last + (new_offset - first);
	} else {
		/*
		 * `fd_read_lines()` uses a `NULL` sentinel to indicate that
		 * the array is at its end. As we use `free_names()` to free
		 * the array, we need to include this sentinel value here and
		 * thus have to allocate `readers_len + 1` many entries.
		 */
		REFTABLE_CALLOC_ARRAY(names, st->merged->readers_len + 1);
		for (size_t i = 0; i < st->merged->readers_len; i++)
			names[i] = xstrdup(st->readers[i]->name);
		first_to_replace = first;
		last_to_replace = last;
	}

	/*
	 * If the resulting compacted table is not empty, then we need to move
	 * it into place now.
	 */
	if (!is_empty_table) {
		format_name(&new_table_name, st->readers[first]->min_update_index,
			    st->readers[last]->max_update_index);
		strbuf_addstr(&new_table_name, ".ref");
		stack_filename(&new_table_path, st, new_table_name.buf);

		err = rename_tempfile(&new_table, new_table_path.buf);
		if (err < 0) {
			err = REFTABLE_IO_ERROR;
			goto done;
		}
	}

	/*
	 * Write the new "tables.list" contents with the compacted table we
	 * have just written. In case the compacted table became empty we
	 * simply skip writing it.
	 */
	for (i = 0; i < first_to_replace; i++)
		strbuf_addf(&tables_list_buf, "%s\n", names[i]);
	if (!is_empty_table)
		strbuf_addf(&tables_list_buf, "%s\n", new_table_name.buf);
	for (i = last_to_replace + 1; names[i]; i++)
		strbuf_addf(&tables_list_buf, "%s\n", names[i]);

	err = write_in_full(get_lock_file_fd(&tables_list_lock),
			    tables_list_buf.buf, tables_list_buf.len);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		unlink(new_table_path.buf);
		goto done;
	}

	err = fsync_component(FSYNC_COMPONENT_REFERENCE, get_lock_file_fd(&tables_list_lock));
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		unlink(new_table_path.buf);
		goto done;
	}

	err = commit_lock_file(&tables_list_lock);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		unlink(new_table_path.buf);
		goto done;
	}

	/*
	 * Reload the stack before deleting the compacted tables. We can only
	 * delete the files after we closed them on Windows, so this needs to
	 * happen first.
	 */
	err = reftable_stack_reload_maybe_reuse(st, first < last);
	if (err < 0)
		goto done;

	/*
	 * Delete the old tables. They may still be in use by concurrent
	 * readers, so it is expected that unlinking tables may fail.
	 */
	for (i = 0; i < nlocks; i++) {
		struct lock_file *table_lock = &table_locks[i];
		char *table_path = get_locked_file_path(table_lock);
		unlink(table_path);
		free(table_path);
	}

done:
	rollback_lock_file(&tables_list_lock);
	for (i = 0; table_locks && i < nlocks; i++)
		rollback_lock_file(&table_locks[i]);
	reftable_free(table_locks);

	delete_tempfile(&new_table);
	strbuf_release(&new_table_name);
	strbuf_release(&new_table_path);
	strbuf_release(&tables_list_buf);
	strbuf_release(&table_name);
	free_names(names);

	if (err == REFTABLE_LOCK_ERROR)
		st->stats.failures++;

	return err;
}

int reftable_stack_compact_all(struct reftable_stack *st,
			       struct reftable_log_expiry_config *config)
{
	size_t last = st->merged->readers_len ? st->merged->readers_len - 1 : 0;
	return stack_compact_range(st, 0, last, config, 0);
}

static int segment_size(struct segment *s)
{
	return s->end - s->start;
}

struct segment suggest_compaction_segment(uint64_t *sizes, size_t n,
					  uint8_t factor)
{
	struct segment seg = { 0 };
	uint64_t bytes;
	size_t i;

	if (!factor)
		factor = DEFAULT_GEOMETRIC_FACTOR;

	/*
	 * If there are no tables or only a single one then we don't have to
	 * compact anything. The sequence is geometric by definition already.
	 */
	if (n <= 1)
		return seg;

	/*
	 * Find the ending table of the compaction segment needed to restore the
	 * geometric sequence. Note that the segment end is exclusive.
	 *
	 * To do so, we iterate backwards starting from the most recent table
	 * until a valid segment end is found. If the preceding table is smaller
	 * than the current table multiplied by the geometric factor (2), the
	 * compaction segment end has been identified.
	 *
	 * Tables after the ending point are not added to the byte count because
	 * they are already valid members of the geometric sequence. Due to the
	 * properties of a geometric sequence, it is not possible for the sum of
	 * these tables to exceed the value of the ending point table.
	 *
	 * Example table size sequence requiring no compaction:
	 * 	64, 32, 16, 8, 4, 2, 1
	 *
	 * Example table size sequence where compaction segment end is set to
	 * the last table. Since the segment end is exclusive, the last table is
	 * excluded during subsequent compaction and the table with size 3 is
	 * the final table included:
	 * 	64, 32, 16, 8, 4, 3, 1
	 */
	for (i = n - 1; i > 0; i--) {
		if (sizes[i - 1] < sizes[i] * factor) {
			seg.end = i + 1;
			bytes = sizes[i];
			break;
		}
	}

	/*
	 * Find the starting table of the compaction segment by iterating
	 * through the remaining tables and keeping track of the accumulated
	 * size of all tables seen from the segment end table. The previous
	 * table is compared to the accumulated size because the tables from the
	 * segment end are merged backwards recursively.
	 *
	 * Note that we keep iterating even after we have found the first
	 * starting point. This is because there may be tables in the stack
	 * preceding that first starting point which violate the geometric
	 * sequence.
	 *
	 * Example compaction segment start set to table with size 32:
	 * 	128, 32, 16, 8, 4, 3, 1
	 */
	for (; i > 0; i--) {
		uint64_t curr = bytes;
		bytes += sizes[i - 1];

		if (sizes[i - 1] < curr * factor) {
			seg.start = i - 1;
			seg.bytes = bytes;
		}
	}

	return seg;
}

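/*
 * Collect the effective size of each table in the stack, discounting the
 * per-table header overhead, presumably so that near-empty tables do not
 * distort the geometric comparison above.
 */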
static uint64_t *stack_table_sizes_for_compaction(struct reftable_stack *st)
{
	int version = (st->opts.hash_id == GIT_SHA1_FORMAT_ID) ? 1 : 2;
	int overhead = header_size(version) - 1;
	uint64_t *sizes;

	REFTABLE_CALLOC_ARRAY(sizes, st->merged->readers_len);

	for (size_t i = 0; i < st->merged->readers_len; i++)
		sizes[i] = st->readers[i]->size - overhead;

	return sizes;
}

int reftable_stack_auto_compact(struct reftable_stack *st)
{
	uint64_t *sizes = stack_table_sizes_for_compaction(st);
	struct segment seg =
		suggest_compaction_segment(sizes, st->merged->readers_len,
					   st->opts.auto_compaction_factor);
	reftable_free(sizes);
	if (segment_size(&seg) > 0)
		return stack_compact_range(st, seg.start, seg.end - 1,
					   NULL, STACK_COMPACT_RANGE_BEST_EFFORT);

	return 0;
}

struct reftable_compaction_stats *
reftable_stack_compaction_stats(struct reftable_stack *st)
{
	return &st->stats;
}

int reftable_stack_read_ref(struct reftable_stack *st, const char *refname,
			    struct reftable_ref_record *ref)
{
	struct reftable_iterator it = { 0 };
	int ret;

	reftable_merged_table_init_ref_iterator(st->merged, &it);
	ret = reftable_iterator_seek_ref(&it, refname);
	if (ret)
		goto out;

	ret = reftable_iterator_next_ref(&it, ref);
	if (ret)
		goto out;

	if (strcmp(ref->refname, refname) ||
	    reftable_ref_record_is_deletion(ref)) {
		reftable_ref_record_release(ref);
		ret = 1;
		goto out;
	}

out:
	reftable_iterator_destroy(&it);
	return ret;
}

int reftable_stack_read_log(struct reftable_stack *st, const char *refname,
			    struct reftable_log_record *log)
{
	struct reftable_iterator it = {0};
	int err;

	reftable_stack_init_log_iterator(st, &it);
	err = reftable_iterator_seek_log(&it, refname);
	if (err)
		goto done;

	err = reftable_iterator_next_log(&it, log);
	if (err)
		goto done;

	if (strcmp(log->refname, refname) ||
	    reftable_log_record_is_deletion(log)) {
		err = 1;
		goto done;
	}

done:
	if (err) {
		reftable_log_record_release(log);
	}
	reftable_iterator_destroy(&it);
	return err;
}

static int is_table_name(const char *s)
{
	const char *dot = strrchr(s, '.');
	return dot && !strcmp(dot, ".ref");
}

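/*
 * Delete a table file that is not referenced by the stack, but only if its
 * maximum update index does not exceed `max`; a newer table may belong to a
 * concurrent writer whose addition has not been committed yet.
 */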
static void remove_maybe_stale_table(struct reftable_stack *st, uint64_t max,
				     const char *name)
{
	int err = 0;
	uint64_t update_idx = 0;
	struct reftable_block_source src = { NULL };
	struct reftable_reader *rd = NULL;
	struct strbuf table_path = STRBUF_INIT;
	stack_filename(&table_path, st, name);

	err = reftable_block_source_from_file(&src, table_path.buf);
	if (err < 0)
		goto done;

	err = reftable_reader_new(&rd, &src, name);
	if (err < 0)
		goto done;

	update_idx = reftable_reader_max_update_index(rd);
	reftable_reader_decref(rd);

	if (update_idx <= max) {
		unlink(table_path.buf);
	}
done:
	strbuf_release(&table_path);
}

static int reftable_stack_clean_locked(struct reftable_stack *st)
{
	uint64_t max = reftable_merged_table_max_update_index(
		reftable_stack_merged_table(st));
	DIR *dir = opendir(st->reftable_dir);
	struct dirent *d = NULL;
	if (!dir) {
		return REFTABLE_IO_ERROR;
	}

	while ((d = readdir(dir))) {
		int i = 0;
		int found = 0;
		if (!is_table_name(d->d_name))
			continue;

		for (i = 0; !found && i < st->readers_len; i++) {
			found = !strcmp(reader_name(st->readers[i]), d->d_name);
		}
		if (found)
			continue;

		remove_maybe_stale_table(st, max, d->d_name);
	}

	closedir(dir);
	return 0;
}

int reftable_stack_clean(struct reftable_stack *st)
{
	struct reftable_addition *add = NULL;
	int err = reftable_stack_new_addition(&add, st);
	if (err < 0) {
		goto done;
	}

	err = reftable_stack_reload(st);
	if (err < 0) {
		goto done;
	}

	err = reftable_stack_clean_locked(st);

done:
	reftable_addition_destroy(add);
	return err;
}