/*
Copyright 2020 Google LLC

Use of this source code is governed by a BSD-style
license that can be found in the LICENSE file or at
https://developers.google.com/open-source/licenses/bsd
*/

#include "../write-or-die.h"
#include "constants.h"
#include "reftable-error.h"
#include "reftable-record.h"
#include "reftable-merged.h"
static int stack_try_add(struct reftable_stack *st,
			 int (*write_table)(struct reftable_writer *wr,
					    void *arg),
			 void *arg);
static int stack_write_compact(struct reftable_stack *st,
			       struct reftable_writer *wr,
			       size_t first, size_t last,
			       struct reftable_log_expiry_config *config);
static void reftable_addition_close(struct reftable_addition *add);
static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st,
					     int reuse_open);
static int stack_filename(struct reftable_buf *dest, struct reftable_stack *st,
			  const char *name)
	reftable_buf_reset(dest);
	if ((err = reftable_buf_addstr(dest, st->reftable_dir)) < 0 ||
	    (err = reftable_buf_addstr(dest, "/")) < 0 ||
	    (err = reftable_buf_addstr(dest, name)) < 0)
static ssize_t reftable_fd_write(void *arg, const void *data, size_t sz)
{
	int *fdp = (int *)arg;
	return write_in_full(*fdp, data, sz);
}

static int reftable_fd_flush(void *arg)
{
	int *fdp = (int *)arg;

	return fsync_component(FSYNC_COMPONENT_REFERENCE, *fdp);
}
int reftable_new_stack(struct reftable_stack **dest, const char *dir,
		       const struct reftable_write_options *_opts)
	struct reftable_buf list_file_name = REFTABLE_BUF_INIT;
	struct reftable_write_options opts = { 0 };
	struct reftable_stack *p;

	p = reftable_calloc(1, sizeof(*p));
		err = REFTABLE_OUT_OF_MEMORY_ERROR;

	if (opts.hash_id == 0)
		opts.hash_id = GIT_SHA1_FORMAT_ID;

	reftable_buf_reset(&list_file_name);
	if ((err = reftable_buf_addstr(&list_file_name, dir)) < 0 ||
	    (err = reftable_buf_addstr(&list_file_name, "/tables.list")) < 0)

	p->list_file = reftable_buf_detach(&list_file_name);

	p->reftable_dir = reftable_strdup(dir);
	if (!p->reftable_dir) {
		err = REFTABLE_OUT_OF_MEMORY_ERROR;

	err = reftable_stack_reload_maybe_reuse(p, 1);

		reftable_stack_destroy(p);
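
/*
 * Minimal usage sketch (an assumption-laden illustration, not part of the
 * library proper): a caller opens a stack on a directory, works with it and
 * destroys it again. The directory path is only an example and error
 * handling is trimmed.
 *
 *	struct reftable_write_options opts = { 0 };
 *	struct reftable_stack *st = NULL;
 *
 *	if (!reftable_new_stack(&st, ".git/reftable", &opts)) {
 *		... read refs or add tables through the stack ...
 *		reftable_stack_destroy(st);
 *	}
 */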
static int fd_read_lines(int fd, char ***namesp)
	off_t size = lseek(fd, 0, SEEK_END);
		err = REFTABLE_IO_ERROR;
	err = lseek(fd, 0, SEEK_SET);
		err = REFTABLE_IO_ERROR;

	REFTABLE_ALLOC_ARRAY(buf, size + 1);
		err = REFTABLE_OUT_OF_MEMORY_ERROR;

	if (read_in_full(fd, buf, size) != size) {
		err = REFTABLE_IO_ERROR;

	*namesp = parse_names(buf, size);
		err = REFTABLE_OUT_OF_MEMORY_ERROR;
int read_lines(const char *filename, char ***namesp)
	int fd = open(filename, O_RDONLY);
		if (errno == ENOENT) {
			REFTABLE_CALLOC_ARRAY(*namesp, 1);
				return REFTABLE_OUT_OF_MEMORY_ERROR;

		return REFTABLE_IO_ERROR;

	err = fd_read_lines(fd, namesp);
int reftable_stack_init_ref_iterator(struct reftable_stack *st,
				     struct reftable_iterator *it)
	return merged_table_init_iter(reftable_stack_merged_table(st),
				      it, BLOCK_TYPE_REF);

int reftable_stack_init_log_iterator(struct reftable_stack *st,
				     struct reftable_iterator *it)
	return merged_table_init_iter(reftable_stack_merged_table(st),
				      it, BLOCK_TYPE_LOG);

struct reftable_merged_table *
reftable_stack_merged_table(struct reftable_stack *st)
static int has_name(char **names, const char *name)
	if (!strcmp(*names, name))
/* Close and free the stack */
void reftable_stack_destroy(struct reftable_stack *st)
	reftable_merged_table_free(st->merged);

	err = read_lines(st->list_file, &names);
		REFTABLE_FREE_AND_NULL(names);

	struct reftable_buf filename = REFTABLE_BUF_INIT;
	for (i = 0; i < st->readers_len; i++) {
		const char *name = reader_name(st->readers[i]);
		int try_unlinking = 1;

		reftable_buf_reset(&filename);
		if (names && !has_name(names, name)) {
			if (stack_filename(&filename, st, name) < 0)

		reftable_reader_decref(st->readers[i]);

		if (try_unlinking && filename.len) {
			/* On Windows, can only unlink after closing. */
			unlink(filename.buf);

	reftable_buf_release(&filename);
	REFTABLE_FREE_AND_NULL(st->readers);

	if (st->list_fd >= 0) {

	REFTABLE_FREE_AND_NULL(st->list_file);
	REFTABLE_FREE_AND_NULL(st->reftable_dir);
static struct reftable_reader **stack_copy_readers(struct reftable_stack *st,
						   size_t cur_len)
	struct reftable_reader **cur = reftable_calloc(cur_len, sizeof(*cur));
	for (size_t i = 0; i < cur_len; i++)
		cur[i] = st->readers[i];
static int reftable_stack_reload_once(struct reftable_stack *st,
				      const char **names, int reuse_open)
	size_t cur_len = !st->merged ? 0 : st->merged->readers_len;
	struct reftable_reader **cur;
	struct reftable_reader **reused = NULL;
	struct reftable_reader **new_readers;
	size_t reused_len = 0, reused_alloc = 0, names_len;
	size_t new_readers_len = 0;
	struct reftable_merged_table *new_merged = NULL;
	struct reftable_buf table_path = REFTABLE_BUF_INIT;

	cur = stack_copy_readers(st, cur_len);
		err = REFTABLE_OUT_OF_MEMORY_ERROR;

	names_len = names_length(names);
	new_readers = reftable_calloc(names_len, sizeof(*new_readers));
		err = REFTABLE_OUT_OF_MEMORY_ERROR;

		struct reftable_reader *rd = NULL;
		const char *name = *names++;

		/* this is linear; we assume compaction keeps the number of
		   tables under control so this is not quadratic. */
		for (i = 0; reuse_open && i < cur_len; i++) {
			if (cur[i] && 0 == strcmp(cur[i]->name, name)) {
				/*
				 * When reloading the stack fails, we end up
				 * releasing all new readers. This also
				 * includes the reused readers, even though
				 * they are still in use by the old stack. We
				 * thus need to keep them alive here, which we
				 * do by bumping their refcount.
				 */
				REFTABLE_ALLOC_GROW(reused, reused_len + 1, reused_alloc);
					err = REFTABLE_OUT_OF_MEMORY_ERROR;
				reused[reused_len++] = rd;
				reftable_reader_incref(rd);

			struct reftable_block_source src = { NULL };

			err = stack_filename(&table_path, st, name);
			err = reftable_block_source_from_file(&src,
			err = reftable_reader_new(&rd, &src, name);

		new_readers[new_readers_len] = rd;

	err = reftable_merged_table_new(&new_merged, new_readers,
					new_readers_len, st->opts.hash_id);

	/*
	 * Close the old, non-reused readers and proactively try to unlink
	 * them. This is done for systems like Windows, where the underlying
	 * file of such an open reader could not have been unlinked by the
	 * compacting process.
	 */
	for (i = 0; i < cur_len; i++) {
		const char *name = reader_name(cur[i]);

		err = stack_filename(&table_path, st, name);
		reftable_reader_decref(cur[i]);
		unlink(table_path.buf);

	/* Update the stack to point to the new tables. */
	reftable_merged_table_free(st->merged);
	new_merged->suppress_deletions = 1;
	st->merged = new_merged;

	reftable_free(st->readers);
	st->readers = new_readers;
	st->readers_len = new_readers_len;

	/*
	 * Decrement the refcount of reused readers again. This only needs to
	 * happen on the successful case, because on the unsuccessful one we
	 * decrement their refcount via `new_readers`.
	 */
	for (i = 0; i < reused_len; i++)
		reftable_reader_decref(reused[i]);

	for (i = 0; i < new_readers_len; i++)
		reftable_reader_decref(new_readers[i]);
	reftable_free(new_readers);
	reftable_free(reused);

	reftable_buf_release(&table_path);
/* return negative if a before b. */
static int tv_cmp(struct timeval *a, struct timeval *b)
	time_t diff = a->tv_sec - b->tv_sec;
	int udiff = a->tv_usec - b->tv_usec;
static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st,
					     int reuse_open)
	char **names = NULL, **names_after = NULL;
	struct timeval deadline;

	err = gettimeofday(&deadline, NULL);
	deadline.tv_sec += 3;

		err = gettimeofday(&now, NULL);

		/*
		 * Only look at deadlines after the first few times. This
		 * simplifies debugging in GDB.
		 */
		if (tries > 3 && tv_cmp(&now, &deadline) >= 0)

		fd = open(st->list_file, O_RDONLY);
			if (errno != ENOENT) {
				err = REFTABLE_IO_ERROR;

			REFTABLE_CALLOC_ARRAY(names, 1);
				err = REFTABLE_OUT_OF_MEMORY_ERROR;
			err = fd_read_lines(fd, &names);

		err = reftable_stack_reload_once(st, (const char **) names, reuse_open);
			if (err != REFTABLE_NOT_EXIST_ERROR)

			/*
			 * REFTABLE_NOT_EXIST_ERROR can be caused by a concurrent
			 * writer. Check if there was one by checking if the name list
			 * changed.
			 */
			err = read_lines(st->list_file, &names_after);
			if (names_equal((const char **) names_after,
					(const char **) names)) {
				err = REFTABLE_NOT_EXIST_ERROR;

			free_names(names_after);

		delay = delay + (delay * rand()) / RAND_MAX + 1;
		sleep_millisec(delay);

	/*
	 * Invalidate the stat cache. It is sufficient to only close the file
	 * descriptor and keep the cached stat info because we never use the
	 * latter when the former is negative.
	 */
	if (st->list_fd >= 0) {

	/*
	 * Cache stat information in case it provides a useful signal to us.
	 * According to POSIX, "The st_ino and st_dev fields taken together
	 * uniquely identify the file within the system." That being said,
	 * Windows is not POSIX compliant and we do not have these fields
	 * available. So the information we have there is insufficient to
	 * determine whether two file descriptors point to the same file.
	 *
	 * While we could fall back to using other signals like the file's
	 * mtime, those are not sufficient to avoid races. We thus refrain from
	 * using the stat cache on such systems and fall back to the secondary
	 * caching mechanism, which is to check whether contents of the file
	 * have changed.
	 *
	 * On other systems which are POSIX compliant we must keep the file
	 * descriptor open. This is to avoid a race condition where two
	 * processes access the reftable stack at the same point in time:
	 *
	 *   1. A reads the reftable stack and caches its stat info.
	 *
	 *   2. B updates the stack, appending a new table to "tables.list".
	 *      This will both use a new inode and result in a different file
	 *      size, thus invalidating A's cache in theory.
	 *
	 *   3. B decides to auto-compact the stack and merges two tables. The
	 *      file size now matches what A has cached again. Furthermore, the
	 *      filesystem may decide to recycle the inode number of the file
	 *      we have replaced in (2) because it is not in use anymore.
	 *
	 *   4. A reloads the reftable stack. Neither the inode number nor the
	 *      file size changed. If the timestamps did not change either then
	 *      we think the cached copy of our stack is up-to-date.
	 *
	 * By keeping the file descriptor open the inode number cannot be
	 * recycled, mitigating the race.
	 */
	if (!err && fd >= 0 && !fstat(fd, &st->list_st) &&
	    st->list_st.st_dev && st->list_st.st_ino) {

	free_names(names_after);
static int stack_uptodate(struct reftable_stack *st)
	/*
	 * When we have cached stat information available then we use it to
	 * verify whether the file has been rewritten.
	 *
	 * Note that we explicitly do not want to use `stat_validity_check()`
	 * and friends here because they may end up not comparing the `st_dev`
	 * and `st_ino` fields. These functions thus cannot guarantee that we
	 * indeed still have the same file.
	 */
	if (st->list_fd >= 0) {
		if (stat(st->list_file, &list_st) < 0) {
			/*
			 * It's fine for "tables.list" to not exist. In that
			 * case, we have to refresh when the loaded stack has
			 * any readers.
			 */
				return !!st->readers_len;
			return REFTABLE_IO_ERROR;

		/*
		 * When "tables.list" refers to the same file we can assume
		 * that it didn't change. This is because we always use
		 * rename(3P) to update the file and never write to it
		 * in-place.
		 */
		if (st->list_st.st_dev == list_st.st_dev &&
		    st->list_st.st_ino == list_st.st_ino)

	err = read_lines(st->list_file, &names);

	for (i = 0; i < st->readers_len; i++) {
		if (strcmp(st->readers[i]->name, names[i])) {

	if (names[st->merged->readers_len]) {
int reftable_stack_reload(struct reftable_stack *st)
	int err = stack_uptodate(st);
		return reftable_stack_reload_maybe_reuse(st, 1);
int reftable_stack_add(struct reftable_stack *st,
		       int (*write)(struct reftable_writer *wr, void *arg),
		       void *arg)
	int err = stack_try_add(st, write, arg);
	if (err == REFTABLE_OUTDATED_ERROR) {
		/* Ignore error return, we want to propagate
		   REFTABLE_OUTDATED_ERROR.
		 */
		reftable_stack_reload(st);
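
/*
 * Sketch of a caller-provided write callback (the callback name and the
 * payload struct are assumptions made for this illustration; error handling
 * is trimmed). It shows the contract enforced further down in
 * reftable_addition_add(): the callback must set its update-index limits to
 * at least reftable_stack_next_update_index().
 *
 *	struct write_args {
 *		struct reftable_stack *stack;
 *		struct reftable_ref_record *ref;
 *	};
 *
 *	static int write_one_ref(struct reftable_writer *wr, void *cb_data)
 *	{
 *		struct write_args *args = cb_data;
 *		uint64_t ui = reftable_stack_next_update_index(args->stack);
 *
 *		reftable_writer_set_limits(wr, ui, ui);
 *		args->ref->update_index = ui;
 *		return reftable_writer_add_ref(wr, args->ref);
 *	}
 *
 * Such a callback would then be passed as
 * reftable_stack_add(st, write_one_ref, &args).
 */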
static int format_name(struct reftable_buf *dest, uint64_t min, uint64_t max)
	uint32_t rnd = (uint32_t)git_rand();
	snprintf(buf, sizeof(buf), "0x%012" PRIx64 "-0x%012" PRIx64 "-%08x",
		 min, max, rnd);
	reftable_buf_reset(dest);
	return reftable_buf_addstr(dest, buf);
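
/*
 * For illustration: with min == max == 1 and a random suffix of 0xdeadbeef
 * (the suffix value is of course arbitrary) this yields a name such as
 * "0x000000000001-0x000000000001-deadbeef"; the callers below append ".ref"
 * to form the final table name.
 */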
struct reftable_addition {
	struct lock_file tables_list_lock;
	struct reftable_stack *stack;

	size_t new_tables_len, new_tables_cap;
	uint64_t next_update_index;
};

#define REFTABLE_ADDITION_INIT {0}
static int reftable_stack_init_addition(struct reftable_addition *add,
					struct reftable_stack *st,
					unsigned int flags)
	struct reftable_buf lock_file_name = REFTABLE_BUF_INIT;

	err = hold_lock_file_for_update_timeout(&add->tables_list_lock,
						st->opts.lock_timeout_ms);
		if (errno == EEXIST) {
			err = REFTABLE_LOCK_ERROR;
			err = REFTABLE_IO_ERROR;

	if (st->opts.default_permissions) {
		if (chmod(get_lock_file_path(&add->tables_list_lock),
			  st->opts.default_permissions) < 0) {
			err = REFTABLE_IO_ERROR;

	err = stack_uptodate(st);
	if (err > 0 && flags & REFTABLE_STACK_NEW_ADDITION_RELOAD) {
		err = reftable_stack_reload_maybe_reuse(add->stack, 1);

		err = REFTABLE_OUTDATED_ERROR;

	add->next_update_index = reftable_stack_next_update_index(st);

	reftable_addition_close(add);
	reftable_buf_release(&lock_file_name);
static void reftable_addition_close(struct reftable_addition *add)
	struct reftable_buf nm = REFTABLE_BUF_INIT;

	for (i = 0; i < add->new_tables_len; i++) {
		if (!stack_filename(&nm, add->stack, add->new_tables[i]))

		reftable_free(add->new_tables[i]);
		add->new_tables[i] = NULL;

	reftable_free(add->new_tables);
	add->new_tables = NULL;
	add->new_tables_len = 0;
	add->new_tables_cap = 0;

	rollback_lock_file(&add->tables_list_lock);
	reftable_buf_release(&nm);
void reftable_addition_destroy(struct reftable_addition *add)
	reftable_addition_close(add);
int reftable_addition_commit(struct reftable_addition *add)
	struct reftable_buf table_list = REFTABLE_BUF_INIT;
	int lock_file_fd = get_lock_file_fd(&add->tables_list_lock);

	if (add->new_tables_len == 0)

	for (i = 0; i < add->stack->merged->readers_len; i++) {
		if ((err = reftable_buf_addstr(&table_list, add->stack->readers[i]->name)) < 0 ||
		    (err = reftable_buf_addstr(&table_list, "\n")) < 0)

	for (i = 0; i < add->new_tables_len; i++) {
		if ((err = reftable_buf_addstr(&table_list, add->new_tables[i])) < 0 ||
		    (err = reftable_buf_addstr(&table_list, "\n")) < 0)

	err = write_in_full(lock_file_fd, table_list.buf, table_list.len);
	reftable_buf_release(&table_list);
		err = REFTABLE_IO_ERROR;

	err = fsync_component(FSYNC_COMPONENT_REFERENCE, lock_file_fd);
		err = REFTABLE_IO_ERROR;

	err = commit_lock_file(&add->tables_list_lock);
		err = REFTABLE_IO_ERROR;

	/* success, no more state to clean up. */
	for (i = 0; i < add->new_tables_len; i++)
		reftable_free(add->new_tables[i]);
	reftable_free(add->new_tables);
	add->new_tables = NULL;
	add->new_tables_len = 0;
	add->new_tables_cap = 0;

	err = reftable_stack_reload_maybe_reuse(add->stack, 1);

	if (!add->stack->opts.disable_auto_compact) {
		/*
		 * Auto-compact the stack to keep the number of tables under
		 * control. It is possible that a concurrent writer is already
		 * trying to compact parts of the stack, which would lead to a
		 * `REFTABLE_LOCK_ERROR` because parts of the stack are locked
		 * already. This is a benign error though, so we ignore it.
		 */
		err = reftable_stack_auto_compact(add->stack);
		if (err < 0 && err != REFTABLE_LOCK_ERROR)

	reftable_addition_close(add);
int reftable_stack_new_addition(struct reftable_addition **dest,
				struct reftable_stack *st,
				unsigned int flags)
	struct reftable_addition empty = REFTABLE_ADDITION_INIT;

	REFTABLE_CALLOC_ARRAY(*dest, 1);
		return REFTABLE_OUT_OF_MEMORY_ERROR;

	err = reftable_stack_init_addition(*dest, st, flags);
		reftable_free(*dest);
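
/*
 * Rough usage sketch for batching several table writes into one transaction
 * (write_one_ref and args are placeholders from the sketch above; error
 * handling is trimmed):
 *
 *	struct reftable_addition *add = NULL;
 *
 *	if (!reftable_stack_new_addition(&add, st, 0)) {
 *		err = reftable_addition_add(add, write_one_ref, &args);
 *		if (!err)
 *			err = reftable_addition_commit(add);
 *		reftable_addition_destroy(add);
 *	}
 */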
static int stack_try_add(struct reftable_stack *st,
			 int (*write_table)(struct reftable_writer *wr,
					    void *arg),
			 void *arg)
	struct reftable_addition add = REFTABLE_ADDITION_INIT;
	int err = reftable_stack_init_addition(&add, st, 0);

	err = reftable_addition_add(&add, write_table, arg);

	err = reftable_addition_commit(&add);

	reftable_addition_close(&add);
int reftable_addition_add(struct reftable_addition *add,
			  int (*write_table)(struct reftable_writer *wr,
					     void *arg),
			  void *arg)
	struct reftable_buf temp_tab_file_name = REFTABLE_BUF_INIT;
	struct reftable_buf tab_file_name = REFTABLE_BUF_INIT;
	struct reftable_buf next_name = REFTABLE_BUF_INIT;
	struct reftable_writer *wr = NULL;
	struct tempfile *tab_file = NULL;

	reftable_buf_reset(&next_name);

	err = format_name(&next_name, add->next_update_index, add->next_update_index);

	err = stack_filename(&temp_tab_file_name, add->stack, next_name.buf);

	err = reftable_buf_addstr(&temp_tab_file_name, ".temp.XXXXXX");

	tab_file = mks_tempfile(temp_tab_file_name.buf);
		err = REFTABLE_IO_ERROR;

	if (add->stack->opts.default_permissions) {
		if (chmod(get_tempfile_path(tab_file),
			  add->stack->opts.default_permissions)) {
			err = REFTABLE_IO_ERROR;

	tab_fd = get_tempfile_fd(tab_file);

	err = reftable_writer_new(&wr, reftable_fd_write, reftable_fd_flush,
				  &tab_fd, &add->stack->opts);

	err = write_table(wr, arg);

	err = reftable_writer_close(wr);
	if (err == REFTABLE_EMPTY_TABLE_ERROR) {

	err = close_tempfile_gently(tab_file);
		err = REFTABLE_IO_ERROR;

	if (wr->min_update_index < add->next_update_index) {
		err = REFTABLE_API_ERROR;

	err = format_name(&next_name, wr->min_update_index, wr->max_update_index);

	err = reftable_buf_addstr(&next_name, ".ref");

	err = stack_filename(&tab_file_name, add->stack, next_name.buf);

	/*
	   On Windows, this relies on rand() picking a unique destination name.
	   Maybe we should do a retry loop as well?
	 */
	err = rename_tempfile(&tab_file, tab_file_name.buf);
		err = REFTABLE_IO_ERROR;

	REFTABLE_ALLOC_GROW(add->new_tables, add->new_tables_len + 1,
			    add->new_tables_cap);
	if (!add->new_tables) {
		err = REFTABLE_OUT_OF_MEMORY_ERROR;

	add->new_tables[add->new_tables_len++] = reftable_buf_detach(&next_name);

	delete_tempfile(&tab_file);
	reftable_buf_release(&temp_tab_file_name);
	reftable_buf_release(&tab_file_name);
	reftable_buf_release(&next_name);
	reftable_writer_free(wr);
uint64_t reftable_stack_next_update_index(struct reftable_stack *st)
	int sz = st->merged->readers_len;
		return reftable_reader_max_update_index(st->readers[sz - 1]) +
		       1;
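
/*
 * For example, if the newest table on the stack covers update indices up to
 * 5, the next addition is expected to write with update index 6; a write
 * callback would typically pass that value to reftable_writer_set_limits()
 * before adding its records, as sketched above.
 */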
static int stack_compact_locked(struct reftable_stack *st,
				size_t first, size_t last,
				struct reftable_log_expiry_config *config,
				struct tempfile **tab_file_out)
	struct reftable_buf next_name = REFTABLE_BUF_INIT;
	struct reftable_buf tab_file_path = REFTABLE_BUF_INIT;
	struct reftable_writer *wr = NULL;
	struct tempfile *tab_file;

	err = format_name(&next_name, reftable_reader_min_update_index(st->readers[first]),
			  reftable_reader_max_update_index(st->readers[last]));

	err = stack_filename(&tab_file_path, st, next_name.buf);

	err = reftable_buf_addstr(&tab_file_path, ".temp.XXXXXX");

	tab_file = mks_tempfile(tab_file_path.buf);
		err = REFTABLE_IO_ERROR;

	tab_fd = get_tempfile_fd(tab_file);

	if (st->opts.default_permissions &&
	    chmod(get_tempfile_path(tab_file), st->opts.default_permissions) < 0) {
		err = REFTABLE_IO_ERROR;

	err = reftable_writer_new(&wr, reftable_fd_write, reftable_fd_flush,
				  &tab_fd, &st->opts);

	err = stack_write_compact(st, wr, first, last, config);

	err = reftable_writer_close(wr);

	err = close_tempfile_gently(tab_file);

	*tab_file_out = tab_file;

	delete_tempfile(&tab_file);
	reftable_writer_free(wr);
	reftable_buf_release(&next_name);
	reftable_buf_release(&tab_file_path);
static int stack_write_compact(struct reftable_stack *st,
			       struct reftable_writer *wr,
			       size_t first, size_t last,
			       struct reftable_log_expiry_config *config)
	struct reftable_merged_table *mt = NULL;
	struct reftable_iterator it = { NULL };
	struct reftable_ref_record ref = { NULL };
	struct reftable_log_record log = { NULL };
	size_t subtabs_len = last - first + 1;
	uint64_t entries = 0;

	for (size_t i = first; i <= last; i++)
		st->stats.bytes += st->readers[i]->size;
	reftable_writer_set_limits(wr, st->readers[first]->min_update_index,
				   st->readers[last]->max_update_index);

	err = reftable_merged_table_new(&mt, st->readers + first, subtabs_len,
					st->opts.hash_id);

	err = merged_table_init_iter(mt, &it, BLOCK_TYPE_REF);

	err = reftable_iterator_seek_ref(&it, "");

		err = reftable_iterator_next_ref(&it, &ref);

		if (first == 0 && reftable_ref_record_is_deletion(&ref)) {

		err = reftable_writer_add_ref(wr, &ref);

	reftable_iterator_destroy(&it);

	err = merged_table_init_iter(mt, &it, BLOCK_TYPE_LOG);

	err = reftable_iterator_seek_log(&it, "");

		err = reftable_iterator_next_log(&it, &log);

		if (first == 0 && reftable_log_record_is_deletion(&log)) {

		if (config && config->min_update_index > 0 &&
		    log.update_index < config->min_update_index) {

		if (config && config->time > 0 &&
		    log.value.update.time < config->time) {

		err = reftable_writer_add_log(wr, &log);

	reftable_iterator_destroy(&it);

	reftable_merged_table_free(mt);
	reftable_ref_record_release(&ref);
	reftable_log_record_release(&log);
	st->stats.entries_written += entries;
enum stack_compact_range_flags {
	/*
	 * Perform a best-effort compaction. That is, even if we cannot lock
	 * all tables in the specified range, we will try to compact the
	 * remaining slice.
	 */
	STACK_COMPACT_RANGE_BEST_EFFORT = (1 << 0),
};

/*
 * Compact all tables in the range `[first, last]` into a single new table.
 *
 * This function returns `0` on success or a code `< 0` on failure. When the
 * stack or any of the tables in the specified range are already locked then
 * this function returns `REFTABLE_LOCK_ERROR`. This is a benign error that
 * callers can either ignore, or they may choose to retry compaction after some
 * amount of time.
 */
static int stack_compact_range(struct reftable_stack *st,
			       size_t first, size_t last,
			       struct reftable_log_expiry_config *expiry,
			       unsigned int flags)
	struct reftable_buf tables_list_buf = REFTABLE_BUF_INIT;
	struct reftable_buf new_table_name = REFTABLE_BUF_INIT;
	struct reftable_buf new_table_path = REFTABLE_BUF_INIT;
	struct reftable_buf table_name = REFTABLE_BUF_INIT;
	struct lock_file tables_list_lock = LOCK_INIT;
	struct lock_file *table_locks = NULL;
	struct tempfile *new_table = NULL;
	int is_empty_table = 0, err = 0;
	size_t first_to_replace, last_to_replace;
	size_t i, nlocks = 0;
	char **names = NULL;

	if (first > last || (!expiry && first == last)) {

	st->stats.attempts++;

	/*
	 * Hold the lock so that we can read "tables.list" and lock all tables
	 * which are part of the user-specified range.
	 */
	err = hold_lock_file_for_update_timeout(&tables_list_lock,
						st->opts.lock_timeout_ms);
		if (errno == EEXIST)
			err = REFTABLE_LOCK_ERROR;
			err = REFTABLE_IO_ERROR;

	err = stack_uptodate(st);

	/*
	 * Lock all tables in the user-provided range. This is the slice of our
	 * stack which we'll compact.
	 *
	 * Note that we lock tables in reverse order from last to first. The
	 * intent behind this is to allow a newer process to perform best
	 * effort compaction of tables that it has added in the case where an
	 * older process is still busy compacting tables which are preexisting
	 * from the point of view of the newer process.
	 */
	REFTABLE_CALLOC_ARRAY(table_locks, last - first + 1);
		err = REFTABLE_OUT_OF_MEMORY_ERROR;

	for (i = last + 1; i > first; i--) {
		err = stack_filename(&table_name, st, reader_name(st->readers[i - 1]));

		err = hold_lock_file_for_update(&table_locks[nlocks],
						table_name.buf, LOCK_NO_DEREF);
			/*
			 * When the table is locked already we may do a
			 * best-effort compaction and compact only the tables
			 * that we have managed to lock so far. This of course
			 * requires that we have been able to lock at least two
			 * tables, otherwise there would be nothing to compact.
			 * In that case, we return a lock error to our caller.
			 */
			if (errno == EEXIST && last - (i - 1) >= 2 &&
			    flags & STACK_COMPACT_RANGE_BEST_EFFORT) {
				/*
				 * The subtraction is to offset the index, the
				 * addition is to only compact up to the table
				 * of the preceding iteration. They obviously
				 * cancel each other out, but that may be
				 * non-obvious when it was omitted.
				 */
				first = (i - 1) + 1;
			} else if (errno == EEXIST) {
				err = REFTABLE_LOCK_ERROR;
				err = REFTABLE_IO_ERROR;

		/*
		 * We need to close the lockfiles as we might otherwise easily
		 * run into file descriptor exhaustion when we compress a lot
		 * of tables.
		 */
		err = close_lock_file_gently(&table_locks[nlocks++]);
			err = REFTABLE_IO_ERROR;

	/*
	 * We have locked all tables in our range and can thus release the
	 * "tables.list" lock while compacting the locked tables. This allows
	 * concurrent updates to the stack to proceed.
	 */
	err = rollback_lock_file(&tables_list_lock);
		err = REFTABLE_IO_ERROR;

	/*
	 * Compact the now-locked tables into a new table. Note that compacting
	 * these tables may end up with an empty new table in case tombstones
	 * end up cancelling out all refs in that range.
	 */
	err = stack_compact_locked(st, first, last, expiry, &new_table);
		if (err != REFTABLE_EMPTY_TABLE_ERROR)

	/*
	 * Now that we have written the new, compacted table we need to re-lock
	 * "tables.list". We'll then replace the compacted range of tables with
	 * the new table.
	 */
	err = hold_lock_file_for_update_timeout(&tables_list_lock,
						st->opts.lock_timeout_ms);
		if (errno == EEXIST)
			err = REFTABLE_LOCK_ERROR;
			err = REFTABLE_IO_ERROR;

	if (st->opts.default_permissions) {
		if (chmod(get_lock_file_path(&tables_list_lock),
			  st->opts.default_permissions) < 0) {
			err = REFTABLE_IO_ERROR;

	/*
	 * As we have unlocked the stack while compacting our slice of tables
	 * it may have happened that a concurrently running process has updated
	 * the stack while we were compacting. In that case, we need to check
	 * whether the tables that we have just compacted still exist in the
	 * stack in the exact same order as we have compacted them.
	 *
	 * If they do exist, then it is fine to continue and replace those
	 * tables with our compacted version. If they don't, then we need to
	 * abort.
	 */
	err = stack_uptodate(st);
		ssize_t new_offset = -1;

		fd = open(st->list_file, O_RDONLY);
			err = REFTABLE_IO_ERROR;

		err = fd_read_lines(fd, &names);

		/*
		 * Search for the offset of the first table that we have
		 * compacted in the updated "tables.list" file.
		 */
		for (size_t i = 0; names[i]; i++) {
			if (strcmp(names[i], st->readers[first]->name))

			/*
			 * We have found the first entry. Verify that all the
			 * subsequent tables we have compacted still exist in
			 * the modified stack in the exact same order as we
			 * have compacted them.
			 */
			for (size_t j = 1; j < last - first + 1; j++) {
				const char *old = first + j < st->merged->readers_len ?
					st->readers[first + j]->name : NULL;
				const char *new = names[i + j];

				/*
				 * If some entries are missing or in case the tables
				 * have changed then we need to bail out. Again, this
				 * shouldn't ever happen because we have locked the
				 * tables we are compacting.
				 */
				if (!old || !new || strcmp(old, new)) {
					err = REFTABLE_OUTDATED_ERROR;

		/*
		 * In case we didn't find our compacted tables in the stack we
		 * need to bail out. In theory, this should have never happened
		 * because we locked the tables we are compacting.
		 */
		if (new_offset < 0) {
			err = REFTABLE_OUTDATED_ERROR;

		/*
		 * We have found the new range that we want to replace, so
		 * let's update the range of tables that we want to replace.
		 */
		first_to_replace = new_offset;
		last_to_replace = last + (new_offset - first);

		/*
		 * `fd_read_lines()` uses a `NULL` sentinel to indicate that
		 * the array is at its end. As we use `free_names()` to free
		 * the array, we need to include this sentinel value here and
		 * thus have to allocate `readers_len + 1` many entries.
		 */
		REFTABLE_CALLOC_ARRAY(names, st->merged->readers_len + 1);
			err = REFTABLE_OUT_OF_MEMORY_ERROR;

		for (size_t i = 0; i < st->merged->readers_len; i++) {
			names[i] = reftable_strdup(st->readers[i]->name);
				err = REFTABLE_OUT_OF_MEMORY_ERROR;

		first_to_replace = first;
		last_to_replace = last;

	/*
	 * If the resulting compacted table is not empty, then we need to move
	 * it into place now.
	 */
	if (!is_empty_table) {
		err = format_name(&new_table_name, st->readers[first]->min_update_index,
				  st->readers[last]->max_update_index);

		err = reftable_buf_addstr(&new_table_name, ".ref");

		err = stack_filename(&new_table_path, st, new_table_name.buf);

		err = rename_tempfile(&new_table, new_table_path.buf);
			err = REFTABLE_IO_ERROR;

	/*
	 * Write the new "tables.list" contents with the compacted table we
	 * have just written. In case the compacted table became empty we
	 * simply skip writing it.
	 */
	for (i = 0; i < first_to_replace; i++) {
		if ((err = reftable_buf_addstr(&tables_list_buf, names[i])) < 0 ||
		    (err = reftable_buf_addstr(&tables_list_buf, "\n")) < 0)

	if (!is_empty_table) {
		if ((err = reftable_buf_addstr(&tables_list_buf, new_table_name.buf)) < 0 ||
		    (err = reftable_buf_addstr(&tables_list_buf, "\n")) < 0)

	for (i = last_to_replace + 1; names[i]; i++) {
		if ((err = reftable_buf_addstr(&tables_list_buf, names[i])) < 0 ||
		    (err = reftable_buf_addstr(&tables_list_buf, "\n")) < 0)

	err = write_in_full(get_lock_file_fd(&tables_list_lock),
			    tables_list_buf.buf, tables_list_buf.len);
		err = REFTABLE_IO_ERROR;
		unlink(new_table_path.buf);

	err = fsync_component(FSYNC_COMPONENT_REFERENCE, get_lock_file_fd(&tables_list_lock));
		err = REFTABLE_IO_ERROR;
		unlink(new_table_path.buf);

	err = commit_lock_file(&tables_list_lock);
		err = REFTABLE_IO_ERROR;
		unlink(new_table_path.buf);

	/*
	 * Reload the stack before deleting the compacted tables. We can only
	 * delete the files after we closed them on Windows, so this needs to
	 * happen first.
	 */
	err = reftable_stack_reload_maybe_reuse(st, first < last);

	/*
	 * Delete the old tables. They may still be in use by concurrent
	 * readers, so it is expected that unlinking tables may fail.
	 */
	for (i = 0; i < nlocks; i++) {
		struct lock_file *table_lock = &table_locks[i];
		char *table_path = get_locked_file_path(table_lock);

		reftable_free(table_path);

	rollback_lock_file(&tables_list_lock);
	for (i = 0; table_locks && i < nlocks; i++)
		rollback_lock_file(&table_locks[i]);
	reftable_free(table_locks);

	delete_tempfile(&new_table);
	reftable_buf_release(&new_table_name);
	reftable_buf_release(&new_table_path);
	reftable_buf_release(&tables_list_buf);
	reftable_buf_release(&table_name);

	if (err == REFTABLE_LOCK_ERROR)
		st->stats.failures++;
int reftable_stack_compact_all(struct reftable_stack *st,
			       struct reftable_log_expiry_config *config)
	size_t last = st->merged->readers_len ? st->merged->readers_len - 1 : 0;
	return stack_compact_range(st, 0, last, config, 0);
static int segment_size(struct segment *s)
	return s->end - s->start;
struct segment suggest_compaction_segment(uint64_t *sizes, size_t n,
					  uint8_t factor)
	struct segment seg = { 0 };

		factor = DEFAULT_GEOMETRIC_FACTOR;

	/*
	 * If there are no tables or only a single one then we don't have to
	 * compact anything. The sequence is geometric by definition already.
	 */

	/*
	 * Find the ending table of the compaction segment needed to restore the
	 * geometric sequence. Note that the segment end is exclusive.
	 *
	 * To do so, we iterate backwards starting from the most recent table
	 * until a valid segment end is found. If the preceding table is smaller
	 * than the current table multiplied by the geometric factor (2), the
	 * compaction segment end has been identified.
	 *
	 * Tables after the ending point are not added to the byte count because
	 * they are already valid members of the geometric sequence. Due to the
	 * properties of a geometric sequence, it is not possible for the sum of
	 * these tables to exceed the value of the ending point table.
	 *
	 * Example table size sequence requiring no compaction:
	 * 	64, 32, 16, 8, 4, 2, 1
	 *
	 * Example table size sequence where compaction segment end is set to
	 * the last table. Since the segment end is exclusive, the last table is
	 * excluded during subsequent compaction and the table with size 3 is
	 * the final table included:
	 * 	64, 32, 16, 8, 4, 3, 1
	 */
	for (i = n - 1; i > 0; i--) {
		if (sizes[i - 1] < sizes[i] * factor) {

	/*
	 * Find the starting table of the compaction segment by iterating
	 * through the remaining tables and keeping track of the accumulated
	 * size of all tables seen from the segment end table. The previous
	 * table is compared to the accumulated size because the tables from the
	 * segment end are merged backwards recursively.
	 *
	 * Note that we keep iterating even after we have found the first
	 * starting point. This is because there may be tables in the stack
	 * preceding that first starting point which violate the geometric
	 * sequence.
	 *
	 * Example compaction segment start set to table with size 32:
	 * 	128, 32, 16, 8, 4, 3, 1
	 */
	for (; i > 0; i--) {
		uint64_t curr = bytes;
		bytes += sizes[i - 1];

		if (sizes[i - 1] < curr * factor) {
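
/*
 * To make the two passes above concrete: with factor 2 and table sizes
 * 128, 32, 16, 8, 4, 3, 1 (newest last), the first loop places the exclusive
 * segment end just before the trailing 1, and the second loop walks back
 * until it reaches the table of size 32, so the suggested segment covers the
 * tables sized 32 through 3 while the 128 and the trailing 1 stay untouched.
 */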
static uint64_t *stack_table_sizes_for_compaction(struct reftable_stack *st)
	int version = (st->opts.hash_id == GIT_SHA1_FORMAT_ID) ? 1 : 2;
	int overhead = header_size(version) - 1;

	REFTABLE_CALLOC_ARRAY(sizes, st->merged->readers_len);

	for (size_t i = 0; i < st->merged->readers_len; i++)
		sizes[i] = st->readers[i]->size - overhead;
int reftable_stack_auto_compact(struct reftable_stack *st)
	sizes = stack_table_sizes_for_compaction(st);
		return REFTABLE_OUT_OF_MEMORY_ERROR;

	seg = suggest_compaction_segment(sizes, st->merged->readers_len,
					 st->opts.auto_compaction_factor);
	reftable_free(sizes);

	if (segment_size(&seg) > 0)
		return stack_compact_range(st, seg.start, seg.end - 1,
					   NULL, STACK_COMPACT_RANGE_BEST_EFFORT);
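
/*
 * Callers that batch their own writes typically invoke this once the batch
 * has been committed; as noted in reftable_addition_commit() above, a
 * returned REFTABLE_LOCK_ERROR merely means a concurrent process holds some
 * of the table locks and is safe to ignore.
 */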
struct reftable_compaction_stats *
reftable_stack_compaction_stats(struct reftable_stack *st)
int reftable_stack_read_ref(struct reftable_stack *st, const char *refname,
			    struct reftable_ref_record *ref)
	struct reftable_iterator it = { 0 };

	ret = reftable_merged_table_init_ref_iterator(st->merged, &it);

	ret = reftable_iterator_seek_ref(&it, refname);

	ret = reftable_iterator_next_ref(&it, ref);

	if (strcmp(ref->refname, refname) ||
	    reftable_ref_record_is_deletion(ref)) {
		reftable_ref_record_release(ref);

	reftable_iterator_destroy(&it);
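
/*
 * Minimal lookup sketch (the ref name is only an example; a non-zero return
 * means the ref was not found or an error occurred):
 *
 *	struct reftable_ref_record ref = { 0 };
 *
 *	if (!reftable_stack_read_ref(st, "refs/heads/main", &ref)) {
 *		... inspect the record ...
 *		reftable_ref_record_release(&ref);
 *	}
 */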
int reftable_stack_read_log(struct reftable_stack *st, const char *refname,
			    struct reftable_log_record *log)
	struct reftable_iterator it = {0};

	err = reftable_stack_init_log_iterator(st, &it);

	err = reftable_iterator_seek_log(&it, refname);

	err = reftable_iterator_next_log(&it, log);

	if (strcmp(log->refname, refname) ||
	    reftable_log_record_is_deletion(log)) {

	reftable_log_record_release(log);
	reftable_iterator_destroy(&it);
static int is_table_name(const char *s)
	const char *dot = strrchr(s, '.');
	return dot && !strcmp(dot, ".ref");
static void remove_maybe_stale_table(struct reftable_stack *st, uint64_t max,
				     const char *name)
	uint64_t update_idx = 0;
	struct reftable_block_source src = { NULL };
	struct reftable_reader *rd = NULL;
	struct reftable_buf table_path = REFTABLE_BUF_INIT;

	err = stack_filename(&table_path, st, name);

	err = reftable_block_source_from_file(&src, table_path.buf);

	err = reftable_reader_new(&rd, &src, name);

	update_idx = reftable_reader_max_update_index(rd);
	reftable_reader_decref(rd);

	if (update_idx <= max) {
		unlink(table_path.buf);

	reftable_buf_release(&table_path);
static int reftable_stack_clean_locked(struct reftable_stack *st)
	uint64_t max = reftable_merged_table_max_update_index(
		reftable_stack_merged_table(st));
	DIR *dir = opendir(st->reftable_dir);
	struct dirent *d = NULL;

		return REFTABLE_IO_ERROR;

	while ((d = readdir(dir))) {
		if (!is_table_name(d->d_name))

		for (i = 0; !found && i < st->readers_len; i++) {
			found = !strcmp(reader_name(st->readers[i]), d->d_name);

		remove_maybe_stale_table(st, max, d->d_name);
int reftable_stack_clean(struct reftable_stack *st)
	struct reftable_addition *add = NULL;
	int err = reftable_stack_new_addition(&add, st, 0);

	err = reftable_stack_reload(st);

	err = reftable_stack_clean_locked(st);

	reftable_addition_destroy(add);