2 Copyright 2020 Google LLC
4 Use of this source code is governed by a BSD-style
5 license that can be found in the LICENSE file or at
6 https://developers.google.com/open-source/licenses/bsd
14 #include "constants.h"
17 #include "reftable-error.h"
19 /* finishes a block, and writes it to storage */
20 static int writer_flush_block(struct reftable_writer
*w
);
22 /* deallocates memory related to the index */
23 static void writer_clear_index(struct reftable_writer
*w
);
25 /* finishes writing a 'r' (refs) or 'g' (reflogs) section */
26 static int writer_finish_public_section(struct reftable_writer
*w
);
28 static struct reftable_block_stats
*
29 writer_reftable_block_stats(struct reftable_writer
*w
, uint8_t typ
)
33 return &w
->stats
.ref_stats
;
35 return &w
->stats
.obj_stats
;
37 return &w
->stats
.idx_stats
;
39 return &w
->stats
.log_stats
;
45 /* write data, queuing the padding for the next write. Returns negative for
47 static int padded_write(struct reftable_writer
*w
, uint8_t *data
, size_t len
,
51 if (w
->pending_padding
> 0) {
55 zeroed
= reftable_calloc(w
->pending_padding
, sizeof(*zeroed
));
59 n
= w
->write(w
->write_arg
, zeroed
, w
->pending_padding
);
63 w
->pending_padding
= 0;
64 reftable_free(zeroed
);
67 w
->pending_padding
= padding
;
68 n
= w
->write(w
->write_arg
, data
, len
);
75 static void options_set_defaults(struct reftable_write_options
*opts
)
77 if (opts
->restart_interval
== 0) {
78 opts
->restart_interval
= 16;
81 if (opts
->hash_id
== 0) {
82 opts
->hash_id
= GIT_SHA1_FORMAT_ID
;
84 if (opts
->block_size
== 0) {
85 opts
->block_size
= DEFAULT_BLOCK_SIZE
;
89 static int writer_version(struct reftable_writer
*w
)
91 return (w
->opts
.hash_id
== 0 || w
->opts
.hash_id
== GIT_SHA1_FORMAT_ID
) ?
96 static int writer_write_header(struct reftable_writer
*w
, uint8_t *dest
)
98 memcpy(dest
, "REFT", 4);
100 dest
[4] = writer_version(w
);
102 put_be24(dest
+ 5, w
->opts
.block_size
);
103 put_be64(dest
+ 8, w
->min_update_index
);
104 put_be64(dest
+ 16, w
->max_update_index
);
105 if (writer_version(w
) == 2) {
106 put_be32(dest
+ 24, w
->opts
.hash_id
);
108 return header_size(writer_version(w
));
111 static int writer_reinit_block_writer(struct reftable_writer
*w
, uint8_t typ
)
113 int block_start
= 0, ret
;
116 block_start
= header_size(writer_version(w
));
118 reftable_buf_reset(&w
->last_key
);
119 ret
= block_writer_init(&w
->block_writer_data
, typ
, w
->block
,
120 w
->opts
.block_size
, block_start
,
121 hash_size(w
->opts
.hash_id
));
125 w
->block_writer
= &w
->block_writer_data
;
126 w
->block_writer
->restart_interval
= w
->opts
.restart_interval
;
131 int reftable_writer_new(struct reftable_writer
**out
,
132 ssize_t (*writer_func
)(void *, const void *, size_t),
133 int (*flush_func
)(void *),
134 void *writer_arg
, const struct reftable_write_options
*_opts
)
136 struct reftable_write_options opts
= {0};
137 struct reftable_writer
*wp
;
139 wp
= reftable_calloc(1, sizeof(*wp
));
141 return REFTABLE_OUT_OF_MEMORY_ERROR
;
145 options_set_defaults(&opts
);
146 if (opts
.block_size
>= (1 << 24))
147 BUG("configured block size exceeds 16MB");
149 reftable_buf_init(&wp
->block_writer_data
.last_key
);
150 reftable_buf_init(&wp
->last_key
);
151 REFTABLE_CALLOC_ARRAY(wp
->block
, opts
.block_size
);
154 return REFTABLE_OUT_OF_MEMORY_ERROR
;
156 wp
->write
= writer_func
;
157 wp
->write_arg
= writer_arg
;
159 wp
->flush
= flush_func
;
160 writer_reinit_block_writer(wp
, BLOCK_TYPE_REF
);
167 void reftable_writer_set_limits(struct reftable_writer
*w
, uint64_t min
,
170 w
->min_update_index
= min
;
171 w
->max_update_index
= max
;
174 static void writer_release(struct reftable_writer
*w
)
177 reftable_free(w
->block
);
179 block_writer_release(&w
->block_writer_data
);
180 w
->block_writer
= NULL
;
181 writer_clear_index(w
);
182 reftable_buf_release(&w
->last_key
);
186 void reftable_writer_free(struct reftable_writer
*w
)
192 struct obj_index_tree_node
{
193 struct reftable_buf hash
;
199 #define OBJ_INDEX_TREE_NODE_INIT \
201 .hash = REFTABLE_BUF_INIT \
204 static int obj_index_tree_node_compare(const void *a
, const void *b
)
206 return reftable_buf_cmp(&((const struct obj_index_tree_node
*)a
)->hash
,
207 &((const struct obj_index_tree_node
*)b
)->hash
);
210 static int writer_index_hash(struct reftable_writer
*w
, struct reftable_buf
*hash
)
212 uint64_t off
= w
->next
;
213 struct obj_index_tree_node want
= { .hash
= *hash
};
214 struct obj_index_tree_node
*key
;
215 struct tree_node
*node
;
217 node
= tree_search(w
->obj_index_tree
, &want
, &obj_index_tree_node_compare
);
219 struct obj_index_tree_node empty
= OBJ_INDEX_TREE_NODE_INIT
;
222 key
= reftable_malloc(sizeof(*key
));
224 return REFTABLE_OUT_OF_MEMORY_ERROR
;
228 reftable_buf_reset(&key
->hash
);
229 err
= reftable_buf_add(&key
->hash
, hash
->buf
, hash
->len
);
232 tree_insert(&w
->obj_index_tree
, key
,
233 &obj_index_tree_node_compare
);
238 if (key
->offset_len
> 0 && key
->offsets
[key
->offset_len
- 1] == off
)
241 REFTABLE_ALLOC_GROW(key
->offsets
, key
->offset_len
+ 1, key
->offset_cap
);
243 return REFTABLE_OUT_OF_MEMORY_ERROR
;
244 key
->offsets
[key
->offset_len
++] = off
;
249 static int writer_add_record(struct reftable_writer
*w
,
250 struct reftable_record
*rec
)
252 struct reftable_buf key
= REFTABLE_BUF_INIT
;
255 err
= reftable_record_key(rec
, &key
);
259 if (reftable_buf_cmp(&w
->last_key
, &key
) >= 0) {
260 err
= REFTABLE_API_ERROR
;
264 reftable_buf_reset(&w
->last_key
);
265 err
= reftable_buf_add(&w
->last_key
, key
.buf
, key
.len
);
269 if (!w
->block_writer
) {
270 err
= writer_reinit_block_writer(w
, reftable_record_type(rec
));
275 if (block_writer_type(w
->block_writer
) != reftable_record_type(rec
))
276 BUG("record of type %d added to writer of type %d",
277 reftable_record_type(rec
), block_writer_type(w
->block_writer
));
280 * Try to add the record to the writer. If this succeeds then we're
281 * done. Otherwise the block writer may have hit the block size limit
282 * and needs to be flushed.
284 if (!block_writer_add(w
->block_writer
, rec
)) {
290 * The current block is full, so we need to flush and reinitialize the
291 * writer to start writing the next block.
293 err
= writer_flush_block(w
);
296 err
= writer_reinit_block_writer(w
, reftable_record_type(rec
));
301 * Try to add the record to the writer again. If this still fails then
302 * the record does not fit into the block size.
304 * TODO: it would be great to have `block_writer_add()` return proper
305 * error codes so that we don't have to second-guess the failure
308 err
= block_writer_add(w
->block_writer
, rec
);
310 err
= REFTABLE_ENTRY_TOO_BIG_ERROR
;
315 reftable_buf_release(&key
);
319 int reftable_writer_add_ref(struct reftable_writer
*w
,
320 struct reftable_ref_record
*ref
)
322 struct reftable_record rec
= {
323 .type
= BLOCK_TYPE_REF
,
328 struct reftable_buf buf
= REFTABLE_BUF_INIT
;
332 ref
->update_index
< w
->min_update_index
||
333 ref
->update_index
> w
->max_update_index
)
334 return REFTABLE_API_ERROR
;
336 rec
.u
.ref
.update_index
-= w
->min_update_index
;
338 err
= writer_add_record(w
, &rec
);
342 if (!w
->opts
.skip_index_objects
&& reftable_ref_record_val1(ref
)) {
343 err
= reftable_buf_add(&buf
, (char *)reftable_ref_record_val1(ref
),
344 hash_size(w
->opts
.hash_id
));
348 err
= writer_index_hash(w
, &buf
);
353 if (!w
->opts
.skip_index_objects
&& reftable_ref_record_val2(ref
)) {
354 reftable_buf_reset(&buf
);
355 err
= reftable_buf_add(&buf
, reftable_ref_record_val2(ref
),
356 hash_size(w
->opts
.hash_id
));
360 err
= writer_index_hash(w
, &buf
);
368 reftable_buf_release(&buf
);
372 int reftable_writer_add_refs(struct reftable_writer
*w
,
373 struct reftable_ref_record
*refs
, int n
)
377 QSORT(refs
, n
, reftable_ref_record_compare_name
);
378 for (i
= 0; err
== 0 && i
< n
; i
++) {
379 err
= reftable_writer_add_ref(w
, &refs
[i
]);
384 static int reftable_writer_add_log_verbatim(struct reftable_writer
*w
,
385 struct reftable_log_record
*log
)
387 struct reftable_record rec
= {
388 .type
= BLOCK_TYPE_LOG
,
393 if (w
->block_writer
&&
394 block_writer_type(w
->block_writer
) == BLOCK_TYPE_REF
) {
395 int err
= writer_finish_public_section(w
);
400 w
->next
-= w
->pending_padding
;
401 w
->pending_padding
= 0;
402 return writer_add_record(w
, &rec
);
405 int reftable_writer_add_log(struct reftable_writer
*w
,
406 struct reftable_log_record
*log
)
408 char *input_log_message
= NULL
;
409 struct reftable_buf cleaned_message
= REFTABLE_BUF_INIT
;
412 if (log
->value_type
== REFTABLE_LOG_DELETION
)
413 return reftable_writer_add_log_verbatim(w
, log
);
416 return REFTABLE_API_ERROR
;
418 input_log_message
= log
->value
.update
.message
;
419 if (!w
->opts
.exact_log_message
&& log
->value
.update
.message
) {
420 err
= reftable_buf_addstr(&cleaned_message
, log
->value
.update
.message
);
424 while (cleaned_message
.len
&&
425 cleaned_message
.buf
[cleaned_message
.len
- 1] == '\n') {
426 err
= reftable_buf_setlen(&cleaned_message
,
427 cleaned_message
.len
- 1);
431 if (strchr(cleaned_message
.buf
, '\n')) {
432 /* multiple lines not allowed. */
433 err
= REFTABLE_API_ERROR
;
437 err
= reftable_buf_addstr(&cleaned_message
, "\n");
441 log
->value
.update
.message
= cleaned_message
.buf
;
444 err
= reftable_writer_add_log_verbatim(w
, log
);
445 log
->value
.update
.message
= input_log_message
;
447 reftable_buf_release(&cleaned_message
);
451 int reftable_writer_add_logs(struct reftable_writer
*w
,
452 struct reftable_log_record
*logs
, int n
)
456 QSORT(logs
, n
, reftable_log_record_compare_key
);
458 for (i
= 0; err
== 0 && i
< n
; i
++) {
459 err
= reftable_writer_add_log(w
, &logs
[i
]);
464 static int writer_finish_section(struct reftable_writer
*w
)
466 struct reftable_block_stats
*bstats
= NULL
;
467 uint8_t typ
= block_writer_type(w
->block_writer
);
468 uint64_t index_start
= 0;
470 size_t threshold
= w
->opts
.unpadded
? 1 : 3;
471 int before_blocks
= w
->stats
.idx_stats
.blocks
;
474 err
= writer_flush_block(w
);
479 * When the section we are about to index has a lot of blocks then the
480 * index itself may span across multiple blocks, as well. This would
481 * require a linear scan over index blocks only to find the desired
482 * indexed block, which is inefficient. Instead, we write a multi-level
483 * index where index records of level N+1 will refer to index blocks of
484 * level N. This isn't constant time, either, but at least logarithmic.
486 * This loop handles writing this multi-level index. Note that we write
487 * the lowest-level index pointing to the indexed blocks first. We then
488 * continue writing additional index levels until the current level has
489 * less blocks than the threshold so that the highest level will be at
490 * the end of the index section.
492 * Readers are thus required to start reading the index section from
493 * its end, which is why we set `index_start` to the beginning of the
494 * last index section.
496 while (w
->index_len
> threshold
) {
497 struct reftable_index_record
*idx
= NULL
;
501 index_start
= w
->next
;
502 err
= writer_reinit_block_writer(w
, BLOCK_TYPE_INDEX
);
507 idx_len
= w
->index_len
;
512 for (i
= 0; i
< idx_len
; i
++) {
513 struct reftable_record rec
= {
514 .type
= BLOCK_TYPE_INDEX
,
520 err
= writer_add_record(w
, &rec
);
525 err
= writer_flush_block(w
);
529 for (i
= 0; i
< idx_len
; i
++)
530 reftable_buf_release(&idx
[i
].last_key
);
535 * The index may still contain a number of index blocks lower than the
536 * threshold. Clear it so that these entries don't leak into the next
539 writer_clear_index(w
);
541 bstats
= writer_reftable_block_stats(w
, typ
);
542 bstats
->index_blocks
= w
->stats
.idx_stats
.blocks
- before_blocks
;
543 bstats
->index_offset
= index_start
;
544 bstats
->max_index_level
= max_level
;
546 /* Reinit lastKey, as the next section can start with any key. */
547 reftable_buf_reset(&w
->last_key
);
552 struct common_prefix_arg
{
553 struct reftable_buf
*last
;
557 static void update_common(void *void_arg
, void *key
)
559 struct common_prefix_arg
*arg
= void_arg
;
560 struct obj_index_tree_node
*entry
= key
;
562 int n
= common_prefix_size(&entry
->hash
, arg
->last
);
567 arg
->last
= &entry
->hash
;
570 struct write_record_arg
{
571 struct reftable_writer
*w
;
575 static void write_object_record(void *void_arg
, void *key
)
577 struct write_record_arg
*arg
= void_arg
;
578 struct obj_index_tree_node
*entry
= key
;
579 struct reftable_record
580 rec
= { .type
= BLOCK_TYPE_OBJ
,
582 .hash_prefix
= (uint8_t *)entry
->hash
.buf
,
583 .hash_prefix_len
= arg
->w
->stats
.object_id_len
,
584 .offsets
= entry
->offsets
,
585 .offset_len
= entry
->offset_len
,
590 arg
->err
= block_writer_add(arg
->w
->block_writer
, &rec
);
594 arg
->err
= writer_flush_block(arg
->w
);
598 arg
->err
= writer_reinit_block_writer(arg
->w
, BLOCK_TYPE_OBJ
);
602 arg
->err
= block_writer_add(arg
->w
->block_writer
, &rec
);
606 rec
.u
.obj
.offset_len
= 0;
607 arg
->err
= block_writer_add(arg
->w
->block_writer
, &rec
);
609 /* Should be able to write into a fresh block. */
610 assert(arg
->err
== 0);
615 static void object_record_free(void *void_arg UNUSED
, void *key
)
617 struct obj_index_tree_node
*entry
= key
;
619 REFTABLE_FREE_AND_NULL(entry
->offsets
);
620 reftable_buf_release(&entry
->hash
);
621 reftable_free(entry
);
624 static int writer_dump_object_index(struct reftable_writer
*w
)
626 struct write_record_arg closure
= { .w
= w
};
627 struct common_prefix_arg common
= {
628 .max
= 1, /* obj_id_len should be >= 2. */
632 if (w
->obj_index_tree
)
633 infix_walk(w
->obj_index_tree
, &update_common
, &common
);
634 w
->stats
.object_id_len
= common
.max
+ 1;
636 err
= writer_reinit_block_writer(w
, BLOCK_TYPE_OBJ
);
640 if (w
->obj_index_tree
)
641 infix_walk(w
->obj_index_tree
, &write_object_record
, &closure
);
645 return writer_finish_section(w
);
648 static int writer_finish_public_section(struct reftable_writer
*w
)
653 if (!w
->block_writer
)
656 typ
= block_writer_type(w
->block_writer
);
657 err
= writer_finish_section(w
);
660 if (typ
== BLOCK_TYPE_REF
&& !w
->opts
.skip_index_objects
&&
661 w
->stats
.ref_stats
.index_blocks
> 0) {
662 err
= writer_dump_object_index(w
);
667 if (w
->obj_index_tree
) {
668 infix_walk(w
->obj_index_tree
, &object_record_free
, NULL
);
669 tree_free(w
->obj_index_tree
);
670 w
->obj_index_tree
= NULL
;
673 w
->block_writer
= NULL
;
677 int reftable_writer_close(struct reftable_writer
*w
)
681 int err
= writer_finish_public_section(w
);
682 int empty_table
= w
->next
== 0;
685 w
->pending_padding
= 0;
687 /* Empty tables need a header anyway. */
689 int n
= writer_write_header(w
, header
);
690 err
= padded_write(w
, header
, n
, 0);
695 p
+= writer_write_header(w
, footer
);
696 put_be64(p
, w
->stats
.ref_stats
.index_offset
);
698 put_be64(p
, (w
->stats
.obj_stats
.offset
) << 5 | w
->stats
.object_id_len
);
700 put_be64(p
, w
->stats
.obj_stats
.index_offset
);
703 put_be64(p
, w
->stats
.log_stats
.offset
);
705 put_be64(p
, w
->stats
.log_stats
.index_offset
);
708 put_be32(p
, crc32(0, footer
, p
- footer
));
711 err
= w
->flush(w
->write_arg
);
713 err
= REFTABLE_IO_ERROR
;
717 err
= padded_write(w
, footer
, footer_size(writer_version(w
)), 0);
722 err
= REFTABLE_EMPTY_TABLE_ERROR
;
731 static void writer_clear_index(struct reftable_writer
*w
)
733 for (size_t i
= 0; w
->index
&& i
< w
->index_len
; i
++)
734 reftable_buf_release(&w
->index
[i
].last_key
);
735 REFTABLE_FREE_AND_NULL(w
->index
);
740 static int writer_flush_nonempty_block(struct reftable_writer
*w
)
742 struct reftable_index_record index_record
= {
743 .last_key
= REFTABLE_BUF_INIT
,
745 uint8_t typ
= block_writer_type(w
->block_writer
);
746 struct reftable_block_stats
*bstats
;
747 int raw_bytes
, padding
= 0, err
;
748 uint64_t block_typ_off
;
751 * Finish the current block. This will cause the block writer to emit
752 * restart points and potentially compress records in case we are
753 * writing a log block.
755 * Note that this is still happening in memory.
757 raw_bytes
= block_writer_finish(w
->block_writer
);
762 * By default, all records except for log records are padded to the
765 if (!w
->opts
.unpadded
&& typ
!= BLOCK_TYPE_LOG
)
766 padding
= w
->opts
.block_size
- raw_bytes
;
768 bstats
= writer_reftable_block_stats(w
, typ
);
769 block_typ_off
= (bstats
->blocks
== 0) ? w
->next
: 0;
770 if (block_typ_off
> 0)
771 bstats
->offset
= block_typ_off
;
772 bstats
->entries
+= w
->block_writer
->entries
;
773 bstats
->restarts
+= w
->block_writer
->restart_len
;
778 * If this is the first block we're writing to the table then we need
779 * to also write the reftable header.
782 writer_write_header(w
, w
->block
);
784 err
= padded_write(w
, w
->block
, raw_bytes
, padding
);
789 * Add an index record for every block that we're writing. If we end up
790 * having more than a threshold of index records we will end up writing
791 * an index section in `writer_finish_section()`. Each index record
792 * contains the last record key of the block it is indexing as well as
793 * the offset of that block.
795 * Note that this also applies when flushing index blocks, in which
796 * case we will end up with a multi-level index.
798 REFTABLE_ALLOC_GROW(w
->index
, w
->index_len
+ 1, w
->index_cap
);
800 return REFTABLE_OUT_OF_MEMORY_ERROR
;
802 index_record
.offset
= w
->next
;
803 reftable_buf_reset(&index_record
.last_key
);
804 err
= reftable_buf_add(&index_record
.last_key
, w
->block_writer
->last_key
.buf
,
805 w
->block_writer
->last_key
.len
);
808 w
->index
[w
->index_len
] = index_record
;
811 w
->next
+= padding
+ raw_bytes
;
812 w
->block_writer
= NULL
;
817 static int writer_flush_block(struct reftable_writer
*w
)
819 if (!w
->block_writer
)
821 if (w
->block_writer
->entries
== 0)
823 return writer_flush_nonempty_block(w
);
826 const struct reftable_stats
*reftable_writer_stats(struct reftable_writer
*w
)