/*
Copyright 2020 Google LLC

Use of this source code is governed by a BSD-style
license that can be found in the LICENSE file or at
https://developers.google.com/open-source/licenses/bsd
*/

#include "writer.h"

#include "system.h"

#include "block.h"
#include "constants.h"
#include "record.h"
#include "tree.h"
#include "reftable-error.h"

/* finishes a block, and writes it to storage */
static int writer_flush_block(struct reftable_writer *w);

/* deallocates memory related to the index */
static void writer_clear_index(struct reftable_writer *w);

/* finishes writing a 'r' (refs) or 'g' (reflogs) section */
static int writer_finish_public_section(struct reftable_writer *w);

static struct reftable_block_stats *
writer_reftable_block_stats(struct reftable_writer *w, uint8_t typ)
{
	switch (typ) {
	case 'r':
		return &w->stats.ref_stats;
	case 'o':
		return &w->stats.obj_stats;
	case 'i':
		return &w->stats.idx_stats;
	case 'g':
		return &w->stats.log_stats;
	}
	abort();
	return NULL;
}

/* write data, queuing the padding for the next write. Returns negative for
 * error. */
static int padded_write(struct reftable_writer *w, uint8_t *data, size_t len,
			int padding)
{
	int n = 0;
	if (w->pending_padding > 0) {
		uint8_t *zeroed;
		int n;

		zeroed = reftable_calloc(w->pending_padding, sizeof(*zeroed));
		if (!zeroed)
			return -1;

		n = w->write(w->write_arg, zeroed, w->pending_padding);
		if (n < 0)
			return n;

		w->pending_padding = 0;
		reftable_free(zeroed);
	}

	w->pending_padding = padding;
	n = w->write(w->write_arg, data, len);
	if (n < 0)
		return n;
	n += padding;
	return 0;
}
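
/*
 * Worked example (editor's note, not upstream code): with a 4096-byte
 * block size, a block that serialized to 4000 bytes is passed in with
 * `padding == 96`. The zero bytes are not written immediately but queued
 * in `pending_padding`, which lets callers that do not want the trailing
 * padding (see reftable_writer_close() and the ref-to-log transition in
 * reftable_writer_add_log_verbatim()) drop it before the next write:
 *
 *	padded_write(w, block, 4000, 96);   // 4000 bytes out, 96 queued
 *	padded_write(w, block2, 4000, 96);  // 96 zero bytes, then 4000
 */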
static void options_set_defaults(struct reftable_write_options *opts)
{
	if (opts->restart_interval == 0) {
		opts->restart_interval = 16;
	}

	if (opts->hash_id == 0) {
		opts->hash_id = GIT_SHA1_FORMAT_ID;
	}

	if (opts->block_size == 0) {
		opts->block_size = DEFAULT_BLOCK_SIZE;
	}
}

static int writer_version(struct reftable_writer *w)
{
	return (w->opts.hash_id == 0 || w->opts.hash_id == GIT_SHA1_FORMAT_ID) ?
		       1 :
		       2;
}

static int writer_write_header(struct reftable_writer *w, uint8_t *dest)
{
	memcpy(dest, "REFT", 4);

	dest[4] = writer_version(w);

	put_be24(dest + 5, w->opts.block_size);
	put_be64(dest + 8, w->min_update_index);
	put_be64(dest + 16, w->max_update_index);
	if (writer_version(w) == 2) {
		put_be32(dest + 24, w->opts.hash_id);
	}
	return header_size(writer_version(w));
}
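
/*
 * For orientation (editor's note), the header layout produced by the
 * put_be*() calls above; version 1 headers are 24 bytes, version 2
 * headers 28 bytes:
 *
 *	offset  0..3   "REFT" magic
 *	offset  4      format version (1 or 2)
 *	offset  5..7   block size, 24-bit big-endian
 *	offset  8..15  min_update_index, 64-bit big-endian
 *	offset 16..23  max_update_index, 64-bit big-endian
 *	offset 24..27  hash ID (version 2 only)
 */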
static int writer_reinit_block_writer(struct reftable_writer *w, uint8_t typ)
{
	int block_start = 0, ret;

	if (w->next == 0)
		block_start = header_size(writer_version(w));

	reftable_buf_reset(&w->last_key);
	ret = block_writer_init(&w->block_writer_data, typ, w->block,
				w->opts.block_size, block_start,
				hash_size(w->opts.hash_id));
	if (ret < 0)
		return ret;

	w->block_writer = &w->block_writer_data;
	w->block_writer->restart_interval = w->opts.restart_interval;

	return 0;
}

int reftable_writer_new(struct reftable_writer **out,
			ssize_t (*writer_func)(void *, const void *, size_t),
			int (*flush_func)(void *),
			void *writer_arg, const struct reftable_write_options *_opts)
{
	struct reftable_write_options opts = {0};
	struct reftable_writer *wp;

	wp = reftable_calloc(1, sizeof(*wp));
	if (!wp)
		return REFTABLE_OUT_OF_MEMORY_ERROR;

	if (_opts)
		opts = *_opts;
	options_set_defaults(&opts);
	if (opts.block_size >= (1 << 24))
		BUG("configured block size exceeds 16MB");

	reftable_buf_init(&wp->block_writer_data.last_key);
	reftable_buf_init(&wp->last_key);
	REFTABLE_CALLOC_ARRAY(wp->block, opts.block_size);
	if (!wp->block) {
		reftable_free(wp);
		return REFTABLE_OUT_OF_MEMORY_ERROR;
	}
	wp->write = writer_func;
	wp->write_arg = writer_arg;
	wp->opts = opts;
	wp->flush = flush_func;
	writer_reinit_block_writer(wp, BLOCK_TYPE_REF);

	*out = wp;

	return 0;
}
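
/*
 * Example usage (editor's sketch; `my_write`, `my_flush` and `fd` are
 * hypothetical and not part of this library). The write callback is
 * expected to return a negative value on error, the flush callback zero
 * on success:
 *
 *	static ssize_t my_write(void *arg, const void *buf, size_t len)
 *	{
 *		int fd = *(int *)arg;
 *		return write(fd, buf, len);
 *	}
 *
 *	static int my_flush(void *arg)
 *	{
 *		int fd = *(int *)arg;
 *		return fsync(fd);
 *	}
 *
 *	struct reftable_writer *wr;
 *	err = reftable_writer_new(&wr, my_write, my_flush, &fd, NULL);
 *
 * Note that this sketch does not handle short writes; a robust callback
 * would loop until `len` bytes have been written.
 */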
167 void reftable_writer_set_limits(struct reftable_writer *w, uint64_t min,
168 uint64_t max)
170 w->min_update_index = min;
171 w->max_update_index = max;
174 static void writer_release(struct reftable_writer *w)
176 if (w) {
177 reftable_free(w->block);
178 w->block = NULL;
179 block_writer_release(&w->block_writer_data);
180 w->block_writer = NULL;
181 writer_clear_index(w);
182 reftable_buf_release(&w->last_key);
186 void reftable_writer_free(struct reftable_writer *w)
188 writer_release(w);
189 reftable_free(w);
192 struct obj_index_tree_node {
193 struct reftable_buf hash;
194 uint64_t *offsets;
195 size_t offset_len;
196 size_t offset_cap;
199 #define OBJ_INDEX_TREE_NODE_INIT \
201 .hash = REFTABLE_BUF_INIT \
204 static int obj_index_tree_node_compare(const void *a, const void *b)
206 return reftable_buf_cmp(&((const struct obj_index_tree_node *)a)->hash,
207 &((const struct obj_index_tree_node *)b)->hash);
210 static int writer_index_hash(struct reftable_writer *w, struct reftable_buf *hash)
212 uint64_t off = w->next;
213 struct obj_index_tree_node want = { .hash = *hash };
214 struct obj_index_tree_node *key;
215 struct tree_node *node;
217 node = tree_search(w->obj_index_tree, &want, &obj_index_tree_node_compare);
218 if (!node) {
219 struct obj_index_tree_node empty = OBJ_INDEX_TREE_NODE_INIT;
220 int err;
222 key = reftable_malloc(sizeof(*key));
223 if (!key)
224 return REFTABLE_OUT_OF_MEMORY_ERROR;
226 *key = empty;
228 reftable_buf_reset(&key->hash);
229 err = reftable_buf_add(&key->hash, hash->buf, hash->len);
230 if (err < 0)
231 return err;
232 tree_insert(&w->obj_index_tree, key,
233 &obj_index_tree_node_compare);
234 } else {
235 key = node->key;
238 if (key->offset_len > 0 && key->offsets[key->offset_len - 1] == off)
239 return 0;
241 REFTABLE_ALLOC_GROW(key->offsets, key->offset_len + 1, key->offset_cap);
242 if (!key->offsets)
243 return REFTABLE_OUT_OF_MEMORY_ERROR;
244 key->offsets[key->offset_len++] = off;
246 return 0;
249 static int writer_add_record(struct reftable_writer *w,
250 struct reftable_record *rec)
252 struct reftable_buf key = REFTABLE_BUF_INIT;
253 int err;
255 err = reftable_record_key(rec, &key);
256 if (err < 0)
257 goto done;
259 if (reftable_buf_cmp(&w->last_key, &key) >= 0) {
260 err = REFTABLE_API_ERROR;
261 goto done;
264 reftable_buf_reset(&w->last_key);
265 err = reftable_buf_add(&w->last_key, key.buf, key.len);
266 if (err < 0)
267 goto done;
269 if (!w->block_writer) {
270 err = writer_reinit_block_writer(w, reftable_record_type(rec));
271 if (err < 0)
272 goto done;
275 if (block_writer_type(w->block_writer) != reftable_record_type(rec))
276 BUG("record of type %d added to writer of type %d",
277 reftable_record_type(rec), block_writer_type(w->block_writer));
280 * Try to add the record to the writer. If this succeeds then we're
281 * done. Otherwise the block writer may have hit the block size limit
282 * and needs to be flushed.
284 if (!block_writer_add(w->block_writer, rec)) {
285 err = 0;
286 goto done;
290 * The current block is full, so we need to flush and reinitialize the
291 * writer to start writing the next block.
293 err = writer_flush_block(w);
294 if (err < 0)
295 goto done;
296 err = writer_reinit_block_writer(w, reftable_record_type(rec));
297 if (err < 0)
298 goto done;
301 * Try to add the record to the writer again. If this still fails then
302 * the record does not fit into the block size.
304 * TODO: it would be great to have `block_writer_add()` return proper
305 * error codes so that we don't have to second-guess the failure
306 * mode here.
308 err = block_writer_add(w->block_writer, rec);
309 if (err) {
310 err = REFTABLE_ENTRY_TOO_BIG_ERROR;
311 goto done;
314 done:
315 reftable_buf_release(&key);
316 return err;
319 int reftable_writer_add_ref(struct reftable_writer *w,
320 struct reftable_ref_record *ref)
322 struct reftable_record rec = {
323 .type = BLOCK_TYPE_REF,
324 .u = {
325 .ref = *ref
328 struct reftable_buf buf = REFTABLE_BUF_INIT;
329 int err;
331 if (!ref->refname ||
332 ref->update_index < w->min_update_index ||
333 ref->update_index > w->max_update_index)
334 return REFTABLE_API_ERROR;
336 rec.u.ref.update_index -= w->min_update_index;
338 err = writer_add_record(w, &rec);
339 if (err < 0)
340 goto out;
342 if (!w->opts.skip_index_objects && reftable_ref_record_val1(ref)) {
343 err = reftable_buf_add(&buf, (char *)reftable_ref_record_val1(ref),
344 hash_size(w->opts.hash_id));
345 if (err < 0)
346 goto out;
348 err = writer_index_hash(w, &buf);
349 if (err < 0)
350 goto out;
353 if (!w->opts.skip_index_objects && reftable_ref_record_val2(ref)) {
354 reftable_buf_reset(&buf);
355 err = reftable_buf_add(&buf, reftable_ref_record_val2(ref),
356 hash_size(w->opts.hash_id));
357 if (err < 0)
358 goto out;
360 err = writer_index_hash(w, &buf);
361 if (err < 0)
362 goto out;
365 err = 0;
367 out:
368 reftable_buf_release(&buf);
369 return err;
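
/*
 * Example (editor's sketch): a regular ref pointing at a single object ID,
 * with an update index inside the limits configured via
 * reftable_writer_set_limits(). Field names follow reftable-record.h at
 * the time of writing and may differ across versions; `commit_oid` is a
 * hypothetical hash buffer:
 *
 *	struct reftable_ref_record ref = {
 *		.refname = "refs/heads/main",
 *		.update_index = 42,
 *		.value_type = REFTABLE_REF_VAL1,
 *	};
 *	memcpy(ref.value.val1, commit_oid, hash_size(w->opts.hash_id));
 *	err = reftable_writer_add_ref(w, &ref);
 */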
int reftable_writer_add_refs(struct reftable_writer *w,
			     struct reftable_ref_record *refs, int n)
{
	int err = 0;
	int i = 0;
	QSORT(refs, n, reftable_ref_record_compare_name);
	for (i = 0; err == 0 && i < n; i++) {
		err = reftable_writer_add_ref(w, &refs[i]);
	}
	return err;
}

static int reftable_writer_add_log_verbatim(struct reftable_writer *w,
					    struct reftable_log_record *log)
{
	struct reftable_record rec = {
		.type = BLOCK_TYPE_LOG,
		.u = {
			.log = *log,
		},
	};
	if (w->block_writer &&
	    block_writer_type(w->block_writer) == BLOCK_TYPE_REF) {
		int err = writer_finish_public_section(w);
		if (err < 0)
			return err;
	}

	w->next -= w->pending_padding;
	w->pending_padding = 0;
	return writer_add_record(w, &rec);
}

int reftable_writer_add_log(struct reftable_writer *w,
			    struct reftable_log_record *log)
{
	char *input_log_message = NULL;
	struct reftable_buf cleaned_message = REFTABLE_BUF_INIT;
	int err = 0;

	if (log->value_type == REFTABLE_LOG_DELETION)
		return reftable_writer_add_log_verbatim(w, log);

	if (!log->refname)
		return REFTABLE_API_ERROR;

	input_log_message = log->value.update.message;
	if (!w->opts.exact_log_message && log->value.update.message) {
		err = reftable_buf_addstr(&cleaned_message, log->value.update.message);
		if (err < 0)
			goto done;

		while (cleaned_message.len &&
		       cleaned_message.buf[cleaned_message.len - 1] == '\n') {
			err = reftable_buf_setlen(&cleaned_message,
						  cleaned_message.len - 1);
			if (err < 0)
				goto done;
		}
		if (strchr(cleaned_message.buf, '\n')) {
			/* multiple lines not allowed. */
			err = REFTABLE_API_ERROR;
			goto done;
		}

		err = reftable_buf_addstr(&cleaned_message, "\n");
		if (err < 0)
			goto done;

		log->value.update.message = cleaned_message.buf;
	}

	err = reftable_writer_add_log_verbatim(w, log);
	log->value.update.message = input_log_message;
done:
	reftable_buf_release(&cleaned_message);
	return err;
}
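
/*
 * Worked example for the normalization above (editor's note): unless
 * `exact_log_message` is set, a message of "fast-forward\n\n" is stored
 * as "fast-forward\n", a message of "fast-forward" likewise gains the
 * single trailing newline, and a message with an embedded newline such
 * as "one\ntwo" is rejected with REFTABLE_API_ERROR. The caller's
 * `log->value.update.message` pointer is restored before returning.
 */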
int reftable_writer_add_logs(struct reftable_writer *w,
			     struct reftable_log_record *logs, int n)
{
	int err = 0;
	int i = 0;
	QSORT(logs, n, reftable_log_record_compare_key);

	for (i = 0; err == 0 && i < n; i++) {
		err = reftable_writer_add_log(w, &logs[i]);
	}
	return err;
}

static int writer_finish_section(struct reftable_writer *w)
{
	struct reftable_block_stats *bstats = NULL;
	uint8_t typ = block_writer_type(w->block_writer);
	uint64_t index_start = 0;
	int max_level = 0;
	size_t threshold = w->opts.unpadded ? 1 : 3;
	int before_blocks = w->stats.idx_stats.blocks;
	int err;

	err = writer_flush_block(w);
	if (err < 0)
		return err;

	/*
	 * When the section we are about to index has a lot of blocks then the
	 * index itself may span across multiple blocks, as well. This would
	 * require a linear scan over index blocks only to find the desired
	 * indexed block, which is inefficient. Instead, we write a multi-level
	 * index where index records of level N+1 will refer to index blocks of
	 * level N. This isn't constant time, either, but at least logarithmic.
	 *
	 * This loop handles writing this multi-level index. Note that we write
	 * the lowest-level index pointing to the indexed blocks first. We then
	 * continue writing additional index levels until the current level has
	 * fewer blocks than the threshold so that the highest level will be at
	 * the end of the index section.
	 *
	 * Readers are thus required to start reading the index section from
	 * its end, which is why we set `index_start` to the beginning of the
	 * last index section.
	 */
	while (w->index_len > threshold) {
		struct reftable_index_record *idx = NULL;
		size_t i, idx_len;

		max_level++;
		index_start = w->next;
		err = writer_reinit_block_writer(w, BLOCK_TYPE_INDEX);
		if (err < 0)
			return err;

		idx = w->index;
		idx_len = w->index_len;

		w->index = NULL;
		w->index_len = 0;
		w->index_cap = 0;
		for (i = 0; i < idx_len; i++) {
			struct reftable_record rec = {
				.type = BLOCK_TYPE_INDEX,
				.u = {
					.idx = idx[i],
				},
			};

			err = writer_add_record(w, &rec);
			if (err < 0)
				return err;
		}

		err = writer_flush_block(w);
		if (err < 0)
			return err;

		for (i = 0; i < idx_len; i++)
			reftable_buf_release(&idx[i].last_key);
		reftable_free(idx);
	}

	/*
	 * The index may still contain a number of index blocks lower than the
	 * threshold. Clear it so that these entries don't leak into the next
	 * index section.
	 */
	writer_clear_index(w);

	bstats = writer_reftable_block_stats(w, typ);
	bstats->index_blocks = w->stats.idx_stats.blocks - before_blocks;
	bstats->index_offset = index_start;
	bstats->max_index_level = max_level;

	/* Reinit last_key, as the next section can start with any key. */
	reftable_buf_reset(&w->last_key);

	return 0;
}
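
/*
 * Worked example (editor's note): with padded blocks the threshold is 3.
 * Suppose flushing a ref section left 20 index records, one per block.
 * The loop writes a level-1 index over those 20 blocks; assume it spans
 * 2 index blocks, leaving 2 index records behind. Since 2 <= 3 the loop
 * stops: max_level is 1 and index_start points at the level written
 * last, i.e. the highest one, which readers locate by starting from the
 * end of the index section.
 */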
struct common_prefix_arg {
	struct reftable_buf *last;
	int max;
};

static void update_common(void *void_arg, void *key)
{
	struct common_prefix_arg *arg = void_arg;
	struct obj_index_tree_node *entry = key;
	if (arg->last) {
		int n = common_prefix_size(&entry->hash, arg->last);
		if (n > arg->max) {
			arg->max = n;
		}
	}
	arg->last = &entry->hash;
}

struct write_record_arg {
	struct reftable_writer *w;
	int err;
};

static void write_object_record(void *void_arg, void *key)
{
	struct write_record_arg *arg = void_arg;
	struct obj_index_tree_node *entry = key;
	struct reftable_record
		rec = { .type = BLOCK_TYPE_OBJ,
			.u.obj = {
				.hash_prefix = (uint8_t *)entry->hash.buf,
				.hash_prefix_len = arg->w->stats.object_id_len,
				.offsets = entry->offsets,
				.offset_len = entry->offset_len,
			} };
	if (arg->err < 0)
		goto done;

	arg->err = block_writer_add(arg->w->block_writer, &rec);
	if (arg->err == 0)
		goto done;

	arg->err = writer_flush_block(arg->w);
	if (arg->err < 0)
		goto done;

	arg->err = writer_reinit_block_writer(arg->w, BLOCK_TYPE_OBJ);
	if (arg->err < 0)
		goto done;

	arg->err = block_writer_add(arg->w->block_writer, &rec);
	if (arg->err == 0)
		goto done;

	rec.u.obj.offset_len = 0;
	arg->err = block_writer_add(arg->w->block_writer, &rec);

	/* Should be able to write into a fresh block. */
	assert(arg->err == 0);

done:;
}

static void object_record_free(void *void_arg UNUSED, void *key)
{
	struct obj_index_tree_node *entry = key;

	REFTABLE_FREE_AND_NULL(entry->offsets);
	reftable_buf_release(&entry->hash);
	reftable_free(entry);
}

static int writer_dump_object_index(struct reftable_writer *w)
{
	struct write_record_arg closure = { .w = w };
	struct common_prefix_arg common = {
		.max = 1, /* obj_id_len should be >= 2. */
	};
	int err;

	if (w->obj_index_tree)
		infix_walk(w->obj_index_tree, &update_common, &common);
	w->stats.object_id_len = common.max + 1;

	err = writer_reinit_block_writer(w, BLOCK_TYPE_OBJ);
	if (err < 0)
		return err;

	if (w->obj_index_tree)
		infix_walk(w->obj_index_tree, &write_object_record, &closure);

	if (closure.err < 0)
		return closure.err;
	return writer_finish_section(w);
}

static int writer_finish_public_section(struct reftable_writer *w)
{
	uint8_t typ = 0;
	int err = 0;

	if (!w->block_writer)
		return 0;

	typ = block_writer_type(w->block_writer);
	err = writer_finish_section(w);
	if (err < 0)
		return err;
	if (typ == BLOCK_TYPE_REF && !w->opts.skip_index_objects &&
	    w->stats.ref_stats.index_blocks > 0) {
		err = writer_dump_object_index(w);
		if (err < 0)
			return err;
	}

	if (w->obj_index_tree) {
		infix_walk(w->obj_index_tree, &object_record_free, NULL);
		tree_free(w->obj_index_tree);
		w->obj_index_tree = NULL;
	}

	w->block_writer = NULL;
	return 0;
}

int reftable_writer_close(struct reftable_writer *w)
{
	uint8_t footer[72];
	uint8_t *p = footer;
	int err = writer_finish_public_section(w);
	int empty_table = w->next == 0;
	if (err != 0)
		goto done;
	w->pending_padding = 0;
	if (empty_table) {
		/* Empty tables need a header anyway. */
		uint8_t header[28];
		int n = writer_write_header(w, header);
		err = padded_write(w, header, n, 0);
		if (err < 0)
			goto done;
	}

	p += writer_write_header(w, footer);
	put_be64(p, w->stats.ref_stats.index_offset);
	p += 8;
	put_be64(p, (w->stats.obj_stats.offset) << 5 | w->stats.object_id_len);
	p += 8;
	put_be64(p, w->stats.obj_stats.index_offset);
	p += 8;

	put_be64(p, w->stats.log_stats.offset);
	p += 8;
	put_be64(p, w->stats.log_stats.index_offset);
	p += 8;

	put_be32(p, crc32(0, footer, p - footer));
	p += 4;

	err = w->flush(w->write_arg);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	err = padded_write(w, footer, footer_size(writer_version(w)), 0);
	if (err < 0)
		goto done;

	if (empty_table) {
		err = REFTABLE_EMPTY_TABLE_ERROR;
		goto done;
	}

done:
	writer_release(w);
	return err;
}
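
/*
 * For orientation (editor's note), the footer layout assembled above;
 * footer_size() is 68 bytes for version 1 and 72 for version 2:
 *
 *	header copy (24 or 28 bytes, identical to the file header)
 *	 8 bytes  ref index offset
 *	 8 bytes  obj offset << 5 | object_id_len
 *	 8 bytes  obj index offset
 *	 8 bytes  log offset
 *	 8 bytes  log index offset
 *	 4 bytes  CRC-32 over the preceding footer bytes
 */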
static void writer_clear_index(struct reftable_writer *w)
{
	for (size_t i = 0; w->index && i < w->index_len; i++)
		reftable_buf_release(&w->index[i].last_key);
	REFTABLE_FREE_AND_NULL(w->index);
	w->index_len = 0;
	w->index_cap = 0;
}

static int writer_flush_nonempty_block(struct reftable_writer *w)
{
	struct reftable_index_record index_record = {
		.last_key = REFTABLE_BUF_INIT,
	};
	uint8_t typ = block_writer_type(w->block_writer);
	struct reftable_block_stats *bstats;
	int raw_bytes, padding = 0, err;
	uint64_t block_typ_off;

	/*
	 * Finish the current block. This will cause the block writer to emit
	 * restart points and potentially compress records in case we are
	 * writing a log block.
	 *
	 * Note that this is still happening in memory.
	 */
	raw_bytes = block_writer_finish(w->block_writer);
	if (raw_bytes < 0)
		return raw_bytes;

	/*
	 * By default, all records except for log records are padded to the
	 * block size.
	 */
	if (!w->opts.unpadded && typ != BLOCK_TYPE_LOG)
		padding = w->opts.block_size - raw_bytes;

	bstats = writer_reftable_block_stats(w, typ);
	block_typ_off = (bstats->blocks == 0) ? w->next : 0;
	if (block_typ_off > 0)
		bstats->offset = block_typ_off;
	bstats->entries += w->block_writer->entries;
	bstats->restarts += w->block_writer->restart_len;
	bstats->blocks++;
	w->stats.blocks++;

	/*
	 * If this is the first block we're writing to the table then we need
	 * to also write the reftable header.
	 */
	if (!w->next)
		writer_write_header(w, w->block);

	err = padded_write(w, w->block, raw_bytes, padding);
	if (err < 0)
		return err;

	/*
	 * Add an index record for every block that we're writing. If we end up
	 * having more than a threshold of index records we will end up writing
	 * an index section in `writer_finish_section()`. Each index record
	 * contains the last record key of the block it is indexing as well as
	 * the offset of that block.
	 *
	 * Note that this also applies when flushing index blocks, in which
	 * case we will end up with a multi-level index.
	 */
	REFTABLE_ALLOC_GROW(w->index, w->index_len + 1, w->index_cap);
	if (!w->index)
		return REFTABLE_OUT_OF_MEMORY_ERROR;

	index_record.offset = w->next;
	reftable_buf_reset(&index_record.last_key);
	err = reftable_buf_add(&index_record.last_key, w->block_writer->last_key.buf,
			       w->block_writer->last_key.len);
	if (err < 0)
		return err;
	w->index[w->index_len] = index_record;
	w->index_len++;

	w->next += padding + raw_bytes;
	w->block_writer = NULL;

	return 0;
}

static int writer_flush_block(struct reftable_writer *w)
{
	if (!w->block_writer)
		return 0;
	if (w->block_writer->entries == 0)
		return 0;
	return writer_flush_nonempty_block(w);
}

const struct reftable_stats *reftable_writer_stats(struct reftable_writer *w)
{
	return &w->stats;
}