reftable/block.c

/*
Copyright 2020 Google LLC

Use of this source code is governed by a BSD-style
license that can be found in the LICENSE file or at
https://developers.google.com/open-source/licenses/bsd
*/

#include "block.h"

#include "blocksource.h"
#include "constants.h"
#include "record.h"
#include "reftable-error.h"
#include "system.h"
#include <zlib.h>

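/*
 * Fixed sizes of the reftable file header and footer for the two format
 * versions. The version-2 variants are four bytes larger; per the reftable
 * format description this accounts for the embedded hash function ID.
 */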
size_t header_size(int version)
{
	switch (version) {
	case 1:
		return 24;
	case 2:
		return 28;
	}
	abort();
}

size_t footer_size(int version)
{
	switch (version) {
	case 1:
		return 68;
	case 2:
		return 72;
	}
	abort();
}

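/*
 * Accounts for a record of `n` encoded bytes that is about to be appended at
 * `w->next`, optionally registering it as a restart point. Returns -1 if the
 * record (together with the restart offsets and restart count that still have
 * to be written at the end of the block) would not fit, a negative error code
 * on allocation failure, and 0 on success.
 */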
static int block_writer_register_restart(struct block_writer *w, int n,
					 int is_restart, struct reftable_buf *key)
{
	uint32_t rlen;
	int err;

	rlen = w->restart_len;
	if (rlen >= MAX_RESTARTS)
		is_restart = 0;

	if (is_restart)
		rlen++;
	if (2 + 3 * rlen + n > w->block_size - w->next)
		return -1;
	if (is_restart) {
		REFTABLE_ALLOC_GROW_OR_NULL(w->restarts, w->restart_len + 1,
					    w->restart_cap);
		if (!w->restarts)
			return REFTABLE_OUT_OF_MEMORY_ERROR;
		w->restarts[w->restart_len++] = w->next;
	}

	w->next += n;

	reftable_buf_reset(&w->last_key);
	err = reftable_buf_add(&w->last_key, key->buf, key->len);
	if (err < 0)
		return err;

	w->entries++;
	return 0;
}

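/*
 * Initializes the writer for a single block of `block_size` bytes. The block
 * type byte is written at `header_off`; records start four bytes after it,
 * right behind the 24-bit block length that gets filled in by
 * `block_writer_finish()`. The zlib stream is allocated lazily and reused
 * across blocks.
 */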
int block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *block,
		      uint32_t block_size, uint32_t header_off, uint32_t hash_size)
{
	bw->block = block;
	bw->hash_size = hash_size;
	bw->block_size = block_size;
	bw->header_off = header_off;
	bw->block[header_off] = typ;
	bw->next = header_off + 4;
	bw->restart_interval = 16;
	bw->entries = 0;
	bw->restart_len = 0;
	bw->last_key.len = 0;
	if (!bw->zstream) {
		REFTABLE_CALLOC_ARRAY(bw->zstream, 1);
		if (!bw->zstream)
			return REFTABLE_OUT_OF_MEMORY_ERROR;
		deflateInit(bw->zstream, 9);
	}

	return 0;
}

uint8_t block_writer_type(struct block_writer *bw)
{
	return bw->block[bw->header_off];
}

/* Adds the reftable_record to the block. Returns -1 if it does not fit, 0 on
   success. Returns REFTABLE_API_ERROR if attempting to write a record with an
   empty key. */
int block_writer_add(struct block_writer *w, struct reftable_record *rec)
{
	struct reftable_buf empty = REFTABLE_BUF_INIT;
	struct reftable_buf last =
		w->entries % w->restart_interval == 0 ? empty : w->last_key;
	struct string_view out = {
		.buf = w->block + w->next,
		.len = w->block_size - w->next,
	};
	struct string_view start = out;
	int is_restart = 0;
	int n = 0;
	int err;

	err = reftable_record_key(rec, &w->scratch);
	if (err < 0)
		goto done;

	if (!w->scratch.len) {
		err = REFTABLE_API_ERROR;
		goto done;
	}

	n = reftable_encode_key(&is_restart, out, last, w->scratch,
				reftable_record_val_type(rec));
	if (n < 0) {
		err = -1;
		goto done;
	}
	string_view_consume(&out, n);

	n = reftable_record_encode(rec, out, w->hash_size);
	if (n < 0) {
		err = -1;
		goto done;
	}
	string_view_consume(&out, n);

	err = block_writer_register_restart(w, start.len - out.len, is_restart,
					    &w->scratch);
done:
	return err;
}

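/*
 * Finalizes the block: appends the restart offsets (24 bits each) and the
 * restart count, records the resulting block length in the header and, for
 * log blocks, compresses everything after the block header in place. Returns
 * the number of bytes used in the block buffer, or a negative error code.
 */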
int block_writer_finish(struct block_writer *w)
{
	for (uint32_t i = 0; i < w->restart_len; i++) {
		put_be24(w->block + w->next, w->restarts[i]);
		w->next += 3;
	}

	put_be16(w->block + w->next, w->restart_len);
	w->next += 2;
	put_be24(w->block + 1 + w->header_off, w->next);

	/*
	 * Log records are stored zlib-compressed. Note that the compression
	 * also spans over the restart points we have just written.
	 */
	if (block_writer_type(w) == BLOCK_TYPE_LOG) {
		int block_header_skip = 4 + w->header_off;
		uLongf src_len = w->next - block_header_skip, compressed_len;
		int ret;

		ret = deflateReset(w->zstream);
		if (ret != Z_OK)
			return REFTABLE_ZLIB_ERROR;

		/*
		 * Precompute the upper bound of how many bytes the compressed
		 * data may end up with. Combined with `Z_FINISH`, `deflate()`
		 * is guaranteed to return `Z_STREAM_END`.
		 */
		compressed_len = deflateBound(w->zstream, src_len);
		REFTABLE_ALLOC_GROW_OR_NULL(w->compressed, compressed_len,
					    w->compressed_cap);
		if (!w->compressed) {
			ret = REFTABLE_OUT_OF_MEMORY_ERROR;
			return ret;
		}

		w->zstream->next_out = w->compressed;
		w->zstream->avail_out = compressed_len;
		w->zstream->next_in = w->block + block_header_skip;
		w->zstream->avail_in = src_len;

		/*
		 * We want to perform the compression in a single step, which
		 * is why we can pass Z_FINISH here. As we have precomputed the
		 * deflated buffer's size via `deflateBound()` this function is
		 * guaranteed to succeed according to the zlib documentation.
		 */
		ret = deflate(w->zstream, Z_FINISH);
		if (ret != Z_STREAM_END)
			return REFTABLE_ZLIB_ERROR;

		/*
		 * Overwrite the uncompressed data we have already written and
		 * adjust the `next` pointer to point right after the
		 * compressed data.
		 */
		memcpy(w->block + block_header_skip, w->compressed,
		       w->zstream->total_out);
		w->next = w->zstream->total_out + block_header_skip;
	}

	return w->next;
}

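/*
 * Prepares a reader for a single block. For log blocks the payload is
 * inflated into a reader-owned scratch buffer first, since it is stored
 * zlib-compressed on disk. On success the reader takes ownership of the
 * block's data.
 */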
int block_reader_init(struct block_reader *br, struct reftable_block *block,
		      uint32_t header_off, uint32_t table_block_size,
		      uint32_t hash_size)
{
	uint32_t full_block_size = table_block_size;
	uint8_t typ = block->data[header_off];
	uint32_t sz = get_be24(block->data + header_off + 1);
	int err = 0;
	uint16_t restart_count = 0;
	uint32_t restart_start = 0;
	uint8_t *restart_bytes = NULL;

	reftable_block_done(&br->block);

	if (!reftable_is_block_type(typ)) {
		err = REFTABLE_FORMAT_ERROR;
		goto done;
	}

	if (typ == BLOCK_TYPE_LOG) {
		uint32_t block_header_skip = 4 + header_off;
		uLong dst_len = sz - block_header_skip;
		uLong src_len = block->len - block_header_skip;

		/* Log blocks specify the *uncompressed* size in their header. */
		REFTABLE_ALLOC_GROW_OR_NULL(br->uncompressed_data, sz,
					    br->uncompressed_cap);
		if (!br->uncompressed_data) {
			err = REFTABLE_OUT_OF_MEMORY_ERROR;
			goto done;
		}

		/* Copy over the block header verbatim. It's not compressed. */
		memcpy(br->uncompressed_data, block->data, block_header_skip);

		if (!br->zstream) {
			REFTABLE_CALLOC_ARRAY(br->zstream, 1);
			if (!br->zstream) {
				err = REFTABLE_OUT_OF_MEMORY_ERROR;
				goto done;
			}

			err = inflateInit(br->zstream);
		} else {
			err = inflateReset(br->zstream);
		}
		if (err != Z_OK) {
			err = REFTABLE_ZLIB_ERROR;
			goto done;
		}

		br->zstream->next_in = block->data + block_header_skip;
		br->zstream->avail_in = src_len;
		br->zstream->next_out = br->uncompressed_data + block_header_skip;
		br->zstream->avail_out = dst_len;

		/*
		 * We know both input as well as output size, and we know that
		 * the sizes should never be bigger than `uInt_MAX` because
		 * blocks can at most be 16MB large. We can thus use `Z_FINISH`
		 * here to instruct zlib to inflate the data in one go, which
		 * is more efficient than using `Z_NO_FLUSH`.
		 */
		err = inflate(br->zstream, Z_FINISH);
		if (err != Z_STREAM_END) {
			err = REFTABLE_ZLIB_ERROR;
			goto done;
		}
		err = 0;

		if (br->zstream->total_out + block_header_skip != sz) {
			err = REFTABLE_FORMAT_ERROR;
			goto done;
		}

		/* We're done with the input data. */
		reftable_block_done(block);
		block->data = br->uncompressed_data;
		block->len = sz;
		full_block_size = src_len + block_header_skip - br->zstream->avail_in;
	} else if (full_block_size == 0) {
		full_block_size = sz;
	} else if (sz < full_block_size && sz < block->len &&
		   block->data[sz] != 0) {
		/* If the block is smaller than the full block size, it is
		   padded (data followed by '\0') or the next block is
		   unaligned. */
		full_block_size = sz;
	}

	restart_count = get_be16(block->data + sz - 2);
	restart_start = sz - 2 - 3 * restart_count;
	restart_bytes = block->data + restart_start;

	/* transfer ownership. */
	br->block = *block;
	block->data = NULL;
	block->len = 0;

	br->hash_size = hash_size;
	br->block_len = restart_start;
	br->full_block_size = full_block_size;
	br->header_off = header_off;
	br->restart_count = restart_count;
	br->restart_bytes = restart_bytes;

done:
	return err;
}

void block_reader_release(struct block_reader *br)
{
	inflateEnd(br->zstream);
	reftable_free(br->zstream);
	reftable_free(br->uncompressed_data);
	reftable_block_done(&br->block);
}

uint8_t block_reader_type(const struct block_reader *r)
{
	return r->block.data[r->header_off];
}

int block_reader_first_key(const struct block_reader *br, struct reftable_buf *key)
{
	int off = br->header_off + 4, n;
	struct string_view in = {
		.buf = br->block.data + off,
		.len = br->block_len - off,
	};
	uint8_t extra = 0;

	reftable_buf_reset(key);

	n = reftable_decode_key(key, &extra, in);
	if (n < 0)
		return n;
	if (!key->len)
		return REFTABLE_FORMAT_ERROR;

	return 0;
}

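/*
 * Returns the offset of the `idx`th restart point. Restart offsets are stored
 * as 24-bit big-endian values at the end of the block.
 */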
static uint32_t block_reader_restart_offset(const struct block_reader *br, size_t idx)
{
	return get_be24(br->restart_bytes + 3 * idx);
}

void block_iter_seek_start(struct block_iter *it, const struct block_reader *br)
{
	it->block = br->block.data;
	it->block_len = br->block_len;
	it->hash_size = br->hash_size;
	reftable_buf_reset(&it->last_key);
	it->next_off = br->header_off + 4;
}

struct restart_needle_less_args {
	int error;
	struct reftable_buf needle;
	const struct block_reader *reader;
};

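/*
 * binsearch() predicate: returns 1 when the needle in `args` is less than the
 * key stored at the `idx`th restart point. Decoding errors are signalled via
 * `args->error`.
 */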
static int restart_needle_less(size_t idx, void *_args)
{
	struct restart_needle_less_args *args = _args;
	uint32_t off = block_reader_restart_offset(args->reader, idx);
	struct string_view in = {
		.buf = args->reader->block.data + off,
		.len = args->reader->block_len - off,
	};
	uint64_t prefix_len, suffix_len;
	uint8_t extra;
	int n;

	/*
	 * Records at restart points are stored without prefix compression, so
	 * there is no need to fully decode the record key here. This removes
	 * the need for allocating memory.
	 */
	n = reftable_decode_keylen(in, &prefix_len, &suffix_len, &extra);
	if (n < 0 || prefix_len) {
		args->error = 1;
		return -1;
	}

	string_view_consume(&in, n);
	if (suffix_len > in.len) {
		args->error = 1;
		return -1;
	}

	n = memcmp(args->needle.buf, in.buf,
		   args->needle.len < suffix_len ? args->needle.len : suffix_len);
	if (n)
		return n < 0;
	return args->needle.len < suffix_len;
}

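/*
 * Decodes the record at the iterator's current offset into `rec` and advances
 * the iterator. Returns 0 on success, a value greater than zero once the end
 * of the block has been reached, and a negative error code on failure.
 */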
int block_iter_next(struct block_iter *it, struct reftable_record *rec)
{
	struct string_view in = {
		.buf = (unsigned char *) it->block + it->next_off,
		.len = it->block_len - it->next_off,
	};
	struct string_view start = in;
	uint8_t extra = 0;
	int n = 0;

	if (it->next_off >= it->block_len)
		return 1;

	n = reftable_decode_key(&it->last_key, &extra, in);
	if (n < 0)
		return -1;
	if (!it->last_key.len)
		return REFTABLE_FORMAT_ERROR;

	string_view_consume(&in, n);
	n = reftable_record_decode(rec, it->last_key, extra, in, it->hash_size,
				   &it->scratch);
	if (n < 0)
		return -1;
	string_view_consume(&in, n);

	it->next_off += start.len - in.len;
	return 0;
}

void block_iter_reset(struct block_iter *it)
{
	reftable_buf_reset(&it->last_key);
	it->next_off = 0;
	it->block = NULL;
	it->block_len = 0;
	it->hash_size = 0;
}

void block_iter_close(struct block_iter *it)
{
	reftable_buf_release(&it->last_key);
	reftable_buf_release(&it->scratch);
}

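/*
 * Positions the iterator such that the next call to `block_iter_next()`
 * yields the first record whose key is greater than or equal to `want`, or
 * signals end-of-block if no such record exists. Returns 0 on success and a
 * negative error code on failure.
 */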
int block_iter_seek_key(struct block_iter *it, const struct block_reader *br,
			struct reftable_buf *want)
{
	struct restart_needle_less_args args = {
		.needle = *want,
		.reader = br,
	};
	struct reftable_record rec;
	int err = 0;
	size_t i;

	/*
	 * Perform a binary search over the block's restart points, which
	 * avoids doing a linear scan over the whole block. Like this, we
	 * identify the section of the block that should contain our key.
	 *
	 * Note that we explicitly search for the first restart point _greater_
	 * than the sought-after record, not _greater or equal_ to it. In case
	 * the sought-after record is located directly at the restart point we
	 * would otherwise start doing the linear search at the preceding
	 * restart point. While that works alright, we would end up scanning
	 * too many records.
	 */
	i = binsearch(br->restart_count, &restart_needle_less, &args);
	if (args.error) {
		err = REFTABLE_FORMAT_ERROR;
		goto done;
	}

	/*
	 * Now there are multiple cases:
	 *
	 * - `i == 0`: The wanted record is smaller than the record found at
	 *   the first restart point. As the first restart point is the first
	 *   record in the block, our wanted record cannot be located in this
	 *   block at all. We still need to position the iterator so that the
	 *   next call to `block_iter_next()` will yield an end-of-iterator
	 *   signal.
	 *
	 * - `i == restart_count`: The wanted record was not found at any of
	 *   the restart points. As there is no restart point at the end of
	 *   the block, the record may thus be contained in its last section.
	 *
	 * - `i > 0`: The wanted record must be contained in the section
	 *   before the found restart point. We thus do a linear search
	 *   starting from the preceding restart point.
	 */
	if (i > 0)
		it->next_off = block_reader_restart_offset(br, i - 1);
	else
		it->next_off = br->header_off + 4;
	it->block = br->block.data;
	it->block_len = br->block_len;
	it->hash_size = br->hash_size;

	reftable_record_init(&rec, block_reader_type(br));

	/*
	 * We're looking for the last entry less than the wanted key so that
	 * the next call to `block_iter_next()` would yield the wanted
	 * record. We thus don't want to position our iterator at the sought-
	 * after record, but one before. To do so, we have to go one entry too
	 * far and then back up.
	 */
	while (1) {
		size_t prev_off = it->next_off;

		err = block_iter_next(it, &rec);
		if (err < 0)
			goto done;
		if (err > 0) {
			it->next_off = prev_off;
			err = 0;
			goto done;
		}

		err = reftable_record_key(&rec, &it->last_key);
		if (err < 0)
			goto done;

		/*
		 * Check whether the current key is greater or equal to the
		 * sought-after key. In case it is greater we know that the
		 * record does not exist in the block and can thus abort early.
		 * In case it is equal to the sought-after key we have found
		 * the desired record.
		 *
		 * Note that we store the next record's key directly in
		 * `last_key` without restoring the key of the preceding record
		 * in case we need to go one record back. This is safe to do as
		 * `block_iter_next()` would return the record whose key is
		 * equal to `last_key` now, and naturally all keys share a
		 * prefix with themselves.
		 */
		if (reftable_buf_cmp(&it->last_key, want) >= 0) {
			it->next_off = prev_off;
			goto done;
		}
	}

done:
	reftable_record_release(&rec);
	return err;
}

void block_writer_release(struct block_writer *bw)
{
	deflateEnd(bw->zstream);
	REFTABLE_FREE_AND_NULL(bw->zstream);
	REFTABLE_FREE_AND_NULL(bw->restarts);
	REFTABLE_FREE_AND_NULL(bw->compressed);
	reftable_buf_release(&bw->scratch);
	reftable_buf_release(&bw->last_key);
	/* the block is not owned. */
}

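/*
 * Returns the block's data to its originating block source, if any, and
 * clears the struct so that it can be reused.
 */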
void reftable_block_done(struct reftable_block *blockp)
{
	struct reftable_block_source source;

	if (!blockp)
		return;

	source = blockp->source;
	if (source.ops)
		source.ops->return_block(source.arg, blockp);
	blockp->data = NULL;
	blockp->len = 0;
	blockp->source.ops = NULL;
	blockp->source.arg = NULL;
}