/*
 * Copyright 2020 Google LLC
 *
 * Use of this source code is governed by a BSD-style
 * license that can be found in the LICENSE file or at
 * https://developers.google.com/open-source/licenses/bsd
 */
#include "block.h"

#include "blocksource.h"
#include "constants.h"
#include "record.h"
#include "reftable-error.h"
#include "system.h"

/* Maximum number of restart points a block can carry (16-bit count). */
#define MAX_RESTARTS ((1 << 16) - 1)
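/*
 * A block is laid out as follows (a sketch reconstructed from the reader and
 * writer code below):
 *
 *   [header_off bytes]  optional file header (first block of a file only)
 *   [1 byte]            block type ('r', 'o', 'i' or 'g')
 *   [3 bytes]           big-endian length of the block data
 *   [N bytes]           prefix-compressed records
 *   [3 bytes each]      big-endian restart offsets
 *   [2 bytes]           big-endian restart count
 */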
size_t header_size(int version)
{
	switch (version) {
	case 1:
		return 24;
	case 2:
		return 28;
	}
	abort();
}

size_t footer_size(int version)
{
	switch (version) {
	case 1:
		return 68;
	case 2:
		return 72;
	}
	abort();
}
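/*
 * Bookkeeping for a record of `n` bytes that is about to be committed to the
 * block: verify that it still fits next to the restart data, optionally
 * register a restart point at its offset, and remember `key` as the last
 * written key. Returns -1 if the record does not fit into the block.
 */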
static int block_writer_register_restart(struct block_writer *w, int n,
					 int is_restart, struct reftable_buf *key)
{
	uint32_t rlen;
	int err;

	rlen = w->restart_len;
	if (rlen >= MAX_RESTARTS)
		is_restart = 0;

	/* The restart data needs 3 bytes per offset plus a 2-byte count. */
	if (2 + 3 * rlen + n > w->block_size - w->next)
		return -1;
	if (is_restart) {
		REFTABLE_ALLOC_GROW_OR_NULL(w->restarts, w->restart_len + 1,
					    w->restart_cap);
		if (!w->restarts)
			return REFTABLE_OUT_OF_MEMORY_ERROR;
		w->restarts[w->restart_len++] = w->next;
	}

	w->next += n;

	reftable_buf_reset(&w->last_key);
	err = reftable_buf_add(&w->last_key, key->buf, key->len);
	if (err < 0)
		return err;

	w->entries++;
	return 0;
}
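/*
 * Initializes the writer to write a block of the given type into `block`,
 * which must hold at least `block_size` bytes. Record data starts right
 * after the 4-byte block header (type byte plus 3-byte length), shifted by
 * `header_off` for the first block of a file, which also carries the file
 * header.
 */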
int block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *block,
		      uint32_t block_size, uint32_t header_off, uint32_t hash_size)
{
	bw->block = block;
	bw->hash_size = hash_size;
	bw->block_size = block_size;
	bw->header_off = header_off;
	bw->block[header_off] = typ;
	bw->next = header_off + 4;
	bw->restart_interval = 16;
	bw->entries = 0;
	bw->restart_len = 0;
	bw->last_key.len = 0;
	if (!bw->zstream) {
		REFTABLE_CALLOC_ARRAY(bw->zstream, 1);
		if (!bw->zstream)
			return REFTABLE_OUT_OF_MEMORY_ERROR;
		deflateInit(bw->zstream, 9);
	}

	return 0;
}
uint8_t block_writer_type(struct block_writer *bw)
{
	return bw->block[bw->header_off];
}
/* Adds the reftable_record to the block. Returns -1 if it does not fit, 0 on
   success. Returns REFTABLE_API_ERROR if attempting to write a record with an
   empty key. */
int block_writer_add(struct block_writer *w, struct reftable_record *rec)
{
	struct reftable_buf empty = REFTABLE_BUF_INIT;
	struct reftable_buf last =
		w->entries % w->restart_interval == 0 ? empty : w->last_key;
	struct string_view out = {
		.buf = w->block + w->next,
		.len = w->block_size - w->next,
	};
	struct string_view start = out;
	int is_restart = 0;
	int n = 0;
	int err;

	err = reftable_record_key(rec, &w->scratch);
	if (err < 0)
		goto done;

	if (!w->scratch.len) {
		err = REFTABLE_API_ERROR;
		goto done;
	}

	n = reftable_encode_key(&is_restart, out, last, w->scratch,
				reftable_record_val_type(rec));
	if (n < 0) {
		err = -1;
		goto done;
	}
	string_view_consume(&out, n);

	n = reftable_record_encode(rec, out, w->hash_size);
	if (n < 0) {
		err = -1;
		goto done;
	}
	string_view_consume(&out, n);

	err = block_writer_register_restart(w, start.len - out.len, is_restart,
					    &w->scratch);
done:
	return err;
}
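/*
 * Finalizes the block: appends the 3-byte restart offsets and the 2-byte
 * restart count, patches the block length into the header, and zlib-deflates
 * the payload of log blocks in place. Returns the number of bytes used in
 * the block, or a negative error code.
 */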
int block_writer_finish(struct block_writer *w)
{
	for (uint32_t i = 0; i < w->restart_len; i++) {
		put_be24(w->block + w->next, w->restarts[i]);
		w->next += 3;
	}

	put_be16(w->block + w->next, w->restart_len);
	w->next += 2;
	put_be24(w->block + 1 + w->header_off, w->next);

	/*
	 * Log records are stored zlib-compressed. Note that the compression
	 * also spans over the restart points we have just written.
	 */
	if (block_writer_type(w) == BLOCK_TYPE_LOG) {
		int block_header_skip = 4 + w->header_off;
		uLongf src_len = w->next - block_header_skip, compressed_len;
		int ret;

		ret = deflateReset(w->zstream);
		if (ret != Z_OK)
			return REFTABLE_ZLIB_ERROR;

		/*
		 * Precompute the upper bound of how many bytes the compressed
		 * data may end up with. Combined with `Z_FINISH`, `deflate()`
		 * is guaranteed to return `Z_STREAM_END`.
		 */
		compressed_len = deflateBound(w->zstream, src_len);
		REFTABLE_ALLOC_GROW_OR_NULL(w->compressed, compressed_len,
					    w->compressed_cap);
		if (!w->compressed) {
			ret = REFTABLE_OUT_OF_MEMORY_ERROR;
			return ret;
		}

		w->zstream->next_out = w->compressed;
		w->zstream->avail_out = compressed_len;
		w->zstream->next_in = w->block + block_header_skip;
		w->zstream->avail_in = src_len;

		/*
		 * We want to perform the compression in a single step, which
		 * is why we can pass Z_FINISH here. As we have precomputed the
		 * deflated buffer's size via `deflateBound()` this function is
		 * guaranteed to succeed according to the zlib documentation.
		 */
		ret = deflate(w->zstream, Z_FINISH);
		if (ret != Z_STREAM_END)
			return REFTABLE_ZLIB_ERROR;

		/*
		 * Overwrite the uncompressed data we have already written and
		 * adjust the `next` pointer to point right after the
		 * compressed data.
		 */
		memcpy(w->block + block_header_skip, w->compressed,
		       w->zstream->total_out);
		w->next = w->zstream->total_out + block_header_skip;
	}

	return w->next;
}
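/*
 * Prepares the reader for the block at `header_off` within `block`. Log
 * blocks are inflated into a scratch buffer first, and ownership of the
 * (possibly replaced) block data is transferred to the reader. On success
 * the restart data at the end of the block has been located so that
 * iterators can binary-search it.
 */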
int block_reader_init(struct block_reader *br, struct reftable_block *block,
		      uint32_t header_off, uint32_t table_block_size,
		      uint32_t hash_size)
{
	uint32_t full_block_size = table_block_size;
	uint8_t typ = block->data[header_off];
	uint32_t sz = get_be24(block->data + header_off + 1);
	int err = 0;
	uint16_t restart_count = 0;
	uint32_t restart_start = 0;
	uint8_t *restart_bytes = NULL;

	reftable_block_done(&br->block);

	if (!reftable_is_block_type(typ)) {
		err = REFTABLE_FORMAT_ERROR;
		goto done;
	}

	if (typ == BLOCK_TYPE_LOG) {
		uint32_t block_header_skip = 4 + header_off;
		uLong dst_len = sz - block_header_skip;
		uLong src_len = block->len - block_header_skip;

		/* Log blocks specify the *uncompressed* size in their header. */
		REFTABLE_ALLOC_GROW_OR_NULL(br->uncompressed_data, sz,
					    br->uncompressed_cap);
		if (!br->uncompressed_data) {
			err = REFTABLE_OUT_OF_MEMORY_ERROR;
			goto done;
		}

		/* Copy over the block header verbatim. It's not compressed. */
		memcpy(br->uncompressed_data, block->data, block_header_skip);

		if (!br->zstream) {
			REFTABLE_CALLOC_ARRAY(br->zstream, 1);
			if (!br->zstream) {
				err = REFTABLE_OUT_OF_MEMORY_ERROR;
				goto done;
			}

			err = inflateInit(br->zstream);
		} else {
			err = inflateReset(br->zstream);
		}
		if (err != Z_OK) {
			err = REFTABLE_ZLIB_ERROR;
			goto done;
		}

		br->zstream->next_in = block->data + block_header_skip;
		br->zstream->avail_in = src_len;
		br->zstream->next_out = br->uncompressed_data + block_header_skip;
		br->zstream->avail_out = dst_len;

		/*
		 * We know both input as well as output size, and we know that
		 * the sizes should never be bigger than `uInt_MAX` because
		 * blocks can at most be 16MB large. We can thus use `Z_FINISH`
		 * here to instruct zlib to inflate the data in one go, which
		 * is more efficient than using `Z_NO_FLUSH`.
		 */
		err = inflate(br->zstream, Z_FINISH);
		if (err != Z_STREAM_END) {
			err = REFTABLE_ZLIB_ERROR;
			goto done;
		}
		err = 0;

		if (br->zstream->total_out + block_header_skip != sz) {
			err = REFTABLE_FORMAT_ERROR;
			goto done;
		}

		/* We're done with the input data. */
		reftable_block_done(block);
		block->data = br->uncompressed_data;
		block->len = sz;
		full_block_size = src_len + block_header_skip - br->zstream->avail_in;
	} else if (full_block_size == 0) {
		full_block_size = sz;
	} else if (sz < full_block_size && sz < block->len &&
		   block->data[sz] != 0) {
		/* If the block is smaller than the full block size, it is
		   padded (data followed by '\0') or the next block is
		   unaligned. */
		full_block_size = sz;
	}

	restart_count = get_be16(block->data + sz - 2);
	restart_start = sz - 2 - 3 * restart_count;
	restart_bytes = block->data + restart_start;

	/* Transfer ownership of the block data to the reader. */
	br->block = *block;
	block->data = NULL;
	block->len = 0;

	br->hash_size = hash_size;
	br->block_len = restart_start;
	br->full_block_size = full_block_size;
	br->header_off = header_off;
	br->restart_count = restart_count;
	br->restart_bytes = restart_bytes;

done:
	return err;
}
void block_reader_release(struct block_reader *br)
{
	inflateEnd(br->zstream);
	reftable_free(br->zstream);
	reftable_free(br->uncompressed_data);
	reftable_block_done(&br->block);
}
uint8_t block_reader_type(const struct block_reader *r)
{
	return r->block.data[r->header_off];
}
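/*
 * Decodes the key of the first record in the block into `key`. The first
 * record directly follows the block header and is stored without prefix
 * compression, so no preceding key is needed to decode it.
 */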
int block_reader_first_key(const struct block_reader *br, struct reftable_buf *key)
{
	int off = br->header_off + 4, n;
	struct string_view in = {
		.buf = br->block.data + off,
		.len = br->block_len - off,
	};
	uint8_t extra = 0;

	reftable_buf_reset(key);

	n = reftable_decode_key(key, &extra, in);
	if (n < 0)
		return n;
	if (!key->len)
		return REFTABLE_FORMAT_ERROR;

	return 0;
}
static uint32_t block_reader_restart_offset(const struct block_reader *br, size_t idx)
{
	return get_be24(br->restart_bytes + 3 * idx);
}
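/*
 * Positions the iterator at the first record of the block, right after the
 * 4-byte block header.
 */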
void block_iter_seek_start(struct block_iter *it, const struct block_reader *br)
{
	it->block = br->block.data;
	it->block_len = br->block_len;
	it->hash_size = br->hash_size;
	reftable_buf_reset(&it->last_key);
	it->next_off = br->header_off + 4;
}
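/*
 * Arguments for the `binsearch()` needle function below; `error` is set when
 * a restart point cannot be decoded.
 */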
struct restart_needle_less_args {
	int error;
	struct reftable_buf needle;
	const struct block_reader *reader;
};
static int restart_needle_less(size_t idx, void *_args)
{
	struct restart_needle_less_args *args = _args;
	uint32_t off = block_reader_restart_offset(args->reader, idx);
	struct string_view in = {
		.buf = args->reader->block.data + off,
		.len = args->reader->block_len - off,
	};
	uint64_t prefix_len, suffix_len;
	uint8_t extra;
	int n;

	/*
	 * Records at restart points are stored without prefix compression, so
	 * there is no need to fully decode the record key here. This removes
	 * the need for allocating memory.
	 */
	n = reftable_decode_keylen(in, &prefix_len, &suffix_len, &extra);
	if (n < 0 || prefix_len) {
		args->error = 1;
		return -1;
	}

	string_view_consume(&in, n);
	if (suffix_len > in.len) {
		args->error = 1;
		return -1;
	}

	n = memcmp(args->needle.buf, in.buf,
		   args->needle.len < suffix_len ? args->needle.len : suffix_len);
	if (n)
		return n < 0;
	return args->needle.len < suffix_len;
}
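/*
 * Decodes the record at `next_off` into `rec` and advances the iterator.
 * Returns 0 on success, 1 when the end of the block has been reached, and a
 * negative error code otherwise.
 */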
int block_iter_next(struct block_iter *it, struct reftable_record *rec)
{
	struct string_view in = {
		.buf = (unsigned char *) it->block + it->next_off,
		.len = it->block_len - it->next_off,
	};
	struct string_view start = in;
	uint8_t extra = 0;
	int n = 0;

	if (it->next_off >= it->block_len)
		return 1;

	n = reftable_decode_key(&it->last_key, &extra, in);
	if (n < 0)
		return -1;
	if (!it->last_key.len)
		return REFTABLE_FORMAT_ERROR;

	string_view_consume(&in, n);
	n = reftable_record_decode(rec, it->last_key, extra, in, it->hash_size,
				   &it->scratch);
	if (n < 0)
		return -1;
	string_view_consume(&in, n);

	it->next_off += start.len - in.len;
	return 0;
}
void block_iter_reset(struct block_iter *it)
{
	reftable_buf_reset(&it->last_key);
	it->next_off = 0;
	it->block = NULL;
	it->block_len = 0;
	it->hash_size = 0;
}
void block_iter_close(struct block_iter *it)
{
	reftable_buf_release(&it->last_key);
	reftable_buf_release(&it->scratch);
}
int block_iter_seek_key(struct block_iter *it, const struct block_reader *br,
			struct reftable_buf *want)
{
	struct restart_needle_less_args args = {
		.needle = *want,
		.reader = br,
	};
	struct reftable_record rec;
	size_t i;
	int err = 0;

	/*
	 * Perform a binary search over the block's restart points, which
	 * avoids doing a linear scan over the whole block. Like this, we
	 * identify the section of the block that should contain our key.
	 *
	 * Note that we explicitly search for the first restart point _greater_
	 * than the sought-after record, not _greater or equal_ to it. In case
	 * the sought-after record is located directly at the restart point we
	 * would otherwise start doing the linear search at the preceding
	 * restart point. While that works alright, we would end up scanning
	 * too many records.
	 */
	i = binsearch(br->restart_count, &restart_needle_less, &args);
	if (args.error) {
		err = REFTABLE_FORMAT_ERROR;
		goto done;
	}

	/*
	 * Now there are multiple cases:
	 *
	 * - `i == 0`: The wanted record is smaller than the record found at
	 *   the first restart point. As the first restart point is the first
	 *   record in the block, our wanted record cannot be located in this
	 *   block at all. We still need to position the iterator so that the
	 *   next call to `block_iter_next()` will yield an end-of-iterator
	 *   signal.
	 *
	 * - `i == restart_count`: The wanted record was not found at any of
	 *   the restart points. As there is no restart point at the end of
	 *   the section the record may thus be contained in the last block.
	 *
	 * - `i > 0`: The wanted record must be contained in the section
	 *   before the found restart point. We thus do a linear search
	 *   starting from the preceding restart point.
	 */
	if (i > 0)
		it->next_off = block_reader_restart_offset(br, i - 1);
	else
		it->next_off = br->header_off + 4;
	it->block = br->block.data;
	it->block_len = br->block_len;
	it->hash_size = br->hash_size;

	reftable_record_init(&rec, block_reader_type(br));

	/*
	 * We're looking for the last entry less than the wanted key so that
	 * the next call to `block_iter_next()` would yield the wanted
	 * record. We thus don't want to position our iterator at the
	 * sought-after record, but one before. To do so, we have to go one
	 * entry too far and then back up.
	 */
	while (1) {
		size_t prev_off = it->next_off;

		err = block_iter_next(it, &rec);
		if (err < 0)
			goto done;
		if (err > 0) {
			it->next_off = prev_off;
			err = 0;
			goto done;
		}

		err = reftable_record_key(&rec, &it->last_key);
		if (err < 0)
			goto done;

		/*
		 * Check whether the current key is greater or equal to the
		 * sought-after key. In case it is greater we know that the
		 * record does not exist in the block and can thus abort early.
		 * In case it is equal to the sought-after key we have found
		 * the desired record.
		 *
		 * Note that we store the next record's key directly in
		 * `last_key` without restoring the key of the preceding record
		 * in case we need to go one record back. This is safe to do as
		 * `block_iter_next()` would return the record whose key is
		 * equal to `last_key` now, and naturally all keys share a
		 * prefix with themselves.
		 */
		if (reftable_buf_cmp(&it->last_key, want) >= 0) {
			it->next_off = prev_off;
			goto done;
		}
	}

done:
	reftable_record_release(&rec);
	return err;
}
void block_writer_release(struct block_writer *bw)
{
	deflateEnd(bw->zstream);
	REFTABLE_FREE_AND_NULL(bw->zstream);
	REFTABLE_FREE_AND_NULL(bw->restarts);
	REFTABLE_FREE_AND_NULL(bw->compressed);
	reftable_buf_release(&bw->scratch);
	reftable_buf_release(&bw->last_key);
	/* the block is not owned. */
}
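/*
 * Returns the block's data to its block source, if any, and clears the
 * block. Safe to call on a block that has already been returned.
 */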
void reftable_block_done(struct reftable_block *blockp)
{
	struct reftable_block_source source;

	if (!blockp)
		return;

	source = blockp->source;
	if (source.ops)
		source.ops->return_block(source.arg, blockp);
	blockp->data = NULL;
	blockp->len = 0;
	blockp->source.ops = NULL;
	blockp->source.arg = NULL;
}