reftable/merged: expose functions to initialize iterators
[git/gitster.git] / reftable / merged.c
blob8d78b3da719fac9bfc44259d965d89b4b42e4b20
1 /*
2 Copyright 2020 Google LLC
4 Use of this source code is governed by a BSD-style
5 license that can be found in the LICENSE file or at
6 https://developers.google.com/open-source/licenses/bsd
7 */
9 #include "merged.h"
11 #include "constants.h"
12 #include "iter.h"
13 #include "pq.h"
14 #include "record.h"
15 #include "generic.h"
16 #include "reftable-merged.h"
17 #include "reftable-error.h"
18 #include "system.h"
20 struct merged_subiter {
21 struct reftable_iterator iter;
22 struct reftable_record rec;
25 struct merged_iter {
26 struct merged_subiter *subiters;
27 struct merged_iter_pqueue pq;
28 size_t stack_len;
29 int suppress_deletions;
30 ssize_t advance_index;
33 static void merged_iter_init(struct merged_iter *mi,
34 struct reftable_merged_table *mt,
35 uint8_t typ)
37 memset(mi, 0, sizeof(*mi));
38 mi->advance_index = -1;
39 mi->suppress_deletions = mt->suppress_deletions;
41 REFTABLE_CALLOC_ARRAY(mi->subiters, mt->stack_len);
42 for (size_t i = 0; i < mt->stack_len; i++) {
43 reftable_record_init(&mi->subiters[i].rec, typ);
44 table_init_iter(&mt->stack[i], &mi->subiters[i].iter, typ);
46 mi->stack_len = mt->stack_len;
49 static void merged_iter_close(void *p)
51 struct merged_iter *mi = p;
53 merged_iter_pqueue_release(&mi->pq);
54 for (size_t i = 0; i < mi->stack_len; i++) {
55 reftable_iterator_destroy(&mi->subiters[i].iter);
56 reftable_record_release(&mi->subiters[i].rec);
58 reftable_free(mi->subiters);
61 static int merged_iter_advance_subiter(struct merged_iter *mi, size_t idx)
63 struct pq_entry e = {
64 .index = idx,
65 .rec = &mi->subiters[idx].rec,
67 int err;
69 err = iterator_next(&mi->subiters[idx].iter, &mi->subiters[idx].rec);
70 if (err)
71 return err;
73 merged_iter_pqueue_add(&mi->pq, &e);
74 return 0;
77 static int merged_iter_seek(struct merged_iter *mi, struct reftable_record *want)
79 int err;
81 mi->advance_index = -1;
83 for (size_t i = 0; i < mi->stack_len; i++) {
84 err = iterator_seek(&mi->subiters[i].iter, want);
85 if (err < 0)
86 return err;
87 if (err > 0)
88 continue;
90 err = merged_iter_advance_subiter(mi, i);
91 if (err < 0)
92 return err;
95 return 0;
98 static int merged_iter_next_entry(struct merged_iter *mi,
99 struct reftable_record *rec)
101 struct pq_entry entry = { 0 };
102 int err = 0, empty;
104 empty = merged_iter_pqueue_is_empty(mi->pq);
106 if (mi->advance_index >= 0) {
108 * When there are no pqueue entries then we only have a single
109 * subiter left. There is no need to use the pqueue in that
110 * case anymore as we know that the subiter will return entries
111 * in the correct order already.
113 * While this may sound like a very specific edge case, it may
114 * happen more frequently than you think. Most repositories
115 * will end up having a single large base table that contains
116 * most of the refs. It's thus likely that we exhaust all
117 * subiters but the one from that base ref.
119 if (empty)
120 return iterator_next(&mi->subiters[mi->advance_index].iter,
121 rec);
123 err = merged_iter_advance_subiter(mi, mi->advance_index);
124 if (err < 0)
125 return err;
126 if (!err)
127 empty = 0;
128 mi->advance_index = -1;
131 if (empty)
132 return 1;
134 entry = merged_iter_pqueue_remove(&mi->pq);
137 One can also use reftable as datacenter-local storage, where the ref
138 database is maintained in globally consistent database (eg.
139 CockroachDB or Spanner). In this scenario, replication delays together
140 with compaction may cause newer tables to contain older entries. In
141 such a deployment, the loop below must be changed to collect all
142 entries for the same key, and return new the newest one.
144 while (!merged_iter_pqueue_is_empty(mi->pq)) {
145 struct pq_entry top = merged_iter_pqueue_top(mi->pq);
146 int cmp;
148 cmp = reftable_record_cmp(top.rec, entry.rec);
149 if (cmp > 0)
150 break;
152 merged_iter_pqueue_remove(&mi->pq);
153 err = merged_iter_advance_subiter(mi, top.index);
154 if (err < 0)
155 return err;
158 mi->advance_index = entry.index;
159 SWAP(*rec, *entry.rec);
160 return 0;
163 static int merged_iter_seek_void(void *it, struct reftable_record *want)
165 return merged_iter_seek(it, want);
168 static int merged_iter_next_void(void *p, struct reftable_record *rec)
170 struct merged_iter *mi = p;
171 while (1) {
172 int err = merged_iter_next_entry(mi, rec);
173 if (err)
174 return err;
175 if (mi->suppress_deletions && reftable_record_is_deletion(rec))
176 continue;
177 return 0;
181 static struct reftable_iterator_vtable merged_iter_vtable = {
182 .seek = merged_iter_seek_void,
183 .next = &merged_iter_next_void,
184 .close = &merged_iter_close,
187 static void iterator_from_merged_iter(struct reftable_iterator *it,
188 struct merged_iter *mi)
190 assert(!it->ops);
191 it->iter_arg = mi;
192 it->ops = &merged_iter_vtable;
195 int reftable_new_merged_table(struct reftable_merged_table **dest,
196 struct reftable_table *stack, size_t n,
197 uint32_t hash_id)
199 struct reftable_merged_table *m = NULL;
200 uint64_t last_max = 0;
201 uint64_t first_min = 0;
203 for (size_t i = 0; i < n; i++) {
204 uint64_t min = reftable_table_min_update_index(&stack[i]);
205 uint64_t max = reftable_table_max_update_index(&stack[i]);
207 if (reftable_table_hash_id(&stack[i]) != hash_id) {
208 return REFTABLE_FORMAT_ERROR;
210 if (i == 0 || min < first_min) {
211 first_min = min;
213 if (i == 0 || max > last_max) {
214 last_max = max;
218 REFTABLE_CALLOC_ARRAY(m, 1);
219 m->stack = stack;
220 m->stack_len = n;
221 m->min = first_min;
222 m->max = last_max;
223 m->hash_id = hash_id;
224 *dest = m;
225 return 0;
228 void reftable_merged_table_free(struct reftable_merged_table *mt)
230 if (!mt)
231 return;
232 FREE_AND_NULL(mt->stack);
233 reftable_free(mt);
236 uint64_t
237 reftable_merged_table_max_update_index(struct reftable_merged_table *mt)
239 return mt->max;
242 uint64_t
243 reftable_merged_table_min_update_index(struct reftable_merged_table *mt)
245 return mt->min;
248 void merged_table_init_iter(struct reftable_merged_table *mt,
249 struct reftable_iterator *it,
250 uint8_t typ)
252 struct merged_iter *mi = reftable_malloc(sizeof(*mi));
253 merged_iter_init(mi, mt, typ);
254 iterator_from_merged_iter(it, mi);
257 void reftable_merged_table_init_ref_iterator(struct reftable_merged_table *mt,
258 struct reftable_iterator *it)
260 merged_table_init_iter(mt, it, BLOCK_TYPE_REF);
263 void reftable_merged_table_init_log_iterator(struct reftable_merged_table *mt,
264 struct reftable_iterator *it)
266 merged_table_init_iter(mt, it, BLOCK_TYPE_LOG);
269 uint32_t reftable_merged_table_hash_id(struct reftable_merged_table *mt)
271 return mt->hash_id;
274 static void reftable_merged_table_init_iter_void(void *tab,
275 struct reftable_iterator *it,
276 uint8_t typ)
278 merged_table_init_iter(tab, it, typ);
281 static uint32_t reftable_merged_table_hash_id_void(void *tab)
283 return reftable_merged_table_hash_id(tab);
286 static uint64_t reftable_merged_table_min_update_index_void(void *tab)
288 return reftable_merged_table_min_update_index(tab);
291 static uint64_t reftable_merged_table_max_update_index_void(void *tab)
293 return reftable_merged_table_max_update_index(tab);
296 static struct reftable_table_vtable merged_table_vtable = {
297 .init_iter = reftable_merged_table_init_iter_void,
298 .hash_id = reftable_merged_table_hash_id_void,
299 .min_update_index = reftable_merged_table_min_update_index_void,
300 .max_update_index = reftable_merged_table_max_update_index_void,
303 void reftable_table_from_merged_table(struct reftable_table *tab,
304 struct reftable_merged_table *merged)
306 assert(!tab->ops);
307 tab->ops = &merged_table_vtable;
308 tab->table_arg = merged;