Properly support 'merged' tail block
[httpd-crcsyncproxy.git] / ccan / crcsync / crcsync.c
blob80d8024266fd639bc553b4b9137d345426e70adf
1 #include "crcsync.h"
2 #include <ccan/crc/crc.h>
3 #include <string.h>
4 #include <assert.h>
5 #include <stdbool.h>
7 /* FIXME: That 64-bit CRC takes a while to warm the lower bits. Do
8 * some quantitative tests and replace it? Meanwhile, use upper bits. */
9 static uint64_t mask_of(unsigned int crcbits)
11 return -1ULL << (64 - crcbits);
14 void crc_of_blocks(const void *data, size_t len, unsigned int normal_block_size,
15 unsigned int crcbits, bool merge_trailing_bytes_in_last_block, uint64_t crc[])
17 unsigned int nblocks = len/normal_block_size;
18 unsigned int i;
19 const uint8_t *buf = data;
20 uint64_t crcmask = mask_of(crcbits);
22 if (len%normal_block_size && !merge_trailing_bytes_in_last_block)
23 nblocks++;
25 for (i = 0; i < nblocks - 1; i++) {
26 crc[i] = (crc64_iso(0, buf, normal_block_size) & crcmask);
27 buf += normal_block_size;
28 len -= normal_block_size;
30 crc[i] = (crc64_iso(0, buf, len) & crcmask);
33 struct crc_hash_record {
34 uint64_t crc;
35 int value;
38 struct crc_context {
39 size_t normal_block_size;
40 size_t tail_block_size;
41 size_t max_block_size;
42 uint64_t crcmask;
44 /* Saved old buffer bytes (max_block_size bytes). */
45 void *buffer;
46 size_t buffer_size;
47 void *buffer_end; /* invariant to be enforced in code: buffer_end = buffer + buffer_size */
49 /* Progress so far. */
50 uint64_t running_normal_crc;
51 uint64_t running_tail_crc;;
52 size_t literal_bytes;
53 size_t total_bytes;
54 int have_match;
56 /* Uncrc tab. */
57 uint64_t normal_uncrc_tab[256];
58 uint64_t tail_uncrc_tab[256];
60 /* last CRC is special */
61 uint64_t tail_crc;
62 /* This doesn't count the last CRC. */
63 unsigned int num_crcs;
64 unsigned crc_hashtable_size;
65 struct crc_hash_record *crc_hashtable;
68 static uint64_t crc64_over_zeros(const uint64_t *crc64_iso_tab, uint64_t crc, int size)
70 while (size--)
72 crc = crc64_iso_tab[crc & 0xFFL] ^ (crc >> 8);
74 return crc;
77 /* Initialize one table that is used to calculate how the crc changes when we take a give
78 * char out of the crc'd area. This function is to be used when there is no tail block */
79 static void init_uncrc_tab(uint64_t uncrc_tab[], unsigned int wsize)
81 const uint64_t *crc64_iso_tab = crc64_iso_table();
82 unsigned int i;
83 uint64_t part_crc;
85 part_crc = crc64_over_zeros(crc64_iso_tab, 0, wsize-1);
86 for (i=0; i < 256; i++)
87 uncrc_tab[i] = crc64_over_zeros(crc64_iso_tab, crc64_iso_tab[i], wsize-1) ^ part_crc;
90 /* Initialize two tables that are used to calculate how the crc changes when we take a give
91 * char out of the crc'd area. This function is to be used when there is a tail block.
92 * The function initializes one table for the tail block and another one for the normal block.
93 * You must pass the params for the smalles block first followed by the params for the largest block */
94 static void init_uncrc_tabs(uint64_t small_uncrc_tab[], unsigned int small_wsize, uint64_t large_uncrc_tab[], unsigned int large_wsize)
96 const uint64_t *crc64_iso_tab = crc64_iso_table();
97 unsigned int i;
98 unsigned int delta_wsize = large_wsize - small_wsize;
99 uint64_t small_part_crc;
100 uint64_t large_part_crc;
101 uint64_t crc;
103 small_part_crc = crc64_over_zeros(crc64_iso_tab, 0, small_wsize-1);
104 large_part_crc = crc64_over_zeros(crc64_iso_tab, small_part_crc, delta_wsize);
105 for (i=0; i < 256; i++) {
106 crc = crc64_over_zeros(crc64_iso_tab, crc64_iso_tab[i], small_wsize-1);
107 small_uncrc_tab[i] = crc ^ small_part_crc;
108 crc = crc64_over_zeros(crc64_iso_tab, crc, delta_wsize);
109 large_uncrc_tab[i] = crc ^ large_part_crc;
113 static void crc_hashtable_put(struct crc_hash_record *crc_hashtable, unsigned hash_tmask, uint64_t crc, int value)
115 unsigned pos = (crc >> 32) & hash_tmask; // Use highest 32 bits of the checksum as start position
116 unsigned step = (1 + (crc & 0x1e)); // Step with an odd-number of steps, exact value depends on crc lowest 5 bits
118 while (crc_hashtable[pos].value != -1 && crc_hashtable[pos].crc != crc)
120 // This position is already taken by another crc record. Go to next position
121 pos = (pos + step) & hash_tmask;
123 crc_hashtable[pos].value = value;
124 crc_hashtable[pos].crc = crc;
127 static int crc_hashtable_get(const struct crc_hash_record crc_hashtable[], unsigned hash_tmask, uint64_t crc)
129 unsigned pos = (crc >> 32) & hash_tmask;
130 unsigned step = (1 + (crc & 0x1e));
132 while (crc_hashtable[pos].value != -1 && crc_hashtable[pos].crc != crc)
134 // This position is taken by another CRC record. Go to next position
135 pos = (pos + step) & hash_tmask;
137 // Found an empty position (with value -1) or found the entry for the requested CRC
138 return crc_hashtable[pos].value;
141 struct crc_context *crc_context_new(size_t normal_block_size, unsigned crcbits,
142 const uint64_t crc[], unsigned num_crcs,
143 size_t tail_block_size)
145 struct crc_context *ctx;
147 assert(num_crcs > 0);
148 assert(normal_block_size > 0);
149 assert(tail_block_size >= 0);
151 ctx = malloc(sizeof(*ctx) + sizeof(crc[0])*num_crcs);
152 if (ctx) {
153 ctx->normal_block_size = normal_block_size;
154 if (tail_block_size == normal_block_size)
156 tail_block_size = 0; // treat a tail block with normal block size as a normal block
158 ctx->tail_block_size = tail_block_size;
159 ctx->max_block_size = (tail_block_size > normal_block_size) ?
160 tail_block_size : normal_block_size;
161 if (tail_block_size)
162 ctx->tail_crc = crc[--num_crcs];
164 ctx->crcmask = mask_of(crcbits);
165 ctx->num_crcs = num_crcs;
166 ctx->crc_hashtable_size = 4;
167 while (ctx->crc_hashtable_size < 2*num_crcs)
169 ctx->crc_hashtable_size <<= 1;
171 ctx->crc_hashtable = malloc(sizeof(struct crc_hash_record)*ctx->crc_hashtable_size);
172 unsigned cnt;
173 for (cnt=0; cnt != ctx->crc_hashtable_size; cnt++)
175 ctx->crc_hashtable[cnt].value = -1;
177 for (cnt=0; cnt != num_crcs; cnt++)
179 crc_hashtable_put(ctx->crc_hashtable, ctx->crc_hashtable_size-1, crc[cnt], cnt);
181 // memcpy(ctx->crc, crc, sizeof(crc[0])*num_crcs);
182 ctx->running_normal_crc = 0;
183 ctx->literal_bytes = 0;
184 ctx->total_bytes = 0;
185 ctx->have_match = -1;
186 if (tail_block_size)
188 if (tail_block_size < normal_block_size)
189 init_uncrc_tabs(ctx->tail_uncrc_tab, tail_block_size, ctx->normal_uncrc_tab, normal_block_size);
190 else
191 init_uncrc_tabs(ctx->normal_uncrc_tab, normal_block_size, ctx->tail_uncrc_tab, tail_block_size);
193 else
195 init_uncrc_tab(ctx->normal_uncrc_tab, normal_block_size);
198 ctx->buffer = malloc(ctx->max_block_size);
199 if (!ctx->buffer) {
200 free(ctx);
201 ctx = NULL;
203 else {
204 ctx->buffer_size = 0;
205 ctx->buffer_end = ctx->buffer;
208 return ctx;
211 /* Return -1 or index into matching crc. */
212 /* Only invoke once you have read enough literal bytes! */
213 static int crc_matches(const struct crc_context *ctx)
215 return crc_hashtable_get(ctx->crc_hashtable,ctx->crc_hashtable_size-1, ctx->running_normal_crc & ctx->crcmask);
218 /* Return -1 or index of tail crc */
219 /* Only invoke once you have read enough literal bytes! */
220 static int tail_matches(const struct crc_context *ctx)
222 return (ctx->running_tail_crc & ctx->crcmask) == ctx->tail_crc ? ctx->num_crcs : -1;
225 static uint64_t crc_add_byte(uint64_t crc, uint8_t newbyte)
227 return crc64_iso(crc, &newbyte, 1);
230 static uint64_t crc_remove_byte(uint64_t crc, uint8_t oldbyte,
231 const uint64_t uncrc_tab[])
233 return crc ^ uncrc_tab[oldbyte];
236 static uint64_t crc_roll(uint64_t crc, uint8_t oldbyte, uint8_t newbyte,
237 const uint64_t uncrc_tab[])
239 return crc_add_byte(crc_remove_byte(crc, oldbyte, uncrc_tab), newbyte);
242 enum RB_PHASE { non_rolling, only_tail_rolling, only_normal_rolling, both_rolling };
244 size_t crc_read_block(struct crc_context *ctx, long *result,
245 const void *buf, size_t buflen)
247 size_t consumed = 0, len;
248 int crcmatch = -1;
249 const uint8_t *normal_old, *tail_old, *get_pos = buf;
250 enum RB_PHASE phase;
252 /* Simple optimization, if we found a match last time. */
253 if (ctx->have_match >= 0) {
254 crcmatch = ctx->have_match;
255 goto have_match;
258 /* normal_old is the trailing edge of the normal checksum window. */
259 if (ctx->buffer_size >= ctx->normal_block_size)
260 normal_old = ctx->buffer_end - ctx->normal_block_size;
261 else
262 normal_old = NULL;
264 /* tail_old is the trailing edge of the tail checksum window. */
265 if (ctx->tail_block_size != 0 && ctx->buffer_size >= ctx->tail_block_size)
266 tail_old = ctx->buffer_end - ctx->tail_block_size;
267 else
268 tail_old = NULL;
270 if (normal_old == NULL && tail_old == NULL)
271 phase = non_rolling;
272 else
273 if (normal_old == NULL)
274 phase = only_tail_rolling;
275 else
276 if (tail_old == NULL)
277 phase = only_normal_rolling;
278 else
279 phase = both_rolling;
280 while (consumed != buflen && crcmatch == -1) {
281 size_t old_consumed = consumed;
282 switch (phase)
284 case non_rolling:
285 while (consumed != buflen)
287 consumed++;
288 ctx->literal_bytes++;
289 ctx->running_tail_crc = ctx->running_normal_crc =
290 crc_add_byte(ctx->running_normal_crc, *get_pos++);
291 if (ctx->literal_bytes == ctx->normal_block_size) {
292 /* Reached the end of a normal block. Check CRC
293 and start rolling the CRC at next iteration */
294 if ((crcmatch = crc_matches(ctx)) != -1)
295 break;
296 normal_old = (ctx->buffer_size != 0) ? ctx->buffer : buf;
297 phase = only_normal_rolling;
298 break;
300 if (ctx->literal_bytes == ctx->tail_block_size) {
301 /* Reached the end of a tail block. Check tail CRC
302 and start rolling the CRC at next iteration */
303 if ((crcmatch = tail_matches(ctx)) != -1)
304 break;
305 tail_old = (ctx->buffer_size != 0) ? ctx->buffer : buf;
306 phase = only_tail_rolling;
307 break;
310 break;
311 case only_normal_rolling:
312 while (consumed != buflen)
314 consumed++;
315 ctx->literal_bytes++;
316 ctx->running_normal_crc = crc_roll(ctx->running_normal_crc,
317 *normal_old, *get_pos,
318 ctx->normal_uncrc_tab);
319 if ((crcmatch = crc_matches(ctx)) != -1)
320 break;
321 /* Advance trailing pointer for normal CRC */
322 if (++normal_old == ctx->buffer_end)
323 normal_old = buf;
324 if (ctx->tail_block_size) {
325 ctx->running_tail_crc = crc_add_byte(ctx->running_tail_crc, *get_pos++);
326 if (ctx->literal_bytes == ctx->tail_block_size)
328 if ((crcmatch = tail_matches(ctx)) != -1)
329 break;
330 tail_old = (ctx->buffer_size != 0) ? ctx->buffer : buf;
331 phase = both_rolling;
332 break;
335 else
336 get_pos++;
338 break;
339 case only_tail_rolling:
340 while (consumed != buflen)
342 consumed++;
343 ctx->literal_bytes++;
344 ctx->running_tail_crc = crc_roll(ctx->running_tail_crc,
345 *tail_old, *get_pos,
346 ctx->tail_uncrc_tab);
347 if ((crcmatch = tail_matches(ctx)) != -1)
348 break;
349 /* Advance trailing pointer for tail CRC */
350 if (++tail_old == ctx->buffer_end)
351 tail_old = buf;
352 ctx->running_normal_crc = crc_add_byte(ctx->running_normal_crc, *get_pos++);
353 if (ctx->literal_bytes == ctx->normal_block_size)
355 if ((crcmatch = crc_matches(ctx)) != -1)
356 break;
357 normal_old = (ctx->buffer_size != 0) ? ctx->buffer : buf;
358 phase = both_rolling;
359 break;
362 break;
363 case both_rolling:
364 while (consumed != buflen)
366 consumed++;
367 ctx->running_normal_crc = crc_roll(ctx->running_normal_crc,
368 *normal_old, *get_pos,
369 ctx->normal_uncrc_tab);
370 if ((crcmatch = crc_matches(ctx)) != -1)
371 break;
372 /* Advance trailing pointer for normal CRC */
373 if (++normal_old == ctx->buffer_end)
374 normal_old = buf;
375 ctx->running_tail_crc = crc_roll(ctx->running_tail_crc,
376 *tail_old, *get_pos,
377 ctx->tail_uncrc_tab);
378 if ((crcmatch = tail_matches(ctx)) != -1)
379 break;
380 /* Advance trailing pointer for tail CRC */
381 if (++tail_old == ctx->buffer_end)
382 tail_old = buf;
383 get_pos++;
385 ctx->literal_bytes += (consumed - old_consumed);
386 break;
388 ctx->total_bytes += (consumed - old_consumed);
391 if (crcmatch >= 0) {
392 /* We have a match! */
393 size_t matched_block_size = (crcmatch == ctx->num_crcs) ?
394 ctx->tail_block_size : ctx->normal_block_size;
395 if (ctx->literal_bytes > matched_block_size) {
396 /* Output literal first. */
397 *result = ctx->literal_bytes - matched_block_size;
398 ctx->literal_bytes = matched_block_size;
399 /* Remember for next time! */
400 ctx->have_match = crcmatch;
401 } else {
402 have_match:
403 *result = -crcmatch-1;
404 if (crcmatch == ctx->num_crcs)
405 assert(ctx->literal_bytes == ctx->tail_block_size);
406 else
407 assert(ctx->literal_bytes == ctx->normal_block_size);
408 ctx->literal_bytes = 0;
409 ctx->have_match = -1;
410 ctx->running_normal_crc = 0;
411 ctx->running_tail_crc = 0;
412 /* Nothing more in the buffer. */
413 ctx->buffer_size = 0;
414 ctx->buffer_end = ctx->buffer;
416 } else {
417 /* Output literal if it's more than 1 block ago and
418 keep exactly one block of data for future matching. */
419 if (ctx->literal_bytes > ctx->max_block_size) {
420 *result = ctx->literal_bytes - ctx->max_block_size;
421 ctx->literal_bytes = ctx->max_block_size;
422 /* Advance buffer. */
423 if (*result >= ctx->buffer_size) {
424 ctx->buffer_size = 0;
425 ctx->buffer_end = ctx->buffer;
427 else
429 memmove(ctx->buffer, ctx->buffer + *result,
430 ctx->buffer_size);
431 ctx->buffer_size -= *result;
432 ctx->buffer_end -= *result;
434 } else
435 *result = 0;
437 /* Now save any literal bytes we'll need in future. */
438 len = ctx->literal_bytes - ctx->buffer_size;
439 memcpy(ctx->buffer_end, buf + buflen - len, len);
440 ctx->buffer_size += len;
441 ctx->buffer_end += len;
442 assert(ctx->buffer_size <= ctx->max_block_size);
444 return consumed;
447 long crc_read_flush(struct crc_context *ctx)
449 long ret;
451 /* We might have ended right on a matched block. */
452 if (ctx->have_match != -1) {
453 size_t matched_block_size = (ctx->have_match == ctx->num_crcs) ?
454 ctx->tail_block_size : ctx->normal_block_size;
455 ctx->literal_bytes -= matched_block_size;
456 assert(ctx->literal_bytes == 0);
457 ret = -ctx->have_match-1;
458 ctx->have_match = -1;
459 ctx->running_normal_crc = 0;
460 ctx->running_tail_crc = 0;
461 /* Nothing more in the buffer. */
462 ctx->buffer_size = 0;
463 ctx->buffer_end = ctx->buffer;
464 return ret;
467 /* The rest is just a literal. */
468 ret = ctx->buffer_size;
469 assert(ctx->literal_bytes == ret);
470 ctx->buffer_size = 0;
471 ctx->buffer_end = ctx->buffer;
472 ctx->literal_bytes = 0;
473 return ret;
476 void crc_reset_running_crcs(struct crc_context *ctx)
478 ctx->running_normal_crc = 0;
479 ctx->running_tail_crc = 0;
483 * crc_context_free - free a context returned from crc_context_new.
484 * @ctx: the context returned from crc_context_new, or NULL.
486 void crc_context_free(struct crc_context *ctx)
488 free(ctx->buffer);
489 free(ctx);