2 #include <ccan/crc/crc.h>
7 /* FIXME: That 64-bit CRC takes a while to warm the lower bits. Do
8 * some quantitative tests and replace it? Meanwhile, use upper bits. */
9 static uint64_t mask_of(unsigned int crcbits
)
11 return -1ULL << (64 - crcbits
);
14 void crc_of_blocks(const void *data
, size_t len
, unsigned int normal_block_size
,
15 unsigned int crcbits
, bool merge_trailing_bytes_in_last_block
, uint64_t crc
[])
17 unsigned int nblocks
= len
/normal_block_size
;
19 const uint8_t *buf
= data
;
20 uint64_t crcmask
= mask_of(crcbits
);
22 if (len
%normal_block_size
&& !merge_trailing_bytes_in_last_block
)
25 for (i
= 0; i
< nblocks
- 1; i
++) {
26 crc
[i
] = (crc64_iso(0, buf
, normal_block_size
) & crcmask
);
27 buf
+= normal_block_size
;
28 len
-= normal_block_size
;
30 crc
[i
] = (crc64_iso(0, buf
, len
) & crcmask
);
33 struct crc_hash_record
{
39 size_t normal_block_size
;
40 size_t tail_block_size
;
41 size_t max_block_size
;
44 /* Saved old buffer bytes (max_block_size bytes). */
47 void *buffer_end
; /* invariant to be enforced in code: buffer_end = buffer + buffer_size */
49 /* Progress so far. */
50 uint64_t running_normal_crc
;
51 uint64_t running_tail_crc
;;
57 uint64_t normal_uncrc_tab
[256];
58 uint64_t tail_uncrc_tab
[256];
60 /* last CRC is special */
62 /* This doesn't count the last CRC. */
63 unsigned int num_crcs
;
64 unsigned crc_hashtable_size
;
65 struct crc_hash_record
*crc_hashtable
;
68 static uint64_t crc64_over_zeros(const uint64_t *crc64_iso_tab
, uint64_t crc
, int size
)
72 crc
= crc64_iso_tab
[crc
& 0xFFL
] ^ (crc
>> 8);
77 /* Initialize one table that is used to calculate how the crc changes when we take a give
78 * char out of the crc'd area. This function is to be used when there is no tail block */
79 static void init_uncrc_tab(uint64_t uncrc_tab
[], unsigned int wsize
)
81 const uint64_t *crc64_iso_tab
= crc64_iso_table();
85 part_crc
= crc64_over_zeros(crc64_iso_tab
, 0, wsize
-1);
86 for (i
=0; i
< 256; i
++)
87 uncrc_tab
[i
] = crc64_over_zeros(crc64_iso_tab
, crc64_iso_tab
[i
], wsize
-1) ^ part_crc
;
90 /* Initialize two tables that are used to calculate how the crc changes when we take a give
91 * char out of the crc'd area. This function is to be used when there is a tail block.
92 * The function initializes one table for the tail block and another one for the normal block.
93 * You must pass the params for the smalles block first followed by the params for the largest block */
94 static void init_uncrc_tabs(uint64_t small_uncrc_tab
[], unsigned int small_wsize
, uint64_t large_uncrc_tab
[], unsigned int large_wsize
)
96 const uint64_t *crc64_iso_tab
= crc64_iso_table();
98 unsigned int delta_wsize
= large_wsize
- small_wsize
;
99 uint64_t small_part_crc
;
100 uint64_t large_part_crc
;
103 small_part_crc
= crc64_over_zeros(crc64_iso_tab
, 0, small_wsize
-1);
104 large_part_crc
= crc64_over_zeros(crc64_iso_tab
, small_part_crc
, delta_wsize
);
105 for (i
=0; i
< 256; i
++) {
106 crc
= crc64_over_zeros(crc64_iso_tab
, crc64_iso_tab
[i
], small_wsize
-1);
107 small_uncrc_tab
[i
] = crc
^ small_part_crc
;
108 crc
= crc64_over_zeros(crc64_iso_tab
, crc
, delta_wsize
);
109 large_uncrc_tab
[i
] = crc
^ large_part_crc
;
113 static void crc_hashtable_put(struct crc_hash_record
*crc_hashtable
, unsigned hash_tmask
, uint64_t crc
, int value
)
115 unsigned pos
= (crc
>> 32) & hash_tmask
; // Use highest 32 bits of the checksum as start position
116 unsigned step
= (1 + (crc
& 0x1e)); // Step with an odd-number of steps, exact value depends on crc lowest 5 bits
118 while (crc_hashtable
[pos
].value
!= -1 && crc_hashtable
[pos
].crc
!= crc
)
120 // This position is already taken by another crc record. Go to next position
121 pos
= (pos
+ step
) & hash_tmask
;
123 crc_hashtable
[pos
].value
= value
;
124 crc_hashtable
[pos
].crc
= crc
;
127 static int crc_hashtable_get(const struct crc_hash_record crc_hashtable
[], unsigned hash_tmask
, uint64_t crc
)
129 unsigned pos
= (crc
>> 32) & hash_tmask
;
130 unsigned step
= (1 + (crc
& 0x1e));
132 while (crc_hashtable
[pos
].value
!= -1 && crc_hashtable
[pos
].crc
!= crc
)
134 // This position is taken by another CRC record. Go to next position
135 pos
= (pos
+ step
) & hash_tmask
;
137 // Found an empty position (with value -1) or found the entry for the requested CRC
138 return crc_hashtable
[pos
].value
;
141 struct crc_context
*crc_context_new(size_t normal_block_size
, unsigned crcbits
,
142 const uint64_t crc
[], unsigned num_crcs
,
143 size_t tail_block_size
)
145 struct crc_context
*ctx
;
147 assert(num_crcs
> 0);
148 assert(normal_block_size
> 0);
149 assert(tail_block_size
>= 0);
151 ctx
= malloc(sizeof(*ctx
) + sizeof(crc
[0])*num_crcs
);
153 ctx
->normal_block_size
= normal_block_size
;
154 if (tail_block_size
== normal_block_size
)
156 tail_block_size
= 0; // treat a tail block with normal block size as a normal block
158 ctx
->tail_block_size
= tail_block_size
;
159 ctx
->max_block_size
= (tail_block_size
> normal_block_size
) ?
160 tail_block_size
: normal_block_size
;
162 ctx
->tail_crc
= crc
[--num_crcs
];
164 ctx
->crcmask
= mask_of(crcbits
);
165 ctx
->num_crcs
= num_crcs
;
166 ctx
->crc_hashtable_size
= 4;
167 while (ctx
->crc_hashtable_size
< 2*num_crcs
)
169 ctx
->crc_hashtable_size
<<= 1;
171 ctx
->crc_hashtable
= malloc(sizeof(struct crc_hash_record
)*ctx
->crc_hashtable_size
);
173 for (cnt
=0; cnt
!= ctx
->crc_hashtable_size
; cnt
++)
175 ctx
->crc_hashtable
[cnt
].value
= -1;
177 for (cnt
=0; cnt
!= num_crcs
; cnt
++)
179 crc_hashtable_put(ctx
->crc_hashtable
, ctx
->crc_hashtable_size
-1, crc
[cnt
], cnt
);
181 // memcpy(ctx->crc, crc, sizeof(crc[0])*num_crcs);
182 ctx
->running_normal_crc
= 0;
183 ctx
->literal_bytes
= 0;
184 ctx
->total_bytes
= 0;
185 ctx
->have_match
= -1;
188 if (tail_block_size
< normal_block_size
)
189 init_uncrc_tabs(ctx
->tail_uncrc_tab
, tail_block_size
, ctx
->normal_uncrc_tab
, normal_block_size
);
191 init_uncrc_tabs(ctx
->normal_uncrc_tab
, normal_block_size
, ctx
->tail_uncrc_tab
, tail_block_size
);
195 init_uncrc_tab(ctx
->normal_uncrc_tab
, normal_block_size
);
198 ctx
->buffer
= malloc(ctx
->max_block_size
);
204 ctx
->buffer_size
= 0;
205 ctx
->buffer_end
= ctx
->buffer
;
211 /* Return -1 or index into matching crc. */
212 /* Only invoke once you have read enough literal bytes! */
213 static int crc_matches(const struct crc_context
*ctx
)
215 return crc_hashtable_get(ctx
->crc_hashtable
,ctx
->crc_hashtable_size
-1, ctx
->running_normal_crc
& ctx
->crcmask
);
218 /* Return -1 or index of tail crc */
219 /* Only invoke once you have read enough literal bytes! */
220 static int tail_matches(const struct crc_context
*ctx
)
222 return (ctx
->running_tail_crc
& ctx
->crcmask
) == ctx
->tail_crc
? ctx
->num_crcs
: -1;
225 static uint64_t crc_add_byte(uint64_t crc
, uint8_t newbyte
)
227 return crc64_iso(crc
, &newbyte
, 1);
230 static uint64_t crc_remove_byte(uint64_t crc
, uint8_t oldbyte
,
231 const uint64_t uncrc_tab
[])
233 return crc
^ uncrc_tab
[oldbyte
];
236 static uint64_t crc_roll(uint64_t crc
, uint8_t oldbyte
, uint8_t newbyte
,
237 const uint64_t uncrc_tab
[])
239 return crc_add_byte(crc_remove_byte(crc
, oldbyte
, uncrc_tab
), newbyte
);
242 enum RB_PHASE
{ non_rolling
, only_tail_rolling
, only_normal_rolling
, both_rolling
};
244 size_t crc_read_block(struct crc_context
*ctx
, long *result
,
245 const void *buf
, size_t buflen
)
247 size_t consumed
= 0, len
;
249 const uint8_t *normal_old
, *tail_old
, *get_pos
= buf
;
252 /* Simple optimization, if we found a match last time. */
253 if (ctx
->have_match
>= 0) {
254 crcmatch
= ctx
->have_match
;
258 /* normal_old is the trailing edge of the normal checksum window. */
259 if (ctx
->buffer_size
>= ctx
->normal_block_size
)
260 normal_old
= ctx
->buffer_end
- ctx
->normal_block_size
;
264 /* tail_old is the trailing edge of the tail checksum window. */
265 if (ctx
->tail_block_size
!= 0 && ctx
->buffer_size
>= ctx
->tail_block_size
)
266 tail_old
= ctx
->buffer_end
- ctx
->tail_block_size
;
270 if (normal_old
== NULL
&& tail_old
== NULL
)
273 if (normal_old
== NULL
)
274 phase
= only_tail_rolling
;
276 if (tail_old
== NULL
)
277 phase
= only_normal_rolling
;
279 phase
= both_rolling
;
280 while (consumed
!= buflen
&& crcmatch
== -1) {
281 size_t old_consumed
= consumed
;
285 while (consumed
!= buflen
)
288 ctx
->literal_bytes
++;
289 ctx
->running_tail_crc
= ctx
->running_normal_crc
=
290 crc_add_byte(ctx
->running_normal_crc
, *get_pos
++);
291 if (ctx
->literal_bytes
== ctx
->normal_block_size
) {
292 /* Reached the end of a normal block. Check CRC
293 and start rolling the CRC at next iteration */
294 if ((crcmatch
= crc_matches(ctx
)) != -1)
296 normal_old
= (ctx
->buffer_size
!= 0) ? ctx
->buffer
: buf
;
297 phase
= only_normal_rolling
;
300 if (ctx
->literal_bytes
== ctx
->tail_block_size
) {
301 /* Reached the end of a tail block. Check tail CRC
302 and start rolling the CRC at next iteration */
303 if ((crcmatch
= tail_matches(ctx
)) != -1)
305 tail_old
= (ctx
->buffer_size
!= 0) ? ctx
->buffer
: buf
;
306 phase
= only_tail_rolling
;
311 case only_normal_rolling
:
312 while (consumed
!= buflen
)
315 ctx
->literal_bytes
++;
316 ctx
->running_normal_crc
= crc_roll(ctx
->running_normal_crc
,
317 *normal_old
, *get_pos
,
318 ctx
->normal_uncrc_tab
);
319 if ((crcmatch
= crc_matches(ctx
)) != -1)
321 /* Advance trailing pointer for normal CRC */
322 if (++normal_old
== ctx
->buffer_end
)
324 if (ctx
->tail_block_size
) {
325 ctx
->running_tail_crc
= crc_add_byte(ctx
->running_tail_crc
, *get_pos
++);
326 if (ctx
->literal_bytes
== ctx
->tail_block_size
)
328 if ((crcmatch
= tail_matches(ctx
)) != -1)
330 tail_old
= (ctx
->buffer_size
!= 0) ? ctx
->buffer
: buf
;
331 phase
= both_rolling
;
339 case only_tail_rolling
:
340 while (consumed
!= buflen
)
343 ctx
->literal_bytes
++;
344 ctx
->running_tail_crc
= crc_roll(ctx
->running_tail_crc
,
346 ctx
->tail_uncrc_tab
);
347 if ((crcmatch
= tail_matches(ctx
)) != -1)
349 /* Advance trailing pointer for tail CRC */
350 if (++tail_old
== ctx
->buffer_end
)
352 ctx
->running_normal_crc
= crc_add_byte(ctx
->running_normal_crc
, *get_pos
++);
353 if (ctx
->literal_bytes
== ctx
->normal_block_size
)
355 if ((crcmatch
= crc_matches(ctx
)) != -1)
357 normal_old
= (ctx
->buffer_size
!= 0) ? ctx
->buffer
: buf
;
358 phase
= both_rolling
;
364 while (consumed
!= buflen
)
367 ctx
->running_normal_crc
= crc_roll(ctx
->running_normal_crc
,
368 *normal_old
, *get_pos
,
369 ctx
->normal_uncrc_tab
);
370 if ((crcmatch
= crc_matches(ctx
)) != -1)
372 /* Advance trailing pointer for normal CRC */
373 if (++normal_old
== ctx
->buffer_end
)
375 ctx
->running_tail_crc
= crc_roll(ctx
->running_tail_crc
,
377 ctx
->tail_uncrc_tab
);
378 if ((crcmatch
= tail_matches(ctx
)) != -1)
380 /* Advance trailing pointer for tail CRC */
381 if (++tail_old
== ctx
->buffer_end
)
385 ctx
->literal_bytes
+= (consumed
- old_consumed
);
388 ctx
->total_bytes
+= (consumed
- old_consumed
);
392 /* We have a match! */
393 size_t matched_block_size
= (crcmatch
== ctx
->num_crcs
) ?
394 ctx
->tail_block_size
: ctx
->normal_block_size
;
395 if (ctx
->literal_bytes
> matched_block_size
) {
396 /* Output literal first. */
397 *result
= ctx
->literal_bytes
- matched_block_size
;
398 ctx
->literal_bytes
= matched_block_size
;
399 /* Remember for next time! */
400 ctx
->have_match
= crcmatch
;
403 *result
= -crcmatch
-1;
404 if (crcmatch
== ctx
->num_crcs
)
405 assert(ctx
->literal_bytes
== ctx
->tail_block_size
);
407 assert(ctx
->literal_bytes
== ctx
->normal_block_size
);
408 ctx
->literal_bytes
= 0;
409 ctx
->have_match
= -1;
410 ctx
->running_normal_crc
= 0;
411 ctx
->running_tail_crc
= 0;
412 /* Nothing more in the buffer. */
413 ctx
->buffer_size
= 0;
414 ctx
->buffer_end
= ctx
->buffer
;
417 /* Output literal if it's more than 1 block ago and
418 keep exactly one block of data for future matching. */
419 if (ctx
->literal_bytes
> ctx
->max_block_size
) {
420 *result
= ctx
->literal_bytes
- ctx
->max_block_size
;
421 ctx
->literal_bytes
= ctx
->max_block_size
;
422 /* Advance buffer. */
423 if (*result
>= ctx
->buffer_size
) {
424 ctx
->buffer_size
= 0;
425 ctx
->buffer_end
= ctx
->buffer
;
429 memmove(ctx
->buffer
, ctx
->buffer
+ *result
,
431 ctx
->buffer_size
-= *result
;
432 ctx
->buffer_end
-= *result
;
437 /* Now save any literal bytes we'll need in future. */
438 len
= ctx
->literal_bytes
- ctx
->buffer_size
;
439 memcpy(ctx
->buffer_end
, buf
+ buflen
- len
, len
);
440 ctx
->buffer_size
+= len
;
441 ctx
->buffer_end
+= len
;
442 assert(ctx
->buffer_size
<= ctx
->max_block_size
);
447 long crc_read_flush(struct crc_context
*ctx
)
451 /* We might have ended right on a matched block. */
452 if (ctx
->have_match
!= -1) {
453 size_t matched_block_size
= (ctx
->have_match
== ctx
->num_crcs
) ?
454 ctx
->tail_block_size
: ctx
->normal_block_size
;
455 ctx
->literal_bytes
-= matched_block_size
;
456 assert(ctx
->literal_bytes
== 0);
457 ret
= -ctx
->have_match
-1;
458 ctx
->have_match
= -1;
459 ctx
->running_normal_crc
= 0;
460 ctx
->running_tail_crc
= 0;
461 /* Nothing more in the buffer. */
462 ctx
->buffer_size
= 0;
463 ctx
->buffer_end
= ctx
->buffer
;
467 /* The rest is just a literal. */
468 ret
= ctx
->buffer_size
;
469 assert(ctx
->literal_bytes
== ret
);
470 ctx
->buffer_size
= 0;
471 ctx
->buffer_end
= ctx
->buffer
;
472 ctx
->literal_bytes
= 0;
476 void crc_reset_running_crcs(struct crc_context
*ctx
)
478 ctx
->running_normal_crc
= 0;
479 ctx
->running_tail_crc
= 0;
483 * crc_context_free - free a context returned from crc_context_new.
484 * @ctx: the context returned from crc_context_new, or NULL.
486 void crc_context_free(struct crc_context
*ctx
)