The xxh* checksums don't need to be reversed on output.
[rsync.git] / checksum.c
blobcf3000d918c1eeedbc400f227813b6a28bc0dd0a
1 /*
2 * Routines to support checksumming of bytes.
4 * Copyright (C) 1996 Andrew Tridgell
5 * Copyright (C) 1996 Paul Mackerras
6 * Copyright (C) 2004-2020 Wayne Davison
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 3 of the License, or
11 * (at your option) any later version.
13 * In addition, as a special exception, the copyright holders give
14 * permission to dynamically link rsync with the OpenSSL and xxhash
15 * libraries when those libraries are being distributed in compliance
16 * with their license terms, and to distribute a dynamically linked
17 * combination of rsync and these libraries. This is also considered
18 * to be covered under the GPL's System Libraries exception.
20 * This program is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
25 * You should have received a copy of the GNU General Public License along
26 * with this program; if not, visit the http://fsf.org website.
29 #include "rsync.h"
30 #ifdef SUPPORT_XXHASH
31 #include "xxhash.h"
32 #endif
33 #ifdef USE_OPENSSL
34 #include "openssl/md4.h"
35 #include "openssl/md5.h"
36 #endif
38 extern int am_server;
39 extern int whole_file;
40 extern int checksum_seed;
41 extern int protocol_version;
42 extern int proper_seed_order;
43 extern const char *checksum_choice;
/* Internal identifiers for the checksum algorithms handled in this file.
 * The numeric order of the MD4 variants matters: the code below compares
 * against CSUM_MD4_BUSTED and CSUM_MD4 with < and >. */
#define CSUM_NONE		0
#define CSUM_MD4_ARCHAIC	1
#define CSUM_MD4_BUSTED		2
#define CSUM_MD4_OLD		3
#define CSUM_MD4		4
#define CSUM_MD5		5
#define CSUM_XXH64		6
53 struct name_num_obj valid_checksums = {
54 "checksum", NULL, NULL, 0, 0, {
55 #ifdef SUPPORT_XXHASH
56 { CSUM_XXH64, "xxh64", NULL },
57 { CSUM_XXH64, "xxhash", NULL },
58 #endif
59 { CSUM_MD5, "md5", NULL },
60 { CSUM_MD4, "md4", NULL },
61 { CSUM_NONE, "none", NULL },
62 { 0, NULL, NULL }
/* When OpenSSL is not in use, map the OpenSSL-style MD5 names used in this
 * file onto md_context and the md5_* routines.  Note that md5_result() takes
 * its two arguments in the opposite order from MD5_Final(). */
#ifndef USE_OPENSSL
#define MD5_CTX				md_context
#define MD5_Init			md5_begin
#define MD5_Update			md5_update
#define MD5_Final(digest, cptr)		md5_result(cptr, digest)
#endif
/* Both are set by parse_checksum_choice(). */
int xfersum_type = 0;	/* used for the file transfer checksums */
int checksum_type = 0;	/* used for the pre-transfer (--checksum) checksums */
76 static int parse_csum_name(const char *name, int len)
78 struct name_num_item *nni;
80 if (len < 0 && name)
81 len = strlen(name);
83 if (!name || (len == 4 && strncasecmp(name, "auto", 4) == 0)) {
84 if (protocol_version >= 30)
85 return CSUM_MD5;
86 if (protocol_version >= 27)
87 return CSUM_MD4_OLD;
88 if (protocol_version >= 21)
89 return CSUM_MD4_BUSTED;
90 return CSUM_MD4_ARCHAIC;
93 nni = get_nni_by_name(&valid_checksums, name, len);
95 if (!nni) {
96 rprintf(FERROR, "unknown checksum name: %s\n", name);
97 exit_cleanup(RERR_UNSUPPORTED);
100 return nni->num;
103 static const char *checksum_name(int num)
105 struct name_num_item *nni = get_nni_by_num(&valid_checksums, num);
107 return nni ? nni->name : num < CSUM_MD4 ? "MD4" : "UNKNOWN";
110 void parse_checksum_choice(int final_call)
112 if (valid_checksums.negotiated_name)
113 xfersum_type = checksum_type = valid_checksums.negotiated_num;
114 else {
115 char *cp = checksum_choice ? strchr(checksum_choice, ',') : NULL;
116 if (cp) {
117 xfersum_type = parse_csum_name(checksum_choice, cp - checksum_choice);
118 checksum_type = parse_csum_name(cp+1, -1);
119 } else
120 xfersum_type = checksum_type = parse_csum_name(checksum_choice, -1);
123 if (xfersum_type == CSUM_NONE)
124 whole_file = 1;
126 /* Snag the checksum name for both write_batch's option output & the following debug output. */
127 if (valid_checksums.negotiated_name)
128 checksum_choice = valid_checksums.negotiated_name;
129 else if (checksum_choice == NULL)
130 checksum_choice = checksum_name(xfersum_type);
132 if (final_call && DEBUG_GTE(NSTR, am_server ? 3 : 1)) {
133 rprintf(FINFO, "%s%s checksum: %s\n",
134 am_server ? "Server" : "Client",
135 valid_checksums.negotiated_name ? " negotiated" : "",
136 checksum_choice);
140 int csum_len_for_type(int cst, BOOL flist_csum)
142 switch (cst) {
143 case CSUM_NONE:
144 return 1;
145 case CSUM_MD4_ARCHAIC:
146 /* The oldest checksum code is rather weird: the file-list code only sent
147 * 2-byte checksums, but all other checksums were full MD4 length. */
148 return flist_csum ? 2 : MD4_DIGEST_LEN;
149 case CSUM_MD4:
150 case CSUM_MD4_OLD:
151 case CSUM_MD4_BUSTED:
152 return MD4_DIGEST_LEN;
153 case CSUM_MD5:
154 return MD5_DIGEST_LEN;
155 #ifdef SUPPORT_XXHASH
156 case CSUM_XXH64:
157 return 64/8;
158 #endif
159 default: /* paranoia to prevent missing case values */
160 exit_cleanup(RERR_UNSUPPORTED);
162 return 0;
165 /* Returns 0 if the checksum is not canonical (i.e. it includes a seed value).
166 * Returns 1 if the public sum order matches our internal sum order.
167 * Returns -1 if the public sum order is the reverse of our internal sum order.
169 int canonical_checksum(int csum_type)
171 switch (csum_type) {
172 case CSUM_NONE:
173 case CSUM_MD4_ARCHAIC:
174 case CSUM_MD4_OLD:
175 case CSUM_MD4_BUSTED:
176 break;
177 case CSUM_MD4:
178 case CSUM_MD5:
179 return -1;
180 #ifdef SUPPORT_XXHASH
181 case CSUM_XXH64:
182 return 1;
183 #endif
184 default: /* paranoia to prevent missing case values */
185 exit_cleanup(RERR_UNSUPPORTED);
187 return 0;
190 #ifndef HAVE_SIMD /* See simd-checksum-*.cpp. */
192 a simple 32 bit checksum that can be updated from either end
193 (inspired by Mark Adler's Adler-32 checksum)
195 uint32 get_checksum1(char *buf1, int32 len)
197 int32 i;
198 uint32 s1, s2;
199 schar *buf = (schar *)buf1;
201 s1 = s2 = 0;
202 for (i = 0; i < (len-4); i+=4) {
203 s2 += 4*(s1 + buf[i]) + 3*buf[i+1] + 2*buf[i+2] + buf[i+3] + 10*CHAR_OFFSET;
204 s1 += (buf[i+0] + buf[i+1] + buf[i+2] + buf[i+3] + 4*CHAR_OFFSET);
206 for (; i < len; i++) {
207 s1 += (buf[i]+CHAR_OFFSET); s2 += s1;
209 return (s1 & 0xffff) + (s2 << 16);
211 #endif
213 void get_checksum2(char *buf, int32 len, char *sum)
215 switch (xfersum_type) {
216 case CSUM_MD5: {
217 MD5_CTX m5;
218 uchar seedbuf[4];
219 MD5_Init(&m5);
220 if (proper_seed_order) {
221 if (checksum_seed) {
222 SIVALu(seedbuf, 0, checksum_seed);
223 MD5_Update(&m5, seedbuf, 4);
225 MD5_Update(&m5, (uchar *)buf, len);
226 } else {
227 MD5_Update(&m5, (uchar *)buf, len);
228 if (checksum_seed) {
229 SIVALu(seedbuf, 0, checksum_seed);
230 MD5_Update(&m5, seedbuf, 4);
233 MD5_Final((uchar *)sum, &m5);
234 break;
236 case CSUM_MD4:
237 #ifdef USE_OPENSSL
239 MD4_CTX m4;
240 MD4_Init(&m4);
241 MD4_Update(&m4, (uchar *)buf, len);
242 if (checksum_seed) {
243 uchar seedbuf[4];
244 SIVALu(seedbuf, 0, checksum_seed);
245 MD4_Update(&m4, seedbuf, 4);
247 MD4_Final((uchar *)sum, &m4);
248 break;
250 #endif
251 case CSUM_MD4_OLD:
252 case CSUM_MD4_BUSTED:
253 case CSUM_MD4_ARCHAIC: {
254 md_context m;
255 int32 i;
256 static char *buf1;
257 static int32 len1;
259 mdfour_begin(&m);
261 if (len > len1) {
262 if (buf1)
263 free(buf1);
264 buf1 = new_array(char, len+4);
265 len1 = len;
266 if (!buf1)
267 out_of_memory("get_checksum2");
270 memcpy(buf1, buf, len);
271 if (checksum_seed) {
272 SIVAL(buf1,len,checksum_seed);
273 len += 4;
276 for (i = 0; i + CSUM_CHUNK <= len; i += CSUM_CHUNK)
277 mdfour_update(&m, (uchar *)(buf1+i), CSUM_CHUNK);
280 * Prior to version 27 an incorrect MD4 checksum was computed
281 * by failing to call mdfour_tail() for block sizes that
282 * are multiples of 64. This is fixed by calling mdfour_update()
283 * even when there are no more bytes.
285 if (len - i > 0 || xfersum_type > CSUM_MD4_BUSTED)
286 mdfour_update(&m, (uchar *)(buf1+i), len-i);
288 mdfour_result(&m, (uchar *)sum);
289 break;
291 #ifdef SUPPORT_XXHASH
292 case CSUM_XXH64:
293 SIVAL64(sum, 0, XXH64(buf, len, checksum_seed));
294 break;
295 #endif
296 default: /* paranoia to prevent missing case values */
297 exit_cleanup(RERR_UNSUPPORTED);
301 void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum)
303 struct map_struct *buf;
304 OFF_T i, len = st_p->st_size;
305 int32 remainder;
306 int fd;
308 memset(sum, 0, MAX_DIGEST_LEN);
310 fd = do_open(fname, O_RDONLY, 0);
311 if (fd == -1)
312 return;
314 buf = map_file(fd, len, MAX_MAP_SIZE, CHUNK_SIZE);
316 switch (checksum_type) {
317 case CSUM_MD5: {
318 MD5_CTX m5;
320 MD5_Init(&m5);
322 for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
323 MD5_Update(&m5, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
325 remainder = (int32)(len - i);
326 if (remainder > 0)
327 MD5_Update(&m5, (uchar *)map_ptr(buf, i, remainder), remainder);
329 MD5_Final((uchar *)sum, &m5);
330 break;
332 case CSUM_MD4:
333 #ifdef USE_OPENSSL
335 MD4_CTX m4;
337 MD4_Init(&m4);
339 for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
340 MD4_Update(&m4, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
342 remainder = (int32)(len - i);
343 if (remainder > 0)
344 MD4_Update(&m4, (uchar *)map_ptr(buf, i, remainder), remainder);
346 MD4_Final((uchar *)sum, &m4);
347 break;
349 #endif
350 case CSUM_MD4_OLD:
351 case CSUM_MD4_BUSTED:
352 case CSUM_MD4_ARCHAIC: {
353 md_context m;
355 mdfour_begin(&m);
357 for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
358 mdfour_update(&m, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
360 /* Prior to version 27 an incorrect MD4 checksum was computed
361 * by failing to call mdfour_tail() for block sizes that
362 * are multiples of 64. This is fixed by calling mdfour_update()
363 * even when there are no more bytes. */
364 remainder = (int32)(len - i);
365 if (remainder > 0 || checksum_type > CSUM_MD4_BUSTED)
366 mdfour_update(&m, (uchar *)map_ptr(buf, i, remainder), remainder);
368 mdfour_result(&m, (uchar *)sum);
369 break;
371 #ifdef SUPPORT_XXHASH
372 case CSUM_XXH64: {
373 XXH64_state_t* state = XXH64_createState();
374 if (state == NULL)
375 out_of_memory("file_checksum XXH64");
377 if (XXH64_reset(state, 0) == XXH_ERROR) {
378 rprintf(FERROR, "error resetting XXH64 seed");
379 exit_cleanup(RERR_STREAMIO);
382 for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE) {
383 XXH_errorcode const updateResult =
384 XXH64_update(state, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
385 if (updateResult == XXH_ERROR) {
386 rprintf(FERROR, "error computing XXH64 hash");
387 exit_cleanup(RERR_STREAMIO);
391 remainder = (int32)(len - i);
392 if (remainder > 0)
393 XXH64_update(state, (uchar *)map_ptr(buf, i, remainder), remainder);
394 SIVAL64(sum, 0, XXH64_digest(state));
396 XXH64_freeState(state);
397 break;
399 #endif
400 default:
401 rprintf(FERROR, "Invalid checksum-choice for --checksum: %s (%d)\n",
402 checksum_name(checksum_type), checksum_type);
403 exit_cleanup(RERR_UNSUPPORTED);
406 close(fd);
407 unmap_file(buf);
410 static int32 sumresidue;
411 static union {
412 md_context md;
413 #ifdef USE_OPENSSL
414 MD4_CTX m4;
415 #endif
416 MD5_CTX m5;
417 } ctx;
418 #ifdef SUPPORT_XXHASH
419 static XXH64_state_t* xxh64_state;
420 #endif
421 static int cursum_type;
423 void sum_init(int csum_type, int seed)
425 char s[4];
427 if (csum_type < 0)
428 csum_type = parse_csum_name(NULL, 0);
429 cursum_type = csum_type;
431 switch (csum_type) {
432 case CSUM_MD5:
433 MD5_Init(&ctx.m5);
434 break;
435 case CSUM_MD4:
436 #ifdef USE_OPENSSL
437 MD4_Init(&ctx.m4);
438 #else
439 mdfour_begin(&ctx.md);
440 sumresidue = 0;
441 #endif
442 break;
443 case CSUM_MD4_OLD:
444 case CSUM_MD4_BUSTED:
445 case CSUM_MD4_ARCHAIC:
446 mdfour_begin(&ctx.md);
447 sumresidue = 0;
448 SIVAL(s, 0, seed);
449 sum_update(s, 4);
450 break;
451 #ifdef SUPPORT_XXHASH
452 case CSUM_XXH64:
453 if (xxh64_state == NULL) {
454 xxh64_state = XXH64_createState();
455 if (xxh64_state == NULL)
456 out_of_memory("sum_init xxh64");
458 if (XXH64_reset(xxh64_state, 0) == XXH_ERROR) {
459 rprintf(FERROR, "error resetting XXH64 state");
460 exit_cleanup(RERR_STREAMIO);
462 break;
463 #endif
464 case CSUM_NONE:
465 break;
466 default: /* paranoia to prevent missing case values */
467 exit_cleanup(RERR_UNSUPPORTED);
472 * Feed data into an MD4 accumulator, md. The results may be
473 * retrieved using sum_end(). md is used for different purposes at
474 * different points during execution.
476 * @todo Perhaps get rid of md and just pass in the address each time.
477 * Very slightly clearer and slower.
479 void sum_update(const char *p, int32 len)
481 switch (cursum_type) {
482 case CSUM_MD5:
483 MD5_Update(&ctx.m5, (uchar *)p, len);
484 break;
485 case CSUM_MD4:
486 #ifdef USE_OPENSSL
487 MD4_Update(&ctx.m4, (uchar *)p, len);
488 break;
489 #endif
490 case CSUM_MD4_OLD:
491 case CSUM_MD4_BUSTED:
492 case CSUM_MD4_ARCHAIC:
493 if (len + sumresidue < CSUM_CHUNK) {
494 memcpy(ctx.md.buffer + sumresidue, p, len);
495 sumresidue += len;
496 break;
499 if (sumresidue) {
500 int32 i = CSUM_CHUNK - sumresidue;
501 memcpy(ctx.md.buffer + sumresidue, p, i);
502 mdfour_update(&ctx.md, (uchar *)ctx.md.buffer, CSUM_CHUNK);
503 len -= i;
504 p += i;
507 while (len >= CSUM_CHUNK) {
508 mdfour_update(&ctx.md, (uchar *)p, CSUM_CHUNK);
509 len -= CSUM_CHUNK;
510 p += CSUM_CHUNK;
513 sumresidue = len;
514 if (sumresidue)
515 memcpy(ctx.md.buffer, p, sumresidue);
516 break;
517 #ifdef SUPPORT_XXHASH
518 case CSUM_XXH64:
519 if (XXH64_update(xxh64_state, p, len) == XXH_ERROR) {
520 rprintf(FERROR, "error computing XXH64 hash");
521 exit_cleanup(RERR_STREAMIO);
523 break;
524 #endif
525 case CSUM_NONE:
526 break;
527 default: /* paranoia to prevent missing case values */
528 exit_cleanup(RERR_UNSUPPORTED);
532 /* NOTE: all the callers of sum_end() pass in a pointer to a buffer that is
533 * MAX_DIGEST_LEN in size, so even if the csum-len is shorter that that (i.e.
534 * CSUM_MD4_ARCHAIC), we don't have to worry about limiting the data we write
535 * into the "sum" buffer. */
536 int sum_end(char *sum)
538 switch (cursum_type) {
539 case CSUM_MD5:
540 MD5_Final((uchar *)sum, &ctx.m5);
541 break;
542 case CSUM_MD4:
543 #ifdef USE_OPENSSL
544 MD4_Final((uchar *)sum, &ctx.m4);
545 break;
546 #endif
547 case CSUM_MD4_OLD:
548 mdfour_update(&ctx.md, (uchar *)ctx.md.buffer, sumresidue);
549 mdfour_result(&ctx.md, (uchar *)sum);
550 break;
551 case CSUM_MD4_BUSTED:
552 case CSUM_MD4_ARCHAIC:
553 if (sumresidue)
554 mdfour_update(&ctx.md, (uchar *)ctx.md.buffer, sumresidue);
555 mdfour_result(&ctx.md, (uchar *)sum);
556 break;
557 #ifdef SUPPORT_XXHASH
558 case CSUM_XXH64:
559 SIVAL64(sum, 0, XXH64_digest(xxh64_state));
560 break;
561 #endif
562 case CSUM_NONE:
563 *sum = '\0';
564 break;
565 default: /* paranoia to prevent missing case values */
566 exit_cleanup(RERR_UNSUPPORTED);
569 return csum_len_for_type(cursum_type, 0);