Promoting xxhash support.
[rsync.git] / checksum.c
blob17a9507acda203af6be9ccb6841e59d9034f7acb
1 /*
2 * Routines to support checksumming of bytes.
4 * Copyright (C) 1996 Andrew Tridgell
5 * Copyright (C) 1996 Paul Mackerras
6 * Copyright (C) 2004-2020 Wayne Davison
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 3 of the License, or
11 * (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, visit the http://fsf.org website.
22 #include "rsync.h"
23 #ifdef SUPPORT_XXHASH
24 #include "xxhash.h"
25 #endif
27 extern int am_server;
28 extern int local_server;
29 extern int whole_file;
30 extern int read_batch;
31 extern int checksum_seed;
32 extern int protocol_version;
33 extern int proper_seed_order;
34 extern char *checksum_choice;
/* Checksum algorithm identifiers.
 *
 * The numeric order matters: code in this file tests "type >= CSUM_MD4" and
 * "type > CSUM_MD4_BUSTED", so the legacy MD4 variants must keep values below
 * CSUM_MD4 and stay in their current relative order. */
#define CSUM_NONE        0
#define CSUM_MD4_ARCHAIC 1 /* default for protocol versions before 21 */
#define CSUM_MD4_BUSTED  2 /* default for protocol versions 21 through 26 */
#define CSUM_MD4_OLD     3 /* default for protocol versions 27 through 29 */
#define CSUM_MD4         4
#define CSUM_MD5         5 /* default for protocol versions 30 and up */
#define CSUM_XXHASH      6

/* Size of the "saw" arrays, which are indexed by the CSUM_* values above. */
#define CSUM_SAW_BUFLEN 10
46 struct csum_struct {
47 int num;
48 const char *name;
49 } valid_checksums[] = {
50 #ifdef SUPPORT_XXHASH
51 { CSUM_XXHASH, "xxhash" },
52 #endif
53 { CSUM_MD5, "md5" },
54 { CSUM_MD4, "md4" },
55 { CSUM_NONE, "none" },
56 { -1, NULL }
/* Size of the buffer that holds a space-separated list of checksum names. */
#define MAX_CHECKSUM_LIST 1024

/* Used for the file transfer checksums. */
int xfersum_type = 0;

/* Used for the pre-transfer (--checksum) checksums. */
int checksum_type = 0;

/* Set by negotiate_checksum() once a common checksum has been picked. */
const char *negotiated_csum_name = NULL;
65 static int parse_csum_name(const char *name, int len, int allow_auto)
67 struct csum_struct *cs;
69 if (len < 0 && name)
70 len = strlen(name);
72 if (!name || (allow_auto && len == 4 && strncasecmp(name, "auto", 4) == 0)) {
73 if (protocol_version >= 30)
74 return CSUM_MD5;
75 if (protocol_version >= 27)
76 return CSUM_MD4_OLD;
77 if (protocol_version >= 21)
78 return CSUM_MD4_BUSTED;
79 return CSUM_MD4_ARCHAIC;
82 for (cs = valid_checksums; cs->name; cs++) {
83 if (strncasecmp(name, cs->name, len) == 0 && cs->name[len] == '\0')
84 return cs->num;
87 if (allow_auto) {
88 rprintf(FERROR, "unknown checksum name: %s\n", name);
89 exit_cleanup(RERR_UNSUPPORTED);
92 return -1;
95 static const char *checksum_name(int num)
97 struct csum_struct *cs;
99 for (cs = valid_checksums; cs->name; cs++) {
100 if (num == cs->num)
101 return cs->name;
104 if (num < CSUM_MD4)
105 return "MD4";
107 return "UNKNOWN";
110 void parse_checksum_choice(int final_call)
112 if (!negotiated_csum_name) {
113 char *cp = checksum_choice ? strchr(checksum_choice, ',') : NULL;
114 if (cp) {
115 xfersum_type = parse_csum_name(checksum_choice, cp - checksum_choice, 1);
116 checksum_type = parse_csum_name(cp+1, -1, 1);
117 } else
118 xfersum_type = checksum_type = parse_csum_name(checksum_choice, -1, 1);
121 if (xfersum_type == CSUM_NONE)
122 whole_file = 1;
124 if (final_call && DEBUG_GTE(CSUM, 1)) {
125 if (negotiated_csum_name)
126 rprintf(FINFO, "[%s] negotiated checksum: %s\n", who_am_i(), negotiated_csum_name);
127 else if (xfersum_type == checksum_type) {
128 rprintf(FINFO, "[%s] %s checksum: %s\n", who_am_i(),
129 checksum_choice ? "chosen" : "protocol-based",
130 checksum_name(xfersum_type));
131 } else {
132 rprintf(FINFO, "[%s] chosen transfer checksum: %s\n",
133 who_am_i(), checksum_name(xfersum_type));
134 rprintf(FINFO, "[%s] chosen pre-transfer checksum: %s\n",
135 who_am_i(), checksum_name(checksum_type));
140 static int parse_checksum_list(const char *from, char *sumbuf, int sumbuf_len, char *saw)
142 char *to = sumbuf, *tok = NULL;
143 int cnt = 0;
145 memset(saw, 0, CSUM_SAW_BUFLEN);
147 while (1) {
148 if (*from == ' ' || !*from) {
149 if (tok) {
150 int sum_type = parse_csum_name(tok, to - tok, 0);
151 if (sum_type >= 0 && !saw[sum_type])
152 saw[sum_type] = ++cnt;
153 else
154 to = tok - (tok != sumbuf);
155 tok = NULL;
157 if (!*from++)
158 break;
159 continue;
161 if (!tok) {
162 if (to != sumbuf)
163 *to++ = ' ';
164 tok = to;
166 if (to - sumbuf >= sumbuf_len - 1) {
167 to = tok - (tok != sumbuf);
168 break;
170 *to++ = *from++;
172 *to = '\0';
174 return to - sumbuf;
177 void negotiate_checksum(int f_in, int f_out, const char *csum_list, int saw_fail)
179 char *tok, sumbuf[MAX_CHECKSUM_LIST], saw[CSUM_SAW_BUFLEN];
180 int sum_type, len;
182 /* Simplify the user-provided string so that it contains valid
183 * checksum names without any duplicates. The client side also
184 * makes use of the saw values when scanning the server's list. */
185 if (csum_list && *csum_list && (!am_server || local_server)) {
186 len = parse_checksum_list(csum_list, sumbuf, sizeof sumbuf, saw);
187 if (saw_fail && !len)
188 len = strlcpy(sumbuf, "FAIL", sizeof sumbuf);
189 csum_list = sumbuf;
190 } else
191 csum_list = NULL;
193 if (!csum_list || !*csum_list) {
194 struct csum_struct *cs;
195 for (tok = sumbuf, cs = valid_checksums, len = 0; cs->name; cs++) {
196 if (cs->num == CSUM_NONE)
197 continue;
198 if (tok != sumbuf)
199 *tok++ = ' ';
200 tok += strlcpy(tok, cs->name, sizeof sumbuf - (tok - sumbuf));
201 saw[cs->num] = ++len;
203 *tok = '\0';
204 len = tok - sumbuf;
207 /* Each side sends their list of valid checksum names to the other side and
208 * then both sides pick the first name in the client's list that is also in
209 * the server's list. */
210 if (!local_server)
211 write_vstring(f_out, sumbuf, len);
213 if (!local_server || read_batch)
214 len = read_vstring(f_in, sumbuf, sizeof sumbuf);
216 if (len > 0) {
217 int best = CSUM_SAW_BUFLEN; /* We want best == 1 from the client list */
218 if (am_server)
219 memset(saw, 1, CSUM_SAW_BUFLEN); /* The first client's choice is the best choice */
220 for (tok = strtok(sumbuf, " \t"); tok; tok = strtok(NULL, " \t")) {
221 sum_type = parse_csum_name(tok, -1, 0);
222 if (sum_type < 0 || !saw[sum_type] || best < saw[sum_type])
223 continue;
224 xfersum_type = checksum_type = sum_type;
225 negotiated_csum_name = tok;
226 best = saw[sum_type];
227 if (best == 1)
228 break;
230 if (negotiated_csum_name) {
231 negotiated_csum_name = strdup(negotiated_csum_name);
232 return;
236 if (!am_server)
237 msleep(20);
238 rprintf(FERROR, "Failed to negotiate a common checksum\n");
239 exit_cleanup(RERR_UNSUPPORTED);
242 int csum_len_for_type(int cst, BOOL flist_csum)
244 switch (cst) {
245 case CSUM_NONE:
246 return 1;
247 case CSUM_MD4_ARCHAIC:
248 /* The oldest checksum code is rather weird: the file-list code only sent
249 * 2-byte checksums, but all other checksums were full MD4 length. */
250 return flist_csum ? 2 : MD4_DIGEST_LEN;
251 case CSUM_MD4:
252 case CSUM_MD4_OLD:
253 case CSUM_MD4_BUSTED:
254 return MD4_DIGEST_LEN;
255 case CSUM_MD5:
256 return MD5_DIGEST_LEN;
257 #ifdef SUPPORT_XXHASH
258 case CSUM_XXHASH:
259 return sizeof (XXH64_hash_t);
260 #endif
261 default: /* paranoia to prevent missing case values */
262 exit_cleanup(RERR_UNSUPPORTED);
264 return 0;
267 int canonical_checksum(int csum_type)
269 return csum_type >= CSUM_MD4 ? 1 : 0;
272 #ifndef HAVE_SIMD /* See simd-checksum-*.cpp. */
274 a simple 32 bit checksum that can be updated from either end
275 (inspired by Mark Adler's Adler-32 checksum)
277 uint32 get_checksum1(char *buf1, int32 len)
279 int32 i;
280 uint32 s1, s2;
281 schar *buf = (schar *)buf1;
283 s1 = s2 = 0;
284 for (i = 0; i < (len-4); i+=4) {
285 s2 += 4*(s1 + buf[i]) + 3*buf[i+1] + 2*buf[i+2] + buf[i+3] + 10*CHAR_OFFSET;
286 s1 += (buf[i+0] + buf[i+1] + buf[i+2] + buf[i+3] + 4*CHAR_OFFSET);
288 for (; i < len; i++) {
289 s1 += (buf[i]+CHAR_OFFSET); s2 += s1;
291 return (s1 & 0xffff) + (s2 << 16);
293 #endif
295 void get_checksum2(char *buf, int32 len, char *sum)
297 md_context m;
299 switch (xfersum_type) {
300 case CSUM_MD5: {
301 uchar seedbuf[4];
302 md5_begin(&m);
303 if (proper_seed_order) {
304 if (checksum_seed) {
305 SIVALu(seedbuf, 0, checksum_seed);
306 md5_update(&m, seedbuf, 4);
308 md5_update(&m, (uchar *)buf, len);
309 } else {
310 md5_update(&m, (uchar *)buf, len);
311 if (checksum_seed) {
312 SIVALu(seedbuf, 0, checksum_seed);
313 md5_update(&m, seedbuf, 4);
316 md5_result(&m, (uchar *)sum);
317 break;
319 case CSUM_MD4:
320 case CSUM_MD4_OLD:
321 case CSUM_MD4_BUSTED:
322 case CSUM_MD4_ARCHAIC: {
323 int32 i;
324 static char *buf1;
325 static int32 len1;
327 mdfour_begin(&m);
329 if (len > len1) {
330 if (buf1)
331 free(buf1);
332 buf1 = new_array(char, len+4);
333 len1 = len;
334 if (!buf1)
335 out_of_memory("get_checksum2");
338 memcpy(buf1, buf, len);
339 if (checksum_seed) {
340 SIVAL(buf1,len,checksum_seed);
341 len += 4;
344 for (i = 0; i + CSUM_CHUNK <= len; i += CSUM_CHUNK)
345 mdfour_update(&m, (uchar *)(buf1+i), CSUM_CHUNK);
348 * Prior to version 27 an incorrect MD4 checksum was computed
349 * by failing to call mdfour_tail() for block sizes that
350 * are multiples of 64. This is fixed by calling mdfour_update()
351 * even when there are no more bytes.
353 if (len - i > 0 || xfersum_type > CSUM_MD4_BUSTED)
354 mdfour_update(&m, (uchar *)(buf1+i), len-i);
356 mdfour_result(&m, (uchar *)sum);
357 break;
359 #ifdef SUPPORT_XXHASH
360 case CSUM_XXHASH:
361 SIVAL64(sum, 0, XXH64(buf, len, checksum_seed));
362 break;
363 #endif
364 default: /* paranoia to prevent missing case values */
365 exit_cleanup(RERR_UNSUPPORTED);
369 void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum)
371 struct map_struct *buf;
372 OFF_T i, len = st_p->st_size;
373 md_context m;
374 int32 remainder;
375 int fd;
377 memset(sum, 0, MAX_DIGEST_LEN);
379 fd = do_open(fname, O_RDONLY, 0);
380 if (fd == -1)
381 return;
383 buf = map_file(fd, len, MAX_MAP_SIZE, CSUM_CHUNK);
385 switch (checksum_type) {
386 case CSUM_MD5:
387 md5_begin(&m);
389 for (i = 0; i + CSUM_CHUNK <= len; i += CSUM_CHUNK) {
390 md5_update(&m, (uchar *)map_ptr(buf, i, CSUM_CHUNK),
391 CSUM_CHUNK);
394 remainder = (int32)(len - i);
395 if (remainder > 0)
396 md5_update(&m, (uchar *)map_ptr(buf, i, remainder), remainder);
398 md5_result(&m, (uchar *)sum);
399 break;
400 case CSUM_MD4:
401 case CSUM_MD4_OLD:
402 case CSUM_MD4_BUSTED:
403 case CSUM_MD4_ARCHAIC:
404 mdfour_begin(&m);
406 for (i = 0; i + CSUM_CHUNK <= len; i += CSUM_CHUNK) {
407 mdfour_update(&m, (uchar *)map_ptr(buf, i, CSUM_CHUNK), CSUM_CHUNK);
410 /* Prior to version 27 an incorrect MD4 checksum was computed
411 * by failing to call mdfour_tail() for block sizes that
412 * are multiples of 64. This is fixed by calling mdfour_update()
413 * even when there are no more bytes. */
414 remainder = (int32)(len - i);
415 if (remainder > 0 || checksum_type > CSUM_MD4_BUSTED)
416 mdfour_update(&m, (uchar *)map_ptr(buf, i, remainder), remainder);
418 mdfour_result(&m, (uchar *)sum);
419 break;
420 #ifdef SUPPORT_XXHASH
421 case CSUM_XXHASH: {
422 XXH64_state_t* state = XXH64_createState();
423 if (state == NULL)
424 out_of_memory("file_checksum xx64");
426 if (XXH64_reset(state, 0) == XXH_ERROR) {
427 rprintf(FERROR, "error resetting XXH64 seed");
428 exit_cleanup(RERR_STREAMIO);
431 for (i = 0; i + CSUM_CHUNK <= len; i += CSUM_CHUNK) {
432 XXH_errorcode const updateResult =
433 XXH64_update(state, (uchar *)map_ptr(buf, i, CSUM_CHUNK), CSUM_CHUNK);
434 if (updateResult == XXH_ERROR) {
435 rprintf(FERROR, "error computing XX64 hash");
436 exit_cleanup(RERR_STREAMIO);
439 remainder = (int32)(len - i);
440 if (remainder > 0)
441 XXH64_update(state, (uchar *)map_ptr(buf, i, CSUM_CHUNK), remainder);
442 SIVAL64(sum, 0, XXH64_digest(state));
444 XXH64_freeState(state);
445 break;
447 #endif
448 default:
449 rprintf(FERROR, "invalid checksum-choice for the --checksum option (%d)\n", checksum_type);
450 exit_cleanup(RERR_UNSUPPORTED);
453 close(fd);
454 unmap_file(buf);
457 static int32 sumresidue;
458 static md_context md;
459 static int cursum_type;
460 #ifdef SUPPORT_XXHASH
461 XXH64_state_t* xxh64_state = NULL;
462 #endif
464 void sum_init(int csum_type, int seed)
466 char s[4];
468 if (csum_type < 0)
469 csum_type = parse_csum_name(NULL, 0, 1);
470 cursum_type = csum_type;
472 switch (csum_type) {
473 case CSUM_MD5:
474 md5_begin(&md);
475 break;
476 case CSUM_MD4:
477 mdfour_begin(&md);
478 sumresidue = 0;
479 break;
480 case CSUM_MD4_OLD:
481 case CSUM_MD4_BUSTED:
482 case CSUM_MD4_ARCHAIC:
483 mdfour_begin(&md);
484 sumresidue = 0;
485 SIVAL(s, 0, seed);
486 sum_update(s, 4);
487 break;
488 #ifdef SUPPORT_XXHASH
489 case CSUM_XXHASH:
490 if (xxh64_state == NULL) {
491 xxh64_state = XXH64_createState();
492 if (xxh64_state == NULL)
493 out_of_memory("sum_init xxh64");
495 if (XXH64_reset(xxh64_state, 0) == XXH_ERROR) {
496 rprintf(FERROR, "error resetting XXH64 state");
497 exit_cleanup(RERR_STREAMIO);
499 break;
500 #endif
501 case CSUM_NONE:
502 break;
503 default: /* paranoia to prevent missing case values */
504 exit_cleanup(RERR_UNSUPPORTED);
509 * Feed data into an MD4 accumulator, md. The results may be
510 * retrieved using sum_end(). md is used for different purposes at
511 * different points during execution.
513 * @todo Perhaps get rid of md and just pass in the address each time.
514 * Very slightly clearer and slower.
516 void sum_update(const char *p, int32 len)
518 switch (cursum_type) {
519 case CSUM_MD5:
520 md5_update(&md, (uchar *)p, len);
521 break;
522 case CSUM_MD4:
523 case CSUM_MD4_OLD:
524 case CSUM_MD4_BUSTED:
525 case CSUM_MD4_ARCHAIC:
526 if (len + sumresidue < CSUM_CHUNK) {
527 memcpy(md.buffer + sumresidue, p, len);
528 sumresidue += len;
529 break;
532 if (sumresidue) {
533 int32 i = CSUM_CHUNK - sumresidue;
534 memcpy(md.buffer + sumresidue, p, i);
535 mdfour_update(&md, (uchar *)md.buffer, CSUM_CHUNK);
536 len -= i;
537 p += i;
540 while (len >= CSUM_CHUNK) {
541 mdfour_update(&md, (uchar *)p, CSUM_CHUNK);
542 len -= CSUM_CHUNK;
543 p += CSUM_CHUNK;
546 sumresidue = len;
547 if (sumresidue)
548 memcpy(md.buffer, p, sumresidue);
549 break;
550 #ifdef SUPPORT_XXHASH
551 case CSUM_XXHASH:
552 if (XXH64_update(xxh64_state, p, len) == XXH_ERROR) {
553 rprintf(FERROR, "error computing XX64 hash");
554 exit_cleanup(RERR_STREAMIO);
556 break;
557 #endif
558 case CSUM_NONE:
559 break;
560 default: /* paranoia to prevent missing case values */
561 exit_cleanup(RERR_UNSUPPORTED);
565 /* NOTE: all the callers of sum_end() pass in a pointer to a buffer that is
566 * MAX_DIGEST_LEN in size, so even if the csum-len is shorter that that (i.e.
567 * CSUM_MD4_ARCHAIC), we don't have to worry about limiting the data we write
568 * into the "sum" buffer. */
569 int sum_end(char *sum)
571 switch (cursum_type) {
572 case CSUM_MD5:
573 md5_result(&md, (uchar *)sum);
574 break;
575 case CSUM_MD4:
576 case CSUM_MD4_OLD:
577 mdfour_update(&md, (uchar *)md.buffer, sumresidue);
578 mdfour_result(&md, (uchar *)sum);
579 break;
580 case CSUM_MD4_BUSTED:
581 case CSUM_MD4_ARCHAIC:
582 if (sumresidue)
583 mdfour_update(&md, (uchar *)md.buffer, sumresidue);
584 mdfour_result(&md, (uchar *)sum);
585 break;
586 #ifdef SUPPORT_XXHASH
587 case CSUM_XXHASH:
588 SIVAL64(sum, 0, XXH64_digest(xxh64_state));
589 break;
590 #endif
591 case CSUM_NONE:
592 *sum = '\0';
593 break;
594 default: /* paranoia to prevent missing case values */
595 exit_cleanup(RERR_UNSUPPORTED);
598 return csum_len_for_type(cursum_type, 0);