A couple minor fixes.
[rsync.git] / checksum.c
blobd7b2ebdd50c21adb34d8040bf4d9eaeb2f42baeb
1 /*
2 * Routines to support checksumming of bytes.
4 * Copyright (C) 1996 Andrew Tridgell
5 * Copyright (C) 1996 Paul Mackerras
6 * Copyright (C) 2004-2020 Wayne Davison
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 3 of the License, or
11 * (at your option) any later version.
13 * In addition, as a special exception, the copyright holders give
14 * permission to dynamically link rsync with the OpenSSL and xxhash
15 * libraries when those libraries are being distributed in compliance
16 * with their license terms, and to distribute a dynamically linked
17 * combination of rsync and these libraries. This is also considered
18 * to be covered under the GPL's System Libraries exception.
20 * This program is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
25 * You should have received a copy of the GNU General Public License along
26 * with this program; if not, visit the http://fsf.org website.
29 #include "rsync.h"
30 #ifdef SUPPORT_XXHASH
31 #include "xxhash.h"
32 #endif
34 extern int am_server;
35 extern int whole_file;
36 extern int checksum_seed;
37 extern int protocol_version;
38 extern int proper_seed_order;
39 extern const char *checksum_choice;
41 struct name_num_obj valid_checksums = {
42 "checksum", NULL, NULL, 0, 0, {
43 #ifdef SUPPORT_XXHASH
44 { CSUM_XXH64, "xxh64", NULL },
45 { CSUM_XXH64, "xxhash", NULL },
46 #endif
47 { CSUM_MD5, "md5", NULL },
48 { CSUM_MD4, "md4", NULL },
49 { CSUM_NONE, "none", NULL },
50 { 0, NULL, NULL }
54 int xfersum_type = 0; /* used for the file transfer checksums */
55 int checksum_type = 0; /* used for the pre-transfer (--checksum) checksums */
57 static int parse_csum_name(const char *name, int len)
59 struct name_num_item *nni;
61 if (len < 0 && name)
62 len = strlen(name);
64 if (!name || (len == 4 && strncasecmp(name, "auto", 4) == 0)) {
65 if (protocol_version >= 30)
66 return CSUM_MD5;
67 if (protocol_version >= 27)
68 return CSUM_MD4_OLD;
69 if (protocol_version >= 21)
70 return CSUM_MD4_BUSTED;
71 return CSUM_MD4_ARCHAIC;
74 nni = get_nni_by_name(&valid_checksums, name, len);
76 if (!nni) {
77 rprintf(FERROR, "unknown checksum name: %s\n", name);
78 exit_cleanup(RERR_UNSUPPORTED);
81 return nni->num;
84 static const char *checksum_name(int num)
86 struct name_num_item *nni = get_nni_by_num(&valid_checksums, num);
88 return nni ? nni->name : num < CSUM_MD4 ? "MD4" : "UNKNOWN";
91 void parse_checksum_choice(int final_call)
93 if (valid_checksums.negotiated_name)
94 xfersum_type = checksum_type = valid_checksums.negotiated_num;
95 else {
96 char *cp = checksum_choice ? strchr(checksum_choice, ',') : NULL;
97 if (cp) {
98 xfersum_type = parse_csum_name(checksum_choice, cp - checksum_choice);
99 checksum_type = parse_csum_name(cp+1, -1);
100 } else
101 xfersum_type = checksum_type = parse_csum_name(checksum_choice, -1);
104 if (xfersum_type == CSUM_NONE)
105 whole_file = 1;
107 /* Snag the checksum name for both write_batch's option output & the following debug output. */
108 if (valid_checksums.negotiated_name)
109 checksum_choice = valid_checksums.negotiated_name;
110 else if (checksum_choice == NULL)
111 checksum_choice = checksum_name(xfersum_type);
113 if (final_call && DEBUG_GTE(NSTR, am_server ? 3 : 1)) {
114 rprintf(FINFO, "%s%s checksum: %s\n",
115 am_server ? "Server" : "Client",
116 valid_checksums.negotiated_name ? " negotiated" : "",
117 checksum_choice);
121 int csum_len_for_type(int cst, BOOL flist_csum)
123 switch (cst) {
124 case CSUM_NONE:
125 return 1;
126 case CSUM_MD4_ARCHAIC:
127 /* The oldest checksum code is rather weird: the file-list code only sent
128 * 2-byte checksums, but all other checksums were full MD4 length. */
129 return flist_csum ? 2 : MD4_DIGEST_LEN;
130 case CSUM_MD4:
131 case CSUM_MD4_OLD:
132 case CSUM_MD4_BUSTED:
133 return MD4_DIGEST_LEN;
134 case CSUM_MD5:
135 return MD5_DIGEST_LEN;
136 #ifdef SUPPORT_XXHASH
137 case CSUM_XXH64:
138 return 64/8;
139 #endif
140 default: /* paranoia to prevent missing case values */
141 exit_cleanup(RERR_UNSUPPORTED);
143 return 0;
146 /* Returns 0 if the checksum is not canonical (i.e. it includes a seed value).
147 * Returns 1 if the public sum order matches our internal sum order.
148 * Returns -1 if the public sum order is the reverse of our internal sum order.
150 int canonical_checksum(int csum_type)
152 switch (csum_type) {
153 case CSUM_NONE:
154 case CSUM_MD4_ARCHAIC:
155 case CSUM_MD4_OLD:
156 case CSUM_MD4_BUSTED:
157 break;
158 case CSUM_MD4:
159 case CSUM_MD5:
160 return -1;
161 #ifdef SUPPORT_XXHASH
162 case CSUM_XXH64:
163 return 1;
164 #endif
165 default: /* paranoia to prevent missing case values */
166 exit_cleanup(RERR_UNSUPPORTED);
168 return 0;
171 #ifndef HAVE_SIMD /* See simd-checksum-*.cpp. */
173 a simple 32 bit checksum that can be updated from either end
174 (inspired by Mark Adler's Adler-32 checksum)
176 uint32 get_checksum1(char *buf1, int32 len)
178 int32 i;
179 uint32 s1, s2;
180 schar *buf = (schar *)buf1;
182 s1 = s2 = 0;
183 for (i = 0; i < (len-4); i+=4) {
184 s2 += 4*(s1 + buf[i]) + 3*buf[i+1] + 2*buf[i+2] + buf[i+3] + 10*CHAR_OFFSET;
185 s1 += (buf[i+0] + buf[i+1] + buf[i+2] + buf[i+3] + 4*CHAR_OFFSET);
187 for (; i < len; i++) {
188 s1 += (buf[i]+CHAR_OFFSET); s2 += s1;
190 return (s1 & 0xffff) + (s2 << 16);
192 #endif
194 void get_checksum2(char *buf, int32 len, char *sum)
196 switch (xfersum_type) {
197 #ifdef SUPPORT_XXHASH
198 case CSUM_XXH64:
199 SIVAL64(sum, 0, XXH64(buf, len, checksum_seed));
200 break;
201 #endif
202 case CSUM_MD5: {
203 MD5_CTX m5;
204 uchar seedbuf[4];
205 MD5_Init(&m5);
206 if (proper_seed_order) {
207 if (checksum_seed) {
208 SIVALu(seedbuf, 0, checksum_seed);
209 MD5_Update(&m5, seedbuf, 4);
211 MD5_Update(&m5, (uchar *)buf, len);
212 } else {
213 MD5_Update(&m5, (uchar *)buf, len);
214 if (checksum_seed) {
215 SIVALu(seedbuf, 0, checksum_seed);
216 MD5_Update(&m5, seedbuf, 4);
219 MD5_Final((uchar *)sum, &m5);
220 break;
222 case CSUM_MD4:
223 #ifdef USE_OPENSSL
225 MD4_CTX m4;
226 MD4_Init(&m4);
227 MD4_Update(&m4, (uchar *)buf, len);
228 if (checksum_seed) {
229 uchar seedbuf[4];
230 SIVALu(seedbuf, 0, checksum_seed);
231 MD4_Update(&m4, seedbuf, 4);
233 MD4_Final((uchar *)sum, &m4);
234 break;
236 #endif
237 case CSUM_MD4_OLD:
238 case CSUM_MD4_BUSTED:
239 case CSUM_MD4_ARCHAIC: {
240 md_context m;
241 int32 i;
242 static char *buf1;
243 static int32 len1;
245 mdfour_begin(&m);
247 if (len > len1) {
248 if (buf1)
249 free(buf1);
250 buf1 = new_array(char, len+4);
251 len1 = len;
252 if (!buf1)
253 out_of_memory("get_checksum2");
256 memcpy(buf1, buf, len);
257 if (checksum_seed) {
258 SIVAL(buf1,len,checksum_seed);
259 len += 4;
262 for (i = 0; i + CSUM_CHUNK <= len; i += CSUM_CHUNK)
263 mdfour_update(&m, (uchar *)(buf1+i), CSUM_CHUNK);
266 * Prior to version 27 an incorrect MD4 checksum was computed
267 * by failing to call mdfour_tail() for block sizes that
268 * are multiples of 64. This is fixed by calling mdfour_update()
269 * even when there are no more bytes.
271 if (len - i > 0 || xfersum_type > CSUM_MD4_BUSTED)
272 mdfour_update(&m, (uchar *)(buf1+i), len-i);
274 mdfour_result(&m, (uchar *)sum);
275 break;
277 default: /* paranoia to prevent missing case values */
278 exit_cleanup(RERR_UNSUPPORTED);
282 void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum)
284 struct map_struct *buf;
285 OFF_T i, len = st_p->st_size;
286 int32 remainder;
287 int fd;
289 memset(sum, 0, MAX_DIGEST_LEN);
291 fd = do_open(fname, O_RDONLY, 0);
292 if (fd == -1)
293 return;
295 buf = map_file(fd, len, MAX_MAP_SIZE, CHUNK_SIZE);
297 switch (checksum_type) {
298 #ifdef SUPPORT_XXHASH
299 case CSUM_XXH64: {
300 static XXH64_state_t* state = NULL;
301 if (!state && !(state = XXH64_createState()))
302 out_of_memory("file_checksum");
304 XXH64_reset(state, 0);
306 for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
307 XXH64_update(state, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
309 remainder = (int32)(len - i);
310 if (remainder > 0)
311 XXH64_update(state, (uchar *)map_ptr(buf, i, remainder), remainder);
313 SIVAL64(sum, 0, XXH64_digest(state));
314 break;
316 #endif
317 case CSUM_MD5: {
318 MD5_CTX m5;
320 MD5_Init(&m5);
322 for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
323 MD5_Update(&m5, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
325 remainder = (int32)(len - i);
326 if (remainder > 0)
327 MD5_Update(&m5, (uchar *)map_ptr(buf, i, remainder), remainder);
329 MD5_Final((uchar *)sum, &m5);
330 break;
332 case CSUM_MD4:
333 #ifdef USE_OPENSSL
335 MD4_CTX m4;
337 MD4_Init(&m4);
339 for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
340 MD4_Update(&m4, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
342 remainder = (int32)(len - i);
343 if (remainder > 0)
344 MD4_Update(&m4, (uchar *)map_ptr(buf, i, remainder), remainder);
346 MD4_Final((uchar *)sum, &m4);
347 break;
349 #endif
350 case CSUM_MD4_OLD:
351 case CSUM_MD4_BUSTED:
352 case CSUM_MD4_ARCHAIC: {
353 md_context m;
355 mdfour_begin(&m);
357 for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
358 mdfour_update(&m, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
360 /* Prior to version 27 an incorrect MD4 checksum was computed
361 * by failing to call mdfour_tail() for block sizes that
362 * are multiples of 64. This is fixed by calling mdfour_update()
363 * even when there are no more bytes. */
364 remainder = (int32)(len - i);
365 if (remainder > 0 || checksum_type > CSUM_MD4_BUSTED)
366 mdfour_update(&m, (uchar *)map_ptr(buf, i, remainder), remainder);
368 mdfour_result(&m, (uchar *)sum);
369 break;
371 default:
372 rprintf(FERROR, "Invalid checksum-choice for --checksum: %s (%d)\n",
373 checksum_name(checksum_type), checksum_type);
374 exit_cleanup(RERR_UNSUPPORTED);
377 close(fd);
378 unmap_file(buf);
381 static int32 sumresidue;
382 static union {
383 md_context md;
384 #ifdef USE_OPENSSL
385 MD4_CTX m4;
386 #endif
387 MD5_CTX m5;
388 } ctx;
389 #ifdef SUPPORT_XXHASH
390 static XXH64_state_t* xxh64_state;
391 #endif
392 static int cursum_type;
394 void sum_init(int csum_type, int seed)
396 char s[4];
398 if (csum_type < 0)
399 csum_type = parse_csum_name(NULL, 0);
400 cursum_type = csum_type;
402 switch (csum_type) {
403 #ifdef SUPPORT_XXHASH
404 case CSUM_XXH64:
405 if (!xxh64_state && !(xxh64_state = XXH64_createState()))
406 out_of_memory("sum_init");
407 XXH64_reset(xxh64_state, 0);
408 break;
409 #endif
410 case CSUM_MD5:
411 MD5_Init(&ctx.m5);
412 break;
413 case CSUM_MD4:
414 #ifdef USE_OPENSSL
415 MD4_Init(&ctx.m4);
416 #else
417 mdfour_begin(&ctx.md);
418 sumresidue = 0;
419 #endif
420 break;
421 case CSUM_MD4_OLD:
422 case CSUM_MD4_BUSTED:
423 case CSUM_MD4_ARCHAIC:
424 mdfour_begin(&ctx.md);
425 sumresidue = 0;
426 SIVAL(s, 0, seed);
427 sum_update(s, 4);
428 break;
429 case CSUM_NONE:
430 break;
431 default: /* paranoia to prevent missing case values */
432 exit_cleanup(RERR_UNSUPPORTED);
437 * Feed data into an MD4 accumulator, md. The results may be
438 * retrieved using sum_end(). md is used for different purposes at
439 * different points during execution.
441 * @todo Perhaps get rid of md and just pass in the address each time.
442 * Very slightly clearer and slower.
444 void sum_update(const char *p, int32 len)
446 switch (cursum_type) {
447 #ifdef SUPPORT_XXHASH
448 case CSUM_XXH64:
449 XXH64_update(xxh64_state, p, len);
450 break;
451 #endif
452 case CSUM_MD5:
453 MD5_Update(&ctx.m5, (uchar *)p, len);
454 break;
455 case CSUM_MD4:
456 #ifdef USE_OPENSSL
457 MD4_Update(&ctx.m4, (uchar *)p, len);
458 break;
459 #endif
460 case CSUM_MD4_OLD:
461 case CSUM_MD4_BUSTED:
462 case CSUM_MD4_ARCHAIC:
463 if (len + sumresidue < CSUM_CHUNK) {
464 memcpy(ctx.md.buffer + sumresidue, p, len);
465 sumresidue += len;
466 break;
469 if (sumresidue) {
470 int32 i = CSUM_CHUNK - sumresidue;
471 memcpy(ctx.md.buffer + sumresidue, p, i);
472 mdfour_update(&ctx.md, (uchar *)ctx.md.buffer, CSUM_CHUNK);
473 len -= i;
474 p += i;
477 while (len >= CSUM_CHUNK) {
478 mdfour_update(&ctx.md, (uchar *)p, CSUM_CHUNK);
479 len -= CSUM_CHUNK;
480 p += CSUM_CHUNK;
483 sumresidue = len;
484 if (sumresidue)
485 memcpy(ctx.md.buffer, p, sumresidue);
486 break;
487 case CSUM_NONE:
488 break;
489 default: /* paranoia to prevent missing case values */
490 exit_cleanup(RERR_UNSUPPORTED);
494 /* NOTE: all the callers of sum_end() pass in a pointer to a buffer that is
495 * MAX_DIGEST_LEN in size, so even if the csum-len is shorter than that (i.e.
496 * CSUM_MD4_ARCHAIC), we don't have to worry about limiting the data we write
497 * into the "sum" buffer. */
498 int sum_end(char *sum)
500 switch (cursum_type) {
501 #ifdef SUPPORT_XXHASH
502 case CSUM_XXH64:
503 SIVAL64(sum, 0, XXH64_digest(xxh64_state));
504 break;
505 #endif
506 case CSUM_MD5:
507 MD5_Final((uchar *)sum, &ctx.m5);
508 break;
509 case CSUM_MD4:
510 #ifdef USE_OPENSSL
511 MD4_Final((uchar *)sum, &ctx.m4);
512 break;
513 #endif
514 case CSUM_MD4_OLD:
515 mdfour_update(&ctx.md, (uchar *)ctx.md.buffer, sumresidue);
516 mdfour_result(&ctx.md, (uchar *)sum);
517 break;
518 case CSUM_MD4_BUSTED:
519 case CSUM_MD4_ARCHAIC:
520 if (sumresidue)
521 mdfour_update(&ctx.md, (uchar *)ctx.md.buffer, sumresidue);
522 mdfour_result(&ctx.md, (uchar *)sum);
523 break;
524 case CSUM_NONE:
525 *sum = '\0';
526 break;
527 default: /* paranoia to prevent missing case values */
528 exit_cleanup(RERR_UNSUPPORTED);
531 return csum_len_for_type(cursum_type, 0);