2 * Routines to support checksumming of bytes.
4 * Copyright (C) 1996 Andrew Tridgell
5 * Copyright (C) 1996 Paul Mackerras
6 * Copyright (C) 2004-2020 Wayne Davison
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 3 of the License, or
11 * (at your option) any later version.
13 * In addition, as a special exception, the copyright holders give
14 * permission to dynamically link rsync with the OpenSSL and xxhash
15 * libraries when those libraries are being distributed in compliance
16 * with their license terms, and to distribute a dynamically linked
17 * combination of rsync and these libraries. This is also considered
18 * to be covered under the GPL's System Libraries exception.
20 * This program is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
25 * You should have received a copy of the GNU General Public License along
26 * with this program; if not, visit the http://fsf.org website.
35 extern int whole_file
;
36 extern int checksum_seed
;
37 extern int protocol_version
;
38 extern int proper_seed_order
;
39 extern const char *checksum_choice
;
/* Name table for the checksum types that can be selected by name.
 * parse_csum_name() searches it with get_nni_by_name(), checksum_name()
 * searches it with get_nni_by_num(), and parse_checksum_choice() reads its
 * negotiated_name and negotiated_num members.
 * Both "xxh64" and "xxhash" select CSUM_XXH64.
 * NOTE(review): the end of this initializer is not visible in this listing. */
41 struct name_num_obj valid_checksums
= {
42 "checksum", NULL
, NULL
, 0, 0, {
44 { CSUM_XXH64
, "xxh64", NULL
},
45 { CSUM_XXH64
, "xxhash", NULL
},
47 { CSUM_MD5
, "md5", NULL
},
48 { CSUM_MD4
, "md4", NULL
},
49 { CSUM_NONE
, "none", NULL
},
/* The two active checksum types. Both are assigned in parse_checksum_choice();
 * get_checksum2() switches on xfersum_type and file_checksum() switches on
 * checksum_type. */
54 int xfersum_type
= 0; /* used for the file transfer checksums */
55 int checksum_type
= 0; /* used for the pre-transfer (--checksum) checksums */
/* Translate a checksum name into a CSUM_* value.
 *
 * name: the name to translate; may be NULL.
 * len:  forwarded to get_nni_by_name(); callers in this file pass either an
 *       explicit character count or -1.
 *
 * A NULL name, or a 4-character name matching "auto" case-insensitively,
 * selects a default from protocol_version. Any other name is looked up in
 * valid_checksums. The error path visible here prints "unknown checksum name"
 * and calls exit_cleanup(RERR_UNSUPPORTED).
 *
 * NOTE(review): this listing omits several lines of the function (braces, the
 * values returned for protocol_version >= 30 and >= 27, the test on nni and
 * the return after a successful lookup); confirm them against the full file. */
57 static int parse_csum_name(const char *name
, int len
)
59 struct name_num_item
*nni
;
/* No name, or "auto": choose a default by protocol version. */
64 if (!name
|| (len
== 4 && strncasecmp(name
, "auto", 4) == 0)) {
65 if (protocol_version
>= 30)
67 if (protocol_version
>= 27)
69 if (protocol_version
>= 21)
70 return CSUM_MD4_BUSTED
;
/* Anything older than protocol 21 falls through to the archaic variant. */
71 return CSUM_MD4_ARCHAIC
;
/* Explicit name: search the table of valid checksum names. */
74 nni
= get_nni_by_name(&valid_checksums
, name
, len
);
77 rprintf(FERROR
, "unknown checksum name: %s\n", name
);
78 exit_cleanup(RERR_UNSUPPORTED
);
/* Return a printable name for checksum number num.
 * If get_nni_by_num() finds an entry in valid_checksums, its name is returned.
 * Otherwise the result is "MD4" when num is below CSUM_MD4 and "UNKNOWN" for
 * any other value. */
84 static const char *checksum_name(int num
)
86 struct name_num_item
*nni
= get_nni_by_num(&valid_checksums
, num
);
88 return nni
? nni
->name
: num
< CSUM_MD4
? "MD4" : "UNKNOWN";
/* Set xfersum_type and checksum_type from the negotiated checksum or from the
 * checksum_choice string, then record the chosen name in checksum_choice.
 *
 * final_call: when non-zero, and the NSTR debug level is high enough (3 on the
 *             server, 1 otherwise), the chosen checksum is reported via
 *             rprintf(FINFO).
 *
 * NOTE(review): this listing omits the else/brace lines, the body of the
 * CSUM_NONE test and the end of the rprintf() call; the branch structure
 * described in the comments below is inferred from the visible statements. */
91 void parse_checksum_choice(int final_call
)
/* A negotiated checksum is used for both types. */
93 if (valid_checksums
.negotiated_name
)
94 xfersum_type
= checksum_type
= valid_checksums
.negotiated_num
;
/* Look for a comma separating two names in checksum_choice. */
96 char *cp
= checksum_choice
? strchr(checksum_choice
, ',') : NULL
;
/* The text before the comma names the transfer checksum and the text after it
 * names the pre-transfer checksum. */
98 xfersum_type
= parse_csum_name(checksum_choice
, cp
- checksum_choice
);
99 checksum_type
= parse_csum_name(cp
+1, -1);
/* A single name (or no name) applies to both types. */
101 xfersum_type
= checksum_type
= parse_csum_name(checksum_choice
, -1);
104 if (xfersum_type
== CSUM_NONE
)
107 /* Snag the checksum name for both write_batch's option output & the following debug output. */
108 if (valid_checksums
.negotiated_name
)
109 checksum_choice
= valid_checksums
.negotiated_name
;
110 else if (checksum_choice
== NULL
)
111 checksum_choice
= checksum_name(xfersum_type
);
113 if (final_call
&& DEBUG_GTE(NSTR
, am_server
? 3 : 1)) {
114 rprintf(FINFO
, "%s%s checksum: %s\n",
115 am_server
? "Server" : "Client",
116 valid_checksums
.negotiated_name
? " negotiated" : "",
/* Return the digest length for checksum type cst.
 *
 * cst:        a CSUM_* value.
 * flist_csum: only consulted for CSUM_MD4_ARCHAIC, where a true value selects
 *             the 2-byte file-list length instead of MD4_DIGEST_LEN.
 *
 * An unhandled type reaches the default label and calls
 * exit_cleanup(RERR_UNSUPPORTED).
 *
 * NOTE(review): the switch statement line, the case labels that lead to the
 * MD5_DIGEST_LEN return and the contents of the SUPPORT_XXHASH section are not
 * visible in this listing. */
121 int csum_len_for_type(int cst
, BOOL flist_csum
)
126 case CSUM_MD4_ARCHAIC
:
127 /* The oldest checksum code is rather weird: the file-list code only sent
128 * 2-byte checksums, but all other checksums were full MD4 length. */
129 return flist_csum
? 2 : MD4_DIGEST_LEN
;
132 case CSUM_MD4_BUSTED
:
133 return MD4_DIGEST_LEN
;
135 return MD5_DIGEST_LEN
;
136 #ifdef SUPPORT_XXHASH
140 default: /* paranoia to prevent missing case values */
141 exit_cleanup(RERR_UNSUPPORTED
);
146 /* Returns 0 if the checksum is not canonical (i.e. it includes a seed value).
147 * Returns 1 if the public sum order matches our internal sum order.
148 * Returns -1 if the public sum order is the reverse of our internal sum order.
 * NOTE(review): the closing delimiter of this comment and most of the switch
 * body (the return statements and some case labels) are absent from this
 * listing, so which checksum types map to 0, 1 or -1 cannot be confirmed here.
 * An unhandled type reaches the default label, which calls
 * exit_cleanup(RERR_UNSUPPORTED).
150 int canonical_checksum(int csum_type
)
154 case CSUM_MD4_ARCHAIC
:
156 case CSUM_MD4_BUSTED
:
161 #ifdef SUPPORT_XXHASH
165 default: /* paranoia to prevent missing case values */
166 exit_cleanup(RERR_UNSUPPORTED
);
171 #ifndef HAVE_SIMD /* See simd-checksum-*.cpp. */
173 a simple 32 bit checksum that can be updated from either end
174 (inspired by Mark Adler's Adler-32 checksum)
/* Compute the simple 32-bit checksum of the len bytes starting at buf1.
 *
 * The bytes are read through a schar pointer. s1 sums the byte values, each
 * offset by CHAR_OFFSET, and s2 sums the successive values of s1.
 * Returns the low 16 bits of s1 plus s2 shifted left by 16 bits.
 *
 * NOTE(review): the declarations and initial values of i, s1 and s2 are not
 * visible in this listing. */
176 uint32
get_checksum1(char *buf1
, int32 len
)
180 schar
*buf
= (schar
*)buf1
;
/* Four bytes per iteration: the weights 4, 3, 2, 1 and 10*CHAR_OFFSET give s2
 * the same value as four single-byte steps of the loop further down. */
183 for (i
= 0; i
< (len
-4); i
+=4) {
184 s2
+= 4*(s1
+ buf
[i
]) + 3*buf
[i
+1] + 2*buf
[i
+2] + buf
[i
+3] + 10*CHAR_OFFSET
;
185 s1
+= (buf
[i
+0] + buf
[i
+1] + buf
[i
+2] + buf
[i
+3] + 4*CHAR_OFFSET
);
/* Remaining bytes, one at a time. */
187 for (; i
< len
; i
++) {
188 s1
+= (buf
[i
]+CHAR_OFFSET
); s2
+= s1
;
190 return (s1
& 0xffff) + (s2
<< 16);
/* Compute the checksum of the len bytes at buf, using the algorithm selected
 * by xfersum_type, and store the digest in sum.
 *
 * buf: data to checksum.
 * len: number of bytes at buf.
 * sum: output buffer for the digest.
 *
 * checksum_seed is mixed into every visible variant: as the XXH64 seed, as 4
 * extra bytes fed to MD5 or MD4, or as 4 bytes appended to a copy of the data
 * for the older MD4 variants.
 *
 * NOTE(review): this listing omits most case labels, the local declarations,
 * the hash initialisation calls and any conditions guarding the seed handling;
 * confirm those against the full file. */
194 void get_checksum2(char *buf
, int32 len
, char *sum
)
196 switch (xfersum_type
) {
197 #ifdef SUPPORT_XXHASH
199 SIVAL64(sum
, 0, XXH64(buf
, len
, checksum_seed
));
/* MD5: with proper_seed_order the seed bytes are hashed before the data. */
206 if (proper_seed_order
) {
208 SIVALu(seedbuf
, 0, checksum_seed
);
209 MD5_Update(&m5
, seedbuf
, 4);
211 MD5_Update(&m5
, (uchar
*)buf
, len
);
/* Other ordering: the data is hashed first and the seed bytes after it. */
213 MD5_Update(&m5
, (uchar
*)buf
, len
);
215 SIVALu(seedbuf
, 0, checksum_seed
);
216 MD5_Update(&m5
, seedbuf
, 4);
219 MD5_Final((uchar
*)sum
, &m5
);
/* MD4 through the MD4_* interface: data first, then the seed bytes. */
227 MD4_Update(&m4
, (uchar
*)buf
, len
);
230 SIVALu(seedbuf
, 0, checksum_seed
);
231 MD4_Update(&m4
, seedbuf
, 4);
233 MD4_Final((uchar
*)sum
, &m4
);
238 case CSUM_MD4_BUSTED
:
239 case CSUM_MD4_ARCHAIC
: {
/* Work on a copy that has 4 spare bytes after the data for the seed. */
250 buf1
= new_array(char, len
+4);
253 out_of_memory("get_checksum2");
256 memcpy(buf1
, buf
, len
);
258 SIVAL(buf1
,len
,checksum_seed
);
/* Hash all whole CSUM_CHUNK-sized pieces. */
262 for (i
= 0; i
+ CSUM_CHUNK
<= len
; i
+= CSUM_CHUNK
)
263 mdfour_update(&m
, (uchar
*)(buf1
+i
), CSUM_CHUNK
);
266 * Prior to version 27 an incorrect MD4 checksum was computed
267 * by failing to call mdfour_tail() for block sizes that
268 * are multiples of 64. This is fixed by calling mdfour_update()
269 * even when there are no more bytes.
271 if (len
- i
> 0 || xfersum_type
> CSUM_MD4_BUSTED
)
272 mdfour_update(&m
, (uchar
*)(buf1
+i
), len
-i
);
274 mdfour_result(&m
, (uchar
*)sum
);
277 default: /* paranoia to prevent missing case values */
278 exit_cleanup(RERR_UNSUPPORTED
);
/* Compute the whole-file checksum of fname, using the algorithm selected by
 * checksum_type, and store the digest in sum.
 *
 * fname: path of the file, opened read-only with do_open().
 * st_p:  stat data for the file; st_size supplies the number of bytes to hash.
 * sum:   output buffer; its first MAX_DIGEST_LEN bytes are zeroed before any
 *        hashing is done.
 *
 * The file is read through map_file() and map_ptr() in CHUNK_SIZE pieces, with
 * the last partial piece fed to the hash separately. No use of checksum_seed
 * is visible in this function. A checksum_type with no handler is reported as
 * an invalid --checksum choice and ends in exit_cleanup(RERR_UNSUPPORTED).
 *
 * NOTE(review): this listing omits most case labels, the declarations of fd,
 * remainder and the hash contexts, the handling of a failed open, the hash
 * initialisation calls and the cleanup after the switch. */
282 void file_checksum(const char *fname
, const STRUCT_STAT
*st_p
, char *sum
)
284 struct map_struct
*buf
;
285 OFF_T i
, len
= st_p
->st_size
;
289 memset(sum
, 0, MAX_DIGEST_LEN
);
291 fd
= do_open(fname
, O_RDONLY
, 0);
295 buf
= map_file(fd
, len
, MAX_MAP_SIZE
, CHUNK_SIZE
);
297 switch (checksum_type
) {
298 #ifdef SUPPORT_XXHASH
/* The XXH64 state is created on first use and kept in a static pointer. */
300 static XXH64_state_t
* state
= NULL
;
301 if (!state
&& !(state
= XXH64_createState()))
302 out_of_memory("file_checksum");
304 XXH64_reset(state
, 0);
306 for (i
= 0; i
+ CHUNK_SIZE
<= len
; i
+= CHUNK_SIZE
)
307 XXH64_update(state
, (uchar
*)map_ptr(buf
, i
, CHUNK_SIZE
), CHUNK_SIZE
);
309 remainder
= (int32
)(len
- i
);
311 XXH64_update(state
, (uchar
*)map_ptr(buf
, i
, remainder
), remainder
);
313 SIVAL64(sum
, 0, XXH64_digest(state
));
/* MD5: whole chunks, then the remainder, then the final digest. */
322 for (i
= 0; i
+ CHUNK_SIZE
<= len
; i
+= CHUNK_SIZE
)
323 MD5_Update(&m5
, (uchar
*)map_ptr(buf
, i
, CHUNK_SIZE
), CHUNK_SIZE
);
325 remainder
= (int32
)(len
- i
);
327 MD5_Update(&m5
, (uchar
*)map_ptr(buf
, i
, remainder
), remainder
);
329 MD5_Final((uchar
*)sum
, &m5
);
/* MD4 through the MD4_* interface, same chunking as above. */
339 for (i
= 0; i
+ CHUNK_SIZE
<= len
; i
+= CHUNK_SIZE
)
340 MD4_Update(&m4
, (uchar
*)map_ptr(buf
, i
, CHUNK_SIZE
), CHUNK_SIZE
);
342 remainder
= (int32
)(len
- i
);
344 MD4_Update(&m4
, (uchar
*)map_ptr(buf
, i
, remainder
), remainder
);
346 MD4_Final((uchar
*)sum
, &m4
);
351 case CSUM_MD4_BUSTED
:
352 case CSUM_MD4_ARCHAIC
: {
357 for (i
= 0; i
+ CHUNK_SIZE
<= len
; i
+= CHUNK_SIZE
)
358 mdfour_update(&m
, (uchar
*)map_ptr(buf
, i
, CHUNK_SIZE
), CHUNK_SIZE
);
360 /* Prior to version 27 an incorrect MD4 checksum was computed
361 * by failing to call mdfour_tail() for block sizes that
362 * are multiples of 64. This is fixed by calling mdfour_update()
363 * even when there are no more bytes. */
364 remainder
= (int32
)(len
- i
);
365 if (remainder
> 0 || checksum_type
> CSUM_MD4_BUSTED
)
366 mdfour_update(&m
, (uchar
*)map_ptr(buf
, i
, remainder
), remainder
);
368 mdfour_result(&m
, (uchar
*)sum
);
/* No handler for this checksum_type: report it and exit. */
372 rprintf(FERROR
, "Invalid checksum-choice for --checksum: %s (%d)\n",
373 checksum_name(checksum_type
), checksum_type
);
374 exit_cleanup(RERR_UNSUPPORTED
);
/* State shared by sum_init(), sum_update() and sum_end(). */
/* Used by sum_update() and sum_end() as the count of bytes held in
 * ctx.md.buffer for the older MD4 variants. */
381 static int32 sumresidue
;
389 #ifdef SUPPORT_XXHASH
/* XXH64 streaming state, created on first use in sum_init(). */
390 static XXH64_state_t
* xxh64_state
;
/* Checksum type of the running sum; set by sum_init(). */
392 static int cursum_type
;
/* Start a new running checksum of type csum_type and remember the type in
 * cursum_type for sum_update() and sum_end().
 *
 * csum_type: a CSUM_* value. The visible code can replace it with the default
 *            from parse_csum_name(NULL, 0); the condition for doing so is not
 *            visible in this listing.
 * seed:      NOTE(review): no use of seed is visible in this listing; confirm
 *            how it is applied in the full file.
 *
 * An unhandled type reaches the default label and calls
 * exit_cleanup(RERR_UNSUPPORTED).
 *
 * NOTE(review): the switch statement line, most case labels and the MD5 and
 * MD4_* initialisation calls are also missing from this listing. */
394 void sum_init(int csum_type
, int seed
)
399 csum_type
= parse_csum_name(NULL
, 0);
400 cursum_type
= csum_type
;
403 #ifdef SUPPORT_XXHASH
/* Create the XXH64 state on first use, then reset it with seed value 0. */
405 if (!xxh64_state
&& !(xxh64_state
= XXH64_createState()))
406 out_of_memory("sum_init");
407 XXH64_reset(xxh64_state
, 0);
417 mdfour_begin(&ctx
.md
);
422 case CSUM_MD4_BUSTED
:
423 case CSUM_MD4_ARCHAIC
:
424 mdfour_begin(&ctx
.md
);
431 default: /* paranoia to prevent missing case values */
432 exit_cleanup(RERR_UNSUPPORTED
);
437 * Feed data into the running checksum selected by cursum_type (ctx.md for the older MD4 variants). The results may be
438 * retrieved using sum_end(). md is used for different purposes at
439 * different points during execution.
441 * @todo Perhaps get rid of md and just pass in the address each time.
442 * Very slightly clearer and slower.
/* Add the len bytes at p to the running checksum selected by cursum_type.
 *
 * p:   data to add.
 * len: number of bytes at p.
 *
 * The XXH64, MD5 and MD4_* variants pass the data straight to their update
 * functions. The older MD4 variants feed mdfour_update() in CSUM_CHUNK-sized
 * pieces and keep leftover bytes in ctx.md.buffer, counted by sumresidue.
 *
 * NOTE(review): this listing omits most case labels and break statements, and
 * the statements that advance p and adjust len and sumresidue. */
444 void sum_update(const char *p
, int32 len
)
446 switch (cursum_type
) {
447 #ifdef SUPPORT_XXHASH
449 XXH64_update(xxh64_state
, p
, len
);
453 MD5_Update(&ctx
.m5
, (uchar
*)p
, len
);
457 MD4_Update(&ctx
.m4
, (uchar
*)p
, len
);
461 case CSUM_MD4_BUSTED
:
462 case CSUM_MD4_ARCHAIC
:
/* Not enough for a full chunk yet: append to the held bytes. */
463 if (len
+ sumresidue
< CSUM_CHUNK
) {
464 memcpy(ctx
.md
.buffer
+ sumresidue
, p
, len
);
/* Top the held bytes up to one full chunk and hash that chunk. */
470 int32 i
= CSUM_CHUNK
- sumresidue
;
471 memcpy(ctx
.md
.buffer
+ sumresidue
, p
, i
);
472 mdfour_update(&ctx
.md
, (uchar
*)ctx
.md
.buffer
, CSUM_CHUNK
);
/* Hash further whole chunks directly from the input. */
477 while (len
>= CSUM_CHUNK
) {
478 mdfour_update(&ctx
.md
, (uchar
*)p
, CSUM_CHUNK
);
/* Keep what is left over for the next call or for sum_end(). */
485 memcpy(ctx
.md
.buffer
, p
, sumresidue
);
489 default: /* paranoia to prevent missing case values */
490 exit_cleanup(RERR_UNSUPPORTED
);
/* Finish the running checksum selected by cursum_type and store the digest in
 * sum. Returns csum_len_for_type(cursum_type, 0).
 *
 * For the variants that use ctx.md, the bytes still held in ctx.md.buffer
 * (sumresidue of them) are passed to mdfour_update() before mdfour_result().
 * An unhandled type reaches the default label and calls
 * exit_cleanup(RERR_UNSUPPORTED).
 *
 * NOTE(review): this listing omits most case labels and break statements and
 * any condition guarding the final mdfour_update() calls. */
494 /* NOTE: all the callers of sum_end() pass in a pointer to a buffer that is
495 * MAX_DIGEST_LEN in size, so even if the csum-len is shorter than that (i.e.
496 * CSUM_MD4_ARCHAIC), we don't have to worry about limiting the data we write
497 * into the "sum" buffer. */
498 int sum_end(char *sum
)
500 switch (cursum_type
) {
501 #ifdef SUPPORT_XXHASH
503 SIVAL64(sum
, 0, XXH64_digest(xxh64_state
));
507 MD5_Final((uchar
*)sum
, &ctx
.m5
);
511 MD4_Final((uchar
*)sum
, &ctx
.m4
);
515 mdfour_update(&ctx
.md
, (uchar
*)ctx
.md
.buffer
, sumresidue
);
516 mdfour_result(&ctx
.md
, (uchar
*)sum
);
518 case CSUM_MD4_BUSTED
:
519 case CSUM_MD4_ARCHAIC
:
521 mdfour_update(&ctx
.md
, (uchar
*)ctx
.md
.buffer
, sumresidue
);
522 mdfour_result(&ctx
.md
, (uchar
*)sum
);
527 default: /* paranoia to prevent missing case values */
528 exit_cleanup(RERR_UNSUPPORTED
);
531 return csum_len_for_type(cursum_type
, 0);