2 * Routines to support checksumming of bytes.
4 * Copyright (C) 1996 Andrew Tridgell
5 * Copyright (C) 1996 Paul Mackerras
6 * Copyright (C) 2004-2020 Wayne Davison
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 3 of the License, or
11 * (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, visit the http://fsf.org website.
28 extern int local_server
;
29 extern int whole_file
;
30 extern int read_batch
;
31 extern int checksum_seed
;
32 extern int protocol_version
;
33 extern int proper_seed_order
;
34 extern char *checksum_choice
;
37 #define CSUM_MD4_ARCHAIC 1
38 #define CSUM_MD4_BUSTED 2
39 #define CSUM_MD4_OLD 3
44 #define CSUM_SAW_BUFLEN 10
49 } valid_checksums
[] = {
51 { CSUM_XXHASH
, "xxhash" },
55 { CSUM_NONE
, "none" },
59 #define MAX_CHECKSUM_LIST 1024
61 int xfersum_type
= 0; /* used for the file transfer checksums */
62 int checksum_type
= 0; /* used for the pre-transfer (--checksum) checksums */
63 const char *negotiated_csum_name
= NULL
;
65 static int parse_csum_name(const char *name
, int len
, int allow_auto
)
67 struct csum_struct
*cs
;
72 if (!name
|| (allow_auto
&& len
== 4 && strncasecmp(name
, "auto", 4) == 0)) {
73 if (protocol_version
>= 30)
75 if (protocol_version
>= 27)
77 if (protocol_version
>= 21)
78 return CSUM_MD4_BUSTED
;
79 return CSUM_MD4_ARCHAIC
;
82 for (cs
= valid_checksums
; cs
->name
; cs
++) {
83 if (strncasecmp(name
, cs
->name
, len
) == 0 && cs
->name
[len
] == '\0')
88 rprintf(FERROR
, "unknown checksum name: %s\n", name
);
89 exit_cleanup(RERR_UNSUPPORTED
);
95 static const char *checksum_name(int num
)
97 struct csum_struct
*cs
;
99 for (cs
= valid_checksums
; cs
->name
; cs
++) {
110 void parse_checksum_choice(int final_call
)
112 if (!negotiated_csum_name
) {
113 char *cp
= checksum_choice
? strchr(checksum_choice
, ',') : NULL
;
115 xfersum_type
= parse_csum_name(checksum_choice
, cp
- checksum_choice
, 1);
116 checksum_type
= parse_csum_name(cp
+1, -1, 1);
118 xfersum_type
= checksum_type
= parse_csum_name(checksum_choice
, -1, 1);
121 if (xfersum_type
== CSUM_NONE
)
124 if (final_call
&& DEBUG_GTE(CSUM
, 1)) {
125 if (negotiated_csum_name
)
126 rprintf(FINFO
, "[%s] negotiated checksum: %s\n", who_am_i(), negotiated_csum_name
);
127 else if (xfersum_type
== checksum_type
) {
128 rprintf(FINFO
, "[%s] %s checksum: %s\n", who_am_i(),
129 checksum_choice
? "chosen" : "protocol-based",
130 checksum_name(xfersum_type
));
132 rprintf(FINFO
, "[%s] chosen transfer checksum: %s\n",
133 who_am_i(), checksum_name(xfersum_type
));
134 rprintf(FINFO
, "[%s] chosen pre-transfer checksum: %s\n",
135 who_am_i(), checksum_name(checksum_type
));
140 static int parse_checksum_list(const char *from
, char *sumbuf
, int sumbuf_len
, char *saw
)
142 char *to
= sumbuf
, *tok
= NULL
;
145 memset(saw
, 0, CSUM_SAW_BUFLEN
);
148 if (*from
== ' ' || !*from
) {
150 int sum_type
= parse_csum_name(tok
, to
- tok
, 0);
151 if (sum_type
>= 0 && !saw
[sum_type
])
152 saw
[sum_type
] = ++cnt
;
154 to
= tok
- (tok
!= sumbuf
);
166 if (to
- sumbuf
>= sumbuf_len
- 1) {
167 to
= tok
- (tok
!= sumbuf
);
177 void negotiate_checksum(int f_in
, int f_out
, const char *csum_list
, int saw_fail
)
179 char *tok
, sumbuf
[MAX_CHECKSUM_LIST
], saw
[CSUM_SAW_BUFLEN
];
182 /* Simplify the user-provided string so that it contains valid
183 * checksum names without any duplicates. The client side also
184 * makes use of the saw values when scanning the server's list. */
185 if (csum_list
&& *csum_list
&& (!am_server
|| local_server
)) {
186 len
= parse_checksum_list(csum_list
, sumbuf
, sizeof sumbuf
, saw
);
187 if (saw_fail
&& !len
)
188 len
= strlcpy(sumbuf
, "FAIL", sizeof sumbuf
);
193 if (!csum_list
|| !*csum_list
) {
194 struct csum_struct
*cs
;
195 for (tok
= sumbuf
, cs
= valid_checksums
, len
= 0; cs
->name
; cs
++) {
196 if (cs
->num
== CSUM_NONE
)
200 tok
+= strlcpy(tok
, cs
->name
, sizeof sumbuf
- (tok
- sumbuf
));
201 saw
[cs
->num
] = ++len
;
207 /* Each side sends their list of valid checksum names to the other side and
208 * then both sides pick the first name in the client's list that is also in
209 * the server's list. */
211 write_vstring(f_out
, sumbuf
, len
);
213 if (!local_server
|| read_batch
)
214 len
= read_vstring(f_in
, sumbuf
, sizeof sumbuf
);
217 int best
= CSUM_SAW_BUFLEN
; /* We want best == 1 from the client list */
219 memset(saw
, 1, CSUM_SAW_BUFLEN
); /* The first client's choice is the best choice */
220 for (tok
= strtok(sumbuf
, " \t"); tok
; tok
= strtok(NULL
, " \t")) {
221 sum_type
= parse_csum_name(tok
, -1, 0);
222 if (sum_type
< 0 || !saw
[sum_type
] || best
< saw
[sum_type
])
224 xfersum_type
= checksum_type
= sum_type
;
225 negotiated_csum_name
= tok
;
226 best
= saw
[sum_type
];
230 if (negotiated_csum_name
) {
231 negotiated_csum_name
= strdup(negotiated_csum_name
);
238 rprintf(FERROR
, "Failed to negotiate a common checksum\n");
239 exit_cleanup(RERR_UNSUPPORTED
);
242 int csum_len_for_type(int cst
, BOOL flist_csum
)
247 case CSUM_MD4_ARCHAIC
:
248 /* The oldest checksum code is rather weird: the file-list code only sent
249 * 2-byte checksums, but all other checksums were full MD4 length. */
250 return flist_csum
? 2 : MD4_DIGEST_LEN
;
253 case CSUM_MD4_BUSTED
:
254 return MD4_DIGEST_LEN
;
256 return MD5_DIGEST_LEN
;
257 #ifdef SUPPORT_XXHASH
259 return sizeof (XXH64_hash_t
);
261 default: /* paranoia to prevent missing case values */
262 exit_cleanup(RERR_UNSUPPORTED
);
267 int canonical_checksum(int csum_type
)
269 return csum_type
>= CSUM_MD4
? 1 : 0;
272 #ifndef HAVE_SIMD /* See simd-checksum-*.cpp. */
274 a simple 32 bit checksum that can be updated from either end
275 (inspired by Mark Adler's Adler-32 checksum)
277 uint32
get_checksum1(char *buf1
, int32 len
)
281 schar
*buf
= (schar
*)buf1
;
284 for (i
= 0; i
< (len
-4); i
+=4) {
285 s2
+= 4*(s1
+ buf
[i
]) + 3*buf
[i
+1] + 2*buf
[i
+2] + buf
[i
+3] + 10*CHAR_OFFSET
;
286 s1
+= (buf
[i
+0] + buf
[i
+1] + buf
[i
+2] + buf
[i
+3] + 4*CHAR_OFFSET
);
288 for (; i
< len
; i
++) {
289 s1
+= (buf
[i
]+CHAR_OFFSET
); s2
+= s1
;
291 return (s1
& 0xffff) + (s2
<< 16);
295 void get_checksum2(char *buf
, int32 len
, char *sum
)
299 switch (xfersum_type
) {
303 if (proper_seed_order
) {
305 SIVALu(seedbuf
, 0, checksum_seed
);
306 md5_update(&m
, seedbuf
, 4);
308 md5_update(&m
, (uchar
*)buf
, len
);
310 md5_update(&m
, (uchar
*)buf
, len
);
312 SIVALu(seedbuf
, 0, checksum_seed
);
313 md5_update(&m
, seedbuf
, 4);
316 md5_result(&m
, (uchar
*)sum
);
321 case CSUM_MD4_BUSTED
:
322 case CSUM_MD4_ARCHAIC
: {
332 buf1
= new_array(char, len
+4);
335 out_of_memory("get_checksum2");
338 memcpy(buf1
, buf
, len
);
340 SIVAL(buf1
,len
,checksum_seed
);
344 for (i
= 0; i
+ CSUM_CHUNK
<= len
; i
+= CSUM_CHUNK
)
345 mdfour_update(&m
, (uchar
*)(buf1
+i
), CSUM_CHUNK
);
348 * Prior to version 27 an incorrect MD4 checksum was computed
349 * by failing to call mdfour_tail() for block sizes that
350 * are multiples of 64. This is fixed by calling mdfour_update()
351 * even when there are no more bytes.
353 if (len
- i
> 0 || xfersum_type
> CSUM_MD4_BUSTED
)
354 mdfour_update(&m
, (uchar
*)(buf1
+i
), len
-i
);
356 mdfour_result(&m
, (uchar
*)sum
);
359 #ifdef SUPPORT_XXHASH
361 SIVAL64(sum
, 0, XXH64(buf
, len
, checksum_seed
));
364 default: /* paranoia to prevent missing case values */
365 exit_cleanup(RERR_UNSUPPORTED
);
369 void file_checksum(const char *fname
, const STRUCT_STAT
*st_p
, char *sum
)
371 struct map_struct
*buf
;
372 OFF_T i
, len
= st_p
->st_size
;
377 memset(sum
, 0, MAX_DIGEST_LEN
);
379 fd
= do_open(fname
, O_RDONLY
, 0);
383 buf
= map_file(fd
, len
, MAX_MAP_SIZE
, CSUM_CHUNK
);
385 switch (checksum_type
) {
389 for (i
= 0; i
+ CSUM_CHUNK
<= len
; i
+= CSUM_CHUNK
) {
390 md5_update(&m
, (uchar
*)map_ptr(buf
, i
, CSUM_CHUNK
),
394 remainder
= (int32
)(len
- i
);
396 md5_update(&m
, (uchar
*)map_ptr(buf
, i
, remainder
), remainder
);
398 md5_result(&m
, (uchar
*)sum
);
402 case CSUM_MD4_BUSTED
:
403 case CSUM_MD4_ARCHAIC
:
406 for (i
= 0; i
+ CSUM_CHUNK
<= len
; i
+= CSUM_CHUNK
) {
407 mdfour_update(&m
, (uchar
*)map_ptr(buf
, i
, CSUM_CHUNK
), CSUM_CHUNK
);
410 /* Prior to version 27 an incorrect MD4 checksum was computed
411 * by failing to call mdfour_tail() for block sizes that
412 * are multiples of 64. This is fixed by calling mdfour_update()
413 * even when there are no more bytes. */
414 remainder
= (int32
)(len
- i
);
415 if (remainder
> 0 || checksum_type
> CSUM_MD4_BUSTED
)
416 mdfour_update(&m
, (uchar
*)map_ptr(buf
, i
, remainder
), remainder
);
418 mdfour_result(&m
, (uchar
*)sum
);
420 #ifdef SUPPORT_XXHASH
422 XXH64_state_t
* state
= XXH64_createState();
424 out_of_memory("file_checksum xx64");
426 if (XXH64_reset(state
, 0) == XXH_ERROR
) {
427 rprintf(FERROR
, "error resetting XXH64 seed");
428 exit_cleanup(RERR_STREAMIO
);
431 for (i
= 0; i
+ CSUM_CHUNK
<= len
; i
+= CSUM_CHUNK
) {
432 XXH_errorcode
const updateResult
=
433 XXH64_update(state
, (uchar
*)map_ptr(buf
, i
, CSUM_CHUNK
), CSUM_CHUNK
);
434 if (updateResult
== XXH_ERROR
) {
435 rprintf(FERROR
, "error computing XX64 hash");
436 exit_cleanup(RERR_STREAMIO
);
439 remainder
= (int32
)(len
- i
);
441 XXH64_update(state
, (uchar
*)map_ptr(buf
, i
, CSUM_CHUNK
), remainder
);
442 SIVAL64(sum
, 0, XXH64_digest(state
));
444 XXH64_freeState(state
);
449 rprintf(FERROR
, "invalid checksum-choice for the --checksum option (%d)\n", checksum_type
);
450 exit_cleanup(RERR_UNSUPPORTED
);
457 static int32 sumresidue
;
458 static md_context md
;
459 static int cursum_type
;
460 #ifdef SUPPORT_XXHASH
461 XXH64_state_t
* xxh64_state
= NULL
;
464 void sum_init(int csum_type
, int seed
)
469 csum_type
= parse_csum_name(NULL
, 0, 1);
470 cursum_type
= csum_type
;
481 case CSUM_MD4_BUSTED
:
482 case CSUM_MD4_ARCHAIC
:
488 #ifdef SUPPORT_XXHASH
490 if (xxh64_state
== NULL
) {
491 xxh64_state
= XXH64_createState();
492 if (xxh64_state
== NULL
)
493 out_of_memory("sum_init xxh64");
495 if (XXH64_reset(xxh64_state
, 0) == XXH_ERROR
) {
496 rprintf(FERROR
, "error resetting XXH64 state");
497 exit_cleanup(RERR_STREAMIO
);
503 default: /* paranoia to prevent missing case values */
504 exit_cleanup(RERR_UNSUPPORTED
);
509 * Feed data into an MD4 accumulator, md. The results may be
510 * retrieved using sum_end(). md is used for different purposes at
511 * different points during execution.
513 * @todo Perhaps get rid of md and just pass in the address each time.
514 * Very slightly clearer and slower.
516 void sum_update(const char *p
, int32 len
)
518 switch (cursum_type
) {
520 md5_update(&md
, (uchar
*)p
, len
);
524 case CSUM_MD4_BUSTED
:
525 case CSUM_MD4_ARCHAIC
:
526 if (len
+ sumresidue
< CSUM_CHUNK
) {
527 memcpy(md
.buffer
+ sumresidue
, p
, len
);
533 int32 i
= CSUM_CHUNK
- sumresidue
;
534 memcpy(md
.buffer
+ sumresidue
, p
, i
);
535 mdfour_update(&md
, (uchar
*)md
.buffer
, CSUM_CHUNK
);
540 while (len
>= CSUM_CHUNK
) {
541 mdfour_update(&md
, (uchar
*)p
, CSUM_CHUNK
);
548 memcpy(md
.buffer
, p
, sumresidue
);
550 #ifdef SUPPORT_XXHASH
552 if (XXH64_update(xxh64_state
, p
, len
) == XXH_ERROR
) {
553 rprintf(FERROR
, "error computing XX64 hash");
554 exit_cleanup(RERR_STREAMIO
);
560 default: /* paranoia to prevent missing case values */
561 exit_cleanup(RERR_UNSUPPORTED
);
565 /* NOTE: all the callers of sum_end() pass in a pointer to a buffer that is
566 * MAX_DIGEST_LEN in size, so even if the csum-len is shorter than that (i.e.
567 * CSUM_MD4_ARCHAIC), we don't have to worry about limiting the data we write
568 * into the "sum" buffer. */
569 int sum_end(char *sum
)
571 switch (cursum_type
) {
573 md5_result(&md
, (uchar
*)sum
);
577 mdfour_update(&md
, (uchar
*)md
.buffer
, sumresidue
);
578 mdfour_result(&md
, (uchar
*)sum
);
580 case CSUM_MD4_BUSTED
:
581 case CSUM_MD4_ARCHAIC
:
583 mdfour_update(&md
, (uchar
*)md
.buffer
, sumresidue
);
584 mdfour_result(&md
, (uchar
*)sum
);
586 #ifdef SUPPORT_XXHASH
588 SIVAL64(sum
, 0, XXH64_digest(xxh64_state
));
594 default: /* paranoia to prevent missing case values */
595 exit_cleanup(RERR_UNSUPPORTED
);
598 return csum_len_for_type(cursum_type
, 0);