/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later */
#include <commonlib/bsd/ipchksum.h>
/*
 * Compute the 16-bit ones' complement checksum (the "Internet checksum") of a buffer.
 *
 * See RFC 1071 for mathematical explanations of why we can first sum in a larger register and
 * then narrow down, why we don't need to worry about endianness, etc.
 *
 * Bytes at even offsets are accumulated into the low-order byte of the running sum and bytes
 * at odd offsets into the high-order byte. On AArch64 and x86 the bulk of the buffer is summed
 * with add-with-carry instructions on wide loads; whatever is left over (and the whole buffer
 * on every other architecture) goes through the portable byte loop at the end.
 *
 * NOTE(review): the wide-load paths only agree with the byte loop when the CPU runs
 * little-endian -- confirm that no big-endian AArch64 configuration uses this file.
 *
 * @param data  Start of the buffer to checksum. It is only read, never written.
 * @param size  Number of bytes to checksum. May be 0 and may be odd.
 * @return      Bitwise inverse of the folded 16-bit sum (0xFFFF for an empty buffer).
 */
uint16_t ipchksum(const void *data, size_t size)
{
	const uint8_t *p1 = data;
	unsigned long wide_sum = 0;
	uint32_t sum = 0;
	size_t i = 0;	/* number of bytes already consumed by a wide path */

#if defined(__aarch64__)
	size_t size16 = size / 16;
	const uint64_t *p8 = data;
	/* The loop below is bottom-tested, so it must not be entered with a zero count. */
	if (size16) {
		unsigned long tmp1, tmp2;
		i = size16 * 16;
		__asm__ (
			"adds xzr, xzr, xzr\n\t"	/* init carry flag for addition */
			"1:\n\t"
			"ldp %[v1], %[v2], [%[p8]], #16\n\t"
			"adcs %[wsum], %[wsum], %[v1]\n\t"
			"adcs %[wsum], %[wsum], %[v2]\n\t"
			"sub %[size16], %[size16], #1\n\t"	/* SUB (not SUBS) keeps the carry */
			"cbnz %[size16], 1b\n\t"
			"adcs %[wsum], %[wsum], xzr\n\t"	/* use up last carry */
		: [v1] "=r" (tmp1),
		  [v2] "=r" (tmp2),
		  [wsum] "+r" (wide_sum),
		  [p8] "+r" (p8),
		  [size16] "+r" (size16)
		/* "memory": the buffer is read through p8 without being listed as an operand. */
		:: "cc", "memory"
		);
	}
#elif defined(__i386__) || defined(__x86_64__)
	size_t size8 = size / 8;
	const uint64_t *p8 = data;
	i = size8 * 8;
	__asm__ (
		"clc\n\t"		/* start the ADC chain with a clear carry */
		"1:\n\t"
		"jecxz 2f\n\t"		/* technically RCX on 64, but not gonna be that big */
		"adc (%[p8]), %[wsum]\n\t"
#if defined(__i386__)
		"adc 4(%[p8]), %[wsum]\n\t"	/* registers are 32 bits: add the upper half too */
#endif	/* __i386__ */
		"lea -1(%[size8]), %[size8]\n\t"	/* Use LEA as a makeshift ADD that */
		"lea 8(%[p8]), %[p8]\n\t"		/* doesn't modify the carry flag. */
		"jmp 1b\n\t"
		"2:\n\t"
		"setc %b[size8]\n\t"	/* reuse size register to save last carry */
		"add %[size8], %[wsum]\n\t"
	: [wsum] "+r" (wide_sum),
	  [p8] "+r" (p8),
	  [size8] "+c" (size8)		/* put size in ECX so we can JECXZ */
	/* "memory": the buffer is read through p8 without being listed as an operand. */
	:: "cc", "memory"
	);
#endif	/* __i386__ || __x86_64__ */

	/* Narrow the wide accumulator down by adding up its 16-bit pieces. */
	while (wide_sum) {
		sum += wide_sum & 0xFFFF;
		wide_sum >>= 16;
	}
	sum = (sum & 0xFFFF) + (sum >> 16);

	/* Portable tail: everything the wide path (if any) did not consume. */
	for (; i < size; i++) {
		uint32_t v = p1[i];
		if (i % 2)
			v <<= 8;
		sum += v;

		/* Doing this unconditionally seems to be faster. */
		sum = (sum & 0xFFFF) + (sum >> 16);
	}

	return (uint16_t)~sum;
}
/*
 * Combine the checksums of two data ranges into the checksum of their concatenation.
 *
 * @param offset  Byte offset at which the data covered by `second` would start if both
 *                ranges were laid out back to back. Only its parity is used.
 * @param first   Checksum of the first range, in the inverted form this file returns.
 * @param second  Checksum of the second range, in the same form.
 * @return        Inverted, folded 16-bit sum of both inputs.
 */
uint16_t ipchksum_add(size_t offset, uint16_t first, uint16_t second)
{
	/* Undo the final inversion so that we add the raw ones' complement sums. */
	first = (uint16_t)~first;
	second = (uint16_t)~second;

	/*
	 * Since the checksum is calculated in 16-bit chunks, if the offset at which
	 * the data covered by the second checksum would start (if both data streams
	 * came one after the other) is odd, that means the second stream starts in
	 * the middle of a 16-bit chunk. This means the second checksum is byte
	 * swapped compared to what we need it to be, and we must swap it back.
	 */
	if (offset % 2)
		second = (uint16_t)((second >> 8) | (second << 8));

	/* Add in 32 bits, then fold the possible carry back into the low 16 bits. */
	uint32_t sum = (uint32_t)first + second;
	sum = (sum & 0xFFFF) + (sum >> 16);

	return (uint16_t)~sum;
}