1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 * INET An implementation of the TCP/IP protocol suite for the LINUX
4 * operating system. INET is implemented using the BSD Socket
5 * interface as the means of communication with the user level.
7 * IP/TCP/UDP checksumming routines
9 * Xtensa version: Copyright (C) 2001 Tensilica, Inc. by Kevin Chea
10 * Optimized by Joe Taylor
13 #include <linux/errno.h>
14 #include <linux/linkage.h>
15 #include <asm/asmmacro.h>
19 * computes a partial checksum, e.g. for TCP/UDP fragments
23 * unsigned int csum_partial(const unsigned char *buf, int len,
29 * This function assumes 2- or 4-byte alignment. Other alignments will fail!
32 /* ONES_ADD converts twos-complement math to ones-complement. */
33 #define ONES_ADD(sum, val)					  \
35	bgeu	sum, val, 99f					; \
/* NOTE(review): only part of the ONES_ADD macro is visible in this
 * excerpt -- the addition itself and the 99: end-around-carry label the
 * bgeu branches to are on lines not shown here.  Confirm against the
 * full source before editing. */
43 * Experiments with Ethernet and SLIP connections show that buf
44 * is aligned on either a 2-byte or 4-byte boundary.
/* Alignment dispatch.  a2 = buf, a3 = len (established by the "adjust
 * buf"/"adjust len" adjustments below); a5 holds the low address bit(s)
 * at this point, computed on a line not visible in this excerpt. */
48	bnez	a5, 8f		/* branch if 2-byte aligned */
49	/* Fall-through on common case, 4-byte alignment */
/* Fast path: process len>>5 chunks of 32 bytes each; a5 becomes the
 * loop-end pointer. */
51	srli	a5, a3, 5	/* 32-byte chunks */
57	add	a5, a5, a2	/* a5 = end of last 32-byte chunk */
/* Mop-up: bits 2..4 of the remaining length give the 4-byte chunk count. */
81	extui	a5, a3, 2, 3	/* remaining 4-byte chunks */
87	add	a5, a5, a2	/* a5 = end of last 4-byte chunk */
97	_bbci.l	a3, 1, 5f	/* remaining 2-byte chunk */
102	_bbci.l	a3, 0, 7f	/* remaining 1-byte chunk */
105	slli	a6, a6, 8	/* load byte into bits 8..15 */
112	/* uncommon case, buf is 2-byte aligned */
114	beqz	a3, 7b		/* branch if len == 0 */
115	beqi	a3, 1, 6b	/* branch if len == 1 */
118	bnez	a5, 8f		/* branch if 1-byte aligned */
/* 2-byte aligned with len >= 2: fold one halfword, advance buf/len by 2,
 * then rejoin the 4-byte-aligned fast path at 1:. */
120	l16ui	a6, a2, 0	/* common case, len >= 2 */
122	addi	a2, a2, 2	/* adjust buf */
123	addi	a3, a3, -2	/* adjust len */
124	j	1b		/* now buf is 4-byte aligned */
126	/* case: odd-byte aligned, len > 1
127	 * This case is dog slow, so don't give us an odd address.
128	 * (I don't think this ever happens, but just in case.)
131	srli	a5, a3, 2	/* 4-byte chunks */
137	add	a5, a5, a2	/* a5 = end of last 4-byte chunk */
/* Reassemble one misaligned 32-bit word from a byte, a halfword, and a
 * byte load (l8ui/l16ui cannot fault on alignment this way). */
140	l8ui	a6, a2, 0	/* bits 24..31 */
141	l16ui	a7, a2, 1	/* bits  8..23 */
142	l8ui	a8, a2, 3	/* bits  0..7  */
153	#if !XCHAL_HAVE_LOOPS
157	_bbci.l	a3, 1, 3f	/* remaining 2-byte chunk, still odd addr */
169	j	5b		/* branch to handle the remaining byte */
171	ENDPROC(csum_partial)
174 * Copy from ds while checksumming, otherwise like csum_partial
178 unsigned int csum_partial_copy_generic (const char *src, char *dst, int len)
187 This function is optimized for 4-byte aligned addresses. Other
188 alignments work, but not nearly as efficiently.
/*
 * csum_partial_copy_generic: copy while checksumming (see prototype
 * comment above).  Register roles as shown by the visible code:
 *   a2 = src (all EX() loads read from a2),
 *   a3 = dst (all EX() stores write to a3),
 *   a4 = len (chunk counts are derived from a4 via srli/extui).
 * The running-sum register is not visible in this excerpt -- confirm
 * against the full source.  EX(10f) tags each user-memory access with an
 * exception-table entry so a fault branches to local label 10: in the
 * .fixup section (started at the end of this file).
 */
191 ENTRY(csum_partial_copy_generic)
197	/* We optimize the following alignment tests for the 4-byte
198	aligned case.  Two bbsi.l instructions might seem more optimal
199	(commented out below).  However, both labels 5: and 3: are out
200	of the imm8 range, so the assembler relaxes them into
201	equivalent bbci.l, j combinations, which is actually
/* a9/a10 carry the combined alignment bits of src and dst here
 * (computed on lines not visible in this excerpt). */
205	beqz	a9, 1f		/* branch if both are 4-byte aligned */
206	bbsi.l	a10, 0, 5f	/* branch if one address is odd */
207	j	3f		/* one address is 2-byte aligned */
209	/* _bbsi.l	a10, 0, 5f */	/* branch if odd address */
210	/* _bbsi.l	a10, 1, 3f */	/* branch if 2-byte-aligned address */
213	/* src and dst are both 4-byte aligned */
214	srli	a10, a4, 5	/* 32-byte chunks */
220	add	a10, a10, a2	/* a10 = end of last 32-byte src chunk */
/* Unrolled fast path: copy 32 bytes per iteration as four load/store
 * pairs; each access is EX()-protected against faults. */
223 EX(10f)	l32i	a9, a2, 0
224 EX(10f)	l32i	a8, a2, 4
225 EX(10f)	s32i	a9, a3, 0
226 EX(10f)	s32i	a8, a3, 4
229 EX(10f)	l32i	a9, a2, 8
230 EX(10f)	l32i	a8, a2, 12
231 EX(10f)	s32i	a9, a3, 8
232 EX(10f)	s32i	a8, a3, 12
235 EX(10f)	l32i	a9, a2, 16
236 EX(10f)	l32i	a8, a2, 20
237 EX(10f)	s32i	a9, a3, 16
238 EX(10f)	s32i	a8, a3, 20
241 EX(10f)	l32i	a9, a2, 24
242 EX(10f)	l32i	a8, a2, 28
243 EX(10f)	s32i	a9, a3, 24
244 EX(10f)	s32i	a8, a3, 28
249 #if !XCHAL_HAVE_LOOPS
/* Mop-up after the 32-byte loop: bits 2..4 of len give the remaining
 * 4-byte chunk count; len is then reduced to its low 2 bits. */
253	extui	a10, a4, 2, 3	/* remaining 4-byte chunks */
254	extui	a4, a4, 0, 2	/* reset len for general-case, 2-byte chunks */
260	add	a10, a10, a2	/* a10 = end of last 4-byte src chunk */
263 EX(10f)	l32i	a9, a2, 0
264 EX(10f)	s32i	a9, a3, 0
268 #if !XCHAL_HAVE_LOOPS
273	Control comes to here in two cases:  (1) It may fall through
274	to here from the 4-byte alignment case to process, at most,
275	one 2-byte chunk.  (2) It branches to here from above if
276	either src or dst is 2-byte aligned, and we process all bytes
277	here, except for perhaps a trailing odd byte.  It's
278	inefficient, so align your addresses to 4-byte boundaries.
285	srli	a10, a4, 1	/* 2-byte chunks */
291	add	a10, a10, a2	/* a10 = end of last 2-byte src chunk */
294 EX(10f)	l16ui	a9, a2, 0
295 EX(10f)	s16i	a9, a3, 0
299 #if !XCHAL_HAVE_LOOPS
303	/* This section processes a possible trailing odd byte. */
304	_bbci.l	a4, 0, 8f	/* 1-byte chunk */
305 EX(10f)	l8ui	a9, a2, 0
306 EX(10f)	s8i	a9, a3, 0
308	slli	a9, a9, 8	/* shift byte to bits 8..15 */
316	/* Control branch to here when either src or dst is odd.  We
317	process all bytes using 8-bit accesses.  Grossly inefficient,
318	so don't feed us an odd address. */
320	srli	a10, a4, 1	/* handle in pairs for 16-bit csum */
326	add	a10, a10, a2	/* a10 = end of last odd-aligned, 2-byte src chunk */
/* Copy a byte pair with four 8-bit accesses, then merge the two bytes
 * into one halfword for the checksum. */
329 EX(10f)	l8ui	a9, a2, 0
330 EX(10f)	l8ui	a8, a2, 1
331 EX(10f)	s8i	a9, a3, 0
332 EX(10f)	s8i	a8, a3, 1
334	slli	a9, a9, 8	/* combine into a single 16-bit value */
335 #else				/* for checksum computation */
342 #if !XCHAL_HAVE_LOOPS
346	j	4b		/* process the possible trailing odd byte */
348	ENDPROC(csum_partial_copy_generic)
352 .section .fixup, "ax"