/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Xtensa version:  Copyright (C) 2001 Tensilica, Inc. by Kevin Chea
 *		    Optimized by Joe Taylor
 */
#include <linux/errno.h>
#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>
/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */
/*
 * unsigned int csum_partial(const unsigned char *buf, int len,
 *			     unsigned int sum);
 *    a2 = buf
 *    a3 = len
 *    a4 = sum
 *
 * This function assumes 2- or 4-byte alignment.  Other alignments will fail!
 */
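/*
 * For reference, a minimal C sketch of the computation performed here
 * (names are illustrative, not part of this file): the buffer is folded
 * into a 32-bit partial sum 16 bits at a time, using the end-around-carry
 * addition implemented by ONES_ADD below (sketched in C as ones_add()).
 *
 *	unsigned int csum_partial_sketch(const unsigned char *buf, int len,
 *					 unsigned int sum)
 *	{
 *		while (len > 1) {
 *			sum = ones_add(sum, *(const unsigned short *)buf);
 *			buf += 2;
 *			len -= 2;
 *		}
 *		if (len)	// trailing byte, zero-padded to 16 bits
 *			sum = ones_add(sum, *buf);	// little-endian view
 *		return sum;
 *	}
 */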
/* ONES_ADD converts twos-complement math to ones-complement. */
#define ONES_ADD(sum, val)	  \
	add	sum, sum, val	; \
	bgeu	sum, val, 99f	; \
	addi	sum, sum, 1	; \
99:				;
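/*
 * A rough C equivalent of ONES_ADD (illustrative only, assuming 32-bit
 * unsigned arithmetic): when the twos-complement add wraps around, the
 * lost carry is added back into bit 0, which is exactly ones-complement
 * (end-around-carry) addition.
 *
 *	static inline unsigned int ones_add(unsigned int sum, unsigned int val)
 *	{
 *		sum += val;
 *		if (sum < val)	// the add overflowed
 *			sum++;	// feed the carry back in
 *		return sum;
 *	}
 */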
.text
ENTRY(csum_partial)

	/*
	 * Experiments with Ethernet and SLIP connections show that buf
	 * is aligned on either a 2-byte or 4-byte boundary.
	 */
	abi_entry_default
	extui	a5, a2, 0, 2
	bnez	a5, 8f		/* branch if 2-byte aligned */
	/* Fall-through on common case, 4-byte alignment */
1:
	srli	a5, a3, 5	/* 32-byte chunks */
	slli	a5, a5, 5
	add	a5, a5, a2	/* a5 = end of last 32-byte chunk */
	extui	a5, a3, 2, 3	/* remaining 4-byte chunks */
	slli	a5, a5, 2
	add	a5, a5, a2	/* a5 = end of last 4-byte chunk */
	_bbci.l	a3, 1, 5f	/* remaining 2-byte chunk */
	l16ui	a6, a2, 0
	ONES_ADD(a4, a6)
	addi	a2, a2, 2	/* advance buf */
5:
	_bbci.l	a3, 0, 7f	/* remaining 1-byte chunk */
6:	l8ui	a6, a2, 0
#ifdef __XTENSA_EB__
	slli	a6, a6, 8	/* load byte into bits 8..15 */
#endif
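	/*
	 * A trailing odd byte counts as if zero-padded to 16 bits; on a
	 * big-endian core that byte is the high half of the 16-bit value,
	 * hence the shift.  Roughly, in illustrative C:
	 *
	 *	sum = ones_add(sum, big_endian ? (b << 8) : b);
	 */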
	/* uncommon case, buf is 2-byte aligned */
8:
	beqz	a3, 7b		/* branch if len == 0 */
	beqi	a3, 1, 6b	/* branch if len == 1 */
	extui	a5, a2, 0, 1
	bnez	a5, 8f		/* branch if 1-byte aligned */
	l16ui	a6, a2, 0	/* common case, len >= 2 */
	ONES_ADD(a4, a6)
	addi	a2, a2, 2	/* adjust buf */
	addi	a3, a3, -2	/* adjust len */
	j	1b		/* now buf is 4-byte aligned */
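	/*
	 * Roughly, in illustrative C: consume one halfword so that the
	 * pointer becomes 4-byte aligned, then rejoin the fast path above.
	 *
	 *	sum = ones_add(sum, *(const unsigned short *)buf);
	 *	buf += 2;
	 *	len -= 2;
	 */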
	/* case: odd-byte aligned, len > 1
	 * This case is dog slow, so don't give us an odd address.
	 * (I don't think this ever happens, but just in case.)
	 */
8:
	srli	a5, a3, 2	/* 4-byte chunks */
	slli	a5, a5, 2
	add	a5, a5, a2	/* a5 = end of last 4-byte chunk */
	l8ui	a6, a2, 0	/* bits 24..31 */
	l16ui	a7, a2, 1	/* bits  8..23 */
	l8ui	a8, a2, 3	/* bits  0.. 7 */
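	/*
	 * a2 is odd here, so a2 + 1 is 2-byte aligned: one byte, one
	 * aligned halfword, and one byte together cover the misaligned
	 * 32-bit word without any unaligned access.  Recombined roughly
	 * as (big-endian view, illustrative C):
	 *
	 *	word = (b0 << 24) | (h1 << 8) | b3;
	 */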
#if !XCHAL_HAVE_LOOPS
	_bbci.l	a3, 1, 3f	/* remaining 2-byte chunk, still odd addr */
	j	5b		/* branch to handle the remaining byte */
ENDPROC(csum_partial)
EXPORT_SYMBOL(csum_partial)
/*
 * Copy from src while checksumming, otherwise like csum_partial.
 */
/*
 * unsigned int csum_partial_copy_generic(const char *src, char *dst, int len)
 *	a2  = src
 *	a3  = dst
 *	a4  = len
 *	a5  = sum
 *	a8  = temp
 *	a9  = temp
 *	a10 = temp
 *
 * This function is optimized for 4-byte aligned addresses.  Other
 * alignments work, but not nearly as efficiently.
 */
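/*
 * The idea, as a minimal C sketch (illustrative only; the real routine
 * additionally tags every user-memory access with EX() so that faults
 * divert to the fixup handler at the end of this file):
 *
 *	unsigned int copy_and_csum_sketch(const char *src, char *dst,
 *					  int len, unsigned int sum)
 *	{
 *		while (len > 1) {
 *			unsigned short v = *(const unsigned short *)src;
 *			*(unsigned short *)dst = v;
 *			sum = ones_add(sum, v);
 *			src += 2; dst += 2; len -= 2;
 *		}
 *		if (len) {	// trailing odd byte
 *			*dst = *src;
 *			sum = ones_add(sum, (unsigned char)*src);  // LE view
 *		}
 *		return sum;
 *	}
 */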
ENTRY(csum_partial_copy_generic)

	abi_entry_default
	movi	a5, -1
	or	a10, a2, a3
	/* We optimize the following alignment tests for the 4-byte
	   aligned case.  Two bbsi.l instructions might seem more optimal
	   (commented out below).  However, both labels 5: and 3: are out
	   of the imm8 range, so the assembler relaxes them into
	   equivalent bbci.l, j combinations, which is actually
	   slower. */

	extui	a9, a10, 0, 2
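	/*
	 * For example, with label 5: out of the signed 8-bit branch range,
	 * the assembler would expand
	 *
	 *	bbsi.l	a10, 0, 5f
	 *
	 * into roughly the inverted-test-plus-jump sequence
	 *
	 *	bbci.l	a10, 0, 99f
	 *	j	5f
	 * 99:
	 */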
	beqz	a9, 1f		/* branch if both are 4-byte aligned */
	bbsi.l	a10, 0, 5f	/* branch if one address is odd */
	j	3f		/* one address is 2-byte aligned */

/*	_bbsi.l	a10, 0, 5f */	/* branch if odd address */
/*	_bbsi.l	a10, 1, 3f */	/* branch if 2-byte-aligned address */
1:	/* src and dst are both 4-byte aligned */
	srli	a10, a4, 5	/* 32-byte chunks */
	slli	a10, a10, 5
	add	a10, a10, a2	/* a10 = end of last 32-byte src chunk */
EX(10f)	l32i	a9, a2, 0
EX(10f)	l32i	a8, a2, 4
EX(10f)	s32i	a9, a3, 0
EX(10f)	s32i	a8, a3, 4
	ONES_ADD(a5, a9)
	ONES_ADD(a5, a8)
EX(10f)	l32i	a9, a2, 8
EX(10f)	l32i	a8, a2, 12
EX(10f)	s32i	a9, a3, 8
EX(10f)	s32i	a8, a3, 12
	ONES_ADD(a5, a9)
	ONES_ADD(a5, a8)
EX(10f)	l32i	a9, a2, 16
EX(10f)	l32i	a8, a2, 20
EX(10f)	s32i	a9, a3, 16
EX(10f)	s32i	a8, a3, 20
	ONES_ADD(a5, a9)
	ONES_ADD(a5, a8)
EX(10f)	l32i	a9, a2, 24
EX(10f)	l32i	a8, a2, 28
EX(10f)	s32i	a9, a3, 24
EX(10f)	s32i	a8, a3, 28
	ONES_ADD(a5, a9)
	ONES_ADD(a5, a8)
	addi	a2, a2, 32
	addi	a3, a3, 32
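	/*
	 * Every user-memory access in this routine is tagged EX(10f): the
	 * EX() macro records the access in the kernel exception table, so
	 * a fault during the load or store resumes at local label 10 in
	 * the .fixup section at the end of this file instead of oopsing.
	 */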
#if !XCHAL_HAVE_LOOPS
	extui	a10, a4, 2, 3	/* remaining 4-byte chunks */
	extui	a4, a4, 0, 2	/* reset len for general-case, 2-byte chunks */
	slli	a10, a10, 2
	add	a10, a10, a2	/* a10 = end of last 4-byte src chunk */
EX(10f)	l32i	a9, a2, 0
EX(10f)	s32i	a9, a3, 0
	ONES_ADD(a5, a9)
	addi	a2, a2, 4
	addi	a3, a3, 4
#if !XCHAL_HAVE_LOOPS
	/* Control comes to here in two cases: (1) It may fall through
	   to here from the 4-byte alignment case to process, at most,
	   one 2-byte chunk.  (2) It branches to here from above if
	   either src or dst is 2-byte aligned, and we process all bytes
	   here, except for perhaps a trailing odd byte.  It's
	   inefficient, so align your addresses to 4-byte boundaries. */
3:
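	/*
	 * In case (1) only bits 1:0 of len survive the 4-byte loops, so at
	 * most one halfword plus one odd byte remain.  For example, an
	 * original len of 31 arrives here with len = 31 & 3 = 3: one
	 * 2-byte chunk is copied here and the last byte is handled at 4:.
	 */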
	srli	a10, a4, 1	/* 2-byte chunks */
	slli	a10, a10, 1
	add	a10, a10, a2	/* a10 = end of last 2-byte src chunk */
EX(10f)	l16ui	a9, a2, 0
EX(10f)	s16i	a9, a3, 0
	ONES_ADD(a5, a9)
	addi	a2, a2, 2
	addi	a3, a3, 2
#if !XCHAL_HAVE_LOOPS
4:
	/* This section processes a possible trailing odd byte. */
	_bbci.l	a4, 0, 8f	/* 1-byte chunk */
EX(10f)	l8ui	a9, a2, 0
EX(10f)	s8i	a9, a3, 0
#ifdef __XTENSA_EB__
	slli	a9, a9, 8	/* shift byte to bits 8..15 */
#endif
	ONES_ADD(a5, a9)
	/* Control branch to here when either src or dst is odd.  We
	   process all bytes using 8-bit accesses.  Grossly inefficient,
	   so don't feed us an odd address. */
5:
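	/*
	 * Bytes are copied one at a time, but summed in pairs so the
	 * 16-bit ones-complement arithmetic still lines up.  Roughly, in
	 * illustrative C (big-endian pairing shown):
	 *
	 *	v = (b0 << 8) | b1;
	 *	sum = ones_add(sum, v);
	 */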
	srli	a10, a4, 1	/* handle in pairs for 16-bit csum */
	slli	a10, a10, 1
	add	a10, a10, a2	/* a10 = end of last odd-aligned, 2-byte src chunk */
EX(10f)	l8ui	a9, a2, 0
EX(10f)	l8ui	a8, a2, 1
EX(10f)	s8i	a9, a3, 0
EX(10f)	s8i	a8, a3, 1
#ifdef __XTENSA_EB__
	slli	a9, a9, 8	/* combine into a single 16-bit value */
#else				/* for checksum computation */
	slli	a8, a8, 8
#endif
	or	a9, a9, a8
	ONES_ADD(a5, a9)
	addi	a2, a2, 2	/* advance src pointer */
	addi	a3, a3, 2	/* advance dst pointer */
#if !XCHAL_HAVE_LOOPS
	j	4b		/* process the possible trailing odd byte */
ENDPROC(csum_partial_copy_generic)
EXPORT_SYMBOL(csum_partial_copy_generic)
	.section .fixup, "ax"
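	/*
	 * The EX(10f) entries above all point here.  A faulted access
	 * lands at local label 10, and the routine reports failure by
	 * returning 0; a sketch of the handler (illustrative, assuming
	 * the current no-sum-argument calling convention):
	 *
	 * 10:
	 *	movi	a2, 0
	 *	abi_ret_default
	 *
	 *	.previous
	 */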