/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Xtensa version:  Copyright (C) 2001 Tensilica, Inc. by Kevin Chea
 *		    Optimized by Joe Taylor
 */

#include <linux/errno.h>
#include <linux/linkage.h>
#include <asm/asmmacro.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
 * unsigned int csum_partial(const unsigned char *buf, int len,
 *			     unsigned int sum);
 *
 * This function assumes 2- or 4-byte alignment.  Other alignments will fail!
 */
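
/*
 * As a rough C model of the value this routine computes (an
 * illustrative sketch, not the code the kernel runs; the helper name
 * is made up), the result is a 32-bit accumulator whose folded 16-bit
 * value is the Internet one's-complement checksum of the buffer:
 *
 *	static unsigned int csum_partial_model(const unsigned char *buf,
 *					       int len, unsigned int sum)
 *	{
 *		while (len >= 2) {
 *			unsigned int w = buf[0] | (buf[1] << 8); // LE view
 *			sum += w;
 *			if (sum < w)	// 32-bit add wrapped:
 *				sum++;	// end-around carry
 *			buf += 2;
 *			len -= 2;
 *		}
 *		if (len) {		// trailing byte, low bits on LE
 *			sum += buf[0];
 *			if (sum < (unsigned int)buf[0])
 *				sum++;
 *		}
 *		return sum;
 *	}
 *
 * The assembly below produces a value congruent to this modulo 0xffff
 * by consuming 32-bit words at a time where alignment allows.
 */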
/* ONES_ADD converts twos-complement math to ones-complement. */
#define ONES_ADD(sum, val)	  \
	add	sum, sum, val	; \
	bgeu	sum, val, 99f	; \
	addi	sum, sum, 1	; \
99:				;
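
/*
 * For reference, a plain-C equivalent of ONES_ADD (hypothetical
 * helper name, same logic): after a two's-complement add, the sum is
 * smaller than the addend exactly when the add carried out of bit 31,
 * and that lost carry is folded back in, which is what the bgeu/addi
 * pair above does:
 *
 *	static inline unsigned int ones_add(unsigned int sum,
 *					    unsigned int val)
 *	{
 *		sum += val;	// may wrap modulo 2^32
 *		if (sum < val)	// wrapped, i.e. carry out of bit 31
 *			sum++;	// end-around carry
 *		return sum;
 *	}
 */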
ENTRY(csum_partial)

	/*
	 * Experiments with Ethernet and SLIP connections show that buf
	 * is aligned on either a 2-byte or 4-byte boundary.
	 */
	extui	a5, a2, 0, 2	/* low two bits of buf */
	bnez	a5, 8f		/* branch if 2-byte aligned */
	/* Fall-through on common case, 4-byte alignment */

	srli	a5, a3, 5	/* 32-byte chunks */

	add	a5, a5, a2	/* a5 = end of last 32-byte chunk */
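
/*
 * The 32-byte main loop checksums eight 32-bit words per iteration.
 * A C sketch of the chunking, assuming 4-byte-aligned buf and the
 * ones_add() helper sketched above:
 *
 *	const unsigned int *p = (const unsigned int *)buf;
 *	int chunks = len >> 5;			// 32-byte chunks
 *
 *	while (chunks--) {
 *		for (int i = 0; i < 8; i++)	// eight words per chunk
 *			sum = ones_add(sum, *p++);
 *	}
 *	len &= 31;		// remainder handled by the code below
 */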
	extui	a5, a3, 2, 3	/* remaining 4-byte chunks */

	add	a5, a5, a2	/* a5 = end of last 4-byte chunk */

	_bbci.l	a3, 1, 5f	/* remaining 2-byte chunk */

	_bbci.l	a3, 0, 7f	/* remaining 1-byte chunk */

	slli	a6, a6, 8	/* load byte into bits 8..15 */
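
/*
 * The shift above is the big-endian placement rule for an odd tail
 * byte: it occupies the high half of its 16-bit word; on little-endian
 * cores the byte is used as-is.  A C sketch of the tail handling
 * (ones_add() as above):
 *
 *	if (len & 1) {
 *		unsigned int b = buf[0];
 *	#ifdef __XTENSA_EB__
 *		b <<= 8;	// tail byte is bits 8..15 on big-endian
 *	#endif
 *		sum = ones_add(sum, b);
 *	}
 */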
	/* uncommon case, buf is 2-byte aligned */

	beqz	a3, 7b		/* branch if len == 0 */
	beqi	a3, 1, 6b	/* branch if len == 1 */

	bnez	a5, 8f		/* branch if 1-byte aligned */

	l16ui	a6, a2, 0	/* common case, len >= 2 */

	addi	a2, a2, 2	/* adjust buf */
	addi	a3, a3, -2	/* adjust len */
	j	1b		/* now buf is 4-byte aligned */
	/* case: odd-byte aligned, len > 1
	 * This case is dog slow, so don't give us an odd address.
	 * (I don't think this ever happens, but just in case.)
	 */

	srli	a5, a3, 2	/* 4-byte chunks */

	add	a5, a5, a2	/* a5 = end of last 4-byte chunk */

	l8ui	a6, a2, 0	/* bits 24..31 */
	l16ui	a7, a2, 1	/* bits  8..23 */
	l8ui	a8, a2, 3	/* bits  0.. 7 */
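
/*
 * Loading one byte, an even-aligned halfword, and one more byte is
 * how an unaligned 32-bit word is gathered without an unaligned
 * access: from an odd address a2, offset 1 is even, so the l16ui is
 * legal.  The reassembly in C (little-endian shown; big-endian swaps
 * which end byte is shifted to the top, per the bit-position comments
 * above):
 *
 *	unsigned int b0  = p[0];
 *	unsigned int mid = p[1] | (p[2] << 8);	// the 16-bit middle
 *	unsigned int b3  = p[3];
 *	unsigned int w   = b0 | (mid << 8) | (b3 << 24);
 *
 *	sum = ones_add(sum, w);
 */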
#if !XCHAL_HAVE_LOOPS

	_bbci.l	a3, 1, 3f	/* remaining 2-byte chunk, still odd addr */

	j	5b		/* branch to handle the remaining byte */

ENDPROC(csum_partial)
/*
 * Copy from ds while checksumming, otherwise like csum_partial
 */

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst, int len,
					int sum, int *src_err_ptr, int *dst_err_ptr)

	a11 = original len for exception handling
	a12 = original dst for exception handling

	This function is optimized for 4-byte aligned addresses.  Other
	alignments work, but not nearly as efficiently.
 */
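
/*
 * A rough C model of the contract (illustrative only; the fault
 * reporting through src_err_ptr/dst_err_ptr, handled by the fixup
 * section at the end of this file, is omitted): copy len bytes and
 * fold their one's-complement sum into sum.
 *
 *	static unsigned int csum_copy_model(const unsigned char *src,
 *					    unsigned char *dst, int len,
 *					    unsigned int sum)
 *	{
 *		while (len >= 2) {
 *			unsigned int w = src[0] | (src[1] << 8); // LE view
 *			dst[0] = src[0];
 *			dst[1] = src[1];
 *			sum = ones_add(sum, w);	// helper sketched above
 *			src += 2;
 *			dst += 2;
 *			len -= 2;
 *		}
 *		if (len) {		// trailing byte, low bits on LE
 *			dst[0] = src[0];
 *			sum = ones_add(sum, src[0]);
 *		}
 *		return sum;
 *	}
 */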
ENTRY(csum_partial_copy_generic)

	/* We optimize the following alignment tests for the 4-byte
	aligned case.  Two bbsi.l instructions might seem more optimal
	(commented out below).  However, both labels 5: and 3: are out
	of the imm8 range, so the assembler relaxes them into
	equivalent bbci.l, j combinations, which is actually
	slower. */

	extui	a9, a10, 0, 2	/* low two bits of (src | dst) */
	beqz	a9, 1f		/* branch if both are 4-byte aligned */
	bbsi.l	a10, 0, 5f	/* branch if one address is odd */
	j	3f		/* one address is 2-byte aligned */

/*	_bbsi.l	a10, 0, 5f */	/* branch if odd address */
/*	_bbsi.l	a10, 1, 3f */	/* branch if 2-byte-aligned address */
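
/*
 * In C terms the dispatch above reads (labels 1:/3:/5: being the
 * 4-byte, 2-byte, and odd-address paths):
 *
 *	unsigned long both = (unsigned long)src | (unsigned long)dst;
 *
 *	if ((both & 3) == 0)
 *		goto aligned4;	// label 1: both 4-byte aligned
 *	else if (both & 1)
 *		goto byte_copy;	// label 5: at least one odd address
 *	else
 *		goto aligned2;	// label 3: both at least 2-byte aligned
 */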
	/* src and dst are both 4-byte aligned */
	srli	a10, a4, 5	/* 32-byte chunks */

	add	a10, a10, a2	/* a10 = end of last 32-byte src chunk */

EX(10f)	l32i	a9, a2, 0
EX(10f)	l32i	a8, a2, 4
EX(11f)	s32i	a9, a3, 0
EX(11f)	s32i	a8, a3, 4
	ONES_ADD(a5, a9)
	ONES_ADD(a5, a8)

EX(10f)	l32i	a9, a2, 8
EX(10f)	l32i	a8, a2, 12
EX(11f)	s32i	a9, a3, 8
EX(11f)	s32i	a8, a3, 12
	ONES_ADD(a5, a9)
	ONES_ADD(a5, a8)

EX(10f)	l32i	a9, a2, 16
EX(10f)	l32i	a8, a2, 20
EX(11f)	s32i	a9, a3, 16
EX(11f)	s32i	a8, a3, 20
	ONES_ADD(a5, a9)
	ONES_ADD(a5, a8)

EX(10f)	l32i	a9, a2, 24
EX(10f)	l32i	a8, a2, 28
EX(11f)	s32i	a9, a3, 24
EX(11f)	s32i	a8, a3, 28
	ONES_ADD(a5, a9)
	ONES_ADD(a5, a8)
#if !XCHAL_HAVE_LOOPS

	extui	a10, a4, 2, 3	/* remaining 4-byte chunks */
	extui	a4, a4, 0, 2	/* reset len for general-case, 2-byte chunks */

	add	a10, a10, a2	/* a10 = end of last 4-byte src chunk */

EX(10f)	l32i	a9, a2, 0
EX(11f)	s32i	a9, a3, 0
	ONES_ADD(a5, a9)
	addi	a2, a2, 4
	addi	a3, a3, 4

#if !XCHAL_HAVE_LOOPS
	/* Control comes to here in two cases: (1) It may fall through
	to here from the 4-byte alignment case to process, at most,
	one 2-byte chunk.  (2) It branches to here from above if
	either src or dst is 2-byte aligned, and we process all bytes
	here, except for perhaps a trailing odd byte.  It's
	inefficient, so align your addresses to 4-byte boundaries. */
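
/*
 * C sketch of this halfword path (ones_add() as above; both pointers
 * are assumed at least 2-byte aligned here):
 *
 *	int pairs = len >> 1;
 *
 *	while (pairs--) {
 *		unsigned short v = *(const unsigned short *)src;
 *
 *		*(unsigned short *)dst = v;
 *		sum = ones_add(sum, v);
 *		src += 2;
 *		dst += 2;
 *	}
 *	// a trailing odd byte, if any, is handled by the section below
 */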
	srli	a10, a4, 1	/* 2-byte chunks */

	add	a10, a10, a2	/* a10 = end of last 2-byte src chunk */

EX(10f)	l16ui	a9, a2, 0
EX(11f)	s16i	a9, a3, 0
	ONES_ADD(a5, a9)
	addi	a2, a2, 2
	addi	a3, a3, 2

#if !XCHAL_HAVE_LOOPS

	/* This section processes a possible trailing odd byte. */
	_bbci.l	a4, 0, 8f	/* 1-byte chunk */
EX(10f)	l8ui	a9, a2, 0
EX(11f)	s8i	a9, a3, 0
#ifdef __XTENSA_EB__
	slli	a9, a9, 8	/* shift byte to bits 8..15 */
#endif
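
/*
 * Same placement rule as in csum_partial's 1-byte tail above: on
 * big-endian the odd final byte is the high half of its 16-bit word,
 * on little-endian the low half.  Sketch:
 *
 *	if (len & 1) {
 *		unsigned int b = src[0];
 *
 *		dst[0] = src[0];
 *	#ifdef __XTENSA_EB__
 *		b <<= 8;	// tail byte occupies bits 8..15
 *	#endif
 *		sum = ones_add(sum, b);
 *	}
 */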
	/* Control branch to here when either src or dst is odd.  We
	process all bytes using 8-bit accesses.  Grossly inefficient,
	so don't feed us an odd address. */

	srli	a10, a4, 1	/* handle in pairs for 16-bit csum */

	add	a10, a10, a2	/* a10 = end of last odd-aligned, 2-byte src chunk */

EX(10f)	l8ui	a9, a2, 0
EX(10f)	l8ui	a8, a2, 1
EX(11f)	s8i	a9, a3, 0
EX(11f)	s8i	a8, a3, 1
#ifdef __XTENSA_EB__
	slli	a9, a9, 8	/* combine into a single 16-bit value */
#else				/* for checksum computation */
	slli	a8, a8, 8
#endif
	or	a9, a9, a8
	ONES_ADD(a5, a9)
	addi	a2, a2, 2
	addi	a3, a3, 2
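
/*
 * Merging each byte pair into one 16-bit value keeps the running
 * checksum advancing a halfword at a time even on this byte-copy
 * path.  In C (little-endian case; big-endian shifts the first byte
 * instead, per the #ifdef above):
 *
 *	unsigned int lo = src[0];	// also copied to dst[0]
 *	unsigned int hi = src[1];	// also copied to dst[1]
 *
 *	sum = ones_add(sum, lo | (hi << 8));
 */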
#if !XCHAL_HAVE_LOOPS

	j	4b		/* process the possible trailing odd byte */

ENDPROC(csum_partial_copy_generic)
	.section .fixup, "ax"

/*	a11 = original len for exception handling
	a12 = original dst for exception handling
*/

10:
	movi	a2, -EFAULT
	s32i	a2, a6, 0	/* src_err_ptr */

	/* clear the complete destination - computing the rest
	 * is too much work */
	movi	a2, 0

	add	a11, a11, a12	/* a11 = ending address */
.Leloop:
	s8i	a2, a12, 0	/* zero one byte of dst */
	addi	a12, a12, 1
#if !XCHAL_HAVE_LOOPS
	blt	a12, a11, .Leloop
#endif

11:
	movi	a2, -EFAULT
	s32i	a2, a7, 0	/* dst_err_ptr */
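
/*
 * The two fixup entries above implement the usual contract of a
 * checksumming copy; roughly, in C (hypothetical helper names,
 * -EFAULT per the error-pointer convention):
 *
 *	static void on_src_fault(int *src_err_ptr,
 *				 unsigned char *orig_dst, int orig_len)
 *	{
 *		*src_err_ptr = -EFAULT;
 *		for (int i = 0; i < orig_len; i++)
 *			orig_dst[i] = 0;   // never leave stale bytes behind
 *	}
 *
 *	static void on_dst_fault(int *dst_err_ptr)
 *	{
 *		*dst_err_ptr = -EFAULT;
 *	}
 *
 * Either way, the partial sum accumulated so far is still returned
 * to the caller.
 */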