2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * IP/TCP/UDP checksumming routines
8 * Xtensa version: Copyright (C) 2001 Tensilica, Inc. by Kevin Chea
9 * Optimized by Joe Taylor
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License
13 * as published by the Free Software Foundation; either version
14 * 2 of the License, or (at your option) any later version.
17 #include <linux/errno.h>
18 #include <linux/linkage.h>
19 #include <variant/core.h>
20 #include <asm/asmmacro.h>
23 * computes a partial checksum, e.g. for TCP/UDP fragments
27 * unsigned int csum_partial(const unsigned char *buf, int len,
33 * This function assumes 2- or 4-byte alignment. Other alignments will fail!
36 /* ONES_ADD converts twos-complement math to ones-complement. */
37 #define ONES_ADD(sum, val) \
39 	bgeu	sum, val, 99f	; \
/* NOTE(review): the continuation of ONES_ADD (the carry increment and the
 * 99: label) is elided in this excerpt -- the gapped leading line numbers
 * show missing source lines throughout this block.
 * Register roles, grounded in the visible adjust-buf/adjust-len comments:
 *   a2 = buf (advanced as bytes are consumed), a3 = len (decremented),
 *   a5 = chunk counter / end-of-chunk pointer, a6-a8 = load scratch.
 * The running checksum register is not visible here -- presumably the
 * `sum` argument's register; TODO confirm against the full source. */
47  * Experiments with Ethernet and SLIP connections show that buf
48  * is aligned on either a 2-byte or 4-byte boundary.
52 bnez a5, 8f /* branch if 2-byte aligned */
53 /* Fall-through on common case, 4-byte alignment */
55 srli a5, a3, 5 /* 32-byte chunks */
61 add a5, a5, a2 /* a5 = end of last 32-byte chunk */
85 extui a5, a3, 2, 3 /* remaining 4-byte chunks */
91 add a5, a5, a2 /* a5 = end of last 4-byte chunk */
101 _bbci.l a3, 1, 5f /* remaining 2-byte chunk */
106 _bbci.l a3, 0, 7f /* remaining 1-byte chunk */
109 slli a6, a6, 8 /* load byte into bits 8..15 */
116 /* uncommon case, buf is 2-byte aligned */
118 beqz a3, 7b /* branch if len == 0 */
119 beqi a3, 1, 6b /* branch if len == 1 */
122 bnez a5, 8f /* branch if 1-byte aligned */
124 l16ui a6, a2, 0 /* common case, len >= 2 */
126 addi a2, a2, 2 /* adjust buf */
127 addi a3, a3, -2 /* adjust len */
128 j 1b /* now buf is 4-byte aligned */
130 /* case: odd-byte aligned, len > 1
131 * This case is dog slow, so don't give us an odd address.
132 * (I don't think this ever happens, but just in case.)
135 srli a5, a3, 2 /* 4-byte chunks */
141 add a5, a5, a2 /* a5 = end of last 4-byte chunk */
/* Odd-address path: assemble one aligned 32-bit word from three unaligned
 * accesses (byte + halfword + byte). */
144 l8ui a6, a2, 0 /* bits 24..31 */
145 l16ui a7, a2, 1 /* bits 8..23 */
146 l8ui a8, a2, 3 /* bits 0.. 7 */
157 #if !XCHAL_HAVE_LOOPS
161 _bbci.l a3, 1, 3f /* remaining 2-byte chunk, still odd addr */
173 j 5b /* branch to handle the remaining byte */
175 ENDPROC(csum_partial)
178 * Copy from ds while checksumming, otherwise like csum_partial
182 unsigned int csum_partial_copy_generic (const char *src, char *dst, int len,
183 int sum, int *src_err_ptr, int *dst_err_ptr)
193 a11 = original len for exception handling
194 a12 = original dst for exception handling
196 This function is optimized for 4-byte aligned addresses. Other
197 alignments work, but not nearly as efficiently.
/* NOTE(review): excerpt with elided source lines (gapped original
 * numbering).  Register roles, grounded in the operands and the register
 * notes above:
 *   a2 = src (all EX-wrapped loads read through a2),
 *   a3 = dst (all EX-wrapped stores write through a3),
 *   a4 = len (shifted/masked to count chunks),
 *   a8/a9/a10 = scratch and chunk-end pointer,
 *   a11 = original len, a12 = original dst (kept for exception handling).
 * EX(label) wraps a memory access with an exception-table fixup that
 * branches to `label` (10f = src fault, 11f = dst fault) -- presumably
 * defined in asm/asmmacro.h; TODO confirm. */
200 ENTRY(csum_partial_copy_generic)
207 /* We optimize the following alignment tests for the 4-byte
208 aligned case. Two bbsi.l instructions might seem more optimal
209 (commented out below). However, both labels 5: and 3: are out
210 of the imm8 range, so the assembler relaxes them into
211 equivalent bbci.l, j combinations, which is actually
215 beqz a9, 1f /* branch if both are 4-byte aligned */
216 bbsi.l a10, 0, 5f /* branch if one address is odd */
217 j 3f /* one address is 2-byte aligned */
219 /* _bbsi.l a10, 0, 5f */ /* branch if odd address */
220 /* _bbsi.l a10, 1, 3f */ /* branch if 2-byte-aligned address */
223 /* src and dst are both 4-byte aligned */
224 srli a10, a4, 5 /* 32-byte chunks */
230 add a10, a10, a2 /* a10 = end of last 32-byte src chunk */
/* 32-byte copy loop body: four load/store pairs of two words each.  The
 * checksum-accumulation instructions between the pairs are elided in this
 * excerpt (numbering jumps 236->239, 242->245, 248->251). */
233 EX(10f) l32i a9, a2, 0
234 EX(10f) l32i a8, a2, 4
235 EX(11f) s32i a9, a3, 0
236 EX(11f) s32i a8, a3, 4
239 EX(10f) l32i a9, a2, 8
240 EX(10f) l32i a8, a2, 12
241 EX(11f) s32i a9, a3, 8
242 EX(11f) s32i a8, a3, 12
245 EX(10f) l32i a9, a2, 16
246 EX(10f) l32i a8, a2, 20
247 EX(11f) s32i a9, a3, 16
248 EX(11f) s32i a8, a3, 20
251 EX(10f) l32i a9, a2, 24
252 EX(10f) l32i a8, a2, 28
253 EX(11f) s32i a9, a3, 24
254 EX(11f) s32i a8, a3, 28
259 #if !XCHAL_HAVE_LOOPS
263 extui a10, a4, 2, 3 /* remaining 4-byte chunks */
264 extui a4, a4, 0, 2 /* reset len for general-case, 2-byte chunks */
270 add a10, a10, a2 /* a10 = end of last 4-byte src chunk */
273 EX(10f) l32i a9, a2, 0
274 EX(11f) s32i a9, a3, 0
278 #if !XCHAL_HAVE_LOOPS
283 Control comes to here in two cases: (1) It may fall through
284 to here from the 4-byte alignment case to process, at most,
285 one 2-byte chunk. (2) It branches to here from above if
286 either src or dst is 2-byte aligned, and we process all bytes
287 here, except for perhaps a trailing odd byte. It's
288 inefficient, so align your addresses to 4-byte boundaries.
295 srli a10, a4, 1 /* 2-byte chunks */
301 add a10, a10, a2 /* a10 = end of last 2-byte src chunk */
304 EX(10f) l16ui a9, a2, 0
305 EX(11f) s16i a9, a3, 0
309 #if !XCHAL_HAVE_LOOPS
313 /* This section processes a possible trailing odd byte. */
314 _bbci.l a4, 0, 8f /* 1-byte chunk */
315 EX(10f) l8ui a9, a2, 0
316 EX(11f) s8i a9, a3, 0
318 slli a9, a9, 8 /* shift byte to bits 8..15 */
326 /* Control branch to here when either src or dst is odd. We
327 process all bytes using 8-bit accesses. Grossly inefficient,
328 so don't feed us an odd address. */
330 srli a10, a4, 1 /* handle in pairs for 16-bit csum */
336 add a10, a10, a2 /* a10 = end of last odd-aligned, 2-byte src chunk */
/* Byte-pair loop: copy two bytes with 8-bit accesses, then (in elided
 * lines) merge them into one 16-bit value for the checksum. */
339 EX(10f) l8ui a9, a2, 0
340 EX(10f) l8ui a8, a2, 1
341 EX(11f) s8i a9, a3, 0
342 EX(11f) s8i a8, a3, 1
344 slli a9, a9, 8 /* combine into a single 16-bit value */
345 #else /* for checksum computation */
352 #if !XCHAL_HAVE_LOOPS
356 j 4b /* process the possible trailing odd byte */
358 ENDPROC(csum_partial_copy_generic)
362 .section .fixup, "ax"
/* NOTE(review): exception fixup handlers for the EX() accesses above; the
 * 10: (src fault) and 11: (dst fault) labels themselves are elided in this
 * excerpt.  On a src fault: store a2 (presumably -EFAULT, loaded in elided
 * lines -- TODO confirm) through a6 = src_err_ptr, then zero-fill the
 * untouched remainder of dst using a12 (original dst) and a11 (original
 * len).  On a dst fault: store the error through a7 = dst_err_ptr. */
366 a11 = original len for exception handling
367 a12 = original dst for exception handling
372 s32i a2, a6, 0 /* src_err_ptr */
374 # clear the complete destination - computing the rest
381 add a11, a11, a12 /* a11 = ending address */
386 #if !XCHAL_HAVE_LOOPS
387 blt a12, a11, .Leloop
394 s32i a2, a7, 0 /* dst_err_ptr */