1 /* SPDX-License-Identifier: GPL-2.0 */
2 /* checksum.S: Sparc V9 optimized checksum code.
4 * Copyright(C) 1995 Linus Torvalds
5 * Copyright(C) 1995 Miguel de Icaza
6 * Copyright(C) 1996, 2000 David S. Miller
7 * Copyright(C) 1997 Jakub Jelinek
10 * Linux/Alpha checksum c-code
11 * Linux/ix86 inline checksum assembly
12 * RFC1071 Computing the Internet Checksum (esp. Jacobsons m68k code)
13 * David Mosberger-Tang for optimized reference c-code
14 * BSD4.4 portable checksum routine
17 #include <asm/export.h>
/*
 * csum_partial_fix_alignment: branch target used by csum_partial to deal
 * with a buffer address that is not suitably aligned.
 *
 * Entered through the bne,pn in csum_partial.  Leaves through
 * csum_partial_post_align (be,pn / ba,pt) or csum_partial_end_cruft
 * (blu,pn).
 *
 * What the visible instructions do (%o0 = buff per the csum_partial
 * entry comment):
 *   - ldub loads one unsigned byte at [%o0] into %o4.
 *   - andcc tests bit 1 (0x2) of the address in %o0 and writes only the
 *     condition codes (the destination is %g0); the be,pn that follows
 *     branches when that bit is clear.
 *   - lduh loads one unsigned halfword at [%o0] into %o5.
 *
 * NOTE(review): SPARC branches execute the following instruction in a
 * delay slot.  The delay-slot instructions, the compare that feeds
 * blu,pn, and any %o0/%o1 adjustments are not present in this listing;
 * confirm them against the full file before relying on this summary.
 */
20 csum_partial_fix_alignment:
21 /* We checked for zero length already, so there must be
26 ldub [%o0 + 0x00], %o4
29 1: andcc %o0, 0x2, %g0
30 be,pn %icc, csum_partial_post_align
32 blu,pn %icc, csum_partial_end_cruft
34 lduh [%o0 + 0x00], %o5
37 ba,pt %xcc, csum_partial_post_align
/*
 * csum_partial: exported checksum entry point (see EXPORT_SYMBOL below).
 *
 * Inputs, per the comment on the entry label:
 *   %o0 = buff, %o1 = len, %o2 = sum
 *
 * Structure that is visible in this listing:
 *   - Entry: two prefetches at buff + 0x000 and buff + 0x040, then a
 *     brz,pn on len that goes to csum_partial_finish.
 *   - A bne,pn to csum_partial_fix_alignment, which branches back to
 *     csum_partial_post_align or on to csum_partial_end_cruft.
 *   - csum_partial_post_align: further prefetches at 0x40-byte steps
 *     (0x080 through 0x180), and a numeric label 1: followed by sixteen
 *     lduw loads covering offsets 0x00..0x3c from %o0, rotating through
 *     %o5, %g1, %g2 and %g3.
 *   - Numeric label 2: masks len with 0x3c into %o3, followed by another
 *     lduw at label 1:.
 *   - csum_partial_end_cruft: a halfword load and a byte load from [%o0]
 *     into %o5, then an addcc of %o4 into %o2.
 *
 * NOTE(review): the instructions that accumulate the loaded words, the
 * loop-control branches, the branch delay slots and the definition of
 * csum_partial_finish are not present in this listing.  The instruction
 * that sets the condition codes for the bne,pn is also not shown.
 * Confirm all of these against the full file.
 */
42 .type csum_partial,#function
43 EXPORT_SYMBOL(csum_partial)
44 csum_partial: /* %o0=buff, %o1=len, %o2=sum */
/* Issue prefetches (#n_reads variant) for buff + 0x000 and buff + 0x040. */
45 prefetch [%o0 + 0x000], #n_reads
47 prefetch [%o0 + 0x040], #n_reads
/* len == 0: branch to csum_partial_finish (hinted as not taken). */
48 brz,pn %o1, csum_partial_finish
51 /* We "remember" whether the lowest bit in the address
52 * was set in %g7. Because if it is, we have to swap
53 * upper and lower 8 bit fields of the sum we calculate.
55 bne,pn %icc, csum_partial_fix_alignment
58 csum_partial_post_align:
59 prefetch [%o0 + 0x080], #n_reads
62 prefetch [%o0 + 0x0c0], #n_reads
65 prefetch [%o0 + 0x100], #n_reads
67 /* So that we don't need to use the non-pairing
68 * add-with-carry instructions we accumulate 32-bit
69 * values into a 64-bit register. At the end of the
70 * loop we fold it down to 32-bits and so on.
72 prefetch [%o0 + 0x140], #n_reads
73 1: lduw [%o0 + 0x00], %o5
74 lduw [%o0 + 0x04], %g1
75 lduw [%o0 + 0x08], %g2
77 lduw [%o0 + 0x0c], %g3
79 lduw [%o0 + 0x10], %o5
81 lduw [%o0 + 0x14], %g1
83 lduw [%o0 + 0x18], %g2
85 lduw [%o0 + 0x1c], %g3
87 lduw [%o0 + 0x20], %o5
89 lduw [%o0 + 0x24], %g1
91 lduw [%o0 + 0x28], %g2
93 lduw [%o0 + 0x2c], %g3
95 lduw [%o0 + 0x30], %o5
97 lduw [%o0 + 0x34], %g1
99 lduw [%o0 + 0x38], %g2
101 lduw [%o0 + 0x3c], %g3
103 prefetch [%o0 + 0x180], #n_reads
110 2: and %o1, 0x3c, %o3
113 1: lduw [%o0 + 0x00], %o5
129 sethi %hi(0xffff0000), %g1
137 csum_partial_end_cruft:
138 /* %o4 has the 16-bit sum we have calculated so-far. */
/*
 * Load an unsigned halfword at [%o0] into %o5.
 * NOTE(review): the test that decides whether this load runs is not
 * visible in this listing.
 */
142 lduh [%o0 + 0x00], %o5
/*
 * Load an unsigned byte at [%o0] into %o5.
 * NOTE(review): the guarding test and the instruction that consumes the
 * previous %o5 value are not visible in this listing.
 */
148 ldub [%o0 + 0x00], %o5
/*
 * %g1 = 0xffff0000 (the low 10 bits of the constant are zero, so sethi
 * alone produces it).  NOTE(review): its use is not shown here; presumably
 * a mask for folding the sum down to 16 bits -- confirm.
 */
155 sethi %hi(0xffff0000), %g1
166 /* We started with an odd byte, byte-swap the result. */
/*
 * Add the computed value in %o4 into the caller-supplied sum in %o2 and
 * update the condition codes.  NOTE(review): whatever consumes the
 * resulting carry is not visible in this listing.
 */
172 1: addcc %o2, %o4, %o2