1 /* SPDX-License-Identifier: GPL-2.0 */
2 /* csum_copy.S: Checksum+copy code for sparc64
4 * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
7 #include <linux/export.h>
/* Build-time macros used by the checksum+copy routine below.
 *
 * NOTE(review): GLOBAL_SPARE is defined twice (%g7, then %g5) with no
 * conditional visible between the two lines; presumably a preprocessor
 * guard on lines not shown here selects exactly one of them -- confirm.
 * NOTE(review): EX_LD() and EX_ST() are used by the routine but are not
 * defined on any line visible here -- confirm where they come from.
 */
10 #define GLOBAL_SPARE %g7
12 #define GLOBAL_SPARE %g5
/* EX_RETVAL(x) expands to its argument unchanged. */
24 #define EX_RETVAL(x) x
/* LOAD(type,addr,dest) expands to "type [addr], dest", i.e. the given
 * mnemonic applied to the memory operand [addr] with dest as the second
 * operand.  It is used below for ldub/lduh/lduw and for prefetch.
 */
28 #define LOAD(type,addr,dest) type [addr], dest
/* STORE(type,src,addr) expands to "type src, [addr]".  It is used below
 * for stb/sth/stw.
 */
32 #define STORE(type,src,addr) type src, [addr]
/* Symbol name given to the routine's entry label, .type and .size. */
36 #define FUNC_NAME csum_partial_copy_nocheck
/* Tell the assembler that %g2 and %g3 are used as scratch registers. */
39 .register %g2, #scratch
40 .register %g3, #scratch
/* FUNC_NAME(%o0 = src, %o1 = dst, %o2 = len): copy data from src to dst
 * while accumulating a checksum over it.
 *
 * NOTE(review): the original line numbers carried at the start of each
 * line skip values, so instructions are missing from this view (branches,
 * delay slots and most of the checksum arithmetic among them).  The
 * comments added here describe only the lines that are present.
 *
 * Visible structure, in order:
 *  - Ahead of the entry label: one byte is copied (ldub/stb through %o4),
 *    bit 1 of the source address is tested, and one halfword is copied
 *    (lduh/sth through %o5).  Presumably this is a source-alignment fix-up
 *    reached by a branch from the body; that branch is not shown -- confirm.
 *  - At the entry label: the source is prefetched and bit 0 of the source
 *    address is recorded in GLOBAL_SPARE.
 *  - First local label 1 after the prefetches: 64 bytes are moved with
 *    sixteen lduw loads from %o0 + 0x00..0x3c and sixteen stw stores to
 *    %o1 at the same offsets, rotating through %o5, %g1 and %g2, plus one
 *    more prefetch at %o0 + 0x180.
 *  - Local label 2: len is masked with 0x3c into %g3, followed by a
 *    single-word (lduw/stw) copy step.
 *  - Then halfword and byte copy steps, the odd-start-address check, and a
 *    separate path at label 95 that writes dst one byte at a time.
 */
45 /* We checked for zero length already, so there must be
50 EX_LD(LOAD(ldub, %o0 + 0x00, %o4))
53 EX_ST(STORE(stb, %o4, %o1 + 0x00))
55 1: andcc %o0, 0x2, %g0
60 EX_LD(LOAD(lduh, %o0 + 0x00, %o5))
63 EX_ST(STORE(sth, %o5, %o1 + 0x00))
69 .type FUNC_NAME,#function
70 EXPORT_SYMBOL(FUNC_NAME)
71 FUNC_NAME: /* %o0=src, %o1=dst, %o2=len */
/* Start read prefetches on the source buffer. */
72 LOAD(prefetch, %o0 + 0x000, #n_reads)
78 LOAD(prefetch, %o0 + 0x040, #n_reads)
83 /* We "remember" whether the lowest bit in the address
84 * was set in GLOBAL_SPARE. Because if it is, we have to swap
85 * upper and lower 8 bit fields of the sum we calculate.
88 andcc %o0, 0x1, GLOBAL_SPARE
91 LOAD(prefetch, %o0 + 0x080, #n_reads)
94 LOAD(prefetch, %o0 + 0x0c0, #n_reads)
97 LOAD(prefetch, %o0 + 0x100, #n_reads)
99 /* So that we don't need to use the non-pairing
100 * add-with-carry instructions we accumulate 32-bit
101 * values into a 64-bit register. At the end of the
102 * loop we fold it down to 32-bits and so on.
105 LOAD(prefetch, %o0 + 0x140, #n_reads)
108 1: EX_LD(LOAD(lduw, %o0 + 0x00, %o5))
109 EX_LD(LOAD(lduw, %o0 + 0x04, %g1))
110 EX_LD(LOAD(lduw, %o0 + 0x08, %g2))
112 EX_ST(STORE(stw, %o5, %o1 + 0x00))
113 EX_LD(LOAD(lduw, %o0 + 0x0c, %o5))
115 EX_ST(STORE(stw, %g1, %o1 + 0x04))
116 EX_LD(LOAD(lduw, %o0 + 0x10, %g1))
118 EX_ST(STORE(stw, %g2, %o1 + 0x08))
119 EX_LD(LOAD(lduw, %o0 + 0x14, %g2))
121 EX_ST(STORE(stw, %o5, %o1 + 0x0c))
122 EX_LD(LOAD(lduw, %o0 + 0x18, %o5))
124 EX_ST(STORE(stw, %g1, %o1 + 0x10))
125 EX_LD(LOAD(lduw, %o0 + 0x1c, %g1))
127 EX_ST(STORE(stw, %g2, %o1 + 0x14))
128 EX_LD(LOAD(lduw, %o0 + 0x20, %g2))
130 EX_ST(STORE(stw, %o5, %o1 + 0x18))
131 EX_LD(LOAD(lduw, %o0 + 0x24, %o5))
133 EX_ST(STORE(stw, %g1, %o1 + 0x1c))
134 EX_LD(LOAD(lduw, %o0 + 0x28, %g1))
136 EX_ST(STORE(stw, %g2, %o1 + 0x20))
137 EX_LD(LOAD(lduw, %o0 + 0x2c, %g2))
139 EX_ST(STORE(stw, %o5, %o1 + 0x24))
140 EX_LD(LOAD(lduw, %o0 + 0x30, %o5))
142 EX_ST(STORE(stw, %g1, %o1 + 0x28))
143 EX_LD(LOAD(lduw, %o0 + 0x34, %g1))
145 EX_ST(STORE(stw, %g2, %o1 + 0x2c))
146 EX_LD(LOAD(lduw, %o0 + 0x38, %g2))
148 EX_ST(STORE(stw, %o5, %o1 + 0x30))
149 EX_LD(LOAD(lduw, %o0 + 0x3c, %o5))
151 EX_ST(STORE(stw, %g1, %o1 + 0x34))
152 LOAD(prefetch, %o0 + 0x180, #n_reads)
154 EX_ST(STORE(stw, %g2, %o1 + 0x38))
158 EX_ST(STORE(stw, %o5, %o1 + 0x3c))
162 2: and %o2, 0x3c, %g3
165 1: EX_LD(LOAD(lduw, %o0 + 0x00, %o5))
169 EX_ST(STORE(stw, %o5, %o1 + 0x00))
183 sethi %hi(0xffff0000), %g1
192 /* %o4 has the 16-bit sum we have calculated so-far. */
/* Halfword copy step: src -> %o5 -> dst. */
196 EX_LD(LOAD(lduh, %o0 + 0x00, %o5))
200 EX_ST(STORE(sth, %o5, %o1 + 0x00))
/* Single-byte copy step: src -> %o5 -> dst. */
204 EX_LD(LOAD(ldub, %o0 + 0x00, %o5))
207 EX_ST(STORE(stb, %o5, %o1 + 0x00))
/* %g1 = 0xffff0000 again (the same constant is loaded once above).  The
 * instructions that consume it are not among the lines shown here.
 */
213 sethi %hi(0xffff0000), %g1
/* GLOBAL_SPARE was loaded with (src & 1) near the entry label.  When it is
 * zero, branch forward to the next local label 1.
 */
221 1: brz,pt GLOBAL_SPARE, 1f
224 /* We started with an odd byte, byte-swap the result. */
/* Add the sum in %o4 into %o3 and update the condition codes. */
230 1: addcc %o3, %o4, %o3
/* Label 95: a separate copy path.  GLOBAL_SPARE is cleared here and then
 * used as the accumulator for the loaded data, and every store to dst on
 * this path is a byte store (stb), whatever the width of the load.
 * NOTE(review): presumably this path is taken when src and dst cannot be
 * aligned to each other; the branch that reaches 95 is not shown -- confirm.
 */
237 95: mov 0, GLOBAL_SPARE
243 EX_LD(LOAD(ldub, %o0, GLOBAL_SPARE))
245 EX_ST(STORE(stb, GLOBAL_SPARE, %o1))
/* A halfword is loaded and written back as two single bytes. */
253 EX_LD(LOAD(lduh, %o0, %o4))
257 EX_ST(STORE(stb, %g2, %o1))
258 add %o4, GLOBAL_SPARE, GLOBAL_SPARE
259 EX_ST(STORE(stb, %o4, %o1 + 1))
/* A word is loaded and written back as four single bytes. */
265 EX_LD(LOAD(lduw, %o0, %o4))
268 EX_ST(STORE(stb, %g2, %o1))
270 EX_ST(STORE(stb, %g3, %o1 + 1))
272 EX_ST(STORE(stb, %g2, %o1 + 2))
/* Accumulate with end-around carry: addcc sets the carry flag and the addc
 * two lines down adds that carry back into GLOBAL_SPARE.
 */
273 addcc %o4, GLOBAL_SPARE, GLOBAL_SPARE
274 EX_ST(STORE(stb, %o4, %o1 + 3))
275 addc GLOBAL_SPARE, %g0, GLOBAL_SPARE
279 EX_LD(LOAD(lduw, %o0, %o4))
280 sll GLOBAL_SPARE, 16, %g2
281 srl GLOBAL_SPARE, 16, GLOBAL_SPARE
284 add %g2, GLOBAL_SPARE, GLOBAL_SPARE
287 EX_LD(LOAD(lduh, %o0, %o4))
291 EX_ST(STORE(stb, %g2, %o1))
292 add GLOBAL_SPARE, %o4, GLOBAL_SPARE
293 EX_ST(STORE(stb, %o4, %o1 + 1))
296 sll GLOBAL_SPARE, 16, %o4
297 EX_LD(LOAD(ldub, %o0, %g2))
299 EX_ST(STORE(stb, %g2, %o1))
300 add GLOBAL_SPARE, %o4, GLOBAL_SPARE
/* Fold the accumulator to 16 bits: x + (x << 16) leaves the sum of the two
 * halfwords in bits 31:16 and its overflow in the carry flag; srl extracts
 * that sum and addc adds the carry back in.
 */
301 sll GLOBAL_SPARE, 16, %o4
302 1: addcc %o4, GLOBAL_SPARE, GLOBAL_SPARE
303 srl GLOBAL_SPARE, 16, %o4
304 addc %g0, %o4, GLOBAL_SPARE
/* Split the folded value into (value >> 8) and (value & 0xff) and OR the
 * parts back together.
 * NOTE(review): instructions between the "and" and the "or" are not shown;
 * presumably the low byte is shifted up so that this swaps the two bytes
 * when the source started at an odd address -- confirm.
 */
306 srl GLOBAL_SPARE, 8, %o4
307 and GLOBAL_SPARE, 0xff, %g2
310 or %g2, %o4, GLOBAL_SPARE
/* Add this path's sum into %o3 and update the condition codes. */
311 4: addcc %o3, GLOBAL_SPARE, %o3
315 .size FUNC_NAME, .-FUNC_NAME