1 /* SPDX-License-Identifier: GPL-2.0 */
2 /* csum_copy.S: Checksum+copy code for sparc64
4 * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
7 #include <asm/export.h>
10 #define GLOBAL_SPARE %g7
12 #define GLOBAL_SPARE %g5
24 #define EX_RETVAL(x) x
28 #define LOAD(type,addr,dest) type [addr], dest
32 #define STORE(type,src,addr) type src, [addr]
36 #define FUNC_NAME csum_partial_copy_nocheck
39 .register %g2, #scratch
40 .register %g3, #scratch
45 /* We checked for zero length already, so there must be at least one byte. */
50 EX_LD(LOAD(ldub, %o0 + 0x00, %o4))
53 EX_ST(STORE(stb, %o4, %o1 + 0x00))
55 1: andcc %o0, 0x2, %g0
60 EX_LD(LOAD(lduh, %o0 + 0x00, %o5))
63 EX_ST(STORE(sth, %o5, %o1 + 0x00))
69 .type FUNC_NAME,#function
70 EXPORT_SYMBOL(FUNC_NAME)
71 FUNC_NAME: /* %o0=src, %o1=dst, %o2=len, %o3=sum */
/* Entry point of the checksum and copy routine. The symbol name comes
 * from the FUNC_NAME define earlier in this file, and the register
 * roles on entry are the ones listed on the label line above.
 * EX_LD and EX_ST wrap every load and store below and are defined
 * outside the visible lines. Only part of the routine body is visible
 * in this excerpt, so the notes that follow describe just the
 * instructions that are shown.
 */
/* Prefetch the source buffer at offsets 0x000 and 0x040. */
72 LOAD(prefetch, %o0 + 0x000, #n_reads)
77 LOAD(prefetch, %o0 + 0x040, #n_reads)
/* Overview of the visible instructions from here down to the tail
 * handling loads:
 *  - andcc leaves bit 0 of the source address in GLOBAL_SPARE.
 *  - further prefetches cover source offsets 0x080 through 0x140,
 *    continuing the 0x40 stride started above.
 *  - the loop at the first local label 1 issues sixteen lduw loads
 *    from source offsets 0x00 to 0x3c and sixteen stw stores to the
 *    same destination offsets, which is 64 bytes per pass. Loads and
 *    stores are interleaved and rotate through %o5, %g1 and %g2 as
 *    temporaries, and one more prefetch at offset 0x180 is issued
 *    near the end of the pass. The instructions that accumulate the
 *    sum and advance the pointers are not visible here.
 *  - at local label 2 the length is masked with 0x3c into %g3, and a
 *    single lduw and stw pair follows at the next local label 1.
 *    NOTE(review): presumably a one word per pass loop for the
 *    remaining whole words - confirm against the full file.
 *  - sethi places 0xffff0000 in %g1. The low ten bits of that
 *    constant are zero, so sethi alone builds it. Its use is not
 *    visible here.
 */
82 /* We "remember" whether the lowest bit in the address
83 * was set in GLOBAL_SPARE. Because if it is, we have to swap
84 * upper and lower 8 bit fields of the sum we calculate.
87 andcc %o0, 0x1, GLOBAL_SPARE
90 LOAD(prefetch, %o0 + 0x080, #n_reads)
93 LOAD(prefetch, %o0 + 0x0c0, #n_reads)
96 LOAD(prefetch, %o0 + 0x100, #n_reads)
98 /* So that we don't need to use the non-pairing
99 * add-with-carry instructions we accumulate 32-bit
100 * values into a 64-bit register. At the end of the
101 * loop we fold it down to 32-bits and so on.
104 LOAD(prefetch, %o0 + 0x140, #n_reads)
107 1: EX_LD(LOAD(lduw, %o0 + 0x00, %o5))
108 EX_LD(LOAD(lduw, %o0 + 0x04, %g1))
109 EX_LD(LOAD(lduw, %o0 + 0x08, %g2))
111 EX_ST(STORE(stw, %o5, %o1 + 0x00))
112 EX_LD(LOAD(lduw, %o0 + 0x0c, %o5))
114 EX_ST(STORE(stw, %g1, %o1 + 0x04))
115 EX_LD(LOAD(lduw, %o0 + 0x10, %g1))
117 EX_ST(STORE(stw, %g2, %o1 + 0x08))
118 EX_LD(LOAD(lduw, %o0 + 0x14, %g2))
120 EX_ST(STORE(stw, %o5, %o1 + 0x0c))
121 EX_LD(LOAD(lduw, %o0 + 0x18, %o5))
123 EX_ST(STORE(stw, %g1, %o1 + 0x10))
124 EX_LD(LOAD(lduw, %o0 + 0x1c, %g1))
126 EX_ST(STORE(stw, %g2, %o1 + 0x14))
127 EX_LD(LOAD(lduw, %o0 + 0x20, %g2))
129 EX_ST(STORE(stw, %o5, %o1 + 0x18))
130 EX_LD(LOAD(lduw, %o0 + 0x24, %o5))
132 EX_ST(STORE(stw, %g1, %o1 + 0x1c))
133 EX_LD(LOAD(lduw, %o0 + 0x28, %g1))
135 EX_ST(STORE(stw, %g2, %o1 + 0x20))
136 EX_LD(LOAD(lduw, %o0 + 0x2c, %g2))
138 EX_ST(STORE(stw, %o5, %o1 + 0x24))
139 EX_LD(LOAD(lduw, %o0 + 0x30, %o5))
141 EX_ST(STORE(stw, %g1, %o1 + 0x28))
142 EX_LD(LOAD(lduw, %o0 + 0x34, %g1))
144 EX_ST(STORE(stw, %g2, %o1 + 0x2c))
145 EX_LD(LOAD(lduw, %o0 + 0x38, %g2))
147 EX_ST(STORE(stw, %o5, %o1 + 0x30))
148 EX_LD(LOAD(lduw, %o0 + 0x3c, %o5))
150 EX_ST(STORE(stw, %g1, %o1 + 0x34))
151 LOAD(prefetch, %o0 + 0x180, #n_reads)
153 EX_ST(STORE(stw, %g2, %o1 + 0x38))
157 EX_ST(STORE(stw, %o5, %o1 + 0x3c))
161 2: and %o2, 0x3c, %g3
164 1: EX_LD(LOAD(lduw, %o0 + 0x00, %o5))
168 EX_ST(STORE(stw, %o5, %o1 + 0x00))
182 sethi %hi(0xffff0000), %g1
191 /* %o4 has the 16-bit sum we have calculated so-far. */
/* Tail handling: copy one halfword from source to destination. */
195 EX_LD(LOAD(lduh, %o0 + 0x00, %o5))
199 EX_ST(STORE(sth, %o5, %o1 + 0x00))
/* Tail handling: copy one byte from source to destination. */
203 EX_LD(LOAD(ldub, %o0 + 0x00, %o5))
206 EX_ST(STORE(stb, %o5, %o1 + 0x00))
/* %g1 = 0xffff0000 again. The instructions that use it are not
 * visible in this excerpt.
 */
212 sethi %hi(0xffff0000), %g1
/* Branch past the byte swap when GLOBAL_SPARE is zero, that is when
 * the source address was even. The delay slot instruction of this
 * branch is not visible here.
 */
220 1: brz,pt GLOBAL_SPARE, 1f
223 /* We started with an odd byte, byte-swap the result. */
/* Add the sum computed in %o4 to the caller-supplied sum in %o3 and
 * update the condition codes.
 */
229 1: addcc %o3, %o4, %o3
/* Local label 95 starts a separate path in which GLOBAL_SPARE is
 * reused as the running sum and starts from zero. Every store shown
 * on this path is a byte store.
 * NOTE(review): presumably this is the path taken when the
 * destination is not suitably aligned - confirm against the branch
 * that targets label 95, which is not visible here.
 */
236 95: mov 0, GLOBAL_SPARE
/* Copy one leading byte. It is loaded straight into GLOBAL_SPARE, so
 * it also seeds the running sum.
 */
242 EX_LD(LOAD(ldub, %o0, GLOBAL_SPARE))
244 EX_ST(STORE(stb, GLOBAL_SPARE, %o1))
/* Copy one halfword as two byte stores and add it to the running sum.
 * %g2 is presumably the high byte of %o4, set by an instruction that
 * is not visible here - TODO confirm.
 */
252 EX_LD(LOAD(lduh, %o0, %o4))
256 EX_ST(STORE(stb, %g2, %o1))
257 add %o4, GLOBAL_SPARE, GLOBAL_SPARE
258 EX_ST(STORE(stb, %o4, %o1 + 1))
/* Copy one word as four byte stores at destination offsets 0 to 3.
 * The instructions that extract each byte into %g2 and %g3 are not
 * visible here. The word is added to the running sum with addcc, and
 * the following addc adds the resulting carry back in.
 */
264 EX_LD(LOAD(lduw, %o0, %o4))
267 EX_ST(STORE(stb, %g2, %o1))
269 EX_ST(STORE(stb, %g3, %o1 + 1))
271 EX_ST(STORE(stb, %g2, %o1 + 2))
272 addcc %o4, GLOBAL_SPARE, GLOBAL_SPARE
273 EX_ST(STORE(stb, %o4, %o1 + 3))
274 addc GLOBAL_SPARE, %g0, GLOBAL_SPARE
/* Another word load, followed by a split of the running sum: %g2 gets
 * GLOBAL_SPARE shifted left by 16, GLOBAL_SPARE is shifted right by
 * 16, and the two are added together. An intermediate step on %g2 is
 * presumably missing from this view - TODO confirm.
 */
278 EX_LD(LOAD(lduw, %o0, %o4))
279 sll GLOBAL_SPARE, 16, %g2
280 srl GLOBAL_SPARE, 16, GLOBAL_SPARE
283 add %g2, GLOBAL_SPARE, GLOBAL_SPARE
/* Trailing halfword: two byte stores plus an add into the running
 * sum, the same shape as the halfword step earlier on this path.
 */
286 EX_LD(LOAD(lduh, %o0, %o4))
290 EX_ST(STORE(stb, %g2, %o1))
291 add GLOBAL_SPARE, %o4, GLOBAL_SPARE
292 EX_ST(STORE(stb, %o4, %o1 + 1))
/* Trailing byte: load it into %g2, store it, and add %o4 into the
 * running sum.
 * NOTE(review): the instruction that derives %o4 from the loaded byte
 * is not visible in this excerpt - confirm against the full file.
 * Afterwards %o4 is set to the running sum shifted left by 16, ready
 * for the fold at the next label.
 */
295 sll GLOBAL_SPARE, 16, %o4
296 EX_LD(LOAD(ldub, %o0, %g2))
298 EX_ST(STORE(stb, %g2, %o1))
299 add GLOBAL_SPARE, %o4, GLOBAL_SPARE
300 sll GLOBAL_SPARE, 16, %o4
/* Fold the running sum: add it to its own copy shifted left by 16,
 * shift the result right by 16 into %o4, then let addc add the carry
 * from the addcc back in.
 */
301 1: addcc %o4, GLOBAL_SPARE, GLOBAL_SPARE
302 srl GLOBAL_SPARE, 16, %o4
303 addc %g0, %o4, GLOBAL_SPARE
/* Presumably a swap of the two bytes of the folded sum: %o4 takes the
 * value shifted right by 8, %g2 takes the low byte, and the or joins
 * them. The steps that mask %o4 and move %g2 into the high byte
 * position are not visible here - TODO confirm.
 */
305 srl GLOBAL_SPARE, 8, %o4
306 and GLOBAL_SPARE, 0xff, %g2
309 or %g2, %o4, GLOBAL_SPARE
/* Local label 4: add the sum from this path to the caller-supplied
 * sum in %o3 and update the condition codes. The return sequence is
 * not visible in this excerpt.
 */
310 4: addcc %o3, GLOBAL_SPARE, %o3
/* Record the symbol size as the distance from FUNC_NAME to here. */
314 .size FUNC_NAME, .-FUNC_NAME