/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Optimized RAID-5 checksumming functions for SSE.
 *
 * Cache avoiding checksumming functions utilizing KNI instructions
 * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
 *
 * High-speed RAID5 checksumming functions utilizing SSE instructions.
 * Copyright (C) 1998 Ingo Molnar.
 *
 * x86-64 changes / gcc fixes from Andi Kleen.
 * Copyright 2002 Andi Kleen, SuSE Labs.
 *
 * This hasn't been optimized for the hammer yet, but there are likely
 * no advantages to be gotten from x86-64 here anyways.
 */
28 #include <asm/fpu/api.h>
31 /* reduce register pressure */
32 # define XOR_CONSTANT_CONSTRAINT "i"
34 # define XOR_CONSTANT_CONSTRAINT "re"
37 #define OFFS(x) "16*("#x")"
38 #define PF_OFFS(x) "256+16*("#x")"
39 #define PF0(x) " prefetchnta "PF_OFFS(x)"(%[p1]) ;\n"
40 #define LD(x, y) " movaps "OFFS(x)"(%[p1]), %%xmm"#y" ;\n"
41 #define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%[p1]) ;\n"
42 #define PF1(x) " prefetchnta "PF_OFFS(x)"(%[p2]) ;\n"
43 #define PF2(x) " prefetchnta "PF_OFFS(x)"(%[p3]) ;\n"
44 #define PF3(x) " prefetchnta "PF_OFFS(x)"(%[p4]) ;\n"
45 #define PF4(x) " prefetchnta "PF_OFFS(x)"(%[p5]) ;\n"
46 #define XO1(x, y) " xorps "OFFS(x)"(%[p2]), %%xmm"#y" ;\n"
47 #define XO2(x, y) " xorps "OFFS(x)"(%[p3]), %%xmm"#y" ;\n"
48 #define XO3(x, y) " xorps "OFFS(x)"(%[p4]), %%xmm"#y" ;\n"
49 #define XO4(x, y) " xorps "OFFS(x)"(%[p5]), %%xmm"#y" ;\n"
52 #define BLK64(pf, op, i) \
60 xor_sse_2(unsigned long bytes
, unsigned long *p1
, unsigned long *p2
)
62 unsigned long lines
= bytes
>> 8;
98 " add %[inc], %[p1] ;\n"
99 " add %[inc], %[p2] ;\n"
102 : [cnt
] "+r" (lines
),
103 [p1
] "+r" (p1
), [p2
] "+r" (p2
)
104 : [inc
] XOR_CONSTANT_CONSTRAINT (256UL)
111 xor_sse_2_pf64(unsigned long bytes
, unsigned long *p1
, unsigned long *p2
)
113 unsigned long lines
= bytes
>> 8;
132 " add %[inc], %[p1] ;\n"
133 " add %[inc], %[p2] ;\n"
136 : [cnt
] "+r" (lines
),
137 [p1
] "+r" (p1
), [p2
] "+r" (p2
)
138 : [inc
] XOR_CONSTANT_CONSTRAINT (256UL)
145 xor_sse_3(unsigned long bytes
, unsigned long *p1
, unsigned long *p2
,
148 unsigned long lines
= bytes
>> 8;
190 " add %[inc], %[p1] ;\n"
191 " add %[inc], %[p2] ;\n"
192 " add %[inc], %[p3] ;\n"
195 : [cnt
] "+r" (lines
),
196 [p1
] "+r" (p1
), [p2
] "+r" (p2
), [p3
] "+r" (p3
)
197 : [inc
] XOR_CONSTANT_CONSTRAINT (256UL)
204 xor_sse_3_pf64(unsigned long bytes
, unsigned long *p1
, unsigned long *p2
,
207 unsigned long lines
= bytes
>> 8;
227 " add %[inc], %[p1] ;\n"
228 " add %[inc], %[p2] ;\n"
229 " add %[inc], %[p3] ;\n"
232 : [cnt
] "+r" (lines
),
233 [p1
] "+r" (p1
), [p2
] "+r" (p2
), [p3
] "+r" (p3
)
234 : [inc
] XOR_CONSTANT_CONSTRAINT (256UL)
241 xor_sse_4(unsigned long bytes
, unsigned long *p1
, unsigned long *p2
,
242 unsigned long *p3
, unsigned long *p4
)
244 unsigned long lines
= bytes
>> 8;
292 " add %[inc], %[p1] ;\n"
293 " add %[inc], %[p2] ;\n"
294 " add %[inc], %[p3] ;\n"
295 " add %[inc], %[p4] ;\n"
298 : [cnt
] "+r" (lines
), [p1
] "+r" (p1
),
299 [p2
] "+r" (p2
), [p3
] "+r" (p3
), [p4
] "+r" (p4
)
300 : [inc
] XOR_CONSTANT_CONSTRAINT (256UL)
307 xor_sse_4_pf64(unsigned long bytes
, unsigned long *p1
, unsigned long *p2
,
308 unsigned long *p3
, unsigned long *p4
)
310 unsigned long lines
= bytes
>> 8;
331 " add %[inc], %[p1] ;\n"
332 " add %[inc], %[p2] ;\n"
333 " add %[inc], %[p3] ;\n"
334 " add %[inc], %[p4] ;\n"
337 : [cnt
] "+r" (lines
), [p1
] "+r" (p1
),
338 [p2
] "+r" (p2
), [p3
] "+r" (p3
), [p4
] "+r" (p4
)
339 : [inc
] XOR_CONSTANT_CONSTRAINT (256UL)
346 xor_sse_5(unsigned long bytes
, unsigned long *p1
, unsigned long *p2
,
347 unsigned long *p3
, unsigned long *p4
, unsigned long *p5
)
349 unsigned long lines
= bytes
>> 8;
403 " add %[inc], %[p1] ;\n"
404 " add %[inc], %[p2] ;\n"
405 " add %[inc], %[p3] ;\n"
406 " add %[inc], %[p4] ;\n"
407 " add %[inc], %[p5] ;\n"
410 : [cnt
] "+r" (lines
), [p1
] "+r" (p1
), [p2
] "+r" (p2
),
411 [p3
] "+r" (p3
), [p4
] "+r" (p4
), [p5
] "+r" (p5
)
412 : [inc
] XOR_CONSTANT_CONSTRAINT (256UL)
419 xor_sse_5_pf64(unsigned long bytes
, unsigned long *p1
, unsigned long *p2
,
420 unsigned long *p3
, unsigned long *p4
, unsigned long *p5
)
422 unsigned long lines
= bytes
>> 8;
444 " add %[inc], %[p1] ;\n"
445 " add %[inc], %[p2] ;\n"
446 " add %[inc], %[p3] ;\n"
447 " add %[inc], %[p4] ;\n"
448 " add %[inc], %[p5] ;\n"
451 : [cnt
] "+r" (lines
), [p1
] "+r" (p1
), [p2
] "+r" (p2
),
452 [p3
] "+r" (p3
), [p4
] "+r" (p4
), [p5
] "+r" (p5
)
453 : [inc
] XOR_CONSTANT_CONSTRAINT (256UL)
459 static struct xor_block_template xor_block_sse_pf64
= {
460 .name
= "prefetch64-sse",
461 .do_2
= xor_sse_2_pf64
,
462 .do_3
= xor_sse_3_pf64
,
463 .do_4
= xor_sse_4_pf64
,
464 .do_5
= xor_sse_5_pf64
,
477 #undef XOR_CONSTANT_CONSTRAINT
480 # include <asm/xor_32.h>
482 # include <asm/xor_64.h>
485 #define XOR_SELECT_TEMPLATE(FASTEST) \
488 #endif /* _ASM_X86_XOR_H */