1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 * include/asm-generic/xor.h
5 * Generic optimized RAID-5 checksumming functions.
8 #include <linux/prefetch.h>
11 xor_8regs_2(unsigned long bytes
, unsigned long * __restrict p1
,
12 const unsigned long * __restrict p2
)
14 long lines
= bytes
/ (sizeof (long)) / 8;
27 } while (--lines
> 0);
31 xor_8regs_3(unsigned long bytes
, unsigned long * __restrict p1
,
32 const unsigned long * __restrict p2
,
33 const unsigned long * __restrict p3
)
35 long lines
= bytes
/ (sizeof (long)) / 8;
38 p1
[0] ^= p2
[0] ^ p3
[0];
39 p1
[1] ^= p2
[1] ^ p3
[1];
40 p1
[2] ^= p2
[2] ^ p3
[2];
41 p1
[3] ^= p2
[3] ^ p3
[3];
42 p1
[4] ^= p2
[4] ^ p3
[4];
43 p1
[5] ^= p2
[5] ^ p3
[5];
44 p1
[6] ^= p2
[6] ^ p3
[6];
45 p1
[7] ^= p2
[7] ^ p3
[7];
49 } while (--lines
> 0);
53 xor_8regs_4(unsigned long bytes
, unsigned long * __restrict p1
,
54 const unsigned long * __restrict p2
,
55 const unsigned long * __restrict p3
,
56 const unsigned long * __restrict p4
)
58 long lines
= bytes
/ (sizeof (long)) / 8;
61 p1
[0] ^= p2
[0] ^ p3
[0] ^ p4
[0];
62 p1
[1] ^= p2
[1] ^ p3
[1] ^ p4
[1];
63 p1
[2] ^= p2
[2] ^ p3
[2] ^ p4
[2];
64 p1
[3] ^= p2
[3] ^ p3
[3] ^ p4
[3];
65 p1
[4] ^= p2
[4] ^ p3
[4] ^ p4
[4];
66 p1
[5] ^= p2
[5] ^ p3
[5] ^ p4
[5];
67 p1
[6] ^= p2
[6] ^ p3
[6] ^ p4
[6];
68 p1
[7] ^= p2
[7] ^ p3
[7] ^ p4
[7];
73 } while (--lines
> 0);
77 xor_8regs_5(unsigned long bytes
, unsigned long * __restrict p1
,
78 const unsigned long * __restrict p2
,
79 const unsigned long * __restrict p3
,
80 const unsigned long * __restrict p4
,
81 const unsigned long * __restrict p5
)
83 long lines
= bytes
/ (sizeof (long)) / 8;
86 p1
[0] ^= p2
[0] ^ p3
[0] ^ p4
[0] ^ p5
[0];
87 p1
[1] ^= p2
[1] ^ p3
[1] ^ p4
[1] ^ p5
[1];
88 p1
[2] ^= p2
[2] ^ p3
[2] ^ p4
[2] ^ p5
[2];
89 p1
[3] ^= p2
[3] ^ p3
[3] ^ p4
[3] ^ p5
[3];
90 p1
[4] ^= p2
[4] ^ p3
[4] ^ p4
[4] ^ p5
[4];
91 p1
[5] ^= p2
[5] ^ p3
[5] ^ p4
[5] ^ p5
[5];
92 p1
[6] ^= p2
[6] ^ p3
[6] ^ p4
[6] ^ p5
[6];
93 p1
[7] ^= p2
[7] ^ p3
[7] ^ p4
[7] ^ p5
[7];
99 } while (--lines
> 0);
103 xor_32regs_2(unsigned long bytes
, unsigned long * __restrict p1
,
104 const unsigned long * __restrict p2
)
106 long lines
= bytes
/ (sizeof (long)) / 8;
109 register long d0
, d1
, d2
, d3
, d4
, d5
, d6
, d7
;
110 d0
= p1
[0]; /* Pull the stuff into registers */
111 d1
= p1
[1]; /* ... in bursts, if possible. */
126 p1
[0] = d0
; /* Store the result (in bursts) */
136 } while (--lines
> 0);
140 xor_32regs_3(unsigned long bytes
, unsigned long * __restrict p1
,
141 const unsigned long * __restrict p2
,
142 const unsigned long * __restrict p3
)
144 long lines
= bytes
/ (sizeof (long)) / 8;
147 register long d0
, d1
, d2
, d3
, d4
, d5
, d6
, d7
;
148 d0
= p1
[0]; /* Pull the stuff into registers */
149 d1
= p1
[1]; /* ... in bursts, if possible. */
172 p1
[0] = d0
; /* Store the result (in bursts) */
183 } while (--lines
> 0);
187 xor_32regs_4(unsigned long bytes
, unsigned long * __restrict p1
,
188 const unsigned long * __restrict p2
,
189 const unsigned long * __restrict p3
,
190 const unsigned long * __restrict p4
)
192 long lines
= bytes
/ (sizeof (long)) / 8;
195 register long d0
, d1
, d2
, d3
, d4
, d5
, d6
, d7
;
196 d0
= p1
[0]; /* Pull the stuff into registers */
197 d1
= p1
[1]; /* ... in bursts, if possible. */
228 p1
[0] = d0
; /* Store the result (in bursts) */
240 } while (--lines
> 0);
244 xor_32regs_5(unsigned long bytes
, unsigned long * __restrict p1
,
245 const unsigned long * __restrict p2
,
246 const unsigned long * __restrict p3
,
247 const unsigned long * __restrict p4
,
248 const unsigned long * __restrict p5
)
250 long lines
= bytes
/ (sizeof (long)) / 8;
253 register long d0
, d1
, d2
, d3
, d4
, d5
, d6
, d7
;
254 d0
= p1
[0]; /* Pull the stuff into registers */
255 d1
= p1
[1]; /* ... in bursts, if possible. */
294 p1
[0] = d0
; /* Store the result (in bursts) */
307 } while (--lines
> 0);
311 xor_8regs_p_2(unsigned long bytes
, unsigned long * __restrict p1
,
312 const unsigned long * __restrict p2
)
314 long lines
= bytes
/ (sizeof (long)) / 8 - 1;
332 } while (--lines
> 0);
338 xor_8regs_p_3(unsigned long bytes
, unsigned long * __restrict p1
,
339 const unsigned long * __restrict p2
,
340 const unsigned long * __restrict p3
)
342 long lines
= bytes
/ (sizeof (long)) / 8 - 1;
352 p1
[0] ^= p2
[0] ^ p3
[0];
353 p1
[1] ^= p2
[1] ^ p3
[1];
354 p1
[2] ^= p2
[2] ^ p3
[2];
355 p1
[3] ^= p2
[3] ^ p3
[3];
356 p1
[4] ^= p2
[4] ^ p3
[4];
357 p1
[5] ^= p2
[5] ^ p3
[5];
358 p1
[6] ^= p2
[6] ^ p3
[6];
359 p1
[7] ^= p2
[7] ^ p3
[7];
363 } while (--lines
> 0);
369 xor_8regs_p_4(unsigned long bytes
, unsigned long * __restrict p1
,
370 const unsigned long * __restrict p2
,
371 const unsigned long * __restrict p3
,
372 const unsigned long * __restrict p4
)
374 long lines
= bytes
/ (sizeof (long)) / 8 - 1;
387 p1
[0] ^= p2
[0] ^ p3
[0] ^ p4
[0];
388 p1
[1] ^= p2
[1] ^ p3
[1] ^ p4
[1];
389 p1
[2] ^= p2
[2] ^ p3
[2] ^ p4
[2];
390 p1
[3] ^= p2
[3] ^ p3
[3] ^ p4
[3];
391 p1
[4] ^= p2
[4] ^ p3
[4] ^ p4
[4];
392 p1
[5] ^= p2
[5] ^ p3
[5] ^ p4
[5];
393 p1
[6] ^= p2
[6] ^ p3
[6] ^ p4
[6];
394 p1
[7] ^= p2
[7] ^ p3
[7] ^ p4
[7];
399 } while (--lines
> 0);
405 xor_8regs_p_5(unsigned long bytes
, unsigned long * __restrict p1
,
406 const unsigned long * __restrict p2
,
407 const unsigned long * __restrict p3
,
408 const unsigned long * __restrict p4
,
409 const unsigned long * __restrict p5
)
411 long lines
= bytes
/ (sizeof (long)) / 8 - 1;
426 p1
[0] ^= p2
[0] ^ p3
[0] ^ p4
[0] ^ p5
[0];
427 p1
[1] ^= p2
[1] ^ p3
[1] ^ p4
[1] ^ p5
[1];
428 p1
[2] ^= p2
[2] ^ p3
[2] ^ p4
[2] ^ p5
[2];
429 p1
[3] ^= p2
[3] ^ p3
[3] ^ p4
[3] ^ p5
[3];
430 p1
[4] ^= p2
[4] ^ p3
[4] ^ p4
[4] ^ p5
[4];
431 p1
[5] ^= p2
[5] ^ p3
[5] ^ p4
[5] ^ p5
[5];
432 p1
[6] ^= p2
[6] ^ p3
[6] ^ p4
[6] ^ p5
[6];
433 p1
[7] ^= p2
[7] ^ p3
[7] ^ p4
[7] ^ p5
[7];
439 } while (--lines
> 0);
445 xor_32regs_p_2(unsigned long bytes
, unsigned long * __restrict p1
,
446 const unsigned long * __restrict p2
)
448 long lines
= bytes
/ (sizeof (long)) / 8 - 1;
454 register long d0
, d1
, d2
, d3
, d4
, d5
, d6
, d7
;
459 d0
= p1
[0]; /* Pull the stuff into registers */
460 d1
= p1
[1]; /* ... in bursts, if possible. */
475 p1
[0] = d0
; /* Store the result (in bursts) */
485 } while (--lines
> 0);
491 xor_32regs_p_3(unsigned long bytes
, unsigned long * __restrict p1
,
492 const unsigned long * __restrict p2
,
493 const unsigned long * __restrict p3
)
495 long lines
= bytes
/ (sizeof (long)) / 8 - 1;
502 register long d0
, d1
, d2
, d3
, d4
, d5
, d6
, d7
;
508 d0
= p1
[0]; /* Pull the stuff into registers */
509 d1
= p1
[1]; /* ... in bursts, if possible. */
532 p1
[0] = d0
; /* Store the result (in bursts) */
543 } while (--lines
> 0);
549 xor_32regs_p_4(unsigned long bytes
, unsigned long * __restrict p1
,
550 const unsigned long * __restrict p2
,
551 const unsigned long * __restrict p3
,
552 const unsigned long * __restrict p4
)
554 long lines
= bytes
/ (sizeof (long)) / 8 - 1;
562 register long d0
, d1
, d2
, d3
, d4
, d5
, d6
, d7
;
569 d0
= p1
[0]; /* Pull the stuff into registers */
570 d1
= p1
[1]; /* ... in bursts, if possible. */
601 p1
[0] = d0
; /* Store the result (in bursts) */
613 } while (--lines
> 0);
619 xor_32regs_p_5(unsigned long bytes
, unsigned long * __restrict p1
,
620 const unsigned long * __restrict p2
,
621 const unsigned long * __restrict p3
,
622 const unsigned long * __restrict p4
,
623 const unsigned long * __restrict p5
)
625 long lines
= bytes
/ (sizeof (long)) / 8 - 1;
634 register long d0
, d1
, d2
, d3
, d4
, d5
, d6
, d7
;
642 d0
= p1
[0]; /* Pull the stuff into registers */
643 d1
= p1
[1]; /* ... in bursts, if possible. */
682 p1
[0] = d0
; /* Store the result (in bursts) */
695 } while (--lines
> 0);
700 static struct xor_block_template xor_block_8regs
= {
708 static struct xor_block_template xor_block_32regs
= {
710 .do_2
= xor_32regs_2
,
711 .do_3
= xor_32regs_3
,
712 .do_4
= xor_32regs_4
,
713 .do_5
= xor_32regs_5
,
716 static struct xor_block_template xor_block_8regs_p __maybe_unused
= {
717 .name
= "8regs_prefetch",
718 .do_2
= xor_8regs_p_2
,
719 .do_3
= xor_8regs_p_3
,
720 .do_4
= xor_8regs_p_4
,
721 .do_5
= xor_8regs_p_5
,
724 static struct xor_block_template xor_block_32regs_p __maybe_unused
= {
725 .name
= "32regs_prefetch",
726 .do_2
= xor_32regs_p_2
,
727 .do_3
= xor_32regs_p_3
,
728 .do_4
= xor_32regs_p_4
,
729 .do_5
= xor_32regs_p_5
,
732 #define XOR_TRY_TEMPLATES \
734 xor_speed(&xor_block_8regs); \
735 xor_speed(&xor_block_8regs_p); \
736 xor_speed(&xor_block_32regs); \
737 xor_speed(&xor_block_32regs_p); \