// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
 *
 * Template for XOR operations, instantiated in xor_simd.c.
 *
 * Expected preprocessor definitions:
 *
 * - LD_INOUT_LINE(buf)
 * - LD_AND_XOR_LINE(buf)
 */
/*
 * Two-buffer variant of the XOR template; the real function name is
 * whatever the including file's XOR_FUNC_NAME(2) expands to.
 *
 * bytes: byte count to process, consumed in units of LINE_WIDTH.
 * v1:    the only non-const buffer, so the only one that may be written
 *        through (presumably receives the XOR result - confirm against
 *        the asm template, which is not visible in this view).
 * v2:    read-only input buffer.
 *
 * All pointers are __restrict, i.e. callers promise the buffers do not
 * alias one another.
 *
 * NOTE(review): this view appears to be missing lines (function braces,
 * the "do {" matching the trailing "} while", the asm template string
 * and the closing ");" of the asm statement) - check the full file.
 */
16 void XOR_FUNC_NAME(2)(unsigned long bytes
,
17 unsigned long * __restrict v1
,
18 const unsigned long * __restrict v2
)
/*
 * Number of LINE_WIDTH-byte lines to process. Integer division: any
 * remainder of bytes beyond a whole line is not counted.
 */
20 unsigned long lines
= bytes
/ LINE_WIDTH
;
/*
 * One asm statement per line. The buffer pointers are passed as named
 * register inputs; there are no output operands, so the "memory" clobber
 * is what tells the compiler that memory is accessed behind its back.
 */
23 __asm__
__volatile__ (
27 : : [v1
] "r"(v1
), [v2
] "r"(v2
) : "memory"
/*
 * Step each pointer forward by one line: LINE_WIDTH bytes expressed as a
 * count of unsigned long elements.
 */
30 v1
+= LINE_WIDTH
/ sizeof(unsigned long);
31 v2
+= LINE_WIDTH
/ sizeof(unsigned long);
/*
 * The condition is tested after the body, and lines is unsigned: if
 * bytes < LINE_WIDTH then lines starts at 0 and --lines wraps instead of
 * terminating. Presumably callers always pass at least one full line -
 * TODO confirm.
 */
32 } while (--lines
> 0);
/*
 * Three-buffer variant of the XOR template; the real function name is
 * whatever the including file's XOR_FUNC_NAME(3) expands to.
 *
 * bytes:  byte count to process, consumed in units of LINE_WIDTH.
 * v1:     the only non-const buffer, so the only one that may be written
 *         through (presumably receives the XOR result - confirm against
 *         the asm template, which is not visible in this view).
 * v2, v3: read-only input buffers.
 *
 * All pointers are __restrict, i.e. callers promise the buffers do not
 * alias one another.
 *
 * NOTE(review): this view appears to be missing lines (function braces,
 * the "do {" matching the trailing "} while", the asm template string
 * and the closing ");" of the asm statement) - check the full file.
 */
35 void XOR_FUNC_NAME(3)(unsigned long bytes
,
36 unsigned long * __restrict v1
,
37 const unsigned long * __restrict v2
,
38 const unsigned long * __restrict v3
)
/*
 * Number of LINE_WIDTH-byte lines to process. Integer division: any
 * remainder of bytes beyond a whole line is not counted.
 */
40 unsigned long lines
= bytes
/ LINE_WIDTH
;
/*
 * One asm statement per line. The buffer pointers are passed as named
 * register inputs; there are no output operands, so the "memory" clobber
 * is what tells the compiler that memory is accessed behind its back.
 */
43 __asm__
__volatile__ (
48 : : [v1
] "r"(v1
), [v2
] "r"(v2
), [v3
] "r"(v3
) : "memory"
/*
 * Step each pointer forward by one line: LINE_WIDTH bytes expressed as a
 * count of unsigned long elements.
 */
51 v1
+= LINE_WIDTH
/ sizeof(unsigned long);
52 v2
+= LINE_WIDTH
/ sizeof(unsigned long);
53 v3
+= LINE_WIDTH
/ sizeof(unsigned long);
/*
 * The condition is tested after the body, and lines is unsigned: if
 * bytes < LINE_WIDTH then lines starts at 0 and --lines wraps instead of
 * terminating. Presumably callers always pass at least one full line -
 * TODO confirm.
 */
54 } while (--lines
> 0);
/*
 * Four-buffer variant of the XOR template; the real function name is
 * whatever the including file's XOR_FUNC_NAME(4) expands to.
 *
 * bytes:      byte count to process, consumed in units of LINE_WIDTH.
 * v1:         the only non-const buffer, so the only one that may be
 *             written through (presumably receives the XOR result -
 *             confirm against the asm template, which is not visible in
 *             this view).
 * v2, v3, v4: read-only input buffers.
 *
 * All pointers are __restrict, i.e. callers promise the buffers do not
 * alias one another.
 *
 * NOTE(review): this view appears to be missing lines (function braces,
 * the "do {" matching the trailing "} while", the asm template string
 * and the closing ");" of the asm statement) - check the full file.
 */
57 void XOR_FUNC_NAME(4)(unsigned long bytes
,
58 unsigned long * __restrict v1
,
59 const unsigned long * __restrict v2
,
60 const unsigned long * __restrict v3
,
61 const unsigned long * __restrict v4
)
/*
 * Number of LINE_WIDTH-byte lines to process. Integer division: any
 * remainder of bytes beyond a whole line is not counted.
 */
63 unsigned long lines
= bytes
/ LINE_WIDTH
;
/*
 * One asm statement per line, with the buffer pointers passed as named
 * register inputs and no output operands.
 *
 * NOTE(review): no "memory" clobber is visible on this operand list,
 * whereas the 2-, 3- and 5-buffer variants in this file all carry one.
 * With no outputs and no clobber the compiler is not told that memory is
 * accessed here. The clobber may simply sit on a line that is not
 * visible in this view - verify against the full file.
 */
66 __asm__
__volatile__ (
72 : : [v1
] "r"(v1
), [v2
] "r"(v2
), [v3
] "r"(v3
), [v4
] "r"(v4
)
/*
 * Step each pointer forward by one line: LINE_WIDTH bytes expressed as a
 * count of unsigned long elements.
 */
76 v1
+= LINE_WIDTH
/ sizeof(unsigned long);
77 v2
+= LINE_WIDTH
/ sizeof(unsigned long);
78 v3
+= LINE_WIDTH
/ sizeof(unsigned long);
79 v4
+= LINE_WIDTH
/ sizeof(unsigned long);
/*
 * The condition is tested after the body, and lines is unsigned: if
 * bytes < LINE_WIDTH then lines starts at 0 and --lines wraps instead of
 * terminating. Presumably callers always pass at least one full line -
 * TODO confirm.
 */
80 } while (--lines
> 0);
/*
 * Five-buffer variant of the XOR template; the real function name is
 * whatever the including file's XOR_FUNC_NAME(5) expands to.
 *
 * bytes:          byte count to process, consumed in units of LINE_WIDTH.
 * v1:             the only non-const buffer, so the only one that may be
 *                 written through (presumably receives the XOR result -
 *                 confirm against the asm template, which is not visible
 *                 in this view).
 * v2, v3, v4, v5: read-only input buffers.
 *
 * All pointers are __restrict, i.e. callers promise the buffers do not
 * alias one another.
 *
 * NOTE(review): this view appears to be missing lines (function braces,
 * the "do {" matching the trailing "} while", the asm template string
 * and the closing ");" of the asm statement) - check the full file.
 */
83 void XOR_FUNC_NAME(5)(unsigned long bytes
,
84 unsigned long * __restrict v1
,
85 const unsigned long * __restrict v2
,
86 const unsigned long * __restrict v3
,
87 const unsigned long * __restrict v4
,
88 const unsigned long * __restrict v5
)
/*
 * Number of LINE_WIDTH-byte lines to process. Integer division: any
 * remainder of bytes beyond a whole line is not counted.
 */
90 unsigned long lines
= bytes
/ LINE_WIDTH
;
/*
 * One asm statement per line. The buffer pointers are passed as named
 * register inputs; there are no output operands, so the "memory" clobber
 * is what tells the compiler that memory is accessed behind its back.
 */
93 __asm__
__volatile__ (
100 : : [v1
] "r"(v1
), [v2
] "r"(v2
), [v3
] "r"(v3
), [v4
] "r"(v4
),
101 [v5
] "r"(v5
) : "memory"
/*
 * Step each pointer forward by one line: LINE_WIDTH bytes expressed as a
 * count of unsigned long elements.
 */
104 v1
+= LINE_WIDTH
/ sizeof(unsigned long);
105 v2
+= LINE_WIDTH
/ sizeof(unsigned long);
106 v3
+= LINE_WIDTH
/ sizeof(unsigned long);
107 v4
+= LINE_WIDTH
/ sizeof(unsigned long);
108 v5
+= LINE_WIDTH
/ sizeof(unsigned long);
/*
 * The condition is tested after the body, and lines is unsigned: if
 * bytes < LINE_WIDTH then lines starts at 0 and --lines wraps instead of
 * terminating. Presumably callers always pass at least one full line -
 * TODO confirm.
 */
109 } while (--lines
> 0);