/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef _ASM_X86_XOR_AVX_H
#define _ASM_X86_XOR_AVX_H

/*
 * Optimized RAID-5 checksumming functions for AVX
 *
 * Copyright (C) 2012 Intel Corporation
 * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
 *
 * Based on Ingo Molnar and Zach Brown's respective MMX and SSE routines
 */

#ifdef CONFIG_AS_AVX

#include <linux/compiler.h>
#include <asm/fpu/api.h>
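
/*
 * BLOCK4()/BLOCK16() unroll the inner loop: each BLOCK() handles one
 * 32-byte ymm register worth of data, so a single BLOCK16() expansion
 * covers 512 bytes of each buffer using ymm0-ymm3.
 */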
#define BLOCK4(i) \
		BLOCK(32 * i, 0) \
		BLOCK(32 * (i + 1), 1) \
		BLOCK(32 * (i + 2), 2) \
		BLOCK(32 * (i + 3), 3)

#define BLOCK16() \
		BLOCK4(0) \
		BLOCK4(4) \
		BLOCK4(8) \
		BLOCK4(12)
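
/*
 * xor_avx_2() - XOR the contents of p1 into p0. The byte count is
 * processed in 512-byte lines (bytes >> 9); any trailing remainder is
 * ignored. YMM register use is bracketed by kernel_fpu_begin()/end().
 */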
static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1)
{
	unsigned long lines = bytes >> 9;

	kernel_fpu_begin();

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
	}

	kernel_fpu_end();
}
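
/*
 * xor_avx_3() - XOR p1 and p2 into p0. Same structure as xor_avx_2(),
 * with one more source buffer folded in per BLOCK().
 */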
static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1,
	unsigned long *p2)
{
	unsigned long lines = bytes >> 9;

	kernel_fpu_begin();

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p2[i / sizeof(*p2)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
		p2 = (unsigned long *)((uintptr_t)p2 + 512);
	}

	kernel_fpu_end();
}
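
/*
 * xor_avx_4() - XOR p1, p2 and p3 into p0.
 */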
static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1,
	unsigned long *p2, unsigned long *p3)
{
	unsigned long lines = bytes >> 9;

	kernel_fpu_begin();

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p3[i / sizeof(*p3)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p2[i / sizeof(*p2)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
		p2 = (unsigned long *)((uintptr_t)p2 + 512);
		p3 = (unsigned long *)((uintptr_t)p3 + 512);
	}

	kernel_fpu_end();
}
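
/*
 * xor_avx_5() - XOR p1, p2, p3 and p4 into p0.
 */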
static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1,
	unsigned long *p2, unsigned long *p3, unsigned long *p4)
{
	unsigned long lines = bytes >> 9;

	kernel_fpu_begin();

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p4[i / sizeof(*p4)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p3[i / sizeof(*p3)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p2[i / sizeof(*p2)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
		p2 = (unsigned long *)((uintptr_t)p2 + 512);
		p3 = (unsigned long *)((uintptr_t)p3 + 512);
		p4 = (unsigned long *)((uintptr_t)p4 + 512);
	}

	kernel_fpu_end();
}
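
/*
 * Template describing the AVX routines to the generic xor framework,
 * which benchmarks the registered templates via xor_speed() so the
 * fastest implementation can be selected.
 */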
static struct xor_block_template xor_block_avx = {
	.name = "avx",
	.do_2 = xor_avx_2,
	.do_3 = xor_avx_3,
	.do_4 = xor_avx_4,
	.do_5 = xor_avx_5,
};

#define AVX_XOR_SPEED \
do { \
	if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE)) \
		xor_speed(&xor_block_avx); \
} while (0)

#define AVX_SELECT(FASTEST) \
	(boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE) ? &xor_block_avx : FASTEST)
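
/*
 * Illustrative use from the x86 <asm/xor.h> wiring (shown only as an
 * example, not part of this header):
 *
 *	#define XOR_SELECT_TEMPLATE(FASTEST)	AVX_SELECT(FASTEST)
 *
 * with AVX_XOR_SPEED invoked from XOR_TRY_TEMPLATES, so the AVX
 * routines are only benchmarked when AVX and OSXSAVE are available.
 */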

#else

#define AVX_XOR_SPEED {}

#define AVX_SELECT(FASTEST) (FASTEST)

#endif
#endif /* _ASM_X86_XOR_AVX_H */