// SPDX-License-Identifier: GPL-2.0-or-later
/* -*- linux-c -*- ------------------------------------------------------- *
 *   Copyright (C) 2012 Intel Corporation
 *   Author: Yuanhan Liu <yuanhan.liu@linux.intel.com>
 *   Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved
 * ----------------------------------------------------------------------- */

/*
 * AVX2 implementation of RAID-6 syndrome functions
 */

#ifdef CONFIG_AS_AVX2

#include <linux/raid/pq.h>
#include "x86.h"
static const struct raid6_avx2_constants {
	u64 x1d[4];
} raid6_avx2_constants __aligned(32) = {
	{ 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
	  0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,},
};
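/*
 * 0x1d is the low byte of the RAID-6 generator polynomial
 * x^8 + x^4 + x^3 + x^2 + 1 (0x11d).  Multiplying a byte b by 2 in
 * GF(2^8) is
 *
 *	(b << 1) ^ ((b & 0x80) ? 0x1d : 0)
 *
 * The recurring vpcmpgtb/vpaddb/vpand/vpxor sequences below perform
 * exactly this step on 32 bytes at a time: vpcmpgtb against a zero
 * register yields 0xff in every lane whose top bit is set (the byte
 * compares as negative), vpaddb doubles the lane, vpand masks the
 * 0xff lanes down to 0x1d, and vpxor folds the reduction back in.
 */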
static int raid6_have_avx2(void)
{
	return boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX);
}
/*
 * Plain AVX2 implementation
 */
static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
	asm volatile("vpxor %ymm3,%ymm3,%ymm3");	/* Zero temp */

	for (d = 0; d < bytes; d += 32) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
		asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
		asm volatile("vmovdqa %ymm2,%ymm4");	/* Q[0] */
		asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z0-1][d]));
		for (z = z0-2; z >= 0; z--) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm6,%ymm2,%ymm2");
			asm volatile("vpxor %ymm6,%ymm4,%ymm4");
			asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z][d]));
		}
		asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
		asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
		asm volatile("vpand %ymm0,%ymm5,%ymm5");
		asm volatile("vpxor %ymm5,%ymm4,%ymm4");
		asm volatile("vpxor %ymm6,%ymm2,%ymm2");
		asm volatile("vpxor %ymm6,%ymm4,%ymm4");

		asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
		asm volatile("vpxor %ymm2,%ymm2,%ymm2");
		asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
		asm volatile("vpxor %ymm4,%ymm4,%ymm4");
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}
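/*
 * Per-byte sketch of the loop above (Horner's scheme, g = 2 in GF(2^8)),
 * where gf_mul2() is just shorthand for the (b << 1) ^ 0x1d step noted
 * at the constants:
 *
 *	P = D[z0];  Q = D[z0];
 *	for (z = z0-1; z >= 0; z--) {
 *		P ^= D[z];
 *		Q  = gf_mul2(Q) ^ D[z];
 *	}
 *
 * giving P = XOR of all data bytes and Q = sum over z of 2^z * D[z].
 * ymm2/ymm4 hold the P/Q accumulators for the current 32-byte slice.
 */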
static void raid6_avx21_xor_syndrome(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));

	for (d = 0 ; d < bytes ; d += 32) {
		asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
		asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
		asm volatile("vpxor %ymm4,%ymm2,%ymm2");
		for (z = z0-1 ; z >= start ; z--) {
			asm volatile("vpxor %ymm5,%ymm5,%ymm5");
			asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
			asm volatile("vpxor %ymm5,%ymm2,%ymm2");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
		}
		/* P/Q left side optimization */
		for (z = start-1 ; z >= 0 ; z--) {
			asm volatile("vpxor %ymm5,%ymm5,%ymm5");
			asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
		}
		asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
		/* Don't use movntdq for r/w memory area < cache line */
		asm volatile("vmovdqa %%ymm4,%0" : "=m" (q[d]));
		asm volatile("vmovdqa %%ymm2,%0" : "=m" (p[d]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}
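/*
 * What the xor_syndrome variants compute (per byte, informally): the
 * contribution of data disks start..stop is folded into the existing
 * P and Q blocks in place,
 *
 *	P ^= D[start] ^ ... ^ D[stop]
 *	Q ^= 2^start * D[start] ^ ... ^ 2^stop * D[stop]
 *
 * The first ("right side") loop runs Horner's scheme from disk stop
 * down to disk start.  The second ("left side") loop only multiplies
 * the accumulated Q delta by 2 once per disk below start: those disks
 * contribute no data, but the weights 2^z still have to line up before
 * the delta is XORed into the stored Q.
 */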
const struct raid6_calls raid6_avx2x1 = {
	raid6_avx21_gen_syndrome,
	raid6_avx21_xor_syndrome,
	raid6_have_avx2,
	"avx2x1",
	1			/* Has cache hints */
};
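/*
 * Each raid6_calls entry exposes a syndrome generator, a partial-update
 * (xor) variant, a ->valid() check and a name; the raid6 core probes the
 * usable entries at init time and picks the fastest.  The trailing 1
 * flags implementations that use cache hints (prefetchnta / non-temporal
 * stores).
 */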
/*
 * Unrolled-by-2 AVX2 implementation
 */
static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
	asm volatile("vpxor %ymm1,%ymm1,%ymm1");	/* Zero temp */

	/* We uniformly assume a single prefetch covers at least 32 bytes */
	for (d = 0; d < bytes; d += 64) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d+32]));
		asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
		asm volatile("vmovdqa %0,%%ymm3" : : "m" (dptr[z0][d+32]));/* P[1] */
		asm volatile("vmovdqa %ymm2,%ymm4");	/* Q[0] */
		asm volatile("vmovdqa %ymm3,%ymm6");	/* Q[1] */
		for (z = z0-1; z >= 0; z--) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32]));
			asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5");
			asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpand %ymm0,%ymm7,%ymm7");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d]));
			asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32]));
			asm volatile("vpxor %ymm5,%ymm2,%ymm2");
			asm volatile("vpxor %ymm7,%ymm3,%ymm3");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
		}
		asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
		asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
		asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
		asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}
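/*
 * The unrolled variants apply the same per-byte recurrence as
 * raid6_avx21_gen_syndrome, just to two (here) or four independent
 * 32-byte slices per pass, so more ymm registers stay busy between
 * memory accesses.
 */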
static void raid6_avx22_xor_syndrome(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));

	for (d = 0 ; d < bytes ; d += 64) {
		asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
		asm volatile("vmovdqa %0,%%ymm6" :: "m" (dptr[z0][d+32]));
		asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
		asm volatile("vmovdqa %0,%%ymm3" : : "m" (p[d+32]));
		asm volatile("vpxor %ymm4,%ymm2,%ymm2");
		asm volatile("vpxor %ymm6,%ymm3,%ymm3");
		for (z = z0-1 ; z >= start ; z--) {
			asm volatile("vpxor %ymm5,%ymm5,%ymm5");
			asm volatile("vpxor %ymm7,%ymm7,%ymm7");
			asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
			asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpand %ymm0,%ymm7,%ymm7");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
			asm volatile("vmovdqa %0,%%ymm7"
				     :: "m" (dptr[z][d+32]));
			asm volatile("vpxor %ymm5,%ymm2,%ymm2");
			asm volatile("vpxor %ymm7,%ymm3,%ymm3");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
		}
		/* P/Q left side optimization */
		for (z = start-1 ; z >= 0 ; z--) {
			asm volatile("vpxor %ymm5,%ymm5,%ymm5");
			asm volatile("vpxor %ymm7,%ymm7,%ymm7");
			asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
			asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpand %ymm0,%ymm7,%ymm7");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
		}
		asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
		asm volatile("vpxor %0,%%ymm6,%%ymm6" : : "m" (q[d+32]));
		/* Don't use movntdq for r/w memory area < cache line */
		asm volatile("vmovdqa %%ymm4,%0" : "=m" (q[d]));
		asm volatile("vmovdqa %%ymm6,%0" : "=m" (q[d+32]));
		asm volatile("vmovdqa %%ymm2,%0" : "=m" (p[d]));
		asm volatile("vmovdqa %%ymm3,%0" : "=m" (p[d+32]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}
const struct raid6_calls raid6_avx2x2 = {
	raid6_avx22_gen_syndrome,
	raid6_avx22_xor_syndrome,
	raid6_have_avx2,
	"avx2x2",
	1			/* Has cache hints */
};
#ifdef CONFIG_X86_64

/*
 * Unrolled-by-4 AVX2 implementation
 */
static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
	asm volatile("vpxor %ymm1,%ymm1,%ymm1");	/* Zero temp */
	asm volatile("vpxor %ymm2,%ymm2,%ymm2");	/* P[0] */
	asm volatile("vpxor %ymm3,%ymm3,%ymm3");	/* P[1] */
	asm volatile("vpxor %ymm4,%ymm4,%ymm4");	/* Q[0] */
	asm volatile("vpxor %ymm6,%ymm6,%ymm6");	/* Q[1] */
	asm volatile("vpxor %ymm10,%ymm10,%ymm10");	/* P[2] */
	asm volatile("vpxor %ymm11,%ymm11,%ymm11");	/* P[3] */
	asm volatile("vpxor %ymm12,%ymm12,%ymm12");	/* Q[2] */
	asm volatile("vpxor %ymm14,%ymm14,%ymm14");	/* Q[3] */

	for (d = 0; d < bytes; d += 128) {
		for (z = z0; z >= 0; z--) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32]));
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+64]));
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+96]));
			asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5");
			asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7");
			asm volatile("vpcmpgtb %ymm12,%ymm1,%ymm13");
			asm volatile("vpcmpgtb %ymm14,%ymm1,%ymm15");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
			asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
			asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpand %ymm0,%ymm7,%ymm7");
			asm volatile("vpand %ymm0,%ymm13,%ymm13");
			asm volatile("vpand %ymm0,%ymm15,%ymm15");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vpxor %ymm13,%ymm12,%ymm12");
			asm volatile("vpxor %ymm15,%ymm14,%ymm14");
			asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d]));
			asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32]));
			asm volatile("vmovdqa %0,%%ymm13" : : "m" (dptr[z][d+64]));
			asm volatile("vmovdqa %0,%%ymm15" : : "m" (dptr[z][d+96]));
			asm volatile("vpxor %ymm5,%ymm2,%ymm2");
			asm volatile("vpxor %ymm7,%ymm3,%ymm3");
			asm volatile("vpxor %ymm13,%ymm10,%ymm10");
			asm volatile("vpxor %ymm15,%ymm11,%ymm11");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vpxor %ymm13,%ymm12,%ymm12");
			asm volatile("vpxor %ymm15,%ymm14,%ymm14");
		}
		asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
		asm volatile("vpxor %ymm2,%ymm2,%ymm2");
		asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
		asm volatile("vpxor %ymm3,%ymm3,%ymm3");
		asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64]));
		asm volatile("vpxor %ymm10,%ymm10,%ymm10");
		asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96]));
		asm volatile("vpxor %ymm11,%ymm11,%ymm11");
		asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
		asm volatile("vpxor %ymm4,%ymm4,%ymm4");
		asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
		asm volatile("vpxor %ymm6,%ymm6,%ymm6");
		asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64]));
		asm volatile("vpxor %ymm12,%ymm12,%ymm12");
		asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96]));
		asm volatile("vpxor %ymm14,%ymm14,%ymm14");
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}
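/*
 * The unrolled-by-4 variants use ymm8-ymm15, which are only available
 * in 64-bit mode, so they are built only under CONFIG_X86_64.  The P
 * and Q accumulators stay resident in registers across the whole inner
 * disk loop: they are zeroed up front and re-zeroed after each
 * non-temporal store rather than being reloaded from memory.
 */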
static void raid6_avx24_xor_syndrome(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa %0,%%ymm0" :: "m" (raid6_avx2_constants.x1d[0]));

	for (d = 0 ; d < bytes ; d += 128) {
		asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
		asm volatile("vmovdqa %0,%%ymm6" :: "m" (dptr[z0][d+32]));
		asm volatile("vmovdqa %0,%%ymm12" :: "m" (dptr[z0][d+64]));
		asm volatile("vmovdqa %0,%%ymm14" :: "m" (dptr[z0][d+96]));
		asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
		asm volatile("vmovdqa %0,%%ymm3" : : "m" (p[d+32]));
		asm volatile("vmovdqa %0,%%ymm10" : : "m" (p[d+64]));
		asm volatile("vmovdqa %0,%%ymm11" : : "m" (p[d+96]));
		asm volatile("vpxor %ymm4,%ymm2,%ymm2");
		asm volatile("vpxor %ymm6,%ymm3,%ymm3");
		asm volatile("vpxor %ymm12,%ymm10,%ymm10");
		asm volatile("vpxor %ymm14,%ymm11,%ymm11");
		for (z = z0-1 ; z >= start ; z--) {
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d+64]));
			asm volatile("vpxor %ymm5,%ymm5,%ymm5");
			asm volatile("vpxor %ymm7,%ymm7,%ymm7");
			asm volatile("vpxor %ymm13,%ymm13,%ymm13");
			asm volatile("vpxor %ymm15,%ymm15,%ymm15");
			asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
			asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
			asm volatile("vpcmpgtb %ymm12,%ymm13,%ymm13");
			asm volatile("vpcmpgtb %ymm14,%ymm15,%ymm15");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
			asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
			asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpand %ymm0,%ymm7,%ymm7");
			asm volatile("vpand %ymm0,%ymm13,%ymm13");
			asm volatile("vpand %ymm0,%ymm15,%ymm15");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vpxor %ymm13,%ymm12,%ymm12");
			asm volatile("vpxor %ymm15,%ymm14,%ymm14");
			asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
			asm volatile("vmovdqa %0,%%ymm7"
				     :: "m" (dptr[z][d+32]));
			asm volatile("vmovdqa %0,%%ymm13"
				     :: "m" (dptr[z][d+64]));
			asm volatile("vmovdqa %0,%%ymm15"
				     :: "m" (dptr[z][d+96]));
			asm volatile("vpxor %ymm5,%ymm2,%ymm2");
			asm volatile("vpxor %ymm7,%ymm3,%ymm3");
			asm volatile("vpxor %ymm13,%ymm10,%ymm10");
			asm volatile("vpxor %ymm15,%ymm11,%ymm11");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vpxor %ymm13,%ymm12,%ymm12");
			asm volatile("vpxor %ymm15,%ymm14,%ymm14");
		}
		asm volatile("prefetchnta %0" :: "m" (q[d]));
		asm volatile("prefetchnta %0" :: "m" (q[d+64]));
		/* P/Q left side optimization */
		for (z = start-1 ; z >= 0 ; z--) {
			asm volatile("vpxor %ymm5,%ymm5,%ymm5");
			asm volatile("vpxor %ymm7,%ymm7,%ymm7");
			asm volatile("vpxor %ymm13,%ymm13,%ymm13");
			asm volatile("vpxor %ymm15,%ymm15,%ymm15");
			asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
			asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
			asm volatile("vpcmpgtb %ymm12,%ymm13,%ymm13");
			asm volatile("vpcmpgtb %ymm14,%ymm15,%ymm15");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
			asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
			asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpand %ymm0,%ymm7,%ymm7");
			asm volatile("vpand %ymm0,%ymm13,%ymm13");
			asm volatile("vpand %ymm0,%ymm15,%ymm15");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vpxor %ymm13,%ymm12,%ymm12");
			asm volatile("vpxor %ymm15,%ymm14,%ymm14");
		}
		asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
		asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
		asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64]));
		asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96]));
		asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
		asm volatile("vpxor %0,%%ymm6,%%ymm6" : : "m" (q[d+32]));
		asm volatile("vpxor %0,%%ymm12,%%ymm12" : : "m" (q[d+64]));
		asm volatile("vpxor %0,%%ymm14,%%ymm14" : : "m" (q[d+96]));
		asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
		asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
		asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64]));
		asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}
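/*
 * Unlike the x1/x2 xor_syndrome routines, which write P and Q back with
 * vmovdqa (see their "Don't use movntdq for r/w memory area < cache
 * line" note), the x4 variant touches 128 bytes of P and Q per
 * iteration and streams them out with vmovntdq after prefetching the
 * old Q values.
 */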
const struct raid6_calls raid6_avx2x4 = {
	raid6_avx24_gen_syndrome,
	raid6_avx24_xor_syndrome,
	raid6_have_avx2,
	"avx2x4",
	1			/* Has cache hints */
};

#endif /* CONFIG_X86_64 */

#endif /* CONFIG_AS_AVX2 */