1 /* -*- linux-c -*- ------------------------------------------------------- *
3 * Copyright (C) 2012 Intel Corporation
4 * Author: Yuanhan Liu <yuanhan.liu@linux.intel.com>
6 * Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
12 * Boston MA 02111-1307, USA; either version 2 of the License, or
13 * (at your option) any later version; incorporated herein by reference.
15 * ----------------------------------------------------------------------- */
/*
 * AVX2 implementation of RAID-6 syndrome functions
 */
24 #include <linux/raid/pq.h>
27 static const struct raid6_avx2_constants
{
29 } raid6_avx2_constants
__aligned(32) = {
30 { 0x1d1d1d1d1d1d1d1dULL
, 0x1d1d1d1d1d1d1d1dULL
,
31 0x1d1d1d1d1d1d1d1dULL
, 0x1d1d1d1d1d1d1d1dULL
,},
34 static int raid6_have_avx2(void)
36 return boot_cpu_has(X86_FEATURE_AVX2
) && boot_cpu_has(X86_FEATURE_AVX
);
40 * Plain AVX2 implementation
42 static void raid6_avx21_gen_syndrome(int disks
, size_t bytes
, void **ptrs
)
44 u8
**dptr
= (u8
**)ptrs
;
48 z0
= disks
- 3; /* Highest data disk */
49 p
= dptr
[z0
+1]; /* XOR parity */
50 q
= dptr
[z0
+2]; /* RS syndrome */
54 asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants
.x1d
[0]));
55 asm volatile("vpxor %ymm3,%ymm3,%ymm3"); /* Zero temp */
57 for (d
= 0; d
< bytes
; d
+= 32) {
58 asm volatile("prefetchnta %0" : : "m" (dptr
[z0
][d
]));
59 asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr
[z0
][d
]));/* P[0] */
60 asm volatile("prefetchnta %0" : : "m" (dptr
[z0
-1][d
]));
61 asm volatile("vmovdqa %ymm2,%ymm4");/* Q[0] */
62 asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr
[z0
-1][d
]));
63 for (z
= z0
-2; z
>= 0; z
--) {
64 asm volatile("prefetchnta %0" : : "m" (dptr
[z
][d
]));
65 asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
66 asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
67 asm volatile("vpand %ymm0,%ymm5,%ymm5");
68 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
69 asm volatile("vpxor %ymm6,%ymm2,%ymm2");
70 asm volatile("vpxor %ymm6,%ymm4,%ymm4");
71 asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr
[z
][d
]));
73 asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
74 asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
75 asm volatile("vpand %ymm0,%ymm5,%ymm5");
76 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
77 asm volatile("vpxor %ymm6,%ymm2,%ymm2");
78 asm volatile("vpxor %ymm6,%ymm4,%ymm4");
80 asm volatile("vmovntdq %%ymm2,%0" : "=m" (p
[d
]));
81 asm volatile("vpxor %ymm2,%ymm2,%ymm2");
82 asm volatile("vmovntdq %%ymm4,%0" : "=m" (q
[d
]));
83 asm volatile("vpxor %ymm4,%ymm4,%ymm4");
86 asm volatile("sfence" : : : "memory");
90 static void raid6_avx21_xor_syndrome(int disks
, int start
, int stop
,
91 size_t bytes
, void **ptrs
)
93 u8
**dptr
= (u8
**)ptrs
;
97 z0
= stop
; /* P/Q right side optimization */
98 p
= dptr
[disks
-2]; /* XOR parity */
99 q
= dptr
[disks
-1]; /* RS syndrome */
103 asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants
.x1d
[0]));
105 for (d
= 0 ; d
< bytes
; d
+= 32) {
106 asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr
[z0
][d
]));
107 asm volatile("vmovdqa %0,%%ymm2" : : "m" (p
[d
]));
108 asm volatile("vpxor %ymm4,%ymm2,%ymm2");
110 for (z
= z0
-1 ; z
>= start
; z
--) {
111 asm volatile("vpxor %ymm5,%ymm5,%ymm5");
112 asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
113 asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
114 asm volatile("vpand %ymm0,%ymm5,%ymm5");
115 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
116 asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr
[z
][d
]));
117 asm volatile("vpxor %ymm5,%ymm2,%ymm2");
118 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
120 /* P/Q left side optimization */
121 for (z
= start
-1 ; z
>= 0 ; z
--) {
122 asm volatile("vpxor %ymm5,%ymm5,%ymm5");
123 asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
124 asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
125 asm volatile("vpand %ymm0,%ymm5,%ymm5");
126 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
128 asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q
[d
]));
129 /* Don't use movntdq for r/w memory area < cache line */
130 asm volatile("vmovdqa %%ymm4,%0" : "=m" (q
[d
]));
131 asm volatile("vmovdqa %%ymm2,%0" : "=m" (p
[d
]));
134 asm volatile("sfence" : : : "memory");
138 const struct raid6_calls raid6_avx2x1
= {
139 raid6_avx21_gen_syndrome
,
140 raid6_avx21_xor_syndrome
,
143 1 /* Has cache hints */
147 * Unrolled-by-2 AVX2 implementation
149 static void raid6_avx22_gen_syndrome(int disks
, size_t bytes
, void **ptrs
)
151 u8
**dptr
= (u8
**)ptrs
;
155 z0
= disks
- 3; /* Highest data disk */
156 p
= dptr
[z0
+1]; /* XOR parity */
157 q
= dptr
[z0
+2]; /* RS syndrome */
161 asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants
.x1d
[0]));
162 asm volatile("vpxor %ymm1,%ymm1,%ymm1"); /* Zero temp */
164 /* We uniformly assume a single prefetch covers at least 32 bytes */
165 for (d
= 0; d
< bytes
; d
+= 64) {
166 asm volatile("prefetchnta %0" : : "m" (dptr
[z0
][d
]));
167 asm volatile("prefetchnta %0" : : "m" (dptr
[z0
][d
+32]));
168 asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr
[z0
][d
]));/* P[0] */
169 asm volatile("vmovdqa %0,%%ymm3" : : "m" (dptr
[z0
][d
+32]));/* P[1] */
170 asm volatile("vmovdqa %ymm2,%ymm4"); /* Q[0] */
171 asm volatile("vmovdqa %ymm3,%ymm6"); /* Q[1] */
172 for (z
= z0
-1; z
>= 0; z
--) {
173 asm volatile("prefetchnta %0" : : "m" (dptr
[z
][d
]));
174 asm volatile("prefetchnta %0" : : "m" (dptr
[z
][d
+32]));
175 asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5");
176 asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7");
177 asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
178 asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
179 asm volatile("vpand %ymm0,%ymm5,%ymm5");
180 asm volatile("vpand %ymm0,%ymm7,%ymm7");
181 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
182 asm volatile("vpxor %ymm7,%ymm6,%ymm6");
183 asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr
[z
][d
]));
184 asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr
[z
][d
+32]));
185 asm volatile("vpxor %ymm5,%ymm2,%ymm2");
186 asm volatile("vpxor %ymm7,%ymm3,%ymm3");
187 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
188 asm volatile("vpxor %ymm7,%ymm6,%ymm6");
190 asm volatile("vmovntdq %%ymm2,%0" : "=m" (p
[d
]));
191 asm volatile("vmovntdq %%ymm3,%0" : "=m" (p
[d
+32]));
192 asm volatile("vmovntdq %%ymm4,%0" : "=m" (q
[d
]));
193 asm volatile("vmovntdq %%ymm6,%0" : "=m" (q
[d
+32]));
196 asm volatile("sfence" : : : "memory");
200 static void raid6_avx22_xor_syndrome(int disks
, int start
, int stop
,
201 size_t bytes
, void **ptrs
)
203 u8
**dptr
= (u8
**)ptrs
;
207 z0
= stop
; /* P/Q right side optimization */
208 p
= dptr
[disks
-2]; /* XOR parity */
209 q
= dptr
[disks
-1]; /* RS syndrome */
213 asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants
.x1d
[0]));
215 for (d
= 0 ; d
< bytes
; d
+= 64) {
216 asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr
[z0
][d
]));
217 asm volatile("vmovdqa %0,%%ymm6" :: "m" (dptr
[z0
][d
+32]));
218 asm volatile("vmovdqa %0,%%ymm2" : : "m" (p
[d
]));
219 asm volatile("vmovdqa %0,%%ymm3" : : "m" (p
[d
+32]));
220 asm volatile("vpxor %ymm4,%ymm2,%ymm2");
221 asm volatile("vpxor %ymm6,%ymm3,%ymm3");
223 for (z
= z0
-1 ; z
>= start
; z
--) {
224 asm volatile("vpxor %ymm5,%ymm5,%ymm5");
225 asm volatile("vpxor %ymm7,%ymm7,%ymm7");
226 asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
227 asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
228 asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
229 asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
230 asm volatile("vpand %ymm0,%ymm5,%ymm5");
231 asm volatile("vpand %ymm0,%ymm7,%ymm7");
232 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
233 asm volatile("vpxor %ymm7,%ymm6,%ymm6");
234 asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr
[z
][d
]));
235 asm volatile("vmovdqa %0,%%ymm7"
236 :: "m" (dptr
[z
][d
+32]));
237 asm volatile("vpxor %ymm5,%ymm2,%ymm2");
238 asm volatile("vpxor %ymm7,%ymm3,%ymm3");
239 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
240 asm volatile("vpxor %ymm7,%ymm6,%ymm6");
242 /* P/Q left side optimization */
243 for (z
= start
-1 ; z
>= 0 ; z
--) {
244 asm volatile("vpxor %ymm5,%ymm5,%ymm5");
245 asm volatile("vpxor %ymm7,%ymm7,%ymm7");
246 asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
247 asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
248 asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
249 asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
250 asm volatile("vpand %ymm0,%ymm5,%ymm5");
251 asm volatile("vpand %ymm0,%ymm7,%ymm7");
252 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
253 asm volatile("vpxor %ymm7,%ymm6,%ymm6");
255 asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q
[d
]));
256 asm volatile("vpxor %0,%%ymm6,%%ymm6" : : "m" (q
[d
+32]));
257 /* Don't use movntdq for r/w memory area < cache line */
258 asm volatile("vmovdqa %%ymm4,%0" : "=m" (q
[d
]));
259 asm volatile("vmovdqa %%ymm6,%0" : "=m" (q
[d
+32]));
260 asm volatile("vmovdqa %%ymm2,%0" : "=m" (p
[d
]));
261 asm volatile("vmovdqa %%ymm3,%0" : "=m" (p
[d
+32]));
264 asm volatile("sfence" : : : "memory");
268 const struct raid6_calls raid6_avx2x2
= {
269 raid6_avx22_gen_syndrome
,
270 raid6_avx22_xor_syndrome
,
273 1 /* Has cache hints */
279 * Unrolled-by-4 AVX2 implementation
281 static void raid6_avx24_gen_syndrome(int disks
, size_t bytes
, void **ptrs
)
283 u8
**dptr
= (u8
**)ptrs
;
287 z0
= disks
- 3; /* Highest data disk */
288 p
= dptr
[z0
+1]; /* XOR parity */
289 q
= dptr
[z0
+2]; /* RS syndrome */
293 asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants
.x1d
[0]));
294 asm volatile("vpxor %ymm1,%ymm1,%ymm1"); /* Zero temp */
295 asm volatile("vpxor %ymm2,%ymm2,%ymm2"); /* P[0] */
296 asm volatile("vpxor %ymm3,%ymm3,%ymm3"); /* P[1] */
297 asm volatile("vpxor %ymm4,%ymm4,%ymm4"); /* Q[0] */
298 asm volatile("vpxor %ymm6,%ymm6,%ymm6"); /* Q[1] */
299 asm volatile("vpxor %ymm10,%ymm10,%ymm10"); /* P[2] */
300 asm volatile("vpxor %ymm11,%ymm11,%ymm11"); /* P[3] */
301 asm volatile("vpxor %ymm12,%ymm12,%ymm12"); /* Q[2] */
302 asm volatile("vpxor %ymm14,%ymm14,%ymm14"); /* Q[3] */
304 for (d
= 0; d
< bytes
; d
+= 128) {
305 for (z
= z0
; z
>= 0; z
--) {
306 asm volatile("prefetchnta %0" : : "m" (dptr
[z
][d
]));
307 asm volatile("prefetchnta %0" : : "m" (dptr
[z
][d
+32]));
308 asm volatile("prefetchnta %0" : : "m" (dptr
[z
][d
+64]));
309 asm volatile("prefetchnta %0" : : "m" (dptr
[z
][d
+96]));
310 asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5");
311 asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7");
312 asm volatile("vpcmpgtb %ymm12,%ymm1,%ymm13");
313 asm volatile("vpcmpgtb %ymm14,%ymm1,%ymm15");
314 asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
315 asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
316 asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
317 asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
318 asm volatile("vpand %ymm0,%ymm5,%ymm5");
319 asm volatile("vpand %ymm0,%ymm7,%ymm7");
320 asm volatile("vpand %ymm0,%ymm13,%ymm13");
321 asm volatile("vpand %ymm0,%ymm15,%ymm15");
322 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
323 asm volatile("vpxor %ymm7,%ymm6,%ymm6");
324 asm volatile("vpxor %ymm13,%ymm12,%ymm12");
325 asm volatile("vpxor %ymm15,%ymm14,%ymm14");
326 asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr
[z
][d
]));
327 asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr
[z
][d
+32]));
328 asm volatile("vmovdqa %0,%%ymm13" : : "m" (dptr
[z
][d
+64]));
329 asm volatile("vmovdqa %0,%%ymm15" : : "m" (dptr
[z
][d
+96]));
330 asm volatile("vpxor %ymm5,%ymm2,%ymm2");
331 asm volatile("vpxor %ymm7,%ymm3,%ymm3");
332 asm volatile("vpxor %ymm13,%ymm10,%ymm10");
333 asm volatile("vpxor %ymm15,%ymm11,%ymm11");
334 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
335 asm volatile("vpxor %ymm7,%ymm6,%ymm6");
336 asm volatile("vpxor %ymm13,%ymm12,%ymm12");
337 asm volatile("vpxor %ymm15,%ymm14,%ymm14");
339 asm volatile("vmovntdq %%ymm2,%0" : "=m" (p
[d
]));
340 asm volatile("vpxor %ymm2,%ymm2,%ymm2");
341 asm volatile("vmovntdq %%ymm3,%0" : "=m" (p
[d
+32]));
342 asm volatile("vpxor %ymm3,%ymm3,%ymm3");
343 asm volatile("vmovntdq %%ymm10,%0" : "=m" (p
[d
+64]));
344 asm volatile("vpxor %ymm10,%ymm10,%ymm10");
345 asm volatile("vmovntdq %%ymm11,%0" : "=m" (p
[d
+96]));
346 asm volatile("vpxor %ymm11,%ymm11,%ymm11");
347 asm volatile("vmovntdq %%ymm4,%0" : "=m" (q
[d
]));
348 asm volatile("vpxor %ymm4,%ymm4,%ymm4");
349 asm volatile("vmovntdq %%ymm6,%0" : "=m" (q
[d
+32]));
350 asm volatile("vpxor %ymm6,%ymm6,%ymm6");
351 asm volatile("vmovntdq %%ymm12,%0" : "=m" (q
[d
+64]));
352 asm volatile("vpxor %ymm12,%ymm12,%ymm12");
353 asm volatile("vmovntdq %%ymm14,%0" : "=m" (q
[d
+96]));
354 asm volatile("vpxor %ymm14,%ymm14,%ymm14");
357 asm volatile("sfence" : : : "memory");
361 static void raid6_avx24_xor_syndrome(int disks
, int start
, int stop
,
362 size_t bytes
, void **ptrs
)
364 u8
**dptr
= (u8
**)ptrs
;
368 z0
= stop
; /* P/Q right side optimization */
369 p
= dptr
[disks
-2]; /* XOR parity */
370 q
= dptr
[disks
-1]; /* RS syndrome */
374 asm volatile("vmovdqa %0,%%ymm0" :: "m" (raid6_avx2_constants
.x1d
[0]));
376 for (d
= 0 ; d
< bytes
; d
+= 128) {
377 asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr
[z0
][d
]));
378 asm volatile("vmovdqa %0,%%ymm6" :: "m" (dptr
[z0
][d
+32]));
379 asm volatile("vmovdqa %0,%%ymm12" :: "m" (dptr
[z0
][d
+64]));
380 asm volatile("vmovdqa %0,%%ymm14" :: "m" (dptr
[z0
][d
+96]));
381 asm volatile("vmovdqa %0,%%ymm2" : : "m" (p
[d
]));
382 asm volatile("vmovdqa %0,%%ymm3" : : "m" (p
[d
+32]));
383 asm volatile("vmovdqa %0,%%ymm10" : : "m" (p
[d
+64]));
384 asm volatile("vmovdqa %0,%%ymm11" : : "m" (p
[d
+96]));
385 asm volatile("vpxor %ymm4,%ymm2,%ymm2");
386 asm volatile("vpxor %ymm6,%ymm3,%ymm3");
387 asm volatile("vpxor %ymm12,%ymm10,%ymm10");
388 asm volatile("vpxor %ymm14,%ymm11,%ymm11");
390 for (z
= z0
-1 ; z
>= start
; z
--) {
391 asm volatile("prefetchnta %0" :: "m" (dptr
[z
][d
]));
392 asm volatile("prefetchnta %0" :: "m" (dptr
[z
][d
+64]));
393 asm volatile("vpxor %ymm5,%ymm5,%ymm5");
394 asm volatile("vpxor %ymm7,%ymm7,%ymm7");
395 asm volatile("vpxor %ymm13,%ymm13,%ymm13");
396 asm volatile("vpxor %ymm15,%ymm15,%ymm15");
397 asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
398 asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
399 asm volatile("vpcmpgtb %ymm12,%ymm13,%ymm13");
400 asm volatile("vpcmpgtb %ymm14,%ymm15,%ymm15");
401 asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
402 asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
403 asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
404 asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
405 asm volatile("vpand %ymm0,%ymm5,%ymm5");
406 asm volatile("vpand %ymm0,%ymm7,%ymm7");
407 asm volatile("vpand %ymm0,%ymm13,%ymm13");
408 asm volatile("vpand %ymm0,%ymm15,%ymm15");
409 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
410 asm volatile("vpxor %ymm7,%ymm6,%ymm6");
411 asm volatile("vpxor %ymm13,%ymm12,%ymm12");
412 asm volatile("vpxor %ymm15,%ymm14,%ymm14");
413 asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr
[z
][d
]));
414 asm volatile("vmovdqa %0,%%ymm7"
415 :: "m" (dptr
[z
][d
+32]));
416 asm volatile("vmovdqa %0,%%ymm13"
417 :: "m" (dptr
[z
][d
+64]));
418 asm volatile("vmovdqa %0,%%ymm15"
419 :: "m" (dptr
[z
][d
+96]));
420 asm volatile("vpxor %ymm5,%ymm2,%ymm2");
421 asm volatile("vpxor %ymm7,%ymm3,%ymm3");
422 asm volatile("vpxor %ymm13,%ymm10,%ymm10");
423 asm volatile("vpxor %ymm15,%ymm11,%ymm11");
424 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
425 asm volatile("vpxor %ymm7,%ymm6,%ymm6");
426 asm volatile("vpxor %ymm13,%ymm12,%ymm12");
427 asm volatile("vpxor %ymm15,%ymm14,%ymm14");
429 asm volatile("prefetchnta %0" :: "m" (q
[d
]));
430 asm volatile("prefetchnta %0" :: "m" (q
[d
+64]));
431 /* P/Q left side optimization */
432 for (z
= start
-1 ; z
>= 0 ; z
--) {
433 asm volatile("vpxor %ymm5,%ymm5,%ymm5");
434 asm volatile("vpxor %ymm7,%ymm7,%ymm7");
435 asm volatile("vpxor %ymm13,%ymm13,%ymm13");
436 asm volatile("vpxor %ymm15,%ymm15,%ymm15");
437 asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
438 asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
439 asm volatile("vpcmpgtb %ymm12,%ymm13,%ymm13");
440 asm volatile("vpcmpgtb %ymm14,%ymm15,%ymm15");
441 asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
442 asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
443 asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
444 asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
445 asm volatile("vpand %ymm0,%ymm5,%ymm5");
446 asm volatile("vpand %ymm0,%ymm7,%ymm7");
447 asm volatile("vpand %ymm0,%ymm13,%ymm13");
448 asm volatile("vpand %ymm0,%ymm15,%ymm15");
449 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
450 asm volatile("vpxor %ymm7,%ymm6,%ymm6");
451 asm volatile("vpxor %ymm13,%ymm12,%ymm12");
452 asm volatile("vpxor %ymm15,%ymm14,%ymm14");
454 asm volatile("vmovntdq %%ymm2,%0" : "=m" (p
[d
]));
455 asm volatile("vmovntdq %%ymm3,%0" : "=m" (p
[d
+32]));
456 asm volatile("vmovntdq %%ymm10,%0" : "=m" (p
[d
+64]));
457 asm volatile("vmovntdq %%ymm11,%0" : "=m" (p
[d
+96]));
458 asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q
[d
]));
459 asm volatile("vpxor %0,%%ymm6,%%ymm6" : : "m" (q
[d
+32]));
460 asm volatile("vpxor %0,%%ymm12,%%ymm12" : : "m" (q
[d
+64]));
461 asm volatile("vpxor %0,%%ymm14,%%ymm14" : : "m" (q
[d
+96]));
462 asm volatile("vmovntdq %%ymm4,%0" : "=m" (q
[d
]));
463 asm volatile("vmovntdq %%ymm6,%0" : "=m" (q
[d
+32]));
464 asm volatile("vmovntdq %%ymm12,%0" : "=m" (q
[d
+64]));
465 asm volatile("vmovntdq %%ymm14,%0" : "=m" (q
[d
+96]));
467 asm volatile("sfence" : : : "memory");
471 const struct raid6_calls raid6_avx2x4
= {
472 raid6_avx24_gen_syndrome
,
473 raid6_avx24_xor_syndrome
,
476 1 /* Has cache hints */
480 #endif /* CONFIG_AS_AVX2 */