#ifndef _ASM_X86_XOR_AVX_H
#define _ASM_X86_XOR_AVX_H

/*
 * Optimized RAID-5 checksumming functions for AVX
 *
 * Copyright (C) 2012 Intel Corporation
 * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
 *
 * Based on Ingo Molnar and Zach Brown's respective MMX and SSE routines
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */

#ifdef CONFIG_AS_AVX

#include <linux/compiler.h>
#include <asm/fpu/api.h>

#define BLOCK4(i) \
		BLOCK(32 * i, 0) \
		BLOCK(32 * (i + 1), 1) \
		BLOCK(32 * (i + 2), 2) \
		BLOCK(32 * (i + 3), 3)

#define BLOCK16() \
		BLOCK4(0) \
		BLOCK4(4) \
		BLOCK4(8) \
		BLOCK4(12)
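
/*
 * BLOCK() is (re)defined by each xor_avx_N() routine below for its source
 * count; BLOCK4()/BLOCK16() merely unroll it across ymm0-ymm3 so that one
 * BLOCK16() expansion touches 16 x 32 bytes = 512 bytes, matching the
 * "lines = bytes >> 9" stride used by every routine.
 */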

static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1)
{
	unsigned long lines = bytes >> 9;

	kernel_fpu_begin();

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
	}

	kernel_fpu_end();
}
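
/*
 * vmovdqa assumes 32-byte-aligned buffers; callers of xor_blocks() pass
 * page-aligned data, so this holds in practice. xor_avx_3/4/5 below follow
 * the same structure, folding one extra source buffer into each BLOCK.
 */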

static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1,
	unsigned long *p2)
{
	unsigned long lines = bytes >> 9;

	kernel_fpu_begin();

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p2[i / sizeof(*p2)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
		p2 = (unsigned long *)((uintptr_t)p2 + 512);
	}

	kernel_fpu_end();
}

static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1,
	unsigned long *p2, unsigned long *p3)
{
	unsigned long lines = bytes >> 9;

	kernel_fpu_begin();

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p3[i / sizeof(*p3)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p2[i / sizeof(*p2)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
		p2 = (unsigned long *)((uintptr_t)p2 + 512);
		p3 = (unsigned long *)((uintptr_t)p3 + 512);
	}

	kernel_fpu_end();
}

static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1,
	unsigned long *p2, unsigned long *p3, unsigned long *p4)
{
	unsigned long lines = bytes >> 9;

	kernel_fpu_begin();

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p4[i / sizeof(*p4)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p3[i / sizeof(*p3)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p2[i / sizeof(*p2)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
		p2 = (unsigned long *)((uintptr_t)p2 + 512);
		p3 = (unsigned long *)((uintptr_t)p3 + 512);
		p4 = (unsigned long *)((uintptr_t)p4 + 512);
	}

	kernel_fpu_end();
}

static struct xor_block_template xor_block_avx = {
	.name = "avx",
	.do_2 = xor_avx_2,
	.do_3 = xor_avx_3,
	.do_4 = xor_avx_4,
	.do_5 = xor_avx_5,
};
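
/*
 * The template plugs into the generic xor_blocks() machinery: xor_speed()
 * benchmarks it against the other candidates and the fastest implementation
 * is used for RAID-5 parity. An illustrative direct call, assuming two
 * 32-byte-aligned, PAGE_SIZE buffers:
 *
 *	xor_block_avx.do_2(PAGE_SIZE, dest, src);
 */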

#define AVX_XOR_SPEED \
do { \
	if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE)) \
		xor_speed(&xor_block_avx); \
} while (0)
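
/*
 * X86_FEATURE_OSXSAVE is checked alongside X86_FEATURE_AVX: it confirms the
 * OS has enabled XSAVE, i.e. that ymm register state is actually saved and
 * restored, so the AVX template is only offered when it is safe to use.
 */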

#define AVX_SELECT(FASTEST) \
	(boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE) ? &xor_block_avx : FASTEST)

#else

#define AVX_XOR_SPEED {}

#define AVX_SELECT(FASTEST) (FASTEST)

#endif
#endif