Optimize RAIDZ expansion
[zfs.git] / module / zfs / vdev_raidz_math_aarch64_neonx2.c
blobbd9de91a4ba82abe2edab8038fe59b0d90828b8f
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (C) 2016 Romain Dolbeau. All rights reserved.
 */
25 #include <sys/isa_defs.h>
27 #if defined(__aarch64__)
29 #include "vdev_raidz_math_aarch64_neon_common.h"
/*
 * Configuration for the basic block operations (zero, copy, add, multiply).
 *
 * For each operation FOO this file provides:
 *   FOO_STRIDE    - a count that equals the number of entries in the
 *                   matching index list below;
 *   FOO_DEFINE()  - a sequence of GEN_X_DEFINE_*() helper invocations; the
 *                   helper names cover the index ranges used in the list;
 *   FOO_D         - a comma-separated index list.
 *
 * NOTE(review): the GEN_X_DEFINE_*() helpers are not defined in this file;
 * presumably they come from vdev_raidz_math_aarch64_neon_common.h and the
 * indices name the variables those helpers declare - confirm there.
 */
#define SYN_STRIDE 4

/* Zero: eight indices per iteration. */
#define ZERO_STRIDE 8
#define ZERO_DEFINE() \
	GEN_X_DEFINE_0_3() \
	GEN_X_DEFINE_4_5() \
	GEN_X_DEFINE_6_7()
#define ZERO_D 0, 1, 2, 3, 4, 5, 6, 7

/* Copy: same layout as zero. */
#define COPY_STRIDE 8
#define COPY_DEFINE() \
	GEN_X_DEFINE_0_3() \
	GEN_X_DEFINE_4_5() \
	GEN_X_DEFINE_6_7()
#define COPY_D 0, 1, 2, 3, 4, 5, 6, 7

/* Add: same layout as zero. */
#define ADD_STRIDE 8
#define ADD_DEFINE() \
	GEN_X_DEFINE_0_3() \
	GEN_X_DEFINE_4_5() \
	GEN_X_DEFINE_6_7()
#define ADD_D 0, 1, 2, 3, 4, 5, 6, 7

/*
 * Multiply: four indices per iteration; additionally pulls in the
 * 33..36 helper, whose indices are not listed in MUL_D.
 */
#define MUL_STRIDE 4
#define MUL_DEFINE() \
	GEN_X_DEFINE_0_3() \
	GEN_X_DEFINE_33_36()
#define MUL_D 0, 1, 2, 3
/*
 * Configuration for the parity generation methods (P, PQ, PQR).
 *
 * Each *_STRIDE equals the number of entries in the index lists of the
 * same method, and each *_DEFINE() is a sequence of GEN_X_DEFINE_*()
 * helper invocations.
 *
 * NOTE(review): the GEN_X_DEFINE_*() helpers are not defined in this file;
 * presumably they come from vdev_raidz_math_aarch64_neon_common.h and the
 * indices name the variables those helpers declare - confirm there.
 */

/* P only: one list of four indices. */
#define GEN_P_DEFINE() \
	GEN_X_DEFINE_0_3() \
	GEN_X_DEFINE_33_36()
#define GEN_P_STRIDE 4
#define GEN_P_P 0, 1, 2, 3

/* P and Q: two disjoint lists of four indices (D and C). */
#define GEN_PQ_DEFINE() \
	GEN_X_DEFINE_0_3() \
	GEN_X_DEFINE_4_5() \
	GEN_X_DEFINE_6_7() \
	GEN_X_DEFINE_16() \
	GEN_X_DEFINE_17() \
	GEN_X_DEFINE_33_36()
#define GEN_PQ_STRIDE 4
#define GEN_PQ_D 0, 1, 2, 3
#define GEN_PQ_C 4, 5, 6, 7

/* P, Q and R: same layout as the PQ case. */
#define GEN_PQR_DEFINE() \
	GEN_X_DEFINE_0_3() \
	GEN_X_DEFINE_4_5() \
	GEN_X_DEFINE_6_7() \
	GEN_X_DEFINE_16() \
	GEN_X_DEFINE_17() \
	GEN_X_DEFINE_33_36()
#define GEN_PQR_STRIDE 4
#define GEN_PQR_D 0, 1, 2, 3
#define GEN_PQR_C 4, 5, 6, 7
/*
 * Configuration for the SYN_Q and SYN_R methods.
 *
 * Each *_STRIDE equals the number of entries in the D and X index lists of
 * the same method, and each *_DEFINE() is a sequence of GEN_X_DEFINE_*()
 * helper invocations.
 *
 * NOTE(review): the GEN_X_DEFINE_*() helpers are not defined in this file;
 * presumably they come from vdev_raidz_math_aarch64_neon_common.h and the
 * indices name the variables those helpers declare - confirm there.
 */
#define SYN_Q_DEFINE() \
	GEN_X_DEFINE_0_3() \
	GEN_X_DEFINE_4_5() \
	GEN_X_DEFINE_6_7() \
	GEN_X_DEFINE_16() \
	GEN_X_DEFINE_17() \
	GEN_X_DEFINE_33_36()
#define SYN_Q_STRIDE 4
#define SYN_Q_D 0, 1, 2, 3
#define SYN_Q_X 4, 5, 6, 7

#define SYN_R_DEFINE() \
	GEN_X_DEFINE_0_3() \
	GEN_X_DEFINE_4_5() \
	GEN_X_DEFINE_6_7() \
	GEN_X_DEFINE_16() \
	GEN_X_DEFINE_17() \
	GEN_X_DEFINE_33_36()
#define SYN_R_STRIDE 4
#define SYN_R_D 0, 1, 2, 3
#define SYN_R_X 4, 5, 6, 7
/*
 * Configuration for the two-parity methods: a SYN_* / REC_* pair for each
 * of the PQ, PR and QR combinations.
 *
 * Each *_STRIDE equals the number of entries in the index lists of the
 * same method.  The SYN_* variants use two lists (D and X); the REC_*
 * variants use three (X, Y and T), where T takes indices 8, 9, 22, 23 and
 * the matching *_DEFINE() pulls in the 8_9 and 22_23 helpers for them.
 *
 * NOTE(review): the GEN_X_DEFINE_*() helpers are not defined in this file;
 * presumably they come from vdev_raidz_math_aarch64_neon_common.h and the
 * indices name the variables those helpers declare - confirm there.
 */

/* PQ */
#define SYN_PQ_DEFINE() \
	GEN_X_DEFINE_0_3() \
	GEN_X_DEFINE_4_5() \
	GEN_X_DEFINE_6_7() \
	GEN_X_DEFINE_16() \
	GEN_X_DEFINE_17() \
	GEN_X_DEFINE_33_36()
#define SYN_PQ_STRIDE 4
#define SYN_PQ_D 0, 1, 2, 3
#define SYN_PQ_X 4, 5, 6, 7

#define REC_PQ_DEFINE() \
	GEN_X_DEFINE_0_3() \
	GEN_X_DEFINE_4_5() \
	GEN_X_DEFINE_6_7() \
	GEN_X_DEFINE_8_9() \
	GEN_X_DEFINE_22_23() \
	GEN_X_DEFINE_33_36()
#define REC_PQ_STRIDE 4
#define REC_PQ_X 0, 1, 2, 3
#define REC_PQ_Y 4, 5, 6, 7
#define REC_PQ_T 8, 9, 22, 23

/* PR */
#define SYN_PR_DEFINE() \
	GEN_X_DEFINE_0_3() \
	GEN_X_DEFINE_4_5() \
	GEN_X_DEFINE_6_7() \
	GEN_X_DEFINE_16() \
	GEN_X_DEFINE_17() \
	GEN_X_DEFINE_33_36()
#define SYN_PR_STRIDE 4
#define SYN_PR_D 0, 1, 2, 3
#define SYN_PR_X 4, 5, 6, 7

#define REC_PR_DEFINE() \
	GEN_X_DEFINE_0_3() \
	GEN_X_DEFINE_4_5() \
	GEN_X_DEFINE_6_7() \
	GEN_X_DEFINE_8_9() \
	GEN_X_DEFINE_22_23() \
	GEN_X_DEFINE_33_36()
#define REC_PR_STRIDE 4
#define REC_PR_X 0, 1, 2, 3
#define REC_PR_Y 4, 5, 6, 7
#define REC_PR_T 8, 9, 22, 23

/* QR */
#define SYN_QR_DEFINE() \
	GEN_X_DEFINE_0_3() \
	GEN_X_DEFINE_4_5() \
	GEN_X_DEFINE_6_7() \
	GEN_X_DEFINE_16() \
	GEN_X_DEFINE_17() \
	GEN_X_DEFINE_33_36()
#define SYN_QR_STRIDE 4
#define SYN_QR_D 0, 1, 2, 3
#define SYN_QR_X 4, 5, 6, 7

#define REC_QR_DEFINE() \
	GEN_X_DEFINE_0_3() \
	GEN_X_DEFINE_4_5() \
	GEN_X_DEFINE_6_7() \
	GEN_X_DEFINE_8_9() \
	GEN_X_DEFINE_22_23() \
	GEN_X_DEFINE_33_36()
#define REC_QR_STRIDE 4
#define REC_QR_X 0, 1, 2, 3
#define REC_QR_Y 4, 5, 6, 7
#define REC_QR_T 8, 9, 22, 23
/*
 * Configuration for the three-parity (PQR) methods.
 *
 * SYN_PQR uses a stride of 4 with two four-entry lists (D and X).
 * REC_PQR uses a stride of 2 with five two-entry lists (X, Y, Z, XS, YS)
 * that together take indices 0..9; its *_DEFINE() additionally pulls in
 * the 31 and 32 helpers, whose indices are not listed here.
 *
 * NOTE(review): the GEN_X_DEFINE_*() helpers are not defined in this file;
 * presumably they come from vdev_raidz_math_aarch64_neon_common.h and the
 * indices name the variables those helpers declare - confirm there.
 */
#define SYN_PQR_DEFINE() \
	GEN_X_DEFINE_0_3() \
	GEN_X_DEFINE_4_5() \
	GEN_X_DEFINE_6_7() \
	GEN_X_DEFINE_16() \
	GEN_X_DEFINE_17() \
	GEN_X_DEFINE_33_36()
#define SYN_PQR_STRIDE 4
#define SYN_PQR_D 0, 1, 2, 3
#define SYN_PQR_X 4, 5, 6, 7

#define REC_PQR_DEFINE() \
	GEN_X_DEFINE_0_3() \
	GEN_X_DEFINE_4_5() \
	GEN_X_DEFINE_6_7() \
	GEN_X_DEFINE_8_9() \
	GEN_X_DEFINE_31() \
	GEN_X_DEFINE_32() \
	GEN_X_DEFINE_33_36()
#define REC_PQR_STRIDE 2
#define REC_PQR_X 0, 1
#define REC_PQR_Y 2, 3
#define REC_PQR_Z 4, 5
#define REC_PQR_XS 6, 7
#define REC_PQR_YS 8, 9
205 #include <sys/vdev_raidz_impl.h>
206 #include "vdev_raidz_math_impl.h"
DEFINE_GEN_METHODS(aarch64_neonx2);

/*
 * If compiled with -O0, gcc doesn't do any stack frame coalescing
 * and -Wframe-larger-than=1024 is triggered in debug mode.
 *
 * The warning is silenced only around the reconstruction methods: the
 * diagnostic state is pushed before it is changed so that the pop below
 * restores exactly the state that was in effect beforehand, instead of
 * falling back to the command-line settings.
 */
#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wframe-larger-than="
#endif
DEFINE_REC_METHODS(aarch64_neonx2);
#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic pop
#endif
221 static boolean_t
222 raidz_will_aarch64_neonx2_work(void)
224 return (kfpu_allowed());
227 const raidz_impl_ops_t vdev_raidz_aarch64_neonx2_impl = {
228 .init = NULL,
229 .fini = NULL,
230 .gen = RAIDZ_GEN_METHODS(aarch64_neonx2),
231 .rec = RAIDZ_REC_METHODS(aarch64_neonx2),
232 .is_supported = &raidz_will_aarch64_neonx2_work,
233 .name = "aarch64_neonx2"
236 #endif /* defined(__aarch64__) */