// SPDX-License-Identifier: GPL-2.0-only
/*
 * RAID6 recovery algorithms in LoongArch SIMD (LSX & LASX)
 *
 * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
 *
 * Originally based on recov_avx2.c and recov_ssse3.c:
 *
 * Copyright (C) 2012 Intel Corporation
 * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
 */

#include <linux/raid/pq.h>
#include "loongarch.h"

/*
 * Unlike with the syndrome calculation algorithms, there's no boot-time
 * selection of recovery algorithms by benchmarking, so we have to specify
 * the priorities and hope the future cores will all have decent vector
 * support (i.e. no LASX slower than LSX, or even scalar code).
 */
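
/*
 * For context (a note added here, not from the original sources): the core
 * code in lib/raid6/algos.c walks raid6_recov_algos and, roughly, keeps the
 * highest ->priority entry whose ->valid() check passes, which is why the
 * raid6_recov_calls structs below carry explicit priority values instead of
 * being benchmarked at boot.
 */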

#ifdef CONFIG_CPU_HAS_LSX
static int raid6_has_lsx(void)
{
	return cpu_has_lsx;
}

static void raid6_2data_recov_lsx(int disks, size_t bytes, int faila,
				  int failb, void **ptrs)
{
	u8 *p, *q, *dp, *dq;
	const u8 *pbmul;	/* P multiplier table for B data */
	const u8 *qmul;		/* Q multiplier table (for both) */

	p = (u8 *)ptrs[disks - 2];
	q = (u8 *)ptrs[disks - 1];

	/*
	 * Compute syndrome with zero for the missing data pages
	 * Use the dead data pages as temporary storage for
	 * delta p and delta q
	 */
	dp = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks - 2] = dp;
	dq = (u8 *)ptrs[failb];
	ptrs[failb] = (void *)raid6_empty_zero_page;
	ptrs[disks - 1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila] = dp;
	ptrs[failb] = dq;
	ptrs[disks - 2] = p;
	ptrs[disks - 1] = q;

	/* Now, pick the proper data tables */
	pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]];
	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]];
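
	/*
	 * For reference, a scalar sketch (not part of the SIMD path) of what
	 * each byte of the loop below computes in GF(256), using the same
	 * Dx/Dy naming as the register comments in the loop:
	 *
	 *   Dx = A * (P ^ Pxy) ^ B * (Q ^ Qxy)
	 *   Dy = (P ^ Pxy) ^ Dx
	 *
	 * where A comes from pbmul (derived from g^(failb - faila)) and B
	 * from qmul (the inverse of g^faila ^ g^failb), per the table
	 * selection above.
	 */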

	kernel_fpu_begin();

	/*
	 * vr20, vr21: qmul
	 * vr22, vr23: pbmul
	 */
	asm volatile("vld $vr20, %0" : : "m" (qmul[0]));
	asm volatile("vld $vr21, %0" : : "m" (qmul[16]));
	asm volatile("vld $vr22, %0" : : "m" (pbmul[0]));
	asm volatile("vld $vr23, %0" : : "m" (pbmul[16]));
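
	/*
	 * Illustrative scalar equivalent of the nibble-table multiply used
	 * throughout the loop below ('tbl' stands for qmul or pbmul):
	 *
	 *   prod = tbl[v & 0x0f] ^ tbl[16 + (v >> 4)];
	 *
	 * vsrli.b/vandi.b split each byte into nibbles, vshuf.b does the
	 * 16-entry table lookups, and vxor.v combines the two halves. Each
	 * iteration handles 64 bytes (four 16-byte vectors), so bytes is
	 * assumed to be a multiple of 64.
	 */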

	while (bytes) {
		/* vr4 - vr7: Q */
		asm volatile("vld $vr4, %0" : : "m" (q[0]));
		asm volatile("vld $vr5, %0" : : "m" (q[16]));
		asm volatile("vld $vr6, %0" : : "m" (q[32]));
		asm volatile("vld $vr7, %0" : : "m" (q[48]));
		/* vr4 - vr7: Q + Qxy */
		asm volatile("vld $vr8, %0" : : "m" (dq[0]));
		asm volatile("vld $vr9, %0" : : "m" (dq[16]));
		asm volatile("vld $vr10, %0" : : "m" (dq[32]));
		asm volatile("vld $vr11, %0" : : "m" (dq[48]));
		asm volatile("vxor.v $vr4, $vr4, $vr8");
		asm volatile("vxor.v $vr5, $vr5, $vr9");
		asm volatile("vxor.v $vr6, $vr6, $vr10");
		asm volatile("vxor.v $vr7, $vr7, $vr11");
		/* vr0 - vr3: P */
		asm volatile("vld $vr0, %0" : : "m" (p[0]));
		asm volatile("vld $vr1, %0" : : "m" (p[16]));
		asm volatile("vld $vr2, %0" : : "m" (p[32]));
		asm volatile("vld $vr3, %0" : : "m" (p[48]));
		/* vr0 - vr3: P + Pxy */
		asm volatile("vld $vr8, %0" : : "m" (dp[0]));
		asm volatile("vld $vr9, %0" : : "m" (dp[16]));
		asm volatile("vld $vr10, %0" : : "m" (dp[32]));
		asm volatile("vld $vr11, %0" : : "m" (dp[48]));
		asm volatile("vxor.v $vr0, $vr0, $vr8");
		asm volatile("vxor.v $vr1, $vr1, $vr9");
		asm volatile("vxor.v $vr2, $vr2, $vr10");
		asm volatile("vxor.v $vr3, $vr3, $vr11");

		/* vr8 - vr11: higher 4 bits of each byte of (Q + Qxy) */
		asm volatile("vsrli.b $vr8, $vr4, 4");
		asm volatile("vsrli.b $vr9, $vr5, 4");
		asm volatile("vsrli.b $vr10, $vr6, 4");
		asm volatile("vsrli.b $vr11, $vr7, 4");
		/* vr4 - vr7: lower 4 bits of each byte of (Q + Qxy) */
		asm volatile("vandi.b $vr4, $vr4, 0x0f");
		asm volatile("vandi.b $vr5, $vr5, 0x0f");
		asm volatile("vandi.b $vr6, $vr6, 0x0f");
		asm volatile("vandi.b $vr7, $vr7, 0x0f");
		/* lookup from qmul[0] */
		asm volatile("vshuf.b $vr4, $vr20, $vr20, $vr4");
		asm volatile("vshuf.b $vr5, $vr20, $vr20, $vr5");
		asm volatile("vshuf.b $vr6, $vr20, $vr20, $vr6");
		asm volatile("vshuf.b $vr7, $vr20, $vr20, $vr7");
		/* lookup from qmul[16] */
		asm volatile("vshuf.b $vr8, $vr21, $vr21, $vr8");
		asm volatile("vshuf.b $vr9, $vr21, $vr21, $vr9");
		asm volatile("vshuf.b $vr10, $vr21, $vr21, $vr10");
		asm volatile("vshuf.b $vr11, $vr21, $vr21, $vr11");
		/* vr16 - vr19: B(Q + Qxy) */
		asm volatile("vxor.v $vr16, $vr8, $vr4");
		asm volatile("vxor.v $vr17, $vr9, $vr5");
		asm volatile("vxor.v $vr18, $vr10, $vr6");
		asm volatile("vxor.v $vr19, $vr11, $vr7");

		/* vr4 - vr7: higher 4 bits of each byte of (P + Pxy) */
		asm volatile("vsrli.b $vr4, $vr0, 4");
		asm volatile("vsrli.b $vr5, $vr1, 4");
		asm volatile("vsrli.b $vr6, $vr2, 4");
		asm volatile("vsrli.b $vr7, $vr3, 4");
		/* vr12 - vr15: lower 4 bits of each byte of (P + Pxy) */
		asm volatile("vandi.b $vr12, $vr0, 0x0f");
		asm volatile("vandi.b $vr13, $vr1, 0x0f");
		asm volatile("vandi.b $vr14, $vr2, 0x0f");
		asm volatile("vandi.b $vr15, $vr3, 0x0f");
		/* lookup from pbmul[0] */
		asm volatile("vshuf.b $vr12, $vr22, $vr22, $vr12");
		asm volatile("vshuf.b $vr13, $vr22, $vr22, $vr13");
		asm volatile("vshuf.b $vr14, $vr22, $vr22, $vr14");
		asm volatile("vshuf.b $vr15, $vr22, $vr22, $vr15");
		/* lookup from pbmul[16] */
		asm volatile("vshuf.b $vr4, $vr23, $vr23, $vr4");
		asm volatile("vshuf.b $vr5, $vr23, $vr23, $vr5");
		asm volatile("vshuf.b $vr6, $vr23, $vr23, $vr6");
		asm volatile("vshuf.b $vr7, $vr23, $vr23, $vr7");
		/* vr4 - vr7: A(P + Pxy) */
		asm volatile("vxor.v $vr4, $vr4, $vr12");
		asm volatile("vxor.v $vr5, $vr5, $vr13");
		asm volatile("vxor.v $vr6, $vr6, $vr14");
		asm volatile("vxor.v $vr7, $vr7, $vr15");

		/* vr4 - vr7: A(P + Pxy) + B(Q + Qxy) = Dx */
		asm volatile("vxor.v $vr4, $vr4, $vr16");
		asm volatile("vxor.v $vr5, $vr5, $vr17");
		asm volatile("vxor.v $vr6, $vr6, $vr18");
		asm volatile("vxor.v $vr7, $vr7, $vr19");
		asm volatile("vst $vr4, %0" : "=m" (dq[0]));
		asm volatile("vst $vr5, %0" : "=m" (dq[16]));
		asm volatile("vst $vr6, %0" : "=m" (dq[32]));
		asm volatile("vst $vr7, %0" : "=m" (dq[48]));

		/* vr0 - vr3: P + Pxy + Dx = Dy */
		asm volatile("vxor.v $vr0, $vr0, $vr4");
		asm volatile("vxor.v $vr1, $vr1, $vr5");
		asm volatile("vxor.v $vr2, $vr2, $vr6");
		asm volatile("vxor.v $vr3, $vr3, $vr7");
		asm volatile("vst $vr0, %0" : "=m" (dp[0]));
		asm volatile("vst $vr1, %0" : "=m" (dp[16]));
		asm volatile("vst $vr2, %0" : "=m" (dp[32]));
		asm volatile("vst $vr3, %0" : "=m" (dp[48]));

		bytes -= 64;
		p += 64;
		q += 64;
		dp += 64;
		dq += 64;
	}

	kernel_fpu_end();
}

static void raid6_datap_recov_lsx(int disks, size_t bytes, int faila,
				  void **ptrs)
{
	u8 *p, *q, *dq;
	const u8 *qmul;		/* Q multiplier table */

	p = (u8 *)ptrs[disks - 2];
	q = (u8 *)ptrs[disks - 1];

	/*
	 * Compute syndrome with zero for the missing data page
	 * Use the dead data page as temporary storage for delta q
	 */
	dq = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks - 1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila] = dq;
	ptrs[disks - 1] = q;

	/* Now, pick the proper data tables */
	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
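
	/*
	 * For reference (scalar view of the loop below): qmul encodes
	 * 1/g^faila, so each recovered byte is Dx = qmul * (Q ^ Qx) in
	 * GF(256); the freshly generated parity, which still lacks Dx's
	 * contribution, is then completed with one more XOR against Dx.
	 */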

	kernel_fpu_begin();

	/* vr22, vr23: qmul */
	asm volatile("vld $vr22, %0" : : "m" (qmul[0]));
	asm volatile("vld $vr23, %0" : : "m" (qmul[16]));

	while (bytes) {
		/* vr0 - vr3: P + Dx */
		asm volatile("vld $vr0, %0" : : "m" (p[0]));
		asm volatile("vld $vr1, %0" : : "m" (p[16]));
		asm volatile("vld $vr2, %0" : : "m" (p[32]));
		asm volatile("vld $vr3, %0" : : "m" (p[48]));
		/* vr4 - vr7: Qx */
		asm volatile("vld $vr4, %0" : : "m" (dq[0]));
		asm volatile("vld $vr5, %0" : : "m" (dq[16]));
		asm volatile("vld $vr6, %0" : : "m" (dq[32]));
		asm volatile("vld $vr7, %0" : : "m" (dq[48]));
		/* vr4 - vr7: Q + Qx */
		asm volatile("vld $vr8, %0" : : "m" (q[0]));
		asm volatile("vld $vr9, %0" : : "m" (q[16]));
		asm volatile("vld $vr10, %0" : : "m" (q[32]));
		asm volatile("vld $vr11, %0" : : "m" (q[48]));
		asm volatile("vxor.v $vr4, $vr4, $vr8");
		asm volatile("vxor.v $vr5, $vr5, $vr9");
		asm volatile("vxor.v $vr6, $vr6, $vr10");
		asm volatile("vxor.v $vr7, $vr7, $vr11");

		/* vr8 - vr11: higher 4 bits of each byte of (Q + Qx) */
		asm volatile("vsrli.b $vr8, $vr4, 4");
		asm volatile("vsrli.b $vr9, $vr5, 4");
		asm volatile("vsrli.b $vr10, $vr6, 4");
		asm volatile("vsrli.b $vr11, $vr7, 4");
		/* vr4 - vr7: lower 4 bits of each byte of (Q + Qx) */
		asm volatile("vandi.b $vr4, $vr4, 0x0f");
		asm volatile("vandi.b $vr5, $vr5, 0x0f");
		asm volatile("vandi.b $vr6, $vr6, 0x0f");
		asm volatile("vandi.b $vr7, $vr7, 0x0f");
		/* lookup from qmul[0] */
		asm volatile("vshuf.b $vr4, $vr22, $vr22, $vr4");
		asm volatile("vshuf.b $vr5, $vr22, $vr22, $vr5");
		asm volatile("vshuf.b $vr6, $vr22, $vr22, $vr6");
		asm volatile("vshuf.b $vr7, $vr22, $vr22, $vr7");
		/* lookup from qmul[16] */
		asm volatile("vshuf.b $vr8, $vr23, $vr23, $vr8");
		asm volatile("vshuf.b $vr9, $vr23, $vr23, $vr9");
		asm volatile("vshuf.b $vr10, $vr23, $vr23, $vr10");
		asm volatile("vshuf.b $vr11, $vr23, $vr23, $vr11");
		/* vr4 - vr7: qmul(Q + Qx) = Dx */
		asm volatile("vxor.v $vr4, $vr4, $vr8");
		asm volatile("vxor.v $vr5, $vr5, $vr9");
		asm volatile("vxor.v $vr6, $vr6, $vr10");
		asm volatile("vxor.v $vr7, $vr7, $vr11");
		asm volatile("vst $vr4, %0" : "=m" (dq[0]));
		asm volatile("vst $vr5, %0" : "=m" (dq[16]));
		asm volatile("vst $vr6, %0" : "=m" (dq[32]));
		asm volatile("vst $vr7, %0" : "=m" (dq[48]));

		/* vr0 - vr3: P + Dx + Dx = P */
		asm volatile("vxor.v $vr0, $vr0, $vr4");
		asm volatile("vxor.v $vr1, $vr1, $vr5");
		asm volatile("vxor.v $vr2, $vr2, $vr6");
		asm volatile("vxor.v $vr3, $vr3, $vr7");
		asm volatile("vst $vr0, %0" : "=m" (p[0]));
		asm volatile("vst $vr1, %0" : "=m" (p[16]));
		asm volatile("vst $vr2, %0" : "=m" (p[32]));
		asm volatile("vst $vr3, %0" : "=m" (p[48]));

		bytes -= 64;
		p += 64;
		q += 64;
		dq += 64;
	}

	kernel_fpu_end();
}

const struct raid6_recov_calls raid6_recov_lsx = {
	.data2 = raid6_2data_recov_lsx,
	.datap = raid6_datap_recov_lsx,
	.valid = raid6_has_lsx,
	.name = "lsx",
	.priority = 1,
};
#endif /* CONFIG_CPU_HAS_LSX */

#ifdef CONFIG_CPU_HAS_LASX
static int raid6_has_lasx(void)
{
	return cpu_has_lasx;
}

static void raid6_2data_recov_lasx(int disks, size_t bytes, int faila,
				   int failb, void **ptrs)
{
	u8 *p, *q, *dp, *dq;
	const u8 *pbmul;	/* P multiplier table for B data */
	const u8 *qmul;		/* Q multiplier table (for both) */

	p = (u8 *)ptrs[disks - 2];
	q = (u8 *)ptrs[disks - 1];

	/*
	 * Compute syndrome with zero for the missing data pages
	 * Use the dead data pages as temporary storage for
	 * delta p and delta q
	 */
	dp = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks - 2] = dp;
	dq = (u8 *)ptrs[failb];
	ptrs[failb] = (void *)raid6_empty_zero_page;
	ptrs[disks - 1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila] = dp;
	ptrs[failb] = dq;
	ptrs[disks - 2] = p;
	ptrs[disks - 1] = q;

	/* Now, pick the proper data tables */
	pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]];
	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]];

	kernel_fpu_begin();

	/*
	 * vr20, vr21: qmul
	 * vr22, vr23: pbmul
	 */
	asm volatile("vld $vr20, %0" : : "m" (qmul[0]));
	asm volatile("vld $vr21, %0" : : "m" (qmul[16]));
	asm volatile("vld $vr22, %0" : : "m" (pbmul[0]));
	asm volatile("vld $vr23, %0" : : "m" (pbmul[16]));
	asm volatile("xvreplve0.q $xr20, $xr20");
	asm volatile("xvreplve0.q $xr21, $xr21");
	asm volatile("xvreplve0.q $xr22, $xr22");
	asm volatile("xvreplve0.q $xr23, $xr23");
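
	/*
	 * Note: the 16-byte lookup tables are loaded into the low 128 bits
	 * and then broadcast to both 128-bit lanes with xvreplve0.q, since
	 * the xvshuf.b lookups below operate within each 128-bit lane
	 * independently.
	 */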

	while (bytes) {
		/* xr0, xr1: Q */
		asm volatile("xvld $xr0, %0" : : "m" (q[0]));
		asm volatile("xvld $xr1, %0" : : "m" (q[32]));
		/* xr0, xr1: Q + Qxy */
		asm volatile("xvld $xr4, %0" : : "m" (dq[0]));
		asm volatile("xvld $xr5, %0" : : "m" (dq[32]));
		asm volatile("xvxor.v $xr0, $xr0, $xr4");
		asm volatile("xvxor.v $xr1, $xr1, $xr5");
		/* xr2, xr3: P */
		asm volatile("xvld $xr2, %0" : : "m" (p[0]));
		asm volatile("xvld $xr3, %0" : : "m" (p[32]));
		/* xr2, xr3: P + Pxy */
		asm volatile("xvld $xr4, %0" : : "m" (dp[0]));
		asm volatile("xvld $xr5, %0" : : "m" (dp[32]));
		asm volatile("xvxor.v $xr2, $xr2, $xr4");
		asm volatile("xvxor.v $xr3, $xr3, $xr5");

		/* xr4, xr5: higher 4 bits of each byte of (Q + Qxy) */
		asm volatile("xvsrli.b $xr4, $xr0, 4");
		asm volatile("xvsrli.b $xr5, $xr1, 4");
		/* xr0, xr1: lower 4 bits of each byte of (Q + Qxy) */
		asm volatile("xvandi.b $xr0, $xr0, 0x0f");
		asm volatile("xvandi.b $xr1, $xr1, 0x0f");
		/* lookup from qmul[0] */
		asm volatile("xvshuf.b $xr0, $xr20, $xr20, $xr0");
		asm volatile("xvshuf.b $xr1, $xr20, $xr20, $xr1");
		/* lookup from qmul[16] */
		asm volatile("xvshuf.b $xr4, $xr21, $xr21, $xr4");
		asm volatile("xvshuf.b $xr5, $xr21, $xr21, $xr5");
		/* xr6, xr7: B(Q + Qxy) */
		asm volatile("xvxor.v $xr6, $xr4, $xr0");
		asm volatile("xvxor.v $xr7, $xr5, $xr1");

		/* xr4, xr5: higher 4 bits of each byte of (P + Pxy) */
		asm volatile("xvsrli.b $xr4, $xr2, 4");
		asm volatile("xvsrli.b $xr5, $xr3, 4");
		/* xr0, xr1: lower 4 bits of each byte of (P + Pxy) */
		asm volatile("xvandi.b $xr0, $xr2, 0x0f");
		asm volatile("xvandi.b $xr1, $xr3, 0x0f");
		/* lookup from pbmul[0] */
		asm volatile("xvshuf.b $xr0, $xr22, $xr22, $xr0");
		asm volatile("xvshuf.b $xr1, $xr22, $xr22, $xr1");
		/* lookup from pbmul[16] */
		asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4");
		asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5");
		/* xr0, xr1: A(P + Pxy) */
		asm volatile("xvxor.v $xr0, $xr0, $xr4");
		asm volatile("xvxor.v $xr1, $xr1, $xr5");

		/* xr0, xr1: A(P + Pxy) + B(Q + Qxy) = Dx */
		asm volatile("xvxor.v $xr0, $xr0, $xr6");
		asm volatile("xvxor.v $xr1, $xr1, $xr7");

		/* xr2, xr3: P + Pxy + Dx = Dy */
		asm volatile("xvxor.v $xr2, $xr2, $xr0");
		asm volatile("xvxor.v $xr3, $xr3, $xr1");

		asm volatile("xvst $xr0, %0" : "=m" (dq[0]));
		asm volatile("xvst $xr1, %0" : "=m" (dq[32]));
		asm volatile("xvst $xr2, %0" : "=m" (dp[0]));
		asm volatile("xvst $xr3, %0" : "=m" (dp[32]));

		bytes -= 64;
		p += 64;
		q += 64;
		dp += 64;
		dq += 64;
	}

	kernel_fpu_end();
}

static void raid6_datap_recov_lasx(int disks, size_t bytes, int faila,
				   void **ptrs)
{
	u8 *p, *q, *dq;
	const u8 *qmul;		/* Q multiplier table */

	p = (u8 *)ptrs[disks - 2];
	q = (u8 *)ptrs[disks - 1];

	/*
	 * Compute syndrome with zero for the missing data page
	 * Use the dead data page as temporary storage for delta q
	 */
	dq = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks - 1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila] = dq;
	ptrs[disks - 1] = q;

	/* Now, pick the proper data tables */
	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];

	kernel_fpu_begin();

	/* xr22, xr23: qmul */
	asm volatile("vld $vr22, %0" : : "m" (qmul[0]));
	asm volatile("xvreplve0.q $xr22, $xr22");
	asm volatile("vld $vr23, %0" : : "m" (qmul[16]));
	asm volatile("xvreplve0.q $xr23, $xr23");

	while (bytes) {
		/* xr0, xr1: P + Dx */
		asm volatile("xvld $xr0, %0" : : "m" (p[0]));
		asm volatile("xvld $xr1, %0" : : "m" (p[32]));
		/* xr2, xr3: Qx */
		asm volatile("xvld $xr2, %0" : : "m" (dq[0]));
		asm volatile("xvld $xr3, %0" : : "m" (dq[32]));
		/* xr2, xr3: Q + Qx */
		asm volatile("xvld $xr4, %0" : : "m" (q[0]));
		asm volatile("xvld $xr5, %0" : : "m" (q[32]));
		asm volatile("xvxor.v $xr2, $xr2, $xr4");
		asm volatile("xvxor.v $xr3, $xr3, $xr5");

		/* xr4, xr5: higher 4 bits of each byte of (Q + Qx) */
		asm volatile("xvsrli.b $xr4, $xr2, 4");
		asm volatile("xvsrli.b $xr5, $xr3, 4");
		/* xr2, xr3: lower 4 bits of each byte of (Q + Qx) */
		asm volatile("xvandi.b $xr2, $xr2, 0x0f");
		asm volatile("xvandi.b $xr3, $xr3, 0x0f");
		/* lookup from qmul[0] */
		asm volatile("xvshuf.b $xr2, $xr22, $xr22, $xr2");
		asm volatile("xvshuf.b $xr3, $xr22, $xr22, $xr3");
		/* lookup from qmul[16] */
		asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4");
		asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5");
		/* xr2, xr3: qmul(Q + Qx) = Dx */
		asm volatile("xvxor.v $xr2, $xr2, $xr4");
		asm volatile("xvxor.v $xr3, $xr3, $xr5");

		/* xr0, xr1: P + Dx + Dx = P */
		asm volatile("xvxor.v $xr0, $xr0, $xr2");
		asm volatile("xvxor.v $xr1, $xr1, $xr3");

		asm volatile("xvst $xr2, %0" : "=m" (dq[0]));
		asm volatile("xvst $xr3, %0" : "=m" (dq[32]));
		asm volatile("xvst $xr0, %0" : "=m" (p[0]));
		asm volatile("xvst $xr1, %0" : "=m" (p[32]));

		bytes -= 64;
		p += 64;
		q += 64;
		dq += 64;
	}

	kernel_fpu_end();
}

const struct raid6_recov_calls raid6_recov_lasx = {
	.data2 = raid6_2data_recov_lasx,
	.datap = raid6_datap_recov_lasx,
	.valid = raid6_has_lasx,
	.name = "lasx",
	.priority = 2,
};
#endif /* CONFIG_CPU_HAS_LASX */