// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Asynchronous RAID-6 recovery calculations ASYNC_TX API.
 * Copyright(c) 2009 Intel Corporation
 *
 * based on raid6recov.c:
 *   Copyright 2002 H. Peter Anvin
 */
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/dma-mapping.h>
#include <linux/raid/pq.h>
#include <linux/async_tx.h>
#include <linux/dmaengine.h>
static struct dma_async_tx_descriptor *
async_sum_product(struct page *dest, unsigned int d_off,
		struct page **srcs, unsigned int *src_offs, unsigned char *coef,
		size_t len, struct async_submit_ctl *submit)
{
	struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
						      &dest, 1, srcs, 2, len);
	struct dma_device *dma = chan ? chan->device : NULL;
	struct dmaengine_unmap_data *unmap = NULL;
	const u8 *amul, *bmul;
	u8 ax, bx;
	u8 *a, *b, *c;

	if (dma)
		unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOWAIT);

	if (unmap) {
		struct device *dev = dma->dev;
		dma_addr_t pq[2];
		struct dma_async_tx_descriptor *tx;
		enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;

		if (submit->flags & ASYNC_TX_FENCE)
			dma_flags |= DMA_PREP_FENCE;
		unmap->addr[0] = dma_map_page(dev, srcs[0], src_offs[0],
					      len, DMA_TO_DEVICE);
		unmap->addr[1] = dma_map_page(dev, srcs[1], src_offs[1],
					      len, DMA_TO_DEVICE);
		unmap->to_cnt = 2;

		unmap->addr[2] = dma_map_page(dev, dest, d_off,
					      len, DMA_BIDIRECTIONAL);
		unmap->bidi_cnt = 1;
		/* engine only looks at Q, but expects it to follow P */
		pq[1] = unmap->addr[2];

		unmap->len = len;
		tx = dma->device_prep_dma_pq(chan, pq, unmap->addr, 2, coef,
					     len, dma_flags);
		if (tx) {
			dma_set_unmap(tx, unmap);
			async_tx_submit(chan, tx, submit);
			dmaengine_unmap_put(unmap);
			return tx;
		}

		/* could not get a descriptor, unmap and fall through to
		 * the synchronous path
		 */
		dmaengine_unmap_put(unmap);
	}

	/* run the operation synchronously */
	async_tx_quiesce(&submit->depend_tx);
	amul = raid6_gfmul[coef[0]];
	bmul = raid6_gfmul[coef[1]];
	a = page_address(srcs[0]) + src_offs[0];
	b = page_address(srcs[1]) + src_offs[1];
	c = page_address(dest) + d_off;

	while (len--) {
		ax    = amul[*a++];
		bx    = bmul[*b++];
		*c++ = ax ^ bx;
	}

	return NULL;
}
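
/*
 * Illustrative example: the synchronous tail above recovers one byte at a
 * time over GF(2^8), computing dest = coef[0]*srcs[0] ^ coef[1]*srcs[1]
 * via the 256-entry lookup tables raid6_gfmul[coef]. For instance, with
 * the RAID-6 generator g = 2, raid6_gfmul[2][0x80] == 0x1d: 0x80 * 2
 * overflows the field and is reduced by the field polynomial
 * x^8 + x^4 + x^3 + x^2 + 1 (0x11d).
 */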
static struct dma_async_tx_descriptor *
async_mult(struct page *dest, unsigned int d_off, struct page *src,
		unsigned int s_off, u8 coef, size_t len,
		struct async_submit_ctl *submit)
{
	struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
						      &dest, 1, &src, 1, len);
	struct dma_device *dma = chan ? chan->device : NULL;
	struct dmaengine_unmap_data *unmap = NULL;
	const u8 *qmul; /* Q multiplier table */
	u8 *d, *s;

	if (dma)
		unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOWAIT);

	if (unmap) {
		dma_addr_t dma_dest[2];
		struct device *dev = dma->dev;
		struct dma_async_tx_descriptor *tx;
		enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;

		if (submit->flags & ASYNC_TX_FENCE)
			dma_flags |= DMA_PREP_FENCE;
		unmap->addr[0] = dma_map_page(dev, src, s_off,
					      len, DMA_TO_DEVICE);
		unmap->to_cnt++;
		unmap->addr[1] = dma_map_page(dev, dest, d_off,
					      len, DMA_BIDIRECTIONAL);
		dma_dest[1] = unmap->addr[1];
		unmap->bidi_cnt++;
		unmap->len = len;

		/* this looks funny, but the engine looks for Q at
		 * dma_dest[1] and ignores dma_dest[0] as a dest
		 * due to DMA_PREP_PQ_DISABLE_P
		 */
		tx = dma->device_prep_dma_pq(chan, dma_dest, unmap->addr,
					     1, &coef, len, dma_flags);

		if (tx) {
			dma_set_unmap(tx, unmap);
			dmaengine_unmap_put(unmap);
			async_tx_submit(chan, tx, submit);
			return tx;
		}

		/* could not get a descriptor, unmap and fall through to
		 * the synchronous path
		 */
		dmaengine_unmap_put(unmap);
	}

	/* no channel available, or failed to allocate a descriptor, so
	 * perform the operation synchronously
	 */
	async_tx_quiesce(&submit->depend_tx);
	qmul  = raid6_gfmul[coef];
	d = page_address(dest) + d_off;
	s = page_address(src) + s_off;

	while (len--)
		*d++ = qmul[*s++];

	return NULL;
}
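
/*
 * Illustrative example: async_mult() scales a single source page by a
 * GF(2^8) constant, dest[i] = coef * src[i] for every byte. The recovery
 * helpers below use it to synthesize the Q contribution of one known-good
 * data block, e.g. (arguments as used in __2data_recov_5()):
 *
 *	tx = async_mult(dq, dq_off, g, g_off, raid6_gfexp[good], bytes,
 *			submit);
 *
 * which leaves that block's term of the Q syndrome, g_block * 2^good, in
 * the dead page being reused as "delta q".
 */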
static struct dma_async_tx_descriptor *
__2data_recov_4(int disks, size_t bytes, int faila, int failb,
		struct page **blocks, unsigned int *offs,
		struct async_submit_ctl *submit)
{
	struct dma_async_tx_descriptor *tx = NULL;
	struct page *p, *q, *a, *b;
	unsigned int p_off, q_off, a_off, b_off;
	struct page *srcs[2];
	unsigned int src_offs[2];
	unsigned char coef[2];
	enum async_tx_flags flags = submit->flags;
	dma_async_tx_callback cb_fn = submit->cb_fn;
	void *cb_param = submit->cb_param;
	void *scribble = submit->scribble;

	p = blocks[disks-2];
	p_off = offs[disks-2];
	q = blocks[disks-1];
	q_off = offs[disks-1];

	a = blocks[faila];
	a_off = offs[faila];
	b = blocks[failb];
	b_off = offs[failb];

	/* in the 4 disk case P + Pxy == P and Q + Qxy == Q */
	/* Dx = A*(P+Pxy) + B*(Q+Qxy) */
	srcs[0] = p;
	src_offs[0] = p_off;
	srcs[1] = q;
	src_offs[1] = q_off;
	coef[0] = raid6_gfexi[failb-faila];
	coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
	tx = async_sum_product(b, b_off, srcs, src_offs, coef, bytes, submit);

	/* Dy = P+Pxy+Dx */
	srcs[0] = p;
	src_offs[0] = p_off;
	srcs[1] = b;
	src_offs[1] = b_off;
	init_async_submit(submit, flags | ASYNC_TX_XOR_ZERO_DST, tx, cb_fn,
			  cb_param, scribble);
	tx = async_xor_offs(a, a_off, srcs, src_offs, 2, bytes, submit);

	return tx;
}
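
/*
 * Illustrative example: in the 4-disk case the only surviving members are
 * P and Q, so the partial syndromes over the remaining data are zero and
 * P+Pxy == P, Q+Qxy == Q. Per byte the two steps above therefore reduce
 * to
 *
 *	Dx = coef[0]*P ^ coef[1]*Q		(async_sum_product)
 *	Dy = P ^ Dx				(async_xor_offs)
 *
 * with coef[0] = raid6_gfexi[failb-faila] and
 * coef[1] = raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]].
 */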
static struct dma_async_tx_descriptor *
__2data_recov_5(int disks, size_t bytes, int faila, int failb,
		struct page **blocks, unsigned int *offs,
		struct async_submit_ctl *submit)
{
	struct dma_async_tx_descriptor *tx = NULL;
	struct page *p, *q, *g, *dp, *dq;
	unsigned int p_off, q_off, g_off, dp_off, dq_off;
	struct page *srcs[2];
	unsigned int src_offs[2];
	unsigned char coef[2];
	enum async_tx_flags flags = submit->flags;
	dma_async_tx_callback cb_fn = submit->cb_fn;
	void *cb_param = submit->cb_param;
	void *scribble = submit->scribble;
	int good_srcs, good, i;

	good_srcs = 0;
	good = -1;
	for (i = 0; i < disks-2; i++) {
		if (blocks[i] == NULL)
			continue;
		if (i == faila || i == failb)
			continue;
		good = i;
		good_srcs++;
	}
	BUG_ON(good_srcs > 1);

	p = blocks[disks-2];
	p_off = offs[disks-2];
	q = blocks[disks-1];
	q_off = offs[disks-1];
	g = blocks[good];
	g_off = offs[good];

	/* Compute syndrome with zero for the missing data pages
	 * Use the dead data pages as temporary storage for delta p and
	 * delta q
	 */
	dp = blocks[faila];
	dp_off = offs[faila];
	dq = blocks[failb];
	dq_off = offs[failb];

	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
	tx = async_memcpy(dp, g, dp_off, g_off, bytes, submit);
	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
	tx = async_mult(dq, dq_off, g, g_off,
			raid6_gfexp[good], bytes, submit);

	/* compute P + Pxy */
	srcs[0] = dp;
	src_offs[0] = dp_off;
	srcs[1] = p;
	src_offs[1] = p_off;
	init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
			  NULL, NULL, scribble);
	tx = async_xor_offs(dp, dp_off, srcs, src_offs, 2, bytes, submit);

	/* compute Q + Qxy */
	srcs[0] = dq;
	src_offs[0] = dq_off;
	srcs[1] = q;
	src_offs[1] = q_off;
	init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
			  NULL, NULL, scribble);
	tx = async_xor_offs(dq, dq_off, srcs, src_offs, 2, bytes, submit);

	/* Dx = A*(P+Pxy) + B*(Q+Qxy) */
	srcs[0] = dp;
	src_offs[0] = dp_off;
	srcs[1] = dq;
	src_offs[1] = dq_off;
	coef[0] = raid6_gfexi[failb-faila];
	coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
	tx = async_sum_product(dq, dq_off, srcs, src_offs, coef, bytes, submit);

	/* Dy = P+Pxy+Dx */
	srcs[0] = dp;
	src_offs[0] = dp_off;
	srcs[1] = dq;
	src_offs[1] = dq_off;
	init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
			  cb_param, scribble);
	tx = async_xor_offs(dp, dp_off, srcs, src_offs, 2, bytes, submit);

	return tx;
}
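
/*
 * Illustrative note: with exactly one surviving data block the partial
 * syndromes can be formed directly instead of running a full
 * async_gen_syndrome(): P+Pxy is just P ^ g_block (the memcpy plus xor
 * above) and Q+Qxy is Q ^ g_block * 2^good (the async_mult plus xor),
 * after which the Dx/Dy steps are identical to the other recovery paths.
 */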
static struct dma_async_tx_descriptor *
__2data_recov_n(int disks, size_t bytes, int faila, int failb,
		struct page **blocks, unsigned int *offs,
		struct async_submit_ctl *submit)
{
	struct dma_async_tx_descriptor *tx = NULL;
	struct page *p, *q, *dp, *dq;
	unsigned int p_off, q_off, dp_off, dq_off;
	struct page *srcs[2];
	unsigned int src_offs[2];
	unsigned char coef[2];
	enum async_tx_flags flags = submit->flags;
	dma_async_tx_callback cb_fn = submit->cb_fn;
	void *cb_param = submit->cb_param;
	void *scribble = submit->scribble;

	p = blocks[disks-2];
	p_off = offs[disks-2];
	q = blocks[disks-1];
	q_off = offs[disks-1];

	/* Compute syndrome with zero for the missing data pages
	 * Use the dead data pages as temporary storage for
	 * delta p and delta q
	 */
	dp = blocks[faila];
	dp_off = offs[faila];
	blocks[faila] = NULL;
	blocks[disks-2] = dp;
	offs[disks-2] = dp_off;
	dq = blocks[failb];
	dq_off = offs[failb];
	blocks[failb] = NULL;
	blocks[disks-1] = dq;
	offs[disks-1] = dq_off;

	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
	tx = async_gen_syndrome(blocks, offs, disks, bytes, submit);

	/* Restore pointer table */
	blocks[faila] = dp;
	offs[faila] = dp_off;
	blocks[failb] = dq;
	offs[failb] = dq_off;
	blocks[disks-2] = p;
	offs[disks-2] = p_off;
	blocks[disks-1] = q;
	offs[disks-1] = q_off;

	/* compute P + Pxy */
	srcs[0] = dp;
	src_offs[0] = dp_off;
	srcs[1] = p;
	src_offs[1] = p_off;
	init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
			  NULL, NULL, scribble);
	tx = async_xor_offs(dp, dp_off, srcs, src_offs, 2, bytes, submit);

	/* compute Q + Qxy */
	srcs[0] = dq;
	src_offs[0] = dq_off;
	srcs[1] = q;
	src_offs[1] = q_off;
	init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
			  NULL, NULL, scribble);
	tx = async_xor_offs(dq, dq_off, srcs, src_offs, 2, bytes, submit);

	/* Dx = A*(P+Pxy) + B*(Q+Qxy) */
	srcs[0] = dp;
	src_offs[0] = dp_off;
	srcs[1] = dq;
	src_offs[1] = dq_off;
	coef[0] = raid6_gfexi[failb-faila];
	coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
	tx = async_sum_product(dq, dq_off, srcs, src_offs, coef, bytes, submit);

	/* Dy = P+Pxy+Dx */
	srcs[0] = dp;
	src_offs[0] = dp_off;
	srcs[1] = dq;
	src_offs[1] = dq_off;
	init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
			  cb_param, scribble);
	tx = async_xor_offs(dp, dp_off, srcs, src_offs, 2, bytes, submit);

	return tx;
}
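
/*
 * Illustrative example: the pointer-table shuffle above lets a single
 * async_gen_syndrome() call produce Pxy and Qxy. For a hypothetical
 * 6-disk stripe with faila == 1 and failb == 3, the array handed to
 * async_gen_syndrome() looks like
 *
 *	blocks[] = { D0, NULL, D2, NULL, dp, dq }
 *
 * where NULL sources are treated as zero data, so the "P"/"Q" slots (here
 * the dead pages dp and dq) receive the syndromes computed over the
 * surviving data only; the table is restored immediately afterwards.
 */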
/**
 * async_raid6_2data_recov - asynchronously calculate two missing data blocks
 * @disks: number of disks in the RAID-6 array
 * @bytes: block size
 * @faila: first failed drive index
 * @failb: second failed drive index
 * @blocks: array of source pointers where the last two entries are p and q
 * @offs: array of offsets for pages in blocks
 * @submit: submission/completion modifiers
 */
struct dma_async_tx_descriptor *
async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
			struct page **blocks, unsigned int *offs,
			struct async_submit_ctl *submit)
{
	void *scribble = submit->scribble;
	int non_zero_srcs, i;

	BUG_ON(faila == failb);
	if (failb < faila)
		swap(faila, failb);

	pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes);

	/* if a dma resource is not available or a scribble buffer is not
	 * available punt to the synchronous path.  In the 'dma not
	 * available' case be sure to use the scribble buffer to
	 * preserve the content of 'blocks' as the caller intended.
	 */
	if (!async_dma_find_channel(DMA_PQ) || !scribble) {
		void **ptrs = scribble ? scribble : (void **) blocks;

		async_tx_quiesce(&submit->depend_tx);
		for (i = 0; i < disks; i++)
			if (blocks[i] == NULL)
				ptrs[i] = (void *) raid6_empty_zero_page;
			else
				ptrs[i] = page_address(blocks[i]) + offs[i];

		raid6_2data_recov(disks, bytes, faila, failb, ptrs);

		async_tx_sync_epilog(submit);

		return NULL;
	}

	non_zero_srcs = 0;
	for (i = 0; i < disks-2 && non_zero_srcs < 4; i++)
		if (blocks[i])
			non_zero_srcs++;
	switch (non_zero_srcs) {
	case 0:
	case 1:
		/* There must be at least 2 sources - the failed devices. */
		BUG();

	case 2:
		/* dma devices do not uniformly understand a zero source pq
		 * operation (in contrast to the synchronous case), so
		 * explicitly handle the special case of a 4 disk array with
		 * both data disks missing.
		 */
		return __2data_recov_4(disks, bytes, faila, failb,
				       blocks, offs, submit);
	case 3:
		/* dma devices do not uniformly understand a single
		 * source pq operation (in contrast to the synchronous
		 * case), so explicitly handle the special case of a 5 disk
		 * array with 2 of 3 data disks missing.
		 */
		return __2data_recov_5(disks, bytes, faila, failb,
				       blocks, offs, submit);
	default:
		return __2data_recov_n(disks, bytes, faila, failb,
				       blocks, offs, submit);
	}
}
EXPORT_SYMBOL_GPL(async_raid6_2data_recov);
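
/*
 * Illustrative usage sketch (hypothetical caller, not taken from this
 * file): recovering two failed data blocks of a stripe, assuming the
 * caller already has blocks[], offs[], faila/failb, a completion callback
 * and a scribble buffer sized for 'disks' pointers:
 *
 *	struct async_submit_ctl submit;
 *	struct dma_async_tx_descriptor *tx;
 *
 *	init_async_submit(&submit, ASYNC_TX_ACK, NULL, callback, cb_arg,
 *			  scribble);
 *	tx = async_raid6_2data_recov(disks, PAGE_SIZE, faila, failb,
 *				     blocks, offs, &submit);
 *	async_tx_quiesce(&tx);		// only if the caller must block
 */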
/**
 * async_raid6_datap_recov - asynchronously calculate a data and the 'p' block
 * @disks: number of disks in the RAID-6 array
 * @bytes: block size
 * @faila: failed drive index
 * @blocks: array of source pointers where the last two entries are p and q
 * @offs: array of offsets for pages in blocks
 * @submit: submission/completion modifiers
 */
struct dma_async_tx_descriptor *
async_raid6_datap_recov(int disks, size_t bytes, int faila,
			struct page **blocks, unsigned int *offs,
			struct async_submit_ctl *submit)
{
	struct dma_async_tx_descriptor *tx = NULL;
	struct page *p, *q, *dq;
	unsigned int p_off, q_off, dq_off;
	u8 coef;
	enum async_tx_flags flags = submit->flags;
	dma_async_tx_callback cb_fn = submit->cb_fn;
	void *cb_param = submit->cb_param;
	void *scribble = submit->scribble;
	int good_srcs, good, i;
	struct page *srcs[2];
	unsigned int src_offs[2];

	pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes);

	/* if a dma resource is not available or a scribble buffer is not
	 * available punt to the synchronous path.  In the 'dma not
	 * available' case be sure to use the scribble buffer to
	 * preserve the content of 'blocks' as the caller intended.
	 */
	if (!async_dma_find_channel(DMA_PQ) || !scribble) {
		void **ptrs = scribble ? scribble : (void **) blocks;

		async_tx_quiesce(&submit->depend_tx);
		for (i = 0; i < disks; i++)
			if (blocks[i] == NULL)
				ptrs[i] = (void*)raid6_empty_zero_page;
			else
				ptrs[i] = page_address(blocks[i]) + offs[i];

		raid6_datap_recov(disks, bytes, faila, ptrs);

		async_tx_sync_epilog(submit);

		return NULL;
	}

	good_srcs = 0;
	good = -1;
	for (i = 0; i < disks-2; i++) {
		if (i == faila)
			continue;
		if (blocks[i]) {
			good = i;
			good_srcs++;
			if (good_srcs > 1)
				break;
		}
	}
	BUG_ON(good_srcs == 0);

	p = blocks[disks-2];
	p_off = offs[disks-2];
	q = blocks[disks-1];
	q_off = offs[disks-1];

	/* Compute syndrome with zero for the missing data page
	 * Use the dead data page as temporary storage for delta q
	 */
	dq = blocks[faila];
	dq_off = offs[faila];
	blocks[faila] = NULL;
	blocks[disks-1] = dq;
	offs[disks-1] = dq_off;

	/* in the 4-disk case we only need to perform a single source
	 * multiplication with the one good data block.
	 */
	if (good_srcs == 1) {
		struct page *g = blocks[good];
		unsigned int g_off = offs[good];

		init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
				  scribble);
		tx = async_memcpy(p, g, p_off, g_off, bytes, submit);

		init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
				  scribble);
		tx = async_mult(dq, dq_off, g, g_off,
				raid6_gfexp[good], bytes, submit);
	} else {
		init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
				  scribble);
		tx = async_gen_syndrome(blocks, offs, disks, bytes, submit);
	}

	/* Restore pointer table */
	blocks[faila] = dq;
	offs[faila] = dq_off;
	blocks[disks-1] = q;
	offs[disks-1] = q_off;

	/* calculate g^{-faila} */
	coef = raid6_gfinv[raid6_gfexp[faila]];

	srcs[0] = dq;
	src_offs[0] = dq_off;
	srcs[1] = q;
	src_offs[1] = q_off;
	init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
			  NULL, NULL, scribble);
	tx = async_xor_offs(dq, dq_off, srcs, src_offs, 2, bytes, submit);

	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
	tx = async_mult(dq, dq_off, dq, dq_off, coef, bytes, submit);

	srcs[0] = p;
	src_offs[0] = p_off;
	srcs[1] = dq;
	src_offs[1] = dq_off;
	init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
			  cb_param, scribble);
	tx = async_xor_offs(p, p_off, srcs, src_offs, 2, bytes, submit);

	return tx;
}
EXPORT_SYMBOL_GPL(async_raid6_datap_recov);
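
/*
 * Illustrative usage sketch, analogous to the one after
 * async_raid6_2data_recov() above (hypothetical caller, arguments
 * assumed): when one data block and 'p' are lost, recover them with
 *
 *	tx = async_raid6_datap_recov(disks, PAGE_SIZE, faila,
 *				     blocks, offs, &submit);
 */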

MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>");
MODULE_DESCRIPTION("asynchronous RAID-6 recovery api");
MODULE_LICENSE("GPL");