/*
 * Asynchronous RAID-6 recovery calculations ASYNC_TX API.
 * Copyright(c) 2009 Intel Corporation
 *
 * based on raid6recov.c:
 *   Copyright 2002 H. Peter Anvin
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 51
 * Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 */
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/dma-mapping.h>
#include <linux/raid/pq.h>
#include <linux/async_tx.h>
#include <linux/dmaengine.h>
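
/*
 * Background on the GF(2^8) lookup tables used throughout this file
 * (summary only; see lib/raid6/mktables.c for the authoritative
 * definitions):
 *
 *   raid6_gfmul[c][x] - the product c * x in GF(2^8)
 *   raid6_gfexp[i]    - g^i, powers of the generator {02}
 *   raid6_gfinv[x]    - the multiplicative inverse x^-1
 *   raid6_gfexi[i]    - (g^i + 1)^-1, used for two-disk recovery
 *
 * Multiplying a whole block by a constant c is therefore a byte-wise
 * lookup through raid6_gfmul[c], which is exactly what the synchronous
 * fallback paths below do.
 */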

/* dest = (coef[0] * srcs[0]) ^ (coef[1] * srcs[1]) over GF(2^8) */
static struct dma_async_tx_descriptor *
async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
                  size_t len, struct async_submit_ctl *submit)
{
        struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
                                                      &dest, 1, srcs, 2, len);
        struct dma_device *dma = chan ? chan->device : NULL;
        struct dmaengine_unmap_data *unmap = NULL;
        const u8 *amul, *bmul;
        u8 ax, bx;
        u8 *a, *b, *c;

        if (dma)
                unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOWAIT);

        if (unmap) {
                struct device *dev = dma->dev;
                dma_addr_t pq[2];
                struct dma_async_tx_descriptor *tx;
                enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;

                if (submit->flags & ASYNC_TX_FENCE)
                        dma_flags |= DMA_PREP_FENCE;
                unmap->addr[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE);
                unmap->addr[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE);
                unmap->to_cnt = 2;

                unmap->addr[2] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
                unmap->bidi_cnt = 1;
                /* engine only looks at Q, but expects it to follow P */
                pq[1] = unmap->addr[2];

                unmap->len = len;
                tx = dma->device_prep_dma_pq(chan, pq, unmap->addr, 2, coef,
                                             len, dma_flags);
                if (tx) {
                        dma_set_unmap(tx, unmap);
                        async_tx_submit(chan, tx, submit);
                        dmaengine_unmap_put(unmap);
                        return tx;
                }

                /* could not get a descriptor, unmap and fall through to
                 * the synchronous path
                 */
                dmaengine_unmap_put(unmap);
        }

        /* run the operation synchronously */
        async_tx_quiesce(&submit->depend_tx);
        amul = raid6_gfmul[coef[0]];
        bmul = raid6_gfmul[coef[1]];
        a = page_address(srcs[0]);
        b = page_address(srcs[1]);
        c = page_address(dest);

        while (len--) {
                ax    = amul[*a++];
                bx    = bmul[*b++];
                *c++ = ax ^ bx;
        }

        return NULL;
}

/* dest = coef * src over GF(2^8) */
static struct dma_async_tx_descriptor *
async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
           struct async_submit_ctl *submit)
{
        struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
                                                      &dest, 1, &src, 1, len);
        struct dma_device *dma = chan ? chan->device : NULL;
        struct dmaengine_unmap_data *unmap = NULL;
        const u8 *qmul; /* Q multiplier table */
        u8 *d, *s;

        if (dma)
                unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOWAIT);

        if (unmap) {
                dma_addr_t dma_dest[2];
                struct device *dev = dma->dev;
                struct dma_async_tx_descriptor *tx;
                enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;

                if (submit->flags & ASYNC_TX_FENCE)
                        dma_flags |= DMA_PREP_FENCE;
                unmap->addr[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE);
                unmap->to_cnt++;
                unmap->addr[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
                dma_dest[1] = unmap->addr[1];
                unmap->bidi_cnt++;
                unmap->len = len;

                /* this looks funny, but the engine looks for Q at
                 * dma_dest[1] and ignores dma_dest[0] as a dest
                 * due to DMA_PREP_PQ_DISABLE_P
                 */
                tx = dma->device_prep_dma_pq(chan, dma_dest, unmap->addr,
                                             1, &coef, len, dma_flags);

                if (tx) {
                        dma_set_unmap(tx, unmap);
                        dmaengine_unmap_put(unmap);
                        async_tx_submit(chan, tx, submit);
                        return tx;
                }

                /* could not get a descriptor, unmap and fall through to
                 * the synchronous path
                 */
                dmaengine_unmap_put(unmap);
        }

        /* no channel available, or failed to allocate a descriptor, so
         * perform the operation synchronously
         */
        async_tx_quiesce(&submit->depend_tx);
        qmul  = raid6_gfmul[coef];
        d = page_address(dest);
        s = page_address(src);

        while (len--)
                *d++ = qmul[*s++];

        return NULL;
}

static struct dma_async_tx_descriptor *
__2data_recov_4(int disks, size_t bytes, int faila, int failb,
                struct page **blocks, struct async_submit_ctl *submit)
{
        struct dma_async_tx_descriptor *tx = NULL;
        struct page *p, *q, *a, *b;
        struct page *srcs[2];
        unsigned char coef[2];
        enum async_tx_flags flags = submit->flags;
        dma_async_tx_callback cb_fn = submit->cb_fn;
        void *cb_param = submit->cb_param;
        void *scribble = submit->scribble;

        p = blocks[disks-2];
        q = blocks[disks-1];

        a = blocks[faila];
        b = blocks[failb];

        /* in the 4 disk case P + Pxy == P and Q + Qxy == Q */
        /* Dx = A*(P+Pxy) + B*(Q+Qxy) */
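        /*
         * Why this shortcut works: Pxy and Qxy are the P/Q syndromes
         * recomputed with the failed blocks treated as zero.  With only
         * four disks, the two failed drives are the only data drives, so
         * there is nothing left to sum over and Pxy = Qxy = 0; the stored
         * P and Q can be fed to the recovery multiply directly.
         */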
        srcs[0] = p;
        srcs[1] = q;
        coef[0] = raid6_gfexi[failb-faila];
        coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
        init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
        tx = async_sum_product(b, srcs, coef, bytes, submit);

        /* Dy = P+Pxy+Dx */
        srcs[0] = p;
        srcs[1] = b;
        init_async_submit(submit, flags | ASYNC_TX_XOR_ZERO_DST, tx, cb_fn,
                          cb_param, scribble);
        tx = async_xor(a, srcs, 0, 2, bytes, submit);

        return tx;
}

static struct dma_async_tx_descriptor *
__2data_recov_5(int disks, size_t bytes, int faila, int failb,
                struct page **blocks, struct async_submit_ctl *submit)
{
        struct dma_async_tx_descriptor *tx = NULL;
        struct page *p, *q, *g, *dp, *dq;
        struct page *srcs[2];
        unsigned char coef[2];
        enum async_tx_flags flags = submit->flags;
        dma_async_tx_callback cb_fn = submit->cb_fn;
        void *cb_param = submit->cb_param;
        void *scribble = submit->scribble;
        int good_srcs, good, i;

        good_srcs = 0;
        good = -1;
        for (i = 0; i < disks-2; i++) {
                if (blocks[i] == NULL)
                        continue;
                if (i == faila || i == failb)
                        continue;
                good = i;
                good_srcs++;
        }
        BUG_ON(good_srcs > 1);

        p = blocks[disks-2];
        q = blocks[disks-1];
        g = blocks[good];

        /* Compute syndrome with zero for the missing data pages
         * Use the dead data pages as temporary storage for delta p and
         * delta q
         */
        dp = blocks[faila];
        dq = blocks[failb];

        init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
        tx = async_memcpy(dp, g, 0, 0, bytes, submit);
        init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
        tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
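        /*
         * At this point dp holds Pxy and dq holds Qxy: with a single
         * surviving data disk 'good', the partial syndromes reduce to
         * Pxy = D_good and Qxy = g^good * D_good, hence the plain copy
         * and the single async_mult above.
         */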

        /* compute P + Pxy */
        srcs[0] = dp;
        srcs[1] = p;
        init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
                          NULL, NULL, scribble);
        tx = async_xor(dp, srcs, 0, 2, bytes, submit);

        /* compute Q + Qxy */
        srcs[0] = dq;
        srcs[1] = q;
        init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
                          NULL, NULL, scribble);
        tx = async_xor(dq, srcs, 0, 2, bytes, submit);

        /* Dx = A*(P+Pxy) + B*(Q+Qxy) */
        srcs[0] = dp;
        srcs[1] = dq;
        coef[0] = raid6_gfexi[failb-faila];
        coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
        init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
        tx = async_sum_product(dq, srcs, coef, bytes, submit);

        /* Dy = P+Pxy+Dx */
        srcs[0] = dp;
        srcs[1] = dq;
        init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
                          cb_param, scribble);
        tx = async_xor(dp, srcs, 0, 2, bytes, submit);

        return tx;
}

static struct dma_async_tx_descriptor *
__2data_recov_n(int disks, size_t bytes, int faila, int failb,
                struct page **blocks, struct async_submit_ctl *submit)
{
        struct dma_async_tx_descriptor *tx = NULL;
        struct page *p, *q, *dp, *dq;
        struct page *srcs[2];
        unsigned char coef[2];
        enum async_tx_flags flags = submit->flags;
        dma_async_tx_callback cb_fn = submit->cb_fn;
        void *cb_param = submit->cb_param;
        void *scribble = submit->scribble;

        p = blocks[disks-2];
        q = blocks[disks-1];

        /* Compute syndrome with zero for the missing data pages
         * Use the dead data pages as temporary storage for
         * delta p and delta q
         */
        dp = blocks[faila];
        blocks[faila] = NULL;
        blocks[disks-2] = dp;
        dq = blocks[failb];
        blocks[failb] = NULL;
        blocks[disks-1] = dq;

        init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
        tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);

        /* Restore pointer table */
        blocks[faila]   = dp;
        blocks[failb]   = dq;
        blocks[disks-2] = p;
        blocks[disks-1] = q;

        /* compute P + Pxy */
        srcs[0] = dp;
        srcs[1] = p;
        init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
                          NULL, NULL, scribble);
        tx = async_xor(dp, srcs, 0, 2, bytes, submit);

        /* compute Q + Qxy */
        srcs[0] = dq;
        srcs[1] = q;
        init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
                          NULL, NULL, scribble);
        tx = async_xor(dq, srcs, 0, 2, bytes, submit);

        /* Dx = A*(P+Pxy) + B*(Q+Qxy) */
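        /*
         * Where the coefficients come from (standard RAID-6 algebra, cf.
         * "The mathematics of RAID-6" by H. Peter Anvin): writing
         * P' = P+Pxy and Q' = Q+Qxy, the two erased blocks satisfy
         *
         *   P' = D_faila + D_failb
         *   Q' = g^faila * D_faila + g^failb * D_failb
         *
         * Solving over GF(2^8):
         *
         *   D_failb = (g^(failb-faila) + 1)^-1 * P'
         *           + (g^faila + g^failb)^-1   * Q'
         *   D_faila = P' + D_failb
         *
         * coef[0]/coef[1] below are precisely those two inverses, looked
         * up via raid6_gfexi and raid6_gfinv, and the trailing xor
         * produces D_faila.
         */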
        srcs[0] = dp;
        srcs[1] = dq;
        coef[0] = raid6_gfexi[failb-faila];
        coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
        init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
        tx = async_sum_product(dq, srcs, coef, bytes, submit);

        /* Dy = P+Pxy+Dx */
        srcs[0] = dp;
        srcs[1] = dq;
        init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
                          cb_param, scribble);
        tx = async_xor(dp, srcs, 0, 2, bytes, submit);

        return tx;
}

/**
 * async_raid6_2data_recov - asynchronously calculate two missing data blocks
 * @disks: number of disks in the RAID-6 array
 * @bytes: block size
 * @faila: first failed drive index
 * @failb: second failed drive index
 * @blocks: array of source pointers where the last two entries are p and q
 * @submit: submission/completion modifiers
 */
struct dma_async_tx_descriptor *
async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
                        struct page **blocks, struct async_submit_ctl *submit)
{
        void *scribble = submit->scribble;
        int non_zero_srcs, i;

        BUG_ON(faila == failb);
        if (failb < faila)
                swap(faila, failb);

        pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes);

        /* if a dma resource is not available or a scribble buffer is not
         * available punt to the synchronous path.  In the 'dma not
         * available' case be sure to use the scribble buffer to
         * preserve the content of 'blocks' as the caller intended.
         */
        if (!async_dma_find_channel(DMA_PQ) || !scribble) {
                void **ptrs = scribble ? scribble : (void **) blocks;

                async_tx_quiesce(&submit->depend_tx);
                for (i = 0; i < disks; i++)
                        if (blocks[i] == NULL)
                                ptrs[i] = (void *) raid6_empty_zero_page;
                        else
                                ptrs[i] = page_address(blocks[i]);

                raid6_2data_recov(disks, bytes, faila, failb, ptrs);

                async_tx_sync_epilog(submit);

                return NULL;
        }

        non_zero_srcs = 0;
        for (i = 0; i < disks-2 && non_zero_srcs < 4; i++)
                if (blocks[i])
                        non_zero_srcs++;
        switch (non_zero_srcs) {
        case 0:
        case 1:
                /* There must be at least 2 sources - the failed devices. */
                BUG();

        case 2:
                /* dma devices do not uniformly understand a zero source pq
                 * operation (in contrast to the synchronous case), so
                 * explicitly handle the special case of a 4 disk array with
                 * both data disks missing.
                 */
                return __2data_recov_4(disks, bytes, faila, failb, blocks, submit);
        case 3:
                /* dma devices do not uniformly understand a single
                 * source pq operation (in contrast to the synchronous
                 * case), so explicitly handle the special case of a 5 disk
                 * array with 2 of 3 data disks missing.
                 */
                return __2data_recov_5(disks, bytes, faila, failb, blocks, submit);
        default:
                return __2data_recov_n(disks, bytes, faila, failb, blocks, submit);
        }
}
EXPORT_SYMBOL_GPL(async_raid6_2data_recov);
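
/*
 * Illustrative caller sketch (not part of this file): rebuild data
 * disks 1 and 3 of a 'disks'-wide stripe.  blocks[0..disks-3] are the
 * data pages (the failed slots being the pages to be rebuilt),
 * blocks[disks-2] is P and blocks[disks-1] is Q; 'scribble' is an
 * addr_conv_t array with at least 'disks' entries.
 *
 *      struct async_submit_ctl submit;
 *      struct dma_async_tx_descriptor *tx;
 *
 *      init_async_submit(&submit, 0, NULL, rebuild_done_cb, ctx, scribble);
 *      tx = async_raid6_2data_recov(disks, PAGE_SIZE, 1, 3, blocks, &submit);
 *      async_tx_issue_pending_all();
 *
 * rebuild_done_cb() and ctx are hypothetical names; a NULL callback
 * plus async_tx_quiesce(&tx) works just as well for a blocking wait.
 */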

/**
 * async_raid6_datap_recov - asynchronously calculate a data and the 'p' block
 * @disks: number of disks in the RAID-6 array
 * @bytes: block size
 * @faila: failed drive index
 * @blocks: array of source pointers where the last two entries are p and q
 * @submit: submission/completion modifiers
 */
struct dma_async_tx_descriptor *
async_raid6_datap_recov(int disks, size_t bytes, int faila,
                        struct page **blocks, struct async_submit_ctl *submit)
{
        struct dma_async_tx_descriptor *tx = NULL;
        struct page *p, *q, *dq;
        u8 coef;
        enum async_tx_flags flags = submit->flags;
        dma_async_tx_callback cb_fn = submit->cb_fn;
        void *cb_param = submit->cb_param;
        void *scribble = submit->scribble;
        int good_srcs, good, i;
        struct page *srcs[2];

        pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes);

        /* if a dma resource is not available or a scribble buffer is not
         * available punt to the synchronous path.  In the 'dma not
         * available' case be sure to use the scribble buffer to
         * preserve the content of 'blocks' as the caller intended.
         */
        if (!async_dma_find_channel(DMA_PQ) || !scribble) {
                void **ptrs = scribble ? scribble : (void **) blocks;

                async_tx_quiesce(&submit->depend_tx);
                for (i = 0; i < disks; i++)
                        if (blocks[i] == NULL)
                                ptrs[i] = (void *) raid6_empty_zero_page;
                        else
                                ptrs[i] = page_address(blocks[i]);

                raid6_datap_recov(disks, bytes, faila, ptrs);

                async_tx_sync_epilog(submit);

                return NULL;
        }

        good_srcs = 0;
        good = -1;
        for (i = 0; i < disks-2; i++) {
                if (i == faila)
                        continue;
                if (blocks[i]) {
                        good = i;
                        good_srcs++;
                        if (good_srcs > 1)
                                break;
                }
        }
        BUG_ON(good_srcs == 0);

        p = blocks[disks-2];
        q = blocks[disks-1];

        /* Compute syndrome with zero for the missing data page
         * Use the dead data page as temporary storage for delta q
         */
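        /*
         * Recovery plan in brief (summary of the steps below): regenerate
         * P and the partial syndrome Qa from the surviving data disks,
         * treating D_faila as zero, then
         *
         *   D_faila = g^-faila * (Q + Qa)
         *   P       = Pa + D_faila
         *
         * where Pa is the P just regenerated and Qa lives in the dead
         * data page doubling as scratch space.
         */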
        dq = blocks[faila];
        blocks[faila] = NULL;
        blocks[disks-1] = dq;

        /* in the 4-disk case we only need to perform a single source
         * multiplication with the one good data block.
         */
        if (good_srcs == 1) {
                struct page *g = blocks[good];

                init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
                                  scribble);
                tx = async_memcpy(p, g, 0, 0, bytes, submit);

                init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
                                  scribble);
                tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
        } else {
                init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
                                  scribble);
                tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);
        }

        /* Restore pointer table */
        blocks[faila]   = dq;
        blocks[disks-1] = q;

        /* calculate g^{-faila} */
        coef = raid6_gfinv[raid6_gfexp[faila]];

        srcs[0] = dq;
        srcs[1] = q;
        init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
                          NULL, NULL, scribble);
        tx = async_xor(dq, srcs, 0, 2, bytes, submit);

        init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
        tx = async_mult(dq, dq, coef, bytes, submit);

        srcs[0] = p;
        srcs[1] = dq;
        init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
                          cb_param, scribble);
        tx = async_xor(p, srcs, 0, 2, bytes, submit);

        return tx;
}
EXPORT_SYMBOL_GPL(async_raid6_datap_recov);

MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>");
MODULE_DESCRIPTION("asynchronous RAID-6 recovery api");
MODULE_LICENSE("GPL");