/*
 * Asynchronous RAID-6 recovery calculations ASYNC_TX API.
 * Copyright(c) 2009 Intel Corporation
 *
 * based on raid6recov.c:
 *   Copyright 2002 H. Peter Anvin
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 51
 * Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/dma-mapping.h>
#include <linux/raid/pq.h>
#include <linux/async_tx.h>
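
/*
 * Background for readers (an informal summary of the math in raid6recov.c
 * and H. Peter Anvin's "The mathematics of RAID-6"; the notation below is
 * not part of the kernel API):
 *
 *	P = D_0 + D_1 + ... + D_{n-1}
 *	Q = g^0*D_0 + g^1*D_1 + ... + g^{n-1}*D_{n-1}
 *
 * where '+' and '*' are addition (xor) and multiplication in GF(2^8).
 * With two data disks x < y failed, the partial syndromes Pxy/Qxy are
 * recomputed over the surviving data, and then:
 *
 *	Dx = A*(P+Pxy) + B*(Q+Qxy)
 *	Dy = (P+Pxy) + Dx
 *
 * for constants A and B derived from g^x and g^y via the raid6_gfexp,
 * raid6_gfexi and raid6_gfinv lookup tables.  The helpers below map these
 * scalar operations onto DMA_PQ hardware when it is available.
 */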

static struct dma_async_tx_descriptor *
async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
		  size_t len, struct async_submit_ctl *submit)
{
	struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
						      &dest, 1, srcs, 2, len);
	struct dma_device *dma = chan ? chan->device : NULL;
	const u8 *amul, *bmul;
	u8 ax, bx;
	u8 *a, *b, *c;

	if (dma) {
		dma_addr_t dma_dest[2];
		dma_addr_t dma_src[2];
		struct device *dev = dma->dev;
		struct dma_async_tx_descriptor *tx;
		enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;

		if (submit->flags & ASYNC_TX_FENCE)
			dma_flags |= DMA_PREP_FENCE;
		dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
		dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE);
		dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE);
		tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coef,
					     len, dma_flags);
		if (tx) {
			async_tx_submit(chan, tx, submit);
			return tx;
		}

		/* could not get a descriptor, unmap and fall through to
		 * the synchronous path
		 */
		dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL);
		dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE);
		dma_unmap_page(dev, dma_src[1], len, DMA_TO_DEVICE);
	}

	/* run the operation synchronously */
	async_tx_quiesce(&submit->depend_tx);
	amul = raid6_gfmul[coef[0]];
	bmul = raid6_gfmul[coef[1]];
	a = page_address(srcs[0]);
	b = page_address(srcs[1]);
	c = page_address(dest);

	while (len--) {
		ax    = amul[*a++];
		bx    = bmul[*b++];
		*c++ = ax ^ bx;
	}

	return NULL;
}
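
/*
 * Note: raid6_gfmul[c] is the 256-byte lookup table for multiplication by
 * the constant c in GF(2^8), so the synchronous loop above computes
 * dest = coef[0]*srcs[0] + coef[1]*srcs[1] one byte at a time.  A minimal
 * illustration (hypothetical values, not kernel API):
 *
 *	u8 x = 0x1d, c = 0x02;
 *	u8 prod = raid6_gfmul[c][x];	// c * x in GF(2^8)
 *	u8 sum  = prod ^ x;		// GF(2^8) addition is xor
 */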

static struct dma_async_tx_descriptor *
async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
	   struct async_submit_ctl *submit)
{
	struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
						      &dest, 1, &src, 1, len);
	struct dma_device *dma = chan ? chan->device : NULL;
	const u8 *qmul; /* Q multiplier table */
	u8 *d, *s;

	if (dma) {
		dma_addr_t dma_dest[2];
		dma_addr_t dma_src[1];
		struct device *dev = dma->dev;
		struct dma_async_tx_descriptor *tx;
		enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;

		if (submit->flags & ASYNC_TX_FENCE)
			dma_flags |= DMA_PREP_FENCE;
		dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
		dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE);
		tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef,
					     len, dma_flags);
		if (tx) {
			async_tx_submit(chan, tx, submit);
			return tx;
		}

		/* could not get a descriptor, unmap and fall through to
		 * the synchronous path
		 */
		dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL);
		dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE);
	}

	/* no channel available, or failed to allocate a descriptor, so
	 * perform the operation synchronously
	 */
	async_tx_quiesce(&submit->depend_tx);
	qmul  = raid6_gfmul[coef];
	d = page_address(dest);
	s = page_address(src);

	while (len--)
		*d++ = qmul[*s++];

	return NULL;
}
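
/*
 * Implementation note: only dma_dest[1] (the Q slot) is mapped above
 * because DMA_PREP_PQ_DISABLE_P reduces the PQ engine to a pure GF(2^8)
 * constant multiply, i.e. dest = coef * src.  The recovery routines use
 * this to scale a surviving block by a power of g, as in (sketch of the
 * call made by __2data_recov_5() below):
 *
 *	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
 *	tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
 */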

static struct dma_async_tx_descriptor *
__2data_recov_4(int disks, size_t bytes, int faila, int failb,
		struct page **blocks, struct async_submit_ctl *submit)
{
	struct dma_async_tx_descriptor *tx = NULL;
	struct page *p, *q, *a, *b;
	struct page *srcs[2];
	unsigned char coef[2];
	enum async_tx_flags flags = submit->flags;
	dma_async_tx_callback cb_fn = submit->cb_fn;
	void *cb_param = submit->cb_param;
	void *scribble = submit->scribble;

	p = blocks[disks-2];
	q = blocks[disks-1];

	a = blocks[faila];
	b = blocks[failb];

	/* in the 4 disk case P + Pxy == P and Q + Qxy == Q */
	/* Dx = A*(P+Pxy) + B*(Q+Qxy) */
	srcs[0] = p;
	srcs[1] = q;
	coef[0] = raid6_gfexi[failb-faila];
	coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
	tx = async_sum_product(b, srcs, coef, bytes, submit);

	/* Dy = P+Pxy+Dx */
	srcs[0] = p;
	srcs[1] = b;
	init_async_submit(submit, flags | ASYNC_TX_XOR_ZERO_DST, tx, cb_fn,
			  cb_param, scribble);
	tx = async_xor(a, srcs, 0, 2, bytes, submit);

	return tx;
}
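
/*
 * In the 4 disk array the two failed blocks are the only data blocks, so
 * Pxy and Qxy are zero and P+Pxy/Q+Qxy collapse to P/Q; the whole
 * recovery is just the two operations above.  Worked through for
 * faila = 0, failb = 1, using the same naming as the comments in the
 * code:
 *
 *	Dx = A*P + B*Q,  A = raid6_gfexi[1],
 *	                 B = raid6_gfinv[raid6_gfexp[0] ^ raid6_gfexp[1]]
 *	Dy = P + Dx
 */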

static struct dma_async_tx_descriptor *
__2data_recov_5(int disks, size_t bytes, int faila, int failb,
		struct page **blocks, struct async_submit_ctl *submit)
{
	struct dma_async_tx_descriptor *tx = NULL;
	struct page *p, *q, *g, *dp, *dq;
	struct page *srcs[2];
	unsigned char coef[2];
	enum async_tx_flags flags = submit->flags;
	dma_async_tx_callback cb_fn = submit->cb_fn;
	void *cb_param = submit->cb_param;
	void *scribble = submit->scribble;
	int good_srcs, good, i;

	good_srcs = 0;
	good = -1;
	for (i = 0; i < disks-2; i++) {
		if (blocks[i] == NULL)
			continue;
		if (i == faila || i == failb)
			continue;
		good = i;
		good_srcs++;
	}
	BUG_ON(good_srcs > 1);

	p = blocks[disks-2];
	q = blocks[disks-1];
	g = blocks[good];

	/* Compute syndrome with zero for the missing data pages
	 * Use the dead data pages as temporary storage for delta p and
	 * delta q
	 */
	dp = blocks[faila];
	dq = blocks[failb];

	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
	tx = async_memcpy(dp, g, 0, 0, bytes, submit);
	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
	tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);

	/* compute P + Pxy */
	srcs[0] = dp;
	srcs[1] = p;
	init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
			  NULL, NULL, scribble);
	tx = async_xor(dp, srcs, 0, 2, bytes, submit);

	/* compute Q + Qxy */
	srcs[0] = dq;
	srcs[1] = q;
	init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
			  NULL, NULL, scribble);
	tx = async_xor(dq, srcs, 0, 2, bytes, submit);

	/* Dx = A*(P+Pxy) + B*(Q+Qxy) */
	srcs[0] = dp;
	srcs[1] = dq;
	coef[0] = raid6_gfexi[failb-faila];
	coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
	tx = async_sum_product(dq, srcs, coef, bytes, submit);

	/* Dy = P+Pxy+Dx */
	srcs[0] = dp;
	srcs[1] = dq;
	init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
			  cb_param, scribble);
	tx = async_xor(dp, srcs, 0, 2, bytes, submit);

	return tx;
}
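
/*
 * The pattern above is the standard async_tx dependency chain: each
 * init_async_submit() threads the previous descriptor back in as 'tx'
 * and sets ASYNC_TX_FENCE so the next operation is ordered behind it;
 * the caller's flags and completion callback are attached only to the
 * final operation.  ASYNC_TX_XOR_DROP_DST signals that the destination
 * is also srcs[0], so the synchronous xor fallback must drop it from the
 * source list rather than xor the destination into itself.
 */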

static struct dma_async_tx_descriptor *
__2data_recov_n(int disks, size_t bytes, int faila, int failb,
		struct page **blocks, struct async_submit_ctl *submit)
{
	struct dma_async_tx_descriptor *tx = NULL;
	struct page *p, *q, *dp, *dq;
	struct page *srcs[2];
	unsigned char coef[2];
	enum async_tx_flags flags = submit->flags;
	dma_async_tx_callback cb_fn = submit->cb_fn;
	void *cb_param = submit->cb_param;
	void *scribble = submit->scribble;

	p = blocks[disks-2];
	q = blocks[disks-1];

	/* Compute syndrome with zero for the missing data pages
	 * Use the dead data pages as temporary storage for
	 * delta p and delta q
	 */
	dp = blocks[faila];
	blocks[faila] = NULL;
	blocks[disks-2] = dp;
	dq = blocks[failb];
	blocks[failb] = NULL;
	blocks[disks-1] = dq;

	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
	tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);

	/* Restore pointer table */
	blocks[faila]   = dp;
	blocks[failb]   = dq;
	blocks[disks-2] = p;
	blocks[disks-1] = q;

	/* compute P + Pxy */
	srcs[0] = dp;
	srcs[1] = p;
	init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
			  NULL, NULL, scribble);
	tx = async_xor(dp, srcs, 0, 2, bytes, submit);

	/* compute Q + Qxy */
	srcs[0] = dq;
	srcs[1] = q;
	init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
			  NULL, NULL, scribble);
	tx = async_xor(dq, srcs, 0, 2, bytes, submit);

	/* Dx = A*(P+Pxy) + B*(Q+Qxy) */
	srcs[0] = dp;
	srcs[1] = dq;
	coef[0] = raid6_gfexi[failb-faila];
	coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
	tx = async_sum_product(dq, srcs, coef, bytes, submit);

	/* Dy = P+Pxy+Dx */
	srcs[0] = dp;
	srcs[1] = dq;
	init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
			  cb_param, scribble);
	tx = async_xor(dp, srcs, 0, 2, bytes, submit);

	return tx;
}
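
/*
 * Summary of the general-case pipeline built above (one descriptor per
 * step, each fenced behind the previous one):
 *
 *	1. gen_syndrome with the failed slots zeroed: dp = Pxy, dq = Qxy
 *	2. dp ^= p  ->  dp = P + Pxy
 *	3. dq ^= q  ->  dq = Q + Qxy
 *	4. dq = A*dp + B*dq (async_sum_product)  ->  dq = Dx
 *	5. dp ^= dq  ->  dp = Dy
 */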

/**
 * async_raid6_2data_recov - asynchronously calculate two missing data blocks
 * @disks: number of disks in the RAID-6 array
 * @bytes: block size
 * @faila: first failed drive index
 * @failb: second failed drive index
 * @blocks: array of source pointers where the last two entries are p and q
 * @submit: submission/completion modifiers
 */
struct dma_async_tx_descriptor *
async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
			struct page **blocks, struct async_submit_ctl *submit)
{
	void *scribble = submit->scribble;
	int non_zero_srcs, i;

	BUG_ON(faila == failb);
	if (failb < faila)
		swap(faila, failb);

	pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes);

	/* if a dma resource is not available or a scribble buffer is not
	 * available punt to the synchronous path.  In the 'dma not
	 * available' case be sure to use the scribble buffer to
	 * preserve the content of 'blocks' as the caller intended.
	 */
	if (!async_dma_find_channel(DMA_PQ) || !scribble) {
		void **ptrs = scribble ? scribble : (void **) blocks;

		async_tx_quiesce(&submit->depend_tx);
		for (i = 0; i < disks; i++)
			if (blocks[i] == NULL)
				ptrs[i] = (void *) raid6_empty_zero_page;
			else
				ptrs[i] = page_address(blocks[i]);

		raid6_2data_recov(disks, bytes, faila, failb, ptrs);

		async_tx_sync_epilog(submit);

		return NULL;
	}

	non_zero_srcs = 0;
	for (i = 0; i < disks-2 && non_zero_srcs < 4; i++)
		if (blocks[i])
			non_zero_srcs++;
	switch (non_zero_srcs) {
	case 0:
	case 1:
		/* There must be at least 2 sources - the failed devices. */
		BUG();

	case 2:
		/* dma devices do not uniformly understand a zero source pq
		 * operation (in contrast to the synchronous case), so
		 * explicitly handle the special case of a 4 disk array with
		 * both data disks missing.
		 */
		return __2data_recov_4(disks, bytes, faila, failb, blocks, submit);
	case 3:
		/* dma devices do not uniformly understand a single
		 * source pq operation (in contrast to the synchronous
		 * case), so explicitly handle the special case of a 5 disk
		 * array with 2 of 3 data disks missing.
		 */
		return __2data_recov_5(disks, bytes, faila, failb, blocks, submit);
	default:
		return __2data_recov_n(disks, bytes, faila, failb, blocks, submit);
	}
}
EXPORT_SYMBOL_GPL(async_raid6_2data_recov);
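
/*
 * Example of driving this API from a caller (a sketch modeled on how
 * md/raid5 uses it; 'my_callback', 'stripe_ctx' and 'percpu_scribble'
 * are hypothetical names, not part of this file):
 *
 *	struct async_submit_ctl submit;
 *	struct dma_async_tx_descriptor *tx;
 *
 *	// blocks[] holds 'disks' page pointers with p and q in the last
 *	// two slots; the failed slots point at the pages to be rebuilt
 *	init_async_submit(&submit, 0, NULL, my_callback, stripe_ctx,
 *			  percpu_scribble);
 *	tx = async_raid6_2data_recov(disks, PAGE_SIZE, faila, failb,
 *				     blocks, &submit);
 *	// a NULL return means the work completed synchronously
 */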

/**
 * async_raid6_datap_recov - asynchronously calculate a data and the 'p' block
 * @disks: number of disks in the RAID-6 array
 * @bytes: block size
 * @faila: failed drive index
 * @blocks: array of source pointers where the last two entries are p and q
 * @submit: submission/completion modifiers
 */
struct dma_async_tx_descriptor *
async_raid6_datap_recov(int disks, size_t bytes, int faila,
			struct page **blocks, struct async_submit_ctl *submit)
{
	struct dma_async_tx_descriptor *tx = NULL;
	struct page *p, *q, *dq;
	u8 coef;
	enum async_tx_flags flags = submit->flags;
	dma_async_tx_callback cb_fn = submit->cb_fn;
	void *cb_param = submit->cb_param;
	void *scribble = submit->scribble;
	int good_srcs, good, i;
	struct page *srcs[2];

	pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes);

	/* if a dma resource is not available or a scribble buffer is not
	 * available punt to the synchronous path.  In the 'dma not
	 * available' case be sure to use the scribble buffer to
	 * preserve the content of 'blocks' as the caller intended.
	 */
	if (!async_dma_find_channel(DMA_PQ) || !scribble) {
		void **ptrs = scribble ? scribble : (void **) blocks;

		async_tx_quiesce(&submit->depend_tx);
		for (i = 0; i < disks; i++)
			if (blocks[i] == NULL)
				ptrs[i] = (void*)raid6_empty_zero_page;
			else
				ptrs[i] = page_address(blocks[i]);

		raid6_datap_recov(disks, bytes, faila, ptrs);

		async_tx_sync_epilog(submit);

		return NULL;
	}

	good_srcs = 0;
	good = -1;
	for (i = 0; i < disks-2; i++) {
		if (i == faila)
			continue;
		if (blocks[i]) {
			good = i;
			good_srcs++;
			if (good_srcs > 1)
				break;
		}
	}
	BUG_ON(good_srcs == 0);

	p = blocks[disks-2];
	q = blocks[disks-1];

	/* Compute syndrome with zero for the missing data page
	 * Use the dead data page as temporary storage for delta q
	 */
	dq = blocks[faila];
	blocks[faila] = NULL;
	blocks[disks-1] = dq;

	/* in the 4-disk case we only need to perform a single source
	 * multiplication with the one good data block.
	 */
	if (good_srcs == 1) {
		struct page *g = blocks[good];

		init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
				  scribble);
		tx = async_memcpy(p, g, 0, 0, bytes, submit);

		init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
				  scribble);
		tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
	} else {
		init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
				  scribble);
		tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);
	}

	/* Restore pointer table */
	blocks[faila]   = dq;
	blocks[disks-1] = q;

	/* calculate g^{-faila} */
	coef = raid6_gfinv[raid6_gfexp[faila]];

	srcs[0] = dq;
	srcs[1] = q;
	init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
			  NULL, NULL, scribble);
	tx = async_xor(dq, srcs, 0, 2, bytes, submit);

	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
	tx = async_mult(dq, dq, coef, bytes, submit);

	srcs[0] = p;
	srcs[1] = dq;
	init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
			  cb_param, scribble);
	tx = async_xor(p, srcs, 0, 2, bytes, submit);

	return tx;
}
EXPORT_SYMBOL_GPL(async_raid6_datap_recov);
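
/*
 * For reference, the data+P recovery above implements (informally):
 *
 *	Dx = (Q + Qx) * g^{-x}		rebuild the lost data from Q
 *	P  = Px + Dx			fold it into the partial P
 *
 * where Px/Qx are the syndromes computed with a zero page in failed slot
 * x, '+' is xor, and g^{-x} comes from raid6_gfinv[raid6_gfexp[x]].
 */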

MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>");
MODULE_DESCRIPTION("asynchronous RAID-6 recovery api");
MODULE_LICENSE("GPL");