2 * Asynchronous RAID-6 recovery calculations ASYNC_TX API.
3 * Copyright(c) 2009 Intel Corporation
5 * based on raid6recov.c:
6 * Copyright 2002 H. Peter Anvin
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the Free
10 * Software Foundation; either version 2 of the License, or (at your option)
13 * This program is distributed in the hope that it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 * You should have received a copy of the GNU General Public License along with
19 * this program; if not, write to the Free Software Foundation, Inc., 51
20 * Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
23 #include <linux/kernel.h>
24 #include <linux/interrupt.h>
25 #include <linux/dma-mapping.h>
26 #include <linux/raid/pq.h>
27 #include <linux/async_tx.h>
29 static struct dma_async_tx_descriptor
*
30 async_sum_product(struct page
*dest
, struct page
**srcs
, unsigned char *coef
,
31 size_t len
, struct async_submit_ctl
*submit
)
33 struct dma_chan
*chan
= async_tx_find_channel(submit
, DMA_PQ
,
34 &dest
, 1, srcs
, 2, len
);
35 struct dma_device
*dma
= chan
? chan
->device
: NULL
;
36 const u8
*amul
, *bmul
;
41 dma_addr_t dma_dest
[2];
42 dma_addr_t dma_src
[2];
43 struct device
*dev
= dma
->dev
;
44 struct dma_async_tx_descriptor
*tx
;
45 enum dma_ctrl_flags dma_flags
= DMA_PREP_PQ_DISABLE_P
;
47 if (submit
->flags
& ASYNC_TX_FENCE
)
48 dma_flags
|= DMA_PREP_FENCE
;
49 dma_dest
[1] = dma_map_page(dev
, dest
, 0, len
, DMA_BIDIRECTIONAL
);
50 dma_src
[0] = dma_map_page(dev
, srcs
[0], 0, len
, DMA_TO_DEVICE
);
51 dma_src
[1] = dma_map_page(dev
, srcs
[1], 0, len
, DMA_TO_DEVICE
);
52 tx
= dma
->device_prep_dma_pq(chan
, dma_dest
, dma_src
, 2, coef
,
55 async_tx_submit(chan
, tx
, submit
);
59 /* could not get a descriptor, unmap and fall through to
60 * the synchronous path
62 dma_unmap_page(dev
, dma_dest
[1], len
, DMA_BIDIRECTIONAL
);
63 dma_unmap_page(dev
, dma_src
[0], len
, DMA_TO_DEVICE
);
64 dma_unmap_page(dev
, dma_src
[1], len
, DMA_TO_DEVICE
);
67 /* run the operation synchronously */
68 async_tx_quiesce(&submit
->depend_tx
);
69 amul
= raid6_gfmul
[coef
[0]];
70 bmul
= raid6_gfmul
[coef
[1]];
71 a
= page_address(srcs
[0]);
72 b
= page_address(srcs
[1]);
73 c
= page_address(dest
);
84 static struct dma_async_tx_descriptor
*
85 async_mult(struct page
*dest
, struct page
*src
, u8 coef
, size_t len
,
86 struct async_submit_ctl
*submit
)
88 struct dma_chan
*chan
= async_tx_find_channel(submit
, DMA_PQ
,
89 &dest
, 1, &src
, 1, len
);
90 struct dma_device
*dma
= chan
? chan
->device
: NULL
;
91 const u8
*qmul
; /* Q multiplier table */
95 dma_addr_t dma_dest
[2];
96 dma_addr_t dma_src
[1];
97 struct device
*dev
= dma
->dev
;
98 struct dma_async_tx_descriptor
*tx
;
99 enum dma_ctrl_flags dma_flags
= DMA_PREP_PQ_DISABLE_P
;
101 if (submit
->flags
& ASYNC_TX_FENCE
)
102 dma_flags
|= DMA_PREP_FENCE
;
103 dma_dest
[1] = dma_map_page(dev
, dest
, 0, len
, DMA_BIDIRECTIONAL
);
104 dma_src
[0] = dma_map_page(dev
, src
, 0, len
, DMA_TO_DEVICE
);
105 tx
= dma
->device_prep_dma_pq(chan
, dma_dest
, dma_src
, 1, &coef
,
108 async_tx_submit(chan
, tx
, submit
);
112 /* could not get a descriptor, unmap and fall through to
113 * the synchronous path
115 dma_unmap_page(dev
, dma_dest
[1], len
, DMA_BIDIRECTIONAL
);
116 dma_unmap_page(dev
, dma_src
[0], len
, DMA_TO_DEVICE
);
119 /* no channel available, or failed to allocate a descriptor, so
120 * perform the operation synchronously
122 async_tx_quiesce(&submit
->depend_tx
);
123 qmul
= raid6_gfmul
[coef
];
124 d
= page_address(dest
);
125 s
= page_address(src
);
133 static struct dma_async_tx_descriptor
*
134 __2data_recov_4(int disks
, size_t bytes
, int faila
, int failb
,
135 struct page
**blocks
, struct async_submit_ctl
*submit
)
137 struct dma_async_tx_descriptor
*tx
= NULL
;
138 struct page
*p
, *q
, *a
, *b
;
139 struct page
*srcs
[2];
140 unsigned char coef
[2];
141 enum async_tx_flags flags
= submit
->flags
;
142 dma_async_tx_callback cb_fn
= submit
->cb_fn
;
143 void *cb_param
= submit
->cb_param
;
144 void *scribble
= submit
->scribble
;
152 /* in the 4 disk case P + Pxy == P and Q + Qxy == Q */
153 /* Dx = A*(P+Pxy) + B*(Q+Qxy) */
156 coef
[0] = raid6_gfexi
[failb
-faila
];
157 coef
[1] = raid6_gfinv
[raid6_gfexp
[faila
]^raid6_gfexp
[failb
]];
158 init_async_submit(submit
, ASYNC_TX_FENCE
, tx
, NULL
, NULL
, scribble
);
159 tx
= async_sum_product(b
, srcs
, coef
, bytes
, submit
);
164 init_async_submit(submit
, flags
| ASYNC_TX_XOR_ZERO_DST
, tx
, cb_fn
,
166 tx
= async_xor(a
, srcs
, 0, 2, bytes
, submit
);
172 static struct dma_async_tx_descriptor
*
173 __2data_recov_5(int disks
, size_t bytes
, int faila
, int failb
,
174 struct page
**blocks
, struct async_submit_ctl
*submit
)
176 struct dma_async_tx_descriptor
*tx
= NULL
;
177 struct page
*p
, *q
, *g
, *dp
, *dq
;
178 struct page
*srcs
[2];
179 unsigned char coef
[2];
180 enum async_tx_flags flags
= submit
->flags
;
181 dma_async_tx_callback cb_fn
= submit
->cb_fn
;
182 void *cb_param
= submit
->cb_param
;
183 void *scribble
= submit
->scribble
;
184 int good_srcs
, good
, i
;
188 for (i
= 0; i
< disks
-2; i
++) {
189 if (blocks
[i
] == NULL
)
191 if (i
== faila
|| i
== failb
)
196 BUG_ON(good_srcs
> 1);
202 /* Compute syndrome with zero for the missing data pages
203 * Use the dead data pages as temporary storage for delta p and
209 init_async_submit(submit
, ASYNC_TX_FENCE
, tx
, NULL
, NULL
, scribble
);
210 tx
= async_memcpy(dp
, g
, 0, 0, bytes
, submit
);
211 init_async_submit(submit
, ASYNC_TX_FENCE
, tx
, NULL
, NULL
, scribble
);
212 tx
= async_mult(dq
, g
, raid6_gfexp
[good
], bytes
, submit
);
214 /* compute P + Pxy */
217 init_async_submit(submit
, ASYNC_TX_FENCE
|ASYNC_TX_XOR_DROP_DST
, tx
,
218 NULL
, NULL
, scribble
);
219 tx
= async_xor(dp
, srcs
, 0, 2, bytes
, submit
);
221 /* compute Q + Qxy */
224 init_async_submit(submit
, ASYNC_TX_FENCE
|ASYNC_TX_XOR_DROP_DST
, tx
,
225 NULL
, NULL
, scribble
);
226 tx
= async_xor(dq
, srcs
, 0, 2, bytes
, submit
);
228 /* Dx = A*(P+Pxy) + B*(Q+Qxy) */
231 coef
[0] = raid6_gfexi
[failb
-faila
];
232 coef
[1] = raid6_gfinv
[raid6_gfexp
[faila
]^raid6_gfexp
[failb
]];
233 init_async_submit(submit
, ASYNC_TX_FENCE
, tx
, NULL
, NULL
, scribble
);
234 tx
= async_sum_product(dq
, srcs
, coef
, bytes
, submit
);
239 init_async_submit(submit
, flags
| ASYNC_TX_XOR_DROP_DST
, tx
, cb_fn
,
241 tx
= async_xor(dp
, srcs
, 0, 2, bytes
, submit
);
246 static struct dma_async_tx_descriptor
*
247 __2data_recov_n(int disks
, size_t bytes
, int faila
, int failb
,
248 struct page
**blocks
, struct async_submit_ctl
*submit
)
250 struct dma_async_tx_descriptor
*tx
= NULL
;
251 struct page
*p
, *q
, *dp
, *dq
;
252 struct page
*srcs
[2];
253 unsigned char coef
[2];
254 enum async_tx_flags flags
= submit
->flags
;
255 dma_async_tx_callback cb_fn
= submit
->cb_fn
;
256 void *cb_param
= submit
->cb_param
;
257 void *scribble
= submit
->scribble
;
262 /* Compute syndrome with zero for the missing data pages
263 * Use the dead data pages as temporary storage for
264 * delta p and delta q
267 blocks
[faila
] = NULL
;
268 blocks
[disks
-2] = dp
;
270 blocks
[failb
] = NULL
;
271 blocks
[disks
-1] = dq
;
273 init_async_submit(submit
, ASYNC_TX_FENCE
, tx
, NULL
, NULL
, scribble
);
274 tx
= async_gen_syndrome(blocks
, 0, disks
, bytes
, submit
);
276 /* Restore pointer table */
282 /* compute P + Pxy */
285 init_async_submit(submit
, ASYNC_TX_FENCE
|ASYNC_TX_XOR_DROP_DST
, tx
,
286 NULL
, NULL
, scribble
);
287 tx
= async_xor(dp
, srcs
, 0, 2, bytes
, submit
);
289 /* compute Q + Qxy */
292 init_async_submit(submit
, ASYNC_TX_FENCE
|ASYNC_TX_XOR_DROP_DST
, tx
,
293 NULL
, NULL
, scribble
);
294 tx
= async_xor(dq
, srcs
, 0, 2, bytes
, submit
);
296 /* Dx = A*(P+Pxy) + B*(Q+Qxy) */
299 coef
[0] = raid6_gfexi
[failb
-faila
];
300 coef
[1] = raid6_gfinv
[raid6_gfexp
[faila
]^raid6_gfexp
[failb
]];
301 init_async_submit(submit
, ASYNC_TX_FENCE
, tx
, NULL
, NULL
, scribble
);
302 tx
= async_sum_product(dq
, srcs
, coef
, bytes
, submit
);
307 init_async_submit(submit
, flags
| ASYNC_TX_XOR_DROP_DST
, tx
, cb_fn
,
309 tx
= async_xor(dp
, srcs
, 0, 2, bytes
, submit
);
315 * async_raid6_2data_recov - asynchronously calculate two missing data blocks
316 * @disks: number of disks in the RAID-6 array
318 * @faila: first failed drive index
319 * @failb: second failed drive index
320 * @blocks: array of source pointers where the last two entries are p and q
321 * @submit: submission/completion modifiers
323 struct dma_async_tx_descriptor
*
324 async_raid6_2data_recov(int disks
, size_t bytes
, int faila
, int failb
,
325 struct page
**blocks
, struct async_submit_ctl
*submit
)
327 int non_zero_srcs
, i
;
329 BUG_ON(faila
== failb
);
333 pr_debug("%s: disks: %d len: %zu\n", __func__
, disks
, bytes
);
335 /* we need to preserve the contents of 'blocks' for the async
336 * case, so punt to synchronous if a scribble buffer is not available
338 if (!submit
->scribble
) {
339 void **ptrs
= (void **) blocks
;
341 async_tx_quiesce(&submit
->depend_tx
);
342 for (i
= 0; i
< disks
; i
++)
343 if (blocks
[i
] == NULL
)
344 ptrs
[i
] = (void *) raid6_empty_zero_page
;
346 ptrs
[i
] = page_address(blocks
[i
]);
348 raid6_2data_recov(disks
, bytes
, faila
, failb
, ptrs
);
350 async_tx_sync_epilog(submit
);
356 for (i
= 0; i
< disks
-2 && non_zero_srcs
< 4; i
++)
359 switch (non_zero_srcs
) {
362 /* There must be at least 2 sources - the failed devices. */
366 /* dma devices do not uniformly understand a zero source pq
367 * operation (in contrast to the synchronous case), so
368 * explicitly handle the special case of a 4 disk array with
369 * both data disks missing.
371 return __2data_recov_4(disks
, bytes
, faila
, failb
, blocks
, submit
);
373 /* dma devices do not uniformly understand a single
374 * source pq operation (in contrast to the synchronous
375 * case), so explicitly handle the special case of a 5 disk
376 * array with 2 of 3 data disks missing.
378 return __2data_recov_5(disks
, bytes
, faila
, failb
, blocks
, submit
);
380 return __2data_recov_n(disks
, bytes
, faila
, failb
, blocks
, submit
);
383 EXPORT_SYMBOL_GPL(async_raid6_2data_recov
);
386 * async_raid6_datap_recov - asynchronously calculate a data and the 'p' block
387 * @disks: number of disks in the RAID-6 array
389 * @faila: failed drive index
390 * @blocks: array of source pointers where the last two entries are p and q
391 * @submit: submission/completion modifiers
393 struct dma_async_tx_descriptor
*
394 async_raid6_datap_recov(int disks
, size_t bytes
, int faila
,
395 struct page
**blocks
, struct async_submit_ctl
*submit
)
397 struct dma_async_tx_descriptor
*tx
= NULL
;
398 struct page
*p
, *q
, *dq
;
400 enum async_tx_flags flags
= submit
->flags
;
401 dma_async_tx_callback cb_fn
= submit
->cb_fn
;
402 void *cb_param
= submit
->cb_param
;
403 void *scribble
= submit
->scribble
;
404 int good_srcs
, good
, i
;
405 struct page
*srcs
[2];
407 pr_debug("%s: disks: %d len: %zu\n", __func__
, disks
, bytes
);
409 /* we need to preserve the contents of 'blocks' for the async
410 * case, so punt to synchronous if a scribble buffer is not available
413 void **ptrs
= (void **) blocks
;
415 async_tx_quiesce(&submit
->depend_tx
);
416 for (i
= 0; i
< disks
; i
++)
417 if (blocks
[i
] == NULL
)
418 ptrs
[i
] = (void*)raid6_empty_zero_page
;
420 ptrs
[i
] = page_address(blocks
[i
]);
422 raid6_datap_recov(disks
, bytes
, faila
, ptrs
);
424 async_tx_sync_epilog(submit
);
431 for (i
= 0; i
< disks
-2; i
++) {
441 BUG_ON(good_srcs
== 0);
446 /* Compute syndrome with zero for the missing data page
447 * Use the dead data page as temporary storage for delta q
450 blocks
[faila
] = NULL
;
451 blocks
[disks
-1] = dq
;
453 /* in the 4-disk case we only need to perform a single source
454 * multiplication with the one good data block.
456 if (good_srcs
== 1) {
457 struct page
*g
= blocks
[good
];
459 init_async_submit(submit
, ASYNC_TX_FENCE
, tx
, NULL
, NULL
,
461 tx
= async_memcpy(p
, g
, 0, 0, bytes
, submit
);
463 init_async_submit(submit
, ASYNC_TX_FENCE
, tx
, NULL
, NULL
,
465 tx
= async_mult(dq
, g
, raid6_gfexp
[good
], bytes
, submit
);
467 init_async_submit(submit
, ASYNC_TX_FENCE
, tx
, NULL
, NULL
,
469 tx
= async_gen_syndrome(blocks
, 0, disks
, bytes
, submit
);
472 /* Restore pointer table */
476 /* calculate g^{-faila} */
477 coef
= raid6_gfinv
[raid6_gfexp
[faila
]];
481 init_async_submit(submit
, ASYNC_TX_FENCE
|ASYNC_TX_XOR_DROP_DST
, tx
,
482 NULL
, NULL
, scribble
);
483 tx
= async_xor(dq
, srcs
, 0, 2, bytes
, submit
);
485 init_async_submit(submit
, ASYNC_TX_FENCE
, tx
, NULL
, NULL
, scribble
);
486 tx
= async_mult(dq
, dq
, coef
, bytes
, submit
);
490 init_async_submit(submit
, flags
| ASYNC_TX_XOR_DROP_DST
, tx
, cb_fn
,
492 tx
= async_xor(p
, srcs
, 0, 2, bytes
, submit
);
496 EXPORT_SYMBOL_GPL(async_raid6_datap_recov
);
498 MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>");
499 MODULE_DESCRIPTION("asynchronous RAID-6 recovery api");
500 MODULE_LICENSE("GPL");