1 /* $NetBSD: rf_pq.c,v 1.15 2005/12/11 12:23:37 christos Exp $ */
3 * Copyright (c) 1995 Carnegie-Mellon University.
6 * Author: Daniel Stodolsky
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
18 * Carnegie Mellon requests users of this software to return to
20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
30 * Code for RAID level 6 (P + Q) disk array architecture.
33 #include <sys/cdefs.h>
34 __KERNEL_RCSID(0, "$NetBSD: rf_pq.c,v 1.15 2005/12/11 12:23:37 christos Exp $");
38 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0)
40 #include <dev/raidframe/raidframevar.h>
44 #include "rf_dagffrd.h"
45 #include "rf_dagffwr.h"
46 #include "rf_dagdegrd.h"
47 #include "rf_dagdegwr.h"
48 #include "rf_dagutils.h"
49 #include "rf_dagfuncs.h"
50 #include "rf_etimer.h"
52 #include "rf_general.h"
56 RF_RedFuncs_t rf_pFuncs
= {rf_RegularONPFunc
, "Regular Old-New P", rf_SimpleONPFunc
, "Simple Old-New P"};
57 RF_RedFuncs_t rf_pRecoveryFuncs
= {rf_RecoveryPFunc
, "Recovery P Func", rf_RecoveryPFunc
, "Recovery P Func"};
60 rf_RegularONPFunc(RF_DagNode_t
*node
)
62 return (rf_RegularXorFunc(node
));
/*
   Same as the simple ONQ func, but the coefficient is always 1.
*/
69 rf_SimpleONPFunc(RF_DagNode_t
*node
)
71 return (rf_SimpleXorFunc(node
));
75 rf_RecoveryPFunc(RF_DagNode_t
*node
)
77 return (rf_RecoveryXorFunc(node
));
81 rf_RegularPFunc(RF_DagNode_t
*node
)
83 return (rf_RegularXorFunc(node
));
85 #endif /* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0) */
86 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
89 QDelta(char *dest
, char *obuf
, char *nbuf
, unsigned length
,
92 rf_InvertQ(unsigned long *qbuf
, unsigned long *abuf
,
93 unsigned length
, unsigned coeff
);
95 RF_RedFuncs_t rf_qFuncs
= {rf_RegularONQFunc
, "Regular Old-New Q", rf_SimpleONQFunc
, "Simple Old-New Q"};
96 RF_RedFuncs_t rf_qRecoveryFuncs
= {rf_RecoveryQFunc
, "Recovery Q Func", rf_RecoveryQFunc
, "Recovery Q Func"};
97 RF_RedFuncs_t rf_pqRecoveryFuncs
= {rf_RecoveryPQFunc
, "Recovery PQ Func", rf_RecoveryPQFunc
, "Recovery PQ Func"};
103 RF_AccessStripeMap_t
* asmap
,
104 RF_VoidFuncPtr
* createFunc
)
106 RF_RaidLayout_t
*layoutPtr
= &(raidPtr
->Layout
);
107 unsigned ndfail
= asmap
->numDataFailed
;
108 unsigned npfail
= asmap
->numParityFailed
;
109 unsigned ntfail
= npfail
+ ndfail
;
111 RF_ASSERT(RF_IO_IS_R_OR_W(type
));
113 RF_ERRORMSG("more than two disks failed in a single group! Aborting I/O operation.\n");
117 /* ok, we can do this I/O */
118 if (type
== RF_IO_TYPE_READ
) {
121 /* fault free read */
122 *createFunc
= (RF_VoidFuncPtr
) rf_CreateFaultFreeReadDAG
; /* same as raid 5 */
125 /* lost a single data unit */
126 /* two cases: (1) parity is not lost. do a normal raid
127 * 5 reconstruct read. (2) parity is lost. do a
128 * reconstruct read using "q". */
129 if (ntfail
== 2) { /* also lost redundancy */
130 if (asmap
->failedPDAs
[1]->type
== RF_PDA_TYPE_PARITY
)
131 *createFunc
= (RF_VoidFuncPtr
) rf_PQ_110_CreateReadDAG
;
133 *createFunc
= (RF_VoidFuncPtr
) rf_PQ_101_CreateReadDAG
;
135 /* P and Q are ok. But is there a failure in
136 * some unaccessed data unit? */
137 if (rf_NumFailedDataUnitsInStripe(raidPtr
, asmap
) == 2)
138 *createFunc
= (RF_VoidFuncPtr
) rf_PQ_200_CreateReadDAG
;
140 *createFunc
= (RF_VoidFuncPtr
) rf_PQ_100_CreateReadDAG
;
144 /* lost two data units */
145 *createFunc
= (RF_VoidFuncPtr
) rf_PQ_200_CreateReadDAG
;
152 case 0: /* fault free */
153 if (rf_suppressLocksAndLargeWrites
||
154 (((asmap
->numStripeUnitsAccessed
<= (layoutPtr
->numDataCol
/ 2)) && (layoutPtr
->numDataCol
!= 1)) ||
155 (asmap
->parityInfo
->next
!= NULL
) || (asmap
->qInfo
->next
!= NULL
) || rf_CheckStripeForFailures(raidPtr
, asmap
))) {
157 *createFunc
= (RF_VoidFuncPtr
) rf_PQCreateSmallWriteDAG
;
159 *createFunc
= (RF_VoidFuncPtr
) rf_PQCreateLargeWriteDAG
;
163 case 1: /* single disk fault */
165 RF_ASSERT((asmap
->failedPDAs
[0]->type
== RF_PDA_TYPE_PARITY
) || (asmap
->failedPDAs
[0]->type
== RF_PDA_TYPE_Q
));
166 if (asmap
->failedPDAs
[0]->type
== RF_PDA_TYPE_Q
) { /* q died, treat like
169 if (((asmap
->numStripeUnitsAccessed
<= (layoutPtr
->numDataCol
/ 2)) || (asmap
->numStripeUnitsAccessed
== 1))
170 || rf_NumFailedDataUnitsInStripe(raidPtr
, asmap
))
171 *createFunc
= (RF_VoidFuncPtr
) rf_PQ_001_CreateSmallWriteDAG
;
173 *createFunc
= (RF_VoidFuncPtr
) rf_PQ_001_CreateLargeWriteDAG
;
174 } else {/* parity died, small write only updating Q */
175 if (((asmap
->numStripeUnitsAccessed
<= (layoutPtr
->numDataCol
/ 2)) || (asmap
->numStripeUnitsAccessed
== 1))
176 || rf_NumFailedDataUnitsInStripe(raidPtr
, asmap
))
177 *createFunc
= (RF_VoidFuncPtr
) rf_PQ_010_CreateSmallWriteDAG
;
179 *createFunc
= (RF_VoidFuncPtr
) rf_PQ_010_CreateLargeWriteDAG
;
181 } else { /* data missing. Do a P reconstruct write if
182 * only a single data unit is lost in the
183 * stripe, otherwise a PQ reconstruct write. */
184 if (rf_NumFailedDataUnitsInStripe(raidPtr
, asmap
) == 2)
185 *createFunc
= (RF_VoidFuncPtr
) rf_PQ_200_CreateWriteDAG
;
187 *createFunc
= (RF_VoidFuncPtr
) rf_PQ_100_CreateWriteDAG
;
191 case 2: /* two disk faults */
193 case 2: /* both p and q dead */
194 *createFunc
= (RF_VoidFuncPtr
) rf_PQ_011_CreateWriteDAG
;
196 case 1: /* either p or q and dead data */
197 RF_ASSERT(asmap
->failedPDAs
[0]->type
== RF_PDA_TYPE_DATA
);
198 RF_ASSERT((asmap
->failedPDAs
[1]->type
== RF_PDA_TYPE_PARITY
) || (asmap
->failedPDAs
[1]->type
== RF_PDA_TYPE_Q
));
199 if (asmap
->failedPDAs
[1]->type
== RF_PDA_TYPE_Q
)
200 *createFunc
= (RF_VoidFuncPtr
) rf_PQ_101_CreateWriteDAG
;
202 *createFunc
= (RF_VoidFuncPtr
) rf_PQ_110_CreateWriteDAG
;
204 case 0: /* double data loss */
205 *createFunc
= (RF_VoidFuncPtr
) rf_PQ_200_CreateWriteDAG
;
210 default: /* more than 2 disk faults */
217 Used as a stop gap info function
221 PQOne(RF_Raid_t
*raidPtr
, int *nSucc
, int *nAnte
, RF_AccessStripeMap_t
*asmap
)
227 PQOneTwo(RF_Raid_t
*raidPtr
, int *nSucc
, int *nAnte
, RF_AccessStripeMap_t
*asmap
)
234 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG
)
236 rf_CommonCreateLargeWriteDAG(raidPtr
, asmap
, dag_h
, bp
, flags
, allocList
, 2,
237 rf_RegularPQFunc
, RF_FALSE
);
241 rf_RegularONQFunc(RF_DagNode_t
*node
)
243 int np
= node
->numParams
;
245 RF_Raid_t
*raidPtr
= (RF_Raid_t
*) node
->params
[np
- 1].p
;
247 RF_AccTraceEntry_t
*tracerec
= node
->dagHdr
->tracerec
;
251 RF_PhysDiskAddr_t
*old
, *new;
253 unsigned secPerSU
= raidPtr
->Layout
.sectorsPerStripeUnit
;
255 RF_ETIMER_START(timer
);
258 RF_ASSERT(4 * d
+ 3 == np
);
259 qbuf
= (char *) node
->params
[2 * d
+ 1].p
; /* q buffer */
260 for (i
= 0; i
< d
; i
++) {
261 old
= (RF_PhysDiskAddr_t
*) node
->params
[2 * i
].p
;
262 obuf
= (char *) node
->params
[2 * i
+ 1].p
;
263 new = (RF_PhysDiskAddr_t
*) node
->params
[2 * (d
+ 1 + i
)].p
;
264 nbuf
= (char *) node
->params
[2 * (d
+ 1 + i
) + 1].p
;
265 RF_ASSERT(new->numSector
== old
->numSector
);
266 RF_ASSERT(new->raidAddress
== old
->raidAddress
);
267 /* the stripe unit within the stripe tells us the coefficient
268 * to use for the multiply. */
269 coeff
= rf_RaidAddressToStripeUnitID(&(raidPtr
->Layout
), new->raidAddress
);
270 /* compute the data unit offset within the column, then add
272 coeff
= (coeff
% raidPtr
->Layout
.numDataCol
);
273 qpbuf
= qbuf
+ rf_RaidAddressToByte(raidPtr
, old
->startSector
% secPerSU
);
274 QDelta(qpbuf
, obuf
, nbuf
, rf_RaidAddressToByte(raidPtr
, old
->numSector
), coeff
);
277 RF_ETIMER_STOP(timer
);
278 RF_ETIMER_EVAL(timer
);
279 tracerec
->q_us
+= RF_ETIMER_VAL_US(timer
);
280 rf_GenericWakeupFunc(node
, 0); /* call wake func explicitly since no
281 * I/O in this node */
/*
   See the SimpleXORFunc for the difference between a simple and regular func.
   These Q functions should be used for

         new q = Q(data, old data, old q)

   style updates and not for

         q = ( new data, new data, .... )

   The simple q takes 2(2d+1)+1 params, where d is the number
   of stripes written. The order of params is

   [0]    old data pda_0, old data buffer_0, old data pda_1, old data buffer_1, ... old data pda_(d-1), old data buffer_(d-1)
   [2d]   old q pda_0, old q buffer
   [2d+2] new data pda_0, new data buffer_0, ... new data pda_(d-1), new data buffer_(d-1)
   [4d+2] raidPtr
*/
305 rf_SimpleONQFunc(RF_DagNode_t
*node
)
307 int np
= node
->numParams
;
309 RF_Raid_t
*raidPtr
= (RF_Raid_t
*) node
->params
[np
- 1].p
;
311 RF_AccTraceEntry_t
*tracerec
= node
->dagHdr
->tracerec
;
315 RF_PhysDiskAddr_t
*old
, *new;
318 RF_ETIMER_START(timer
);
321 RF_ASSERT(4 * d
+ 3 == np
);
322 qbuf
= (char *) node
->params
[2 * d
+ 1].p
; /* q buffer */
323 for (i
= 0; i
< d
; i
++) {
324 old
= (RF_PhysDiskAddr_t
*) node
->params
[2 * i
].p
;
325 obuf
= (char *) node
->params
[2 * i
+ 1].p
;
326 new = (RF_PhysDiskAddr_t
*) node
->params
[2 * (d
+ 1 + i
)].p
;
327 nbuf
= (char *) node
->params
[2 * (d
+ 1 + i
) + 1].p
;
328 RF_ASSERT(new->numSector
== old
->numSector
);
329 RF_ASSERT(new->raidAddress
== old
->raidAddress
);
330 /* the stripe unit within the stripe tells us the coefficient
331 * to use for the multiply. */
332 coeff
= rf_RaidAddressToStripeUnitID(&(raidPtr
->Layout
), new->raidAddress
);
333 /* compute the data unit offset within the column, then add
335 coeff
= (coeff
% raidPtr
->Layout
.numDataCol
);
336 QDelta(qbuf
, obuf
, nbuf
, rf_RaidAddressToByte(raidPtr
, old
->numSector
), coeff
);
339 RF_ETIMER_STOP(timer
);
340 RF_ETIMER_EVAL(timer
);
341 tracerec
->q_us
+= RF_ETIMER_VAL_US(timer
);
342 rf_GenericWakeupFunc(node
, 0); /* call wake func explicitly since no
343 * I/O in this node */
346 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG
)
348 rf_CommonCreateSmallWriteDAG(raidPtr
, asmap
, dag_h
, bp
, flags
, allocList
, &rf_pFuncs
, &rf_qFuncs
);
351 static void RegularQSubr(RF_DagNode_t
*node
, char *qbuf
);
354 RegularQSubr(RF_DagNode_t
*node
, char *qbuf
)
356 int np
= node
->numParams
;
358 RF_Raid_t
*raidPtr
= (RF_Raid_t
*) node
->params
[np
- 1].p
;
359 unsigned secPerSU
= raidPtr
->Layout
.sectorsPerStripeUnit
;
361 RF_AccTraceEntry_t
*tracerec
= node
->dagHdr
->tracerec
;
364 RF_PhysDiskAddr_t
*old
;
367 RF_ETIMER_START(timer
);
370 RF_ASSERT(2 * d
+ 1 == np
);
371 for (i
= 0; i
< d
; i
++) {
372 old
= (RF_PhysDiskAddr_t
*) node
->params
[2 * i
].p
;
373 obuf
= (char *) node
->params
[2 * i
+ 1].p
;
374 coeff
= rf_RaidAddressToStripeUnitID(&(raidPtr
->Layout
), old
->raidAddress
);
375 /* compute the data unit offset within the column, then add
377 coeff
= (coeff
% raidPtr
->Layout
.numDataCol
);
378 /* the input buffers may not all be aligned with the start of
379 * the stripe. so shift by their sector offset within the
381 qpbuf
= qbuf
+ rf_RaidAddressToByte(raidPtr
, old
->startSector
% secPerSU
);
382 rf_IncQ((unsigned long *) qpbuf
, (unsigned long *) obuf
, rf_RaidAddressToByte(raidPtr
, old
->numSector
), coeff
);
385 RF_ETIMER_STOP(timer
);
386 RF_ETIMER_EVAL(timer
);
387 tracerec
->q_us
+= RF_ETIMER_VAL_US(timer
);
390 used in degraded writes.
393 static void DegrQSubr(RF_DagNode_t
*node
);
396 DegrQSubr(RF_DagNode_t
*node
)
398 int np
= node
->numParams
;
400 RF_Raid_t
*raidPtr
= (RF_Raid_t
*) node
->params
[np
- 1].p
;
401 unsigned secPerSU
= raidPtr
->Layout
.sectorsPerStripeUnit
;
403 RF_AccTraceEntry_t
*tracerec
= node
->dagHdr
->tracerec
;
405 char *qbuf
= node
->results
[1];
407 RF_PhysDiskAddr_t
*old
;
412 old
= (RF_PhysDiskAddr_t
*) node
->params
[np
- 2].p
;
413 fail_start
= old
->startSector
% secPerSU
;
415 RF_ETIMER_START(timer
);
418 RF_ASSERT(2 * d
+ 2 == np
);
419 for (i
= 0; i
< d
; i
++) {
420 old
= (RF_PhysDiskAddr_t
*) node
->params
[2 * i
].p
;
421 obuf
= (char *) node
->params
[2 * i
+ 1].p
;
422 coeff
= rf_RaidAddressToStripeUnitID(&(raidPtr
->Layout
), old
->raidAddress
);
423 /* compute the data unit offset within the column, then add
425 coeff
= (coeff
% raidPtr
->Layout
.numDataCol
);
426 /* the input buffers may not all be aligned with the start of
427 * the stripe. so shift by their sector offset within the
429 j
= old
->startSector
% secPerSU
;
430 RF_ASSERT(j
>= fail_start
);
431 qpbuf
= qbuf
+ rf_RaidAddressToByte(raidPtr
, j
- fail_start
);
432 rf_IncQ((unsigned long *) qpbuf
, (unsigned long *) obuf
, rf_RaidAddressToByte(raidPtr
, old
->numSector
), coeff
);
435 RF_ETIMER_STOP(timer
);
436 RF_ETIMER_EVAL(timer
);
437 tracerec
->q_us
+= RF_ETIMER_VAL_US(timer
);
440 Called by large write code to compute the new parity and the new q.
442 structure of the params:
444 pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d ( d = numDataCol
447 for a total of 2d+1 arguments.
448 The result buffers results[0], results[1] are the buffers for the p and q,
451 We compute Q first, then compute P. The P calculation may try to reuse
452 one of the input buffers for its output, so if we computed P first, we would
453 corrupt the input for the q calculation.
457 rf_RegularPQFunc(RF_DagNode_t
*node
)
459 RegularQSubr(node
, node
->results
[1]);
460 return (rf_RegularXorFunc(node
)); /* does the wakeup */
464 rf_RegularQFunc(RF_DagNode_t
*node
)
466 /* Almost ... adjust Qsubr args */
467 RegularQSubr(node
, node
->results
[0]);
468 rf_GenericWakeupFunc(node
, 0); /* call wake func explicitly since no
469 * I/O in this node */
473 Called by singly degraded write code to compute the new parity and the new q.
475 structure of the params:
477 pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d
480 for a total of 2d+2 arguments.
481 The result buffers results[0], results[1] are the buffers for the parity and q,
484 We compute Q first, then compute parity. The parity calculation may try to reuse
485 one of the input buffers for its output, so if we computed parity first, we would
486 corrupt the input for the q calculation.
488 We treat this identically to the regularPQ case, ignoring the failedPDA extra argument.
492 rf_Degraded_100_PQFunc(RF_DagNode_t
*node
)
494 int np
= node
->numParams
;
498 rf_RecoveryXorFunc(node
);
503 The two below are used when reading a stripe with a single lost data unit.
506 pda_0, buffer_0, .... pda_n, buffer_n, P pda, P buffer, failedPDA, raidPtr
508 and results[0] contains the data buffer. Which is originally zero-filled.
512 /* this Q func is used by the degraded-mode dag functions to recover lost data.
513 * the second-to-last parameter is the PDA for the failed portion of the access.
514 * the code here looks at this PDA and assumes that the xor target buffer is
515 * equal in size to the number of sectors in the failed PDA. It then uses
516 * the other PDAs in the parameter list to determine where within the target
517 * buffer the corresponding data should be xored.
519 * Recall the basic equation is
521 * Q = ( data_1 + 2 * data_2 ... + k * data_k ) mod 256
523 * so to recover data_j we need
525 * J data_j = (Q - data_1 - 2 data_2 ....- k* data_k) mod 256
527 * So the coefficient for each buffer is (255 - data_col), and j should be initialized by
528 * copying Q into it. Then we need to do a table lookup to convert to solve
534 rf_RecoveryQFunc(RF_DagNode_t
*node
)
536 RF_Raid_t
*raidPtr
= (RF_Raid_t
*) node
->params
[node
->numParams
- 1].p
;
537 RF_RaidLayout_t
*layoutPtr
= (RF_RaidLayout_t
*) & raidPtr
->Layout
;
538 RF_PhysDiskAddr_t
*failedPDA
= (RF_PhysDiskAddr_t
*) node
->params
[node
->numParams
- 2].p
;
540 RF_PhysDiskAddr_t
*pda
;
541 RF_RaidAddr_t suoffset
, failedSUOffset
= rf_StripeUnitOffset(layoutPtr
, failedPDA
->startSector
);
542 char *srcbuf
, *destbuf
;
543 RF_AccTraceEntry_t
*tracerec
= node
->dagHdr
->tracerec
;
547 RF_ETIMER_START(timer
);
548 /* start by copying Q into the buffer */
549 memcpy(node
->results
[0], node
->params
[node
->numParams
- 3].p
,
550 rf_RaidAddressToByte(raidPtr
, failedPDA
->numSector
));
551 for (i
= 0; i
< node
->numParams
- 4; i
+= 2) {
552 RF_ASSERT(node
->params
[i
+ 1].p
!= node
->results
[0]);
553 pda
= (RF_PhysDiskAddr_t
*) node
->params
[i
].p
;
554 srcbuf
= (char *) node
->params
[i
+ 1].p
;
555 suoffset
= rf_StripeUnitOffset(layoutPtr
, pda
->startSector
);
556 destbuf
= ((char *) node
->results
[0]) + rf_RaidAddressToByte(raidPtr
, suoffset
- failedSUOffset
);
557 coeff
= rf_RaidAddressToStripeUnitID(&(raidPtr
->Layout
), pda
->raidAddress
);
558 /* compute the data unit offset within the column */
559 coeff
= (coeff
% raidPtr
->Layout
.numDataCol
);
560 rf_IncQ((unsigned long *) destbuf
, (unsigned long *) srcbuf
, rf_RaidAddressToByte(raidPtr
, pda
->numSector
), coeff
);
562 /* Do the nasty inversion now */
563 coeff
= (rf_RaidAddressToStripeUnitID(&(raidPtr
->Layout
), failedPDA
->startSector
) % raidPtr
->Layout
.numDataCol
);
564 rf_InvertQ(node
->results
[0], node
->results
[0], rf_RaidAddressToByte(raidPtr
, pda
->numSector
), coeff
);
565 RF_ETIMER_STOP(timer
);
566 RF_ETIMER_EVAL(timer
);
567 tracerec
->q_us
+= RF_ETIMER_VAL_US(timer
);
568 rf_GenericWakeupFunc(node
, 0);
573 rf_RecoveryPQFunc(RF_DagNode_t
*node
)
575 RF_Raid_t
*raidPtr
= (RF_Raid_t
*) node
->params
[node
->numParams
- 1].p
;
576 printf("raid%d: Recovery from PQ not implemented.\n",raidPtr
->raidid
);
580 Degraded write Q subroutine.
582 Large-write style Q computation.
585 (pda,buf),(pda,buf),.....,(failedPDA,bufPtr),failedPDA,raidPtr.
589 This is a "simple style" recovery func.
593 rf_PQ_DegradedWriteQFunc(RF_DagNode_t
*node
)
595 int np
= node
->numParams
;
597 RF_Raid_t
*raidPtr
= (RF_Raid_t
*) node
->params
[np
- 1].p
;
598 unsigned secPerSU
= raidPtr
->Layout
.sectorsPerStripeUnit
;
600 RF_AccTraceEntry_t
*tracerec
= node
->dagHdr
->tracerec
;
602 char *qbuf
= node
->results
[0];
604 RF_PhysDiskAddr_t
*old
;
608 old
= (RF_PhysDiskAddr_t
*) node
->params
[np
- 2].p
;
609 fail_start
= old
->startSector
% secPerSU
;
611 RF_ETIMER_START(timer
);
614 RF_ASSERT(2 * d
+ 2 == np
);
616 for (i
= 0; i
< d
; i
++) {
617 old
= (RF_PhysDiskAddr_t
*) node
->params
[2 * i
].p
;
618 obuf
= (char *) node
->params
[2 * i
+ 1].p
;
619 coeff
= rf_RaidAddressToStripeUnitID(&(raidPtr
->Layout
), old
->raidAddress
);
620 /* compute the data unit offset within the column, then add
622 coeff
= (coeff
% raidPtr
->Layout
.numDataCol
);
623 j
= old
->startSector
% secPerSU
;
624 RF_ASSERT(j
>= fail_start
);
625 qpbuf
= qbuf
+ rf_RaidAddressToByte(raidPtr
, j
- fail_start
);
626 rf_IncQ((unsigned long *) qpbuf
, (unsigned long *) obuf
, rf_RaidAddressToByte(raidPtr
, old
->numSector
), coeff
);
629 RF_ETIMER_STOP(timer
);
630 RF_ETIMER_EVAL(timer
);
631 tracerec
->q_us
+= RF_ETIMER_VAL_US(timer
);
632 rf_GenericWakeupFunc(node
, 0);
643 compute dest ^= qfor[28-coeff][rn[coeff+1] a]
650 rf_IncQ(unsigned long *dest
, unsigned long *buf
, unsigned length
, unsigned coeff
)
652 unsigned long a
, d
, new;
653 unsigned long a1
, a2
;
654 unsigned int *q
= &(rf_qfor
[28 - coeff
][0]);
655 unsigned r
= rf_rn
[coeff
+ 1];
657 #define EXTRACT(a,i) ((a >> (5L*i)) & 0x1f)
658 #define INSERT(a,i) (a << (5L*i))
661 /* 13 5 bit quants in a 64 bit word */
665 a1
= EXTRACT(a
, 0) ^ r
;
666 a2
= EXTRACT(a
, 1) ^ r
;
667 new = INSERT(a2
, 1) | a1
;
668 a1
= EXTRACT(a
, 2) ^ r
;
669 a2
= EXTRACT(a
, 3) ^ r
;
672 new = new | INSERT(a1
, 2) | INSERT(a2
, 3);
673 a1
= EXTRACT(a
, 4) ^ r
;
674 a2
= EXTRACT(a
, 5) ^ r
;
677 new = new | INSERT(a1
, 4) | INSERT(a2
, 5);
678 a1
= EXTRACT(a
, 5) ^ r
;
679 a2
= EXTRACT(a
, 6) ^ r
;
682 new = new | INSERT(a1
, 5) | INSERT(a2
, 6);
684 a1
= EXTRACT(a
, 7) ^ r
;
685 a2
= EXTRACT(a
, 8) ^ r
;
688 new = new | INSERT(a1
, 7) | INSERT(a2
, 8);
689 a1
= EXTRACT(a
, 9) ^ r
;
690 a2
= EXTRACT(a
, 10) ^ r
;
693 new = new | INSERT(a1
, 9) | INSERT(a2
, 10);
694 a1
= EXTRACT(a
, 11) ^ r
;
695 a2
= EXTRACT(a
, 12) ^ r
;
698 new = new | INSERT(a1
, 11) | INSERT(a2
, 12);
699 #endif /* RF_LONGSHIFT > 2 */
708 dest ^= rf_qfor[28-coeff][rf_rn[coeff+1] (old^new) ]
711 optimization: compute old ^ new on 64 bit basis.
724 unsigned long a
, d
, new;
725 unsigned long a1
, a2
;
726 unsigned int *q
= &(rf_qfor
[28 - coeff
][0]);
727 unsigned int r
= rf_rn
[coeff
+ 1];
729 r
= a1
= a2
= new = d
= a
= 0; /* XXX for now... */
730 q
= NULL
; /* XXX for now */
733 /* PQ in kernel currently not supported because the encoding/decoding
734 * table is not present */
735 memset(dest
, 0, length
);
737 /* this code probably doesn't work and should be rewritten -wvcii */
738 /* 13 5 bit quants in a 64 bit word */
741 a
= *obuf
++; /* XXX need to reorg to avoid cache conflicts */
744 a1
= EXTRACT(a
, 0) ^ r
;
745 a2
= EXTRACT(a
, 1) ^ r
;
748 new = INSERT(a2
, 1) | a1
;
749 a1
= EXTRACT(a
, 2) ^ r
;
750 a2
= EXTRACT(a
, 3) ^ r
;
753 new = new | INSERT(a1
, 2) | INSERT(a2
, 3);
754 a1
= EXTRACT(a
, 4) ^ r
;
755 a2
= EXTRACT(a
, 5) ^ r
;
758 new = new | INSERT(a1
, 4) | INSERT(a2
, 5);
759 a1
= EXTRACT(a
, 5) ^ r
;
760 a2
= EXTRACT(a
, 6) ^ r
;
763 new = new | INSERT(a1
, 5) | INSERT(a2
, 6);
765 a1
= EXTRACT(a
, 7) ^ r
;
766 a2
= EXTRACT(a
, 8) ^ r
;
769 new = new | INSERT(a1
, 7) | INSERT(a2
, 8);
770 a1
= EXTRACT(a
, 9) ^ r
;
771 a2
= EXTRACT(a
, 10) ^ r
;
774 new = new | INSERT(a1
, 9) | INSERT(a2
, 10);
775 a1
= EXTRACT(a
, 11) ^ r
;
776 a2
= EXTRACT(a
, 12) ^ r
;
779 new = new | INSERT(a1
, 11) | INSERT(a2
, 12);
780 #endif /* RF_LONGSHIFT > 2 */
788 recover columns a and b from the given p and q into
789 bufs abuf and bbuf. All bufs are word aligned.
797 * Everything about this seems wrong.
800 rf_PQ_recover(unsigned long *pbuf
, unsigned long *qbuf
, unsigned long *abuf
, unsigned long *bbuf
, unsigned length
, unsigned coeff_a
, unsigned coeff_b
)
802 unsigned long p
, q
, a
, a0
, a1
;
803 int col
= (29 * coeff_a
) + coeff_b
;
804 unsigned char *q0
= &(rf_qinv
[col
][0]);
812 a
= q0
[a0
<< 5 | a1
];
816 a = a | INSERT(q0[a0<<5 | a1],i)
838 Lost parity and a data column. Recover that data column.
839 Assume col coeff is lost. Let q the contents of Q after
840 all surviving data columns have been q-xored out of it.
841 Then we have the equation
843 q[28-coeff][a_i ^ r_i+1] = q
845 but q is cyclic with period 31.
846 So q[3+coeff][q[28-coeff][a_i ^ r_{i+1}]] =
847 q[31][a_i ^ r_{i+1}] = a_i ^ r_{i+1} .
849 so a_i = r_{coeff+1} ^ q[3+coeff][q]
851 The routine is passed q buffer and the buffer
852 the data is to be recoverd into. They can be the same.
864 unsigned long a
, new;
865 unsigned long a1
, a2
;
866 unsigned int *q
= &(rf_qfor
[3 + coeff
][0]);
867 unsigned r
= rf_rn
[coeff
+ 1];
869 /* 13 5 bit quants in a 64 bit word */
877 new = INSERT(a2
, 1) | a1
;
883 new = new | INSERT(a1,i) | INSERT(a2,j)
892 #endif /* RF_LONGSHIFT > 2 */
897 #endif /* (RF_INCLUDE_DECL_PQ > 0) ||
898 * (RF_INCLUDE_RAID6 > 0) */