No empty .Rs/.Re
[netbsd-mini2440.git] / sys / dev / raidframe / rf_pq.c
blob28e271273984ada77972e7595c4ee980b1555ed8
1 /* $NetBSD: rf_pq.c,v 1.15 2005/12/11 12:23:37 christos Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
6 * Author: Daniel Stodolsky
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
18 * Carnegie Mellon requests users of this software to return to
20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
30 * Code for RAID level 6 (P + Q) disk array architecture.
33 #include <sys/cdefs.h>
34 __KERNEL_RCSID(0, "$NetBSD: rf_pq.c,v 1.15 2005/12/11 12:23:37 christos Exp $");
36 #include "rf_archs.h"
38 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0)
40 #include <dev/raidframe/raidframevar.h>
42 #include "rf_raid.h"
43 #include "rf_dag.h"
44 #include "rf_dagffrd.h"
45 #include "rf_dagffwr.h"
46 #include "rf_dagdegrd.h"
47 #include "rf_dagdegwr.h"
48 #include "rf_dagutils.h"
49 #include "rf_dagfuncs.h"
50 #include "rf_etimer.h"
51 #include "rf_pqdeg.h"
52 #include "rf_general.h"
53 #include "rf_map.h"
54 #include "rf_pq.h"
56 RF_RedFuncs_t rf_pFuncs = {rf_RegularONPFunc, "Regular Old-New P", rf_SimpleONPFunc, "Simple Old-New P"};
57 RF_RedFuncs_t rf_pRecoveryFuncs = {rf_RecoveryPFunc, "Recovery P Func", rf_RecoveryPFunc, "Recovery P Func"};
59 int
60 rf_RegularONPFunc(RF_DagNode_t *node)
62 return (rf_RegularXorFunc(node));
65 same as simpleONQ func, but the coefficient is always 1
68 int
69 rf_SimpleONPFunc(RF_DagNode_t *node)
71 return (rf_SimpleXorFunc(node));
74 int
75 rf_RecoveryPFunc(RF_DagNode_t *node)
77 return (rf_RecoveryXorFunc(node));
80 int
81 rf_RegularPFunc(RF_DagNode_t *node)
83 return (rf_RegularXorFunc(node));
85 #endif /* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0) */
86 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
88 static void
89 QDelta(char *dest, char *obuf, char *nbuf, unsigned length,
90 unsigned char coeff);
91 static void
92 rf_InvertQ(unsigned long *qbuf, unsigned long *abuf,
93 unsigned length, unsigned coeff);
95 RF_RedFuncs_t rf_qFuncs = {rf_RegularONQFunc, "Regular Old-New Q", rf_SimpleONQFunc, "Simple Old-New Q"};
96 RF_RedFuncs_t rf_qRecoveryFuncs = {rf_RecoveryQFunc, "Recovery Q Func", rf_RecoveryQFunc, "Recovery Q Func"};
97 RF_RedFuncs_t rf_pqRecoveryFuncs = {rf_RecoveryPQFunc, "Recovery PQ Func", rf_RecoveryPQFunc, "Recovery PQ Func"};
99 void
100 rf_PQDagSelect(
101 RF_Raid_t * raidPtr,
102 RF_IoType_t type,
103 RF_AccessStripeMap_t * asmap,
104 RF_VoidFuncPtr * createFunc)
106 RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
107 unsigned ndfail = asmap->numDataFailed;
108 unsigned npfail = asmap->numParityFailed;
109 unsigned ntfail = npfail + ndfail;
111 RF_ASSERT(RF_IO_IS_R_OR_W(type));
112 if (ntfail > 2) {
113 RF_ERRORMSG("more than two disks failed in a single group! Aborting I/O operation.\n");
114 *createFunc = NULL;
115 return;
117 /* ok, we can do this I/O */
118 if (type == RF_IO_TYPE_READ) {
119 switch (ndfail) {
120 case 0:
121 /* fault free read */
122 *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG; /* same as raid 5 */
123 break;
124 case 1:
125 /* lost a single data unit */
126 /* two cases: (1) parity is not lost. do a normal raid
127 * 5 reconstruct read. (2) parity is lost. do a
128 * reconstruct read using "q". */
129 if (ntfail == 2) { /* also lost redundancy */
130 if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY)
131 *createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateReadDAG;
132 else
133 *createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateReadDAG;
134 } else {
135 /* P and Q are ok. But is there a failure in
136 * some unaccessed data unit? */
137 if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
138 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG;
139 else
140 *createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateReadDAG;
142 break;
143 case 2:
144 /* lost two data units */
145 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG;
146 break;
148 return;
150 /* a write */
151 switch (ntfail) {
152 case 0: /* fault free */
153 if (rf_suppressLocksAndLargeWrites ||
154 (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) ||
155 (asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) {
157 *createFunc = (RF_VoidFuncPtr) rf_PQCreateSmallWriteDAG;
158 } else {
159 *createFunc = (RF_VoidFuncPtr) rf_PQCreateLargeWriteDAG;
161 break;
163 case 1: /* single disk fault */
164 if (npfail == 1) {
165 RF_ASSERT((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q));
166 if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) { /* q died, treat like
167 * normal mode raid5
168 * write. */
169 if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
170 || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
171 *createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateSmallWriteDAG;
172 else
173 *createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateLargeWriteDAG;
174 } else {/* parity died, small write only updating Q */
175 if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
176 || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
177 *createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateSmallWriteDAG;
178 else
179 *createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateLargeWriteDAG;
181 } else { /* data missing. Do a P reconstruct write if
182 * only a single data unit is lost in the
183 * stripe, otherwise a PQ reconstruct write. */
184 if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
185 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG;
186 else
187 *createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateWriteDAG;
189 break;
191 case 2: /* two disk faults */
192 switch (npfail) {
193 case 2: /* both p and q dead */
194 *createFunc = (RF_VoidFuncPtr) rf_PQ_011_CreateWriteDAG;
195 break;
196 case 1: /* either p or q and dead data */
197 RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA);
198 RF_ASSERT((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q));
199 if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)
200 *createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateWriteDAG;
201 else
202 *createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateWriteDAG;
203 break;
204 case 0: /* double data loss */
205 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG;
206 break;
208 break;
210 default: /* more than 2 disk faults */
211 *createFunc = NULL;
212 RF_PANIC();
214 return;
/*
 * Used as stop-gap info functions.  Currently compiled out.
 */
#if 0
/* Reports one successor and one antecedent. */
static void
PQOne(RF_Raid_t *raidPtr, int *nSucc, int *nAnte, RF_AccessStripeMap_t *asmap)
{
	*nAnte = 1;
	*nSucc = 1;
}

/* Reports one successor and two antecedents. */
static void
PQOneTwo(RF_Raid_t *raidPtr, int *nSucc, int *nAnte, RF_AccessStripeMap_t *asmap)
{
	*nSucc = 1;
	*nAnte = 2;
}
#endif
234 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG)
236 rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2,
237 rf_RegularPQFunc, RF_FALSE);
241 rf_RegularONQFunc(RF_DagNode_t *node)
243 int np = node->numParams;
244 int d;
245 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
246 int i;
247 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
248 RF_Etimer_t timer;
249 char *qbuf, *qpbuf;
250 char *obuf, *nbuf;
251 RF_PhysDiskAddr_t *old, *new;
252 unsigned long coeff;
253 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
255 RF_ETIMER_START(timer);
257 d = (np - 3) / 4;
258 RF_ASSERT(4 * d + 3 == np);
259 qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */
260 for (i = 0; i < d; i++) {
261 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
262 obuf = (char *) node->params[2 * i + 1].p;
263 new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
264 nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
265 RF_ASSERT(new->numSector == old->numSector);
266 RF_ASSERT(new->raidAddress == old->raidAddress);
267 /* the stripe unit within the stripe tells us the coefficient
268 * to use for the multiply. */
269 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress);
270 /* compute the data unit offset within the column, then add
271 * one */
272 coeff = (coeff % raidPtr->Layout.numDataCol);
273 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU);
274 QDelta(qpbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
277 RF_ETIMER_STOP(timer);
278 RF_ETIMER_EVAL(timer);
279 tracerec->q_us += RF_ETIMER_VAL_US(timer);
280 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no
281 * I/O in this node */
282 return (0);
285 See the SimpleXORFunc for the difference between a simple and regular func.
286 These Q functions should be used for
288 new q = Q(data,old data,old q)
290 style updates and not for
292 q = ( new data, new data, .... )
294 computations.
296 The simple q takes 2(2d+1)+1 params, where d is the number
297 of stripes written. The order of params is
298 old data pda_0, old data buffer_0, old data pda_1, old data buffer_1, ... old data pda_d, old data buffer_d
299 [2d] old q pda_0, old q buffer
300 [2d_2] new data pda_0, new data buffer_0, ... new data pda_d, new data buffer_d
301 raidPtr
305 rf_SimpleONQFunc(RF_DagNode_t *node)
307 int np = node->numParams;
308 int d;
309 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
310 int i;
311 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
312 RF_Etimer_t timer;
313 char *qbuf;
314 char *obuf, *nbuf;
315 RF_PhysDiskAddr_t *old, *new;
316 unsigned long coeff;
318 RF_ETIMER_START(timer);
320 d = (np - 3) / 4;
321 RF_ASSERT(4 * d + 3 == np);
322 qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */
323 for (i = 0; i < d; i++) {
324 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
325 obuf = (char *) node->params[2 * i + 1].p;
326 new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
327 nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
328 RF_ASSERT(new->numSector == old->numSector);
329 RF_ASSERT(new->raidAddress == old->raidAddress);
330 /* the stripe unit within the stripe tells us the coefficient
331 * to use for the multiply. */
332 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress);
333 /* compute the data unit offset within the column, then add
334 * one */
335 coeff = (coeff % raidPtr->Layout.numDataCol);
336 QDelta(qbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
339 RF_ETIMER_STOP(timer);
340 RF_ETIMER_EVAL(timer);
341 tracerec->q_us += RF_ETIMER_VAL_US(timer);
342 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no
343 * I/O in this node */
344 return (0);
346 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG)
348 rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_qFuncs);
351 static void RegularQSubr(RF_DagNode_t *node, char *qbuf);
353 static void
354 RegularQSubr(RF_DagNode_t *node, char *qbuf)
356 int np = node->numParams;
357 int d;
358 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
359 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
360 int i;
361 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
362 RF_Etimer_t timer;
363 char *obuf, *qpbuf;
364 RF_PhysDiskAddr_t *old;
365 unsigned long coeff;
367 RF_ETIMER_START(timer);
369 d = (np - 1) / 2;
370 RF_ASSERT(2 * d + 1 == np);
371 for (i = 0; i < d; i++) {
372 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
373 obuf = (char *) node->params[2 * i + 1].p;
374 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
375 /* compute the data unit offset within the column, then add
376 * one */
377 coeff = (coeff % raidPtr->Layout.numDataCol);
378 /* the input buffers may not all be aligned with the start of
379 * the stripe. so shift by their sector offset within the
380 * stripe unit */
381 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU);
382 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
385 RF_ETIMER_STOP(timer);
386 RF_ETIMER_EVAL(timer);
387 tracerec->q_us += RF_ETIMER_VAL_US(timer);
390 used in degraded writes.
393 static void DegrQSubr(RF_DagNode_t *node);
395 static void
396 DegrQSubr(RF_DagNode_t *node)
398 int np = node->numParams;
399 int d;
400 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
401 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
402 int i;
403 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
404 RF_Etimer_t timer;
405 char *qbuf = node->results[1];
406 char *obuf, *qpbuf;
407 RF_PhysDiskAddr_t *old;
408 unsigned long coeff;
409 unsigned fail_start;
410 int j;
412 old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
413 fail_start = old->startSector % secPerSU;
415 RF_ETIMER_START(timer);
417 d = (np - 2) / 2;
418 RF_ASSERT(2 * d + 2 == np);
419 for (i = 0; i < d; i++) {
420 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
421 obuf = (char *) node->params[2 * i + 1].p;
422 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
423 /* compute the data unit offset within the column, then add
424 * one */
425 coeff = (coeff % raidPtr->Layout.numDataCol);
426 /* the input buffers may not all be aligned with the start of
427 * the stripe. so shift by their sector offset within the
428 * stripe unit */
429 j = old->startSector % secPerSU;
430 RF_ASSERT(j >= fail_start);
431 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
432 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
435 RF_ETIMER_STOP(timer);
436 RF_ETIMER_EVAL(timer);
437 tracerec->q_us += RF_ETIMER_VAL_US(timer);
440 Called by large write code to compute the new parity and the new q.
442 structure of the params:
444 pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d ( d = numDataCol
445 raidPtr
447 for a total of 2d+1 arguments.
448 The result buffers results[0], results[1] are the buffers for the p and q,
449 respectively.
451 We compute Q first, then compute P. The P calculation may try to reuse
452 one of the input buffers for its output, so if we computed P first, we would
453 corrupt the input for the q calculation.
457 rf_RegularPQFunc(RF_DagNode_t *node)
459 RegularQSubr(node, node->results[1]);
460 return (rf_RegularXorFunc(node)); /* does the wakeup */
464 rf_RegularQFunc(RF_DagNode_t *node)
466 /* Almost ... adjust Qsubr args */
467 RegularQSubr(node, node->results[0]);
468 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no
469 * I/O in this node */
470 return (0);
473 Called by singly degraded write code to compute the new parity and the new q.
475 structure of the params:
477 pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d
478 failedPDA raidPtr
480 for a total of 2d+2 arguments.
481 The result buffers results[0], results[1] are the buffers for the parity and q,
482 respectively.
484 We compute Q first, then compute parity. The parity calculation may try to reuse
485 one of the input buffers for its output, so if we computed parity first, we would
486 corrupt the input for the q calculation.
488 We treat this identically to the regularPQ case, ignoring the failedPDA extra argument.
491 void
492 rf_Degraded_100_PQFunc(RF_DagNode_t *node)
494 int np = node->numParams;
496 RF_ASSERT(np >= 2);
497 DegrQSubr(node);
498 rf_RecoveryXorFunc(node);
503 The two below are used when reading a stripe with a single lost data unit.
504 The parameters are
506 pda_0, buffer_0, .... pda_n, buffer_n, P pda, P buffer, failedPDA, raidPtr
508 and results[0] contains the data buffer. Which is originally zero-filled.
512 /* this Q func is used by the degraded-mode dag functions to recover lost data.
513 * the second-to-last parameter is the PDA for the failed portion of the access.
514 * the code here looks at this PDA and assumes that the xor target buffer is
515 * equal in size to the number of sectors in the failed PDA. It then uses
516 * the other PDAs in the parameter list to determine where within the target
517 * buffer the corresponding data should be xored.
519 * Recall the basic equation is
521 * Q = ( data_1 + 2 * data_2 ... + k * data_k ) mod 256
523 * so to recover data_j we need
525 * J data_j = (Q - data_1 - 2 data_2 ....- k* data_k) mod 256
527 * So the coefficient for each buffer is (255 - data_col), and j should be initialized by
528 * copying Q into it. Then we need to do a table lookup to convert to solve
529 * data_j /= J
534 rf_RecoveryQFunc(RF_DagNode_t *node)
536 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
537 RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
538 RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
539 int i;
540 RF_PhysDiskAddr_t *pda;
541 RF_RaidAddr_t suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
542 char *srcbuf, *destbuf;
543 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
544 RF_Etimer_t timer;
545 unsigned long coeff;
547 RF_ETIMER_START(timer);
548 /* start by copying Q into the buffer */
549 memcpy(node->results[0], node->params[node->numParams - 3].p,
550 rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
551 for (i = 0; i < node->numParams - 4; i += 2) {
552 RF_ASSERT(node->params[i + 1].p != node->results[0]);
553 pda = (RF_PhysDiskAddr_t *) node->params[i].p;
554 srcbuf = (char *) node->params[i + 1].p;
555 suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
556 destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
557 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), pda->raidAddress);
558 /* compute the data unit offset within the column */
559 coeff = (coeff % raidPtr->Layout.numDataCol);
560 rf_IncQ((unsigned long *) destbuf, (unsigned long *) srcbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
562 /* Do the nasty inversion now */
563 coeff = (rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), failedPDA->startSector) % raidPtr->Layout.numDataCol);
564 rf_InvertQ(node->results[0], node->results[0], rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
565 RF_ETIMER_STOP(timer);
566 RF_ETIMER_EVAL(timer);
567 tracerec->q_us += RF_ETIMER_VAL_US(timer);
568 rf_GenericWakeupFunc(node, 0);
569 return (0);
573 rf_RecoveryPQFunc(RF_DagNode_t *node)
575 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
576 printf("raid%d: Recovery from PQ not implemented.\n",raidPtr->raidid);
577 return (1);
580 Degraded write Q subroutine.
581 Used when P is dead.
582 Large-write style Q computation.
583 Parameters
585 (pda,buf),(pda,buf),.....,(failedPDA,bufPtr),failedPDA,raidPtr.
587 We ignore failedPDA.
589 This is a "simple style" recovery func.
592 void
593 rf_PQ_DegradedWriteQFunc(RF_DagNode_t *node)
595 int np = node->numParams;
596 int d;
597 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
598 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
599 int i;
600 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
601 RF_Etimer_t timer;
602 char *qbuf = node->results[0];
603 char *obuf, *qpbuf;
604 RF_PhysDiskAddr_t *old;
605 unsigned long coeff;
606 int fail_start, j;
608 old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
609 fail_start = old->startSector % secPerSU;
611 RF_ETIMER_START(timer);
613 d = (np - 2) / 2;
614 RF_ASSERT(2 * d + 2 == np);
616 for (i = 0; i < d; i++) {
617 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
618 obuf = (char *) node->params[2 * i + 1].p;
619 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
620 /* compute the data unit offset within the column, then add
621 * one */
622 coeff = (coeff % raidPtr->Layout.numDataCol);
623 j = old->startSector % secPerSU;
624 RF_ASSERT(j >= fail_start);
625 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
626 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
629 RF_ETIMER_STOP(timer);
630 RF_ETIMER_EVAL(timer);
631 tracerec->q_us += RF_ETIMER_VAL_US(timer);
632 rf_GenericWakeupFunc(node, 0);
638 /* Q computations */
641 coeff - colummn;
643 compute dest ^= qfor[28-coeff][rn[coeff+1] a]
645 on 5-bit basis;
646 length in bytes;
649 void
650 rf_IncQ(unsigned long *dest, unsigned long *buf, unsigned length, unsigned coeff)
652 unsigned long a, d, new;
653 unsigned long a1, a2;
654 unsigned int *q = &(rf_qfor[28 - coeff][0]);
655 unsigned r = rf_rn[coeff + 1];
657 #define EXTRACT(a,i) ((a >> (5L*i)) & 0x1f)
658 #define INSERT(a,i) (a << (5L*i))
660 length /= 8;
661 /* 13 5 bit quants in a 64 bit word */
662 while (length) {
663 a = *buf++;
664 d = *dest;
665 a1 = EXTRACT(a, 0) ^ r;
666 a2 = EXTRACT(a, 1) ^ r;
667 new = INSERT(a2, 1) | a1;
668 a1 = EXTRACT(a, 2) ^ r;
669 a2 = EXTRACT(a, 3) ^ r;
670 a1 = q[a1];
671 a2 = q[a2];
672 new = new | INSERT(a1, 2) | INSERT(a2, 3);
673 a1 = EXTRACT(a, 4) ^ r;
674 a2 = EXTRACT(a, 5) ^ r;
675 a1 = q[a1];
676 a2 = q[a2];
677 new = new | INSERT(a1, 4) | INSERT(a2, 5);
678 a1 = EXTRACT(a, 5) ^ r;
679 a2 = EXTRACT(a, 6) ^ r;
680 a1 = q[a1];
681 a2 = q[a2];
682 new = new | INSERT(a1, 5) | INSERT(a2, 6);
683 #if RF_LONGSHIFT > 2
684 a1 = EXTRACT(a, 7) ^ r;
685 a2 = EXTRACT(a, 8) ^ r;
686 a1 = q[a1];
687 a2 = q[a2];
688 new = new | INSERT(a1, 7) | INSERT(a2, 8);
689 a1 = EXTRACT(a, 9) ^ r;
690 a2 = EXTRACT(a, 10) ^ r;
691 a1 = q[a1];
692 a2 = q[a2];
693 new = new | INSERT(a1, 9) | INSERT(a2, 10);
694 a1 = EXTRACT(a, 11) ^ r;
695 a2 = EXTRACT(a, 12) ^ r;
696 a1 = q[a1];
697 a2 = q[a2];
698 new = new | INSERT(a1, 11) | INSERT(a2, 12);
699 #endif /* RF_LONGSHIFT > 2 */
700 d ^= new;
701 *dest++ = d;
702 length--;
706 compute
708 dest ^= rf_qfor[28-coeff][rf_rn[coeff+1] (old^new) ]
710 on a five bit basis.
711 optimization: compute old ^ new on 64 bit basis.
713 length in bytes.
716 static void
717 QDelta(
718 char *dest,
719 char *obuf,
720 char *nbuf,
721 unsigned length,
722 unsigned char coeff)
724 unsigned long a, d, new;
725 unsigned long a1, a2;
726 unsigned int *q = &(rf_qfor[28 - coeff][0]);
727 unsigned int r = rf_rn[coeff + 1];
729 r = a1 = a2 = new = d = a = 0; /* XXX for now... */
730 q = NULL; /* XXX for now */
732 #ifdef _KERNEL
733 /* PQ in kernel currently not supported because the encoding/decoding
734 * table is not present */
735 memset(dest, 0, length);
736 #else /* KERNEL */
737 /* this code probably doesn't work and should be rewritten -wvcii */
738 /* 13 5 bit quants in a 64 bit word */
739 length /= 8;
740 while (length) {
741 a = *obuf++; /* XXX need to reorg to avoid cache conflicts */
742 a ^= *nbuf++;
743 d = *dest;
744 a1 = EXTRACT(a, 0) ^ r;
745 a2 = EXTRACT(a, 1) ^ r;
746 a1 = q[a1];
747 a2 = q[a2];
748 new = INSERT(a2, 1) | a1;
749 a1 = EXTRACT(a, 2) ^ r;
750 a2 = EXTRACT(a, 3) ^ r;
751 a1 = q[a1];
752 a2 = q[a2];
753 new = new | INSERT(a1, 2) | INSERT(a2, 3);
754 a1 = EXTRACT(a, 4) ^ r;
755 a2 = EXTRACT(a, 5) ^ r;
756 a1 = q[a1];
757 a2 = q[a2];
758 new = new | INSERT(a1, 4) | INSERT(a2, 5);
759 a1 = EXTRACT(a, 5) ^ r;
760 a2 = EXTRACT(a, 6) ^ r;
761 a1 = q[a1];
762 a2 = q[a2];
763 new = new | INSERT(a1, 5) | INSERT(a2, 6);
764 #if RF_LONGSHIFT > 2
765 a1 = EXTRACT(a, 7) ^ r;
766 a2 = EXTRACT(a, 8) ^ r;
767 a1 = q[a1];
768 a2 = q[a2];
769 new = new | INSERT(a1, 7) | INSERT(a2, 8);
770 a1 = EXTRACT(a, 9) ^ r;
771 a2 = EXTRACT(a, 10) ^ r;
772 a1 = q[a1];
773 a2 = q[a2];
774 new = new | INSERT(a1, 9) | INSERT(a2, 10);
775 a1 = EXTRACT(a, 11) ^ r;
776 a2 = EXTRACT(a, 12) ^ r;
777 a1 = q[a1];
778 a2 = q[a2];
779 new = new | INSERT(a1, 11) | INSERT(a2, 12);
780 #endif /* RF_LONGSHIFT > 2 */
781 d ^= new;
782 *dest++ = d;
783 length--;
785 #endif /* _KERNEL */
788 recover columns a and b from the given p and q into
789 bufs abuf and bbuf. All bufs are word aligned.
790 Length is in bytes.
795 * XXX
797 * Everything about this seems wrong.
799 void
800 rf_PQ_recover(unsigned long *pbuf, unsigned long *qbuf, unsigned long *abuf, unsigned long *bbuf, unsigned length, unsigned coeff_a, unsigned coeff_b)
802 unsigned long p, q, a, a0, a1;
803 int col = (29 * coeff_a) + coeff_b;
804 unsigned char *q0 = &(rf_qinv[col][0]);
806 length /= 8;
807 while (length) {
808 p = *pbuf++;
809 q = *qbuf++;
810 a0 = EXTRACT(p, 0);
811 a1 = EXTRACT(q, 0);
812 a = q0[a0 << 5 | a1];
813 #define MF(i) \
814 a0 = EXTRACT(p,i); \
815 a1 = EXTRACT(q,i); \
816 a = a | INSERT(q0[a0<<5 | a1],i)
818 MF(1);
819 MF(2);
820 MF(3);
821 MF(4);
822 MF(5);
823 MF(6);
824 #if 0
825 MF(7);
826 MF(8);
827 MF(9);
828 MF(10);
829 MF(11);
830 MF(12);
831 #endif /* 0 */
832 *abuf++ = a;
833 *bbuf++ = a ^ p;
834 length--;
838 Lost parity and a data column. Recover that data column.
839 Assume col coeff is lost. Let q the contents of Q after
840 all surviving data columns have been q-xored out of it.
841 Then we have the equation
843 q[28-coeff][a_i ^ r_i+1] = q
845 but q is cyclic with period 31.
846 So q[3+coeff][q[28-coeff][a_i ^ r_{i+1}]] =
847 q[31][a_i ^ r_{i+1}] = a_i ^ r_{i+1} .
849 so a_i = r_{coeff+1} ^ q[3+coeff][q]
851 The routine is passed q buffer and the buffer
852 the data is to be recoverd into. They can be the same.
857 static void
858 rf_InvertQ(
859 unsigned long *qbuf,
860 unsigned long *abuf,
861 unsigned length,
862 unsigned coeff)
864 unsigned long a, new;
865 unsigned long a1, a2;
866 unsigned int *q = &(rf_qfor[3 + coeff][0]);
867 unsigned r = rf_rn[coeff + 1];
869 /* 13 5 bit quants in a 64 bit word */
870 length /= 8;
871 while (length) {
872 a = *qbuf++;
873 a1 = EXTRACT(a, 0);
874 a2 = EXTRACT(a, 1);
875 a1 = r ^ q[a1];
876 a2 = r ^ q[a2];
877 new = INSERT(a2, 1) | a1;
878 #define M(i,j) \
879 a1 = EXTRACT(a,i); \
880 a2 = EXTRACT(a,j); \
881 a1 = r ^ q[a1]; \
882 a2 = r ^ q[a2]; \
883 new = new | INSERT(a1,i) | INSERT(a2,j)
885 M(2, 3);
886 M(4, 5);
887 M(5, 6);
888 #if RF_LONGSHIFT > 2
889 M(7, 8);
890 M(9, 10);
891 M(11, 12);
892 #endif /* RF_LONGSHIFT > 2 */
893 *abuf++ = new;
894 length--;
897 #endif /* (RF_INCLUDE_DECL_PQ > 0) ||
898 * (RF_INCLUDE_RAID6 > 0) */