No empty .Rs/.Re
[netbsd-mini2440.git] / sys / dev / raidframe / rf_dagffwr.c
blob80f780275644697118d35f8d57e3758870e8a42b
1 /* $NetBSD: rf_dagffwr.c,v 1.32 2006/10/12 01:31:50 christos Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
6 * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
18 * Carnegie Mellon requests users of this software to return to
20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
30 * rf_dagffwr.c
32 * code for creating fault-free write DAGs
36 #include <sys/cdefs.h>
37 __KERNEL_RCSID(0, "$NetBSD: rf_dagffwr.c,v 1.32 2006/10/12 01:31:50 christos Exp $");
39 #include <dev/raidframe/raidframevar.h>
41 #include "rf_raid.h"
42 #include "rf_dag.h"
43 #include "rf_dagutils.h"
44 #include "rf_dagfuncs.h"
45 #include "rf_debugMem.h"
46 #include "rf_dagffrd.h"
47 #include "rf_general.h"
48 #include "rf_dagffwr.h"
49 #include "rf_map.h"
51 /******************************************************************************
53 * General comments on DAG creation:
55 * All DAGs in this file use roll-away error recovery. Each DAG has a single
56 * commit node, usually called "Cmt." If an error occurs before the Cmt node
57 * is reached, the execution engine will halt forward execution and work
58 * backward through the graph, executing the undo functions. Assuming that
59 * each node in the graph prior to the Cmt node is undoable and atomic - or -
60 * does not make changes to permanent state, the graph will fail atomically.
61 * If an error occurs after the Cmt node executes, the engine will roll-forward
62 * through the graph, blindly executing nodes until it reaches the end.
63 * If a graph reaches the end, it is assumed to have completed successfully.
65 * A graph has only 1 Cmt node.
70 /******************************************************************************
72 * The following wrappers map the standard DAG creation interface to the
73 * DAG creation routines. Additionally, these wrappers enable experimentation
74 * with new DAG structures by providing an extra level of indirection, allowing
75 * the DAG creation routines to be replaced at this single point.
79 void
80 rf_CreateNonRedundantWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
81 RF_DagHeader_t *dag_h, void *bp,
82 RF_RaidAccessFlags_t flags,
83 RF_AllocListElem_t *allocList,
84 RF_IoType_t type)
86 rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
87 RF_IO_TYPE_WRITE);
90 void
91 rf_CreateRAID0WriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
92 RF_DagHeader_t *dag_h, void *bp,
93 RF_RaidAccessFlags_t flags,
94 RF_AllocListElem_t *allocList,
95 RF_IoType_t type)
97 rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
98 RF_IO_TYPE_WRITE);
101 void
102 rf_CreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
103 RF_DagHeader_t *dag_h, void *bp,
104 RF_RaidAccessFlags_t flags,
105 RF_AllocListElem_t *allocList)
107 /* "normal" rollaway */
108 rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags,
109 allocList, &rf_xorFuncs, NULL);
112 void
113 rf_CreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
114 RF_DagHeader_t *dag_h, void *bp,
115 RF_RaidAccessFlags_t flags,
116 RF_AllocListElem_t *allocList)
118 /* "normal" rollaway */
119 rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags,
120 allocList, 1, rf_RegularXorFunc, RF_TRUE);
124 /******************************************************************************
126 * DAG creation code begins here
130 /******************************************************************************
132 * creates a DAG to perform a large-write operation:
134 * / Rod \ / Wnd \
135 * H -- block- Rod - Xor - Cmt - Wnd --- T
136 * \ Rod / \ Wnp /
137 * \[Wnq]/
139 * The XOR node also does the Q calculation in the P+Q architecture.
140 * All nodes before the commit node (Cmt) are assumed to be atomic and
141 * undoable - or - they make no changes to permanent state.
143 * Rod = read old data
144 * Cmt = commit node
145 * Wnp = write new parity
146 * Wnd = write new data
147 * Wnq = write new "q"
148 * [] denotes optional segments in the graph
150 * Parameters: raidPtr - description of the physical array
151 * asmap - logical & physical addresses for this access
152 * bp - buffer ptr (holds write data)
153 * flags - general flags (e.g. disk locking)
154 * allocList - list of memory allocated in DAG creation
155 * nfaults - number of faults array can tolerate
156 * (equal to # redundancy units in stripe)
157 * redFunc - redundancy generating function run by the Xor node
159 *****************************************************************************/
161 void
162 rf_CommonCreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
163 RF_DagHeader_t *dag_h, void *bp,
164 RF_RaidAccessFlags_t flags,
165 RF_AllocListElem_t *allocList,
166 int nfaults, int (*redFunc) (RF_DagNode_t *),
167 int allowBufferRecycle)
169 RF_DagNode_t *wndNodes, *rodNodes, *xorNode, *wnpNode, *tmpNode;
170 RF_DagNode_t *wnqNode, *blockNode, *commitNode, *termNode;
171 int nWndNodes, nRodNodes, i, nodeNum, asmNum;
172 RF_AccessStripeMapHeader_t *new_asm_h[2];
173 RF_StripeNum_t parityStripeID;
174 char *sosBuffer, *eosBuffer;
175 RF_ReconUnitNum_t which_ru;
176 RF_RaidLayout_t *layoutPtr;
177 RF_PhysDiskAddr_t *pda;
179 layoutPtr = &(raidPtr->Layout);
180 parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr,
181 asmap->raidAddress,
182 &which_ru);
184 #if RF_DEBUG_DAG
185 if (rf_dagDebug) {
186 printf("[Creating large-write DAG]\n");
188 #endif
189 dag_h->creator = "LargeWriteDAG";
191 dag_h->numCommitNodes = 1;
192 dag_h->numCommits = 0;
193 dag_h->numSuccedents = 1;
195 /* alloc the nodes: Wnd, xor, commit, block, term, and Wnp */
196 nWndNodes = asmap->numStripeUnitsAccessed;
198 for (i = 0; i < nWndNodes; i++) {
199 tmpNode = rf_AllocDAGNode();
200 tmpNode->list_next = dag_h->nodes;
201 dag_h->nodes = tmpNode;
203 wndNodes = dag_h->nodes;
205 xorNode = rf_AllocDAGNode();
206 xorNode->list_next = dag_h->nodes;
207 dag_h->nodes = xorNode;
209 wnpNode = rf_AllocDAGNode();
210 wnpNode->list_next = dag_h->nodes;
211 dag_h->nodes = wnpNode;
213 blockNode = rf_AllocDAGNode();
214 blockNode->list_next = dag_h->nodes;
215 dag_h->nodes = blockNode;
217 commitNode = rf_AllocDAGNode();
218 commitNode->list_next = dag_h->nodes;
219 dag_h->nodes = commitNode;
221 termNode = rf_AllocDAGNode();
222 termNode->list_next = dag_h->nodes;
223 dag_h->nodes = termNode;
225 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
226 if (nfaults == 2) {
227 wnqNode = rf_AllocDAGNode();
228 } else {
229 #endif
230 wnqNode = NULL;
231 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
233 #endif
234 rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h,
235 new_asm_h, &nRodNodes, &sosBuffer,
236 &eosBuffer, allocList);
237 if (nRodNodes > 0) {
238 for (i = 0; i < nRodNodes; i++) {
239 tmpNode = rf_AllocDAGNode();
240 tmpNode->list_next = dag_h->nodes;
241 dag_h->nodes = tmpNode;
243 rodNodes = dag_h->nodes;
244 } else {
245 rodNodes = NULL;
248 /* begin node initialization */
249 if (nRodNodes > 0) {
250 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
251 rf_NullNodeUndoFunc, NULL, nRodNodes, 0, 0, 0,
252 dag_h, "Nil", allocList);
253 } else {
254 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
255 rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0,
256 dag_h, "Nil", allocList);
259 rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
260 rf_NullNodeUndoFunc, NULL, nWndNodes + nfaults, 1, 0, 0,
261 dag_h, "Cmt", allocList);
262 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
263 rf_TerminateUndoFunc, NULL, 0, nWndNodes + nfaults, 0, 0,
264 dag_h, "Trm", allocList);
266 /* initialize the Rod nodes */
267 tmpNode = rodNodes;
268 for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) {
269 if (new_asm_h[asmNum]) {
270 pda = new_asm_h[asmNum]->stripeMap->physInfo;
271 while (pda) {
272 rf_InitNode(tmpNode, rf_wait,
273 RF_FALSE, rf_DiskReadFunc,
274 rf_DiskReadUndoFunc,
275 rf_GenericWakeupFunc,
276 1, 1, 4, 0, dag_h,
277 "Rod", allocList);
278 tmpNode->params[0].p = pda;
279 tmpNode->params[1].p = pda->bufPtr;
280 tmpNode->params[2].v = parityStripeID;
281 tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
282 which_ru);
283 nodeNum++;
284 pda = pda->next;
285 tmpNode = tmpNode->list_next;
289 RF_ASSERT(nodeNum == nRodNodes);
291 /* initialize the wnd nodes */
292 pda = asmap->physInfo;
293 tmpNode = wndNodes;
294 for (i = 0; i < nWndNodes; i++) {
295 rf_InitNode(tmpNode, rf_wait, RF_FALSE,
296 rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
297 rf_GenericWakeupFunc, 1, 1, 4, 0,
298 dag_h, "Wnd", allocList);
299 RF_ASSERT(pda != NULL);
300 tmpNode->params[0].p = pda;
301 tmpNode->params[1].p = pda->bufPtr;
302 tmpNode->params[2].v = parityStripeID;
303 tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
304 pda = pda->next;
305 tmpNode = tmpNode->list_next;
308 /* initialize the redundancy node */
309 if (nRodNodes > 0) {
310 rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
311 rf_NullNodeUndoFunc, NULL, 1,
312 nRodNodes, 2 * (nWndNodes + nRodNodes) + 1,
313 nfaults, dag_h, "Xr ", allocList);
314 } else {
315 rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
316 rf_NullNodeUndoFunc, NULL, 1,
317 1, 2 * (nWndNodes + nRodNodes) + 1,
318 nfaults, dag_h, "Xr ", allocList);
320 xorNode->flags |= RF_DAGNODE_FLAG_YIELD;
321 tmpNode = wndNodes;
322 for (i = 0; i < nWndNodes; i++) {
323 /* pda */
324 xorNode->params[2 * i + 0] = tmpNode->params[0];
325 /* buf ptr */
326 xorNode->params[2 * i + 1] = tmpNode->params[1];
327 tmpNode = tmpNode->list_next;
329 tmpNode = rodNodes;
330 for (i = 0; i < nRodNodes; i++) {
331 /* pda */
332 xorNode->params[2 * (nWndNodes + i) + 0] = tmpNode->params[0];
333 /* buf ptr */
334 xorNode->params[2 * (nWndNodes + i) + 1] = tmpNode->params[1];
335 tmpNode = tmpNode->list_next;
337 /* xor node needs to get at RAID information */
338 xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr;
341 * Look for an Rod node that reads a complete SU. If none,
342 * alloc a buffer to receive the parity info. Note that we
343 * can't use a new data buffer because it will not have gotten
344 * written when the xor occurs. */
345 if (allowBufferRecycle) {
346 tmpNode = rodNodes;
347 for (i = 0; i < nRodNodes; i++) {
348 if (((RF_PhysDiskAddr_t *) tmpNode->params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit)
349 break;
350 tmpNode = tmpNode->list_next;
353 if ((!allowBufferRecycle) || (i == nRodNodes)) {
354 xorNode->results[0] = rf_AllocBuffer(raidPtr, dag_h, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit));
355 } else {
356 /* this works because the only way we get here is if
357 allowBufferRecycle is true and we went through the
358 above for loop, and exited via the break before
359 i==nRodNodes was true. That means tmpNode will
360 still point to a valid node -- the one we want for
361 here! */
362 xorNode->results[0] = tmpNode->params[1].p;
365 /* initialize the Wnp node */
366 rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
367 rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
368 dag_h, "Wnp", allocList);
369 wnpNode->params[0].p = asmap->parityInfo;
370 wnpNode->params[1].p = xorNode->results[0];
371 wnpNode->params[2].v = parityStripeID;
372 wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
373 /* parityInfo must describe entire parity unit */
374 RF_ASSERT(asmap->parityInfo->next == NULL);
376 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
377 if (nfaults == 2) {
379 * We never try to recycle a buffer for the Q calcuation
380 * in addition to the parity. This would cause two buffers
381 * to get smashed during the P and Q calculation, guaranteeing
382 * one would be wrong.
384 RF_MallocAndAdd(xorNode->results[1],
385 rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit),
386 (void *), allocList);
387 rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
388 rf_DiskWriteUndoFunc, rf_GenericWakeupFunc,
389 1, 1, 4, 0, dag_h, "Wnq", allocList);
390 wnqNode->params[0].p = asmap->qInfo;
391 wnqNode->params[1].p = xorNode->results[1];
392 wnqNode->params[2].v = parityStripeID;
393 wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
394 /* parityInfo must describe entire parity unit */
395 RF_ASSERT(asmap->parityInfo->next == NULL);
397 #endif
399 * Connect nodes to form graph.
402 /* connect dag header to block node */
403 RF_ASSERT(blockNode->numAntecedents == 0);
404 dag_h->succedents[0] = blockNode;
406 if (nRodNodes > 0) {
407 /* connect the block node to the Rod nodes */
408 RF_ASSERT(blockNode->numSuccedents == nRodNodes);
409 RF_ASSERT(xorNode->numAntecedents == nRodNodes);
410 tmpNode = rodNodes;
411 for (i = 0; i < nRodNodes; i++) {
412 RF_ASSERT(tmpNode->numAntecedents == 1);
413 blockNode->succedents[i] = tmpNode;
414 tmpNode->antecedents[0] = blockNode;
415 tmpNode->antType[0] = rf_control;
417 /* connect the Rod nodes to the Xor node */
418 RF_ASSERT(tmpNode->numSuccedents == 1);
419 tmpNode->succedents[0] = xorNode;
420 xorNode->antecedents[i] = tmpNode;
421 xorNode->antType[i] = rf_trueData;
422 tmpNode = tmpNode->list_next;
424 } else {
425 /* connect the block node to the Xor node */
426 RF_ASSERT(blockNode->numSuccedents == 1);
427 RF_ASSERT(xorNode->numAntecedents == 1);
428 blockNode->succedents[0] = xorNode;
429 xorNode->antecedents[0] = blockNode;
430 xorNode->antType[0] = rf_control;
433 /* connect the xor node to the commit node */
434 RF_ASSERT(xorNode->numSuccedents == 1);
435 RF_ASSERT(commitNode->numAntecedents == 1);
436 xorNode->succedents[0] = commitNode;
437 commitNode->antecedents[0] = xorNode;
438 commitNode->antType[0] = rf_control;
440 /* connect the commit node to the write nodes */
441 RF_ASSERT(commitNode->numSuccedents == nWndNodes + nfaults);
442 tmpNode = wndNodes;
443 for (i = 0; i < nWndNodes; i++) {
444 RF_ASSERT(wndNodes->numAntecedents == 1);
445 commitNode->succedents[i] = tmpNode;
446 tmpNode->antecedents[0] = commitNode;
447 tmpNode->antType[0] = rf_control;
448 tmpNode = tmpNode->list_next;
450 RF_ASSERT(wnpNode->numAntecedents == 1);
451 commitNode->succedents[nWndNodes] = wnpNode;
452 wnpNode->antecedents[0] = commitNode;
453 wnpNode->antType[0] = rf_trueData;
454 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
455 if (nfaults == 2) {
456 RF_ASSERT(wnqNode->numAntecedents == 1);
457 commitNode->succedents[nWndNodes + 1] = wnqNode;
458 wnqNode->antecedents[0] = commitNode;
459 wnqNode->antType[0] = rf_trueData;
461 #endif
462 /* connect the write nodes to the term node */
463 RF_ASSERT(termNode->numAntecedents == nWndNodes + nfaults);
464 RF_ASSERT(termNode->numSuccedents == 0);
465 tmpNode = wndNodes;
466 for (i = 0; i < nWndNodes; i++) {
467 RF_ASSERT(wndNodes->numSuccedents == 1);
468 tmpNode->succedents[0] = termNode;
469 termNode->antecedents[i] = tmpNode;
470 termNode->antType[i] = rf_control;
471 tmpNode = tmpNode->list_next;
473 RF_ASSERT(wnpNode->numSuccedents == 1);
474 wnpNode->succedents[0] = termNode;
475 termNode->antecedents[nWndNodes] = wnpNode;
476 termNode->antType[nWndNodes] = rf_control;
477 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
478 if (nfaults == 2) {
479 RF_ASSERT(wnqNode->numSuccedents == 1);
480 wnqNode->succedents[0] = termNode;
481 termNode->antecedents[nWndNodes + 1] = wnqNode;
482 termNode->antType[nWndNodes + 1] = rf_control;
484 #endif
486 /******************************************************************************
488 * creates a DAG to perform a small-write operation (either raid 5 or pq),
489 * which is as follows:
491 * Hdr -> Nil -> Rop -> Xor -> Cmt ----> Wnp [Unp] --> Trm
492 * \- Rod X / \----> Wnd [Und]-/
493 * [\- Rod X / \---> Wnd [Und]-/]
494 * [\- Roq -> Q / \--> Wnq [Unq]-/]
496 * Rop = read old parity
497 * Rod = read old data
498 * Roq = read old "q"
499 * Cmt = commit node
500 * Und = unlock data disk
501 * Unp = unlock parity disk
502 * Unq = unlock q disk
503 * Wnp = write new parity
504 * Wnd = write new data
505 * Wnq = write new "q"
506 * [ ] denotes optional segments in the graph
508 * Parameters: raidPtr - description of the physical array
509 * asmap - logical & physical addresses for this access
510 * bp - buffer ptr (holds write data)
511 * flags - general flags (e.g. disk locking)
512 * allocList - list of memory allocated in DAG creation
513 * pfuncs - list of parity generating functions
514 * qfuncs - list of q generating functions
516 * A null qfuncs indicates single fault tolerant
517 *****************************************************************************/
519 void
520 rf_CommonCreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
521 RF_DagHeader_t *dag_h, void *bp,
522 RF_RaidAccessFlags_t flags,
523 RF_AllocListElem_t *allocList,
524 const RF_RedFuncs_t *pfuncs,
525 const RF_RedFuncs_t *qfuncs)
527 RF_DagNode_t *readDataNodes, *readParityNodes, *readQNodes, *termNode;
528 RF_DagNode_t *tmpNode, *tmpreadDataNode, *tmpreadParityNode;
529 RF_DagNode_t *xorNodes, *qNodes, *blockNode, *commitNode;
530 RF_DagNode_t *writeDataNodes, *writeParityNodes, *writeQNodes;
531 RF_DagNode_t *tmpxorNode, *tmpqNode, *tmpwriteDataNode, *tmpreadQNode;
532 RF_DagNode_t *tmpwriteParityNode;
533 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
534 RF_DagNode_t *tmpwriteQNode;
535 #endif
536 int i, j, nNodes, totalNumNodes;
537 RF_ReconUnitNum_t which_ru;
538 int (*func) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *);
539 int (*qfunc) (RF_DagNode_t *);
540 int numDataNodes, numParityNodes;
541 RF_StripeNum_t parityStripeID;
542 RF_PhysDiskAddr_t *pda;
543 const char *name, *qname;
544 long nfaults;
546 nfaults = qfuncs ? 2 : 1;
548 parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
549 asmap->raidAddress, &which_ru);
550 pda = asmap->physInfo;
551 numDataNodes = asmap->numStripeUnitsAccessed;
552 numParityNodes = (asmap->parityInfo->next) ? 2 : 1;
554 #if RF_DEBUG_DAG
555 if (rf_dagDebug) {
556 printf("[Creating small-write DAG]\n");
558 #endif
559 RF_ASSERT(numDataNodes > 0);
560 dag_h->creator = "SmallWriteDAG";
562 dag_h->numCommitNodes = 1;
563 dag_h->numCommits = 0;
564 dag_h->numSuccedents = 1;
567 * DAG creation occurs in four steps:
568 * 1. count the number of nodes in the DAG
569 * 2. create the nodes
570 * 3. initialize the nodes
571 * 4. connect the nodes
575 * Step 1. compute number of nodes in the graph
578 /* number of nodes: a read and write for each data unit a
579 * redundancy computation node for each parity node (nfaults *
580 * nparity) a read and write for each parity unit a block and
581 * commit node (2) a terminate node if atomic RMW an unlock
582 * node for each data unit, redundancy unit */
583 totalNumNodes = (2 * numDataNodes) + (nfaults * numParityNodes)
584 + (nfaults * 2 * numParityNodes) + 3;
586 * Step 2. create the nodes
589 blockNode = rf_AllocDAGNode();
590 blockNode->list_next = dag_h->nodes;
591 dag_h->nodes = blockNode;
593 commitNode = rf_AllocDAGNode();
594 commitNode->list_next = dag_h->nodes;
595 dag_h->nodes = commitNode;
597 for (i = 0; i < numDataNodes; i++) {
598 tmpNode = rf_AllocDAGNode();
599 tmpNode->list_next = dag_h->nodes;
600 dag_h->nodes = tmpNode;
602 readDataNodes = dag_h->nodes;
604 for (i = 0; i < numParityNodes; i++) {
605 tmpNode = rf_AllocDAGNode();
606 tmpNode->list_next = dag_h->nodes;
607 dag_h->nodes = tmpNode;
609 readParityNodes = dag_h->nodes;
611 for (i = 0; i < numDataNodes; i++) {
612 tmpNode = rf_AllocDAGNode();
613 tmpNode->list_next = dag_h->nodes;
614 dag_h->nodes = tmpNode;
616 writeDataNodes = dag_h->nodes;
618 for (i = 0; i < numParityNodes; i++) {
619 tmpNode = rf_AllocDAGNode();
620 tmpNode->list_next = dag_h->nodes;
621 dag_h->nodes = tmpNode;
623 writeParityNodes = dag_h->nodes;
625 for (i = 0; i < numParityNodes; i++) {
626 tmpNode = rf_AllocDAGNode();
627 tmpNode->list_next = dag_h->nodes;
628 dag_h->nodes = tmpNode;
630 xorNodes = dag_h->nodes;
632 termNode = rf_AllocDAGNode();
633 termNode->list_next = dag_h->nodes;
634 dag_h->nodes = termNode;
636 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
637 if (nfaults == 2) {
638 for (i = 0; i < numParityNodes; i++) {
639 tmpNode = rf_AllocDAGNode();
640 tmpNode->list_next = dag_h->nodes;
641 dag_h->nodes = tmpNode;
643 readQNodes = dag_h->nodes;
645 for (i = 0; i < numParityNodes; i++) {
646 tmpNode = rf_AllocDAGNode();
647 tmpNode->list_next = dag_h->nodes;
648 dag_h->nodes = tmpNode;
650 writeQNodes = dag_h->nodes;
652 for (i = 0; i < numParityNodes; i++) {
653 tmpNode = rf_AllocDAGNode();
654 tmpNode->list_next = dag_h->nodes;
655 dag_h->nodes = tmpNode;
657 qNodes = dag_h->nodes;
658 } else {
659 #endif
660 readQNodes = writeQNodes = qNodes = NULL;
661 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
663 #endif
666 * Step 3. initialize the nodes
668 /* initialize block node (Nil) */
669 nNodes = numDataNodes + (nfaults * numParityNodes);
670 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
671 rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0,
672 dag_h, "Nil", allocList);
674 /* initialize commit node (Cmt) */
675 rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
676 rf_NullNodeUndoFunc, NULL, nNodes,
677 (nfaults * numParityNodes), 0, 0, dag_h, "Cmt", allocList);
679 /* initialize terminate node (Trm) */
680 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
681 rf_TerminateUndoFunc, NULL, 0, nNodes, 0, 0,
682 dag_h, "Trm", allocList);
684 /* initialize nodes which read old data (Rod) */
685 tmpreadDataNode = readDataNodes;
686 for (i = 0; i < numDataNodes; i++) {
687 rf_InitNode(tmpreadDataNode, rf_wait, RF_FALSE,
688 rf_DiskReadFunc, rf_DiskReadUndoFunc,
689 rf_GenericWakeupFunc, (nfaults * numParityNodes),
690 1, 4, 0, dag_h, "Rod", allocList);
691 RF_ASSERT(pda != NULL);
692 /* physical disk addr desc */
693 tmpreadDataNode->params[0].p = pda;
694 /* buffer to hold old data */
695 tmpreadDataNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda->numSector << raidPtr->logBytesPerSector);
696 tmpreadDataNode->params[2].v = parityStripeID;
697 tmpreadDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
698 which_ru);
699 pda = pda->next;
700 for (j = 0; j < tmpreadDataNode->numSuccedents; j++) {
701 tmpreadDataNode->propList[j] = NULL;
703 tmpreadDataNode = tmpreadDataNode->list_next;
706 /* initialize nodes which read old parity (Rop) */
707 pda = asmap->parityInfo;
708 i = 0;
709 tmpreadParityNode = readParityNodes;
710 for (i = 0; i < numParityNodes; i++) {
711 RF_ASSERT(pda != NULL);
712 rf_InitNode(tmpreadParityNode, rf_wait, RF_FALSE,
713 rf_DiskReadFunc, rf_DiskReadUndoFunc,
714 rf_GenericWakeupFunc, numParityNodes, 1, 4, 0,
715 dag_h, "Rop", allocList);
716 tmpreadParityNode->params[0].p = pda;
717 /* buffer to hold old parity */
718 tmpreadParityNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda->numSector << raidPtr->logBytesPerSector);
719 tmpreadParityNode->params[2].v = parityStripeID;
720 tmpreadParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
721 which_ru);
722 pda = pda->next;
723 for (j = 0; j < tmpreadParityNode->numSuccedents; j++) {
724 tmpreadParityNode->propList[0] = NULL;
726 tmpreadParityNode = tmpreadParityNode->list_next;
729 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
730 /* initialize nodes which read old Q (Roq) */
731 if (nfaults == 2) {
732 pda = asmap->qInfo;
733 tmpreadQNode = readQNodes;
734 for (i = 0; i < numParityNodes; i++) {
735 RF_ASSERT(pda != NULL);
736 rf_InitNode(tmpreadQNode, rf_wait, RF_FALSE,
737 rf_DiskReadFunc, rf_DiskReadUndoFunc,
738 rf_GenericWakeupFunc, numParityNodes,
739 1, 4, 0, dag_h, "Roq", allocList);
740 tmpreadQNode->params[0].p = pda;
741 /* buffer to hold old Q */
742 tmpreadQNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h,
743 pda->numSector << raidPtr->logBytesPerSector);
744 tmpreadQNode->params[2].v = parityStripeID;
745 tmpreadQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
746 which_ru);
747 pda = pda->next;
748 for (j = 0; j < tmpreadQNode->numSuccedents; j++) {
749 tmpreadQNode->propList[0] = NULL;
751 tmpreadQNode = tmpreadQNode->list_next;
754 #endif
755 /* initialize nodes which write new data (Wnd) */
756 pda = asmap->physInfo;
757 tmpwriteDataNode = writeDataNodes;
758 for (i = 0; i < numDataNodes; i++) {
759 RF_ASSERT(pda != NULL);
760 rf_InitNode(tmpwriteDataNode, rf_wait, RF_FALSE,
761 rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
762 rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
763 "Wnd", allocList);
764 /* physical disk addr desc */
765 tmpwriteDataNode->params[0].p = pda;
766 /* buffer holding new data to be written */
767 tmpwriteDataNode->params[1].p = pda->bufPtr;
768 tmpwriteDataNode->params[2].v = parityStripeID;
769 tmpwriteDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
770 which_ru);
771 pda = pda->next;
772 tmpwriteDataNode = tmpwriteDataNode->list_next;
776 * Initialize nodes which compute new parity and Q.
779 * We use the simple XOR func in the double-XOR case, and when
780 * we're accessing only a portion of one stripe unit. The
781 * distinction between the two is that the regular XOR func
782 * assumes that the targbuf is a full SU in size, and examines
783 * the pda associated with the buffer to decide where within
784 * the buffer to XOR the data, whereas the simple XOR func
785 * just XORs the data into the start of the buffer. */
786 if ((numParityNodes == 2) || ((numDataNodes == 1)
787 && (asmap->totalSectorsAccessed <
788 raidPtr->Layout.sectorsPerStripeUnit))) {
789 func = pfuncs->simple;
790 undoFunc = rf_NullNodeUndoFunc;
791 name = pfuncs->SimpleName;
792 if (qfuncs) {
793 qfunc = qfuncs->simple;
794 qname = qfuncs->SimpleName;
795 } else {
796 qfunc = NULL;
797 qname = NULL;
799 } else {
800 func = pfuncs->regular;
801 undoFunc = rf_NullNodeUndoFunc;
802 name = pfuncs->RegularName;
803 if (qfuncs) {
804 qfunc = qfuncs->regular;
805 qname = qfuncs->RegularName;
806 } else {
807 qfunc = NULL;
808 qname = NULL;
812 * Initialize the xor nodes: params are {pda,buf}
813 * from {Rod,Wnd,Rop} nodes, and raidPtr
815 if (numParityNodes == 2) {
816 /* double-xor case */
817 tmpxorNode = xorNodes;
818 tmpreadDataNode = readDataNodes;
819 tmpreadParityNode = readParityNodes;
820 tmpwriteDataNode = writeDataNodes;
821 tmpqNode = qNodes;
822 tmpreadQNode = readQNodes;
823 for (i = 0; i < numParityNodes; i++) {
824 /* note: no wakeup func for xor */
825 rf_InitNode(tmpxorNode, rf_wait, RF_FALSE, func,
826 undoFunc, NULL, 1,
827 (numDataNodes + numParityNodes),
828 7, 1, dag_h, name, allocList);
829 tmpxorNode->flags |= RF_DAGNODE_FLAG_YIELD;
830 tmpxorNode->params[0] = tmpreadDataNode->params[0];
831 tmpxorNode->params[1] = tmpreadDataNode->params[1];
832 tmpxorNode->params[2] = tmpreadParityNode->params[0];
833 tmpxorNode->params[3] = tmpreadParityNode->params[1];
834 tmpxorNode->params[4] = tmpwriteDataNode->params[0];
835 tmpxorNode->params[5] = tmpwriteDataNode->params[1];
836 tmpxorNode->params[6].p = raidPtr;
837 /* use old parity buf as target buf */
838 tmpxorNode->results[0] = tmpreadParityNode->params[1].p;
839 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
840 if (nfaults == 2) {
841 /* note: no wakeup func for qor */
842 rf_InitNode(tmpqNode, rf_wait, RF_FALSE,
843 qfunc, undoFunc, NULL, 1,
844 (numDataNodes + numParityNodes),
845 7, 1, dag_h, qname, allocList);
846 tmpqNode->params[0] = tmpreadDataNode->params[0];
847 tmpqNode->params[1] = tmpreadDataNode->params[1];
848 tmpqNode->params[2] = tmpreadQNode->.params[0];
849 tmpqNode->params[3] = tmpreadQNode->params[1];
850 tmpqNode->params[4] = tmpwriteDataNode->params[0];
851 tmpqNode->params[5] = tmpwriteDataNode->params[1];
852 tmpqNode->params[6].p = raidPtr;
853 /* use old Q buf as target buf */
854 tmpqNode->results[0] = tmpreadQNode->params[1].p;
855 tmpqNode = tmpqNode->list_next;
856 tmpreadQNodes = tmpreadQNodes->list_next;
858 #endif
859 tmpxorNode = tmpxorNode->list_next;
860 tmpreadDataNode = tmpreadDataNode->list_next;
861 tmpreadParityNode = tmpreadParityNode->list_next;
862 tmpwriteDataNode = tmpwriteDataNode->list_next;
864 } else {
865 /* there is only one xor node in this case */
866 rf_InitNode(xorNodes, rf_wait, RF_FALSE, func,
867 undoFunc, NULL, 1, (numDataNodes + numParityNodes),
868 (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
869 dag_h, name, allocList);
870 xorNodes->flags |= RF_DAGNODE_FLAG_YIELD;
871 tmpreadDataNode = readDataNodes;
872 for (i = 0; i < numDataNodes; i++) { /* used to be"numDataNodes + 1" until we factored
873 out the "+1" into the "deal with Rop separately below */
874 /* set up params related to Rod nodes */
875 xorNodes->params[2 * i + 0] = tmpreadDataNode->params[0]; /* pda */
876 xorNodes->params[2 * i + 1] = tmpreadDataNode->params[1]; /* buffer ptr */
877 tmpreadDataNode = tmpreadDataNode->list_next;
879 /* deal with Rop separately */
880 xorNodes->params[2 * numDataNodes + 0] = readParityNodes->params[0]; /* pda */
881 xorNodes->params[2 * numDataNodes + 1] = readParityNodes->params[1]; /* buffer ptr */
883 tmpwriteDataNode = writeDataNodes;
884 for (i = 0; i < numDataNodes; i++) {
885 /* set up params related to Wnd and Wnp nodes */
886 xorNodes->params[2 * (numDataNodes + 1 + i) + 0] = /* pda */
887 tmpwriteDataNode->params[0];
888 xorNodes->params[2 * (numDataNodes + 1 + i) + 1] = /* buffer ptr */
889 tmpwriteDataNode->params[1];
890 tmpwriteDataNode = tmpwriteDataNode->list_next;
892 /* xor node needs to get at RAID information */
893 xorNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
894 xorNodes->results[0] = readParityNodes->params[1].p;
895 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
896 if (nfaults == 2) {
897 rf_InitNode(qNodes, rf_wait, RF_FALSE, qfunc,
898 undoFunc, NULL, 1,
899 (numDataNodes + numParityNodes),
900 (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
901 dag_h, qname, allocList);
902 tmpreadDataNode = readDataNodes;
903 for (i = 0; i < numDataNodes; i++) {
904 /* set up params related to Rod */
905 qNodes->params[2 * i + 0] = tmpreadDataNode->params[0]; /* pda */
906 qNodes->params[2 * i + 1] = tmpreadDataNode->params[1]; /* buffer ptr */
907 tmpreadDataNode = tmpreadDataNode->list_next;
909 /* and read old q */
910 qNodes->params[2 * numDataNodes + 0] = /* pda */
911 readQNodes->params[0];
912 qNodes->params[2 * numDataNodes + 1] = /* buffer ptr */
913 readQNodes->params[1];
914 tmpwriteDataNode = writeDataNodes;
915 for (i = 0; i < numDataNodes; i++) {
916 /* set up params related to Wnd nodes */
917 qNodes->params[2 * (numDataNodes + 1 + i) + 0] = /* pda */
918 tmpwriteDataNode->params[0];
919 qNodes->params[2 * (numDataNodes + 1 + i) + 1] = /* buffer ptr */
920 tmpwriteDataNode->params[1];
921 tmpwriteDataNode = tmpwriteDataNode->list_next;
923 /* xor node needs to get at RAID information */
924 qNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
925 qNodes->results[0] = readQNodes->params[1].p;
927 #endif
930 /* initialize nodes which write new parity (Wnp) */
931 pda = asmap->parityInfo;
932 tmpwriteParityNode = writeParityNodes;
933 tmpxorNode = xorNodes;
934 for (i = 0; i < numParityNodes; i++) {
935 rf_InitNode(tmpwriteParityNode, rf_wait, RF_FALSE,
936 rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
937 rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
938 "Wnp", allocList);
939 RF_ASSERT(pda != NULL);
940 tmpwriteParityNode->params[0].p = pda; /* param 1 (bufPtr)
941 * filled in by xor node */
942 tmpwriteParityNode->params[1].p = tmpxorNode->results[0]; /* buffer pointer for
943 * parity write
944 * operation */
945 tmpwriteParityNode->params[2].v = parityStripeID;
946 tmpwriteParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
947 which_ru);
948 pda = pda->next;
949 tmpwriteParityNode = tmpwriteParityNode->list_next;
950 tmpxorNode = tmpxorNode->list_next;
953 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
954 /* initialize nodes which write new Q (Wnq) */
955 if (nfaults == 2) {
956 pda = asmap->qInfo;
957 tmpwriteQNode = writeQNodes;
958 tmpqNode = qNodes;
959 for (i = 0; i < numParityNodes; i++) {
960 rf_InitNode(tmpwriteQNode, rf_wait, RF_FALSE,
961 rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
962 rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
963 "Wnq", allocList);
964 RF_ASSERT(pda != NULL);
965 tmpwriteQNode->params[0].p = pda; /* param 1 (bufPtr)
966 * filled in by xor node */
967 tmpwriteQNode->params[1].p = tmpqNode->results[0]; /* buffer pointer for
968 * parity write
969 * operation */
970 tmpwriteQNode->params[2].v = parityStripeID;
971 tmpwriteQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
972 which_ru);
973 pda = pda->next;
974 tmpwriteQNode = tmpwriteQNode->list_next;
975 tmpqNode = tmpqNode->list_next;
978 #endif
980 * Step 4. connect the nodes.
983 /* connect header to block node */
984 dag_h->succedents[0] = blockNode;
986 /* connect block node to read old data nodes */
987 RF_ASSERT(blockNode->numSuccedents == (numDataNodes + (numParityNodes * nfaults)));
988 tmpreadDataNode = readDataNodes;
989 for (i = 0; i < numDataNodes; i++) {
990 blockNode->succedents[i] = tmpreadDataNode;
991 RF_ASSERT(tmpreadDataNode->numAntecedents == 1);
992 tmpreadDataNode->antecedents[0] = blockNode;
993 tmpreadDataNode->antType[0] = rf_control;
994 tmpreadDataNode = tmpreadDataNode->list_next;
997 /* connect block node to read old parity nodes */
998 tmpreadParityNode = readParityNodes;
999 for (i = 0; i < numParityNodes; i++) {
1000 blockNode->succedents[numDataNodes + i] = tmpreadParityNode;
1001 RF_ASSERT(tmpreadParityNode->numAntecedents == 1);
1002 tmpreadParityNode->antecedents[0] = blockNode;
1003 tmpreadParityNode->antType[0] = rf_control;
1004 tmpreadParityNode = tmpreadParityNode->list_next;
1007 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1008 /* connect block node to read old Q nodes */
1009 if (nfaults == 2) {
1010 tmpreadQNode = readQNodes;
1011 for (i = 0; i < numParityNodes; i++) {
1012 blockNode->succedents[numDataNodes + numParityNodes + i] = tmpreadQNode;
1013 RF_ASSERT(tmpreadQNode->numAntecedents == 1);
1014 tmpreadQNode->antecedents[0] = blockNode;
1015 tmpreadQNode->antType[0] = rf_control;
1016 tmpreadQNode = tmpreadQNode->list_next;
1019 #endif
1020 /* connect read old data nodes to xor nodes */
1021 tmpreadDataNode = readDataNodes;
1022 for (i = 0; i < numDataNodes; i++) {
1023 RF_ASSERT(tmpreadDataNode->numSuccedents == (nfaults * numParityNodes));
1024 tmpxorNode = xorNodes;
1025 for (j = 0; j < numParityNodes; j++) {
1026 RF_ASSERT(tmpxorNode->numAntecedents == numDataNodes + numParityNodes);
1027 tmpreadDataNode->succedents[j] = tmpxorNode;
1028 tmpxorNode->antecedents[i] = tmpreadDataNode;
1029 tmpxorNode->antType[i] = rf_trueData;
1030 tmpxorNode = tmpxorNode->list_next;
1032 tmpreadDataNode = tmpreadDataNode->list_next;
1035 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1036 /* connect read old data nodes to q nodes */
1037 if (nfaults == 2) {
1038 tmpreadDataNode = readDataNodes;
1039 for (i = 0; i < numDataNodes; i++) {
1040 tmpqNode = qNodes;
1041 for (j = 0; j < numParityNodes; j++) {
1042 RF_ASSERT(tmpqNode->numAntecedents == numDataNodes + numParityNodes);
1043 tmpreadDataNode->succedents[numParityNodes + j] = tmpqNode;
1044 tmpqNode->antecedents[i] = tmpreadDataNode;
1045 tmpqNode->antType[i] = rf_trueData;
1046 tmpqNode = tmpqNode->list_next;
1048 tmpreadDataNode = tmpreadDataNode->list_next;
1051 #endif
1052 /* connect read old parity nodes to xor nodes */
1053 tmpreadParityNode = readParityNodes;
1054 for (i = 0; i < numParityNodes; i++) {
1055 RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes);
1056 tmpxorNode = xorNodes;
1057 for (j = 0; j < numParityNodes; j++) {
1058 tmpreadParityNode->succedents[j] = tmpxorNode;
1059 tmpxorNode->antecedents[numDataNodes + i] = tmpreadParityNode;
1060 tmpxorNode->antType[numDataNodes + i] = rf_trueData;
1061 tmpxorNode = tmpxorNode->list_next;
1063 tmpreadParityNode = tmpreadParityNode->list_next;
1066 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1067 /* connect read old q nodes to q nodes */
1068 if (nfaults == 2) {
1069 tmpreadParityNode = readParityNodes;
1070 tmpreadQNode = readQNodes;
1071 for (i = 0; i < numParityNodes; i++) {
1072 RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes);
1073 tmpqNode = qNodes;
1074 for (j = 0; j < numParityNodes; j++) {
1075 tmpreadQNode->succedents[j] = tmpqNode;
1076 tmpqNode->antecedents[numDataNodes + i] = tmpreadQNodes;
1077 tmpqNode->antType[numDataNodes + i] = rf_trueData;
1078 tmpqNode = tmpqNode->list_next;
1080 tmpreadParityNode = tmpreadParityNode->list_next;
1081 tmpreadQNode = tmpreadQNode->list_next;
1084 #endif
1085 /* connect xor nodes to commit node */
1086 RF_ASSERT(commitNode->numAntecedents == (nfaults * numParityNodes));
1087 tmpxorNode = xorNodes;
1088 for (i = 0; i < numParityNodes; i++) {
1089 RF_ASSERT(tmpxorNode->numSuccedents == 1);
1090 tmpxorNode->succedents[0] = commitNode;
1091 commitNode->antecedents[i] = tmpxorNode;
1092 commitNode->antType[i] = rf_control;
1093 tmpxorNode = tmpxorNode->list_next;
1096 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1097 /* connect q nodes to commit node */
1098 if (nfaults == 2) {
1099 tmpqNode = qNodes;
1100 for (i = 0; i < numParityNodes; i++) {
1101 RF_ASSERT(tmpqNode->numSuccedents == 1);
1102 tmpqNode->succedents[0] = commitNode;
1103 commitNode->antecedents[i + numParityNodes] = tmpqNode;
1104 commitNode->antType[i + numParityNodes] = rf_control;
1105 tmpqNode = tmpqNode->list_next;
1108 #endif
1109 /* connect commit node to write nodes */
1110 RF_ASSERT(commitNode->numSuccedents == (numDataNodes + (nfaults * numParityNodes)));
1111 tmpwriteDataNode = writeDataNodes;
1112 for (i = 0; i < numDataNodes; i++) {
1113 RF_ASSERT(tmpwriteDataNode->numAntecedents == 1);
1114 commitNode->succedents[i] = tmpwriteDataNode;
1115 tmpwriteDataNode->antecedents[0] = commitNode;
1116 tmpwriteDataNode->antType[0] = rf_trueData;
1117 tmpwriteDataNode = tmpwriteDataNode->list_next;
1119 tmpwriteParityNode = writeParityNodes;
1120 for (i = 0; i < numParityNodes; i++) {
1121 RF_ASSERT(tmpwriteParityNode->numAntecedents == 1);
1122 commitNode->succedents[i + numDataNodes] = tmpwriteParityNode;
1123 tmpwriteParityNode->antecedents[0] = commitNode;
1124 tmpwriteParityNode->antType[0] = rf_trueData;
1125 tmpwriteParityNode = tmpwriteParityNode->list_next;
1127 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1128 if (nfaults == 2) {
1129 tmpwriteQNode = writeQNodes;
1130 for (i = 0; i < numParityNodes; i++) {
1131 RF_ASSERT(tmpwriteQNode->numAntecedents == 1);
1132 commitNode->succedents[i + numDataNodes + numParityNodes] = tmpwriteQNode;
1133 tmpwriteQNode->antecedents[0] = commitNode;
1134 tmpwriteQNode->antType[0] = rf_trueData;
1135 tmpwriteQNode = tmpwriteQNode->list_next;
1138 #endif
1139 RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
1140 RF_ASSERT(termNode->numSuccedents == 0);
1141 tmpwriteDataNode = writeDataNodes;
1142 for (i = 0; i < numDataNodes; i++) {
1143 /* connect write new data nodes to term node */
1144 RF_ASSERT(tmpwriteDataNode->numSuccedents == 1);
1145 RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
1146 tmpwriteDataNode->succedents[0] = termNode;
1147 termNode->antecedents[i] = tmpwriteDataNode;
1148 termNode->antType[i] = rf_control;
1149 tmpwriteDataNode = tmpwriteDataNode->list_next;
1152 tmpwriteParityNode = writeParityNodes;
1153 for (i = 0; i < numParityNodes; i++) {
1154 RF_ASSERT(tmpwriteParityNode->numSuccedents == 1);
1155 tmpwriteParityNode->succedents[0] = termNode;
1156 termNode->antecedents[numDataNodes + i] = tmpwriteParityNode;
1157 termNode->antType[numDataNodes + i] = rf_control;
1158 tmpwriteParityNode = tmpwriteParityNode->list_next;
1161 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1162 if (nfaults == 2) {
1163 tmpwriteQNode = writeQNodes;
1164 for (i = 0; i < numParityNodes; i++) {
1165 RF_ASSERT(tmpwriteQNode->numSuccedents == 1);
1166 tmpwriteQNode->succedents[0] = termNode;
1167 termNode->antecedents[numDataNodes + numParityNodes + i] = tmpwriteQNode;
1168 termNode->antType[numDataNodes + numParityNodes + i] = rf_control;
1169 tmpwriteQNode = tmpwriteQNode->list_next;
1172 #endif
1176 /******************************************************************************
1177 * create a write graph (fault-free or degraded) for RAID level 1
1179 * Hdr -> Commit -> Wpd -> Nil -> Trm
1180 * -> Wsd ->
1182 * The "Wpd" node writes data to the primary copy in the mirror pair
1183 * The "Wsd" node writes data to the secondary copy in the mirror pair
1185 * Parameters: raidPtr - description of the physical array
1186 * asmap - logical & physical addresses for this access
1187 * bp - buffer ptr (holds write data)
1188 * flags - general flags (e.g. disk locking)
1189 * allocList - list of memory allocated in DAG creation
1190 *****************************************************************************/
1192 void
1193 rf_CreateRaidOneWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
1194 RF_DagHeader_t *dag_h, void *bp,
1195 RF_RaidAccessFlags_t flags,
1196 RF_AllocListElem_t *allocList)
1198 RF_DagNode_t *unblockNode, *termNode, *commitNode;
1199 RF_DagNode_t *wndNode, *wmirNode;
1200 RF_DagNode_t *tmpNode, *tmpwndNode, *tmpwmirNode;
1201 int nWndNodes, nWmirNodes, i;
1202 RF_ReconUnitNum_t which_ru;
1203 RF_PhysDiskAddr_t *pda, *pdaP;
1204 RF_StripeNum_t parityStripeID;
1206 parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
1207 asmap->raidAddress, &which_ru);
1208 #if RF_DEBUG_DAG
1209 if (rf_dagDebug) {
1210 printf("[Creating RAID level 1 write DAG]\n");
1212 #endif
1213 dag_h->creator = "RaidOneWriteDAG";
1215 /* 2 implies access not SU aligned */
1216 nWmirNodes = (asmap->parityInfo->next) ? 2 : 1;
1217 nWndNodes = (asmap->physInfo->next) ? 2 : 1;
1219 /* alloc the Wnd nodes and the Wmir node */
1220 if (asmap->numDataFailed == 1)
1221 nWndNodes--;
1222 if (asmap->numParityFailed == 1)
1223 nWmirNodes--;
1225 /* total number of nodes = nWndNodes + nWmirNodes + (commit + unblock
1226 * + terminator) */
1227 for (i = 0; i < nWndNodes; i++) {
1228 tmpNode = rf_AllocDAGNode();
1229 tmpNode->list_next = dag_h->nodes;
1230 dag_h->nodes = tmpNode;
1232 wndNode = dag_h->nodes;
1234 for (i = 0; i < nWmirNodes; i++) {
1235 tmpNode = rf_AllocDAGNode();
1236 tmpNode->list_next = dag_h->nodes;
1237 dag_h->nodes = tmpNode;
1239 wmirNode = dag_h->nodes;
1241 commitNode = rf_AllocDAGNode();
1242 commitNode->list_next = dag_h->nodes;
1243 dag_h->nodes = commitNode;
1245 unblockNode = rf_AllocDAGNode();
1246 unblockNode->list_next = dag_h->nodes;
1247 dag_h->nodes = unblockNode;
1249 termNode = rf_AllocDAGNode();
1250 termNode->list_next = dag_h->nodes;
1251 dag_h->nodes = termNode;
1253 /* this dag can commit immediately */
1254 dag_h->numCommitNodes = 1;
1255 dag_h->numCommits = 0;
1256 dag_h->numSuccedents = 1;
1258 /* initialize the commit, unblock, and term nodes */
1259 rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
1260 rf_NullNodeUndoFunc, NULL, (nWndNodes + nWmirNodes),
1261 0, 0, 0, dag_h, "Cmt", allocList);
1262 rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
1263 rf_NullNodeUndoFunc, NULL, 1, (nWndNodes + nWmirNodes),
1264 0, 0, dag_h, "Nil", allocList);
1265 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
1266 rf_TerminateUndoFunc, NULL, 0, 1, 0, 0,
1267 dag_h, "Trm", allocList);
1269 /* initialize the wnd nodes */
1270 if (nWndNodes > 0) {
1271 pda = asmap->physInfo;
1272 tmpwndNode = wndNode;
1273 for (i = 0; i < nWndNodes; i++) {
1274 rf_InitNode(tmpwndNode, rf_wait, RF_FALSE,
1275 rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
1276 rf_GenericWakeupFunc, 1, 1, 4, 0,
1277 dag_h, "Wpd", allocList);
1278 RF_ASSERT(pda != NULL);
1279 tmpwndNode->params[0].p = pda;
1280 tmpwndNode->params[1].p = pda->bufPtr;
1281 tmpwndNode->params[2].v = parityStripeID;
1282 tmpwndNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
1283 pda = pda->next;
1284 tmpwndNode = tmpwndNode->list_next;
1286 RF_ASSERT(pda == NULL);
1288 /* initialize the mirror nodes */
1289 if (nWmirNodes > 0) {
1290 pda = asmap->physInfo;
1291 pdaP = asmap->parityInfo;
1292 tmpwmirNode = wmirNode;
1293 for (i = 0; i < nWmirNodes; i++) {
1294 rf_InitNode(tmpwmirNode, rf_wait, RF_FALSE,
1295 rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
1296 rf_GenericWakeupFunc, 1, 1, 4, 0,
1297 dag_h, "Wsd", allocList);
1298 RF_ASSERT(pda != NULL);
1299 tmpwmirNode->params[0].p = pdaP;
1300 tmpwmirNode->params[1].p = pda->bufPtr;
1301 tmpwmirNode->params[2].v = parityStripeID;
1302 tmpwmirNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
1303 pda = pda->next;
1304 pdaP = pdaP->next;
1305 tmpwmirNode = tmpwmirNode->list_next;
1307 RF_ASSERT(pda == NULL);
1308 RF_ASSERT(pdaP == NULL);
1310 /* link the header node to the commit node */
1311 RF_ASSERT(dag_h->numSuccedents == 1);
1312 RF_ASSERT(commitNode->numAntecedents == 0);
1313 dag_h->succedents[0] = commitNode;
1315 /* link the commit node to the write nodes */
1316 RF_ASSERT(commitNode->numSuccedents == (nWndNodes + nWmirNodes));
1317 tmpwndNode = wndNode;
1318 for (i = 0; i < nWndNodes; i++) {
1319 RF_ASSERT(tmpwndNode->numAntecedents == 1);
1320 commitNode->succedents[i] = tmpwndNode;
1321 tmpwndNode->antecedents[0] = commitNode;
1322 tmpwndNode->antType[0] = rf_control;
1323 tmpwndNode = tmpwndNode->list_next;
1325 tmpwmirNode = wmirNode;
1326 for (i = 0; i < nWmirNodes; i++) {
1327 RF_ASSERT(tmpwmirNode->numAntecedents == 1);
1328 commitNode->succedents[i + nWndNodes] = tmpwmirNode;
1329 tmpwmirNode->antecedents[0] = commitNode;
1330 tmpwmirNode->antType[0] = rf_control;
1331 tmpwmirNode = tmpwmirNode->list_next;
1334 /* link the write nodes to the unblock node */
1335 RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nWmirNodes));
1336 tmpwndNode = wndNode;
1337 for (i = 0; i < nWndNodes; i++) {
1338 RF_ASSERT(tmpwndNode->numSuccedents == 1);
1339 tmpwndNode->succedents[0] = unblockNode;
1340 unblockNode->antecedents[i] = tmpwndNode;
1341 unblockNode->antType[i] = rf_control;
1342 tmpwndNode = tmpwndNode->list_next;
1344 tmpwmirNode = wmirNode;
1345 for (i = 0; i < nWmirNodes; i++) {
1346 RF_ASSERT(tmpwmirNode->numSuccedents == 1);
1347 tmpwmirNode->succedents[0] = unblockNode;
1348 unblockNode->antecedents[i + nWndNodes] = tmpwmirNode;
1349 unblockNode->antType[i + nWndNodes] = rf_control;
1350 tmpwmirNode = tmpwmirNode->list_next;
1353 /* link the unblock node to the term node */
1354 RF_ASSERT(unblockNode->numSuccedents == 1);
1355 RF_ASSERT(termNode->numAntecedents == 1);
1356 RF_ASSERT(termNode->numSuccedents == 0);
1357 unblockNode->succedents[0] = termNode;
1358 termNode->antecedents[0] = unblockNode;
1359 termNode->antType[0] = rf_control;