1 /* $NetBSD: rf_parityloggingdags.c,v 1.18 2006/11/16 01:33:23 christos Exp $ */
3 * Copyright (c) 1995 Carnegie-Mellon University.
6 * Author: William V. Courtright II
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
18 * Carnegie Mellon requests users of this software to return to
20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
/* DAGs specific to parity logging are created here. */
33 #include <sys/cdefs.h>
34 __KERNEL_RCSID(0, "$NetBSD: rf_parityloggingdags.c,v 1.18 2006/11/16 01:33:23 christos Exp $");
37 #include "opt_raid_diagnostic.h"
42 #if RF_INCLUDE_PARITYLOGGING > 0
44 #include <dev/raidframe/raidframevar.h>
48 #include "rf_dagutils.h"
49 #include "rf_dagfuncs.h"
50 #include "rf_debugMem.h"
51 #include "rf_paritylog.h"
52 #include "rf_general.h"
54 #include "rf_parityloggingdags.h"
56 /******************************************************************************
58 * creates a DAG to perform a large-write operation:
61 * H -- NIL- Rod - NIL - Wnd ------ NIL - T
62 * \ Rod / \ Xor - Lpo /
64 * The writes are not done until the reads complete because if they were done in
65 * parallel, a failure on one of the reads could leave the parity in an inconsistent
66 * state, so that the retry with a new DAG would produce erroneous parity.
68 * Note: this DAG has the nasty property that none of the buffers allocated for reading
69 * old data can be freed until the XOR node fires. Need to fix this.
71 * The last two arguments are the number of faults tolerated, and function for the
72 * redundancy calculation. The undo for the redundancy calc is assumed to be null
74 *****************************************************************************/
77 rf_CommonCreateParityLoggingLargeWriteDAG(
79 RF_AccessStripeMap_t
* asmap
,
80 RF_DagHeader_t
* dag_h
,
82 RF_RaidAccessFlags_t flags
,
83 RF_AllocListElem_t
* allocList
,
85 int (*redFunc
) (RF_DagNode_t
*))
87 RF_DagNode_t
*nodes
, *wndNodes
, *rodNodes
= NULL
, *syncNode
, *xorNode
,
88 *lpoNode
, *blockNode
, *unblockNode
, *termNode
;
89 int nWndNodes
, nRodNodes
, i
;
90 RF_RaidLayout_t
*layoutPtr
= &(raidPtr
->Layout
);
91 RF_AccessStripeMapHeader_t
*new_asm_h
[2];
93 RF_ReconUnitNum_t which_ru
;
94 char *sosBuffer
, *eosBuffer
;
95 RF_PhysDiskAddr_t
*pda
;
96 RF_StripeNum_t parityStripeID
= rf_RaidAddressToParityStripeID(&(raidPtr
->Layout
), asmap
->raidAddress
, &which_ru
);
99 printf("[Creating parity-logging large-write DAG]\n");
100 RF_ASSERT(nfaults
== 1);/* this arch only single fault tolerant */
101 dag_h
->creator
= "ParityLoggingLargeWriteDAG";
103 /* alloc the Wnd nodes, the xor node, and the Lpo node */
104 nWndNodes
= asmap
->numStripeUnitsAccessed
;
105 RF_MallocAndAdd(nodes
, (nWndNodes
+ 6) * sizeof(RF_DagNode_t
),
106 (RF_DagNode_t
*), allocList
);
108 wndNodes
= &nodes
[i
];
114 blockNode
= &nodes
[i
];
116 syncNode
= &nodes
[i
];
118 unblockNode
= &nodes
[i
];
120 termNode
= &nodes
[i
];
123 dag_h
->numCommitNodes
= nWndNodes
+ 1;
124 dag_h
->numCommits
= 0;
125 dag_h
->numSuccedents
= 1;
127 rf_MapUnaccessedPortionOfStripe(raidPtr
, layoutPtr
, asmap
, dag_h
, new_asm_h
, &nRodNodes
, &sosBuffer
, &eosBuffer
, allocList
);
129 RF_MallocAndAdd(rodNodes
, nRodNodes
* sizeof(RF_DagNode_t
),
130 (RF_DagNode_t
*), allocList
);
132 /* begin node initialization */
133 rf_InitNode(blockNode
, rf_wait
, RF_FALSE
, rf_NullNodeFunc
, rf_NullNodeUndoFunc
, NULL
, nRodNodes
+ 1, 0, 0, 0, dag_h
, "Nil", allocList
);
134 rf_InitNode(unblockNode
, rf_wait
, RF_FALSE
, rf_NullNodeFunc
, rf_NullNodeUndoFunc
, NULL
, 1, nWndNodes
+ 1, 0, 0, dag_h
, "Nil", allocList
);
135 rf_InitNode(syncNode
, rf_wait
, RF_FALSE
, rf_NullNodeFunc
, rf_NullNodeUndoFunc
, NULL
, nWndNodes
+ 1, nRodNodes
+ 1, 0, 0, dag_h
, "Nil", allocList
);
136 rf_InitNode(termNode
, rf_wait
, RF_FALSE
, rf_TerminateFunc
, rf_TerminateUndoFunc
, NULL
, 0, 1, 0, 0, dag_h
, "Trm", allocList
);
138 /* initialize the Rod nodes */
139 for (nodeNum
= asmNum
= 0; asmNum
< 2; asmNum
++) {
140 if (new_asm_h
[asmNum
]) {
141 pda
= new_asm_h
[asmNum
]->stripeMap
->physInfo
;
143 rf_InitNode(&rodNodes
[nodeNum
], rf_wait
, RF_FALSE
, rf_DiskReadFunc
, rf_DiskReadUndoFunc
, rf_GenericWakeupFunc
, 1, 1, 4, 0, dag_h
, "Rod", allocList
);
144 rodNodes
[nodeNum
].params
[0].p
= pda
;
145 rodNodes
[nodeNum
].params
[1].p
= pda
->bufPtr
;
146 rodNodes
[nodeNum
].params
[2].v
= parityStripeID
;
147 rodNodes
[nodeNum
].params
[3].v
= RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY
, which_ru
);
153 RF_ASSERT(nodeNum
== nRodNodes
);
155 /* initialize the wnd nodes */
156 pda
= asmap
->physInfo
;
157 for (i
= 0; i
< nWndNodes
; i
++) {
158 rf_InitNode(&wndNodes
[i
], rf_wait
, RF_TRUE
, rf_DiskWriteFunc
, rf_DiskWriteUndoFunc
, rf_GenericWakeupFunc
, 1, 1, 4, 0, dag_h
, "Wnd", allocList
);
159 RF_ASSERT(pda
!= NULL
);
160 wndNodes
[i
].params
[0].p
= pda
;
161 wndNodes
[i
].params
[1].p
= pda
->bufPtr
;
162 wndNodes
[i
].params
[2].v
= parityStripeID
;
163 wndNodes
[i
].params
[3].v
= RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY
, which_ru
);
167 /* initialize the redundancy node */
168 rf_InitNode(xorNode
, rf_wait
, RF_TRUE
, redFunc
, rf_NullNodeUndoFunc
, NULL
, 1, 1, 2 * (nWndNodes
+ nRodNodes
) + 1, 1, dag_h
, "Xr ", allocList
);
169 xorNode
->flags
|= RF_DAGNODE_FLAG_YIELD
;
170 for (i
= 0; i
< nWndNodes
; i
++) {
171 xorNode
->params
[2 * i
+ 0] = wndNodes
[i
].params
[0]; /* pda */
172 xorNode
->params
[2 * i
+ 1] = wndNodes
[i
].params
[1]; /* buf ptr */
174 for (i
= 0; i
< nRodNodes
; i
++) {
175 xorNode
->params
[2 * (nWndNodes
+ i
) + 0] = rodNodes
[i
].params
[0]; /* pda */
176 xorNode
->params
[2 * (nWndNodes
+ i
) + 1] = rodNodes
[i
].params
[1]; /* buf ptr */
178 xorNode
->params
[2 * (nWndNodes
+ nRodNodes
)].p
= raidPtr
; /* xor node needs to get
179 * at RAID information */
181 /* look for an Rod node that reads a complete SU. If none, alloc a
182 * buffer to receive the parity info. Note that we can't use a new
183 * data buffer because it will not have gotten written when the xor
185 for (i
= 0; i
< nRodNodes
; i
++)
186 if (((RF_PhysDiskAddr_t
*) rodNodes
[i
].params
[0].p
)->numSector
== raidPtr
->Layout
.sectorsPerStripeUnit
)
188 if (i
== nRodNodes
) {
189 RF_MallocAndAdd(xorNode
->results
[0],
190 rf_RaidAddressToByte(raidPtr
, raidPtr
->Layout
.sectorsPerStripeUnit
), (void *), allocList
);
192 xorNode
->results
[0] = rodNodes
[i
].params
[1].p
;
195 /* initialize the Lpo node */
196 rf_InitNode(lpoNode
, rf_wait
, RF_FALSE
, rf_ParityLogOverwriteFunc
, rf_ParityLogOverwriteUndoFunc
, rf_GenericWakeupFunc
, 1, 1, 2, 0, dag_h
, "Lpo", allocList
);
198 lpoNode
->params
[0].p
= asmap
->parityInfo
;
199 lpoNode
->params
[1].p
= xorNode
->results
[0];
200 RF_ASSERT(asmap
->parityInfo
->next
== NULL
); /* parityInfo must
204 /* connect nodes to form graph */
206 /* connect dag header to block node */
207 RF_ASSERT(dag_h
->numSuccedents
== 1);
208 RF_ASSERT(blockNode
->numAntecedents
== 0);
209 dag_h
->succedents
[0] = blockNode
;
211 /* connect the block node to the Rod nodes */
212 RF_ASSERT(blockNode
->numSuccedents
== nRodNodes
+ 1);
213 for (i
= 0; i
< nRodNodes
; i
++) {
214 RF_ASSERT(rodNodes
[i
].numAntecedents
== 1);
215 blockNode
->succedents
[i
] = &rodNodes
[i
];
216 rodNodes
[i
].antecedents
[0] = blockNode
;
217 rodNodes
[i
].antType
[0] = rf_control
;
220 /* connect the block node to the sync node */
221 /* necessary if nRodNodes == 0 */
222 RF_ASSERT(syncNode
->numAntecedents
== nRodNodes
+ 1);
223 blockNode
->succedents
[nRodNodes
] = syncNode
;
224 syncNode
->antecedents
[0] = blockNode
;
225 syncNode
->antType
[0] = rf_control
;
227 /* connect the Rod nodes to the syncNode */
228 for (i
= 0; i
< nRodNodes
; i
++) {
229 rodNodes
[i
].succedents
[0] = syncNode
;
230 syncNode
->antecedents
[1 + i
] = &rodNodes
[i
];
231 syncNode
->antType
[1 + i
] = rf_control
;
234 /* connect the sync node to the xor node */
235 RF_ASSERT(syncNode
->numSuccedents
== nWndNodes
+ 1);
236 RF_ASSERT(xorNode
->numAntecedents
== 1);
237 syncNode
->succedents
[0] = xorNode
;
238 xorNode
->antecedents
[0] = syncNode
;
239 xorNode
->antType
[0] = rf_trueData
; /* carry forward from sync */
241 /* connect the sync node to the Wnd nodes */
242 for (i
= 0; i
< nWndNodes
; i
++) {
243 RF_ASSERT(wndNodes
->numAntecedents
== 1);
244 syncNode
->succedents
[1 + i
] = &wndNodes
[i
];
245 wndNodes
[i
].antecedents
[0] = syncNode
;
246 wndNodes
[i
].antType
[0] = rf_control
;
249 /* connect the xor node to the Lpo node */
250 RF_ASSERT(xorNode
->numSuccedents
== 1);
251 RF_ASSERT(lpoNode
->numAntecedents
== 1);
252 xorNode
->succedents
[0] = lpoNode
;
253 lpoNode
->antecedents
[0] = xorNode
;
254 lpoNode
->antType
[0] = rf_trueData
;
256 /* connect the Wnd nodes to the unblock node */
257 RF_ASSERT(unblockNode
->numAntecedents
== nWndNodes
+ 1);
258 for (i
= 0; i
< nWndNodes
; i
++) {
259 RF_ASSERT(wndNodes
->numSuccedents
== 1);
260 wndNodes
[i
].succedents
[0] = unblockNode
;
261 unblockNode
->antecedents
[i
] = &wndNodes
[i
];
262 unblockNode
->antType
[i
] = rf_control
;
265 /* connect the Lpo node to the unblock node */
266 RF_ASSERT(lpoNode
->numSuccedents
== 1);
267 lpoNode
->succedents
[0] = unblockNode
;
268 unblockNode
->antecedents
[nWndNodes
] = lpoNode
;
269 unblockNode
->antType
[nWndNodes
] = rf_control
;
271 /* connect unblock node to terminator */
272 RF_ASSERT(unblockNode
->numSuccedents
== 1);
273 RF_ASSERT(termNode
->numAntecedents
== 1);
274 RF_ASSERT(termNode
->numSuccedents
== 0);
275 unblockNode
->succedents
[0] = termNode
;
276 termNode
->antecedents
[0] = unblockNode
;
277 termNode
->antType
[0] = rf_control
;
283 /******************************************************************************
285 * creates a DAG to perform a small-write operation (either raid 5 or pq), which is as follows:
305 * R = Read, W = Write, X = Xor, o = old, n = new, d = data, p = parity.
306 * When the access spans a stripe unit boundary and is less than one SU in size, there will
307 * be two Rop -- X -- Wnp branches. I call this the "double-XOR" case.
308 * The second output from each Rod node goes to the X node. In the double-XOR
309 * case, there are exactly 2 Rod nodes, and each sends one output to one X node.
310 * There is one Rod -- Wnd -- T branch for each stripe unit being updated.
312 * The block and unblock nodes are unused. See comment above CreateFaultFreeReadDAG.
314 * Note: this DAG ignores all the optimizations related to making the RMWs atomic.
315 * it also has the nasty property that none of the buffers allocated for reading
316 * old data & parity can be freed until the XOR node fires. Need to fix this.
318 * A null qfuncs indicates single fault tolerant
319 *****************************************************************************/
322 rf_CommonCreateParityLoggingSmallWriteDAG(
324 RF_AccessStripeMap_t
* asmap
,
325 RF_DagHeader_t
* dag_h
,
327 RF_RaidAccessFlags_t flags
,
328 RF_AllocListElem_t
* allocList
,
329 const RF_RedFuncs_t
* pfuncs
,
330 const RF_RedFuncs_t
* qfuncs
)
332 RF_DagNode_t
*xorNodes
, *blockNode
, *unblockNode
, *nodes
;
333 RF_DagNode_t
*readDataNodes
, *readParityNodes
;
334 RF_DagNode_t
*writeDataNodes
, *lpuNodes
;
335 RF_DagNode_t
*termNode
;
336 RF_PhysDiskAddr_t
*pda
= asmap
->physInfo
;
337 int numDataNodes
= asmap
->numStripeUnitsAccessed
;
338 int numParityNodes
= (asmap
->parityInfo
->next
) ? 2 : 1;
339 int i
, j
, nNodes
, totalNumNodes
;
340 RF_ReconUnitNum_t which_ru
;
341 int (*func
) (RF_DagNode_t
* node
), (*undoFunc
) (RF_DagNode_t
* node
);
342 int (*qfunc
) (RF_DagNode_t
* node
);
343 const char *name
, *qname
;
344 RF_StripeNum_t parityStripeID
= rf_RaidAddressToParityStripeID(&(raidPtr
->Layout
), asmap
->raidAddress
, &which_ru
);
345 #ifdef RAID_DIAGNOSTIC
346 long nfaults
= qfuncs
? 2 : 1;
347 #endif /* RAID_DIAGNOSTIC */
350 printf("[Creating parity-logging small-write DAG]\n");
351 RF_ASSERT(numDataNodes
> 0);
352 RF_ASSERT(nfaults
== 1);
353 dag_h
->creator
= "ParityLoggingSmallWriteDAG";
355 /* DAG creation occurs in three steps: 1. count the number of nodes in
356 * the DAG 2. create the nodes 3. initialize the nodes 4. connect the
359 /* Step 1. compute number of nodes in the graph */
361 /* number of nodes: a read and write for each data unit a redundancy
362 * computation node for each parity node a read and Lpu for each
363 * parity unit a block and unblock node (2) a terminator node if
364 * atomic RMW an unlock node for each data unit, redundancy unit */
365 totalNumNodes
= (2 * numDataNodes
) + numParityNodes
+ (2 * numParityNodes
) + 3;
367 nNodes
= numDataNodes
+ numParityNodes
;
369 dag_h
->numCommitNodes
= numDataNodes
+ numParityNodes
;
370 dag_h
->numCommits
= 0;
371 dag_h
->numSuccedents
= 1;
373 /* Step 2. create the nodes */
374 RF_MallocAndAdd(nodes
, totalNumNodes
* sizeof(RF_DagNode_t
),
375 (RF_DagNode_t
*), allocList
);
377 blockNode
= &nodes
[i
];
379 unblockNode
= &nodes
[i
];
381 readDataNodes
= &nodes
[i
];
383 readParityNodes
= &nodes
[i
];
385 writeDataNodes
= &nodes
[i
];
387 lpuNodes
= &nodes
[i
];
389 xorNodes
= &nodes
[i
];
391 termNode
= &nodes
[i
];
394 RF_ASSERT(i
== totalNumNodes
);
396 /* Step 3. initialize the nodes */
397 /* initialize block node (Nil) */
398 rf_InitNode(blockNode
, rf_wait
, RF_FALSE
, rf_NullNodeFunc
, rf_NullNodeUndoFunc
, NULL
, nNodes
, 0, 0, 0, dag_h
, "Nil", allocList
);
400 /* initialize unblock node (Nil) */
401 rf_InitNode(unblockNode
, rf_wait
, RF_FALSE
, rf_NullNodeFunc
, rf_NullNodeUndoFunc
, NULL
, 1, nNodes
, 0, 0, dag_h
, "Nil", allocList
);
403 /* initialize terminatory node (Trm) */
404 rf_InitNode(termNode
, rf_wait
, RF_FALSE
, rf_TerminateFunc
, rf_TerminateUndoFunc
, NULL
, 0, 1, 0, 0, dag_h
, "Trm", allocList
);
406 /* initialize nodes which read old data (Rod) */
407 for (i
= 0; i
< numDataNodes
; i
++) {
408 rf_InitNode(&readDataNodes
[i
], rf_wait
, RF_FALSE
, rf_DiskReadFunc
, rf_DiskReadUndoFunc
, rf_GenericWakeupFunc
, nNodes
, 1, 4, 0, dag_h
, "Rod", allocList
);
409 RF_ASSERT(pda
!= NULL
);
410 readDataNodes
[i
].params
[0].p
= pda
; /* physical disk addr
412 readDataNodes
[i
].params
[1].p
= rf_AllocBuffer(raidPtr
, dag_h
, pda
->numSector
<< raidPtr
->logBytesPerSector
); /* buffer to hold old data */
413 readDataNodes
[i
].params
[2].v
= parityStripeID
;
414 readDataNodes
[i
].params
[3].v
= RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY
, which_ru
);
416 readDataNodes
[i
].propList
[0] = NULL
;
417 readDataNodes
[i
].propList
[1] = NULL
;
420 /* initialize nodes which read old parity (Rop) */
421 pda
= asmap
->parityInfo
;
423 for (i
= 0; i
< numParityNodes
; i
++) {
424 RF_ASSERT(pda
!= NULL
);
425 rf_InitNode(&readParityNodes
[i
], rf_wait
, RF_FALSE
, rf_DiskReadFunc
, rf_DiskReadUndoFunc
, rf_GenericWakeupFunc
, nNodes
, 1, 4, 0, dag_h
, "Rop", allocList
);
426 readParityNodes
[i
].params
[0].p
= pda
;
427 readParityNodes
[i
].params
[1].p
= rf_AllocBuffer(raidPtr
, dag_h
, pda
->numSector
<< raidPtr
->logBytesPerSector
); /* buffer to hold old parity */
428 readParityNodes
[i
].params
[2].v
= parityStripeID
;
429 readParityNodes
[i
].params
[3].v
= RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY
, which_ru
);
430 readParityNodes
[i
].propList
[0] = NULL
;
434 /* initialize nodes which write new data (Wnd) */
435 pda
= asmap
->physInfo
;
436 for (i
= 0; i
< numDataNodes
; i
++) {
437 RF_ASSERT(pda
!= NULL
);
438 rf_InitNode(&writeDataNodes
[i
], rf_wait
, RF_TRUE
, rf_DiskWriteFunc
, rf_DiskWriteUndoFunc
, rf_GenericWakeupFunc
, 1, nNodes
, 4, 0, dag_h
, "Wnd", allocList
);
439 writeDataNodes
[i
].params
[0].p
= pda
; /* physical disk addr
441 writeDataNodes
[i
].params
[1].p
= pda
->bufPtr
; /* buffer holding new
442 * data to be written */
443 writeDataNodes
[i
].params
[2].v
= parityStripeID
;
444 writeDataNodes
[i
].params
[3].v
= RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY
, which_ru
);
450 /* initialize nodes which compute new parity */
451 /* we use the simple XOR func in the double-XOR case, and when we're
452 * accessing only a portion of one stripe unit. the distinction
453 * between the two is that the regular XOR func assumes that the
454 * targbuf is a full SU in size, and examines the pda associated with
455 * the buffer to decide where within the buffer to XOR the data,
456 * whereas the simple XOR func just XORs the data into the start of
458 if ((numParityNodes
== 2) || ((numDataNodes
== 1) && (asmap
->totalSectorsAccessed
< raidPtr
->Layout
.sectorsPerStripeUnit
))) {
459 func
= pfuncs
->simple
;
460 undoFunc
= rf_NullNodeUndoFunc
;
461 name
= pfuncs
->SimpleName
;
463 qfunc
= qfuncs
->simple
;
464 qname
= qfuncs
->SimpleName
;
467 func
= pfuncs
->regular
;
468 undoFunc
= rf_NullNodeUndoFunc
;
469 name
= pfuncs
->RegularName
;
471 qfunc
= qfuncs
->regular
;
472 qname
= qfuncs
->RegularName
;
475 /* initialize the xor nodes: params are {pda,buf} from {Rod,Wnd,Rop}
476 * nodes, and raidPtr */
477 if (numParityNodes
== 2) { /* double-xor case */
478 for (i
= 0; i
< numParityNodes
; i
++) {
479 rf_InitNode(&xorNodes
[i
], rf_wait
, RF_TRUE
, func
, undoFunc
, NULL
, 1, nNodes
, 7, 1, dag_h
, name
, allocList
); /* no wakeup func for
481 xorNodes
[i
].flags
|= RF_DAGNODE_FLAG_YIELD
;
482 xorNodes
[i
].params
[0] = readDataNodes
[i
].params
[0];
483 xorNodes
[i
].params
[1] = readDataNodes
[i
].params
[1];
484 xorNodes
[i
].params
[2] = readParityNodes
[i
].params
[0];
485 xorNodes
[i
].params
[3] = readParityNodes
[i
].params
[1];
486 xorNodes
[i
].params
[4] = writeDataNodes
[i
].params
[0];
487 xorNodes
[i
].params
[5] = writeDataNodes
[i
].params
[1];
488 xorNodes
[i
].params
[6].p
= raidPtr
;
489 xorNodes
[i
].results
[0] = readParityNodes
[i
].params
[1].p
; /* use old parity buf as
493 /* there is only one xor node in this case */
494 rf_InitNode(&xorNodes
[0], rf_wait
, RF_TRUE
, func
, undoFunc
, NULL
, 1, nNodes
, (2 * (numDataNodes
+ numDataNodes
+ 1) + 1), 1, dag_h
, name
, allocList
);
495 xorNodes
[0].flags
|= RF_DAGNODE_FLAG_YIELD
;
496 for (i
= 0; i
< numDataNodes
+ 1; i
++) {
497 /* set up params related to Rod and Rop nodes */
498 xorNodes
[0].params
[2 * i
+ 0] = readDataNodes
[i
].params
[0]; /* pda */
499 xorNodes
[0].params
[2 * i
+ 1] = readDataNodes
[i
].params
[1]; /* buffer pointer */
501 for (i
= 0; i
< numDataNodes
; i
++) {
502 /* set up params related to Wnd and Wnp nodes */
503 xorNodes
[0].params
[2 * (numDataNodes
+ 1 + i
) + 0] = writeDataNodes
[i
].params
[0]; /* pda */
504 xorNodes
[0].params
[2 * (numDataNodes
+ 1 + i
) + 1] = writeDataNodes
[i
].params
[1]; /* buffer pointer */
506 xorNodes
[0].params
[2 * (numDataNodes
+ numDataNodes
+ 1)].p
= raidPtr
; /* xor node needs to get
507 * at RAID information */
508 xorNodes
[0].results
[0] = readParityNodes
[0].params
[1].p
;
511 /* initialize the log node(s) */
512 pda
= asmap
->parityInfo
;
513 for (i
= 0; i
< numParityNodes
; i
++) {
515 rf_InitNode(&lpuNodes
[i
], rf_wait
, RF_FALSE
, rf_ParityLogUpdateFunc
, rf_ParityLogUpdateUndoFunc
, rf_GenericWakeupFunc
, 1, 1, 2, 0, dag_h
, "Lpu", allocList
);
516 lpuNodes
[i
].params
[0].p
= pda
; /* PhysDiskAddr of parity */
517 lpuNodes
[i
].params
[1].p
= xorNodes
[i
].results
[0]; /* buffer pointer to
523 /* Step 4. connect the nodes */
525 /* connect header to block node */
526 RF_ASSERT(dag_h
->numSuccedents
== 1);
527 RF_ASSERT(blockNode
->numAntecedents
== 0);
528 dag_h
->succedents
[0] = blockNode
;
530 /* connect block node to read old data nodes */
531 RF_ASSERT(blockNode
->numSuccedents
== (numDataNodes
+ numParityNodes
));
532 for (i
= 0; i
< numDataNodes
; i
++) {
533 blockNode
->succedents
[i
] = &readDataNodes
[i
];
534 RF_ASSERT(readDataNodes
[i
].numAntecedents
== 1);
535 readDataNodes
[i
].antecedents
[0] = blockNode
;
536 readDataNodes
[i
].antType
[0] = rf_control
;
539 /* connect block node to read old parity nodes */
540 for (i
= 0; i
< numParityNodes
; i
++) {
541 blockNode
->succedents
[numDataNodes
+ i
] = &readParityNodes
[i
];
542 RF_ASSERT(readParityNodes
[i
].numAntecedents
== 1);
543 readParityNodes
[i
].antecedents
[0] = blockNode
;
544 readParityNodes
[i
].antType
[0] = rf_control
;
547 /* connect read old data nodes to write new data nodes */
548 for (i
= 0; i
< numDataNodes
; i
++) {
549 RF_ASSERT(readDataNodes
[i
].numSuccedents
== numDataNodes
+ numParityNodes
);
550 for (j
= 0; j
< numDataNodes
; j
++) {
551 RF_ASSERT(writeDataNodes
[j
].numAntecedents
== numDataNodes
+ numParityNodes
);
552 readDataNodes
[i
].succedents
[j
] = &writeDataNodes
[j
];
553 writeDataNodes
[j
].antecedents
[i
] = &readDataNodes
[i
];
555 writeDataNodes
[j
].antType
[i
] = rf_antiData
;
557 writeDataNodes
[j
].antType
[i
] = rf_control
;
561 /* connect read old data nodes to xor nodes */
562 for (i
= 0; i
< numDataNodes
; i
++)
563 for (j
= 0; j
< numParityNodes
; j
++) {
564 RF_ASSERT(xorNodes
[j
].numAntecedents
== numDataNodes
+ numParityNodes
);
565 readDataNodes
[i
].succedents
[numDataNodes
+ j
] = &xorNodes
[j
];
566 xorNodes
[j
].antecedents
[i
] = &readDataNodes
[i
];
567 xorNodes
[j
].antType
[i
] = rf_trueData
;
570 /* connect read old parity nodes to write new data nodes */
571 for (i
= 0; i
< numParityNodes
; i
++) {
572 RF_ASSERT(readParityNodes
[i
].numSuccedents
== numDataNodes
+ numParityNodes
);
573 for (j
= 0; j
< numDataNodes
; j
++) {
574 readParityNodes
[i
].succedents
[j
] = &writeDataNodes
[j
];
575 writeDataNodes
[j
].antecedents
[numDataNodes
+ i
] = &readParityNodes
[i
];
576 writeDataNodes
[j
].antType
[numDataNodes
+ i
] = rf_control
;
580 /* connect read old parity nodes to xor nodes */
581 for (i
= 0; i
< numParityNodes
; i
++)
582 for (j
= 0; j
< numParityNodes
; j
++) {
583 readParityNodes
[i
].succedents
[numDataNodes
+ j
] = &xorNodes
[j
];
584 xorNodes
[j
].antecedents
[numDataNodes
+ i
] = &readParityNodes
[i
];
585 xorNodes
[j
].antType
[numDataNodes
+ i
] = rf_trueData
;
588 /* connect xor nodes to write new parity nodes */
589 for (i
= 0; i
< numParityNodes
; i
++) {
590 RF_ASSERT(xorNodes
[i
].numSuccedents
== 1);
591 RF_ASSERT(lpuNodes
[i
].numAntecedents
== 1);
592 xorNodes
[i
].succedents
[0] = &lpuNodes
[i
];
593 lpuNodes
[i
].antecedents
[0] = &xorNodes
[i
];
594 lpuNodes
[i
].antType
[0] = rf_trueData
;
597 for (i
= 0; i
< numDataNodes
; i
++) {
598 /* connect write new data nodes to unblock node */
599 RF_ASSERT(writeDataNodes
[i
].numSuccedents
== 1);
600 RF_ASSERT(unblockNode
->numAntecedents
== (numDataNodes
+ (nfaults
* numParityNodes
)));
601 writeDataNodes
[i
].succedents
[0] = unblockNode
;
602 unblockNode
->antecedents
[i
] = &writeDataNodes
[i
];
603 unblockNode
->antType
[i
] = rf_control
;
606 /* connect write new parity nodes to unblock node */
607 for (i
= 0; i
< numParityNodes
; i
++) {
608 RF_ASSERT(lpuNodes
[i
].numSuccedents
== 1);
609 lpuNodes
[i
].succedents
[0] = unblockNode
;
610 unblockNode
->antecedents
[numDataNodes
+ i
] = &lpuNodes
[i
];
611 unblockNode
->antType
[numDataNodes
+ i
] = rf_control
;
614 /* connect unblock node to terminator */
615 RF_ASSERT(unblockNode
->numSuccedents
== 1);
616 RF_ASSERT(termNode
->numAntecedents
== 1);
617 RF_ASSERT(termNode
->numSuccedents
== 0);
618 unblockNode
->succedents
[0] = termNode
;
619 termNode
->antecedents
[0] = unblockNode
;
620 termNode
->antType
[0] = rf_control
;
625 rf_CreateParityLoggingSmallWriteDAG(
627 RF_AccessStripeMap_t
* asmap
,
628 RF_DagHeader_t
* dag_h
,
630 RF_RaidAccessFlags_t flags
,
631 RF_AllocListElem_t
* allocList
,
632 const RF_RedFuncs_t
* pfuncs
,
633 const RF_RedFuncs_t
* qfuncs
)
635 dag_h
->creator
= "ParityLoggingSmallWriteDAG";
636 rf_CommonCreateParityLoggingSmallWriteDAG(raidPtr
, asmap
, dag_h
, bp
, flags
, allocList
, &rf_xorFuncs
, NULL
);
641 rf_CreateParityLoggingLargeWriteDAG(
643 RF_AccessStripeMap_t
* asmap
,
644 RF_DagHeader_t
* dag_h
,
646 RF_RaidAccessFlags_t flags
,
647 RF_AllocListElem_t
* allocList
,
649 int (*redFunc
) (RF_DagNode_t
*))
651 dag_h
->creator
= "ParityLoggingSmallWriteDAG";
652 rf_CommonCreateParityLoggingLargeWriteDAG(raidPtr
, asmap
, dag_h
, bp
, flags
, allocList
, 1, rf_RegularXorFunc
);
654 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */