1 /* $NetBSD: rf_paritylogDiskMgr.c,v 1.22 2007/03/04 06:02:38 christos Exp $ */
3 * Copyright (c) 1995 Carnegie-Mellon University.
6 * Author: William V. Courtright II
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
18 * Carnegie Mellon requests users of this software to return to
20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
28 /* Code for flushing and reintegration operations related to parity logging.
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: rf_paritylogDiskMgr.c,v 1.22 2007/03/04 06:02:38 christos Exp $");
37 #if RF_INCLUDE_PARITYLOGGING > 0
39 #include <dev/raidframe/raidframevar.h>
41 #include "rf_threadstuff.h"
42 #include "rf_mcpair.h"
45 #include "rf_dagfuncs.h"
47 #include "rf_layout.h"
48 #include "rf_diskqueue.h"
49 #include "rf_paritylog.h"
50 #include "rf_general.h"
51 #include "rf_etimer.h"
52 #include "rf_paritylogging.h"
53 #include "rf_engine.h"
54 #include "rf_dagutils.h"
56 #include "rf_parityscan.h"
58 #include "rf_paritylogDiskMgr.h"
60 static void *AcquireReintBuffer(RF_RegionBufferQueue_t
*);
63 AcquireReintBuffer(RF_RegionBufferQueue_t
*pool
)
67 /* Return a region buffer from the free list (pool). If the free list
68 * is empty, WAIT. BLOCKING */
70 RF_LOCK_MUTEX(pool
->mutex
);
71 if (pool
->availableBuffers
> 0) {
72 bufPtr
= pool
->buffers
[pool
->availBuffersIndex
];
73 pool
->availableBuffers
--;
74 pool
->availBuffersIndex
++;
75 if (pool
->availBuffersIndex
== pool
->totalBuffers
)
76 pool
->availBuffersIndex
= 0;
77 RF_UNLOCK_MUTEX(pool
->mutex
);
79 RF_PANIC(); /* should never happen in correct config,
81 RF_WAIT_COND(pool
->cond
, pool
->mutex
);
88 RF_RegionBufferQueue_t
* pool
,
91 /* Insert a region buffer (bufPtr) into the free list (pool).
94 RF_LOCK_MUTEX(pool
->mutex
);
95 pool
->availableBuffers
++;
96 pool
->buffers
[pool
->emptyBuffersIndex
] = bufPtr
;
97 pool
->emptyBuffersIndex
++;
98 if (pool
->emptyBuffersIndex
== pool
->totalBuffers
)
99 pool
->emptyBuffersIndex
= 0;
100 RF_ASSERT(pool
->availableBuffers
<= pool
->totalBuffers
);
101 RF_UNLOCK_MUTEX(pool
->mutex
);
102 RF_SIGNAL_COND(pool
->cond
);
109 RF_RegionId_t regionID
,
110 RF_MCPair_t
* rrd_mcpair
,
113 RF_DagHeader_t
** rrd_dag_h
,
114 RF_AllocListElem_t
** rrd_alloclist
,
115 RF_PhysDiskAddr_t
** rrd_pda
)
117 /* Initiate the read a region log from disk. Once initiated, return
118 * to the calling routine.
122 RF_AccTraceEntry_t
*tracerec
;
123 RF_DagNode_t
*rrd_rdNode
;
125 /* create DAG to read region log from disk */
126 rf_MakeAllocList(*rrd_alloclist
);
127 *rrd_dag_h
= rf_MakeSimpleDAG(raidPtr
, 1, 0, regionBuffer
,
128 rf_DiskReadFunc
, rf_DiskReadUndoFunc
,
129 "Rrl", *rrd_alloclist
,
131 RF_IO_NORMAL_PRIORITY
);
133 /* create and initialize PDA for the core log */
134 /* RF_Malloc(*rrd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t
136 *rrd_pda
= rf_AllocPDAList(1);
137 rf_MapLogParityLogging(raidPtr
, regionID
, 0,
138 &((*rrd_pda
)->col
), &((*rrd_pda
)->startSector
));
139 (*rrd_pda
)->numSector
= raidPtr
->regionInfo
[regionID
].capacity
;
141 if ((*rrd_pda
)->next
) {
142 (*rrd_pda
)->next
= NULL
;
143 printf("set rrd_pda->next to NULL\n");
145 /* initialize DAG parameters */
146 RF_Malloc(tracerec
,sizeof(RF_AccTraceEntry_t
), (RF_AccTraceEntry_t
*));
147 memset((char *) tracerec
, 0, sizeof(RF_AccTraceEntry_t
));
148 (*rrd_dag_h
)->tracerec
= tracerec
;
149 rrd_rdNode
= (*rrd_dag_h
)->succedents
[0]->succedents
[0];
150 rrd_rdNode
->params
[0].p
= *rrd_pda
;
151 /* rrd_rdNode->params[1] = regionBuffer; */
152 rrd_rdNode
->params
[2].v
= 0;
153 rrd_rdNode
->params
[3].v
= RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY
, 0);
155 /* launch region log read dag */
156 rf_DispatchDAG(*rrd_dag_h
, (void (*) (void *)) rf_MCPairWakeupFunc
,
157 (void *) rrd_mcpair
);
164 RF_ParityLog_t
* log
,
165 RF_MCPair_t
* fwr_mcpair
,
167 RF_DagHeader_t
** fwr_dag_h
,
168 RF_AllocListElem_t
** fwr_alloclist
,
169 RF_PhysDiskAddr_t
** fwr_pda
)
171 RF_RegionId_t regionID
= log
->regionID
;
172 RF_AccTraceEntry_t
*tracerec
;
173 RF_SectorNum_t regionOffset
;
174 RF_DagNode_t
*fwr_wrNode
;
176 /* Initiate the write of a core log to a region log disk. Once
177 * initiated, return to the calling routine.
181 /* create DAG to write a core log to a region log disk */
182 rf_MakeAllocList(*fwr_alloclist
);
183 *fwr_dag_h
= rf_MakeSimpleDAG(raidPtr
, 1, 0, log
->bufPtr
,
184 rf_DiskWriteFunc
, rf_DiskWriteUndoFunc
,
185 "Wcl", *fwr_alloclist
, RF_DAG_FLAGS_NONE
, RF_IO_NORMAL_PRIORITY
);
187 /* create and initialize PDA for the region log */
188 /* RF_Malloc(*fwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t
190 *fwr_pda
= rf_AllocPDAList(1);
191 regionOffset
= log
->diskOffset
;
192 rf_MapLogParityLogging(raidPtr
, regionID
, regionOffset
,
194 &((*fwr_pda
)->startSector
));
195 (*fwr_pda
)->numSector
= raidPtr
->numSectorsPerLog
;
197 /* initialize DAG parameters */
198 RF_Malloc(tracerec
,sizeof(RF_AccTraceEntry_t
), (RF_AccTraceEntry_t
*));
199 memset((char *) tracerec
, 0, sizeof(RF_AccTraceEntry_t
));
200 (*fwr_dag_h
)->tracerec
= tracerec
;
201 fwr_wrNode
= (*fwr_dag_h
)->succedents
[0]->succedents
[0];
202 fwr_wrNode
->params
[0].p
= *fwr_pda
;
203 /* fwr_wrNode->params[1] = log->bufPtr; */
204 fwr_wrNode
->params
[2].v
= 0;
205 fwr_wrNode
->params
[3].v
= RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY
, 0);
207 /* launch the dag to write the core log to disk */
208 rf_DispatchDAG(*fwr_dag_h
, (void (*) (void *)) rf_MCPairWakeupFunc
,
209 (void *) fwr_mcpair
);
215 RF_RegionId_t regionID
,
216 RF_MCPair_t
* prd_mcpair
,
219 RF_DagHeader_t
** prd_dag_h
,
220 RF_AllocListElem_t
** prd_alloclist
,
221 RF_PhysDiskAddr_t
** prd_pda
)
223 /* Initiate the read region parity from disk. Once initiated, return
224 * to the calling routine.
228 RF_AccTraceEntry_t
*tracerec
;
229 RF_DagNode_t
*prd_rdNode
;
231 /* create DAG to read region parity from disk */
232 rf_MakeAllocList(*prd_alloclist
);
233 *prd_dag_h
= rf_MakeSimpleDAG(raidPtr
, 1, 0, NULL
, rf_DiskReadFunc
,
234 rf_DiskReadUndoFunc
, "Rrp",
235 *prd_alloclist
, RF_DAG_FLAGS_NONE
,
236 RF_IO_NORMAL_PRIORITY
);
238 /* create and initialize PDA for region parity */
239 /* RF_Malloc(*prd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t
241 *prd_pda
= rf_AllocPDAList(1);
242 rf_MapRegionParity(raidPtr
, regionID
,
243 &((*prd_pda
)->col
), &((*prd_pda
)->startSector
),
244 &((*prd_pda
)->numSector
));
245 if (rf_parityLogDebug
)
246 printf("[reading %d sectors of parity from region %d]\n",
247 (int) (*prd_pda
)->numSector
, regionID
);
248 if ((*prd_pda
)->next
) {
249 (*prd_pda
)->next
= NULL
;
250 printf("set prd_pda->next to NULL\n");
252 /* initialize DAG parameters */
253 RF_Malloc(tracerec
,sizeof(RF_AccTraceEntry_t
), (RF_AccTraceEntry_t
*));
254 memset((char *) tracerec
, 0, sizeof(RF_AccTraceEntry_t
));
255 (*prd_dag_h
)->tracerec
= tracerec
;
256 prd_rdNode
= (*prd_dag_h
)->succedents
[0]->succedents
[0];
257 prd_rdNode
->params
[0].p
= *prd_pda
;
258 prd_rdNode
->params
[1].p
= parityBuffer
;
259 prd_rdNode
->params
[2].v
= 0;
260 prd_rdNode
->params
[3].v
= RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY
, 0);
261 #if RF_DEBUG_VALIDATE_DAG
262 if (rf_validateDAGDebug
)
263 rf_ValidateDAG(*prd_dag_h
);
265 /* launch region parity read dag */
266 rf_DispatchDAG(*prd_dag_h
, (void (*) (void *)) rf_MCPairWakeupFunc
,
267 (void *) prd_mcpair
);
272 RF_RegionId_t regionID
,
273 RF_MCPair_t
* pwr_mcpair
,
276 RF_DagHeader_t
** pwr_dag_h
,
277 RF_AllocListElem_t
** pwr_alloclist
,
278 RF_PhysDiskAddr_t
** pwr_pda
)
280 /* Initiate the write of region parity to disk. Once initiated, return
281 * to the calling routine.
285 RF_AccTraceEntry_t
*tracerec
;
286 RF_DagNode_t
*pwr_wrNode
;
288 /* create DAG to write region log from disk */
289 rf_MakeAllocList(*pwr_alloclist
);
290 *pwr_dag_h
= rf_MakeSimpleDAG(raidPtr
, 1, 0, parityBuffer
,
291 rf_DiskWriteFunc
, rf_DiskWriteUndoFunc
,
292 "Wrp", *pwr_alloclist
,
294 RF_IO_NORMAL_PRIORITY
);
296 /* create and initialize PDA for region parity */
297 /* RF_Malloc(*pwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t
299 *pwr_pda
= rf_AllocPDAList(1);
300 rf_MapRegionParity(raidPtr
, regionID
,
301 &((*pwr_pda
)->col
), &((*pwr_pda
)->startSector
),
302 &((*pwr_pda
)->numSector
));
304 /* initialize DAG parameters */
305 RF_Malloc(tracerec
,sizeof(RF_AccTraceEntry_t
), (RF_AccTraceEntry_t
*));
306 memset((char *) tracerec
, 0, sizeof(RF_AccTraceEntry_t
));
307 (*pwr_dag_h
)->tracerec
= tracerec
;
308 pwr_wrNode
= (*pwr_dag_h
)->succedents
[0]->succedents
[0];
309 pwr_wrNode
->params
[0].p
= *pwr_pda
;
310 /* pwr_wrNode->params[1] = parityBuffer; */
311 pwr_wrNode
->params
[2].v
= 0;
312 pwr_wrNode
->params
[3].v
= RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY
, 0);
314 /* launch the dag to write region parity to disk */
315 rf_DispatchDAG(*pwr_dag_h
, (void (*) (void *)) rf_MCPairWakeupFunc
,
316 (void *) pwr_mcpair
);
322 RF_ParityLog_t
* logList
)
324 /* Flush a linked list of core logs to the log disk. Logs contain the
325 * disk location where they should be written. Logs were written in
326 * FIFO order and that order must be preserved.
328 * Recommended optimizations: 1) allow multiple flushes to occur
329 * simultaneously 2) coalesce contiguous flush operations
334 RF_RegionId_t regionID
;
335 RF_MCPair_t
*fwr_mcpair
;
336 RF_DagHeader_t
*fwr_dag_h
;
337 RF_AllocListElem_t
*fwr_alloclist
;
338 RF_PhysDiskAddr_t
*fwr_pda
;
340 fwr_mcpair
= rf_AllocMCPair();
341 RF_LOCK_MUTEX(fwr_mcpair
->mutex
);
346 regionID
= log
->regionID
;
348 /* create and launch a DAG to write the core log */
349 if (rf_parityLogDebug
)
350 printf("[initiating write of core log for region %d]\n", regionID
);
351 fwr_mcpair
->flag
= RF_FALSE
;
352 WriteCoreLog(log
, fwr_mcpair
, raidPtr
, &fwr_dag_h
,
353 &fwr_alloclist
, &fwr_pda
);
355 /* wait for the DAG to complete */
356 while (!fwr_mcpair
->flag
)
357 RF_WAIT_COND(fwr_mcpair
->cond
, fwr_mcpair
->mutex
);
358 if (fwr_dag_h
->status
!= rf_enable
) {
359 RF_ERRORMSG1("Unable to write core log to disk (region %d)\n", regionID
);
362 /* RF_Free(fwr_pda, sizeof(RF_PhysDiskAddr_t)); */
363 rf_FreePhysDiskAddr(fwr_pda
);
364 rf_FreeDAG(fwr_dag_h
);
365 rf_FreeAllocList(fwr_alloclist
);
369 RF_UNLOCK_MUTEX(fwr_mcpair
->mutex
);
370 rf_FreeMCPair(fwr_mcpair
);
371 rf_ReleaseParityLogs(raidPtr
, logList
);
377 RF_RegionId_t regionID
,
378 RF_ParityLog_t
* coreLog
)
380 RF_MCPair_t
*rrd_mcpair
= NULL
, *prd_mcpair
, *pwr_mcpair
;
381 RF_DagHeader_t
*rrd_dag_h
= NULL
, *prd_dag_h
, *pwr_dag_h
;
382 RF_AllocListElem_t
*rrd_alloclist
= NULL
, *prd_alloclist
, *pwr_alloclist
;
383 RF_PhysDiskAddr_t
*rrd_pda
= NULL
, *prd_pda
, *pwr_pda
;
384 void *parityBuffer
, *regionBuffer
= NULL
;
386 /* Reintegrate a region (regionID).
388 * 1. acquire region and parity buffers
389 * 2. read log from disk
390 * 3. read parity from disk
391 * 4. apply log to parity
392 * 5. apply core log to parity
393 * 6. write new parity to disk
397 if (rf_parityLogDebug
)
398 printf("[reintegrating region %d]\n", regionID
);
400 /* initiate read of region parity */
401 if (rf_parityLogDebug
)
402 printf("[initiating read of parity for region %d]\n",regionID
);
403 parityBuffer
= AcquireReintBuffer(&raidPtr
->parityBufferPool
);
404 prd_mcpair
= rf_AllocMCPair();
405 RF_LOCK_MUTEX(prd_mcpair
->mutex
);
406 prd_mcpair
->flag
= RF_FALSE
;
407 ReadRegionParity(regionID
, prd_mcpair
, parityBuffer
, raidPtr
,
408 &prd_dag_h
, &prd_alloclist
, &prd_pda
);
410 /* if region log nonempty, initiate read */
411 if (raidPtr
->regionInfo
[regionID
].diskCount
> 0) {
412 if (rf_parityLogDebug
)
413 printf("[initiating read of disk log for region %d]\n",
415 regionBuffer
= AcquireReintBuffer(&raidPtr
->regionBufferPool
);
416 rrd_mcpair
= rf_AllocMCPair();
417 RF_LOCK_MUTEX(rrd_mcpair
->mutex
);
418 rrd_mcpair
->flag
= RF_FALSE
;
419 ReadRegionLog(regionID
, rrd_mcpair
, regionBuffer
, raidPtr
,
420 &rrd_dag_h
, &rrd_alloclist
, &rrd_pda
);
422 /* wait on read of region parity to complete */
423 while (!prd_mcpair
->flag
) {
424 RF_WAIT_COND(prd_mcpair
->cond
, prd_mcpair
->mutex
);
426 RF_UNLOCK_MUTEX(prd_mcpair
->mutex
);
427 if (prd_dag_h
->status
!= rf_enable
) {
428 RF_ERRORMSG("Unable to read parity from disk\n");
429 /* add code to fail the parity disk */
432 /* apply core log to parity */
433 /* if (coreLog) ApplyLogsToParity(coreLog, parityBuffer); */
435 if (raidPtr
->regionInfo
[regionID
].diskCount
> 0) {
436 /* wait on read of region log to complete */
437 while (!rrd_mcpair
->flag
)
438 RF_WAIT_COND(rrd_mcpair
->cond
, rrd_mcpair
->mutex
);
439 RF_UNLOCK_MUTEX(rrd_mcpair
->mutex
);
440 if (rrd_dag_h
->status
!= rf_enable
) {
441 RF_ERRORMSG("Unable to read region log from disk\n");
442 /* add code to fail the log disk */
445 /* apply region log to parity */
446 /* ApplyRegionToParity(regionID, regionBuffer, parityBuffer); */
447 /* release resources associated with region log */
448 /* RF_Free(rrd_pda, sizeof(RF_PhysDiskAddr_t)); */
449 rf_FreePhysDiskAddr(rrd_pda
);
450 rf_FreeDAG(rrd_dag_h
);
451 rf_FreeAllocList(rrd_alloclist
);
452 rf_FreeMCPair(rrd_mcpair
);
453 ReleaseReintBuffer(&raidPtr
->regionBufferPool
, regionBuffer
);
455 /* write reintegrated parity to disk */
456 if (rf_parityLogDebug
)
457 printf("[initiating write of parity for region %d]\n",
459 pwr_mcpair
= rf_AllocMCPair();
460 RF_LOCK_MUTEX(pwr_mcpair
->mutex
);
461 pwr_mcpair
->flag
= RF_FALSE
;
462 WriteRegionParity(regionID
, pwr_mcpair
, parityBuffer
, raidPtr
,
463 &pwr_dag_h
, &pwr_alloclist
, &pwr_pda
);
464 while (!pwr_mcpair
->flag
)
465 RF_WAIT_COND(pwr_mcpair
->cond
, pwr_mcpair
->mutex
);
466 RF_UNLOCK_MUTEX(pwr_mcpair
->mutex
);
467 if (pwr_dag_h
->status
!= rf_enable
) {
468 RF_ERRORMSG("Unable to write parity to disk\n");
469 /* add code to fail the parity disk */
472 /* release resources associated with read of old parity */
473 /* RF_Free(prd_pda, sizeof(RF_PhysDiskAddr_t)); */
474 rf_FreePhysDiskAddr(prd_pda
);
475 rf_FreeDAG(prd_dag_h
);
476 rf_FreeAllocList(prd_alloclist
);
477 rf_FreeMCPair(prd_mcpair
);
479 /* release resources associated with write of new parity */
480 ReleaseReintBuffer(&raidPtr
->parityBufferPool
, parityBuffer
);
481 /* RF_Free(pwr_pda, sizeof(RF_PhysDiskAddr_t)); */
482 rf_FreePhysDiskAddr(pwr_pda
);
483 rf_FreeDAG(pwr_dag_h
);
484 rf_FreeAllocList(pwr_alloclist
);
485 rf_FreeMCPair(pwr_mcpair
);
487 if (rf_parityLogDebug
)
488 printf("[finished reintegrating region %d]\n", regionID
);
496 RF_ParityLog_t
* logList
)
498 RF_ParityLog_t
*log
, *freeLogList
= NULL
;
499 RF_ParityLogData_t
*logData
, *logDataList
;
500 RF_RegionId_t regionID
;
505 logList
= logList
->next
;
507 regionID
= log
->regionID
;
508 ReintegrateRegion(raidPtr
, regionID
, log
);
511 /* remove all items which are blocked on reintegration of this
513 RF_LOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
514 logData
= rf_SearchAndDequeueParityLogData(raidPtr
, regionID
,
515 &raidPtr
->parityLogDiskQueue
.reintBlockHead
,
516 &raidPtr
->parityLogDiskQueue
.reintBlockTail
,
518 logDataList
= logData
;
520 logData
->next
= rf_SearchAndDequeueParityLogData(
522 &raidPtr
->parityLogDiskQueue
.reintBlockHead
,
523 &raidPtr
->parityLogDiskQueue
.reintBlockTail
,
525 logData
= logData
->next
;
527 RF_UNLOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
529 /* process blocked log data and clear reintInProgress flag for
532 rf_ParityLogAppend(logDataList
, RF_TRUE
, &log
, RF_TRUE
);
534 /* Enable flushing for this region. Holding both
535 * locks provides a synchronization barrier with
536 * DumpParityLogToDisk */
537 RF_LOCK_MUTEX(raidPtr
->regionInfo
[regionID
].mutex
);
538 RF_LOCK_MUTEX(raidPtr
->regionInfo
[regionID
].reintMutex
);
539 RF_LOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
540 raidPtr
->regionInfo
[regionID
].diskCount
= 0;
541 raidPtr
->regionInfo
[regionID
].reintInProgress
= RF_FALSE
;
542 RF_UNLOCK_MUTEX(raidPtr
->regionInfo
[regionID
].mutex
);
543 RF_UNLOCK_MUTEX(raidPtr
->regionInfo
[regionID
].reintMutex
); /* flushing is now
545 RF_UNLOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
547 /* if log wasn't used, attach it to the list of logs to be
550 log
->next
= freeLogList
;
555 rf_ReleaseParityLogs(raidPtr
, freeLogList
);
559 rf_ShutdownLogging(RF_Raid_t
* raidPtr
)
561 /* shutdown parity logging 1) disable parity logging in all regions 2)
562 * reintegrate all regions */
564 RF_SectorCount_t diskCount
;
565 RF_RegionId_t regionID
;
568 if (rf_parityLogDebug
)
569 printf("[shutting down parity logging]\n");
570 /* Since parity log maps are volatile, we must reintegrate all
572 if (rf_forceParityLogReint
) {
573 for (regionID
= 0; regionID
< rf_numParityRegions
; regionID
++) {
574 RF_LOCK_MUTEX(raidPtr
->regionInfo
[regionID
].mutex
);
575 raidPtr
->regionInfo
[regionID
].loggingEnabled
=
577 log
= raidPtr
->regionInfo
[regionID
].coreLog
;
578 raidPtr
->regionInfo
[regionID
].coreLog
= NULL
;
579 diskCount
= raidPtr
->regionInfo
[regionID
].diskCount
;
580 RF_UNLOCK_MUTEX(raidPtr
->regionInfo
[regionID
].mutex
);
581 if (diskCount
> 0 || log
!= NULL
)
582 ReintegrateRegion(raidPtr
, regionID
, log
);
584 rf_ReleaseParityLogs(raidPtr
, log
);
587 if (rf_parityLogDebug
) {
588 printf("[parity logging disabled]\n");
589 printf("[should be done!]\n");
595 rf_ParityLoggingDiskManager(RF_Raid_t
* raidPtr
)
597 RF_ParityLog_t
*reintQueue
, *flushQueue
;
598 int workNeeded
, done
= RF_FALSE
;
601 /* Main program for parity logging disk thread. This routine waits
602 * for work to appear in either the flush or reintegration queues and
603 * is responsible for flushing core logs to the log disk as well as
604 * reintegrating parity regions.
610 RF_LOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
613 * Inform our creator that we're running. Don't bother doing the
614 * mutex lock/unlock dance- we locked above, and we'll unlock
615 * below with nothing to do, yet.
617 raidPtr
->parityLogDiskQueue
.threadState
|= RF_PLOG_RUNNING
;
618 RF_SIGNAL_COND(raidPtr
->parityLogDiskQueue
.cond
);
620 /* empty the work queues */
621 flushQueue
= raidPtr
->parityLogDiskQueue
.flushQueue
;
622 raidPtr
->parityLogDiskQueue
.flushQueue
= NULL
;
623 reintQueue
= raidPtr
->parityLogDiskQueue
.reintQueue
;
624 raidPtr
->parityLogDiskQueue
.reintQueue
= NULL
;
625 workNeeded
= (flushQueue
|| reintQueue
);
629 /* First, flush all logs in the flush queue, freeing
630 * buffers Second, reintegrate all regions which are
631 * reported as full. Third, append queued log data
634 * Note: Incoming appends (ParityLogAppend) can block on
635 * either 1. empty buffer pool 2. region under
636 * reintegration To preserve a global FIFO ordering of
637 * appends, buffers are not released to the world
638 * until those appends blocked on buffers are removed
639 * from the append queue. Similarly, regions which
640 * are reintegrated are not opened for general use
641 * until the append queue has been emptied. */
643 RF_UNLOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
645 /* empty flushQueue, using free'd log buffers to
648 FlushLogsToDisk(raidPtr
, flushQueue
);
650 /* empty reintQueue, flushing from reintTail as we go */
652 ReintegrateLogs(raidPtr
, reintQueue
);
654 RF_LOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
655 flushQueue
= raidPtr
->parityLogDiskQueue
.flushQueue
;
656 raidPtr
->parityLogDiskQueue
.flushQueue
= NULL
;
657 reintQueue
= raidPtr
->parityLogDiskQueue
.reintQueue
;
658 raidPtr
->parityLogDiskQueue
.reintQueue
= NULL
;
659 workNeeded
= (flushQueue
|| reintQueue
);
661 /* no work is needed at this point */
662 if (raidPtr
->parityLogDiskQueue
.threadState
& RF_PLOG_TERMINATE
) {
663 /* shutdown parity logging 1. disable parity logging
664 * in all regions 2. reintegrate all regions */
665 done
= RF_TRUE
; /* thread disabled, no work needed */
666 RF_UNLOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
667 rf_ShutdownLogging(raidPtr
);
670 /* thread enabled, no work needed, so sleep */
671 if (rf_parityLogDebug
)
672 printf("[parity logging disk manager sleeping]\n");
673 RF_WAIT_COND(raidPtr
->parityLogDiskQueue
.cond
,
674 raidPtr
->parityLogDiskQueue
.mutex
);
675 if (rf_parityLogDebug
)
676 printf("[parity logging disk manager just woke up]\n");
677 flushQueue
= raidPtr
->parityLogDiskQueue
.flushQueue
;
678 raidPtr
->parityLogDiskQueue
.flushQueue
= NULL
;
679 reintQueue
= raidPtr
->parityLogDiskQueue
.reintQueue
;
680 raidPtr
->parityLogDiskQueue
.reintQueue
= NULL
;
681 workNeeded
= (flushQueue
|| reintQueue
);
685 * Announce that we're done.
687 RF_LOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
688 raidPtr
->parityLogDiskQueue
.threadState
|= RF_PLOG_SHUTDOWN
;
689 RF_UNLOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
690 RF_SIGNAL_COND(raidPtr
->parityLogDiskQueue
.cond
);
695 * In the NetBSD kernel, the thread must exit; returning would
696 * cause the proc trampoline to attempt to return to userspace.
698 kthread_exit(0); /* does not return */
700 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */