/* $NetBSD: rf_paritylog.c,v 1.12 2006/04/26 17:08:48 oster Exp $ */
/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 *
 * Author: William V. Courtright II
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
 * School of Computer Science
 * Carnegie Mellon University
 * Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * Code for manipulating in-core parity logs
 */
33 #include <sys/cdefs.h>
34 __KERNEL_RCSID(0, "$NetBSD: rf_paritylog.c,v 1.12 2006/04/26 17:08:48 oster Exp $");
38 #if RF_INCLUDE_PARITYLOGGING > 0
/*
 * Append-only log for recording parity "update" and "overwrite" records
 */
44 #include <dev/raidframe/raidframevar.h>
46 #include "rf_threadstuff.h"
47 #include "rf_mcpair.h"
50 #include "rf_dagfuncs.h"
52 #include "rf_layout.h"
53 #include "rf_diskqueue.h"
54 #include "rf_etimer.h"
55 #include "rf_paritylog.h"
56 #include "rf_general.h"
58 #include "rf_paritylogging.h"
59 #include "rf_paritylogDiskMgr.h"
61 static RF_CommonLogData_t
*
62 AllocParityLogCommonData(RF_Raid_t
* raidPtr
)
64 RF_CommonLogData_t
*common
= NULL
;
66 /* Return a struct for holding common parity log information from the
67 * free list (rf_parityLogDiskQueue.freeCommonList). If the free list
68 * is empty, call RF_Malloc to create a new structure. NON-BLOCKING */
70 RF_LOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
71 if (raidPtr
->parityLogDiskQueue
.freeCommonList
) {
72 common
= raidPtr
->parityLogDiskQueue
.freeCommonList
;
73 raidPtr
->parityLogDiskQueue
.freeCommonList
= raidPtr
->parityLogDiskQueue
.freeCommonList
->next
;
74 RF_UNLOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
76 RF_UNLOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
77 RF_Malloc(common
, sizeof(RF_CommonLogData_t
), (RF_CommonLogData_t
*));
78 rf_mutex_init(&common
->mutex
);
85 FreeParityLogCommonData(RF_CommonLogData_t
* common
)
89 /* Insert a single struct for holding parity log information (data)
90 * into the free list (rf_parityLogDiskQueue.freeCommonList).
93 raidPtr
= common
->raidPtr
;
94 RF_LOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
95 common
->next
= raidPtr
->parityLogDiskQueue
.freeCommonList
;
96 raidPtr
->parityLogDiskQueue
.freeCommonList
= common
;
97 RF_UNLOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
100 static RF_ParityLogData_t
*
101 AllocParityLogData(RF_Raid_t
* raidPtr
)
103 RF_ParityLogData_t
*data
= NULL
;
105 /* Return a struct for holding parity log information from the free
106 * list (rf_parityLogDiskQueue.freeList). If the free list is empty,
107 * call RF_Malloc to create a new structure. NON-BLOCKING */
109 RF_LOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
110 if (raidPtr
->parityLogDiskQueue
.freeDataList
) {
111 data
= raidPtr
->parityLogDiskQueue
.freeDataList
;
112 raidPtr
->parityLogDiskQueue
.freeDataList
= raidPtr
->parityLogDiskQueue
.freeDataList
->next
;
113 RF_UNLOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
115 RF_UNLOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
116 RF_Malloc(data
, sizeof(RF_ParityLogData_t
), (RF_ParityLogData_t
*));
125 FreeParityLogData(RF_ParityLogData_t
* data
)
127 RF_ParityLogData_t
*nextItem
;
130 /* Insert a linked list of structs for holding parity log information
131 * (data) into the free list (parityLogDiskQueue.freeList).
134 raidPtr
= data
->common
->raidPtr
;
135 RF_LOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
137 nextItem
= data
->next
;
138 data
->next
= raidPtr
->parityLogDiskQueue
.freeDataList
;
139 raidPtr
->parityLogDiskQueue
.freeDataList
= data
;
142 RF_UNLOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
147 EnqueueParityLogData(
148 RF_ParityLogData_t
* data
,
149 RF_ParityLogData_t
** head
,
150 RF_ParityLogData_t
** tail
)
154 /* Insert an in-core parity log (*data) into the head of a disk queue
155 * (*head, *tail). NON-BLOCKING */
157 raidPtr
= data
->common
->raidPtr
;
158 if (rf_parityLogDebug
)
159 printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data
->regionID
, (int) data
->diskAddress
.raidAddress
, (int) data
->diskAddress
.numSector
);
160 RF_ASSERT(data
->prev
== NULL
);
161 RF_ASSERT(data
->next
== NULL
);
162 RF_LOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
164 /* insert into head of queue */
165 RF_ASSERT((*head
)->prev
== NULL
);
166 RF_ASSERT((*tail
)->next
== NULL
);
168 (*head
)->prev
= data
;
171 /* insert into empty list */
172 RF_ASSERT(*head
== NULL
);
173 RF_ASSERT(*tail
== NULL
);
177 RF_ASSERT((*head
)->prev
== NULL
);
178 RF_ASSERT((*tail
)->next
== NULL
);
179 RF_UNLOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
182 static RF_ParityLogData_t
*
183 DequeueParityLogData(
185 RF_ParityLogData_t
** head
,
186 RF_ParityLogData_t
** tail
,
189 RF_ParityLogData_t
*data
;
191 /* Remove and return an in-core parity log from the tail of a disk
192 * queue (*head, *tail). NON-BLOCKING */
194 /* remove from tail, preserving FIFO order */
196 RF_LOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
199 if (*head
== *tail
) {
200 /* removing last item from queue */
204 *tail
= (*tail
)->prev
;
205 (*tail
)->next
= NULL
;
206 RF_ASSERT((*head
)->prev
== NULL
);
207 RF_ASSERT((*tail
)->next
== NULL
);
211 if (rf_parityLogDebug
)
212 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data
->regionID
, (int) data
->diskAddress
.raidAddress
, (int) data
->diskAddress
.numSector
);
215 RF_ASSERT((*head
)->prev
== NULL
);
216 RF_ASSERT((*tail
)->next
== NULL
);
219 RF_UNLOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
225 RequeueParityLogData(
226 RF_ParityLogData_t
* data
,
227 RF_ParityLogData_t
** head
,
228 RF_ParityLogData_t
** tail
)
232 /* Insert an in-core parity log (*data) into the tail of a disk queue
233 * (*head, *tail). NON-BLOCKING */
235 raidPtr
= data
->common
->raidPtr
;
237 if (rf_parityLogDebug
)
238 printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data
->regionID
, (int) data
->diskAddress
.raidAddress
, (int) data
->diskAddress
.numSector
);
239 RF_LOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
241 /* append to tail of list */
244 (*tail
)->next
= data
;
247 /* inserting into an empty list */
250 (*head
)->prev
= NULL
;
251 (*tail
)->next
= NULL
;
253 RF_ASSERT((*head
)->prev
== NULL
);
254 RF_ASSERT((*tail
)->next
== NULL
);
255 RF_UNLOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
259 rf_CreateParityLogData(
260 RF_ParityRecordType_t operation
,
261 RF_PhysDiskAddr_t
* pda
,
264 int (*wakeFunc
) (RF_DagNode_t
* node
, int status
),
266 RF_AccTraceEntry_t
* tracerec
,
267 RF_Etimer_t startTime
)
269 RF_ParityLogData_t
*data
, *resultHead
= NULL
, *resultTail
= NULL
;
270 RF_CommonLogData_t
*common
;
271 RF_PhysDiskAddr_t
*diskAddress
;
272 int boundary
, offset
= 0;
274 /* Return an initialized struct of info to be logged. Build one item
275 * per physical disk address, one item per region.
280 common
= AllocParityLogCommonData(raidPtr
);
283 common
->operation
= operation
;
284 common
->bufPtr
= bufPtr
;
285 common
->raidPtr
= raidPtr
;
286 common
->wakeFunc
= wakeFunc
;
287 common
->wakeArg
= wakeArg
;
288 common
->tracerec
= tracerec
;
289 common
->startTime
= startTime
;
292 if (rf_parityLogDebug
)
293 printf("[entering CreateParityLogData]\n");
294 while (diskAddress
) {
296 data
= AllocParityLogData(raidPtr
);
298 data
->common
= common
;
301 data
->regionID
= rf_MapRegionIDParityLogging(raidPtr
, diskAddress
->startSector
);
302 if (data
->regionID
== rf_MapRegionIDParityLogging(raidPtr
, diskAddress
->startSector
+ diskAddress
->numSector
- 1)) {
303 /* disk address does not cross a region boundary */
304 data
->diskAddress
= *diskAddress
;
305 data
->bufOffset
= offset
;
306 offset
= offset
+ diskAddress
->numSector
;
307 EnqueueParityLogData(data
, &resultHead
, &resultTail
);
308 /* adjust disk address */
309 diskAddress
= diskAddress
->next
;
311 /* disk address crosses a region boundary */
312 /* find address where region is crossed */
314 while (data
->regionID
== rf_MapRegionIDParityLogging(raidPtr
, diskAddress
->startSector
+ boundary
))
317 /* enter data before the boundary */
318 data
->diskAddress
= *diskAddress
;
319 data
->diskAddress
.numSector
= boundary
;
320 data
->bufOffset
= offset
;
322 EnqueueParityLogData(data
, &resultHead
, &resultTail
);
323 /* adjust disk address */
324 diskAddress
->startSector
+= boundary
;
325 diskAddress
->numSector
-= boundary
;
328 if (rf_parityLogDebug
)
329 printf("[leaving CreateParityLogData]\n");
335 rf_SearchAndDequeueParityLogData(
338 RF_ParityLogData_t
** head
,
339 RF_ParityLogData_t
** tail
,
342 RF_ParityLogData_t
*w
;
344 /* Remove and return an in-core parity log from a specified region
345 * (regionID). If a matching log is not found, return NULL.
349 /* walk backward through a list, looking for an entry with a matching
352 RF_LOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
355 if (w
->regionID
== regionID
) {
356 /* remove an element from the list */
358 if (*head
== *tail
) {
359 /* removing only element in the list */
363 /* removing last item in the list */
364 *tail
= (*tail
)->prev
;
365 (*tail
)->next
= NULL
;
366 RF_ASSERT((*head
)->prev
== NULL
);
367 RF_ASSERT((*tail
)->next
== NULL
);
371 /* removing first item in the list */
372 *head
= (*head
)->next
;
373 (*head
)->prev
= NULL
;
374 RF_ASSERT((*head
)->prev
== NULL
);
375 RF_ASSERT((*tail
)->next
== NULL
);
377 /* removing an item from the middle of
379 w
->prev
->next
= w
->next
;
380 w
->next
->prev
= w
->prev
;
381 RF_ASSERT((*head
)->prev
== NULL
);
382 RF_ASSERT((*tail
)->next
== NULL
);
387 if (rf_parityLogDebug
)
388 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w
->regionID
, (int) w
->diskAddress
.raidAddress
, (int) w
->diskAddress
.numSector
);
394 RF_UNLOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
398 static RF_ParityLogData_t
*
399 DequeueMatchingLogData(
401 RF_ParityLogData_t
** head
,
402 RF_ParityLogData_t
** tail
)
404 RF_ParityLogData_t
*logDataList
, *logData
;
407 /* Remove and return an in-core parity log from the tail of a disk
408 * queue (*head, *tail). Then remove all matching (identical
409 * regionIDs) logData and return as a linked list.
413 logDataList
= DequeueParityLogData(raidPtr
, head
, tail
, RF_TRUE
);
415 regionID
= logDataList
->regionID
;
416 logData
= logDataList
;
417 logData
->next
= rf_SearchAndDequeueParityLogData(raidPtr
, regionID
, head
, tail
, RF_TRUE
);
418 while (logData
->next
) {
419 logData
= logData
->next
;
420 logData
->next
= rf_SearchAndDequeueParityLogData(raidPtr
, regionID
, head
, tail
, RF_TRUE
);
423 return (logDataList
);
427 static RF_ParityLog_t
*
429 RF_ParityLogData_t
* logData
,
432 RF_ParityLog_t
*log
= NULL
;
435 /* Grab a log buffer from the pool and return it. If no buffers are
436 * available, return NULL. NON-BLOCKING */
437 raidPtr
= logData
->common
->raidPtr
;
438 RF_LOCK_MUTEX(raidPtr
->parityLogPool
.mutex
);
439 if (raidPtr
->parityLogPool
.parityLogs
) {
440 log
= raidPtr
->parityLogPool
.parityLogs
;
441 raidPtr
->parityLogPool
.parityLogs
= raidPtr
->parityLogPool
.parityLogs
->next
;
442 log
->regionID
= logData
->regionID
;
445 raidPtr
->logsInUse
++;
446 RF_ASSERT(raidPtr
->logsInUse
>= 0 && raidPtr
->logsInUse
<= raidPtr
->numParityLogs
);
448 /* no logs available, so place ourselves on the queue of work
449 * waiting on log buffers this is done while
450 * parityLogPool.mutex is held, to ensure synchronization with
451 * ReleaseParityLogs. */
452 if (rf_parityLogDebug
)
453 printf("[blocked on log, region %d, finish %d]\n", logData
->regionID
, finish
);
455 RequeueParityLogData(logData
, &raidPtr
->parityLogDiskQueue
.logBlockHead
, &raidPtr
->parityLogDiskQueue
.logBlockTail
);
457 EnqueueParityLogData(logData
, &raidPtr
->parityLogDiskQueue
.logBlockHead
, &raidPtr
->parityLogDiskQueue
.logBlockTail
);
459 RF_UNLOCK_MUTEX(raidPtr
->parityLogPool
.mutex
);
464 rf_ReleaseParityLogs(
466 RF_ParityLog_t
* firstLog
)
468 RF_ParityLogData_t
*logDataList
;
469 RF_ParityLog_t
*log
, *lastLog
;
472 /* Insert a linked list of parity logs (firstLog) to the free list
473 * (parityLogPool.parityLogPool)
479 /* Before returning logs to global free list, service all requests
480 * which are blocked on logs. Holding mutexes for parityLogPool and
481 * parityLogDiskQueue forces synchronization with AcquireParityLog(). */
482 RF_LOCK_MUTEX(raidPtr
->parityLogPool
.mutex
);
483 RF_LOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
484 logDataList
= DequeueMatchingLogData(raidPtr
, &raidPtr
->parityLogDiskQueue
.logBlockHead
, &raidPtr
->parityLogDiskQueue
.logBlockTail
);
487 firstLog
= firstLog
->next
;
490 while (logDataList
&& log
) {
491 RF_UNLOCK_MUTEX(raidPtr
->parityLogPool
.mutex
);
492 RF_UNLOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
493 rf_ParityLogAppend(logDataList
, RF_TRUE
, &log
, RF_FALSE
);
494 if (rf_parityLogDebug
)
495 printf("[finishing up buf-blocked log data, region %d]\n", logDataList
->regionID
);
499 firstLog
= firstLog
->next
;
504 RF_LOCK_MUTEX(raidPtr
->parityLogPool
.mutex
);
505 RF_LOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
507 logDataList
= DequeueMatchingLogData(raidPtr
, &raidPtr
->parityLogDiskQueue
.logBlockHead
, &raidPtr
->parityLogDiskQueue
.logBlockTail
);
509 /* return remaining logs to pool */
511 log
->next
= firstLog
;
516 raidPtr
->logsInUse
--;
517 RF_ASSERT(raidPtr
->logsInUse
>= 0 && raidPtr
->logsInUse
<= raidPtr
->numParityLogs
);
518 while (lastLog
->next
) {
519 lastLog
= lastLog
->next
;
520 raidPtr
->logsInUse
--;
521 RF_ASSERT(raidPtr
->logsInUse
>= 0 && raidPtr
->logsInUse
<= raidPtr
->numParityLogs
);
523 lastLog
->next
= raidPtr
->parityLogPool
.parityLogs
;
524 raidPtr
->parityLogPool
.parityLogs
= firstLog
;
526 log
= raidPtr
->parityLogPool
.parityLogs
;
531 RF_ASSERT(cnt
+ raidPtr
->logsInUse
== raidPtr
->numParityLogs
);
533 RF_UNLOCK_MUTEX(raidPtr
->parityLogPool
.mutex
);
534 RF_UNLOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
541 RF_ParityLog_t
* log
)
545 /* Insert an in-core parity log (log) into the disk queue of
546 * reintegration work. Set the flag (reintInProgress) for the
547 * specified region (regionID) to indicate that reintegration is in
548 * progress for this region. NON-BLOCKING */
550 RF_LOCK_MUTEX(raidPtr
->regionInfo
[regionID
].reintMutex
);
551 raidPtr
->regionInfo
[regionID
].reintInProgress
= RF_TRUE
; /* cleared when reint
554 if (rf_parityLogDebug
)
555 printf("[requesting reintegration of region %d]\n", log
->regionID
);
556 /* move record to reintegration queue */
557 RF_LOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
558 log
->next
= raidPtr
->parityLogDiskQueue
.reintQueue
;
559 raidPtr
->parityLogDiskQueue
.reintQueue
= log
;
560 RF_UNLOCK_MUTEX(raidPtr
->regionInfo
[regionID
].reintMutex
);
561 RF_UNLOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
562 RF_SIGNAL_COND(raidPtr
->parityLogDiskQueue
.cond
);
568 RF_ParityLog_t
* log
)
570 /* insert a core log (log) into a list of logs
571 * (parityLogDiskQueue.flushQueue) waiting to be written to disk.
575 RF_ASSERT(log
->numRecords
== raidPtr
->numSectorsPerLog
);
576 RF_ASSERT(log
->next
== NULL
);
577 /* move log to flush queue */
578 RF_LOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
579 log
->next
= raidPtr
->parityLogDiskQueue
.flushQueue
;
580 raidPtr
->parityLogDiskQueue
.flushQueue
= log
;
581 RF_UNLOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
582 RF_SIGNAL_COND(raidPtr
->parityLogDiskQueue
.cond
);
588 RF_ParityLogData_t
* logData
)
590 int i
, diskCount
, regionID
= logData
->regionID
;
594 raidPtr
= logData
->common
->raidPtr
;
596 /* Move a core log to disk. If the log disk is full, initiate
599 * Return (0) if we can enqueue the dump immediately, otherwise return
600 * (1) to indicate we are blocked on reintegration and control of the
601 * thread should be relinquished.
603 * Caller must hold regionInfo[regionID].mutex
607 if (rf_parityLogDebug
)
608 printf("[dumping parity log to disk, region %d]\n", regionID
);
609 log
= raidPtr
->regionInfo
[regionID
].coreLog
;
610 RF_ASSERT(log
->numRecords
== raidPtr
->numSectorsPerLog
);
611 RF_ASSERT(log
->next
== NULL
);
613 /* if reintegration is in progress, must queue work */
614 RF_LOCK_MUTEX(raidPtr
->regionInfo
[regionID
].reintMutex
);
615 if (raidPtr
->regionInfo
[regionID
].reintInProgress
) {
616 /* Can not proceed since this region is currently being
617 * reintegrated. We can not block, so queue remaining work and
619 if (rf_parityLogDebug
)
620 printf("[region %d waiting on reintegration]\n", regionID
);
621 /* XXX not sure about the use of finish - shouldn't this
622 * always be "Enqueue"? */
624 RequeueParityLogData(logData
, &raidPtr
->parityLogDiskQueue
.reintBlockHead
, &raidPtr
->parityLogDiskQueue
.reintBlockTail
);
626 EnqueueParityLogData(logData
, &raidPtr
->parityLogDiskQueue
.reintBlockHead
, &raidPtr
->parityLogDiskQueue
.reintBlockTail
);
627 RF_UNLOCK_MUTEX(raidPtr
->regionInfo
[regionID
].reintMutex
);
628 return (1); /* relenquish control of this thread */
630 RF_UNLOCK_MUTEX(raidPtr
->regionInfo
[regionID
].reintMutex
);
631 raidPtr
->regionInfo
[regionID
].coreLog
= NULL
;
632 if ((raidPtr
->regionInfo
[regionID
].diskCount
) < raidPtr
->regionInfo
[regionID
].capacity
)
633 /* IMPORTANT!! this loop bound assumes region disk holds an
634 * integral number of core logs */
636 /* update disk map for this region */
637 diskCount
= raidPtr
->regionInfo
[regionID
].diskCount
;
638 for (i
= 0; i
< raidPtr
->numSectorsPerLog
; i
++) {
639 raidPtr
->regionInfo
[regionID
].diskMap
[i
+ diskCount
].operation
= log
->records
[i
].operation
;
640 raidPtr
->regionInfo
[regionID
].diskMap
[i
+ diskCount
].parityAddr
= log
->records
[i
].parityAddr
;
642 log
->diskOffset
= diskCount
;
643 raidPtr
->regionInfo
[regionID
].diskCount
+= raidPtr
->numSectorsPerLog
;
644 FlushLog(raidPtr
, log
);
646 /* no room for log on disk, send it to disk manager and
647 * request reintegration */
648 RF_ASSERT(raidPtr
->regionInfo
[regionID
].diskCount
== raidPtr
->regionInfo
[regionID
].capacity
);
649 ReintLog(raidPtr
, regionID
, log
);
651 if (rf_parityLogDebug
)
652 printf("[finished dumping parity log to disk, region %d]\n", regionID
);
658 RF_ParityLogData_t
* logData
,
660 RF_ParityLog_t
** incomingLog
,
663 int regionID
, logItem
, itemDone
;
664 RF_ParityLogData_t
*item
;
665 int punt
, done
= RF_FALSE
;
669 int (*wakeFunc
) (RF_DagNode_t
* node
, int status
);
672 /* Add parity to the appropriate log, one sector at a time. This
673 * routine is called is called by dag functions ParityLogUpdateFunc
674 * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING.
676 * Parity to be logged is contained in a linked-list (logData). When
677 * this routine returns, every sector in the list will be in one of
678 * three places: 1) entered into the parity log 2) queued, waiting on
679 * reintegration 3) queued, waiting on a core log
681 * Blocked work is passed to the ParityLoggingDiskManager for completion.
682 * Later, as conditions which required the block are removed, the work
683 * reenters this routine with the "finish" parameter set to "RF_TRUE."
687 raidPtr
= logData
->common
->raidPtr
;
688 /* lock the region for the first item in logData */
689 RF_ASSERT(logData
!= NULL
);
690 regionID
= logData
->regionID
;
691 RF_LOCK_MUTEX(raidPtr
->regionInfo
[regionID
].mutex
);
692 RF_ASSERT(raidPtr
->regionInfo
[regionID
].loggingEnabled
);
694 if (clearReintFlag
) {
695 /* Enable flushing for this region. Holding both locks
696 * provides a synchronization barrier with DumpParityLogToDisk */
697 RF_LOCK_MUTEX(raidPtr
->regionInfo
[regionID
].reintMutex
);
698 RF_LOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
699 RF_ASSERT(raidPtr
->regionInfo
[regionID
].reintInProgress
== RF_TRUE
);
700 raidPtr
->regionInfo
[regionID
].diskCount
= 0;
701 raidPtr
->regionInfo
[regionID
].reintInProgress
= RF_FALSE
;
702 RF_UNLOCK_MUTEX(raidPtr
->regionInfo
[regionID
].reintMutex
); /* flushing is now
704 RF_UNLOCK_MUTEX(raidPtr
->parityLogDiskQueue
.mutex
);
706 /* process each item in logData */
708 /* remove an item from logData */
710 logData
= logData
->next
;
714 if (rf_parityLogDebug
)
715 printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item
->regionID
, (int) item
->diskAddress
.raidAddress
, (int) item
->diskAddress
.numSector
);
717 /* see if we moved to a new region */
718 if (regionID
!= item
->regionID
) {
719 RF_UNLOCK_MUTEX(raidPtr
->regionInfo
[regionID
].mutex
);
720 regionID
= item
->regionID
;
721 RF_LOCK_MUTEX(raidPtr
->regionInfo
[regionID
].mutex
);
722 RF_ASSERT(raidPtr
->regionInfo
[regionID
].loggingEnabled
);
724 punt
= RF_FALSE
;/* Set to RF_TRUE if work is blocked. This
725 * can happen in one of two ways: 1) no core
726 * log (AcquireParityLog) 2) waiting on
727 * reintegration (DumpParityLogToDisk) If punt
728 * is RF_TRUE, the dataItem was queued, so
729 * skip to next item. */
731 /* process item, one sector at a time, until all sectors
732 * processed or we punt */
733 if (item
->diskAddress
.numSector
> 0)
737 while (!punt
&& !done
) {
738 /* verify that a core log exists for this region */
739 if (!raidPtr
->regionInfo
[regionID
].coreLog
) {
740 /* Attempt to acquire a parity log. If
741 * acquisition fails, queue remaining work in
742 * data item and move to nextItem. */
745 RF_ASSERT((*incomingLog
)->next
== NULL
);
746 raidPtr
->regionInfo
[regionID
].coreLog
= *incomingLog
;
747 raidPtr
->regionInfo
[regionID
].coreLog
->regionID
= regionID
;
750 raidPtr
->regionInfo
[regionID
].coreLog
= AcquireParityLog(item
, finish
);
752 raidPtr
->regionInfo
[regionID
].coreLog
= AcquireParityLog(item
, finish
);
753 /* Note: AcquireParityLog either returns a log
754 * or enqueues currentItem */
756 if (!raidPtr
->regionInfo
[regionID
].coreLog
)
757 punt
= RF_TRUE
; /* failed to find a core log */
759 RF_ASSERT(raidPtr
->regionInfo
[regionID
].coreLog
->next
== NULL
);
760 /* verify that the log has room for new
762 /* if log is full, dump it to disk and grab a
764 if (raidPtr
->regionInfo
[regionID
].coreLog
->numRecords
== raidPtr
->numSectorsPerLog
) {
765 /* log is full, dump it to disk */
766 if (DumpParityLogToDisk(finish
, item
))
767 punt
= RF_TRUE
; /* dump unsuccessful,
771 /* dump was successful */
774 RF_ASSERT((*incomingLog
)->next
== NULL
);
775 raidPtr
->regionInfo
[regionID
].coreLog
= *incomingLog
;
776 raidPtr
->regionInfo
[regionID
].coreLog
->regionID
= regionID
;
779 raidPtr
->regionInfo
[regionID
].coreLog
= AcquireParityLog(item
, finish
);
781 raidPtr
->regionInfo
[regionID
].coreLog
= AcquireParityLog(item
, finish
);
782 /* if a core log is not
783 * available, must queue work
785 if (!raidPtr
->regionInfo
[regionID
].coreLog
)
786 punt
= RF_TRUE
; /* blocked on log
791 /* if we didn't punt on this item, attempt to add a
792 * sector to the core log */
794 RF_ASSERT(raidPtr
->regionInfo
[regionID
].coreLog
->next
== NULL
);
795 /* at this point, we have a core log with
796 * enough room for a sector */
797 /* copy a sector into the log */
798 log
= raidPtr
->regionInfo
[regionID
].coreLog
;
799 RF_ASSERT(log
->numRecords
< raidPtr
->numSectorsPerLog
);
800 logItem
= log
->numRecords
++;
801 log
->records
[logItem
].parityAddr
= item
->diskAddress
;
802 RF_ASSERT(log
->records
[logItem
].parityAddr
.startSector
>= raidPtr
->regionInfo
[regionID
].parityStartAddr
);
803 RF_ASSERT(log
->records
[logItem
].parityAddr
.startSector
< raidPtr
->regionInfo
[regionID
].parityStartAddr
+ raidPtr
->regionInfo
[regionID
].numSectorsParity
);
804 log
->records
[logItem
].parityAddr
.numSector
= 1;
805 log
->records
[logItem
].operation
= item
->common
->operation
;
806 memcpy((char *)log
->bufPtr
+ (logItem
* (1 << item
->common
->raidPtr
->logBytesPerSector
)), ((char *)item
->common
->bufPtr
+ (item
->bufOffset
++ * (1 << item
->common
->raidPtr
->logBytesPerSector
))), (1 << item
->common
->raidPtr
->logBytesPerSector
));
807 item
->diskAddress
.numSector
--;
808 item
->diskAddress
.startSector
++;
809 if (item
->diskAddress
.numSector
== 0)
815 /* Processed this item completely, decrement count of
816 * items to be processed. */
817 RF_ASSERT(item
->diskAddress
.numSector
== 0);
818 RF_LOCK_MUTEX(item
->common
->mutex
);
820 if (item
->common
->cnt
== 0)
824 RF_UNLOCK_MUTEX(item
->common
->mutex
);
826 /* Finished processing all log data for this
827 * IO Return structs to free list and invoke
828 * wakeup function. */
829 timer
= item
->common
->startTime
; /* grab initial value of
831 RF_ETIMER_STOP(timer
);
832 RF_ETIMER_EVAL(timer
);
833 item
->common
->tracerec
->plog_us
+= RF_ETIMER_VAL_US(timer
);
834 if (rf_parityLogDebug
)
835 printf("[waking process for region %d]\n", item
->regionID
);
836 wakeFunc
= item
->common
->wakeFunc
;
837 wakeArg
= item
->common
->wakeArg
;
838 FreeParityLogCommonData(item
->common
);
839 FreeParityLogData(item
);
840 (wakeFunc
) (wakeArg
, 0);
842 FreeParityLogData(item
);
845 RF_UNLOCK_MUTEX(raidPtr
->regionInfo
[regionID
].mutex
);
846 if (rf_parityLogDebug
)
847 printf("[exiting ParityLogAppend]\n");
853 rf_EnableParityLogging(RF_Raid_t
* raidPtr
)
857 for (regionID
= 0; regionID
< rf_numParityRegions
; regionID
++) {
858 RF_LOCK_MUTEX(raidPtr
->regionInfo
[regionID
].mutex
);
859 raidPtr
->regionInfo
[regionID
].loggingEnabled
= RF_TRUE
;
860 RF_UNLOCK_MUTEX(raidPtr
->regionInfo
[regionID
].mutex
);
862 if (rf_parityLogDebug
)
863 printf("[parity logging enabled]\n");
865 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */