Expand PMF_FN_* macros.
[netbsd-mini2440.git] / sys / dev / raidframe / rf_paritylog.c
blobf8ca82f42fc09d52f87e5c1d53d0e9c979afb1a2
1 /* $NetBSD: rf_paritylog.c,v 1.12 2006/04/26 17:08:48 oster Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
6 * Author: William V. Courtright II
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
18 * Carnegie Mellon requests users of this software to return to
20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
29 /* Code for manipulating in-core parity logs
33 #include <sys/cdefs.h>
34 __KERNEL_RCSID(0, "$NetBSD: rf_paritylog.c,v 1.12 2006/04/26 17:08:48 oster Exp $");
36 #include "rf_archs.h"
38 #if RF_INCLUDE_PARITYLOGGING > 0
41 * Append-only log for recording parity "update" and "overwrite" records
44 #include <dev/raidframe/raidframevar.h>
46 #include "rf_threadstuff.h"
47 #include "rf_mcpair.h"
48 #include "rf_raid.h"
49 #include "rf_dag.h"
50 #include "rf_dagfuncs.h"
51 #include "rf_desc.h"
52 #include "rf_layout.h"
53 #include "rf_diskqueue.h"
54 #include "rf_etimer.h"
55 #include "rf_paritylog.h"
56 #include "rf_general.h"
57 #include "rf_map.h"
58 #include "rf_paritylogging.h"
59 #include "rf_paritylogDiskMgr.h"
61 static RF_CommonLogData_t *
62 AllocParityLogCommonData(RF_Raid_t * raidPtr)
64 RF_CommonLogData_t *common = NULL;
66 /* Return a struct for holding common parity log information from the
67 * free list (rf_parityLogDiskQueue.freeCommonList). If the free list
68 * is empty, call RF_Malloc to create a new structure. NON-BLOCKING */
70 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
71 if (raidPtr->parityLogDiskQueue.freeCommonList) {
72 common = raidPtr->parityLogDiskQueue.freeCommonList;
73 raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next;
74 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
75 } else {
76 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
77 RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *));
78 rf_mutex_init(&common->mutex);
80 common->next = NULL;
81 return (common);
84 static void
85 FreeParityLogCommonData(RF_CommonLogData_t * common)
87 RF_Raid_t *raidPtr;
89 /* Insert a single struct for holding parity log information (data)
90 * into the free list (rf_parityLogDiskQueue.freeCommonList).
91 * NON-BLOCKING */
93 raidPtr = common->raidPtr;
94 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
95 common->next = raidPtr->parityLogDiskQueue.freeCommonList;
96 raidPtr->parityLogDiskQueue.freeCommonList = common;
97 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
100 static RF_ParityLogData_t *
101 AllocParityLogData(RF_Raid_t * raidPtr)
103 RF_ParityLogData_t *data = NULL;
105 /* Return a struct for holding parity log information from the free
106 * list (rf_parityLogDiskQueue.freeList). If the free list is empty,
107 * call RF_Malloc to create a new structure. NON-BLOCKING */
109 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
110 if (raidPtr->parityLogDiskQueue.freeDataList) {
111 data = raidPtr->parityLogDiskQueue.freeDataList;
112 raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next;
113 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
114 } else {
115 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
116 RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *));
118 data->next = NULL;
119 data->prev = NULL;
120 return (data);
124 static void
125 FreeParityLogData(RF_ParityLogData_t * data)
127 RF_ParityLogData_t *nextItem;
128 RF_Raid_t *raidPtr;
130 /* Insert a linked list of structs for holding parity log information
131 * (data) into the free list (parityLogDiskQueue.freeList).
132 * NON-BLOCKING */
134 raidPtr = data->common->raidPtr;
135 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
136 while (data) {
137 nextItem = data->next;
138 data->next = raidPtr->parityLogDiskQueue.freeDataList;
139 raidPtr->parityLogDiskQueue.freeDataList = data;
140 data = nextItem;
142 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
146 static void
147 EnqueueParityLogData(
148 RF_ParityLogData_t * data,
149 RF_ParityLogData_t ** head,
150 RF_ParityLogData_t ** tail)
152 RF_Raid_t *raidPtr;
154 /* Insert an in-core parity log (*data) into the head of a disk queue
155 * (*head, *tail). NON-BLOCKING */
157 raidPtr = data->common->raidPtr;
158 if (rf_parityLogDebug)
159 printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
160 RF_ASSERT(data->prev == NULL);
161 RF_ASSERT(data->next == NULL);
162 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
163 if (*head) {
164 /* insert into head of queue */
165 RF_ASSERT((*head)->prev == NULL);
166 RF_ASSERT((*tail)->next == NULL);
167 data->next = *head;
168 (*head)->prev = data;
169 *head = data;
170 } else {
171 /* insert into empty list */
172 RF_ASSERT(*head == NULL);
173 RF_ASSERT(*tail == NULL);
174 *head = data;
175 *tail = data;
177 RF_ASSERT((*head)->prev == NULL);
178 RF_ASSERT((*tail)->next == NULL);
179 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
182 static RF_ParityLogData_t *
183 DequeueParityLogData(
184 RF_Raid_t * raidPtr,
185 RF_ParityLogData_t ** head,
186 RF_ParityLogData_t ** tail,
187 int ignoreLocks)
189 RF_ParityLogData_t *data;
191 /* Remove and return an in-core parity log from the tail of a disk
192 * queue (*head, *tail). NON-BLOCKING */
194 /* remove from tail, preserving FIFO order */
195 if (!ignoreLocks)
196 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
197 data = *tail;
198 if (data) {
199 if (*head == *tail) {
200 /* removing last item from queue */
201 *head = NULL;
202 *tail = NULL;
203 } else {
204 *tail = (*tail)->prev;
205 (*tail)->next = NULL;
206 RF_ASSERT((*head)->prev == NULL);
207 RF_ASSERT((*tail)->next == NULL);
209 data->next = NULL;
210 data->prev = NULL;
211 if (rf_parityLogDebug)
212 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
214 if (*head) {
215 RF_ASSERT((*head)->prev == NULL);
216 RF_ASSERT((*tail)->next == NULL);
218 if (!ignoreLocks)
219 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
220 return (data);
224 static void
225 RequeueParityLogData(
226 RF_ParityLogData_t * data,
227 RF_ParityLogData_t ** head,
228 RF_ParityLogData_t ** tail)
230 RF_Raid_t *raidPtr;
232 /* Insert an in-core parity log (*data) into the tail of a disk queue
233 * (*head, *tail). NON-BLOCKING */
235 raidPtr = data->common->raidPtr;
236 RF_ASSERT(data);
237 if (rf_parityLogDebug)
238 printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
239 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
240 if (*tail) {
241 /* append to tail of list */
242 data->prev = *tail;
243 data->next = NULL;
244 (*tail)->next = data;
245 *tail = data;
246 } else {
247 /* inserting into an empty list */
248 *head = data;
249 *tail = data;
250 (*head)->prev = NULL;
251 (*tail)->next = NULL;
253 RF_ASSERT((*head)->prev == NULL);
254 RF_ASSERT((*tail)->next == NULL);
255 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
258 RF_ParityLogData_t *
259 rf_CreateParityLogData(
260 RF_ParityRecordType_t operation,
261 RF_PhysDiskAddr_t * pda,
262 void *bufPtr,
263 RF_Raid_t * raidPtr,
264 int (*wakeFunc) (RF_DagNode_t * node, int status),
265 void *wakeArg,
266 RF_AccTraceEntry_t * tracerec,
267 RF_Etimer_t startTime)
269 RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL;
270 RF_CommonLogData_t *common;
271 RF_PhysDiskAddr_t *diskAddress;
272 int boundary, offset = 0;
274 /* Return an initialized struct of info to be logged. Build one item
275 * per physical disk address, one item per region.
277 * NON-BLOCKING */
279 diskAddress = pda;
280 common = AllocParityLogCommonData(raidPtr);
281 RF_ASSERT(common);
283 common->operation = operation;
284 common->bufPtr = bufPtr;
285 common->raidPtr = raidPtr;
286 common->wakeFunc = wakeFunc;
287 common->wakeArg = wakeArg;
288 common->tracerec = tracerec;
289 common->startTime = startTime;
290 common->cnt = 0;
292 if (rf_parityLogDebug)
293 printf("[entering CreateParityLogData]\n");
294 while (diskAddress) {
295 common->cnt++;
296 data = AllocParityLogData(raidPtr);
297 RF_ASSERT(data);
298 data->common = common;
299 data->next = NULL;
300 data->prev = NULL;
301 data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector);
302 if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) {
303 /* disk address does not cross a region boundary */
304 data->diskAddress = *diskAddress;
305 data->bufOffset = offset;
306 offset = offset + diskAddress->numSector;
307 EnqueueParityLogData(data, &resultHead, &resultTail);
308 /* adjust disk address */
309 diskAddress = diskAddress->next;
310 } else {
311 /* disk address crosses a region boundary */
312 /* find address where region is crossed */
313 boundary = 0;
314 while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary))
315 boundary++;
317 /* enter data before the boundary */
318 data->diskAddress = *diskAddress;
319 data->diskAddress.numSector = boundary;
320 data->bufOffset = offset;
321 offset += boundary;
322 EnqueueParityLogData(data, &resultHead, &resultTail);
323 /* adjust disk address */
324 diskAddress->startSector += boundary;
325 diskAddress->numSector -= boundary;
328 if (rf_parityLogDebug)
329 printf("[leaving CreateParityLogData]\n");
330 return (resultHead);
334 RF_ParityLogData_t *
335 rf_SearchAndDequeueParityLogData(
336 RF_Raid_t * raidPtr,
337 int regionID,
338 RF_ParityLogData_t ** head,
339 RF_ParityLogData_t ** tail,
340 int ignoreLocks)
342 RF_ParityLogData_t *w;
344 /* Remove and return an in-core parity log from a specified region
345 * (regionID). If a matching log is not found, return NULL.
347 * NON-BLOCKING. */
349 /* walk backward through a list, looking for an entry with a matching
350 * region ID */
351 if (!ignoreLocks)
352 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
353 w = (*tail);
354 while (w) {
355 if (w->regionID == regionID) {
356 /* remove an element from the list */
357 if (w == *tail) {
358 if (*head == *tail) {
359 /* removing only element in the list */
360 *head = NULL;
361 *tail = NULL;
362 } else {
363 /* removing last item in the list */
364 *tail = (*tail)->prev;
365 (*tail)->next = NULL;
366 RF_ASSERT((*head)->prev == NULL);
367 RF_ASSERT((*tail)->next == NULL);
369 } else {
370 if (w == *head) {
371 /* removing first item in the list */
372 *head = (*head)->next;
373 (*head)->prev = NULL;
374 RF_ASSERT((*head)->prev == NULL);
375 RF_ASSERT((*tail)->next == NULL);
376 } else {
377 /* removing an item from the middle of
378 * the list */
379 w->prev->next = w->next;
380 w->next->prev = w->prev;
381 RF_ASSERT((*head)->prev == NULL);
382 RF_ASSERT((*tail)->next == NULL);
385 w->prev = NULL;
386 w->next = NULL;
387 if (rf_parityLogDebug)
388 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector);
389 return (w);
390 } else
391 w = w->prev;
393 if (!ignoreLocks)
394 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
395 return (NULL);
398 static RF_ParityLogData_t *
399 DequeueMatchingLogData(
400 RF_Raid_t * raidPtr,
401 RF_ParityLogData_t ** head,
402 RF_ParityLogData_t ** tail)
404 RF_ParityLogData_t *logDataList, *logData;
405 int regionID;
407 /* Remove and return an in-core parity log from the tail of a disk
408 * queue (*head, *tail). Then remove all matching (identical
409 * regionIDs) logData and return as a linked list.
411 * NON-BLOCKING */
413 logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE);
414 if (logDataList) {
415 regionID = logDataList->regionID;
416 logData = logDataList;
417 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
418 while (logData->next) {
419 logData = logData->next;
420 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
423 return (logDataList);
427 static RF_ParityLog_t *
428 AcquireParityLog(
429 RF_ParityLogData_t * logData,
430 int finish)
432 RF_ParityLog_t *log = NULL;
433 RF_Raid_t *raidPtr;
435 /* Grab a log buffer from the pool and return it. If no buffers are
436 * available, return NULL. NON-BLOCKING */
437 raidPtr = logData->common->raidPtr;
438 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
439 if (raidPtr->parityLogPool.parityLogs) {
440 log = raidPtr->parityLogPool.parityLogs;
441 raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next;
442 log->regionID = logData->regionID;
443 log->numRecords = 0;
444 log->next = NULL;
445 raidPtr->logsInUse++;
446 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
447 } else {
448 /* no logs available, so place ourselves on the queue of work
449 * waiting on log buffers this is done while
450 * parityLogPool.mutex is held, to ensure synchronization with
451 * ReleaseParityLogs. */
452 if (rf_parityLogDebug)
453 printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish);
454 if (finish)
455 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
456 else
457 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
459 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
460 return (log);
/*
 * rf_ReleaseParityLogs: give back a chain of parity logs (firstLog, linked
 * through ->next).
 *
 * Work that is parked on the logBlock queue is served first: logs from the
 * chain are handed one at a time to rf_ParityLogAppend as its incoming log.
 * Whatever is left of the chain afterwards is pushed onto
 * parityLogPool.parityLogs and logsInUse is decremented once per log
 * returned.
 *
 * parityLogPool.mutex and parityLogDiskQueue.mutex are both held while the
 * queue and the pool are manipulated, and both are dropped around each call
 * to rf_ParityLogAppend (which may itself take them via AcquireParityLog).
 */
463 void
464 rf_ReleaseParityLogs(
465 RF_Raid_t * raidPtr,
466 RF_ParityLog_t * firstLog)
468 RF_ParityLogData_t *logDataList;
469 RF_ParityLog_t *log, *lastLog;
470 int cnt;
472 /* Insert a linked list of parity logs (firstLog) to the free list
473 * (parityLogPool.parityLogPool)
475 * NON-BLOCKING. */
477 RF_ASSERT(firstLog);
479 /* Before returning logs to global free list, service all requests
480 * which are blocked on logs. Holding mutexes for parityLogPool and
481 * parityLogDiskQueue forces synchronization with AcquireParityLog(). */
482 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
483 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
/* fetch the first batch of blocked work: all queued items of one region */
484 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
/* detach the first log from the chain; firstLog now names the remainder */
485 log = firstLog;
486 if (firstLog)
487 firstLog = firstLog->next;
/* NOTE(review): log is dereferenced unconditionally here although the line
 * above guards against firstLog == NULL; only the RF_ASSERT at the top
 * protects this path. */
488 log->numRecords = 0;
489 log->next = NULL;
/* keep going while there is both blocked work and a log to give it */
490 while (logDataList && log) {
/* both mutexes are released for the duration of the append */
491 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
492 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
493 rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE);
494 if (rf_parityLogDebug)
495 printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID);
/* rf_ParityLogAppend sets log to NULL when it took ownership of the log;
 * in that case detach the next log from the chain, if any is left */
496 if (log == NULL) {
497 log = firstLog;
498 if (firstLog) {
499 firstLog = firstLog->next;
500 log->numRecords = 0;
501 log->next = NULL;
504 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
505 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
/* more blocked work is only fetched when a log is available for it */
506 if (log)
507 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
509 /* return remaining logs to pool */
/* a detached but unused log is put back in front of the remaining chain */
510 if (log) {
511 log->next = firstLog;
512 firstLog = log;
514 if (firstLog) {
/* walk to the end of the chain, decrementing logsInUse once per log */
515 lastLog = firstLog;
516 raidPtr->logsInUse--;
517 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
518 while (lastLog->next) {
519 lastLog = lastLog->next;
520 raidPtr->logsInUse--;
521 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
/* splice the whole chain onto the front of the pool's free list */
523 lastLog->next = raidPtr->parityLogPool.parityLogs;
524 raidPtr->parityLogPool.parityLogs = firstLog;
/* consistency check: pool length plus logsInUse must equal numParityLogs */
525 cnt = 0;
526 log = raidPtr->parityLogPool.parityLogs;
527 while (log) {
528 cnt++;
529 log = log->next;
531 RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs);
533 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
534 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
537 static void
538 ReintLog(
539 RF_Raid_t * raidPtr,
540 int regionID,
541 RF_ParityLog_t * log)
543 RF_ASSERT(log);
545 /* Insert an in-core parity log (log) into the disk queue of
546 * reintegration work. Set the flag (reintInProgress) for the
547 * specified region (regionID) to indicate that reintegration is in
548 * progress for this region. NON-BLOCKING */
550 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
551 raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE; /* cleared when reint
552 * complete */
554 if (rf_parityLogDebug)
555 printf("[requesting reintegration of region %d]\n", log->regionID);
556 /* move record to reintegration queue */
557 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
558 log->next = raidPtr->parityLogDiskQueue.reintQueue;
559 raidPtr->parityLogDiskQueue.reintQueue = log;
560 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
561 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
562 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
565 static void
566 FlushLog(
567 RF_Raid_t * raidPtr,
568 RF_ParityLog_t * log)
570 /* insert a core log (log) into a list of logs
571 * (parityLogDiskQueue.flushQueue) waiting to be written to disk.
572 * NON-BLOCKING */
574 RF_ASSERT(log);
575 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
576 RF_ASSERT(log->next == NULL);
577 /* move log to flush queue */
578 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
579 log->next = raidPtr->parityLogDiskQueue.flushQueue;
580 raidPtr->parityLogDiskQueue.flushQueue = log;
581 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
582 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
585 static int
586 DumpParityLogToDisk(
587 int finish,
588 RF_ParityLogData_t * logData)
590 int i, diskCount, regionID = logData->regionID;
591 RF_ParityLog_t *log;
592 RF_Raid_t *raidPtr;
594 raidPtr = logData->common->raidPtr;
596 /* Move a core log to disk. If the log disk is full, initiate
597 * reintegration.
599 * Return (0) if we can enqueue the dump immediately, otherwise return
600 * (1) to indicate we are blocked on reintegration and control of the
601 * thread should be relinquished.
603 * Caller must hold regionInfo[regionID].mutex
605 * NON-BLOCKING */
607 if (rf_parityLogDebug)
608 printf("[dumping parity log to disk, region %d]\n", regionID);
609 log = raidPtr->regionInfo[regionID].coreLog;
610 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
611 RF_ASSERT(log->next == NULL);
613 /* if reintegration is in progress, must queue work */
614 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
615 if (raidPtr->regionInfo[regionID].reintInProgress) {
616 /* Can not proceed since this region is currently being
617 * reintegrated. We can not block, so queue remaining work and
618 * return */
619 if (rf_parityLogDebug)
620 printf("[region %d waiting on reintegration]\n", regionID);
621 /* XXX not sure about the use of finish - shouldn't this
622 * always be "Enqueue"? */
623 if (finish)
624 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
625 else
626 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
627 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
628 return (1); /* relenquish control of this thread */
630 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
631 raidPtr->regionInfo[regionID].coreLog = NULL;
632 if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity)
633 /* IMPORTANT!! this loop bound assumes region disk holds an
634 * integral number of core logs */
636 /* update disk map for this region */
637 diskCount = raidPtr->regionInfo[regionID].diskCount;
638 for (i = 0; i < raidPtr->numSectorsPerLog; i++) {
639 raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation;
640 raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr;
642 log->diskOffset = diskCount;
643 raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog;
644 FlushLog(raidPtr, log);
645 } else {
646 /* no room for log on disk, send it to disk manager and
647 * request reintegration */
648 RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity);
649 ReintLog(raidPtr, regionID, log);
651 if (rf_parityLogDebug)
652 printf("[finished dumping parity log to disk, region %d]\n", regionID);
653 return (0);
657 rf_ParityLogAppend(
658 RF_ParityLogData_t * logData,
659 int finish,
660 RF_ParityLog_t ** incomingLog,
661 int clearReintFlag)
663 int regionID, logItem, itemDone;
664 RF_ParityLogData_t *item;
665 int punt, done = RF_FALSE;
666 RF_ParityLog_t *log;
667 RF_Raid_t *raidPtr;
668 RF_Etimer_t timer;
669 int (*wakeFunc) (RF_DagNode_t * node, int status);
670 void *wakeArg;
672 /* Add parity to the appropriate log, one sector at a time. This
673 * routine is called is called by dag functions ParityLogUpdateFunc
674 * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING.
676 * Parity to be logged is contained in a linked-list (logData). When
677 * this routine returns, every sector in the list will be in one of
678 * three places: 1) entered into the parity log 2) queued, waiting on
679 * reintegration 3) queued, waiting on a core log
681 * Blocked work is passed to the ParityLoggingDiskManager for completion.
682 * Later, as conditions which required the block are removed, the work
683 * reenters this routine with the "finish" parameter set to "RF_TRUE."
685 * NON-BLOCKING */
687 raidPtr = logData->common->raidPtr;
688 /* lock the region for the first item in logData */
689 RF_ASSERT(logData != NULL);
690 regionID = logData->regionID;
691 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
692 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
694 if (clearReintFlag) {
695 /* Enable flushing for this region. Holding both locks
696 * provides a synchronization barrier with DumpParityLogToDisk */
697 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
698 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
699 RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE);
700 raidPtr->regionInfo[regionID].diskCount = 0;
701 raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
702 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now
703 * enabled */
704 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
706 /* process each item in logData */
707 while (logData) {
708 /* remove an item from logData */
709 item = logData;
710 logData = logData->next;
711 item->next = NULL;
712 item->prev = NULL;
714 if (rf_parityLogDebug)
715 printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector);
717 /* see if we moved to a new region */
718 if (regionID != item->regionID) {
719 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
720 regionID = item->regionID;
721 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
722 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
724 punt = RF_FALSE;/* Set to RF_TRUE if work is blocked. This
725 * can happen in one of two ways: 1) no core
726 * log (AcquireParityLog) 2) waiting on
727 * reintegration (DumpParityLogToDisk) If punt
728 * is RF_TRUE, the dataItem was queued, so
729 * skip to next item. */
731 /* process item, one sector at a time, until all sectors
732 * processed or we punt */
733 if (item->diskAddress.numSector > 0)
734 done = RF_FALSE;
735 else
736 RF_ASSERT(0);
737 while (!punt && !done) {
738 /* verify that a core log exists for this region */
739 if (!raidPtr->regionInfo[regionID].coreLog) {
740 /* Attempt to acquire a parity log. If
741 * acquisition fails, queue remaining work in
742 * data item and move to nextItem. */
743 if (incomingLog)
744 if (*incomingLog) {
745 RF_ASSERT((*incomingLog)->next == NULL);
746 raidPtr->regionInfo[regionID].coreLog = *incomingLog;
747 raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
748 *incomingLog = NULL;
749 } else
750 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
751 else
752 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
753 /* Note: AcquireParityLog either returns a log
754 * or enqueues currentItem */
756 if (!raidPtr->regionInfo[regionID].coreLog)
757 punt = RF_TRUE; /* failed to find a core log */
758 else {
759 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
760 /* verify that the log has room for new
761 * entries */
762 /* if log is full, dump it to disk and grab a
763 * new log */
764 if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) {
765 /* log is full, dump it to disk */
766 if (DumpParityLogToDisk(finish, item))
767 punt = RF_TRUE; /* dump unsuccessful,
768 * blocked on
769 * reintegration */
770 else {
771 /* dump was successful */
772 if (incomingLog)
773 if (*incomingLog) {
774 RF_ASSERT((*incomingLog)->next == NULL);
775 raidPtr->regionInfo[regionID].coreLog = *incomingLog;
776 raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
777 *incomingLog = NULL;
778 } else
779 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
780 else
781 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
782 /* if a core log is not
783 * available, must queue work
784 * and return */
785 if (!raidPtr->regionInfo[regionID].coreLog)
786 punt = RF_TRUE; /* blocked on log
787 * availability */
791 /* if we didn't punt on this item, attempt to add a
792 * sector to the core log */
793 if (!punt) {
794 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
795 /* at this point, we have a core log with
796 * enough room for a sector */
797 /* copy a sector into the log */
798 log = raidPtr->regionInfo[regionID].coreLog;
799 RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog);
800 logItem = log->numRecords++;
801 log->records[logItem].parityAddr = item->diskAddress;
802 RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr);
803 RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity);
804 log->records[logItem].parityAddr.numSector = 1;
805 log->records[logItem].operation = item->common->operation;
806 memcpy((char *)log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), ((char *)item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), (1 << item->common->raidPtr->logBytesPerSector));
807 item->diskAddress.numSector--;
808 item->diskAddress.startSector++;
809 if (item->diskAddress.numSector == 0)
810 done = RF_TRUE;
814 if (!punt) {
815 /* Processed this item completely, decrement count of
816 * items to be processed. */
817 RF_ASSERT(item->diskAddress.numSector == 0);
818 RF_LOCK_MUTEX(item->common->mutex);
819 item->common->cnt--;
820 if (item->common->cnt == 0)
821 itemDone = RF_TRUE;
822 else
823 itemDone = RF_FALSE;
824 RF_UNLOCK_MUTEX(item->common->mutex);
825 if (itemDone) {
826 /* Finished processing all log data for this
827 * IO Return structs to free list and invoke
828 * wakeup function. */
829 timer = item->common->startTime; /* grab initial value of
830 * timer */
831 RF_ETIMER_STOP(timer);
832 RF_ETIMER_EVAL(timer);
833 item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer);
834 if (rf_parityLogDebug)
835 printf("[waking process for region %d]\n", item->regionID);
836 wakeFunc = item->common->wakeFunc;
837 wakeArg = item->common->wakeArg;
838 FreeParityLogCommonData(item->common);
839 FreeParityLogData(item);
840 (wakeFunc) (wakeArg, 0);
841 } else
842 FreeParityLogData(item);
845 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
846 if (rf_parityLogDebug)
847 printf("[exiting ParityLogAppend]\n");
848 return (0);
852 void
853 rf_EnableParityLogging(RF_Raid_t * raidPtr)
855 int regionID;
857 for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
858 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
859 raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE;
860 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
862 if (rf_parityLogDebug)
863 printf("[parity logging enabled]\n");
865 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */