sys/dev/raidframe/rf_diskqueue.c

   1 /*      $NetBSD: rf_diskqueue.c,v 1.51 2008/06/17 14:53:11 reinoud Exp $        */
   2 /*
   3  * Copyright (c) 1995 Carnegie-Mellon University.
   4  * All rights reserved.
   5  *
   6  * Author: Mark Holland
   7  *
   8  * Permission to use, copy, modify and distribute this software and
   9  * its documentation is hereby granted, provided that both the copyright
  10  * notice and this permission notice appear in all copies of the
  11  * software, derivative works or modified versions, and any portions
  12  * thereof, and that both notices appear in supporting documentation.
  13  *
  14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
  16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  17  *
  18  * Carnegie Mellon requests users of this software to return to
  19  *
  20  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  21  *  School of Computer Science
  22  *  Carnegie Mellon University
  23  *  Pittsburgh PA 15213-3890
  24  *
  25  * any improvements or extensions that they make and grant Carnegie the
  26  * rights to redistribute these changes.
  27  */
  28
  29 /****************************************************************************
  30  *
  31  * rf_diskqueue.c -- higher-level disk queue code
  32  *
  33  * the routines here are a generic wrapper around the actual queueing
  34  * routines.  The code here implements thread scheduling, synchronization,
  35  * and locking ops (see below) on top of the lower-level queueing code.
  36  *
  37  * to support atomic RMW, we implement "locking operations".  When a
  38  * locking op is dispatched to the lower levels of the driver, the
  39  * queue is locked, and no further I/Os are dispatched until the queue
  40  * receives & completes a corresponding "unlocking operation".  This
  41  * code relies on the higher layers to guarantee that a locking op
  42  * will always be eventually followed by an unlocking op.  The model
  43  * is that the higher layers are structured so locking and unlocking
  44  * ops occur in pairs, i.e.  an unlocking op cannot be generated until
  45  * after a locking op reports completion.  There is no good way to
  46  * check to see that an unlocking op "corresponds" to the op that
  47  * currently has the queue locked, so we make no such attempt.  Since
  48  * by definition there can be only one locking op outstanding on a
  49  * disk, this should not be a problem.
  50  *
  51  * In the kernel, we allow multiple I/Os to be concurrently dispatched
  52  * to the disk driver.  In order to support locking ops in this
  53  * environment, when we decide to do a locking op, we stop dispatching
  54  * new I/Os and wait until all dispatched I/Os have completed before
  55  * dispatching the locking op.
  56  *
  57  * Unfortunately, the code is different in the 3 different operating
  58  * states (user level, kernel, simulator).  In the kernel, I/O is
  59  * non-blocking, and we have no disk threads to dispatch for us.
  60  * Therefore, we have to dispatch new I/Os to the scsi driver at the
  61  * time of enqueue, and also at the time of completion.  At user
  62  * level, I/O is blocking, and so only the disk threads may dispatch
  63  * I/Os.  Thus at user level, all we can do at enqueue time is enqueue
  64  * and wake up the disk thread to do the dispatch.
  65  *
  66  ****************************************************************************/
  67
  68 #include <sys/cdefs.h>
  69 __KERNEL_RCSID(0, "$NetBSD: rf_diskqueue.c,v 1.51 2008/06/17 14:53:11 reinoud Exp $");
  70
  71 #include <dev/raidframe/raidframevar.h>
  72
  73 #include "rf_threadstuff.h"
  74 #include "rf_raid.h"
  75 #include "rf_diskqueue.h"
  76 #include "rf_alloclist.h"
  77 #include "rf_acctrace.h"
  78 #include "rf_etimer.h"
  79 #include "rf_general.h"
  80 #include "rf_debugprint.h"
  81 #include "rf_shutdown.h"
  82 #include "rf_cvscan.h"
  83 #include "rf_sstf.h"
  84 #include "rf_fifo.h"
  85 #include "rf_kintf.h"
  86
  87 static void rf_ShutdownDiskQueueSystem(void *);
  88
  89 #ifndef RF_DEBUG_DISKQUEUE
  90 #define RF_DEBUG_DISKQUEUE 0
  91 #endif
  92
  93 #if RF_DEBUG_DISKQUEUE
  94 #define Dprintf1(s,a)         if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),NULL,NULL,NULL,NULL,NULL,NULL,NULL)
  95 #define Dprintf2(s,a,b)       if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),NULL,NULL,NULL,NULL,NULL,NULL)
  96 #define Dprintf3(s,a,b,c)     if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),NULL,NULL,NULL,NULL,NULL)
  97 #else
  98 #define Dprintf1(s,a)
  99 #define Dprintf2(s,a,b)
 100 #define Dprintf3(s,a,b,c)
 101 #endif
 102
 103 /*****************************************************************************
 104  *
 105  * the disk queue switch defines all the functions used in the
 106  * different queueing disciplines queue ID, init routine, enqueue
 107  * routine, dequeue routine
 108  *
 109  ****************************************************************************/
 110
 111 static const RF_DiskQueueSW_t diskqueuesw[] = {
 112         {"fifo",                /* FIFO */
 113                 rf_FifoCreate,
 114                 rf_FifoEnqueue,
 115                 rf_FifoDequeue,
 116                 rf_FifoPeek,
 117         rf_FifoPromote},
 118
 119         {"cvscan",              /* cvscan */
 120                 rf_CvscanCreate,
 121                 rf_CvscanEnqueue,
 122                 rf_CvscanDequeue,
 123                 rf_CvscanPeek,
 124         rf_CvscanPromote},
 125
 126         {"sstf",                /* shortest seek time first */
 127                 rf_SstfCreate,
 128                 rf_SstfEnqueue,
 129                 rf_SstfDequeue,
 130                 rf_SstfPeek,
 131         rf_SstfPromote},
 132
 133         {"scan",                /* SCAN (two-way elevator) */
 134                 rf_ScanCreate,
 135                 rf_SstfEnqueue,
 136                 rf_ScanDequeue,
 137                 rf_ScanPeek,
 138         rf_SstfPromote},
 139
 140         {"cscan",               /* CSCAN (one-way elevator) */
 141                 rf_CscanCreate,
 142                 rf_SstfEnqueue,
 143                 rf_CscanDequeue,
 144                 rf_CscanPeek,
 145         rf_SstfPromote},
 146
 147 };
 148 #define NUM_DISK_QUEUE_TYPES (sizeof(diskqueuesw)/sizeof(RF_DiskQueueSW_t))
 149
 150 #define RF_MAX_FREE_DQD 256
 151 #define RF_MIN_FREE_DQD  64
 152
 153 #include <sys/buf.h>
 154
 155 /* configures a single disk queue */
 156
 157 int
 158 rf_ConfigureDiskQueue(RF_Raid_t *raidPtr, RF_DiskQueue_t *diskqueue,
 159                       RF_RowCol_t c, const RF_DiskQueueSW_t *p,
 160                       RF_SectorCount_t sectPerDisk, dev_t dev,
 161                       int maxOutstanding, RF_ShutdownList_t **listp,
 162                       RF_AllocListElem_t *clList)
 163 {
 164         diskqueue->col = c;
 165         diskqueue->qPtr = p;
 166         diskqueue->qHdr = (p->Create) (sectPerDisk, clList, listp);
 167         diskqueue->dev = dev;
 168         diskqueue->numOutstanding = 0;
 169         diskqueue->queueLength = 0;
 170         diskqueue->maxOutstanding = maxOutstanding;
 171         diskqueue->curPriority = RF_IO_NORMAL_PRIORITY;
 172         diskqueue->flags = 0;
 173         diskqueue->raidPtr = raidPtr;
 174         diskqueue->rf_cinfo = &raidPtr->raid_cinfo[c];
 175         rf_mutex_init(&diskqueue->mutex);
 176         diskqueue->cond = 0;
 177         return (0);
 178 }
 179
 180 static void
 181 rf_ShutdownDiskQueueSystem(void *ignored)
 182 {
 183         pool_destroy(&rf_pools.dqd);
 184 }
 185
 186 int
 187 rf_ConfigureDiskQueueSystem(RF_ShutdownList_t **listp)
 188 {
 189
 190         rf_pool_init(&rf_pools.dqd, sizeof(RF_DiskQueueData_t),
 191                      "rf_dqd_pl", RF_MIN_FREE_DQD, RF_MAX_FREE_DQD);
 192         rf_ShutdownCreate(listp, rf_ShutdownDiskQueueSystem, NULL);
 193
 194         return (0);
 195 }
 196
 197 int
 198 rf_ConfigureDiskQueues(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
 199                        RF_Config_t *cfgPtr)
 200 {
 201         RF_DiskQueue_t *diskQueues, *spareQueues;
 202         const RF_DiskQueueSW_t *p;
 203         RF_RowCol_t r,c;
 204         int     rc, i;
 205
 206         raidPtr->maxQueueDepth = cfgPtr->maxOutstandingDiskReqs;
 207
 208         for (p = NULL, i = 0; i < NUM_DISK_QUEUE_TYPES; i++) {
 209                 if (!strcmp(diskqueuesw[i].queueType, cfgPtr->diskQueueType)) {
 210                         p = &diskqueuesw[i];
 211                         break;
 212                 }
 213         }
 214         if (p == NULL) {
 215                 RF_ERRORMSG2("Unknown queue type \"%s\".  Using %s\n", cfgPtr->diskQueueType, diskqueuesw[0].queueType);
 216                 p = &diskqueuesw[0];
 217         }
 218         raidPtr->qType = p;
 219
 220         RF_MallocAndAdd(diskQueues,
 221                         (raidPtr->numCol + RF_MAXSPARE) *
 222                         sizeof(RF_DiskQueue_t), (RF_DiskQueue_t *),
 223                         raidPtr->cleanupList);
 224         if (diskQueues == NULL)
 225                 return (ENOMEM);
 226         raidPtr->Queues = diskQueues;
 227
 228         for (c = 0; c < raidPtr->numCol; c++) {
 229                 rc = rf_ConfigureDiskQueue(raidPtr, &diskQueues[c],
 230                                            c, p,
 231                                            raidPtr->sectorsPerDisk,
 232                                            raidPtr->Disks[c].dev,
 233                                            cfgPtr->maxOutstandingDiskReqs,
 234                                            listp, raidPtr->cleanupList);
 235                 if (rc)
 236                         return (rc);
 237         }
 238
 239         spareQueues = &raidPtr->Queues[raidPtr->numCol];
 240         for (r = 0; r < raidPtr->numSpare; r++) {
 241                 rc = rf_ConfigureDiskQueue(raidPtr, &spareQueues[r],
 242                                            raidPtr->numCol + r, p,
 243                                            raidPtr->sectorsPerDisk,
 244                                            raidPtr->Disks[raidPtr->numCol + r].dev,
 245                                            cfgPtr->maxOutstandingDiskReqs, listp,
 246                                            raidPtr->cleanupList);
 247                 if (rc)
 248                         return (rc);
 249         }
 250         return (0);
 251 }
 252 /* Enqueue a disk I/O
 253  *
 254  * In the kernel, I/O is non-blocking and so we'd like to have multiple
 255  * I/Os outstanding on the physical disks when possible.
 256  *
 257  * when any request arrives at a queue, we have two choices:
 258  *    dispatch it to the lower levels
 259  *    queue it up
 260  *
 261  * kernel rules for when to do what:
 262  *    unlocking req  :  always dispatch it
 263  *    normal req     :  queue empty => dispatch it & set priority
 264  *                      queue not full & priority is ok => dispatch it
 265  *                      else queue it
 266  */
 267 void
 268 rf_DiskIOEnqueue(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req, int pri)
 269 {
 270         RF_ETIMER_START(req->qtime);
 271         RF_ASSERT(req->type == RF_IO_TYPE_NOP || req->numSector);
 272         req->priority = pri;
 273
 274 #if RF_DEBUG_DISKQUEUE
 275         if (rf_queueDebug && (req->numSector == 0)) {
 276                 printf("Warning: Enqueueing zero-sector access\n");
 277         }
 278 #endif
 279         RF_LOCK_QUEUE_MUTEX(queue, "DiskIOEnqueue");
 280         if (RF_OK_TO_DISPATCH(queue, req)) {
 281                 Dprintf2("Dispatching pri %d regular op to c %d (ok to dispatch)\n", pri, queue->col);
 282                 rf_DispatchKernelIO(queue, req);
 283         } else {
 284                 queue->queueLength++;   /* increment count of number of requests waiting in this queue */
 285                 Dprintf2("Enqueueing pri %d regular op to c %d (not ok to dispatch)\n", pri, queue->col);
 286                 req->queue = (void *) queue;
 287                 (queue->qPtr->Enqueue) (queue->qHdr, req, pri);
 288         }
 289         RF_UNLOCK_QUEUE_MUTEX(queue, "DiskIOEnqueue");
 290 }
 291
 292
 293 /* get the next set of I/Os started */
 294 void
 295 rf_DiskIOComplete(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req, int status)
 296 {
 297         int     done = 0;
 298
 299         RF_LOCK_QUEUE_MUTEX(queue, "DiskIOComplete");
 300         queue->numOutstanding--;
 301         RF_ASSERT(queue->numOutstanding >= 0);
 302
 303         /* dispatch requests to the disk until we find one that we can't. */
 304         /* no reason to continue once we've filled up the queue */
 305         /* no reason to even start if the queue is locked */
 306
 307         while (!done && !RF_QUEUE_FULL(queue)) {
 308                 req = (queue->qPtr->Dequeue) (queue->qHdr);
 309                 if (req) {
 310                         Dprintf2("DiskIOComplete: extracting pri %d req from queue at c %d\n", req->priority, queue->col);
 311                         queue->queueLength--;   /* decrement count of number of requests waiting in this queue */
 312                         RF_ASSERT(queue->queueLength >= 0);
 313                         if (RF_OK_TO_DISPATCH(queue, req)) {
 314                                 Dprintf2("DiskIOComplete: dispatching pri %d regular req to c %d (ok to dispatch)\n", req->priority, queue->col);
 315                                 rf_DispatchKernelIO(queue, req);
 316                         } else {
 317                                 /* we can't dispatch it, so just re-enqueue it.
 318                                    potential trouble here if disk queues batch reqs */
 319                                 Dprintf2("DiskIOComplete: re-enqueueing pri %d regular req to c %d\n", req->priority, queue->col);
 320                                 queue->queueLength++;
 321                                 (queue->qPtr->Enqueue) (queue->qHdr, req, req->priority);
 322                                 done = 1;
 323                         }
 324                 } else {
 325                         Dprintf1("DiskIOComplete: no more requests to extract.\n", "");
 326                         done = 1;
 327                 }
 328         }
 329
 330         RF_UNLOCK_QUEUE_MUTEX(queue, "DiskIOComplete");
 331 }
 332 /* promotes accesses tagged with the given parityStripeID from low priority
 333  * to normal priority.  This promotion is optional, meaning that a queue
 334  * need not implement it.  If there is no promotion routine associated with
 335  * a queue, this routine does nothing and returns -1.
 336  */
 337 int
 338 rf_DiskIOPromote(RF_DiskQueue_t *queue, RF_StripeNum_t parityStripeID,
 339                  RF_ReconUnitNum_t which_ru)
 340 {
 341         int     retval;
 342
 343         if (!queue->qPtr->Promote)
 344                 return (-1);
 345         RF_LOCK_QUEUE_MUTEX(queue, "DiskIOPromote");
 346         retval = (queue->qPtr->Promote) (queue->qHdr, parityStripeID, which_ru);
 347         RF_UNLOCK_QUEUE_MUTEX(queue, "DiskIOPromote");
 348         return (retval);
 349 }
 350
 351 RF_DiskQueueData_t *
 352 rf_CreateDiskQueueData(RF_IoType_t typ, RF_SectorNum_t ssect,
 353                        RF_SectorCount_t nsect, void *bf,
 354                        RF_StripeNum_t parityStripeID,
 355                        RF_ReconUnitNum_t which_ru,
 356                        int (*wakeF) (void *, int), void *arg,
 357                        RF_AccTraceEntry_t *tracerec, RF_Raid_t *raidPtr,
 358                        RF_DiskQueueDataFlags_t flags, void *kb_proc,
 359                        int waitflag)
 360 {
 361         RF_DiskQueueData_t *p;
 362
 363         p = pool_get(&rf_pools.dqd, waitflag);
 364         if (p == NULL)
 365                 return (NULL);
 366
 367         memset(p, 0, sizeof(RF_DiskQueueData_t));
 368         if (waitflag == PR_WAITOK) {
 369                 p->bp = getiobuf(NULL, true);
 370         } else {
 371                 p->bp = getiobuf(NULL, false);
 372         }
 373         if (p->bp == NULL) {
 374                 pool_put(&rf_pools.dqd, p);
 375                 return (NULL);
 376         }
 377         SET(p->bp->b_cflags, BC_BUSY);  /* mark buffer busy */
 378
 379         p->sectorOffset = ssect + rf_protectedSectors;
 380         p->numSector = nsect;
 381         p->type = typ;
 382         p->buf = bf;
 383         p->parityStripeID = parityStripeID;
 384         p->which_ru = which_ru;
 385         p->CompleteFunc = wakeF;
 386         p->argument = arg;
 387         p->next = NULL;
 388         p->tracerec = tracerec;
 389         p->priority = RF_IO_NORMAL_PRIORITY;
 390         p->raidPtr = raidPtr;
 391         p->flags = flags;
 392         p->b_proc = kb_proc;
 393         return (p);
 394 }
 395
 396 void
 397 rf_FreeDiskQueueData(RF_DiskQueueData_t *p)
 398 {
 399         int s;
 400         s = splbio();           /* XXX protect only pool_put, or neither? */
 401         putiobuf(p->bp);
 402         pool_put(&rf_pools.dqd, p);
 403         splx(s);
 404 }