No empty .Rs/.Re
[netbsd-mini2440.git] / sys / dev / raidframe / rf_copyback.c
bloba6c7e368349235463b21902797db96698f0c632d
1 /* $NetBSD: rf_copyback.c,v 1.41 2008/01/26 20:44:37 oster Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
6 * Author: Mark Holland
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
18 * Carnegie Mellon requests users of this software to return to
20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
29 /*****************************************************************************
31 * copyback.c -- code to copy reconstructed data back from spare space to
32 * the replaced disk.
34 * the code operates using callbacks on the I/Os to continue with the
35 * next unit to be copied back. We do this because a simple loop
36 * containing blocking I/Os will not work in the simulator.
38 ****************************************************************************/
40 #include <sys/cdefs.h>
41 __KERNEL_RCSID(0, "$NetBSD: rf_copyback.c,v 1.41 2008/01/26 20:44:37 oster Exp $");
43 #include <dev/raidframe/raidframevar.h>
45 #include <sys/time.h>
46 #include <sys/buf.h>
47 #include "rf_raid.h"
48 #include "rf_mcpair.h"
49 #include "rf_acctrace.h"
50 #include "rf_etimer.h"
51 #include "rf_general.h"
52 #include "rf_utils.h"
53 #include "rf_copyback.h"
54 #include "rf_decluster.h"
55 #include "rf_driver.h"
56 #include "rf_shutdown.h"
57 #include "rf_kintf.h"
59 #define RF_COPYBACK_DATA 0
60 #define RF_COPYBACK_PARITY 1
62 int rf_copyback_in_progress;
64 static int rf_CopybackReadDoneProc(RF_CopybackDesc_t * desc, int status);
65 static int rf_CopybackWriteDoneProc(RF_CopybackDesc_t * desc, int status);
66 static void rf_CopybackOne(RF_CopybackDesc_t * desc, int typ,
67 RF_RaidAddr_t addr, RF_RowCol_t testCol,
68 RF_SectorNum_t testOffs);
69 static void rf_CopybackComplete(RF_CopybackDesc_t * desc, int status);
71 int
72 rf_ConfigureCopyback(RF_ShutdownList_t **listp)
74 rf_copyback_in_progress = 0;
75 return (0);
78 #include <sys/param.h>
79 #include <sys/systm.h>
80 #include <sys/proc.h>
81 #include <sys/ioctl.h>
82 #include <sys/fcntl.h>
83 #include <sys/vnode.h>
85 /* do a complete copyback */
86 void
87 rf_CopybackReconstructedData(RF_Raid_t *raidPtr)
89 RF_ComponentLabel_t *c_label;
90 int found, retcode;
91 RF_CopybackDesc_t *desc;
92 RF_RowCol_t fcol;
93 RF_RaidDisk_t *badDisk;
94 char *databuf;
96 struct vnode *vp;
97 struct vattr va;
99 int ac;
101 fcol = 0;
102 found = 0;
103 for (fcol = 0; fcol < raidPtr->numCol; fcol++) {
104 if (raidPtr->Disks[fcol].status == rf_ds_dist_spared
105 || raidPtr->Disks[fcol].status == rf_ds_spared) {
106 found = 1;
107 break;
111 if (!found) {
112 printf("raid%d: no disks need copyback\n", raidPtr->raidid);
113 return;
116 badDisk = &raidPtr->Disks[fcol];
118 /* This device may have been opened successfully the first time. Close
119 * it before trying to open it again.. */
121 if (raidPtr->raid_cinfo[fcol].ci_vp != NULL) {
122 printf("Closed the open device: %s\n",
123 raidPtr->Disks[fcol].devname);
124 vp = raidPtr->raid_cinfo[fcol].ci_vp;
125 ac = raidPtr->Disks[fcol].auto_configured;
126 rf_close_component(raidPtr, vp, ac);
127 raidPtr->raid_cinfo[fcol].ci_vp = NULL;
130 /* note that this disk was *not* auto_configured (any longer) */
131 raidPtr->Disks[fcol].auto_configured = 0;
133 printf("About to (re-)open the device: %s\n",
134 raidPtr->Disks[fcol].devname);
136 retcode = dk_lookup(raidPtr->Disks[fcol].devname, curlwp, &vp,
137 UIO_SYSSPACE);
139 if (retcode) {
140 printf("raid%d: copyback: dk_lookup on device: %s failed: %d!\n",
141 raidPtr->raidid, raidPtr->Disks[fcol].devname,
142 retcode);
144 /* XXX the component isn't responding properly... must be
145 * still dead :-( */
146 return;
148 } else {
150 /* Ok, so we can at least do a lookup... How about actually
151 * getting a vp for it? */
153 if ((retcode = VOP_GETATTR(vp, &va, curlwp->l_cred)) != 0)
154 return;
155 retcode = rf_getdisksize(vp, curlwp, &raidPtr->Disks[fcol]);
156 if (retcode) {
157 return;
160 raidPtr->raid_cinfo[fcol].ci_vp = vp;
161 raidPtr->raid_cinfo[fcol].ci_dev = va.va_rdev;
163 raidPtr->Disks[fcol].dev = va.va_rdev; /* XXX or the above? */
165 /* we allow the user to specify that only a fraction of the
166 * disks should be used this is just for debug: it speeds up
167 * the parity scan */
168 raidPtr->Disks[fcol].numBlocks =
169 raidPtr->Disks[fcol].numBlocks *
170 rf_sizePercentage / 100;
173 if (retcode) {
174 printf("raid%d: copyback: target disk failed TUR\n",
175 raidPtr->raidid);
176 return;
178 /* get a buffer to hold one SU */
179 RF_Malloc(databuf, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (char *));
181 /* create a descriptor */
182 RF_Malloc(desc, sizeof(*desc), (RF_CopybackDesc_t *));
183 desc->raidPtr = raidPtr;
184 desc->status = 0;
185 desc->fcol = fcol;
186 desc->spCol = badDisk->spareCol;
187 desc->stripeAddr = 0;
188 desc->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
189 desc->sectPerStripe = raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.numDataCol;
190 desc->databuf = databuf;
191 desc->mcpair = rf_AllocMCPair();
193 /* quiesce the array, since we don't want to code support for user
194 * accs here */
195 rf_SuspendNewRequestsAndWait(raidPtr);
197 /* adjust state of the array and of the disks */
198 RF_LOCK_MUTEX(raidPtr->mutex);
199 raidPtr->Disks[desc->fcol].status = rf_ds_optimal;
200 raidPtr->status = rf_rs_optimal;
201 rf_copyback_in_progress = 1; /* debug only */
202 RF_UNLOCK_MUTEX(raidPtr->mutex);
204 RF_GETTIME(desc->starttime);
205 rf_ContinueCopyback(desc);
207 /* Data has been restored. Fix up the component label. */
208 /* Don't actually need the read here.. */
210 c_label = raidget_component_label(raidPtr, fcol);
211 raid_init_component_label(raidPtr, c_label);
213 c_label->row = 0;
214 c_label->column = fcol;
215 c_label->partitionSize = raidPtr->Disks[fcol].partitionSize;
217 raidflush_component_label(raidPtr, fcol);
219 /* XXXjld why is this here? */
220 rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE);
225 * invoked via callback after a copyback I/O has completed to
226 * continue on with the next one
228 void
229 rf_ContinueCopyback(RF_CopybackDesc_t *desc)
231 RF_SectorNum_t testOffs, stripeAddr;
232 RF_Raid_t *raidPtr = desc->raidPtr;
233 RF_RaidAddr_t addr;
234 RF_RowCol_t testCol;
235 #if RF_DEBUG_RECON
236 int old_pctg, new_pctg;
237 struct timeval t, diff;
238 #endif
239 int done;
241 #if RF_DEBUG_RECON
242 old_pctg = (-1);
243 #endif
244 while (1) {
245 stripeAddr = desc->stripeAddr;
246 desc->raidPtr->copyback_stripes_done = stripeAddr
247 / desc->sectPerStripe;
248 #if RF_DEBUG_RECON
249 if (rf_prReconSched) {
250 old_pctg = 100 * desc->stripeAddr / raidPtr->totalSectors;
252 #endif
253 desc->stripeAddr += desc->sectPerStripe;
254 #if RF_DEBUG_RECON
255 if (rf_prReconSched) {
256 new_pctg = 100 * desc->stripeAddr / raidPtr->totalSectors;
257 if (new_pctg != old_pctg) {
258 RF_GETTIME(t);
259 RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff);
260 printf("%d %d.%06d\n", new_pctg, (int) diff.tv_sec, (int) diff.tv_usec);
263 #endif
264 if (stripeAddr >= raidPtr->totalSectors) {
265 rf_CopybackComplete(desc, 0);
266 return;
268 /* walk through the current stripe, su-by-su */
269 for (done = 0, addr = stripeAddr; addr < stripeAddr + desc->sectPerStripe; addr += desc->sectPerSU) {
271 /* map the SU, disallowing remap to spare space */
272 (raidPtr->Layout.map->MapSector) (raidPtr, addr, &testCol, &testOffs, RF_DONT_REMAP);
274 if (testCol == desc->fcol) {
275 rf_CopybackOne(desc, RF_COPYBACK_DATA, addr, testCol, testOffs);
276 done = 1;
277 break;
281 if (!done) {
282 /* we didn't find the failed disk in the data part.
283 * check parity. */
285 /* map the parity for this stripe, disallowing remap
286 * to spare space */
287 (raidPtr->Layout.map->MapParity) (raidPtr, stripeAddr, &testCol, &testOffs, RF_DONT_REMAP);
289 if (testCol == desc->fcol) {
290 rf_CopybackOne(desc, RF_COPYBACK_PARITY, stripeAddr, testCol, testOffs);
293 /* check to see if the last read/write pair failed */
294 if (desc->status) {
295 rf_CopybackComplete(desc, 1);
296 return;
298 /* we didn't find any units to copy back in this stripe.
299 * Continue with the next one */
304 /* copyback one unit */
305 static void
306 rf_CopybackOne(RF_CopybackDesc_t *desc, int typ, RF_RaidAddr_t addr,
307 RF_RowCol_t testCol, RF_SectorNum_t testOffs)
309 RF_SectorCount_t sectPerSU = desc->sectPerSU;
310 RF_Raid_t *raidPtr = desc->raidPtr;
311 RF_RowCol_t spCol = desc->spCol;
312 RF_SectorNum_t spOffs;
314 /* find the spare spare location for this SU */
315 if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
316 if (typ == RF_COPYBACK_DATA)
317 raidPtr->Layout.map->MapSector(raidPtr, addr, &spCol, &spOffs, RF_REMAP);
318 else
319 raidPtr->Layout.map->MapParity(raidPtr, addr, &spCol, &spOffs, RF_REMAP);
320 } else {
321 spOffs = testOffs;
324 /* create reqs to read the old location & write the new */
325 desc->readreq = rf_CreateDiskQueueData(RF_IO_TYPE_READ, spOffs,
326 sectPerSU, desc->databuf, 0L, 0,
327 (int (*) (void *, int)) rf_CopybackReadDoneProc, desc,
328 NULL, (void *) raidPtr, RF_DISKQUEUE_DATA_FLAGS_NONE, NULL,
329 PR_WAITOK);
330 desc->writereq = rf_CreateDiskQueueData(RF_IO_TYPE_WRITE, testOffs,
331 sectPerSU, desc->databuf, 0L, 0,
332 (int (*) (void *, int)) rf_CopybackWriteDoneProc, desc,
333 NULL, (void *) raidPtr, RF_DISKQUEUE_DATA_FLAGS_NONE, NULL,
334 PR_WAITOK);
335 desc->fcol = testCol;
337 /* enqueue the read. the write will go out as part of the callback on
338 * the read. at user-level & in the kernel, wait for the read-write
339 * pair to complete. in the simulator, just return, since everything
340 * will happen as callbacks */
342 RF_LOCK_MUTEX(desc->mcpair->mutex);
343 desc->mcpair->flag = 0;
344 RF_UNLOCK_MUTEX(desc->mcpair->mutex);
346 rf_DiskIOEnqueue(&raidPtr->Queues[spCol], desc->readreq, RF_IO_NORMAL_PRIORITY);
348 RF_LOCK_MUTEX(desc->mcpair->mutex);
349 while (!desc->mcpair->flag) {
350 RF_WAIT_MCPAIR(desc->mcpair);
352 RF_UNLOCK_MUTEX(desc->mcpair->mutex);
353 rf_FreeDiskQueueData(desc->readreq);
354 rf_FreeDiskQueueData(desc->writereq);
359 /* called at interrupt context when the read has completed. just send out the write */
360 static int
361 rf_CopybackReadDoneProc(RF_CopybackDesc_t *desc, int status)
363 if (status) { /* invoke the callback with bad status */
364 printf("raid%d: copyback read failed. Aborting.\n",
365 desc->raidPtr->raidid);
366 (desc->writereq->CompleteFunc) (desc, -100);
367 } else {
368 rf_DiskIOEnqueue(&(desc->raidPtr->Queues[desc->fcol]), desc->writereq, RF_IO_NORMAL_PRIORITY);
370 return (0);
372 /* called at interrupt context when the write has completed.
373 * at user level & in the kernel, wake up the copyback thread.
374 * in the simulator, invoke the next copyback directly.
375 * can't free diskqueuedata structs in the kernel b/c we're at interrupt context.
377 static int
378 rf_CopybackWriteDoneProc(RF_CopybackDesc_t *desc, int status)
380 if (status && status != -100) {
381 printf("raid%d: copyback write failed. Aborting.\n",
382 desc->raidPtr->raidid);
384 desc->status = status;
385 rf_MCPairWakeupFunc(desc->mcpair);
386 return (0);
388 /* invoked when the copyback has completed */
389 static void
390 rf_CopybackComplete(RF_CopybackDesc_t *desc, int status)
392 RF_Raid_t *raidPtr = desc->raidPtr;
393 struct timeval t, diff;
395 if (!status) {
396 RF_LOCK_MUTEX(raidPtr->mutex);
397 if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
398 RF_ASSERT(raidPtr->Layout.map->parityConfig == 'D');
399 rf_FreeSpareTable(raidPtr);
400 } else {
401 raidPtr->Disks[desc->spCol].status = rf_ds_spare;
403 RF_UNLOCK_MUTEX(raidPtr->mutex);
405 RF_GETTIME(t);
406 RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff);
407 #if 0
408 printf("Copyback time was %d.%06d seconds\n",
409 (int) diff.tv_sec, (int) diff.tv_usec);
410 #endif
411 } else
412 printf("raid%d: Copyback failure. Status: %d\n",
413 raidPtr->raidid, status);
415 RF_Free(desc->databuf, rf_RaidAddressToByte(raidPtr, desc->sectPerSU));
416 rf_FreeMCPair(desc->mcpair);
417 RF_Free(desc, sizeof(*desc));
419 rf_copyback_in_progress = 0;
420 rf_ResumeNewRequests(raidPtr);