1 /* $NetBSD: rf_copyback.c,v 1.41 2008/01/26 20:44:37 oster Exp $ */
3 * Copyright (c) 1995 Carnegie-Mellon University.
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
18 * Carnegie Mellon requests users of this software to return to
20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
29 /*****************************************************************************
31 * copyback.c -- code to copy reconstructed data back from spare space to
34 * the code operates using callbacks on the I/Os to continue with the
35 * next unit to be copied back. We do this because a simple loop
36 * containing blocking I/Os will not work in the simulator.
38 ****************************************************************************/
40 #include <sys/cdefs.h>
41 __KERNEL_RCSID(0, "$NetBSD: rf_copyback.c,v 1.41 2008/01/26 20:44:37 oster Exp $");
43 #include <dev/raidframe/raidframevar.h>
48 #include "rf_mcpair.h"
49 #include "rf_acctrace.h"
50 #include "rf_etimer.h"
51 #include "rf_general.h"
53 #include "rf_copyback.h"
54 #include "rf_decluster.h"
55 #include "rf_driver.h"
56 #include "rf_shutdown.h"
/*
 * Values passed as the typ argument of rf_CopybackOne(): they select whether
 * the unit being copied back is located with MapSector (data) or with
 * MapParity (parity).
 */
59 #define RF_COPYBACK_DATA 0
60 #define RF_COPYBACK_PARITY 1
/*
 * Flag set to 1 when rf_CopybackReconstructedData() starts a copyback and
 * reset to 0 by rf_ConfigureCopyback() and rf_CopybackComplete().  The code
 * that sets it marks it as debug only.
 */
62 int rf_copyback_in_progress
;
/* Forward declarations of the file-local helpers defined further down. */
64 static int rf_CopybackReadDoneProc(RF_CopybackDesc_t
* desc
, int status
);
65 static int rf_CopybackWriteDoneProc(RF_CopybackDesc_t
* desc
, int status
);
66 static void rf_CopybackOne(RF_CopybackDesc_t
* desc
, int typ
,
67 RF_RaidAddr_t addr
, RF_RowCol_t testCol
,
68 RF_SectorNum_t testOffs
);
69 static void rf_CopybackComplete(RF_CopybackDesc_t
* desc
, int status
);
/*
 * Configuration-time initialisation for the copyback code: clears the
 * rf_copyback_in_progress flag.
 *
 * listp: not referenced by the statements visible here.
 *
 * NOTE(review): the return type, braces and return statement are not
 * visible in this view.
 */
72 rf_ConfigureCopyback(RF_ShutdownList_t
**listp
)
74 rf_copyback_in_progress
= 0;
78 #include <sys/param.h>
79 #include <sys/systm.h>
81 #include <sys/ioctl.h>
82 #include <sys/fcntl.h>
83 #include <sys/vnode.h>
85 /*
 * Do a complete copyback.
 *
 * Scans raidPtr->Disks[] for a column whose status is rf_ds_dist_spared or
 * rf_ds_spared.  For that column the component is closed if it is still
 * open, re-opened (dk_lookup, VOP_GETATTR, rf_getdisksize), and its vnode
 * and va_rdev are recorded.  A buffer of one stripe unit and a copyback
 * descriptor are then allocated, new requests are suspended, the disk and
 * the array are marked optimal under raidPtr->mutex, and the copy is
 * started with rf_ContinueCopyback().  Finally the component label for the
 * column is re-initialised, updated and flushed.
 *
 * raidPtr: the array to operate on.
 *
 * NOTE(review): several lines of this function (local declarations,
 * braces, early returns, some call arguments) are not visible in this
 * view; the comments describe only the statements that are shown.
 */
87 rf_CopybackReconstructedData(RF_Raid_t
*raidPtr
)
89 RF_ComponentLabel_t
*c_label
;
91 RF_CopybackDesc_t
*desc
;
93 RF_RaidDisk_t
*badDisk
;
/* scan the columns for a disk that is currently spared */
103 for (fcol
= 0; fcol
< raidPtr
->numCol
; fcol
++) {
104 if (raidPtr
->Disks
[fcol
].status
== rf_ds_dist_spared
105 || raidPtr
->Disks
[fcol
].status
== rf_ds_spared
) {
112 printf("raid%d: no disks need copyback\n", raidPtr
->raidid
);
/* badDisk is the Disks[] entry of the column selected above */
116 badDisk
= &raidPtr
->Disks
[fcol
];
118 /* This device may have been opened successfully the first time. Close
119 * it before trying to open it again.. */
121 if (raidPtr
->raid_cinfo
[fcol
].ci_vp
!= NULL
) {
122 printf("Closed the open device: %s\n",
123 raidPtr
->Disks
[fcol
].devname
);
124 vp
= raidPtr
->raid_cinfo
[fcol
].ci_vp
;
125 ac
= raidPtr
->Disks
[fcol
].auto_configured
;
126 rf_close_component(raidPtr
, vp
, ac
);
127 raidPtr
->raid_cinfo
[fcol
].ci_vp
= NULL
;
130 /* note that this disk was *not* auto_configured (any longer) */
131 raidPtr
->Disks
[fcol
].auto_configured
= 0;
133 printf("About to (re-)open the device: %s\n",
134 raidPtr
->Disks
[fcol
].devname
);
136 retcode
= dk_lookup(raidPtr
->Disks
[fcol
].devname
, curlwp
, &vp
,
140 printf("raid%d: copyback: dk_lookup on device: %s failed: %d!\n",
141 raidPtr
->raidid
, raidPtr
->Disks
[fcol
].devname
,
144 /* XXX the component isn't responding properly... must be
150 /* Ok, so we can at least do a lookup... How about actually
151 * getting a vp for it? */
153 if ((retcode
= VOP_GETATTR(vp
, &va
, curlwp
->l_cred
)) != 0)
155 retcode
= rf_getdisksize(vp
, curlwp
, &raidPtr
->Disks
[fcol
]);
/* record the re-opened vnode and va.va_rdev for this column */
160 raidPtr
->raid_cinfo
[fcol
].ci_vp
= vp
;
161 raidPtr
->raid_cinfo
[fcol
].ci_dev
= va
.va_rdev
;
163 raidPtr
->Disks
[fcol
].dev
= va
.va_rdev
; /* XXX or the above? */
165 /* we allow the user to specify that only a fraction of the
166 * disks should be used this is just for debug: it speeds up
168 raidPtr
->Disks
[fcol
].numBlocks
=
169 raidPtr
->Disks
[fcol
].numBlocks
*
170 rf_sizePercentage
/ 100;
174 printf("raid%d: copyback: target disk failed TUR\n",
178 /* get a buffer to hold one SU */
179 RF_Malloc(databuf
, rf_RaidAddressToByte(raidPtr
, raidPtr
->Layout
.sectorsPerStripeUnit
), (char *));
181 /* create a descriptor */
182 RF_Malloc(desc
, sizeof(*desc
), (RF_CopybackDesc_t
*));
183 desc
->raidPtr
= raidPtr
;
186 desc
->spCol
= badDisk
->spareCol
;
187 desc
->stripeAddr
= 0;
188 desc
->sectPerSU
= raidPtr
->Layout
.sectorsPerStripeUnit
;
189 desc
->sectPerStripe
= raidPtr
->Layout
.sectorsPerStripeUnit
* raidPtr
->Layout
.numDataCol
;
190 desc
->databuf
= databuf
;
191 desc
->mcpair
= rf_AllocMCPair();
193 /* quiesce the array, since we don't want to code support for user
195 rf_SuspendNewRequestsAndWait(raidPtr
);
197 /* adjust state of the array and of the disks */
198 RF_LOCK_MUTEX(raidPtr
->mutex
);
199 raidPtr
->Disks
[desc
->fcol
].status
= rf_ds_optimal
;
200 raidPtr
->status
= rf_rs_optimal
;
201 rf_copyback_in_progress
= 1; /* debug only */
202 RF_UNLOCK_MUTEX(raidPtr
->mutex
);
/* starttime is the reference point for the elapsed-time printouts */
204 RF_GETTIME(desc
->starttime
);
205 rf_ContinueCopyback(desc
);
207 /* Data has been restored. Fix up the component label. */
208 /* Don't actually need the read here.. */
210 c_label
= raidget_component_label(raidPtr
, fcol
);
211 raid_init_component_label(raidPtr
, c_label
);
/* record the column and partition size in the label before flushing it */
214 c_label
->column
= fcol
;
215 c_label
->partitionSize
= raidPtr
->Disks
[fcol
].partitionSize
;
217 raidflush_component_label(raidPtr
, fcol
);
219 /* XXXjld why is this here? */
220 rf_update_component_labels(raidPtr
, RF_NORMAL_COMPONENT_UPDATE
);
/*
225 * invoked via callback after a copyback I/O has completed to
226 * continue on with the next one
*/
/*
 * Process the stripe at desc->stripeAddr and advance the descriptor to the
 * following stripe.
 *
 * The stripe start is saved in the local stripeAddr, the progress counter
 * raidPtr->copyback_stripes_done is updated, and desc->stripeAddr is moved
 * forward by desc->sectPerStripe.  When rf_prReconSched is set, a line with
 * the new percentage and the elapsed time is printed whenever the
 * percentage changes.  If the saved address is at or beyond
 * raidPtr->totalSectors, rf_CopybackComplete(desc, 0) is called.  Otherwise
 * each stripe unit of the stripe, and then its parity, is mapped with
 * RF_DONT_REMAP, and a unit that maps to column desc->fcol is copied back
 * with rf_CopybackOne().
 *
 * desc: copyback descriptor; its stripeAddr field is updated here.
 *
 * NOTE(review): some declarations, braces and control-flow statements of
 * this function are not visible in this view, so the exact conditions
 * guarding the rf_CopybackComplete(desc, 1) call and the move to the next
 * stripe cannot be confirmed from here.
 */
229 rf_ContinueCopyback(RF_CopybackDesc_t
*desc
)
231 RF_SectorNum_t testOffs
, stripeAddr
;
232 RF_Raid_t
*raidPtr
= desc
->raidPtr
;
236 int old_pctg
, new_pctg
;
237 struct timeval t
, diff
;
/* work on the stripe the descriptor currently points at */
245 stripeAddr
= desc
->stripeAddr
;
246 desc
->raidPtr
->copyback_stripes_done
= stripeAddr
247 / desc
->sectPerStripe
;
249 if (rf_prReconSched
) {
250 old_pctg
= 100 * desc
->stripeAddr
/ raidPtr
->totalSectors
;
/* advance the descriptor; the local stripeAddr keeps the current stripe */
253 desc
->stripeAddr
+= desc
->sectPerStripe
;
255 if (rf_prReconSched
) {
256 new_pctg
= 100 * desc
->stripeAddr
/ raidPtr
->totalSectors
;
257 if (new_pctg
!= old_pctg
) {
259 RF_TIMEVAL_DIFF(&desc
->starttime
, &t
, &diff
);
260 printf("%d %d.%06d\n", new_pctg
, (int) diff
.tv_sec
, (int) diff
.tv_usec
);
/* past the end of the array: finish with status 0 */
264 if (stripeAddr
>= raidPtr
->totalSectors
) {
265 rf_CopybackComplete(desc
, 0);
268 /* walk through the current stripe, su-by-su */
269 for (done
= 0, addr
= stripeAddr
; addr
< stripeAddr
+ desc
->sectPerStripe
; addr
+= desc
->sectPerSU
) {
271 /* map the SU, disallowing remap to spare space */
272 (raidPtr
->Layout
.map
->MapSector
) (raidPtr
, addr
, &testCol
, &testOffs
, RF_DONT_REMAP
);
274 if (testCol
== desc
->fcol
) {
275 rf_CopybackOne(desc
, RF_COPYBACK_DATA
, addr
, testCol
, testOffs
);
282 /* we didn't find the failed disk in the data part.
285 /* map the parity for this stripe, disallowing remap
287 (raidPtr
->Layout
.map
->MapParity
) (raidPtr
, stripeAddr
, &testCol
, &testOffs
, RF_DONT_REMAP
);
289 if (testCol
== desc
->fcol
) {
290 rf_CopybackOne(desc
, RF_COPYBACK_PARITY
, stripeAddr
, testCol
, testOffs
);
293 /* check to see if the last read/write pair failed */
295 rf_CopybackComplete(desc
, 1);
298 /* we didn't find any units to copy back in this stripe.
299 * Continue with the next one */
304 /*
 * Copy back one unit.
 *
 * Builds a read request for the unit at offset spOffs and a write request
 * for offset testOffs, both of desc->sectPerSU sectors and both using
 * desc->databuf.  The read is enqueued on the spare column queue; its
 * completion callback (rf_CopybackReadDoneProc) sends out the write.  This
 * function then waits on desc->mcpair until the flag is raised and frees
 * both requests.
 *
 * desc:     copyback descriptor; readreq, writereq and fcol are set here.
 * typ:      RF_COPYBACK_DATA or RF_COPYBACK_PARITY.
 * addr:     RAID address of the unit, used to look up the spare location
 *           when the layout uses distributed sparing.
 * testCol:  column the unit is written to (stored in desc->fcol).
 * testOffs: sector offset the unit is written to.
 *
 * NOTE(review): the branch that sets spOffs when RF_DISTRIBUTE_SPARE is not
 * set, and the tails of the rf_CreateDiskQueueData() calls, are not visible
 * in this view.
 */
306 rf_CopybackOne(RF_CopybackDesc_t
*desc
, int typ
, RF_RaidAddr_t addr
,
307 RF_RowCol_t testCol
, RF_SectorNum_t testOffs
)
309 RF_SectorCount_t sectPerSU
= desc
->sectPerSU
;
310 RF_Raid_t
*raidPtr
= desc
->raidPtr
;
311 RF_RowCol_t spCol
= desc
->spCol
;
312 RF_SectorNum_t spOffs
;
314 /* find the spare location for this SU */
315 if (raidPtr
->Layout
.map
->flags
& RF_DISTRIBUTE_SPARE
) {
316 if (typ
== RF_COPYBACK_DATA
)
317 raidPtr
->Layout
.map
->MapSector(raidPtr
, addr
, &spCol
, &spOffs
, RF_REMAP
);
319 raidPtr
->Layout
.map
->MapParity(raidPtr
, addr
, &spCol
, &spOffs
, RF_REMAP
);
324 /* create reqs to read the old location & write the new */
325 desc
->readreq
= rf_CreateDiskQueueData(RF_IO_TYPE_READ
, spOffs
,
326 sectPerSU
, desc
->databuf
, 0L, 0,
327 (int (*) (void *, int)) rf_CopybackReadDoneProc
, desc
,
328 NULL
, (void *) raidPtr
, RF_DISKQUEUE_DATA_FLAGS_NONE
, NULL
,
330 desc
->writereq
= rf_CreateDiskQueueData(RF_IO_TYPE_WRITE
, testOffs
,
331 sectPerSU
, desc
->databuf
, 0L, 0,
332 (int (*) (void *, int)) rf_CopybackWriteDoneProc
, desc
,
333 NULL
, (void *) raidPtr
, RF_DISKQUEUE_DATA_FLAGS_NONE
, NULL
,
/* the read-done callback enqueues the write on Queues[desc->fcol] */
335 desc
->fcol
= testCol
;
337 /* enqueue the read. the write will go out as part of the callback on
338 * the read. at user-level & in the kernel, wait for the read-write
339 * pair to complete. in the simulator, just return, since everything
340 * will happen as callbacks */
/* clear the completion flag before the read is issued */
342 RF_LOCK_MUTEX(desc
->mcpair
->mutex
);
343 desc
->mcpair
->flag
= 0;
344 RF_UNLOCK_MUTEX(desc
->mcpair
->mutex
);
346 rf_DiskIOEnqueue(&raidPtr
->Queues
[spCol
], desc
->readreq
, RF_IO_NORMAL_PRIORITY
);
/* wait for the flag; presumably raised by rf_MCPairWakeupFunc() -- confirm */
348 RF_LOCK_MUTEX(desc
->mcpair
->mutex
);
349 while (!desc
->mcpair
->flag
) {
350 RF_WAIT_MCPAIR(desc
->mcpair
);
352 RF_UNLOCK_MUTEX(desc
->mcpair
->mutex
);
/* the wait is over, so both requests can be released here */
353 rf_FreeDiskQueueData(desc
->readreq
);
354 rf_FreeDiskQueueData(desc
->writereq
);
359 /*
 * Called at interrupt context when the read has completed; just sends out
 * the write.
 *
 * On a non-zero status a message is printed and the completion function of
 * the write request is invoked directly with status -100, a value that
 * rf_CopybackWriteDoneProc() excludes from its own failure message.  The
 * write request is enqueued on the queue of column desc->fcol.
 *
 * desc:   copyback descriptor holding writereq, fcol and raidPtr.
 * status: completion status of the read; non-zero means failure.
 *
 * NOTE(review): the lines between the failure branch and the enqueue
 * (presumably an else) and the return statement are not visible in this
 * view.
 */
361 rf_CopybackReadDoneProc(RF_CopybackDesc_t
*desc
, int status
)
363 if (status
) { /* invoke the callback with bad status */
364 printf("raid%d: copyback read failed. Aborting.\n",
365 desc
->raidPtr
->raidid
);
366 (desc
->writereq
->CompleteFunc
) (desc
, -100);
368 rf_DiskIOEnqueue(&(desc
->raidPtr
->Queues
[desc
->fcol
]), desc
->writereq
, RF_IO_NORMAL_PRIORITY
);
372 /* called at interrupt context when the write has completed.
373 * at user level & in the kernel, wake up the copyback thread.
374 * in the simulator, invoke the next copyback directly.
375 * can't free diskqueuedata structs in the kernel because we're at interrupt context.
*/
// Completion callback for the copyback write request.
//
// Prints a failure message when status is non-zero and is not -100; -100 is
// the value rf_CopybackReadDoneProc() passes when the read already failed
// and has printed its own message.  The status is then stored in
// desc->status and rf_MCPairWakeupFunc() is called on desc->mcpair.
//
// desc:   copyback descriptor; its status field is written here.
// status: completion status of the write, or -100 as described above.
//
// NOTE(review): the return type, braces and return statement are not
// visible in this view.
378 rf_CopybackWriteDoneProc(RF_CopybackDesc_t
*desc
, int status
)
380 if (status
&& status
!= -100) {
381 printf("raid%d: copyback write failed. Aborting.\n",
382 desc
->raidPtr
->raidid
);
// recorded unconditionally, including the -100 case
384 desc
->status
= status
;
385 rf_MCPairWakeupFunc(desc
->mcpair
);
388 /*
 * Invoked when the copyback has completed.
 *
 * Under raidPtr->mutex: when the layout uses distributed sparing, asserts
 * that the parity configuration is D and frees the spare table; the status
 * of disk desc->spCol is set to rf_ds_spare.  After the mutex is released
 * the elapsed time since desc->starttime is printed, or a failure message
 * with the status.  The data buffer, the mcpair and the descriptor itself
 * are then freed, rf_copyback_in_progress is cleared and new requests are
 * resumed.
 *
 * desc:   copyback descriptor; it is freed here and must not be used
 *         afterwards.
 * status: reported in the failure message.
 *
 * NOTE(review): the branch structure around the rf_ds_spare assignment and
 * around the two status printouts is not visible in this view; presumably
 * the timing line is for status 0 and the failure line otherwise --
 * confirm against the full source.
 */
390 rf_CopybackComplete(RF_CopybackDesc_t
*desc
, int status
)
392 RF_Raid_t
*raidPtr
= desc
->raidPtr
;
393 struct timeval t
, diff
;
396 RF_LOCK_MUTEX(raidPtr
->mutex
);
397 if (raidPtr
->Layout
.map
->flags
& RF_DISTRIBUTE_SPARE
) {
398 RF_ASSERT(raidPtr
->Layout
.map
->parityConfig
== 'D');
399 rf_FreeSpareTable(raidPtr
);
401 raidPtr
->Disks
[desc
->spCol
].status
= rf_ds_spare
;
403 RF_UNLOCK_MUTEX(raidPtr
->mutex
);
406 RF_TIMEVAL_DIFF(&desc
->starttime
, &t
, &diff
);
408 printf("Copyback time was %d.%06d seconds\n",
409 (int) diff
.tv_sec
, (int) diff
.tv_usec
);
412 printf("raid%d: Copyback failure. Status: %d\n",
413 raidPtr
->raidid
, status
);
/* release everything rf_CopybackReconstructedData() allocated for this run */
415 RF_Free(desc
->databuf
, rf_RaidAddressToByte(raidPtr
, desc
->sectPerSU
));
416 rf_FreeMCPair(desc
->mcpair
);
417 RF_Free(desc
, sizeof(*desc
));
419 rf_copyback_in_progress
= 0;
420 rf_ResumeNewRequests(raidPtr
);