1 /* $NetBSD: rf_disks.c,v 1.71 2009/04/03 16:23:41 sborrill Exp $ */
3 * Copyright (c) 1999 The NetBSD Foundation, Inc.
6 * This code is derived from software contributed to The NetBSD Foundation
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
32 * Copyright (c) 1995 Carnegie-Mellon University.
33 * All rights reserved.
35 * Author: Mark Holland
37 * Permission to use, copy, modify and distribute this software and
38 * its documentation is hereby granted, provided that both the copyright
39 * notice and this permission notice appear in all copies of the
40 * software, derivative works or modified versions, and any portions
41 * thereof, and that both notices appear in supporting documentation.
43 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
44 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
45 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
47 * Carnegie Mellon requests users of this software to return to
49 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
50 * School of Computer Science
51 * Carnegie Mellon University
52 * Pittsburgh PA 15213-3890
54 * any improvements or extensions that they make and grant Carnegie the
55 * rights to redistribute these changes.
58 /***************************************************************
59 * rf_disks.c -- code to perform operations on the actual disks
60 ***************************************************************/
62 #include <sys/cdefs.h>
63 __KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.71 2009/04/03 16:23:41 sborrill Exp $");
65 #include <dev/raidframe/raidframevar.h>
68 #include "rf_alloclist.h"
70 #include "rf_general.h"
71 #include "rf_options.h"
73 #include "rf_netbsd.h"
75 #include <sys/param.h>
76 #include <sys/systm.h>
78 #include <sys/ioctl.h>
79 #include <sys/fcntl.h>
80 #include <sys/vnode.h>
81 #include <sys/kauth.h>
83 static int rf_AllocDiskStructures(RF_Raid_t
*, RF_Config_t
*);
84 static void rf_print_label_status( RF_Raid_t
*, int, char *,
85 RF_ComponentLabel_t
*);
86 static int rf_check_label_vitals( RF_Raid_t
*, int, int, char *,
87 RF_ComponentLabel_t
*, int, int );
89 #define DPRINTF6(a,b,c,d,e,f) if (rf_diskDebug) printf(a,b,c,d,e,f)
90 #define DPRINTF7(a,b,c,d,e,f,g) if (rf_diskDebug) printf(a,b,c,d,e,f,g)
92 /**************************************************************************
94 * initialize the disks comprising the array
96 * We want the spare disks to have regular row,col numbers so that we can
97 * easily substitute a spare for a failed disk. But, the driver code assumes
98 * throughout that the array contains numRow by numCol _non-spare_ disks, so
99 * it's not clear how to fit in the spares. This is an unfortunate holdover
100 * from raidSim. The quick and dirty fix is to make row zero bigger than the
101 * rest, and put all the spares in it. This probably needs to get changed
104 **************************************************************************/
107 rf_ConfigureDisks(RF_ShutdownList_t
**listp
, RF_Raid_t
*raidPtr
,
110 RF_RaidDisk_t
*disks
;
111 RF_SectorCount_t min_numblks
= (RF_SectorCount_t
) 0x7FFFFFFFFFFFLL
;
114 unsigned i
, count
, foundone
= 0, numFailuresThisRow
;
117 force
= cfgPtr
->force
;
119 ret
= rf_AllocDiskStructures(raidPtr
, cfgPtr
);
123 disks
= raidPtr
->Disks
;
125 numFailuresThisRow
= 0;
126 for (c
= 0; c
< raidPtr
->numCol
; c
++) {
127 ret
= rf_ConfigureDisk(raidPtr
,
128 &cfgPtr
->devnames
[0][c
][0],
134 if (disks
[c
].status
== rf_ds_optimal
) {
135 raidfetch_component_label(raidPtr
, c
);
138 if (disks
[c
].status
!= rf_ds_optimal
) {
139 numFailuresThisRow
++;
141 if (disks
[c
].numBlocks
< min_numblks
)
142 min_numblks
= disks
[c
].numBlocks
;
143 DPRINTF6("Disk at col %d: dev %s numBlocks %" PRIu64
" blockSize %d (%ld MB)\n",
147 (long int) disks
[c
].numBlocks
*
148 disks
[c
].blockSize
/ 1024 / 1024);
151 /* XXX fix for n-fault tolerant */
152 /* XXX this should probably check to see how many failures
153 we can handle for this configuration! */
154 if (numFailuresThisRow
> 0)
155 raidPtr
->status
= rf_rs_degraded
;
157 /* all disks must be the same size & have the same block size, bs must
161 for (c
= 0; c
< raidPtr
->numCol
; c
++) {
162 if (disks
[c
].status
== rf_ds_optimal
) {
163 bs
= disks
[c
].blockSize
;
169 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
173 for (count
= 0, i
= 1; i
; i
<<= 1)
177 RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs
);
182 if (rf_CheckLabels( raidPtr
, cfgPtr
)) {
183 printf("raid%d: There were fatal errors\n", raidPtr
->raidid
);
185 printf("raid%d: Fatal errors being ignored.\n",
193 for (c
= 0; c
< raidPtr
->numCol
; c
++) {
194 if (disks
[c
].status
== rf_ds_optimal
) {
195 if (disks
[c
].blockSize
!= bs
) {
196 RF_ERRORMSG1("Error: block size of disk at c %d different from disk at c 0\n", c
);
200 if (disks
[c
].numBlocks
!= min_numblks
) {
201 RF_ERRORMSG2("WARNING: truncating disk at c %d to %d blocks\n",
202 c
, (int) min_numblks
);
203 disks
[c
].numBlocks
= min_numblks
;
208 raidPtr
->sectorsPerDisk
= min_numblks
;
209 raidPtr
->logBytesPerSector
= ffs(bs
) - 1;
210 raidPtr
->bytesPerSector
= bs
;
211 raidPtr
->sectorMask
= bs
- 1;
216 rf_UnconfigureVnodes( raidPtr
);
222 /****************************************************************************
223 * set up the data structures describing the spare disks in the array
224 * recall from the above comment that the spare disk descriptors are stored
225 * in row zero, which is specially expanded to hold them.
226 ****************************************************************************/
228 rf_ConfigureSpareDisks(RF_ShutdownList_t
**listp
, RF_Raid_t
*raidPtr
,
233 RF_RaidDisk_t
*disks
;
238 /* The space for the spares should have already been allocated by
239 * ConfigureDisks() */
241 disks
= &raidPtr
->Disks
[raidPtr
->numCol
];
242 for (i
= 0; i
< raidPtr
->numSpare
; i
++) {
243 ret
= rf_ConfigureDisk(raidPtr
, &cfgPtr
->spare_names
[i
][0],
244 &disks
[i
], raidPtr
->numCol
+ i
);
247 if (disks
[i
].status
!= rf_ds_optimal
) {
248 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
249 &cfgPtr
->spare_names
[i
][0]);
251 disks
[i
].status
= rf_ds_spare
; /* change status to
253 DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64
" blockSize %d (%ld MB)\n", i
,
255 disks
[i
].numBlocks
, disks
[i
].blockSize
,
256 (long int) disks
[i
].numBlocks
*
257 disks
[i
].blockSize
/ 1024 / 1024);
262 /* check sizes and block sizes on spare disks */
263 bs
= 1 << raidPtr
->logBytesPerSector
;
264 for (i
= 0; i
< raidPtr
->numSpare
; i
++) {
265 if (disks
[i
].blockSize
!= bs
) {
266 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks
[i
].blockSize
, disks
[i
].devname
, bs
);
270 if (disks
[i
].numBlocks
< raidPtr
->sectorsPerDisk
) {
271 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64
" blocks)\n",
272 disks
[i
].devname
, disks
[i
].blockSize
,
273 raidPtr
->sectorsPerDisk
);
277 if (disks
[i
].numBlocks
> raidPtr
->sectorsPerDisk
) {
278 RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64
" blocks (from %" PRIu64
")\n",
280 raidPtr
->sectorsPerDisk
,
283 disks
[i
].numBlocks
= raidPtr
->sectorsPerDisk
;
291 /* Release the hold on the main components. We've failed to allocate
292 * a spare, and since we're failing, we need to free things..
294 XXX failing to allocate a spare is *not* that big of a deal...
295 We *can* survive without it, if need be, esp. if we get hot
298 If we don't fail out here, then we need a way to remove this spare...
299 that should be easier to do here than if we are "live"...
303 rf_UnconfigureVnodes( raidPtr
);
309 rf_AllocDiskStructures(RF_Raid_t
*raidPtr
, RF_Config_t
*cfgPtr
)
313 /* We allocate RF_MAXSPARE on the first row so that we
314 have room to do hot-swapping of spares */
315 RF_MallocAndAdd(raidPtr
->Disks
, (raidPtr
->numCol
+ RF_MAXSPARE
) *
316 sizeof(RF_RaidDisk_t
), (RF_RaidDisk_t
*),
317 raidPtr
->cleanupList
);
318 if (raidPtr
->Disks
== NULL
) {
323 /* get space for device specific stuff.. */
324 RF_MallocAndAdd(raidPtr
->raid_cinfo
,
325 (raidPtr
->numCol
+ RF_MAXSPARE
) *
326 sizeof(struct raidcinfo
), (struct raidcinfo
*),
327 raidPtr
->cleanupList
);
329 if (raidPtr
->raid_cinfo
== NULL
) {
336 rf_UnconfigureVnodes( raidPtr
);
342 /* configure all the disks of a RAID set during auto-configuration at boot */
344 rf_AutoConfigureDisks(RF_Raid_t
*raidPtr
, RF_Config_t
*cfgPtr
,
345 RF_AutoConfig_t
*auto_config
)
347 RF_RaidDisk_t
*disks
;
348 RF_RaidDisk_t
*diskPtr
;
350 RF_SectorCount_t min_numblks
= (RF_SectorCount_t
) 0x7FFFFFFFFFFFLL
;
352 int numFailuresThisRow
;
356 int mod_counter_found
;
359 printf("Starting autoconfiguration of RAID set...\n");
362 ret
= rf_AllocDiskStructures(raidPtr
, cfgPtr
);
366 disks
= raidPtr
->Disks
;
368 /* assume the parity will be fine.. */
369 parity_good
= RF_RAID_CLEAN
;
371 /* Check for mod_counters that are too low */
372 mod_counter_found
= 0;
376 if (mod_counter_found
==0) {
377 mod_counter
= ac
->clabel
->mod_counter
;
378 mod_counter_found
= 1;
380 if (ac
->clabel
->mod_counter
> mod_counter
) {
381 mod_counter
= ac
->clabel
->mod_counter
;
384 ac
->flag
= 0; /* clear the general purpose flag */
390 numFailuresThisRow
= 0;
391 for (c
= 0; c
< raidPtr
->numCol
; c
++) {
394 /* find this row/col in the autoconfig */
396 printf("Looking for %d in autoconfig\n",c
);
400 if (ac
->clabel
==NULL
) {
401 /* big-time bad news. */
404 if ((ac
->clabel
->column
== c
) &&
405 (ac
->clabel
->mod_counter
== mod_counter
)) {
406 /* it's this one... */
407 /* flag it as 'used', so we don't
411 printf("Found: %s at %d\n",
421 /* we didn't find an exact match with a
422 correct mod_counter above... can we find
423 one with an incorrect mod_counter to use
424 instead? (this one, if we find it, will be
425 marked as failed once the set configures)
430 if (ac
->clabel
==NULL
) {
431 /* big-time bad news. */
434 if (ac
->clabel
->column
== c
) {
436 flag it as 'used', so we
437 don't free it later. */
440 printf("Found(low mod_counter): %s at %d\n",
453 /* Found it. Configure it.. */
454 diskPtr
->blockSize
= ac
->clabel
->blockSize
;
455 diskPtr
->numBlocks
= ac
->clabel
->numBlocks
;
456 /* Note: rf_protectedSectors is already
457 factored into numBlocks here */
458 raidPtr
->raid_cinfo
[c
].ci_vp
= ac
->vp
;
459 raidPtr
->raid_cinfo
[c
].ci_dev
= ac
->dev
;
461 memcpy(raidget_component_label(raidPtr
, c
),
462 ac
->clabel
, sizeof(*ac
->clabel
));
463 snprintf(diskPtr
->devname
, sizeof(diskPtr
->devname
),
464 "/dev/%s", ac
->devname
);
466 /* note the fact that this component was
467 autoconfigured. You'll need this info
468 later. Trust me :) */
469 diskPtr
->auto_configured
= 1;
470 diskPtr
->dev
= ac
->dev
;
473 * we allow the user to specify that
474 * only a fraction of the disks should
475 * be used; this is just for debug: it
476 * speeds up the parity scan
479 diskPtr
->numBlocks
= diskPtr
->numBlocks
*
480 rf_sizePercentage
/ 100;
482 /* XXX these will get set multiple times,
483 but since we're autoconfiguring, they'd
484 better be always the same each time!
485 If not, this is the least of your worries */
487 bs
= diskPtr
->blockSize
;
488 min_numblks
= diskPtr
->numBlocks
;
490 /* this gets done multiple times, but that's
491 fine -- the serial number will be the same
492 for all components, guaranteed */
493 raidPtr
->serial_number
= ac
->clabel
->serial_number
;
494 /* check the last time the label was modified */
496 if (ac
->clabel
->mod_counter
!= mod_counter
) {
497 /* Even though we've filled in all of
498 the above, we don't trust this
499 component since its modification
500 counter is not in sync with the
501 rest, and we really consider it to
503 disks
[c
].status
= rf_ds_failed
;
504 numFailuresThisRow
++;
506 if (ac
->clabel
->clean
!= RF_RAID_CLEAN
) {
507 parity_good
= RF_RAID_DIRTY
;
511 /* Didn't find it at all!! Component must
513 disks
[c
].status
= rf_ds_failed
;
514 snprintf(disks
[c
].devname
, sizeof(disks
[c
].devname
),
516 numFailuresThisRow
++;
519 /* XXX fix for n-fault tolerant */
520 /* XXX this should probably check to see how many failures
521 we can handle for this configuration! */
522 if (numFailuresThisRow
> 0) {
523 raidPtr
->status
= rf_rs_degraded
;
524 raidPtr
->numFailures
= numFailuresThisRow
;
527 /* close the device for the ones that didn't get used */
532 vn_lock(ac
->vp
, LK_EXCLUSIVE
| LK_RETRY
);
533 VOP_CLOSE(ac
->vp
, FREAD
| FWRITE
, NOCRED
);
537 printf("Released %s from auto-config set.\n",
544 raidPtr
->mod_counter
= mod_counter
;
546 /* note the state of the parity, if any */
547 raidPtr
->parity_good
= parity_good
;
548 raidPtr
->sectorsPerDisk
= min_numblks
;
549 raidPtr
->logBytesPerSector
= ffs(bs
) - 1;
550 raidPtr
->bytesPerSector
= bs
;
551 raidPtr
->sectorMask
= bs
- 1;
556 rf_UnconfigureVnodes( raidPtr
);
562 /* configure a single disk in the array */
564 rf_ConfigureDisk(RF_Raid_t
*raidPtr
, char *bf
, RF_RaidDisk_t
*diskPtr
,
572 p
= rf_find_non_white(bf
);
573 if (p
[strlen(p
) - 1] == '\n') {
574 /* strip off the newline */
575 p
[strlen(p
) - 1] = '\0';
577 (void) strcpy(diskPtr
->devname
, p
);
579 /* Let's start by claiming the component is fine and well... */
580 diskPtr
->status
= rf_ds_optimal
;
582 raidPtr
->raid_cinfo
[col
].ci_vp
= NULL
;
583 raidPtr
->raid_cinfo
[col
].ci_dev
= 0;
585 if (!strcmp("absent", diskPtr
->devname
)) {
586 printf("Ignoring missing component at column %d\n", col
);
587 sprintf(diskPtr
->devname
, "component%d", col
);
588 diskPtr
->status
= rf_ds_failed
;
592 error
= dk_lookup(diskPtr
->devname
, curlwp
, &vp
, UIO_SYSSPACE
);
594 printf("dk_lookup on device: %s failed!\n", diskPtr
->devname
);
595 if (error
== ENXIO
) {
596 /* the component isn't there... must be dead :-( */
597 diskPtr
->status
= rf_ds_failed
;
602 if (diskPtr
->status
== rf_ds_optimal
) {
604 if ((error
= VOP_GETATTR(vp
, &va
, curlwp
->l_cred
)) != 0)
606 if ((error
= rf_getdisksize(vp
, curlwp
, diskPtr
)) != 0)
609 raidPtr
->raid_cinfo
[col
].ci_vp
= vp
;
610 raidPtr
->raid_cinfo
[col
].ci_dev
= va
.va_rdev
;
612 /* This component was not automatically configured */
613 diskPtr
->auto_configured
= 0;
614 diskPtr
->dev
= va
.va_rdev
;
616 /* we allow the user to specify that only a fraction of the
617 * disks should be used; this is just for debug: it speeds up
619 diskPtr
->numBlocks
= diskPtr
->numBlocks
*
620 rf_sizePercentage
/ 100;
626 rf_print_label_status(RF_Raid_t
*raidPtr
, int column
, char *dev_name
,
627 RF_ComponentLabel_t
*ci_label
)
630 printf("raid%d: Component %s being configured at col: %d\n",
631 raidPtr
->raidid
, dev_name
, column
);
632 printf(" Column: %d Num Columns: %d\n",
634 ci_label
->num_columns
);
635 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
636 ci_label
->version
, ci_label
->serial_number
,
637 ci_label
->mod_counter
);
638 printf(" Clean: %s Status: %d\n",
639 ci_label
->clean
? "Yes" : "No", ci_label
->status
);
642 static int rf_check_label_vitals(RF_Raid_t
*raidPtr
, int row
, int column
,
643 char *dev_name
, RF_ComponentLabel_t
*ci_label
,
644 int serial_number
, int mod_counter
)
648 if (serial_number
!= ci_label
->serial_number
) {
649 printf("%s has a different serial number: %d %d\n",
650 dev_name
, serial_number
, ci_label
->serial_number
);
653 if (mod_counter
!= ci_label
->mod_counter
) {
654 printf("%s has a different modification count: %d %d\n",
655 dev_name
, mod_counter
, ci_label
->mod_counter
);
658 if (row
!= ci_label
->row
) {
659 printf("Row out of alignment for: %s\n", dev_name
);
662 if (column
!= ci_label
->column
) {
663 printf("Column out of alignment for: %s\n", dev_name
);
666 if (raidPtr
->numCol
!= ci_label
->num_columns
) {
667 printf("Number of columns do not match for: %s\n", dev_name
);
670 if (ci_label
->clean
== 0) {
671 /* it's not clean, but that's not fatal */
672 printf("%s is not clean!\n", dev_name
);
680 rf_CheckLabels() - check all the component labels for consistency.
681 Return an error if there is anything major amiss.
686 rf_CheckLabels(RF_Raid_t
*raidPtr
, RF_Config_t
*cfgPtr
)
690 RF_ComponentLabel_t
*ci_label
;
691 int serial_number
= 0;
709 force
= cfgPtr
->force
;
712 We're going to try to be a little intelligent here. If one
713 component's label is bogus, and we can identify that it's the
714 *only* one that's gone, we'll mark it as "failed" and allow
715 the configuration to proceed. This will be the *only* case
716 that we'll proceed if there would be (otherwise) fatal errors.
718 Basically we simply keep a count of how many components had
719 what serial number. If all but one agree, we simply mark
720 the disagreeing component as being failed, and allow
721 things to come up "normally".
723 We do this first for serial numbers, and then for "mod_counter".
730 for (c
= 0; c
< raidPtr
->numCol
; c
++) {
731 ci_label
= raidget_component_label(raidPtr
, c
);
733 for(i
=0;i
<num_ser
;i
++) {
734 if (ser_values
[i
] == ci_label
->serial_number
) {
741 ser_values
[num_ser
] = ci_label
->serial_number
;
742 ser_count
[num_ser
] = 1;
750 for(i
=0;i
<num_mod
;i
++) {
751 if (mod_values
[i
] == ci_label
->mod_counter
) {
758 mod_values
[num_mod
] = ci_label
->mod_counter
;
759 mod_count
[num_mod
] = 1;
768 printf("raid%d: Summary of serial numbers:\n", raidPtr
->raidid
);
769 for(i
=0;i
<num_ser
;i
++) {
770 printf("%d %d\n", ser_values
[i
], ser_count
[i
]);
772 printf("raid%d: Summary of mod counters:\n", raidPtr
->raidid
);
773 for(i
=0;i
<num_mod
;i
++) {
774 printf("%d %d\n", mod_values
[i
], mod_count
[i
]);
777 serial_number
= ser_values
[0];
779 if ((ser_count
[0] == 1) || (ser_count
[1] == 1)) {
780 /* Locate the maverick component */
781 if (ser_count
[1] > ser_count
[0]) {
782 serial_number
= ser_values
[1];
785 for (c
= 0; c
< raidPtr
->numCol
; c
++) {
786 ci_label
= raidget_component_label(raidPtr
, c
);
787 if (serial_number
!= ci_label
->serial_number
) {
792 printf("Hosed component: %s\n",
793 &cfgPtr
->devnames
[0][hosed_column
][0]);
795 /* we'll fail this component, as if there are
796 other major errors, we aren't forcing things
797 and we'll abort the config anyways */
798 raidPtr
->Disks
[hosed_column
].status
800 raidPtr
->numFailures
++;
801 raidPtr
->status
= rf_rs_degraded
;
806 if (cfgPtr
->parityConfig
== '0') {
807 /* We've identified two different serial numbers.
808 RAID 0 can't cope with that, so we'll punt */
814 /* record the serial number for later. If we bail later, setting
815 this doesn't matter, otherwise we've got the best guess at the
816 correct serial number */
817 raidPtr
->serial_number
= serial_number
;
819 mod_number
= mod_values
[0];
821 if ((mod_count
[0] == 1) || (mod_count
[1] == 1)) {
822 /* Locate the maverick component */
823 if (mod_count
[1] > mod_count
[0]) {
824 mod_number
= mod_values
[1];
825 } else if (mod_count
[1] < mod_count
[0]) {
826 mod_number
= mod_values
[0];
828 /* counts of different modification values
829 are the same. Assume greater value is
830 the correct one, all other things
832 if (mod_values
[0] > mod_values
[1]) {
833 mod_number
= mod_values
[0];
835 mod_number
= mod_values
[1];
840 for (c
= 0; c
< raidPtr
->numCol
; c
++) {
841 ci_label
= raidget_component_label(raidPtr
, c
);
842 if (mod_number
!= ci_label
->mod_counter
) {
843 if (hosed_column
== c
) {
855 printf("Hosed component: %s\n",
856 &cfgPtr
->devnames
[0][hosed_column
][0]);
858 /* we'll fail this component, as if there are
859 other major errors, we aren't forcing things
860 and we'll abort the config anyways */
861 if (raidPtr
->Disks
[hosed_column
].status
!= rf_ds_failed
) {
862 raidPtr
->Disks
[hosed_column
].status
864 raidPtr
->numFailures
++;
865 raidPtr
->status
= rf_rs_degraded
;
871 if (cfgPtr
->parityConfig
== '0') {
872 /* We've identified two different mod counters.
873 RAID 0 can't cope with that, so we'll punt */
878 raidPtr
->mod_counter
= mod_number
;
881 /* we've had both a serial number mismatch, and a mod_counter
882 mismatch -- and they involved two different components!!
883 Bail -- make things fail so that the user must force
890 printf("raid%d: Too many different serial numbers!\n",
896 printf("raid%d: Too many different mod counters!\n",
901 /* we start by assuming the parity will be good, and flee from
902 that notion at the slightest sign of trouble */
904 parity_good
= RF_RAID_CLEAN
;
906 for (c
= 0; c
< raidPtr
->numCol
; c
++) {
907 dev_name
= &cfgPtr
->devnames
[0][c
][0];
908 ci_label
= raidget_component_label(raidPtr
, c
);
910 if (c
== hosed_column
) {
911 printf("raid%d: Ignoring %s\n",
912 raidPtr
->raidid
, dev_name
);
914 rf_print_label_status( raidPtr
, c
, dev_name
, ci_label
);
915 if (rf_check_label_vitals( raidPtr
, 0, c
,
921 if (ci_label
->clean
!= RF_RAID_CLEAN
) {
922 parity_good
= RF_RAID_DIRTY
;
928 parity_good
= RF_RAID_DIRTY
;
931 /* we note the state of the parity */
932 raidPtr
->parity_good
= parity_good
;
938 rf_add_hot_spare(RF_Raid_t
*raidPtr
, RF_SingleComponent_t
*sparePtr
)
940 RF_RaidDisk_t
*disks
;
941 RF_DiskQueue_t
*spareQueues
;
948 if (raidPtr
->numSpare
>= RF_MAXSPARE
) {
949 RF_ERRORMSG1("Too many spares: %d\n", raidPtr
->numSpare
);
953 RF_LOCK_MUTEX(raidPtr
->mutex
);
954 while (raidPtr
->adding_hot_spare
==1) {
955 ltsleep(&(raidPtr
->adding_hot_spare
), PRIBIO
, "raidhs", 0,
958 raidPtr
->adding_hot_spare
=1;
959 RF_UNLOCK_MUTEX(raidPtr
->mutex
);
961 /* the beginning of the spares... */
962 disks
= &raidPtr
->Disks
[raidPtr
->numCol
];
964 spare_number
= raidPtr
->numSpare
;
966 ret
= rf_ConfigureDisk(raidPtr
, sparePtr
->component_name
,
967 &disks
[spare_number
],
968 raidPtr
->numCol
+ spare_number
);
972 if (disks
[spare_number
].status
!= rf_ds_optimal
) {
973 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
974 sparePtr
->component_name
);
975 rf_close_component(raidPtr
, raidPtr
->raid_cinfo
[raidPtr
->numCol
+spare_number
].ci_vp
, 0);
979 disks
[spare_number
].status
= rf_ds_spare
;
980 DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64
" blockSize %d (%ld MB)\n",
982 disks
[spare_number
].devname
,
983 disks
[spare_number
].numBlocks
,
984 disks
[spare_number
].blockSize
,
985 (long int) disks
[spare_number
].numBlocks
*
986 disks
[spare_number
].blockSize
/ 1024 / 1024);
990 /* check sizes and block sizes on the spare disk */
991 bs
= 1 << raidPtr
->logBytesPerSector
;
992 if (disks
[spare_number
].blockSize
!= bs
) {
993 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks
[spare_number
].blockSize
, disks
[spare_number
].devname
, bs
);
994 rf_close_component(raidPtr
, raidPtr
->raid_cinfo
[raidPtr
->numCol
+spare_number
].ci_vp
, 0);
998 if (disks
[spare_number
].numBlocks
< raidPtr
->sectorsPerDisk
) {
999 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64
" blocks)\n",
1000 disks
[spare_number
].devname
,
1001 disks
[spare_number
].blockSize
,
1002 raidPtr
->sectorsPerDisk
);
1003 rf_close_component(raidPtr
, raidPtr
->raid_cinfo
[raidPtr
->numCol
+spare_number
].ci_vp
, 0);
1007 if (disks
[spare_number
].numBlocks
>
1008 raidPtr
->sectorsPerDisk
) {
1009 RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64
" blocks (from %" PRIu64
")\n",
1010 disks
[spare_number
].devname
,
1011 raidPtr
->sectorsPerDisk
,
1012 disks
[spare_number
].numBlocks
);
1014 disks
[spare_number
].numBlocks
= raidPtr
->sectorsPerDisk
;
1018 spareQueues
= &raidPtr
->Queues
[raidPtr
->numCol
];
1019 ret
= rf_ConfigureDiskQueue( raidPtr
, &spareQueues
[spare_number
],
1020 raidPtr
->numCol
+ spare_number
,
1022 raidPtr
->sectorsPerDisk
,
1023 raidPtr
->Disks
[raidPtr
->numCol
+
1025 raidPtr
->maxOutstanding
,
1026 &raidPtr
->shutdownList
,
1027 raidPtr
->cleanupList
);
1029 RF_LOCK_MUTEX(raidPtr
->mutex
);
1030 raidPtr
->numSpare
++;
1031 RF_UNLOCK_MUTEX(raidPtr
->mutex
);
1034 RF_LOCK_MUTEX(raidPtr
->mutex
);
1035 raidPtr
->adding_hot_spare
=0;
1036 wakeup(&(raidPtr
->adding_hot_spare
));
1037 RF_UNLOCK_MUTEX(raidPtr
->mutex
);
1043 rf_remove_hot_spare(RF_Raid_t
*raidPtr
, RF_SingleComponent_t
*sparePtr
)
1048 if (raidPtr
->numSpare
==0) {
1049 printf("No spares to remove!\n");
1053 spare_number
= sparePtr
->column
;
1055 return(EINVAL
); /* XXX not implemented yet */
1057 if (spare_number
< 0 || spare_number
> raidPtr
->numSpare
) {
1061 /* verify that this spare isn't in use... */
1068 raidPtr
->numSpare
--;
1076 rf_delete_component(RF_Raid_t
*raidPtr
, RF_SingleComponent_t
*component
)
1078 RF_RaidDisk_t
*disks
;
1080 if ((component
->column
< 0) ||
1081 (component
->column
>= raidPtr
->numCol
)) {
1085 disks
= &raidPtr
->Disks
[component
->column
];
1087 /* 1. This component must be marked as 'failed' */
1089 return(EINVAL
); /* Not implemented yet. */
1093 rf_incorporate_hot_spare(RF_Raid_t
*raidPtr
,
1094 RF_SingleComponent_t
*component
)
1097 /* Issues here include how to 'move' this in if there is IO
1098 taking place (e.g. component queues and such) */
1100 return(EINVAL
); /* Not implemented yet. */