Expand PMF_FN_* macros.
[netbsd-mini2440.git] / sys / dev / raidframe / rf_disks.c
blob1e1ad05796b1381c19bf69b83cc64abe983b4814
1 /* $NetBSD: rf_disks.c,v 1.71 2009/04/03 16:23:41 sborrill Exp $ */
2 /*-
3 * Copyright (c) 1999 The NetBSD Foundation, Inc.
4 * All rights reserved.
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
32 * Copyright (c) 1995 Carnegie-Mellon University.
33 * All rights reserved.
35 * Author: Mark Holland
37 * Permission to use, copy, modify and distribute this software and
38 * its documentation is hereby granted, provided that both the copyright
39 * notice and this permission notice appear in all copies of the
40 * software, derivative works or modified versions, and any portions
41 * thereof, and that both notices appear in supporting documentation.
43 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
44 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
45 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
47 * Carnegie Mellon requests users of this software to return to
49 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
50 * School of Computer Science
51 * Carnegie Mellon University
52 * Pittsburgh PA 15213-3890
54 * any improvements or extensions that they make and grant Carnegie the
55 * rights to redistribute these changes.
58 /***************************************************************
59 * rf_disks.c -- code to perform operations on the actual disks
60 ***************************************************************/
62 #include <sys/cdefs.h>
63 __KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.71 2009/04/03 16:23:41 sborrill Exp $");
65 #include <dev/raidframe/raidframevar.h>
67 #include "rf_raid.h"
68 #include "rf_alloclist.h"
69 #include "rf_utils.h"
70 #include "rf_general.h"
71 #include "rf_options.h"
72 #include "rf_kintf.h"
73 #include "rf_netbsd.h"
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/proc.h>
78 #include <sys/ioctl.h>
79 #include <sys/fcntl.h>
80 #include <sys/vnode.h>
81 #include <sys/kauth.h>
83 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
84 static void rf_print_label_status( RF_Raid_t *, int, char *,
85 RF_ComponentLabel_t *);
86 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
87 RF_ComponentLabel_t *, int, int );
/*
 * Debug printf wrappers: print only when rf_diskDebug is non-zero.
 *
 * Each macro is wrapped in do { } while (0) so that it expands to exactly
 * one statement.  A bare "if (rf_diskDebug) printf(...)" would silently
 * capture an "else" that follows the macro invocation.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
92 /**************************************************************************
94 * initialize the disks comprising the array
96 * We want the spare disks to have regular row,col numbers so that we can
97 * easily substitute a spare for a failed disk. But, the driver code assumes
98 * throughout that the array contains numRow by numCol _non-spare_ disks, so
99 * it's not clear how to fit in the spares. This is an unfortunate holdover
100 * from raidSim. The quick and dirty fix is to make row zero bigger than the
101 * rest, and put all the spares in it. This probably needs to get changed
102 * eventually.
104 **************************************************************************/
107 rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
108 RF_Config_t *cfgPtr)
110 RF_RaidDisk_t *disks;
111 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
112 RF_RowCol_t c;
113 int bs, ret;
114 unsigned i, count, foundone = 0, numFailuresThisRow;
115 int force;
117 force = cfgPtr->force;
119 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
120 if (ret)
121 goto fail;
123 disks = raidPtr->Disks;
125 numFailuresThisRow = 0;
126 for (c = 0; c < raidPtr->numCol; c++) {
127 ret = rf_ConfigureDisk(raidPtr,
128 &cfgPtr->devnames[0][c][0],
129 &disks[c], c);
131 if (ret)
132 goto fail;
134 if (disks[c].status == rf_ds_optimal) {
135 raidfetch_component_label(raidPtr, c);
138 if (disks[c].status != rf_ds_optimal) {
139 numFailuresThisRow++;
140 } else {
141 if (disks[c].numBlocks < min_numblks)
142 min_numblks = disks[c].numBlocks;
143 DPRINTF6("Disk at col %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
144 c, disks[c].devname,
145 disks[c].numBlocks,
146 disks[c].blockSize,
147 (long int) disks[c].numBlocks *
148 disks[c].blockSize / 1024 / 1024);
151 /* XXX fix for n-fault tolerant */
152 /* XXX this should probably check to see how many failures
153 we can handle for this configuration! */
154 if (numFailuresThisRow > 0)
155 raidPtr->status = rf_rs_degraded;
157 /* all disks must be the same size & have the same block size, bs must
158 * be a power of 2 */
159 bs = 0;
160 foundone = 0;
161 for (c = 0; c < raidPtr->numCol; c++) {
162 if (disks[c].status == rf_ds_optimal) {
163 bs = disks[c].blockSize;
164 foundone = 1;
165 break;
168 if (!foundone) {
169 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
170 ret = EINVAL;
171 goto fail;
173 for (count = 0, i = 1; i; i <<= 1)
174 if (bs & i)
175 count++;
176 if (count != 1) {
177 RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
178 ret = EINVAL;
179 goto fail;
182 if (rf_CheckLabels( raidPtr, cfgPtr )) {
183 printf("raid%d: There were fatal errors\n", raidPtr->raidid);
184 if (force != 0) {
185 printf("raid%d: Fatal errors being ignored.\n",
186 raidPtr->raidid);
187 } else {
188 ret = EINVAL;
189 goto fail;
193 for (c = 0; c < raidPtr->numCol; c++) {
194 if (disks[c].status == rf_ds_optimal) {
195 if (disks[c].blockSize != bs) {
196 RF_ERRORMSG1("Error: block size of disk at c %d different from disk at c 0\n", c);
197 ret = EINVAL;
198 goto fail;
200 if (disks[c].numBlocks != min_numblks) {
201 RF_ERRORMSG2("WARNING: truncating disk at c %d to %d blocks\n",
202 c, (int) min_numblks);
203 disks[c].numBlocks = min_numblks;
208 raidPtr->sectorsPerDisk = min_numblks;
209 raidPtr->logBytesPerSector = ffs(bs) - 1;
210 raidPtr->bytesPerSector = bs;
211 raidPtr->sectorMask = bs - 1;
212 return (0);
214 fail:
216 rf_UnconfigureVnodes( raidPtr );
218 return (ret);
222 /****************************************************************************
223 * set up the data structures describing the spare disks in the array
224 * recall from the above comment that the spare disk descriptors are stored
225 * in row zero, which is specially expanded to hold them.
226 ****************************************************************************/
228 rf_ConfigureSpareDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
229 RF_Config_t *cfgPtr)
231 int i, ret;
232 unsigned int bs;
233 RF_RaidDisk_t *disks;
234 int num_spares_done;
236 num_spares_done = 0;
238 /* The space for the spares should have already been allocated by
239 * ConfigureDisks() */
241 disks = &raidPtr->Disks[raidPtr->numCol];
242 for (i = 0; i < raidPtr->numSpare; i++) {
243 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
244 &disks[i], raidPtr->numCol + i);
245 if (ret)
246 goto fail;
247 if (disks[i].status != rf_ds_optimal) {
248 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
249 &cfgPtr->spare_names[i][0]);
250 } else {
251 disks[i].status = rf_ds_spare; /* change status to
252 * spare */
253 DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n", i,
254 disks[i].devname,
255 disks[i].numBlocks, disks[i].blockSize,
256 (long int) disks[i].numBlocks *
257 disks[i].blockSize / 1024 / 1024);
259 num_spares_done++;
262 /* check sizes and block sizes on spare disks */
263 bs = 1 << raidPtr->logBytesPerSector;
264 for (i = 0; i < raidPtr->numSpare; i++) {
265 if (disks[i].blockSize != bs) {
266 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
267 ret = EINVAL;
268 goto fail;
270 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
271 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n",
272 disks[i].devname, disks[i].blockSize,
273 raidPtr->sectorsPerDisk);
274 ret = EINVAL;
275 goto fail;
276 } else
277 if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
278 RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n",
279 disks[i].devname,
280 raidPtr->sectorsPerDisk,
281 disks[i].numBlocks);
283 disks[i].numBlocks = raidPtr->sectorsPerDisk;
287 return (0);
289 fail:
291 /* Release the hold on the main components. We've failed to allocate
292 * a spare, and since we're failing, we need to free things..
294 XXX failing to allocate a spare is *not* that big of a deal...
295 We *can* survive without it, if need be, esp. if we get hot
296 adding working.
298 If we don't fail out here, then we need a way to remove this spare...
299 that should be easier to do here than if we are "live"...
303 rf_UnconfigureVnodes( raidPtr );
305 return (ret);
308 static int
309 rf_AllocDiskStructures(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
311 int ret;
313 /* We allocate RF_MAXSPARE on the first row so that we
314 have room to do hot-swapping of spares */
315 RF_MallocAndAdd(raidPtr->Disks, (raidPtr->numCol + RF_MAXSPARE) *
316 sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
317 raidPtr->cleanupList);
318 if (raidPtr->Disks == NULL) {
319 ret = ENOMEM;
320 goto fail;
323 /* get space for device specific stuff.. */
324 RF_MallocAndAdd(raidPtr->raid_cinfo,
325 (raidPtr->numCol + RF_MAXSPARE) *
326 sizeof(struct raidcinfo), (struct raidcinfo *),
327 raidPtr->cleanupList);
329 if (raidPtr->raid_cinfo == NULL) {
330 ret = ENOMEM;
331 goto fail;
334 return(0);
335 fail:
336 rf_UnconfigureVnodes( raidPtr );
338 return(ret);
342 /* configure a single disk during auto-configuration at boot */
344 rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr,
345 RF_AutoConfig_t *auto_config)
347 RF_RaidDisk_t *disks;
348 RF_RaidDisk_t *diskPtr;
349 RF_RowCol_t c;
350 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
351 int bs, ret;
352 int numFailuresThisRow;
353 RF_AutoConfig_t *ac;
354 int parity_good;
355 int mod_counter;
356 int mod_counter_found;
358 #if DEBUG
359 printf("Starting autoconfiguration of RAID set...\n");
360 #endif
362 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
363 if (ret)
364 goto fail;
366 disks = raidPtr->Disks;
368 /* assume the parity will be fine.. */
369 parity_good = RF_RAID_CLEAN;
371 /* Check for mod_counters that are too low */
372 mod_counter_found = 0;
373 mod_counter = 0;
374 ac = auto_config;
375 while(ac!=NULL) {
376 if (mod_counter_found==0) {
377 mod_counter = ac->clabel->mod_counter;
378 mod_counter_found = 1;
379 } else {
380 if (ac->clabel->mod_counter > mod_counter) {
381 mod_counter = ac->clabel->mod_counter;
384 ac->flag = 0; /* clear the general purpose flag */
385 ac = ac->next;
388 bs = 0;
390 numFailuresThisRow = 0;
391 for (c = 0; c < raidPtr->numCol; c++) {
392 diskPtr = &disks[c];
394 /* find this row/col in the autoconfig */
395 #if DEBUG
396 printf("Looking for %d in autoconfig\n",c);
397 #endif
398 ac = auto_config;
399 while(ac!=NULL) {
400 if (ac->clabel==NULL) {
401 /* big-time bad news. */
402 goto fail;
404 if ((ac->clabel->column == c) &&
405 (ac->clabel->mod_counter == mod_counter)) {
406 /* it's this one... */
407 /* flag it as 'used', so we don't
408 free it later. */
409 ac->flag = 1;
410 #if DEBUG
411 printf("Found: %s at %d\n",
412 ac->devname,c);
413 #endif
415 break;
417 ac=ac->next;
420 if (ac==NULL) {
421 /* we didn't find an exact match with a
422 correct mod_counter above... can we find
423 one with an incorrect mod_counter to use
424 instead? (this one, if we find it, will be
425 marked as failed once the set configures)
428 ac = auto_config;
429 while(ac!=NULL) {
430 if (ac->clabel==NULL) {
431 /* big-time bad news. */
432 goto fail;
434 if (ac->clabel->column == c) {
435 /* it's this one...
436 flag it as 'used', so we
437 don't free it later. */
438 ac->flag = 1;
439 #if DEBUG
440 printf("Found(low mod_counter): %s at %d\n",
441 ac->devname,c);
442 #endif
444 break;
446 ac=ac->next;
452 if (ac!=NULL) {
453 /* Found it. Configure it.. */
454 diskPtr->blockSize = ac->clabel->blockSize;
455 diskPtr->numBlocks = ac->clabel->numBlocks;
456 /* Note: rf_protectedSectors is already
457 factored into numBlocks here */
458 raidPtr->raid_cinfo[c].ci_vp = ac->vp;
459 raidPtr->raid_cinfo[c].ci_dev = ac->dev;
461 memcpy(raidget_component_label(raidPtr, c),
462 ac->clabel, sizeof(*ac->clabel));
463 snprintf(diskPtr->devname, sizeof(diskPtr->devname),
464 "/dev/%s", ac->devname);
466 /* note the fact that this component was
467 autoconfigured. You'll need this info
468 later. Trust me :) */
469 diskPtr->auto_configured = 1;
470 diskPtr->dev = ac->dev;
473 * we allow the user to specify that
474 * only a fraction of the disks should
475 * be used this is just for debug: it
476 * speeds up the parity scan
479 diskPtr->numBlocks = diskPtr->numBlocks *
480 rf_sizePercentage / 100;
482 /* XXX these will get set multiple times,
483 but since we're autoconfiguring, they'd
484 better be always the same each time!
485 If not, this is the least of your worries */
487 bs = diskPtr->blockSize;
488 min_numblks = diskPtr->numBlocks;
490 /* this gets done multiple times, but that's
491 fine -- the serial number will be the same
492 for all components, guaranteed */
493 raidPtr->serial_number = ac->clabel->serial_number;
494 /* check the last time the label was modified */
496 if (ac->clabel->mod_counter != mod_counter) {
497 /* Even though we've filled in all of
498 the above, we don't trust this
499 component since it's modification
500 counter is not in sync with the
501 rest, and we really consider it to
502 be failed. */
503 disks[c].status = rf_ds_failed;
504 numFailuresThisRow++;
505 } else {
506 if (ac->clabel->clean != RF_RAID_CLEAN) {
507 parity_good = RF_RAID_DIRTY;
510 } else {
511 /* Didn't find it at all!! Component must
512 really be dead */
513 disks[c].status = rf_ds_failed;
514 snprintf(disks[c].devname, sizeof(disks[c].devname),
515 "component%d", c);
516 numFailuresThisRow++;
519 /* XXX fix for n-fault tolerant */
520 /* XXX this should probably check to see how many failures
521 we can handle for this configuration! */
522 if (numFailuresThisRow > 0) {
523 raidPtr->status = rf_rs_degraded;
524 raidPtr->numFailures = numFailuresThisRow;
527 /* close the device for the ones that didn't get used */
529 ac = auto_config;
530 while(ac!=NULL) {
531 if (ac->flag == 0) {
532 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
533 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
534 vput(ac->vp);
535 ac->vp = NULL;
536 #if DEBUG
537 printf("Released %s from auto-config set.\n",
538 ac->devname);
539 #endif
541 ac = ac->next;
544 raidPtr->mod_counter = mod_counter;
546 /* note the state of the parity, if any */
547 raidPtr->parity_good = parity_good;
548 raidPtr->sectorsPerDisk = min_numblks;
549 raidPtr->logBytesPerSector = ffs(bs) - 1;
550 raidPtr->bytesPerSector = bs;
551 raidPtr->sectorMask = bs - 1;
552 return (0);
554 fail:
556 rf_UnconfigureVnodes( raidPtr );
558 return (ret);
562 /* configure a single disk in the array */
564 rf_ConfigureDisk(RF_Raid_t *raidPtr, char *bf, RF_RaidDisk_t *diskPtr,
565 RF_RowCol_t col)
567 char *p;
568 struct vnode *vp;
569 struct vattr va;
570 int error;
572 p = rf_find_non_white(bf);
573 if (p[strlen(p) - 1] == '\n') {
574 /* strip off the newline */
575 p[strlen(p) - 1] = '\0';
577 (void) strcpy(diskPtr->devname, p);
579 /* Let's start by claiming the component is fine and well... */
580 diskPtr->status = rf_ds_optimal;
582 raidPtr->raid_cinfo[col].ci_vp = NULL;
583 raidPtr->raid_cinfo[col].ci_dev = 0;
585 if (!strcmp("absent", diskPtr->devname)) {
586 printf("Ignoring missing component at column %d\n", col);
587 sprintf(diskPtr->devname, "component%d", col);
588 diskPtr->status = rf_ds_failed;
589 return (0);
592 error = dk_lookup(diskPtr->devname, curlwp, &vp, UIO_SYSSPACE);
593 if (error) {
594 printf("dk_lookup on device: %s failed!\n", diskPtr->devname);
595 if (error == ENXIO) {
596 /* the component isn't there... must be dead :-( */
597 diskPtr->status = rf_ds_failed;
598 } else {
599 return (error);
602 if (diskPtr->status == rf_ds_optimal) {
604 if ((error = VOP_GETATTR(vp, &va, curlwp->l_cred)) != 0)
605 return (error);
606 if ((error = rf_getdisksize(vp, curlwp, diskPtr)) != 0)
607 return (error);
609 raidPtr->raid_cinfo[col].ci_vp = vp;
610 raidPtr->raid_cinfo[col].ci_dev = va.va_rdev;
612 /* This component was not automatically configured */
613 diskPtr->auto_configured = 0;
614 diskPtr->dev = va.va_rdev;
616 /* we allow the user to specify that only a fraction of the
617 * disks should be used this is just for debug: it speeds up
618 * the parity scan */
619 diskPtr->numBlocks = diskPtr->numBlocks *
620 rf_sizePercentage / 100;
622 return (0);
625 static void
626 rf_print_label_status(RF_Raid_t *raidPtr, int column, char *dev_name,
627 RF_ComponentLabel_t *ci_label)
630 printf("raid%d: Component %s being configured at col: %d\n",
631 raidPtr->raidid, dev_name, column );
632 printf(" Column: %d Num Columns: %d\n",
633 ci_label->column,
634 ci_label->num_columns);
635 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
636 ci_label->version, ci_label->serial_number,
637 ci_label->mod_counter);
638 printf(" Clean: %s Status: %d\n",
639 ci_label->clean ? "Yes" : "No", ci_label->status );
642 static int rf_check_label_vitals(RF_Raid_t *raidPtr, int row, int column,
643 char *dev_name, RF_ComponentLabel_t *ci_label,
644 int serial_number, int mod_counter)
646 int fatal_error = 0;
648 if (serial_number != ci_label->serial_number) {
649 printf("%s has a different serial number: %d %d\n",
650 dev_name, serial_number, ci_label->serial_number);
651 fatal_error = 1;
653 if (mod_counter != ci_label->mod_counter) {
654 printf("%s has a different modification count: %d %d\n",
655 dev_name, mod_counter, ci_label->mod_counter);
658 if (row != ci_label->row) {
659 printf("Row out of alignment for: %s\n", dev_name);
660 fatal_error = 1;
662 if (column != ci_label->column) {
663 printf("Column out of alignment for: %s\n", dev_name);
664 fatal_error = 1;
666 if (raidPtr->numCol != ci_label->num_columns) {
667 printf("Number of columns do not match for: %s\n", dev_name);
668 fatal_error = 1;
670 if (ci_label->clean == 0) {
671 /* it's not clean, but that's not fatal */
672 printf("%s is not clean!\n", dev_name);
674 return(fatal_error);
680 rf_CheckLabels() - check all the component labels for consistency.
681 Return an error if there is anything major amiss.
686 rf_CheckLabels(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
688 int c;
689 char *dev_name;
690 RF_ComponentLabel_t *ci_label;
691 int serial_number = 0;
692 int mod_number = 0;
693 int fatal_error = 0;
694 int mod_values[4];
695 int mod_count[4];
696 int ser_values[4];
697 int ser_count[4];
698 int num_ser;
699 int num_mod;
700 int i;
701 int found;
702 int hosed_column;
703 int too_fatal;
704 int parity_good;
705 int force;
707 hosed_column = -1;
708 too_fatal = 0;
709 force = cfgPtr->force;
712 We're going to try to be a little intelligent here. If one
713 component's label is bogus, and we can identify that it's the
714 *only* one that's gone, we'll mark it as "failed" and allow
715 the configuration to proceed. This will be the *only* case
716 that we'll proceed if there would be (otherwise) fatal errors.
718 Basically we simply keep a count of how many components had
719 what serial number. If all but one agree, we simply mark
720 the disagreeing component as being failed, and allow
721 things to come up "normally".
723 We do this first for serial numbers, and then for "mod_counter".
727 num_ser = 0;
728 num_mod = 0;
730 for (c = 0; c < raidPtr->numCol; c++) {
731 ci_label = raidget_component_label(raidPtr, c);
732 found=0;
733 for(i=0;i<num_ser;i++) {
734 if (ser_values[i] == ci_label->serial_number) {
735 ser_count[i]++;
736 found=1;
737 break;
740 if (!found) {
741 ser_values[num_ser] = ci_label->serial_number;
742 ser_count[num_ser] = 1;
743 num_ser++;
744 if (num_ser>2) {
745 fatal_error = 1;
746 break;
749 found=0;
750 for(i=0;i<num_mod;i++) {
751 if (mod_values[i] == ci_label->mod_counter) {
752 mod_count[i]++;
753 found=1;
754 break;
757 if (!found) {
758 mod_values[num_mod] = ci_label->mod_counter;
759 mod_count[num_mod] = 1;
760 num_mod++;
761 if (num_mod>2) {
762 fatal_error = 1;
763 break;
767 #if DEBUG
768 printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
769 for(i=0;i<num_ser;i++) {
770 printf("%d %d\n", ser_values[i], ser_count[i]);
772 printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
773 for(i=0;i<num_mod;i++) {
774 printf("%d %d\n", mod_values[i], mod_count[i]);
776 #endif
777 serial_number = ser_values[0];
778 if (num_ser == 2) {
779 if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
780 /* Locate the maverick component */
781 if (ser_count[1] > ser_count[0]) {
782 serial_number = ser_values[1];
785 for (c = 0; c < raidPtr->numCol; c++) {
786 ci_label = raidget_component_label(raidPtr, c);
787 if (serial_number != ci_label->serial_number) {
788 hosed_column = c;
789 break;
792 printf("Hosed component: %s\n",
793 &cfgPtr->devnames[0][hosed_column][0]);
794 if (!force) {
795 /* we'll fail this component, as if there are
796 other major errors, we arn't forcing things
797 and we'll abort the config anyways */
798 raidPtr->Disks[hosed_column].status
799 = rf_ds_failed;
800 raidPtr->numFailures++;
801 raidPtr->status = rf_rs_degraded;
803 } else {
804 too_fatal = 1;
806 if (cfgPtr->parityConfig == '0') {
807 /* We've identified two different serial numbers.
808 RAID 0 can't cope with that, so we'll punt */
809 too_fatal = 1;
814 /* record the serial number for later. If we bail later, setting
815 this doesn't matter, otherwise we've got the best guess at the
816 correct serial number */
817 raidPtr->serial_number = serial_number;
819 mod_number = mod_values[0];
820 if (num_mod == 2) {
821 if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
822 /* Locate the maverick component */
823 if (mod_count[1] > mod_count[0]) {
824 mod_number = mod_values[1];
825 } else if (mod_count[1] < mod_count[0]) {
826 mod_number = mod_values[0];
827 } else {
828 /* counts of different modification values
829 are the same. Assume greater value is
830 the correct one, all other things
831 considered */
832 if (mod_values[0] > mod_values[1]) {
833 mod_number = mod_values[0];
834 } else {
835 mod_number = mod_values[1];
840 for (c = 0; c < raidPtr->numCol; c++) {
841 ci_label = raidget_component_label(raidPtr, c);
842 if (mod_number != ci_label->mod_counter) {
843 if (hosed_column == c) {
844 /* same one. Can
845 deal with it. */
846 } else {
847 hosed_column = c;
848 if (num_ser != 1) {
849 too_fatal = 1;
850 break;
855 printf("Hosed component: %s\n",
856 &cfgPtr->devnames[0][hosed_column][0]);
857 if (!force) {
858 /* we'll fail this component, as if there are
859 other major errors, we arn't forcing things
860 and we'll abort the config anyways */
861 if (raidPtr->Disks[hosed_column].status != rf_ds_failed) {
862 raidPtr->Disks[hosed_column].status
863 = rf_ds_failed;
864 raidPtr->numFailures++;
865 raidPtr->status = rf_rs_degraded;
868 } else {
869 too_fatal = 1;
871 if (cfgPtr->parityConfig == '0') {
872 /* We've identified two different mod counters.
873 RAID 0 can't cope with that, so we'll punt */
874 too_fatal = 1;
878 raidPtr->mod_counter = mod_number;
880 if (too_fatal) {
881 /* we've had both a serial number mismatch, and a mod_counter
882 mismatch -- and they involved two different components!!
883 Bail -- make things fail so that the user must force
884 the issue... */
885 hosed_column = -1;
886 fatal_error = 1;
889 if (num_ser > 2) {
890 printf("raid%d: Too many different serial numbers!\n",
891 raidPtr->raidid);
892 fatal_error = 1;
895 if (num_mod > 2) {
896 printf("raid%d: Too many different mod counters!\n",
897 raidPtr->raidid);
898 fatal_error = 1;
901 /* we start by assuming the parity will be good, and flee from
902 that notion at the slightest sign of trouble */
904 parity_good = RF_RAID_CLEAN;
906 for (c = 0; c < raidPtr->numCol; c++) {
907 dev_name = &cfgPtr->devnames[0][c][0];
908 ci_label = raidget_component_label(raidPtr, c);
910 if (c == hosed_column) {
911 printf("raid%d: Ignoring %s\n",
912 raidPtr->raidid, dev_name);
913 } else {
914 rf_print_label_status( raidPtr, c, dev_name, ci_label);
915 if (rf_check_label_vitals( raidPtr, 0, c,
916 dev_name, ci_label,
917 serial_number,
918 mod_number )) {
919 fatal_error = 1;
921 if (ci_label->clean != RF_RAID_CLEAN) {
922 parity_good = RF_RAID_DIRTY;
927 if (fatal_error) {
928 parity_good = RF_RAID_DIRTY;
931 /* we note the state of the parity */
932 raidPtr->parity_good = parity_good;
934 return(fatal_error);
938 rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
940 RF_RaidDisk_t *disks;
941 RF_DiskQueue_t *spareQueues;
942 int ret;
943 unsigned int bs;
944 int spare_number;
946 ret=0;
948 if (raidPtr->numSpare >= RF_MAXSPARE) {
949 RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
950 return(EINVAL);
953 RF_LOCK_MUTEX(raidPtr->mutex);
954 while (raidPtr->adding_hot_spare==1) {
955 ltsleep(&(raidPtr->adding_hot_spare), PRIBIO, "raidhs", 0,
956 &(raidPtr->mutex));
958 raidPtr->adding_hot_spare=1;
959 RF_UNLOCK_MUTEX(raidPtr->mutex);
961 /* the beginning of the spares... */
962 disks = &raidPtr->Disks[raidPtr->numCol];
964 spare_number = raidPtr->numSpare;
966 ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
967 &disks[spare_number],
968 raidPtr->numCol + spare_number);
970 if (ret)
971 goto fail;
972 if (disks[spare_number].status != rf_ds_optimal) {
973 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
974 sparePtr->component_name);
975 rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
976 ret=EINVAL;
977 goto fail;
978 } else {
979 disks[spare_number].status = rf_ds_spare;
980 DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
981 spare_number,
982 disks[spare_number].devname,
983 disks[spare_number].numBlocks,
984 disks[spare_number].blockSize,
985 (long int) disks[spare_number].numBlocks *
986 disks[spare_number].blockSize / 1024 / 1024);
990 /* check sizes and block sizes on the spare disk */
991 bs = 1 << raidPtr->logBytesPerSector;
992 if (disks[spare_number].blockSize != bs) {
993 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
994 rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
995 ret = EINVAL;
996 goto fail;
998 if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
999 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n",
1000 disks[spare_number].devname,
1001 disks[spare_number].blockSize,
1002 raidPtr->sectorsPerDisk);
1003 rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
1004 ret = EINVAL;
1005 goto fail;
1006 } else {
1007 if (disks[spare_number].numBlocks >
1008 raidPtr->sectorsPerDisk) {
1009 RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n",
1010 disks[spare_number].devname,
1011 raidPtr->sectorsPerDisk,
1012 disks[spare_number].numBlocks);
1014 disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
1018 spareQueues = &raidPtr->Queues[raidPtr->numCol];
1019 ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
1020 raidPtr->numCol + spare_number,
1021 raidPtr->qType,
1022 raidPtr->sectorsPerDisk,
1023 raidPtr->Disks[raidPtr->numCol +
1024 spare_number].dev,
1025 raidPtr->maxOutstanding,
1026 &raidPtr->shutdownList,
1027 raidPtr->cleanupList);
1029 RF_LOCK_MUTEX(raidPtr->mutex);
1030 raidPtr->numSpare++;
1031 RF_UNLOCK_MUTEX(raidPtr->mutex);
1033 fail:
1034 RF_LOCK_MUTEX(raidPtr->mutex);
1035 raidPtr->adding_hot_spare=0;
1036 wakeup(&(raidPtr->adding_hot_spare));
1037 RF_UNLOCK_MUTEX(raidPtr->mutex);
1039 return(ret);
1043 rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
1045 int spare_number;
1048 if (raidPtr->numSpare==0) {
1049 printf("No spares to remove!\n");
1050 return(EINVAL);
1053 spare_number = sparePtr->column;
1055 return(EINVAL); /* XXX not implemented yet */
1056 #if 0
1057 if (spare_number < 0 || spare_number > raidPtr->numSpare) {
1058 return(EINVAL);
1061 /* verify that this spare isn't in use... */
1066 /* it's gone.. */
1068 raidPtr->numSpare--;
1070 return(0);
1071 #endif
1076 rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
1078 RF_RaidDisk_t *disks;
1080 if ((component->column < 0) ||
1081 (component->column >= raidPtr->numCol)) {
1082 return(EINVAL);
1085 disks = &raidPtr->Disks[component->column];
1087 /* 1. This component must be marked as 'failed' */
1089 return(EINVAL); /* Not implemented yet. */
1093 rf_incorporate_hot_spare(RF_Raid_t *raidPtr,
1094 RF_SingleComponent_t *component)
1097 /* Issues here include how to 'move' this in if there is IO
1098 taking place (e.g. component queues and such) */
1100 return(EINVAL); /* Not implemented yet. */