1 /* $NetBSD: ld_ataraid.c,v 1.34 2009/05/12 12:10:29 cegger Exp $ */
4 * Copyright (c) 2003 Wasabi Systems, Inc.
7 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed for the NetBSD Project by
20 * Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 * or promote products derived from this software without specific prior
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
39 * Support for ATA RAID logical disks.
41 * Note that all the RAID happens in software here; the ATA RAID
42 * controllers we're dealing with (Promise, etc.) only support
43 * configuration data on the component disks, with the BIOS supporting
44 * booting from the RAID volumes.
46 * bio(4) support was written by Juan Romero Pardines <xtraeme@gmail.com>.
49 #include <sys/cdefs.h>
50 __KERNEL_RCSID(0, "$NetBSD: ld_ataraid.c,v 1.34 2009/05/12 12:10:29 cegger Exp $");
55 #include <sys/param.h>
56 #include <sys/systm.h>
58 #include <sys/kernel.h>
59 #include <sys/device.h>
64 #include <sys/disklabel.h>
65 #include <sys/fcntl.h>
66 #include <sys/malloc.h>
67 #include <sys/vnode.h>
68 #include <sys/kauth.h>
73 #include <dev/ata/atavar.h>
74 #include <dev/ata/atareg.h>
75 #include <dev/ata/wdvar.h>
76 #include <dev/biovar.h>
77 #include <dev/scsipi/scsipiconf.h> /* for scsipi_strvis() */
80 #include <miscfs/specfs/specdev.h>
82 #include <dev/ldvar.h>
84 #include <dev/ata/ata_raidvar.h>
86 struct ld_ataraid_softc
{
87 struct ld_softc sc_ld
;
89 struct ataraid_array_info
*sc_aai
;
90 struct vnode
*sc_vnodes
[ATA_RAID_MAX_DISKS
];
92 void (*sc_iodone
)(struct buf
*);
95 static int ld_ataraid_match(device_t
, cfdata_t
, void *);
96 static void ld_ataraid_attach(device_t
, device_t
, void *);
98 static int ld_ataraid_dump(struct ld_softc
*, void *, int, int);
100 static int ld_ataraid_start_span(struct ld_softc
*, struct buf
*);
102 static int ld_ataraid_start_raid0(struct ld_softc
*, struct buf
*);
103 static void ld_ataraid_iodone_raid0(struct buf
*);
106 static int ld_ataraid_bioctl(device_t
, u_long
, void *);
107 static int ld_ataraid_bioinq(struct ld_ataraid_softc
*, struct bioc_inq
*);
108 static int ld_ataraid_biovol(struct ld_ataraid_softc
*, struct bioc_vol
*);
109 static int ld_ataraid_biodisk(struct ld_ataraid_softc
*,
113 CFATTACH_DECL_NEW(ld_ataraid
, sizeof(struct ld_ataraid_softc
),
114 ld_ataraid_match
, ld_ataraid_attach
, NULL
, NULL
);
116 static int ld_ataraid_initialized
;
117 static struct pool ld_ataraid_cbufpl
;
120 struct buf cb_buf
; /* new I/O buf */
121 struct buf
*cb_obp
; /* ptr. to original I/O buf */
122 struct ld_ataraid_softc
*cb_sc
; /* pointer to ld softc */
123 u_int cb_comp
; /* target component */
124 SIMPLEQ_ENTRY(cbuf
) cb_q
; /* fifo of component buffers */
125 struct cbuf
*cb_other
; /* other cbuf in case of mirror */
127 #define CBUF_IODONE 0x00000001 /* I/O is already successfully done */
/*
 * Component buffer pool accessors.
 *
 * CBUF_GET() allocates with PR_NOWAIT, so callers must check the result
 * for NULL.  Neither macro carries a trailing semicolon: both expand to a
 * plain expression and can be used anywhere an expression is valid,
 * including the body of an un-braced if/else.
 */
#define	CBUF_GET()	pool_get(&ld_ataraid_cbufpl, PR_NOWAIT)
#define	CBUF_PUT(cbp)	pool_put(&ld_ataraid_cbufpl, (cbp))
134 ld_ataraid_match(device_t parent
, cfdata_t match
, void *aux
)
141 ld_ataraid_attach(device_t parent
, device_t self
, void *aux
)
143 struct ld_ataraid_softc
*sc
= device_private(self
);
144 struct ld_softc
*ld
= &sc
->sc_ld
;
145 struct ataraid_array_info
*aai
= aux
;
146 struct ataraid_disk_info
*adi
= NULL
;
154 if (ld_ataraid_initialized
== 0) {
155 ld_ataraid_initialized
= 1;
156 pool_init(&ld_ataraid_cbufpl
, sizeof(struct cbuf
), 0,
157 0, 0, "ldcbuf", NULL
, IPL_BIO
);
160 sc
->sc_aai
= aai
; /* this data persists */
162 ld
->sc_maxxfer
= MAXPHYS
* aai
->aai_width
; /* XXX */
163 ld
->sc_secperunit
= aai
->aai_capacity
;
164 ld
->sc_secsize
= 512; /* XXX */
165 ld
->sc_maxqueuecnt
= 128; /* XXX */
166 ld
->sc_dump
= ld_ataraid_dump
;
168 switch (aai
->aai_level
) {
171 ld
->sc_start
= ld_ataraid_start_span
;
172 sc
->sc_iodone
= ld_ataraid_iodone_raid0
;
177 ld
->sc_start
= ld_ataraid_start_raid0
;
178 sc
->sc_iodone
= ld_ataraid_iodone_raid0
;
183 ld
->sc_start
= ld_ataraid_start_raid0
;
184 sc
->sc_iodone
= ld_ataraid_iodone_raid0
;
187 case AAI_L_RAID0
| AAI_L_RAID1
:
189 ld
->sc_start
= ld_ataraid_start_raid0
;
190 sc
->sc_iodone
= ld_ataraid_iodone_raid0
;
194 snprintf(unklev
, sizeof(unklev
), "<unknown level 0x%x>",
199 aprint_naive(": ATA %s array\n", level
);
200 aprint_normal(": %s ATA %s array\n",
201 ata_raid_type_name(aai
->aai_type
), level
);
203 if (ld
->sc_start
== NULL
) {
204 aprint_error_dev(ld
->sc_dv
, "unsupported array type\n");
209 * We get a geometry from the device; use it.
211 ld
->sc_nheads
= aai
->aai_heads
;
212 ld
->sc_nsectors
= aai
->aai_sectors
;
213 ld
->sc_ncylinders
= aai
->aai_cylinders
;
216 * Configure all the component disks.
218 for (i
= 0; i
< aai
->aai_ndisks
; i
++) {
219 adi
= &aai
->aai_disks
[i
];
220 vp
= ata_raid_disk_vnode_find(adi
);
223 * XXX This is bogus. We should just mark the
224 * XXX component as FAILED, and write-back new
229 sc
->sc_vnodes
[i
] = vp
;
231 if (i
== aai
->aai_ndisks
) {
232 ld
->sc_flags
= LDF_ENABLED
;
236 for (i
= 0; i
< aai
->aai_ndisks
; i
++) {
237 vp
= sc
->sc_vnodes
[i
];
238 sc
->sc_vnodes
[i
] = NULL
;
240 (void) vn_close(vp
, FREAD
|FWRITE
, NOCRED
);
245 if (bio_register(self
, ld_ataraid_bioctl
) != 0)
246 panic("%s: bioctl registration failed\n",
247 device_xname(ld
->sc_dv
));
253 ld_ataraid_make_cbuf(struct ld_ataraid_softc
*sc
, struct buf
*bp
,
254 u_int comp
, daddr_t bn
, void *addr
, long bcount
)
261 buf_init(&cbp
->cb_buf
);
262 cbp
->cb_buf
.b_flags
= bp
->b_flags
;
263 cbp
->cb_buf
.b_oflags
= bp
->b_oflags
;
264 cbp
->cb_buf
.b_cflags
= bp
->b_cflags
;
265 cbp
->cb_buf
.b_iodone
= sc
->sc_iodone
;
266 cbp
->cb_buf
.b_proc
= bp
->b_proc
;
267 cbp
->cb_buf
.b_vp
= sc
->sc_vnodes
[comp
];
268 cbp
->cb_buf
.b_objlock
= &sc
->sc_vnodes
[comp
]->v_interlock
;
269 cbp
->cb_buf
.b_blkno
= bn
+ sc
->sc_aai
->aai_offset
;
270 cbp
->cb_buf
.b_data
= addr
;
271 cbp
->cb_buf
.b_bcount
= bcount
;
273 /* Context for iodone */
277 cbp
->cb_other
= NULL
;
284 ld_ataraid_start_span(struct ld_softc
*ld
, struct buf
*bp
)
286 struct ld_ataraid_softc
*sc
= (void *) ld
;
287 struct ataraid_array_info
*aai
= sc
->sc_aai
;
288 struct ataraid_disk_info
*adi
;
289 SIMPLEQ_HEAD(, cbuf
) cbufq
;
296 /* Allocate component buffers. */
297 SIMPLEQ_INIT(&cbufq
);
300 /* Find the first component. */
302 adi
= &aai
->aai_disks
[comp
];
304 while (bn
>= adi
->adi_compsize
) {
305 bn
-= adi
->adi_compsize
;
306 adi
= &aai
->aai_disks
[++comp
];
309 bp
->b_resid
= bp
->b_bcount
;
311 for (bcount
= bp
->b_bcount
; bcount
> 0; bcount
-= rcount
) {
312 rcount
= bp
->b_bcount
;
313 if ((adi
->adi_compsize
- bn
) < btodb(rcount
))
314 rcount
= dbtob(adi
->adi_compsize
- bn
);
316 cbp
= ld_ataraid_make_cbuf(sc
, bp
, comp
, bn
, addr
, rcount
);
318 /* Free the already allocated component buffers. */
319 while ((cbp
= SIMPLEQ_FIRST(&cbufq
)) != NULL
) {
320 SIMPLEQ_REMOVE_HEAD(&cbufq
, cb_q
);
321 buf_destroy(&cbp
->cb_buf
);
328 * For a span, we always know we advance to the next disk,
329 * and always start at offset 0 on that disk.
331 adi
= &aai
->aai_disks
[++comp
];
334 SIMPLEQ_INSERT_TAIL(&cbufq
, cbp
, cb_q
);
338 /* Now fire off the requests. */
339 while ((cbp
= SIMPLEQ_FIRST(&cbufq
)) != NULL
) {
340 SIMPLEQ_REMOVE_HEAD(&cbufq
, cb_q
);
341 if ((cbp
->cb_buf
.b_flags
& B_READ
) == 0) {
342 mutex_enter(&cbp
->cb_buf
.b_vp
->v_interlock
);
343 cbp
->cb_buf
.b_vp
->v_numoutput
++;
344 mutex_exit(&cbp
->cb_buf
.b_vp
->v_interlock
);
346 VOP_STRATEGY(cbp
->cb_buf
.b_vp
, &cbp
->cb_buf
);
353 ld_ataraid_start_raid0(struct ld_softc
*ld
, struct buf
*bp
)
355 struct ld_ataraid_softc
*sc
= (void *) ld
;
356 struct ataraid_array_info
*aai
= sc
->sc_aai
;
357 struct ataraid_disk_info
*adi
;
358 SIMPLEQ_HEAD(, cbuf
) cbufq
;
359 struct cbuf
*cbp
, *other_cbp
;
361 daddr_t bn
, cbn
, tbn
, off
;
364 const int read
= bp
->b_flags
& B_READ
;
365 const int mirror
= aai
->aai_level
& AAI_L_RAID1
;
368 /* Allocate component buffers. */
369 SIMPLEQ_INIT(&cbufq
);
373 bp
->b_resid
= bp
->b_bcount
;
375 for (bcount
= bp
->b_bcount
; bcount
> 0; bcount
-= rcount
) {
376 tbn
= bn
/ aai
->aai_interleave
;
377 off
= bn
% aai
->aai_interleave
;
379 if (__predict_false(tbn
== aai
->aai_capacity
/
380 aai
->aai_interleave
)) {
382 daddr_t sz
= (aai
->aai_capacity
-
383 (tbn
* aai
->aai_interleave
)) /
386 cbn
= ((tbn
/ aai
->aai_width
) * aai
->aai_interleave
) +
388 rcount
= min(bcount
, dbtob(sz
));
390 comp
= tbn
% aai
->aai_width
;
391 cbn
= ((tbn
/ aai
->aai_width
) * aai
->aai_interleave
) +
393 rcount
= min(bcount
, dbtob(aai
->aai_interleave
- off
));
397 * See if a component is valid.
400 adi
= &aai
->aai_disks
[comp
];
401 if ((adi
->adi_status
& ADI_S_ONLINE
) == 0) {
402 if (mirror
&& comp
< aai
->aai_width
) {
403 comp
+= aai
->aai_width
;
408 * No component available.
414 cbp
= ld_ataraid_make_cbuf(sc
, bp
, comp
, cbn
, addr
, rcount
);
419 /* Free the already allocated component buffers. */
420 while ((cbp
= SIMPLEQ_FIRST(&cbufq
)) != NULL
) {
421 SIMPLEQ_REMOVE_HEAD(&cbufq
, cb_q
);
422 buf_destroy(&cbp
->cb_buf
);
427 SIMPLEQ_INSERT_TAIL(&cbufq
, cbp
, cb_q
);
428 if (mirror
&& !read
&& comp
< aai
->aai_width
) {
429 comp
+= aai
->aai_width
;
430 adi
= &aai
->aai_disks
[comp
];
431 if (adi
->adi_status
& ADI_S_ONLINE
) {
432 other_cbp
= ld_ataraid_make_cbuf(sc
, bp
,
433 comp
, cbn
, addr
, rcount
);
434 if (other_cbp
== NULL
)
435 goto resource_shortage
;
436 SIMPLEQ_INSERT_TAIL(&cbufq
, other_cbp
, cb_q
);
437 other_cbp
->cb_other
= cbp
;
438 cbp
->cb_other
= other_cbp
;
445 /* Now fire off the requests. */
446 while ((cbp
= SIMPLEQ_FIRST(&cbufq
)) != NULL
) {
447 SIMPLEQ_REMOVE_HEAD(&cbufq
, cb_q
);
448 if ((cbp
->cb_buf
.b_flags
& B_READ
) == 0) {
449 mutex_enter(&cbp
->cb_buf
.b_vp
->v_interlock
);
450 cbp
->cb_buf
.b_vp
->v_numoutput
++;
451 mutex_exit(&cbp
->cb_buf
.b_vp
->v_interlock
);
453 VOP_STRATEGY(cbp
->cb_buf
.b_vp
, &cbp
->cb_buf
);
460 * Called at interrupt time. Mark the component as done and if all
461 * components are done, take an "interrupt".
464 ld_ataraid_iodone_raid0(struct buf
*vbp
)
466 struct cbuf
*cbp
= (struct cbuf
*) vbp
, *other_cbp
;
467 struct buf
*bp
= cbp
->cb_obp
;
468 struct ld_ataraid_softc
*sc
= cbp
->cb_sc
;
469 struct ataraid_array_info
*aai
= sc
->sc_aai
;
470 struct ataraid_disk_info
*adi
;
476 iodone
= cbp
->cb_flags
& CBUF_IODONE
;
477 other_cbp
= cbp
->cb_other
;
478 if (other_cbp
!= NULL
)
480 other_cbp
->cb_other
= NULL
;
482 if (cbp
->cb_buf
.b_error
!= 0) {
484 * Mark this component broken.
486 adi
= &aai
->aai_disks
[cbp
->cb_comp
];
487 adi
->adi_status
&= ~ADI_S_ONLINE
;
489 printf("%s: error %d on component %d (%s)\n",
490 device_xname(sc
->sc_ld
.sc_dv
), bp
->b_error
, cbp
->cb_comp
,
491 device_xname(adi
->adi_dev
));
494 * If we didn't see an error yet and we are reading
495 * RAID1 disk, try another component.
497 if (bp
->b_error
== 0 &&
498 (cbp
->cb_buf
.b_flags
& B_READ
) != 0 &&
499 (aai
->aai_level
& AAI_L_RAID1
) != 0 &&
500 cbp
->cb_comp
< aai
->aai_width
) {
501 cbp
->cb_comp
+= aai
->aai_width
;
502 adi
= &aai
->aai_disks
[cbp
->cb_comp
];
503 if (adi
->adi_status
& ADI_S_ONLINE
) {
504 cbp
->cb_buf
.b_error
= 0;
505 VOP_STRATEGY(cbp
->cb_buf
.b_vp
, &cbp
->cb_buf
);
510 if (iodone
|| other_cbp
!= NULL
)
512 * If I/O on other component successfully done
513 * or the I/O is still in progress, no need
514 * to tell an error to upper layer.
518 bp
->b_error
= cbp
->cb_buf
.b_error
?
519 cbp
->cb_buf
.b_error
: EIO
;
522 /* XXX Update component config blocks. */
526 * If other I/O is still in progress, tell it that
527 * our I/O is successfully done.
529 if (other_cbp
!= NULL
)
530 other_cbp
->cb_flags
|= CBUF_IODONE
;
532 count
= cbp
->cb_buf
.b_bcount
;
533 buf_destroy(&cbp
->cb_buf
);
536 if (other_cbp
!= NULL
)
539 /* If all done, "interrupt". */
540 bp
->b_resid
-= count
;
542 panic("ld_ataraid_iodone_raid0: count");
543 if (bp
->b_resid
== 0)
544 lddone(&sc
->sc_ld
, bp
);
/*
 * Dump entry point installed into the ld softc.  Dumping to an ATA RAID
 * volume is not implemented: the arguments are ignored and EIO is
 * returned for every call.
 */
static int
ld_ataraid_dump(struct ld_softc *sc, void *data,
    int blkno, int blkcnt)
{
	return EIO;
}
560 ld_ataraid_bioctl(device_t self
, u_long cmd
, void *addr
)
562 struct ld_ataraid_softc
*sc
= device_private(self
);
567 error
= ld_ataraid_bioinq(sc
, (struct bioc_inq
*)addr
);
570 error
= ld_ataraid_biovol(sc
, (struct bioc_vol
*)addr
);
573 error
= ld_ataraid_biodisk(sc
, (struct bioc_disk
*)addr
);
584 ld_ataraid_bioinq(struct ld_ataraid_softc
*sc
, struct bioc_inq
*bi
)
586 struct ataraid_array_info
*aai
= sc
->sc_aai
;
588 /* there's always one volume per ld device */
590 bi
->bi_nodisk
= aai
->aai_ndisks
;
596 ld_ataraid_biovol(struct ld_ataraid_softc
*sc
, struct bioc_vol
*bv
)
598 struct ataraid_array_info
*aai
= sc
->sc_aai
;
599 struct ld_softc
*ld
= &sc
->sc_ld
;
601 /* Fill in data for _this_ volume */
605 switch (aai
->aai_status
) {
607 bv
->bv_status
= BIOC_SVONLINE
;
610 bv
->bv_status
= BIOC_SVDEGRADED
;
614 bv
->bv_size
= ld
->sc_secsize
* ld
->sc_secperunit
;
616 switch (aai
->aai_level
) {
619 bv
->bv_stripe_size
= aai
->aai_interleave
;
623 bv
->bv_stripe_size
= 0;
627 bv
->bv_stripe_size
= aai
->aai_interleave
;
632 bv
->bv_nodisk
= aai
->aai_ndisks
;
633 strlcpy(bv
->bv_dev
, device_xname(ld
->sc_dv
), sizeof(bv
->bv_dev
));
634 if (aai
->aai_name
[0] != '\0')
635 strlcpy(bv
->bv_vendor
, aai
->aai_name
, sizeof(bv
->bv_vendor
));
641 ld_ataraid_biodisk(struct ld_ataraid_softc
*sc
, struct bioc_disk
*bd
)
643 struct ataraid_array_info
*aai
= sc
->sc_aai
;
644 struct ataraid_disk_info
*adi
;
645 struct ld_softc
*ld
= &sc
->sc_ld
;
646 struct atabus_softc
*atabus
;
648 char model
[81], serial
[41], rev
[17];
651 if (bd
->bd_diskid
> aai
->aai_ndisks
)
654 adi
= &aai
->aai_disks
[bd
->bd_diskid
];
655 atabus
= device_private(device_parent(adi
->adi_dev
));
656 wd
= device_private(adi
->adi_dev
);
658 /* fill in data for _this_ disk */
659 switch (adi
->adi_status
) {
660 case ADI_S_ONLINE
| ADI_S_ASSIGNED
:
661 bd
->bd_status
= BIOC_SDONLINE
;
664 bd
->bd_status
= BIOC_SDHOTSPARE
;
667 bd
->bd_status
= BIOC_SDOFFLINE
;
672 bd
->bd_target
= atabus
->sc_chan
->ch_channel
;
674 bd
->bd_size
= (wd
->sc_capacity
* ld
->sc_secsize
) - aai
->aai_reserved
;
676 strlcpy(bd
->bd_procdev
, device_xname(adi
->adi_dev
),
677 sizeof(bd
->bd_procdev
));
679 scsipi_strvis(serial
, sizeof(serial
), wd
->sc_params
.atap_serial
,
680 sizeof(wd
->sc_params
.atap_serial
));
681 scsipi_strvis(model
, sizeof(model
), wd
->sc_params
.atap_model
,
682 sizeof(wd
->sc_params
.atap_model
));
683 scsipi_strvis(rev
, sizeof(rev
), wd
->sc_params
.atap_revision
,
684 sizeof(wd
->sc_params
.atap_revision
));
686 snprintf(bd
->bd_vendor
, sizeof(bd
->bd_vendor
), "%s %s", model
, rev
);
687 strlcpy(bd
->bd_serial
, serial
, sizeof(bd
->bd_serial
));
691 #endif /* NBIO > 0 */