5 * The contents of this file are subject to the terms of the
6 * Common Development and Distribution License (the "License").
7 * You may not use this file except in compliance with the License.
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 #include <sys/zfs_context.h>
29 #include <sys/refcount.h>
30 #include <sys/vdev_disk.h>
31 #include <sys/vdev_impl.h>
32 #include <sys/fs/zfs.h>
34 #include <sys/sunldi.h>
35 #include <sys/fm/fs/zfs.h>
36 #include <sys/disklabel.h>
38 #include <sys/workqueue.h>
41 * Virtual device vector for disks.
44 static void vdev_disk_io_intr(buf_t
*);
47 vdev_disk_flush(struct work
*work
, void *cookie
)
54 bp
= (struct buf
*)work
;
58 KASSERT(vp
== dvd
->vd_vn
);
60 vn_lock(vp
, LK_EXCLUSIVE
| LK_RETRY
);
62 error
= VOP_IOCTL(vp
, DIOCCACHESYNC
, &cmd
, FREAD
|FWRITE
,
66 vdev_disk_io_intr(bp
);
70 vdev_disk_open(vdev_t
*vd
, uint64_t *psize
, uint64_t *ashift
)
72 struct partinfo pinfo
;
78 * We must have a pathname, and it must be absolute.
80 if (vd
->vdev_path
== NULL
|| vd
->vdev_path
[0] != '/') {
81 vd
->vdev_stat
.vs_aux
= VDEV_AUX_BAD_LABEL
;
85 dvd
= vd
->vdev_tsd
= kmem_zalloc(sizeof (vdev_disk_t
), KM_SLEEP
);
88 * When opening a disk device, we want to preserve the user's original
89 * intent. We always want to open the device by the path the user gave
90 * us, even if it is one of multiple paths to the same device. But we
91 * also want to be able to survive disks being removed/recabled.
92 * Therefore the sequence of opening devices is:
94 * 1. Try opening the device by path. For legacy pools without the
95 * 'whole_disk' property, attempt to fix the path by appending 's0'.
97 * 2. If the devid of the device matches the stored value, return
100 * 3. Otherwise, the device may have moved. Try opening the device
101 * by the devid instead.
104 if (vd
->vdev_devid
!= NULL
) {
105 /* XXXNETBSD wedges */
108 error
= EINVAL
; /* presume failure */
110 error
= vn_open(vd
->vdev_path
, UIO_SYSSPACE
, FREAD
|FWRITE
, 0,
113 vd
->vdev_stat
.vs_aux
= VDEV_AUX_OPEN_FAILED
;
116 if (vp
->v_type
!= VBLK
) {
118 vd
->vdev_stat
.vs_aux
= VDEV_AUX_OPEN_FAILED
;
123 * XXXNETBSD Compare the devid to the stored value.
127 * Determine the actual size of the device.
130 error
= VOP_IOCTL(vp
, DIOCGPART
, &pinfo
, FREAD
|FWRITE
,
134 vd
->vdev_stat
.vs_aux
= VDEV_AUX_OPEN_FAILED
;
137 *psize
= (uint64_t)pinfo
.part
->p_size
* pinfo
.disklab
->d_secsize
;
138 *ashift
= highbit(MAX(pinfo
.disklab
->d_secsize
, SPA_MINBLOCKSIZE
)) - 1;
139 vd
->vdev_wholedisk
= (pinfo
.part
->p_offset
== 0); /* XXXNETBSD */
142 * Create a workqueue to process cache-flushes concurrently.
144 error
= workqueue_create(&dvd
->vd_wq
, "vdevsync",
145 vdev_disk_flush
, dvd
, PRI_NONE
, IPL_NONE
, WQ_MPSAFE
);
152 * Clear the nowritecache bit, so that on a vdev_reopen() we will
155 vd
->vdev_nowritecache
= B_FALSE
;
162 vdev_disk_close(vdev_t
*vd
)
164 vdev_disk_t
*dvd
= vd
->vdev_tsd
;
170 dprintf("removing disk %s, devid %s\n",
171 vd
->vdev_path
? vd
->vdev_path
: "<none>",
172 vd
->vdev_devid
? vd
->vdev_devid
: "<none>");
174 if ((vp
= dvd
->vd_vn
) != NULL
) {
175 /* XXX NetBSD: we sometimes deadlock on this close -- why? */
176 // vprint("vnode close info", vp);
177 vn_close(vp
, FREAD
|FWRITE
, kauth_cred_get());
178 // vprint("vnode close info", vp);
179 /* XXX is this needed ? vrele(vp); */
180 workqueue_destroy(dvd
->vd_wq
);
182 kmem_free(dvd
, sizeof (vdev_disk_t
));
187 vdev_disk_io_intr(buf_t
*bp
)
189 zio_t
*zio
= bp
->b_private
;
191 dprintf("vdev_disk_io_intr bp=%p\n", bp
);
193 * The rest of the zio stack only deals with EIO, ECKSUM, and ENXIO.
194 * Rather than teach the rest of the stack about other error
195 * possibilities (EFAULT, etc), we normalize the error value here.
197 if (bp
->b_error
== 0) {
198 if (bp
->b_resid
!= 0) {
212 vdev_disk_io_start(zio_t
*zio
)
214 vdev_t
*vd
= zio
->io_vd
;
215 vdev_disk_t
*dvd
= vd
->vdev_tsd
;
218 int error
, size
, off
, resid
;
221 if (zio
->io_type
== ZIO_TYPE_IOCTL
) {
223 if (!vdev_readable(vd
)) {
224 zio
->io_error
= ENXIO
;
225 return (ZIO_PIPELINE_CONTINUE
);
228 switch (zio
->io_cmd
) {
229 case DKIOCFLUSHWRITECACHE
:
231 if (zfs_nocacheflush
)
234 if (vd
->vdev_nowritecache
) {
235 zio
->io_error
= ENOTSUP
;
239 bp
= getiobuf(vp
, true);
241 workqueue_enqueue(dvd
->vd_wq
, &bp
->b_work
, NULL
);
242 return (ZIO_PIPELINE_STOP
);
245 zio
->io_error
= ENOTSUP
;
249 return (ZIO_PIPELINE_CONTINUE
);
252 bp
= getiobuf(vp
, true);
253 bp
->b_flags
= (zio
->io_type
== ZIO_TYPE_READ
? B_READ
: B_WRITE
);
254 bp
->b_cflags
= BC_BUSY
| BC_NOCACHE
;
255 bp
->b_data
= zio
->io_data
;
256 bp
->b_blkno
= btodb(zio
->io_offset
);
257 bp
->b_bcount
= zio
->io_size
;
258 bp
->b_resid
= zio
->io_size
;
259 bp
->b_iodone
= vdev_disk_io_intr
;
262 if (!(bp
->b_flags
& B_READ
)) {
263 mutex_enter(&vp
->v_interlock
);
265 mutex_exit(&vp
->v_interlock
);
268 if (bp
->b_bcount
<= MAXPHYS
) {
269 /* We can do this I/O in one pass. */
270 (void)VOP_STRATEGY(vp
, bp
);
273 * The I/O is larger than we can process in one pass.
274 * Split it into smaller pieces.
276 resid
= zio
->io_size
;
279 size
= min(resid
, MAXPHYS
);
280 nbp
= getiobuf(vp
, true);
281 nbp
->b_blkno
= btodb(zio
->io_offset
+ off
);
282 /* Below call increments v_numoutput. */
283 nestiobuf_setup(bp
, nbp
, off
, size
);
284 (void)VOP_STRATEGY(vp
, nbp
);
290 return (ZIO_PIPELINE_STOP
);
294 vdev_disk_io_done(zio_t
*zio
)
297 /* NetBSD: nothing */
300 vdev_ops_t vdev_disk_ops
= {
307 VDEV_TYPE_DISK
, /* name of this vdev type */
308 B_TRUE
/* leaf vdev */
312 * Given the root disk device devid or pathname, read the label from
313 * the device, and construct a configuration nvlist.
316 vdev_disk_read_rootlabel(char *devpath
, char *devid
, nvlist_t
**config
)