Patrick Welche <prlw1@cam.ac.uk>
[netbsd-mini2440.git] / external / cddl / osnet / dist / uts / common / fs / zfs / vdev_disk.c
blob0bd031e42b58e913e46a5e5854be7c65042f6707
2 /*
3 * CDDL HEADER START
5 * The contents of this file are subject to the terms of the
6 * Common Development and Distribution License (the "License").
7 * You may not use this file except in compliance with the License.
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
20 * CDDL HEADER END
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 #include <sys/zfs_context.h>
28 #include <sys/spa.h>
29 #include <sys/refcount.h>
30 #include <sys/vdev_disk.h>
31 #include <sys/vdev_impl.h>
32 #include <sys/fs/zfs.h>
33 #include <sys/zio.h>
34 #include <sys/sunldi.h>
35 #include <sys/fm/fs/zfs.h>
36 #include <sys/disklabel.h>
37 #include <sys/dkio.h>
38 #include <sys/workqueue.h>
41 * Virtual device vector for disks.
44 static void vdev_disk_io_intr(buf_t *);
46 static void
47 vdev_disk_flush(struct work *work, void *cookie)
49 vdev_disk_t *dvd;
50 int error, cmd;
51 buf_t *bp;
52 vnode_t *vp;
54 bp = (struct buf *)work;
55 vp = bp->b_vp;
56 dvd = cookie;
58 KASSERT(vp == dvd->vd_vn);
60 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
61 cmd = 1;
62 error = VOP_IOCTL(vp, DIOCCACHESYNC, &cmd, FREAD|FWRITE,
63 kauth_cred_get());
64 VOP_UNLOCK(vp, 0);
65 bp->b_error = error;
66 vdev_disk_io_intr(bp);
69 static int
70 vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
72 struct partinfo pinfo;
73 vdev_disk_t *dvd;
74 vnode_t *vp;
75 int error, cmd;
78 * We must have a pathname, and it must be absolute.
80 if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
81 vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
82 return (EINVAL);
85 dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);
88 * When opening a disk device, we want to preserve the user's original
89 * intent. We always want to open the device by the path the user gave
90 * us, even if it is one of multiple paths to the save device. But we
91 * also want to be able to survive disks being removed/recabled.
92 * Therefore the sequence of opening devices is:
94 * 1. Try opening the device by path. For legacy pools without the
95 * 'whole_disk' property, attempt to fix the path by appending 's0'.
97 * 2. If the devid of the device matches the stored value, return
98 * success.
100 * 3. Otherwise, the device may have moved. Try opening the device
101 * by the devid instead.
104 if (vd->vdev_devid != NULL) {
105 /* XXXNETBSD wedges */
108 error = EINVAL; /* presume failure */
110 error = vn_open(vd->vdev_path, UIO_SYSSPACE, FREAD|FWRITE, 0,
111 &vp, CRCREAT, 0);
112 if (error != 0) {
113 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
114 return error;
116 if (vp->v_type != VBLK) {
117 vrele(vp);
118 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
119 return EINVAL;
123 * XXXNETBSD Compare the devid to the stored value.
127 * Determine the actual size of the device.
128 * XXXNETBSD wedges.
130 error = VOP_IOCTL(vp, DIOCGPART, &pinfo, FREAD|FWRITE,
131 kauth_cred_get());
132 if (error != 0) {
133 vrele(vp);
134 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
135 return error;
137 *psize = (uint64_t)pinfo.part->p_size * pinfo.disklab->d_secsize;
138 *ashift = highbit(MAX(pinfo.disklab->d_secsize, SPA_MINBLOCKSIZE)) - 1;
139 vd->vdev_wholedisk = (pinfo.part->p_offset == 0); /* XXXNETBSD */
142 * Create a workqueue to process cache-flushes concurrently.
144 error = workqueue_create(&dvd->vd_wq, "vdevsync",
145 vdev_disk_flush, dvd, PRI_NONE, IPL_NONE, WQ_MPSAFE);
146 if (error != 0) {
147 vrele(vp);
148 return error;
152 * Clear the nowritecache bit, so that on a vdev_reopen() we will
153 * try again.
155 vd->vdev_nowritecache = B_FALSE;
157 dvd->vd_vn = vp;
158 return 0;
161 static void
162 vdev_disk_close(vdev_t *vd)
164 vdev_disk_t *dvd = vd->vdev_tsd;
165 vnode_t *vp;
167 if (dvd == NULL)
168 return;
170 dprintf("removing disk %s, devid %s\n",
171 vd->vdev_path ? vd->vdev_path : "<none>",
172 vd->vdev_devid ? vd->vdev_devid : "<none>");
174 if ((vp = dvd->vd_vn) != NULL) {
175 /* XXX NetBSD Sometimes we deadlock on this why ? */
176 // vprint("vnode close info", vp);
177 vn_close(vp, FREAD|FWRITE, kauth_cred_get());
178 // vprint("vnode close info", vp);
179 /* XXX is this needed ? vrele(vp); */
180 workqueue_destroy(dvd->vd_wq);
182 kmem_free(dvd, sizeof (vdev_disk_t));
183 vd->vdev_tsd = NULL;
186 static void
187 vdev_disk_io_intr(buf_t *bp)
189 zio_t *zio = bp->b_private;
191 dprintf("vdev_disk_io_intr bp=%p\n", bp);
193 * The rest of the zio stack only deals with EIO, ECKSUM, and ENXIO.
194 * Rather than teach the rest of the stack about other error
195 * possibilities (EFAULT, etc), we normalize the error value here.
197 if (bp->b_error == 0) {
198 if (bp->b_resid != 0) {
199 zio->io_error = EIO;
200 } else {
201 zio->io_error = 0;
203 } else {
204 zio->io_error = EIO;
207 putiobuf(bp);
208 zio_interrupt(zio);
211 static int
212 vdev_disk_io_start(zio_t *zio)
214 vdev_t *vd = zio->io_vd;
215 vdev_disk_t *dvd = vd->vdev_tsd;
216 vnode_t *vp;
217 buf_t *bp, *nbp;
218 int error, size, off, resid;
220 vp = dvd->vd_vn;
221 if (zio->io_type == ZIO_TYPE_IOCTL) {
222 /* XXPOLICY */
223 if (!vdev_readable(vd)) {
224 zio->io_error = ENXIO;
225 return (ZIO_PIPELINE_CONTINUE);
228 switch (zio->io_cmd) {
229 case DKIOCFLUSHWRITECACHE:
231 if (zfs_nocacheflush)
232 break;
234 if (vd->vdev_nowritecache) {
235 zio->io_error = ENOTSUP;
236 break;
239 bp = getiobuf(vp, true);
240 bp->b_private = zio;
241 workqueue_enqueue(dvd->vd_wq, &bp->b_work, NULL);
242 return (ZIO_PIPELINE_STOP);
244 default:
245 zio->io_error = ENOTSUP;
246 break;
249 return (ZIO_PIPELINE_CONTINUE);
252 bp = getiobuf(vp, true);
253 bp->b_flags = (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE);
254 bp->b_cflags = BC_BUSY | BC_NOCACHE;
255 bp->b_data = zio->io_data;
256 bp->b_blkno = btodb(zio->io_offset);
257 bp->b_bcount = zio->io_size;
258 bp->b_resid = zio->io_size;
259 bp->b_iodone = vdev_disk_io_intr;
260 bp->b_private = zio;
262 if (!(bp->b_flags & B_READ)) {
263 mutex_enter(&vp->v_interlock);
264 vp->v_numoutput++;
265 mutex_exit(&vp->v_interlock);
268 if (bp->b_bcount <= MAXPHYS) {
269 /* We can do this I/O in one pass. */
270 (void)VOP_STRATEGY(vp, bp);
271 } else {
273 * The I/O is larger than we can process in one pass.
274 * Split it into smaller pieces.
276 resid = zio->io_size;
277 off = 0;
278 while (resid != 0) {
279 size = min(resid, MAXPHYS);
280 nbp = getiobuf(vp, true);
281 nbp->b_blkno = btodb(zio->io_offset + off);
282 /* Below call increments v_numoutput. */
283 nestiobuf_setup(bp, nbp, off, size);
284 (void)VOP_STRATEGY(vp, nbp);
285 resid -= size;
286 off += size;
290 return (ZIO_PIPELINE_STOP);
293 static void
294 vdev_disk_io_done(zio_t *zio)
297 /* NetBSD: nothing */
300 vdev_ops_t vdev_disk_ops = {
301 vdev_disk_open,
302 vdev_disk_close,
303 vdev_default_asize,
304 vdev_disk_io_start,
305 vdev_disk_io_done,
306 NULL,
307 VDEV_TYPE_DISK, /* name of this vdev type */
308 B_TRUE /* leaf vdev */
312 * Given the root disk device devid or pathname, read the label from
313 * the device, and construct a configuration nvlist.
316 vdev_disk_read_rootlabel(char *devpath, char *devid, nvlist_t **config)
319 return EOPNOTSUPP;