/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 *
 * Copyright (c) 2006-2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Portions Copyright 2010 Robert Milkowski
 *
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
 * Copyright (c) 2014 Integros [integros.com]
 */

/* Portions Copyright 2011 Martin Matuska <mm@FreeBSD.org> */
/*
 * ZFS volume emulation driver.
 *
 * Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes.
 * Volumes are accessed through the symbolic links named:
 *
 *	/dev/zvol/<pool_name>/<dataset_name>
 *
 * Volumes are persistent through reboot.  No user command needs to be
 * run before opening and using a device.
 *
 * On FreeBSD ZVOLs are simply GEOM providers like any other storage device
 * in the system, except when they are exposed as plain character devices
 * (volmode=dev).
 */
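/*
 * Illustrative example (not part of this driver): a volume created with
 * "zfs create -V 10G tank/vol" appears as /dev/zvol/tank/vol; with
 * volmode=geom it is also tasted by GEOM like any other disk.
 */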
#include <sys/types.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/errno.h>
#include <sys/cmn_err.h>
#include <sys/spa_impl.h>
#include <sys/dmu_traverse.h>
#include <sys/dnode.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_dir.h>
#include <sys/byteorder.h>
#include <sys/sunddi.h>
#include <sys/dirent.h>
#include <sys/policy.h>
#include <sys/queue.h>
#include <sys/fs/zfs.h>
#include <sys/zfs_ioctl.h>
#include <sys/zfs_znode.h>
#include <sys/zfs_rlock.h>
#include <sys/vdev_impl.h>
#include <sys/vdev_raidz.h>
#include <sys/zil_impl.h>
#include <sys/dataset_kstats.h>
#include <sys/dmu_tx.h>
#include <sys/zfeature.h>
#include <sys/zio_checksum.h>
#include <sys/filio.h>
#include <sys/freebsd_event.h>

#include <geom/geom.h>
#include <sys/zvol_impl.h>

#include "zfs_namecheck.h"
#define	ZVOL_DUMPSIZE		"dumpsize"

#ifdef ZVOL_LOCK_DEBUG
#define	ZVOL_RW_READER		RW_WRITER
#define	ZVOL_RW_READ_HELD	RW_WRITE_HELD
#else
#define	ZVOL_RW_READER		RW_READER
#define	ZVOL_RW_READ_HELD	RW_READ_HELD
#endif
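/*
 * Note: with ZVOL_LOCK_DEBUG defined, "reader" acquisitions of
 * zv_suspend_lock are promoted to writer acquisitions, so the
 * ZVOL_RW_READ_HELD assertions below become strict ownership checks
 * in debug builds.
 */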
enum zvol_geom_state {
	ZVOL_GEOM_UNINIT,
	ZVOL_GEOM_STOPPED,
	ZVOL_GEOM_RUNNING,
};

struct zvol_state_os {
#define	zso_dev		_zso_state._zso_dev
#define	zso_geom	_zso_state._zso_geom
	union {
		/* volmode=dev */
		struct zvol_state_dev {
			struct cdev *zsd_cdev;
			uint64_t zsd_sync_cnt;
			struct selinfo zsd_selinfo;
		} _zso_dev;

		/* volmode=geom */
		struct zvol_state_geom {
			struct g_provider *zsg_provider;
			struct bio_queue_head zsg_queue;
			struct mtx zsg_queue_mtx;
			enum zvol_geom_state zsg_state;
		} _zso_geom;
	} _zso_state;
	int zso_dying;
};

static uint32_t zvol_minors;
SYSCTL_DECL(_vfs_zfs);
SYSCTL_NODE(_vfs_zfs, OID_AUTO, vol, CTLFLAG_RW, 0, "ZFS VOLUME");
SYSCTL_INT(_vfs_zfs_vol, OID_AUTO, mode, CTLFLAG_RWTUN, &zvol_volmode, 0,
    "Expose as GEOM providers (1), device files (2) or neither");
static boolean_t zpool_on_zvol = B_FALSE;
SYSCTL_INT(_vfs_zfs_vol, OID_AUTO, recursive, CTLFLAG_RWTUN, &zpool_on_zvol, 0,
    "Allow zpools to use zvols as vdevs (DANGEROUS)");

/*
 * Toggle unmap functionality.
 */
boolean_t zvol_unmap_enabled = B_TRUE;

SYSCTL_INT(_vfs_zfs_vol, OID_AUTO, unmap_enabled, CTLFLAG_RWTUN,
    &zvol_unmap_enabled, 0, "Enable UNMAP functionality");
/*
 * zvol maximum transfer in one DMU tx.
 */
int zvol_maxphys = DMU_MAX_ACCESS / 2;
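/*
 * zvol_maxphys bounds the size of each dmu_read()/dmu_write() chunk
 * issued by zvol_geom_bio_strategy() below, so a single bio is split
 * into transfers of at most half of DMU_MAX_ACCESS.
 */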
static void zvol_ensure_zilog(zvol_state_t *zv);

static d_open_t zvol_cdev_open;
static d_close_t zvol_cdev_close;
static d_ioctl_t zvol_cdev_ioctl;
static d_read_t zvol_cdev_read;
static d_write_t zvol_cdev_write;
static d_strategy_t zvol_geom_bio_strategy;
static d_kqfilter_t zvol_cdev_kqfilter;
static struct cdevsw zvol_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	D_DISK | D_TRACKCLOSE,
	.d_open =	zvol_cdev_open,
	.d_close =	zvol_cdev_close,
	.d_ioctl =	zvol_cdev_ioctl,
	.d_read =	zvol_cdev_read,
	.d_write =	zvol_cdev_write,
	.d_strategy =	zvol_geom_bio_strategy,
	.d_kqfilter =	zvol_cdev_kqfilter,
};
static void zvol_filter_detach(struct knote *kn);
static int zvol_filter_vnode(struct knote *kn, long hint);

static struct filterops zvol_filterops_vnode = {
	.f_detach =	zvol_filter_detach,
	.f_event =	zvol_filter_vnode,
};
extern uint_t zfs_geom_probe_vdev_key;

struct g_class zfs_zvol_class = {
	.name =		"ZFS::ZVOL",
	.version =	G_VERSION,
};

DECLARE_GEOM_CLASS(zfs_zvol_class, zfs_zvol);
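/*
 * The class itself carries no methods; zvol_os_create_minor() wires
 * zvol_geom_bio_start() and zvol_geom_access() into each per-volume
 * geom it creates (gp->start, gp->access).
 */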
static int zvol_geom_open(struct g_provider *pp, int flag, int count);
static int zvol_geom_close(struct g_provider *pp, int flag, int count);
static void zvol_geom_run(zvol_state_t *zv);
static void zvol_geom_destroy(zvol_state_t *zv);
static int zvol_geom_access(struct g_provider *pp, int acr, int acw, int ace);
static void zvol_geom_worker(void *arg);
static void zvol_geom_bio_start(struct bio *bp);
static int zvol_geom_bio_getattr(struct bio *bp);
/* static d_strategy_t zvol_geom_bio_strategy; (declared elsewhere) */
/*
 * GEOM mode implementation
 */

static int
zvol_geom_open(struct g_provider *pp, int flag, int count)
{
	zvol_state_t *zv;
	int err = 0;
	boolean_t drop_suspend = B_FALSE;

	if (!zpool_on_zvol && tsd_get(zfs_geom_probe_vdev_key) != NULL) {
		/*
		 * If zfs_geom_probe_vdev_key is set, that means that zfs is
		 * attempting to probe geom providers while looking for a
		 * replacement for a missing VDEV.  In this case, the
		 * spa_namespace_lock will not be held, but it is still illegal
		 * to use a zvol as a vdev.  Deadlocks can result if another
		 * thread has spa_namespace_lock.
		 */
		return (SET_ERROR(EOPNOTSUPP));
	}

retry:
	rw_enter(&zvol_state_lock, ZVOL_RW_READER);
	/*
	 * Obtain a copy of private under zvol_state_lock to make sure either
	 * the result of zvol free code setting private to NULL is observed,
	 * or the zv is protected from being freed because of the positive
	 * zv_open_count.
	 */
	zv = pp->private;
	if (zv == NULL) {
		rw_exit(&zvol_state_lock);
		err = SET_ERROR(ENXIO);
		goto out_locked;
	}

	mutex_enter(&zv->zv_state_lock);
	if (zv->zv_zso->zso_dying) {
		rw_exit(&zvol_state_lock);
		err = SET_ERROR(ENXIO);
		goto out_zv_locked;
	}
	ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_GEOM);

	/*
	 * Make sure zvol is not suspended during first open
	 * (hold zv_suspend_lock) and respect proper lock acquisition
	 * ordering - zv_suspend_lock before zv_state_lock.
	 */
	if (zv->zv_open_count == 0) {
		drop_suspend = B_TRUE;
		if (!rw_tryenter(&zv->zv_suspend_lock, ZVOL_RW_READER)) {
			mutex_exit(&zv->zv_state_lock);
			rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER);
			mutex_enter(&zv->zv_state_lock);
			/* Check to see if zv_suspend_lock is needed. */
			if (zv->zv_open_count != 0) {
				rw_exit(&zv->zv_suspend_lock);
				drop_suspend = B_FALSE;
			}
		}
	}
	rw_exit(&zvol_state_lock);

	ASSERT(MUTEX_HELD(&zv->zv_state_lock));

	if (zv->zv_open_count == 0) {
		boolean_t drop_namespace = B_FALSE;

		ASSERT(ZVOL_RW_READ_HELD(&zv->zv_suspend_lock));

		/*
		 * Take spa_namespace_lock to prevent lock inversion when
		 * zvols from one pool are opened as vdevs in another.
		 */
		if (!mutex_owned(&spa_namespace_lock)) {
			if (!mutex_tryenter(&spa_namespace_lock)) {
				mutex_exit(&zv->zv_state_lock);
				rw_exit(&zv->zv_suspend_lock);
				kern_yield(PRI_USER);
				goto retry;
			} else {
				drop_namespace = B_TRUE;
			}
		}
		err = zvol_first_open(zv, !(flag & FWRITE));
		if (drop_namespace)
			mutex_exit(&spa_namespace_lock);
		if (err)
			goto out_zv_locked;
		pp->mediasize = zv->zv_volsize;
		pp->stripeoffset = 0;
		pp->stripesize = zv->zv_volblocksize;
	}

	ASSERT(MUTEX_HELD(&zv->zv_state_lock));

	/*
	 * Check for a bad on-disk format version now since we
	 * lied about owning the dataset readonly before.
	 */
	if ((flag & FWRITE) && ((zv->zv_flags & ZVOL_RDONLY) ||
	    dmu_objset_incompatible_encryption_version(zv->zv_objset))) {
		err = SET_ERROR(EROFS);
		goto out_opened;
	}
	if (zv->zv_flags & ZVOL_EXCL) {
		err = SET_ERROR(EBUSY);
		goto out_opened;
	}
	if (flag & O_EXCL) {
		if (zv->zv_open_count != 0) {
			err = SET_ERROR(EBUSY);
			goto out_opened;
		}
		zv->zv_flags |= ZVOL_EXCL;
	}

	zv->zv_open_count += count;
out_opened:
	if (zv->zv_open_count == 0) {
		zvol_last_close(zv);
		wakeup(zv);
	}
out_zv_locked:
	mutex_exit(&zv->zv_state_lock);
out_locked:
	if (drop_suspend)
		rw_exit(&zv->zv_suspend_lock);
	return (err);
}
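/*
 * The close path mirrors the open path above: when what may be the last
 * reference goes away, zv_suspend_lock is taken with the same
 * tryenter/re-lock dance so the zv_suspend_lock -> zv_state_lock order
 * is preserved.
 */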
static int
zvol_geom_close(struct g_provider *pp, int flag, int count)
{
	zvol_state_t *zv;
	int new_open_count;
	boolean_t drop_suspend = B_TRUE;

	rw_enter(&zvol_state_lock, ZVOL_RW_READER);
	zv = pp->private;
	if (zv == NULL) {
		rw_exit(&zvol_state_lock);
		return (SET_ERROR(ENXIO));
	}

	mutex_enter(&zv->zv_state_lock);
	if (zv->zv_flags & ZVOL_EXCL) {
		ASSERT3U(zv->zv_open_count, ==, 1);
		zv->zv_flags &= ~ZVOL_EXCL;
	}

	ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_GEOM);

	/*
	 * If the open count is zero, this is a spurious close.
	 * That indicates a bug in the kernel / DDI framework.
	 */
	ASSERT3U(zv->zv_open_count, >, 0);

	/*
	 * Make sure zvol is not suspended during last close
	 * (hold zv_suspend_lock) and respect proper lock acquisition
	 * ordering - zv_suspend_lock before zv_state_lock.
	 */
	new_open_count = zv->zv_open_count - count;
	if (new_open_count == 0) {
		if (!rw_tryenter(&zv->zv_suspend_lock, ZVOL_RW_READER)) {
			mutex_exit(&zv->zv_state_lock);
			rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER);
			mutex_enter(&zv->zv_state_lock);
			/* Check to see if zv_suspend_lock is needed. */
			new_open_count = zv->zv_open_count - count;
			if (new_open_count != 0) {
				rw_exit(&zv->zv_suspend_lock);
				drop_suspend = B_FALSE;
			}
		}
	} else {
		drop_suspend = B_FALSE;
	}
	rw_exit(&zvol_state_lock);

	ASSERT(MUTEX_HELD(&zv->zv_state_lock));

	/*
	 * You may get multiple opens, but only one close.
	 */
	zv->zv_open_count = new_open_count;
	if (zv->zv_open_count == 0) {
		ASSERT(ZVOL_RW_READ_HELD(&zv->zv_suspend_lock));
		zvol_last_close(zv);
		wakeup(zv);
	}

	mutex_exit(&zv->zv_state_lock);

	if (drop_suspend)
		rw_exit(&zv->zv_suspend_lock);
	return (0);
}
);
421 zvol_geom_run(zvol_state_t
*zv
)
423 struct zvol_state_geom
*zsg
= &zv
->zv_zso
->zso_geom
;
424 struct g_provider
*pp
= zsg
->zsg_provider
;
426 ASSERT3S(zv
->zv_volmode
, ==, ZFS_VOLMODE_GEOM
);
428 g_error_provider(pp
, 0);
430 kproc_kthread_add(zvol_geom_worker
, zv
, &system_proc
, NULL
, 0, 0,
431 "zfskern", "zvol %s", pp
->name
+ sizeof (ZVOL_DRIVER
));
static void
zvol_geom_destroy(zvol_state_t *zv)
{
	struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom;
	struct g_provider *pp = zsg->zsg_provider;

	ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_GEOM);

	g_topology_assert();

	mutex_enter(&zv->zv_state_lock);
	VERIFY3S(zsg->zsg_state, ==, ZVOL_GEOM_RUNNING);
	mutex_exit(&zv->zv_state_lock);
	zsg->zsg_provider = NULL;
	g_wither_geom(pp->geom, ENXIO);
}
void
zvol_wait_close(zvol_state_t *zv)
{

	if (zv->zv_volmode != ZFS_VOLMODE_GEOM)
		return;
	mutex_enter(&zv->zv_state_lock);
	zv->zv_zso->zso_dying = B_TRUE;

	if (zv->zv_open_count)
		msleep(zv, &zv->zv_state_lock,
		    PRIBIO, "zvol:dying", 10*hz);
	mutex_exit(&zv->zv_state_lock);
}
static int
zvol_geom_access(struct g_provider *pp, int acr, int acw, int ace)
{
	int count, error, flags;

	g_topology_assert();

	/*
	 * To make it easier we expect either open or close, but not both
	 * at the same time.
	 */
	KASSERT((acr >= 0 && acw >= 0 && ace >= 0) ||
	    (acr <= 0 && acw <= 0 && ace <= 0),
	    ("Unsupported access request to %s (acr=%d, acw=%d, ace=%d).",
	    pp->name, acr, acw, ace));

	if (pp->private == NULL) {
		if (acr <= 0 && acw <= 0 && ace <= 0)
			return (0);
		return (pp->error);
	}

	/*
	 * We don't pass FEXCL flag to zvol_geom_open()/zvol_geom_close() if
	 * ace != 0, because GEOM already handles that and handles it a bit
	 * differently. GEOM allows for multiple read/exclusive consumers and
	 * ZFS allows only one exclusive consumer, no matter if it is reader or
	 * writer. I like better the way GEOM works so I'll leave it for GEOM
	 * to decide what to do.
	 */

	count = acr + acw + ace;
	if (count == 0)
		return (0);

	flags = 0;
	if (acr != 0 || ace != 0)
		flags |= FREAD;
	if (acw != 0)
		flags |= FWRITE;

	g_topology_unlock();
	if (count > 0)
		error = zvol_geom_open(pp, flags, count);
	else
		error = zvol_geom_close(pp, flags, -count);
	g_topology_lock();
	return (error);
}
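/*
 * Worked example: a GEOM consumer calling g_access(cp, 1, 1, 0) arrives
 * here as acr=1, acw=1, ace=0, so count = 2 and the provider is opened
 * with FREAD|FWRITE; the matching g_access(cp, -1, -1, 0) yields
 * count = -2 and closes it again.
 */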
static void
zvol_geom_worker(void *arg)
{
	zvol_state_t *zv = arg;
	struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom;
	struct bio *bp;

	ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_GEOM);

	thread_lock(curthread);
	sched_prio(curthread, PRIBIO);
	thread_unlock(curthread);

	for (;;) {
		mtx_lock(&zsg->zsg_queue_mtx);
		bp = bioq_takefirst(&zsg->zsg_queue);
		if (bp == NULL) {
			if (zsg->zsg_state == ZVOL_GEOM_STOPPED) {
				zsg->zsg_state = ZVOL_GEOM_RUNNING;
				wakeup(&zsg->zsg_state);
				mtx_unlock(&zsg->zsg_queue_mtx);
				kthread_exit();
			}
			msleep(&zsg->zsg_queue, &zsg->zsg_queue_mtx,
			    PRIBIO | PDROP, "zvol:io", 0);
			continue;
		}
		mtx_unlock(&zsg->zsg_queue_mtx);
		zvol_geom_bio_strategy(bp);
	}
}
static void
zvol_geom_bio_start(struct bio *bp)
{
	zvol_state_t *zv = bp->bio_to->private;
	struct zvol_state_geom *zsg;
	boolean_t first;

	if (zv == NULL) {
		g_io_deliver(bp, ENXIO);
		return;
	}
	if (bp->bio_cmd == BIO_GETATTR) {
		if (zvol_geom_bio_getattr(bp))
			g_io_deliver(bp, EOPNOTSUPP);
		return;
	}

	if (!THREAD_CAN_SLEEP()) {
		zsg = &zv->zv_zso->zso_geom;
		mtx_lock(&zsg->zsg_queue_mtx);
		first = (bioq_first(&zsg->zsg_queue) == NULL);
		bioq_insert_tail(&zsg->zsg_queue, bp);
		mtx_unlock(&zsg->zsg_queue_mtx);
		if (first)
			wakeup_one(&zsg->zsg_queue);
		return;
	}

	zvol_geom_bio_strategy(bp);
}
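/*
 * Bios arriving from contexts that must not sleep (e.g. GEOM's g_up and
 * g_down threads) are queued above and picked up by zvol_geom_worker(),
 * which runs in a kthread where zvol_geom_bio_strategy() is free to
 * block in the DMU.
 */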
static int
zvol_geom_bio_getattr(struct bio *bp)
{
	zvol_state_t *zv;

	zv = bp->bio_to->private;
	ASSERT3P(zv, !=, NULL);

	spa_t *spa = dmu_objset_spa(zv->zv_objset);
	uint64_t refd, avail, usedobjs, availobjs;

	if (g_handleattr_int(bp, "GEOM::candelete", 1))
		return (0);
	if (strcmp(bp->bio_attribute, "blocksavail") == 0) {
		dmu_objset_space(zv->zv_objset, &refd, &avail,
		    &usedobjs, &availobjs);
		if (g_handleattr_off_t(bp, "blocksavail", avail / DEV_BSIZE))
			return (0);
	} else if (strcmp(bp->bio_attribute, "blocksused") == 0) {
		dmu_objset_space(zv->zv_objset, &refd, &avail,
		    &usedobjs, &availobjs);
		if (g_handleattr_off_t(bp, "blocksused", refd / DEV_BSIZE))
			return (0);
	} else if (strcmp(bp->bio_attribute, "poolblocksavail") == 0) {
		avail = metaslab_class_get_space(spa_normal_class(spa));
		avail -= metaslab_class_get_alloc(spa_normal_class(spa));
		if (g_handleattr_off_t(bp, "poolblocksavail",
		    avail / DEV_BSIZE))
			return (0);
	} else if (strcmp(bp->bio_attribute, "poolblocksused") == 0) {
		refd = metaslab_class_get_alloc(spa_normal_class(spa));
		if (g_handleattr_off_t(bp, "poolblocksused", refd / DEV_BSIZE))
			return (0);
	}
	return (1);
}
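/*
 * Note that the blocksavail/blocksused and poolblocksavail/
 * poolblocksused attributes are reported in DEV_BSIZE (512-byte)
 * units, matching what the DIOCGATTR handler in zvol_cdev_ioctl()
 * returns for volmode=dev volumes.
 */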
static void
zvol_filter_detach(struct knote *kn)
{
	zvol_state_t *zv;
	struct zvol_state_dev *zsd;

	zv = kn->kn_hook;
	zsd = &zv->zv_zso->zso_dev;

	knlist_remove(&zsd->zsd_selinfo.si_note, kn, 0);
}
static int
zvol_filter_vnode(struct knote *kn, long hint)
{
	kn->kn_fflags |= kn->kn_sfflags & hint;

	return (kn->kn_fflags != 0);
}
static int
zvol_cdev_kqfilter(struct cdev *dev, struct knote *kn)
{
	zvol_state_t *zv;
	struct zvol_state_dev *zsd;

	zv = dev->si_drv2;
	zsd = &zv->zv_zso->zso_dev;

	if (kn->kn_filter != EVFILT_VNODE)
		return (EINVAL);

	/* XXX: extend support for other NOTE_* events */
	if (kn->kn_sfflags != NOTE_ATTRIB)
		return (EINVAL);

	kn->kn_fop = &zvol_filterops_vnode;
	kn->kn_hook = zv;
	knlist_add(&zsd->zsd_selinfo.si_note, kn, 0);

	return (0);
}
static void
zvol_geom_bio_strategy(struct bio *bp)
{
	zvol_state_t *zv;
	uint64_t off, volsize;
	size_t resid;
	char *addr;
	objset_t *os;
	zfs_locked_range_t *lr;
	int error = 0;
	boolean_t doread = B_FALSE;
	boolean_t is_dumpified;
	boolean_t sync = B_FALSE;

	if (bp->bio_to)
		zv = bp->bio_to->private;
	else
		zv = bp->bio_dev->si_drv2;

	if (zv == NULL) {
		error = SET_ERROR(ENXIO);
		goto out;
	}

	rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER);

	switch (bp->bio_cmd) {
	case BIO_READ:
		doread = B_TRUE;
		break;
	case BIO_WRITE:
	case BIO_FLUSH:
	case BIO_DELETE:
		if (zv->zv_flags & ZVOL_RDONLY) {
			error = SET_ERROR(EROFS);
			goto resume;
		}
		zvol_ensure_zilog(zv);
		if (bp->bio_cmd == BIO_FLUSH) {
			sync = B_TRUE;
			goto sync;
		}
		break;
	default:
		error = SET_ERROR(EOPNOTSUPP);
		goto resume;
	}

	off = bp->bio_offset;
	volsize = zv->zv_volsize;

	os = zv->zv_objset;
	ASSERT3P(os, !=, NULL);

	addr = bp->bio_data;
	resid = bp->bio_length;

	if (resid > 0 && off >= volsize) {
		error = SET_ERROR(EIO);
		goto resume;
	}

	is_dumpified = B_FALSE;
	sync = !doread && !is_dumpified &&
	    zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;

	/*
	 * There must be no buffer changes when doing a dmu_sync() because
	 * we can't change the data whilst calculating the checksum.
	 */
	lr = zfs_rangelock_enter(&zv->zv_rangelock, off, resid,
	    doread ? RL_READER : RL_WRITER);

	if (bp->bio_cmd == BIO_DELETE) {
		dmu_tx_t *tx = dmu_tx_create(zv->zv_objset);
		error = dmu_tx_assign(tx, TXG_WAIT);
		if (error != 0) {
			dmu_tx_abort(tx);
		} else {
			zvol_log_truncate(zv, tx, off, resid, sync);
			dmu_tx_commit(tx);
			error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ,
			    off, resid);
			resid = 0;
		}
		goto unlock;
	}
	while (resid != 0 && off < volsize) {
		size_t size = MIN(resid, zvol_maxphys);
		if (doread) {
			error = dmu_read(os, ZVOL_OBJ, off, size, addr,
			    DMU_READ_PREFETCH);
		} else {
			dmu_tx_t *tx = dmu_tx_create(os);
			dmu_tx_hold_write_by_dnode(tx, zv->zv_dn, off, size);
			error = dmu_tx_assign(tx, TXG_WAIT);
			if (error) {
				dmu_tx_abort(tx);
			} else {
				dmu_write(os, ZVOL_OBJ, off, size, addr, tx);
				zvol_log_write(zv, tx, off, size, sync);
				dmu_tx_commit(tx);
			}
		}
		if (error) {
			/* Convert checksum errors into IO errors. */
			if (error == ECKSUM)
				error = SET_ERROR(EIO);
			break;
		}
		off += size;
		addr += size;
		resid -= size;
	}
unlock:
	zfs_rangelock_exit(lr);

	bp->bio_completed = bp->bio_length - resid;
	if (bp->bio_completed < bp->bio_length && off > volsize)
		error = SET_ERROR(EINVAL);

	switch (bp->bio_cmd) {
	case BIO_READ:
		dataset_kstats_update_read_kstats(&zv->zv_kstat,
		    bp->bio_completed);
		break;
	case BIO_WRITE:
		dataset_kstats_update_write_kstats(&zv->zv_kstat,
		    bp->bio_completed);
		break;
	default:
		break;
	}

sync:
	if (sync)
		zil_commit(zv->zv_zilog, ZVOL_OBJ);
resume:
	rw_exit(&zv->zv_suspend_lock);
out:
	if (bp->bio_to)
		g_io_deliver(bp, error);
	else
		biofinish(bp, NULL, error);
}
/*
 * Character device mode implementation
 */

static int
zvol_cdev_read(struct cdev *dev, struct uio *uio_s, int ioflag)
{
	zvol_state_t *zv;
	uint64_t volsize;
	zfs_locked_range_t *lr;
	int error = 0;
	zfs_uio_t uio;

	zv = dev->si_drv2;

	zfs_uio_init(&uio, uio_s);

	volsize = zv->zv_volsize;
	/*
	 * uio_loffset == volsize isn't an error as
	 * it's required for EOF processing.
	 */
	if (zfs_uio_resid(&uio) > 0 &&
	    (zfs_uio_offset(&uio) < 0 || zfs_uio_offset(&uio) > volsize))
		return (SET_ERROR(EIO));

	ssize_t start_resid = zfs_uio_resid(&uio);
	lr = zfs_rangelock_enter(&zv->zv_rangelock, zfs_uio_offset(&uio),
	    zfs_uio_resid(&uio), RL_READER);
	while (zfs_uio_resid(&uio) > 0 && zfs_uio_offset(&uio) < volsize) {
		uint64_t bytes = MIN(zfs_uio_resid(&uio), DMU_MAX_ACCESS >> 1);

		/* Don't read past the end. */
		if (bytes > volsize - zfs_uio_offset(&uio))
			bytes = volsize - zfs_uio_offset(&uio);

		error = dmu_read_uio_dnode(zv->zv_dn, &uio, bytes);
		if (error) {
			/* Convert checksum errors into IO errors. */
			if (error == ECKSUM)
				error = SET_ERROR(EIO);
			break;
		}
	}
	zfs_rangelock_exit(lr);
	int64_t nread = start_resid - zfs_uio_resid(&uio);
	dataset_kstats_update_read_kstats(&zv->zv_kstat, nread);

	return (error);
}
static int
zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag)
{
	zvol_state_t *zv;
	uint64_t volsize;
	zfs_locked_range_t *lr;
	int error = 0;
	boolean_t sync;
	zfs_uio_t uio;

	zv = dev->si_drv2;

	volsize = zv->zv_volsize;

	zfs_uio_init(&uio, uio_s);

	if (zfs_uio_resid(&uio) > 0 &&
	    (zfs_uio_offset(&uio) < 0 || zfs_uio_offset(&uio) > volsize))
		return (SET_ERROR(EIO));

	ssize_t start_resid = zfs_uio_resid(&uio);
	sync = (ioflag & IO_SYNC) ||
	    (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS);

	rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER);
	zvol_ensure_zilog(zv);

	lr = zfs_rangelock_enter(&zv->zv_rangelock, zfs_uio_offset(&uio),
	    zfs_uio_resid(&uio), RL_WRITER);
	while (zfs_uio_resid(&uio) > 0 && zfs_uio_offset(&uio) < volsize) {
		uint64_t bytes = MIN(zfs_uio_resid(&uio), DMU_MAX_ACCESS >> 1);
		uint64_t off = zfs_uio_offset(&uio);
		dmu_tx_t *tx = dmu_tx_create(zv->zv_objset);

		if (bytes > volsize - off)	/* Don't write past the end. */
			bytes = volsize - off;

		dmu_tx_hold_write_by_dnode(tx, zv->zv_dn, off, bytes);
		error = dmu_tx_assign(tx, TXG_WAIT);
		if (error) {
			dmu_tx_abort(tx);
			break;
		}
		error = dmu_write_uio_dnode(zv->zv_dn, &uio, bytes, tx);
		if (error == 0)
			zvol_log_write(zv, tx, off, bytes, sync);
		dmu_tx_commit(tx);

		if (error)
			break;
	}
	zfs_rangelock_exit(lr);
	int64_t nwritten = start_resid - zfs_uio_resid(&uio);
	dataset_kstats_update_write_kstats(&zv->zv_kstat, nwritten);
	if (sync)
		zil_commit(zv->zv_zilog, ZVOL_OBJ);
	rw_exit(&zv->zv_suspend_lock);
	return (error);
}
static int
zvol_cdev_open(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	zvol_state_t *zv;
	struct zvol_state_dev *zsd;
	int err = 0;
	boolean_t drop_suspend = B_FALSE;

retry:
	rw_enter(&zvol_state_lock, ZVOL_RW_READER);
	/*
	 * Obtain a copy of si_drv2 under zvol_state_lock to make sure either
	 * the result of zvol free code setting si_drv2 to NULL is observed,
	 * or the zv is protected from being freed because of the positive
	 * zv_open_count.
	 */
	zv = dev->si_drv2;
	if (zv == NULL) {
		rw_exit(&zvol_state_lock);
		err = SET_ERROR(ENXIO);
		goto out_locked;
	}

	mutex_enter(&zv->zv_state_lock);
	if (zv->zv_zso->zso_dying) {
		rw_exit(&zvol_state_lock);
		err = SET_ERROR(ENXIO);
		goto out_zv_locked;
	}
	ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_DEV);

	/*
	 * Make sure zvol is not suspended during first open
	 * (hold zv_suspend_lock) and respect proper lock acquisition
	 * ordering - zv_suspend_lock before zv_state_lock.
	 */
	if (zv->zv_open_count == 0) {
		drop_suspend = B_TRUE;
		if (!rw_tryenter(&zv->zv_suspend_lock, ZVOL_RW_READER)) {
			mutex_exit(&zv->zv_state_lock);
			rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER);
			mutex_enter(&zv->zv_state_lock);
			/* Check to see if zv_suspend_lock is needed. */
			if (zv->zv_open_count != 0) {
				rw_exit(&zv->zv_suspend_lock);
				drop_suspend = B_FALSE;
			}
		}
	}
	rw_exit(&zvol_state_lock);

	ASSERT(MUTEX_HELD(&zv->zv_state_lock));

	if (zv->zv_open_count == 0) {
		boolean_t drop_namespace = B_FALSE;

		ASSERT(ZVOL_RW_READ_HELD(&zv->zv_suspend_lock));

		/*
		 * Take spa_namespace_lock to prevent lock inversion when
		 * zvols from one pool are opened as vdevs in another.
		 */
		if (!mutex_owned(&spa_namespace_lock)) {
			if (!mutex_tryenter(&spa_namespace_lock)) {
				mutex_exit(&zv->zv_state_lock);
				rw_exit(&zv->zv_suspend_lock);
				kern_yield(PRI_USER);
				goto retry;
			} else {
				drop_namespace = B_TRUE;
			}
		}
		err = zvol_first_open(zv, !(flags & FWRITE));
		if (drop_namespace)
			mutex_exit(&spa_namespace_lock);
		if (err)
			goto out_zv_locked;
	}

	ASSERT(MUTEX_HELD(&zv->zv_state_lock));

	if ((flags & FWRITE) && (zv->zv_flags & ZVOL_RDONLY)) {
		err = SET_ERROR(EROFS);
		goto out_opened;
	}
	if (zv->zv_flags & ZVOL_EXCL) {
		err = SET_ERROR(EBUSY);
		goto out_opened;
	}
	if (flags & O_EXCL) {
		if (zv->zv_open_count != 0) {
			err = SET_ERROR(EBUSY);
			goto out_opened;
		}
		zv->zv_flags |= ZVOL_EXCL;
	}

	zv->zv_open_count++;
	if (flags & O_SYNC) {
		zsd = &zv->zv_zso->zso_dev;
		zsd->zsd_sync_cnt++;
		if (zsd->zsd_sync_cnt == 1 &&
		    (zv->zv_flags & ZVOL_WRITTEN_TO) != 0)
			zil_async_to_sync(zv->zv_zilog, ZVOL_OBJ);
	}
out_opened:
	if (zv->zv_open_count == 0) {
		zvol_last_close(zv);
		wakeup(zv);
	}
out_zv_locked:
	mutex_exit(&zv->zv_state_lock);
out_locked:
	if (drop_suspend)
		rw_exit(&zv->zv_suspend_lock);
	return (err);
}
static int
zvol_cdev_close(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	zvol_state_t *zv;
	struct zvol_state_dev *zsd;
	boolean_t drop_suspend = B_TRUE;

	rw_enter(&zvol_state_lock, ZVOL_RW_READER);
	zv = dev->si_drv2;
	if (zv == NULL) {
		rw_exit(&zvol_state_lock);
		return (SET_ERROR(ENXIO));
	}

	mutex_enter(&zv->zv_state_lock);
	if (zv->zv_flags & ZVOL_EXCL) {
		ASSERT3U(zv->zv_open_count, ==, 1);
		zv->zv_flags &= ~ZVOL_EXCL;
	}

	ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_DEV);

	/*
	 * If the open count is zero, this is a spurious close.
	 * That indicates a bug in the kernel / DDI framework.
	 */
	ASSERT3U(zv->zv_open_count, >, 0);
	/*
	 * Make sure zvol is not suspended during last close
	 * (hold zv_suspend_lock) and respect proper lock acquisition
	 * ordering - zv_suspend_lock before zv_state_lock.
	 */
	if (zv->zv_open_count == 1) {
		if (!rw_tryenter(&zv->zv_suspend_lock, ZVOL_RW_READER)) {
			mutex_exit(&zv->zv_state_lock);
			rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER);
			mutex_enter(&zv->zv_state_lock);
			/* Check to see if zv_suspend_lock is needed. */
			if (zv->zv_open_count != 1) {
				rw_exit(&zv->zv_suspend_lock);
				drop_suspend = B_FALSE;
			}
		}
	} else {
		drop_suspend = B_FALSE;
	}
	rw_exit(&zvol_state_lock);

	ASSERT(MUTEX_HELD(&zv->zv_state_lock));

	/*
	 * You may get multiple opens, but only one close.
	 */
	zv->zv_open_count--;
	if (flags & O_SYNC) {
		zsd = &zv->zv_zso->zso_dev;
		zsd->zsd_sync_cnt--;
	}

	if (zv->zv_open_count == 0) {
		ASSERT(ZVOL_RW_READ_HELD(&zv->zv_suspend_lock));
		zvol_last_close(zv);
		wakeup(zv);
	}

	mutex_exit(&zv->zv_state_lock);

	if (drop_suspend)
		rw_exit(&zv->zv_suspend_lock);
	return (0);
}
static int
zvol_cdev_ioctl(struct cdev *dev, ulong_t cmd, caddr_t data,
    int fflag, struct thread *td)
{
	zvol_state_t *zv;
	zfs_locked_range_t *lr;
	off_t offset, length;
	int error;
	boolean_t sync;

	zv = dev->si_drv2;

	error = 0;
	KASSERT(zv->zv_open_count > 0,
	    ("Device with zero access count in %s", __func__));

	switch (cmd) {
	case DIOCGSECTORSIZE:
		*(uint32_t *)data = DEV_BSIZE;
		break;
	case DIOCGMEDIASIZE:
		*(off_t *)data = zv->zv_volsize;
		break;
	case DIOCGFLUSH:
		rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER);
		if (zv->zv_zilog != NULL)
			zil_commit(zv->zv_zilog, ZVOL_OBJ);
		rw_exit(&zv->zv_suspend_lock);
		break;
	case DIOCGDELETE:
		if (!zvol_unmap_enabled)
			break;

		offset = ((off_t *)data)[0];
		length = ((off_t *)data)[1];
		if ((offset % DEV_BSIZE) != 0 || (length % DEV_BSIZE) != 0 ||
		    offset < 0 || offset >= zv->zv_volsize ||
		    length <= 0) {
			printf("%s: offset=%jd length=%jd\n", __func__, offset,
			    length);
			error = SET_ERROR(EINVAL);
			break;
		}
		rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER);
		zvol_ensure_zilog(zv);
		lr = zfs_rangelock_enter(&zv->zv_rangelock, offset, length,
		    RL_WRITER);
		dmu_tx_t *tx = dmu_tx_create(zv->zv_objset);
		error = dmu_tx_assign(tx, TXG_WAIT);
		if (error != 0) {
			sync = B_FALSE;
			dmu_tx_abort(tx);
		} else {
			sync = (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS);
			zvol_log_truncate(zv, tx, offset, length, sync);
			dmu_tx_commit(tx);
			error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ,
			    offset, length);
		}
		zfs_rangelock_exit(lr);
		if (sync)
			zil_commit(zv->zv_zilog, ZVOL_OBJ);
		rw_exit(&zv->zv_suspend_lock);
		break;
	case DIOCGSTRIPESIZE:
		*(off_t *)data = zv->zv_volblocksize;
		break;
	case DIOCGSTRIPEOFFSET:
		*(off_t *)data = 0;
		break;
	case DIOCGATTR: {
		spa_t *spa = dmu_objset_spa(zv->zv_objset);
		struct diocgattr_arg *arg = (struct diocgattr_arg *)data;
		uint64_t refd, avail, usedobjs, availobjs;

		if (strcmp(arg->name, "GEOM::candelete") == 0)
			arg->value.i = 1;
		else if (strcmp(arg->name, "blocksavail") == 0) {
			dmu_objset_space(zv->zv_objset, &refd, &avail,
			    &usedobjs, &availobjs);
			arg->value.off = avail / DEV_BSIZE;
		} else if (strcmp(arg->name, "blocksused") == 0) {
			dmu_objset_space(zv->zv_objset, &refd, &avail,
			    &usedobjs, &availobjs);
			arg->value.off = refd / DEV_BSIZE;
		} else if (strcmp(arg->name, "poolblocksavail") == 0) {
			avail = metaslab_class_get_space(spa_normal_class(spa));
			avail -= metaslab_class_get_alloc(
			    spa_normal_class(spa));
			arg->value.off = avail / DEV_BSIZE;
		} else if (strcmp(arg->name, "poolblocksused") == 0) {
			refd = metaslab_class_get_alloc(spa_normal_class(spa));
			arg->value.off = refd / DEV_BSIZE;
		} else
			error = SET_ERROR(ENOIOCTL);
		break;
	}
	case FIOSEEKHOLE:
	case FIOSEEKDATA: {
		off_t *off = (off_t *)data;
		uint64_t noff;
		boolean_t hole;

		hole = (cmd == FIOSEEKHOLE);
		noff = *off;
		rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER);
		error = dmu_offset_next(zv->zv_objset, ZVOL_OBJ, hole, &noff);
		rw_exit(&zv->zv_suspend_lock);
		*off = noff;
		break;
	}
	default:
		error = SET_ERROR(ENOIOCTL);
	}

	return (error);
}
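/*
 * Illustrative userland sketch (assumes <sys/disk.h> and a zvol named
 * tank/vol); DIOCGMEDIASIZE fills in an off_t with the volume size:
 *
 *	off_t sz;
 *	int fd = open("/dev/zvol/tank/vol", O_RDONLY);
 *	if (fd >= 0 && ioctl(fd, DIOCGMEDIASIZE, &sz) == 0)
 *		printf("%jd bytes\n", (intmax_t)sz);
 */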
static void
zvol_ensure_zilog(zvol_state_t *zv)
{
	ASSERT(ZVOL_RW_READ_HELD(&zv->zv_suspend_lock));

	/*
	 * Open a ZIL if this is the first time we have written to this
	 * zvol. We protect zv->zv_zilog with zv_suspend_lock rather
	 * than zv_state_lock so that we don't need to acquire an
	 * additional lock in this path.
	 */
	if (zv->zv_zilog == NULL) {
		if (!rw_tryupgrade(&zv->zv_suspend_lock)) {
			rw_exit(&zv->zv_suspend_lock);
			rw_enter(&zv->zv_suspend_lock, RW_WRITER);
		}
		if (zv->zv_zilog == NULL) {
			zv->zv_zilog = zil_open(zv->zv_objset,
			    zvol_get_data, &zv->zv_kstat.dk_zil_sums);
			zv->zv_flags |= ZVOL_WRITTEN_TO;
			/* replay / destroy done in zvol_os_create_minor() */
			VERIFY0(zv->zv_zilog->zl_header->zh_flags &
			    ZIL_REPLAY_NEEDED);
		}
		rw_downgrade(&zv->zv_suspend_lock);
	}
}
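/*
 * The tryupgrade above can fail when other readers hold
 * zv_suspend_lock; in that case the lock is dropped and retaken as
 * writer, and the zv_zilog == NULL test is repeated because another
 * writer may have opened the ZIL in the window.
 */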
int
zvol_os_is_zvol(const char *device)
{
	return (device && strncmp(device, ZVOL_DIR, strlen(ZVOL_DIR)) == 0);
}
void
zvol_os_rename_minor(zvol_state_t *zv, const char *newname)
{
	ASSERT(RW_LOCK_HELD(&zvol_state_lock));
	ASSERT(MUTEX_HELD(&zv->zv_state_lock));

	/* Move to a new hashtable entry.  */
	zv->zv_hash = zvol_name_hash(zv->zv_name);
	hlist_del(&zv->zv_hlink);
	hlist_add_head(&zv->zv_hlink, ZVOL_HT_HEAD(zv->zv_hash));

	if (zv->zv_volmode == ZFS_VOLMODE_GEOM) {
		struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom;
		struct g_provider *pp = zsg->zsg_provider;
		struct g_geom *gp;

		g_topology_lock();
		gp = pp->geom;
		ASSERT3P(gp, !=, NULL);

		zsg->zsg_provider = NULL;
		g_wither_provider(pp, ENXIO);

		pp = g_new_providerf(gp, "%s/%s", ZVOL_DRIVER, newname);
		pp->flags |= G_PF_DIRECT_RECEIVE | G_PF_DIRECT_SEND;
		pp->sectorsize = DEV_BSIZE;
		pp->mediasize = zv->zv_volsize;
		pp->private = zv;
		zsg->zsg_provider = pp;
		g_error_provider(pp, 0);
		g_topology_unlock();
	} else if (zv->zv_volmode == ZFS_VOLMODE_DEV) {
		struct zvol_state_dev *zsd = &zv->zv_zso->zso_dev;
		struct cdev *dev;
		struct make_dev_args args;

		dev = zsd->zsd_cdev;
		if (dev != NULL) {
			destroy_dev(dev);
			dev = zsd->zsd_cdev = NULL;
			if (zv->zv_open_count > 0) {
				zv->zv_flags &= ~ZVOL_EXCL;
				zv->zv_open_count = 0;
				/* XXX  need suspend lock but lock order */
				zvol_last_close(zv);
			}
		}

		make_dev_args_init(&args);
		args.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
		args.mda_devsw = &zvol_cdevsw;
		args.mda_uid = UID_ROOT;
		args.mda_gid = GID_OPERATOR;
		args.mda_mode = 0640;
		args.mda_si_drv2 = zv;
		if (make_dev_s(&args, &dev, "%s/%s", ZVOL_DRIVER, newname)
		    == 0) {
#if __FreeBSD_version > 1300130
			dev->si_iosize_max = maxphys;
#else
			dev->si_iosize_max = MAXPHYS;
#endif
			zsd->zsd_cdev = dev;
		}
	}
	strlcpy(zv->zv_name, newname, sizeof (zv->zv_name));
}
/*
 * Remove minor node for the specified volume.
 */
void
zvol_os_free(zvol_state_t *zv)
{
	ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock));
	ASSERT(!MUTEX_HELD(&zv->zv_state_lock));
	ASSERT0(zv->zv_open_count);

	ZFS_LOG(1, "ZVOL %s destroyed.", zv->zv_name);

	rw_destroy(&zv->zv_suspend_lock);
	zfs_rangelock_fini(&zv->zv_rangelock);

	if (zv->zv_volmode == ZFS_VOLMODE_GEOM) {
		struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom;
		struct g_provider *pp __maybe_unused = zsg->zsg_provider;

		ASSERT3P(pp->private, ==, NULL);

		g_topology_lock();
		zvol_geom_destroy(zv);
		g_topology_unlock();
		mtx_destroy(&zsg->zsg_queue_mtx);
	} else if (zv->zv_volmode == ZFS_VOLMODE_DEV) {
		struct zvol_state_dev *zsd = &zv->zv_zso->zso_dev;
		struct cdev *dev = zsd->zsd_cdev;

		if (dev != NULL) {
			ASSERT3P(dev->si_drv2, ==, NULL);
			destroy_dev(dev);
			knlist_clear(&zsd->zsd_selinfo.si_note, 0);
			knlist_destroy(&zsd->zsd_selinfo.si_note);
		}
	}

	mutex_destroy(&zv->zv_state_lock);
	dataset_kstats_destroy(&zv->zv_kstat);
	kmem_free(zv->zv_zso, sizeof (struct zvol_state_os));
	kmem_free(zv, sizeof (zvol_state_t));
	zvol_minors--;
}
/*
 * Create a minor node (plus a whole lot more) for the specified volume.
 */
int
zvol_os_create_minor(const char *name)
{
	zvol_state_t *zv;
	objset_t *os;
	dmu_object_info_t *doi;
	uint64_t volsize;
	uint64_t volmode, hash;
	int error;

	ZFS_LOG(1, "Creating ZVOL %s...", name);
	hash = zvol_name_hash(name);
	if ((zv = zvol_find_by_name_hash(name, hash, RW_NONE)) != NULL) {
		ASSERT(MUTEX_HELD(&zv->zv_state_lock));
		mutex_exit(&zv->zv_state_lock);
		return (SET_ERROR(EEXIST));
	}

	DROP_GIANT();

	doi = kmem_alloc(sizeof (dmu_object_info_t), KM_SLEEP);

	/* Lie and say we're read-only. */
	error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, B_TRUE, FTAG, &os);
	if (error)
		goto out_doi;

	error = dmu_object_info(os, ZVOL_OBJ, doi);
	if (error)
		goto out_dmu_objset_disown;

	error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
	if (error)
		goto out_dmu_objset_disown;

	error = dsl_prop_get_integer(name,
	    zfs_prop_to_name(ZFS_PROP_VOLMODE), &volmode, NULL);
	if (error || volmode == ZFS_VOLMODE_DEFAULT)
		volmode = zvol_volmode;
	error = 0;

	/*
	 * zvol_alloc equivalent ...
	 */
	zv = kmem_zalloc(sizeof (*zv), KM_SLEEP);
	zv->zv_hash = hash;
	mutex_init(&zv->zv_state_lock, NULL, MUTEX_DEFAULT, NULL);
	zv->zv_zso = kmem_zalloc(sizeof (struct zvol_state_os), KM_SLEEP);
	zv->zv_volmode = volmode;
	if (zv->zv_volmode == ZFS_VOLMODE_GEOM) {
		struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom;
		struct g_provider *pp;
		struct g_geom *gp;

		zsg->zsg_state = ZVOL_GEOM_UNINIT;
		mtx_init(&zsg->zsg_queue_mtx, "zvol", NULL, MTX_DEF);

		g_topology_lock();
		gp = g_new_geomf(&zfs_zvol_class, "zfs::zvol::%s", name);
		gp->start = zvol_geom_bio_start;
		gp->access = zvol_geom_access;
		pp = g_new_providerf(gp, "%s/%s", ZVOL_DRIVER, name);
		pp->flags |= G_PF_DIRECT_RECEIVE | G_PF_DIRECT_SEND;
		pp->sectorsize = DEV_BSIZE;
		pp->mediasize = 0;
		pp->private = zv;

		zsg->zsg_provider = pp;
		bioq_init(&zsg->zsg_queue);
	} else if (zv->zv_volmode == ZFS_VOLMODE_DEV) {
		struct zvol_state_dev *zsd = &zv->zv_zso->zso_dev;
		struct cdev *dev;
		struct make_dev_args args;

		make_dev_args_init(&args);
		args.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
		args.mda_devsw = &zvol_cdevsw;
		args.mda_uid = UID_ROOT;
		args.mda_gid = GID_OPERATOR;
		args.mda_mode = 0640;
		args.mda_si_drv2 = zv;
		if (make_dev_s(&args, &dev, "%s/%s", ZVOL_DRIVER, name)
		    == 0) {
#if __FreeBSD_version > 1300130
			dev->si_iosize_max = maxphys;
#else
			dev->si_iosize_max = MAXPHYS;
#endif
			zsd->zsd_cdev = dev;
			knlist_init_sx(&zsd->zsd_selinfo.si_note,
			    &zv->zv_state_lock);
		}
	}
	(void) strlcpy(zv->zv_name, name, MAXPATHLEN);
	rw_init(&zv->zv_suspend_lock, NULL, RW_DEFAULT, NULL);
	zfs_rangelock_init(&zv->zv_rangelock, NULL, NULL);

	if (dmu_objset_is_snapshot(os) || !spa_writeable(dmu_objset_spa(os)))
		zv->zv_flags |= ZVOL_RDONLY;

	zv->zv_volblocksize = doi->doi_data_block_size;
	zv->zv_volsize = volsize;
	zv->zv_objset = os;

	ASSERT3P(zv->zv_kstat.dk_kstats, ==, NULL);
	error = dataset_kstats_create(&zv->zv_kstat, zv->zv_objset);
	if (error)
		goto out_dmu_objset_disown;
	ASSERT3P(zv->zv_zilog, ==, NULL);
	zv->zv_zilog = zil_open(os, zvol_get_data, &zv->zv_kstat.dk_zil_sums);
	if (spa_writeable(dmu_objset_spa(os))) {
		if (zil_replay_disable)
			zil_destroy(zv->zv_zilog, B_FALSE);
		else
			zil_replay(os, zv, zvol_replay_vector);
	}
	zil_close(zv->zv_zilog);
	zv->zv_zilog = NULL;

	/* TODO: prefetch for geom tasting */

	zv->zv_objset = NULL;
out_dmu_objset_disown:
	dmu_objset_disown(os, B_TRUE, FTAG);

	if (error == 0 && volmode == ZFS_VOLMODE_GEOM) {
		zvol_geom_run(zv);
		g_topology_unlock();
	}
out_doi:
	kmem_free(doi, sizeof (dmu_object_info_t));
	if (error == 0) {
		rw_enter(&zvol_state_lock, RW_WRITER);
		zvol_insert(zv);
		zvol_minors++;
		rw_exit(&zvol_state_lock);
		ZFS_LOG(1, "ZVOL %s created.", name);
	}

	PICKUP_GIANT();
	return (error);
}
void
zvol_os_clear_private(zvol_state_t *zv)
{
	ASSERT(RW_LOCK_HELD(&zvol_state_lock));
	if (zv->zv_volmode == ZFS_VOLMODE_GEOM) {
		struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom;
		struct g_provider *pp = zsg->zsg_provider;

		if (pp->private == NULL) /* already cleared */
			return;

		mtx_lock(&zsg->zsg_queue_mtx);
		zsg->zsg_state = ZVOL_GEOM_STOPPED;
		pp->private = NULL;
		wakeup_one(&zsg->zsg_queue);
		while (zsg->zsg_state != ZVOL_GEOM_RUNNING)
			msleep(&zsg->zsg_state, &zsg->zsg_queue_mtx,
			    0, "zvol:w", 0);
		mtx_unlock(&zsg->zsg_queue_mtx);
		ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock));
	} else if (zv->zv_volmode == ZFS_VOLMODE_DEV) {
		struct zvol_state_dev *zsd = &zv->zv_zso->zso_dev;
		struct cdev *dev = zsd->zsd_cdev;

		if (dev != NULL)
			dev->si_drv2 = NULL;
	}
}
int
zvol_os_update_volsize(zvol_state_t *zv, uint64_t volsize)
{
	zv->zv_volsize = volsize;
	if (zv->zv_volmode == ZFS_VOLMODE_GEOM) {
		struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom;
		struct g_provider *pp = zsg->zsg_provider;

		g_topology_lock();

		if (pp->private == NULL) {
			g_topology_unlock();
			return (SET_ERROR(ENXIO));
		}

		/*
		 * Do not invoke resize event when initial size was zero.
		 * ZVOL initializes the size on first open, this is not
		 * real resizing.
		 */
		if (pp->mediasize == 0)
			pp->mediasize = zv->zv_volsize;
		else
			g_resize_provider(pp, zv->zv_volsize);

		g_topology_unlock();
	} else if (zv->zv_volmode == ZFS_VOLMODE_DEV) {
		struct zvol_state_dev *zsd = &zv->zv_zso->zso_dev;

		KNOTE_UNLOCKED(&zsd->zsd_selinfo.si_note, NOTE_ATTRIB);
	}
	return (0);
}
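/*
 * Illustrative: a userland process can watch a volmode=dev zvol for
 * resizes with kqueue(2), e.g.
 *
 *	EV_SET(&kev, fd, EVFILT_VNODE, EV_ADD | EV_CLEAR, NOTE_ATTRIB,
 *	    0, NULL);
 *
 * The KNOTE_UNLOCKED(..., NOTE_ATTRIB) above fires that filter through
 * zvol_filter_vnode(); zvol_cdev_kqfilter() currently accepts only
 * NOTE_ATTRIB.
 */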
void
zvol_os_set_disk_ro(zvol_state_t *zv, int flags)
{
	// XXX? set_disk_ro(zv->zv_zso->zvo_disk, flags);
}

void
zvol_os_set_capacity(zvol_state_t *zv, uint64_t capacity)
{
	// XXX? set_capacity(zv->zv_zso->zvo_disk, capacity);
}
boolean_t
zvol_busy(void)
{
	return (zvol_minors != 0);
}