/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>
 */

#include <sys/zfs_context.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/file.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/vdev_impl.h>
#include <sys/vdev_os.h>
#include <sys/fs/zfs.h>
#include <sys/zio.h>
#include <vm/vm_page.h>
#include <geom/geom.h>
#include <geom/geom_disk.h>
#include <geom/geom_int.h>

#ifndef g_topology_locked
#define	g_topology_locked()	sx_xlocked(&topology_lock)
#endif

/*
 * Virtual device vector for GEOM.
 */

static g_attrchanged_t vdev_geom_attrchanged;
struct g_class zfs_vdev_class = {
	.name = "ZFS::VDEV",
	.version = G_VERSION,
	.attrchanged = vdev_geom_attrchanged,
};

struct consumer_vdev_elem {
	SLIST_ENTRY(consumer_vdev_elem)	elems;
	vdev_t				*vd;
};

SLIST_HEAD(consumer_priv_t, consumer_vdev_elem);

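/*
 * The SLIST head is stored directly in g_consumer.private, so the two
 * must have the same size; the assertion below checks this at compile time.
 */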
_Static_assert(
	sizeof (((struct g_consumer *)NULL)->private) ==
	sizeof (struct consumer_priv_t *),
	"consumer_priv_t* can't be stored in g_consumer.private");

DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);

SYSCTL_DECL(_vfs_zfs_vdev);

/* Don't send BIO_FLUSH. */
static int vdev_geom_bio_flush_disable;
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RWTUN,
    &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH");
/* Don't send BIO_DELETE. */
static int vdev_geom_bio_delete_disable;
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RWTUN,
    &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE");

/* Declare local functions */
static void vdev_geom_detach(struct g_consumer *cp, boolean_t open_for_read);

/*
 * Thread local storage used to indicate when a thread is probing geoms
 * for their guids.  If NULL, this thread is not tasting geoms.  If non NULL,
 * it is looking for a replacement for the vdev_t* that is its value.
 */
uint_t zfs_geom_probe_vdev_key;

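/*
 * Fetch the provider's physical path via the GEOM::physpath attribute and
 * record it in the vdev.  A config update is requested only when the path
 * actually changed, or for a previously unset path when the caller asks.
 */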
static void
vdev_geom_set_physpath(vdev_t *vd, struct g_consumer *cp,
    boolean_t do_null_update)
{
	boolean_t needs_update = B_FALSE;
	char *physpath;
	int error, physpath_len;

	physpath_len = MAXPATHLEN;
	physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO);
	error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath);
	if (error == 0) {
		char *old_physpath;

		/* g_topology lock ensures that vdev has not been closed */
		g_topology_assert();
		old_physpath = vd->vdev_physpath;
		vd->vdev_physpath = spa_strdup(physpath);

		if (old_physpath != NULL) {
			needs_update = (strcmp(old_physpath,
			    vd->vdev_physpath) != 0);
			spa_strfree(old_physpath);
		} else
			needs_update = do_null_update;
	}
	g_free(physpath);

	/*
	 * If the physical path changed, update the config.
	 * Only request an update for previously unset physpaths if
	 * requested by the caller.
	 */
	if (needs_update)
		spa_async_request(vd->vdev_spa, SPA_ASYNC_CONFIG_UPDATE);
}

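/*
 * GEOM attribute-change callback: refresh the physical path of every vdev
 * sharing this consumer when GEOM::physpath changes.
 */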
static void
vdev_geom_attrchanged(struct g_consumer *cp, const char *attr)
{
	struct consumer_priv_t *priv;
	struct consumer_vdev_elem *elem;

	priv = (struct consumer_priv_t *)&cp->private;
	if (SLIST_EMPTY(priv))
		return;

	SLIST_FOREACH(elem, priv, elems) {
		vdev_t *vd = elem->vd;
		if (strcmp(attr, "GEOM::physpath") == 0) {
			vdev_geom_set_physpath(vd, cp, /* null_update */B_TRUE);
			return;
		}
	}
}

static void
vdev_geom_resize(struct g_consumer *cp)
{
	struct consumer_priv_t *priv;
	struct consumer_vdev_elem *elem;
	spa_t *spa;
	vdev_t *vd;

	priv = (struct consumer_priv_t *)&cp->private;
	if (SLIST_EMPTY(priv))
		return;

	SLIST_FOREACH(elem, priv, elems) {
		vd = elem->vd;
		if (vd->vdev_state != VDEV_STATE_HEALTHY)
			continue;
		spa = vd->vdev_spa;
		if (!spa->spa_autoexpand)
			continue;
		vdev_online(spa, vd->vdev_guid, ZFS_ONLINE_EXPAND, NULL);
	}
}

static void
vdev_geom_orphan(struct g_consumer *cp)
{
	struct consumer_priv_t *priv;
	// cppcheck-suppress uninitvar
	struct consumer_vdev_elem *elem;

	g_topology_assert();

	priv = (struct consumer_priv_t *)&cp->private;
	if (SLIST_EMPTY(priv))
		/* Vdev close in progress.  Ignore the event. */
		return;

	/*
	 * Orphan callbacks occur from the GEOM event thread.
	 * Concurrent with this call, new I/O requests may be
	 * working their way through GEOM about to find out
	 * (only once executed by the g_down thread) that we've
	 * been orphaned from our disk provider.  These I/Os
	 * must be retired before we can detach our consumer.
	 * This is most easily achieved by acquiring the
	 * SPA ZIO configuration lock as a writer, but doing
	 * so with the GEOM topology lock held would cause
	 * a lock order reversal.  Instead, rely on the SPA's
	 * async removal support to invoke a close on this
	 * vdev once it is safe to do so.
	 */
	SLIST_FOREACH(elem, priv, elems) {
		// cppcheck-suppress uninitvar
		vdev_t *vd = elem->vd;

		vd->vdev_remove_wanted = B_TRUE;
		spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
	}
}

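/*
 * Attach a consumer to the given provider, creating the shared "zfs::vdev"
 * geom on first use and reusing an existing consumer when one is already
 * connected to this provider.
 */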
static struct g_consumer *
vdev_geom_attach(struct g_provider *pp, vdev_t *vd, boolean_t sanity)
{
	struct g_geom *gp;
	struct g_consumer *cp;
	int error;

	g_topology_assert();

	ZFS_LOG(1, "Attaching to %s.", pp->name);

	if (sanity) {
		if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize)) {
			ZFS_LOG(1, "Failing attach of %s. "
			    "Incompatible sectorsize %d\n",
			    pp->name, pp->sectorsize);
			return (NULL);
		} else if (pp->mediasize < SPA_MINDEVSIZE) {
			ZFS_LOG(1, "Failing attach of %s. "
			    "Incompatible mediasize %ju\n",
			    pp->name, pp->mediasize);
			return (NULL);
		}
	}

	/* Do we have geom already? No? Create one. */
	LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) {
		if (gp->flags & G_GEOM_WITHER)
			continue;
		if (strcmp(gp->name, "zfs::vdev") != 0)
			continue;
		break;
	}
	if (gp == NULL) {
		gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev");
		gp->orphan = vdev_geom_orphan;
		gp->attrchanged = vdev_geom_attrchanged;
		gp->resize = vdev_geom_resize;
		cp = g_new_consumer(gp);
		error = g_attach(cp, pp);
		if (error != 0) {
			ZFS_LOG(1, "%s(%d): g_attach failed: %d\n", __func__,
			    __LINE__, error);
			vdev_geom_detach(cp, B_FALSE);
			return (NULL);
		}
		error = g_access(cp, 1, 0, 1);
		if (error != 0) {
			ZFS_LOG(1, "%s(%d): g_access failed: %d\n", __func__,
			    __LINE__, error);
			vdev_geom_detach(cp, B_FALSE);
			return (NULL);
		}
		ZFS_LOG(1, "Created geom and consumer for %s.", pp->name);
	} else {
		/* Check if we are already connected to this provider. */
		LIST_FOREACH(cp, &gp->consumer, consumer) {
			if (cp->provider == pp) {
				ZFS_LOG(1, "Found consumer for %s.", pp->name);
				break;
			}
		}
		if (cp == NULL) {
			cp = g_new_consumer(gp);
			error = g_attach(cp, pp);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_attach failed: %d\n",
				    __func__, __LINE__, error);
				vdev_geom_detach(cp, B_FALSE);
				return (NULL);
			}
			error = g_access(cp, 1, 0, 1);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_access failed: %d\n",
				    __func__, __LINE__, error);
				vdev_geom_detach(cp, B_FALSE);
				return (NULL);
			}
			ZFS_LOG(1, "Created consumer for %s.", pp->name);
		} else {
			error = g_access(cp, 1, 0, 1);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_access failed: %d\n",
				    __func__, __LINE__, error);
				return (NULL);
			}
			ZFS_LOG(1, "Used existing consumer for %s.", pp->name);
		}
	}

	if (vd != NULL)
		vd->vdev_tsd = cp;

	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
	return (cp);
}

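/*
 * Drop this vdev's references on the consumer, destroying the consumer
 * (and the geom, once its last consumer goes away) on final close.
 */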
static void
vdev_geom_detach(struct g_consumer *cp, boolean_t open_for_read)
{
	struct g_geom *gp;

	g_topology_assert();

	ZFS_LOG(1, "Detaching from %s.",
	    cp->provider && cp->provider->name ? cp->provider->name : "NULL");

	gp = cp->geom;
	if (open_for_read)
		g_access(cp, -1, 0, -1);
	/* Destroy consumer on last close. */
	if (cp->acr == 0 && cp->ace == 0) {
		if (cp->acw > 0)
			g_access(cp, 0, -cp->acw, 0);
		if (cp->provider != NULL) {
			ZFS_LOG(1, "Destroying consumer for %s.",
			    cp->provider->name ? cp->provider->name : "NULL");
			g_detach(cp);
		}
		g_destroy_consumer(cp);
	}
	/* Destroy geom if there are no consumers left. */
	if (LIST_EMPTY(&gp->consumer)) {
		ZFS_LOG(1, "Destroyed geom %s.", gp->name);
		g_wither_geom(gp, ENXIO);
	}
}

static void
vdev_geom_close_locked(vdev_t *vd)
{
	struct g_consumer *cp;
	struct consumer_priv_t *priv;
	struct consumer_vdev_elem *elem, *elem_temp;

	g_topology_assert();

	cp = vd->vdev_tsd;
	vd->vdev_delayed_close = B_FALSE;
	if (cp == NULL)
		return;

	ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
	KASSERT(cp->private != NULL, ("%s: cp->private is NULL", __func__));
	priv = (struct consumer_priv_t *)&cp->private;
	vd->vdev_tsd = NULL;
	SLIST_FOREACH_SAFE(elem, priv, elems, elem_temp) {
		if (elem->vd == vd) {
			SLIST_REMOVE(priv, elem, consumer_vdev_elem, elems);
			g_free(elem);
		}
	}

	vdev_geom_detach(cp, B_TRUE);
}

/*
 * Issue one or more bios to the vdev in parallel
 * cmds, datas, offsets, errors, and sizes are arrays of length ncmds.  Each IO
 * operation is described by parallel entries from each array.  There may be
 * more bios actually issued than entries in the array.
 */
static void
vdev_geom_io(struct g_consumer *cp, int *cmds, void **datas, off_t *offsets,
    off_t *sizes, int *errors, int ncmds)
{
	struct bio **bios;
	u_char *p;
	off_t off, maxio, s, end;
	int i, n_bios, j;
	size_t bios_size;

	maxio = maxphys - (maxphys % cp->provider->sectorsize);
	n_bios = 0;

	/* How many bios are required for all commands ? */
	for (i = 0; i < ncmds; i++)
		n_bios += (sizes[i] + maxio - 1) / maxio;

	/* Allocate memory for the bios */
	bios_size = n_bios * sizeof (struct bio *);
	bios = kmem_zalloc(bios_size, KM_SLEEP);

	/* Prepare and issue all of the bios */
	for (i = j = 0; i < ncmds; i++) {
		off = offsets[i];
		p = datas[i];
		s = sizes[i];
		end = off + s;
		ASSERT0(off % cp->provider->sectorsize);
		ASSERT0(s % cp->provider->sectorsize);

		for (; off < end; off += maxio, p += maxio, s -= maxio, j++) {
			bios[j] = g_alloc_bio();
			bios[j]->bio_cmd = cmds[i];
			bios[j]->bio_done = NULL;
			bios[j]->bio_offset = off;
			bios[j]->bio_length = MIN(s, maxio);
			bios[j]->bio_data = (caddr_t)p;
			g_io_request(bios[j], cp);
		}
	}
	ASSERT3S(j, ==, n_bios);

	/* Wait for all of the bios to complete, and clean them up */
	for (i = j = 0; i < ncmds; i++) {
		off = offsets[i];
		s = sizes[i];
		end = off + s;

		for (; off < end; off += maxio, s -= maxio, j++) {
			errors[i] = biowait(bios[j], "vdev_geom_io") ||
			    errors[i];
			g_destroy_bio(bios[j]);
		}
	}
	kmem_free(bios, bios_size);
}

/*
 * Read the vdev config from a device.  Return the number of valid labels that
 * were found.  The vdev config will be returned in config if and only if at
 * least one valid label was found.
 */
static int
vdev_geom_read_config(struct g_consumer *cp, nvlist_t **configp)
{
	struct g_provider *pp;
	nvlist_t *config;
	vdev_phys_t *vdev_lists[VDEV_LABELS];
	char *buf;
	size_t buflen;
	uint64_t psize, state, txg;
	off_t offsets[VDEV_LABELS];
	off_t size;
	off_t sizes[VDEV_LABELS];
	int cmds[VDEV_LABELS];
	int errors[VDEV_LABELS];
	int l, nlabels;

	g_topology_assert_not();

	pp = cp->provider;
	ZFS_LOG(1, "Reading config from %s...", pp->name);

	psize = pp->mediasize;
	psize = P2ALIGN_TYPED(psize, sizeof (vdev_label_t), uint64_t);

	size = sizeof (*vdev_lists[0]) + pp->sectorsize -
	    ((sizeof (*vdev_lists[0]) - 1) % pp->sectorsize) - 1;

	buflen = sizeof (vdev_lists[0]->vp_nvlist);

	/* Create all of the IO requests */
	for (l = 0; l < VDEV_LABELS; l++) {
		cmds[l] = BIO_READ;
		vdev_lists[l] = kmem_alloc(size, KM_SLEEP);
		offsets[l] = vdev_label_offset(psize, l, 0) + VDEV_SKIP_SIZE;
		sizes[l] = size;
		errors[l] = 0;
		ASSERT0(offsets[l] % pp->sectorsize);
	}

	/* Issue the IO requests */
	vdev_geom_io(cp, cmds, (void**)vdev_lists, offsets, sizes, errors,
	    VDEV_LABELS);

	/* Parse the labels */
	config = *configp = NULL;
	nlabels = 0;
	for (l = 0; l < VDEV_LABELS; l++) {
		if (errors[l] != 0)
			continue;

		buf = vdev_lists[l]->vp_nvlist;

		if (nvlist_unpack(buf, buflen, &config, 0) != 0)
			continue;

		if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
		    &state) != 0 || state > POOL_STATE_L2CACHE) {
			nvlist_free(config);
			continue;
		}

		if (state != POOL_STATE_SPARE &&
		    state != POOL_STATE_L2CACHE &&
		    (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
		    &txg) != 0 || txg == 0)) {
			nvlist_free(config);
			continue;
		}

		if (*configp != NULL)
			nvlist_free(*configp);
		*configp = config;
		nlabels++;
	}

	/* Free the label storage */
	for (l = 0; l < VDEV_LABELS; l++)
		kmem_free(vdev_lists[l], size);

	return (nlabels);
}

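/* Grow the configs array so that it can hold at least id + 1 entries. */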
static void
resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id)
{
	nvlist_t **new_configs;
	uint64_t i;

	if (id < *count)
		return;
	new_configs = kmem_zalloc((id + 1) * sizeof (nvlist_t *),
	    KM_SLEEP);
	for (i = 0; i < *count; i++)
		new_configs[i] = (*configs)[i];
	if (*configs != NULL)
		kmem_free(*configs, *count * sizeof (void *));
	*configs = new_configs;
	*count = id + 1;
}

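/*
 * Slot cfg into the configs array by top-level vdev id, keeping only the
 * config with the highest txg for each id and freeing the rest.
 */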
static void
process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg,
    const char *name, uint64_t *known_pool_guid)
{
	nvlist_t *vdev_tree;
	uint64_t pool_guid;
	uint64_t vdev_guid;
	uint64_t id, txg, known_txg;
	const char *pname;

	if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 ||
	    strcmp(pname, name) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0)
		goto ignore;

	if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0)
		goto ignore;

	txg = fnvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG);

	if (*known_pool_guid != 0) {
		if (pool_guid != *known_pool_guid)
			goto ignore;
	} else
		*known_pool_guid = pool_guid;

	resize_configs(configs, count, id);

	if ((*configs)[id] != NULL) {
		known_txg = fnvlist_lookup_uint64((*configs)[id],
		    ZPOOL_CONFIG_POOL_TXG);
		if (txg <= known_txg)
			goto ignore;
		nvlist_free((*configs)[id]);
	}

	(*configs)[id] = cfg;
	return;

ignore:
	nvlist_free(cfg);
}

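/*
 * Taste every eligible GEOM provider in the system, collecting the best
 * label config for each top-level vdev of the named pool.
 */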
int
vdev_geom_read_pool_label(const char *name,
    nvlist_t ***configs, uint64_t *count)
{
	struct g_class *mp;
	struct g_geom *gp;
	struct g_provider *pp;
	struct g_consumer *zcp;
	nvlist_t *vdev_cfg;
	uint64_t pool_guid;
	int nlabels;

	DROP_GIANT();
	g_topology_lock();

	*configs = NULL;
	*count = 0;
	pool_guid = 0;
	LIST_FOREACH(mp, &g_classes, class) {
		if (mp == &zfs_vdev_class)
			continue;
		LIST_FOREACH(gp, &mp->geom, geom) {
			if (gp->flags & G_GEOM_WITHER)
				continue;
			LIST_FOREACH(pp, &gp->provider, provider) {
				if (pp->flags & G_PF_WITHER)
					continue;
				zcp = vdev_geom_attach(pp, NULL, B_TRUE);
				if (zcp == NULL)
					continue;
				g_topology_unlock();
				nlabels = vdev_geom_read_config(zcp,
				    &vdev_cfg);
				g_topology_lock();
				vdev_geom_detach(zcp, B_TRUE);
				if (nlabels == 0)
					continue;
				ZFS_LOG(1, "successfully read vdev config");

				process_vdev_config(configs, count,
				    vdev_cfg, name, &pool_guid);
			}
		}
	}
	g_topology_unlock();
	PICKUP_GIANT();

	return (*count > 0 ? 0 : ENOENT);
}

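/*
 * How well a provider's label data matches a vdev, ordered from worst to
 * best.  Values above ZERO_MATCH encode the number of matching labels.
 */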
enum match {
	NO_MATCH	= 0,	/* No matching labels found */
	TOPGUID_MATCH	= 1,	/* Labels match top guid, not vdev guid */
	ZERO_MATCH	= 1,	/* Should never be returned */
	ONE_MATCH	= 2,	/* 1 label matching the vdev_guid */
	TWO_MATCH	= 3,	/* 2 labels matching the vdev_guid */
	THREE_MATCH	= 4,	/* 3 labels matching the vdev_guid */
	FULL_MATCH	= 5	/* all labels match the vdev_guid */
};

static enum match
vdev_attach_ok(vdev_t *vd, struct g_provider *pp)
{
	nvlist_t *config;
	uint64_t pool_guid, top_guid, vdev_guid;
	struct g_consumer *cp;
	int nlabels;

	cp = vdev_geom_attach(pp, NULL, B_TRUE);
	if (cp == NULL) {
		ZFS_LOG(1, "Unable to attach tasting instance to %s.",
		    pp->name);
		return (NO_MATCH);
	}
	g_topology_unlock();
	nlabels = vdev_geom_read_config(cp, &config);
	g_topology_lock();
	vdev_geom_detach(cp, B_TRUE);
	if (nlabels == 0) {
		ZFS_LOG(1, "Unable to read config from %s.", pp->name);
		return (NO_MATCH);
	}

	pool_guid = 0;
	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid);
	top_guid = 0;
	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID, &top_guid);
	vdev_guid = 0;
	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid);
	nvlist_free(config);

	/*
	 * Check that the label's pool guid matches the desired guid.
	 * Inactive spares and L2ARCs do not have any pool guid in the label.
	 */
	if (pool_guid != 0 && pool_guid != spa_guid(vd->vdev_spa)) {
		ZFS_LOG(1, "pool guid mismatch for provider %s: %ju != %ju.",
		    pp->name,
		    (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)pool_guid);
		return (NO_MATCH);
	}

	/*
	 * Check that the label's vdev guid matches the desired guid.
	 * The second condition handles possible race on vdev detach, when
	 * remaining vdev receives GUID of destroyed top level mirror vdev.
	 */
	if (vdev_guid == vd->vdev_guid) {
		ZFS_LOG(1, "guids match for provider %s.", pp->name);
		return (ZERO_MATCH + nlabels);
	} else if (top_guid == vd->vdev_guid && vd == vd->vdev_top) {
		ZFS_LOG(1, "top vdev guid match for provider %s.", pp->name);
		return (TOPGUID_MATCH);
	}
	ZFS_LOG(1, "vdev guid mismatch for provider %s: %ju != %ju.",
	    pp->name, (uintmax_t)vd->vdev_guid, (uintmax_t)vdev_guid);
	return (NO_MATCH);
}

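/*
 * Scan all providers for the one whose labels best match this vdev,
 * preferring the recorded vdev path to break ties, and attach to it.
 */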
static struct g_consumer *
vdev_geom_attach_by_guids(vdev_t *vd)
{
	struct g_class *mp;
	struct g_geom *gp;
	struct g_provider *pp, *best_pp;
	struct g_consumer *cp;
	const char *vdpath;
	enum match match, best_match;

	g_topology_assert();

	vdpath = vd->vdev_path + sizeof ("/dev/") - 1;
	cp = NULL;
	best_pp = NULL;
	best_match = NO_MATCH;
	LIST_FOREACH(mp, &g_classes, class) {
		if (mp == &zfs_vdev_class)
			continue;
		LIST_FOREACH(gp, &mp->geom, geom) {
			if (gp->flags & G_GEOM_WITHER)
				continue;
			LIST_FOREACH(pp, &gp->provider, provider) {
				match = vdev_attach_ok(vd, pp);
				if (match > best_match) {
					best_match = match;
					best_pp = pp;
				} else if (match == best_match) {
					if (strcmp(pp->name, vdpath) == 0) {
						best_pp = pp;
					}
				}
				if (match == FULL_MATCH)
					goto out;
			}
		}
	}

out:
	if (best_pp) {
		cp = vdev_geom_attach(best_pp, vd, B_TRUE);
		if (cp == NULL) {
			printf("ZFS WARNING: Unable to attach to %s.\n",
			    best_pp->name);
		}
	}
	return (cp);
}

static struct g_consumer *
vdev_geom_open_by_guids(vdev_t *vd)
{
	struct g_consumer *cp;
	char *buf;
	size_t len;

	g_topology_assert();

	ZFS_LOG(1, "Searching by guids [%ju:%ju].",
	    (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)vd->vdev_guid);
	cp = vdev_geom_attach_by_guids(vd);
	if (cp != NULL) {
		len = strlen(cp->provider->name) + strlen("/dev/") + 1;
		buf = kmem_alloc(len, KM_SLEEP);

		snprintf(buf, len, "/dev/%s", cp->provider->name);
		spa_strfree(vd->vdev_path);
		vd->vdev_path = buf;

		ZFS_LOG(1, "Attach by guid [%ju:%ju] succeeded, provider %s.",
		    (uintmax_t)spa_guid(vd->vdev_spa),
		    (uintmax_t)vd->vdev_guid, cp->provider->name);
	} else {
		ZFS_LOG(1, "Search by guid [%ju:%ju] failed.",
		    (uintmax_t)spa_guid(vd->vdev_spa),
		    (uintmax_t)vd->vdev_guid);
	}

	return (cp);
}

static struct g_consumer *
vdev_geom_open_by_path(vdev_t *vd, int check_guid)
{
	struct g_provider *pp;
	struct g_consumer *cp;

	g_topology_assert();

	cp = NULL;
	pp = g_provider_by_name(vd->vdev_path + sizeof ("/dev/") - 1);
	if (pp != NULL) {
		ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path);
		if (!check_guid || vdev_attach_ok(vd, pp) == FULL_MATCH)
			cp = vdev_geom_attach(pp, vd, B_FALSE);
	}

	return (cp);
}

static int
vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
    uint64_t *logical_ashift, uint64_t *physical_ashift)
{
	struct g_provider *pp;
	struct g_consumer *cp;
	int error, has_trim;
	uint16_t rate;

	/*
	 * Set the TLS to indicate downstack that we
	 * should not access zvols
	 */
	VERIFY0(tsd_set(zfs_geom_probe_vdev_key, vd));

	/*
	 * We must have a pathname, and it must be absolute.
	 */
	if (vd->vdev_path == NULL || strncmp(vd->vdev_path, "/dev/", 5) != 0) {
		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
		return (EINVAL);
	}

	/*
	 * Reopen the device if it's not currently open. Otherwise,
	 * just update the physical size of the device.
	 */
	if ((cp = vd->vdev_tsd) != NULL) {
		ASSERT(vd->vdev_reopening);
		goto skip_open;
	}

	DROP_GIANT();
	g_topology_lock();
	error = 0;

	if (vd->vdev_spa->spa_is_splitting ||
	    ((vd->vdev_prevstate == VDEV_STATE_UNKNOWN &&
	    (vd->vdev_spa->spa_load_state == SPA_LOAD_NONE ||
	    vd->vdev_spa->spa_load_state == SPA_LOAD_CREATE)))) {
		/*
		 * We are dealing with a vdev that hasn't been previously
		 * opened (since boot), and we are not loading an
		 * existing pool configuration.  This looks like a
		 * vdev add operation to a new or existing pool.
		 * Assume the user really wants to do this, and find
		 * GEOM provider by its name, ignoring GUID mismatches.
		 *
		 * XXPOLICY: It would be safer to only allow a device
		 *           that is unlabeled or labeled but missing
		 *           GUID information to be opened in this fashion,
		 *           unless we are doing a split, in which case we
		 *           should allow any guid.
		 */
		cp = vdev_geom_open_by_path(vd, 0);
	} else {
		/*
		 * Try using the recorded path for this device, but only
		 * accept it if its label data contains the expected GUIDs.
		 */
		cp = vdev_geom_open_by_path(vd, 1);
		if (cp == NULL) {
			/*
			 * The device at vd->vdev_path doesn't have the
			 * expected GUIDs. The disks might have merely
			 * moved around so try all other GEOM providers
			 * to find one with the right GUIDs.
			 */
			cp = vdev_geom_open_by_guids(vd);
		}
	}

	/* Clear the TLS now that tasting is done */
	VERIFY0(tsd_set(zfs_geom_probe_vdev_key, NULL));

	if (cp == NULL) {
		ZFS_LOG(1, "Vdev %s not found.", vd->vdev_path);
		error = ENOENT;
	} else {
		struct consumer_priv_t *priv;
		struct consumer_vdev_elem *elem;
		int spamode;

		priv = (struct consumer_priv_t *)&cp->private;
		if (cp->private == NULL)
			SLIST_INIT(priv);
		elem = g_malloc(sizeof (*elem), M_WAITOK|M_ZERO);
		elem->vd = vd;
		SLIST_INSERT_HEAD(priv, elem, elems);

		spamode = spa_mode(vd->vdev_spa);
		if (cp->provider->sectorsize > VDEV_PAD_SIZE ||
		    !ISP2(cp->provider->sectorsize)) {
			ZFS_LOG(1, "Provider %s has unsupported sectorsize.",
			    cp->provider->name);

			vdev_geom_close_locked(vd);
			error = EINVAL;
			cp = NULL;
		} else if (cp->acw == 0 && (spamode & FWRITE) != 0) {
			int i;

			for (i = 0; i < 5; i++) {
				error = g_access(cp, 0, 1, 0);
				if (error == 0)
					break;
				g_topology_unlock();
				tsleep(vd, 0, "vdev", hz / 2);
				g_topology_lock();
			}
			if (error != 0) {
				printf("ZFS WARNING: Unable to open %s for "
				    "writing (error=%d).\n",
				    cp->provider->name, error);
				vdev_geom_close_locked(vd);
				cp = NULL;
			}
		}
	}

	/* Fetch initial physical path information for this device. */
	if (cp != NULL) {
		vdev_geom_attrchanged(cp, "GEOM::physpath");

		/* Set other GEOM characteristics */
		vdev_geom_set_physpath(vd, cp, /* do_null_update */B_FALSE);
	}

	g_topology_unlock();
	PICKUP_GIANT();
	if (cp == NULL) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		vdev_dbgmsg(vd, "vdev_geom_open: failed to open [error=%d]",
		    error);
		return (error);
	}
skip_open:
	pp = cp->provider;

	/*
	 * Determine the actual size of the device.
	 */
	*max_psize = *psize = pp->mediasize;

	/*
	 * Determine the device's minimum transfer size and preferred
	 * transfer size.
	 */
	*logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
	*physical_ashift = 0;
	if (pp->stripesize && pp->stripesize > (1 << *logical_ashift) &&
	    ISP2(pp->stripesize) && pp->stripeoffset == 0)
		*physical_ashift = highbit(pp->stripesize) - 1;

	/*
	 * Clear the nowritecache settings, so that on a vdev_reopen()
	 * we will try again.
	 */
	vd->vdev_nowritecache = B_FALSE;

	/* Inform the ZIO pipeline that we are non-rotational. */
	error = g_getattr("GEOM::rotation_rate", cp, &rate);
	if (error == 0 && rate == DISK_RR_NON_ROTATING)
		vd->vdev_nonrot = B_TRUE;
	else
		vd->vdev_nonrot = B_FALSE;

	/* Set when device reports it supports TRIM. */
	error = g_getattr("GEOM::candelete", cp, &has_trim);
	vd->vdev_has_trim = (error == 0 && has_trim);

	/* Set when device reports it supports secure TRIM. */
	/* unavailable on FreeBSD */
	vd->vdev_has_securetrim = B_FALSE;

	return (0);
}

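/*
 * Close the vdev, keeping the consumer across a reopen unless it has been
 * orphaned or its provider has gone into error.
 */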
static void
vdev_geom_close(vdev_t *vd)
{
	struct g_consumer *cp;
	boolean_t locked;

	cp = vd->vdev_tsd;

	DROP_GIANT();
	locked = g_topology_locked();
	if (!locked)
		g_topology_lock();

	if (!vd->vdev_reopening ||
	    (cp != NULL && ((cp->flags & G_CF_ORPHAN) != 0 ||
	    (cp->provider != NULL && cp->provider->error != 0))))
		vdev_geom_close_locked(vd);

	if (!locked)
		g_topology_unlock();
	PICKUP_GIANT();
}

static void
vdev_geom_io_intr(struct bio *bp)
{
	vdev_t *vd;
	zio_t *zio;

	zio = bp->bio_caller1;
	vd = zio->io_vd;
	zio->io_error = bp->bio_error;
	if (zio->io_error == 0 && bp->bio_resid != 0)
		zio->io_error = SET_ERROR(EIO);

	switch (zio->io_error) {
	case ENXIO:
		if (!vd->vdev_remove_wanted) {
			/*
			 * If provider's error is set we assume it is being
			 * removed.
			 */
			if (bp->bio_to->error != 0) {
				vd->vdev_remove_wanted = B_TRUE;
				spa_async_request(zio->io_spa,
				    SPA_ASYNC_REMOVE);
			} else if (!vd->vdev_delayed_close) {
				vd->vdev_delayed_close = B_TRUE;
			}
		}
		break;
	}

	/*
	 * We have to split bio freeing into two parts, because the ABD code
	 * cannot be called in this context and vdev_op_io_done is not called
	 * for ZIO_TYPE_FLUSH zio-s.
	 */
	if (zio->io_type != ZIO_TYPE_READ && zio->io_type != ZIO_TYPE_WRITE) {
		g_destroy_bio(bp);
		zio->io_bio = NULL;
	}
	zio_delay_interrupt(zio);
}

struct vdev_geom_check_unmapped_cb_state {
	int	pages;
	uint_t	end;
};

/*
 * Callback to check the ABD segment size/alignment and count the pages.
 * GEOM requires data buffer to look virtually contiguous.  It means only
 * the first page of the buffer may not start and only the last may not
 * end on a page boundary.  All other physical pages must be full.
 */
static int
vdev_geom_check_unmapped_cb(void *buf, size_t len, void *priv)
{
	struct vdev_geom_check_unmapped_cb_state *s = priv;
	vm_offset_t off = (vm_offset_t)buf & PAGE_MASK;

	if (s->pages != 0 && off != 0)
		return (1);
	if (s->end != 0)
		return (1);
	s->end = (off + len) & PAGE_MASK;
	s->pages += (off + len + PAGE_MASK) >> PAGE_SHIFT;
	return (0);
}

/*
 * Check whether we can use unmapped I/O for this ZIO on this device to
 * avoid data copying between scattered and/or gang ABD buffer and linear.
 */
static int
vdev_geom_check_unmapped(zio_t *zio, struct g_consumer *cp)
{
	struct vdev_geom_check_unmapped_cb_state s;

	/* If unmapped I/O is administratively disabled, respect that. */
	if (!unmapped_buf_allowed)
		return (0);

	/* If the buffer is already linear, then nothing to do here. */
	if (abd_is_linear(zio->io_abd))
		return (0);

	/*
	 * If unmapped I/O is not supported by the GEOM provider,
	 * then we can't do anything and have to copy the data.
	 */
	if ((cp->provider->flags & G_PF_ACCEPT_UNMAPPED) == 0)
		return (0);

	/* Check the buffer chunks sizes/alignments and count pages. */
	s.pages = s.end = 0;
	if (abd_iterate_func(zio->io_abd, 0, zio->io_size,
	    vdev_geom_check_unmapped_cb, &s))
		return (0);
	return (s.pages);
}

/*
 * Callback to translate the ABD segment into array of physical pages.
 */
static int
vdev_geom_fill_unmap_cb(void *buf, size_t len, void *priv)
{
	struct bio *bp = priv;
	vm_offset_t addr = (vm_offset_t)buf;
	vm_offset_t end = addr + len;

	if (bp->bio_ma_n == 0) {
		bp->bio_ma_offset = addr & PAGE_MASK;
		addr &= ~PAGE_MASK;
	} else {
		ASSERT0(P2PHASE(addr, PAGE_SIZE));
	}
	do {
		bp->bio_ma[bp->bio_ma_n++] =
		    PHYS_TO_VM_PAGE(pmap_kextract(addr));
		addr += PAGE_SIZE;
	} while (addr < end);
	return (0);
}

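/*
 * Translate the ZIO into a single GEOM bio and issue it, handling the
 * flush and TRIM shortcuts up front.
 */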
static void
vdev_geom_io_start(zio_t *zio)
{
	vdev_t *vd;
	struct g_consumer *cp;
	struct bio *bp;

	vd = zio->io_vd;

	if (zio->io_type == ZIO_TYPE_FLUSH) {
		/* XXPOLICY */
		if (!vdev_readable(vd)) {
			zio->io_error = SET_ERROR(ENXIO);
			zio_interrupt(zio);
			return;
		}

		if (zfs_nocacheflush || vdev_geom_bio_flush_disable) {
			zio_execute(zio);
			return;
		}

		if (vd->vdev_nowritecache) {
			zio->io_error = SET_ERROR(ENOTSUP);
			zio_execute(zio);
			return;
		}
	} else if (zio->io_type == ZIO_TYPE_TRIM) {
		if (vdev_geom_bio_delete_disable) {
			zio_execute(zio);
			return;
		}
	}

	ASSERT(zio->io_type == ZIO_TYPE_READ ||
	    zio->io_type == ZIO_TYPE_WRITE ||
	    zio->io_type == ZIO_TYPE_TRIM ||
	    zio->io_type == ZIO_TYPE_FLUSH);

	cp = vd->vdev_tsd;
	if (cp == NULL) {
		zio->io_error = SET_ERROR(ENXIO);
		zio_interrupt(zio);
		return;
	}
	bp = g_alloc_bio();
	bp->bio_caller1 = zio;
	switch (zio->io_type) {
	case ZIO_TYPE_READ:
	case ZIO_TYPE_WRITE:
		zio->io_target_timestamp = zio_handle_io_delay(zio);
		bp->bio_offset = zio->io_offset;
		bp->bio_length = zio->io_size;
		if (zio->io_type == ZIO_TYPE_READ)
			bp->bio_cmd = BIO_READ;
		else
			bp->bio_cmd = BIO_WRITE;

		/*
		 * If possible, represent scattered and/or gang ABD buffer to
		 * GEOM as an array of physical pages.  It allows to satisfy
		 * requirement of virtually contiguous buffer without copying.
		 */
		int pgs = vdev_geom_check_unmapped(zio, cp);
		if (pgs > 0) {
			bp->bio_ma = malloc(sizeof (struct vm_page *) * pgs,
			    M_DEVBUF, M_WAITOK);
			bp->bio_ma_n = 0;
			bp->bio_ma_offset = 0;
			abd_iterate_func(zio->io_abd, 0, zio->io_size,
			    vdev_geom_fill_unmap_cb, bp);
			bp->bio_data = unmapped_buf;
			bp->bio_flags |= BIO_UNMAPPED;
		} else {
			if (zio->io_type == ZIO_TYPE_READ) {
				bp->bio_data = abd_borrow_buf(zio->io_abd,
				    zio->io_size);
			} else {
				bp->bio_data = abd_borrow_buf_copy(zio->io_abd,
				    zio->io_size);
			}
		}
		break;
	case ZIO_TYPE_TRIM:
		bp->bio_cmd = BIO_DELETE;
		bp->bio_data = NULL;
		bp->bio_offset = zio->io_offset;
		bp->bio_length = zio->io_size;
		break;
	case ZIO_TYPE_FLUSH:
		bp->bio_cmd = BIO_FLUSH;
		bp->bio_data = NULL;
		bp->bio_offset = cp->provider->mediasize;
		bp->bio_length = 0;
		break;
	default:
		panic("invalid zio->io_type: %d\n", zio->io_type);
	}

	bp->bio_done = vdev_geom_io_intr;
	zio->io_bio = bp;

	g_io_request(bp, cp);
}

static void
vdev_geom_io_done(zio_t *zio)
{
	struct bio *bp = zio->io_bio;

	if (zio->io_type != ZIO_TYPE_READ && zio->io_type != ZIO_TYPE_WRITE) {
		ASSERT3P(bp, ==, NULL);
		return;
	}

	if (bp == NULL) {
		ASSERT3S(zio->io_error, ==, ENXIO);
		return;
	}

	if (bp->bio_ma != NULL) {
		free(bp->bio_ma, M_DEVBUF);
	} else {
		if (zio->io_type == ZIO_TYPE_READ) {
			abd_return_buf_copy(zio->io_abd, bp->bio_data,
			    zio->io_size);
		} else {
			abd_return_buf(zio->io_abd, bp->bio_data,
			    zio->io_size);
		}
	}

	g_destroy_bio(bp);
	zio->io_bio = NULL;
}

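/* vdev_geom needs no extra hold/rele bookkeeping; these are no-ops. */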
static void
vdev_geom_hold(vdev_t *vd)
{
}

static void
vdev_geom_rele(vdev_t *vd)
{
}

vdev_ops_t vdev_disk_ops = {
	.vdev_op_init = NULL,
	.vdev_op_fini = NULL,
	.vdev_op_open = vdev_geom_open,
	.vdev_op_close = vdev_geom_close,
	.vdev_op_asize = vdev_default_asize,
	.vdev_op_min_asize = vdev_default_min_asize,
	.vdev_op_min_alloc = NULL,
	.vdev_op_io_start = vdev_geom_io_start,
	.vdev_op_io_done = vdev_geom_io_done,
	.vdev_op_state_change = NULL,
	.vdev_op_need_resilver = NULL,
	.vdev_op_hold = vdev_geom_hold,
	.vdev_op_rele = vdev_geom_rele,
	.vdev_op_remap = NULL,
	.vdev_op_xlate = vdev_default_xlate,
	.vdev_op_rebuild_asize = NULL,
	.vdev_op_metaslab_init = NULL,
	.vdev_op_config_generate = NULL,
	.vdev_op_nparity = NULL,
	.vdev_op_ndisks = NULL,
	.vdev_op_type = VDEV_TYPE_DISK,		/* name of this vdev type */
	.vdev_op_leaf = B_TRUE			/* leaf vdev */
};